nonoJDWAOIDAWKDA committed on
Commit
915f7e7
·
verified ·
1 Parent(s): add0767

Upload StyleTTS2 checkpoint epoch_2nd_00003.pth with all inference components

Browse files
README.md CHANGED
@@ -16,16 +16,16 @@ This model is a fine-tuned version of StyleTTS2, containing all necessary compon
16
  - **Base Model:** StyleTTS2-LibriTTS
17
  - **Architecture:** StyleTTS2
18
  - **Task:** Text-to-Speech
19
- - **Last Checkpoint:** epoch_2nd_00004.pth
20
 
21
  ## Training Details
22
- - **Total Epochs:** 5
23
- - **Completed Epochs:** 4
24
- - **Total Iterations:** 389
25
  - **Batch Size:** 2
26
  - **Max Length:** 120
27
  - **Learning Rate:** 0.0001
28
- - **Final Validation Loss:** 0.431213
29
 
30
  ## Model Components
31
  The repository includes all necessary components for inference:
 
16
  - **Base Model:** StyleTTS2-LibriTTS
17
  - **Architecture:** StyleTTS2
18
  - **Task:** Text-to-Speech
19
+ - **Last Checkpoint:** epoch_2nd_00003.pth
20
 
21
  ## Training Details
22
+ - **Total Epochs:** 4
23
+ - **Completed Epochs:** 3
24
+ - **Total Iterations:** 310
25
  - **Batch Size:** 2
26
  - **Max Length:** 120
27
  - **Learning Rate:** 0.0001
28
+ - **Final Validation Loss:** 0.416427
29
 
30
  ## Model Components
31
  The repository includes all necessary components for inference:
bert.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a44b76d2190d73b5cf36558f165e774d7ed0c15f0538fc4227b3e18eb6df3372
3
  size 25178740
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a1b8eb6c1691f85a86685ddd7a158666e1c36a09ca79a3858ec778c07709ad1
3
  size 25178740
bert_encoder.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:302984f023cfb39f509083de56204c0736e02383036fc8ab2e8b02aebf95d402
3
  size 1576502
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:644603557b868de4173769f81b6808c1b254fc5e4f0271722523d720296b39f3
3
  size 1576502
checkpoint.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:33c975986907b0afb3e0da5292e916f8c3a5a71a956db792c95425233586ea3f
3
- size 2040092478
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50ba663b621bc53eb1de825cdd2271553771e9009d719b6415515049a632d627
3
+ size 2201837262
config.json CHANGED
@@ -58,7 +58,7 @@
58
  "hidden_dim": 512,
59
  "max_conv_dim": 512,
60
  "max_dur": 50,
61
- "multispeaker": true,
62
  "n_layer": 3,
63
  "n_mels": 80,
64
  "n_token": 178,
@@ -72,7 +72,7 @@
72
  "style_dim": 128
73
  },
74
  "training_config": {
75
- "epochs": 5,
76
  "batch_size": 2,
77
  "max_len": 120,
78
  "optimizer": {
@@ -81,7 +81,7 @@
81
  "lr": 0.0001
82
  },
83
  "loss_params": {
84
- "diff_epoch": 10,
85
  "joint_epoch": 110,
86
  "lambda_F0": 1.0,
87
  "lambda_ce": 20.0,
@@ -112,39 +112,35 @@
112
  "val_data": "Data/val_list.txt"
113
  },
114
  "model_state": {
115
- "epoch": 4,
116
- "iterations": 389,
117
- "val_loss": 0.4312129616737366
118
  },
119
  "training_metrics": {
120
  "train_loss": [],
121
  "val_loss": [
122
- 21.0,
123
- 20.0,
124
- 19.0,
125
- 14.0,
126
- 10.0
127
  ],
128
  "dur_loss": [
129
- 0.461,
130
- 0.473,
131
- 0.46,
132
- 0.437,
133
- 0.431
134
  ],
135
  "F0_loss": [
136
- 1.17,
137
- 1.196,
138
- 1.234,
139
- 1.112,
140
- 1.095
141
  ],
142
  "epochs": [
143
  1,
144
  2,
145
  3,
146
- 4,
147
- 5
148
  ]
149
  }
150
  }
 
58
  "hidden_dim": 512,
59
  "max_conv_dim": 512,
60
  "max_dur": 50,
61
+ "multispeaker": false,
62
  "n_layer": 3,
63
  "n_mels": 80,
64
  "n_token": 178,
 
72
  "style_dim": 128
73
  },
74
  "training_config": {
75
+ "epochs": 4,
76
  "batch_size": 2,
77
  "max_len": 120,
78
  "optimizer": {
 
81
  "lr": 0.0001
82
  },
83
  "loss_params": {
84
+ "diff_epoch": 1,
85
  "joint_epoch": 110,
86
  "lambda_F0": 1.0,
87
  "lambda_ce": 20.0,
 
112
  "val_data": "Data/val_list.txt"
113
  },
114
  "model_state": {
115
+ "epoch": 3,
116
+ "iterations": 310,
117
+ "val_loss": 0.41642701625823975
118
  },
119
  "training_metrics": {
120
  "train_loss": [],
121
  "val_loss": [
122
+ 18.0,
123
+ 38.0,
124
+ 58.0,
125
+ 15.0
 
126
  ],
127
  "dur_loss": [
128
+ 0.458,
129
+ 0.444,
130
+ 0.428,
131
+ 0.416
 
132
  ],
133
  "F0_loss": [
134
+ 1.186,
135
+ 1.157,
136
+ 1.089,
137
+ 1.198
 
138
  ],
139
  "epochs": [
140
  1,
141
  2,
142
  3,
143
+ 4
 
144
  ]
145
  }
146
  }
config.yml CHANGED
@@ -47,7 +47,7 @@ model_params:
47
  hidden_dim: 512
48
  max_conv_dim: 512
49
  max_dur: 50
50
- multispeaker: true
51
  n_layer: 3
52
  n_mels: 80
53
  n_token: 178
 
47
  hidden_dim: 512
48
  max_conv_dim: 512
49
  max_dur: 50
50
+ multispeaker: false
51
  n_layer: 3
52
  n_mels: 80
53
  n_token: 178
decoder.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cbc5e3e5b031bdf49f7191aece94ad7c013a1254d3d52d7b093d5f84b6087375
3
  size 217409318
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30c7195cf0cfc447d8d931386cc7f5acbc28c06c4819d205446ec540119c319c
3
  size 217409318
diffusion.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:69229a607b119bf2c180406e680c1e06acefc44ec4d51fe294040f5215cb8a68
3
- size 101337326
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5836a6d7cbf6cbf4dfca6227858e2a1036a43ec8505f17370f305b8e2a9e425c
3
+ size 87699504
mpd.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3753249a76efff6d9775094fa703060bd21ae078b45b5ab8faa7231105564675
3
  size 164447824
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d3a57f166238f1ea201adddf0a874b465f92ba1b1ec3df5d404b80532e1fde7
3
  size 164447824
msd.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:034efa1672e34f56d1cc6595e7484293653cbd7982d159683b7f3bf11b2bc84e
3
  size 1139020
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c8dbadfe935c64c3de0334e9b39be51ca98d9dc66e990ef8fb651c20ef0a5e4
3
  size 1139020
predictor.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f706f246bfc1f46bca97fd61d33aa458feaaa756948acacd2922ecade8823d63
3
  size 64813639
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90e29b9f36dd384572681aab86aad08e5d5a048a8a3b2e1628fec7ec984d6870
3
  size 64813639
predictor_encoder.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a25c32951cb14384fd5ea1ce9bd97cdad44988a7a1d7201a2e2104a159f231b
3
  size 55547155
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1e3c19823d9db9173efeac4d83a1eaae143ed164cf92eac2c82d31f3c98e9b1
3
  size 55547155
style_encoder.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:47863e20c23b8520ff91accb21de07959902d8d610ddf960e976eda6c78a12c6
3
  size 55546871
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f771cabc78e821ab2bfb9a58911490b067096c48a9b86ab3ce06c71d4c2c307c
3
  size 55546871
text_aligner.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a30bbf04712782187a6a5c4cff9e251c2662092e91a435cd3b1a6ef1dd0044b9
3
  size 31531315
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:039329ced02a94742d4d25b7d9333e08edee385a2a0b054977444be77b72dff0
3
  size 31531315
text_encoder.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2e0830414969e6619a2db28dff809b5d8922bf39dddde39ae423fb9a7a21774f
3
  size 22432460
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdba9fac38935746c95bd93914f36ff64f0196e672bb38eefeeff6be302d7348
3
  size 22432460
training_metrics.png CHANGED