nonoJDWAOIDAWKDA
committed on
Upload StyleTTS2 checkpoint epoch_2nd_00003.pth with all inference components
Browse files
- README.md +5 -5
- bert.pth +1 -1
- bert_encoder.pth +1 -1
- checkpoint.pth +2 -2
- config.json +19 -23
- config.yml +1 -1
- decoder.pth +1 -1
- diffusion.pth +2 -2
- mpd.pth +1 -1
- msd.pth +1 -1
- predictor.pth +1 -1
- predictor_encoder.pth +1 -1
- style_encoder.pth +1 -1
- text_aligner.pth +1 -1
- text_encoder.pth +1 -1
- training_metrics.png +0 -0
README.md
CHANGED
@@ -16,16 +16,16 @@ This model is a fine-tuned version of StyleTTS2, containing all necessary compon
|
|
16 |
- **Base Model:** StyleTTS2-LibriTTS
|
17 |
- **Architecture:** StyleTTS2
|
18 |
- **Task:** Text-to-Speech
|
19 |
-
- **Last Checkpoint:**
|
20 |
|
21 |
## Training Details
|
22 |
-
- **Total Epochs:**
|
23 |
-
- **Completed Epochs:**
|
24 |
-
- **Total Iterations:**
|
25 |
- **Batch Size:** 2
|
26 |
- **Max Length:** 120
|
27 |
- **Learning Rate:** 0.0001
|
28 |
-
- **Final Validation Loss:** 0.
|
29 |
|
30 |
## Model Components
|
31 |
The repository includes all necessary components for inference:
|
|
|
16 |
- **Base Model:** StyleTTS2-LibriTTS
|
17 |
- **Architecture:** StyleTTS2
|
18 |
- **Task:** Text-to-Speech
|
19 |
+
- **Last Checkpoint:** epoch_2nd_00003.pth
|
20 |
|
21 |
## Training Details
|
22 |
+
- **Total Epochs:** 4
|
23 |
+
- **Completed Epochs:** 3
|
24 |
+
- **Total Iterations:** 310
|
25 |
- **Batch Size:** 2
|
26 |
- **Max Length:** 120
|
27 |
- **Learning Rate:** 0.0001
|
28 |
+
- **Final Validation Loss:** 0.416427
|
29 |
|
30 |
## Model Components
|
31 |
The repository includes all necessary components for inference:
|
bert.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 25178740
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4a1b8eb6c1691f85a86685ddd7a158666e1c36a09ca79a3858ec778c07709ad1
|
3 |
size 25178740
|
bert_encoder.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1576502
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:644603557b868de4173769f81b6808c1b254fc5e4f0271722523d720296b39f3
|
3 |
size 1576502
|
checkpoint.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:50ba663b621bc53eb1de825cdd2271553771e9009d719b6415515049a632d627
|
3 |
+
size 2201837262
|
config.json
CHANGED
@@ -58,7 +58,7 @@
|
|
58 |
"hidden_dim": 512,
|
59 |
"max_conv_dim": 512,
|
60 |
"max_dur": 50,
|
61 |
-
"multispeaker":
|
62 |
"n_layer": 3,
|
63 |
"n_mels": 80,
|
64 |
"n_token": 178,
|
@@ -72,7 +72,7 @@
|
|
72 |
"style_dim": 128
|
73 |
},
|
74 |
"training_config": {
|
75 |
-
"epochs":
|
76 |
"batch_size": 2,
|
77 |
"max_len": 120,
|
78 |
"optimizer": {
|
@@ -81,7 +81,7 @@
|
|
81 |
"lr": 0.0001
|
82 |
},
|
83 |
"loss_params": {
|
84 |
-
"diff_epoch":
|
85 |
"joint_epoch": 110,
|
86 |
"lambda_F0": 1.0,
|
87 |
"lambda_ce": 20.0,
|
@@ -112,39 +112,35 @@
|
|
112 |
"val_data": "Data/val_list.txt"
|
113 |
},
|
114 |
"model_state": {
|
115 |
-
"epoch":
|
116 |
-
"iterations":
|
117 |
-
"val_loss": 0.
|
118 |
},
|
119 |
"training_metrics": {
|
120 |
"train_loss": [],
|
121 |
"val_loss": [
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
10.0
|
127 |
],
|
128 |
"dur_loss": [
|
129 |
-
0.
|
130 |
-
0.
|
131 |
-
0.
|
132 |
-
0.
|
133 |
-
0.431
|
134 |
],
|
135 |
"F0_loss": [
|
136 |
-
1.
|
137 |
-
1.
|
138 |
-
1.
|
139 |
-
1.
|
140 |
-
1.095
|
141 |
],
|
142 |
"epochs": [
|
143 |
1,
|
144 |
2,
|
145 |
3,
|
146 |
-
4
|
147 |
-
5
|
148 |
]
|
149 |
}
|
150 |
}
|
|
|
58 |
"hidden_dim": 512,
|
59 |
"max_conv_dim": 512,
|
60 |
"max_dur": 50,
|
61 |
+
"multispeaker": false,
|
62 |
"n_layer": 3,
|
63 |
"n_mels": 80,
|
64 |
"n_token": 178,
|
|
|
72 |
"style_dim": 128
|
73 |
},
|
74 |
"training_config": {
|
75 |
+
"epochs": 4,
|
76 |
"batch_size": 2,
|
77 |
"max_len": 120,
|
78 |
"optimizer": {
|
|
|
81 |
"lr": 0.0001
|
82 |
},
|
83 |
"loss_params": {
|
84 |
+
"diff_epoch": 1,
|
85 |
"joint_epoch": 110,
|
86 |
"lambda_F0": 1.0,
|
87 |
"lambda_ce": 20.0,
|
|
|
112 |
"val_data": "Data/val_list.txt"
|
113 |
},
|
114 |
"model_state": {
|
115 |
+
"epoch": 3,
|
116 |
+
"iterations": 310,
|
117 |
+
"val_loss": 0.41642701625823975
|
118 |
},
|
119 |
"training_metrics": {
|
120 |
"train_loss": [],
|
121 |
"val_loss": [
|
122 |
+
18.0,
|
123 |
+
38.0,
|
124 |
+
58.0,
|
125 |
+
15.0
|
|
|
126 |
],
|
127 |
"dur_loss": [
|
128 |
+
0.458,
|
129 |
+
0.444,
|
130 |
+
0.428,
|
131 |
+
0.416
|
|
|
132 |
],
|
133 |
"F0_loss": [
|
134 |
+
1.186,
|
135 |
+
1.157,
|
136 |
+
1.089,
|
137 |
+
1.198
|
|
|
138 |
],
|
139 |
"epochs": [
|
140 |
1,
|
141 |
2,
|
142 |
3,
|
143 |
+
4
|
|
|
144 |
]
|
145 |
}
|
146 |
}
|
config.yml
CHANGED
@@ -47,7 +47,7 @@ model_params:
|
|
47 |
hidden_dim: 512
|
48 |
max_conv_dim: 512
|
49 |
max_dur: 50
|
50 |
-
multispeaker:
|
51 |
n_layer: 3
|
52 |
n_mels: 80
|
53 |
n_token: 178
|
|
|
47 |
hidden_dim: 512
|
48 |
max_conv_dim: 512
|
49 |
max_dur: 50
|
50 |
+
multispeaker: false
|
51 |
n_layer: 3
|
52 |
n_mels: 80
|
53 |
n_token: 178
|
decoder.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 217409318
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:30c7195cf0cfc447d8d931386cc7f5acbc28c06c4819d205446ec540119c319c
|
3 |
size 217409318
|
diffusion.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5836a6d7cbf6cbf4dfca6227858e2a1036a43ec8505f17370f305b8e2a9e425c
|
3 |
+
size 87699504
|
mpd.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 164447824
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7d3a57f166238f1ea201adddf0a874b465f92ba1b1ec3df5d404b80532e1fde7
|
3 |
size 164447824
|
msd.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1139020
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5c8dbadfe935c64c3de0334e9b39be51ca98d9dc66e990ef8fb651c20ef0a5e4
|
3 |
size 1139020
|
predictor.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 64813639
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:90e29b9f36dd384572681aab86aad08e5d5a048a8a3b2e1628fec7ec984d6870
|
3 |
size 64813639
|
predictor_encoder.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 55547155
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a1e3c19823d9db9173efeac4d83a1eaae143ed164cf92eac2c82d31f3c98e9b1
|
3 |
size 55547155
|
style_encoder.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 55546871
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f771cabc78e821ab2bfb9a58911490b067096c48a9b86ab3ce06c71d4c2c307c
|
3 |
size 55546871
|
text_aligner.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 31531315
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:039329ced02a94742d4d25b7d9333e08edee385a2a0b054977444be77b72dff0
|
3 |
size 31531315
|
text_encoder.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 22432460
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bdba9fac38935746c95bd93914f36ff64f0196e672bb38eefeeff6be302d7348
|
3 |
size 22432460
|
training_metrics.png
CHANGED