Upload StyleTTS2 checkpoint epoch_2nd_00003.pth with all inference components

Files changed (16) hide show

README.md CHANGED Viewed

@@ -16,16 +16,16 @@ This model is a fine-tuned version of StyleTTS2, containing all necessary compon
 - **Base Model:** StyleTTS2-LibriTTS
 - **Architecture:** StyleTTS2
 - **Task:** Text-to-Speech
-- **Last Checkpoint:** epoch_2nd_00004.pth
 ## Training Details
-- **Total Epochs:** 5
-- **Completed Epochs:** 4
-- **Total Iterations:** 389
 - **Batch Size:** 2
 - **Max Length:** 120
 - **Learning Rate:** 0.0001
-- **Final Validation Loss:** 0.431213
 ## Model Components
 The repository includes all necessary components for inference:

 - **Base Model:** StyleTTS2-LibriTTS
 - **Architecture:** StyleTTS2
 - **Task:** Text-to-Speech
+- **Last Checkpoint:** epoch_2nd_00003.pth
 ## Training Details
+- **Total Epochs:** 4
+- **Completed Epochs:** 3
+- **Total Iterations:** 310
 - **Batch Size:** 2
 - **Max Length:** 120
 - **Learning Rate:** 0.0001
+- **Final Validation Loss:** 0.416427
 ## Model Components
 The repository includes all necessary components for inference:

bert.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a44b76d2190d73b5cf36558f165e774d7ed0c15f0538fc4227b3e18eb6df3372
 size 25178740

 version https://git-lfs.github.com/spec/v1
+oid sha256:4a1b8eb6c1691f85a86685ddd7a158666e1c36a09ca79a3858ec778c07709ad1
 size 25178740

bert_encoder.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:302984f023cfb39f509083de56204c0736e02383036fc8ab2e8b02aebf95d402
 size 1576502

 version https://git-lfs.github.com/spec/v1
+oid sha256:644603557b868de4173769f81b6808c1b254fc5e4f0271722523d720296b39f3
 size 1576502

checkpoint.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:33c975986907b0afb3e0da5292e916f8c3a5a71a956db792c95425233586ea3f
-size 2040092478

 version https://git-lfs.github.com/spec/v1
+oid sha256:50ba663b621bc53eb1de825cdd2271553771e9009d719b6415515049a632d627
+size 2201837262

config.json CHANGED Viewed

@@ -58,7 +58,7 @@
     "hidden_dim": 512,
     "max_conv_dim": 512,
     "max_dur": 50,
-    "multispeaker": true,
     "n_layer": 3,
     "n_mels": 80,
     "n_token": 178,
@@ -72,7 +72,7 @@
     "style_dim": 128
   },
   "training_config": {
-    "epochs": 5,
     "batch_size": 2,
     "max_len": 120,
     "optimizer": {
@@ -81,7 +81,7 @@
       "lr": 0.0001
     },
     "loss_params": {
-      "diff_epoch": 10,
       "joint_epoch": 110,
       "lambda_F0": 1.0,
       "lambda_ce": 20.0,
@@ -112,39 +112,35 @@
     "val_data": "Data/val_list.txt"
   },
   "model_state": {
-    "epoch": 4,
-    "iterations": 389,
-    "val_loss": 0.4312129616737366
   },
   "training_metrics": {
     "train_loss": [],
     "val_loss": [
-      21.0,
-      20.0,
-      19.0,
-      14.0,
-      10.0
     ],
     "dur_loss": [
-      0.461,
-      0.473,
-      0.46,
-      0.437,
-      0.431
     ],
     "F0_loss": [
-      1.17,
-      1.196,
-      1.234,
-      1.112,
-      1.095
     ],
     "epochs": [
       1,
       2,
       3,
-      4,
-      5
     ]
   }
 }

     "hidden_dim": 512,
     "max_conv_dim": 512,
     "max_dur": 50,
+    "multispeaker": false,
     "n_layer": 3,
     "n_mels": 80,
     "n_token": 178,
     "style_dim": 128
   },
   "training_config": {
+    "epochs": 4,
     "batch_size": 2,
     "max_len": 120,
     "optimizer": {
       "lr": 0.0001
     },
     "loss_params": {
+      "diff_epoch": 1,
       "joint_epoch": 110,
       "lambda_F0": 1.0,
       "lambda_ce": 20.0,
     "val_data": "Data/val_list.txt"
   },
   "model_state": {
+    "epoch": 3,
+    "iterations": 310,
+    "val_loss": 0.41642701625823975
   },
   "training_metrics": {
     "train_loss": [],
     "val_loss": [
+      18.0,
+      38.0,
+      58.0,
+      15.0
     ],
     "dur_loss": [
+      0.458,
+      0.444,
+      0.428,
+      0.416
     ],
     "F0_loss": [
+      1.186,
+      1.157,
+      1.089,
+      1.198
     ],
     "epochs": [
       1,
       2,
       3,
+      4
     ]
   }
 }

config.yml CHANGED Viewed

@@ -47,7 +47,7 @@ model_params:
   hidden_dim: 512
   max_conv_dim: 512
   max_dur: 50
-  multispeaker: true
   n_layer: 3
   n_mels: 80
   n_token: 178

   hidden_dim: 512
   max_conv_dim: 512
   max_dur: 50
+  multispeaker: false
   n_layer: 3
   n_mels: 80
   n_token: 178

decoder.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cbc5e3e5b031bdf49f7191aece94ad7c013a1254d3d52d7b093d5f84b6087375
 size 217409318

 version https://git-lfs.github.com/spec/v1
+oid sha256:30c7195cf0cfc447d8d931386cc7f5acbc28c06c4819d205446ec540119c319c
 size 217409318

diffusion.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:69229a607b119bf2c180406e680c1e06acefc44ec4d51fe294040f5215cb8a68
-size 101337326

 version https://git-lfs.github.com/spec/v1
+oid sha256:5836a6d7cbf6cbf4dfca6227858e2a1036a43ec8505f17370f305b8e2a9e425c
+size 87699504

mpd.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3753249a76efff6d9775094fa703060bd21ae078b45b5ab8faa7231105564675
 size 164447824

 version https://git-lfs.github.com/spec/v1
+oid sha256:7d3a57f166238f1ea201adddf0a874b465f92ba1b1ec3df5d404b80532e1fde7
 size 164447824

msd.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:034efa1672e34f56d1cc6595e7484293653cbd7982d159683b7f3bf11b2bc84e
 size 1139020

 version https://git-lfs.github.com/spec/v1
+oid sha256:5c8dbadfe935c64c3de0334e9b39be51ca98d9dc66e990ef8fb651c20ef0a5e4
 size 1139020

predictor.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f706f246bfc1f46bca97fd61d33aa458feaaa756948acacd2922ecade8823d63
 size 64813639

 version https://git-lfs.github.com/spec/v1
+oid sha256:90e29b9f36dd384572681aab86aad08e5d5a048a8a3b2e1628fec7ec984d6870
 size 64813639

predictor_encoder.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8a25c32951cb14384fd5ea1ce9bd97cdad44988a7a1d7201a2e2104a159f231b
 size 55547155

 version https://git-lfs.github.com/spec/v1
+oid sha256:a1e3c19823d9db9173efeac4d83a1eaae143ed164cf92eac2c82d31f3c98e9b1
 size 55547155

style_encoder.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:47863e20c23b8520ff91accb21de07959902d8d610ddf960e976eda6c78a12c6
 size 55546871

 version https://git-lfs.github.com/spec/v1
+oid sha256:f771cabc78e821ab2bfb9a58911490b067096c48a9b86ab3ce06c71d4c2c307c
 size 55546871

text_aligner.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a30bbf04712782187a6a5c4cff9e251c2662092e91a435cd3b1a6ef1dd0044b9
 size 31531315

 version https://git-lfs.github.com/spec/v1
+oid sha256:039329ced02a94742d4d25b7d9333e08edee385a2a0b054977444be77b72dff0
 size 31531315

text_encoder.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2e0830414969e6619a2db28dff809b5d8922bf39dddde39ae423fb9a7a21774f
 size 22432460

 version https://git-lfs.github.com/spec/v1
+oid sha256:bdba9fac38935746c95bd93914f36ff64f0196e672bb38eefeeff6be302d7348
 size 22432460

training_metrics.png CHANGED Viewed