diff --git a/.DS_Store b/.DS_Store
index 661f9f3cb2e9c9e0fe24ce0d2f445f3a6b1aaf45..95b28b583a25d9bcb0ce79af195354a5d255f708 100644
Binary files a/.DS_Store and b/.DS_Store differ
diff --git a/TTS/.DS_Store b/TTS/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..fef718e76ad8d46b287bdc05ef235b72b0ec87a8
Binary files /dev/null and b/TTS/.DS_Store differ
diff --git a/TTS/.models.json b/TTS/.models.json
new file mode 100644
index 0000000000000000000000000000000000000000..84a7cd7dba263d8d32878889c199ededa3566601
--- /dev/null
+++ b/TTS/.models.json
@@ -0,0 +1,515 @@
+{
+    "tts_models": {
+        "multilingual":{
+            "multi-dataset":{
+                "your_tts":{
+                    "description": "Your TTS model accompanying the paper https://arxiv.org/abs/2112.02418",
+                    "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--multilingual--multi-dataset--your_tts.zip",
+                    "default_vocoder": null,
+                    "commit": "e9a1953e",
+                    "license": "CC BY-NC-ND 4.0",
+                    "contact": "egolge@coqui.ai"
+                }
+            }
+        },
+        "en": {
+            "ek1": {
+                "tacotron2": {
+                    "description": "EK1 en-rp tacotron2 by NMStoker",
+                    "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--en--ek1--tacotron2.zip",
+                    "default_vocoder": "vocoder_models/en/ek1/wavegrad",
+                    "commit": "c802255",
+                    "license": "apache 2.0"
+                }
+            },
+            "ljspeech": {
+                "tacotron2-DDC": {
+                    "description": "Tacotron2 with Double Decoder Consistency.",
+                    "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--en--ljspeech--tacotron2-DDC.zip",
+                    "default_vocoder": "vocoder_models/en/ljspeech/hifigan_v2",
+                    "commit": "bae2ad0f",
+                    "author": "Eren Gölge @erogol",
+                    "license": "apache 2.0",
+                    "contact": "egolge@coqui.com"
+                },
+                "tacotron2-DDC_ph": {
+                    "description": "Tacotron2 with Double Decoder Consistency with phonemes.",
+                    "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--en--ljspeech--tacotron2-DDC_ph.zip",
+                    "default_vocoder": "vocoder_models/en/ljspeech/univnet",
+                    "commit": "3900448",
+                    "author": "Eren Gölge @erogol",
+                    "license": "apache 2.0",
+                    "contact": "egolge@coqui.com"
+                },
+                "glow-tts": {
+                    "description": "",
+                    "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--en--ljspeech--glow-tts.zip",
+                    "stats_file": null,
+                    "default_vocoder": "vocoder_models/en/ljspeech/multiband-melgan",
+                    "commit": "",
+                    "author": "Eren Gölge @erogol",
+                    "license": "MPL",
+                    "contact": "egolge@coqui.com"
+                },
+                "speedy-speech": {
+                    "description": "Speedy Speech model trained on LJSpeech dataset using the Alignment Network for learning the durations.",
+                    "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--en--ljspeech--speedy-speech.zip",
+                    "stats_file": null,
+                    "default_vocoder": "vocoder_models/en/ljspeech/hifigan_v2",
+                    "commit": "4581e3d",
+                    "author": "Eren Gölge @erogol",
+                    "license": "apache 2.0",
+                    "contact": "egolge@coqui.com"
+                },
+                "tacotron2-DCA": {
+                    "description": "",
+                    "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--en--ljspeech--tacotron2-DCA.zip",
+                    "default_vocoder": "vocoder_models/en/ljspeech/multiband-melgan",
+                    "commit": "",
+                    "author": "Eren Gölge @erogol",
+                    "license": "MPL",
+                    "contact": "egolge@coqui.com"
+                },
+                "vits": {
+                    "description": "VITS is an End2End TTS model trained on LJSpeech dataset with phonemes.",
+                    "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--en--ljspeech--vits.zip",
+                    "default_vocoder": null,
+                    "commit": "3900448",
+                    "author": "Eren Gölge @erogol",
+                    "license": "apache 2.0",
+                    "contact": "egolge@coqui.com"
+                },
+                "fast_pitch": {
+                    "description": "FastPitch model trained on LJSpeech using the Aligner Network",
+                    "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--en--ljspeech--fast_pitch.zip",
+                    "default_vocoder": "vocoder_models/en/ljspeech/hifigan_v2",
+                    "commit": "b27b3ba",
+                    "author": "Eren Gölge @erogol",
+                    "license": "apache 2.0",
+                    "contact": "egolge@coqui.com"
+                }
+            },
+            "vctk": {
+                "vits": {
+                    "description": "VITS End2End TTS model trained on VCTK dataset with 109 different speakers with EN accent.",
+                    "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--en--vctk--vits.zip",
+                    "default_vocoder": null,
+                    "commit": "3900448",
+                    "author": "Eren @erogol",
+                    "license": "apache 2.0",
+                    "contact": "egolge@coqui.ai"
+                },
+                "fast_pitch":{
+                    "description": "FastPitch model trained on VCTK dataset.",
+                    "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--en--vctk--fast_pitch.zip",
+                    "default_vocoder": null,
+                    "commit": "bdab788d",
+                    "author": "Eren @erogol",
+                    "license": "CC BY-NC-ND 4.0",
+                    "contact": "egolge@coqui.ai"
+                }
+            },
+            "sam": {
+                "tacotron-DDC": {
+                    "description": "Tacotron2 with Double Decoder Consistency trained with Accenture's Sam dataset.",
+                    "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--en--sam--tacotron-DDC.zip",
+                    "default_vocoder": "vocoder_models/en/sam/hifigan_v2",
+                    "commit": "bae2ad0f",
+                    "author": "Eren Gölge @erogol",
+                    "license": "apache 2.0",
+                    "contact": "egolge@coqui.com"
+                }
+            },
+            "blizzard2013": {
+                "capacitron-t2-c50": {
+                    "description": "Capacitron additions to Tacotron 2 with Capacity at 50 as in https://arxiv.org/pdf/1906.03402.pdf",
+                    "github_rls_url": "https://coqui.gateway.scarf.sh/v0.7.0_models/tts_models--en--blizzard2013--capacitron-t2-c50.zip",
+                    "commit": "d6284e7",
+                    "default_vocoder": "vocoder_models/en/blizzard2013/hifigan_v2",
+                    "author": "Adam Froghyar @a-froghyar",
+                    "license": "apache 2.0",
+                    "contact": "adamfroghyar@gmail.com"
+                },
+                "capacitron-t2-c150_v2": {
+                    "description": "Capacitron additions to Tacotron 2 with Capacity at 150 as in https://arxiv.org/pdf/1906.03402.pdf",
+                    "github_rls_url": "https://coqui.gateway.scarf.sh/v0.7.1_models/tts_models--en--blizzard2013--capacitron-t2-c150_v2.zip",
+                    "commit": "a67039d",
+                    "default_vocoder": "vocoder_models/en/blizzard2013/hifigan_v2",
+                    "author": "Adam Froghyar @a-froghyar",
+                    "license": "apache 2.0",
+                    "contact": "adamfroghyar@gmail.com"
+                }
+            }
+        },
+        "es": {
+            "mai": {
+                "tacotron2-DDC": {
+                    "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--es--mai--tacotron2-DDC.zip",
+                    "default_vocoder": "vocoder_models/universal/libri-tts/fullband-melgan",
+                    "commit": "",
+                    "author": "Eren Gölge @erogol",
+                    "license": "MPL",
+                    "contact": "egolge@coqui.com"
+                }
+            }
+        },
+        "fr": {
+            "mai": {
+                "tacotron2-DDC": {
+                    "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--fr--mai--tacotron2-DDC.zip",
+                    "default_vocoder": "vocoder_models/universal/libri-tts/fullband-melgan",
+                    "commit": "",
+                    "author": "Eren Gölge @erogol",
+                    "license": "MPL",
+                    "contact": "egolge@coqui.com"
+                }
+            }
+        },
+        "uk":{
+            "mai": {
+                "glow-tts": {
+                    "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--uk--mai--glow-tts.zip",
+                    "author":"@robinhad",
+                    "commit": "bdab788d",
+                    "license": "MIT",
+                    "contact": "",
+                    "default_vocoder": "vocoder_models/uk/mai/multiband-melgan"
+                }
+            }
+        },
+        "zh-CN": {
+            "baker": {
+                "tacotron2-DDC-GST": {
+                    "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--zh-CN--baker--tacotron2-DDC-GST.zip",
+                    "commit": "unknown",
+                    "author": "@kirianguiller",
+                    "license": "apache 2.0",
+                    "default_vocoder": null
+                }
+            }
+        },
+        "nl": {
+            "mai": {
+                "tacotron2-DDC": {
+                    "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--nl--mai--tacotron2-DDC.zip",
+                    "author": "@r-dh",
+                    "license": "apache 2.0",
+                    "default_vocoder": "vocoder_models/nl/mai/parallel-wavegan",
+                    "stats_file": null,
+                    "commit": "540d811"
+                }
+            }
+        },
+        "de": {
+            "thorsten": {
+                "tacotron2-DCA": {
+                    "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--de--thorsten--tacotron2-DCA.zip",
+                    "default_vocoder": "vocoder_models/de/thorsten/fullband-melgan",
+                    "author": "@thorstenMueller",
+                    "license": "apache 2.0",
+                    "commit": "unknown"
+                },
+                "vits": {
+                    "github_rls_url": "https://coqui.gateway.scarf.sh/v0.7.0_models/tts_models--de--thorsten--vits.zip",
+                    "default_vocoder": null,
+                    "author": "@thorstenMueller",
+                    "license": "apache 2.0",
+                    "commit": "unknown"
+                },
+                "tacotron2-DDC": {
+                    "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--de--thorsten--tacotron2-DDC.zip",
+                    "default_vocoder": "vocoder_models/de/thorsten/hifigan_v1",
+                    "description": "Thorsten-Dec2021-22k-DDC",
+                    "author": "@thorstenMueller",
+                    "license": "apache 2.0",
+                    "commit": "unknown"
+                }
+            }
+        },
+        "ja": {
+            "kokoro": {
+                "tacotron2-DDC": {
+                    "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--ja--kokoro--tacotron2-DDC.zip",
+                    "default_vocoder": "vocoder_models/ja/kokoro/hifigan_v1",
+                    "description": "Tacotron2 with Double Decoder Consistency trained with Kokoro Speech Dataset.",
+                    "author": "@kaiidams",
+                    "license": "apache 2.0",
+                    "commit": "401fbd89"
+                }
+            }
+        },
+        "tr":{
+            "common-voice": {
+                "glow-tts":{
+                    "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--tr--common-voice--glow-tts.zip",
+                    "default_vocoder": "vocoder_models/tr/common-voice/hifigan",
+                    "license": "MIT",
+                    "description": "Turkish GlowTTS model using an unknown speaker from the Common-Voice dataset.",
+                    "author": "Fatih Akademi",
+                    "commit": null
+                }
+            }
+        },
+        "it": {
+            "mai_female": {
+                "glow-tts":{
+                    "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--it--mai_female--glow-tts.zip",
+                    "default_vocoder": null,
+                    "description": "GlowTTS model as explained on https://github.com/coqui-ai/TTS/issues/1148.",
+                    "author": "@nicolalandro",
+                    "license": "apache 2.0",
+                    "commit": null
+                },
+                "vits":{
+                    "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--it--mai_female--vits.zip",
+                    "default_vocoder": null,
+                    "description": "VITS model as explained on https://github.com/coqui-ai/TTS/issues/1148.",
+                    "author": "@nicolalandro",
+                    "license": "apache 2.0",
+                    "commit": null
+                }
+            },
+            "mai_male": {
+                "glow-tts":{
+                    "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--it--mai_male--glow-tts.zip",
+                    "default_vocoder": null,
+                    "description": "GlowTTS model as explained on https://github.com/coqui-ai/TTS/issues/1148.",
+                    "author": "@nicolalandro",
+                    "license": "apache 2.0",
+                    "commit": null
+                },
+                "vits":{
+                    "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--it--mai_male--vits.zip",
+                    "default_vocoder": null,
+                    "description": "VITS model as explained on https://github.com/coqui-ai/TTS/issues/1148.",
+                    "author": "@nicolalandro",
+                    "license": "apache 2.0",
+                    "commit": null
+                }
+            }
+        },
+        "ewe": {
+            "openbible": {
+                "vits":{
+                    "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.2_models/tts_models--ewe--openbible--vits.zip",
+                    "default_vocoder": null,
+                    "license": "CC-BY-SA 4.0",
+                    "description": "Original work (audio and text) by Biblica available for free at www.biblica.com and open.bible.",
+                    "author": "@coqui_ai",
+                    "commit": "1b22f03"
+                }
+            }
+        },
+        "hau": {
+            "openbible": {
+                "vits":{
+                    "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.2_models/tts_models--hau--openbible--vits.zip",
+                    "default_vocoder": null,
+                    "license": "CC-BY-SA 4.0",
+                    "description": "Original work (audio and text) by Biblica available for free at www.biblica.com and open.bible.",
+                    "author": "@coqui_ai",
+                    "commit": "1b22f03"
+                }
+            }
+        },
+        "lin": {
+            "openbible": {
+                "vits":{
+                    "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.2_models/tts_models--lin--openbible--vits.zip",
+                    "default_vocoder": null,
+                    "license": "CC-BY-SA 4.0",
+                    "description": "Original work (audio and text) by Biblica available for free at www.biblica.com and open.bible.",
+                    "author": "@coqui_ai",
+                    "commit": "1b22f03"
+                }
+            }
+        },
+        "tw_akuapem": {
+            "openbible": {
+                "vits":{
+                    "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.2_models/tts_models--tw_akuapem--openbible--vits.zip",
+                    "default_vocoder": null,
+                    "license": "CC-BY-SA 4.0",
+                    "description": "Original work (audio and text) by Biblica available for free at www.biblica.com and open.bible.",
+                    "author": "@coqui_ai",
+                    "commit": "1b22f03"
+                }
+            }
+        },
+        "tw_asante": {
+            "openbible": {
+                "vits":{
+                    "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.2_models/tts_models--tw_asante--openbible--vits.zip",
+                    "default_vocoder": null,
+                    "license": "CC-BY-SA 4.0",
+                    "description": "Original work (audio and text) by Biblica available for free at www.biblica.com and open.bible.",
+                    "author": "@coqui_ai",
+                    "commit": "1b22f03"
+                }
+            }
+        },
+        "yor": {
+            "openbible": {
+                "vits":{
+                    "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.2_models/tts_models--yor--openbible--vits.zip",
+                    "default_vocoder": null,
+                    "license": "CC-BY-SA 4.0",
+                    "description": "Original work (audio and text) by Biblica available for free at www.biblica.com and open.bible.",
+                    "author": "@coqui_ai",
+                    "commit": "1b22f03"
+                }
+            }
+        }
+    },
+    "vocoder_models": {
+        "universal": {
+            "libri-tts": {
+                "wavegrad": {
+                    "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--universal--libri-tts--wavegrad.zip",
+                    "commit": "ea976b0",
+                    "author": "Eren Gölge @erogol",
+                    "license": "MPL",
+                    "contact": "egolge@coqui.com"
+                },
+                "fullband-melgan": {
+                    "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--universal--libri-tts--fullband-melgan.zip",
+                    "commit": "4132240",
+                    "author": "Eren Gölge @erogol",
+                    "license": "MPL",
+                    "contact": "egolge@coqui.com"
+                }
+            }
+        },
+        "en": {
+            "ek1": {
+                "wavegrad": {
+                    "description": "EK1 en-rp wavegrad by NMStoker",
+                    "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--en--ek1--wavegrad.zip",
+                    "commit": "c802255",
+                    "license": "apache 2.0"
+                }
+            },
+            "ljspeech": {
+                "multiband-melgan": {
+                    "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--en--ljspeech--multiband-melgan.zip",
+                    "commit": "ea976b0",
+                    "author": "Eren Gölge @erogol",
+                    "license": "MPL",
+                    "contact": "egolge@coqui.com"
+                },
+                "hifigan_v2": {
+                    "description": "HiFiGAN_v2 LJSpeech vocoder from https://arxiv.org/abs/2010.05646.",
+                    "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--en--ljspeech--hifigan_v2.zip",
+                    "commit": "bae2ad0f",
+                    "author": "@erogol",
+                    "license": "apache 2.0",
+                    "contact": "egolge@coqui.ai"
+                },
+                "univnet": {
+                    "description": "UnivNet model finetuned on TacotronDDC_ph spectrograms for better compatibility.",
+                    "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--en--ljspeech--univnet_v2.zip",
+                    "commit": "4581e3d",
+                    "author": "Eren @erogol",
+                    "license": "apache 2.0",
+                    "contact": "egolge@coqui.ai"
+                }
+            },
+            "blizzard2013": {
+                "hifigan_v2": {
+                    "description": "HiFiGAN_v2 LJSpeech vocoder from https://arxiv.org/abs/2010.05646.",
+                    "github_rls_url": "https://coqui.gateway.scarf.sh/v0.7.0_models/vocoder_models--en--blizzard2013--hifigan_v2.zip",
+                    "commit": "d6284e7",
+                    "author": "Adam Froghyar @a-froghyar",
+                    "license": "apache 2.0",
+                    "contact": "adamfroghyar@gmail.com"
+                }
+            },
+            "vctk": {
+                "hifigan_v2": {
+                    "description": "Finetuned and intended to be used with tts_models/en/vctk/sc-glow-tts",
+                    "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--en--vctk--hifigan_v2.zip",
+                    "commit": "2f07160",
+                    "author": "Edresson Casanova",
+                    "license": "apache 2.0",
+                    "contact": ""
+                }
+            },
+            "sam": {
+                "hifigan_v2": {
+                    "description": "Finetuned and intended to be used with tts_models/en/sam/tacotron-DDC",
+                    "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--en--sam--hifigan_v2.zip",
+                    "commit": "2f07160",
+                    "author": "Eren Gölge @erogol",
+                    "license": "apache 2.0",
+                    "contact": "egolge@coqui.ai"
+                }
+            }
+        },
+        "nl": {
+            "mai": {
+                "parallel-wavegan": {
+                    "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--nl--mai--parallel-wavegan.zip",
+                    "author": "@r-dh",
+                    "license": "apache 2.0",
+                    "commit": "unknown"
+                }
+            }
+        },
+        "de": {
+            "thorsten": {
+                "wavegrad": {
+                    "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--de--thorsten--wavegrad.zip",
+                    "author": "@thorstenMueller",
+                    "license": "apache 2.0",
+                    "commit": "unknown"
+                },
+                "fullband-melgan": {
+                    "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--de--thorsten--fullband-melgan.zip",
+                    "author": "@thorstenMueller",
+                    "license": "apache 2.0",
+                    "commit": "unknown"
+                },
+                "hifigan_v1": {
+                    "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/vocoder_models--de--thorsten--hifigan_v1.zip",
+                    "description": "HifiGAN vocoder model for Thorsten Neutral Dec2021 22k Samplerate Tacotron2 DDC model",
+                    "author": "@thorstenMueller",
+                    "license": "apache 2.0",
+                    "commit": "unknown"
+                }
+            }
+        },
+        "ja": {
+            "kokoro": {
+                "hifigan_v1": {
+                    "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--ja--kokoro--hifigan_v1.zip",
+                    "description": "HifiGAN model trained for kokoro dataset by @kaiidams",
+                    "author": "@kaiidams",
+                    "license": "apache 2.0",
+                    "commit": "3900448"
+                }
+            }
+        },
+        "uk": {
+            "mai": {
+                "multiband-melgan": {
+                    "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--uk--mai--multiband-melgan.zip",
+                    "author":"@robinhad",
+                    "commit": "bdab788d",
+                    "license": "MIT",
+                    "contact": ""
+                }
+            }
+        },
+        "tr":{
+            "common-voice": {
+                "hifigan":{
+                    "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--tr--common-voice--hifigan.zip",
+                    "description": "HifiGAN model using an unknown speaker from the Common-Voice dataset.",
+                    "author": "Fatih Akademi",
+                    "license": "MIT",
+                    "commit": null
+                }
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/TTS/VERSION b/TTS/VERSION
new file mode 100644
index 0000000000000000000000000000000000000000..8adc70fdd9d6a086cc636d251adf0c7a7bca8319
--- /dev/null
+++ b/TTS/VERSION
@@ -0,0 +1 @@
+0.8.0
\ No newline at end of file
diff --git a/TTS/__init__.py b/TTS/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..eaf05db1b950d82bfd7e20857e09a0fef45b430a
--- /dev/null
+++ b/TTS/__init__.py
@@ -0,0 +1,6 @@
+import os
+
+with open(os.path.join(os.path.dirname(__file__), "VERSION"), "r", encoding="utf-8") as f:
+    version = f.read().strip()
+# The version string comes from the VERSION file that sits next to this module.
+__version__ = version
diff --git a/TTS/__pycache__/__init__.cpython-310.pyc b/TTS/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..bbaea3a1104905215a651c726253b6b48b1729da
Binary files /dev/null and b/TTS/__pycache__/__init__.cpython-310.pyc differ
diff --git a/TTS/__pycache__/__init__.cpython-37.pyc b/TTS/__pycache__/__init__.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f8a22fad7067f1c5eb049860ca58a6adcdb1e010
Binary files /dev/null and b/TTS/__pycache__/__init__.cpython-37.pyc differ
diff --git a/TTS/__pycache__/__init__.cpython-38.pyc b/TTS/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..97c071df17636799f9af9009105f5b963380b0cb
Binary files /dev/null and b/TTS/__pycache__/__init__.cpython-38.pyc differ
diff --git a/TTS/__pycache__/__init__.cpython-39.pyc b/TTS/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d81cf001da414556f6e092e10dd4eab1bd25c047
Binary files /dev/null and b/TTS/__pycache__/__init__.cpython-39.pyc differ
diff --git a/TTS/__pycache__/model.cpython-310.pyc b/TTS/__pycache__/model.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c4a72baee575fe4cdbd1bb864b7ca1771aed2b92
Binary files /dev/null and b/TTS/__pycache__/model.cpython-310.pyc differ
diff --git a/TTS/__pycache__/model.cpython-37.pyc b/TTS/__pycache__/model.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..965fe63b5c2f4a203bd44bcb6c337d9f8a05700d
Binary files /dev/null and b/TTS/__pycache__/model.cpython-37.pyc differ
diff --git a/TTS/__pycache__/model.cpython-38.pyc b/TTS/__pycache__/model.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4757d2737a5481b08f9e03858cafa65b4c539c39
Binary files /dev/null and b/TTS/__pycache__/model.cpython-38.pyc differ
diff --git a/TTS/__pycache__/model.cpython-39.pyc b/TTS/__pycache__/model.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f62b846bb721da2397f18ce363235d7aac654958
Binary files /dev/null and b/TTS/__pycache__/model.cpython-39.pyc differ
diff --git a/TTS/bin/.ipynb_checkpoints/find_unique_chars-checkpoint.py b/TTS/bin/.ipynb_checkpoints/find_unique_chars-checkpoint.py
new file mode 100644
index 0000000000000000000000000000000000000000..ea16974839df6cf9942ef24a5535597940fde5b2
--- /dev/null
+++ b/TTS/bin/.ipynb_checkpoints/find_unique_chars-checkpoint.py
@@ -0,0 +1,45 @@
+"""Find all the unique characters in a dataset"""
+import argparse
+from argparse import RawTextHelpFormatter
+
+from TTS.config import load_config
+from TTS.tts.datasets import load_tts_samples
+
+
+def main():
+    """Print the unique characters found in the texts of the configured datasets."""
+    # pylint: disable=bad-option-value
+    parser = argparse.ArgumentParser(
+        description="""Find all the unique characters or phonemes in a dataset.\n\n"""
+        """
+    Example runs:
+
+    python TTS/bin/find_unique_chars.py --config_path config.json
+    """,
+        formatter_class=RawTextHelpFormatter,
+    )
+    parser.add_argument("--config_path", type=str, help="Path to dataset config file.", required=True)
+    cli_args = parser.parse_args()
+    config = load_config(cli_args.config_path)
+
+    # Gather the train and eval splits so every sample text is inspected.
+    train_split, eval_split = load_tts_samples(
+        config.datasets,
+        eval_split=True,
+        eval_split_max_size=config.eval_split_max_size,
+        eval_split_size=config.eval_split_size,
+    )
+    all_text = "".join(sample["text"] for sample in train_split + eval_split)
+
+    unique_chars = set(all_text)
+    lowercase_only = [ch for ch in unique_chars if ch.islower()]
+    lowered = {ch.lower() for ch in unique_chars}
+
+    print(f" > Number of unique characters: {len(unique_chars)}")
+    print(f" > Unique characters: {''.join(sorted(unique_chars))}")
+    print(f" > Unique lower characters: {''.join(sorted(lowercase_only))}")
+    print(f" > Unique all forced to lower characters: {''.join(sorted(lowered))}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/TTS/bin/__init__.py b/TTS/bin/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/TTS/bin/collect_env_info.py b/TTS/bin/collect_env_info.py
new file mode 100644
index 0000000000000000000000000000000000000000..662fcd02ece0fad387b6bfc4bad9316c7e2a0bad
--- /dev/null
+++ b/TTS/bin/collect_env_info.py
@@ -0,0 +1,48 @@
+"""Get detailed info about the working environment."""
+import os
+import platform
+import sys
+
+import numpy
+import torch
+
+sys.path += [os.path.abspath(".."), os.path.abspath(".")]
+import json
+
+import TTS
+
+
+def system_info():
+    """Describe the host OS, CPU and Python interpreter via the ``platform`` module."""
+    info = {"OS": platform.system()}
+    info["architecture"] = platform.architecture()
+    info["version"] = platform.version()
+    info["processor"] = platform.processor()
+    info["python"] = platform.python_version()
+    return info
+
+
+def cuda_info():
+    """Report CUDA device names, CUDA availability and the value of ``torch.version.cuda``."""
+    device_count = torch.cuda.device_count()
+    device_names = [torch.cuda.get_device_name(idx) for idx in range(device_count)]
+    cuda_available = torch.cuda.is_available()
+    return {"GPU": device_names, "available": cuda_available, "version": torch.version.cuda}
+
+
+def package_info():
+    """Collect the versions of numpy, PyTorch and TTS plus the PyTorch debug flag."""
+    versions = {"numpy": numpy.__version__}
+    versions["PyTorch_version"] = torch.__version__
+    versions["PyTorch_debug"] = torch.version.debug
+    versions["TTS"] = TTS.__version__
+    return versions
+
+
+def main():
+    """Print the system, CUDA and package details as indented, key-sorted JSON."""
+    print(json.dumps(dict(System=system_info(), CUDA=cuda_info(), Packages=package_info()), indent=4, sort_keys=True))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/TTS/bin/compute_attention_masks.py b/TTS/bin/compute_attention_masks.py
new file mode 100644
index 0000000000000000000000000000000000000000..9ab520be7d9f41ecf4f124446400b5e1b597ae8b
--- /dev/null
+++ b/TTS/bin/compute_attention_masks.py
@@ -0,0 +1,165 @@
+import argparse
+import importlib
+import os
+from argparse import RawTextHelpFormatter
+
+import numpy as np
+import torch
+from torch.utils.data import DataLoader
+from tqdm import tqdm
+
+from TTS.config import load_config
+from TTS.tts.datasets.TTSDataset import TTSDataset
+from TTS.tts.models import setup_model
+from TTS.tts.utils.text.characters import make_symbols, phonemes, symbols
+from TTS.utils.audio import AudioProcessor
+from TTS.utils.io import load_checkpoint
+
+if __name__ == "__main__":
+
+    def _str2bool(value):
+        """Convert a command-line token to a bool.
+
+        `type=bool` cannot do this: argparse would call `bool("False")`, which is True because
+        every non-empty string is truthy. The empty string keeps mapping to False.
+        """
+        token = str(value).strip().lower()
+        if token in ("true", "t", "yes", "y", "1"):
+            return True
+        if token in ("false", "f", "no", "n", "0", ""):
+            return False
+        raise argparse.ArgumentTypeError(f"Expected a boolean value, got {value!r}.")
+
+    # pylint: disable=bad-option-value
+    parser = argparse.ArgumentParser(
+        description="""Extract attention masks from trained Tacotron/Tacotron2 models.
+These masks can be used for different purposes including training a TTS model with a Duration Predictor.\n\n"""
+        """Each attention mask is written next to the input wav file with the "_attn.npy" suffix.
+(e.g. path/bla.wav (wav file) --> path/bla_attn.npy (attention mask))\n"""
+        """
+Example run:
+    CUDA_VISIBLE_DEVICES="0" python TTS/bin/compute_attention_masks.py
+        --model_path /data/rw/home/Models/ljspeech-dcattn-December-14-2020_11+10AM-9d0e8c7/checkpoint_200000.pth
+        --config_path /data/rw/home/Models/ljspeech-dcattn-December-14-2020_11+10AM-9d0e8c7/config.json
+        --dataset_metafile metadata.csv
+        --data_path /root/LJSpeech-1.1/
+        --batch_size 32
+        --dataset ljspeech
+        --use_cuda True
+""",
+        formatter_class=RawTextHelpFormatter,
+    )
+    parser.add_argument("--model_path", type=str, required=True, help="Path to Tacotron/Tacotron2 model file ")
+    parser.add_argument(
+        "--config_path",
+        type=str,
+        required=True,
+        help="Path to Tacotron/Tacotron2 config file.",
+    )
+    # `default` is dropped on the required arguments below: argparse never uses it when required=True.
+    parser.add_argument(
+        "--dataset",
+        type=str,
+        required=True,
+        help="Target dataset processor name from TTS.tts.dataset.preprocess.",
+    )
+
+    parser.add_argument(
+        "--dataset_metafile",
+        type=str,
+        required=True,
+        help="Dataset metafile including file paths with transcripts.",
+    )
+    parser.add_argument("--data_path", type=str, default="", help="Defines the data path. It overwrites config.json.")
+    parser.add_argument("--use_cuda", type=_str2bool, default=False, help="enable/disable cuda.")
+
+    parser.add_argument(
+        "--batch_size", default=16, type=int, help="Batch size for the model. Use batch_size=1 if you have no CUDA."
+    )
+    args = parser.parse_args()
+
+    C = load_config(args.config_path)
+    ap = AudioProcessor(**C.audio)
+
+    # if the vocabulary was passed, replace the default
+    if "characters" in C.keys():
+        symbols, phonemes = make_symbols(**C.characters)
+
+    # load the model
+    num_chars = len(phonemes) if C.use_phonemes else len(symbols)
+    # TODO: handle multi-speaker
+    model = setup_model(C)
+    model, _ = load_checkpoint(model, args.model_path, args.use_cuda, True)
+
+    # data loader: look up the formatter function named by --dataset and let it build the sample list
+    preprocessor = importlib.import_module("TTS.tts.datasets.formatters")
+    preprocessor = getattr(preprocessor, args.dataset)
+    meta_data = preprocessor(args.data_path, args.dataset_metafile)
+    dataset = TTSDataset(
+        model.decoder.r,
+        C.text_cleaner,
+        compute_linear_spec=False,
+        ap=ap,
+        meta_data=meta_data,
+        characters=C.characters if "characters" in C.keys() else None,
+        add_blank=C["add_blank"] if "add_blank" in C.keys() else False,
+        use_phonemes=C.use_phonemes,
+        phoneme_cache_path=C.phoneme_cache_path,
+        phoneme_language=C.phoneme_language,
+        enable_eos_bos=C.enable_eos_bos_chars,
+    )
+
+    dataset.sort_and_filter_items(C.get("sort_by_audio_len", default=False))
+    loader = DataLoader(
+        dataset,
+        batch_size=args.batch_size,
+        num_workers=4,
+        collate_fn=dataset.collate_fn,
+        shuffle=False,
+        drop_last=False,
+    )
+
+    # compute attentions
+    file_paths = []
+    with torch.no_grad():
+        for data in tqdm(loader):
+            # setup input data (only the batch fields that are used below)
+            text_input = data[0]
+            text_lengths = data[1]
+            mel_input = data[4]
+            mel_lengths = data[5]
+            item_idxs = data[7]
+
+            # dispatch data to GPU
+            if args.use_cuda:
+                text_input = text_input.cuda()
+                text_lengths = text_lengths.cuda()
+                mel_input = mel_input.cuda()
+                mel_lengths = mel_lengths.cuda()
+
+            model_outputs = model.forward(text_input, text_lengths, mel_input)
+
+            alignments = model_outputs["alignments"].detach()
+            for idx, alignment in enumerate(alignments):
+                item_idx = item_idxs[idx]
+                # interpolate if r > 1
+                alignment = (
+                    torch.nn.functional.interpolate(
+                        alignment.transpose(0, 1).unsqueeze(0),
+                        size=None,
+                        scale_factor=model.decoder.r,
+                        mode="nearest",
+                        align_corners=None,
+                        recompute_scale_factor=None,
+                    )
+                    .squeeze(0)
+                    .transpose(0, 1)
+                )
+                # remove paddings
+                alignment = alignment[: mel_lengths[idx], : text_lengths[idx]].cpu().numpy()
+                # set file paths; build the path from its parts because str.replace would also rewrite
+                # a directory component that happens to contain the wav file name
+                wav_dir, wav_file_name = os.path.split(item_idx)
+                align_file_name = os.path.splitext(wav_file_name)[0] + "_attn.npy"
+                file_path = os.path.join(wav_dir, align_file_name)
+                # save output
+                wav_file_abs_path = os.path.abspath(item_idx)
+                file_abs_path = os.path.abspath(file_path)
+                file_paths.append([wav_file_abs_path, file_abs_path])
+                np.save(file_path, alignment)
+
+        # output metafile: one "<wav path>|<attention mask path>" line per item
+        metafile = os.path.join(args.data_path, "metadata_attn_mask.txt")
+
+        with open(metafile, "w", encoding="utf-8") as f:
+            for p in file_paths:
+                f.write(f"{p[0]}|{p[1]}\n")
+        print(f" >> Metafile created: {metafile}")
diff --git a/TTS/bin/compute_embeddings.py b/TTS/bin/compute_embeddings.py
new file mode 100644
index 0000000000000000000000000000000000000000..3650ce3205a4aaf50e42a99833f4307f73e110fd
--- /dev/null
+++ b/TTS/bin/compute_embeddings.py
@@ -0,0 +1,131 @@
+import argparse
+import os
+from argparse import RawTextHelpFormatter
+
+import torch
+from tqdm import tqdm
+
+from TTS.config import load_config
+from TTS.config.shared_configs import BaseDatasetConfig
+from TTS.tts.datasets import load_tts_samples
+from TTS.tts.utils.managers import save_file
+from TTS.tts.utils.speakers import SpeakerManager
+
+def _str2bool(value):
+    """Convert a command-line token to a bool.
+
+    `type=bool` cannot do this: argparse would call `bool("False")`, which is True because every
+    non-empty string is truthy. The empty string keeps mapping to False.
+    """
+    token = str(value).strip().lower()
+    if token in ("true", "t", "yes", "y", "1"):
+        return True
+    if token in ("false", "f", "no", "n", "0", ""):
+        return False
+    raise argparse.ArgumentTypeError(f"Expected a boolean value, got {value!r}.")
+
+
+# Command-line interface. The dataset is described either by a dataset config file
+# (`--config_dataset_path`) or by the individual formatter/dataset arguments.
+parser = argparse.ArgumentParser(
+    description="""Compute embedding vectors for each audio file in a dataset and store them keyed by `{dataset_name}#{file_path}` in a .pth file\n\n"""
+    """
+    Example runs:
+    python TTS/bin/compute_embeddings.py --model_path speaker_encoder_model.pth --config_path speaker_encoder_config.json  --config_dataset_path dataset_config.json
+
+    python TTS/bin/compute_embeddings.py --model_path speaker_encoder_model.pth --config_path speaker_encoder_config.json  --formatter_name vctk --dataset_path /path/to/vctk/dataset --dataset_name my_vctk --metafile /path/to/vctk/metafile.csv
+    """,
+    formatter_class=RawTextHelpFormatter,
+)
+parser.add_argument(
+    "--model_path",
+    type=str,
+    help="Path to model checkpoint file. It defaults to the released speaker encoder.",
+    default="https://github.com/coqui-ai/TTS/releases/download/speaker_encoder_model/model_se.pth.tar",
+)
+parser.add_argument(
+    "--config_path",
+    type=str,
+    help="Path to model config file. It defaults to the released speaker encoder config.",
+    default="https://github.com/coqui-ai/TTS/releases/download/speaker_encoder_model/config_se.json",
+)
+parser.add_argument(
+    "--config_dataset_path",
+    type=str,
+    help="Path to dataset config file. You either need to provide this or `formatter_name`, `dataset_name` and `dataset_path` arguments.",
+    default=None,
+)
+parser.add_argument("--output_path", type=str, help="Path for output `pth` or `json` file.", default="speakers.pth")
+parser.add_argument("--old_file", type=str, help="Previous embedding file to only compute new audios.", default=None)
+parser.add_argument("--disable_cuda", type=_str2bool, help="Flag to disable cuda.", default=False)
+parser.add_argument("--no_eval", type=_str2bool, help="Do not compute eval?. Default False", default=False)
+parser.add_argument(
+    "--formatter_name",
+    type=str,
+    help="Name of the formatter to use. You either need to provide this or `config_dataset_path`",
+    default=None,
+)
+parser.add_argument(
+    "--dataset_name",
+    type=str,
+    help="Name of the dataset to use. You either need to provide this or `config_dataset_path`",
+    default=None,
+)
+parser.add_argument(
+    "--dataset_path",
+    type=str,
+    help="Path to the dataset. You either need to provide this or `config_dataset_path`",
+    default=None,
+)
+parser.add_argument(
+    "--metafile",
+    type=str,
+    help="Path to the meta file. If not set, dataset formatter uses the default metafile if it is defined in the formatter. You either need to provide this or `config_dataset_path`",
+    default=None,
+)
+args = parser.parse_args()
+
+use_cuda = torch.cuda.is_available() and not args.disable_cuda
+
+# Load the samples either from the individual CLI arguments or from a dataset config file.
+if args.config_dataset_path is None:
+    c_dataset = BaseDatasetConfig()
+    c_dataset.formatter = args.formatter_name
+    c_dataset.dataset_name = args.dataset_name
+    c_dataset.path = args.dataset_path
+    c_dataset.meta_file_train = args.metafile or None
+    meta_data_train, meta_data_eval = load_tts_samples(c_dataset, eval_split=not args.no_eval)
+else:
+    c_dataset = load_config(args.config_dataset_path)
+    meta_data_train, meta_data_eval = load_tts_samples(c_dataset.datasets, eval_split=not args.no_eval)
+
+
+# The eval split may be None; then only the training samples are processed.
+samples = meta_data_train if meta_data_eval is None else meta_data_train + meta_data_eval
+
+encoder_manager = SpeakerManager(
+    encoder_model_path=args.model_path,
+    encoder_config_path=args.config_path,
+    d_vectors_file_path=args.old_file,
+    use_cuda=use_cuda,
+)
+
+class_name_key = encoder_manager.encoder_config.class_name_key
+
+# compute speaker embeddings, one entry per audio clip keyed by its unique name
+speaker_mapping = {}
+for idx, fields in enumerate(tqdm(samples)):
+    class_name = fields[class_name_key]
+    audio_file = fields["audio_file"]
+    embedding_key = fields["audio_unique_name"]
+    root_path = fields["root_path"]
+
+    reuse_old_embedding = args.old_file is not None and embedding_key in encoder_manager.clip_ids
+    if reuse_old_embedding:
+        # the clip is already present in the old file, so take its embedding from there
+        embedd = encoder_manager.get_embedding_by_clip(embedding_key)
+    else:
+        # otherwise run the encoder on the audio file
+        embedd = encoder_manager.compute_embedding_from_clip(audio_file)
+
+    speaker_mapping[embedding_key] = {"name": class_name, "embedding": embedd}
+
+if speaker_mapping:
+    # a directory as output path gets the default file name inside it
+    mapping_file_path = args.output_path
+    if os.path.isdir(mapping_file_path):
+        mapping_file_path = os.path.join(args.output_path, "speakers.pth")
+
+    # make sure the parent folder exists before writing
+    output_dir = os.path.dirname(mapping_file_path)
+    if output_dir != "":
+        os.makedirs(output_dir, exist_ok=True)
+
+    save_file(speaker_mapping, mapping_file_path)
+    print("Speaker embeddings saved at:", mapping_file_path)
diff --git a/TTS/bin/compute_statistics.py b/TTS/bin/compute_statistics.py
new file mode 100644
index 0000000000000000000000000000000000000000..d0d920941f147dca4606884da13753c944351306
--- /dev/null
+++ b/TTS/bin/compute_statistics.py
@@ -0,0 +1,98 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import argparse
+import glob
+import os
+import sys
+
+sys.path.append('.')
+import numpy as np
+from tqdm import tqdm
+
+# from TTS.utils.io import load_config
+from TTS.config import load_config
+from TTS.tts.datasets import load_tts_samples
+from TTS.utils.audio import AudioProcessor
+
+
+def main():
+    """Compute the mean and standard deviation of mel and linear spectrograms over a dataset.
+
+    Sums and squared sums are accumulated along the frame axis over every file, turned into a mean and
+    a standard deviation per feature row, and stored with `np.save` together with the audio config that
+    produced them. Averages of the statistics are printed to stdout.
+
+    Raises:
+        ValueError: if no spectrogram frame was found (empty dataset).
+    """
+    parser = argparse.ArgumentParser(description="Compute mean and variance of spectrogram features.")
+    parser.add_argument("config_path", type=str, help="TTS config file path to define audio processing parameters.")
+    parser.add_argument("out_path", type=str, help="save path (directory and filename).")
+    parser.add_argument(
+        "--data_path",
+        type=str,
+        required=False,
+        help="folder including the target set of wavs overriding dataset config.",
+    )
+    args, overrides = parser.parse_known_args()
+
+    CONFIG = load_config(args.config_path)
+    CONFIG.parse_known_args(overrides, relaxed_parser=True)
+
+    # load config
+    CONFIG.audio.signal_norm = False  # do not apply earlier normalization
+    CONFIG.audio.stats_path = None  # discard pre-defined stats
+
+    # load audio processor
+    ap = AudioProcessor(**CONFIG.audio.to_dict())
+
+    # load the meta data of target dataset
+    if args.data_path:
+        dataset_items = glob.glob(os.path.join(args.data_path, "**", "*.wav"), recursive=True)
+    else:
+        dataset_items = load_tts_samples(CONFIG.datasets)[0]  # take only train data
+    print(f" > There are {len(dataset_items)} files.")
+
+    mel_sum = 0
+    mel_square_sum = 0
+    linear_sum = 0
+    linear_square_sum = 0
+    N = 0
+    for item in tqdm(dataset_items):
+        # items are plain wav paths (from --data_path) or sample dicts (from the dataset config)
+        wav = ap.load_wav(item if isinstance(item, str) else item["audio_file"])
+        linear = ap.spectrogram(wav)
+        mel = ap.melspectrogram(wav)
+
+        # accumulate sums along the frame axis; N counts the frames seen so far
+        N += mel.shape[1]
+        mel_sum += mel.sum(1)
+        linear_sum += linear.sum(1)
+        mel_square_sum += (mel**2).sum(axis=1)
+        linear_square_sum += (linear**2).sum(axis=1)
+
+    if N == 0:
+        # without this guard the divisions below fail with an uninformative ZeroDivisionError
+        raise ValueError("No spectrogram frames found: the dataset is empty.")
+
+    # E[x^2] - E[x]^2 can dip slightly below zero through floating-point rounding;
+    # clamp it so the square root never yields NaN.
+    mel_mean = mel_sum / N
+    mel_scale = np.sqrt(np.maximum(mel_square_sum / N - mel_mean**2, 0))
+    linear_mean = linear_sum / N
+    linear_scale = np.sqrt(np.maximum(linear_square_sum / N - linear_mean**2, 0))
+
+    # np.save appends ".npy" when the name lacks it; add it up front so the path stored in the
+    # config and printed below is the path of the file that is actually written.
+    output_file_path = args.out_path
+    if not output_file_path.endswith(".npy"):
+        output_file_path += ".npy"
+    stats = {}
+    stats["mel_mean"] = mel_mean
+    stats["mel_std"] = mel_scale
+    stats["linear_mean"] = linear_mean
+    stats["linear_std"] = linear_scale
+
+    print(f" > Avg mel spec mean: {mel_mean.mean()}")
+    print(f" > Avg mel spec scale: {mel_scale.mean()}")
+    print(f" > Avg linear spec mean: {linear_mean.mean()}")
+    print(f" > Avg linear spec scale: {linear_scale.mean()}")
+
+    # set default config values for mean-var scaling
+    CONFIG.audio.stats_path = output_file_path
+    CONFIG.audio.signal_norm = True
+    # remove redundant values
+    del CONFIG.audio.max_norm
+    del CONFIG.audio.min_level_db
+    del CONFIG.audio.symmetric_norm
+    del CONFIG.audio.clip_norm
+    stats["audio_config"] = CONFIG.audio.to_dict()
+    np.save(output_file_path, stats, allow_pickle=True)
+    print(f" > stats saved to {output_file_path}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/TTS/bin/eval_encoder.py b/TTS/bin/eval_encoder.py
new file mode 100644
index 0000000000000000000000000000000000000000..7f9fdf937079d75a673654471871130129c13c0a
--- /dev/null
+++ b/TTS/bin/eval_encoder.py
@@ -0,0 +1,89 @@
+import argparse
+from argparse import RawTextHelpFormatter
+
+import torch
+from tqdm import tqdm
+
+from TTS.config import load_config
+from TTS.tts.datasets import load_tts_samples
+from TTS.tts.utils.speakers import SpeakerManager
+
+
+def compute_encoder_accuracy(dataset_items, encoder_manager):
+    """Print the per-class and class-averaged classification accuracy of an encoder.
+
+    Each item's audio file is embedded with `encoder_manager`, classified through the softmax head of
+    the encoder criterion and compared against the item's ground-truth class name.
+
+    Args:
+        dataset_items (list): samples, each indexed with "audio_file" and with the class-name key taken
+            from the encoder config.
+        encoder_manager: object providing `encoder_config`, `encoder_criterion`, `use_cuda` and
+            `compute_embedding_from_clip`.
+
+    Raises:
+        RuntimeError: if an item's class name is None or no label could be predicted for it (no
+            criterion, or no `map_classid_to_classname` in the encoder config).
+    """
+    class_name_key = encoder_manager.encoder_config.class_name_key
+    map_classid_to_classname = getattr(encoder_manager.encoder_config, "map_classid_to_classname", None)
+
+    # class name -> list of 0/1 hits, one entry per evaluated sample
+    class_acc_dict = {}
+
+    # compute embeddings for all wav_files
+    for item in tqdm(dataset_items):
+        class_name = item[class_name_key]
+        wav_file = item["audio_file"]
+
+        # extract the embedding
+        embedd = encoder_manager.compute_embedding_from_clip(wav_file)
+        if encoder_manager.encoder_criterion is not None and map_classid_to_classname is not None:
+            embedding = torch.FloatTensor(embedd).unsqueeze(0)
+            if encoder_manager.use_cuda:
+                embedding = embedding.cuda()
+
+            class_id = encoder_manager.encoder_criterion.softmax.inference(embedding).item()
+            # the id-to-name mapping is keyed by strings
+            predicted_label = map_classid_to_classname[str(class_id)]
+        else:
+            predicted_label = None
+
+        if class_name is None or predicted_label is None:
+            raise RuntimeError("Error: class_name or/and predicted_label are None")
+        class_acc_dict.setdefault(class_name, []).append(int(class_name == predicted_label))
+
+    if not class_acc_dict:
+        # nothing was scored (empty dataset); the average below would divide by zero
+        print("No samples were evaluated; accuracy is undefined.")
+        return
+
+    acc_avg = 0
+    for key, values in class_acc_dict.items():
+        acc = sum(values) / len(values)
+        print("Class", key, "Accuracy:", acc)
+        acc_avg += acc
+
+    # unweighted mean over classes, not over samples
+    print("Average Accuracy:", acc_avg / len(class_acc_dict))
+
+
+if __name__ == "__main__":
+
+    def _str2bool(value):
+        """Convert a command-line token to a bool.
+
+        `type=bool` cannot do this: argparse would call `bool("False")`, which is True because
+        every non-empty string is truthy. The empty string keeps mapping to False.
+        """
+        token = str(value).strip().lower()
+        if token in ("true", "t", "yes", "y", "1"):
+            return True
+        if token in ("false", "f", "no", "n", "0", ""):
+            return False
+        raise argparse.ArgumentTypeError(f"Expected a boolean value, got {value!r}.")
+
+    parser = argparse.ArgumentParser(
+        description="""Compute the accuracy of the encoder.\n\n"""
+        """
+        Example runs:
+        python TTS/bin/eval_encoder.py emotion_encoder_model.pth emotion_encoder_config.json  dataset_config.json
+        """,
+        formatter_class=RawTextHelpFormatter,
+    )
+    parser.add_argument("model_path", type=str, help="Path to model checkpoint file.")
+    parser.add_argument(
+        "config_path",
+        type=str,
+        help="Path to model config file.",
+    )
+
+    parser.add_argument(
+        "config_dataset_path",
+        type=str,
+        help="Path to dataset config file.",
+    )
+    parser.add_argument("--use_cuda", type=_str2bool, help="flag to set cuda.", default=True)
+    parser.add_argument("--eval", type=_str2bool, help="compute eval.", default=True)
+
+    args = parser.parse_args()
+
+    c_dataset = load_config(args.config_dataset_path)
+
+    meta_data_train, meta_data_eval = load_tts_samples(c_dataset.datasets, eval_split=args.eval)
+    # The eval split can come back as None (compute_embeddings.py guards the same call);
+    # concatenating a list with None would raise a TypeError.
+    items = meta_data_train if meta_data_eval is None else meta_data_train + meta_data_eval
+
+    enc_manager = SpeakerManager(
+        encoder_model_path=args.model_path, encoder_config_path=args.config_path, use_cuda=args.use_cuda
+    )
+
+    compute_encoder_accuracy(items, enc_manager)
diff --git a/TTS/bin/extract_tts_spectrograms.py b/TTS/bin/extract_tts_spectrograms.py
new file mode 100644
index 0000000000000000000000000000000000000000..8cfd156b9d925ace9de0cdb4479c3a6f3c4ce317
--- /dev/null
+++ b/TTS/bin/extract_tts_spectrograms.py
@@ -0,0 +1,287 @@
+#!/usr/bin/env python3
+"""Extract Mel spectrograms with teacher forcing."""
+
+import argparse
+import os
+
+import numpy as np
+import torch
+from torch.utils.data import DataLoader
+from tqdm import tqdm
+
+from TTS.config import load_config
+from TTS.tts.datasets import TTSDataset, load_tts_samples
+from TTS.tts.models import setup_model
+from TTS.tts.utils.speakers import SpeakerManager
+from TTS.tts.utils.text.tokenizer import TTSTokenizer
+from TTS.utils.audio import AudioProcessor
+from TTS.utils.generic_utils import count_parameters
+
+use_cuda = torch.cuda.is_available()
+
+
+def setup_loader(ap, r, verbose=False):
+    """Build a non-shuffling DataLoader over the module-level `meta_data` samples.
+
+    Reads the module-level globals `c` (config), `meta_data` and `speaker_manager`.
+
+    Args:
+        ap: audio processor handed to the dataset.
+        r: value passed to the dataset as `outputs_per_step`.
+        verbose (bool): forwarded to the dataset.
+
+    Returns:
+        DataLoader: loader using the dataset's own `collate_fn`, keeping the sample order and the
+            last incomplete batch.
+    """
+    tokenizer, _ = TTSTokenizer.init_from_config(c)
+
+    # speaker information is only handed to the dataset when the config asks for it
+    speaker_id_mapping = speaker_manager.name_to_id if c.use_speaker_embedding else None
+    d_vector_mapping = speaker_manager.embeddings if c.use_d_vector_file else None
+
+    dataset_kwargs = dict(
+        outputs_per_step=r,
+        compute_linear_spec=False,
+        samples=meta_data,
+        tokenizer=tokenizer,
+        ap=ap,
+        batch_group_size=0,
+        min_text_len=c.min_text_len,
+        max_text_len=c.max_text_len,
+        min_audio_len=c.min_audio_len,
+        max_audio_len=c.max_audio_len,
+        phoneme_cache_path=c.phoneme_cache_path,
+        precompute_num_workers=0,
+        use_noise_augment=False,
+        verbose=verbose,
+        speaker_id_mapping=speaker_id_mapping,
+        d_vector_mapping=d_vector_mapping,
+    )
+    dataset = TTSDataset(**dataset_kwargs)
+
+    if c.use_phonemes and c.compute_input_seq_cache:
+        # precompute phonemes to have a better estimate of sequence lengths.
+        dataset.compute_input_seq(c.num_loader_workers)
+    dataset.preprocess_samples()
+
+    return DataLoader(
+        dataset,
+        batch_size=c.batch_size,
+        shuffle=False,
+        collate_fn=dataset.collate_fn,
+        drop_last=False,
+        sampler=None,
+        num_workers=c.num_loader_workers,
+        pin_memory=False,
+    )
+
+
+def set_filename(wav_path, out_path):
+    """Create the output sub-folders and derive every output path for one wav file.
+
+    Args:
+        wav_path (str): path of the source wav file; only its base name is used.
+        out_path (str): root output folder; "quant", "mel", "wav_gl" and "wav" are created inside it.
+
+    Returns:
+        tuple: `(file_name, wavq_path, mel_path, wav_gl_path, wav_path)`. The quantized-wav and mel
+            paths carry no extension; the two wav paths end in ".wav".
+    """
+    # Strip only the final extension. Splitting on the first "." would turn "spk.001.wav" and
+    # "spk.002.wav" into the same name "spk" and make their outputs overwrite each other.
+    file_name = os.path.splitext(os.path.basename(wav_path))[0]
+    for sub_dir in ("quant", "mel", "wav_gl", "wav"):
+        os.makedirs(os.path.join(out_path, sub_dir), exist_ok=True)
+    wavq_path = os.path.join(out_path, "quant", file_name)
+    mel_path = os.path.join(out_path, "mel", file_name)
+    wav_gl_path = os.path.join(out_path, "wav_gl", file_name + ".wav")
+    wav_path = os.path.join(out_path, "wav", file_name + ".wav")
+    return file_name, wavq_path, mel_path, wav_gl_path, wav_path
+
+
+def format_data(data):
+    """Unpack a collated batch and, when the module-level `use_cuda` is set, move its tensors to the GPU.
+
+    Args:
+        data (dict): batch indexed with "token_id", "token_id_lengths", "mel", "mel_lengths",
+            "item_idxs", "d_vectors", "speaker_ids" and "attns".
+
+    Returns:
+        tuple: `(text_input, text_lengths, mel_input, mel_lengths, speaker_ids, d_vectors,
+            avg_text_length, avg_spec_length, attn_mask, item_idx)`.
+    """
+    text_input, text_lengths = data["token_id"], data["token_id_lengths"]
+    mel_input, mel_lengths = data["mel"], data["mel_lengths"]
+    item_idx = data["item_idxs"]
+    d_vectors, speaker_ids = data["d_vectors"], data["speaker_ids"]
+    attn_mask = data["attns"]
+
+    # the averages are taken before any transfer to the GPU
+    avg_text_length = torch.mean(text_lengths.float())
+    avg_spec_length = torch.mean(mel_lengths.float())
+
+    if use_cuda:
+        # these four are always moved
+        text_input, text_lengths, mel_input, mel_lengths = [
+            tensor.cuda(non_blocking=True) for tensor in (text_input, text_lengths, mel_input, mel_lengths)
+        ]
+        # these three may be None and are then left untouched
+        speaker_ids, d_vectors, attn_mask = [
+            tensor if tensor is None else tensor.cuda(non_blocking=True)
+            for tensor in (speaker_ids, d_vectors, attn_mask)
+        ]
+
+    return (
+        text_input,
+        text_lengths,
+        mel_input,
+        mel_lengths,
+        speaker_ids,
+        d_vectors,
+        avg_text_length,
+        avg_spec_length,
+        attn_mask,
+        item_idx,
+    )
+
+
+@torch.no_grad()
+def inference(
+    model_name,
+    model,
+    ap,
+    text_input,
+    text_lengths,
+    mel_input,
+    mel_lengths,
+    speaker_ids=None,
+    d_vectors=None,
+):
+    """Run the model on a batch with the ground-truth mels as input and return its output as a numpy array.
+
+    Args:
+        model_name (str): "glow_tts", "tacotron" or "tacotron2".
+        model: model to run.
+        ap: audio processor; only `out_linear_to_mel` is used, and only for "tacotron".
+        text_input, text_lengths, mel_input, mel_lengths: batch tensors passed to the model.
+        speaker_ids, d_vectors: optional speaker conditioning passed to the model as auxiliary input.
+
+    Returns:
+        numpy.ndarray: the model output on the CPU; for "tacotron" each item is first converted with
+            `ap.out_linear_to_mel`.
+
+    Raises:
+        ValueError: if `model_name` is not one of the supported names.
+    """
+    if model_name not in ("glow_tts", "tacotron", "tacotron2"):
+        # An unsupported name used to fall through every branch and fail with an
+        # UnboundLocalError at the return statement.
+        raise ValueError(
+            f"Unsupported model '{model_name}'; expected one of 'glow_tts', 'tacotron' or 'tacotron2'."
+        )
+
+    if model_name == "glow_tts":
+        # speaker ids take precedence over d-vectors as the conditioning input
+        speaker_c = None
+        if speaker_ids is not None:
+            speaker_c = speaker_ids
+        elif d_vectors is not None:
+            speaker_c = d_vectors
+        outputs = model.inference_with_MAS(
+            text_input,
+            text_lengths,
+            mel_input,
+            mel_lengths,
+            aux_input={"d_vectors": speaker_c, "speaker_ids": speaker_ids},
+        )
+        return outputs["model_outputs"].detach().cpu().numpy()
+
+    aux_input = {"speaker_ids": speaker_ids, "d_vectors": d_vectors}
+    outputs = model(text_input, text_lengths, mel_input, mel_lengths, aux_input)
+    postnet_outputs = outputs["model_outputs"]
+
+    if model_name == "tacotron2":
+        return postnet_outputs.detach().cpu().numpy()
+
+    # "tacotron": convert every item of the batch with the audio processor's linear-to-mel mapping
+    mel_specs = []
+    postnet_outputs = postnet_outputs.data.cpu().numpy()
+    for b in range(postnet_outputs.shape[0]):
+        postnet_output = postnet_outputs[b]
+        mel_specs.append(torch.FloatTensor(ap.out_linear_to_mel(postnet_output.T).T))
+    return torch.stack(mel_specs).cpu().numpy()
+
+
+def extract_spectrograms(
+    data_loader, model, ap, output_path, quantized_wav=False, save_audio=False, debug=False, metada_name="metada.txt"
+):
+    """Run the model over every batch and save the predicted mel spectrogram of each sample.
+
+    Args:
+        data_loader: iterable of batches accepted by `format_data`.
+        model: model switched to eval mode and passed to `inference`.
+        ap: audio processor used to load, quantize, invert and save audio.
+        output_path (str): root folder for all outputs.
+        quantized_wav (bool): also save the quantized source waveform.
+        save_audio (bool): also save the source waveform.
+        debug (bool): also save the audio reconstructed from the predicted mel.
+        metada_name (str): name of the metadata file written into `output_path`; each line is
+            "<wav path>|<mel path>.npy".
+    """
+    model.eval()
+    metadata_rows = []
+    for _, batch in tqdm(enumerate(data_loader), total=len(data_loader)):
+
+        # format data; the averaged lengths and the attention mask are not needed here
+        formatted = format_data(batch)
+        (text_input, text_lengths, mel_input, mel_lengths, speaker_ids, d_vectors, _, _, _, item_idx) = formatted
+
+        model_output = inference(
+            c.model.lower(),
+            model,
+            ap,
+            text_input,
+            text_lengths,
+            mel_input,
+            mel_lengths,
+            speaker_ids,
+            d_vectors,
+        )
+
+        batch_size = text_input.shape[0]
+        for sample_idx in range(batch_size):
+            wav_file_path = item_idx[sample_idx]
+            wav = ap.load_wav(wav_file_path)
+            _, wavq_path, mel_path, wav_gl_path, wav_path = set_filename(wav_file_path, output_path)
+
+            # quantize and save wav
+            if quantized_wav:
+                np.save(wavq_path, ap.quantize(wav))
+
+            # save TTS mel: cut the prediction to the sample's mel length, then transpose
+            mel_length = mel_lengths[sample_idx]
+            mel = model_output[sample_idx][:mel_length, :].T
+            np.save(mel_path, mel)
+
+            metadata_rows.append([wav_file_path, mel_path])
+            if save_audio:
+                ap.save_wav(wav, wav_path)
+
+            if debug:
+                print("Audio for debug saved at:", wav_gl_path)
+                ap.save_wav(ap.inv_melspectrogram(mel), wav_gl_path)
+
+    metadata_file = os.path.join(output_path, metada_name)
+    with open(metadata_file, "w", encoding="utf-8") as f:
+        for wav_file, mel_file in metadata_rows:
+            f.write(f"{wav_file}|{mel_file+'.npy'}\n")
+
+
+def main(args):  # pylint: disable=redefined-outer-name
+    """Load the data and the model named by the module-level config `c`, then extract the spectrograms.
+
+    Sets the module-level globals `meta_data` and `speaker_manager`, which `setup_loader` reads.
+
+    Args:
+        args: parsed command-line arguments; `eval`, `checkpoint_path`, `output_path`, `quantized`,
+            `save_audio` and `debug` are read.
+    """
+    # pylint: disable=global-variable-undefined
+    global meta_data, speaker_manager
+
+    # Audio processor
+    ap = AudioProcessor(**c.audio)
+
+    # load data instances
+    meta_data_train, meta_data_eval = load_tts_samples(
+        c.datasets, eval_split=args.eval, eval_split_max_size=c.eval_split_max_size, eval_split_size=c.eval_split_size
+    )
+
+    # Use eval and training partitions. The eval split can come back as None
+    # (compute_embeddings.py guards the same call); concatenating a list with None would raise.
+    if meta_data_eval is None:
+        meta_data = meta_data_train
+    else:
+        meta_data = meta_data_train + meta_data_eval
+
+    # init speaker manager
+    if c.use_speaker_embedding:
+        speaker_manager = SpeakerManager(data_items=meta_data)
+    elif c.use_d_vector_file:
+        speaker_manager = SpeakerManager(d_vectors_file_path=c.d_vector_file)
+    else:
+        speaker_manager = None
+
+    # setup model
+    model = setup_model(c)
+
+    # restore model
+    model.load_checkpoint(c, args.checkpoint_path, eval=True)
+
+    if use_cuda:
+        model.cuda()
+
+    num_params = count_parameters(model)
+    print("\n > Model has {} parameters".format(num_params), flush=True)
+    # set r: glow_tts always uses 1, other models take it from their decoder
+    r = 1 if c.model.lower() == "glow_tts" else model.decoder.r
+    own_loader = setup_loader(ap, r, verbose=True)
+
+    extract_spectrograms(
+        own_loader,
+        model,
+        ap,
+        args.output_path,
+        quantized_wav=args.quantized,
+        save_audio=args.save_audio,
+        debug=args.debug,
+        metada_name="metada.txt",
+    )
+
+
+if __name__ == "__main__":
+
+    def _str2bool(value):
+        """Convert a command-line token to a bool.
+
+        `type=bool` cannot do this: argparse would call `bool("False")`, which is True because
+        every non-empty string is truthy. The empty string keeps mapping to False.
+        """
+        token = str(value).strip().lower()
+        if token in ("true", "t", "yes", "y", "1"):
+            return True
+        if token in ("false", "f", "no", "n", "0", ""):
+            return False
+        raise argparse.ArgumentTypeError(f"Expected a boolean value, got {value!r}.")
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--config_path", type=str, help="Path to config file for training.", required=True)
+    parser.add_argument("--checkpoint_path", type=str, help="Model file to be restored.", required=True)
+    parser.add_argument("--output_path", type=str, help="Path to save mel specs", required=True)
+    parser.add_argument("--debug", default=False, action="store_true", help="Save audio files for debug")
+    parser.add_argument("--save_audio", default=False, action="store_true", help="Save audio files")
+    parser.add_argument("--quantized", action="store_true", help="Save quantized audio files")
+    parser.add_argument("--eval", type=_str2bool, help="compute eval.", default=True)
+    args = parser.parse_args()
+
+    # `c` is read as a module-level global by the functions above
+    c = load_config(args.config_path)
+    c.audio.trim_silence = False
+    main(args)
diff --git a/TTS/bin/find_unique_chars.py b/TTS/bin/find_unique_chars.py
new file mode 100644
index 0000000000000000000000000000000000000000..9d18156b72b594a328d10aeb9976767af4414b46
--- /dev/null
+++ b/TTS/bin/find_unique_chars.py
@@ -0,0 +1,48 @@
+"""Find all the unique characters in a dataset"""
+import argparse
+from argparse import RawTextHelpFormatter
+
+import sys
+
+sys.path.append('.')
+from TTS.config import load_config
+from TTS.tts.datasets import load_tts_samples
+
+
+def main():
+    """Print the unique characters found in the transcripts of the configured datasets.
+
+    Reports the number of unique characters, the characters themselves, the subset that is
+    lower-case, and the set obtained after lower-casing every character.
+    """
+    # pylint: disable=bad-option-value
+    description = (
+        """Find all the unique characters or phonemes in a dataset.\n\n"""
+        """
+    Example runs:
+
+    python TTS/bin/find_unique_chars.py --config_path config.json
+    """
+    )
+    parser = argparse.ArgumentParser(description=description, formatter_class=RawTextHelpFormatter)
+    parser.add_argument("--config_path", type=str, help="Path to dataset config file.", required=True)
+    args = parser.parse_args()
+
+    config = load_config(args.config_path)
+
+    # load all datasets, keeping both the train and the eval partitions
+    train_items, eval_items = load_tts_samples(
+        config.datasets,
+        eval_split=True,
+        eval_split_max_size=config.eval_split_max_size,
+        eval_split_size=config.eval_split_size,
+    )
+
+    # gather every character that occurs in any transcript
+    chars = set()
+    for item in train_items + eval_items:
+        chars.update(item["text"])
+
+    lower_chars = [ch for ch in chars if ch.islower()]
+    chars_force_lower = {ch.lower() for ch in chars}
+
+    print(f" > Number of unique characters: {len(chars)}")
+    print(f" > Unique characters: {''.join(sorted(chars))}")
+    print(f" > Unique lower characters: {''.join(sorted(lower_chars))}")
+    print(f" > Unique all forced to lower characters: {''.join(sorted(chars_force_lower))}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/TTS/bin/find_unique_phonemes.py b/TTS/bin/find_unique_phonemes.py
new file mode 100644
index 0000000000000000000000000000000000000000..acd9defe3142074e2a50eb768794fd810f135812
--- /dev/null
+++ b/TTS/bin/find_unique_phonemes.py
@@ -0,0 +1,78 @@
+"""Find all the unique phonemes in a dataset"""
+import argparse
+import multiprocessing
+import sys
+
+sys.path.append('.')
+from argparse import RawTextHelpFormatter
+
+from tqdm.contrib.concurrent import process_map
+
+from TTS.config import load_config
+from TTS.tts.datasets import load_tts_samples
+from TTS.tts.utils.text.phonemizers import Gruut,ESpeak
+
+
+def compute_phonemes(item):
+    """Return the set of symbols in the phonemized text of `item`, with "|" characters removed."""
+    phonemized = phonemizer.phonemize(item["text"])
+    return set(phonemized.replace("|", ""))
+
+
+def main():
+    """Collect and print the unique phonemes of the datasets listed in a config file."""
+    # pylint: disable=W0601
+    global c, phonemizer  # module-level so that `compute_phonemes` can reach the phonemizer
+    # pylint: disable=bad-option-value
+    parser = argparse.ArgumentParser(
+        description="""Find all the unique characters or phonemes in a dataset.\n\n"""
+        """
+    Example runs:
+
+    python TTS/bin/find_unique_phonemes.py --config_path config.json
+    """,
+        formatter_class=RawTextHelpFormatter,
+    )
+    parser.add_argument("--config_path", type=str, help="Path to dataset config file.", required=True)
+    args = parser.parse_args()
+
+    c = load_config(args.config_path)
+
+    # load all datasets
+    train_items, eval_items = load_tts_samples(
+        c.datasets, eval_split=True, eval_split_max_size=c.eval_split_max_size, eval_split_size=c.eval_split_size
+    )
+    items = train_items + eval_items
+    print("Num items:", len(items))
+
+    language_list = [item["language"] for item in items]
+    is_lang_def = all(language_list)
+
+    if not c.phoneme_language or not is_lang_def:
+        raise ValueError("Phoneme language must be defined in config.")
+
+    if not language_list.count(language_list[0]) == len(language_list):
+        raise ValueError(
+            "Currently, just one phoneme language per config file is supported !! Please split the dataset config into different configs and run it individually for each language !!"
+        )
+
+    # Build the phonemizer from the configured phoneme language (validated above) instead of a
+    # hard-coded "vi", which phonemized every dataset as Vietnamese whatever the config said.
+    phonemizer = ESpeak(language=c.phoneme_language, backend="espeak")
+    # phonemize the samples in parallel; every worker call returns the symbol set of one sample
+    phonemes = process_map(compute_phonemes, items, max_workers=multiprocessing.cpu_count(), chunksize=15)
+    print(phonemes)
+
+    # merge the per-sample sets into the overall inventory
+    phones = set().union(*phonemes)
+    lower_phones = sorted(p for p in phones if p.islower())
+    phones_force_lower = sorted({p.lower() for p in phones})
+
+    print(f" > Number of unique phonemes: {len(phones)}")
+    print(f" > Unique phonemes: {''.join(sorted(phones))}")
+    print(f" > Unique lower phonemes: {''.join(lower_phones)}")
+    print(f" > Unique all forced to lower phonemes: {''.join(phones_force_lower)}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/TTS/bin/remove_silence_using_vad.py b/TTS/bin/remove_silence_using_vad.py
new file mode 100644
index 0000000000000000000000000000000000000000..352628bbc163338668edfe33ea328825d5b9a015
--- /dev/null
+++ b/TTS/bin/remove_silence_using_vad.py
@@ -0,0 +1,93 @@
+import argparse
+import glob
+import os
+import pathlib
+
+from tqdm import tqdm
+
+from TTS.utils.vad import get_vad_model_and_utils, remove_silence
+
+
+def adjust_path_and_remove_silence(audio_path):
+    """Trim silence from `audio_path` into `args.output_dir`; return `(output_path, is_speech)`."""
+    output_path = audio_path.replace(os.path.join(args.input_dir, ""), os.path.join(args.output_dir, ""))
+    # Existing outputs are skipped unless --force. Return the same 2-tuple shape as the normal
+    # path (the caller unpacks it); True keeps skipped files out of the no-speech list.
+    if os.path.exists(output_path) and not args.force:
+        return output_path, True
+
+    # create all directory structure
+    pathlib.Path(output_path).parent.mkdir(parents=True, exist_ok=True)
+    # remove the silence and save the audio
+    return remove_silence(
+        model_and_utils,
+        audio_path,
+        output_path,
+        trim_just_beginning_and_end=args.trim_just_beginning_and_end,
+        use_cuda=args.use_cuda,
+    )
+
+
+def preprocess_audios():
+    """Run silence removal over every file matched by `args.glob` under `args.input_dir`.
+
+    Output paths reported as containing no speech are written, one per line, to
+    `filtered_files.txt` inside `args.output_dir`.
+    """
+    files = sorted(glob.glob(os.path.join(args.input_dir, args.glob), recursive=True))
+    print("> Number of files: ", len(files))
+    if not args.force:
+        print("> Ignoring files that already exist in the output idrectory.")
+
+    if args.trim_just_beginning_and_end:
+        print("> Trimming just the beginning and the end with nonspeech parts.")
+    else:
+        print("> Trimming all nonspeech parts.")
+
+    if not files:
+        print("> No files Found !")
+        return
+
+    no_speech_paths = []
+    for audio_file in tqdm(files):
+        result_path, has_speech = adjust_path_and_remove_silence(audio_file)
+        if not has_speech:
+            no_speech_paths.append(result_path)
+    with open(os.path.join(args.output_dir, "filtered_files.txt"), "w", encoding="utf-8") as report:
+        report.writelines(path + "\n" for path in no_speech_paths)
+
+
+def _str2bool(value):
+    """Convert a CLI value to bool; only empty or false-like strings ("false", "no", "0", ...) are False."""
+    # argparse's `type=bool` calls bool() on the raw string, so "False" used to parse as True.
+    return str(value).strip().lower() not in ("", "0", "false", "f", "no", "n")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="python TTS/bin/remove_silence_using_vad.py -i=VCTK-Corpus/ -o=VCTK-Corpus-removed-silence/ -g=wav48_silence_trimmed/*/*_mic1.flac --trim_just_beginning_and_end True"
+    )
+    parser.add_argument("-i", "--input_dir", type=str, default="../VCTK-Corpus", help="Dataset root dir")
+    parser.add_argument(
+        "-o", "--output_dir", type=str, default="../VCTK-Corpus-removed-silence", help="Output Dataset dir"
+    )
+    parser.add_argument("-f", "--force", default=False, action="store_true", help="Force the replace of exists files")
+    parser.add_argument(
+        "-g",
+        "--glob",
+        type=str,
+        default="**/*.wav",
+        help="path in glob format for acess wavs from input_dir. ex: wav48/*/*.wav",
+    )
+    parser.add_argument(
+        "-t",
+        "--trim_just_beginning_and_end",
+        type=_str2bool,  # `type=bool` would turn the string "False" into True
+        default=True,
+        help="If True this script will trim just the beginning and end nonspeech parts. If False all nonspeech parts will be trim. Default True",
+    )
+    parser.add_argument("-c", "--use_cuda", type=_str2bool, default=False, help="If True use cuda")
+    args = parser.parse_args()
+    # load the model and utils
+    model_and_utils = get_vad_model_and_utils(use_cuda=args.use_cuda)
+    preprocess_audios()
diff --git a/TTS/bin/resample.py b/TTS/bin/resample.py
new file mode 100644
index 0000000000000000000000000000000000000000..c9f1166a647e2e761118862c2e8ac82a131428a9
--- /dev/null
+++ b/TTS/bin/resample.py
@@ -0,0 +1,87 @@
+import argparse
+import glob
+import os
+from argparse import RawTextHelpFormatter
+from distutils.dir_util import copy_tree
+from multiprocessing import Pool
+
+import librosa
+import soundfile as sf
+from tqdm import tqdm
+
+
+def resample_file(func_args):
+    """Resample one audio file in place; `func_args` is a `(filename, output_sr)` pair."""
+    audio_path, target_sr = func_args
+    sf.write(audio_path, *librosa.load(audio_path, sr=target_sr))  # load returns (samples, rate)
+
+
+if __name__ == "__main__":
+    # command-line interface
+    parser = argparse.ArgumentParser(
+        description="""Resample a folder recusively with librosa
+                       Can be used in place or create a copy of the folder as an output.\n\n
+                       Example run:
+                            python TTS/bin/resample.py
+                                --input_dir /root/LJSpeech-1.1/
+                                --output_sr 22050
+                                --output_dir /root/resampled_LJSpeech-1.1/
+                                --file_ext wav
+                                --n_jobs 24
+                    """,
+        formatter_class=RawTextHelpFormatter,
+    )
+
+    parser.add_argument(
+        "--input_dir",
+        type=str,
+        default=None,
+        required=True,
+        help="Path of the folder containing the audio files to resample",
+    )
+
+    parser.add_argument(
+        "--output_sr",
+        type=int,
+        default=22050,
+        required=False,
+        help="Samlple rate to which the audio files should be resampled",
+    )
+
+    parser.add_argument(
+        "--output_dir",
+        type=str,
+        default=None,
+        required=False,
+        help="Path of the destination folder. If not defined, the operation is done in place",
+    )
+
+    parser.add_argument(
+        "--file_ext",
+        type=str,
+        default="wav",
+        required=False,
+        help="Extension of the audio files to resample",
+    )
+
+    parser.add_argument(
+        "--n_jobs", type=int, default=None, help="Number of threads to use, by default it uses all cores"
+    )
+
+    args = parser.parse_args()
+
+    if args.output_dir:
+        # resample a copy of the folder instead of the original files
+        print("Recursively copying the input folder...")
+        copy_tree(args.input_dir, args.output_dir)
+        args.input_dir = args.output_dir
+
+    print("Resampling the audio files...")
+    matches = glob.glob(os.path.join(args.input_dir, f"**/*.{args.file_ext}"), recursive=True)
+    print(f"Found {len(matches)} files...")
+    jobs = [(match, args.output_sr) for match in matches]
+    with Pool(processes=args.n_jobs) as pool, tqdm(total=len(jobs)) as pbar:
+        for _ in pool.imap_unordered(resample_file, jobs):
+            pbar.update()
+
+    print("Done !")
diff --git a/TTS/bin/synthesize.py b/TTS/bin/synthesize.py
new file mode 100644
index 0000000000000000000000000000000000000000..bbcb9c958d2666889b0fc1c15615cc8995130af4
--- /dev/null
+++ b/TTS/bin/synthesize.py
@@ -0,0 +1,374 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import argparse
+import sys
+from argparse import RawTextHelpFormatter
+
+# pylint: disable=redefined-outer-name, unused-argument
+from pathlib import Path
+
+from TTS.utils.manage import ModelManager
+from TTS.utils.synthesizer import Synthesizer
+
+
+def str2bool(v):
+    """Map yes/no-style strings to a bool; bools pass through, other values raise ArgumentTypeError."""
+    if isinstance(v, bool):
+        return v
+    token = v.lower()
+    if token not in ("yes", "true", "t", "y", "1", "no", "false", "f", "n", "0"):
+        raise argparse.ArgumentTypeError("Boolean value expected.")
+    return token in ("yes", "true", "t", "y", "1")
+
+
+def main():
+    """Command-line entry point: list or query models, or synthesize speech and save it to `--out_path`."""
+    description = """Synthesize speech on command line.
+
+You can either use your trained model or choose a model from the provided list.
+
+If you don't specify any models, then it uses LJSpeech based English model.
+
+## Example Runs
+
+### Single Speaker Models
+
+- List provided models:
+
+    ```
+    $ tts --list_models
+    ```
+
+- Query info for model info by idx:
+
+    ```
+    $ tts --model_info_by_idx "<model_type>/<model_query_idx>"
+    ```
+
+- Query info for model info by full name:
+
+    ```
+    $ tts --model_info_by_name "<model_type>/<language>/<dataset>/<model_name>"
+    ```
+
+- Run TTS with default models:
+
+    ```
+    $ tts --text "Text for TTS"
+    ```
+
+- Run a TTS model with its default vocoder model:
+
+    ```
+    $ tts --text "Text for TTS" --model_name "<model_type>/<language>/<dataset>/<model_name>"
+    ```
+
+- Run with specific TTS and vocoder models from the list:
+
+    ```
+    $ tts --text "Text for TTS" --model_name "<model_type>/<language>/<dataset>/<model_name>" --vocoder_name "<model_type>/<language>/<dataset>/<model_name>" --out_path output/path/speech.wav
+    ```
+
+- Run your own TTS model (Using Griffin-Lim Vocoder):
+
+    ```
+    $ tts --text "Text for TTS" --model_path path/to/model.pth --config_path path/to/config.json --out_path output/path/speech.wav
+    ```
+
+- Run your own TTS and Vocoder models:
+    ```
+    $ tts --text "Text for TTS" --model_path path/to/model.pth --config_path path/to/config.json --out_path output/path/speech.wav
+        --vocoder_path path/to/vocoder.pth --vocoder_config_path path/to/vocoder_config.json
+    ```
+
+### Multi-speaker Models
+
+- List the available speakers and choose as <speaker_id> among them:
+
+    ```
+    $ tts --model_name "<model_type>/<language>/<dataset>/<model_name>"  --list_speaker_idxs
+    ```
+
+- Run the multi-speaker TTS model with the target speaker ID:
+
+    ```
+    $ tts --text "Text for TTS." --out_path output/path/speech.wav --model_name "<model_type>/<language>/<dataset>/<model_name>"  --speaker_idx <speaker_id>
+    ```
+
+- Run your own multi-speaker TTS model:
+
+    ```
+    $ tts --text "Text for TTS" --out_path output/path/speech.wav --model_path path/to/model.pth --config_path path/to/config.json --speakers_file_path path/to/speaker.json --speaker_idx <speaker_id>
+    ```
+    """
+    # We remove Markdown code formatting programmatically here to allow us to copy-and-paste from main README to keep
+    # documentation in sync more easily.
+    parser = argparse.ArgumentParser(
+        description=description.replace("    ```\n", ""),
+        formatter_class=RawTextHelpFormatter,
+    )
+
+    parser.add_argument(
+        "--list_models",
+        type=str2bool,
+        nargs="?",
+        const=True,
+        default=False,
+        help="list available pre-trained TTS and vocoder models.",
+    )
+
+    parser.add_argument(
+        "--model_info_by_idx",
+        type=str,
+        default=None,
+        help="model info using query format: <model_type>/<model_query_idx>",
+    )
+
+    parser.add_argument(
+        "--model_info_by_name",
+        type=str,
+        default=None,
+        help="model info using query format: <model_type>/<language>/<dataset>/<model_name>",
+    )
+
+    parser.add_argument("--text", type=str, default=None, help="Text to generate speech.")
+
+    # Args for running pre-trained TTS models.
+    parser.add_argument(
+        "--model_name",
+        type=str,
+        default="tts_models/en/ljspeech/tacotron2-DDC",
+        help="Name of one of the pre-trained TTS models in format <model_type>/<language>/<dataset>/<model_name>",
+    )
+    parser.add_argument(
+        "--vocoder_name",
+        type=str,
+        default=None,
+        help="Name of one of the pre-trained  vocoder models in format <model_type>/<language>/<dataset>/<model_name>",
+    )
+
+    # Args for running custom models
+    parser.add_argument("--config_path", default=None, type=str, help="Path to model config file.")
+    parser.add_argument(
+        "--model_path",
+        type=str,
+        default=None,
+        help="Path to model file.",
+    )
+    parser.add_argument(
+        "--out_path",
+        type=str,
+        default="tts_output.wav",
+        help="Output wav file path.",
+    )
+    # str2bool instead of bool: argparse's `type=bool` parses the string "False" as True
+    parser.add_argument("--use_cuda", type=str2bool, help="Run model on CUDA.", default=False)
+    parser.add_argument(
+        "--vocoder_path",
+        type=str,
+        help="Path to vocoder model file. If it is not defined, model uses GL as vocoder. Please make sure that you installed vocoder library before (WaveRNN).",
+        default=None,
+    )
+    parser.add_argument("--vocoder_config_path", type=str, help="Path to vocoder model config file.", default=None)
+    parser.add_argument(
+        "--encoder_path",
+        type=str,
+        help="Path to speaker encoder model file.",
+        default=None,
+    )
+    parser.add_argument("--encoder_config_path", type=str, help="Path to speaker encoder config file.", default=None)
+
+    # args for multi-speaker synthesis
+    parser.add_argument("--speakers_file_path", type=str, help="JSON file for multi-speaker model.", default=None)
+    parser.add_argument("--language_ids_file_path", type=str, help="JSON file for multi-lingual model.", default=None)
+    parser.add_argument(
+        "--speaker_idx",
+        type=str,
+        help="Target speaker ID for a multi-speaker TTS model.",
+        default=None,
+    )
+    parser.add_argument(
+        "--language_idx",
+        type=str,
+        help="Target language ID for a multi-lingual TTS model.",
+        default=None,
+    )
+    parser.add_argument(
+        "--speaker_wav",
+        nargs="+",
+        help="wav file(s) to condition a multi-speaker TTS model with a Speaker Encoder. You can give multiple file paths. The d_vectors is computed as their average.",
+        default=None,
+    )
+    parser.add_argument("--gst_style", help="Wav path file for GST style reference.", default=None)
+    parser.add_argument(
+        "--capacitron_style_wav", type=str, help="Wav path file for Capacitron prosody reference.", default=None
+    )
+    parser.add_argument("--capacitron_style_text", type=str, help="Transcription of the reference.", default=None)
+    parser.add_argument(
+        "--list_speaker_idxs",
+        help="List available speaker ids for the defined multi-speaker model.",
+        type=str2bool,
+        nargs="?",
+        const=True,
+        default=False,
+    )
+    parser.add_argument(
+        "--list_language_idxs",
+        help="List available language ids for the defined multi-lingual model.",
+        type=str2bool,
+        nargs="?",
+        const=True,
+        default=False,
+    )
+    # aux args
+    parser.add_argument(
+        "--save_spectogram",
+        type=str2bool,
+        help="If true save raw spectogram for further (vocoder) processing in out_path.",
+        default=False,
+    )
+    parser.add_argument(
+        "--reference_wav",
+        type=str,
+        help="Reference wav file to convert in the voice of the speaker_idx or speaker_wav",
+        default=None,
+    )
+    parser.add_argument(
+        "--reference_speaker_idx",
+        type=str,
+        help="speaker ID of the reference_wav speaker (If not provided the embedding will be computed using the Speaker Encoder).",
+        default=None,
+    )
+    parser.add_argument(
+        "--progress_bar",
+        type=str2bool,
+        help="If true shows a progress bar for the model download. Defaults to True",
+        default=True,
+    )
+
+    args = parser.parse_args()
+    # print the help text (argparse exits afterwards) when none of the action arguments is set
+    check_args = [
+        args.text,
+        args.list_models,
+        args.list_speaker_idxs,
+        args.list_language_idxs,
+        args.reference_wav,
+        args.model_info_by_idx,
+        args.model_info_by_name,
+    ]
+    if not any(check_args):
+        parser.parse_args(["-h"])
+
+    # load model manager
+    path = Path(__file__).parent / "../.models.json"
+    manager = ModelManager(path, progress_bar=args.progress_bar)
+
+    model_path = None
+    config_path = None
+    speakers_file_path = None
+    language_ids_file_path = None
+    vocoder_path = None
+    vocoder_config_path = None
+    encoder_path = None
+    encoder_config_path = None
+
+    # CASE1 #list : list pre-trained TTS models
+    if args.list_models:
+        manager.list_models()
+        sys.exit()
+
+    # CASE2 #info : model info of pre-trained TTS models
+    if args.model_info_by_idx:
+        model_query = args.model_info_by_idx
+        manager.model_info_by_idx(model_query)
+        sys.exit()
+
+    if args.model_info_by_name:
+        model_query_full_name = args.model_info_by_name
+        manager.model_info_by_full_name(model_query_full_name)
+        sys.exit()
+
+    # CASE3: load pre-trained model paths
+    if args.model_name is not None and not args.model_path:
+        model_path, config_path, model_item = manager.download_model(args.model_name)
+        args.vocoder_name = model_item["default_vocoder"] if args.vocoder_name is None else args.vocoder_name
+
+    if args.vocoder_name is not None and not args.vocoder_path:
+        vocoder_path, vocoder_config_path, _ = manager.download_model(args.vocoder_name)
+
+    # CASE4: set custom model paths
+    if args.model_path is not None:
+        model_path = args.model_path
+        config_path = args.config_path
+        speakers_file_path = args.speakers_file_path
+        language_ids_file_path = args.language_ids_file_path
+
+    if args.vocoder_path is not None:
+        vocoder_path = args.vocoder_path
+        vocoder_config_path = args.vocoder_config_path
+
+    if args.encoder_path is not None:
+        encoder_path = args.encoder_path
+        encoder_config_path = args.encoder_config_path
+
+    # load models
+    synthesizer = Synthesizer(
+        model_path,
+        config_path,
+        speakers_file_path,
+        language_ids_file_path,
+        vocoder_path,
+        vocoder_config_path,
+        encoder_path,
+        encoder_config_path,
+        args.use_cuda,
+    )
+
+    # query speaker ids of a multi-speaker model.
+    if args.list_speaker_idxs:
+        print(
+            " > Available speaker ids: (Set --speaker_idx flag to one of these values to use the multi-speaker model."
+        )
+        print(synthesizer.tts_model.speaker_manager.name_to_id)
+        return
+
+    # query language ids of a multi-lingual model.
+    if args.list_language_idxs:
+        print(
+            " > Available language ids: (Set --language_idx flag to one of these values to use the multi-lingual model."
+        )
+        print(synthesizer.tts_model.language_manager.name_to_id)
+        return
+
+    # check the arguments against a multi-speaker model.
+    if synthesizer.tts_speakers_file and (not args.speaker_idx and not args.speaker_wav):
+        print(
+            " [!] Looks like you use a multi-speaker model. Define `--speaker_idx` to "
+            "select the target speaker. You can list the available speakers for this model by `--list_speaker_idxs`."
+        )
+        return
+
+    # RUN THE SYNTHESIS
+    if args.text:
+        print(" > Text: {}".format(args.text))
+
+    # kick it
+    wav = synthesizer.tts(
+        args.text,
+        args.speaker_idx,
+        args.language_idx,
+        args.speaker_wav,
+        reference_wav=args.reference_wav,
+        style_wav=args.capacitron_style_wav,
+        style_text=args.capacitron_style_text,
+        reference_speaker_name=args.reference_speaker_idx,
+    )
+
+    # save the results
+    print(" > Saving output to {}".format(args.out_path))
+    synthesizer.save_wav(wav, args.out_path)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/TTS/bin/train_encoder.py b/TTS/bin/train_encoder.py
new file mode 100644
index 0000000000000000000000000000000000000000..f2e7779c0c109a3ec78f1972ebf1147ec436048a
--- /dev/null
+++ b/TTS/bin/train_encoder.py
@@ -0,0 +1,319 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import os
+import sys
+import time
+import traceback
+
+import torch
+from torch.utils.data import DataLoader
+from trainer.torch import NoamLR
+from trainer.trainer_utils import get_optimizer
+
+from TTS.encoder.dataset import EncoderDataset
+from TTS.encoder.utils.generic_utils import save_best_model, save_checkpoint, setup_encoder_model
+from TTS.encoder.utils.training import init_training
+from TTS.encoder.utils.visual import plot_embeddings
+from TTS.tts.datasets import load_tts_samples
+from TTS.utils.audio import AudioProcessor
+from TTS.utils.generic_utils import count_parameters, remove_experiment_folder
+from TTS.utils.io import copy_model_files
+from TTS.utils.samplers import PerfectBatchSampler
+from TTS.utils.training import check_update
+
+torch.backends.cudnn.enabled = True
+torch.backends.cudnn.benchmark = True
+torch.manual_seed(54321)  # fixed seed for torch's random number generator
+use_cuda = torch.cuda.is_available()  # read by train(), evaluation() and main() in this file
+num_gpus = torch.cuda.device_count()  # within this file it is only reported below
+print(" > Using CUDA: ", use_cuda)
+print(" > Number of GPUs: ", num_gpus)
+
+
+def setup_loader(ap: AudioProcessor, is_val: bool = False, verbose: bool = False):
+    """Build the train (or, with `is_val`, eval) DataLoader; return it with the class list and id->name map."""
+    if is_val:
+        samples, augmentation = meta_data_eval, None
+        num_utter_per_class = c.eval_num_utter_per_class
+        num_classes_in_batch = c.eval_num_classes_in_batch
+    else:
+        samples, augmentation = meta_data_train, c.audio_augmentation
+        num_utter_per_class = c.num_utter_per_class
+        num_classes_in_batch = c.num_classes_in_batch
+
+    dataset = EncoderDataset(
+        c,
+        ap,
+        samples,
+        voice_len=c.voice_len,
+        num_utter_per_class=num_utter_per_class,
+        num_classes_in_batch=num_classes_in_batch,
+        verbose=verbose,
+        augmentation_config=augmentation,
+        use_torch_spec=c.model_params.get("use_torch_spec", False),
+    )
+    classes = dataset.get_class_list()
+
+    sampler = PerfectBatchSampler(
+        dataset.items,
+        classes,
+        batch_size=num_classes_in_batch * num_utter_per_class,  # total batch size
+        num_classes_in_batch=num_classes_in_batch,
+        num_gpus=1,
+        shuffle=not is_val,
+        drop_last=True,
+    )
+
+    # a batch cannot be asked for more classes than the dataset contains
+    if len(classes) < num_classes_in_batch:
+        option, split = ("eval_num_classes_in_batch", "Eval") if is_val else ("num_classes_in_batch", "Train")
+        raise RuntimeError(
+            f"config.{option} ({num_classes_in_batch}) need to be <= {len(classes)} (Number total of Classes in the {split} dataset) !"
+        )
+
+    # set the classes to avoid get wrong class_id when the number of training and eval classes are not equal
+    if is_val:
+        dataset.set_classes(train_classes)
+
+    loader = DataLoader(
+        dataset, num_workers=c.num_loader_workers, batch_sampler=sampler, collate_fn=dataset.collate_fn
+    )
+    return loader, classes, dataset.get_map_classid_to_classname()
+
+
+def evaluation(model, criterion, data_loader, global_step):
+    """Return the mean loss over `data_loader`; log it and a UMAP plot of the last batch's outputs."""
+    eval_loss = 0
+    for inputs, labels in data_loader:
+        with torch.no_grad():
+            # regroup the samples of each class in the batch:
+            # the perfect sampler produces [3,2,1,3,2,1], we need [3,3,2,2,1,1]
+            labels = torch.transpose(
+                labels.view(c.eval_num_utter_per_class, c.eval_num_classes_in_batch), 0, 1
+            ).reshape(labels.shape)
+            inputs = torch.transpose(
+                inputs.view(c.eval_num_utter_per_class, c.eval_num_classes_in_batch, -1), 0, 1
+            ).reshape(inputs.shape)
+
+            # dispatch data to GPU
+            if use_cuda:
+                inputs = inputs.cuda(non_blocking=True)
+                labels = labels.cuda(non_blocking=True)
+
+            # forward pass model
+            outputs = model(inputs)
+
+            # loss computation
+            loss = criterion(
+                outputs.view(c.eval_num_classes_in_batch, outputs.shape[0] // c.eval_num_classes_in_batch, -1), labels
+            )
+
+            eval_loss += loss.item()
+
+    eval_avg_loss = eval_loss / len(data_loader)
+    # save stats
+    dashboard_logger.eval_stats(global_step, {"loss": eval_avg_loss})
+    # plot the last batch in the evaluation; eval batches are laid out with
+    # eval_num_classes_in_batch classes (see the views above), so pass that count, not the training one
+    figures = {
+        "UMAP Plot": plot_embeddings(outputs.detach().cpu().numpy(), c.eval_num_classes_in_batch),
+    }
+    dashboard_logger.eval_figures(global_step, figures)
+    return eval_avg_loss
+
+
+def train(model, optimizer, scheduler, criterion, data_loader, eval_data_loader, global_step):
+    """Train `model` for `c.epochs` epochs, evaluating after each one when `c.run_eval`; return `(best_loss, global_step)`."""
+    model.train()
+    best_loss = float("inf")
+    avg_loader_time = 0
+    end_time = time.time()
+    for epoch in range(c.epochs):
+        tot_loss = 0
+        epoch_time = 0
+        for _, data in enumerate(data_loader):
+            start_time = time.time()
+            # setup input data
+            inputs, labels = data
+            # regroup the samples of each class in the batch. perfect sampler produces [3,2,1,3,2,1] we need [3,3,2,2,1,1]
+            labels = torch.transpose(labels.view(c.num_utter_per_class, c.num_classes_in_batch), 0, 1).reshape(
+                labels.shape
+            )
+            inputs = torch.transpose(inputs.view(c.num_utter_per_class, c.num_classes_in_batch, -1), 0, 1).reshape(
+                inputs.shape
+            )
+            # ToDo: move it to a unit test
+            # labels_converted = torch.transpose(labels.view(c.num_utter_per_class, c.num_classes_in_batch), 0, 1).reshape(labels.shape)
+            # inputs_converted = torch.transpose(inputs.view(c.num_utter_per_class, c.num_classes_in_batch, -1), 0, 1).reshape(inputs.shape)
+            # idx = 0
+            # for j in range(0, c.num_classes_in_batch, 1):
+            #     for i in range(j, len(labels), c.num_classes_in_batch):
+            #         if not torch.all(labels[i].eq(labels_converted[idx])) or not torch.all(inputs[i].eq(inputs_converted[idx])):
+            #             print("Invalid")
+            #             print(labels)
+            #             exit()
+            #         idx += 1
+            # labels = labels_converted
+            # inputs = inputs_converted
+
+            loader_time = time.time() - end_time  # time elapsed since the previous step finished
+            global_step += 1
+
+            # step the lr scheduler (only when lr decay is enabled) and reset the gradients
+            if c.lr_decay:
+                scheduler.step()
+            optimizer.zero_grad()
+
+            # dispatch data to GPU
+            if use_cuda:
+                inputs = inputs.cuda(non_blocking=True)
+                labels = labels.cuda(non_blocking=True)
+
+            # forward pass model
+            outputs = model(inputs)
+
+            # loss computation
+            loss = criterion(
+                outputs.view(c.num_classes_in_batch, outputs.shape[0] // c.num_classes_in_batch, -1), labels
+            )
+            loss.backward()
+            grad_norm, _ = check_update(model, c.grad_clip)
+            optimizer.step()
+
+            step_time = time.time() - start_time
+            epoch_time += step_time
+
+            # accumulate the total epoch loss
+            tot_loss += loss.item()
+
+            # running average of the loader time: each new measurement is weighted by 1 / num_loader_workers
+            num_loader_workers = c.num_loader_workers if c.num_loader_workers > 0 else 1
+            avg_loader_time = (
+                1 / num_loader_workers * loader_time + (num_loader_workers - 1) / num_loader_workers * avg_loader_time
+                if avg_loader_time != 0
+                else loader_time
+            )
+            current_lr = optimizer.param_groups[0]["lr"]
+
+            if global_step % c.steps_plot_stats == 0:
+                # log the stats and a UMAP plot of the current step to the dashboard
+                train_stats = {
+                    "loss": loss.item(),
+                    "lr": current_lr,
+                    "grad_norm": grad_norm,
+                    "step_time": step_time,
+                    "avg_loader_time": avg_loader_time,
+                }
+                dashboard_logger.train_epoch_stats(global_step, train_stats)
+                figures = {
+                    "UMAP Plot": plot_embeddings(outputs.detach().cpu().numpy(), c.num_classes_in_batch),
+                }
+                dashboard_logger.train_figures(global_step, figures)
+
+            if global_step % c.print_step == 0:
+                print(
+                    "   | > Step:{}  Loss:{:.5f}  GradNorm:{:.5f}  "
+                    "StepTime:{:.2f}  LoaderTime:{:.2f}  AvGLoaderTime:{:.2f}  LR:{:.6f}".format(
+                        global_step, loss.item(), grad_norm, step_time, loader_time, avg_loader_time, current_lr
+                    ),
+                    flush=True,
+                )
+
+            if global_step % c.save_step == 0:
+                # save model
+                save_checkpoint(model, optimizer, criterion, loss.item(), OUT_PATH, global_step, epoch)
+
+            end_time = time.time()
+
+        print("")
+        print(
+            ">>> Epoch:{}  AvgLoss: {:.5f} GradNorm:{:.5f}  "
+            "EpochTime:{:.2f} AvGLoaderTime:{:.2f} ".format(
+                epoch, tot_loss / len(data_loader), grad_norm, epoch_time, avg_loader_time
+            ),
+            flush=True,
+        )
+        # evaluation
+        if c.run_eval:
+            model.eval()
+            eval_loss = evaluation(model, criterion, eval_data_loader, global_step)
+            print("\n\n")
+            print("--> EVAL PERFORMANCE")
+            print(
+                "   | > Epoch:{}  AvgLoss: {:.5f} ".format(epoch, eval_loss),
+                flush=True,
+            )
+            # save the best checkpoint
+            best_loss = save_best_model(model, optimizer, criterion, eval_loss, best_loss, OUT_PATH, global_step, epoch)
+            model.train()
+
+    return best_loss, global_step
+
+
+def main(args):  # pylint: disable=redefined-outer-name
+    """Build the data loaders, model, criterion, optimizer and scheduler, then run `train`."""
+    # pylint: disable=global-variable-undefined
+    global meta_data_train
+    global meta_data_eval
+    global train_classes
+
+    ap = AudioProcessor(**c.audio)
+    model = setup_encoder_model(c)
+    optimizer = get_optimizer(c.optimizer, c.optimizer_params, c.lr, model)
+
+    # pylint: disable=redefined-outer-name
+    meta_data_train, meta_data_eval = load_tts_samples(c.datasets, eval_split=True)  # globals read by setup_loader
+
+    train_data_loader, train_classes, map_classid_to_classname = setup_loader(ap, is_val=False, verbose=True)
+    if c.run_eval:
+        eval_data_loader, _, _ = setup_loader(ap, is_val=True, verbose=True)  # reads the global train_classes set above
+    else:
+        eval_data_loader = None
+
+    num_classes = len(train_classes)
+    criterion = model.get_criterion(c, num_classes)
+
+    if c.loss == "softmaxproto" and c.model != "speaker_encoder":
+        c.map_classid_to_classname = map_classid_to_classname  # presumably saved with the config by the call below — TODO confirm
+        copy_model_files(c, OUT_PATH)
+
+    if args.restore_path:
+        criterion, args.restore_step = model.load_checkpoint(
+            c, args.restore_path, eval=False, use_cuda=use_cuda, criterion=criterion
+        )
+        print(" > Model restored from step %d" % args.restore_step, flush=True)
+    else:
+        args.restore_step = 0
+
+    if c.lr_decay:
+        scheduler = NoamLR(optimizer, warmup_steps=c.warmup_steps, last_epoch=args.restore_step - 1)
+    else:
+        scheduler = None
+
+    num_params = count_parameters(model)
+    print("\n > Model has {} parameters".format(num_params), flush=True)
+
+    if use_cuda:
+        model = model.cuda()
+        criterion.cuda()
+
+    global_step = args.restore_step  # training resumes from the restored step (0 for a fresh run)
+    _, global_step = train(model, optimizer, scheduler, criterion, train_data_loader, eval_data_loader, global_step)
+
+
+if __name__ == "__main__":
+    args, c, OUT_PATH, AUDIO_PATH, c_logger, dashboard_logger = init_training()  # module globals used by the functions above
+    # on interruption or failure, remove_experiment_folder(OUT_PATH) is called before exiting
+    try:
+        main(args)
+    except KeyboardInterrupt:
+        remove_experiment_folder(OUT_PATH)
+        try:
+            sys.exit(0)
+        except SystemExit:
+            os._exit(0)  # pylint: disable=protected-access
+    except Exception:  # pylint: disable=broad-except
+        remove_experiment_folder(OUT_PATH)
+        traceback.print_exc()
+        sys.exit(1)
diff --git a/TTS/bin/train_tts.py b/TTS/bin/train_tts.py
new file mode 100644
index 0000000000000000000000000000000000000000..c281a49ca50a562cb2bedfedd17ba801970f20ef
--- /dev/null
+++ b/TTS/bin/train_tts.py
@@ -0,0 +1,75 @@
+import os
+import sys
+
+sys.path.append('.')
+from dataclasses import dataclass, field
+
+from trainer import Trainer, TrainerArgs
+
+from TTS.config import load_config, register_config
+from TTS.tts.datasets import load_tts_samples
+from TTS.tts.models import setup_model
+
+
+@dataclass
+class TrainTTSArgs(TrainerArgs):
+    """`TrainerArgs` extended with the path of the model config file."""
+
+    # Path to the config file; `None` when it is not given.
+    config_path: str = field(default=None, metadata={"help": "Path to the config file."})
+
+
+def main():
+    """Run `tts` model training directly by a `config.json` file.
+
+    The config is read from `--config_path`, or from the `config.json` stored in
+    `--continue_path`, or - when neither is given - created from the model name
+    passed on the command line. Remaining command-line arguments are applied to
+    the config as overrides.
+
+    Raises:
+        ValueError: No config file, no continue path and no model name were given.
+    """
+    # Default to the first GPU, but keep a device selection made by the caller.
+    os.environ.setdefault("CUDA_VISIBLE_DEVICES", "0")
+
+    # init trainer args
+    train_args = TrainTTSArgs()
+    parser = train_args.init_argparse(arg_prefix="")
+
+    # override trainer args from command-line args
+    args, config_overrides = parser.parse_known_args()
+    train_args.parse_args(args)
+
+    # load config.json and register
+    if args.config_path:
+        # init from a file
+        config = load_config(args.config_path)
+    elif args.continue_path:
+        # continue from a prev experiment
+        config = load_config(os.path.join(args.continue_path, "config.json"))
+    else:
+        # init from console args
+        from TTS.config.shared_configs import BaseTrainingConfig  # pylint: disable=import-outside-toplevel
+
+        config_base = BaseTrainingConfig()
+        config_base.parse_known_args(config_overrides)
+        if not config_base.model:
+            raise ValueError(" [!] Provide `--config_path`, `--continue_path` or a model name.")
+        config = register_config(config_base.model)()
+    if len(config_overrides) > 0:
+        config.parse_known_args(config_overrides, relaxed_parser=True)
+
+    # load training samples
+    train_samples, eval_samples = load_tts_samples(
+        config.datasets,
+        eval_split=True,
+        eval_split_max_size=config.eval_split_max_size,
+        eval_split_size=config.eval_split_size,
+    )
+
+    # init the model from config
+    model = setup_model(config, train_samples + eval_samples)
+
+    # init the trainer and 🚀
+    trainer = Trainer(
+        train_args,
+        model.config,
+        config.output_path,
+        model=model,
+        train_samples=train_samples,
+        eval_samples=eval_samples,
+        parse_command_line_args=False,
+    )
+    trainer.fit()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/TTS/bin/train_vocoder.py b/TTS/bin/train_vocoder.py
new file mode 100644
index 0000000000000000000000000000000000000000..0da9cf619de22202b966ed89089d1536772efaaa
--- /dev/null
+++ b/TTS/bin/train_vocoder.py
@@ -0,0 +1,81 @@
+import os
+from dataclasses import dataclass, field
+import sys
+
+sys.path.append('.')
+from trainer import Trainer, TrainerArgs
+import torch
+from TTS.config import load_config, register_config
+from TTS.utils.audio import AudioProcessor
+from TTS.vocoder.datasets.preprocess import load_wav_data, load_wav_feat_data
+from TTS.vocoder.models import setup_model
+
+
+@dataclass
+class TrainVocoderArgs(TrainerArgs):
+    """`TrainerArgs` extended with the path of the model config file."""
+
+    # Path to the config file; `None` when it is not given.
+    config_path: str = field(default=None, metadata={"help": "Path to the config file."})
+
+
+def main():
+    """Run `vocoder` model training directly by a `config.json` file.
+
+    The config is read from `--config_path`, or from the `config.json` stored in
+    `--continue_path`, or - when neither is given - created from the model name
+    passed on the command line. Remaining command-line arguments are applied to
+    the config as overrides.
+
+    Raises:
+        ValueError: No config file, no continue path and no model name were given.
+    """
+    # Default to the first GPU, but keep a device selection made by the caller.
+    os.environ.setdefault("CUDA_VISIBLE_DEVICES", "0")
+
+    # init trainer args
+    train_args = TrainVocoderArgs()
+    parser = train_args.init_argparse(arg_prefix="")
+
+    # override trainer args from command-line args
+    args, config_overrides = parser.parse_known_args()
+    train_args.parse_args(args)
+
+    # load config.json and register
+    if args.config_path:
+        # init from a file
+        config = load_config(args.config_path)
+    elif args.continue_path:
+        # continue from a prev experiment
+        config = load_config(os.path.join(args.continue_path, "config.json"))
+    else:
+        # init from console args
+        from TTS.config.shared_configs import BaseTrainingConfig  # pylint: disable=import-outside-toplevel
+
+        config_base = BaseTrainingConfig()
+        config_base.parse_known_args(config_overrides)
+        if not config_base.model:
+            raise ValueError(" [!] Provide `--config_path`, `--continue_path` or a model name.")
+        config = register_config(config_base.model)()
+    if len(config_overrides) > 0:
+        config.parse_known_args(config_overrides, relaxed_parser=True)
+
+    # load training samples
+    if "feature_path" in config and config.feature_path:
+        # load pre-computed features
+        print(f" > Loading features from: {config.feature_path}")
+        eval_samples, train_samples = load_wav_feat_data(config.data_path, config.feature_path, config.eval_split_size)
+    else:
+        # load data raw wav files
+        eval_samples, train_samples = load_wav_data(config.data_path, config.eval_split_size)
+
+    # setup audio processor
+    ap = AudioProcessor(**config.audio)
+
+    # init the model from config
+    model = setup_model(config)
+
+    # init the trainer and 🚀
+    trainer = Trainer(
+        train_args,
+        config,
+        config.output_path,
+        model=model,
+        train_samples=train_samples,
+        eval_samples=eval_samples,
+        training_assets={"audio_processor": ap},
+        parse_command_line_args=False,
+    )
+    trainer.fit()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/TTS/bin/tune_wavegrad.py b/TTS/bin/tune_wavegrad.py
new file mode 100644
index 0000000000000000000000000000000000000000..09582cea7c7962b098efcde5754a02573d18264a
--- /dev/null
+++ b/TTS/bin/tune_wavegrad.py
@@ -0,0 +1,103 @@
+"""Search a good noise schedule for WaveGrad for a given number of inference iterations"""
+import argparse
+from itertools import product as cartesian_product
+
+import numpy as np
+import torch
+from torch.utils.data import DataLoader
+from tqdm import tqdm
+
+from TTS.config import load_config
+from TTS.utils.audio import AudioProcessor
+from TTS.vocoder.datasets.preprocess import load_wav_data
+from TTS.vocoder.datasets.wavegrad_dataset import WaveGradDataset
+from TTS.vocoder.models import setup_model
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--model_path", type=str, help="Path to model checkpoint.")
+    parser.add_argument("--config_path", type=str, help="Path to model config file.")
+    parser.add_argument("--data_path", type=str, help="Path to data directory.")
+    parser.add_argument("--output_path", type=str, help="path for output file including file name and extension.")
+    parser.add_argument(
+        "--num_iter",
+        type=int,
+        help="Number of model inference iterations that you like to optimize noise schedule for.",
+    )
+    parser.add_argument("--use_cuda", action="store_true", help="enable CUDA.")
+    parser.add_argument("--num_samples", type=int, default=1, help="Number of datasamples used for inference.")
+    parser.add_argument(
+        "--search_depth",
+        type=int,
+        default=3,
+        help="Search granularity. Increasing this increases the run-time exponentially.",
+    )
+
+    # load config
+    args = parser.parse_args()
+    config = load_config(args.config_path)
+
+    # setup audio processor
+    ap = AudioProcessor(**config.audio)
+
+    # load dataset
+    _, train_data = load_wav_data(args.data_path, 0)
+    train_data = train_data[: args.num_samples]
+    dataset = WaveGradDataset(
+        ap=ap,
+        items=train_data,
+        seq_len=-1,
+        hop_len=ap.hop_length,
+        pad_short=config.pad_short,
+        conv_pad=config.conv_pad,
+        is_training=True,
+        return_segments=False,
+        use_noise_augment=False,
+        use_cache=False,
+        verbose=True,
+    )
+    loader = DataLoader(
+        dataset,
+        batch_size=1,
+        shuffle=False,
+        collate_fn=dataset.collate_full_clips,
+        drop_last=False,
+        num_workers=config.num_loader_workers,
+        pin_memory=False,
+    )
+
+    # setup the model
+    model = setup_model(config)
+    if args.model_path:
+        # Restore the trained weights; without this the schedule is tuned on a
+        # freshly initialised model.
+        # NOTE(review): assumes the vocoder model exposes
+        # `load_checkpoint(config, checkpoint_path, eval=...)` - confirm.
+        model.load_checkpoint(config, args.model_path, eval=True)
+    if args.use_cuda:
+        model.cuda()
+
+    # setup optimization parameters
+    base_values = sorted(10 * np.random.uniform(size=args.search_depth))
+    print(f" > base values: {base_values}")
+    exponents = 10 ** np.linspace(-6, -1, num=args.num_iter)
+    best_error = float("inf")
+    best_schedule = None  # pylint: disable=C0103
+    total_search_iter = len(base_values) ** args.num_iter
+    for base in tqdm(cartesian_product(base_values, repeat=args.num_iter), total=total_search_iter):
+        beta = exponents * base
+        model.compute_noise_level(beta)
+
+        # Score a candidate on ALL selected clips, so that the winner is not
+        # decided by whichever single clip happens to give the lowest error.
+        schedule_error = 0.0
+        for data in loader:
+            mel, audio = data
+            y_hat = model.inference(mel.cuda() if args.use_cuda else mel)
+
+            if args.use_cuda:
+                y_hat = y_hat.cpu()
+            y_hat = y_hat.numpy()
+
+            # re-compute the spectrogram of every generated clip; the last frame is dropped
+            mel_hat = []
+            for i in range(y_hat.shape[0]):
+                m = ap.melspectrogram(y_hat[i, 0])[:, :-1]
+                mel_hat.append(torch.from_numpy(m))
+
+            mel_hat = torch.stack(mel_hat)
+            schedule_error += torch.sum((mel - mel_hat) ** 2).item()
+
+        if schedule_error < best_error:
+            best_error = schedule_error
+            best_schedule = {"beta": beta}
+            print(f" > Found a better schedule. - MSE: {best_error}")
+            np.save(args.output_path, best_schedule)
diff --git a/TTS/config/.ipynb_checkpoints/__init__-checkpoint.py b/TTS/config/.ipynb_checkpoints/__init__-checkpoint.py
new file mode 100644
index 0000000000000000000000000000000000000000..067c32d97db7a8a9a16ab7990a057f32bbd16238
--- /dev/null
+++ b/TTS/config/.ipynb_checkpoints/__init__-checkpoint.py
@@ -0,0 +1,132 @@
+import json
+import os
+import re
+from typing import Dict
+
+import fsspec
+import yaml
+from coqpit import Coqpit
+
+from TTS.config.shared_configs import *
+from TTS.utils.generic_utils import find_module
+
+
+def read_json_with_comments(json_path):
+    """Load a JSON file that may contain `//` comments (kept for backward compat.).
+
+    Backslash-newline continuations are joined and `//` comments are removed
+    before the text is parsed. A `//` inside a double-quoted string, e.g. in a
+    URL, is part of the value and is left untouched.
+
+    Args:
+        json_path (str): Path of the file; it is opened with `fsspec`.
+
+    Returns:
+        The data parsed by `json.loads`.
+
+    Raises:
+        json.JSONDecodeError: The text is not valid JSON after comment removal.
+    """
+    # fallback to json
+    with fsspec.open(json_path, "r", encoding="utf-8") as f:
+        input_str = f.read()
+    # handle comments
+    input_str = re.sub(r"\\\n", "", input_str)
+    # String literals are matched first and put back unchanged, so only a `//`
+    # outside of a string starts a comment.
+    input_str = re.sub(
+        r'"(?:\\.|[^"\\\n])*"|//[^\n]*',
+        lambda match: match.group(0) if match.group(0).startswith('"') else "",
+        input_str,
+    )
+    data = json.loads(input_str)
+    return data
+
+
+def register_config(model_name: str) -> Coqpit:
+    """Look up the config class that belongs to a model name.
+
+    `<model_name>_config` is looked up with `find_module` in the tts, vocoder and
+    encoder config packages. All three are tried; the last successful lookup is
+    the one returned.
+
+    Args:
+        model_name (str): Model name.
+
+    Raises:
+        ModuleNotFoundError: No matching config for the model name.
+
+    Returns:
+        Coqpit: config class.
+    """
+    target = model_name + "_config"
+    found = None
+    for package in ("TTS.tts.configs", "TTS.vocoder.configs", "TTS.encoder.configs"):
+        try:
+            found = find_module(package, target)
+        except ModuleNotFoundError:
+            continue
+    if found is not None:
+        return found
+    raise ModuleNotFoundError(f" [!] Config for {model_name} cannot be found.")
+
+
+def _process_model_name(config_dict: Dict) -> str:
+    """Return the model name of a config with the old `vocoder` suffixes removed.
+
+    The name is read from `"model"` when that key exists and from
+    `"generator_model"` otherwise.
+
+    Args:
+        config_dict (Dict): A dictionary including the config fields.
+
+    Returns:
+        str: Model name without `_generator` / `_discriminator`.
+    """
+    if "model" in config_dict:
+        name = config_dict["model"]
+    else:
+        name = config_dict["generator_model"]
+    for legacy_suffix in ("_generator", "_discriminator"):
+        name = name.replace(legacy_suffix, "")
+    return name
+
+
+def load_config(config_path: str) -> Coqpit:
+    """Build a TTS config object from a `json` or `yaml` file.
+
+    The file is read into a `dict`, its model name selects the Config class, and
+    an instance of that class is filled from the `dict`. A `.json` file that the
+    strict parser rejects is read again with `read_json_with_comments`.
+
+    Args:
+        config_path (str): path to the config file.
+
+    Raises:
+        TypeError: given config file has an unknown type.
+
+    Returns:
+        Coqpit: TTS config object.
+    """
+    ext = os.path.splitext(config_path)[1]
+    if ext == ".json":
+        try:
+            with fsspec.open(config_path, "r", encoding="utf-8") as f:
+                data = json.load(f)
+        except json.decoder.JSONDecodeError:
+            # backwards compat.
+            data = read_json_with_comments(config_path)
+    elif ext in (".yml", ".yaml"):
+        with fsspec.open(config_path, "r", encoding="utf-8") as f:
+            data = yaml.safe_load(f)
+    else:
+        raise TypeError(f" [!] Unknown config file type {ext}")
+
+    config_dict = {}
+    config_dict.update(data)
+    # the config classes are registered under lower-case model names
+    config = register_config(_process_model_name(config_dict).lower())()
+    config.from_dict(config_dict)
+    return config
+
+
+def check_config_and_model_args(config, arg_name, value):
+    """Tell whether `arg_name` is set to `value`.
+
+    `config.model_args` is consulted first; `config` itself is used when the
+    argument is not found there. The result is False when the argument exists in
+    neither place. This bridges models with and without `model_args`.
+
+    TODO: Remove this in the future with a unified approach.
+    """
+    if hasattr(config, "model_args") and arg_name in config.model_args:
+        return config.model_args[arg_name] == value
+    return hasattr(config, arg_name) and config[arg_name] == value
+
+
+def get_from_config_or_model_args(config, arg_name):
+    """Return `arg_name` from `config.model_args` when present there, else from `config`."""
+    if hasattr(config, "model_args") and arg_name in config.model_args:
+        return config.model_args[arg_name]
+    return config[arg_name]
+
+
+def get_from_config_or_model_args_with_default(config, arg_name, def_val):
+    """Return `arg_name` from `config.model_args`, else from `config`, else `def_val`."""
+    if hasattr(config, "model_args") and arg_name in config.model_args:
+        return config.model_args[arg_name]
+    return config[arg_name] if hasattr(config, arg_name) else def_val
diff --git a/TTS/config/.ipynb_checkpoints/config-checkpoint.json b/TTS/config/.ipynb_checkpoints/config-checkpoint.json
new file mode 100644
index 0000000000000000000000000000000000000000..a006ea683076733f0992358b3fc446c198d6b04d
--- /dev/null
+++ b/TTS/config/.ipynb_checkpoints/config-checkpoint.json
@@ -0,0 +1,20 @@
+{
+   "model": "glow_tts",
+   "batch_size": 32,
+   "eval_batch_size": 16,
+   "num_loader_workers": 4,
+   "num_eval_loader_workers": 4,
+   "run_eval": true,
+   "test_delay_epochs": -1,
+   "epochs": 1000,
+   "text_cleaner": "english_cleaners",
+   "use_phonemes": false,
+   "phoneme_language": "en-us",
+   "phoneme_cache_path": "phoneme_cache",
+   "print_step": 25,
+   "print_eval": true,
+   "mixed_precision": false,
+   "output_path": "recipes/ljspeech/glow_tts/",
+   "test_sentences": ["Test this sentence.", "This test sentence.", "Sentence this test."],
+   "datasets":[{"formatter": "infore", "meta_file_train":"scripts.csv", "path":"/Users/saltlux/Code/SpeechSynthesis/Dataset/25hours/"}]
+}
\ No newline at end of file
diff --git a/TTS/config/.ipynb_checkpoints/shared_configs-checkpoint.py b/TTS/config/.ipynb_checkpoints/shared_configs-checkpoint.py
new file mode 100644
index 0000000000000000000000000000000000000000..77583332340182100a6e8cd051e31d3171e17a4b
--- /dev/null
+++ b/TTS/config/.ipynb_checkpoints/shared_configs-checkpoint.py
@@ -0,0 +1,264 @@
+from dataclasses import asdict, dataclass
+from typing import List
+
+from coqpit import Coqpit, check_argument
+from trainer import TrainerConfig
+
+
+@dataclass
+class BaseAudioConfig(Coqpit):
+    """Base config to define audio processing parameters. It is used to initialize
+    ```TTS.utils.audio.AudioProcessor.```
+
+    Args:
+        fft_size (int):
+            Number of STFT frequency levels aka.size of the linear spectrogram frame. Defaults to 1024.
+
+        win_length (int):
+            Each frame of audio is windowed by window of length ```win_length``` and then padded with zeros to match
+            ```fft_size```. Defaults to 1024.
+
+        hop_length (int):
+            Number of audio samples between adjacent STFT columns. Defaults to 256.
+
+        frame_shift_ms (int):
+            Set ```hop_length``` based on milliseconds and sampling rate. Defaults to None.
+
+        frame_length_ms (int):
+            Set ```win_length``` based on milliseconds and sampling rate. Defaults to None.
+
+        stft_pad_mode (str):
+            Padding method used in STFT. 'reflect' or 'center'. Defaults to 'reflect'.
+
+        sample_rate (int):
+            Audio sampling rate. Defaults to 22050.
+
+        resample (bool):
+            Enable / Disable resampling audio to ```sample_rate```. Defaults to ```False```.
+
+        preemphasis (float):
+            Preemphasis coefficient. Defaults to 0.0.
+
+        ref_level_db (int):
+            Reference Db level to rebase the audio signal and ignore the level below. 20Db is assumed the sound of air.
+            Defaults to 20.
+
+        do_sound_norm (bool):
+            Enable / Disable sound normalization to reconcile the volume differences among samples. Defaults to False.
+
+        log_func (str):
+            Numpy log function used for amplitude to DB conversion. Defaults to 'np.log10'.
+
+        do_trim_silence (bool):
+            Enable / Disable trimming silences at the beginning and the end of the audio clip. Defaults to ```True```.
+
+        do_amp_to_db_linear (bool, optional):
+            enable/disable amplitude to dB conversion of linear spectrograms. Defaults to True.
+
+        do_amp_to_db_mel (bool, optional):
+            enable/disable amplitude to dB conversion of mel spectrograms. Defaults to True.
+
+        pitch_fmax (float, optional):
+            Maximum frequency of the F0 frames. Defaults to ```640```.
+
+        pitch_fmin (float, optional):
+            Minimum frequency of the F0 frames. Defaults to ```1```.
+
+        trim_db (int):
+            Silence threshold used for silence trimming. Defaults to 45.
+
+        do_rms_norm (bool, optional):
+            enable/disable RMS volume normalization when loading an audio file. Defaults to False.
+
+        db_level (float, optional):
+            dB level used for rms normalization. The range is -99 to 0. Defaults to None.
+
+        power (float):
+            Exponent used for expanding spectrogram levels before running Griffin Lim. It helps to reduce the
+            artifacts in the synthesized voice. Defaults to 1.5.
+
+        griffin_lim_iters (int):
+            Number of Griffin Lim iterations. Defaults to 60.
+
+        num_mels (int):
+            Number of mel-basis frames that defines the frame lengths of each mel-spectrogram frame. Defaults to 80.
+
+        mel_fmin (float): Min frequency level used for the mel-basis filters. ~50 for male and ~95 for female voices.
+            It needs to be adjusted for a dataset. Defaults to 0.
+
+        mel_fmax (float):
+            Max frequency level used for the mel-basis filters. It needs to be adjusted for a dataset.
+            Defaults to None.
+
+        spec_gain (int):
+            Gain applied when converting amplitude to DB. Defaults to 20.
+
+        signal_norm (bool):
+            enable/disable signal normalization. Defaults to True.
+
+        min_level_db (int):
+            minimum db threshold for the computed melspectrograms. Defaults to -100.
+
+        symmetric_norm (bool):
+            enable/disable symmetric normalization. If set True normalization is performed in the range [-k, k] else
+            [0, k], Defaults to True.
+
+        max_norm (float):
+            ```k``` defining the normalization range. Defaults to 4.0.
+
+        clip_norm (bool):
+            enable/disable clipping the out-of-range values in the normalized audio signal. Defaults to True.
+
+        stats_path (str):
+            Path to the computed stats file. Defaults to None.
+    """
+
+    # stft parameters
+    fft_size: int = 1024
+    win_length: int = 1024
+    hop_length: int = 256
+    frame_shift_ms: int = None
+    frame_length_ms: int = None
+    stft_pad_mode: str = "reflect"
+    # audio processing parameters
+    sample_rate: int = 22050
+    resample: bool = False
+    preemphasis: float = 0.0
+    ref_level_db: int = 20
+    do_sound_norm: bool = False
+    log_func: str = "np.log10"
+    # silence trimming
+    do_trim_silence: bool = True
+    trim_db: int = 45
+    # rms volume normalization
+    do_rms_norm: bool = False
+    db_level: float = None
+    # griffin-lim params
+    power: float = 1.5
+    griffin_lim_iters: int = 60
+    # mel-spec params
+    num_mels: int = 80
+    mel_fmin: float = 0.0
+    mel_fmax: float = None
+    spec_gain: int = 20
+    do_amp_to_db_linear: bool = True
+    do_amp_to_db_mel: bool = True
+    # f0 params
+    pitch_fmax: float = 640.0
+    pitch_fmin: float = 1.0
+    # normalization params
+    signal_norm: bool = True
+    min_level_db: int = -100
+    symmetric_norm: bool = True
+    max_norm: float = 4.0
+    clip_norm: bool = True
+    stats_path: str = None
+
+    def check_values(
+        self,
+    ):
+        """Check config fields with `check_argument`, using the bounds listed below."""
+        c = asdict(self)
+        # NOTE(review): the upper bound 4058 looks like a typo for 4096 - confirm before changing.
+        check_argument("num_mels", c, restricted=True, min_val=10, max_val=2056)
+        check_argument("fft_size", c, restricted=True, min_val=128, max_val=4058)
+        check_argument("sample_rate", c, restricted=True, min_val=512, max_val=100000)
+        # the `*_ms` fields name `win_length` / `hop_length` as their alternative
+        check_argument(
+            "frame_length_ms",
+            c,
+            restricted=True,
+            min_val=10,
+            max_val=1000,
+            alternative="win_length",
+        )
+        check_argument("frame_shift_ms", c, restricted=True, min_val=1, max_val=1000, alternative="hop_length")
+        check_argument("preemphasis", c, restricted=True, min_val=0, max_val=1)
+        check_argument("min_level_db", c, restricted=True, min_val=-1000, max_val=10)
+        check_argument("ref_level_db", c, restricted=True, min_val=0, max_val=1000)
+        check_argument("power", c, restricted=True, min_val=1, max_val=5)
+        check_argument("griffin_lim_iters", c, restricted=True, min_val=10, max_val=1000)
+
+        # normalization parameters
+        check_argument("signal_norm", c, restricted=True)
+        check_argument("symmetric_norm", c, restricted=True)
+        check_argument("max_norm", c, restricted=True, min_val=0.1, max_val=1000)
+        check_argument("clip_norm", c, restricted=True)
+        check_argument("mel_fmin", c, restricted=True, min_val=0.0, max_val=1000)
+        check_argument("mel_fmax", c, restricted=True, min_val=500.0, allow_none=True)
+        check_argument("spec_gain", c, restricted=True, min_val=1, max_val=100)
+        check_argument("do_trim_silence", c, restricted=True)
+        check_argument("trim_db", c, restricted=True)
+
+
+@dataclass
+class BaseDatasetConfig(Coqpit):
+    """Base config for TTS datasets.
+
+    Args:
+        formatter (str):
+            Formatter name that defines used formatter in ```TTS.tts.datasets.formatter```. Defaults to `""`.
+
+        dataset_name (str):
+            Unique name for the dataset. Defaults to `""`.
+
+        path (str):
+            Root path to the dataset files. Defaults to `""`.
+
+        meta_file_train (str):
+            Name of the dataset meta file. Or a list of speakers to be ignored at training for multi-speaker datasets.
+            Defaults to `""`.
+
+        ignored_speakers (List):
+            List of speakers IDs that are not used at the training. Default None.
+
+        language (str):
+            Language code of the dataset. If defined, it overrides `phoneme_language`. Defaults to `""`.
+
+        meta_file_val (str):
+            Name of the dataset meta file that defines the instances used at validation. Defaults to `""`.
+
+        meta_file_attn_mask (str):
+            Path to the file that lists the attention mask files used with models that require attention masks to
+            train the duration predictor. Defaults to `""`.
+    """
+
+    formatter: str = ""
+    dataset_name: str = ""
+    path: str = ""
+    meta_file_train: str = ""
+    ignored_speakers: List[str] = None
+    language: str = ""
+    meta_file_val: str = ""
+    meta_file_attn_mask: str = ""
+
+    def check_values(
+        self,
+    ):
+        """Check config fields with `check_argument`.
+
+        `formatter`, `path` and `meta_file_train` are checked as restricted, the
+        validation and attention-mask meta files as not restricted.
+        """
+        c = asdict(self)
+        check_argument("formatter", c, restricted=True)
+        check_argument("path", c, restricted=True)
+        check_argument("meta_file_train", c, restricted=True)
+        check_argument("meta_file_val", c, restricted=False)
+        check_argument("meta_file_attn_mask", c, restricted=False)
+
+
+@dataclass
+class BaseTrainingConfig(TrainerConfig):
+    """Base config to define the basic 🐸TTS training parameters that are shared
+    among all the models. It is based on ```trainer.TrainerConfig```.
+
+    Args:
+        model (str):
+            Name of the model that is used in the training. Defaults to None.
+
+        num_loader_workers (int):
+            Number of workers for training time dataloader. Defaults to 0.
+
+        num_eval_loader_workers (int):
+            Number of workers for evaluation time dataloader. Defaults to 0.
+
+        use_noise_augment (bool):
+            Noise augmentation switch; it is only declared here, not interpreted. Defaults to False.
+    """
+
+    model: str = None
+    # dataloading
+    num_loader_workers: int = 0
+    num_eval_loader_workers: int = 0
+    use_noise_augment: bool = False
diff --git a/TTS/config/__init__.py b/TTS/config/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..067c32d97db7a8a9a16ab7990a057f32bbd16238
--- /dev/null
+++ b/TTS/config/__init__.py
@@ -0,0 +1,132 @@
+import json
+import os
+import re
+from typing import Dict
+
+import fsspec
+import yaml
+from coqpit import Coqpit
+
+from TTS.config.shared_configs import *
+from TTS.utils.generic_utils import find_module
+
+
+def read_json_with_comments(json_path):
+    """Load a JSON file that may contain `//` comments (kept for backward compat.).
+
+    Backslash-newline continuations are joined and `//` comments are removed
+    before the text is parsed. A `//` inside a double-quoted string, e.g. in a
+    URL, is part of the value and is left untouched.
+
+    Args:
+        json_path (str): Path of the file; it is opened with `fsspec`.
+
+    Returns:
+        The data parsed by `json.loads`.
+
+    Raises:
+        json.JSONDecodeError: The text is not valid JSON after comment removal.
+    """
+    # fallback to json
+    with fsspec.open(json_path, "r", encoding="utf-8") as f:
+        input_str = f.read()
+    # handle comments
+    input_str = re.sub(r"\\\n", "", input_str)
+    # String literals are matched first and put back unchanged, so only a `//`
+    # outside of a string starts a comment.
+    input_str = re.sub(
+        r'"(?:\\.|[^"\\\n])*"|//[^\n]*',
+        lambda match: match.group(0) if match.group(0).startswith('"') else "",
+        input_str,
+    )
+    data = json.loads(input_str)
+    return data
+
+
+def register_config(model_name: str) -> Coqpit:
+    """Look up the config class that belongs to a model name.
+
+    `<model_name>_config` is looked up with `find_module` in the tts, vocoder and
+    encoder config packages. All three are tried; the last successful lookup is
+    the one returned.
+
+    Args:
+        model_name (str): Model name.
+
+    Raises:
+        ModuleNotFoundError: No matching config for the model name.
+
+    Returns:
+        Coqpit: config class.
+    """
+    target = model_name + "_config"
+    found = None
+    for package in ("TTS.tts.configs", "TTS.vocoder.configs", "TTS.encoder.configs"):
+        try:
+            found = find_module(package, target)
+        except ModuleNotFoundError:
+            continue
+    if found is not None:
+        return found
+    raise ModuleNotFoundError(f" [!] Config for {model_name} cannot be found.")
+
+
+def _process_model_name(config_dict: Dict) -> str:
+    """Return the model name of a config with the old `vocoder` suffixes removed.
+
+    The name is read from `"model"` when that key exists and from
+    `"generator_model"` otherwise.
+
+    Args:
+        config_dict (Dict): A dictionary including the config fields.
+
+    Returns:
+        str: Model name without `_generator` / `_discriminator`.
+    """
+    if "model" in config_dict:
+        name = config_dict["model"]
+    else:
+        name = config_dict["generator_model"]
+    for legacy_suffix in ("_generator", "_discriminator"):
+        name = name.replace(legacy_suffix, "")
+    return name
+
+
+def load_config(config_path: str) -> Coqpit:
+    """Build a TTS config object from a `json` or `yaml` file.
+
+    The file is read into a `dict`, its model name selects the Config class, and
+    an instance of that class is filled from the `dict`. A `.json` file that the
+    strict parser rejects is read again with `read_json_with_comments`.
+
+    Args:
+        config_path (str): path to the config file.
+
+    Raises:
+        TypeError: given config file has an unknown type.
+
+    Returns:
+        Coqpit: TTS config object.
+    """
+    ext = os.path.splitext(config_path)[1]
+    if ext == ".json":
+        try:
+            with fsspec.open(config_path, "r", encoding="utf-8") as f:
+                data = json.load(f)
+        except json.decoder.JSONDecodeError:
+            # backwards compat.
+            data = read_json_with_comments(config_path)
+    elif ext in (".yml", ".yaml"):
+        with fsspec.open(config_path, "r", encoding="utf-8") as f:
+            data = yaml.safe_load(f)
+    else:
+        raise TypeError(f" [!] Unknown config file type {ext}")
+
+    config_dict = {}
+    config_dict.update(data)
+    # the config classes are registered under lower-case model names
+    config = register_config(_process_model_name(config_dict).lower())()
+    config.from_dict(config_dict)
+    return config
+
+
+def check_config_and_model_args(config, arg_name, value):
+    """Tell whether `arg_name` is set to `value`.
+
+    `config.model_args` is consulted first; `config` itself is used when the
+    argument is not found there. The result is False when the argument exists in
+    neither place. This bridges models with and without `model_args`.
+
+    TODO: Remove this in the future with a unified approach.
+    """
+    if hasattr(config, "model_args") and arg_name in config.model_args:
+        return config.model_args[arg_name] == value
+    return hasattr(config, arg_name) and config[arg_name] == value
+
+
+def get_from_config_or_model_args(config, arg_name):
+    """Return `arg_name` from `config.model_args` when present there, else from `config`."""
+    if hasattr(config, "model_args") and arg_name in config.model_args:
+        return config.model_args[arg_name]
+    return config[arg_name]
+
+
+def get_from_config_or_model_args_with_default(config, arg_name, def_val):
+    """Return `arg_name` from `config.model_args`, else from `config`, else `def_val`."""
+    if hasattr(config, "model_args") and arg_name in config.model_args:
+        return config.model_args[arg_name]
+    return config[arg_name] if hasattr(config, arg_name) else def_val
diff --git a/TTS/config/__pycache__/__init__.cpython-310.pyc b/TTS/config/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f42665724ab5374454c54b93b3558161ee2ba58a
Binary files /dev/null and b/TTS/config/__pycache__/__init__.cpython-310.pyc differ
diff --git a/TTS/config/__pycache__/__init__.cpython-37.pyc b/TTS/config/__pycache__/__init__.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..824a056dc849358215aaf4f9b23e8a4ae4f61c17
Binary files /dev/null and b/TTS/config/__pycache__/__init__.cpython-37.pyc differ
diff --git a/TTS/config/__pycache__/__init__.cpython-38.pyc b/TTS/config/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d95c7be8634dea62fa77e0534d28f0ef231faab4
Binary files /dev/null and b/TTS/config/__pycache__/__init__.cpython-38.pyc differ
diff --git a/TTS/config/__pycache__/__init__.cpython-39.pyc b/TTS/config/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..62d466d1353fd09afa49ff7aea8ccedd073acef5
Binary files /dev/null and b/TTS/config/__pycache__/__init__.cpython-39.pyc differ
diff --git a/TTS/config/__pycache__/shared_configs.cpython-310.pyc b/TTS/config/__pycache__/shared_configs.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7386ace97522be991a089b4bef785e2a0275abce
Binary files /dev/null and b/TTS/config/__pycache__/shared_configs.cpython-310.pyc differ
diff --git a/TTS/config/__pycache__/shared_configs.cpython-37.pyc b/TTS/config/__pycache__/shared_configs.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..3355fd4d8718e3b4ac085d3e0dfa0b339e166261
Binary files /dev/null and b/TTS/config/__pycache__/shared_configs.cpython-37.pyc differ
diff --git a/TTS/config/__pycache__/shared_configs.cpython-38.pyc b/TTS/config/__pycache__/shared_configs.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..1428e925e1ba8c0cc7a7ac4fd0b7d44dfef6e63b
Binary files /dev/null and b/TTS/config/__pycache__/shared_configs.cpython-38.pyc differ
diff --git a/TTS/config/__pycache__/shared_configs.cpython-39.pyc b/TTS/config/__pycache__/shared_configs.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..962f28c7a45c1c765cb22d5023fc95a4e84fddef
Binary files /dev/null and b/TTS/config/__pycache__/shared_configs.cpython-39.pyc differ
diff --git a/TTS/config/config.json b/TTS/config/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..c8f881257fb09b684d845302da35b3e1051384ad
--- /dev/null
+++ b/TTS/config/config.json
@@ -0,0 +1,25 @@
+{
+   "model": "glow_tts",
+   "batch_size": 32,
+   "eval_batch_size": 16,
+   "num_loader_workers": 4,
+   "num_eval_loader_workers": 4,
+   "run_eval": true,
+   "test_delay_epochs": -1,
+   "epochs": 1000,
+   "text_cleaner": "english_cleaners",
+   "use_phonemes": false,
+   "phoneme_language": "en-us",
+   "phoneme_cache_path": "phoneme_cache",
+   "print_step": 25,
+   "print_eval": true,
+   "mixed_precision": false,
+   "output_path": "recipes/ljspeech/glow_tts/",
+   "test_sentences": ["Test this sentence.", "This test sentence.", "Sentence this test."],
+   "datasets":[{"formatter": "infore", "meta_file_train":"scripts.csv", "path":"/Users/saltlux/Code/SpeechSynthesis/Dataset/25hours/"}],
+   "characters":{
+        "characters": "abcdeghiklmnopqrstuvxyàáâãèéêìíòóôõùúýăđĩũơưạảấầẩẫậắằẳẵặẹẻẽếềểễệỉịọỏốồổỗộớờởỡợụủứừửữựỳỵỷỹ",
+        "phonemes": null,
+        "unique": true
+    }
+}
\ No newline at end of file
diff --git a/TTS/config/shared_configs.py b/TTS/config/shared_configs.py
new file mode 100644
index 0000000000000000000000000000000000000000..77583332340182100a6e8cd051e31d3171e17a4b
--- /dev/null
+++ b/TTS/config/shared_configs.py
@@ -0,0 +1,264 @@
+from dataclasses import asdict, dataclass
+from typing import List
+
+from coqpit import Coqpit, check_argument
+from trainer import TrainerConfig
+
+
+@dataclass
+class BaseAudioConfig(Coqpit):
+    """Base config to define audio processing parameters. It is used to initialize
+    ```TTS.utils.audio.AudioProcessor```.
+
+    Args:
+        fft_size (int):
+            Number of STFT frequency levels, i.e. the size of the linear spectrogram frame. Defaults to 1024.
+
+        win_length (int):
+            Each frame of audio is windowed by window of length ```win_length``` and then padded with zeros to match
+            ```fft_size```. Defaults to 1024.
+
+        hop_length (int):
+            Number of audio samples between adjacent STFT columns. Defaults to 256.
+
+        frame_shift_ms (int):
+            Set ```hop_length``` based on milliseconds and sampling rate. Defaults to None.
+
+        frame_length_ms (int):
+            Set ```win_length``` based on milliseconds and sampling rate. Defaults to None.
+
+        stft_pad_mode (str):
+            Padding method used in STFT. 'reflect' or 'center'. Defaults to 'reflect'.
+
+        sample_rate (int):
+            Audio sampling rate. Defaults to 22050.
+
+        resample (bool):
+            Enable / Disable resampling audio to ```sample_rate```. Defaults to ```False```.
+
+        preemphasis (float):
+            Preemphasis coefficient. Defaults to 0.0.
+
+        ref_level_db (int):
+            Reference dB level to rebase the audio signal and ignore the level below. 20 dB is assumed the sound of air.
+            Defaults to 20.
+
+        do_sound_norm (bool):
+            Enable / Disable sound normalization to reconcile the volume differences among samples. Defaults to False.
+
+        log_func (str):
+            Numpy log function used for amplitude to DB conversion. Defaults to 'np.log10'.
+
+        do_trim_silence (bool):
+            Enable / Disable trimming silences at the beginning and the end of the audio clip. Defaults to ```True```.
+
+        do_amp_to_db_linear (bool, optional):
+            enable/disable amplitude to dB conversion of linear spectrograms. Defaults to True.
+
+        do_amp_to_db_mel (bool, optional):
+            enable/disable amplitude to dB conversion of mel spectrograms. Defaults to True.
+
+        pitch_fmax (float, optional):
+            Maximum frequency of the F0 frames. Defaults to ```640```.
+
+        pitch_fmin (float, optional):
+            Minimum frequency of the F0 frames. Defaults to ```1```.
+
+        trim_db (int):
+            Silence threshold used for silence trimming. Defaults to 45.
+
+        do_rms_norm (bool, optional):
+            enable/disable RMS volume normalization when loading an audio file. Defaults to False.
+
+        db_level (float, optional):
+            dB level used for rms normalization. The range is -99 to 0. Defaults to None.
+
+        power (float):
+            Exponent used for expanding spectrogram levels before running Griffin-Lim. It helps to reduce the
+            artifacts in the synthesized voice. Defaults to 1.5.
+
+        griffin_lim_iters (int):
+            Number of Griffin-Lim iterations. Defaults to 60.
+
+        num_mels (int):
+            Number of mel-basis frames that defines the frame lengths of each mel-spectrogram frame. Defaults to 80.
+
+        mel_fmin (float): Min frequency level used for the mel-basis filters. ~50 for male and ~95 for female voices.
+            It needs to be adjusted for a dataset. Defaults to 0.
+
+        mel_fmax (float):
+            Max frequency level used for the mel-basis filters. It needs to be adjusted for a dataset. Defaults to None.
+
+        spec_gain (int):
+            Gain applied when converting amplitude to DB. Defaults to 20.
+
+        signal_norm (bool):
+            enable/disable signal normalization. Defaults to True.
+
+        min_level_db (int):
+            minimum db threshold for the computed melspectrograms. Defaults to -100.
+
+        symmetric_norm (bool):
+            enable/disable symmetric normalization. If set True normalization is performed in the range [-k, k] else
+            [0, k], Defaults to True.
+
+        max_norm (float):
+            ```k``` defining the normalization range. Defaults to 4.0.
+
+        clip_norm (bool):
+            enable/disable clipping the out of range values in the normalized audio signal. Defaults to True.
+
+        stats_path (str):
+            Path to the computed stats file. Defaults to None.
+    """
+
+    # stft parameters
+    fft_size: int = 1024
+    win_length: int = 1024
+    hop_length: int = 256
+    frame_shift_ms: int = None
+    frame_length_ms: int = None
+    stft_pad_mode: str = "reflect"
+    # audio processing parameters
+    sample_rate: int = 22050
+    resample: bool = False
+    preemphasis: float = 0.0
+    ref_level_db: int = 20
+    do_sound_norm: bool = False
+    log_func: str = "np.log10"
+    # silence trimming
+    do_trim_silence: bool = True
+    trim_db: int = 45
+    # rms volume normalization
+    do_rms_norm: bool = False
+    db_level: float = None
+    # griffin-lim params
+    power: float = 1.5
+    griffin_lim_iters: int = 60
+    # mel-spec params
+    num_mels: int = 80
+    mel_fmin: float = 0.0
+    mel_fmax: float = None
+    spec_gain: int = 20
+    do_amp_to_db_linear: bool = True
+    do_amp_to_db_mel: bool = True
+    # f0 params
+    pitch_fmax: float = 640.0
+    pitch_fmin: float = 1.0
+    # normalization params
+    signal_norm: bool = True
+    min_level_db: int = -100
+    symmetric_norm: bool = True
+    max_norm: float = 4.0
+    clip_norm: bool = True
+    stats_path: str = None
+
+    def check_values(
+        self,
+    ):
+        """Check config fields by passing each one to ``check_argument`` with the bounds listed below."""
+        c = asdict(self)
+        check_argument("num_mels", c, restricted=True, min_val=10, max_val=2056)
+        check_argument("fft_size", c, restricted=True, min_val=128, max_val=4058)  # NOTE(review): 4058, not 4096 — confirm
+        check_argument("sample_rate", c, restricted=True, min_val=512, max_val=100000)
+        check_argument(
+            "frame_length_ms",
+            c,
+            restricted=True,
+            min_val=10,
+            max_val=1000,
+            alternative="win_length",
+        )
+        check_argument("frame_shift_ms", c, restricted=True, min_val=1, max_val=1000, alternative="hop_length")
+        check_argument("preemphasis", c, restricted=True, min_val=0, max_val=1)
+        check_argument("min_level_db", c, restricted=True, min_val=-1000, max_val=10)
+        check_argument("ref_level_db", c, restricted=True, min_val=0, max_val=1000)
+        check_argument("power", c, restricted=True, min_val=1, max_val=5)
+        check_argument("griffin_lim_iters", c, restricted=True, min_val=10, max_val=1000)
+
+        # normalization parameters
+        check_argument("signal_norm", c, restricted=True)
+        check_argument("symmetric_norm", c, restricted=True)
+        check_argument("max_norm", c, restricted=True, min_val=0.1, max_val=1000)
+        check_argument("clip_norm", c, restricted=True)
+        check_argument("mel_fmin", c, restricted=True, min_val=0.0, max_val=1000)
+        check_argument("mel_fmax", c, restricted=True, min_val=500.0, allow_none=True)
+        check_argument("spec_gain", c, restricted=True, min_val=1, max_val=100)
+        check_argument("do_trim_silence", c, restricted=True)
+        check_argument("trim_db", c, restricted=True)
+
+
+@dataclass
+class BaseDatasetConfig(Coqpit):
+    """Base config for TTS datasets.
+
+    Args:
+        formatter (str):
+            Name of the formatter to use from ```TTS.tts.datasets.formatter```. Defaults to `""`.
+
+        dataset_name (str):
+            Unique name for the dataset. Defaults to `""`.
+
+        path (str):
+            Root path to the dataset files. Defaults to `""`.
+
+        meta_file_train (str):
+            Name of the dataset meta file. Or a list of speakers to be ignored at training for multi-speaker datasets.
+            Defaults to `""`.
+
+        ignored_speakers (List):
+            List of speaker IDs that are not used at the training. Defaults to None.
+
+        language (str):
+            Language code of the dataset. If defined, it overrides `phoneme_language`. Defaults to `""`.
+
+        meta_file_val (str):
+            Name of the dataset meta file that defines the instances used at validation. Defaults to `""`.
+
+        meta_file_attn_mask (str):
+            Path to the file that lists the attention mask files used with models that require attention masks to
+            train the duration predictor. Defaults to `""`.
+    """
+
+    formatter: str = ""
+    dataset_name: str = ""
+    path: str = ""
+    meta_file_train: str = ""
+    ignored_speakers: List[str] = None
+    language: str = ""
+    meta_file_val: str = ""
+    meta_file_attn_mask: str = ""
+
+    def check_values(
+        self,
+    ):
+        """Check config fields with ``check_argument``; only the first three are passed ``restricted=True``."""
+        c = asdict(self)
+        check_argument("formatter", c, restricted=True)
+        check_argument("path", c, restricted=True)
+        check_argument("meta_file_train", c, restricted=True)
+        check_argument("meta_file_val", c, restricted=False)
+        check_argument("meta_file_attn_mask", c, restricted=False)
+
+
+@dataclass
+class BaseTrainingConfig(TrainerConfig):
+    """Base config to define the basic 🐸TTS training parameters that are shared
+    among all the models. It extends ```trainer.TrainerConfig```.
+
+    Args:
+        model (str):
+            Name of the model that is used in the training. Defaults to None.
+
+        num_loader_workers (int):
+            Number of workers for training time dataloader. Defaults to 0.
+
+        num_eval_loader_workers (int):
+            Number of workers for evaluation time dataloader. Defaults to 0.
+    """
+
+    model: str = None
+    # dataloading
+    num_loader_workers: int = 0
+    num_eval_loader_workers: int = 0
+    use_noise_augment: bool = False  # NOTE(review): not described in the docstring; its use is not visible here
diff --git a/TTS/encoder/README.md b/TTS/encoder/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..b38b20052b707b0358068bc0ce58bc300a149def
--- /dev/null
+++ b/TTS/encoder/README.md
@@ -0,0 +1,18 @@
+### Speaker Encoder
+
+This is an implementation of https://arxiv.org/abs/1710.10467. This model can be used for voice and speaker embedding.
+
+With the code here you can generate d-vectors for both multi-speaker and single-speaker TTS datasets, then visualise and explore them along with the associated audio files in an interactive chart.
+
+Below is an example showing embedding results of various speakers. You can generate the same plot with the provided notebook as demonstrated in [this video](https://youtu.be/KW3oO7JVa7Q).
+
+![](umap.png)
+
+Download a pretrained model from [Released Models](https://github.com/mozilla/TTS/wiki/Released-Models) page.
+
+To run the code, you need to follow the same flow as in TTS.
+
+- Define 'config.json' for your needs. Note that the audio parameters should match your TTS model.
+- Example training call ```python speaker_encoder/train.py --config_path speaker_encoder/config.json --data_path ~/Data/Libri-TTS/train-clean-360```
+- Generate embedding vectors ```python speaker_encoder/compute_embeddings.py --use_cuda true /model/path/best_model.pth model/config/path/config.json dataset/path/ output_path```. This code parses all .wav files at the given dataset path and generates the same folder structure under the output path with the generated embedding files.
+- Watch training on Tensorboard as in TTS
diff --git a/TTS/encoder/__init__.py b/TTS/encoder/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/TTS/encoder/__pycache__/__init__.cpython-310.pyc b/TTS/encoder/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e357989caa795ddd365ba4f5181dba556cff00aa
Binary files /dev/null and b/TTS/encoder/__pycache__/__init__.cpython-310.pyc differ
diff --git a/TTS/encoder/__pycache__/__init__.cpython-37.pyc b/TTS/encoder/__pycache__/__init__.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c3e0bdd6e85871d7a8c4163f742660b76851625b
Binary files /dev/null and b/TTS/encoder/__pycache__/__init__.cpython-37.pyc differ
diff --git a/TTS/encoder/__pycache__/__init__.cpython-38.pyc b/TTS/encoder/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..1669d1345beda21fee68be2f4d35b58e45421611
Binary files /dev/null and b/TTS/encoder/__pycache__/__init__.cpython-38.pyc differ
diff --git a/TTS/encoder/__pycache__/__init__.cpython-39.pyc b/TTS/encoder/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..2267e9847b336c8872abfdd0d867ae890e069faa
Binary files /dev/null and b/TTS/encoder/__pycache__/__init__.cpython-39.pyc differ
diff --git a/TTS/encoder/__pycache__/losses.cpython-37.pyc b/TTS/encoder/__pycache__/losses.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..08263018e07204317acbdcf776536d34d0cbb0c7
Binary files /dev/null and b/TTS/encoder/__pycache__/losses.cpython-37.pyc differ
diff --git a/TTS/encoder/__pycache__/losses.cpython-38.pyc b/TTS/encoder/__pycache__/losses.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..404ee7d49266b0e14d4ddb9a853992c87484572c
Binary files /dev/null and b/TTS/encoder/__pycache__/losses.cpython-38.pyc differ
diff --git a/TTS/encoder/__pycache__/losses.cpython-39.pyc b/TTS/encoder/__pycache__/losses.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8a9be1f5487cdc8c92fe17eb18da5532569a7ca8
Binary files /dev/null and b/TTS/encoder/__pycache__/losses.cpython-39.pyc differ
diff --git a/TTS/encoder/configs/base_encoder_config.py b/TTS/encoder/configs/base_encoder_config.py
new file mode 100644
index 0000000000000000000000000000000000000000..ebbaa0457bb55aef70d54dd36fd9b2b7f7c702bb
--- /dev/null
+++ b/TTS/encoder/configs/base_encoder_config.py
@@ -0,0 +1,61 @@
+from dataclasses import asdict, dataclass, field
+from typing import Dict, List
+
+from coqpit import MISSING
+
+from TTS.config.shared_configs import BaseAudioConfig, BaseDatasetConfig, BaseTrainingConfig
+
+
+@dataclass
+class BaseEncoderConfig(BaseTrainingConfig):
+    """Defines parameters for a Generic Encoder model on top of ``BaseTrainingConfig``."""
+
+    model: str = None
+    audio: BaseAudioConfig = field(default_factory=BaseAudioConfig)
+    datasets: List[BaseDatasetConfig] = field(default_factory=lambda: [BaseDatasetConfig()])
+    # model params (built per instance through default_factory)
+    model_params: Dict = field(
+        default_factory=lambda: {
+            "model_name": "lstm",
+            "input_dim": 80,  # check_values asserts this equals audio.num_mels
+            "proj_dim": 256,
+            "lstm_dim": 768,
+            "num_lstm_layers": 3,
+            "use_lstm_with_projection": True,
+        }
+    )
+
+    audio_augmentation: Dict = field(default_factory=lambda: {})
+
+    # training params
+    epochs: int = 10000
+    loss: str = "angleproto"
+    grad_clip: float = 3.0
+    lr: float = 0.0001
+    optimizer: str = "radam"
+    optimizer_params: Dict = field(default_factory=lambda: {"betas": [0.9, 0.999], "weight_decay": 0})
+    lr_decay: bool = False
+    warmup_steps: int = 4000
+
+    # logging params
+    tb_model_param_stats: bool = False
+    steps_plot_stats: int = 10
+    save_step: int = 1000
+    print_step: int = 20
+    run_eval: bool = False
+
+    # data loader (NOTE(review): the MISSING fields presumably have to be set by the user — confirm in coqpit)
+    num_classes_in_batch: int = MISSING
+    num_utter_per_class: int = MISSING
+    eval_num_classes_in_batch: int = None
+    eval_num_utter_per_class: int = None
+
+    num_loader_workers: int = MISSING
+    voice_len: float = 1.6
+
+    def check_values(self):
+        super().check_values()
+        c = asdict(self)
+        assert (
+            c["model_params"]["input_dim"] == self.audio.num_mels
+        ), " [!] model input dimendion must be equal to melspectrogram dimension."
diff --git a/TTS/encoder/configs/emotion_encoder_config.py b/TTS/encoder/configs/emotion_encoder_config.py
new file mode 100644
index 0000000000000000000000000000000000000000..5eda2671be980abce4a0506a075387b601a1596c
--- /dev/null
+++ b/TTS/encoder/configs/emotion_encoder_config.py
@@ -0,0 +1,12 @@
+from dataclasses import asdict, dataclass
+
+from TTS.encoder.configs.base_encoder_config import BaseEncoderConfig
+
+
+@dataclass
+class EmotionEncoderConfig(BaseEncoderConfig):
+    """Defines parameters for Emotion Encoder model; overrides ``model`` and adds the class-label fields."""
+
+    model: str = "emotion_encoder"
+    map_classid_to_classname: dict = None
+    class_name_key: str = "emotion_name"  # item field EncoderDataset reads as the class label
diff --git a/TTS/encoder/configs/speaker_encoder_config.py b/TTS/encoder/configs/speaker_encoder_config.py
new file mode 100644
index 0000000000000000000000000000000000000000..6dceb00277ba68efe128936ff7f9456338f9753f
--- /dev/null
+++ b/TTS/encoder/configs/speaker_encoder_config.py
@@ -0,0 +1,11 @@
+from dataclasses import asdict, dataclass
+
+from TTS.encoder.configs.base_encoder_config import BaseEncoderConfig
+
+
+@dataclass
+class SpeakerEncoderConfig(BaseEncoderConfig):
+    """Defines parameters for Speaker Encoder model; overrides ``model`` and sets the class-label field."""
+
+    model: str = "speaker_encoder"
+    class_name_key: str = "speaker_name"  # item field EncoderDataset reads as the class label
diff --git a/TTS/encoder/dataset.py b/TTS/encoder/dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..582b1fe9ca35cb9afbc20b8f72b6173282201272
--- /dev/null
+++ b/TTS/encoder/dataset.py
@@ -0,0 +1,147 @@
+import random
+
+import torch
+from torch.utils.data import Dataset
+
+from TTS.encoder.utils.generic_utils import AugmentWAV
+
+
+class EncoderDataset(Dataset):
+    def __init__(
+        self,
+        config,
+        ap,
+        meta_data,
+        voice_len=1.6,
+        num_classes_in_batch=64,
+        num_utter_per_class=10,
+        verbose=False,
+        augmentation_config=None,
+        use_torch_spec=None,
+    ):
+        """
+        Args:
+            config: encoder config; its ``class_name_key`` names the label field of each item.
+            ap (TTS.tts.utils.AudioProcessor): audio processor object.
+            meta_data (list): list of dataset instances.
+            voice_len (float): voice segment length in seconds.
+            num_classes_in_batch (int): only printed in the verbose summary by this class.
+            num_utter_per_class (int): classes with fewer utterances than this are dropped.
+            verbose (bool): print diagnostic information.
+            augmentation_config (dict): optional; the keys "p", "additive", "rir", "gaussian" are read.
+            use_torch_spec (bool): if truthy, batches carry raw waveforms instead of mel spectrograms.
+        """
+        super().__init__()
+        self.config = config
+        self.items = meta_data
+        self.sample_rate = ap.sample_rate
+        self.seq_len = int(voice_len * self.sample_rate)
+        self.num_utter_per_class = num_utter_per_class
+        self.ap = ap
+        self.verbose = verbose
+        self.use_torch_spec = use_torch_spec
+        self.classes, self.items = self.__parse_items()
+
+        self.classname_to_classid = {key: i for i, key in enumerate(self.classes)}
+
+        # Data Augmentation
+        self.augmentator = None
+        self.gaussian_augmentation_config = None
+        if augmentation_config:
+            self.data_augmentation_p = augmentation_config["p"]
+            if self.data_augmentation_p and ("additive" in augmentation_config or "rir" in augmentation_config):
+                self.augmentator = AugmentWAV(ap, augmentation_config)
+
+            if "gaussian" in augmentation_config.keys():
+                self.gaussian_augmentation_config = augmentation_config["gaussian"]
+
+        if self.verbose:
+            print("\n > DataLoader initialization")
+            print(f" | > Classes per Batch: {num_classes_in_batch}")
+            print(f" | > Number of instances : {len(self.items)}")
+            print(f" | > Sequence length: {self.seq_len}")
+            print(f" | > Num Classes: {len(self.classes)}")
+            print(f" | > Classes: {self.classes}")
+
+    def load_wav(self, filename):
+        """Load ``filename`` through the audio processor at the processor's sample rate."""
+        return self.ap.load_wav(filename, sr=self.ap.sample_rate)
+
+    def __parse_items(self):
+        """Return ``(classes, items)``: the sorted labels that have at least ``num_utter_per_class``
+        utterances and the items of those classes whose audio is longer than ``seq_len`` samples."""
+        class_key = self.config.class_name_key
+        class_to_utters = {}
+        for item in self.items:
+            class_to_utters.setdefault(item[class_key], []).append(item["audio_file"])
+
+        # keep only the classes with number of samples >= self.num_utter_per_class
+        class_to_utters = {k: v for (k, v) in class_to_utters.items() if len(v) >= self.num_utter_per_class}
+
+        classes = sorted(class_to_utters)
+
+        new_items = []
+        for item in self.items:
+            path_ = item["audio_file"]
+            # same label field as the counting pass above, so a custom class_name_key is honoured
+            class_name = item[class_key]
+            # ignore filtered classes (dict lookup instead of scanning the class list)
+            if class_name not in class_to_utters:
+                continue
+            # ignore small audios
+            if self.load_wav(path_).shape[0] - self.seq_len <= 0:
+                continue
+
+            new_items.append({"wav_file_path": path_, "class_name": class_name})
+
+        return classes, new_items
+
+    def __len__(self):
+        return len(self.items)
+
+    def get_num_classes(self):
+        return len(self.classes)
+
+    def get_class_list(self):
+        return self.classes
+
+    def set_classes(self, classes):
+        self.classes = classes
+        self.classname_to_classid = {key: i for i, key in enumerate(self.classes)}
+
+    def get_map_classid_to_classname(self):
+        return dict((c_id, c_n) for c_n, c_id in self.classname_to_classid.items())
+
+    def __getitem__(self, idx):
+        return self.items[idx]
+
+    def collate_fn(self, batch):
+        """Load a random ``seq_len``-sample crop of every item and return stacked ``(feats, labels)``."""
+        labels = []
+        feats = []
+        for item in batch:
+            utter_path = item["wav_file_path"]
+            class_name = item["class_name"]
+            # get classid
+            class_id = self.classname_to_classid[class_name]
+            # load wav file
+            wav = self.load_wav(utter_path)
+            offset = random.randint(0, wav.shape[0] - self.seq_len)
+            wav = wav[offset : offset + self.seq_len]
+
+            if self.augmentator is not None and self.data_augmentation_p:
+                if random.random() < self.data_augmentation_p:
+                    wav = self.augmentator.apply_one(wav)
+
+            if not self.use_torch_spec:
+                mel = self.ap.melspectrogram(wav)
+                feats.append(torch.FloatTensor(mel))
+            else:
+                feats.append(torch.FloatTensor(wav))
+
+            labels.append(class_id)
+
+        feats = torch.stack(feats)
+        labels = torch.LongTensor(labels)
+
+        return feats, labels
diff --git a/TTS/encoder/losses.py b/TTS/encoder/losses.py
new file mode 100644
index 0000000000000000000000000000000000000000..5b5aa0fc48fe00aeedeff28ba48ed2af498ce582
--- /dev/null
+++ b/TTS/encoder/losses.py
@@ -0,0 +1,226 @@
+import torch
+import torch.nn.functional as F
+from torch import nn
+
+
+# adapted from https://github.com/cvqluu/GE2E-Loss
+class GE2ELoss(nn.Module):
+    def __init__(self, init_w=10.0, init_b=-5.0, loss_method="softmax"):
+        """
+        Implementation of the Generalized End-to-End loss defined in https://arxiv.org/abs/1710.10467 [1]
+        Accepts an input of size (N, M, D)
+            where N is the number of speakers in the batch, M is the number of utterances per speaker,
+            and D is the dimensionality of the embedding vector (e.g. d-vector)
+        Args:
+            - init_w (float): defines the initial value of w in Equation (5) of [1]
+            - init_b (float): defines the initial value of b in Equation (5) of [1]
+            - loss_method (str): "softmax" or "contrast"; selects the per-embedding loss function
+        """
+        super().__init__()
+        # pylint: disable=E1102
+        self.w = nn.Parameter(torch.tensor(init_w))
+        # pylint: disable=E1102
+        self.b = nn.Parameter(torch.tensor(init_b))
+        self.loss_method = loss_method
+
+        print(" > Initialized Generalized End-to-End loss")
+
+        assert self.loss_method in ["softmax", "contrast"]
+
+        if self.loss_method == "softmax":
+            self.embed_loss = self.embed_loss_softmax
+        if self.loss_method == "contrast":
+            self.embed_loss = self.embed_loss_contrast
+
+    # pylint: disable=R0201
+    def calc_new_centroids(self, dvecs, centroids, spkr, utt):
+        """
+        Calculates the new centroids excluding the reference utterance
+        """
+        excl = torch.cat((dvecs[spkr, :utt], dvecs[spkr, utt + 1 :]))
+        excl = torch.mean(excl, 0)
+        new_centroids = []
+        for i, centroid in enumerate(centroids):
+            if i == spkr:
+                new_centroids.append(excl)
+            else:
+                new_centroids.append(centroid)
+        return torch.stack(new_centroids)
+
+    def calc_cosine_sim(self, dvecs, centroids):
+        """
+        Make the cosine similarity matrix with dims (N,M,N)
+        """
+        cos_sim_matrix = []
+        for spkr_idx, speaker in enumerate(dvecs):
+            cs_row = []
+            for utt_idx, utterance in enumerate(speaker):
+                new_centroids = self.calc_new_centroids(dvecs, centroids, spkr_idx, utt_idx)
+                # vector based cosine similarity for speed
+                cs_row.append(
+                    torch.clamp(
+                        torch.mm(
+                            utterance.unsqueeze(1).transpose(0, 1),
+                            new_centroids.transpose(0, 1),
+                        )
+                        / (torch.norm(utterance) * torch.norm(new_centroids, dim=1)),
+                        1e-6,
+                    )
+                )
+            cs_row = torch.cat(cs_row, dim=0)
+            cos_sim_matrix.append(cs_row)
+        return torch.stack(cos_sim_matrix)
+
+    # pylint: disable=R0201
+    def embed_loss_softmax(self, dvecs, cos_sim_matrix):
+        """
+        Calculates the loss on each embedding $L(e_{ji})$ by taking softmax
+        """
+        N, M, _ = dvecs.shape
+        L = []
+        for j in range(N):
+            L_row = []
+            for i in range(M):
+                L_row.append(-F.log_softmax(cos_sim_matrix[j, i], 0)[j])
+            L_row = torch.stack(L_row)
+            L.append(L_row)
+        return torch.stack(L)
+
+    # pylint: disable=R0201
+    def embed_loss_contrast(self, dvecs, cos_sim_matrix):
+        """
+        Calculates the loss on each embedding $L(e_{ji})$ by contrast loss with closest centroid
+        """
+        N, M, _ = dvecs.shape
+        L = []
+        for j in range(N):
+            L_row = []
+            for i in range(M):
+                centroids_sigmoids = torch.sigmoid(cos_sim_matrix[j, i])
+                excl_centroids_sigmoids = torch.cat((centroids_sigmoids[:j], centroids_sigmoids[j + 1 :]))
+                L_row.append(1.0 - torch.sigmoid(cos_sim_matrix[j, i, j]) + torch.max(excl_centroids_sigmoids))
+            L_row = torch.stack(L_row)
+            L.append(L_row)
+        return torch.stack(L)
+
+    def forward(self, x, _label=None):
+        """
+        Calculates the GE2E loss for an input of dimensions (num_speakers, num_utts_per_speaker, dvec_feats)
+        """
+        # the leave-one-out centroids need at least two utterances per speaker; ``_label`` is not used
+        assert x.size()[1] >= 2
+
+        centroids = torch.mean(x, 1)
+        cos_sim_matrix = self.calc_cosine_sim(x, centroids)
+        w = torch.clamp(self.w, 1e-6)  # clamp() is out-of-place: the clamped scale has to be the one used
+        cos_sim_matrix = w * cos_sim_matrix + self.b
+        L = self.embed_loss(x, cos_sim_matrix)
+        return L.mean()
+
+
+# adapted from https://github.com/clovaai/voxceleb_trainer/blob/master/loss/angleproto.py
+class AngleProtoLoss(nn.Module):
+    """
+    Implementation of the Angular Prototypical loss defined in https://arxiv.org/abs/2003.11982
+        Accepts an input of size (N, M, D)
+            where N is the number of speakers in the batch,
+            M is the number of utterances per speaker,
+            and D is the dimensionality of the embedding vector
+        Args:
+            - init_w (float): defines the initial value of w
+            - init_b (float): defines the initial value of b
+    """
+
+    def __init__(self, init_w=10.0, init_b=-5.0):
+        super().__init__()
+        # pylint: disable=E1102
+        self.w = nn.Parameter(torch.tensor(init_w))
+        # pylint: disable=E1102
+        self.b = nn.Parameter(torch.tensor(init_b))
+        self.criterion = torch.nn.CrossEntropyLoss()
+
+        print(" > Initialized Angular Prototypical loss")
+
+    def forward(self, x, _label=None):
+        """
+        Calculates the AngleProto loss for an input of dimensions (num_speakers, num_utts_per_speaker, dvec_feats)
+        """
+        # utterance 0 of each speaker is compared with the mean of that speaker's remaining utterances
+        assert x.size()[1] >= 2
+
+        out_anchor = torch.mean(x[:, 1:, :], 1)
+        out_positive = x[:, 0, :]
+        num_speakers = out_anchor.size()[0]
+
+        cos_sim_matrix = F.cosine_similarity(
+            out_positive.unsqueeze(-1).expand(-1, -1, num_speakers),
+            out_anchor.unsqueeze(-1).expand(-1, -1, num_speakers).transpose(0, 2),
+        )
+        w = torch.clamp(self.w, 1e-6)  # clamp() is out-of-place: the clamped scale has to be the one used
+        cos_sim_matrix = cos_sim_matrix * w + self.b
+        label = torch.arange(num_speakers).to(cos_sim_matrix.device)
+        L = self.criterion(cos_sim_matrix, label)
+        return L
+
+
+class SoftmaxLoss(nn.Module):
+    """
+    Implementation of the Softmax loss as defined in https://arxiv.org/abs/2003.11982
+        Args:
+            - embedding_dim (int): speaker embedding dim
+            - n_speakers (int): number of speakers
+    """
+
+    def __init__(self, embedding_dim, n_speakers):
+        super().__init__()
+        # cross-entropy over the output of one linear layer (embedding_dim -> n_speakers)
+        self.criterion = torch.nn.CrossEntropyLoss()
+        self.fc = nn.Linear(embedding_dim, n_speakers)
+
+        print("Initialised Softmax Loss")
+
+    def forward(self, x, label=None):
+        # flatten all leading dims of x and of label; label is required here despite its None default
+        x = x.reshape(-1, x.size()[-1])
+        label = label.reshape(-1)
+
+        x = self.fc(x)
+        L = self.criterion(x, label)
+
+        return L
+
+    def inference(self, embedding):
+        x = self.fc(embedding)
+        activations = torch.nn.functional.softmax(x, dim=1).squeeze(0)  # assumes a (1, dim) embedding — TODO confirm
+        class_id = torch.argmax(activations)
+        return class_id
+
+
+class SoftmaxAngleProtoLoss(nn.Module):
+    """
+    Implementation of the Softmax AnglePrototypical loss as defined in https://arxiv.org/abs/2009.14153
+        Args:
+            - embedding_dim (int): speaker embedding dim
+            - n_speakers (int): number of speakers
+            - init_w (float): defines the initial value of w
+            - init_b (float): defines the initial value of b
+    """
+
+    def __init__(self, embedding_dim, n_speakers, init_w=10.0, init_b=-5.0):
+        super().__init__()
+        # the two sub-losses are built independently and summed in forward()
+        self.softmax = SoftmaxLoss(embedding_dim, n_speakers)
+        self.angleproto = AngleProtoLoss(init_w, init_b)
+
+        print("Initialised SoftmaxAnglePrototypical Loss")
+
+    def forward(self, x, label=None):
+        """
+        Calculates the SoftmaxAnglePrototypical loss for an input of dimensions (num_speakers, num_utts_per_speaker, dvec_feats)
+        """
+        # the prototypical term only receives x
+        Lp = self.angleproto(x)
+        # the softmax term also receives label
+        Ls = self.softmax(x, label)
+
+        return Ls + Lp
diff --git a/TTS/encoder/models/__pycache__/base_encoder.cpython-37.pyc b/TTS/encoder/models/__pycache__/base_encoder.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d1e9aa6abe376cb6e7e6e9e6a8d65865eaf0c83f
Binary files /dev/null and b/TTS/encoder/models/__pycache__/base_encoder.cpython-37.pyc differ
diff --git a/TTS/encoder/models/__pycache__/base_encoder.cpython-38.pyc b/TTS/encoder/models/__pycache__/base_encoder.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..3c70725bdd52b812ded0dc8378b945e5d7595cbc
Binary files /dev/null and b/TTS/encoder/models/__pycache__/base_encoder.cpython-38.pyc differ
diff --git a/TTS/encoder/models/__pycache__/base_encoder.cpython-39.pyc b/TTS/encoder/models/__pycache__/base_encoder.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..945da3dc45bdef73cd121dd95835b73dad2de00e
Binary files /dev/null and b/TTS/encoder/models/__pycache__/base_encoder.cpython-39.pyc differ
diff --git a/TTS/encoder/models/__pycache__/lstm.cpython-37.pyc b/TTS/encoder/models/__pycache__/lstm.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..af545f7f46808971b116324c875a0d254d5a21cb
Binary files /dev/null and b/TTS/encoder/models/__pycache__/lstm.cpython-37.pyc differ
diff --git a/TTS/encoder/models/__pycache__/lstm.cpython-38.pyc b/TTS/encoder/models/__pycache__/lstm.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..84ab9f71dc12509fe9964ddbf9cf31b88ee0a59c
Binary files /dev/null and b/TTS/encoder/models/__pycache__/lstm.cpython-38.pyc differ
diff --git a/TTS/encoder/models/__pycache__/lstm.cpython-39.pyc b/TTS/encoder/models/__pycache__/lstm.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e87d4359612cc254d7dde381129964bf5e7bb153
Binary files /dev/null and b/TTS/encoder/models/__pycache__/lstm.cpython-39.pyc differ
diff --git a/TTS/encoder/models/__pycache__/resnet.cpython-37.pyc b/TTS/encoder/models/__pycache__/resnet.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6dbfd49349ddc51d35a9238883e9d632fe2c3f98
Binary files /dev/null and b/TTS/encoder/models/__pycache__/resnet.cpython-37.pyc differ
diff --git a/TTS/encoder/models/__pycache__/resnet.cpython-38.pyc b/TTS/encoder/models/__pycache__/resnet.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..dec64d32ce163877cb90208e94df0aab974b6077
Binary files /dev/null and b/TTS/encoder/models/__pycache__/resnet.cpython-38.pyc differ
diff --git a/TTS/encoder/models/__pycache__/resnet.cpython-39.pyc b/TTS/encoder/models/__pycache__/resnet.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5b4e9949f30d22201dd0bed98cb1460a546fbf9e
Binary files /dev/null and b/TTS/encoder/models/__pycache__/resnet.cpython-39.pyc differ
diff --git a/TTS/encoder/models/base_encoder.py b/TTS/encoder/models/base_encoder.py
new file mode 100644
index 0000000000000000000000000000000000000000..957ea3c4ca719c2a054c93382787909e418288b2
--- /dev/null
+++ b/TTS/encoder/models/base_encoder.py
@@ -0,0 +1,161 @@
+import numpy as np
+import torch
+import torchaudio
+from coqpit import Coqpit
+from torch import nn
+
+from TTS.encoder.losses import AngleProtoLoss, GE2ELoss, SoftmaxAngleProtoLoss
+from TTS.utils.generic_utils import set_init_dict
+from TTS.utils.io import load_fsspec
+
+
+class PreEmphasis(nn.Module):
+    """Pre-emphasis filter: y[t] = x[t] - coefficient * x[t - 1], using one reflect-padded sample on the left."""
+    def __init__(self, coefficient=0.97):
+        super().__init__()
+        self.coefficient = coefficient
+        self.register_buffer("filter", torch.FloatTensor([-self.coefficient, 1.0]).view(1, 1, 2))
+
+    def forward(self, x):
+        assert x.dim() == 2
+        padded = torch.nn.functional.pad(x.unsqueeze(1), (1, 0), "reflect")
+        return torch.nn.functional.conv1d(padded, self.filter).squeeze(1)
+
+
+class BaseEncoder(nn.Module):
+    """Base `encoder` class. Every new `encoder` model must inherit this.
+
+    It defines common `encoder` specific functions; subclasses supply `forward`, `use_torch_spec` and `audio_config`.
+    """
+
+    # pylint: disable=W0102
+    def __init__(self):
+        super(BaseEncoder, self).__init__()
+
+    def get_torch_mel_spectrogram_class(self, audio_config):
+        return torch.nn.Sequential(  # pre-emphasis followed by torchaudio's mel spectrogram
+            PreEmphasis(audio_config["preemphasis"]),
+            # TorchSTFT(
+            #     n_fft=audio_config["fft_size"],
+            #     hop_length=audio_config["hop_length"],
+            #     win_length=audio_config["win_length"],
+            #     sample_rate=audio_config["sample_rate"],
+            #     window="hamming_window",
+            #     mel_fmin=0.0,
+            #     mel_fmax=None,
+            #     use_htk=True,
+            #     do_amp_to_db=False,
+            #     n_mels=audio_config["num_mels"],
+            #     power=2.0,
+            #     use_mel=True,
+            #     mel_norm=None,
+            # )
+            torchaudio.transforms.MelSpectrogram(
+                sample_rate=audio_config["sample_rate"],
+                n_fft=audio_config["fft_size"],
+                win_length=audio_config["win_length"],
+                hop_length=audio_config["hop_length"],
+                window_fn=torch.hamming_window,
+                n_mels=audio_config["num_mels"],
+            ),
+        )
+
+    @torch.no_grad()
+    def inference(self, x, l2_norm=True):  # gradient-free wrapper around the subclass `forward`
+        return self.forward(x, l2_norm)
+
+    @torch.no_grad()
+    def compute_embedding(self, x, num_frames=250, num_eval=10, return_mean=True, l2_norm=True):
+        """Embed `x` from `num_eval` evenly spaced windows of `num_frames` steps taken along dim 1.
+
+        Windows are concatenated along dim 0 for `inference`; `return_mean` averages the result over dim 0.
+        """
+        # with on-the-fly spectrograms the window length is converted from frames to samples
+        if self.use_torch_spec:
+            num_frames = num_frames * self.audio_config["hop_length"]
+
+        max_len = x.shape[1]
+
+        if max_len < num_frames:  # input shorter than one window: use all of it
+            num_frames = max_len
+
+        offsets = np.linspace(0, max_len - num_frames, num=num_eval)
+
+        frames_batch = []
+        for offset in offsets:
+            offset = int(offset)
+            end_offset = int(offset + num_frames)
+            frames = x[:, offset:end_offset]
+            frames_batch.append(frames)
+
+        frames_batch = torch.cat(frames_batch, dim=0)
+        embeddings = self.inference(frames_batch, l2_norm=l2_norm)
+
+        if return_mean:
+            embeddings = torch.mean(embeddings, dim=0, keepdim=True)
+        return embeddings
+
+    def get_criterion(self, c: Coqpit, num_classes=None):  # maps `c.loss` to a loss module
+        if c.loss == "ge2e":
+            criterion = GE2ELoss(loss_method="softmax")
+        elif c.loss == "angleproto":
+            criterion = AngleProtoLoss()
+        elif c.loss == "softmaxproto":
+            criterion = SoftmaxAngleProtoLoss(c.model_params["proj_dim"], num_classes)
+        else:
+            raise Exception("The %s  not is a loss supported" % c.loss)
+        return criterion
+
+    def load_checkpoint(
+        self,
+        config: Coqpit,
+        checkpoint_path: str,
+        eval: bool = False,
+        use_cuda: bool = False,
+        criterion=None,
+        cache=False,
+    ):
+        """Restore weights (and optionally the criterion); returns `criterion` if `eval`, else `(criterion, step)`."""
+        state = load_fsspec(checkpoint_path, map_location=torch.device("cpu"), cache=cache)
+        try:
+            self.load_state_dict(state["model"])
+            print(" > Model fully restored. ")
+        except (KeyError, RuntimeError) as error:
+            if eval:  # in eval mode a mismatching checkpoint is an error, not a partial restore
+                raise error
+
+            print(" > Partial model initialization.")
+            model_dict = self.state_dict()
+            model_dict = set_init_dict(model_dict, state["model"], config)  # was the undefined name `c`
+            self.load_state_dict(model_dict)
+            del model_dict
+
+        # load the criterion for restore_path
+        if criterion is not None and "criterion" in state:
+            try:
+                criterion.load_state_dict(state["criterion"])
+            except (KeyError, RuntimeError) as error:
+                print(" > Criterion load ignored because of:", error)
+
+        # instantiate and load the criterion for the encoder classifier at inference time
+        if (
+            eval
+            and criterion is None
+            and "criterion" in state
+            and getattr(config, "map_classid_to_classname", None) is not None
+        ):
+            criterion = self.get_criterion(config, len(config.map_classid_to_classname))
+            criterion.load_state_dict(state["criterion"])
+
+        if use_cuda:
+            self.cuda()
+            if criterion is not None:
+                criterion = criterion.cuda()
+
+        if eval:
+            self.eval()
+            assert not self.training
+
+        if not eval:
+            return criterion, state["step"]
+        return criterion
diff --git a/TTS/encoder/models/lstm.py b/TTS/encoder/models/lstm.py
new file mode 100644
index 0000000000000000000000000000000000000000..51852b5b820d181824b0db1a205cd5d7bd4fb20d
--- /dev/null
+++ b/TTS/encoder/models/lstm.py
@@ -0,0 +1,99 @@
+import torch
+from torch import nn
+
+from TTS.encoder.models.base_encoder import BaseEncoder
+
+
+class LSTMWithProjection(nn.Module):
+    """Single-layer batch-first LSTM whose per-step outputs go through a bias-free linear projection."""
+
+    def __init__(self, input_size, hidden_size, proj_size):
+        super().__init__()
+        self.input_size, self.hidden_size, self.proj_size = input_size, hidden_size, proj_size
+        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
+        self.linear = nn.Linear(hidden_size, proj_size, bias=False)
+
+    def forward(self, x):
+        self.lstm.flatten_parameters()
+        outputs, _ = self.lstm(x)  # the final (hidden, cell) state is not used
+        return self.linear(outputs)
+
+
+class LSTMWithoutProjection(nn.Module):  # multi-layer LSTM -> Linear -> ReLU on the last layer's final hidden state
+    def __init__(self, input_dim, lstm_dim, proj_dim, num_lstm_layers):
+        super().__init__()
+        self.lstm = nn.LSTM(input_dim, lstm_dim, num_layers=num_lstm_layers, batch_first=True)
+        self.linear = nn.Linear(lstm_dim, proj_dim, bias=True)
+        self.relu = nn.ReLU()
+
+    def forward(self, x):
+        final_hidden = self.lstm(x)[1][0]  # the LSTM returns (output, (h_n, c_n)); keep h_n
+        return self.relu(self.linear(final_hidden[-1]))
+
+
+class LSTMSpeakerEncoder(BaseEncoder):
+    """LSTM speaker encoder over spectrogram frames, or over raw waveforms when `use_torch_spec` is set."""
+
+    def __init__(
+        self,
+        input_dim,
+        proj_dim=256,
+        lstm_dim=768,
+        num_lstm_layers=3,
+        use_lstm_with_projection=True,
+        use_torch_spec=False,
+        audio_config=None,
+    ):
+        super().__init__()
+        self.use_lstm_with_projection = use_lstm_with_projection
+        self.use_torch_spec = use_torch_spec
+        self.audio_config = audio_config
+        self.proj_dim = proj_dim
+
+        layers = []
+        # choose the LSTM layer type
+        if use_lstm_with_projection:
+            layers.append(LSTMWithProjection(input_dim, lstm_dim, proj_dim))
+            for _ in range(num_lstm_layers - 1):
+                layers.append(LSTMWithProjection(proj_dim, lstm_dim, proj_dim))
+            self.layers = nn.Sequential(*layers)
+        else:
+            self.layers = LSTMWithoutProjection(input_dim, lstm_dim, proj_dim, num_lstm_layers)
+
+        self.instancenorm = nn.InstanceNorm1d(input_dim)
+        if self.use_torch_spec:
+            self.torch_spec = self.get_torch_mel_spectrogram_class(audio_config)
+        else:
+            self.torch_spec = None
+        self._init_layers()
+
+    def _init_layers(self):
+        for name, param in self.layers.named_parameters():
+            if "bias" in name:
+                nn.init.constant_(param, 0.0)
+            elif "weight" in name:
+                nn.init.xavier_normal_(param)
+
+    def forward(self, x, l2_norm=True):
+        """Forward pass of the model.
+
+        Args:
+            x (Tensor): Raw waveform signal or spectrogram frames. If input is a waveform, `use_torch_spec` must be
+                `True` to compute the spectrogram on-the-fly. The input tensor itself is left unmodified.
+            l2_norm (bool): Whether to L2-normalize the outputs.
+
+        Shapes:
+            - x: :math:`(N, 1, T_{in})` or :math:`(N, D_{spec}, T_{in})`
+        """
+        with torch.no_grad():
+            with torch.cuda.amp.autocast(enabled=False):
+                if self.use_torch_spec:
+                    x = x.squeeze(1)  # out-of-place: the in-place squeeze_ reshaped the caller's tensor
+                    x = self.torch_spec(x)
+                x = self.instancenorm(x).transpose(1, 2)
+        d = self.layers(x)
+        if self.use_lstm_with_projection:
+            d = d[:, -1]  # keep only the last time step of the projected sequence
+        if l2_norm:
+            d = torch.nn.functional.normalize(d, p=2, dim=1)
+        return d
diff --git a/TTS/encoder/models/resnet.py b/TTS/encoder/models/resnet.py
new file mode 100644
index 0000000000000000000000000000000000000000..e75ab6c4637d2b945de4cb81a2c06447cf78a49e
--- /dev/null
+++ b/TTS/encoder/models/resnet.py
@@ -0,0 +1,200 @@
+import torch
+from torch import nn
+
+# from TTS.utils.audio.torch_transforms import TorchSTFT
+from TTS.encoder.models.base_encoder import BaseEncoder
+
+
+class SELayer(nn.Module):
+    """Squeeze-and-excitation gate: rescales every channel by a learned sigmoid weight."""
+
+    def __init__(self, channel, reduction=8):
+        super().__init__()
+        squeezed = channel // reduction
+        self.avg_pool = nn.AdaptiveAvgPool2d(1)
+        excitation = [nn.Linear(channel, squeezed), nn.ReLU(inplace=True), nn.Linear(squeezed, channel), nn.Sigmoid()]
+        self.fc = nn.Sequential(*excitation)
+
+    def forward(self, x):
+        batch, channels, _, _ = x.size()
+        # squeeze: one average per (sample, channel); excite: per-channel gate broadcast over the map
+        pooled = self.avg_pool(x).view(batch, channels)
+        gate = self.fc(pooled).view(batch, channels, 1, 1)
+        return x * gate
+
+
+class SEBasicBlock(nn.Module):
+    """Residual block: two 3x3 convolutions, `SELayer` gating, then the shortcut is added.
+
+    Args:
+        inplanes (int): Channels of the incoming feature map.
+        planes (int): Channels produced by both convolutions.
+        stride (int or tuple): Stride of the first convolution.
+        downsample (nn.Module, optional): Applied to the input to form the shortcut; identity when `None`.
+        reduction (int): Reduction ratio handed to `SELayer`.
+    """
+
+    expansion = 1
+
+    def __init__(self, inplanes, planes, stride=1, downsample=None, reduction=8):
+        super().__init__()
+        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
+        self.bn1 = nn.BatchNorm2d(planes)
+        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, padding=1, bias=False)
+        self.bn2 = nn.BatchNorm2d(planes)
+        self.relu = nn.ReLU(inplace=True)
+        self.se = SELayer(planes, reduction)
+        self.downsample = downsample
+        self.stride = stride
+
+    def forward(self, x):
+        # first stage is conv -> ReLU -> batch norm (the activation precedes the norm here)
+        hidden = self.bn1(self.relu(self.conv1(x)))
+        hidden = self.se(self.bn2(self.conv2(hidden)))
+        shortcut = x if self.downsample is None else self.downsample(x)
+        hidden += shortcut
+        return self.relu(hidden)
+
+
+class ResNetSpeakerEncoder(BaseEncoder):
+    """Implementation of the model H/ASP without batch normalization in speaker embedding. This model was proposed in: https://arxiv.org/abs/2009.14153
+    Adapted from: https://github.com/clovaai/voxceleb_trainer
+    """
+
+    # pylint: disable=W0102
+    def __init__(
+        self,
+        input_dim=64,
+        proj_dim=512,
+        layers=[3, 4, 6, 3],
+        num_filters=[32, 64, 128, 256],
+        encoder_type="ASP",
+        log_input=False,
+        use_torch_spec=False,
+        audio_config=None,
+    ):
+        super(ResNetSpeakerEncoder, self).__init__()
+
+        self.encoder_type = encoder_type
+        self.input_dim = input_dim
+        self.log_input = log_input
+        self.use_torch_spec = use_torch_spec
+        self.audio_config = audio_config
+        self.proj_dim = proj_dim
+
+        self.conv1 = nn.Conv2d(1, num_filters[0], kernel_size=3, stride=1, padding=1)
+        self.relu = nn.ReLU(inplace=True)
+        self.bn1 = nn.BatchNorm2d(num_filters[0])
+
+        self.inplanes = num_filters[0]  # running channel count consumed and updated by create_layer
+        self.layer1 = self.create_layer(SEBasicBlock, num_filters[0], layers[0])
+        self.layer2 = self.create_layer(SEBasicBlock, num_filters[1], layers[1], stride=(2, 2))
+        self.layer3 = self.create_layer(SEBasicBlock, num_filters[2], layers[2], stride=(2, 2))
+        self.layer4 = self.create_layer(SEBasicBlock, num_filters[3], layers[3], stride=(2, 2))
+
+        self.instancenorm = nn.InstanceNorm1d(input_dim)
+
+        if self.use_torch_spec:
+            self.torch_spec = self.get_torch_mel_spectrogram_class(audio_config)
+        else:
+            self.torch_spec = None
+        # layer2..layer4 each map a feature axis of size n to ceil(n / 2) (kernel 3, stride 2, padding 1)
+        outmap_size = (int(self.input_dim) + 7) // 8  # ceil(input_dim / 8); the old floor broke input_dim % 8 != 0
+
+        self.attention = nn.Sequential(
+            nn.Conv1d(num_filters[3] * outmap_size, 128, kernel_size=1),
+            nn.ReLU(),
+            nn.BatchNorm1d(128),
+            nn.Conv1d(128, num_filters[3] * outmap_size, kernel_size=1),
+            nn.Softmax(dim=2),
+        )
+
+        if self.encoder_type == "SAP":
+            out_dim = num_filters[3] * outmap_size
+        elif self.encoder_type == "ASP":
+            out_dim = num_filters[3] * outmap_size * 2  # mean and standard deviation are concatenated
+        else:
+            raise ValueError("Undefined encoder")
+
+        self.fc = nn.Linear(out_dim, proj_dim)
+
+        self._init_layers()
+
+    def _init_layers(self):
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
+            elif isinstance(m, nn.BatchNorm2d):
+                nn.init.constant_(m.weight, 1)
+                nn.init.constant_(m.bias, 0)
+
+    def create_layer(self, block, planes, blocks, stride=1):
+        downsample = None
+        if stride != 1 or self.inplanes != planes * block.expansion:  # shortcut needs a 1x1 conv to match shapes
+            downsample = nn.Sequential(
+                nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=1, stride=stride, bias=False),
+                nn.BatchNorm2d(planes * block.expansion),
+            )
+
+        layers = []
+        layers.append(block(self.inplanes, planes, stride, downsample))  # only the first block strides/downsamples
+        self.inplanes = planes * block.expansion
+        for _ in range(1, blocks):
+            layers.append(block(self.inplanes, planes))
+
+        return nn.Sequential(*layers)
+
+    # pylint: disable=R0201
+    def new_parameter(self, *size):
+        out = nn.Parameter(torch.FloatTensor(*size))
+        nn.init.xavier_normal_(out)
+        return out
+
+    def forward(self, x, l2_norm=False):
+        """Forward pass of the model.
+
+        Args:
+            x (Tensor): Raw waveform signal or spectrogram frames. If input is a waveform, `use_torch_spec` must be
+                `True` to compute the spectrogram on-the-fly. The input tensor itself is left unmodified.
+            l2_norm (bool): Whether to L2-normalize the outputs.
+
+        Shapes:
+            - x: :math:`(N, 1, T_{in})` or :math:`(N, D_{spec}, T_{in})`
+        """
+        with torch.no_grad():
+            with torch.cuda.amp.autocast(enabled=False):
+                x = x.squeeze(1)  # out-of-place: the in-place squeeze_ reshaped the caller's tensor
+                # compute the mel spectrogram here if requested; otherwise `x` is used as the spectrogram
+                if self.use_torch_spec:
+                    x = self.torch_spec(x)
+
+                if self.log_input:
+                    x = (x + 1e-6).log()
+                x = self.instancenorm(x).unsqueeze(1)
+
+        x = self.conv1(x)
+        x = self.relu(x)
+        x = self.bn1(x)
+
+        x = self.layer1(x)
+        x = self.layer2(x)
+        x = self.layer3(x)
+        x = self.layer4(x)
+
+        x = x.reshape(x.size()[0], -1, x.size()[-1])  # flatten every axis between the first and the last
+
+        w = self.attention(x)  # attention weights, softmax-normalized over the last axis
+
+        if self.encoder_type == "SAP":
+            x = torch.sum(x * w, dim=2)
+        elif self.encoder_type == "ASP":
+            mu = torch.sum(x * w, dim=2)  # attention-weighted mean
+            sg = torch.sqrt((torch.sum((x**2) * w, dim=2) - mu**2).clamp(min=1e-5))  # weighted std, variance clamped
+            x = torch.cat((mu, sg), 1)
+
+        x = x.view(x.size()[0], -1)
+        x = self.fc(x)
+
+        if l2_norm:
+            x = torch.nn.functional.normalize(x, p=2, dim=1)
+        return x
diff --git a/TTS/encoder/requirements.txt b/TTS/encoder/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a486cc45ddb44591bd03c9c0df294fbe98c13884
--- /dev/null
+++ b/TTS/encoder/requirements.txt
@@ -0,0 +1,2 @@
+umap-learn
+numpy>=1.17.0
diff --git a/TTS/encoder/utils/__init__.py b/TTS/encoder/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/TTS/encoder/utils/__pycache__/__init__.cpython-37.pyc b/TTS/encoder/utils/__pycache__/__init__.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..993ea6133e3d2c2a41326be5757829443fc8c7db
Binary files /dev/null and b/TTS/encoder/utils/__pycache__/__init__.cpython-37.pyc differ
diff --git a/TTS/encoder/utils/__pycache__/__init__.cpython-38.pyc b/TTS/encoder/utils/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8387ba25398c44ef8a30acdcf708eacf4d52b6d8
Binary files /dev/null and b/TTS/encoder/utils/__pycache__/__init__.cpython-38.pyc differ
diff --git a/TTS/encoder/utils/__pycache__/__init__.cpython-39.pyc b/TTS/encoder/utils/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..72d633259277f8b4df85580ef84f263b41e0eec1
Binary files /dev/null and b/TTS/encoder/utils/__pycache__/__init__.cpython-39.pyc differ
diff --git a/TTS/encoder/utils/__pycache__/generic_utils.cpython-37.pyc b/TTS/encoder/utils/__pycache__/generic_utils.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..578ce2f5b24ff4a34feb036a2a08567cf5b72a00
Binary files /dev/null and b/TTS/encoder/utils/__pycache__/generic_utils.cpython-37.pyc differ
diff --git a/TTS/encoder/utils/__pycache__/generic_utils.cpython-38.pyc b/TTS/encoder/utils/__pycache__/generic_utils.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f0746dc0b4520001fb6e91c4fe60e32e1a1513f5
Binary files /dev/null and b/TTS/encoder/utils/__pycache__/generic_utils.cpython-38.pyc differ
diff --git a/TTS/encoder/utils/__pycache__/generic_utils.cpython-39.pyc b/TTS/encoder/utils/__pycache__/generic_utils.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d7a4c69ce5ad5a9992dd3e70b3decb9adabb9713
Binary files /dev/null and b/TTS/encoder/utils/__pycache__/generic_utils.cpython-39.pyc differ
diff --git a/TTS/encoder/utils/generic_utils.py b/TTS/encoder/utils/generic_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..91a896f60d272dc25cc6cfe62cf91c66b2f28e00
--- /dev/null
+++ b/TTS/encoder/utils/generic_utils.py
@@ -0,0 +1,184 @@
+import datetime
+import glob
+import os
+import random
+import re
+
+import numpy as np
+from scipy import signal
+
+from TTS.encoder.models.lstm import LSTMSpeakerEncoder
+from TTS.encoder.models.resnet import ResNetSpeakerEncoder
+from TTS.utils.io import save_fsspec
+
+
+class AugmentWAV(object):
+    """Waveform augmenter that mixes in additive noise files and/or convolves with room impulse responses."""
+
+    def __init__(self, ap, augmentation_config):
+        self.ap = ap
+        self.use_additive_noise = False
+        if "additive" in augmentation_config.keys():
+            self.additive_noise_config = augmentation_config["additive"]
+            additive_path = self.additive_noise_config["sounds_path"]
+            if additive_path:
+                self.use_additive_noise = True
+                # noise types are the config keys whose value is itself a dict
+                self.additive_noise_types = []
+                for key in self.additive_noise_config.keys():
+                    if isinstance(self.additive_noise_config[key], dict):
+                        self.additive_noise_types.append(key)
+
+                additive_files = glob.glob(os.path.join(additive_path, "**/*.wav"), recursive=True)
+
+                self.noise_list = {}
+                # group files by their first directory below `additive_path` (relpath tolerates a missing trailing separator)
+                for wav_file in additive_files:
+                    noise_dir = os.path.relpath(wav_file, additive_path).split(os.sep)[0]
+                    # skip directories that are not configured noise types
+                    if noise_dir not in self.additive_noise_types:
+                        continue
+                    if noise_dir not in self.noise_list:
+                        self.noise_list[noise_dir] = []
+                    self.noise_list[noise_dir].append(wav_file)
+
+                print(
+                    f" | > Using Additive Noise Augmentation: with {len(additive_files)} audios instances from {self.additive_noise_types}"
+                )
+
+        self.use_rir = False
+
+        if "rir" in augmentation_config.keys():
+            self.rir_config = augmentation_config["rir"]
+            if self.rir_config["rir_path"]:
+                self.rir_files = glob.glob(os.path.join(self.rir_config["rir_path"], "**/*.wav"), recursive=True)
+                self.use_rir = True
+                # report only when RIR files were actually collected (`rir_files` is unset otherwise)
+                print(f" | > Using RIR Noise Augmentation: with {len(self.rir_files)} audios instances")
+
+        self.create_augmentation_global_list()
+
+    def create_augmentation_global_list(self):
+        if self.use_additive_noise:
+            self.global_noise_list = list(self.additive_noise_types)  # copy so "RIR_AUG" never leaks into the types
+        else:
+            self.global_noise_list = []
+        if self.use_rir:
+            self.global_noise_list.append("RIR_AUG")
+
+    def additive_noise(self, noise_type, audio):
+        """Mix randomly sampled `noise_type` files into `audio`, each scaled to an SNR drawn from the configured range."""
+        clean_db = 10 * np.log10(np.mean(audio**2) + 1e-4)
+
+        noise_list = random.sample(
+            self.noise_list[noise_type],
+            random.randint(
+                self.additive_noise_config[noise_type]["min_num_noises"],
+                self.additive_noise_config[noise_type]["max_num_noises"],
+            ),
+        )
+
+        audio_len = audio.shape[0]
+        noises_wav = None
+        for noise in noise_list:
+            noiseaudio = self.ap.load_wav(noise, sr=self.ap.sample_rate)[:audio_len]
+
+            if noiseaudio.shape[0] < audio_len:  # noise clip shorter than the audio: skip it
+                continue
+
+            noise_snr = random.uniform(
+                self.additive_noise_config[noise_type]["min_snr_in_db"],
+                self.additive_noise_config[noise_type]["max_snr_in_db"],
+            )
+            noise_db = 10 * np.log10(np.mean(noiseaudio**2) + 1e-4)
+            noise_wav = np.sqrt(10 ** ((clean_db - noise_db - noise_snr) / 10)) * noiseaudio
+
+            if noises_wav is None:
+                noises_wav = noise_wav
+            else:
+                noises_wav += noise_wav
+
+        # every sampled file was too short: draw again (NOTE: never terminates if no file is long enough)
+        if noises_wav is None:
+            return self.additive_noise(noise_type, audio)
+
+        return audio + noises_wav
+
+    def reverberate(self, audio):
+        audio_len = audio.shape[0]
+
+        rir_file = random.choice(self.rir_files)
+        rir = self.ap.load_wav(rir_file, sr=self.ap.sample_rate)
+        rir = rir / np.sqrt(np.sum(rir**2))  # scale the impulse response to unit energy
+        return signal.convolve(audio, rir, mode=self.rir_config["conv_mode"])[:audio_len]
+
+    def apply_one(self, audio):
+        noise_type = random.choice(self.global_noise_list)  # one augmentation, picked uniformly
+        if noise_type == "RIR_AUG":
+            return self.reverberate(audio)
+
+        return self.additive_noise(noise_type, audio)
+
+
+def to_camel(text):
+    # capitalize() upper-cases the first character and lower-cases the rest before underscores are folded
+    return re.sub(r"(?!^)_([a-zA-Z])", lambda match: match.group(1).upper(), text.capitalize())
+
+
+def setup_encoder_model(config: "Coqpit"):
+    """Instantiate the speaker encoder named by `config.model_params["model_name"]` (case-insensitive).
+
+    Raises:
+        ValueError: If the name is neither "lstm" nor "resnet" (this used to surface as `UnboundLocalError`).
+    """
+    params = config.model_params
+    name = params["model_name"].lower()
+    common = {"use_torch_spec": params.get("use_torch_spec", False), "audio_config": config.audio}
+    if name == "lstm":
+        return LSTMSpeakerEncoder(
+            params["input_dim"], params["proj_dim"], params["lstm_dim"], params["num_lstm_layers"], **common
+        )
+    if name == "resnet":
+        return ResNetSpeakerEncoder(
+            input_dim=params["input_dim"], proj_dim=params["proj_dim"],
+            log_input=params.get("log_input", False), **common
+        )
+    raise ValueError(f"Unknown encoder model name: {params['model_name']!r}")
+
+
+def save_checkpoint(model, optimizer, criterion, model_loss, out_path, current_step, epoch):
+    """Write model, optimizer and criterion state plus bookkeeping to `<out_path>/checkpoint_<current_step>.pth`."""
+    target = os.path.join(out_path, "checkpoint_{}.pth".format(current_step))
+    print(" | | > Checkpoint saving : {}".format(target))
+
+    # a missing optimizer is recorded as None
+    payload = {
+        "model": model.state_dict(),
+        "optimizer": optimizer.state_dict() if optimizer is not None else None,
+        "criterion": criterion.state_dict(),
+        "step": current_step,
+        "epoch": epoch,
+        "loss": model_loss,
+        "date": datetime.date.today().strftime("%B %d, %Y"),
+    }
+    save_fsspec(payload, target)
+
+
+def save_best_model(model, optimizer, criterion, model_loss, best_loss, out_path, current_step, epoch):
+    """Save `best_model.pth` under `out_path` when `model_loss < best_loss` and return the best loss so far.
+    `optimizer` may be `None` (stored as `None`), matching `save_checkpoint`."""
+    if model_loss < best_loss:
+        state = {
+            "model": model.state_dict(),
+            "optimizer": optimizer.state_dict() if optimizer is not None else None,
+            "criterion": criterion.state_dict(),
+            "step": current_step,
+            "epoch": epoch,
+            "loss": model_loss,
+            "date": datetime.date.today().strftime("%B %d, %Y"),
+        }
+        best_loss = model_loss
+        bestmodel_path = os.path.join(out_path, "best_model.pth")
+        print("\n > BEST MODEL ({0:.5f}) : {1:}".format(model_loss, bestmodel_path))
+        save_fsspec(state, bestmodel_path)
+    return best_loss
diff --git a/TTS/encoder/utils/io.py b/TTS/encoder/utils/io.py
new file mode 100644
index 0000000000000000000000000000000000000000..d1dad3e24d234cdcb9616fb14bc87919c7e20291
--- /dev/null
+++ b/TTS/encoder/utils/io.py
@@ -0,0 +1,38 @@
+import datetime
+import os
+
+from TTS.utils.io import save_fsspec
+
+
+def save_checkpoint(model, optimizer, model_loss, out_path, current_step):
+    """Write model and optimizer state plus bookkeeping to `<out_path>/checkpoint_<current_step>.pth`."""
+    target = os.path.join(out_path, "checkpoint_{}.pth".format(current_step))
+    print(" | | > Checkpoint saving : {}".format(target))
+
+    # a missing optimizer is recorded as None
+    payload = {
+        "model": model.state_dict(),
+        "optimizer": optimizer.state_dict() if optimizer is not None else None,
+        "step": current_step,
+        "loss": model_loss,
+        "date": datetime.date.today().strftime("%B %d, %Y"),
+    }
+    save_fsspec(payload, target)
+
+
+def save_best_model(model, optimizer, model_loss, best_loss, out_path, current_step):
+    """Save `best_model.pth` under `out_path` when `model_loss < best_loss` and return the best loss so far.
+    `optimizer` may be `None` (stored as `None`), matching `save_checkpoint`."""
+    if model_loss < best_loss:
+        state = {
+            "model": model.state_dict(),
+            "optimizer": optimizer.state_dict() if optimizer is not None else None,
+            "step": current_step,
+            "loss": model_loss,
+            "date": datetime.date.today().strftime("%B %d, %Y"),
+        }
+        best_loss = model_loss
+        bestmodel_path = os.path.join(out_path, "best_model.pth")
+        print("\n > BEST MODEL ({0:.5f}) : {1:}".format(model_loss, bestmodel_path))
+        save_fsspec(state, bestmodel_path)
+    return best_loss
diff --git a/TTS/encoder/utils/prepare_voxceleb.py b/TTS/encoder/utils/prepare_voxceleb.py
new file mode 100644
index 0000000000000000000000000000000000000000..b93baf9e60f0d5c35a4e86f6746e29f6097174b5
--- /dev/null
+++ b/TTS/encoder/utils/prepare_voxceleb.py
@@ -0,0 +1,219 @@
+# coding=utf-8
+# Copyright (C) 2020 ATHENA AUTHORS; Yiping Peng; Ne Luo
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+# Only support eager mode and TF>=2.0.0
+# pylint: disable=no-member, invalid-name, relative-beyond-top-level
+# pylint: disable=too-many-locals, too-many-statements, too-many-arguments, too-many-instance-attributes
+""" voxceleb 1 & 2 """
+
+import hashlib
+import os
+import subprocess
+import sys
+import zipfile
+
+import pandas
+import soundfile as sf
+from absl import logging
+
+# Subset name -> list of download URLs. A subset is either a single ``.zip``
+# archive or several ``_partXX`` pieces that are concatenated into one archive
+# by `download_and_extract`.
+SUBSETS = {
+    "vox1_dev_wav": [
+        "https://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox1_dev_wav_partaa",
+        "https://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox1_dev_wav_partab",
+        "https://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox1_dev_wav_partac",
+        "https://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox1_dev_wav_partad",
+    ],
+    "vox1_test_wav": ["https://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox1_test_wav.zip"],
+    "vox2_dev_aac": [
+        "https://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox2_dev_aac_partaa",
+        "https://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox2_dev_aac_partab",
+        "https://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox2_dev_aac_partac",
+        "https://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox2_dev_aac_partad",
+        "https://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox2_dev_aac_partae",
+        "https://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox2_dev_aac_partaf",
+        "https://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox2_dev_aac_partag",
+        "https://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox2_dev_aac_partah",
+    ],
+    "vox2_test_aac": ["https://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox2_test_aac.zip"],
+}
+
+# Subset name -> expected md5 hex digest of the (assembled) zip archive;
+# compared against the downloaded file in `download_and_extract`.
+MD5SUM = {
+    "vox1_dev_wav": "ae63e55b951748cc486645f532ba230b",
+    "vox2_dev_aac": "bbc063c46078a602ca71605645c2a402",
+    "vox1_test_wav": "185fdc63c3c739954633d50379a3d102",
+    "vox2_test_aac": "0d2b3ea430a821c33263b5ea37ede312",
+}
+
+# Download credentials passed to wget; filled from the command line in the
+# ``__main__`` block of this script.
+USER = {"user": "", "password": ""}
+
+# Speaker name -> integer id, assigned in first-seen order by
+# `convert_audio_and_make_label`. Module-level, so ids are shared across subsets.
+speaker_id_dict = {}
+
+
+def download_and_extract(directory, subset, urls):
+    """Download and extract the given split of dataset.
+
+    Files that already exist in ``directory`` are not downloaded again. When the
+    subset is delivered in several parts, the parts are concatenated (in the
+    order of ``urls``) into a single ``.zip`` before the md5 check. The archive
+    is kept on disk after extraction.
+
+    Args:
+        directory: the directory where to put the downloaded data.
+        subset: subset name of the corpus; must be a key of ``MD5SUM``.
+        urls: the list of urls to download the data file.
+
+    Raises:
+        ValueError: if ``urls`` is empty or the archive md5 does not match.
+        RuntimeError: if a download command exits with a non-zero status.
+    """
+    import shutil  # pylint: disable=import-outside-toplevel
+
+    if not urls:
+        raise ValueError("no download urls given for subset %s" % subset)
+
+    os.makedirs(directory, exist_ok=True)
+
+    part_paths = []
+    for url in urls:
+        part_path = os.path.join(directory, url.split("/")[-1])
+        part_paths.append(part_path)
+        if os.path.exists(part_path):
+            continue
+        logging.info("Downloading %s to %s" % (url, part_path))
+        # argument list instead of a shell string: credentials and paths are
+        # passed verbatim, with no word splitting or shell interpretation
+        retcode = subprocess.call(
+            ["wget", url, "--user", USER["user"], "--password", USER["password"], "-O", part_path]
+        )
+        if retcode != 0:
+            # `wget -O` leaves an empty/partial file behind; remove it so that a
+            # later run retries the download instead of skipping it
+            if os.path.exists(part_path):
+                os.remove(part_path)
+            raise RuntimeError("Downloading %s failed with return code %d" % (url, retcode))
+
+        statinfo = os.stat(part_path)
+        logging.info("Successfully downloaded %s, size(bytes): %d" % (url, statinfo.st_size))
+
+    # concatenate all parts into zip files
+    zip_filepath = part_paths[-1]
+    if not zip_filepath.endswith(".zip"):
+        zip_filepath = "_".join(zip_filepath.split("_")[:-1]) + ".zip"
+        # only the downloaded parts are joined; a `prefix*` glob would also pick
+        # up a previously assembled zip or the extracted folder
+        with open(zip_filepath, "wb") as f_out:
+            for part_path in part_paths:
+                with open(part_path, "rb") as f_in:
+                    shutil.copyfileobj(f_in, f_out)
+    # drop the ".zip" suffix; str.strip(".zip") would strip a character set from both ends
+    extract_path = zip_filepath[: -len(".zip")]
+
+    # check zip file md5sum, hashing in chunks so the archive is never fully in memory
+    md5 = hashlib.md5()
+    with open(zip_filepath, "rb") as f_zip:
+        for chunk in iter(lambda: f_zip.read(1 << 20), b""):
+            md5.update(chunk)
+    if md5.hexdigest() != MD5SUM[subset]:
+        raise ValueError("md5sum of %s mismatch" % zip_filepath)
+
+    with zipfile.ZipFile(zip_filepath, "r") as zfile:
+        zfile.extractall(directory)
+        # rename the first archive entry (assumed to be the top-level folder) after the subset
+        extract_path_ori = os.path.join(directory, zfile.infolist()[0].filename)
+        subprocess.call(["mv", extract_path_ori, extract_path])
+    # the zip archive is intentionally left in place (no os.remove(zip_filepath))
+
+
+def exec_cmd(cmd):
+    """Run a command line through the shell and report its exit status.
+    Args:
+        cmd: command line to be executed.
+    Return:
+        int, the return code of the command, or -999 if it could not be started.
+    """
+    try:
+        status = subprocess.call(cmd, shell=True)
+    except OSError as err:
+        logging.info(f"Execution failed: {err}")
+        return -999
+    else:
+        # a negative status means the child was killed by a signal
+        if status < 0:
+            logging.info(f"Child was terminated by signal {status}")
+    return status
+
+
+def decode_aac_with_ffmpeg(aac_file, wav_file):
+    """Decode a given AAC file into WAV using ffmpeg.
+    Args:
+        aac_file: file path to input AAC file.
+        wav_file: file path to output WAV file.
+    Return:
+        bool, True if success.
+    """
+    import shlex  # pylint: disable=import-outside-toplevel
+
+    # the command is run through a shell by `exec_cmd`, so quote both paths:
+    # names with spaces or shell metacharacters would otherwise break the command
+    cmd = f"ffmpeg -i {shlex.quote(aac_file)} {shlex.quote(wav_file)}"
+    logging.info(f"Decoding aac file using command line: {cmd}")
+    ret = exec_cmd(cmd)
+    if ret != 0:
+        logging.error(f"Failed to decode aac file with retcode {ret}")
+        logging.error("Please check your ffmpeg installation.")
+        return False
+    return True
+
+
+def convert_audio_and_make_label(input_dir, subset, output_dir, output_file):
+    """Walk a subset folder, decode ``.m4a`` files to WAV when needed and write a label csv.
+    Args:
+        input_dir: the directory which holds the input dataset.
+        subset: the name of the specified subset. e.g. vox1_dev_wav
+        output_dir: the directory to place the newly generated csv files.
+        output_file: the name of the newly generated csv file. e.g. vox1_dev_wav.csv
+    Raises:
+        RuntimeError: if an ``.m4a`` file cannot be decoded.
+    """
+
+    logging.info("Preprocessing audio and label for subset %s" % subset)
+    source_dir = os.path.join(input_dir, subset)
+
+    rows = []
+    for root, _, filenames in os.walk(source_dir):
+        for filename in filenames:
+            stem, suffix = os.path.splitext(filename)
+            suffix = suffix.lower()
+            if suffix not in (".wav", ".m4a"):
+                continue
+
+            audio_path = os.path.join(root, filename)
+            if suffix == ".wav":
+                # "<name>.m4a.wav" files are decoder outputs; they are listed
+                # through their ".m4a" source, so skip them here
+                if os.path.splitext(stem)[1]:
+                    continue
+                wav_file = audio_path
+            else:
+                # Convert AAC to WAV next to the source file, once.
+                wav_file = audio_path + ".wav"
+                if not os.path.exists(wav_file) and not decode_aac_with_ffmpeg(audio_path, wav_file):
+                    raise RuntimeError("Audio decoding failed.")
+
+            # the speaker is the grandparent folder of the audio file
+            speaker_name = root.split(os.path.sep)[-2]
+            # unseen speakers get the next free integer id
+            speaker_id = speaker_id_dict.setdefault(speaker_name, len(speaker_id_dict))
+            # NOTE(review): this is the number of frames returned by soundfile,
+            # although the csv column is named "wav_length_ms" - confirm intent
+            wav_length = len(sf.read(wav_file)[0])
+            rows.append((os.path.abspath(wav_file), wav_length, speaker_id, speaker_name))
+
+    # Tab-separated csv with four columns:
+    # "wav_filename", "wav_length_ms", "speaker_id", "speaker_name".
+    csv_file_path = os.path.join(output_dir, output_file)
+    header = ["wav_filename", "wav_length_ms", "speaker_id", "speaker_name"]
+    pandas.DataFrame(data=rows, columns=header).to_csv(csv_file_path, index=False, sep="\t")
+    logging.info("Successfully generated csv file {}".format(csv_file_path))
+
+
+def processor(directory, subset, force_process):
+    """Download one subset and build its label csv, unless the csv already exists.
+
+    Args:
+        directory: working directory for downloads, extracted data and the csv.
+        subset: a key of ``SUBSETS``.
+        force_process: if true, redo the work even when the csv is present.
+
+    Returns:
+        Path of the subset csv file.
+
+    Raises:
+        ValueError: if ``subset`` is not a known subset name.
+    """
+    if subset not in SUBSETS:
+        raise ValueError(subset, "is not in voxceleb")
+
+    csv_name = subset + ".csv"
+    subset_csv = os.path.join(directory, csv_name)
+    if not force_process and os.path.exists(subset_csv):
+        # nothing to do: a previous run already produced the csv
+        return subset_csv
+
+    logging.info("Downloading and process the voxceleb in %s", directory)
+    logging.info("Preparing subset %s", subset)
+    download_and_extract(directory, subset, SUBSETS[subset])
+    convert_audio_and_make_label(directory, subset, directory, csv_name)
+    logging.info("Finished downloading and processing")
+    return subset_csv
+
+
+# Script entry point: download and preprocess every VoxCeleb subset into the
+# directory given on the command line, using the given download credentials.
+if __name__ == "__main__":
+    logging.set_verbosity(logging.INFO)
+    if len(sys.argv) != 4:
+        # name the actual script and signal the usage error with a non-zero status
+        print("Usage: python prepare_voxceleb.py save_directory user password")
+        sys.exit(1)
+
+    DIR, USER["user"], USER["password"] = sys.argv[1], sys.argv[2], sys.argv[3]
+    for SUBSET in SUBSETS:
+        processor(DIR, SUBSET, False)
diff --git a/TTS/encoder/utils/training.py b/TTS/encoder/utils/training.py
new file mode 100644
index 0000000000000000000000000000000000000000..7c58a232e7a146bb24718700527ab80e62a1ab1a
--- /dev/null
+++ b/TTS/encoder/utils/training.py
@@ -0,0 +1,99 @@
+import os
+from dataclasses import dataclass, field
+
+from coqpit import Coqpit
+from trainer import TrainerArgs, get_last_checkpoint
+from trainer.logging import logger_factory
+from trainer.logging.console_logger import ConsoleLogger
+
+from TTS.config import load_config, register_config
+from TTS.tts.utils.text.characters import parse_symbols
+from TTS.utils.generic_utils import get_experiment_folder_path, get_git_branch
+from TTS.utils.io import copy_model_files
+
+
+@dataclass
+class TrainArgs(TrainerArgs):
+    """Trainer arguments extended with the path of the model config file."""
+
+    # Read by `process_args`: when set, the config is loaded from this file.
+    config_path: str = field(default=None, metadata={"help": "Path to the config file."})
+
+
+def getarguments():
+    """Return the command line parser built from `TrainArgs`.
+
+    Same result as `init_arguments`; delegating keeps a single implementation
+    instead of two identical copies.
+    """
+    return init_arguments()
+
+
+def process_args(args, config=None):
+    """Process parsed command line arguments and initialize the config if not provided.
+
+    Args:
+        args (argparse.Namespace or tuple): Parsed input arguments, or the
+            ``(namespace, remaining_args)`` pair returned by ``parse_known_args``;
+            the remaining arguments are applied to the config as overrides.
+        config (Coqpit): Model config. If none, it is generated from `args`. Defaults to None.
+
+    Returns:
+        config (Coqpit): Config parameters.
+        experiment_path (str): Path to save models and logging.
+        audio_path (str): Path to save generated test audios.
+        c_logger (ConsoleLogger): Class that does logging to the console.
+        dashboard_logger: Logger returned by `logger_factory` on the rank 0 process, otherwise None.
+
+    TODO:
+        - Interactive config definition.
+    """
+    # default for callers passing a plain namespace; previously the name was
+    # left unbound in that case and the override step raised NameError
+    coqpit_overrides = []
+    if isinstance(args, tuple):
+        args, coqpit_overrides = args
+    if args.continue_path:
+        # continue a previous training from its output folder
+        args.config_path = os.path.join(args.continue_path, "config.json")
+        args.restore_path, best_model = get_last_checkpoint(args.continue_path)
+        if not args.best_path:
+            args.best_path = best_model
+    # init config if not already defined
+    if config is None:
+        if args.config_path:
+            # init from a file
+            config = load_config(args.config_path)
+        else:
+            # init from console args
+            from TTS.config.shared_configs import BaseTrainingConfig  # pylint: disable=import-outside-toplevel
+
+            config_base = BaseTrainingConfig()
+            config_base.parse_known_args(coqpit_overrides)
+            config = register_config(config_base.model)()
+    # override values from command-line args
+    config.parse_known_args(coqpit_overrides, relaxed_parser=True)
+    # reuse the folder of the continued run, otherwise create a new experiment folder
+    experiment_path = args.continue_path
+    if not experiment_path:
+        experiment_path = get_experiment_folder_path(config.output_path, config.run_name)
+    audio_path = os.path.join(experiment_path, "test_audios")
+    config.output_log_path = experiment_path
+    # setup rank 0 process in distributed training
+    dashboard_logger = None
+    if args.rank == 0:
+        new_fields = {}
+        if args.restore_path:
+            new_fields["restore_path"] = args.restore_path
+        new_fields["github_branch"] = get_git_branch()
+        # if model characters are not set in the config file
+        # save the default set to the config file for future
+        # compatibility.
+        if config.has("characters") and config.characters is None:
+            used_characters = parse_symbols()
+            new_fields["characters"] = used_characters
+        copy_model_files(config, experiment_path, new_fields)
+        dashboard_logger = logger_factory(config, experiment_path)
+    c_logger = ConsoleLogger()
+    return config, experiment_path, audio_path, c_logger, dashboard_logger
+
+
+def init_arguments():
+    """Create the argument parser for a fresh `TrainArgs` instance (no argument prefix)."""
+    return TrainArgs().init_argparse(arg_prefix="")
+
+
+def init_training(config: Coqpit = None):
+    """Parse the command line and prepare everything a training run needs.
+
+    Returns:
+        The parsed argument namespace followed by the five values returned by `process_args`.
+    """
+    # (namespace, unknown args) pair; the unknown args act as config overrides
+    known_and_unknown = init_arguments().parse_known_args()
+    config, out_path, audio_path, console_logger, dash_logger = process_args(known_and_unknown, config)
+    return known_and_unknown[0], config, out_path, audio_path, console_logger, dash_logger
diff --git a/TTS/encoder/utils/visual.py b/TTS/encoder/utils/visual.py
new file mode 100644
index 0000000000000000000000000000000000000000..f2db2f3fa3408f96a04f7932438f175c6ec19c51
--- /dev/null
+++ b/TTS/encoder/utils/visual.py
@@ -0,0 +1,50 @@
+import matplotlib
+import matplotlib.pyplot as plt
+import numpy as np
+import umap
+
+matplotlib.use("Agg")
+
+
+# RGB palette, scaled to [0, 1], used to color the classes in `plot_embeddings`.
+# The builtin `float` replaces the `np.float` alias, which was deprecated in
+# NumPy 1.20 and removed in 1.24; the resulting dtype (float64) is unchanged.
+colormap = (
+    np.array(
+        [
+            [76, 255, 0],
+            [0, 127, 70],
+            [255, 0, 0],
+            [255, 217, 38],
+            [0, 135, 255],
+            [165, 0, 165],
+            [255, 167, 255],
+            [0, 255, 255],
+            [255, 96, 38],
+            [142, 76, 0],
+            [33, 0, 127],
+            [0, 0, 0],
+            [183, 183, 183],
+        ],
+        dtype=float,
+    )
+    / 255
+)
+
+
+def plot_embeddings(embeddings, num_classes_in_batch):
+    """Project embeddings to 2D with UMAP and draw them as a class-colored scatter plot.
+
+    At most the first 10 classes are plotted. The figure is also written to a
+    file named "umap" in the current working directory.
+
+    Args:
+        embeddings: array whose first axis is split evenly across the classes;
+            presumably ordered class by class - verify against caller.
+        num_classes_in_batch: number of classes contained in ``embeddings``.
+
+    Returns:
+        The matplotlib figure holding the plot.
+    """
+    per_class = embeddings.shape[0] // num_classes_in_batch
+
+    # if necessary get just the first 10 classes
+    max_classes = 10
+    if num_classes_in_batch > max_classes:
+        num_classes_in_batch = max_classes
+        embeddings = embeddings[: num_classes_in_batch * per_class]
+
+    reducer = umap.UMAP()
+    points = reducer.fit_transform(embeddings)
+    # one class label per row: 0,0,...,1,1,...
+    labels = np.repeat(np.arange(num_classes_in_batch), per_class)
+    point_colors = [colormap[label] for label in labels]
+
+    fig, ax = plt.subplots(figsize=(16, 10))
+    ax.scatter(points[:, 0], points[:, 1], c=point_colors)
+    plt.gca().set_aspect("equal", "datalim")
+    plt.title("UMAP projection")
+    plt.tight_layout()
+    plt.savefig("umap")
+    return fig
diff --git a/TTS/model.py b/TTS/model.py
new file mode 100644
index 0000000000000000000000000000000000000000..ae6be7b444695756c00c4faa8f2f6c787dfcf9d8
--- /dev/null
+++ b/TTS/model.py
@@ -0,0 +1,59 @@
+from abc import abstractmethod
+from typing import Dict
+
+import torch
+from coqpit import Coqpit
+from trainer import TrainerModel
+
+# pylint: skip-file
+
+
+class BaseTrainerModel(TrainerModel):
+    """BaseTrainerModel model expanding TrainerModel with required functions by 🐸TTS.
+
+    Every new 🐸TTS model must inherit it.
+    """
+
+    @staticmethod
+    @abstractmethod
+    def init_from_config(config: Coqpit):
+        """Init the model and all its attributes from the given config.
+
+        Override this depending on your model.
+
+        Args:
+            config (Coqpit): Model configuration.
+        """
+        ...
+
+    @abstractmethod
+    def inference(self, input: torch.Tensor, aux_input={}) -> Dict:
+        """Forward pass for inference.
+
+        It must return a dictionary with the main model output and all the auxiliary outputs. The key ```model_outputs```
+        is considered to be the main output and you can add any other auxiliary outputs as you want.
+
+        We don't use `*kwargs` since it is problematic with the TorchScript API.
+
+        Args:
+            input (torch.Tensor): Model input; its expected layout is defined by the implementing model.
+            aux_input (Dict): Auxiliary inputs like speaker embeddings, durations etc.
+
+        Returns:
+            Dict: Model outputs, with the main output stored under the key ``model_outputs``.
+        """
+        # NOTE(review): `aux_input={}` is a mutable default shared between calls;
+        # implementations should not modify it in place.
+        outputs_dict = {"model_outputs": None}
+        ...
+        return outputs_dict
+
+    @abstractmethod
+    def load_checkpoint(
+        self, config: Coqpit, checkpoint_path: str, eval: bool = False, strict: bool = True, cache=False
+    ) -> None:
+        """Load a model checkpoint file and get ready for training or inference.
+
+        Args:
+            config (Coqpit): Model configuration.
+            checkpoint_path (str): Path to the model checkpoint file.
+            eval (bool, optional): If true, init model for inference else for training. Defaults to False.
+            strict (bool, optional): Match all checkpoint keys to model's keys. Defaults to True.
+            cache (bool, optional): If True, cache the file locally for subsequent calls. It is cached under `get_user_data_dir()/tts_cache`. Defaults to False.
+        """
+        ...
diff --git a/TTS/server/README.md b/TTS/server/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..270656c4e39dc11636efbb1ba51eba7c9b4a8f04
--- /dev/null
+++ b/TTS/server/README.md
@@ -0,0 +1,18 @@
+# :frog: TTS demo server
+Before you use the server, make sure you [install](https://github.com/coqui-ai/TTS/tree/dev#install-tts) :frog: TTS properly. Then, you can follow the steps below.
+
+**Note:** If you install :frog:TTS using ```pip```, you can also use the ```tts-server``` end point on the terminal.
+
+Example runs:
+
+List officially released models.
+```python TTS/server/server.py  --list_models ```
+
+Run the server with the official models.
+```python TTS/server/server.py  --model_name tts_models/en/ljspeech/tacotron2-DCA --vocoder_name vocoder_models/en/ljspeech/multiband-melgan```
+
+Run the server with the official models on a GPU.
+```CUDA_VISIBLE_DEVICES="0" python TTS/server/server.py  --model_name tts_models/en/ljspeech/tacotron2-DCA --vocoder_name vocoder_models/en/ljspeech/multiband-melgan --use_cuda True```
+
+Run the server with custom models.
+```python TTS/server/server.py  --model_path /path/to/tts/model.pth --config_path /path/to/tts/config.json --vocoder_path /path/to/vocoder/model.pth --vocoder_config_path /path/to/vocoder/config.json```
diff --git a/TTS/server/__init__.py b/TTS/server/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/TTS/server/conf.json b/TTS/server/conf.json
new file mode 100644
index 0000000000000000000000000000000000000000..49b6c09c3848a224dfb39a1f653aa1b289a4b6e5
--- /dev/null
+++ b/TTS/server/conf.json
@@ -0,0 +1,12 @@
+{
+    "tts_path":"/media/erogol/data_ssd/Models/libri_tts/5049/",  // tts model root folder
+    "tts_file":"best_model.pth",     // tts checkpoint file
+    "tts_config":"config.json",     // tts config.json file
+    "tts_speakers": null,           // json file listing speaker ids. null if no speaker embedding.
+    "vocoder_config":null,
+    "vocoder_file": null,
+    "is_wavernn_batched":true,
+    "port": 5002,
+    "use_cuda": true,
+    "debug": true
+}
diff --git a/TTS/server/server.py b/TTS/server/server.py
new file mode 100644
index 0000000000000000000000000000000000000000..345e4d50bb12f9297466311c8e68c43688b041c1
--- /dev/null
+++ b/TTS/server/server.py
@@ -0,0 +1,195 @@
+#!flask/bin/python
+import argparse
+import io
+import json
+import os
+import sys
+from pathlib import Path
+from threading import Lock
+from typing import Union
+
+from flask import Flask, render_template, request, send_file
+
+from TTS.config import load_config
+from TTS.utils.manage import ModelManager
+from TTS.utils.synthesizer import Synthesizer
+
+
+def create_argparser():
+    """Build the command line parser of the demo server.
+
+    Boolean options take an explicit value ("true", "1" or "yes", in any case,
+    mean True; anything else means False). ``--list_models`` may also be given
+    without a value.
+
+    Returns:
+        argparse.ArgumentParser: the configured parser.
+    """
+
+    def convert_boolean(x):
+        return x.lower() in ["true", "1", "yes"]
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--list_models",
+        type=convert_boolean,
+        nargs="?",
+        const=True,
+        default=False,
+        help="list available pre-trained tts and vocoder models.",
+    )
+    parser.add_argument(
+        "--model_name",
+        type=str,
+        default="tts_models/en/ljspeech/tacotron2-DDC",
+        # the name starts with the model type, as the default value shows
+        help="Name of one of the pre-trained tts models in format <model_type>/<language>/<dataset>/<model_name>",
+    )
+    parser.add_argument("--vocoder_name", type=str, default=None, help="name of one of the released vocoder models.")
+
+    # Args for running custom models
+    parser.add_argument("--config_path", default=None, type=str, help="Path to model config file.")
+    parser.add_argument(
+        "--model_path",
+        type=str,
+        default=None,
+        help="Path to model file.",
+    )
+    parser.add_argument(
+        "--vocoder_path",
+        type=str,
+        help="Path to vocoder model file. If it is not defined, model uses GL as vocoder. Please make sure that you installed vocoder library before (WaveRNN).",
+        default=None,
+    )
+    parser.add_argument("--vocoder_config_path", type=str, help="Path to vocoder model config file.", default=None)
+    parser.add_argument("--speakers_file_path", type=str, help="JSON file for multi-speaker model.", default=None)
+    parser.add_argument("--port", type=int, default=5002, help="port to listen on.")
+    parser.add_argument("--use_cuda", type=convert_boolean, default=False, help="true to use CUDA.")
+    parser.add_argument("--debug", type=convert_boolean, default=False, help="true to enable Flask debug mode.")
+    parser.add_argument("--show_details", type=convert_boolean, default=False, help="Generate model detail page.")
+    return parser
+
+
+# Module-level setup: parse the command line, resolve the model files
+# (released models are fetched through the model manager, custom models come
+# from explicit paths) and build the synthesizer shared by all request handlers.
+
+# parse the args
+args = create_argparser().parse_args()
+
+path = Path(__file__).parent / "../.models.json"
+manager = ModelManager(path)
+
+# CASE1: list pre-trained TTS models
+# (this check used to be repeated further down, where it could never run)
+if args.list_models:
+    manager.list_models()
+    sys.exit()
+
+# update in-use models to the specified released models.
+model_path = None
+config_path = None
+speakers_file_path = None
+vocoder_path = None
+vocoder_config_path = None
+
+# CASE2: load pre-trained model paths
+if args.model_name is not None and not args.model_path:
+    model_path, config_path, model_item = manager.download_model(args.model_name)
+    # fall back to the vocoder recommended for the model when none was requested
+    args.vocoder_name = model_item["default_vocoder"] if args.vocoder_name is None else args.vocoder_name
+
+if args.vocoder_name is not None and not args.vocoder_path:
+    vocoder_path, vocoder_config_path, _ = manager.download_model(args.vocoder_name)
+
+# CASE3: set custom model paths
+if args.model_path is not None:
+    model_path = args.model_path
+    config_path = args.config_path
+    speakers_file_path = args.speakers_file_path
+
+if args.vocoder_path is not None:
+    vocoder_path = args.vocoder_path
+    vocoder_config_path = args.vocoder_config_path
+
+# load models
+synthesizer = Synthesizer(
+    tts_checkpoint=model_path,
+    tts_config_path=config_path,
+    tts_speakers_file=speakers_file_path,
+    tts_languages_file=None,
+    vocoder_checkpoint=vocoder_path,
+    vocoder_config=vocoder_config_path,
+    encoder_checkpoint="",
+    encoder_config="",
+    use_cuda=args.use_cuda,
+)
+
+use_multi_speaker = hasattr(synthesizer.tts_model, "num_speakers") and (
+    synthesizer.tts_model.num_speakers > 1 or synthesizer.tts_speakers_file is not None
+)
+
+speaker_manager = getattr(synthesizer.tts_model, "speaker_manager", None)
+# TODO: set this from SpeakerManager
+use_gst = synthesizer.tts_config.get("use_gst", False)
+app = Flask(__name__)
+
+
+def style_wav_uri_to_dict(style_wav: str) -> Union[str, dict]:
+    """Interpret the ``style_wav`` request value.
+
+    The value is either the path of a ``.wav`` file that exists on the server
+    (used for style transfer) or a JSON document describing GST tokens/weights.
+
+    Args:
+        style_wav (str): uri
+
+    Returns:
+        Union[str, dict]: path to file (str) or gst style (dict); None when
+        ``style_wav`` is empty.
+    """
+    if not style_wav:
+        return None
+
+    is_server_wav = os.path.isfile(style_wav) and style_wav.endswith(".wav")
+    if is_server_wav:
+        # style_wav is a .wav file located on the server
+        return style_wav
+
+    # otherwise it is a gst dictionary with {token1_id : token1_weigth, ...}
+    return json.loads(style_wav)
+
+
+@app.route("/")
+def index():
+    """Render the demo page with the speaker and GST options of the loaded model."""
+    speaker_ids = None if speaker_manager is None else speaker_manager.name_to_id
+    template_values = {
+        "show_details": args.show_details,
+        "use_multi_speaker": use_multi_speaker,
+        "speaker_ids": speaker_ids,
+        "use_gst": use_gst,
+    }
+    return render_template("index.html", **template_values)
+
+
+@app.route("/details")
+def details():
+    """Render the page listing the CLI arguments and the model/vocoder configs.
+
+    The configs are read from the module-level ``config_path`` and
+    ``vocoder_config_path`` resolved at start-up. The parser defines no
+    ``tts_config``/``vocoder_config`` options, so reading those from ``args``
+    raised AttributeError. A config whose file is missing is passed to the
+    template as None.
+    """
+
+    def _load_if_present(cfg_path):
+        # load a config only when its path is set and the file exists
+        if cfg_path is not None and os.path.isfile(cfg_path):
+            return load_config(cfg_path)
+        return None
+
+    return render_template(
+        "details.html",
+        show_details=args.show_details,
+        model_config=_load_if_present(config_path),
+        vocoder_config=_load_if_present(vocoder_config_path),
+        args=args.__dict__,
+    )
+
+
+# Serializes synthesis: requests share the single module-level synthesizer.
+lock = Lock()
+
+
+@app.route("/api/tts", methods=["GET"])
+def tts():
+    """Synthesize the ``text`` query parameter and return it as WAV audio.
+
+    Query parameters:
+        text: text to synthesize (required).
+        speaker_id: optional speaker name, defaults to "".
+        style_wav: optional server-side ``.wav`` path or JSON GST dictionary.
+
+    Returns:
+        The audio as an ``audio/wav`` response, or a 400 response when ``text``
+        is missing/empty or ``style_wav`` is not valid.
+    """
+    text = request.args.get("text")
+    if not text:
+        # reject early instead of failing inside the synthesizer with a 500
+        return "Missing required query parameter: text", 400
+    speaker_idx = request.args.get("speaker_id", "")
+    try:
+        style_wav = style_wav_uri_to_dict(request.args.get("style_wav", ""))
+    except ValueError:
+        # json.loads raises a ValueError subclass on malformed input
+        return "Invalid style_wav: expected a .wav path on the server or a JSON dictionary", 400
+
+    with lock:
+        print(" > Model input: {}".format(text))
+        print(" > Speaker Idx: {}".format(speaker_idx))
+        wavs = synthesizer.tts(text, speaker_name=speaker_idx, style_wav=style_wav)
+        out = io.BytesIO()
+        synthesizer.save_wav(wavs, out)
+    # make sure the response is read from the start of the buffer
+    out.seek(0)
+    return send_file(out, mimetype="audio/wav")
+
+
+def main():
+    """Start the Flask server on host "::" with the port and debug flag from the CLI."""
+    run_options = {"debug": args.debug, "host": "::", "port": args.port}
+    app.run(**run_options)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/TTS/server/static/coqui-log-green-TTS.png b/TTS/server/static/coqui-log-green-TTS.png
new file mode 100644
index 0000000000000000000000000000000000000000..6ad188b8c03a170097c0393c6769996f03cf9054
Binary files /dev/null and b/TTS/server/static/coqui-log-green-TTS.png differ
diff --git a/TTS/server/templates/details.html b/TTS/server/templates/details.html
new file mode 100644
index 0000000000000000000000000000000000000000..51c9ed85a83ac0aab045623ee1e6c430fbe51b9d
--- /dev/null
+++ b/TTS/server/templates/details.html
@@ -0,0 +1,131 @@
+<!DOCTYPE html>
+<html lang="en">
+
+<head>
+
+  <meta charset="utf-8">
+  <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
+  <meta name="description" content="">
+  <meta name="author" content="">
+
+  <title>TTS engine</title>
+
+  <!-- Bootstrap core CSS -->
+  <link href="https://stackpath.bootstrapcdn.com/bootstrap/4.1.1/css/bootstrap.min.css"
+    integrity="sha384-WskhaSGFgHYWDcbwN70/dfYBj47jz9qbsMId/iRN3ewGhXQFZCSftd1LZCfmhktB" crossorigin="anonymous"
+    rel="stylesheet">
+
+  <!-- Custom styles for this template -->
+  <style>
+    body {
+      padding-top: 54px;
+    }
+
+    @media (min-width: 992px) {
+      body {
+        padding-top: 56px;
+      }
+    }
+  </style>
+</head>
+
+<body>
+  <a href="https://github.com/coqui-ai/TTS"><img style="position: absolute; z-index:1000; top: 0; left: 0; border: 0;"
+      src="https://s3.amazonaws.com/github/ribbons/forkme_left_darkblue_121621.png" alt="Fork me on GitHub"></a>
+
+  {% if show_details == true %}
+
+  <div class="container">
+    <b>Model details</b>
+  </div>
+
+  <div class="container">
+    <details>
+      <summary>CLI arguments:</summary>
+      <table border="1" align="center" width="75%">
+        <tr>
+          <td> CLI key </td>
+          <td> Value </td>
+        </tr>
+
+        {% for key, value in args.items() %}
+
+        <tr>
+          <td>{{ key }}</td>
+          <td>{{ value }}</td>
+        </tr>
+
+        {% endfor %}
+      </table>
+    </details>
+  </div><br>
+
+  <div class="container">
+
+    {% if model_config != None %}
+
+    <details>
+      <summary>Model config:</summary>
+
+      <table border="1" align="center" width="75%">
+        <tr>
+          <td> Key </td>
+          <td> Value </td>
+        </tr>
+
+
+        {% for key, value in model_config.items() %}
+
+        <tr>
+          <td>{{ key }}</td>
+          <td>{{ value }}</td>
+        </tr>
+
+        {% endfor %}
+
+      </table>
+    </details>
+
+    {% endif %}
+
+  </div><br>
+
+
+
+  <div class="container">
+    {% if vocoder_config != None %}
+    <details>
+      <summary>Vocoder model config:</summary>
+
+      <table border="1" align="center" width="75%">
+        <tr>
+          <td> Key </td>
+          <td> Value </td>
+        </tr>
+
+
+        {% for key, value in vocoder_config.items() %}
+
+        <tr>
+          <td>{{ key }}</td>
+          <td>{{ value }}</td>
+        </tr>
+
+        {% endfor %}
+
+
+      </table>
+    </details>
+    {% endif %}
+  </div><br>
+
+  {% else %}
+  <div class="container">
+    <b>Please start server with --show_details=true to see details.</b>
+  </div>
+
+  {% endif %}
+
+</body>
+
+</html>
\ No newline at end of file
diff --git a/TTS/server/templates/index.html b/TTS/server/templates/index.html
new file mode 100644
index 0000000000000000000000000000000000000000..b0eab291a2c78e678709aba7dddb2b97b8e94b0f
--- /dev/null
+++ b/TTS/server/templates/index.html
@@ -0,0 +1,143 @@
+<!DOCTYPE html>
+<html lang="en">
+
+<head>
+
+    <meta charset="utf-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
+    <meta name="description" content="🐸Coqui AI TTS demo server.">
+    <meta name="author" content="🐸Coqui AI TTS">
+
+    <title>TTS engine</title>
+
+    <!-- Bootstrap core CSS -->
+    <link href="https://stackpath.bootstrapcdn.com/bootstrap/4.1.1/css/bootstrap.min.css"
+        integrity="sha384-WskhaSGFgHYWDcbwN70/dfYBj47jz9qbsMId/iRN3ewGhXQFZCSftd1LZCfmhktB" crossorigin="anonymous"
+        rel="stylesheet">
+
+    <!-- Custom styles for this template -->
+    <style>
+        body {
+            padding-top: 54px;
+        }
+
+        @media (min-width: 992px) {
+            body {
+                padding-top: 56px;
+            }
+        }
+    </style>
+</head>
+
+<body>
+    <a href="https://github.com/coqui-ai/TTS"><img style="position: absolute; z-index:1000; top: 0; left: 0; border: 0;"
+            src="https://s3.amazonaws.com/github/ribbons/forkme_left_darkblue_121621.png" alt="Fork me on GitHub"></a>
+
+    <!-- Navigation -->
+    <!--
+    <nav class="navbar navbar-expand-lg navbar-dark bg-dark fixed-top">
+      <div class="container">
+        <a class="navbar-brand" href="#">Coqui TTS</a>
+        <button class="navbar-toggler" type="button" data-toggle="collapse" data-target="#navbarResponsive" aria-controls="navbarResponsive" aria-expanded="false" aria-label="Toggle navigation">
+          <span class="navbar-toggler-icon"></span>
+        </button>
+        <div class="collapse navbar-collapse" id="navbarResponsive">
+          <ul class="navbar-nav ml-auto">
+            <li class="nav-item active">
+              <a class="nav-link" href="#">Home
+                <span class="sr-only">(current)</span>
+              </a>
+            </li>
+          </ul>
+        </div>
+      </div>
+    </nav>
+    -->
+
+    <!-- Page Content -->
+    <div class="container">
+        <div class="row">
+            <div class="col-lg-12 text-center">
+                <img class="mt-5" src="{{url_for('static', filename='coqui-log-green-TTS.png')}}" align="middle"
+                    width="512" />
+
+                <ul class="list-unstyled">
+                </ul>
+
+                {%if use_gst%}
+                <input value='{"0": 0.1}' id="style_wav" placeholder="style wav (dict or path to wav).." size=45
+                    type="text" name="style_wav">
+                {%endif%}
+
+                <input id="text" placeholder="Type here..." size=45 type="text" name="text">
+                <button id="speak-button" name="speak">Speak</button><br /><br />
+
+                {%if use_multi_speaker%}
+                Choose a speaker:
+                <select id="speaker_id" name=speaker_id method="GET" action="/">
+                    {% for speaker_id in speaker_ids %}
+                    <option value="{{speaker_id}}" SELECTED>{{speaker_id}}</option>
+                    {% endfor %}
+                </select><br /><br />
+                {%endif%}
+
+                {%if show_details%}
+                <button id="details-button" onclick="location.href = 'details'" name="model-details">Model
+                    Details</button><br /><br />
+                {%endif%}
+                <audio id="audio" controls autoplay hidden></audio>
+                <p id="message"></p>
+            </div>
+        </div>
+    </div>
+
+    <!-- Demo client script: sends the text to /api/tts and plays the returned audio -->
+    <script>
+        function getTextValue(textId) {
+            const container = q(textId)
+            if (container) {
+                return container.value
+            }
+            return ""
+        }
+        function q(selector) { const found = document.querySelector(selector); return found } // first match or null
+        q('#text').focus() // start with the cursor in the text box
+        function do_tts(e) {
+            const text = q('#text').value
+            const speaker_id = getTextValue('#speaker_id')
+            const style_wav = getTextValue('#style_wav')
+            if (text) {
+                q('#message').textContent = 'Synthesizing...'
+                q('#speak-button').disabled = true
+                q('#audio').hidden = true
+                synthesize(text, speaker_id, style_wav)
+            }
+            e.preventDefault()
+            return false
+        }
+        q('#speak-button').addEventListener('click', do_tts)
+        q('#text').addEventListener('keyup', function (e) {
+            if (e.keyCode == 13) { // enter
+                do_tts(e)
+            }
+        })
+        function synthesize(text, speaker_id = "", style_wav = "") {
+            fetch(`/api/tts?text=${encodeURIComponent(text)}&speaker_id=${encodeURIComponent(speaker_id)}&style_wav=${encodeURIComponent(style_wav)}`, { cache: 'no-cache' })
+                .then(function (res) {
+                    if (!res.ok) throw Error(res.statusText)
+                    return res.blob()
+                }).then(function (blob) {
+                    q('#message').textContent = ''
+                    q('#speak-button').disabled = false
+                    q('#audio').src = URL.createObjectURL(blob)
+                    q('#audio').hidden = false
+                }).catch(function (err) {
+                    q('#message').textContent = 'Error: ' + err.message
+                    q('#speak-button').disabled = false
+                })
+        }
+    </script>
+
+</body>
+
+</html>
\ No newline at end of file
diff --git a/TTS/tts/.DS_Store b/TTS/tts/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..17106eadb3b5dcd836848f0a0542ed572f5457c5
Binary files /dev/null and b/TTS/tts/.DS_Store differ
diff --git a/TTS/tts/__init__.py b/TTS/tts/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/TTS/tts/__pycache__/__init__.cpython-310.pyc b/TTS/tts/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..72548ffb26bcbacb423bf44aad5b430f10ead8cf
Binary files /dev/null and b/TTS/tts/__pycache__/__init__.cpython-310.pyc differ
diff --git a/TTS/tts/__pycache__/__init__.cpython-37.pyc b/TTS/tts/__pycache__/__init__.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0e8b588198e22f0f9676e9a15261c363947d317c
Binary files /dev/null and b/TTS/tts/__pycache__/__init__.cpython-37.pyc differ
diff --git a/TTS/tts/__pycache__/__init__.cpython-38.pyc b/TTS/tts/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0125492a6d3f49ba101b1280326b6270b66d4790
Binary files /dev/null and b/TTS/tts/__pycache__/__init__.cpython-38.pyc differ
diff --git a/TTS/tts/__pycache__/__init__.cpython-39.pyc b/TTS/tts/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ff4801cbc3338c8a4829ba27767e4fdb75b0e8f2
Binary files /dev/null and b/TTS/tts/__pycache__/__init__.cpython-39.pyc differ
diff --git a/TTS/tts/configs/__init__.py b/TTS/tts/configs/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..3146ac1c116cb807a81889b7a9ab223b9a051036
--- /dev/null
+++ b/TTS/tts/configs/__init__.py
@@ -0,0 +1,17 @@
+import importlib
+import os
+from inspect import isclass
+
+# NOTE: the automatic import of every module under configs/ is currently disabled (commented out below).
+# configs_dir = os.path.dirname(__file__)
+# for file in os.listdir(configs_dir):
+#     path = os.path.join(configs_dir, file)
+#     if not file.startswith("_") and not file.startswith(".") and (file.endswith(".py") or os.path.isdir(path)):
+#         config_name = file[: file.find(".py")] if file.endswith(".py") else file
+#         module = importlib.import_module("TTS.tts.configs." + config_name)
+#         for attribute_name in dir(module):
+#             attribute = getattr(module, attribute_name)
+
+#             if isclass(attribute):
+#                 # Add the class to this package's variables
+#                 globals()[attribute_name] = attribute
diff --git a/TTS/tts/configs/__pycache__/__init__.cpython-310.pyc b/TTS/tts/configs/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..80f20c73a6857e5206e92cfd114515af9b2c1c6e
Binary files /dev/null and b/TTS/tts/configs/__pycache__/__init__.cpython-310.pyc differ
diff --git a/TTS/tts/configs/__pycache__/__init__.cpython-37.pyc b/TTS/tts/configs/__pycache__/__init__.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a6999526e23d0f369769a7e7be333a982d6f8165
Binary files /dev/null and b/TTS/tts/configs/__pycache__/__init__.cpython-37.pyc differ
diff --git a/TTS/tts/configs/__pycache__/__init__.cpython-38.pyc b/TTS/tts/configs/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e8be02843679d570eb4f6136c258de66fd038d88
Binary files /dev/null and b/TTS/tts/configs/__pycache__/__init__.cpython-38.pyc differ
diff --git a/TTS/tts/configs/__pycache__/__init__.cpython-39.pyc b/TTS/tts/configs/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..9a4fe7a9f61e6a3c15a95bcfdc33b15d1b522fa0
Binary files /dev/null and b/TTS/tts/configs/__pycache__/__init__.cpython-39.pyc differ
diff --git a/TTS/tts/configs/__pycache__/glow_tts_config.cpython-39.pyc b/TTS/tts/configs/__pycache__/glow_tts_config.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..914670d6ed84940e0e665d886934d2b8fd9d2923
Binary files /dev/null and b/TTS/tts/configs/__pycache__/glow_tts_config.cpython-39.pyc differ
diff --git a/TTS/tts/configs/__pycache__/shared_configs.cpython-37.pyc b/TTS/tts/configs/__pycache__/shared_configs.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c26aad500f102768eb784c0b4cf4a1fd95ada80a
Binary files /dev/null and b/TTS/tts/configs/__pycache__/shared_configs.cpython-37.pyc differ
diff --git a/TTS/tts/configs/__pycache__/shared_configs.cpython-38.pyc b/TTS/tts/configs/__pycache__/shared_configs.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b0ca831907e2322f7262cd0657a421c65197d80c
Binary files /dev/null and b/TTS/tts/configs/__pycache__/shared_configs.cpython-38.pyc differ
diff --git a/TTS/tts/configs/__pycache__/shared_configs.cpython-39.pyc b/TTS/tts/configs/__pycache__/shared_configs.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6b12cf69ad04641223d5eb6257c48c04f6092e84
Binary files /dev/null and b/TTS/tts/configs/__pycache__/shared_configs.cpython-39.pyc differ
diff --git a/TTS/tts/configs/__pycache__/tacotron2_config.cpython-37.pyc b/TTS/tts/configs/__pycache__/tacotron2_config.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..2c6e24c2799d4958bbfac967a9cbaea25c79871a
Binary files /dev/null and b/TTS/tts/configs/__pycache__/tacotron2_config.cpython-37.pyc differ
diff --git a/TTS/tts/configs/__pycache__/tacotron2_config.cpython-38.pyc b/TTS/tts/configs/__pycache__/tacotron2_config.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c7726393955d8dee9f400a943abcde67886685a1
Binary files /dev/null and b/TTS/tts/configs/__pycache__/tacotron2_config.cpython-38.pyc differ
diff --git a/TTS/tts/configs/__pycache__/tacotron2_config.cpython-39.pyc b/TTS/tts/configs/__pycache__/tacotron2_config.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..184bcc41335930a014bcca73157bb6bc67651101
Binary files /dev/null and b/TTS/tts/configs/__pycache__/tacotron2_config.cpython-39.pyc differ
diff --git a/TTS/tts/configs/__pycache__/tacotron_config.cpython-37.pyc b/TTS/tts/configs/__pycache__/tacotron_config.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..efbd0e39973a7cee13936130746ae6aba2d2d201
Binary files /dev/null and b/TTS/tts/configs/__pycache__/tacotron_config.cpython-37.pyc differ
diff --git a/TTS/tts/configs/__pycache__/tacotron_config.cpython-38.pyc b/TTS/tts/configs/__pycache__/tacotron_config.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8928b164f7c60d51a5aad15dfe8662703a0d9339
Binary files /dev/null and b/TTS/tts/configs/__pycache__/tacotron_config.cpython-38.pyc differ
diff --git a/TTS/tts/configs/__pycache__/tacotron_config.cpython-39.pyc b/TTS/tts/configs/__pycache__/tacotron_config.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..da047724f790ecbc0d5fcc1b30665dd770db857e
Binary files /dev/null and b/TTS/tts/configs/__pycache__/tacotron_config.cpython-39.pyc differ
diff --git a/TTS/tts/configs/align_tts_config.py b/TTS/tts/configs/align_tts_config.py
new file mode 100644
index 0000000000000000000000000000000000000000..317a01af53ce26914d83610a913eb44b5836dac2
--- /dev/null
+++ b/TTS/tts/configs/align_tts_config.py
@@ -0,0 +1,107 @@
+from dataclasses import dataclass, field
+from typing import List
+
+from TTS.tts.configs.shared_configs import BaseTTSConfig
+from TTS.tts.models.align_tts import AlignTTSArgs
+
+
+@dataclass
+class AlignTTSConfig(BaseTTSConfig):
+    """Defines parameters for AlignTTS model.
+    Example:
+
+        >>> from TTS.tts.configs.align_tts_config import AlignTTSConfig
+        >>> config = AlignTTSConfig()
+
+    Args:
+        model(str):
+            Model name used for selecting the right model at initialization. Defaults to `align_tts`.
+        positional_encoding (bool):
+            enable / disable positional encoding applied to the encoder output. Defaults to True.
+        hidden_channels (int):
+            Base number of hidden channels. Defines all the layers expect ones defined by the specific encoder or decoder
+            parameters. Defaults to 256.
+        hidden_channels_dp (int):
+            Number of hidden channels of the duration predictor's layers. Defaults to 256.
+        encoder_type (str):
+            Type of the encoder used by the model. Look at `TTS.tts.layers.feed_forward.encoder` for more details.
+            Defaults to `fftransformer`.
+        encoder_params (dict):
+            Parameters used to define the encoder network. Look at `TTS.tts.layers.feed_forward.encoder` for more details.
+            Defaults to `{"hidden_channels_ffn": 1024, "num_heads": 2, "num_layers": 6, "dropout_p": 0.1}`.
+        decoder_type (str):
+            Type of the decoder used by the model. Look at `TTS.tts.layers.feed_forward.decoder` for more details.
+            Defaults to `fftransformer`.
+        decoder_params (dict):
+            Parameters used to define the decoder network. Look at `TTS.tts.layers.feed_forward.decoder` for more details.
+            Defaults to `{"hidden_channels_ffn": 1024, "num_heads": 2, "num_layers": 6, "dropout_p": 0.1}`.
+        phase_start_steps (List[int]):
+            A list of number of steps required to start the next training phase. AlignTTS has 4 different training
+            phases. Thus you need to define 4 different values to enable phase based training. If None, it
+            trains the whole model together. Defaults to None.
+        ssim_alpha (float):
+            Weight for the SSIM loss. If set <= 0, disables the SSIM loss. Defaults to 1.0.
+        duration_loss_alpha (float):
+            Weight for the duration predictor's loss. Defaults to 1.0.
+        mdn_alpha (float):
+            Weight for the MDN loss. Defaults to 1.0.
+        spec_loss_alpha (float):
+            Weight for the MSE spectrogram loss. If set <= 0, disables the L1 loss. Defaults to 1.0.
+        use_speaker_embedding (bool):
+            enable / disable using speaker embeddings for multi-speaker models. If set True, the model is
+            in the multi-speaker mode. Defaults to False.
+        use_d_vector_file (bool):
+            enable /disable using external speaker embeddings in place of the learned embeddings. Defaults to False.
+        d_vector_file (str):
+            Path to the file including pre-computed speaker embeddings. Defaults to None.
+        noam_schedule (bool):
+            enable / disable the use of Noam LR scheduler. Defaults to False.
+        warmup_steps (int):
+            Number of warm-up steps for the Noam scheduler. Defaults 4000.
+        lr (float):
+            Initial learning rate. Defaults to `1e-3`.
+        wd (float):
+            Weight decay coefficient. Defaults to `1e-7`.
+        min_seq_len (int):
+            Minimum input sequence length to be used at training.
+        max_seq_len (int):
+            Maximum input sequence length to be used at training. Larger values result in more VRAM usage."""
+
+    model: str = "align_tts"
+    # model specific params
+    model_args: AlignTTSArgs = field(default_factory=AlignTTSArgs)
+    phase_start_steps: List[int] = None
+
+    ssim_alpha: float = 1.0
+    spec_loss_alpha: float = 1.0
+    dur_loss_alpha: float = 1.0
+    mdn_alpha: float = 1.0
+
+    # multi-speaker settings
+    use_speaker_embedding: bool = False
+    use_d_vector_file: bool = False
+    d_vector_file: str = False
+
+    # optimizer parameters
+    optimizer: str = "Adam"
+    optimizer_params: dict = field(default_factory=lambda: {"betas": [0.9, 0.998], "weight_decay": 1e-6})
+    lr_scheduler: str = None
+    lr_scheduler_params: dict = None
+    lr: float = 1e-4
+    grad_clip: float = 5.0
+
+    # overrides
+    min_seq_len: int = 13
+    max_seq_len: int = 200
+    r: int = 1
+
+    # testing
+    test_sentences: List[str] = field(
+        default_factory=lambda: [
+            "It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.",
+            "Be a voice, not an echo.",
+            "I'm sorry Dave. I'm afraid I can't do that.",
+            "This cake is great. It's so delicious and moist.",
+            "Prior to November 22, 1963.",
+        ]
+    )
diff --git a/TTS/tts/configs/fast_pitch_config.py b/TTS/tts/configs/fast_pitch_config.py
new file mode 100644
index 0000000000000000000000000000000000000000..024040f8cf15b82c0de31e9907d52d00cb995e4f
--- /dev/null
+++ b/TTS/tts/configs/fast_pitch_config.py
@@ -0,0 +1,176 @@
+from dataclasses import dataclass, field
+from typing import List
+
+from TTS.tts.configs.shared_configs import BaseTTSConfig
+from TTS.tts.models.forward_tts import ForwardTTSArgs
+
+
+@dataclass
+class FastPitchConfig(BaseTTSConfig):
+    """Configure `ForwardTTS` as FastPitch model.
+
+    Example:
+
+        >>> from TTS.tts.configs.fast_pitch_config import FastPitchConfig
+        >>> config = FastPitchConfig()
+
+    Args:
+        model (str):
+            Model name used for selecting the right model at initialization. Defaults to `fast_pitch`.
+
+        base_model (str):
+            Name of the base model being configured as this model so that 🐸 TTS knows it needs to initiate
+            the base model rather than searching for the `model` implementation. Defaults to `forward_tts`.
+
+        model_args (Coqpit):
+            Model class arguments. Check `FastPitchArgs` for more details. Defaults to `FastPitchArgs()`.
+
+        data_dep_init_steps (int):
+            Number of steps used for computing normalization parameters at the beginning of the training. GlowTTS uses
+            Activation Normalization that pre-computes normalization stats at the beginning and use the same values
+            for the rest. Defaults to 10.
+
+        speakers_file (str):
+            Path to the file containing the list of speakers. Needed at inference for loading matching speaker ids to
+            speaker names. Defaults to `None`.
+
+        use_speaker_embedding (bool):
+            enable / disable using speaker embeddings for multi-speaker models. If set True, the model is
+            in the multi-speaker mode. Defaults to False.
+
+        use_d_vector_file (bool):
+            enable /disable using external speaker embeddings in place of the learned embeddings. Defaults to False.
+
+        d_vector_file (str):
+            Path to the file including pre-computed speaker embeddings. Defaults to None.
+
+        d_vector_dim (int):
+            Dimension of the external speaker embeddings. Defaults to 0.
+
+        optimizer (str):
+            Name of the model optimizer. Defaults to `Adam`.
+
+        optimizer_params (dict):
+            Arguments of the model optimizer. Defaults to `{"betas": [0.9, 0.998], "weight_decay": 1e-6}`.
+
+        lr_scheduler (str):
+            Name of the learning rate scheduler. Defaults to `Noam`.
+
+        lr_scheduler_params (dict):
+            Arguments of the learning rate scheduler. Defaults to `{"warmup_steps": 4000}`.
+
+        lr (float):
+            Initial learning rate. Defaults to `1e-3`.
+
+        grad_clip (float):
+            Gradient norm clipping value. Defaults to `5.0`.
+
+        spec_loss_type (str):
+            Type of the spectrogram loss. Check `ForwardTTSLoss` for possible values. Defaults to `mse`.
+
+        duration_loss_type (str):
+            Type of the duration loss. Check `ForwardTTSLoss` for possible values. Defaults to `mse`.
+
+        use_ssim_loss (bool):
+            Enable/disable the use of SSIM (Structural Similarity) loss. Defaults to True.
+
+        wd (float):
+            Weight decay coefficient. Defaults to `1e-7`.
+
+        ssim_loss_alpha (float):
+            Weight for the SSIM loss. If set 0, disables the SSIM loss. Defaults to 1.0.
+
+        dur_loss_alpha (float):
+            Weight for the duration predictor's loss. If set 0, disables the huber loss. Defaults to 1.0.
+
+        spec_loss_alpha (float):
+            Weight for the L1 spectrogram loss. If set 0, disables the L1 loss. Defaults to 1.0.
+
+        pitch_loss_alpha (float):
+            Weight for the pitch predictor's loss. If set 0, disables the pitch predictor. Defaults to 1.0.
+
+        binary_align_loss_alpha (float):
+            Weight for the binary loss. If set 0, disables the binary loss. Defaults to 1.0.
+
+        binary_loss_warmup_epochs (float):
+            Number of epochs to gradually increase the binary loss impact. Defaults to 150.
+
+        min_seq_len (int):
+            Minimum input sequence length to be used at training.
+
+        max_seq_len (int):
+            Maximum input sequence length to be used at training. Larger values result in more VRAM usage.
+    """
+
+    model: str = "fast_pitch"
+    base_model: str = "forward_tts"
+
+    # model specific params
+    model_args: ForwardTTSArgs = ForwardTTSArgs()
+
+    # multi-speaker settings
+    num_speakers: int = 0
+    speakers_file: str = None
+    use_speaker_embedding: bool = False
+    use_d_vector_file: bool = False
+    d_vector_file: str = False
+    d_vector_dim: int = 0
+
+    # optimizer parameters
+    optimizer: str = "Adam"
+    optimizer_params: dict = field(default_factory=lambda: {"betas": [0.9, 0.998], "weight_decay": 1e-6})
+    lr_scheduler: str = "NoamLR"
+    lr_scheduler_params: dict = field(default_factory=lambda: {"warmup_steps": 4000})
+    lr: float = 1e-4
+    grad_clip: float = 5.0
+
+    # loss params
+    spec_loss_type: str = "mse"
+    duration_loss_type: str = "mse"
+    use_ssim_loss: bool = True
+    ssim_loss_alpha: float = 1.0
+    spec_loss_alpha: float = 1.0
+    aligner_loss_alpha: float = 1.0
+    pitch_loss_alpha: float = 0.1
+    dur_loss_alpha: float = 0.1
+    binary_align_loss_alpha: float = 0.1
+    binary_loss_warmup_epochs: int = 150
+
+    # overrides
+    min_seq_len: int = 13
+    max_seq_len: int = 200
+    r: int = 1  # DO NOT CHANGE
+
+    # dataset configs
+    compute_f0: bool = True
+    f0_cache_path: str = None
+
+    # testing
+    test_sentences: List[str] = field(
+        default_factory=lambda: [
+            "It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.",
+            "Be a voice, not an echo.",
+            "I'm sorry Dave. I'm afraid I can't do that.",
+            "This cake is great. It's so delicious and moist.",
+            "Prior to November 22, 1963.",
+        ]
+    )
+
+    def __post_init__(self):
+        # Pass multi-speaker parameters to the model args as `model.init_multispeaker()` looks for it there.
+        if self.num_speakers > 0:
+            self.model_args.num_speakers = self.num_speakers
+
+        # speaker embedding settings
+        if self.use_speaker_embedding:
+            self.model_args.use_speaker_embedding = True
+        if self.speakers_file:
+            self.model_args.speakers_file = self.speakers_file
+
+        # d-vector settings
+        if self.use_d_vector_file:
+            self.model_args.use_d_vector_file = True
+        if self.d_vector_dim is not None and self.d_vector_dim > 0:
+            self.model_args.d_vector_dim = self.d_vector_dim
+        if self.d_vector_file:
+            self.model_args.d_vector_file = self.d_vector_file
diff --git a/TTS/tts/configs/fast_speech_config.py b/TTS/tts/configs/fast_speech_config.py
new file mode 100644
index 0000000000000000000000000000000000000000..16a76e215f4d47d086bea827d2b6ccc61524e5c1
--- /dev/null
+++ b/TTS/tts/configs/fast_speech_config.py
@@ -0,0 +1,177 @@
+from dataclasses import dataclass, field
+from typing import List
+
+from TTS.tts.configs.shared_configs import BaseTTSConfig
+from TTS.tts.models.forward_tts import ForwardTTSArgs
+
+
+@dataclass
+class FastSpeechConfig(BaseTTSConfig):
+    """Configure `ForwardTTS` as FastSpeech model.
+
+    Example:
+
+        >>> from TTS.tts.configs.fast_speech_config import FastSpeechConfig
+        >>> config = FastSpeechConfig()
+
+    Args:
+        model (str):
+            Model name used for selecting the right model at initialization. Defaults to `fast_pitch`.
+
+        base_model (str):
+            Name of the base model being configured as this model so that 🐸 TTS knows it needs to initiate
+            the base model rather than searching for the `model` implementation. Defaults to `forward_tts`.
+
+        model_args (Coqpit):
+            Model class arguments. Check `FastSpeechArgs` for more details. Defaults to `FastSpeechArgs()`.
+
+        data_dep_init_steps (int):
+            Number of steps used for computing normalization parameters at the beginning of the training. GlowTTS uses
+            Activation Normalization that pre-computes normalization stats at the beginning and use the same values
+            for the rest. Defaults to 10.
+
+        speakers_file (str):
+            Path to the file containing the list of speakers. Needed at inference for loading matching speaker ids to
+            speaker names. Defaults to `None`.
+
+
+        use_speaker_embedding (bool):
+            enable / disable using speaker embeddings for multi-speaker models. If set True, the model is
+            in the multi-speaker mode. Defaults to False.
+
+        use_d_vector_file (bool):
+            enable /disable using external speaker embeddings in place of the learned embeddings. Defaults to False.
+
+        d_vector_file (str):
+            Path to the file including pre-computed speaker embeddings. Defaults to None.
+
+        d_vector_dim (int):
+            Dimension of the external speaker embeddings. Defaults to 0.
+
+        optimizer (str):
+            Name of the model optimizer. Defaults to `Adam`.
+
+        optimizer_params (dict):
+            Arguments of the model optimizer. Defaults to `{"betas": [0.9, 0.998], "weight_decay": 1e-6}`.
+
+        lr_scheduler (str):
+            Name of the learning rate scheduler. Defaults to `Noam`.
+
+        lr_scheduler_params (dict):
+            Arguments of the learning rate scheduler. Defaults to `{"warmup_steps": 4000}`.
+
+        lr (float):
+            Initial learning rate. Defaults to `1e-3`.
+
+        grad_clip (float):
+            Gradient norm clipping value. Defaults to `5.0`.
+
+        spec_loss_type (str):
+            Type of the spectrogram loss. Check `ForwardTTSLoss` for possible values. Defaults to `mse`.
+
+        duration_loss_type (str):
+            Type of the duration loss. Check `ForwardTTSLoss` for possible values. Defaults to `mse`.
+
+        use_ssim_loss (bool):
+            Enable/disable the use of SSIM (Structural Similarity) loss. Defaults to True.
+
+        wd (float):
+            Weight decay coefficient. Defaults to `1e-7`.
+
+        ssim_loss_alpha (float):
+            Weight for the SSIM loss. If set 0, disables the SSIM loss. Defaults to 1.0.
+
+        dur_loss_alpha (float):
+            Weight for the duration predictor's loss. If set 0, disables the huber loss. Defaults to 1.0.
+
+        spec_loss_alpha (float):
+            Weight for the L1 spectrogram loss. If set 0, disables the L1 loss. Defaults to 1.0.
+
+        pitch_loss_alpha (float):
+            Weight for the pitch predictor's loss. If set 0, disables the pitch predictor. Defaults to 1.0.
+
+        binary_loss_alpha (float):
+            Weight for the binary loss. If set 0, disables the binary loss. Defaults to 1.0.
+
+        binary_loss_warmup_epochs (float):
+            Number of epochs to gradually increase the binary loss impact. Defaults to 150.
+
+        min_seq_len (int):
+            Minimum input sequence length to be used at training.
+
+        max_seq_len (int):
+            Maximum input sequence length to be used at training. Larger values result in more VRAM usage.
+    """
+
+    model: str = "fast_speech"
+    base_model: str = "forward_tts"
+
+    # model specific params
+    model_args: ForwardTTSArgs = ForwardTTSArgs(use_pitch=False)
+
+    # multi-speaker settings
+    num_speakers: int = 0
+    speakers_file: str = None
+    use_speaker_embedding: bool = False
+    use_d_vector_file: bool = False
+    d_vector_file: str = False
+    d_vector_dim: int = 0
+
+    # optimizer parameters
+    optimizer: str = "Adam"
+    optimizer_params: dict = field(default_factory=lambda: {"betas": [0.9, 0.998], "weight_decay": 1e-6})
+    lr_scheduler: str = "NoamLR"
+    lr_scheduler_params: dict = field(default_factory=lambda: {"warmup_steps": 4000})
+    lr: float = 1e-4
+    grad_clip: float = 5.0
+
+    # loss params
+    spec_loss_type: str = "mse"
+    duration_loss_type: str = "mse"
+    use_ssim_loss: bool = True
+    ssim_loss_alpha: float = 1.0
+    dur_loss_alpha: float = 1.0
+    spec_loss_alpha: float = 1.0
+    pitch_loss_alpha: float = 0.0
+    aligner_loss_alpha: float = 1.0
+    binary_align_loss_alpha: float = 1.0
+    binary_loss_warmup_epochs: int = 150
+
+    # overrides
+    min_seq_len: int = 13
+    max_seq_len: int = 200
+    r: int = 1  # DO NOT CHANGE
+
+    # dataset configs
+    compute_f0: bool = False
+    f0_cache_path: str = None
+
+    # testing
+    test_sentences: List[str] = field(
+        default_factory=lambda: [
+            "It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.",
+            "Be a voice, not an echo.",
+            "I'm sorry Dave. I'm afraid I can't do that.",
+            "This cake is great. It's so delicious and moist.",
+            "Prior to November 22, 1963.",
+        ]
+    )
+
+    def __post_init__(self):
+        # Pass multi-speaker parameters to the model args as `model.init_multispeaker()` looks for it there.
+        if self.num_speakers > 0:
+            self.model_args.num_speakers = self.num_speakers
+
+        # speaker embedding settings
+        if self.use_speaker_embedding:
+            self.model_args.use_speaker_embedding = True
+        if self.speakers_file:
+            self.model_args.speakers_file = self.speakers_file
+
+        # d-vector settings
+        if self.use_d_vector_file:
+            self.model_args.use_d_vector_file = True
+        if self.d_vector_dim is not None and self.d_vector_dim > 0:
+            self.model_args.d_vector_dim = self.d_vector_dim
+        if self.d_vector_file:
+            self.model_args.d_vector_file = self.d_vector_file
diff --git a/TTS/tts/configs/glow_tts_config.py b/TTS/tts/configs/glow_tts_config.py
new file mode 100644
index 0000000000000000000000000000000000000000..f42f3e5a510bacf1b2312ccea7d46201bbcb774f
--- /dev/null
+++ b/TTS/tts/configs/glow_tts_config.py
@@ -0,0 +1,182 @@
+from dataclasses import dataclass, field
+from typing import List
+
+from TTS.tts.configs.shared_configs import BaseTTSConfig
+
+
+@dataclass
+class GlowTTSConfig(BaseTTSConfig):
+    """Defines parameters for GlowTTS model.
+
+    Example:
+
+        >>> from TTS.tts.configs.glow_tts_config import GlowTTSConfig
+        >>> config = GlowTTSConfig()
+
+    Args:
+        model(str):
+            Model name used for selecting the right model at initialization. Defaults to `glow_tts`.
+        encoder_type (str):
+            Type of the encoder used by the model. Look at `TTS.tts.layers.glow_tts.encoder` for more details.
+            Defaults to `rel_pos_transformers`.
+        encoder_params (dict):
+            Parameters used to define the encoder network. Look at `TTS.tts.layers.glow_tts.encoder` for more details.
+            Defaults to `{"kernel_size": 3, "dropout_p": 0.1, "num_layers": 6, "num_heads": 2, "hidden_channels_ffn": 768}`
+        use_encoder_prenet (bool):
+            enable / disable the use of a prenet for the encoder. Defaults to True.
+        hidden_channels_enc (int):
+            Number of base hidden channels used by the encoder network. It defines the input and the output channel sizes,
+            and for some encoder types internal hidden channels sizes too. Defaults to 192.
+        hidden_channels_dec (int):
+            Number of base hidden channels used by the decoder WaveNet network. Defaults to 192 as in the original work.
+        hidden_channels_dp (int):
+            Number of layer channels of the duration predictor network. Defaults to 256 as in the original work.
+        mean_only (bool):
+            If true predict only the mean values by the decoder flow. Defaults to True.
+        out_channels (int):
+            Number of channels of the model output tensor. Defaults to 80.
+        num_flow_blocks_dec (int):
+            Number of decoder blocks. Defaults to 12.
+        inference_noise_scale (float):
+            Noise scale used at inference. Defaults to 0.33.
+        kernel_size_dec (int):
+            Decoder kernel size. Defaults to 5
+        dilation_rate (int):
+            Rate to increase dilation by each layer in a decoder block. Defaults to 1.
+        num_block_layers (int):
+            Number of decoder layers in each decoder block.  Defaults to 4.
+        dropout_p_dec (float):
+            Dropout rate for decoder. Defaults to 0.1.
+        num_speaker (int):
+            Number of speaker to define the size of speaker embedding layer. Defaults to 0.
+        c_in_channels (int):
+            Number of speaker embedding channels. It is set to 512 if embeddings are learned. Defaults to 0.
+        num_splits (int):
+            Number of split levels in inversible conv1x1 operation. Defaults to 4.
+        num_squeeze (int):
+            Number of squeeze levels. When squeezing channels increases and time steps reduces by the factor
+            'num_squeeze'. Defaults to 2.
+        sigmoid_scale (bool):
+            enable/disable sigmoid scaling in decoder. Defaults to False.
+        mean_only (bool):
+            If True, encoder only computes mean value and uses constant variance for each time step. Defaults to true.
+        encoder_type (str):
+            Encoder module type. Possible values are`["rel_pos_transformer", "gated_conv", "residual_conv_bn", "time_depth_separable"]`
+            Check `TTS.tts.layers.glow_tts.encoder` for more details. Defaults to `rel_pos_transformers` as in the original paper.
+        encoder_params (dict):
+            Encoder module parameters. Defaults to None.
+        d_vector_dim (int):
+            Channels of external speaker embedding vectors. Defaults to 0.
+        data_dep_init_steps (int):
+            Number of steps used for computing normalization parameters at the beginning of the training. GlowTTS uses
+            Activation Normalization that pre-computes normalization stats at the beginning and use the same values
+            for the rest. Defaults to 10.
+        style_wav_for_test (str):
+            Path to the wav file used for changing the style of the speech. Defaults to None.
+        inference_noise_scale (float):
+            Variance used for sampling the random noise added to the decoder's input at inference. Defaults to 0.0.
+        length_scale (float):
+            Multiply the predicted durations with this value to change the speech speed. Defaults to 1.
+        use_speaker_embedding (bool):
+            enable / disable using speaker embeddings for multi-speaker models. If set True, the model is
+            in the multi-speaker mode. Defaults to False.
+        use_d_vector_file (bool):
+            enable /disable using external speaker embeddings in place of the learned embeddings. Defaults to False.
+        d_vector_file (str):
+            Path to the file including pre-computed speaker embeddings. Defaults to None.
+        noam_schedule (bool):
+            enable / disable the use of Noam LR scheduler. Defaults to False.
+        warmup_steps (int):
+            Number of warm-up steps for the Noam scheduler. Defaults 4000.
+        lr (float):
+            Initial learning rate. Defaults to `1e-3`.
+        wd (float):
+            Weight decay coefficient. Defaults to `1e-7`.
+        min_seq_len (int):
+            Minimum input sequence length to be used at training.
+        max_seq_len (int):
+            Maximum input sequence length to be used at training. Larger values result in more VRAM usage.
+    """
+
+    model: str = "glow_tts"
+
+    # model params
+    num_chars: int = None
+    encoder_type: str = "rel_pos_transformer"
+    encoder_params: dict = field(
+        default_factory=lambda: {
+            "kernel_size": 3,
+            "dropout_p": 0.1,
+            "num_layers": 6,
+            "num_heads": 2,
+            "hidden_channels_ffn": 768,
+        }
+    )
+    use_encoder_prenet: bool = True
+    hidden_channels_enc: int = 192
+    hidden_channels_dec: int = 192
+    hidden_channels_dp: int = 256
+    dropout_p_dp: float = 0.1
+    dropout_p_dec: float = 0.05
+    mean_only: bool = True
+    out_channels: int = 80
+    num_flow_blocks_dec: int = 12
+    inference_noise_scale: float = 0.33
+    kernel_size_dec: int = 5
+    dilation_rate: int = 1
+    num_block_layers: int = 4
+    num_speakers: int = 0
+    c_in_channels: int = 0
+    num_splits: int = 4
+    num_squeeze: int = 2
+    sigmoid_scale: bool = False
+    encoder_type: str = "rel_pos_transformer"
+    encoder_params: dict = field(
+        default_factory=lambda: {
+            "kernel_size": 3,
+            "dropout_p": 0.1,
+            "num_layers": 6,
+            "num_heads": 2,
+            "hidden_channels_ffn": 768,
+            "input_length": None,
+        }
+    )
+    d_vector_dim: int = 0
+
+    # training params
+    data_dep_init_steps: int = 10
+
+    # inference params
+    style_wav_for_test: str = None
+    inference_noise_scale: float = 0.0
+    length_scale: float = 1.0
+
+    # multi-speaker settings
+    use_speaker_embedding: bool = False
+    speakers_file: str = None
+    use_d_vector_file: bool = False
+    d_vector_file: str = False
+
+    # optimizer parameters
+    optimizer: str = "RAdam"
+    optimizer_params: dict = field(default_factory=lambda: {"betas": [0.9, 0.998], "weight_decay": 1e-6})
+    lr_scheduler: str = "NoamLR"
+    lr_scheduler_params: dict = field(default_factory=lambda: {"warmup_steps": 4000})
+    grad_clip: float = 5.0
+    lr: float = 1e-3
+
+    # overrides
+    min_seq_len: int = 3
+    max_seq_len: int = 500
+    r: int = 1  # DO NOT CHANGE - TODO: make this immutable once coqpit implements it.
+
+    # testing
+    test_sentences: List[str] = field(
+        default_factory=lambda: [
+            "It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.",
+            "Be a voice, not an echo.",
+            "I'm sorry Dave. I'm afraid I can't do that.",
+            "This cake is great. It's so delicious and moist.",
+            "Prior to November 22, 1963.",
+        ]
+    )
diff --git a/TTS/tts/configs/shared_configs.py b/TTS/tts/configs/shared_configs.py
new file mode 100644
index 0000000000000000000000000000000000000000..e1ea8be3daaf39ba500cde2a605f86f202534428
--- /dev/null
+++ b/TTS/tts/configs/shared_configs.py
@@ -0,0 +1,331 @@
+from dataclasses import asdict, dataclass, field
+from typing import Dict, List
+
+from coqpit import Coqpit, check_argument
+
+from TTS.config import BaseAudioConfig, BaseDatasetConfig, BaseTrainingConfig
+
+
+@dataclass
+class GSTConfig(Coqpit):
+    """Defines the Global Style Token Module
+
+    Args:
+        gst_style_input_wav (str):
+            Path to the wav file used to define the style of the output speech at inference. Defaults to None.
+
+        gst_style_input_weights (dict):
+            Defines the weights for each style token used at inference. Defaults to None.
+
+        gst_embedding_dim (int):
+            Defines the size of the GST embedding vector dimensions. Defaults to 256.
+
+        gst_num_heads (int):
+            Number of attention heads used by the multi-head attention. Defaults to 4.
+
+        gst_num_style_tokens (int):
+            Number of style token vectors. Defaults to 10.
+    """
+
+    gst_style_input_wav: str = None
+    gst_style_input_weights: dict = None
+    gst_embedding_dim: int = 256
+    gst_use_speaker_embedding: bool = False
+    gst_num_heads: int = 4
+    gst_num_style_tokens: int = 10
+
+    def check_values(
+        self,
+    ):
+        """Check config fields"""
+        c = asdict(self)
+        super().check_values()
+        check_argument("gst_style_input_weights", c, restricted=False)
+        check_argument("gst_style_input_wav", c, restricted=False)
+        check_argument("gst_embedding_dim", c, restricted=True, min_val=0, max_val=1000)
+        check_argument("gst_use_speaker_embedding", c, restricted=False)
+        check_argument("gst_num_heads", c, restricted=True, min_val=2, max_val=10)
+        check_argument("gst_num_style_tokens", c, restricted=True, min_val=1, max_val=1000)
+
+
+@dataclass
+class CapacitronVAEConfig(Coqpit):
+    """Defines the capacitron VAE Module
+    Args:
+        capacitron_capacity (int):
+            Defines the variational capacity limit of the prosody embeddings. Defaults to 150.
+        capacitron_VAE_embedding_dim (int):
+            Defines the size of the Capacitron embedding vector dimension. Defaults to 128.
+        capacitron_use_text_summary_embeddings (bool):
+            If True, use a text summary embedding in Capacitron. Defaults to True.
+        capacitron_text_summary_embedding_dim (int):
+            Defines the size of the capacitron text embedding vector dimension. Defaults to 128.
+        capacitron_use_speaker_embedding (bool):
+            if True use speaker embeddings in Capacitron. Defaults to False.
+        capacitron_VAE_loss_alpha (float):
+            Weight for the VAE loss of the Tacotron model. If set less than or equal to zero, it disables the
+            corresponding loss function. Defaults to 0.25
+        capacitron_grad_clip (float):
+            Gradient clipping value for all gradients except beta. Defaults to 5.0
+    """
+
+    capacitron_loss_alpha: int = 1
+    capacitron_capacity: int = 150
+    capacitron_VAE_embedding_dim: int = 128
+    capacitron_use_text_summary_embeddings: bool = True
+    capacitron_text_summary_embedding_dim: int = 128
+    capacitron_use_speaker_embedding: bool = False
+    capacitron_VAE_loss_alpha: float = 0.25
+    capacitron_grad_clip: float = 5.0
+
+    def check_values(
+        self,
+    ):
+        """Check config fields"""
+        c = asdict(self)
+        super().check_values()
+        check_argument("capacitron_capacity", c, restricted=True, min_val=10, max_val=500)
+        check_argument("capacitron_VAE_embedding_dim", c, restricted=True, min_val=16, max_val=1024)
+        check_argument("capacitron_use_speaker_embedding", c, restricted=False)
+        check_argument("capacitron_text_summary_embedding_dim", c, restricted=False, min_val=16, max_val=512)
+        check_argument("capacitron_VAE_loss_alpha", c, restricted=False)
+        check_argument("capacitron_grad_clip", c, restricted=False)
+
+
+@dataclass
+class CharactersConfig(Coqpit):
+    """Defines arguments for the `BaseCharacters` or `BaseVocabulary` and their subclasses.
+
+    Args:
+        characters_class (str):
+            Defines the class of the characters used. If None, we pick ```Phonemes``` or ```Graphemes``` based on
+            the configuration. Defaults to None.
+
+        vocab_dict (dict):
+            Defines the vocabulary dictionary used to encode the characters. Defaults to None.
+
+        pad (str):
+            Character used in place of empty padding. Defaults to None.
+
+        eos (str):
+            Character marking the end of a sentence. Defaults to None.
+
+        bos (str):
+            Character marking the beginning of a sentence. Defaults to None.
+
+        blank (str):
+            Optional character used between characters by some models for better prosody. Defaults to None.
+
+        characters (str):
+            Character set used by the model. Characters not in this list are ignored when converting input text to
+            a list of sequence IDs. Defaults to None.
+
+        punctuations (str):
+            Characters considered as punctuation when parsing the input sentence. Defaults to None.
+
+        phonemes (str):
+            Characters considered as phonemes when parsing. This is only for backwards compat. Use `characters` for
+            new models. Defaults to None.
+
+        is_unique (bool):
+            Remove any duplicate characters in the character lists. It is a bandaid for compatibility with the old
+            models trained with character lists with duplicates. Defaults to True.
+
+        is_sorted (bool):
+            Sort the characters in alphabetical order. Defaults to True.
+    """
+
+    characters_class: str = None
+
+    # used by BaseVocabulary
+    vocab_dict: Dict = None
+
+    # used by BaseCharacters
+    pad: str = None
+    eos: str = None
+    bos: str = None
+    blank: str = None
+    characters: str = None
+    punctuations: str = None
+    phonemes: str = None
+    is_unique: bool = True  # for backwards compatibility of models trained with char sets with duplicates
+    is_sorted: bool = True
+
+
+@dataclass
+class BaseTTSConfig(BaseTrainingConfig):
+    """Shared parameters among all the tts models.
+
+    Args:
+
+        audio (BaseAudioConfig):
+            Audio processor config object instance.
+
+        use_phonemes (bool):
+            enable / disable phoneme use.
+
+        phonemizer (str):
+            Name of the phonemizer to use. If set None, the phonemizer will be selected by `phoneme_language`.
+            Defaults to None.
+
+        phoneme_language (str):
+            Language code for the phonemizer. You can check the list of supported languages by running
+            `python TTS/tts/utils/text/phonemizers/__init__.py`. Defaults to None.
+
+        compute_input_seq_cache (bool):
+            enable / disable precomputation of the phoneme sequences. At the expense of some delay at the beginning of
+            the training, it allows faster data loader time and precise limitation with `max_seq_len` and
+            `min_seq_len`.
+
+        text_cleaner (str):
+            Name of the text cleaner used for cleaning and formatting transcripts.
+
+        enable_eos_bos_chars (bool):
+            enable / disable the use of eos and bos characters.
+
+        test_sentences_file (str):
+            Path to a txt file that has sentences used at test time. The file must have a sentence per line.
+
+        phoneme_cache_path (str):
+            Path to the output folder caching the computed phonemes for each sample.
+
+        characters (CharactersConfig):
+            Instance of a CharactersConfig class.
+
+        batch_group_size (int):
+            Size of the batch groups used for bucketing. By default, the dataloader orders samples by the sequence
+            length for a more efficient and stable training. If `batch_group_size > 1` then it performs bucketing to
+            prevent using the same batches for each epoch.
+
+        loss_masking (bool):
+            enable / disable masking loss values against padded segments of samples in a batch.
+
+        min_text_len (int):
+            Minimum length of input text to be used. All shorter samples will be ignored. Defaults to 1.
+
+        max_text_len (int):
+            Maximum length of input text to be used. All longer samples will be ignored. Defaults to float("inf").
+
+        min_audio_len (int):
+            Minimum length of input audio to be used. All shorter samples will be ignored. Defaults to 1.
+
+        max_audio_len (int):
+            Maximum length of input audio to be used. All longer samples will be ignored. The maximum length in the
+            dataset defines the VRAM used in the training. Hence, pay attention to this value if you encounter an
+            OOM error in training. Defaults to float("inf").
+
+        compute_f0 (bool):
+            (Not in use yet). Defaults to False.
+
+        compute_linear_spec (bool):
+            If True data loader computes and returns linear spectrograms alongside the other data.
+
+        precompute_num_workers (int):
+            Number of workers to precompute features. Defaults to 0.
+
+        use_noise_augment (bool):
+            Augment the input audio with random noise.
+
+        start_by_longest (bool):
+            If True, the data loader will start loading the longest batch first. It is useful for checking OOM issues.
+            Defaults to False.
+
+        add_blank (bool):
+            Add blank characters between every two characters. It improves performance for some models at expense
+            of slower run-time due to the longer input sequence.
+
+        datasets (List[BaseDatasetConfig]):
+            List of datasets used for training. If multiple datasets are provided, they are merged and used together
+            for training.
+
+        optimizer (str):
+            Optimizer used for the training. Set one from `torch.optim.Optimizer` or `TTS.utils.training`.
+            Defaults to `radam`.
+
+        optimizer_params (dict):
+            Optimizer kwargs. Defaults to None.
+
+        lr_scheduler (str):
+            Learning rate scheduler for the training. Use one from `torch.optim.Scheduler` schedulers or
+            `TTS.utils.training`. Defaults to an empty string.
+
+        lr_scheduler_params (dict):
+            Parameters for the generator learning rate scheduler. Defaults to `{}`.
+
+        test_sentences (List[str]):
+            List of sentences to be used at testing. Defaults to '[]'
+
+        eval_split_max_size (int):
+            Maximum number of samples to be used for evaluation in proportion split. Defaults to None (Disabled).
+
+        eval_split_size (float):
+            If between 0.0 and 1.0 represents the proportion of the dataset to include in the evaluation set.
+            If > 1, represents the absolute number of evaluation samples. Defaults to 0.01 (1%).
+
+        use_speaker_weighted_sampler (bool):
+            Enable / Disable the batch balancer by speaker. Defaults to ```False```.
+
+        speaker_weighted_sampler_alpha (float):
+            Number that controls the influence of the speaker sampler weights. Defaults to ```1.0```.
+
+        use_language_weighted_sampler (bool):
+            Enable / Disable the batch balancer by language. Defaults to ```False```.
+
+        language_weighted_sampler_alpha (float):
+            Number that controls the influence of the language sampler weights. Defaults to ```1.0```.
+
+        use_length_weighted_sampler (bool):
+            Enable / Disable the batch balancer by audio length. If enabled the dataset will be divided
+            into 10 buckets considering the min and max audio of the dataset. The sampler weights will be
+            computed forcing to have the same quantity of data for each bucket in each training batch. Defaults to ```False```.
+
+        length_weighted_sampler_alpha (float):
+            Number that controls the influence of the length sampler weights. Defaults to ```1.0```.
+    """
+
+    audio: BaseAudioConfig = field(default_factory=BaseAudioConfig)
+    # phoneme settings
+    use_phonemes: bool = False
+    phonemizer: str = None
+    phoneme_language: str = None
+    compute_input_seq_cache: bool = False
+    text_cleaner: str = None
+    enable_eos_bos_chars: bool = False
+    test_sentences_file: str = ""
+    phoneme_cache_path: str = None
+    # vocabulary parameters
+    characters: CharactersConfig = None
+    add_blank: bool = False
+    # training params
+    batch_group_size: int = 0
+    loss_masking: bool = None
+    # dataloading
+    min_audio_len: int = 1
+    max_audio_len: int = float("inf")  # annotated `int`, but the default is the float infinity (no upper limit)
+    min_text_len: int = 1
+    max_text_len: int = float("inf")  # annotated `int`, but the default is the float infinity (no upper limit)
+    compute_f0: bool = False
+    compute_linear_spec: bool = False
+    precompute_num_workers: int = 0
+    use_noise_augment: bool = False
+    start_by_longest: bool = False
+    # dataset
+    datasets: List[BaseDatasetConfig] = field(default_factory=lambda: [BaseDatasetConfig()])
+    # optimizer
+    optimizer: str = "radam"
+    optimizer_params: dict = None
+    # scheduler
+    lr_scheduler: str = ""
+    lr_scheduler_params: dict = field(default_factory=lambda: {})
+    # testing
+    test_sentences: List[str] = field(default_factory=lambda: [])
+    # evaluation
+    eval_split_max_size: int = None
+    eval_split_size: float = 0.01
+    # weighted samplers
+    use_speaker_weighted_sampler: bool = False
+    speaker_weighted_sampler_alpha: float = 1.0
+    use_language_weighted_sampler: bool = False
+    language_weighted_sampler_alpha: float = 1.0
+    use_length_weighted_sampler: bool = False
+    length_weighted_sampler_alpha: float = 1.0
diff --git a/TTS/tts/configs/speedy_speech_config.py b/TTS/tts/configs/speedy_speech_config.py
new file mode 100644
index 0000000000000000000000000000000000000000..4bf5101fcad2479e87836c827658c88addfd7cc6
--- /dev/null
+++ b/TTS/tts/configs/speedy_speech_config.py
@@ -0,0 +1,192 @@
+from dataclasses import dataclass, field
+from typing import List
+
+from TTS.tts.configs.shared_configs import BaseTTSConfig
+from TTS.tts.models.forward_tts import ForwardTTSArgs
+
+
+@dataclass
+class SpeedySpeechConfig(BaseTTSConfig):
+    """Configure `ForwardTTS` as SpeedySpeech model.
+
+    Example:
+
+        >>> from TTS.tts.configs.speedy_speech_config import SpeedySpeechConfig
+        >>> config = SpeedySpeechConfig()
+
+     Args:
+        model (str):
+            Model name used for selecting the right model at initialization. Defaults to `speedy_speech`.
+
+        base_model (str):
+            Name of the base model being configured as this model so that 🐸 TTS knows it needs to initiate
+            the base model rather than searching for the `model` implementation. Defaults to `forward_tts`.
+
+        model_args (Coqpit):
+            Model class arguments. Check `FastPitchArgs` for more details. Defaults to `FastPitchArgs()`.
+
+        data_dep_init_steps (int):
+            Number of steps used for computing normalization parameters at the beginning of the training. GlowTTS uses
+            Activation Normalization that pre-computes normalization stats at the beginning and use the same values
+            for the rest. Defaults to 10.
+
+        speakers_file (str):
+            Path to the file containing the list of speakers. Needed at inference for loading matching speaker ids to
+            speaker names. Defaults to `None`.
+
+        use_speaker_embedding (bool):
+            enable / disable using speaker embeddings for multi-speaker models. If set True, the model is
+            in the multi-speaker mode. Defaults to False.
+
+        use_d_vector_file (bool):
+            enable /disable using external speaker embeddings in place of the learned embeddings. Defaults to False.
+
+        d_vector_file (str):
+            Path to the file including pre-computed speaker embeddings. Defaults to None.
+
+        d_vector_dim (int):
+            Dimension of the external speaker embeddings. Defaults to 0.
+
+        optimizer (str):
+            Name of the model optimizer. Defaults to `RAdam`.
+
+        optimizer_params (dict):
+            Arguments of the model optimizer. Defaults to `{"betas": [0.9, 0.998], "weight_decay": 1e-6}`.
+
+        lr_scheduler (str):
+            Name of the learning rate scheduler. Defaults to `Noam`.
+
+        lr_scheduler_params (dict):
+            Arguments of the learning rate scheduler. Defaults to `{"warmup_steps": 4000}`.
+
+        lr (float):
+            Initial learning rate. Defaults to `1e-3`.
+
+        grad_clip (float):
+            Gradient norm clipping value. Defaults to `5.0`.
+
+        spec_loss_type (str):
+            Type of the spectrogram loss. Check `ForwardTTSLoss` for possible values. Defaults to `l1`.
+
+        duration_loss_type (str):
+            Type of the duration loss. Check `ForwardTTSLoss` for possible values. Defaults to `huber`.
+
+        use_ssim_loss (bool):
+            Enable/disable the use of SSIM (Structural Similarity) loss. Defaults to True.
+
+        wd (float):
+            Weight decay coefficient. Defaults to `1e-7`.
+
+        ssim_loss_alpha (float):
+            Weight for the SSIM loss. If set 0, disables the SSIM loss. Defaults to 1.0.
+
+        dur_loss_alpha (float):
+            Weight for the duration predictor's loss. If set 0, disables the huber loss. Defaults to 1.0.
+
+        spec_loss_alpha (float):
+            Weight for the L1 spectrogram loss. If set 0, disables the L1 loss. Defaults to 1.0.
+
+        binary_loss_alpha (float):
+            Weight for the binary loss. If set 0, disables the binary loss. Defaults to 1.0.
+
+        binary_loss_warmup_epochs (float):
+            Number of epochs to gradually increase the binary loss impact. Defaults to 150.
+
+        min_seq_len (int):
+            Minimum input sequence length to be used at training.
+
+        max_seq_len (int):
+            Maximum input sequence length to be used at training. Larger values result in more VRAM usage.
+    """
+
+    model: str = "speedy_speech"
+    base_model: str = "forward_tts"
+
+    # set model args as SpeedySpeech
+    model_args: ForwardTTSArgs = ForwardTTSArgs(
+        use_pitch=False,
+        encoder_type="residual_conv_bn",
+        encoder_params={
+            "kernel_size": 4,
+            "dilations": 4 * [1, 2, 4] + [1],
+            "num_conv_blocks": 2,
+            "num_res_blocks": 13,
+        },
+        decoder_type="residual_conv_bn",
+        decoder_params={
+            "kernel_size": 4,
+            "dilations": 4 * [1, 2, 4, 8] + [1],
+            "num_conv_blocks": 2,
+            "num_res_blocks": 17,
+        },
+        out_channels=80,
+        hidden_channels=128,
+        positional_encoding=True,
+        detach_duration_predictor=True,
+    )
+
+    # multi-speaker settings
+    num_speakers: int = 0
+    speakers_file: str = None
+    use_speaker_embedding: bool = False
+    use_d_vector_file: bool = False
+    d_vector_file: str = False
+    d_vector_dim: int = 0
+
+    # optimizer parameters
+    optimizer: str = "Adam"
+    optimizer_params: dict = field(default_factory=lambda: {"betas": [0.9, 0.998], "weight_decay": 1e-6})
+    lr_scheduler: str = "NoamLR"
+    lr_scheduler_params: dict = field(default_factory=lambda: {"warmup_steps": 4000})
+    lr: float = 1e-4
+    grad_clip: float = 5.0
+
+    # loss params
+    spec_loss_type: str = "l1"
+    duration_loss_type: str = "huber"
+    use_ssim_loss: bool = False
+    ssim_loss_alpha: float = 1.0
+    dur_loss_alpha: float = 1.0
+    spec_loss_alpha: float = 1.0
+    aligner_loss_alpha: float = 1.0
+    binary_align_loss_alpha: float = 0.3
+    binary_loss_warmup_epochs: int = 150
+
+    # overrides
+    min_seq_len: int = 13
+    max_seq_len: int = 200
+    r: int = 1  # DO NOT CHANGE
+
+    # dataset configs
+    compute_f0: bool = False
+    f0_cache_path: str = None
+
+    # testing
+    test_sentences: List[str] = field(
+        default_factory=lambda: [
+            "It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.",
+            "Be a voice, not an echo.",
+            "I'm sorry Dave. I'm afraid I can't do that.",
+            "This cake is great. It's so delicious and moist.",
+            "Prior to November 22, 1963.",
+        ]
+    )
+
+    def __post_init__(self):
+        # Pass multi-speaker parameters to the model args as `model.init_multispeaker()` looks for it there.
+        if self.num_speakers > 0:
+            self.model_args.num_speakers = self.num_speakers
+
+        # speaker embedding settings
+        if self.use_speaker_embedding:
+            self.model_args.use_speaker_embedding = True
+        if self.speakers_file:
+            self.model_args.speakers_file = self.speakers_file
+
+        # d-vector settings
+        if self.use_d_vector_file:
+            self.model_args.use_d_vector_file = True
+        if self.d_vector_dim is not None and self.d_vector_dim > 0:
+            self.model_args.d_vector_dim = self.d_vector_dim
+        if self.d_vector_file:
+            self.model_args.d_vector_file = self.d_vector_file
diff --git a/TTS/tts/configs/tacotron2_config.py b/TTS/tts/configs/tacotron2_config.py
new file mode 100644
index 0000000000000000000000000000000000000000..95b65202218cf3aa0dd70c8d8cd55a3f913ed308
--- /dev/null
+++ b/TTS/tts/configs/tacotron2_config.py
@@ -0,0 +1,21 @@
+from dataclasses import dataclass
+
+from TTS.tts.configs.tacotron_config import TacotronConfig
+
+
+@dataclass
+class Tacotron2Config(TacotronConfig):
+    """Defines parameters for Tacotron2 based models.
+
+    Example:
+
+        >>> from TTS.tts.configs.tacotron2_config import Tacotron2Config
+        >>> config = Tacotron2Config()
+
+    Only the four fields below are declared here. Check `TacotronConfig` for argument descriptions.
+    """
+
+    model: str = "tacotron2"
+    out_channels: int = 80
+    encoder_in_features: int = 512
+    decoder_in_features: int = 512
diff --git a/TTS/tts/configs/tacotron_config.py b/TTS/tts/configs/tacotron_config.py
new file mode 100644
index 0000000000000000000000000000000000000000..350b5ea99633569d6977851875d5d8d83175ac36
--- /dev/null
+++ b/TTS/tts/configs/tacotron_config.py
@@ -0,0 +1,235 @@
+from dataclasses import dataclass, field
+from typing import List
+
+from TTS.tts.configs.shared_configs import BaseTTSConfig, CapacitronVAEConfig, GSTConfig
+
+
+@dataclass
+class TacotronConfig(BaseTTSConfig):
+    """Defines parameters for Tacotron based models.
+
+    Example:
+
+        >>> from TTS.tts.configs.tacotron_config import TacotronConfig
+        >>> config = TacotronConfig()
+
+    Args:
+        model (str):
+            Model name used to select the right model class to initialize. Defaults to `tacotron`.
+        use_gst (bool):
+            enable / disable the use of Global Style Token modules. Defaults to False.
+        gst (GSTConfig):
+            Instance of `GSTConfig` class.
+        gst_style_input (str):
+            Path to the wav file used at inference to set the speech style through GST. If `GST` is enabled and
+            this is not defined, the model uses a zero vector as an input. Defaults to None.
+        use_capacitron_vae (bool):
+            enable / disable the use of Capacitron modules. Defaults to False.
+        capacitron_vae (CapacitronVAEConfig):
+            Instance of `CapacitronVAEConfig` class.
+        num_chars (int):
+            Number of characters used by the model. It must be defined before initializing the model. Defaults to 0.
+        num_speakers (int):
+            Number of speakers for multi-speaker models. Defaults to 1.
+        r (int):
+            Initial number of output frames that the decoder computed per iteration. Larger values make training and inference
+            faster but reduces the quality of the output frames. This must be equal to the largest `r` value used in
+            `gradual_training` schedule. Defaults to 2.
+        gradual_training (List[List]):
+            Parameters for the gradual training schedule. It is in the form `[[a, b, c], [d ,e ,f] ..]` where `a` is
+            the step number to start using the rest of the values, `b` is the `r` value and `c` is the batch size.
+            If set to None, no gradual training is used. Defaults to None.
+        memory_size (int):
+            Defines the number of previous frames used by the Prenet. If set to < 0, then it uses only the last frame.
+            Defaults to -1.
+        prenet_type (str):
+            `original` or `bn`. `original` sets the default Prenet and `bn` uses Batch Normalization version of the
+            Prenet. Defaults to `original`.
+        prenet_dropout (bool):
+            enables / disables the use of dropout in the Prenet. Defaults to True.
+        prenet_dropout_at_inference (bool):
+            enable / disable the use of dropout in the Prenet at the inference time. Defaults to False.
+        stopnet (bool):
+            enable /disable the Stopnet that predicts the end of the decoder sequence. Defaults to True.
+        stopnet_pos_weight (float):
+            Weight that is applied to over-weight positive instances in the Stopnet loss. Use larger values with
+            datasets with longer sentences. Defaults to 0.2.
+        max_decoder_steps (int):
+            Max number of steps allowed for the decoder. Defaults to 10000.
+        encoder_in_features (int):
+            Channels of encoder input and character embedding tensors. Defaults to 256.
+        decoder_in_features (int):
+            Channels of decoder input and encoder output tensors. Defaults to 256.
+        out_channels (int):
+            Channels of the final model output. It must match the spectrogram size. Defaults to 513.
+        separate_stopnet (bool):
+            Use a distinct Stopnet which is trained separately from the rest of the model. Defaults to True.
+        attention_type (str):
+            attention type. Check ```TTS.tts.layers.attentions.init_attn```. Defaults to 'original'.
+        attention_heads (int):
+            Number of attention heads for GMM attention. Defaults to None.
+        windowing (bool):
+            It is especially useful at inference to keep attention alignment diagonal. Defaults to False.
+        use_forward_attn (bool):
+            It is only valid if ```attention_type``` is ```original```.  Defaults to False.
+        forward_attn_mask (bool):
+            enable/disable extra masking over forward attention. It is useful at inference to prevent
+            possible attention failures. Defaults to False.
+        transition_agent (bool):
+            enable/disable transition agent in forward attention. Defaults to False.
+        location_attn (bool):
+            enable/disable location sensitive attention as in the original Tacotron2 paper.
+            It is only valid if ```attention_type``` is ```original```. Defaults to True.
+        bidirectional_decoder (bool):
+            enable/disable bidirectional decoding. Defaults to False.
+        double_decoder_consistency (bool):
+            enable/disable double decoder consistency. Defaults to False.
+        ddc_r (int):
+            reduction rate used by the coarse decoder when `double_decoder_consistency` is in use. Set this
+            as a multiple of the `r` value. Defaults to 6.
+        speakers_file (str):
+            Path to the speaker mapping file for the Speaker Manager. Defaults to None.
+        use_speaker_embedding (bool):
+            enable / disable using speaker embeddings for multi-speaker models. If set True, the model is
+            in the multi-speaker mode. Defaults to False.
+        use_d_vector_file (bool):
+            enable /disable using external speaker embeddings in place of the learned embeddings. Defaults to False.
+        d_vector_file (str):
+            Path to the file including pre-computed speaker embeddings. Defaults to None.
+        d_vector_dim (int):
+            Presumably the dimension of the embeddings stored in `d_vector_file` (to be confirmed). Defaults to None.
+        optimizer (str):
+            Optimizer used for the training. Set one from `torch.optim.Optimizer` or `TTS.utils.training`.
+            Defaults to `RAdam`.
+        optimizer_params (dict):
+            Optimizer kwargs. The weight decay is configured here; there is no separate `wd` field.
+            Defaults to `{"betas": [0.9, 0.998], "weight_decay": 1e-6}`
+        lr_scheduler (str):
+            Learning rate scheduler for the training. Use one from `torch.optim.Scheduler` schedulers or
+            `TTS.utils.training`. Defaults to `NoamLR`.
+        lr_scheduler_params (dict):
+            Parameters for the generator learning rate scheduler. Defaults to `{"warmup_steps": 4000}`.
+        lr (float):
+            Initial learning rate. Defaults to `1e-4`.
+        grad_clip (float):
+            Gradient clipping threshold. Defaults to `5`.
+        seq_len_norm (bool):
+            enable / disable the sequence length normalization in the loss functions. If set True, loss of a sample
+            is divided by the sequence length. Defaults to False.
+        loss_masking (bool):
+            enable / disable masking the paddings of the samples in loss computation. Defaults to True.
+        decoder_loss_alpha (float):
+            Weight for the decoder loss of the Tacotron model. If set less than or equal to zero, it disables the
+            corresponding loss function. Defaults to 0.25
+        postnet_loss_alpha (float):
+            Weight for the postnet loss of the Tacotron model. If set less than or equal to zero, it disables the
+            corresponding loss function. Defaults to 0.25
+        postnet_diff_spec_alpha (float):
+            Weight for the postnet differential loss of the Tacotron model. If set less than or equal to zero, it disables the
+            corresponding loss function. Defaults to 0.25
+        decoder_diff_spec_alpha (float):
+            Weight for the decoder differential loss of the Tacotron model. If set less than or equal to zero, it disables the
+            corresponding loss function. Defaults to 0.25
+        decoder_ssim_alpha (float):
+            Weight for the decoder SSIM loss of the Tacotron model. If set less than or equal to zero, it disables the
+            corresponding loss function. Defaults to 0.25
+        postnet_ssim_alpha (float):
+            Weight for the postnet SSIM loss of the Tacotron model. If set less than or equal to zero, it disables the
+            corresponding loss function. Defaults to 0.25
+        ga_alpha (float):
+            Weight for the guided attention loss. If set less than or equal to zero, it disables the corresponding loss
+            function. Defaults to 5.
+    """
+
+    model: str = "tacotron"
+    use_gst: bool = False
+    gst: GSTConfig = None
+    gst_style_input: str = None
+
+    use_capacitron_vae: bool = False
+    capacitron_vae: CapacitronVAEConfig = None
+
+    # model specific params
+    num_speakers: int = 1
+    num_chars: int = 0
+    r: int = 2
+    gradual_training: List[List[int]] = None
+    memory_size: int = -1
+    prenet_type: str = "original"
+    prenet_dropout: bool = True
+    prenet_dropout_at_inference: bool = False
+    stopnet: bool = True
+    separate_stopnet: bool = True
+    stopnet_pos_weight: float = 0.2
+    max_decoder_steps: int = 10000
+    encoder_in_features: int = 256
+    decoder_in_features: int = 256
+    decoder_output_dim: int = 80
+    out_channels: int = 513
+
+    # attention layers
+    attention_type: str = "original"
+    attention_heads: int = None
+    attention_norm: str = "sigmoid"
+    attention_win: bool = False
+    windowing: bool = False
+    use_forward_attn: bool = False
+    forward_attn_mask: bool = False
+    transition_agent: bool = False
+    location_attn: bool = True
+
+    # advance methods
+    bidirectional_decoder: bool = False
+    double_decoder_consistency: bool = False
+    ddc_r: int = 6
+
+    # multi-speaker settings
+    speakers_file: str = None
+    use_speaker_embedding: bool = False
+    speaker_embedding_dim: int = 512
+    use_d_vector_file: bool = False
+    d_vector_file: str = None
+    d_vector_dim: int = None
+
+    # optimizer parameters
+    optimizer: str = "RAdam"
+    optimizer_params: dict = field(default_factory=lambda: {"betas": [0.9, 0.998], "weight_decay": 1e-6})
+    lr_scheduler: str = "NoamLR"
+    lr_scheduler_params: dict = field(default_factory=lambda: {"warmup_steps": 4000})
+    lr: float = 1e-4
+    grad_clip: float = 5.0
+    seq_len_norm: bool = False
+    loss_masking: bool = True
+
+    # loss params
+    decoder_loss_alpha: float = 0.25
+    postnet_loss_alpha: float = 0.25
+    postnet_diff_spec_alpha: float = 0.25
+    decoder_diff_spec_alpha: float = 0.25
+    decoder_ssim_alpha: float = 0.25
+    postnet_ssim_alpha: float = 0.25
+    ga_alpha: float = 5.0
+
+    # testing
+    test_sentences: List[str] = field(
+        default_factory=lambda: [
+            "It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.",
+            "Be a voice, not an echo.",
+            "I'm sorry Dave. I'm afraid I can't do that.",
+            "This cake is great. It's so delicious and moist.",
+            "Prior to November 22, 1963.",
+        ]
+    )
+
+    def check_values(self):
+        """Assert that `r` matches the gradual training schedule and `out_channels` matches the audio config."""
+        if self.gradual_training:
+            assert (
+                self.gradual_training[0][1] == self.r
+            ), f"[!] the first scheduled gradual training `r` must be equal to the model's `r` value. {self.gradual_training[0][1]} vs {self.r}"
+        if self.model == "tacotron" and self.audio is not None:
+            assert self.out_channels == (
+                self.audio.fft_size // 2 + 1
+            ), f"{self.out_channels} vs {self.audio.fft_size // 2 + 1}"
+        if self.model == "tacotron2" and self.audio is not None:
+            assert self.out_channels == self.audio.num_mels, f"{self.out_channels} vs {self.audio.num_mels}"
diff --git a/TTS/tts/configs/vits_config.py b/TTS/tts/configs/vits_config.py
new file mode 100644
index 0000000000000000000000000000000000000000..3469f701aebb2b0f68d3b5fbf73c4a49ab9c4480
--- /dev/null
+++ b/TTS/tts/configs/vits_config.py
@@ -0,0 +1,176 @@
+from dataclasses import dataclass, field
+from typing import List
+
+from TTS.tts.configs.shared_configs import BaseTTSConfig
+from TTS.tts.models.vits import VitsArgs, VitsAudioConfig
+
+
+@dataclass
+class VitsConfig(BaseTTSConfig):
+    """Defines parameters for VITS End2End TTS model.
+
+    Args:
+        model (str):
+            Model name. Do not change unless you know what you are doing.
+
+        model_args (VitsArgs):
+            Model architecture arguments. Defaults to `VitsArgs()`.
+
+        audio (VitsAudioConfig):
+            Audio processing configuration. Defaults to `VitsAudioConfig()`.
+
+        grad_clip (List):
+            Gradient clipping thresholds for each optimizer. Defaults to `[1000.0, 1000.0]`.
+
+        lr_gen (float):
+            Initial learning rate for the generator. Defaults to 0.0002.
+
+        lr_disc (float):
+            Initial learning rate for the discriminator. Defaults to 0.0002.
+
+        lr_scheduler_gen (str):
+            Name of the learning rate scheduler for the generator. One of the `torch.optim.lr_scheduler.*`. Defaults to
+            `ExponentialLR`.
+
+        lr_scheduler_gen_params (dict):
+            Parameters for the learning rate scheduler of the generator. Defaults to `{'gamma': 0.999875, "last_epoch":-1}`.
+
+        lr_scheduler_disc (str):
+            Name of the learning rate scheduler for the discriminator. One of the `torch.optim.lr_scheduler.*`. Defaults to
+            `ExponentialLR`.
+
+        lr_scheduler_disc_params (dict):
+            Parameters for the learning rate scheduler of the discriminator. Defaults to `{'gamma': 0.999875, "last_epoch":-1}`.
+
+        scheduler_after_epoch (bool):
+            If true, step the schedulers after each epoch else after each step. Defaults to `True`.
+
+        optimizer (str):
+            Name of the optimizer to use with both the generator and the discriminator networks. One of the
+            `torch.optim.*`. Defaults to `AdamW`.
+
+        kl_loss_alpha (float):
+            Loss weight for KL loss. Defaults to 1.0.
+
+        disc_loss_alpha (float):
+            Loss weight for the discriminator loss. Defaults to 1.0.
+
+        gen_loss_alpha (float):
+            Loss weight for the generator loss. Defaults to 1.0.
+
+        feat_loss_alpha (float):
+            Loss weight for the feature matching loss. Defaults to 1.0.
+
+        mel_loss_alpha (float):
+            Loss weight for the mel loss. Defaults to 45.0.
+
+        return_wav (bool):
+            If true, data loader returns the waveform as well as the other outputs. Do not change. Defaults to `True`.
+
+        compute_linear_spec (bool):
+            If true, the linear spectrogram is computed and returned alongside the mel output. Do not change. Defaults to `True`.
+
+        use_weighted_sampler (bool):
+            If true, use weighted sampler with bucketing for balancing samples between datasets used in training. Defaults to `False`.
+
+        weighted_sampler_attrs (dict):
+            Key returned by the formatter to be used for weighted sampler. For example `{"root_path": 2.0, "speaker_name": 1.0}` sets sample probabilities
+            by overweighting `root_path` by 2.0. Defaults to `{}`.
+
+        weighted_sampler_multipliers (dict):
+            Weight each unique value of a key returned by the formatter for weighted sampling.
+            For example `{"root_path":{"/raid/datasets/libritts-clean-16khz-bwe-coqui_44khz/LibriTTS/train-clean-100/":1.0, "/raid/datasets/libritts-clean-16khz-bwe-coqui_44khz/LibriTTS/train-clean-360/": 0.5}}`.
+            It will sample instances from `train-clean-100` 2 times more than `train-clean-360`. Defaults to `{}`.
+
+        r (int):
+            Number of spectrogram frames to be generated at a time. Do not change. Defaults to `1`.
+
+        add_blank (bool):
+            If true, a blank token is added in between every character. Defaults to `True`.
+
+        test_sentences (List[List]):
+            List of sentences with speaker and language information to be used for testing.
+
+        language_ids_file (str):
+            Path to the language ids file.
+
+        use_language_embedding (bool):
+            If true, language embedding is used. Defaults to `False`.
+
+    Note:
+        Check :class:`TTS.tts.configs.shared_configs.BaseTTSConfig` for the inherited parameters.
+
+    Example:
+
+        >>> from TTS.tts.configs.vits_config import VitsConfig
+        >>> config = VitsConfig()
+    """
+
+    model: str = "vits"
+    # model specific params
+    model_args: VitsArgs = field(default_factory=VitsArgs)
+    audio: VitsAudioConfig = field(default_factory=VitsAudioConfig)
+
+    # optimizer
+    grad_clip: List[float] = field(default_factory=lambda: [1000, 1000])
+    lr_gen: float = 0.0002
+    lr_disc: float = 0.0002
+    lr_scheduler_gen: str = "ExponentialLR"
+    lr_scheduler_gen_params: dict = field(default_factory=lambda: {"gamma": 0.999875, "last_epoch": -1})
+    lr_scheduler_disc: str = "ExponentialLR"
+    lr_scheduler_disc_params: dict = field(default_factory=lambda: {"gamma": 0.999875, "last_epoch": -1})
+    scheduler_after_epoch: bool = True
+    optimizer: str = "AdamW"
+    optimizer_params: dict = field(default_factory=lambda: {"betas": [0.8, 0.99], "eps": 1e-9, "weight_decay": 0.01})
+
+    # loss params
+    kl_loss_alpha: float = 1.0
+    disc_loss_alpha: float = 1.0
+    gen_loss_alpha: float = 1.0
+    feat_loss_alpha: float = 1.0
+    mel_loss_alpha: float = 45.0
+    dur_loss_alpha: float = 1.0
+    speaker_encoder_loss_alpha: float = 1.0
+
+    # data loader params
+    return_wav: bool = True
+    compute_linear_spec: bool = True
+
+    # sampler params
+    use_weighted_sampler: bool = False  # TODO: move it to the base config
+    weighted_sampler_attrs: dict = field(default_factory=lambda: {})
+    weighted_sampler_multipliers: dict = field(default_factory=lambda: {})
+
+    # overrides
+    r: int = 1  # DO NOT CHANGE
+    add_blank: bool = True
+
+    # testing
+    test_sentences: List[List] = field(
+        default_factory=lambda: [
+            ["It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent."],
+            ["Be a voice, not an echo."],
+            ["I'm sorry Dave. I'm afraid I can't do that."],
+            ["This cake is great. It's so delicious and moist."],
+            ["Prior to November 22, 1963."],
+        ]
+    )
+
+    # multi-speaker settings: use speaker embedding layer
+    num_speakers: int = 0
+    use_speaker_embedding: bool = False
+    speakers_file: str = None
+    speaker_embedding_channels: int = 256
+    language_ids_file: str = None
+    use_language_embedding: bool = False
+
+    # use d-vectors
+    use_d_vector_file: bool = False
+    d_vector_file: str = None
+    d_vector_dim: int = None
+
+    def __post_init__(self):
+        """Copy every `model_args` entry whose key is also an attribute of this config onto the config."""
+        for key, val in self.model_args.items():
+            if hasattr(self, key):
+                self[key] = val
diff --git a/TTS/tts/datasets/.ipynb_checkpoints/formatters-checkpoint.py b/TTS/tts/datasets/.ipynb_checkpoints/formatters-checkpoint.py
new file mode 100644
index 0000000000000000000000000000000000000000..31b62f3312ead7e9bb867434bfaa5c8a1254fda4
--- /dev/null
+++ b/TTS/tts/datasets/.ipynb_checkpoints/formatters-checkpoint.py
@@ -0,0 +1,633 @@
+import os
+import re
+import xml.etree.ElementTree as ET
+from glob import glob
+from pathlib import Path
+from typing import List
+
+import pandas as pd
+from tqdm import tqdm
+
+########################
+# DATASETS
+########################
+
+
+def coqui(root_path, meta_file, ignored_speakers=None):
+    """Format a `|`-separated metadata file with a header row into a list of sample dicts.
+    `audio_file`/`text` columns are required; missing `speaker_name`/`emotion_name` default to "coqui"/"neutral"."""
+    meta_path = os.path.join(root_path, meta_file)
+    with open(meta_path, "r", encoding="utf8") as meta:
+        raw_lines = meta.readlines()
+    # every row is expected to have as many columns as the first (header) row
+    expected_cols = len(raw_lines[0].split("|"))
+    for row_idx, raw_line in enumerate(raw_lines[1:], start=1):
+        if len(raw_line.split("|")) != expected_cols:
+            print(f" > Missing column in line {row_idx} -> {raw_line.strip()}")
+    table = pd.read_csv(meta_path, sep="|")
+    assert all(col in table.columns for col in ["audio_file", "text"])
+    has_speakers = "speaker_name" in table.columns
+    has_emotions = "emotion_name" in table.columns
+    samples = []
+    missing = 0
+    for row in table.itertuples():
+        if has_speakers and ignored_speakers is not None and row.speaker_name in ignored_speakers:
+            continue
+        wav_path = os.path.join(root_path, row.audio_file)
+        if not os.path.exists(wav_path):
+            missing += 1
+            continue
+        samples.append(
+            {
+                "text": row.text,
+                "audio_file": wav_path,
+                "speaker_name": row.speaker_name if has_speakers else "coqui",
+                "emotion_name": row.emotion_name if has_emotions else "neutral",
+                "root_path": root_path,
+            }
+        )
+    if missing > 0:
+        print(f" | > [!] {missing} files not found")
+    return samples
+
+
+def tweb(root_path, meta_file, **kwargs):  # pylint: disable=unused-argument
+    """Build TTS samples from the tab-separated TWEB metadata file (wav stem, then text, on each line).
+    https://www.kaggle.com/bryanpark/the-world-english-bible-speech-dataset"""
+    with open(os.path.join(root_path, meta_file), "r", encoding="utf-8") as meta:
+        rows = [row.split("\t") for row in meta]
+    return [
+        {
+            "text": fields[1],
+            "audio_file": os.path.join(root_path, fields[0] + ".wav"),
+            "speaker_name": "tweb",
+            "root_path": root_path,
+        }
+        for fields in rows
+    ]
+
+
+def mozilla(root_path, meta_file, **kwargs):  # pylint: disable=unused-argument
+    """Convert a Mozilla metadata file into a list of TTS sample dicts.
+    Each line is `<text>|<wav file name>`; the audio path is built under `<root_path>/wavs`.
+    """
+    samples = []
+    with open(os.path.join(root_path, meta_file), "r", encoding="utf-8") as meta:
+        for row in meta:
+            fields = row.split("|")
+            wav_path = os.path.join(root_path, "wavs", fields[1].strip())
+            samples.append(
+                {"text": fields[0].strip(), "audio_file": wav_path, "speaker_name": "mozilla", "root_path": root_path}
+            )
+    return samples
+
+
+def mozilla_de(root_path, meta_file, **kwargs):  # pylint: disable=unused-argument
+    """Convert a Mozilla metadata file (`<wav file>|<text>`, read as ISO 8859-1) into TTS sample dicts.
+    The audio folder is `BATCH_<prefix>_FINAL`, where `<prefix>` is the wav name up to its first underscore.
+    """
+    samples = []
+    with open(os.path.join(root_path, meta_file), "r", encoding="ISO 8859-1") as meta:
+        for row in meta:
+            fields = row.strip().split("|")
+            wav = fields[0].strip()
+            text = fields[1].strip()
+            batch_dir = f"BATCH_{wav.split('_')[0]}_FINAL"
+            wav = os.path.join(root_path, batch_dir, wav)
+            samples.append({"text": text, "audio_file": wav, "speaker_name": "mozilla", "root_path": root_path})
+    return samples
+
+
+def mailabs(root_path, meta_files=None, ignored_speakers=None):
+    """Normalizes M-AI-Labs meta data files to TTS format
+
+    Args:
+        root_path (str): root folder of the MAILAB language folder.
+        meta_files (List[str]): meta files to be used in the training. If None, finds all the
+            `metadata.csv` files under `root_path` recursively. Defaults to None
+        ignored_speakers (List[str]): speakers to skip; only applied when given as a list. Defaults to None
+    Returns:
+        List[dict]: one item per line whose wav file exists; files outside a `by_book` speaker folder are skipped.
+    """
+    # escape the separator: on Windows `os.sep` is a backslash and the unescaped pattern does not compile
+    sep = re.escape(os.sep)
+    speaker_regex = re.compile(f"by_book{sep}(male|female){sep}(?P<speaker_name>[^{sep}]+){sep}")
+    if not meta_files:
+        csv_files = glob(root_path + f"{os.sep}**{os.sep}metadata.csv", recursive=True)
+    else:
+        csv_files = meta_files
+    items = []
+    for csv_file in csv_files:
+        if os.path.isfile(csv_file):
+            txt_file = csv_file
+        else:
+            txt_file = os.path.join(root_path, csv_file)
+        folder = os.path.dirname(txt_file)
+        # determine speaker based on folder structure...
+        speaker_name_match = speaker_regex.search(txt_file)
+        if speaker_name_match is None:
+            continue
+        speaker_name = speaker_name_match.group("speaker_name")
+        if isinstance(ignored_speakers, list) and speaker_name in ignored_speakers:
+            continue
+        print(" | > {}".format(csv_file))
+        with open(txt_file, "r", encoding="utf-8") as ttf:
+            for line in ttf:
+                cols = line.split("|")
+                if not meta_files:
+                    wav_file = os.path.join(folder, "wavs", cols[0] + ".wav")
+                else:
+                    wav_file = os.path.join(root_path, folder.replace("metadata.csv", ""), "wavs", cols[0] + ".wav")
+                if os.path.isfile(wav_file):
+                    text = cols[1].strip()
+                    items.append(
+                        {"text": text, "audio_file": wav_file, "speaker_name": speaker_name, "root_path": root_path}
+                    )
+                else:
+                    # M-AI-Labs have some missing samples, so just print the warning
+                    print("> File %s does not exist!" % (wav_file))
+    return items
+
+
+def ljspeech(root_path, meta_file, **kwargs):  # pylint: disable=unused-argument
+    """Turn the `|`-separated LJSpeech metadata (wav id in column 1, text in column 3) into TTS sample dicts.
+    https://keithito.com/LJ-Speech-Dataset/"""
+    def _to_sample(fields):
+        return {
+            "text": fields[2],
+            "audio_file": os.path.join(root_path, "wavs", fields[0] + ".wav"),
+            "speaker_name": "ljspeech",
+            "root_path": root_path,
+        }
+
+    with open(os.path.join(root_path, meta_file), "r", encoding="utf-8") as meta:
+        return [_to_sample(row.split("|")) for row in meta]
+
+def viettts(root_path, meta_file, **kwargs):  # pylint: disable=unused-argument
+    """Normalizes the `viettts` meta data file, one `<wav path> - <text>` entry per line, to TTS format.
+    Only the first " - " separates the fields, so a transcript that contains " - " itself is kept whole."""
+    txt_file = os.path.join(root_path, meta_file)
+    items = []
+    speaker_name = "viettts"
+    with open(txt_file, "r", encoding="utf-8") as ttf:
+        for line in ttf:
+            cols = line.split(" - ", 1)
+            wav_file = os.path.join(root_path, cols[0])
+            text = cols[1]
+            items.append({"text": text, "audio_file": wav_file, "speaker_name": speaker_name, "root_path": root_path})
+    return items
+
+def infore(root_path, meta_file, **kwargs):  # pylint: disable=unused-argument
+    """Turn a `|`-separated `infore` metadata file into TTS sample dicts.
+    Column 1 is the audio file name under `<root_path>/wavs`, column 2 is the text.
+    """
+    samples = []
+    with open(os.path.join(root_path, meta_file), "r", encoding="utf-8") as meta:
+        for row in meta:
+            fields = row.split("|")
+            wav_path = os.path.join(root_path, "wavs", fields[0])
+            samples.append(
+                {"text": fields[1], "audio_file": wav_path, "speaker_name": "infore", "root_path": root_path}
+            )
+    return samples
+
+
+
+
+def ljspeech_test(root_path, meta_file, **kwargs):  # pylint: disable=unused-argument
+    """LJSpeech formatter for tests: labels the samples with synthetic speakers `ljspeech-1`, `ljspeech-2`, ...
+    https://keithito.com/LJ-Speech-Dataset/"""
+    samples = []
+    with open(os.path.join(root_path, meta_file), "r", encoding="utf-8") as meta:
+        for row_idx, row in enumerate(meta):
+            fields = row.split("|")
+            # consecutive pairs of rows share a speaker (2 samples per speaker to avoid eval split issues)
+            speaker_id = row_idx // 2 + 1
+            samples.append(
+                {
+                    "text": fields[2],
+                    "audio_file": os.path.join(root_path, "wavs", fields[0] + ".wav"),
+                    "speaker_name": f"ljspeech-{speaker_id}",
+                    "root_path": root_path,
+                }
+            )
+    return samples
+
+
+def thorsten(root_path, meta_file, **kwargs):  # pylint: disable=unused-argument
+    """Turn the `|`-separated thorsten metadata (wav id, then text) into TTS sample dicts.
+    https://github.com/thorstenMueller/deep-learning-german-tts/"""
+    with open(os.path.join(root_path, meta_file), "r", encoding="utf-8") as meta:
+        return [
+            {
+                "text": fields[1],
+                "audio_file": os.path.join(root_path, "wavs", fields[0] + ".wav"),
+                "speaker_name": "thorsten",
+                "root_path": root_path,
+            }
+            for fields in (row.split("|") for row in meta)
+        ]
+
+
+def sam_accenture(root_path, meta_file, **kwargs):  # pylint: disable=unused-argument
+    """Read the sam-accenture XML metadata and keep the `fileid` entries whose wav file exists.
+    https://github.com/Sam-Accenture-Non-Binary-Voice/non-binary-voice-files"""
+    wav_dir = os.path.join(root_path, "vo_voice_quality_transformation")
+    tree = ET.parse(os.path.join(root_path, "voice_over_recordings", meta_file))
+    samples = []
+    for entry in tree.getroot().findall("./fileid"):
+        wav_path = os.path.join(wav_dir, entry.get("id") + ".wav")
+        if os.path.exists(wav_path):
+            samples.append(
+                {"text": entry.text, "audio_file": wav_path, "speaker_name": "sam_accenture", "root_path": root_path}
+            )
+        else:
+            print(f" [!] {wav_path} in metafile does not exist. Skipping...")
+    return samples
+
+
+def ruslan(root_path, meta_file, **kwargs):  # pylint: disable=unused-argument
+    """Turn the `|`-separated RUSLAN metadata (wav id, then text) into TTS sample dicts.
+    https://ruslan-corpus.github.io/
+    """
+    wav_dir = os.path.join(root_path, "RUSLAN")
+    samples = []
+    with open(os.path.join(root_path, meta_file), "r", encoding="utf-8") as meta:
+        for row in meta:
+            fields = row.split("|")
+            wav_path = os.path.join(wav_dir, fields[0] + ".wav")
+            sample = {"text": fields[1], "audio_file": wav_path, "speaker_name": "ruslan", "root_path": root_path}
+            samples.append(sample)
+    return samples
+
+
+def css10(root_path, meta_file, **kwargs):  # pylint: disable=unused-argument
+    """Normalizes the CSS10 dataset file (`<wav path>|<text>|...` per line) to TTS format.
+    Every item also carries `root_path`, as the items built by e.g. `ljspeech` do."""
+    items = []
+    with open(os.path.join(root_path, meta_file), "r", encoding="utf-8") as ttf:
+        for line in ttf:
+            cols = line.split("|")
+            wav_file = os.path.join(root_path, cols[0])
+            text = cols[1]
+            item = {"text": text, "audio_file": wav_file, "speaker_name": "css10", "root_path": root_path}
+            items.append(item)
+    return items
+
+
+def nancy(root_path, meta_file, **kwargs):  # pylint: disable=unused-argument
+    """Normalizes the Nancy meta data file to TTS format (adds `root_path` to every item, as `ljspeech` does)"""
+    items = []
+    with open(os.path.join(root_path, meta_file), "r", encoding="utf-8") as ttf:
+        for line in ttf:
+            utt_id = line.split()[1]
+            # NOTE(review): the `- 1` also drops the character right before the closing quote - confirm intended
+            text = line[line.find('"') + 1 : line.rfind('"') - 1]
+            wav_file = os.path.join(root_path, "wavn", utt_id + ".wav")
+            item = {"text": text, "audio_file": wav_file, "speaker_name": "nancy", "root_path": root_path}
+            items.append(item)
+    return items
+
+
+def common_voice(root_path, meta_file, ignored_speakers=None):
+    """Turn a tab-separated Common Voice metadata file into TTS sample dicts.
+
+    Column 1 is used as the speaker, column 2 as the clip file name (`.mp3` is replaced by `.wav`) and
+    column 3 as the text. Lines starting with `client_id` are skipped, as are speakers in `ignored_speakers` (a list).
+    """
+    skip_speakers = ignored_speakers if isinstance(ignored_speakers, list) else ()
+    samples = []
+    with open(os.path.join(root_path, meta_file), "r", encoding="utf-8") as meta:
+        for row in meta:
+            if row.startswith("client_id"):
+                continue
+            fields = row.split("\t")
+            text, speaker = fields[2], fields[0]
+            if speaker in skip_speakers:
+                continue
+            wav_path = os.path.join(root_path, "clips", fields[1].replace(".mp3", ".wav"))
+            sample = {"text": text, "audio_file": wav_path, "speaker_name": "MCV_" + speaker, "root_path": root_path}
+            samples.append(sample)
+    return samples
+
+
+def libri_tts(root_path, meta_files=None, ignored_speakers=None):
+    """Collect LibriTTS samples from tab-separated transcript files.
+
+    https://ai.google/tools/datasets/libri-tts/
+
+    Args:
+        root_path (str): Root folder of the dataset.
+        meta_files (str or List[str], optional): A single transcript file relative
+            to ``root_path`` or a list of paths used as given. When empty, every
+            ``*trans.tsv`` file below ``root_path`` is used.
+        ignored_speakers (List[str], optional): Speakers to leave out. Only
+            honoured when it is a list.
+
+    Returns:
+        List[Dict]: Samples with ``text``, ``audio_file``, ``speaker_name``
+            (prefixed with ``LTTS_``) and ``root_path``.
+
+    Raises:
+        AssertionError: If any collected audio file does not exist.
+    """
+    if not meta_files:
+        meta_files = glob(f"{root_path}/**/*trans.tsv", recursive=True)
+    elif isinstance(meta_files, str):
+        meta_files = [os.path.join(root_path, meta_files)]
+
+    filter_speakers = isinstance(ignored_speakers, list)
+    samples = []
+    for tsv_path in meta_files:
+        with open(tsv_path, "r", encoding="utf-8") as handle:
+            for row in handle:
+                fields = row.split("\t")
+                utt_name = fields[0]
+                # utterance names look like <speaker>_<chapter>_...
+                speaker_name, chapter_id, *_ = utt_name.split("_")
+                clip_dir = os.path.join(root_path, f"{speaker_name}/{chapter_id}")
+                clip_path = os.path.join(clip_dir, utt_name + ".wav")
+                transcript = fields[2]
+                if filter_speakers and speaker_name in ignored_speakers:
+                    continue
+                samples.append(
+                    {
+                        "text": transcript,
+                        "audio_file": clip_path,
+                        "speaker_name": f"LTTS_{speaker_name}",
+                        "root_path": root_path,
+                    }
+                )
+    # verify the audio files only after all transcripts were parsed
+    for sample in samples:
+        assert os.path.exists(sample["audio_file"]), f" [!] wav files don't exist - {sample['audio_file']}"
+    return samples
+
+
+def custom_turkish(root_path, meta_file, **kwargs):  # pylint: disable=unused-argument
+    """Build TTS samples for the "turkish-female" speaker from a ``|`` separated meta file.
+
+    Column 0 (stripped) is the clip name looked up as ``<root_path>/wavs/<name>.wav``
+    and column 1 (stripped) is the text. Lines whose clip does not exist are
+    skipped and the number of skipped clips is printed at the end.
+
+    Args:
+        root_path (str): Root folder of the dataset.
+        meta_file (str): Meta file path relative to ``root_path``.
+
+    Returns:
+        List[Dict]: Samples with ``text``, ``audio_file``, ``speaker_name`` and ``root_path``.
+    """
+    samples = []
+    missing = []
+    with open(os.path.join(root_path, meta_file), "r", encoding="utf-8") as handle:
+        for row in handle:
+            fields = row.split("|")
+            clip_path = os.path.join(root_path, "wavs", fields[0].strip() + ".wav")
+            if os.path.exists(clip_path):
+                samples.append(
+                    {
+                        "text": fields[1].strip(),
+                        "audio_file": clip_path,
+                        "speaker_name": "turkish-female",
+                        "root_path": root_path,
+                    }
+                )
+            else:
+                missing.append(clip_path)
+    print(f" [!] {len(missing)} files skipped. They don't exist...")
+    return samples
+
+
+# ToDo: add the dataset link when the dataset is released publicly
+def brspeech(root_path, meta_file, ignored_speakers=None):
+    """BRSpeech 3.0 beta
+
+    Reads a ``|`` separated meta file, skipping lines that start with
+    ``wav_filename`` (the header). Column 0 is the audio path relative to
+    ``root_path``, column 2 the text and column 3 the speaker id.
+
+    Args:
+        root_path (str): Root folder of the dataset.
+        meta_file (str): Meta file path relative to ``root_path``.
+        ignored_speakers (List[str], optional): Speaker ids to leave out. Only
+            honoured when it is a list.
+
+    Returns:
+        List[Dict]: Samples with ``text``, ``audio_file``, ``speaker_name`` and ``root_path``.
+    """
+    filter_speakers = isinstance(ignored_speakers, list)
+    samples = []
+    with open(os.path.join(root_path, meta_file), "r", encoding="utf-8") as handle:
+        for row in handle:
+            if row.startswith("wav_filename"):
+                continue
+            fields = row.split("|")
+            sample = {
+                "text": fields[2],
+                "audio_file": os.path.join(root_path, fields[0]),
+                "speaker_name": fields[3],
+                "root_path": root_path,
+            }
+            if filter_speakers and sample["speaker_name"] in ignored_speakers:
+                continue
+            samples.append(sample)
+    return samples
+
+
+def vctk(root_path, meta_files=None, wavs_path="wav48_silence_trimmed", mic="mic1", ignored_speakers=None):
+    """Collect samples of the VCTK dataset v0.92.
+
+    URL:
+        https://datashare.ed.ac.uk/bitstream/handle/10283/3443/VCTK-Corpus-0.92.zip
+
+    The dataset ships two recordings per speaker, tagged ``mic1`` and ``mic2``.
+
+    mic1:
+        Audio recorded using an omni-directional microphone (DPA 4035).
+        Contains very low frequency noises.
+        This is the same audio released in previous versions of VCTK:
+        https://doi.org/10.7488/ds/1994
+
+    mic2:
+        Audio recorded using a small diaphragm condenser microphone with
+        very wide bandwidth (Sennheiser MKH 800).
+        Two speakers, p280 and p315 had technical issues of the audio
+        recordings using MKH 800.
+
+    Args:
+        root_path (str): Root folder of the dataset.
+        meta_files: Ignored; the transcripts are always globbed from ``<root_path>/txt``.
+        wavs_path (str): Folder under ``root_path`` that holds the ``flac`` files.
+        mic (str): Microphone tag used in the audio file name. Speaker ``p280``
+            always uses ``mic1``.
+        ignored_speakers (List[str], optional): Speakers to leave out. Only
+            honoured when it is a list.
+
+    Returns:
+        List[Dict]: Samples with ``text``, ``audio_file``, ``speaker_name``
+            (prefixed with ``VCTK_``) and ``root_path``. Transcripts whose audio
+            file is missing are reported with a print and skipped.
+    """
+    file_ext = "flac"
+    filter_speakers = isinstance(ignored_speakers, list)
+    samples = []
+    transcript_glob = os.path.join(root_path, "txt") + "/**/*.txt"
+    meta_files = glob(transcript_glob, recursive=True)
+    for transcript_path in meta_files:
+        # expected layout: txt/<speaker_id>/<file_id>.txt
+        _, speaker_id, txt_name = os.path.relpath(transcript_path, root_path).split(os.sep)
+        file_id = txt_name.split(".")[0]
+        if filter_speakers and speaker_id in ignored_speakers:
+            continue
+        with open(transcript_path, "r", encoding="utf-8") as handle:
+            text = handle.readlines()[0]
+        # p280 has no mic2 recordings
+        mic_tag = "mic1" if speaker_id == "p280" else mic
+        wav_file = os.path.join(root_path, wavs_path, speaker_id, file_id + f"_{mic_tag}.{file_ext}")
+        if not os.path.exists(wav_file):
+            print(f" [!] wav files don't exist - {wav_file}")
+            continue
+        samples.append(
+            {"text": text, "audio_file": wav_file, "speaker_name": "VCTK_" + speaker_id, "root_path": root_path}
+        )
+    return samples
+
+
+def vctk_old(root_path, meta_files=None, wavs_path="wav48", ignored_speakers=None):
+    """Collect samples of the older VCTK release.
+
+    homepages.inf.ed.ac.uk/jyamagis/release/VCTK-Corpus.tar.gz
+
+    Args:
+        root_path (str): Root folder of the dataset.
+        meta_files: Ignored; the transcripts are always globbed from ``<root_path>/txt``.
+        wavs_path (str): Folder under ``root_path`` that holds the ``.wav`` files.
+        ignored_speakers (List[str], optional): Speakers to leave out. Only
+            honoured when it is a list.
+
+    Returns:
+        List[Dict]: Samples with ``text``, ``audio_file``, ``speaker_name``
+            (prefixed with ``VCTK_old_``) and ``root_path``. The existence of the
+            audio files is not checked.
+    """
+    filter_speakers = isinstance(ignored_speakers, list)
+    samples = []
+    transcript_glob = os.path.join(root_path, "txt") + "/**/*.txt"
+    meta_files = glob(transcript_glob, recursive=True)
+    for transcript_path in meta_files:
+        # expected layout: txt/<speaker_id>/<file_id>.txt
+        _, speaker_id, txt_name = os.path.relpath(transcript_path, root_path).split(os.sep)
+        file_id = txt_name.split(".")[0]
+        if filter_speakers and speaker_id in ignored_speakers:
+            continue
+        with open(transcript_path, "r", encoding="utf-8") as handle:
+            text = handle.readlines()[0]
+        samples.append(
+            {
+                "text": text,
+                "audio_file": os.path.join(root_path, wavs_path, speaker_id, file_id + ".wav"),
+                "speaker_name": "VCTK_old_" + speaker_id,
+                "root_path": root_path,
+            }
+        )
+    return samples
+
+
+def synpaflex(root_path, metafiles=None, **kwargs):  # pylint: disable=unused-argument
+    """Collect "synpaflex" samples by pairing every ``.wav`` file with a transcript file.
+
+    All ``.wav`` files below ``root_path`` are globbed recursively. When the path
+    contains a ``wav`` directory, every occurrence of ``wav`` in the path is
+    replaced by ``txt`` to get the transcript path; otherwise the transcript is
+    expected in a ``txt`` folder next to the audio file. Only pairs where both
+    files exist are kept, and the first line of the transcript is the text.
+
+    Args:
+        root_path (str): Root folder of the dataset.
+        metafiles: Unused.
+
+    Returns:
+        List[Dict]: Samples with ``text``, ``audio_file``, ``speaker_name`` and
+            ``root_path`` (with a trailing path separator).
+    """
+    # make sure the root ends with a path separator
+    root_path = os.path.join(root_path, "")
+    wav_dir_marker = os.sep + "wav" + os.sep
+    samples = []
+    for wav_file in glob(f"{root_path}**/*.wav", recursive=True):
+        if wav_dir_marker in wav_file:
+            txt_file = wav_file.replace("wav", "txt")
+        else:
+            txt_name = os.path.basename(wav_file).replace(".wav", ".txt")
+            txt_file = os.path.join(os.path.dirname(wav_file), "txt", txt_name)
+        if not (os.path.exists(txt_file) and os.path.exists(wav_file)):
+            continue
+        with open(txt_file, "r", encoding="utf-8") as handle:
+            text = handle.readlines()[0]
+        samples.append({"text": text, "audio_file": wav_file, "speaker_name": "synpaflex", "root_path": root_path})
+    return samples
+
+
+def open_bible(root_path, meta_files="train", ignore_digits_sentences=True, ignored_speakers=None):
+    """Collect Open Bible samples from one split folder.
+
+    ToDo: Refer the paper when available
+
+    Args:
+        root_path (str): Root folder of the dataset.
+        meta_files (str): Name of the split folder under ``root_path`` that is
+            searched recursively for ``.txt`` transcripts.
+        ignore_digits_sentences (bool): When true, transcripts containing any
+            digit are skipped.
+        ignored_speakers (List[str], optional): Speakers to leave out. Only
+            honoured when it is a list.
+
+    Returns:
+        List[Dict]: Samples with ``text``, ``audio_file`` (``.flac``),
+            ``speaker_name`` (prefixed with ``OB_``) and ``root_path``.
+    """
+    split_dir = meta_files
+    filter_speakers = isinstance(ignored_speakers, list)
+    samples = []
+    transcript_glob = os.path.join(root_path, split_dir) + "/**/*.txt"
+    meta_files = glob(transcript_glob, recursive=True)
+    for transcript_path in meta_files:
+        # expected layout: <split>/<speaker_id>/<file_id>.txt
+        _, speaker_id, txt_name = os.path.relpath(transcript_path, root_path).split(os.sep)
+        file_id = txt_name.split(".")[0]
+        if filter_speakers and speaker_id in ignored_speakers:
+            continue
+        with open(transcript_path, "r", encoding="utf-8") as handle:
+            text = handle.readline().replace("\n", "")
+        # optionally drop sentences that contain digits
+        if ignore_digits_sentences and any(char.isdigit() for char in text):
+            continue
+        samples.append(
+            {
+                "text": text,
+                "audio_file": os.path.join(root_path, split_dir, speaker_id, file_id + ".flac"),
+                "speaker_name": "OB_" + speaker_id,
+                "root_path": root_path,
+            }
+        )
+    return samples
+
+
+def mls(root_path, meta_files=None, ignored_speakers=None):
+    """Collect Multilingual LibriSpeech samples from a transcript file.
+
+    http://www.openslr.org/94/
+
+    Every line holds ``<file id>\\t<text>``. The file id starts with
+    ``<speaker>_<book>_`` and the audio is expected at
+    ``<dir of meta_files>/audio/<speaker>/<book>/<file id>.wav`` below ``root_path``.
+
+    Args:
+        root_path (str): Root folder of the dataset.
+        meta_files (str): Transcript file path relative to ``root_path``.
+        ignored_speakers (List[str], optional): Speakers to leave out. Only
+            honoured when it is a list.
+
+    Returns:
+        List[Dict]: Samples with ``text``, ``audio_file``, ``speaker_name``
+            (prefixed with ``MLS_``) and ``root_path``.
+    """
+    items = []
+    with open(os.path.join(root_path, meta_files), "r", encoding="utf-8") as meta:
+        for line in meta:
+            file, text = line.split("\t")
+            # only strip the line terminator; a final line without a newline keeps its last character
+            text = text.rstrip("\n")
+            speaker, book, *_ = file.split("_")
+            wav_file = os.path.join(root_path, os.path.dirname(meta_files), "audio", speaker, book, file + ".wav")
+            # ignore speakers
+            if isinstance(ignored_speakers, list):
+                if speaker in ignored_speakers:
+                    continue
+            items.append(
+                {"text": text, "audio_file": wav_file, "speaker_name": "MLS_" + speaker, "root_path": root_path}
+            )
+    return items
+
+
+# ======================================== VOX CELEB ===========================================
+def voxceleb2(root_path, meta_file=None, **kwargs):  # pylint: disable=unused-argument
+    """Load VoxCeleb 2 samples by delegating to `_voxcel_x` with index "2".
+
+    :param root_path   Dataset root folder, forwarded unchanged
+    :param meta_file   Used only for consistency with load_tts_samples api
+    """
+    samples = _voxcel_x(root_path, meta_file, voxcel_idx="2")
+    return samples
+
+
+def voxceleb1(root_path, meta_file=None, **kwargs):  # pylint: disable=unused-argument
+    """Load VoxCeleb 1 samples by delegating to `_voxcel_x` with index "1".
+
+    :param root_path   Dataset root folder, forwarded unchanged
+    :param meta_file   Used only for consistency with load_tts_samples api
+    """
+    samples = _voxcel_x(root_path, meta_file, voxcel_idx="1")
+    return samples
+
+
+def _voxcel_x(root_path, meta_file, voxcel_idx):
+    """Load VoxCeleb 1 or 2 samples from a ``|`` separated meta file.
+
+    When ``meta_file`` is given it is read directly. Otherwise the cache file
+    ``metafile_voxceleb<idx>.csv`` under ``root_path`` is used; if it does not
+    exist yet it is built by searching ``root_path`` recursively for ``.wav``
+    files. Note that the cache file is written before the sample count is checked.
+
+    Args:
+        root_path (str): Root folder of the dataset.
+        meta_file (str): Optional meta file to read instead of the cache.
+        voxcel_idx (str): Dataset version, "1" or "2".
+
+    Returns:
+        List[List[str]]: One ``[text, path, speaker]`` list per meta file line.
+
+    Raises:
+        ValueError: If fewer wav files than expected were found while building the cache.
+    """
+    assert voxcel_idx in ["1", "2"]
+    expected_count = 148_000 if voxcel_idx == "1" else 1_000_000
+    voxceleb_path = Path(root_path)
+    cache_to = voxceleb_path / f"metafile_voxceleb{voxcel_idx}.csv"
+    cache_to.parent.mkdir(exist_ok=True)
+
+    # an explicitly given meta file takes precedence over the cache
+    use_cache = meta_file is None
+    if use_cache and not cache_to.exists():
+        # no cache yet: crawl recursively for 'wav' files
+        rows = []
+        progress = tqdm(
+            voxceleb_path.rglob("**/*.wav"),
+            desc=f"Building VoxCeleb {voxcel_idx} Meta file ... this needs to be done only once.",
+            total=expected_count,
+        )
+        for path in progress:
+            speaker_id = str(Path(path).parent.parent.stem)
+            assert speaker_id.startswith("id")
+            text = None  # VoxCel does not provide transcriptions, and they are not needed for training the SE
+            rows.append(f"{text}|{path}|voxcel{voxcel_idx}_{speaker_id}\n")
+        with open(str(cache_to), "w", encoding="utf-8") as f:
+            f.write("".join(rows))
+        cnt = len(rows)
+        if cnt < expected_count:
+            raise ValueError(f"Found too few instances for Voxceleb. Should be around {expected_count}, is: {cnt}")
+
+    source_file = cache_to if use_cache else meta_file
+    with open(str(source_file), "r", encoding="utf-8") as f:
+        return [row.strip().split("|") for row in f.readlines()]
+
+
+def emotion(root_path, meta_file, ignored_speakers=None):
+    """Generic emotion dataset
+
+    Reads a comma-separated meta file, skipping lines that start with
+    ``file_path`` (the header). Column 0 is the audio path relative to
+    ``root_path``, column 1 the speaker id and column 2 the emotion id.
+
+    Args:
+        root_path (str): Root folder of the dataset.
+        meta_file (str): Meta file path relative to ``root_path``.
+        ignored_speakers (List[str], optional): Speaker ids to leave out. Only
+            honoured when it is a list.
+
+    Returns:
+        List[Dict]: Samples with ``audio_file``, ``speaker_name``,
+            ``emotion_name`` and ``root_path`` (no ``text`` key).
+    """
+    filter_speakers = isinstance(ignored_speakers, list)
+    samples = []
+    with open(os.path.join(root_path, meta_file), "r", encoding="utf-8") as handle:
+        for row in handle:
+            if row.startswith("file_path"):
+                continue
+            fields = row.split(",")
+            sample = {
+                "audio_file": os.path.join(root_path, fields[0]),
+                "speaker_name": fields[1],
+                # the emotion column may carry the line break
+                "emotion_name": fields[2].replace("\n", ""),
+                "root_path": root_path,
+            }
+            if filter_speakers and sample["speaker_name"] in ignored_speakers:
+                continue
+            samples.append(sample)
+    return samples
+
+
+def baker(root_path: str, meta_file: str, **kwargs) -> List[List[str]]:  # pylint: disable=unused-argument
+    """Normalizes the Baker meta data file to TTS format
+
+    Each line must hold exactly two ``|`` separated fields: the wav file name,
+    looked up under ``<root_path>/clips_22``, and the transcript.
+
+    Args:
+        root_path (str): path to the baker dataset
+        meta_file (str): name of the meta dataset containing names of wav to select and the transcript of the sentence
+    Returns:
+        One dict per sentence with the keys ``text``, ``audio_file``, ``speaker_name`` and ``root_path``
+        (the code builds dicts, not lists of strings as the return annotation suggests)
+    """
+    samples = []
+    with open(os.path.join(root_path, meta_file), "r", encoding="utf-8") as handle:
+        for row in handle:
+            wav_name, text = row.rstrip("\n").split("|")
+            samples.append(
+                {
+                    "text": text,
+                    "audio_file": os.path.join(root_path, "clips_22", wav_name),
+                    "speaker_name": "baker",
+                    "root_path": root_path,
+                }
+            )
+    return samples
+
+
+def kokoro(root_path, meta_file, **kwargs):  # pylint: disable=unused-argument
+    """Japanese single-speaker dataset from https://github.com/kaiidams/Kokoro-Speech-Dataset
+
+    Lines are ``|`` separated: column 0 is the clip name looked up as
+    ``<root_path>/wavs/<name>.wav`` and column 2, with all spaces removed, is the text.
+
+    Args:
+        root_path (str): Root folder of the dataset.
+        meta_file (str): Meta file path relative to ``root_path``.
+
+    Returns:
+        List[Dict]: Samples with ``text``, ``audio_file``, ``speaker_name`` and ``root_path``.
+    """
+    samples = []
+    with open(os.path.join(root_path, meta_file), "r", encoding="utf-8") as handle:
+        for row in handle:
+            fields = row.split("|")
+            clip_path = os.path.join(root_path, "wavs", fields[0] + ".wav")
+            samples.append(
+                {
+                    "text": fields[2].replace(" ", ""),
+                    "audio_file": clip_path,
+                    "speaker_name": "kokoro",
+                    "root_path": root_path,
+                }
+            )
+    return samples
+
+
+def kss(root_path, meta_file, **kwargs):  # pylint: disable=unused-argument
+    """Korean single-speaker dataset from https://www.kaggle.com/datasets/bryanpark/korean-single-speaker-speech-dataset
+
+    Lines are ``|`` separated: column 0 is the audio path relative to
+    ``root_path`` and column 2 is used as the text.
+
+    Args:
+        root_path (str): Root folder of the dataset.
+        meta_file (str): Meta file path relative to ``root_path``.
+
+    Returns:
+        List[Dict]: Samples with ``text``, ``audio_file``, ``speaker_name`` and ``root_path``.
+    """
+    txt_file = os.path.join(root_path, meta_file)
+    items = []
+    speaker_name = "kss"
+    with open(txt_file, "r", encoding="utf-8") as ttf:
+        for line in ttf:
+            cols = line.split("|")
+            wav_file = os.path.join(root_path, cols[0])
+            # cols[1] keeps digits (e.g. "6월") while cols[2] has them spelled out (e.g. "유월")
+            text = cols[2]
+            # "root_path" must be present: `add_extra_keys` reads item["root_path"]
+            items.append({"text": text, "audio_file": wav_file, "speaker_name": speaker_name, "root_path": root_path})
+    return items
diff --git a/TTS/tts/datasets/__init__.py b/TTS/tts/datasets/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..1dce9fcdee1543a9013d5385eb9d0f51600f7efa
--- /dev/null
+++ b/TTS/tts/datasets/__init__.py
@@ -0,0 +1,180 @@
+import os
+import sys
+from collections import Counter
+from pathlib import Path
+from typing import Callable, Dict, List, Tuple, Union
+
+import numpy as np
+
+from TTS.tts.datasets.dataset import *
+from TTS.tts.datasets.formatters import *
+
+
+def split_dataset(items, eval_split_max_size=None, eval_split_size=0.01):
+    """Split a dataset into train and eval. Consider speaker distribution in multi-speaker training.
+
+    The global NumPy random state is seeded with 0 and ``items`` is shuffled in
+    place. With more than one speaker, evaluation samples are drawn at random and
+    removed from ``items``, never taking the last remaining sample of a speaker.
+    With a single speaker, the first ``eval_split_size`` shuffled samples form the
+    evaluation set.
+
+    Args:
+        items (List[Dict]):
+            A list of samples. Each sample is a dict with at least the key `speaker_name`.
+
+        eval_split_max_size (int):
+            Number maximum of samples to be used for evaluation in proportion split. Defaults to None (Disabled).
+
+        eval_split_size (float):
+            If between 0.0 and 1.0 represents the proportion of the dataset to include in the evaluation set.
+            If > 1, represents the absolute number of evaluation samples. Defaults to 0.01 (1%).
+
+    Returns:
+        Tuple[List[Dict], List[Dict]]: The evaluation samples and the training samples.
+
+    Raises:
+        AssertionError: If the computed evaluation set size is not positive.
+        ValueError: If, in the multi-speaker case, the evaluation set cannot be filled
+            while keeping at least one training sample per speaker.
+    """
+    speakers = [item["speaker_name"] for item in items]
+    is_multi_speaker = len(set(speakers)) > 1
+    if eval_split_size > 1:
+        eval_split_size = int(eval_split_size)
+    else:
+        if eval_split_max_size:
+            eval_split_size = min(eval_split_max_size, int(len(items) * eval_split_size))
+        else:
+            eval_split_size = int(len(items) * eval_split_size)
+
+    # `items` may be empty: do not divide by zero while building the error message
+    min_split_size = 1 / len(items) if len(items) > 0 else "N/A"
+    assert (
+        eval_split_size > 0
+    ), " [!] You do not have enough samples for the evaluation set. You can work around this setting the 'eval_split_size' parameter to a minimum of {}".format(
+        min_split_size
+    )
+    speaker_counter = Counter(speakers)
+    if is_multi_speaker:
+        # only samples beyond the first one of each speaker may go to the eval set;
+        # without this check the sampling loop below would never terminate
+        removable = sum(count - 1 for count in speaker_counter.values())
+        if eval_split_size > removable:
+            raise ValueError(
+                f" [!] Cannot select {eval_split_size} evaluation samples while keeping at least one "
+                f"training sample per speaker (at most {removable} are available)."
+            )
+    np.random.seed(0)
+    np.random.shuffle(items)
+    if is_multi_speaker:
+        items_eval = []
+        while len(items_eval) < eval_split_size:
+            item_idx = np.random.randint(0, len(items))
+            speaker_to_be_removed = items[item_idx]["speaker_name"]
+            # keep at least one sample of every speaker in the training set
+            if speaker_counter[speaker_to_be_removed] > 1:
+                items_eval.append(items[item_idx])
+                speaker_counter[speaker_to_be_removed] -= 1
+                del items[item_idx]
+        return items_eval, items
+    return items[:eval_split_size], items[eval_split_size:]
+
+
+def add_extra_keys(metadata, language, dataset_name):
+    """Add the ``language`` and ``audio_unique_name`` keys to every sample in place.
+
+    ``audio_unique_name`` is ``<dataset_name>#<audio path relative to the sample's
+    root_path, without file extension>``.
+
+    Args:
+        metadata (List[Dict]): Samples holding at least ``audio_file`` and ``root_path``.
+        language (str): Value stored under ``language``.
+        dataset_name (str): Prefix of the unique audio name.
+
+    Returns:
+        List[Dict]: The same ``metadata`` list.
+    """
+    for sample in metadata:
+        sample["language"] = language
+        # path relative to the dataset root, extension removed
+        rel_audio = os.path.relpath(sample["audio_file"], sample["root_path"])
+        rel_stem, _ = os.path.splitext(rel_audio)
+        sample["audio_unique_name"] = f"{dataset_name}#{rel_stem}"
+    return metadata
+
+
+def load_tts_samples(
+    datasets: Union[List[Dict], Dict],
+    eval_split=True,
+    formatter: Callable = None,
+    eval_split_max_size=None,
+    eval_split_size=0.01,
+) -> Tuple[List[List], List[List]]:
+    """Parse the dataset from the datasets config, load the samples as a List and load the attention alignments if provided.
+    If `formatter` is not None, apply the formatter to the samples else pick the formatter from the available ones based
+    on the dataset name.
+
+    Args:
+        datasets (List[Dict], Dict): A list of datasets or a single dataset dictionary. If multiple datasets are
+            in the list, they are all merged. Each dataset must provide the keys `formatter`, `dataset_name`, `path`,
+            `meta_file_train`, `meta_file_val`, `ignored_speakers`, `language` and `meta_file_attn_mask`.
+
+        eval_split (bool, optional): If true, create a evaluation split. If the dataset sets `meta_file_val`, the
+            evaluation samples are loaded from it; otherwise an eval split is generated automatically from the
+            training samples. Defaults to True.
+
+        formatter (Callable, optional): The preprocessing function to be applied to create the list of samples. It
+            is called with the root_path, the meta_file name and `ignored_speakers` and must return a list of sample
+            dicts. A formatter passed here is only used for the first dataset; the following ones are resolved by
+            name. Defaults to None.
+
+        eval_split_max_size (int):
+            Number maximum of samples to be used for evaluation in proportion split. Defaults to None (Disabled).
+
+        eval_split_size (float):
+            If between 0.0 and 1.0 represents the proportion of the dataset to include in the evaluation set.
+            If > 1, represents the absolute number of evaluation samples. Defaults to 0.01 (1%).
+
+    Returns:
+        Tuple[List[List], List[List]]: training and evaluation splits of the dataset. The evaluation split is None
+            when `eval_split` is false.
+    """
+    meta_data_train_all = []
+    meta_data_eval_all = [] if eval_split else None
+    if not isinstance(datasets, list):
+        datasets = [datasets]
+    for dataset in datasets:
+        formatter_name = dataset["formatter"]
+        dataset_name = dataset["dataset_name"]
+        root_path = dataset["path"]
+        meta_file_train = dataset["meta_file_train"]
+        meta_file_val = dataset["meta_file_val"]
+        ignored_speakers = dataset["ignored_speakers"]
+        language = dataset["language"]
+
+        # setup the right data processor
+        if formatter is None:
+            formatter = _get_formatter_by_name(formatter_name)
+        # load train set
+        meta_data_train = formatter(root_path, meta_file_train, ignored_speakers=ignored_speakers)
+        assert len(meta_data_train) > 0, f" [!] No training samples found in {root_path}/{meta_file_train}"
+
+        meta_data_train = add_extra_keys(meta_data_train, language, dataset_name)
+
+        print(f" | > Found {len(meta_data_train)} files in {Path(root_path).resolve()}")
+        # load evaluation split if set
+        meta_data_eval = None
+        if eval_split:
+            if meta_file_val:
+                meta_data_eval = formatter(root_path, meta_file_val, ignored_speakers=ignored_speakers)
+                meta_data_eval = add_extra_keys(meta_data_eval, language, dataset_name)
+            else:
+                meta_data_eval, meta_data_train = split_dataset(meta_data_train, eval_split_max_size, eval_split_size)
+            meta_data_eval_all += meta_data_eval
+        meta_data_train_all += meta_data_train
+        # load attention masks for the duration predictor training
+        # (subscript access, like every other dataset field, so plain dicts work too)
+        if dataset["meta_file_attn_mask"]:
+            meta_data = dict(load_attention_mask_meta_data(dataset["meta_file_attn_mask"]))
+            # only annotate the samples of the current dataset; the accumulated lists hold
+            # the same dict objects, so the update is visible there as well
+            for ins in meta_data_train:
+                ins.update({"alignment_file": meta_data[ins["audio_file"]].strip()})
+            if meta_data_eval:
+                for ins in meta_data_eval:
+                    ins.update({"alignment_file": meta_data[ins["audio_file"]].strip()})
+        # set none for the next iter
+        formatter = None
+    return meta_data_train_all, meta_data_eval_all
+
+
+def load_attention_mask_meta_data(metafile_path):
+    """Load meta data file created by compute_attention_masks.py
+
+    Every line must hold exactly two ``|`` separated fields.
+
+    Args:
+        metafile_path (str): Path of the meta data file.
+
+    Returns:
+        List[List[str]]: One ``[wav_file, attn_file]`` pair per line. The second
+            entry is not stripped, so it keeps the line break.
+    """
+    with open(metafile_path, "r", encoding="utf-8") as f:
+        rows = f.readlines()
+
+    # unpacking into two names enforces exactly two fields per line
+    return [[wav_file, attn_file] for wav_file, attn_file in (row.split("|") for row in rows)]
+
+
+def _get_formatter_by_name(name):
+    """Returns the respective preprocessing function.
+
+    The lower-cased ``name`` is looked up as an attribute of this module.
+    """
+    this_module = sys.modules[__name__]
+    formatter_key = name.lower()
+    return getattr(this_module, formatter_key)
+
+
+def find_unique_chars(data_samples, verbose=True):
+    """Collect the set of characters used in the texts of the given samples.
+
+    Args:
+        data_samples (List): Samples either as dicts holding the text under the key
+            ``text`` or as sequences whose first element is the text.
+        verbose (bool): If true, print the unique characters, the lower-case ones
+            and the set obtained after lower-casing everything.
+
+    Returns:
+        Set[str]: The unique characters after lower-casing.
+    """
+    # samples in this package are dicts; list/tuple samples keep working as before
+    texts = "".join(item["text"] if isinstance(item, dict) else item[0] for item in data_samples)
+    chars = set(texts)
+    lower_chars = filter(lambda c: c.islower(), chars)
+    chars_force_lower = [c.lower() for c in chars]
+    chars_force_lower = set(chars_force_lower)
+
+    if verbose:
+        print(f" > Number of unique characters: {len(chars)}")
+        print(f" > Unique characters: {''.join(sorted(chars))}")
+        print(f" > Unique lower characters: {''.join(sorted(lower_chars))}")
+        print(f" > Unique all forced to lower characters: {''.join(sorted(chars_force_lower))}")
+    return chars_force_lower
diff --git a/TTS/tts/datasets/__pycache__/__init__.cpython-37.pyc b/TTS/tts/datasets/__pycache__/__init__.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..309c3d0d9ce6a0106e254b6b8196ae83e0841994
Binary files /dev/null and b/TTS/tts/datasets/__pycache__/__init__.cpython-37.pyc differ
diff --git a/TTS/tts/datasets/__pycache__/__init__.cpython-38.pyc b/TTS/tts/datasets/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5503bd73ec2b6c626b5a9cfe26aaa86a4e6205f6
Binary files /dev/null and b/TTS/tts/datasets/__pycache__/__init__.cpython-38.pyc differ
diff --git a/TTS/tts/datasets/__pycache__/__init__.cpython-39.pyc b/TTS/tts/datasets/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f89b833dcf665667328f35c3f5d2a1e692334e94
Binary files /dev/null and b/TTS/tts/datasets/__pycache__/__init__.cpython-39.pyc differ
diff --git a/TTS/tts/datasets/__pycache__/dataset.cpython-37.pyc b/TTS/tts/datasets/__pycache__/dataset.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..44012e94710740e04f483290a51364648cea43fd
Binary files /dev/null and b/TTS/tts/datasets/__pycache__/dataset.cpython-37.pyc differ
diff --git a/TTS/tts/datasets/__pycache__/dataset.cpython-38.pyc b/TTS/tts/datasets/__pycache__/dataset.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a84f9a9f6934c1ba93594a1586e428e5de287f9f
Binary files /dev/null and b/TTS/tts/datasets/__pycache__/dataset.cpython-38.pyc differ
diff --git a/TTS/tts/datasets/__pycache__/dataset.cpython-39.pyc b/TTS/tts/datasets/__pycache__/dataset.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..905acacf705bd0441720fdfe6eee6fd93e6eeeab
Binary files /dev/null and b/TTS/tts/datasets/__pycache__/dataset.cpython-39.pyc differ
diff --git a/TTS/tts/datasets/__pycache__/formatters.cpython-37.pyc b/TTS/tts/datasets/__pycache__/formatters.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..998c3d796df630cbd9d3326ff9a3a172325e2de8
Binary files /dev/null and b/TTS/tts/datasets/__pycache__/formatters.cpython-37.pyc differ
diff --git a/TTS/tts/datasets/__pycache__/formatters.cpython-38.pyc b/TTS/tts/datasets/__pycache__/formatters.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..2de25df5433dcf7f810c39a9aeeb53b22a761b47
Binary files /dev/null and b/TTS/tts/datasets/__pycache__/formatters.cpython-38.pyc differ
diff --git a/TTS/tts/datasets/__pycache__/formatters.cpython-39.pyc b/TTS/tts/datasets/__pycache__/formatters.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a9da35e428fde982998599b81103c77b6184ff91
Binary files /dev/null and b/TTS/tts/datasets/__pycache__/formatters.cpython-39.pyc differ
diff --git a/TTS/tts/datasets/dataset.py b/TTS/tts/datasets/dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..cdc67669097390e7ebcdbffbbb3515ab19e2322a
--- /dev/null
+++ b/TTS/tts/datasets/dataset.py
@@ -0,0 +1,778 @@
+import base64
+import collections
+import os
+import random
+from typing import Dict, List, Union
+
+import numpy as np
+import torch
+import tqdm
+from torch.utils.data import Dataset
+
+from TTS.tts.utils.data import prepare_data, prepare_stop_target, prepare_tensor
+from TTS.utils.audio import AudioProcessor
+
+# Select the "file_system" tensor sharing strategy for torch.multiprocessing to prevent the
+# "too many open files" error, as suggested here:
+# https://github.com/pytorch/pytorch/issues/11201#issuecomment-421146936
+torch.multiprocessing.set_sharing_strategy("file_system")
+
+
+def _parse_sample(item):
+    """Unpack a positional sample into ``(text, wav_file, speaker_name, language_name, attn_file)``.
+
+    Args:
+        item: Sequence with 3, 4 or 5 fields, in the order listed above. Trailing fields that are
+            missing are returned as ``None``.
+
+    Raises:
+        ValueError: If ``item`` does not have 3, 4 or 5 fields.
+    """
+    num_fields = len(item)
+    if num_fields not in (3, 4, 5):
+        raise ValueError(" [!] Dataset cannot parse the sample.")
+    text, wav_file, speaker_name, *optional = item
+    language_name = optional[0] if num_fields >= 4 else None
+    attn_file = optional[1] if num_fields == 5 else None
+    return text, wav_file, speaker_name, language_name, attn_file
+
+
+def noise_augment_audio(wav):
+    """Return ``wav`` plus random noise drawn with ``np.random.rand`` and scaled by ``1 / 32768``."""
+    noise = np.random.rand(*wav.shape)
+    return wav + (1.0 / 32768.0) * noise
+
+
+def string2filename(string):
+    """Turn ``string`` into a safe, reversible filename by URL-safe base64 encoding its UTF-8 bytes."""
+    encoded = base64.urlsafe_b64encode(string.encode("utf-8"))
+    return encoded.decode("utf-8", "ignore")
+
+
+class TTSDataset(Dataset):
+    def __init__(
+        self,
+        outputs_per_step: int = 1,
+        compute_linear_spec: bool = False,
+        ap: AudioProcessor = None,
+        samples: List[Dict] = None,
+        tokenizer: "TTSTokenizer" = None,
+        compute_f0: bool = False,
+        f0_cache_path: str = None,
+        return_wav: bool = False,
+        batch_group_size: int = 0,
+        min_text_len: int = 0,
+        max_text_len: int = float("inf"),
+        min_audio_len: int = 0,
+        max_audio_len: int = float("inf"),
+        phoneme_cache_path: str = None,
+        precompute_num_workers: int = 0,
+        speaker_id_mapping: Dict = None,
+        d_vector_mapping: Dict = None,
+        language_id_mapping: Dict = None,
+        use_noise_augment: bool = False,
+        start_by_longest: bool = False,
+        verbose: bool = False,
+    ):
+        """Generic 📂 data loader for `tts` models. It is configurable for different outputs and needs.
+
+        If you need something different, you can subclass and override.
+
+        Args:
+            outputs_per_step (int): Number of time frames predicted per step.
+
+            compute_linear_spec (bool): compute linear spectrogram if True.
+
+            ap (TTS.tts.utils.AudioProcessor): Audio processor object.
+
+            samples (list): List of dataset samples.
+
+            tokenizer (TTSTokenizer): tokenizer to convert text to sequence IDs. The constructor reads
+                `tokenizer.use_phonemes`, so a tokenizer must be given even though the default is None.
+
+            compute_f0 (bool): compute f0 if True. Defaults to False.
+
+            f0_cache_path (str): Path to store f0 cache. Defaults to None.
+
+            return_wav (bool): Return the waveform of the sample. Defaults to False.
+
+            batch_group_size (int): Range of batch randomization after sorting
+                sequences by length. It shuffles each batch with bucketing to gather similar length sequences in a
+                batch. Set 0 to disable. Defaults to 0.
+
+            min_text_len (int): Minimum length of input text to be used. All shorter samples will be ignored.
+                Defaults to 0.
+
+            max_text_len (int): Maximum length of input text to be used. All longer samples will be ignored.
+                Defaults to float("inf").
+
+            min_audio_len (int): Minimum length of input audio to be used. All shorter samples will be ignored.
+                Defaults to 0.
+
+            max_audio_len (int): Maximum length of input audio to be used. All longer samples will be ignored.
+                The maximum length in the dataset defines the VRAM used in the training. Hence, pay attention to
+                this value if you encounter an OOM error in training. Defaults to float("inf").
+
+            phoneme_cache_path (str): Path to cache computed phonemes. It writes phonemes of each sample to a
+                separate file. Defaults to None.
+
+            precompute_num_workers (int): Number of workers to precompute features. Defaults to 0.
+
+            speaker_id_mapping (dict): Mapping of speaker names to IDs used to compute embedding vectors by the
+                embedding layer. Defaults to None.
+
+            d_vector_mapping (dict): Mapping of `audio_unique_name` keys to entries holding the computed
+                d-vector under "embedding". Defaults to None.
+
+            language_id_mapping (dict): Mapping of language names to numerical language IDs. Defaults to None.
+
+            use_noise_augment (bool): Enable adding random noise to wav for augmentation. Defaults to False.
+
+            start_by_longest (bool): Start by longest sequence. It is especially useful to check OOM. Defaults to False.
+
+            verbose (bool): Print diagnostic information. Defaults to false.
+        """
+        super().__init__()
+        self.batch_group_size = batch_group_size
+        self._samples = samples
+        self.outputs_per_step = outputs_per_step
+        self.compute_linear_spec = compute_linear_spec
+        self.return_wav = return_wav
+        self.compute_f0 = compute_f0
+        self.f0_cache_path = f0_cache_path
+        self.min_audio_len = min_audio_len
+        self.max_audio_len = max_audio_len
+        self.min_text_len = min_text_len
+        self.max_text_len = max_text_len
+        self.ap = ap
+        self.phoneme_cache_path = phoneme_cache_path
+        self.speaker_id_mapping = speaker_id_mapping
+        self.d_vector_mapping = d_vector_mapping
+        self.language_id_mapping = language_id_mapping
+        self.use_noise_augment = use_noise_augment
+        self.start_by_longest = start_by_longest
+
+        self.verbose = verbose
+        self.rescue_item_idx = 1
+        self.pitch_computed = False
+        self.tokenizer = tokenizer
+
+        if self.tokenizer.use_phonemes:
+            self.phoneme_dataset = PhonemeDataset(
+                self.samples, self.tokenizer, phoneme_cache_path, precompute_num_workers=precompute_num_workers
+            )
+
+        if compute_f0:
+            self.f0_dataset = F0Dataset(
+                self.samples, self.ap, cache_path=f0_cache_path, precompute_num_workers=precompute_num_workers
+            )
+
+        if self.verbose:
+            self.print_logs()
+
+    @property
+    def lengths(self):
+        """Approximate audio length of every sample, derived from the audio file size on disk.
+
+        Dict samples are read through their "audio_file" key; positional (list/tuple) samples are
+        unpacked with `_parse_sample`.
+        """
+        lens = []
+        for item in self.samples:
+            if isinstance(item, collections.abc.Mapping):
+                # unpacking a dict would yield its keys instead of the wav path
+                wav_file = item["audio_file"]
+            else:
+                _, wav_file, *_ = _parse_sample(item)
+            audio_len = os.path.getsize(wav_file) / 16 * 8  # assuming 16bit audio
+            lens.append(audio_len)
+        return lens
+
+    @property
+    def samples(self):
+        """List of samples currently served by the dataset."""
+        return self._samples
+
+    @samples.setter
+    def samples(self, new_samples):
+        # keep the helper datasets (if they were created) in sync with the new sample list
+        self._samples = new_samples
+        if hasattr(self, "f0_dataset"):
+            self.f0_dataset.samples = new_samples
+        if hasattr(self, "phoneme_dataset"):
+            self.phoneme_dataset.samples = new_samples
+
+    def __len__(self):
+        return len(self.samples)
+
+    def __getitem__(self, idx):
+        return self.load_data(idx)
+
+    def print_logs(self, level: int = 0) -> None:
+        """Print the tokenizer logs and the number of samples, indented by `level` tabs."""
+        indent = "\t" * level
+        print("\n")
+        print(f"{indent}> DataLoader initialization")
+        print(f"{indent}| > Tokenizer:")
+        self.tokenizer.print_logs(level + 1)
+        print(f"{indent}| > Number of instances : {len(self.samples)}")
+
+    def load_wav(self, filename):
+        """Load a waveform with the audio processor and assert that it is not empty."""
+        waveform = self.ap.load_wav(filename)
+        assert waveform.size > 0
+        return waveform
+
+    def get_phonemes(self, idx, text):
+        """Fetch the phoneme entry of sample `idx` and check that it belongs to `text`."""
+        out_dict = self.phoneme_dataset[idx]
+        assert text == out_dict["text"], f"{text} != {out_dict['text']}"
+        assert len(out_dict["token_ids"]) > 0
+        return out_dict
+
+    def get_f0(self, idx):
+        """Fetch the F0 entry of sample `idx` and check that it belongs to the same audio."""
+        out_dict = self.f0_dataset[idx]
+        item = self.samples[idx]
+        assert item["audio_unique_name"] == out_dict["audio_unique_name"]
+        return out_dict
+
+    @staticmethod
+    def get_attn_mask(attn_file):
+        """Load a pre-computed attention map from a numpy file."""
+        return np.load(attn_file)
+
+    def get_token_ids(self, idx, text):
+        """Return the token IDs of `text` as an int32 array, from the phoneme dataset when phonemes are used."""
+        if self.tokenizer.use_phonemes:
+            token_ids = self.get_phonemes(idx, text)["token_ids"]
+        else:
+            token_ids = self.tokenizer.text_to_ids(text)
+        return np.array(token_ids, dtype=np.int32)
+
+    def load_data(self, idx):
+        """Build the sample dict for index `idx`.
+
+        Loads the waveform, optionally adds noise, computes token IDs and loads the optional attention
+        map and F0 values. If the token sequence is longer than `max_text_len` or the waveform shorter
+        than `min_audio_len`, another sample (the "rescue" item) is returned instead.
+        """
+        item = self.samples[idx]
+
+        raw_text = item["text"]
+
+        wav = np.asarray(self.load_wav(item["audio_file"]), dtype=np.float32)
+
+        # apply noise for augmentation
+        if self.use_noise_augment:
+            wav = noise_augment_audio(wav)
+
+        # get token ids
+        token_ids = self.get_token_ids(idx, item["text"])
+
+        # get pre-computed attention maps
+        attn = None
+        if "alignment_file" in item:
+            attn = self.get_attn_mask(item["alignment_file"])
+
+        # after phonemization the text length may change
+        # this is a shameful 🤭 hack to prevent longer phonemes
+        # TODO: find a better fix
+        if len(token_ids) > self.max_text_len or len(wav) < self.min_audio_len:
+            # wrap around so the rescue index can never run past the end of the sample list
+            self.rescue_item_idx = (self.rescue_item_idx + 1) % len(self.samples)
+            return self.load_data(self.rescue_item_idx)
+
+        # get f0 values
+        f0 = None
+        if self.compute_f0:
+            f0 = self.get_f0(idx)["f0"]
+
+        sample = {
+            "raw_text": raw_text,
+            "token_ids": token_ids,
+            "wav": wav,
+            "pitch": f0,
+            "attn": attn,
+            "item_idx": item["audio_file"],
+            "speaker_name": item["speaker_name"],
+            "language_name": item["language"],
+            "wav_file_name": os.path.basename(item["audio_file"]),
+            "audio_unique_name": item["audio_unique_name"],
+        }
+        return sample
+
+    @staticmethod
+    def _compute_lengths(samples):
+        """Store "audio_length" (from file size) and "text_length" (characters) on every sample dict, in place."""
+        new_samples = []
+        for item in samples:
+            audio_length = os.path.getsize(item["audio_file"]) / 16 * 8  # assuming 16bit audio
+            text_length = len(item["text"])
+            item["audio_length"] = audio_length
+            item["text_length"] = text_length
+            new_samples += [item]
+        return new_samples
+
+    @staticmethod
+    def filter_by_length(lengths: List[int], min_len: int, max_len: int):
+        """Split sample indices into `(ignore_idx, keep_idx)` by whether the length lies in `[min_len, max_len]`.
+
+        Both lists are ordered by ascending length.
+        """
+        idxs = np.argsort(lengths)  # ascending order
+        ignore_idx = []
+        keep_idx = []
+        for idx in idxs:
+            length = lengths[idx]
+            if length < min_len or length > max_len:
+                ignore_idx.append(idx)
+            else:
+                keep_idx.append(idx)
+        return ignore_idx, keep_idx
+
+    @staticmethod
+    def sort_by_length(samples: List[List]):
+        """Return the indices that sort `samples` by their "audio_length" in ascending order."""
+        audio_lengths = [s["audio_length"] for s in samples]
+        idxs = np.argsort(audio_lengths)  # ascending order
+        return idxs
+
+    @staticmethod
+    def create_buckets(samples, batch_group_size: int):
+        """Shuffle `samples` in place within consecutive groups of `batch_group_size` items.
+
+        A trailing group shorter than `batch_group_size` is left in its original order.
+        """
+        assert batch_group_size > 0
+        for i in range(len(samples) // batch_group_size):
+            offset = i * batch_group_size
+            end_offset = offset + batch_group_size
+            temp_items = samples[offset:end_offset]
+            random.shuffle(temp_items)
+            samples[offset:end_offset] = temp_items
+        return samples
+
+    @staticmethod
+    def _select_samples_by_idx(idxs, samples):
+        """Return the samples at positions `idxs`, in that order."""
+        samples_new = []
+        for idx in idxs:
+            samples_new.append(samples[idx])
+        return samples_new
+
+    def preprocess_samples(self):
+        r"""Filter out samples outside of the text/audio length ranges, sort the rest by audio length in
+        ascending order and optionally shuffle them in buckets of `batch_group_size`.
+
+        Raises:
+            RuntimeError: If no sample is left after filtering.
+        """
+        samples = self._compute_lengths(self.samples)
+
+        # sort items based on the sequence length in ascending order
+        text_lengths = [i["text_length"] for i in samples]
+        audio_lengths = [i["audio_length"] for i in samples]
+        text_ignore_idx, text_keep_idx = self.filter_by_length(text_lengths, self.min_text_len, self.max_text_len)
+        audio_ignore_idx, audio_keep_idx = self.filter_by_length(audio_lengths, self.min_audio_len, self.max_audio_len)
+        keep_idx = list(set(audio_keep_idx) & set(text_keep_idx))
+        ignore_idx = list(set(audio_ignore_idx) | set(text_ignore_idx))
+
+        samples = self._select_samples_by_idx(keep_idx, samples)
+
+        # check before touching `sorted_idxs[-1]` below, which would fail on an empty array
+        if len(samples) == 0:
+            raise RuntimeError(" [!] No samples left")
+
+        sorted_idxs = self.sort_by_length(samples)
+
+        if self.start_by_longest:
+            # swap the shortest and the longest sample so that the longest one comes first
+            longest_idxs = sorted_idxs[-1]
+            sorted_idxs[-1] = sorted_idxs[0]
+            sorted_idxs[0] = longest_idxs
+
+        samples = self._select_samples_by_idx(sorted_idxs, samples)
+
+        # shuffle batch groups
+        # create batches with similar length items
+        # the larger the `batch_group_size`, the higher the length variety in a batch.
+        if self.batch_group_size > 0:
+            samples = self.create_buckets(samples, self.batch_group_size)
+
+        # update items to the new sorted items
+        audio_lengths = [s["audio_length"] for s in samples]
+        text_lengths = [s["text_length"] for s in samples]
+        self.samples = samples
+
+        if self.verbose:
+            print(" | > Preprocessing samples")
+            print(" | > Max text length: {}".format(np.max(text_lengths)))
+            print(" | > Min text length: {}".format(np.min(text_lengths)))
+            print(" | > Avg text length: {}".format(np.mean(text_lengths)))
+            print(" | ")
+            print(" | > Max audio length: {}".format(np.max(audio_lengths)))
+            print(" | > Min audio length: {}".format(np.min(audio_lengths)))
+            print(" | > Avg audio length: {}".format(np.mean(audio_lengths)))
+            print(f" | > Num. instances discarded samples: {len(ignore_idx)}")
+            print(" | > Batch group size: {}.".format(self.batch_group_size))
+
+    @staticmethod
+    def _sort_batch(batch, text_lengths):
+        """Sort the batch by the input text length for RNN efficiency.
+
+        Args:
+            batch (Dict): Batch returned by `__getitem__`.
+            text_lengths (List[int]): Lengths of the input character sequences.
+        """
+        text_lengths, ids_sorted_decreasing = torch.sort(torch.LongTensor(text_lengths), dim=0, descending=True)
+        batch = [batch[idx] for idx in ids_sorted_decreasing]
+        return batch, text_lengths, ids_sorted_decreasing
+
+    def collate_fn(self, batch):
+        r"""
+        Perform preprocessing and create a final data batch:
+        1. Sort batch instances by text-length
+        2. Convert Audio signal to features.
+        3. PAD sequences wrt r.
+        4. Load to Torch.
+
+        Raises:
+            TypeError: If the batch items are not mappings.
+        """
+
+        # Puts each data field into a tensor with outer dimension batch size
+        if isinstance(batch[0], collections.abc.Mapping):
+
+            token_ids_lengths = np.array([len(d["token_ids"]) for d in batch])
+
+            # sort items with text input length for RNN efficiency
+            batch, token_ids_lengths, _ = self._sort_batch(batch, token_ids_lengths)
+
+            # convert list of dicts to dict of lists
+            batch = {k: [dic[k] for dic in batch] for k in batch[0]}
+
+            # get language ids from language names
+            if self.language_id_mapping is not None:
+                language_ids = [self.language_id_mapping[ln] for ln in batch["language_name"]]
+            else:
+                language_ids = None
+            # get pre-computed d-vectors
+            if self.d_vector_mapping is not None:
+                embedding_keys = list(batch["audio_unique_name"])
+                d_vectors = [self.d_vector_mapping[w]["embedding"] for w in embedding_keys]
+            else:
+                d_vectors = None
+
+            # get numerical speaker ids from speaker names
+            if self.speaker_id_mapping:
+                speaker_ids = [self.speaker_id_mapping[sn] for sn in batch["speaker_name"]]
+            else:
+                speaker_ids = None
+            # compute features
+            mel = [self.ap.melspectrogram(w).astype("float32") for w in batch["wav"]]
+
+            mel_lengths = [m.shape[1] for m in mel]
+
+            # lengths adjusted by the reduction factor
+            mel_lengths_adjusted = [
+                m.shape[1] + (self.outputs_per_step - (m.shape[1] % self.outputs_per_step))
+                if m.shape[1] % self.outputs_per_step
+                else m.shape[1]
+                for m in mel
+            ]
+
+            # compute 'stop token' targets
+            stop_targets = [np.array([0.0] * (mel_len - 1) + [1.0]) for mel_len in mel_lengths]
+
+            # PAD stop targets
+            stop_targets = prepare_stop_target(stop_targets, self.outputs_per_step)
+
+            # PAD sequences with longest instance in the batch
+            token_ids = prepare_data(batch["token_ids"]).astype(np.int32)
+
+            # PAD features with longest instance
+            mel = prepare_tensor(mel, self.outputs_per_step)
+
+            # B x D x T --> B x T x D
+            mel = mel.transpose(0, 2, 1)
+
+            # convert things to pytorch
+            token_ids_lengths = torch.LongTensor(token_ids_lengths)
+            token_ids = torch.LongTensor(token_ids)
+            mel = torch.FloatTensor(mel).contiguous()
+            mel_lengths = torch.LongTensor(mel_lengths)
+            stop_targets = torch.FloatTensor(stop_targets)
+
+            # speaker vectors
+            if d_vectors is not None:
+                d_vectors = torch.FloatTensor(d_vectors)
+
+            if speaker_ids is not None:
+                speaker_ids = torch.LongTensor(speaker_ids)
+
+            if language_ids is not None:
+                language_ids = torch.LongTensor(language_ids)
+
+            # compute linear spectrogram
+            linear = None
+            if self.compute_linear_spec:
+                linear = [self.ap.spectrogram(w).astype("float32") for w in batch["wav"]]
+                linear = prepare_tensor(linear, self.outputs_per_step)
+                linear = linear.transpose(0, 2, 1)
+                assert mel.shape[1] == linear.shape[1]
+                linear = torch.FloatTensor(linear).contiguous()
+
+            # format waveforms
+            wav_padded = None
+            if self.return_wav:
+                max_wav_len = max(mel_lengths_adjusted) * self.ap.hop_length
+                wav_padded = torch.zeros(len(batch["wav"]), 1, max_wav_len)
+                for i, w in enumerate(batch["wav"]):
+                    mel_length = mel_lengths_adjusted[i]
+                    w = np.pad(w, (0, self.ap.hop_length * self.outputs_per_step), mode="edge")
+                    w = w[: mel_length * self.ap.hop_length]
+                    wav_padded[i, :, : w.shape[0]] = torch.from_numpy(w)
+                wav_padded.transpose_(1, 2)
+
+            # format F0
+            if self.compute_f0:
+                pitch = prepare_data(batch["pitch"])
+                assert mel.shape[1] == pitch.shape[1], f"[!] {mel.shape} vs {pitch.shape}"
+                pitch = torch.FloatTensor(pitch)[:, None, :].contiguous()  # B x 1 xT
+            else:
+                pitch = None
+
+            # format attention masks
+            attns = None
+            if batch["attn"][0] is not None:
+                # `batch` is already in sorted order here; indexing it again with the sort
+                # permutation would pair attention maps with the wrong samples
+                attns = [attn.T for attn in batch["attn"]]
+                for idx, attn in enumerate(attns):
+                    pad2 = mel.shape[1] - attn.shape[1]
+                    pad1 = token_ids.shape[1] - attn.shape[0]
+                    assert pad1 >= 0 and pad2 >= 0, f"[!] Negative padding - {pad1} and {pad2}"
+                    attn = np.pad(attn, [[0, pad1], [0, pad2]])
+                    attns[idx] = attn
+                attns = prepare_tensor(attns, self.outputs_per_step)
+                attns = torch.FloatTensor(attns).unsqueeze(1)
+
+            return {
+                "token_id": token_ids,
+                "token_id_lengths": token_ids_lengths,
+                "speaker_names": batch["speaker_name"],
+                "linear": linear,
+                "mel": mel,
+                "mel_lengths": mel_lengths,
+                "stop_targets": stop_targets,
+                "item_idxs": batch["item_idx"],
+                "d_vectors": d_vectors,
+                "speaker_ids": speaker_ids,
+                "attns": attns,
+                "waveform": wav_padded,
+                "raw_text": batch["raw_text"],
+                "pitch": pitch,
+                "language_ids": language_ids,
+            }
+
+        raise TypeError(f"batch must contain tensors, numbers, dicts or lists; found {type(batch[0])}")
+
+
+class PhonemeDataset(Dataset):
+    """Phoneme Dataset for converting input text to phonemes and then token IDs
+
+    At initialization, it pre-computes the phonemes under `cache_path` and loads them in training to reduce data
+    loading latency. If `cache_path` is already present, it skips the pre-computation. If `cache_path` is None,
+    nothing is cached and token IDs are computed on every access.
+
+    Args:
+        samples (Union[List[List], List[Dict]]):
+            List of samples. Items are accessed by the keys "text" and "audio_unique_name".
+
+        tokenizer (TTSTokenizer):
+            Tokenizer to convert input text to phonemes.
+
+        cache_path (str):
+            Path to cache phonemes. If `cache_path` is already present or None, it skips the pre-computation.
+
+        precompute_num_workers (int):
+            Number of workers used for pre-computing the phonemes. Defaults to 0.
+    """
+
+    def __init__(
+        self,
+        samples: Union[List[Dict], List[List]],
+        tokenizer: "TTSTokenizer",
+        cache_path: str,
+        precompute_num_workers=0,
+    ):
+        self.samples = samples
+        self.tokenizer = tokenizer
+        self.cache_path = cache_path
+        # an existing cache folder is taken as a sign that the pre-computation already happened
+        if cache_path is not None and not os.path.exists(cache_path):
+            os.makedirs(cache_path)
+            self.precompute(precompute_num_workers)
+
+    def __getitem__(self, index):
+        item = self.samples[index]
+        ids = self.compute_or_load(string2filename(item["audio_unique_name"]), item["text"])
+        ph_hat = self.tokenizer.ids_to_text(ids)
+        return {"text": item["text"], "ph_hat": ph_hat, "token_ids": ids, "token_ids_len": len(ids)}
+
+    def __len__(self):
+        return len(self.samples)
+
+    def compute_or_load(self, file_name, text):
+        """Compute phonemes for the given text.
+
+        If the phonemes are already cached, load them from cache. Without a cache folder
+        (`cache_path` is None) the token IDs are computed and returned without being stored.
+
+        Args:
+            file_name (str): Base name of the cache file, "_phoneme.npy" is appended to it.
+            text (str): Text to convert when there is no cached entry.
+        """
+        if self.cache_path is None:
+            # nothing to load from or save to
+            return self.tokenizer.text_to_ids(text)
+        file_ext = "_phoneme.npy"
+        cache_path = os.path.join(self.cache_path, file_name + file_ext)
+        try:
+            ids = np.load(cache_path)
+        except FileNotFoundError:
+            ids = self.tokenizer.text_to_ids(text)
+            np.save(cache_path, ids)
+        return ids
+
+    def get_pad_id(self):
+        """Get pad token ID for sequence padding"""
+        return self.tokenizer.pad_id
+
+    def precompute(self, num_workers=1):
+        """Precompute phonemes for all samples.
+
+        We use pytorch dataloader because we are lazy.
+        """
+        print("[*] Pre-computing phonemes...")
+        with tqdm.tqdm(total=len(self)) as pbar:
+            batch_size = num_workers if num_workers > 0 else 1
+            dataloader = torch.utils.data.DataLoader(
+                batch_size=batch_size, dataset=self, shuffle=False, num_workers=num_workers, collate_fn=self.collate_fn
+            )
+            for batch in dataloader:
+                # the last batch may be smaller than `batch_size`
+                pbar.update(len(batch["text"]))
+
+    def collate_fn(self, batch):
+        """Stack the token IDs of a batch into one LongTensor, padded with the tokenizer's pad ID."""
+        ids = [item["token_ids"] for item in batch]
+        ids_lens = [item["token_ids_len"] for item in batch]
+        texts = [item["text"] for item in batch]
+        texts_hat = [item["ph_hat"] for item in batch]
+        ids_lens_max = max(ids_lens)
+        ids_torch = torch.LongTensor(len(ids), ids_lens_max).fill_(self.get_pad_id())
+        for i, ids_len in enumerate(ids_lens):
+            ids_torch[i, :ids_len] = torch.LongTensor(ids[i])
+        return {"text": texts, "ph_hat": texts_hat, "token_ids": ids_torch}
+
+    def print_logs(self, level: int = 0) -> None:
+        """Print the tokenizer logs and the number of samples, indented by `level` tabs."""
+        indent = "\t" * level
+        print("\n")
+        print(f"{indent}> PhonemeDataset ")
+        print(f"{indent}| > Tokenizer:")
+        self.tokenizer.print_logs(level + 1)
+        print(f"{indent}| > Number of instances : {len(self.samples)}")
+
+
+class F0Dataset:
+    """F0 Dataset for computing F0 from wav files in CPU
+
+    Pre-compute F0 values for all the samples at initialization if `cache_path` is given and does not exist yet.
+    The pre-computation also stores the mean and std of the non-zero F0 values if `normalize_f0` is True.
+
+    Args:
+        samples (Union[List[List], List[Dict]]):
+            List of samples. Items are accessed by the keys "audio_file" and "audio_unique_name".
+
+        ap (AudioProcessor):
+            AudioProcessor to compute F0 from wav files.
+
+        cache_path (str):
+            Path to cache F0 values. If `cache_path` is already present or None, it skips the pre-computation.
+            Defaults to None.
+
+        precompute_num_workers (int):
+            Number of workers used for pre-computing the F0 values. Defaults to 0.
+
+        normalize_f0 (bool):
+            Whether to normalize F0 values by mean and std. Defaults to True.
+    """
+
+    def __init__(
+        self,
+        samples: Union[List[List], List[Dict]],
+        ap: "AudioProcessor",
+        verbose=False,
+        cache_path: str = None,
+        precompute_num_workers=0,
+        normalize_f0=True,
+    ):
+        self.samples = samples
+        self.ap = ap
+        self.verbose = verbose
+        self.cache_path = cache_path
+        self.normalize_f0 = normalize_f0
+        self.pad_id = 0.0
+        self.mean = None
+        self.std = None
+        # an existing cache folder is taken as a sign that the pre-computation already happened
+        if cache_path is not None and not os.path.exists(cache_path):
+            os.makedirs(cache_path)
+            self.precompute(precompute_num_workers)
+        if normalize_f0:
+            # NOTE: reads "pitch_stats.npy" from `cache_path`, so normalization needs a cache folder
+            self.load_stats(cache_path)
+
+    def __getitem__(self, idx):
+        item = self.samples[idx]
+        f0 = self.compute_or_load(item["audio_file"], string2filename(item["audio_unique_name"]))
+        if self.normalize_f0:
+            assert self.mean is not None and self.std is not None, " [!] Mean and STD is not available"
+            f0 = self.normalize(f0)
+        return {"audio_unique_name": item["audio_unique_name"], "f0": f0}
+
+    def __len__(self):
+        return len(self.samples)
+
+    def precompute(self, num_workers=0):
+        """Compute (and cache) the F0 of every sample and, if `normalize_f0` is set, save the pitch statistics."""
+        print("[*] Pre-computing F0s...")
+        with tqdm.tqdm(total=len(self)) as pbar:
+            batch_size = num_workers if num_workers > 0 else 1
+            # we do not normalize at preprocessing
+            normalize_f0 = self.normalize_f0
+            self.normalize_f0 = False
+            dataloader = torch.utils.data.DataLoader(
+                batch_size=batch_size, dataset=self, shuffle=False, num_workers=num_workers, collate_fn=self.collate_fn
+            )
+            computed_data = []
+            try:
+                for batch in dataloader:
+                    f0 = batch["f0"]
+                    # one (zero padded) row per sample
+                    computed_data.extend(f0)
+                    # the last batch may be smaller than `batch_size`
+                    pbar.update(len(f0))
+            finally:
+                # restore the flag even if loading a sample fails
+                self.normalize_f0 = normalize_f0
+
+        if self.normalize_f0:
+            pitch_mean, pitch_std = self.compute_pitch_stats(computed_data)
+            pitch_stats = {"mean": pitch_mean, "std": pitch_std}
+            np.save(os.path.join(self.cache_path, "pitch_stats"), pitch_stats, allow_pickle=True)
+
+    def get_pad_id(self):
+        """Get the value used to pad F0 sequences."""
+        return self.pad_id
+
+    @staticmethod
+    def create_pitch_file_path(file_name, cache_path):
+        """Return the path of the cached pitch file of `file_name` under `cache_path`."""
+        pitch_file = os.path.join(cache_path, file_name + "_pitch.npy")
+        return pitch_file
+
+    @staticmethod
+    def _compute_and_save_pitch(ap, wav_file, pitch_file=None):
+        """Compute the F0 of `wav_file` with `ap` and save it to `pitch_file` when one is given."""
+        wav = ap.load_wav(wav_file)
+        pitch = ap.compute_f0(wav)
+        if pitch_file:
+            np.save(pitch_file, pitch)
+        return pitch
+
+    @staticmethod
+    def compute_pitch_stats(pitch_vecs):
+        """Return the mean and std over the non-zero values of all pitch vectors."""
+        # np.asarray lets both numpy arrays and (CPU) torch tensors through
+        pitch_vecs = [np.asarray(v) for v in pitch_vecs]
+        nonzeros = np.concatenate([v[np.where(v != 0.0)[0]] for v in pitch_vecs])
+        mean, std = np.mean(nonzeros), np.std(nonzeros)
+        return mean, std
+
+    def load_stats(self, cache_path):
+        """Load the pitch mean and std saved by `precompute` from `cache_path`."""
+        stats_path = os.path.join(cache_path, "pitch_stats.npy")
+        # NOTE: `allow_pickle=True` unpickles the file content, only load cache files you trust
+        stats = np.load(stats_path, allow_pickle=True).item()
+        self.mean = stats["mean"].astype(np.float32)
+        self.std = stats["std"].astype(np.float32)
+
+    def normalize(self, pitch):
+        """Return a normalized copy of `pitch`; zero entries stay zero."""
+        zero_idxs = np.where(pitch == 0.0)[0]
+        pitch = pitch - self.mean
+        pitch = pitch / self.std
+        pitch[zero_idxs] = 0.0
+        return pitch
+
+    def denormalize(self, pitch):
+        """Return a de-normalized copy of `pitch`; zero entries stay zero.
+
+        The input is left untouched, like in `normalize`.
+        """
+        zero_idxs = np.where(pitch == 0.0)[0]
+        pitch = pitch * self.std
+        pitch = pitch + self.mean
+        pitch[zero_idxs] = 0.0
+        return pitch
+
+    def compute_or_load(self, wav_file, audio_unique_name):
+        """
+        compute pitch and return a numpy array of pitch values
+
+        The values are loaded from the cache folder when they are already there and stored in it
+        otherwise. Without a cache folder (`cache_path` is None) they are computed and not stored.
+        """
+        if self.cache_path is None:
+            return self._compute_and_save_pitch(self.ap, wav_file).astype(np.float32)
+        pitch_file = self.create_pitch_file_path(audio_unique_name, self.cache_path)
+        if not os.path.exists(pitch_file):
+            pitch = self._compute_and_save_pitch(self.ap, wav_file, pitch_file)
+        else:
+            pitch = np.load(pitch_file)
+        return pitch.astype(np.float32)
+
+    def collate_fn(self, batch):
+        """Stack the F0 vectors of a batch into one float tensor, padded with `pad_id`."""
+        audio_unique_name = [item["audio_unique_name"] for item in batch]
+        f0s = [item["f0"] for item in batch]
+        f0_lens = [len(item["f0"]) for item in batch]
+        f0_lens_max = max(f0_lens)
+        # F0 values are floats, an integer tensor would truncate them
+        f0s_torch = torch.FloatTensor(len(f0s), f0_lens_max).fill_(self.get_pad_id())
+        for i, f0_len in enumerate(f0_lens):
+            f0s_torch[i, :f0_len] = torch.FloatTensor(f0s[i])
+        return {"audio_unique_name": audio_unique_name, "f0": f0s_torch, "f0_lens": f0_lens}
+
+    def print_logs(self, level: int = 0) -> None:
+        """Print the number of samples, indented by `level` tabs."""
+        indent = "\t" * level
+        print("\n")
+        print(f"{indent}> F0Dataset ")
+        print(f"{indent}| > Number of instances : {len(self.samples)}")
diff --git a/TTS/tts/datasets/formatters.py b/TTS/tts/datasets/formatters.py
new file mode 100644
index 0000000000000000000000000000000000000000..391904b38a3a77a1105ba625750d37a5ae4ab6ba
--- /dev/null
+++ b/TTS/tts/datasets/formatters.py
@@ -0,0 +1,647 @@
+import os
+import re
+import xml.etree.ElementTree as ET
+from glob import glob
+from pathlib import Path
+from typing import List
+
+import pandas as pd
+from tqdm import tqdm
+
+########################
+# DATASETS
+########################
+
+
+def coqui(root_path, meta_file, ignored_speakers=None):
+    """Internal dataset formatter.
+
+    Reads a ``|``-separated metadata file with a header row. The columns
+    ``audio_file`` and ``text`` are required; ``speaker_name`` and ``emotion_name``
+    are optional and default to ``"coqui"`` and ``"neutral"`` when absent.
+    Rows whose audio file does not exist under ``root_path`` are skipped and counted.
+    """
+    filepath = os.path.join(root_path, meta_file)
+    # report every line whose column count differs from the first (header) line
+    with open(filepath, "r", encoding="utf8") as f:
+        lines = f.readlines()
+    expected_cols = len(lines[0].split("|"))
+    for row_no, line in enumerate(lines[1:], start=1):
+        if len(line.split("|")) != expected_cols:
+            print(f" > Missing column in line {row_no} -> {line.strip()}")
+    # load metadata
+    metadata = pd.read_csv(filepath, sep="|")
+    assert all(x in metadata.columns for x in ["audio_file", "text"])
+    has_speaker_col = "speaker_name" in metadata.columns
+    has_emotion_col = "emotion_name" in metadata.columns
+    items = []
+    missing = 0
+    for row in metadata.itertuples():
+        # speakers can only be filtered when the file actually names them
+        if has_speaker_col and ignored_speakers is not None and row.speaker_name in ignored_speakers:
+            continue
+        audio_path = os.path.join(root_path, row.audio_file)
+        if os.path.exists(audio_path):
+            items.append(
+                {
+                    "text": row.text,
+                    "audio_file": audio_path,
+                    "speaker_name": row.speaker_name if has_speaker_col else "coqui",
+                    "emotion_name": row.emotion_name if has_emotion_col else "neutral",
+                    "root_path": root_path,
+                }
+            )
+        else:
+            missing += 1
+    if missing > 0:
+        print(f" | > [!] {missing} files not found")
+    return items
+
+
+def tweb(root_path, meta_file, **kwargs):  # pylint: disable=unused-argument
+    """Format the TWEB dataset metadata.
+    https://www.kaggle.com/bryanpark/the-world-english-bible-speech-dataset
+
+    Lines are tab-separated: the first field is the wav name without extension
+    (relative to ``root_path``), the second field is used as the text unchanged.
+    """
+    samples = []
+    with open(os.path.join(root_path, meta_file), "r", encoding="utf-8") as meta:
+        for entry in meta:
+            fields = entry.split("\t")
+            samples.append(
+                {
+                    "text": fields[1],
+                    "audio_file": os.path.join(root_path, fields[0] + ".wav"),
+                    "speaker_name": "tweb",
+                    "root_path": root_path,
+                }
+            )
+    return samples
+
+
+def mozilla(root_path, meta_file, **kwargs):  # pylint: disable=unused-argument
+    """Convert a Mozilla metadata file to the TTS sample format.
+
+    Every line is read as ``<text>|<wav file name>``; both fields are stripped and
+    the wav file is located under ``<root_path>/wavs``.
+    """
+    samples = []
+    wav_dir = os.path.join(root_path, "wavs")
+    with open(os.path.join(root_path, meta_file), "r", encoding="utf-8") as meta:
+        for entry in meta:
+            fields = entry.split("|")
+            wav_name = fields[1].strip()
+            transcript = fields[0].strip()
+            samples.append(
+                {
+                    "text": transcript,
+                    "audio_file": os.path.join(wav_dir, wav_name),
+                    "speaker_name": "mozilla",
+                    "root_path": root_path,
+                }
+            )
+    return samples
+
+
+def mozilla_de(root_path, meta_file, **kwargs):  # pylint: disable=unused-argument
+    """Convert a German Mozilla metadata file (ISO 8859-1 encoded) to the TTS sample format.
+
+    Every line is read as ``<wav file name>|<text>``. The wav file lives in the folder
+    ``BATCH_<prefix>_FINAL`` where ``<prefix>`` is the part of the file name before the first ``_``.
+    """
+    samples = []
+    with open(os.path.join(root_path, meta_file), "r", encoding="ISO 8859-1") as meta:
+        for entry in meta:
+            fields = entry.strip().split("|")
+            wav_name = fields[0].strip()
+            transcript = fields[1].strip()
+            batch_dir = f"BATCH_{wav_name.split('_')[0]}_FINAL"
+            samples.append(
+                {
+                    "text": transcript,
+                    "audio_file": os.path.join(root_path, batch_dir, wav_name),
+                    "speaker_name": "mozilla",
+                    "root_path": root_path,
+                }
+            )
+    return samples
+
+
+def mailabs(root_path, meta_files=None, ignored_speakers=None):
+    """Normalizes M-AI-Labs meta data files to TTS format
+
+    Args:
+        root_path (str): root folder of the MAILAB language folder.
+        meta_files (str or list): meta files to be used in the training, given either as a list of paths or as
+            a single comma-separated string. If None, finds all the csv files recursively. Defaults to None
+        ignored_speakers (list): speaker names to skip; only applied when given as a list. Defaults to None
+
+    Returns:
+        list: one dict per wav file that exists, with the keys ``text``, ``audio_file``,
+            ``speaker_name`` and ``root_path``.
+    """
+    # escape the separator so the pattern stays valid where ``os.sep`` is a backslash
+    sep = re.escape(os.sep)
+    speaker_regex = re.compile(f"by_book{sep}(male|female){sep}(?P<speaker_name>[^{sep}]+){sep}")
+    if not meta_files:
+        csv_files = glob(root_path + f"{os.sep}**{os.sep}metadata.csv", recursive=True)
+    elif isinstance(meta_files, str):
+        # a bare string would otherwise be iterated character by character
+        csv_files = [f.strip() for f in meta_files.split(",")]
+    else:
+        csv_files = meta_files
+
+    items = []
+    for csv_file in csv_files:
+        # accept both paths that resolve on their own and paths relative to root_path
+        if os.path.isfile(csv_file):
+            txt_file = csv_file
+        else:
+            txt_file = os.path.join(root_path, csv_file)
+
+        folder = os.path.dirname(txt_file)
+        # determine speaker based on folder structure...
+        speaker_name_match = speaker_regex.search(txt_file)
+        if speaker_name_match is None:
+            continue
+        speaker_name = speaker_name_match.group("speaker_name")
+        # ignore speakers
+        if isinstance(ignored_speakers, list):
+            if speaker_name in ignored_speakers:
+                continue
+        print(" | > {}".format(csv_file))
+        with open(txt_file, "r", encoding="utf-8") as ttf:
+            for line in ttf:
+                cols = line.split("|")
+                if not meta_files:
+                    wav_file = os.path.join(folder, "wavs", cols[0] + ".wav")
+                else:
+                    wav_file = os.path.join(root_path, folder.replace("metadata.csv", ""), "wavs", cols[0] + ".wav")
+                if os.path.isfile(wav_file):
+                    text = cols[1].strip()
+                    items.append(
+                        {"text": text, "audio_file": wav_file, "speaker_name": speaker_name, "root_path": root_path}
+                    )
+                else:
+                    # M-AI-Labs have some missing samples, so just print the warning
+                    print("> File %s does not exist!" % (wav_file))
+    return items
+
+
+def ljspeech(root_path, meta_file, **kwargs):  # pylint: disable=unused-argument
+    """Convert the LJSpeech metadata file to the TTS sample format.
+    https://keithito.com/LJ-Speech-Dataset/
+
+    Lines are ``|``-separated: the first field names the wav (without extension) under
+    ``<root_path>/wavs`` and the third field is used as the text unchanged.
+    """
+    samples = []
+    with open(os.path.join(root_path, meta_file), "r", encoding="utf-8") as meta:
+        for entry in meta:
+            fields = entry.split("|")
+            audio = os.path.join(root_path, "wavs", fields[0] + ".wav")
+            samples.append(
+                {"text": fields[2], "audio_file": audio, "speaker_name": "ljspeech", "root_path": root_path}
+            )
+    return samples
+
+def viettts(root_path, meta_file, **kwargs):  # pylint: disable=unused-argument
+    """Normalizes a ``viettts`` meta data file to TTS format
+
+    Every line has the form ``<audio path> - <text>`` where the audio path is
+    relative to ``root_path``.
+
+    Args:
+        root_path (str): root folder of the dataset.
+        meta_file (str): name of the meta data file inside ``root_path``.
+
+    Returns:
+        list: one dict per line with the keys ``text``, ``audio_file``, ``speaker_name`` and ``root_path``.
+    """
+    txt_file = os.path.join(root_path, meta_file)
+    items = []
+    speaker_name = "viettts"
+    with open(txt_file, "r", encoding="utf-8") as ttf:
+        for line in ttf:
+            # split only on the first separator so that a " - " inside the text is kept
+            cols = line.split(" - ", 1)
+            wav_file = os.path.join(root_path, cols[0])
+            text = cols[1]
+            items.append({"text": text, "audio_file": wav_file, "speaker_name": speaker_name, "root_path": root_path})
+    return items
+
+def infore(root_path, meta_file, **kwargs):  # pylint: disable=unused-argument
+    """Convert an ``infore`` metadata file to the TTS sample format.
+
+    Lines are ``|``-separated: the first field is the wav file name under
+    ``<root_path>/wavs`` and the second field is used as the text unchanged.
+    """
+    samples = []
+    with open(os.path.join(root_path, meta_file), "r", encoding="utf-8") as meta:
+        for entry in meta:
+            fields = entry.split("|")
+            audio = os.path.join(root_path, "wavs", fields[0])
+            samples.append({"text": fields[1], "audio_file": audio, "speaker_name": "infore", "root_path": root_path})
+    return samples
+
+def infore22(root_path, meta_file, **kwargs):  # pylint: disable=unused-argument
+    """Convert an ``infore`` metadata file to the TTS sample format, reading audio from ``wav22``.
+
+    Lines are ``|``-separated. The first field is a ``/``-separated path whose third
+    component is taken as the file name under ``<root_path>/wav22``; the second field
+    is used as the text unchanged.
+    """
+    samples = []
+    with open(os.path.join(root_path, meta_file), "r", encoding="utf-8") as meta:
+        for entry in meta:
+            fields = entry.split("|")
+            path_parts = fields[0].split("/")
+            audio = os.path.join(root_path, "wav22", path_parts[2])
+            samples.append({"text": fields[1], "audio_file": audio, "speaker_name": "infore", "root_path": root_path})
+    return samples
+
+
+
+def ljspeech_test(root_path, meta_file, **kwargs):  # pylint: disable=unused-argument
+    """Convert the LJSpeech metadata file for TTS testing, faking several speakers.
+    https://keithito.com/LJ-Speech-Dataset/
+
+    Same parsing as the regular LJSpeech layout (wav name in the first field, text in
+    the third), but each pair of consecutive lines gets its own ``ljspeech-<n>`` speaker name.
+    """
+    samples = []
+    with open(os.path.join(root_path, meta_file), "r", encoding="utf-8") as meta:
+        for idx, entry in enumerate(meta):
+            # 2 samples per speaker to avoid eval split issues: ids run 1, 1, 2, 2, ...
+            speaker_id = idx // 2 + 1
+            fields = entry.split("|")
+            samples.append(
+                {
+                    "text": fields[2],
+                    "audio_file": os.path.join(root_path, "wavs", fields[0] + ".wav"),
+                    "speaker_name": f"ljspeech-{speaker_id}",
+                    "root_path": root_path,
+                }
+            )
+    return samples
+
+
+def thorsten(root_path, meta_file, **kwargs):  # pylint: disable=unused-argument
+    """Convert the thorsten metadata file to the TTS sample format.
+    https://github.com/thorstenMueller/deep-learning-german-tts/
+
+    Lines are ``|``-separated: the first field names the wav (without extension) under
+    ``<root_path>/wavs`` and the second field is used as the text unchanged.
+    """
+    samples = []
+    with open(os.path.join(root_path, meta_file), "r", encoding="utf-8") as meta:
+        for entry in meta:
+            fields = entry.split("|")
+            audio = os.path.join(root_path, "wavs", fields[0] + ".wav")
+            samples.append(
+                {"text": fields[1], "audio_file": audio, "speaker_name": "thorsten", "root_path": root_path}
+            )
+    return samples
+
+
+def sam_accenture(root_path, meta_file, **kwargs):  # pylint: disable=unused-argument
+    """Convert the sam-accenture XML metadata file to the TTS sample format.
+    https://github.com/Sam-Accenture-Non-Binary-Voice/non-binary-voice-files
+
+    The XML file is read from ``<root_path>/voice_over_recordings``. Each ``fileid``
+    element provides the text (element content) and the wav name (``id`` attribute)
+    under ``<root_path>/vo_voice_quality_transformation``. Missing wav files are skipped.
+    """
+    tree_root = ET.parse(os.path.join(root_path, "voice_over_recordings", meta_file)).getroot()
+    wav_dir = os.path.join(root_path, "vo_voice_quality_transformation")
+    samples = []
+    for node in tree_root.findall("./fileid"):
+        transcript = node.text
+        wav_file = os.path.join(wav_dir, node.get("id") + ".wav")
+        if os.path.exists(wav_file):
+            samples.append(
+                {"text": transcript, "audio_file": wav_file, "speaker_name": "sam_accenture", "root_path": root_path}
+            )
+        else:
+            print(f" [!] {wav_file} in metafile does not exist. Skipping...")
+    return samples
+
+
+def ruslan(root_path, meta_file, **kwargs):  # pylint: disable=unused-argument
+    """Convert the RUSLAN metadata file to the TTS sample format.
+    https://ruslan-corpus.github.io/
+
+    Lines are ``|``-separated: the first field names the wav (without extension) under
+    ``<root_path>/RUSLAN`` and the second field is used as the text unchanged.
+    """
+    samples = []
+    with open(os.path.join(root_path, meta_file), "r", encoding="utf-8") as meta:
+        for entry in meta:
+            fields = entry.split("|")
+            audio = os.path.join(root_path, "RUSLAN", fields[0] + ".wav")
+            samples.append({"text": fields[1], "audio_file": audio, "speaker_name": "ruslan", "root_path": root_path})
+    return samples
+
+
+def css10(root_path, meta_file, **kwargs):  # pylint: disable=unused-argument
+    """Normalizes the CSS10 dataset file to TTS format
+
+    Lines are ``|``-separated: the first field is the audio path relative to
+    ``root_path`` and the second field is used as the text unchanged.
+
+    Returns:
+        list: one dict per line with the keys ``text``, ``audio_file``, ``speaker_name`` and ``root_path``.
+    """
+    txt_file = os.path.join(root_path, meta_file)
+    items = []
+    speaker_name = "css10"
+    with open(txt_file, "r", encoding="utf-8") as ttf:
+        for line in ttf:
+            cols = line.split("|")
+            wav_file = os.path.join(root_path, cols[0])
+            text = cols[1]
+            # "root_path" is included so the items have the same keys as the other formatters
+            items.append({"text": text, "audio_file": wav_file, "speaker_name": speaker_name, "root_path": root_path})
+    return items
+
+
+def nancy(root_path, meta_file, **kwargs):  # pylint: disable=unused-argument
+    """Normalizes the Nancy meta data file to TTS format
+
+    For every line the second whitespace-separated token is the utterance id (the wav
+    is looked up as ``<root_path>/wavn/<id>.wav``) and the text is taken from between
+    the first and the last double quote.
+
+    Returns:
+        list: one dict per line with the keys ``text``, ``audio_file``, ``speaker_name`` and ``root_path``.
+    """
+    txt_file = os.path.join(root_path, meta_file)
+    items = []
+    speaker_name = "nancy"
+    with open(txt_file, "r", encoding="utf-8") as ttf:
+        for line in ttf:
+            utt_id = line.split()[1]
+            # NOTE(review): the ``- 1`` also drops the character right before the closing
+            # quote - confirm this is intended and not an off-by-one.
+            text = line[line.find('"') + 1 : line.rfind('"') - 1]
+            wav_file = os.path.join(root_path, "wavn", utt_id + ".wav")
+            # "root_path" is included so the items have the same keys as the other formatters
+            items.append({"text": text, "audio_file": wav_file, "speaker_name": speaker_name, "root_path": root_path})
+    return items
+
+
+def common_voice(root_path, meta_file, ignored_speakers=None):
+    """Convert a Common Voice metadata file to the TTS sample format.
+
+    Lines are tab-separated: client id, clip file name and sentence. The header line
+    (starting with ``client_id``) is skipped, ``.mp3`` in the clip name is replaced by
+    ``.wav`` and clips are located under ``<root_path>/clips``. Speaker names are
+    prefixed with ``MCV_``; speakers in ``ignored_speakers`` (when a list) are dropped.
+    """
+    skip_speakers = isinstance(ignored_speakers, list)
+    samples = []
+    with open(os.path.join(root_path, meta_file), "r", encoding="utf-8") as meta:
+        for entry in meta:
+            if entry.startswith("client_id"):
+                continue
+            fields = entry.split("\t")
+            transcript = fields[2]
+            client = fields[0]
+            if skip_speakers and client in ignored_speakers:
+                continue
+            clip = os.path.join(root_path, "clips", fields[1].replace(".mp3", ".wav"))
+            samples.append(
+                {"text": transcript, "audio_file": clip, "speaker_name": "MCV_" + client, "root_path": root_path}
+            )
+    return samples
+
+
+def libri_tts(root_path, meta_files=None, ignored_speakers=None):
+    """https://ai.google/tools/datasets/libri-tts/
+
+    Args:
+        root_path (str): root folder of the dataset.
+        meta_files (str or list): a single meta file name relative to ``root_path`` or a list of
+            meta file paths used as given. If None, all ``*trans.tsv`` files below ``root_path`` are used.
+        ignored_speakers (list): speaker ids to skip; only applied when given as a list.
+
+    Returns:
+        list: one dict per line with the keys ``text``, ``audio_file``, ``speaker_name``
+            (prefixed with ``LTTS_``) and ``root_path``.
+
+    Raises:
+        AssertionError: if any of the referenced wav files does not exist.
+    """
+    items = []
+    if not meta_files:
+        meta_files = glob(f"{root_path}/**/*trans.tsv", recursive=True)
+    else:
+        if isinstance(meta_files, str):
+            meta_files = [os.path.join(root_path, meta_files)]
+
+    for meta_file in meta_files:
+        with open(meta_file, "r", encoding="utf-8") as ttf:
+            for line in ttf:
+                cols = line.split("\t")
+                file_name = cols[0]
+                # file names start with "<speaker>_<chapter>_", which is also the folder layout
+                speaker_name, chapter_id, *_ = cols[0].split("_")
+                _root_path = os.path.join(root_path, f"{speaker_name}/{chapter_id}")
+                wav_file = os.path.join(_root_path, file_name + ".wav")
+                text = cols[2]
+                # ignore speakers
+                if isinstance(ignored_speakers, list):
+                    if speaker_name in ignored_speakers:
+                        continue
+                items.append(
+                    {
+                        "text": text,
+                        "audio_file": wav_file,
+                        "speaker_name": f"LTTS_{speaker_name}",
+                        "root_path": root_path,
+                    }
+                )
+    for item in items:
+        assert os.path.exists(item["audio_file"]), f" [!] wav files don't exist - {item['audio_file']}"
+    return items
+
+
+def custom_turkish(root_path, meta_file, **kwargs):  # pylint: disable=unused-argument
+    """Convert the custom Turkish metadata file to the TTS sample format.
+
+    Lines are ``|``-separated: the first field names the wav (without extension) under
+    ``<root_path>/wavs`` and the second field is the text; both are stripped. Lines whose
+    wav file does not exist are skipped and the number of skipped files is printed.
+    """
+    samples = []
+    skipped = 0
+    with open(os.path.join(root_path, meta_file), "r", encoding="utf-8") as meta:
+        for entry in meta:
+            fields = entry.split("|")
+            wav_file = os.path.join(root_path, "wavs", fields[0].strip() + ".wav")
+            if os.path.exists(wav_file):
+                samples.append(
+                    {
+                        "text": fields[1].strip(),
+                        "audio_file": wav_file,
+                        "speaker_name": "turkish-female",
+                        "root_path": root_path,
+                    }
+                )
+            else:
+                skipped += 1
+    print(f" [!] {skipped} files skipped. They don't exist...")
+    return samples
+
+
+# ToDo: add the dataset link when the dataset is released publicly
+def brspeech(root_path, meta_file, ignored_speakers=None):
+    """BRSpeech 3.0 beta
+
+    Lines are ``|``-separated; the header line (starting with ``wav_filename``) is skipped.
+    The first field is the audio path relative to ``root_path``, the third field is the
+    text and the fourth field is the speaker id. Speakers in ``ignored_speakers``
+    (when a list) are dropped.
+    """
+    skip_speakers = isinstance(ignored_speakers, list)
+    samples = []
+    with open(os.path.join(root_path, meta_file), "r", encoding="utf-8") as meta:
+        for entry in meta:
+            if entry.startswith("wav_filename"):
+                continue
+            fields = entry.split("|")
+            audio = os.path.join(root_path, fields[0])
+            transcript, speaker = fields[2], fields[3]
+            if skip_speakers and speaker in ignored_speakers:
+                continue
+            samples.append({"text": transcript, "audio_file": audio, "speaker_name": speaker, "root_path": root_path})
+    return samples
+
+
+def vctk(root_path, meta_files=None, wavs_path="wav48_silence_trimmed", mic="mic1", ignored_speakers=None):
+    """VCTK dataset v0.92.
+
+    URL:
+        https://datashare.ed.ac.uk/bitstream/handle/10283/3443/VCTK-Corpus-0.92.zip
+
+    This dataset has 2 recordings per speaker that are annotated with ```mic1``` and ```mic2```.
+    It is believed that (😄 ) ```mic1``` files are the same as the previous version of the dataset.
+
+    mic1:
+        Audio recorded using an omni-directional microphone (DPA 4035).
+        Contains very low frequency noises.
+        This is the same audio released in previous versions of VCTK:
+        https://doi.org/10.7488/ds/1994
+
+    mic2:
+        Audio recorded using a small diaphragm condenser microphone with
+        very wide bandwidth (Sennheiser MKH 800).
+        Two speakers, p280 and p315 had technical issues of the audio
+        recordings using MKH 800.
+
+    The transcripts are discovered under ``<root_path>/txt``; the ``meta_files`` argument
+    is overwritten by that search. Only samples whose audio file exists are returned.
+    """
+    file_ext = "flac"
+    skip_speakers = isinstance(ignored_speakers, list)
+    samples = []
+    meta_files = glob(f"{os.path.join(root_path,'txt')}/**/*.txt", recursive=True)
+    for transcript_path in meta_files:
+        # relative path has the shape txt/<speaker>/<file>.txt
+        _, speaker_id, txt_name = os.path.relpath(transcript_path, root_path).split(os.sep)
+        file_id = txt_name.split(".")[0]
+        if skip_speakers and speaker_id in ignored_speakers:
+            continue
+        with open(transcript_path, "r", encoding="utf-8") as handle:
+            text = handle.readlines()[0]
+        # p280 has no mic2 recordings
+        mic_tag = "mic1" if speaker_id == "p280" else mic
+        wav_file = os.path.join(root_path, wavs_path, speaker_id, file_id + f"_{mic_tag}.{file_ext}")
+        if not os.path.exists(wav_file):
+            print(f" [!] wav files don't exist - {wav_file}")
+            continue
+        samples.append(
+            {"text": text, "audio_file": wav_file, "speaker_name": "VCTK_" + speaker_id, "root_path": root_path}
+        )
+    return samples
+
+
+def vctk_old(root_path, meta_files=None, wavs_path="wav48", ignored_speakers=None):
+    """homepages.inf.ed.ac.uk/jyamagis/release/VCTK-Corpus.tar.gz
+
+    The transcripts are discovered under ``<root_path>/txt`` (the ``meta_files`` argument
+    is overwritten by that search) and the first line of each is used as the text.
+    The audio path is built as ``<root_path>/<wavs_path>/<speaker>/<file id>.wav``
+    without checking that it exists.
+    """
+    skip_speakers = isinstance(ignored_speakers, list)
+    samples = []
+    meta_files = glob(f"{os.path.join(root_path,'txt')}/**/*.txt", recursive=True)
+    for transcript_path in meta_files:
+        # relative path has the shape txt/<speaker>/<file>.txt
+        _, speaker_id, txt_name = os.path.relpath(transcript_path, root_path).split(os.sep)
+        file_id = txt_name.split(".")[0]
+        if skip_speakers and speaker_id in ignored_speakers:
+            continue
+        with open(transcript_path, "r", encoding="utf-8") as handle:
+            text = handle.readlines()[0]
+        samples.append(
+            {
+                "text": text,
+                "audio_file": os.path.join(root_path, wavs_path, speaker_id, file_id + ".wav"),
+                "speaker_name": "VCTK_old_" + speaker_id,
+                "root_path": root_path,
+            }
+        )
+    return samples
+
+
+def synpaflex(root_path, metafiles=None, **kwargs):  # pylint: disable=unused-argument
+    """Collect synpaflex samples by pairing every wav below ``root_path`` with its transcript.
+
+    For a wav inside a ``wav`` folder the transcript path is obtained by replacing every
+    ``wav`` in the path with ``txt``; otherwise it is expected in a ``txt`` folder next to
+    the wav. Only pairs where both files exist are returned, and the first line of the
+    transcript is used as text. ``root_path`` in the items carries a trailing separator.
+    """
+    samples = []
+    root_path = os.path.join(root_path, "")
+    for wav_file in glob(f"{root_path}**/*.wav", recursive=True):
+        inside_wav_dir = (os.sep + "wav" + os.sep) in wav_file
+        if inside_wav_dir:
+            txt_file = wav_file.replace("wav", "txt")
+        else:
+            txt_name = os.path.basename(wav_file).replace(".wav", ".txt")
+            txt_file = os.path.join(os.path.dirname(wav_file), "txt", txt_name)
+        if not (os.path.exists(txt_file) and os.path.exists(wav_file)):
+            continue
+        with open(txt_file, "r", encoding="utf-8") as handle:
+            text = handle.readlines()[0]
+        samples.append({"text": text, "audio_file": wav_file, "speaker_name": "synpaflex", "root_path": root_path})
+    return samples
+
+
+def open_bible(root_path, meta_files="train", ignore_digits_sentences=True, ignored_speakers=None):
+    """ToDo: Refer the paper when available
+
+    ``meta_files`` names the split folder under ``root_path``. Every ``.txt`` file found
+    below it provides one sample: its first line (newline removed) is the text and the
+    audio path is the matching ``.flac`` in ``<root_path>/<split>/<speaker>``. Sentences
+    containing digits are skipped when ``ignore_digits_sentences`` is set.
+    """
+    split_dir = meta_files
+    skip_speakers = isinstance(ignored_speakers, list)
+    samples = []
+    meta_files = glob(f"{os.path.join(root_path, split_dir)}/**/*.txt", recursive=True)
+    for transcript_path in meta_files:
+        # relative path has the shape <split>/<speaker>/<file>.txt
+        _, speaker_id, txt_name = os.path.relpath(transcript_path, root_path).split(os.sep)
+        file_id = txt_name.split(".")[0]
+        if skip_speakers and speaker_id in ignored_speakers:
+            continue
+        with open(transcript_path, "r", encoding="utf-8") as handle:
+            text = handle.readline().replace("\n", "")
+        # ignore sentences that contains digits
+        if ignore_digits_sentences and any(map(str.isdigit, text)):
+            continue
+        samples.append(
+            {
+                "text": text,
+                "audio_file": os.path.join(root_path, split_dir, speaker_id, file_id + ".flac"),
+                "speaker_name": "OB_" + speaker_id,
+                "root_path": root_path,
+            }
+        )
+    return samples
+
+
+def mls(root_path, meta_files=None, ignored_speakers=None):
+    """http://www.openslr.org/94/
+
+    ``meta_files`` is the transcript file relative to ``root_path``. Each line is
+    ``<file id>\\t<text>``; the last character of the text (the line break) is cut off.
+    The file id starts with ``<speaker>_<book>_`` and the audio is located in
+    ``audio/<speaker>/<book>`` next to the transcript file.
+    """
+    skip_speakers = isinstance(ignored_speakers, list)
+    audio_root = os.path.join(root_path, os.path.dirname(meta_files), "audio")
+    samples = []
+    with open(os.path.join(root_path, meta_files), "r", encoding="utf-8") as meta:
+        for entry in meta:
+            file_id, transcript = entry.split("\t")
+            transcript = transcript[:-1]
+            speaker, book, *_ = file_id.split("_")
+            wav_file = os.path.join(audio_root, speaker, book, file_id + ".wav")
+            if skip_speakers and speaker in ignored_speakers:
+                continue
+            samples.append(
+                {"text": transcript, "audio_file": wav_file, "speaker_name": "MLS_" + speaker, "root_path": root_path}
+            )
+    return samples
+
+
+# ======================================== VOX CELEB ===========================================
+def voxceleb2(root_path, meta_file=None, **kwargs):  # pylint: disable=unused-argument
+    """Load the VoxCeleb2 samples by delegating to ``_voxcel_x`` with ``voxcel_idx="2"``.
+
+    :param root_path   Passed on to ``_voxcel_x`` unchanged
+    :param meta_file   Used only for consistency with load_tts_samples api
+    """
+    samples = _voxcel_x(root_path, meta_file, voxcel_idx="2")
+    return samples
+
+
+def voxceleb1(root_path, meta_file=None, **kwargs):  # pylint: disable=unused-argument
+    """Load the VoxCeleb1 samples by delegating to ``_voxcel_x`` with ``voxcel_idx="1"``.
+
+    :param root_path   Passed on to ``_voxcel_x`` unchanged
+    :param meta_file   Used only for consistency with load_tts_samples api
+    """
+    samples = _voxcel_x(root_path, meta_file, voxcel_idx="1")
+    return samples
+
+
+def _voxcel_x(root_path, meta_file, voxcel_idx):
+    """Return the VoxCeleb samples as a list of ``|``-split lines.
+
+    When ``meta_file`` is given, its lines are returned directly. Otherwise the lines
+    come from ``<root_path>/metafile_voxceleb<idx>.csv``; if that cache file is missing
+    it is first built by searching ``root_path`` recursively for wav files.
+
+    Raises:
+        ValueError: if the search finds fewer wav files than expected (the cache file is
+            written before this check).
+    """
+    assert voxcel_idx in ["1", "2"]
+    expected_count = {"1": 148_000, "2": 1_000_000}[voxcel_idx]
+    voxceleb_path = Path(root_path)
+    cache_to = voxceleb_path / f"metafile_voxceleb{voxcel_idx}.csv"
+    cache_to.parent.mkdir(exist_ok=True)
+
+    # an explicit meta file wins over the cache
+    if meta_file is not None:
+        with open(str(meta_file), "r", encoding="utf-8") as f:
+            return [x.strip().split("|") for x in f.readlines()]
+
+    # if not exists meta file, crawl recursively for 'wav' files
+    if not cache_to.exists():
+        meta_data = []
+        progress = tqdm(
+            voxceleb_path.rglob("**/*.wav"),
+            desc=f"Building VoxCeleb {voxcel_idx} Meta file ... this needs to be done only once.",
+            total=expected_count,
+        )
+        for path in progress:
+            # the speaker id is the name of the folder two levels above the wav file
+            speaker_id = str(Path(path).parent.parent.stem)
+            assert speaker_id.startswith("id")
+            text = None  # VoxCel does not provide transcriptions, and they are not needed for training the SE
+            meta_data.append(f"{text}|{path}|voxcel{voxcel_idx}_{speaker_id}\n")
+        with open(str(cache_to), "w", encoding="utf-8") as f:
+            f.write("".join(meta_data))
+        cnt = len(meta_data)
+        if cnt < expected_count:
+            raise ValueError(f"Found too few instances for Voxceleb. Should be around {expected_count}, is: {cnt}")
+
+    with open(str(cache_to), "r", encoding="utf-8") as f:
+        return [x.strip().split("|") for x in f.readlines()]
+
+
+def emotion(root_path, meta_file, ignored_speakers=None):
+    """Generic emotion dataset
+
+    Lines are comma-separated: audio path relative to ``root_path``, speaker id and
+    emotion id. The header line (starting with ``file_path``) is skipped. The returned
+    items carry no ``text`` key. Speakers in ``ignored_speakers`` (when a list) are dropped.
+    """
+    skip_speakers = isinstance(ignored_speakers, list)
+    samples = []
+    with open(os.path.join(root_path, meta_file), "r", encoding="utf-8") as meta:
+        for entry in meta:
+            if entry.startswith("file_path"):
+                continue
+            fields = entry.split(",")
+            audio = os.path.join(root_path, fields[0])
+            speaker = fields[1]
+            emotion_label = fields[2].replace("\n", "")
+            if skip_speakers and speaker in ignored_speakers:
+                continue
+            samples.append(
+                {"audio_file": audio, "speaker_name": speaker, "emotion_name": emotion_label, "root_path": root_path}
+            )
+    return samples
+
+
+def baker(root_path: str, meta_file: str, **kwargs) -> List[dict]:  # pylint: disable=unused-argument
+    """Normalizes the Baker meta data file to TTS format
+
+    Args:
+        root_path (str): path to the baker dataset
+        meta_file (str): name of the meta dataset containing names of wav to select and the transcript of the sentence
+    Returns:
+        List[dict]: one dict per sentence with the keys ``text``, ``audio_file``, ``speaker_name``
+            and ``root_path``
+    """
+    txt_file = os.path.join(root_path, meta_file)
+    items = []
+    speaker_name = "baker"
+    with open(txt_file, "r", encoding="utf-8") as ttf:
+        for line in ttf:
+            # split only once so that a "|" inside the transcript does not break the unpacking
+            wav_name, text = line.rstrip("\n").split("|", 1)
+            wav_path = os.path.join(root_path, "clips_22", wav_name)
+            items.append({"text": text, "audio_file": wav_path, "speaker_name": speaker_name, "root_path": root_path})
+    return items
+
+
+def kokoro(root_path, meta_file, **kwargs):  # pylint: disable=unused-argument
+    """Japanese single-speaker dataset from https://github.com/kaiidams/Kokoro-Speech-Dataset
+
+    Lines are ``|``-separated: the first field names the wav (without extension) under
+    ``<root_path>/wavs`` and the third field, with all spaces removed, is the text.
+    """
+    samples = []
+    with open(os.path.join(root_path, meta_file), "r", encoding="utf-8") as meta:
+        for entry in meta:
+            fields = entry.split("|")
+            audio = os.path.join(root_path, "wavs", fields[0] + ".wav")
+            transcript = fields[2].replace(" ", "")
+            samples.append(
+                {"text": transcript, "audio_file": audio, "speaker_name": "kokoro", "root_path": root_path}
+            )
+    return samples
+
+
+def kss(root_path, meta_file, **kwargs):  # pylint: disable=unused-argument
+    """Korean single-speaker dataset from https://www.kaggle.com/datasets/bryanpark/korean-single-speaker-speech-dataset
+
+    Lines are ``|``-separated: the first field is the audio path relative to
+    ``root_path`` and the third field is used as the text.
+
+    Returns:
+        list: one dict per line with the keys ``text``, ``audio_file``, ``speaker_name`` and ``root_path``.
+    """
+    txt_file = os.path.join(root_path, meta_file)
+    items = []
+    speaker_name = "kss"
+    with open(txt_file, "r", encoding="utf-8") as ttf:
+        for line in ttf:
+            cols = line.split("|")
+            wav_file = os.path.join(root_path, cols[0])
+            # the third column is used because it spells numbers out:
+            # cols[1] => 6월 (digit), cols[2] => 유월 (written as a word)
+            text = cols[2]
+            # "root_path" is included so the items have the same keys as the other formatters
+            items.append({"text": text, "audio_file": wav_file, "speaker_name": speaker_name, "root_path": root_path})
+    return items
diff --git a/TTS/tts/layers/__init__.py b/TTS/tts/layers/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..f93efdb7fc41109ec3497d8e5e37ba05b0a4315e
--- /dev/null
+++ b/TTS/tts/layers/__init__.py
@@ -0,0 +1 @@
+from TTS.tts.layers.losses import *
diff --git a/TTS/tts/layers/__pycache__/__init__.cpython-37.pyc b/TTS/tts/layers/__pycache__/__init__.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a5b0bde80bd5a8caa90927c0725f17e0c1c94908
Binary files /dev/null and b/TTS/tts/layers/__pycache__/__init__.cpython-37.pyc differ
diff --git a/TTS/tts/layers/__pycache__/__init__.cpython-38.pyc b/TTS/tts/layers/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6ad6d1e03b807a98455b6f734fc3c0a7d42c1ff3
Binary files /dev/null and b/TTS/tts/layers/__pycache__/__init__.cpython-38.pyc differ
diff --git a/TTS/tts/layers/__pycache__/__init__.cpython-39.pyc b/TTS/tts/layers/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8fe11d359d260eb400d009d025c67b0a2528b834
Binary files /dev/null and b/TTS/tts/layers/__pycache__/__init__.cpython-39.pyc differ
diff --git a/TTS/tts/layers/__pycache__/losses.cpython-37.pyc b/TTS/tts/layers/__pycache__/losses.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4b159a79b37ce583a40f62f088b97ec724201b0b
Binary files /dev/null and b/TTS/tts/layers/__pycache__/losses.cpython-37.pyc differ
diff --git a/TTS/tts/layers/__pycache__/losses.cpython-38.pyc b/TTS/tts/layers/__pycache__/losses.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d18e66a470b977adf4619a30b7e419c732c4d87e
Binary files /dev/null and b/TTS/tts/layers/__pycache__/losses.cpython-38.pyc differ
diff --git a/TTS/tts/layers/__pycache__/losses.cpython-39.pyc b/TTS/tts/layers/__pycache__/losses.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..35fb9d4dbb0ff62af4d632964a7acfeb5be61428
Binary files /dev/null and b/TTS/tts/layers/__pycache__/losses.cpython-39.pyc differ
diff --git a/TTS/tts/layers/align_tts/__init__.py b/TTS/tts/layers/align_tts/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/TTS/tts/layers/align_tts/duration_predictor.py b/TTS/tts/layers/align_tts/duration_predictor.py
new file mode 100644
index 0000000000000000000000000000000000000000..b2b83894cc3f87575a89ea8fd7bf4a584ca22c28
--- /dev/null
+++ b/TTS/tts/layers/align_tts/duration_predictor.py
@@ -0,0 +1,21 @@
+from torch import nn
+
+from TTS.tts.layers.generic.pos_encoding import PositionalEncoding
+from TTS.tts.layers.generic.transformer import FFTransformerBlock
+
+
+class DurationPredictor(nn.Module):
+    """Duration predictor built from an embedding, a positional encoding, an
+    ``FFTransformerBlock`` and a final 1x1 convolution with a single output channel.
+    """
+
+    def __init__(self, num_chars, hidden_channels, hidden_channels_ffn, num_heads):
+        super().__init__()
+        self.embed = nn.Embedding(num_chars, hidden_channels)
+        self.pos_enc = PositionalEncoding(hidden_channels, dropout_p=0.1)
+        # NOTE(review): the trailing ``2, 0.1`` are presumably the number of layers and the
+        # dropout rate of the transformer block - confirm against FFTransformerBlock.
+        self.FFT = FFTransformerBlock(hidden_channels, num_heads, hidden_channels_ffn, 2, 0.1)
+        self.out_layer = nn.Conv1d(hidden_channels, 1, 1)
+
+    def forward(self, text, text_lengths):
+        """Run the predictor on a batch of character ids.
+
+        Shape note carried over from the original code: B, L -> B, L
+        """
+        # the embedding output is transposed before the positional encoding is applied
+        embedded = self.embed(text).transpose(1, 2)
+        encoded = self.pos_enc(embedded)
+        hidden = self.FFT(encoded, text_lengths)
+        # squeeze(-1) only removes the last axis when its size is 1
+        return self.out_layer(hidden).squeeze(-1)
diff --git a/TTS/tts/layers/align_tts/mdn.py b/TTS/tts/layers/align_tts/mdn.py
new file mode 100644
index 0000000000000000000000000000000000000000..cdb332524bf7a5fec6a23da9e7977de6325a0324
--- /dev/null
+++ b/TTS/tts/layers/align_tts/mdn.py
@@ -0,0 +1,30 @@
+from torch import nn
+
+
+class MDNBlock(nn.Module):
+    """Mixture Density Network head: 1x1 conv -> LayerNorm -> ReLU -> dropout -> 1x1 conv.
+    https://arxiv.org/pdf/2003.01950.pdf
+    """
+
+    def __init__(self, in_channels, out_channels):
+        super().__init__()
+        self.out_channels = out_channels
+        self.conv1 = nn.Conv1d(in_channels, in_channels, 1)
+        self.norm = nn.LayerNorm(in_channels)
+        self.relu = nn.ReLU()
+        self.dropout = nn.Dropout(0.1)
+        self.conv2 = nn.Conv1d(in_channels, out_channels, 1)
+
+    def forward(self, x):
+        """Return ``(mu, log_sigma)``: the output channels below / from index ``out_channels // 2``."""
+        hidden = self.conv1(x)
+        # nn.LayerNorm normalizes the last axis, so move the channels there and back again
+        hidden = self.norm(hidden.transpose(1, 2)).transpose(1, 2)
+        hidden = self.dropout(self.relu(hidden))
+        mu_sigma = self.conv2(hidden)
+        half = self.out_channels // 2
+        # TODO: check this sigmoid
+        # mu = torch.sigmoid(mu_sigma[:, :half, :])
+        mu = mu_sigma[:, :half, :]
+        log_sigma = mu_sigma[:, half:, :]
+        return mu, log_sigma
diff --git a/TTS/tts/layers/feed_forward/__init__.py b/TTS/tts/layers/feed_forward/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/TTS/tts/layers/feed_forward/decoder.py b/TTS/tts/layers/feed_forward/decoder.py
new file mode 100644
index 0000000000000000000000000000000000000000..34c586aab24e014ce99d5806a975585a242b81bd
--- /dev/null
+++ b/TTS/tts/layers/feed_forward/decoder.py
@@ -0,0 +1,230 @@
+import torch
+from torch import nn
+
+from TTS.tts.layers.generic.res_conv_bn import Conv1dBN, Conv1dBNBlock, ResidualConv1dBNBlock
+from TTS.tts.layers.generic.transformer import FFTransformerBlock
+from TTS.tts.layers.generic.wavenet import WNBlocks
+from TTS.tts.layers.glow_tts.transformer import RelativePositionTransformer
+
+
+class WaveNetDecoder(nn.Module):
+    """WaveNet based decoder with a prenet and a postnet.
+
+    prenet: conv1d_1x1
+    postnet: 3 x [conv1d_1x1 -> relu] -> conv1d_1x1
+
+    TODO: Integrate speaker conditioning vector.
+
+    Note:
+        default wavenet parameters;
+            params = {
+                "num_blocks": 12,
+                "hidden_channels":192,
+                "kernel_size": 5,
+                "dilation_rate": 1,
+                "num_layers": 4,
+                "dropout_p": 0.05
+            }
+    Args:
+        in_channels (int): number of input channels.
+        out_channels (int): number of output channels.
+        hidden_channels (int): number of channels inside the postnet.
+        c_in_channels (int): forwarded to ``WNBlocks`` as its ``c_in_channels`` argument.
+        params (dict): keyword arguments for ``WNBlocks``; ``params["hidden_channels"]`` also sets the prenet width.
+    """
+
+    def __init__(self, in_channels, out_channels, hidden_channels, c_in_channels, params):
+        super().__init__()
+        wn_channels = params["hidden_channels"]
+        # prenet: 1x1 projection onto the WaveNet channel width
+        self.prenet = torch.nn.Conv1d(in_channels, wn_channels, 1)
+        # wavenet layers
+        self.wn = WNBlocks(wn_channels, c_in_channels=c_in_channels, **params)
+        # postnet: 3 x [conv1d_1x1 -> relu] -> conv1d_1x1
+        self.postnet = nn.Sequential(
+            torch.nn.Conv1d(wn_channels, hidden_channels, 1),
+            torch.nn.ReLU(),
+            torch.nn.Conv1d(hidden_channels, hidden_channels, 1),
+            torch.nn.ReLU(),
+            torch.nn.Conv1d(hidden_channels, hidden_channels, 1),
+            torch.nn.ReLU(),
+            torch.nn.Conv1d(hidden_channels, out_channels, 1),
+        )
+
+    def forward(self, x, x_mask=None, g=None):
+        # x_mask multiplies both the prenet and the postnet output, so callers have to supply it
+        hidden = self.prenet(x) * x_mask
+        hidden = self.wn(hidden, x_mask, g)
+        return self.postnet(hidden) * x_mask
+
+
+class RelativePositionTransformerDecoder(nn.Module):
+    """Decoder with Relative Positional Transformer.
+
+    Note:
+        Default params
+            params={
+                'hidden_channels_ffn': 128,
+                'num_heads': 2,
+                "kernel_size": 3,
+                "dropout_p": 0.1,
+                "num_layers": 8,
+                "rel_attn_window_size": 4,
+                "input_length": None
+            }
+
+    Args:
+        in_channels (int): number of input channels.
+        out_channels (int): number of output channels.
+        hidden_channels (int): number of hidden channels including Transformer layers.
+        params (dict): keyword arguments forwarded to ``RelativePositionTransformer``.
+    """
+
+    def __init__(self, in_channels, out_channels, hidden_channels, params):
+        super().__init__()
+        self.prenet = Conv1dBN(in_channels, hidden_channels, 1, 1)
+        # The transformer consumes the prenet output, which has ``hidden_channels`` channels.
+        self.rel_pos_transformer = RelativePositionTransformer(hidden_channels, out_channels, hidden_channels, **params)
+
+    def forward(self, x, x_mask=None, g=None):  # pylint: disable=unused-argument
+        o = self.prenet(x) * x_mask
+        o = self.rel_pos_transformer(o, x_mask)
+        return o
+
+
+class FFTransformerDecoder(nn.Module):
+    """Decoder with FeedForwardTransformer.
+
+    Default params
+            params={
+                'hidden_channels_ffn': 1024,
+                'num_heads': 2,
+                "dropout_p": 0.1,
+                "num_layers": 6,
+            }
+
+    Args:
+        in_channels (int): number of input channels; the transformer block output keeps this width.
+        out_channels (int): number of output channels.
+        params (dict): keyword arguments forwarded to ``FFTransformerBlock``.
+    """
+
+    def __init__(self, in_channels, out_channels, params):
+        super().__init__()
+        self.transformer_block = FFTransformerBlock(in_channels, **params)
+        self.postnet = nn.Conv1d(in_channels, out_channels, 1)
+
+    def forward(self, x, x_mask=None, g=None):  # pylint: disable=unused-argument
+        # TODO: handle multi-speaker
+        # a missing mask becomes the scalar 1, which turns both multiplications into no-ops
+        if x_mask is None:
+            x_mask = 1
+        hidden = self.transformer_block(x) * x_mask
+        o = self.postnet(hidden) * x_mask
+        return o
+
+
+class ResidualConv1dBNDecoder(nn.Module):
+    """Residual Convolutional Decoder as in the original Speedy Speech paper
+
+    TODO: Integrate speaker conditioning vector.
+
+    Note:
+        Default params
+                params = {
+                    "kernel_size": 4,
+                    "dilations": 4 * [1, 2, 4, 8] + [1],
+                    "num_conv_blocks": 2,
+                    "num_res_blocks": 17
+                }
+
+    Args:
+        in_channels (int): number of input channels; the skip connection adds the input to hidden features.
+        out_channels (int): number of output channels.
+        hidden_channels (int): number of hidden channels including ResidualConv1dBNBlock layers.
+        params (dict): keyword arguments for ``ResidualConv1dBNBlock``; ``kernel_size`` is reused by the postnet.
+    """
+
+    def __init__(self, in_channels, out_channels, hidden_channels, params):
+        super().__init__()
+        self.res_conv_block = ResidualConv1dBNBlock(in_channels, hidden_channels, hidden_channels, **params)
+        self.post_conv = nn.Conv1d(hidden_channels, hidden_channels, 1)
+        # postnet: a Conv1dBNBlock (dilation 1, num_conv_blocks=2) followed by a 1x1 output projection
+        postnet_block = Conv1dBNBlock(
+            hidden_channels, hidden_channels, hidden_channels, params["kernel_size"], 1, num_conv_blocks=2
+        )
+        postnet_proj = nn.Conv1d(hidden_channels, out_channels, 1)
+        self.postnet = nn.Sequential(postnet_block, postnet_proj)
+
+    def forward(self, x, x_mask=None, g=None):  # pylint: disable=unused-argument
+        residual = self.res_conv_block(x, x_mask)
+        # skip connection around the residual stack, then postnet and masking
+        return self.postnet(self.post_conv(residual) + x) * x_mask
+
+
+class Decoder(nn.Module):
+    """Decodes the expanded phoneme encoding into spectrograms
+
+    Thin factory: builds the decoder selected by ``decoder_type`` and delegates ``forward`` to it.
+
+    Args:
+        out_channels (int): number of output channels.
+        in_hidden_channels (int): input and hidden channels. Model keeps the input channels for the intermediate layers.
+        decoder_type (str): decoder layer type, matched case-insensitively. One of 'relative_position_transformer',
+            'residual_conv_bn', 'wavenet' or 'fftransformer'. Default 'residual_conv_bn'.
+        decoder_params (dict): model parameters for specified decoder type.
+        c_in_channels (int): number of channels for conditional input. Only handed to the 'wavenet' decoder.
+
+    Raises:
+        ValueError: if ``decoder_type`` is none of the names above.
+
+    Shapes:
+        - input: (B, C, T)
+        - x_mask: (B, 1, T)
+    """
+
+    # pylint: disable=dangerous-default-value
+    def __init__(
+        self,
+        out_channels,
+        in_hidden_channels,
+        decoder_type="residual_conv_bn",
+        decoder_params={
+            "kernel_size": 4,
+            "dilations": 4 * [1, 2, 4, 8] + [1],
+            "num_conv_blocks": 2,
+            "num_res_blocks": 17,
+        },
+        c_in_channels=0,
+    ):
+        super().__init__()
+
+        kind = decoder_type.lower()
+        # keyword arguments shared by every decoder except 'fftransformer'
+        common = {
+            "in_channels": in_hidden_channels,
+            "out_channels": out_channels,
+            "hidden_channels": in_hidden_channels,
+            "params": decoder_params,
+        }
+        if kind == "relative_position_transformer":
+            self.decoder = RelativePositionTransformerDecoder(**common)
+        elif kind == "residual_conv_bn":
+            self.decoder = ResidualConv1dBNDecoder(**common)
+        elif kind == "wavenet":
+            self.decoder = WaveNetDecoder(c_in_channels=c_in_channels, **common)
+        elif kind == "fftransformer":
+            self.decoder = FFTransformerDecoder(in_hidden_channels, out_channels, decoder_params)
+        else:
+            raise ValueError(f"[!] Unknown decoder type - {decoder_type}")
+
+    def forward(self, x, x_mask, g=None):  # pylint: disable=unused-argument
+        """Delegate to the wrapped decoder.
+
+        Args:
+            x: [B, C, T]
+            x_mask: [B, 1, T]
+            g: [B, C_g, 1]
+        """
+        # TODO: implement multi-speaker
+        return self.decoder(x, x_mask, g)
diff --git a/TTS/tts/layers/feed_forward/duration_predictor.py b/TTS/tts/layers/feed_forward/duration_predictor.py
new file mode 100644
index 0000000000000000000000000000000000000000..5392aeca3cd4eed08daeb2a3c34c735baec18364
--- /dev/null
+++ b/TTS/tts/layers/feed_forward/duration_predictor.py
@@ -0,0 +1,42 @@
+from torch import nn
+
+from TTS.tts.layers.generic.res_conv_bn import Conv1dBN
+
+
+class DurationPredictor(nn.Module):
+    """Speedy Speech duration predictor model.
+    Predicts phoneme durations from encoder outputs.
+
+    Note:
+        Outputs interpreted as log(durations)
+        To get actual durations, do exp transformation
+
+    conv_BN_4x1 -> conv_BN_3x1 -> conv_BN_1x1 -> conv_1x1
+
+    Args:
+        hidden_channels (int): number of channels in the inner layers.
+    """
+
+    def __init__(self, hidden_channels):
+        super().__init__()
+
+        # three conv-BN stages with kernel sizes 4, 3 and 1, all with dilation 1
+        stages = [
+            Conv1dBN(hidden_channels, hidden_channels, kernel_size, 1) for kernel_size in (4, 3, 1)
+        ]
+        # final 1x1 convolution down to a single channel
+        stages.append(nn.Conv1d(hidden_channels, 1, 1))
+        self.layers = nn.ModuleList(stages)
+
+    def forward(self, x, x_mask):
+        """Run every layer in order, multiplying by ``x_mask`` after each one.
+
+        Shapes:
+            x: [B, C, T]
+            x_mask: [B, 1, T]
+            output: [B, 1, T]
+        """
+        hidden = x
+        for stage in self.layers:
+            hidden = stage(hidden) * x_mask
+        return hidden
diff --git a/TTS/tts/layers/feed_forward/encoder.py b/TTS/tts/layers/feed_forward/encoder.py
new file mode 100644
index 0000000000000000000000000000000000000000..caf939ffc73fedac299228e090b2df3bb4cc553c
--- /dev/null
+++ b/TTS/tts/layers/feed_forward/encoder.py
@@ -0,0 +1,162 @@
+from torch import nn
+
+from TTS.tts.layers.generic.res_conv_bn import ResidualConv1dBNBlock
+from TTS.tts.layers.generic.transformer import FFTransformerBlock
+from TTS.tts.layers.glow_tts.transformer import RelativePositionTransformer
+
+
+class RelativePositionTransformerEncoder(nn.Module):
+    """Speedy speech encoder built on Transformer with Relative Position encoding.
+
+    A ``ResidualConv1dBNBlock`` prenet with fixed hyper-parameters feeds a
+    ``RelativePositionTransformer`` that is configured through ``params``.
+
+    TODO: Integrate speaker conditioning vector.
+
+    Args:
+        in_channels (int): number of input channels.
+        out_channels (int): number of output channels.
+        hidden_channels (int): number of hidden channels
+        params (dict): keyword arguments forwarded to ``RelativePositionTransformer``.
+    """
+
+    def __init__(self, in_channels, out_channels, hidden_channels, params):
+        super().__init__()
+
+        # the prenet settings are hard-coded rather than read from ``params``
+        prenet_params = {"kernel_size": 5, "num_res_blocks": 3, "num_conv_blocks": 1, "dilations": [1, 1, 1]}
+        self.prenet = ResidualConv1dBNBlock(in_channels, hidden_channels, hidden_channels, **prenet_params)
+
+        self.rel_pos_transformer = RelativePositionTransformer(hidden_channels, out_channels, hidden_channels, **params)
+
+    def forward(self, x, x_mask=None, g=None):  # pylint: disable=unused-argument
+        """Prenet followed by the relative-position transformer; ``g`` is ignored."""
+        # without a mask, the scalar 1 is used in its place
+        x_mask = 1 if x_mask is None else x_mask
+        prenet_out = self.prenet(x) * x_mask
+        # the (possibly scalar) mask is handed on to the transformer unchanged
+        return self.rel_pos_transformer(prenet_out, x_mask)
+
+
+class ResidualConv1dBNEncoder(nn.Module):
+    """Residual Convolutional Encoder as in the original Speedy Speech paper
+
+    TODO: Integrate speaker conditioning vector.
+
+    Args:
+        in_channels (int): number of input channels; the input is added to hidden features before the postnet.
+        out_channels (int): number of output channels.
+        hidden_channels (int): number of hidden channels
+        params (dict): keyword arguments forwarded to ``ResidualConv1dBNBlock``.
+    """
+
+    def __init__(self, in_channels, out_channels, hidden_channels, params):
+        super().__init__()
+        # prenet: 1x1 conv -> relu
+        self.prenet = nn.Sequential(nn.Conv1d(in_channels, hidden_channels, 1), nn.ReLU())
+        self.res_conv_block = ResidualConv1dBNBlock(hidden_channels, hidden_channels, hidden_channels, **params)
+        # postnet: 1x1 conv -> relu -> batch norm -> 1x1 conv
+        self.postnet = nn.Sequential(
+            nn.Conv1d(hidden_channels, hidden_channels, 1),
+            nn.ReLU(),
+            nn.BatchNorm1d(hidden_channels),
+            nn.Conv1d(hidden_channels, out_channels, 1),
+        )
+
+    def forward(self, x, x_mask=None, g=None):  # pylint: disable=unused-argument
+        mask = 1 if x_mask is None else x_mask
+        hidden = self.prenet(x) * mask
+        hidden = self.res_conv_block(hidden, mask)
+        # skip connection from the raw input into the postnet
+        out = self.postnet(hidden + x) * mask
+        # the mask is applied once more to the already-masked output
+        return out * mask
+
+
+class Encoder(nn.Module):
+    # pylint: disable=dangerous-default-value
+    """Factory class for Speedy Speech encoder enables different encoder types internally.
+
+    Args:
+        in_hidden_channels (int): input and hidden channels. Model keeps the input channels for the intermediate layers.
+        out_channels (int): number of output channels.
+        encoder_type (str): encoder layer type, matched case-insensitively. One of 'relative_position_transformer',
+            'residual_conv_bn' or 'fftransformer'. Default 'residual_conv_bn'.
+        encoder_params (dict): model parameters for specified encoder type.
+        c_in_channels (int): number of channels for conditional input. Stored on the instance only.
+
+    Note:
+        Default encoder_params to be set in config.json...
+
+        ```python
+        # for 'relative_position_transformer'
+        encoder_params={
+            'hidden_channels_ffn': 128,
+            'num_heads': 2,
+            "kernel_size": 3,
+            "dropout_p": 0.1,
+            "num_layers": 6,
+            "rel_attn_window_size": 4,
+            "input_length": None
+        },
+
+        # for 'residual_conv_bn'
+        encoder_params = {
+            "kernel_size": 4,
+            "dilations": 4 * [1, 2, 4] + [1],
+            "num_conv_blocks": 2,
+            "num_res_blocks": 13
+        }
+
+        # for 'fftransformer'
+        encoder_params = {
+            "hidden_channels_ffn": 1024 ,
+            "num_heads": 2,
+            "num_layers": 6,
+            "dropout_p": 0.1
+        }
+        ```
+    """
+
+    def __init__(
+        self,
+        in_hidden_channels,
+        out_channels,
+        encoder_type="residual_conv_bn",
+        encoder_params={"kernel_size": 4, "dilations": 4 * [1, 2, 4] + [1], "num_conv_blocks": 2, "num_res_blocks": 13},
+        c_in_channels=0,
+    ):
+        super().__init__()
+        self.out_channels = out_channels
+        self.in_channels = in_hidden_channels
+        self.hidden_channels = in_hidden_channels
+        self.encoder_type = encoder_type
+        self.c_in_channels = c_in_channels
+
+        # init encoder
+        kind = encoder_type.lower()
+        if kind == "relative_position_transformer":
+            # pylint: disable=unexpected-keyword-arg
+            self.encoder = RelativePositionTransformerEncoder(
+                in_hidden_channels, out_channels, in_hidden_channels, encoder_params
+            )
+        elif kind == "residual_conv_bn":
+            self.encoder = ResidualConv1dBNEncoder(in_hidden_channels, out_channels, in_hidden_channels, encoder_params)
+        elif kind == "fftransformer":
+            assert (
+                in_hidden_channels == out_channels
+            ), "[!] must be `in_channels` == `out_channels` when encoder type is 'fftransformer'"
+            # pylint: disable=unexpected-keyword-arg
+            self.encoder = FFTransformerBlock(in_hidden_channels, **encoder_params)
+        else:
+            raise NotImplementedError(" [!] unknown encoder type.")
+
+    def forward(self, x, x_mask, g=None):  # pylint: disable=unused-argument
+        """Run the selected encoder and multiply its output by ``x_mask``.
+
+        Shapes:
+            x: [B, C, T]
+            x_mask: [B, 1, T]
+            g: [B, C, 1]
+        """
+        return self.encoder(x, x_mask) * x_mask
diff --git a/TTS/tts/layers/generic/__init__.py b/TTS/tts/layers/generic/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/TTS/tts/layers/generic/aligner.py b/TTS/tts/layers/generic/aligner.py
new file mode 100644
index 0000000000000000000000000000000000000000..eef4c4b66d80f9bab83ddf81427e5b48d2a43b4b
--- /dev/null
+++ b/TTS/tts/layers/generic/aligner.py
@@ -0,0 +1,81 @@
+from typing import Tuple
+
+import torch
+from torch import nn
+
+
+class AlignmentNetwork(torch.nn.Module):
+    """Aligner Network for learning alignment between the input text and the model output with Gaussian Attention.
+
+    ::
+
+        query -> conv1d -> relu -> conv1d -> relu -> conv1d -> L2_dist -> softmax -> alignment
+        key   -> conv1d -> relu -> conv1d -----------------------^
+
+    Args:
+        in_query_channels (int): Number of channels in the query network. Defaults to 80.
+        in_key_channels (int): Number of channels in the key network. Defaults to 512.
+        attn_channels (int): Number of inner channels in the attention layers. Defaults to 80.
+        temperature (float): Temperature for the softmax. Defaults to 0.0005.
+    """
+
+    def __init__(
+        self,
+        in_query_channels=80,
+        in_key_channels=512,
+        attn_channels=80,
+        temperature=0.0005,
+    ):
+        super().__init__()
+        self.temperature = temperature
+        self.softmax = torch.nn.Softmax(dim=3)
+        self.log_softmax = torch.nn.LogSoftmax(dim=3)
+
+        self.key_layer = nn.Sequential(
+            nn.Conv1d(
+                in_key_channels,
+                in_key_channels * 2,
+                kernel_size=3,
+                padding=1,
+                bias=True,
+            ),
+            torch.nn.ReLU(),
+            nn.Conv1d(in_key_channels * 2, attn_channels, kernel_size=1, padding=0, bias=True),
+        )
+
+        self.query_layer = nn.Sequential(
+            nn.Conv1d(
+                in_query_channels,
+                in_query_channels * 2,
+                kernel_size=3,
+                padding=1,
+                bias=True,
+            ),
+            torch.nn.ReLU(),
+            nn.Conv1d(in_query_channels * 2, in_query_channels, kernel_size=1, padding=0, bias=True),
+            torch.nn.ReLU(),
+            nn.Conv1d(in_query_channels, attn_channels, kernel_size=1, padding=0, bias=True),
+        )
+
+    def forward(
+        self, queries: torch.Tensor, keys: torch.Tensor, mask: torch.Tensor = None, attn_prior: torch.Tensor = None
+    ) -> Tuple[torch.Tensor, torch.Tensor]:
+        """Forward pass of the aligner encoder.
+        Shapes:
+            - queries: :math:`[B, C, T_de]`
+            - keys: :math:`[B, C_emb, T_en]`
+            - mask: :math:`[B, 1, T_en]`; zero entries mark keys that receive no attention.
+            - attn_prior: :math:`[B, T_de, T_en]`
+        Output:
+            attn (torch.Tensor): :math:`[B, 1, T_de, T_en]` soft attention mask, normalized over :math:`T_en`.
+            attn_logp (torch.Tensor): :math:`[B, 1, T_de, T_en]` log probabilities.
+        """
+        key_out = self.key_layer(keys)
+        query_out = self.query_layer(queries)
+        attn_factor = (query_out[:, :, :, None] - key_out[:, :, None]) ** 2
+        attn_logp = -self.temperature * attn_factor.sum(1, keepdim=True)
+        if attn_prior is not None:
+            attn_logp = self.log_softmax(attn_logp) + torch.log(attn_prior[:, None] + 1e-8)
+        if mask is not None:
+            attn_logp.data.masked_fill_(~mask.bool().unsqueeze(2), -float("inf"))  # in-place on .data, untracked
+        return self.softmax(attn_logp), attn_logp
diff --git a/TTS/tts/layers/generic/gated_conv.py b/TTS/tts/layers/generic/gated_conv.py
new file mode 100644
index 0000000000000000000000000000000000000000..9a29c4499f970db538a4b99c3c05cba22576195f
--- /dev/null
+++ b/TTS/tts/layers/generic/gated_conv.py
@@ -0,0 +1,37 @@
+from torch import nn
+
+from .normalization import LayerNorm
+
+
+class GatedConvBlock(nn.Module):
+    """Gated convolutional block as in https://arxiv.org/pdf/1612.08083.pdf
+    Args:
+        in_out_channels (int): number of input/output channels.
+        kernel_size (int): convolution kernel size.
+        dropout_p (float): dropout rate.
+        num_layers (int): number of stacked dropout -> conv -> norm -> GLU layers.
+    """
+
+    def __init__(self, in_out_channels, kernel_size, dropout_p, num_layers):
+        super().__init__()
+        # class arguments
+        self.dropout_p = dropout_p
+        self.num_layers = num_layers
+        # define layers
+        self.conv_layers = nn.ModuleList()
+        self.norm_layers = nn.ModuleList()
+        # ``layers`` is created empty and nothing in this class fills it
+        self.layers = nn.ModuleList()
+        for _ in range(num_layers):
+            self.conv_layers.append(nn.Conv1d(in_out_channels, 2 * in_out_channels, kernel_size, padding=kernel_size // 2))
+            self.norm_layers.append(LayerNorm(2 * in_out_channels))
+
+    def forward(self, x, x_mask):
+        """Apply the layers in order, each one wrapped in a residual connection."""
+        out = x
+        for conv, norm in zip(self.conv_layers, self.norm_layers):
+            gated = nn.functional.dropout(out, p=self.dropout_p, training=self.training)
+            gated = norm(conv(gated * x_mask))
+            # GLU halves the doubled channel count, so the residual sum matches in shape
+            out = out + nn.functional.glu(gated, dim=1)
+        return out
diff --git a/TTS/tts/layers/generic/normalization.py b/TTS/tts/layers/generic/normalization.py
new file mode 100644
index 0000000000000000000000000000000000000000..c0270e405e4246e47b7bc0787e4cd4b069533f92
--- /dev/null
+++ b/TTS/tts/layers/generic/normalization.py
@@ -0,0 +1,123 @@
+import torch
+from torch import nn
+
+
+class LayerNorm(nn.Module):
+    def __init__(self, channels, eps=1e-4):
+        """Layer norm for the 2nd dimension of the input.
+        Args:
+            channels (int): number of channels (2nd dimension) of the input.
+            eps (float): added to the variance before the reciprocal square root, to prevent 0 division
+
+        Shapes:
+            - input: (B, C, T)
+            - output: (B, C, T)
+        """
+        super().__init__()
+        self.channels = channels
+        self.eps = eps
+        # learnable per-channel scale (starts at 0.1) and shift (starts at 0)
+        self.gamma = nn.Parameter(torch.ones(1, channels, 1) * 0.1)
+        self.beta = nn.Parameter(torch.zeros(1, channels, 1))
+
+    def forward(self, x):
+        # statistics are taken over dim 1 only, separately for every other index
+        centered = x - torch.mean(x, 1, keepdim=True)
+        variance = torch.mean(centered**2, 1, keepdim=True)
+        normalized = centered * torch.rsqrt(variance + self.eps)
+        return normalized * self.gamma + self.beta
+
+
+class LayerNorm2(nn.Module):
+    """Layer norm for the 2nd dimension of the input using torch primitive.
+    Args:
+        channels (int): number of channels (2nd dimension) of the input.
+        eps (float): to prevent 0 division
+
+    Shapes:
+        - input: (B, C, T)
+        - output: (B, C, T)
+    """
+
+    def __init__(self, channels, eps=1e-5):
+        super().__init__()
+        self.channels = channels
+        self.eps = eps
+        self.gamma = nn.Parameter(torch.ones(channels))
+        self.beta = nn.Parameter(torch.zeros(channels))
+
+    def forward(self, x):
+        # layer_norm works on the trailing axis: swap channels to the end, normalize, swap back
+        channels_last = x.transpose(1, -1)
+        normalized = torch.nn.functional.layer_norm(channels_last, (self.channels,), self.gamma, self.beta, self.eps)
+        return normalized.transpose(1, -1)
+
+
+class TemporalBatchNorm1d(nn.BatchNorm1d):
+    """Per-channel batch norm over time and batch; axes 1 and 2 are swapped around the parent call."""
+
+    def __init__(self, channels, affine=True, track_running_stats=True, momentum=0.1):
+        super().__init__(channels, momentum=momentum, affine=affine, track_running_stats=track_running_stats)
+
+    def forward(self, x):
+        return super().forward(x.transpose(1, 2)).transpose(1, 2)
+
+
+class ActNorm(nn.Module):
+    """Activation Normalization bijector as an alternative to Batch Norm. With data
+    dependent initialization enabled, scale and bias are derived from the first batch
+    passed to ``forward``; they are ordinary learnable parameters.
+
+    Args:
+        channels (int): input channels.
+        ddi (bool): data depended initialization flag. Defaults to False.
+
+    Shapes:
+        - inputs: (B, C, T)
+        - outputs: (B, C, T)
+    """
+
+    def __init__(self, channels, ddi=False, **kwargs):  # pylint: disable=unused-argument
+        super().__init__()
+        self.channels = channels
+        self.initialized = not ddi
+        # log-scale and bias both start at zero
+        self.logs = nn.Parameter(torch.zeros(1, channels, 1))
+        self.bias = nn.Parameter(torch.zeros(1, channels, 1))
+
+    def forward(self, x, x_mask=None, reverse=False, **kwargs):  # pylint: disable=unused-argument
+        if x_mask is None:
+            x_mask = torch.ones(x.size(0), 1, x.size(2)).to(device=x.device, dtype=x.dtype)
+        x_len = torch.sum(x_mask, [1, 2])  # mask summed over axes 1 and 2, one value per batch item
+        if not self.initialized:
+            # data dependent init: runs on the first call that finds the flag unset
+            self.initialize(x, x_mask)
+            self.initialized = True
+
+        if reverse:
+            # inverse of the affine map below; no log-determinant is reported in this direction
+            return (x - self.bias) * torch.exp(-self.logs) * x_mask, None
+
+        # forward direction: affine map plus its log-determinant
+        z = (self.bias + torch.exp(self.logs) * x) * x_mask
+        return z, torch.sum(self.logs) * x_len  # [b]
+
+    def store_inverse(self):
+        """Intentionally does nothing."""
+
+    def set_ddi(self, ddi):
+        self.initialized = not ddi  # ddi=True makes the next forward call re-run ``initialize``
+
+    def initialize(self, x, x_mask):
+        """Set ``bias`` and ``logs`` from the masked per-channel mean and variance of ``x``."""
+        with torch.no_grad():
+            count = torch.sum(x_mask, [0, 2])
+            mean = torch.sum(x * x_mask, [0, 2]) / count
+            mean_sq = torch.sum(x * x * x_mask, [0, 2]) / count
+            # variance as E[x^2] - E[x]^2, clamped from below before taking the log
+            log_std = 0.5 * torch.log(torch.clamp_min(mean_sq - (mean**2), 1e-6))
+
+            bias_init = (-mean * torch.exp(-log_std)).view(*self.bias.shape).to(dtype=self.bias.dtype)
+            logs_init = (-log_std).view(*self.logs.shape).to(dtype=self.logs.dtype)
+            self.bias.data.copy_(bias_init)
+            self.logs.data.copy_(logs_init)
diff --git a/TTS/tts/layers/generic/pos_encoding.py b/TTS/tts/layers/generic/pos_encoding.py
new file mode 100644
index 0000000000000000000000000000000000000000..913add0d14332bf70c3ecd2a95869d0071310bd4
--- /dev/null
+++ b/TTS/tts/layers/generic/pos_encoding.py
@@ -0,0 +1,69 @@
+import math
+
+import torch
+from torch import nn
+
+
+class PositionalEncoding(nn.Module):
+    """Sinusoidal positional encoding for non-recurrent neural networks.
+    Implementation based on "Attention Is All You Need"
+
+    Args:
+       channels (int): embedding size; must be even.
+       dropout_p (float): dropout rate applied to the output.
+       max_len (int): maximum sequence length.
+       use_scale (bool): whether to use a learnable scaling coefficient.
+    """
+
+    def __init__(self, channels, dropout_p=0.0, max_len=5000, use_scale=False):
+        super().__init__()
+        if channels % 2 != 0:
+            raise ValueError(
+                "Cannot use sin/cos positional encoding with odd channels (got channels={:d})".format(channels)
+            )
+        self.use_scale = use_scale
+        if use_scale:
+            self.scale = torch.nn.Parameter(torch.ones(1))
+        # NOTE(review): positions are multiplied by 10000^(2i/channels), whereas the cited paper divides by it.
+        # Left as is because the table is a registered buffer -- confirm before changing the formula.
+        position = torch.arange(0, max_len).unsqueeze(1)
+        div_term = torch.pow(10000, torch.arange(0, channels, 2).float() / channels)
+        angles = position.float() * div_term
+        pe = torch.zeros(max_len, channels)
+        pe[:, 0::2] = torch.sin(angles)
+        pe[:, 1::2] = torch.cos(angles)
+        # stored channel-first as [1, channels, max_len]
+        self.register_buffer("pe", pe.unsqueeze(0).transpose(1, 2))
+        if dropout_p > 0:
+            self.dropout = nn.Dropout(p=dropout_p)
+        self.channels = channels
+
+    def forward(self, x, mask=None, first_idx=None, last_idx=None):
+        """Scale ``x`` by sqrt(channels), add the positional table and apply dropout if configured.
+
+        Shapes:
+            x: [B, C, T]
+            mask: [B, 1, T]
+            first_idx: int
+            last_idx: int
+        """
+
+        x = x * math.sqrt(self.channels)
+        if first_idx is None:
+            if self.pe.size(2) < x.size(2):
+                raise RuntimeError(
+                    f"Sequence is {x.size(2)} but PositionalEncoding is"
+                    f" limited to {self.pe.size(2)}. See max_len argument."
+                )
+            pos_enc = self.pe[:, :, : x.size(2)]
+            if mask is not None:
+                pos_enc = pos_enc * mask
+        else:
+            # explicit window into the table: no length check and no masking on this path
+            pos_enc = self.pe[:, :, first_idx:last_idx]
+        if self.use_scale:
+            pos_enc = self.scale * pos_enc
+        x = x + pos_enc
+        if hasattr(self, "dropout"):
+            x = self.dropout(x)
+        return x
diff --git a/TTS/tts/layers/generic/res_conv_bn.py b/TTS/tts/layers/generic/res_conv_bn.py
new file mode 100644
index 0000000000000000000000000000000000000000..30c134cd70018197950fb9fb4d7f5fa1a7198b5e
--- /dev/null
+++ b/TTS/tts/layers/generic/res_conv_bn.py
@@ -0,0 +1,128 @@
+from torch import nn
+
+
+class ZeroTemporalPad(nn.Module):
+    """Zero-pad the second-to-last dim by ``dilation * (kernel_size - 1)`` in total (odd remainder at the end)."""
+
+    def __init__(self, kernel_size, dilation):
+        super().__init__()
+        pad_total = (kernel_size - 1) * dilation
+        pad_front = pad_total // 2
+        # ZeroPad2d takes (left, right, top, bottom): the last dim is left alone here.
+        self.pad_layer = nn.ZeroPad2d((0, 0, pad_front, pad_total - pad_front))
+
+    def forward(self, x):
+        return self.pad_layer(x)
+
+
+class Conv1dBN(nn.Module):
+    """Length-preserving 1d convolution followed by ReLU and batch norm.
+
+    The unpadded convolution shortens the sequence by ``dilation * (kernel_size - 1)``
+    steps; zeros are appended to its *output* (not its input) to restore the length.
+
+    Note:
+        Batch normalization is applied after ReLU regarding the original implementation.
+
+    Args:
+        in_channels (int): number of input channels.
+        out_channels (int): number of output channels.
+        kernel_size (int): kernel size for convolutional filters.
+        dilation (int): dilation for convolution layers.
+    """
+
+    def __init__(self, in_channels, out_channels, kernel_size, dilation):
+        super().__init__()
+        lost_steps = (kernel_size - 1) * dilation
+        left = lost_steps // 2
+        self.conv1d = nn.Conv1d(in_channels, out_channels, kernel_size, dilation=dilation)
+        self.pad = nn.ZeroPad2d((left, lost_steps - left, 0, 0))  # uneven left and right padding
+        self.norm = nn.BatchNorm1d(out_channels)
+
+    def forward(self, x):
+        """Run conv -> output padding -> ReLU -> batch norm on ``x``."""
+        padded = self.pad(self.conv1d(x))
+        activated = nn.functional.relu(padded)
+        return self.norm(activated)
+
+
+class Conv1dBNBlock(nn.Module):
+    """Sequential stack of ``num_conv_blocks`` :class:`Conv1dBN` layers (conv1d -> relu -> BN each).
+
+    Args:
+        in_channels (int): number of input channels.
+        out_channels (int): number of output channels.
+        hidden_channels (int): number of inner convolution channels.
+        kernel_size (int): kernel size for convolutional filters.
+        dilation (int): dilation for convolution layers.
+        num_conv_blocks (int, optional): number of convolutional blocks. Defaults to 2.
+    """
+
+    def __init__(self, in_channels, out_channels, hidden_channels, kernel_size, dilation, num_conv_blocks=2):
+        super().__init__()
+        # The first layer consumes ``in_channels`` and the last one emits ``out_channels``;
+        # every width in between is ``hidden_channels``.
+        last = num_conv_blocks - 1
+        layers = [
+            Conv1dBN(
+                hidden_channels if idx else in_channels,
+                out_channels if idx == last else hidden_channels,
+                kernel_size,
+                dilation,
+            )
+            for idx in range(num_conv_blocks)
+        ]
+        self.conv_bn_blocks = nn.Sequential(*layers)
+
+    def forward(self, x):
+        """Apply the stacked layers to ``x`` of shape (B, D, T)."""
+        return self.conv_bn_blocks(x)
+
+
+class ResidualConv1dBNBlock(nn.Module):
+    """Residual Convolutional Blocks with BN
+    Each block has 'num_conv_block' conv layers and 'num_res_blocks' such blocks are connected
+    with residual connections.
+
+    conv_block = (conv1d -> relu -> bn) x 'num_conv_blocks'
+    residual_conv_block =  (x -> conv_block ->  + ->) x 'num_res_blocks'
+                            ' - - - - - - - - - ^
+    Args:
+        in_channels (int): number of input channels.
+        out_channels (int): number of output channels.
+        hidden_channels (int): number of inner convolution channels.
+        kernel_size (int): kernel size for convolutional filters.
+        dilations (list): dilations for each convolution layer.
+        num_res_blocks (int, optional): number of residual blocks. Defaults to 13.
+        num_conv_blocks (int, optional): number of convolutional blocks in each residual block. Defaults to 2.
+    """
+
+    def __init__(
+        self, in_channels, out_channels, hidden_channels, kernel_size, dilations, num_res_blocks=13, num_conv_blocks=2
+    ):
+
+        super().__init__()
+        assert len(dilations) == num_res_blocks
+        final = len(dilations) - 1
+        self.res_blocks = nn.ModuleList(
+            [
+                Conv1dBNBlock(
+                    hidden_channels if idx else in_channels,
+                    out_channels if idx == final else hidden_channels,
+                    hidden_channels,
+                    kernel_size,
+                    dilation,
+                    num_conv_blocks,
+                )
+                for idx, dilation in enumerate(dilations)
+            ]
+        )
+
+    def forward(self, x, x_mask=None):
+        """Run every residual block on ``x``; ``x_mask`` (when given) is multiplied in after each block."""
+        mask = 1.0 if x_mask is None else x_mask
+        o = x * mask
+        for block in self.res_blocks:
+            # block output plus its own input, then the mask again
+            o = (block(o) + o) * mask
+        return o
diff --git a/TTS/tts/layers/generic/time_depth_sep_conv.py b/TTS/tts/layers/generic/time_depth_sep_conv.py
new file mode 100644
index 0000000000000000000000000000000000000000..186cea02e75e156c40923de91086c369a9ea02ee
--- /dev/null
+++ b/TTS/tts/layers/generic/time_depth_sep_conv.py
@@ -0,0 +1,84 @@
+import torch
+from torch import nn
+
+
+class TimeDepthSeparableConv(nn.Module):
+    """Time depth separable convolution as in https://arxiv.org/pdf/1904.02619.pdf
+    It shows competitive results with less computation and memory footprint.
+
+    Layout::
+
+        x -> conv1x1 -> BN -> GLU -> depthwise conv -> BN -> h * sigmoid(h) -> conv1x1 -> BN -> (+ x)
+
+    The residual sum at the end means the input and output shapes have to be compatible.
+
+    Args:
+        in_channels (int): channels of the input tensor.
+        hid_channels (int): channels of the depthwise stage; the first 1x1 conv emits twice
+            as many because GLU halves the channel dimension.
+        out_channels (int): channels produced by the last 1x1 conv.
+        kernel_size (int): kernel size of the depthwise conv, padded by ``(kernel_size - 1) // 2``.
+        bias (bool): whether the three convolutions carry a bias term.
+    """
+
+    def __init__(self, in_channels, hid_channels, out_channels, kernel_size, bias=True):
+        super().__init__()
+
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+        self.hid_channels = hid_channels
+        self.kernel_size = kernel_size
+
+        glu_channels = 2 * hid_channels
+        same_pad = (kernel_size - 1) // 2
+        pointwise = dict(kernel_size=1, stride=1, padding=0, bias=bias)
+        self.time_conv = nn.Conv1d(in_channels, glu_channels, **pointwise)
+        self.norm1 = nn.BatchNorm1d(glu_channels)
+        self.depth_conv = nn.Conv1d(
+            hid_channels,
+            hid_channels,
+            kernel_size,
+            stride=1,
+            padding=same_pad,
+            groups=hid_channels,  # one filter per channel
+            bias=bias,
+        )
+        self.norm2 = nn.BatchNorm1d(hid_channels)
+        self.time_conv2 = nn.Conv1d(hid_channels, out_channels, **pointwise)
+        self.norm3 = nn.BatchNorm1d(out_channels)
+
+    def forward(self, x):
+        """Apply the block to ``x`` and add the result to the untouched input."""
+        h = self.norm1(self.time_conv(x))
+        h = nn.functional.glu(h, dim=1)
+        h = self.norm2(self.depth_conv(h))
+        h = h * torch.sigmoid(h)
+        h = self.norm3(self.time_conv2(h))
+        return x + h
+
+
+class TimeDepthSeparableConvBlock(nn.Module):
+    """Stack of ``num_layers`` :class:`TimeDepthSeparableConv` layers, each applied to the masked previous output.
+    Only the last layer is built with ``out_channels``; the others emit ``hid_channels``."""
+
+    def __init__(self, in_channels, hid_channels, out_channels, num_layers, kernel_size, bias=True):
+        super().__init__()
+        assert (kernel_size - 1) % 2 == 0
+        assert num_layers > 1
+
+        first_out = out_channels if num_layers == 1 else hid_channels
+        stack = [TimeDepthSeparableConv(in_channels, hid_channels, first_out, kernel_size, bias)]
+        remaining = num_layers - 1
+        for idx in range(remaining):
+            is_last = idx + 1 == remaining
+            stack.append(
+                TimeDepthSeparableConv(
+                    hid_channels, hid_channels, out_channels if is_last else hid_channels, kernel_size, bias
+                )
+            )
+        self.layers = nn.ModuleList(stack)
+
+    def forward(self, x, mask):
+        for layer in self.layers:
+            x = layer(x * mask)
+        return x
diff --git a/TTS/tts/layers/generic/transformer.py b/TTS/tts/layers/generic/transformer.py
new file mode 100644
index 0000000000000000000000000000000000000000..9b7ecee2bacb68cd330e18630531c97bc6f2e6a3
--- /dev/null
+++ b/TTS/tts/layers/generic/transformer.py
@@ -0,0 +1,89 @@
+import torch
+import torch.nn.functional as F
+from torch import nn
+
+
+class FFTransformer(nn.Module):
+    """Self-attention followed by a two-layer conv feed-forward, each with a residual sum and LayerNorm."""
+
+    def __init__(self, in_out_channels, num_heads, hidden_channels_ffn=1024, kernel_size_fft=3, dropout_p=0.1):
+        super().__init__()
+        self.self_attn = nn.MultiheadAttention(in_out_channels, num_heads, dropout=dropout_p)
+
+        same_pad = (kernel_size_fft - 1) // 2
+        self.conv1 = nn.Conv1d(in_out_channels, hidden_channels_ffn, kernel_size=kernel_size_fft, padding=same_pad)
+        self.conv2 = nn.Conv1d(hidden_channels_ffn, in_out_channels, kernel_size=kernel_size_fft, padding=same_pad)
+
+        self.norm1 = nn.LayerNorm(in_out_channels)
+        self.norm2 = nn.LayerNorm(in_out_channels)
+
+        self.dropout1 = nn.Dropout(dropout_p)
+        self.dropout2 = nn.Dropout(dropout_p)
+
+    def forward(self, src, src_mask=None, src_key_padding_mask=None):
+        """Map ``src`` (B x D x T) to a same-shaped tensor; the attention weights are returned as well."""
+        seq_first = src.permute(2, 0, 1)  # B x D x T -> T x B x D
+        attn_out, enc_align = self.self_attn(
+            seq_first, seq_first, seq_first, attn_mask=src_mask, key_padding_mask=src_key_padding_mask
+        )
+        # NOTE(review): the attention output enters the residual twice (once through dropout1, once
+        # raw). Left unchanged since fixing it would alter outputs of already-trained weights - confirm.
+        attended = self.norm1(seq_first + self.dropout1(attn_out) + attn_out).permute(1, 2, 0)  # -> B x D x T
+        ffn_out = self.dropout2(self.conv2(F.relu(self.conv1(attended))))
+        # LayerNorm acts on the last dim, so channels are moved last and back again
+        out = self.norm2((attended + ffn_out).transpose(1, 2)).transpose(1, 2)
+        return out, enc_align
+
+
+class FFTransformerBlock(nn.Module):
+    """Stack of ``num_layers`` identically configured :class:`FFTransformer` layers."""
+
+    def __init__(self, in_out_channels, num_heads, hidden_channels_ffn, num_layers, dropout_p):
+        super().__init__()
+        self.fft_layers = nn.ModuleList(
+            [
+                FFTransformer(
+                    in_out_channels=in_out_channels,
+                    num_heads=num_heads,
+                    hidden_channels_ffn=hidden_channels_ffn,
+                    dropout_p=dropout_p,
+                )
+                for _ in range(num_layers)
+            ]
+        )
+
+    def forward(self, x, mask=None, g=None):  # pylint: disable=unused-argument
+        """Run ``x`` through every layer and return the final hidden states only.
+        TODO: handle multi-speaker
+        Shapes:
+            - x: :math:`[B, C, T]`
+            - mask:  :math:`[B, 1, T] or [B, T]`
+        """
+        if mask is not None and mask.ndim == 3:
+            # mask is negated, torch uses 1s and 0s reversely.
+            # NOTE(review): a 2-D mask is forwarded untouched (not negated) - confirm callers expect that.
+            mask = ~mask.squeeze(1).bool()
+        for layer in self.fft_layers:
+            # per-layer attention maps are not part of the return value, so they are dropped right away
+            x, _ = layer(x, src_key_padding_mask=mask)
+        return x
+
+
+class FFTDurationPredictor(nn.Module):
+    """Duration predictor: an :class:`FFTransformerBlock` followed by a linear projection to one channel."""
+
+    def __init__(
+        self, in_channels, hidden_channels, num_heads, num_layers, dropout_p=0.1, cond_channels=None
+    ):  # pylint: disable=unused-argument
+        super().__init__()  # needed so the sub-modules below get registered (parameters, state_dict, .to())
+        self.fft = FFTransformerBlock(in_channels, num_heads, hidden_channels, num_layers, dropout_p)
+        self.proj = nn.Linear(in_channels, 1)
+
+    def forward(self, x, mask=None, g=None):  # pylint: disable=unused-argument
+        """Predict one value per time step. TODO: Handle the cond input (``g`` is ignored for now).
+        Shapes:
+            - x: :math:`[B, C, T]`
+            - mask:  :math:`[B, 1, T]`
+            - returns: :math:`[B, 1, T]`
+        """
+        x = self.fft(x, mask=mask)
+        return self.proj(x.transpose(1, 2)).transpose(1, 2)  # nn.Linear maps the last dim, so channels go last
diff --git a/TTS/tts/layers/generic/wavenet.py b/TTS/tts/layers/generic/wavenet.py
new file mode 100644
index 0000000000000000000000000000000000000000..613ad19d596e19b0abe2b20cdbb4b9470f4c4141
--- /dev/null
+++ b/TTS/tts/layers/generic/wavenet.py
@@ -0,0 +1,176 @@
+import torch
+from torch import nn
+
+
+@torch.jit.script
+def fused_add_tanh_sigmoid_multiply(input_a, input_b, n_channels):
+    n_channels_int = n_channels[0]  # split point on the channel axis, read from the first element of n_channels
+    in_act = input_a + input_b
+    t_act = torch.tanh(in_act[:, :n_channels_int, :])  # tanh over the first n_channels channels
+    s_act = torch.sigmoid(in_act[:, n_channels_int:, :])  # sigmoid over the remaining channels
+    acts = t_act * s_act  # gated activation: tanh half multiplied by sigmoid half
+    return acts
+
+
+class WN(torch.nn.Module):
+    """Wavenet layers with weight norm and an optional conditioning input ``g``.
+
+         |-----------------------------------------------------------------------------|
+         |                                    |-> tanh    -|                           |
+    res -|- conv1d(dilation) -> dropout -> + -|            * -> conv1d1x1 -> split -|- + -> res
+    g -------------------------------------|  |-> sigmoid -|                        |
+    o --------------------------------------------------------------------------- + --------- o
+
+    Args:
+        in_channels (int): number of input channels.
+        hidden_channels (int): number of hidden channels (must be even).
+        kernel_size (int): filter kernel size of the dilated conv layers (must be odd).
+        dilation_rate (int): dilations rate to increase dilation per layer.
+            If it is 2, dilations are 1, 2, 4, 8 for the next 4 layers.
+        num_layers (int): number of wavenet layers.
+        c_in_channels (int): number of channels of conditioning input.
+        dropout_p (float): dropout rate.
+        weight_norm (bool): enable/disable weight norm for convolution layers.
+    """
+
+    def __init__(
+        self,
+        in_channels,
+        hidden_channels,
+        kernel_size,
+        dilation_rate,
+        num_layers,
+        c_in_channels=0,
+        dropout_p=0,
+        weight_norm=True,
+    ):
+        super().__init__()
+        assert kernel_size % 2 == 1
+        assert hidden_channels % 2 == 0
+        self.in_channels = in_channels
+        self.hidden_channels = hidden_channels
+        self.kernel_size = kernel_size
+        self.dilation_rate = dilation_rate
+        self.num_layers = num_layers
+        self.c_in_channels = c_in_channels
+        self.dropout_p = dropout_p
+
+        self.in_layers = torch.nn.ModuleList()
+        self.res_skip_layers = torch.nn.ModuleList()
+        self.dropout = nn.Dropout(dropout_p)
+
+        # init conditioning layer: one 1x1 conv producing the conditioning slices of all layers at once
+        if c_in_channels > 0:
+            cond_layer = torch.nn.Conv1d(c_in_channels, 2 * hidden_channels * num_layers, 1)
+            self.cond_layer = torch.nn.utils.weight_norm(cond_layer, name="weight")
+        # intermediate layers
+        for i in range(num_layers):
+            dilation = dilation_rate**i
+            padding = int((kernel_size * dilation - dilation) / 2)  # dilation * (kernel_size - 1) / 2
+            if i == 0:
+                in_layer = torch.nn.Conv1d(
+                    in_channels, 2 * hidden_channels, kernel_size, dilation=dilation, padding=padding
+                )
+            else:
+                in_layer = torch.nn.Conv1d(
+                    hidden_channels, 2 * hidden_channels, kernel_size, dilation=dilation, padding=padding
+                )
+            in_layer = torch.nn.utils.weight_norm(in_layer, name="weight")
+            self.in_layers.append(in_layer)
+
+            if i < num_layers - 1:
+                res_skip_channels = 2 * hidden_channels  # residual half + skip half (split in forward)
+            else:
+                res_skip_channels = hidden_channels  # last layer only feeds the output sum
+
+            res_skip_layer = torch.nn.Conv1d(hidden_channels, res_skip_channels, 1)
+            res_skip_layer = torch.nn.utils.weight_norm(res_skip_layer, name="weight")
+            self.res_skip_layers.append(res_skip_layer)
+        # setup weight norm: it is always applied above and stripped again when disabled
+        if not weight_norm:
+            self.remove_weight_norm()
+
+    def forward(self, x, x_mask=None, g=None, **kwargs):  # pylint: disable=unused-argument
+        output = torch.zeros_like(x)  # running sum of the per-layer skip outputs
+        n_channels_tensor = torch.IntTensor([self.hidden_channels])  # split point for the fused gate
+        x_mask = 1.0 if x_mask is None else x_mask
+        if g is not None:
+            g = self.cond_layer(g)
+        for i in range(self.num_layers):
+            x_in = self.in_layers[i](x)
+            x_in = self.dropout(x_in)
+            if g is not None:
+                # slice this layer's 2 * hidden_channels conditioning channels out of the shared projection
+                cond_offset = i * 2 * self.hidden_channels
+                g_l = g[:, cond_offset : cond_offset + 2 * self.hidden_channels, :]
+            else:
+                g_l = torch.zeros_like(x_in)
+            acts = fused_add_tanh_sigmoid_multiply(x_in, g_l, n_channels_tensor)
+            res_skip_acts = self.res_skip_layers[i](acts)
+            if i < self.num_layers - 1:
+                # NOTE(review): this sum looks like it needs in_channels == hidden_channels - confirm
+                x = (x + res_skip_acts[:, : self.hidden_channels, :]) * x_mask
+                output = output + res_skip_acts[:, self.hidden_channels :, :]
+            else:
+                output = output + res_skip_acts
+        return output * x_mask
+
+    def remove_weight_norm(self):
+        if self.c_in_channels != 0:
+            torch.nn.utils.remove_weight_norm(self.cond_layer)
+        for l in self.in_layers:
+            torch.nn.utils.remove_weight_norm(l)
+        for l in self.res_skip_layers:
+            torch.nn.utils.remove_weight_norm(l)
+
+
+class WNBlocks(nn.Module):
+    """Wavenet blocks.
+
+    Note: After each block dilation resets to 1 and it increases in each block
+        along the dilation rate.
+
+    Args:
+        in_channels (int): number of input channels.
+        hidden_channels (int): number of hidden channels.
+        kernel_size (int): filter kernel size for the first conv layer.
+        dilation_rate (int): dilations rate to increase dilation per layer.
+            If it is 2, dilations are 1, 2, 4, 8 for the next 4 layers.
+        num_blocks (int): number of wavenet blocks.
+        num_layers (int): number of wavenet layers.
+        c_in_channels (int): number of channels of conditioning input.
+        dropout_p (float): dropout rate.
+        weight_norm (bool): enable/disable weight norm for convolution layers.
+    """
+
+    def __init__(
+        self,
+        in_channels,
+        hidden_channels,
+        kernel_size,
+        dilation_rate,
+        num_blocks,
+        num_layers,
+        c_in_channels=0,
+        dropout_p=0,
+        weight_norm=True,
+    ):
+
+        super().__init__()
+        # every block shares these settings; only the first one reads ``in_channels``
+        shared = dict(
+            hidden_channels=hidden_channels,
+            kernel_size=kernel_size,
+            dilation_rate=dilation_rate,
+            num_layers=num_layers,
+            c_in_channels=c_in_channels,
+            dropout_p=dropout_p,
+            weight_norm=weight_norm,
+        )
+        self.wn_blocks = nn.ModuleList(
+            [WN(in_channels=hidden_channels if idx else in_channels, **shared) for idx in range(num_blocks)]
+        )
+
+    def forward(self, x, x_mask=None, g=None):
+        """Feed ``x`` through the blocks in order, handing the same ``x_mask`` and ``g`` to each."""
+        for block in self.wn_blocks:
+            x = block(x, x_mask, g)
+        return x
diff --git a/TTS/tts/layers/glow_tts/__init__.py b/TTS/tts/layers/glow_tts/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/TTS/tts/layers/glow_tts/decoder.py b/TTS/tts/layers/glow_tts/decoder.py
new file mode 100644
index 0000000000000000000000000000000000000000..61c5174ac5e67885288043885290c2906656c99c
--- /dev/null
+++ b/TTS/tts/layers/glow_tts/decoder.py
@@ -0,0 +1,141 @@
+import torch
+from torch import nn
+
+from TTS.tts.layers.generic.normalization import ActNorm
+from TTS.tts.layers.glow_tts.glow import CouplingBlock, InvConvNear
+
+
+def squeeze(x, x_mask=None, num_sqz=2):
+    """GlowTTS squeeze operation
+    Fold ``num_sqz`` consecutive time steps into the channel axis: ``[B, C, T]`` becomes
+    ``[B, C * num_sqz, T // num_sqz]``. Trailing steps that do not fill a group are dropped.
+
+    Note:
+        each 's' is a n-dimensional vector.
+        ``[s1,s2,s3,s4,s5,s6] --> [[s1, s3, s5], [s2, s4, s6]]``
+
+    Returns the masked squeezed tensor and the mask used (every ``num_sqz``-th mask step, or ones).
+    """
+    batch, channels, steps = x.size()
+    groups = steps // num_sqz
+    trimmed = x[:, :, : groups * num_sqz]
+    folded = trimmed.view(batch, channels, groups, num_sqz).permute(0, 3, 1, 2).contiguous()
+    x_sqz = folded.view(batch, channels * num_sqz, groups)
+    if x_mask is None:
+        x_mask = torch.ones(batch, 1, groups).to(device=x.device, dtype=x.dtype)
+    else:
+        x_mask = x_mask[:, :, num_sqz - 1 :: num_sqz]
+    return x_sqz * x_mask, x_mask
+
+
+def unsqueeze(x, x_mask=None, num_sqz=2):
+    """GlowTTS unsqueeze operation (revert the squeeze)
+    ``[B, C, T]`` becomes ``[B, C // num_sqz, T * num_sqz]``; a given mask is repeated per step.
+
+    Note:
+        each 's' is a n-dimensional vector.
+        ``[[s1, s3, s5], [s2, s4, s6]] --> [s1, s2, s3, s4, s5, s6]``
+    """
+    batch, channels, steps = x.size()
+    out_channels, out_steps = channels // num_sqz, steps * num_sqz
+    unfolded = x.view(batch, num_sqz, out_channels, steps).permute(0, 2, 3, 1).contiguous()
+    x_unsqz = unfolded.view(batch, out_channels, out_steps)
+    if x_mask is None:
+        x_mask = torch.ones(batch, 1, out_steps).to(device=x.device, dtype=x.dtype)
+    else:
+        x_mask = x_mask.unsqueeze(-1).repeat(1, 1, 1, num_sqz).view(batch, 1, out_steps)
+    return x_unsqz * x_mask, x_mask
+
+
+class Decoder(nn.Module):
+    """Stack of Glow Decoder Modules:
+    Squeeze -> (ActNorm -> InvertibleConv1x1 -> AffineCoupling) x num_flow_blocks -> Unsqueeze
+
+    Args:
+        in_channels (int): channels of input tensor.
+        hidden_channels (int): hidden decoder channels.
+        kernel_size (int): Coupling block kernel size. (Wavenet filter kernel size.)
+        dilation_rate (int): rate to increase dilation by each layer in a decoder block.
+        num_flow_blocks (int): number of decoder blocks.
+        num_coupling_layers (int): number coupling layers. (number of wavenet layers.)
+        dropout_p (float): wavenet dropout rate.
+        num_splits (int): ``num_splits`` argument handed to every ``InvConvNear``.
+        num_squeeze (int): squeeze factor; squeeze/unsqueeze only run when it is > 1.
+        sigmoid_scale (bool): enable/disable sigmoid scaling in coupling layer.
+        c_in_channels (int): ``c_in_channels`` argument handed to every ``CouplingBlock``.
+    """
+
+    def __init__(
+        self,
+        in_channels,
+        hidden_channels,
+        kernel_size,
+        dilation_rate,
+        num_flow_blocks,
+        num_coupling_layers,
+        dropout_p=0.0,
+        num_splits=4,
+        num_squeeze=2,
+        sigmoid_scale=False,
+        c_in_channels=0,
+    ):
+        super().__init__()
+
+        self.in_channels = in_channels
+        self.hidden_channels = hidden_channels
+        self.kernel_size = kernel_size
+        self.dilation_rate = dilation_rate
+        self.num_flow_blocks = num_flow_blocks
+        self.num_coupling_layers = num_coupling_layers
+        self.dropout_p = dropout_p
+        self.num_splits = num_splits
+        self.num_squeeze = num_squeeze
+        self.sigmoid_scale = sigmoid_scale
+        self.c_in_channels = c_in_channels
+
+        self.flows = nn.ModuleList()  # flat list: three modules are appended per flow block
+        for _ in range(num_flow_blocks):
+            self.flows.append(ActNorm(channels=in_channels * num_squeeze))  # flows see the squeezed channel count
+            self.flows.append(InvConvNear(channels=in_channels * num_squeeze, num_splits=num_splits))
+            self.flows.append(
+                CouplingBlock(
+                    in_channels * num_squeeze,
+                    hidden_channels,
+                    kernel_size=kernel_size,
+                    dilation_rate=dilation_rate,
+                    num_layers=num_coupling_layers,
+                    c_in_channels=c_in_channels,
+                    dropout_p=dropout_p,
+                    sigmoid_scale=sigmoid_scale,
+                )
+            )
+
+    def forward(self, x, x_mask, g=None, reverse=False):
+        """Run the flows (last to first when ``reverse``); returns ``(x, logdet_tot)``, ``None`` logdet in reverse.
+        Shapes:
+            - x:  :math:`[B, C, T]`
+            - x_mask: :math:`[B, 1 ,T]`
+            - g: :math:`[B, C]`
+        """
+        if not reverse:
+            flows = self.flows
+            logdet_tot = 0
+        else:
+            flows = reversed(self.flows)
+            logdet_tot = None  # the log-determinant is not accumulated in the reverse pass
+
+        if self.num_squeeze > 1:
+            x, x_mask = squeeze(x, x_mask, self.num_squeeze)
+        for f in flows:
+            if not reverse:
+                x, logdet = f(x, x_mask, g=g, reverse=reverse)
+                logdet_tot += logdet
+            else:
+                x, logdet = f(x, x_mask, g=g, reverse=reverse)  # logdet of the reverse pass is discarded
+        if self.num_squeeze > 1:
+            x, x_mask = unsqueeze(x, x_mask, self.num_squeeze)
+        return x, logdet_tot
+
+    def store_inverse(self):
+        # delegates to every flow module, so each of them has to provide store_inverse()
+        for f in self.flows:
+            f.store_inverse()
diff --git a/TTS/tts/layers/glow_tts/duration_predictor.py b/TTS/tts/layers/glow_tts/duration_predictor.py
new file mode 100644
index 0000000000000000000000000000000000000000..e766ed6ab5a0348eaca8d1482be124003d8b8c68
--- /dev/null
+++ b/TTS/tts/layers/glow_tts/duration_predictor.py
@@ -0,0 +1,69 @@
+import torch
+from torch import nn
+
+from ..generic.normalization import LayerNorm
+
+
+class DurationPredictor(nn.Module):
+    """Glow-TTS duration prediction model.
+
+    ``[2 x (conv1d_kxk -> relu -> layer_norm -> dropout)] -> conv1d_1x1 -> durs``
+
+    Args:
+        in_channels (int): Number of channels of the input tensor.
+        hidden_channels (int): Number of hidden channels of the network.
+        kernel_size (int): Kernel size for the conv layers.
+        dropout_p (float): Dropout rate used after each conv layer.
+        cond_channels (int, optional): Channels of ``g``; enables the ``cond`` projection when non-zero.
+        language_emb_dim (int, optional): Channels of ``lang_emb``; added to ``in_channels`` when non-zero.
+    """
+
+    def __init__(self, in_channels, hidden_channels, kernel_size, dropout_p, cond_channels=None, language_emb_dim=None):
+        super().__init__()
+
+        # add language embedding dim in the input
+        if language_emb_dim:
+            in_channels += language_emb_dim
+
+        # class arguments
+        self.in_channels = in_channels
+        self.filter_channels = hidden_channels
+        self.kernel_size = kernel_size
+        self.dropout_p = dropout_p
+        # layers
+        self.drop = nn.Dropout(dropout_p)
+        self.conv_1 = nn.Conv1d(in_channels, hidden_channels, kernel_size, padding=kernel_size // 2)
+        self.norm_1 = LayerNorm(hidden_channels)
+        self.conv_2 = nn.Conv1d(hidden_channels, hidden_channels, kernel_size, padding=kernel_size // 2)
+        self.norm_2 = LayerNorm(hidden_channels)
+        # output layer
+        self.proj = nn.Conv1d(hidden_channels, 1, 1)
+        if cond_channels is not None and cond_channels != 0:
+            self.cond = nn.Conv1d(cond_channels, in_channels, 1)  # projects g onto the input channels
+
+        if language_emb_dim != 0 and language_emb_dim is not None:
+            self.cond_lang = nn.Conv1d(language_emb_dim, in_channels, 1)  # in_channels already includes the lang dim
+
+    def forward(self, x, x_mask, g=None, lang_emb=None):
+        """Add the projected conditioning inputs to ``x`` and predict one masked value per step.
+        Shapes:
+            - x: :math:`[B, C, T]`
+            - x_mask: :math:`[B, 1, T]`
+            - g: :math:`[B, C, 1]`
+        """
+        if g is not None:
+            x = x + self.cond(g)
+
+        if lang_emb is not None:
+            x = x + self.cond_lang(lang_emb)
+
+        x = self.conv_1(x * x_mask)
+        x = torch.relu(x)
+        x = self.norm_1(x)
+        x = self.drop(x)
+        x = self.conv_2(x * x_mask)
+        x = torch.relu(x)
+        x = self.norm_2(x)
+        x = self.drop(x)
+        x = self.proj(x * x_mask)
+        return x * x_mask
diff --git a/TTS/tts/layers/glow_tts/encoder.py b/TTS/tts/layers/glow_tts/encoder.py
new file mode 100644
index 0000000000000000000000000000000000000000..3b43e527f5e9ca2bd0880bf204e04a1526bc8dfb
--- /dev/null
+++ b/TTS/tts/layers/glow_tts/encoder.py
@@ -0,0 +1,179 @@
+import math
+
+import torch
+from torch import nn
+
+from TTS.tts.layers.generic.gated_conv import GatedConvBlock
+from TTS.tts.layers.generic.res_conv_bn import ResidualConv1dBNBlock
+from TTS.tts.layers.generic.time_depth_sep_conv import TimeDepthSeparableConvBlock
+from TTS.tts.layers.glow_tts.duration_predictor import DurationPredictor
+from TTS.tts.layers.glow_tts.glow import ResidualConv1dLayerNormBlock
+from TTS.tts.layers.glow_tts.transformer import RelativePositionTransformer
+from TTS.tts.utils.helpers import sequence_mask
+
+
+class Encoder(nn.Module):
+    """Glow-TTS encoder module.
+
+    ::
+
+        embedding -> <prenet> -> encoder_module -> <postnet> --> proj_mean
+                                                             |
+                                                             |-> proj_var
+                                                             |
+                                                             |-> concat -> duration_predictor
+                                                                    ↑
+                                                              speaker_embed
+
+    Args:
+        num_chars (int): number of characters.
+        out_channels (int): number of output channels.
+        hidden_channels (int): encoder's embedding size.
+        hidden_channels_dp (int): hidden channels of the duration predictor.
+        encoder_type (str): one of the encoder types listed below.
+        encoder_params (dict): keyword arguments for the chosen encoder module.
+        dropout_p_dp (float): dropout rate of the duration predictor.
+        mean_only (bool): if True, output only mean values and use constant std.
+        use_prenet (bool): if True, use pre-convolutional layers before transformer layers.
+        c_in_channels (int): number of channels in conditional input.
+
+    Shapes: the input is a ``[B, T]`` tensor of character ids (see ``forward``).
+
+    ::
+
+        suggested encoder params...
+
+        for encoder_type == 'rel_pos_transformer'
+            encoder_params={
+                'kernel_size':3,
+                'dropout_p': 0.1,
+                'num_layers': 6,
+                'num_heads': 2,
+                'hidden_channels_ffn': 768,  # 4 times the hidden_channels
+                'input_length': None
+            }
+
+        for encoder_type == 'gated_conv'
+            encoder_params={
+                'kernel_size':5,
+                'dropout_p': 0.1,
+                'num_layers': 9,
+            }
+
+        for encoder_type == 'residual_conv_bn'
+            encoder_params={
+                "kernel_size": 4,
+                "dilations": [1, 2, 4, 1, 2, 4, 1, 2, 4, 1, 2, 4, 1],
+                "num_conv_blocks": 2,
+                "num_res_blocks": 13
+            }
+
+         for encoder_type == 'time_depth_separable'
+            encoder_params={
+                "kernel_size": 5,
+                'num_layers': 9,
+            }
+    """
+
+    def __init__(
+        self,
+        num_chars,
+        out_channels,
+        hidden_channels,
+        hidden_channels_dp,
+        encoder_type,
+        encoder_params,
+        dropout_p_dp=0.1,
+        mean_only=False,
+        use_prenet=True,
+        c_in_channels=0,
+    ):
+        super().__init__()
+        # class arguments
+        self.num_chars = num_chars
+        self.out_channels = out_channels
+        self.hidden_channels = hidden_channels
+        self.hidden_channels_dp = hidden_channels_dp
+        self.dropout_p_dp = dropout_p_dp
+        self.mean_only = mean_only
+        self.use_prenet = use_prenet
+        self.c_in_channels = c_in_channels
+        self.encoder_type = encoder_type
+        # embedding layer
+        self.emb = nn.Embedding(num_chars, hidden_channels)
+        nn.init.normal_(self.emb.weight, 0.0, hidden_channels**-0.5)
+        # init encoder module
+        if encoder_type.lower() == "rel_pos_transformer":
+            if use_prenet:
+                self.prenet = ResidualConv1dLayerNormBlock(
+                    hidden_channels, hidden_channels, hidden_channels, kernel_size=5, num_layers=3, dropout_p=0.5
+                )
+            self.encoder = RelativePositionTransformer(
+                hidden_channels, hidden_channels, hidden_channels, **encoder_params
+            )
+        elif encoder_type.lower() == "gated_conv":
+            self.encoder = GatedConvBlock(hidden_channels, **encoder_params)
+        elif encoder_type.lower() == "residual_conv_bn":
+            if use_prenet:
+                self.prenet = nn.Sequential(nn.Conv1d(hidden_channels, hidden_channels, 1), nn.ReLU())
+            self.encoder = ResidualConv1dBNBlock(hidden_channels, hidden_channels, hidden_channels, **encoder_params)
+            self.postnet = nn.Sequential(
+                nn.Conv1d(self.hidden_channels, self.hidden_channels, 1), nn.BatchNorm1d(self.hidden_channels)
+            )
+        elif encoder_type.lower() == "time_depth_separable":
+            if use_prenet:
+                self.prenet = ResidualConv1dLayerNormBlock(
+                    hidden_channels, hidden_channels, hidden_channels, kernel_size=5, num_layers=3, dropout_p=0.5
+                )
+            self.encoder = TimeDepthSeparableConvBlock(
+                hidden_channels, hidden_channels, hidden_channels, **encoder_params
+            )
+        else:
+            raise ValueError(" [!] Unkown encoder type.")
+
+        # final projection layers
+        self.proj_m = nn.Conv1d(hidden_channels, out_channels, 1)
+        if not mean_only:
+            self.proj_s = nn.Conv1d(hidden_channels, out_channels, 1)
+        # duration predictor
+        self.duration_predictor = DurationPredictor(
+            hidden_channels + c_in_channels, hidden_channels_dp, 3, dropout_p_dp
+        )
+
+    def forward(self, x, x_lengths, g=None):
+        """Returns ``(x_m, x_logs, logw, x_mask)``.
+        Shapes:
+            - x: :math:`[B, T]` (character ids fed to the embedding)
+            - x_lengths: :math:`[B]`
+            - g (optional): :math:`[B, c_in_channels, 1]` (expanded over time, concatenated on the channel axis)
+        """
+        # embedding layer
+        # [B ,T, D]
+        x = self.emb(x) * math.sqrt(self.hidden_channels)
+        # [B, D, T]
+        x = torch.transpose(x, 1, -1)
+        # compute input sequence mask
+        x_mask = torch.unsqueeze(sequence_mask(x_lengths, x.size(2)), 1).to(x.dtype)
+        # prenet (the residual_conv_bn prenet is a plain nn.Sequential, which accepts a single input only)
+        if hasattr(self, "prenet") and self.use_prenet:
+            x = self.prenet(x) if isinstance(self.prenet, nn.Sequential) else self.prenet(x, x_mask)
+        # encoder
+        x = self.encoder(x, x_mask)
+        # postnet
+        if hasattr(self, "postnet"):
+            x = self.postnet(x) * x_mask
+        # set duration predictor input (detached: the duration loss does not reach the encoder through x)
+        if g is not None:
+            g_exp = g.expand(-1, -1, x.size(-1))
+            x_dp = torch.cat([x.detach(), g_exp], 1)
+        else:
+            x_dp = x.detach()
+        # final projection layer
+        x_m = self.proj_m(x) * x_mask
+        if not self.mean_only:
+            x_logs = self.proj_s(x) * x_mask
+        else:
+            x_logs = torch.zeros_like(x_m)
+        # duration predictor
+        logw = self.duration_predictor(x_dp, x_mask)
+        return x_m, x_logs, logw, x_mask
diff --git a/TTS/tts/layers/glow_tts/glow.py b/TTS/tts/layers/glow_tts/glow.py
new file mode 100644
index 0000000000000000000000000000000000000000..3b745018a2454495e68d7976a4a725762289bb20
--- /dev/null
+++ b/TTS/tts/layers/glow_tts/glow.py
@@ -0,0 +1,234 @@
+from distutils.version import LooseVersion
+
+import torch
+from torch import nn
+from torch.nn import functional as F
+
+from TTS.tts.layers.generic.wavenet import WN
+
+from ..generic.normalization import LayerNorm
+
+
+class ResidualConv1dLayerNormBlock(nn.Module):
+    """Conv1d with Layer Normalization and residual connection as in GlowTTS paper.
+    https://arxiv.org/pdf/1811.00002.pdf
+
+    ::
+
+        x |-> [conv1d -> layer_norm -> relu -> dropout] * num_layers -> conv1d_1x1 -> + -> o
+          |------------------------------------------------------------------------------|
+
+    Args:
+        in_channels (int): number of input tensor channels.
+        hidden_channels (int): number of inner layer channels.
+        out_channels (int): number of output tensor channels. The output is summed with the input ``x``.
+        kernel_size (int): kernel size of conv1d filter. Must be odd.
+        num_layers (int): number of blocks. Must be greater than 1.
+        dropout_p (float): dropout rate for each block.
+    """
+
+    def __init__(self, in_channels, hidden_channels, out_channels, kernel_size, num_layers, dropout_p):
+        super().__init__()
+        self.in_channels = in_channels
+        self.hidden_channels = hidden_channels
+        self.out_channels = out_channels
+        self.kernel_size = kernel_size
+        self.num_layers = num_layers
+        self.dropout_p = dropout_p
+        assert num_layers > 1, " [!] number of layers should be > 1."
+        assert kernel_size % 2 == 1, " [!] kernel size should be odd number."
+
+        self.conv_layers = nn.ModuleList()
+        self.norm_layers = nn.ModuleList()
+
+        for idx in range(num_layers):
+            self.conv_layers.append(
+                nn.Conv1d(
+                    in_channels if idx == 0 else hidden_channels, hidden_channels, kernel_size, padding=kernel_size // 2
+                )
+            )
+            self.norm_layers.append(LayerNorm(hidden_channels))
+        # 1x1 output projection, zero-initialised so that a fresh block returns ``x * x_mask``
+        self.proj = nn.Conv1d(hidden_channels, out_channels, 1)
+        self.proj.weight.data.zero_()
+        self.proj.bias.data.zero_()
+
+    def forward(self, x, x_mask):
+        """Run the conv stack, project with the 1x1 conv and add the result to the input.
+        Shapes:
+            - x: :math:`[B, C, T]`
+            - x_mask: :math:`[B, 1, T]`
+        """
+        x_res = x
+        for i in range(self.num_layers):
+            x = self.conv_layers[i](x * x_mask)
+            x = self.norm_layers[i](x * x_mask)
+            x = F.dropout(F.relu(x), self.dropout_p, training=self.training)
+        x = x_res + self.proj(x)
+        return x * x_mask
+
+
+class InvConvNear(nn.Module):
+    """Invertible Convolution with input splitting as in GlowTTS paper.
+    https://arxiv.org/pdf/1811.00002.pdf
+
+    Args:
+        channels (int): input and output channels.
+        num_splits (int): number of splits, also H and W of conv layer. Must be even.
+        no_jacobian (bool): if True, the forward pass returns ``logdet = 0`` instead of computing it.
+
+    Note:
+        Split the input into groups of size self.num_splits and
+        perform 1x1 convolution separately. Cast 1x1 conv operation
+        to 2d by reshaping the input for efficiency.
+    """
+
+    def __init__(self, channels, num_splits=4, no_jacobian=False, **kwargs):  # pylint: disable=unused-argument
+        super().__init__()
+        assert num_splits % 2 == 0
+        self.channels = channels
+        self.num_splits = num_splits
+        self.no_jacobian = no_jacobian
+        self.weight_inv = None
+        # initialise the weight with the Q factor of the QR decomposition of a random normal matrix
+        if LooseVersion(torch.__version__) < LooseVersion("1.9"):
+            w_init = torch.qr(torch.FloatTensor(self.num_splits, self.num_splits).normal_())[0]
+        else:
+            w_init = torch.linalg.qr(torch.FloatTensor(self.num_splits, self.num_splits).normal_(), "complete")[0]
+        # flip the sign of the first column when the determinant is negative
+        if torch.det(w_init) < 0:
+            w_init[:, 0] = -1 * w_init[:, 0]
+        self.weight = nn.Parameter(w_init)
+
+    def forward(self, x, x_mask=None, reverse=False, **kwargs):  # pylint: disable=unused-argument
+        """Apply the split 1x1 convolution, or its inverse when ``reverse`` is True; returns ``(z, logdet)``.
+        Shapes:
+            - x: :math:`[B, C, T]`
+            - x_mask: :math:`[B, 1, T]`
+        """
+        b, c, t = x.size()
+        assert c % self.num_splits == 0
+        if x_mask is None:
+            x_mask = 1
+            x_len = torch.ones((b,), dtype=x.dtype, device=x.device) * t
+        else:
+            x_len = torch.sum(x_mask, [1, 2])
+        # regroup the channels into ``num_splits`` planes: [B, C, T] -> [B, num_splits, C // num_splits, T]
+        x = x.view(b, 2, c // self.num_splits, self.num_splits // 2, t)
+        x = x.permute(0, 1, 3, 2, 4).contiguous().view(b, self.num_splits, c // self.num_splits, t)
+        # pick the kernel: the (cached or freshly computed) inverse when reversing, else the learned weight
+        if reverse:
+            if self.weight_inv is not None:
+                weight = self.weight_inv
+            else:
+                weight = torch.inverse(self.weight.float()).to(dtype=self.weight.dtype)
+            logdet = None
+        else:
+            weight = self.weight
+            if self.no_jacobian:
+                logdet = 0
+            else:
+                logdet = torch.logdet(self.weight) * (c / self.num_splits) * x_len  # [b]
+        # apply the matrix as a 1x1 conv2d over the ``num_splits`` axis
+        weight = weight.view(self.num_splits, self.num_splits, 1, 1)
+        z = F.conv2d(x, weight)
+        # undo the regrouping and re-apply the mask
+        z = z.view(b, 2, self.num_splits // 2, c // self.num_splits, t)
+        z = z.permute(0, 1, 3, 2, 4).contiguous().view(b, c, t) * x_mask
+        return z, logdet
+
+    def store_inverse(self):  # cache the inverse so that reverse passes can skip ``torch.inverse``
+        weight_inv = torch.inverse(self.weight.float()).to(dtype=self.weight.dtype)
+        self.weight_inv = nn.Parameter(weight_inv, requires_grad=False)
+
+
+class CouplingBlock(nn.Module):
+    """Glow Affine Coupling block as in GlowTTS paper.
+    https://arxiv.org/pdf/1811.00002.pdf
+
+    ::
+
+        x --> x0 -> conv1d -> wavenet -> conv1d --> t, s -> concat(x0, exp(s)*x1 + t) -> o
+        '-> x1 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ^
+
+    Args:
+         in_channels (int): number of input tensor channels.
+         hidden_channels (int): number of hidden channels.
+         kernel_size (int): WaveNet filter kernel size.
+         dilation_rate (int): rate to increase dilation by each layer in a decoder block.
+         num_layers (int): number of WaveNet layers.
+         c_in_channels (int): number of conditioning input channels.
+         dropout_p (float): wavenet dropout rate.
+         sigmoid_scale (bool): if True, ``s`` is replaced by ``log(1e-6 + sigmoid(s + 2))``.
+
+    Note:
+         It does not use the conditional inputs differently from WaveGlow.
+    """
+
+    def __init__(
+        self,
+        in_channels,
+        hidden_channels,
+        kernel_size,
+        dilation_rate,
+        num_layers,
+        c_in_channels=0,
+        dropout_p=0,
+        sigmoid_scale=False,
+    ):
+        super().__init__()
+        self.in_channels = in_channels
+        self.hidden_channels = hidden_channels
+        self.kernel_size = kernel_size
+        self.dilation_rate = dilation_rate
+        self.num_layers = num_layers
+        self.c_in_channels = c_in_channels
+        self.dropout_p = dropout_p
+        self.sigmoid_scale = sigmoid_scale
+        # input layer
+        start = torch.nn.Conv1d(in_channels // 2, hidden_channels, 1)
+        start = torch.nn.utils.weight_norm(start)
+        self.start = start
+        # output layer
+        # Initializing last layer to 0 makes the affine coupling layers
+        # do nothing at first.  This helps with training stability
+        end = torch.nn.Conv1d(hidden_channels, in_channels, 1)
+        end.weight.data.zero_()
+        end.bias.data.zero_()
+        self.end = end
+        # coupling layers
+        self.wn = WN(hidden_channels, hidden_channels, kernel_size, dilation_rate, num_layers, c_in_channels, dropout_p)
+
+    def forward(self, x, x_mask=None, reverse=False, g=None, **kwargs):  # pylint: disable=unused-argument
+        """Apply the affine coupling, or its inverse when ``reverse`` is True; returns ``(z, logdet)``.
+        Shapes:
+            - x: :math:`[B, C, T]`
+            - x_mask: :math:`[B, 1, T]`
+            - g: :math:`[B, C, 1]`
+        """
+        if x_mask is None:
+            x_mask = 1
+        x_0, x_1 = x[:, : self.in_channels // 2], x[:, self.in_channels // 2 :]
+        # the affine parameters are predicted from the first half of the channels only
+        x = self.start(x_0) * x_mask
+        x = self.wn(x, x_mask, g)
+        out = self.end(x)
+        # the first half passes through unchanged; split the prediction into ``t`` and ``s``
+        z_0 = x_0
+        t = out[:, : self.in_channels // 2, :]
+        s = out[:, self.in_channels // 2 :, :]
+        if self.sigmoid_scale:
+            s = torch.log(1e-6 + torch.sigmoid(s + 2))
+        # transform the second half; ``logdet`` is only computed in the forward direction
+        if reverse:
+            z_1 = (x_1 - t) * torch.exp(-s) * x_mask
+            logdet = None
+        else:
+            z_1 = (t + torch.exp(s) * x_1) * x_mask
+            logdet = torch.sum(s * x_mask, [1, 2])
+
+        z = torch.cat([z_0, z_1], 1)
+        return z, logdet
+
+    def store_inverse(self):
+        self.wn.remove_weight_norm()  # NOTE(review): ``self.start`` keeps its weight norm here - confirm intended
diff --git a/TTS/tts/layers/glow_tts/transformer.py b/TTS/tts/layers/glow_tts/transformer.py
new file mode 100644
index 0000000000000000000000000000000000000000..0f837abfeb441477de419f6cf4c9a05730a351c8
--- /dev/null
+++ b/TTS/tts/layers/glow_tts/transformer.py
@@ -0,0 +1,434 @@
+import math
+
+import torch
+from torch import nn
+from torch.nn import functional as F
+
+from TTS.tts.layers.generic.normalization import LayerNorm, LayerNorm2
+
+
+class RelativePositionMultiHeadAttention(nn.Module):
+    """Multi-head attention with Relative Positional embedding.
+    https://arxiv.org/pdf/1809.04281.pdf
+
+    It learns positional embeddings for a window of neighbours. For keys and values,
+    it learns different set of embeddings. Key embeddings are aggregated with the attention
+    scores and value embeddings are aggregated with the output.
+
+    Note:
+        Example with relative attention window size 2
+
+        - input = [a, b, c, d, e]
+        - rel_attn_embeddings = [e(t-2), e(t-1), e(t), e(t+1), e(t+2)]
+
+        So it learns 5 embedding vectors (in total 10) separately for key and value vectors.
+
+        Considering the input c, where e(t) corresponds to c -> c
+
+        - e(t-2) corresponds to c -> a
+        - e(t-1) corresponds to c -> b
+        - e(t+1) corresponds to c -> d
+        - e(t+2) corresponds to c -> e
+
+        These embeddings are shared among different time steps. So input a, b, d and e also use
+        the same embeddings.
+
+        Embeddings are ignored when the relative window is out of limit for the first and the last
+        n items.
+
+    Args:
+        channels (int): input and inner layer channels.
+        out_channels (int): output channels.
+        num_heads (int): number of attention heads.
+        rel_attn_window_size (int, optional): relation attention window size.
+            If 4, for each time step next and previous 4 time steps are attended.
+            If default, relative encoding is disabled and it is a regular transformer.
+            Defaults to None.
+        heads_share (bool, optional): share one set of relative embeddings across all heads. Defaults to True.
+        dropout_p (float, optional): dropout rate applied to the attention weights. Defaults to 0.
+        input_length (int, optional): if set (and a mask is given), limit attention to +/- this many steps. Defaults to None.
+        proximal_bias (bool, optional): enable/disable proximal bias as in the paper. Defaults to False.
+        proximal_init (bool, optional): enable/disable proximal init as in the paper.
+            Init key and query layer weights the same. Defaults to False.
+    """
+
+    def __init__(
+        self,
+        channels,
+        out_channels,
+        num_heads,
+        rel_attn_window_size=None,
+        heads_share=True,
+        dropout_p=0.0,
+        input_length=None,
+        proximal_bias=False,
+        proximal_init=False,
+    ):
+
+        super().__init__()
+        assert channels % num_heads == 0, " [!] channels should be divisible by num_heads."
+        # class attributes
+        self.channels = channels
+        self.out_channels = out_channels
+        self.num_heads = num_heads
+        self.rel_attn_window_size = rel_attn_window_size
+        self.heads_share = heads_share
+        self.input_length = input_length
+        self.proximal_bias = proximal_bias
+        self.dropout_p = dropout_p
+        self.attn = None
+        # query, key, value layers
+        self.k_channels = channels // num_heads
+        self.conv_q = nn.Conv1d(channels, channels, 1)
+        self.conv_k = nn.Conv1d(channels, channels, 1)
+        self.conv_v = nn.Conv1d(channels, channels, 1)
+        # output layers
+        self.conv_o = nn.Conv1d(channels, out_channels, 1)
+        self.dropout = nn.Dropout(dropout_p)
+        # relative positional encoding layers
+        if rel_attn_window_size is not None:
+            n_heads_rel = 1 if heads_share else num_heads
+            rel_stddev = self.k_channels**-0.5
+            emb_rel_k = nn.Parameter(
+                torch.randn(n_heads_rel, rel_attn_window_size * 2 + 1, self.k_channels) * rel_stddev
+            )
+            emb_rel_v = nn.Parameter(
+                torch.randn(n_heads_rel, rel_attn_window_size * 2 + 1, self.k_channels) * rel_stddev
+            )
+            self.register_parameter("emb_rel_k", emb_rel_k)
+            self.register_parameter("emb_rel_v", emb_rel_v)
+
+        # init layers
+        nn.init.xavier_uniform_(self.conv_q.weight)
+        nn.init.xavier_uniform_(self.conv_k.weight)
+        # proximal init: start the key projection as a copy of the query projection
+        if proximal_init:
+            self.conv_k.weight.data.copy_(self.conv_q.weight.data)
+            self.conv_k.bias.data.copy_(self.conv_q.bias.data)
+        nn.init.xavier_uniform_(self.conv_v.weight)
+
+    def forward(self, x, c, attn_mask=None):
+        """Attend from ``x`` (queries) to ``c`` (keys and values); the weights are kept in ``self.attn``.
+        Shapes:
+            - x: :math:`[B, C, T]`
+            - c: :math:`[B, C, T]`
+            - attn_mask: :math:`[B, 1, T, T]`
+        """
+        q = self.conv_q(x)
+        k = self.conv_k(c)
+        v = self.conv_v(c)
+        x, self.attn = self.attention(q, k, v, mask=attn_mask)
+        x = self.conv_o(x)
+        return x
+
+    def attention(self, query, key, value, mask=None):
+        # reshape [b, d, t] -> [b, n_h, t, d_k]
+        b, d, t_s, t_t = (*key.size(), query.size(2))
+        query = query.view(b, self.num_heads, self.k_channels, t_t).transpose(2, 3)
+        key = key.view(b, self.num_heads, self.k_channels, t_s).transpose(2, 3)
+        value = value.view(b, self.num_heads, self.k_channels, t_s).transpose(2, 3)
+        # compute raw attention scores
+        scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(self.k_channels)
+        # relative positional encoding for scores
+        if self.rel_attn_window_size is not None:
+            assert t_s == t_t, "Relative attention is only available for self-attention."
+            # get relative key embeddings
+            key_relative_embeddings = self._get_relative_embeddings(self.emb_rel_k, t_s)
+            rel_logits = self._matmul_with_relative_keys(query, key_relative_embeddings)
+            rel_logits = self._relative_position_to_absolute_position(rel_logits)
+            scores_local = rel_logits / math.sqrt(self.k_channels)
+            scores = scores + scores_local
+        # proximal bias
+        if self.proximal_bias:
+            assert t_s == t_t, "Proximal bias is only available for self-attention."
+            scores = scores + self._attn_proximity_bias(t_s).to(device=scores.device, dtype=scores.dtype)
+        # attention score masking
+        if mask is not None:
+            # fill masked positions with a large negative finite value (-1e4) to prevent out-of-range errors.
+            scores = scores.masked_fill(mask == 0, -1e4)
+            if self.input_length is not None:
+                block_mask = torch.ones_like(scores).triu(-1 * self.input_length).tril(self.input_length)
+                scores = scores * block_mask + -1e4 * (1 - block_mask)
+        # attention score normalization
+        p_attn = F.softmax(scores, dim=-1)  # [b, n_h, t_t, t_s]
+        # apply dropout to attention weights
+        p_attn = self.dropout(p_attn)
+        # compute output
+        output = torch.matmul(p_attn, value)
+        # relative positional encoding for values
+        if self.rel_attn_window_size is not None:
+            relative_weights = self._absolute_position_to_relative_position(p_attn)
+            value_relative_embeddings = self._get_relative_embeddings(self.emb_rel_v, t_s)
+            output = output + self._matmul_with_relative_values(relative_weights, value_relative_embeddings)
+        output = output.transpose(2, 3).contiguous().view(b, d, t_t)  # [b, n_h, t_t, d_k] -> [b, d, t_t]
+        return output, p_attn
+
+    @staticmethod
+    def _matmul_with_relative_values(p_attn, re):
+        """
+        Args:
+            p_attn (Tensor): attention weights.
+            re (Tensor): relative value embedding vector. (a_(i,j)^V)
+
+        Shapes:
+            -p_attn: :math:`[B, H, T, V]`
+            -re: :math:`[H or 1, V, D]`
+            -logits: :math:`[B, H, T, D]`
+        """
+        logits = torch.matmul(p_attn, re.unsqueeze(0))
+        return logits
+
+    @staticmethod
+    def _matmul_with_relative_keys(query, re):
+        """
+        Args:
+            query (Tensor): batch of query vectors. (x*W^Q)
+            re (Tensor): relative key embedding vector. (a_(i,j)^K)
+
+        Shapes:
+            - query: :math:`[B, H, T, D]`
+            - re: :math:`[H or 1, V, D]`
+            - logits: :math:`[B, H, T, V]`
+        """
+        # logits = torch.einsum('bhld, kmd -> bhlm', [query, re.to(query.dtype)])
+        logits = torch.matmul(query, re.unsqueeze(0).transpose(-2, -1))
+        return logits
+
+    def _get_relative_embeddings(self, relative_embeddings, length):
+        """Pad and slice the learned embeddings to the ``2 * length - 1`` offsets used for ``length`` steps."""
+        # Pad first before slice to avoid using cond ops.
+        pad_length = max(length - (self.rel_attn_window_size + 1), 0)
+        slice_start_position = max((self.rel_attn_window_size + 1) - length, 0)
+        slice_end_position = slice_start_position + 2 * length - 1
+        if pad_length > 0:
+            padded_relative_embeddings = F.pad(relative_embeddings, [0, 0, pad_length, pad_length, 0, 0])
+        else:
+            padded_relative_embeddings = relative_embeddings
+        used_relative_embeddings = padded_relative_embeddings[:, slice_start_position:slice_end_position]
+        return used_relative_embeddings
+
+    @staticmethod
+    def _relative_position_to_absolute_position(x):
+        """Converts tensor from relative to absolute indexing for local attention.
+        Shapes:
+            x: :math:`[B, C, T, 2 * T - 1]`
+        Returns:
+            A Tensor of shape :math:`[B, C, T, T]`
+        """
+        batch, heads, length, _ = x.size()
+        # Pad to shift from relative to absolute indexing.
+        x = F.pad(x, [0, 1, 0, 0, 0, 0, 0, 0])
+        # Pad extra elements so to add up to shape (len+1, 2*len-1).
+        x_flat = x.view([batch, heads, length * 2 * length])
+        x_flat = F.pad(x_flat, [0, length - 1, 0, 0, 0, 0])
+        # Reshape and slice out the padded elements.
+        x_final = x_flat.view([batch, heads, length + 1, 2 * length - 1])[:, :, :length, length - 1 :]
+        return x_final
+
+    @staticmethod
+    def _absolute_position_to_relative_position(x):
+        """Converts tensor from absolute to relative indexing (inverse reshaping of the method above).
+        Shapes:
+            - x: :math:`[B, C, T, T]`
+            - ret: :math:`[B, C, T, 2*T-1]`
+        """
+        batch, heads, length, _ = x.size()
+        # pad along column
+        x = F.pad(x, [0, length - 1, 0, 0, 0, 0, 0, 0])
+        x_flat = x.view([batch, heads, length**2 + length * (length - 1)])
+        # add 0's in the beginning that will skew the elements after reshape
+        x_flat = F.pad(x_flat, [length, 0, 0, 0, 0, 0])
+        x_final = x_flat.view([batch, heads, length, 2 * length])[:, :, :, 1:]
+        return x_final
+
+    @staticmethod
+    def _attn_proximity_bias(length):
+        """Produce an attention mask that discourages distant
+        attention values.
+        Args:
+            length (int): an integer scalar.
+        Returns:
+            a Tensor with shape :math:`[1, 1, T, T]`
+        """
+        # L
+        r = torch.arange(length, dtype=torch.float32)
+        # L x L
+        diff = torch.unsqueeze(r, 0) - torch.unsqueeze(r, 1)
+        # scale mask values
+        diff = -torch.log1p(torch.abs(diff))
+        # 1 x 1 x L x L
+        return diff.unsqueeze(0).unsqueeze(0)
+
+
+class FeedForwardNetwork(nn.Module):
+    """Position-wise feed-forward block of the Transformer, built from two 1D convolutions.
+
+    Args:
+        in_channels (int): input tensor channels.
+        out_channels (int): output tensor channels.
+        hidden_channels (int): inner layers hidden channels.
+        kernel_size (int): conv1d filter kernel size.
+        dropout_p (float, optional): dropout rate. Defaults to 0.
+        causal (bool, optional): pad on the left only instead of on both sides. Defaults to False.
+    """
+
+    def __init__(self, in_channels, out_channels, hidden_channels, kernel_size, dropout_p=0.0, causal=False):
+        super().__init__()
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+        self.hidden_channels = hidden_channels
+        self.kernel_size = kernel_size
+        self.dropout_p = dropout_p
+        # choose the padding routine once, at construction time
+        self.padding = self._causal_padding if causal else self._same_padding
+
+        # layers
+        self.conv_1 = nn.Conv1d(in_channels, hidden_channels, kernel_size)
+        self.conv_2 = nn.Conv1d(hidden_channels, out_channels, kernel_size)
+        self.dropout = nn.Dropout(dropout_p)
+
+    def forward(self, x, x_mask):
+        """Run conv -> relu -> dropout -> conv.
+
+        The input of each convolution and the final output are multiplied by ``x_mask``.
+        """
+        hidden = self.conv_1(self.padding(x * x_mask))
+        hidden = self.dropout(torch.relu(hidden))
+        out = self.conv_2(self.padding(hidden * x_mask))
+        return out * x_mask
+
+    def _causal_padding(self, x):
+        """Pad ``kernel_size - 1`` zeros on the left of the last axis."""
+        if self.kernel_size == 1:
+            return x
+        left = self.kernel_size - 1
+        return F.pad(x, self._pad_shape([[0, 0], [0, 0], [left, 0]]))
+
+    def _same_padding(self, x):
+        """Pad both sides of the last axis so the convolution keeps the length."""
+        if self.kernel_size == 1:
+            return x
+        left = (self.kernel_size - 1) // 2
+        right = self.kernel_size // 2
+        return F.pad(x, self._pad_shape([[0, 0], [0, 0], [left, right]]))
+
+    @staticmethod
+    def _pad_shape(padding):
+        """Flatten ``[left, right]`` pairs into the flat list taken by ``F.pad``.
+
+        The pairs are reversed first, so the last pair of ``padding`` comes first in the result.
+        """
+        return [amount for pair in reversed(padding) for amount in pair]
+
+
+class RelativePositionTransformer(nn.Module):
+    """Transformer with Relative Positional Encoding.
+    https://arxiv.org/abs/1803.02155
+
+    Args:
+        in_channels (int): number of channels of the input tensor. NOTE(review): ``x + y`` in ``forward`` suggests it must equal ``hidden_channels`` - confirm.
+        out_channels (int): number of channels of the output tensor.
+        hidden_channels (int): model hidden channels.
+        hidden_channels_ffn (int): hidden channels of FeedForwardNetwork.
+        num_heads (int): number of attention heads.
+        num_layers (int): number of transformer layers.
+        kernel_size (int, optional): kernel size of feed-forward inner layers. Defaults to 1.
+        dropout_p (float, optional): dropout rate for self-attention and feed-forward inner layers. Defaults to 0.
+        rel_attn_window_size (int, optional): relation attention window size.
+            If 4, for each time step next and previous 4 time steps are attended.
+            If default, relative encoding is disabled and it is a regular transformer.
+            Defaults to None.
+        input_length (int, optional): input length to limit position encoding. Defaults to None.
+        layer_norm_type (str, optional): type "1" uses torch tensor operations and type "2" uses torch layer_norm
+            primitive. Use type "2", type "1" is for backward compat. Defaults to "1".
+    """
+
+    def __init__(
+        self,
+        in_channels: int,
+        out_channels: int,
+        hidden_channels: int,
+        hidden_channels_ffn: int,
+        num_heads: int,
+        num_layers: int,
+        kernel_size=1,
+        dropout_p=0.0,
+        rel_attn_window_size: int = None,
+        input_length: int = None,
+        layer_norm_type: str = "1",
+    ):
+        super().__init__()
+        self.hidden_channels = hidden_channels
+        self.hidden_channels_ffn = hidden_channels_ffn
+        self.num_heads = num_heads
+        self.num_layers = num_layers
+        self.kernel_size = kernel_size
+        self.dropout_p = dropout_p
+        self.rel_attn_window_size = rel_attn_window_size
+
+        self.dropout = nn.Dropout(dropout_p)
+        self.attn_layers = nn.ModuleList()
+        self.norm_layers_1 = nn.ModuleList()
+        self.ffn_layers = nn.ModuleList()
+        self.norm_layers_2 = nn.ModuleList()
+        # each layer: self-attention + layer norm, then feed-forward + layer norm
+        for idx in range(self.num_layers):
+            self.attn_layers.append(
+                RelativePositionMultiHeadAttention(
+                    hidden_channels if idx != 0 else in_channels,
+                    hidden_channels,
+                    num_heads,
+                    rel_attn_window_size=rel_attn_window_size,
+                    dropout_p=dropout_p,
+                    input_length=input_length,
+                )
+            )
+            if layer_norm_type == "1":
+                self.norm_layers_1.append(LayerNorm(hidden_channels))
+            elif layer_norm_type == "2":
+                self.norm_layers_1.append(LayerNorm2(hidden_channels))
+            else:
+                raise ValueError(" [!] Unknown layer norm type")
+            # the last layer may change the width, so its residual path gets a 1x1 projection
+            if hidden_channels != out_channels and (idx + 1) == self.num_layers:
+                self.proj = nn.Conv1d(hidden_channels, out_channels, 1)
+
+            self.ffn_layers.append(
+                FeedForwardNetwork(
+                    hidden_channels,
+                    hidden_channels if (idx + 1) != self.num_layers else out_channels,
+                    hidden_channels_ffn,
+                    kernel_size,
+                    dropout_p=dropout_p,
+                )
+            )
+
+            if layer_norm_type == "1":
+                self.norm_layers_2.append(LayerNorm(hidden_channels if (idx + 1) != self.num_layers else out_channels))
+            elif layer_norm_type == "2":
+                self.norm_layers_2.append(LayerNorm2(hidden_channels if (idx + 1) != self.num_layers else out_channels))
+            else:
+                raise ValueError(" [!] Unknown layer norm type")
+
+    def forward(self, x, x_mask):
+        """Run all layers on ``x`` and return the result multiplied by ``x_mask``.
+        Shapes:
+            - x: :math:`[B, C, T]`
+            - x_mask: :math:`[B, 1, T]`
+        """
+        attn_mask = x_mask.unsqueeze(2) * x_mask.unsqueeze(-1)
+        for i in range(self.num_layers):
+            x = x * x_mask
+            y = self.attn_layers[i](x, x, attn_mask)
+            y = self.dropout(y)
+            x = self.norm_layers_1[i](x + y)
+            # feed-forward sub-layer
+            y = self.ffn_layers[i](x, x_mask)
+            y = self.dropout(y)
+            # project the residual branch when the last layer changes the channel count
+            if (i + 1) == self.num_layers and hasattr(self, "proj"):
+                x = self.proj(x)
+
+            x = self.norm_layers_2[i](x + y)
+        x = x * x_mask
+        return x
diff --git a/TTS/tts/layers/losses.py b/TTS/tts/layers/losses.py
new file mode 100644
index 0000000000000000000000000000000000000000..9933df6beac4f88d7f46c0e4afd94ff6e3247bf1
--- /dev/null
+++ b/TTS/tts/layers/losses.py
@@ -0,0 +1,874 @@
+import math
+
+import numpy as np
+import torch
+from coqpit import Coqpit
+from torch import nn
+from torch.nn import functional
+
+from TTS.tts.utils.helpers import sequence_mask
+from TTS.tts.utils.ssim import SSIMLoss as _SSIMLoss
+from TTS.utils.audio.torch_transforms import TorchSTFT
+
+
+# pylint: disable=abstract-method
+# relates https://github.com/pytorch/pytorch/issues/42305
+class L1LossMasked(nn.Module):
+    """L1 loss restricted to the frames selected by the length mask of each sequence.
+
+    Args:
+        seq_len_norm (bool): if truthy, the frames are weighted so that every sequence counts the same.
+    """
+
+    def __init__(self, seq_len_norm):
+        super().__init__()
+        self.seq_len_norm = seq_len_norm
+
+    def forward(self, x, target, length):
+        """
+        Args:
+            x: predicted values.
+            target: reference values; its ``requires_grad`` flag is set to False as a side effect.
+            length: length of each sample in the batch, used to build the mask.
+        Shapes:
+            x: B x T X D
+            target: B x T x D
+            length: B
+        Returns:
+            loss: scalar L1 loss computed over the masked-in elements only.
+        """
+        target.requires_grad = False
+        # frame_mask: (batch, max_len, 1), later broadcast to the shape of x
+        frame_mask = sequence_mask(sequence_length=length, max_len=target.size(1)).unsqueeze(2).float()
+        full_mask = frame_mask.expand_as(x)
+        if not self.seq_len_norm:
+            total = functional.l1_loss(x * full_mask, target * full_mask, reduction="sum")
+            return total / full_mask.sum()
+
+        # per-element weight: 1 / (len_b * B * D) inside the mask, 0 outside
+        norm_w = frame_mask / frame_mask.sum(dim=1, keepdim=True)
+        weights = norm_w.div(target.shape[0] * target.shape[2])
+        elementwise = functional.l1_loss(x * full_mask, target * full_mask, reduction="none")
+        return elementwise.mul(weights.to(elementwise.device)).sum()
+
+
+class MSELossMasked(nn.Module):
+    """Mean squared error restricted to the frames selected by the length mask of each sequence.
+
+    Args:
+        seq_len_norm (bool): if truthy, weight the frames so that short and long sequences count the same.
+    """
+
+    def __init__(self, seq_len_norm):
+        super().__init__()
+        self.seq_len_norm = seq_len_norm
+
+    def forward(self, x, target, length):
+        """
+        Args:
+            x: predicted values.
+            target: reference values; its ``requires_grad`` flag is switched off in place.
+            length: length of each sample in the batch, used to build the mask.
+        Shapes:
+            - x: :math:`[B, T, D]`
+            - target: :math:`[B, T, D]`
+            - length: :math:`B`
+        Returns:
+            loss: scalar MSE computed over the masked-in elements only.
+        """
+        target.requires_grad = False
+        # valid: (batch, max_len, 1) mask obtained from `sequence_mask`
+        valid = sequence_mask(sequence_length=length, max_len=target.size(1)).unsqueeze(2).float()
+        valid_full = valid.expand_as(x)
+        if self.seq_len_norm:
+            # each masked-in element weighs 1 / (len_b * B * D)
+            per_seq = valid / valid.sum(dim=1, keepdim=True)
+            weights = per_seq.div(target.shape[0] * target.shape[2])
+            squared = functional.mse_loss(x * valid_full, target * valid_full, reduction="none")
+            return squared.mul(weights.to(squared.device)).sum()
+
+        summed = functional.mse_loss(x * valid_full, target * valid_full, reduction="sum")
+        return summed / valid_full.sum()
+
+
+def sample_wise_min_max(x: torch.Tensor, mask: torch.Tensor) -> torch.Tensor:
+    """Min-max normalize each sample of the batch using only its masked-in (valid) values.
+    Shapes:
+        - x: :math:`[B, D1, D2]`, mask: :math:`[B, D1, 1]` (boolean, True on valid positions)
+    """
+    # padding is filled with -inf / +inf so it never wins the max / min, even when every valid value is negative
+    maximum = torch.amax(x.masked_fill(~mask, -np.inf), dim=(1, 2), keepdim=True)
+    minimum = torch.amin(x.masked_fill(~mask, np.inf), dim=(1, 2), keepdim=True)
+    return (x - minimum) / (maximum - minimum + 1e-8)
+
+
+class SSIMLoss(torch.nn.Module):
+    """SSIM loss as (1 - SSIM), computed on length-masked, per-sample min-max normalized inputs.
+    SSIM is explained here https://en.wikipedia.org/wiki/Structural_similarity
+    """
+
+    def __init__(self):
+        super().__init__()
+        self.loss_func = _SSIMLoss()
+
+    def forward(self, y_hat, y, length):
+        """
+        Args:
+            y_hat (tensor): model prediction values.
+            y (tensor): target values.
+            length (tensor): length of each sample in a batch for masking.
+
+        Shapes:
+            y_hat: B x T X D
+            y: B x T x D
+            length: B
+
+        Returns:
+            loss: the SSIM loss; a value outside [0, 1] is replaced by the nearest bound as a constant tensor.
+        """
+        valid = sequence_mask(sequence_length=length, max_len=y.size(1)).unsqueeze(2)
+        # both inputs are rescaled per sample and multiplied by the mask before being compared
+        target_img = (sample_wise_min_max(y, valid) * valid).unsqueeze(1)
+        pred_img = (sample_wise_min_max(y_hat, valid) * valid).unsqueeze(1)
+        ssim_loss = self.loss_func(target_img, pred_img)
+
+        value = ssim_loss.item()
+        if value > 1.0:
+            print(f" > SSIM loss is out-of-range {value}, setting it 1.0")
+            ssim_loss = torch.tensor(1.0, device=ssim_loss.device)
+        elif value < 0.0:
+            print(f" > SSIM loss is out-of-range {value}, setting it 0.0")
+            ssim_loss = torch.tensor(0.0, device=ssim_loss.device)
+        return ssim_loss
+
+
+class AttentionEntropyLoss(nn.Module):
+    # pylint: disable=R0201
+    def forward(self, align):
+        """
+        Penalize soft (high-entropy) attention weights so that the attention becomes more decisive.
+        Returns the mean entropy over the last dimension of `align`, divided by log(align.shape[1]).
+        """
+        attn_dist = torch.distributions.Categorical(probs=align)
+        normalizer = np.log(align.shape[1])
+        return (attn_dist.entropy() / normalizer).mean()
+
+
+class BCELossMasked(nn.Module):
+    """BCE loss with masking.
+
+    Used mainly for stopnet in autoregressive models.
+
+    Args:
+        pos_weight (float): weight for positive samples. If set < 1, penalize early stopping. Defaults to None,
+            which leaves the positive samples unweighted.
+    """
+
+    def __init__(self, pos_weight: float = None):
+        super().__init__()
+        # `torch.tensor([None])` raises, so the documented default must bypass the tensor conversion
+        if pos_weight is None:
+            self.pos_weight = None
+        else:
+            self.pos_weight = nn.Parameter(torch.tensor([pos_weight]), requires_grad=False)
+
+    def forward(self, x, target, length):
+        """
+        Args:
+            x: logits (unnormalized scores) of each step.
+            target: binary targets of each step; its ``requires_grad`` flag is set to False as a side effect.
+            length: length of each sample in the batch, or None to take every step into account.
+        Shapes:
+            x: B x T
+            target: B x T
+            length: B
+        Returns:
+            loss: summed BCE-with-logits divided by the number of steps taken into account.
+        """
+        target.requires_grad = False
+        if length is not None:
+            # mask: (batch, max_len)
+            mask = sequence_mask(sequence_length=length, max_len=target.size(1))
+            num_items = mask.sum()
+            loss = functional.binary_cross_entropy_with_logits(
+                x.masked_select(mask), target.masked_select(mask), pos_weight=self.pos_weight, reduction="sum"
+            )
+        else:
+            loss = functional.binary_cross_entropy_with_logits(x, target, pos_weight=self.pos_weight, reduction="sum")
+            num_items = torch.numel(x)
+        loss = loss / num_items
+        return loss
+
+
+class DifferentailSpectralLoss(nn.Module):
+    """Differential Spectral Loss: applies `loss_func` to the first-order differences along dimension 1.
+    https://arxiv.org/ftp/arxiv/papers/1909/1909.10302.pdf"""
+
+    def __init__(self, loss_func):
+        super().__init__()
+        self.loss_func = loss_func
+
+    def forward(self, x, target, length=None):
+        """
+         Shapes:
+            x: B x T
+            target: B x T
+            length: B
+        Returns:
+            loss: the value of `loss_func` on the step-to-step differences of `x` and `target`.
+        """
+        pred_delta = x[:, 1:] - x[:, :-1]
+        ref_delta = target[:, 1:] - target[:, :-1]
+        # each difference sequence is one step shorter than its source, hence `length - 1`
+        extra_args = () if length is None else (length - 1,)
+        return self.loss_func(pred_delta, ref_delta, *extra_args)
+
+
+class GuidedAttentionLoss(torch.nn.Module):
+    """Penalizes attention weights far from the diagonal; a larger `sigma` tolerates a wider band."""
+
+    def __init__(self, sigma=0.4):
+        super().__init__()
+        self.sigma = sigma
+
+    def _make_ga_masks(self, ilens, olens):
+        # one (olen, ilen) penalty map per sample, zero-padded to the longest lengths of the batch
+        batch_masks = torch.zeros((len(ilens), max(olens), max(ilens)))
+        for row, (ilen, olen) in enumerate(zip(ilens, olens)):
+            batch_masks[row, :olen, :ilen] = self._make_ga_mask(ilen, olen, self.sigma)
+        return batch_masks
+
+    def forward(self, att_ws, ilens, olens):
+        penalties = self._make_ga_masks(ilens, olens).to(att_ws.device)
+        valid = self._make_masks(ilens, olens).to(att_ws.device)
+        # average the penalized weights over the positions selected by both length masks
+        return torch.mean((penalties * att_ws).masked_select(valid))
+
+    @staticmethod
+    def _make_ga_mask(ilen, olen, sigma):
+        out_pos, in_pos = torch.meshgrid(torch.arange(olen).to(olen), torch.arange(ilen).to(ilen))
+        # 0 on the length-normalized diagonal, approaching 1 away from it
+        offset = in_pos.float() / ilen - out_pos.float() / olen
+        return 1.0 - torch.exp(-(offset**2) / (2 * (sigma**2)))
+
+    @staticmethod
+    def _make_masks(ilens, olens):
+        in_valid = sequence_mask(ilens)
+        out_valid = sequence_mask(olens)
+        return out_valid.unsqueeze(-1) & in_valid.unsqueeze(-2)
+
+
+class Huber(nn.Module):
+    # pylint: disable=R0201
+    def forward(self, x, y, length=None):
+        """Smooth-L1 (Huber) loss summed over the masked inputs and divided by the sum of the mask.
+        Shapes:
+            x, y: B x T x 1 presumably, since the mask built here is B x T x 1 -- TODO confirm against callers
+            length: B
+        """
+        valid = sequence_mask(sequence_length=length, max_len=y.size(1)).unsqueeze(2).float()
+        summed = torch.nn.functional.smooth_l1_loss(x * valid, y * valid, reduction="sum")
+        return summed / valid.sum()
+
+
+class ForwardSumLoss(nn.Module):
+    """CTC-based alignment loss: key positions 1..key_len are the CTC labels and a blank column is prepended."""
+
+    def __init__(self, blank_logprob=-1):
+        super().__init__()
+        self.log_softmax = torch.nn.LogSoftmax(dim=3)
+        self.ctc_loss = torch.nn.CTCLoss(zero_infinity=True)
+        self.blank_logprob = blank_logprob
+
+    def forward(self, attn_logprob, in_lens, out_lens):
+        # index 0 of the last dimension becomes the CTC blank, filled with `blank_logprob`
+        padded = torch.nn.functional.pad(input=attn_logprob, pad=(1, 0), value=self.blank_logprob)
+        batch_size = attn_logprob.shape[0]
+
+        accumulated = 0.0
+        for b in range(batch_size):
+            n_keys, n_queries = in_lens[b], out_lens[b]
+            # the target sequence is simply 1..n_keys
+            labels = torch.arange(1, n_keys + 1).unsqueeze(0)
+            # crop to the real lengths, then renormalize over the kept keys (+ blank)
+            cropped = padded[b].permute(1, 0, 2)[:n_queries, :, : n_keys + 1]
+            log_probs = self.log_softmax(cropped[None])[0]
+            sample_loss = self.ctc_loss(
+                log_probs, labels, input_lengths=out_lens[b : b + 1], target_lengths=in_lens[b : b + 1]
+            )
+            accumulated = accumulated + sample_loss
+
+        return accumulated / batch_size
+
+
+########################
+# MODEL LOSS LAYERS
+########################
+
+
+class TacotronLoss(torch.nn.Module):
+    """Aggregate of the Tacotron / Tacotron2 training loss terms enabled by the provided config."""
+
+    def __init__(self, c, ga_sigma=0.4):
+        super().__init__()
+        self.stopnet_pos_weight = c.stopnet_pos_weight
+        self.use_capacitron_vae = c.use_capacitron_vae
+        if self.use_capacitron_vae:
+            self.capacitron_capacity = c.capacitron_vae.capacitron_capacity
+            self.capacitron_vae_loss_alpha = c.capacitron_vae.capacitron_VAE_loss_alpha
+        self.ga_alpha = c.ga_alpha
+        self.decoder_diff_spec_alpha = c.decoder_diff_spec_alpha
+        self.postnet_diff_spec_alpha = c.postnet_diff_spec_alpha
+        self.decoder_alpha = c.decoder_loss_alpha
+        self.postnet_alpha = c.postnet_loss_alpha
+        self.decoder_ssim_alpha = c.decoder_ssim_alpha
+        self.postnet_ssim_alpha = c.postnet_ssim_alpha
+        self.config = c
+
+        # postnet and decoder loss: L1 for Tacotron, MSE otherwise (name matched case-insensitively, as in forward)
+        is_tacotron = c.model.lower() in ["tacotron"]
+        if c.loss_masking:
+            self.criterion = L1LossMasked(c.seq_len_norm) if is_tacotron else MSELossMasked(c.seq_len_norm)
+        else:
+            self.criterion = nn.L1Loss() if is_tacotron else nn.MSELoss()
+        # guided attention loss
+        if c.ga_alpha > 0:
+            self.criterion_ga = GuidedAttentionLoss(sigma=ga_sigma)
+        # differential spectral loss
+        if c.postnet_diff_spec_alpha > 0 or c.decoder_diff_spec_alpha > 0:
+            self.criterion_diff_spec = DifferentailSpectralLoss(loss_func=self.criterion)
+        # ssim loss
+        if c.postnet_ssim_alpha > 0 or c.decoder_ssim_alpha > 0:
+            self.criterion_ssim = SSIMLoss()
+        # stopnet loss
+        # pylint: disable=not-callable
+        self.criterion_st = BCELossMasked(pos_weight=torch.tensor(self.stopnet_pos_weight)) if c.stopnet else None
+
+        # For dev purposes only
+        self.criterion_capacitron_reconstruction_loss = nn.L1Loss(reduction="sum")
+
+    def forward(
+        self,
+        postnet_output,
+        decoder_output,
+        mel_input,
+        linear_input,
+        stopnet_output,
+        stopnet_target,
+        stop_target_length,
+        capacitron_vae_outputs,
+        output_lens,
+        decoder_b_output,
+        alignments,
+        alignment_lens,
+        alignments_backwards,
+        input_lens,
+    ):
+        """Compute every loss term enabled in the config and accumulate them into the total loss.
+
+        Returns a dict with one entry per computed term plus the total under the key "loss"."""
+        # decoder outputs linear or mel spectrograms for Tacotron and Tacotron2
+        # the target should be set accordingly
+        postnet_target = linear_input if self.config.model.lower() in ["tacotron"] else mel_input
+
+        return_dict = {}
+        # remove lengths if no masking is applied
+        if not self.config.loss_masking:
+            output_lens = None
+        # only the masked criteria accept the lengths as a third argument
+        len_args = (output_lens,) if self.config.loss_masking else ()
+        # decoder and postnet losses; a disabled term (alpha <= 0) is reported as zero instead of staying unbound
+        decoder_loss = postnet_loss = torch.zeros((), device=mel_input.device)
+        if self.decoder_alpha > 0:
+            decoder_loss = self.criterion(decoder_output, mel_input, *len_args)
+        if self.postnet_alpha > 0:
+            postnet_loss = self.criterion(postnet_output, postnet_target, *len_args)
+        loss = self.decoder_alpha * decoder_loss + self.postnet_alpha * postnet_loss
+        return_dict["decoder_loss"] = decoder_loss
+        return_dict["postnet_loss"] = postnet_loss
+
+        if self.use_capacitron_vae:
+            # extract capacitron vae infos
+            posterior_distribution, prior_distribution, beta = capacitron_vae_outputs
+
+            # KL divergence term between the posterior and the prior
+            kl_term = torch.mean(torch.distributions.kl_divergence(posterior_distribution, prior_distribution))
+
+            # Limit the mutual information between the data and latent space by the variational capacity limit
+            kl_capacity = kl_term - self.capacitron_capacity
+
+            # pass beta through softplus to keep it positive
+            beta = torch.nn.functional.softplus(beta)[0]
+
+            # This is the term going to the main ADAM optimiser, we detach beta because
+            # beta is optimised by a separate, SGD optimiser below
+            capacitron_vae_loss = beta.detach() * kl_capacity
+
+            # normalize the capacitron_vae_loss as in L1Loss or MSELoss.
+            # After this, both the standard loss and capacitron_vae_loss will be in the same scale.
+            # For this reason we don't need use L1Loss and MSELoss in "sum" reduction mode.
+            # Note: the batch is not considered because the L1Loss was calculated in "sum" mode
+            # divided by the batch size, So not dividing the capacitron_vae_loss by B is legitimate.
+
+            # get B T D dimension from input
+            B, T, D = mel_input.size()
+            # normalize
+            if self.config.loss_masking:
+                # if mask loss get T using the mask
+                T = output_lens.sum() / B
+
+            # Only for dev purposes to be able to compare the reconstruction loss with the values in the
+            # original Capacitron paper
+            return_dict["capaciton_reconstruction_loss"] = (
+                self.criterion_capacitron_reconstruction_loss(decoder_output, mel_input) / decoder_output.size(0)
+            ) + kl_capacity
+
+            capacitron_vae_loss = capacitron_vae_loss / (T * D)
+            capacitron_vae_loss = capacitron_vae_loss * self.capacitron_vae_loss_alpha
+
+            # This is the term to purely optimise beta and to pass into the SGD optimizer
+            beta_loss = torch.negative(beta) * kl_capacity.detach()
+
+            loss += capacitron_vae_loss
+
+            return_dict["capacitron_vae_loss"] = capacitron_vae_loss
+            return_dict["capacitron_vae_beta_loss"] = beta_loss
+            return_dict["capacitron_vae_kl_term"] = kl_term
+            return_dict["capacitron_beta"] = beta
+
+        if self.config.stopnet:
+            stop_loss = self.criterion_st(stopnet_output, stopnet_target, stop_target_length)
+        else:
+            # 0-dim zero on the loss device; `torch.zeros(1)` cannot be added in place to the scalar loss
+            stop_loss = loss.new_zeros(())
+        loss += stop_loss
+        return_dict["stopnet_loss"] = stop_loss
+
+        # backward decoder loss (if enabled)
+        if self.config.bidirectional_decoder:
+            if self.config.loss_masking:
+                decoder_b_loss = self.criterion(torch.flip(decoder_b_output, dims=(1,)), mel_input, output_lens)
+            else:
+                decoder_b_loss = self.criterion(torch.flip(decoder_b_output, dims=(1,)), mel_input)
+            decoder_c_loss = torch.nn.functional.l1_loss(torch.flip(decoder_b_output, dims=(1,)), decoder_output)
+            loss += self.decoder_alpha * (decoder_b_loss + decoder_c_loss)
+            return_dict["decoder_b_loss"] = decoder_b_loss
+            return_dict["decoder_c_loss"] = decoder_c_loss
+
+        # double decoder consistency loss (if enabled)
+        if self.config.double_decoder_consistency:
+            if self.config.loss_masking:
+                decoder_b_loss = self.criterion(decoder_b_output, mel_input, output_lens)
+            else:
+                decoder_b_loss = self.criterion(decoder_b_output, mel_input)
+            # decoder_c_loss = torch.nn.functional.l1_loss(decoder_b_output, decoder_output)
+            attention_c_loss = torch.nn.functional.l1_loss(alignments, alignments_backwards)
+            loss += self.decoder_alpha * (decoder_b_loss + attention_c_loss)
+            return_dict["decoder_coarse_loss"] = decoder_b_loss
+            return_dict["decoder_ddc_loss"] = attention_c_loss
+
+        # guided attention loss (if enabled)
+        if self.config.ga_alpha > 0:
+            ga_loss = self.criterion_ga(alignments, input_lens, alignment_lens)
+            loss += ga_loss * self.ga_alpha
+            return_dict["ga_loss"] = ga_loss
+
+        # decoder differential spectral loss
+        if self.config.decoder_diff_spec_alpha > 0:
+            decoder_diff_spec_loss = self.criterion_diff_spec(decoder_output, mel_input, output_lens)
+            loss += decoder_diff_spec_loss * self.decoder_diff_spec_alpha
+            return_dict["decoder_diff_spec_loss"] = decoder_diff_spec_loss
+
+        # postnet differential spectral loss
+        if self.config.postnet_diff_spec_alpha > 0:
+            postnet_diff_spec_loss = self.criterion_diff_spec(postnet_output, postnet_target, output_lens)
+            loss += postnet_diff_spec_loss * self.postnet_diff_spec_alpha
+            return_dict["postnet_diff_spec_loss"] = postnet_diff_spec_loss
+
+        # decoder ssim loss (weighted by its own alpha, not by the postnet one)
+        if self.config.decoder_ssim_alpha > 0:
+            decoder_ssim_loss = self.criterion_ssim(decoder_output, mel_input, output_lens)
+            loss += decoder_ssim_loss * self.decoder_ssim_alpha
+            return_dict["decoder_ssim_loss"] = decoder_ssim_loss
+
+        # postnet ssim loss
+        if self.config.postnet_ssim_alpha > 0:
+            postnet_ssim_loss = self.criterion_ssim(postnet_output, postnet_target, output_lens)
+            loss += postnet_ssim_loss * self.postnet_ssim_alpha
+            return_dict["postnet_ssim_loss"] = postnet_ssim_loss
+
+        return_dict["loss"] = loss
+        return return_dict
+
+
+class GlowTTSLoss(torch.nn.Module):
+    """Glow-TTS objective: normalized negative log-likelihood of the flow plus an MSE duration loss."""
+
+    def __init__(self):
+        super().__init__()
+        self.constant_factor = 0.5 * math.log(2 * math.pi)
+
+    def forward(self, z, means, scales, log_det, y_lengths, o_dur_log, o_attn_dur, x_lengths):
+        # flow loss - neg log likelihood, normalized by sum(y_lengths) * z.shape[2]
+        squared_dist = torch.exp(-2 * scales) * (z - means) ** 2
+        pz = torch.sum(scales) + 0.5 * torch.sum(squared_dist)
+        log_mle = self.constant_factor + (pz - torch.sum(log_det)) / (torch.sum(y_lengths) * z.shape[2])
+        # duration loss - MSE, normalized by sum(x_lengths)
+        loss_dur = torch.sum((o_dur_log - o_attn_dur) ** 2) / torch.sum(x_lengths)
+        # duration loss - huber loss
+        # loss_dur = torch.nn.functional.smooth_l1_loss(o_dur_log, o_attn_dur, reduction="sum") / torch.sum(x_lengths)
+        return_dict = {"loss": log_mle + loss_dur, "log_mle": log_mle, "loss_dur": loss_dur}
+
+        # check if any loss is NaN
+        nan_key = next((key for key, value in return_dict.items() if torch.isnan(value)), None)
+        if nan_key is not None:
+            raise RuntimeError(f" [!] NaN loss with {nan_key}.")
+        return return_dict
+
+
+def mse_loss_custom(x, y):
+    """Element-wise squared error without reduction, computed by the torch back-end after broadcasting
+    `x` and `y` to a common shape. Per the original author it uses less VRAM than the raw code."""
+    lhs, rhs = torch.broadcast_tensors(x, y)
+    return torch._C._nn.mse_loss(lhs, rhs, 0)  # pylint: disable=protected-access, c-extension-no-member
+
+
+class MDNLoss(nn.Module):
+    """Mixture of Density Network Loss as described in https://arxiv.org/pdf/2003.01950.pdf."""
+
+    def forward(self, logp, text_lengths, mel_lengths):  # pylint: disable=no-self-use
+        """Forward recursion over `logp`: each column is reached from the same row or from the row above.
+        Shapes:
+            logp: [B, T_seq, T_mel]
+            text_lengths: [B]
+            mel_lengths: [B]
+        """
+        batch_size, n_rows, n_cols = logp.shape
+        floor = -1e4  # stands in for log(0)
+        log_alpha = logp.new_ones(batch_size, n_rows, n_cols) * floor
+        log_alpha[:, 0, 0] = logp[:, 0, 0]
+        for col in range(1, n_cols):
+            stay = log_alpha[:, :, col - 1 : col]
+            # previous column shifted down by one row: the score of arriving from the row above
+            advance = functional.pad(stay, (0, 0, 1, -1), value=floor)
+            merged = torch.logsumexp(torch.cat([stay, advance], dim=-1) + 1e-4, dim=-1)
+            log_alpha[:, :, col] = merged + logp[:, :, col]
+        final = log_alpha[torch.arange(batch_size), text_lengths - 1, mel_lengths - 1]
+        return -final.mean() / n_rows
+
+
+class AlignTTSLoss(nn.Module):
+    """Modified AlignTTS Loss.
+    Computes
+        - masked MSE (reported under the key "loss_l1") and SSIM losses from output spectrograms.
+        - masked MSE loss for duration predictor.
+        - MDNLoss for Mixture of Density Network.
+
+    All loss values are aggregated by a weighted sum of the alpha values.
+
+    Args:
+        c: TTS model configuration providing `ssim_alpha`, `dur_loss_alpha`, `spec_loss_alpha` and `mdn_alpha`.
+    """
+
+    def __init__(self, c):
+        super().__init__()
+        self.mdn_loss = MDNLoss()
+        self.spec_loss = MSELossMasked(False)
+        self.ssim = SSIMLoss()
+        self.dur_loss = MSELossMasked(False)
+
+        self.ssim_alpha = c.ssim_alpha
+        self.dur_loss_alpha = c.dur_loss_alpha
+        self.spec_loss_alpha = c.spec_loss_alpha
+        self.mdn_alpha = c.mdn_alpha
+
+    def forward(
+        self, logp, decoder_output, decoder_target, decoder_output_lens, dur_output, dur_target, input_lens, phase
+    ):
+        # phase: 0 -> MDN | 1 -> spec + SSIM | 2 -> MDN + spec + SSIM | 3 -> duration | otherwise -> all terms
+        spec_loss, ssim_loss, dur_loss, mdn_loss = 0, 0, 0, 0
+        if phase == 0:
+            mdn_loss = self.mdn_loss(logp, input_lens, decoder_output_lens)
+        elif phase == 1:
+            spec_loss = self.spec_loss(decoder_output, decoder_target, decoder_output_lens)
+            ssim_loss = self.ssim(decoder_output, decoder_target, decoder_output_lens)
+        elif phase == 2:
+            mdn_loss = self.mdn_loss(logp, input_lens, decoder_output_lens)
+            spec_loss = self.spec_loss(decoder_output, decoder_target, decoder_output_lens)
+            ssim_loss = self.ssim(decoder_output, decoder_target, decoder_output_lens)
+        elif phase == 3:
+            dur_loss = self.dur_loss(dur_output.unsqueeze(2), dur_target.unsqueeze(2), input_lens)
+        else:
+            mdn_loss = self.mdn_loss(logp, input_lens, decoder_output_lens)
+            spec_loss = self.spec_loss(decoder_output, decoder_target, decoder_output_lens)
+            ssim_loss = self.ssim(decoder_output, decoder_target, decoder_output_lens)
+            dur_loss = self.dur_loss(dur_output.unsqueeze(2), dur_target.unsqueeze(2), input_lens)
+        loss = (
+            self.spec_loss_alpha * spec_loss
+            + self.ssim_alpha * ssim_loss
+            + self.dur_loss_alpha * dur_loss
+            + self.mdn_alpha * mdn_loss
+        )
+        return {"loss": loss, "loss_l1": spec_loss, "loss_ssim": ssim_loss, "loss_dur": dur_loss, "mdn_loss": mdn_loss}
+
+
+class VitsGeneratorLoss(nn.Module):
+    """Weighted sum of the loss terms used to train the VITS generator.
+
+    The weight of each term is the matching `*_loss_alpha` field of the config `c`.
+    """
+
+    def __init__(self, c: Coqpit):
+        super().__init__()
+        self.kl_loss_alpha = c.kl_loss_alpha
+        self.gen_loss_alpha = c.gen_loss_alpha
+        self.feat_loss_alpha = c.feat_loss_alpha
+        self.dur_loss_alpha = c.dur_loss_alpha
+        self.mel_loss_alpha = c.mel_loss_alpha
+        self.spk_encoder_loss_alpha = c.speaker_encoder_loss_alpha
+        audio = c.audio
+        self.stft = TorchSTFT(
+            audio.fft_size,
+            audio.hop_length,
+            audio.win_length,
+            sample_rate=audio.sample_rate,
+            mel_fmin=audio.mel_fmin,
+            mel_fmax=audio.mel_fmax,
+            n_mels=audio.num_mels,
+            use_mel=True,
+            do_amp_to_db=True,
+        )
+
+    @staticmethod
+    def feature_loss(feats_real, feats_generated):
+        """Twice the sum, over all feature maps, of the mean absolute real/generated difference."""
+        per_map = (
+            torch.mean(torch.abs(real.float().detach() - fake.float()))
+            for real_maps, fake_maps in zip(feats_real, feats_generated)
+            for real, fake in zip(real_maps, fake_maps)
+        )
+        return sum(per_map) * 2
+
+    @staticmethod
+    def generator_loss(scores_fake):
+        """Return (sum, list) of the per-discriminator means of (1 - score)^2."""
+        gen_losses = [torch.mean((1 - score.float()) ** 2) for score in scores_fake]
+        return sum(gen_losses), gen_losses
+
+    @staticmethod
+    def kl_loss(z_p, logs_q, m_p, logs_p, z_mask):
+        """
+        z_p, logs_q: [b, h, t_t]
+        m_p, logs_p: [b, h, t_t]
+        Returns the masked sum of the element-wise KL terms divided by the sum of `z_mask`.
+        """
+        z_p, logs_q, m_p, logs_p, z_mask = (t.float() for t in (z_p, logs_q, m_p, logs_p, z_mask))
+
+        log_ratio = logs_p - logs_q - 0.5
+        scaled_sq_dist = 0.5 * ((z_p - m_p) ** 2) * torch.exp(-2.0 * logs_p)
+        elementwise = log_ratio + scaled_sq_dist
+        return torch.sum(elementwise * z_mask) / torch.sum(z_mask)
+
+    @staticmethod
+    def cosine_similarity_loss(gt_spk_emb, syn_spk_emb):
+        """Negative mean cosine similarity between the two sets of embeddings."""
+        similarity = torch.nn.functional.cosine_similarity(gt_spk_emb, syn_spk_emb)
+        return -similarity.mean()
+
+    def forward(
+        self,
+        mel_slice,
+        mel_slice_hat,
+        z_p,
+        logs_q,
+        m_p,
+        logs_p,
+        z_len,
+        scores_disc_fake,
+        feats_disc_fake,
+        feats_disc_real,
+        loss_duration,
+        use_speaker_encoder_as_loss=False,
+        gt_spk_emb=None,
+        syn_spk_emb=None,
+    ):
+        """Compute the individual generator loss terms and their total.
+
+        Shapes:
+            - mel_slice : :math:`[B, 1, T]`
+            - mel_slice_hat: :math:`[B, 1, T]`
+            - z_p: :math:`[B, C, T]`
+            - logs_q: :math:`[B, C, T]`
+            - m_p: :math:`[B, C, T]`
+            - logs_p: :math:`[B, C, T]`
+            - z_len: :math:`[B]`
+            - scores_disc_fake[i]: :math:`[B, C]`
+            - feats_disc_fake[i][j]: :math:`[B, C, T', P]`
+            - feats_disc_real[i][j]: :math:`[B, C, T', P]`
+        Returns:
+            dict: the alpha-scaled terms plus their sum under the key "loss".
+        """
+        z_mask = sequence_mask(z_len).float().unsqueeze(1)
+        # compute losses, each already scaled by its alpha
+        loss_kl = self.kl_loss(z_p=z_p, logs_q=logs_q, m_p=m_p, logs_p=logs_p, z_mask=z_mask) * self.kl_loss_alpha
+        loss_feat = (
+            self.feature_loss(feats_real=feats_disc_real, feats_generated=feats_disc_fake) * self.feat_loss_alpha
+        )
+        loss_gen, _ = self.generator_loss(scores_fake=scores_disc_fake)
+        loss_gen = loss_gen * self.gen_loss_alpha
+        loss_mel = torch.nn.functional.l1_loss(mel_slice, mel_slice_hat) * self.mel_loss_alpha
+        loss_duration = torch.sum(loss_duration.float()) * self.dur_loss_alpha
+        loss = loss_kl + loss_feat + loss_mel + loss_gen + loss_duration
+
+        return_dict = {}
+        if use_speaker_encoder_as_loss:
+            loss_se = self.cosine_similarity_loss(gt_spk_emb, syn_spk_emb) * self.spk_encoder_loss_alpha
+            loss = loss + loss_se
+            return_dict["loss_spk_encoder"] = loss_se
+        # pass losses to the dict
+        return_dict.update(
+            loss_gen=loss_gen,
+            loss_kl=loss_kl,
+            loss_feat=loss_feat,
+            loss_mel=loss_mel,
+            loss_duration=loss_duration,
+            loss=loss,
+        )
+        return return_dict
+
+
+class VitsDiscriminatorLoss(nn.Module):
+    """Least-squares GAN loss of the VITS discriminators, scaled by `c.disc_loss_alpha`."""
+
+    def __init__(self, c: Coqpit):
+        super().__init__()
+        self.disc_loss_alpha = c.disc_loss_alpha
+
+    @staticmethod
+    def discriminator_loss(scores_real, scores_fake):
+        """Push real scores towards 1 and fake scores towards 0; also return the per-discriminator values."""
+        total = 0
+        per_disc_real = []
+        per_disc_fake = []
+        for real_scores, fake_scores in zip(scores_real, scores_fake):
+            real_term = torch.mean((1 - real_scores.float()) ** 2)
+            fake_term = torch.mean(fake_scores.float() ** 2)
+            total += real_term + fake_term
+            per_disc_real.append(real_term.item())
+            per_disc_fake.append(fake_term.item())
+        return total, per_disc_real, per_disc_fake
+
+    def forward(self, scores_disc_real, scores_disc_fake):
+        """Return the weighted total under "loss_disc" and "loss", plus "loss_disc_real_{i}" per discriminator."""
+        raw_total, real_terms, _ = self.discriminator_loss(
+            scores_real=scores_disc_real, scores_fake=scores_disc_fake
+        )
+        weighted = raw_total * self.disc_loss_alpha
+        return_dict = {"loss_disc": weighted, "loss": 0.0 + weighted}
+
+        # only the real-sample terms are reported individually; the fake ones are discarded here
+        for idx, real_term in enumerate(real_terms):
+            return_dict[f"loss_disc_real_{idx}"] = real_term
+        return return_dict
+
+
+class ForwardTTSLoss(nn.Module):
+    """Generic configurable ForwardTTS loss."""
+
+    def __init__(self, c):
+        """Build the loss terms selected by the config `c`: spectrogram, duration and the optional ones."""
+        super().__init__()
+        if c.spec_loss_type not in ("mse", "l1"):
+            raise ValueError(" [!] Unknown spec_loss_type {}".format(c.spec_loss_type))
+        self.spec_loss = MSELossMasked(False) if c.spec_loss_type == "mse" else L1LossMasked(False)
+
+        # duration loss
+        if c.duration_loss_type == "huber":
+            self.dur_loss = Huber()
+        elif c.duration_loss_type == "l1":
+            self.dur_loss = L1LossMasked(False)
+        elif c.duration_loss_type == "mse":
+            self.dur_loss = MSELossMasked(False)
+        else:
+            raise ValueError(" [!] Unknown duration_loss_type {}".format(c.duration_loss_type))
+
+        # optional terms: their attributes are only created when the matching switch is on
+        if c.model_args.use_aligner:
+            self.aligner_loss = ForwardSumLoss()
+            self.aligner_loss_alpha = c.aligner_loss_alpha
+
+        if c.model_args.use_pitch:
+            self.pitch_loss = MSELossMasked(False)
+            self.pitch_loss_alpha = c.pitch_loss_alpha
+
+        if c.use_ssim_loss:
+            self.ssim = SSIMLoss()
+            self.ssim_loss_alpha = c.ssim_loss_alpha
+
+        self.spec_loss_alpha = c.spec_loss_alpha
+        self.dur_loss_alpha = c.dur_loss_alpha
+        self.binary_alignment_loss_alpha = c.binary_align_loss_alpha
+
+    @staticmethod
+    def _binary_alignment_loss(alignment_hard, alignment_soft):
+        """Binary loss that forces soft alignments to match the hard alignments as
+        explained in `https://arxiv.org/pdf/2108.10447.pdf`.
+        """
+        log_sum = torch.log(torch.clamp(alignment_soft[alignment_hard == 1], min=1e-12)).sum()
+        return -log_sum / alignment_hard.sum()
+
+    def forward(
+        self,
+        decoder_output,
+        decoder_target,
+        decoder_output_lens,
+        dur_output,
+        dur_target,
+        pitch_output,
+        pitch_target,
+        input_lens,
+        alignment_logprob=None,
+        alignment_hard=None,
+        alignment_soft=None,
+        binary_loss_weight=None,
+    ):
+        loss = 0
+        return_dict = {}
+        if hasattr(self, "ssim_loss") and self.ssim_loss_alpha > 0:
+            ssim_loss = self.ssim(decoder_output, decoder_target, decoder_output_lens)
+            loss = loss + self.ssim_loss_alpha * ssim_loss
+            return_dict["loss_ssim"] = self.ssim_loss_alpha * ssim_loss
+
+        if self.spec_loss_alpha > 0:
+            spec_loss = self.spec_loss(decoder_output, decoder_target, decoder_output_lens)
+            loss = loss + self.spec_loss_alpha * spec_loss
+            return_dict["loss_spec"] = self.spec_loss_alpha * spec_loss
+
+        if self.dur_loss_alpha > 0:
+            log_dur_tgt = torch.log(dur_target.float() + 1)
+            dur_loss = self.dur_loss(dur_output[:, :, None], log_dur_tgt[:, :, None], input_lens)
+            loss = loss + self.dur_loss_alpha * dur_loss
+            return_dict["loss_dur"] = self.dur_loss_alpha * dur_loss
+
+        if hasattr(self, "pitch_loss") and self.pitch_loss_alpha > 0:
+            pitch_loss = self.pitch_loss(pitch_output.transpose(1, 2), pitch_target.transpose(1, 2), input_lens)
+            loss = loss + self.pitch_loss_alpha * pitch_loss
+            return_dict["loss_pitch"] = self.pitch_loss_alpha * pitch_loss
+
+        if hasattr(self, "aligner_loss") and self.aligner_loss_alpha > 0:
+            aligner_loss = self.aligner_loss(alignment_logprob, input_lens, decoder_output_lens)
+            loss = loss + self.aligner_loss_alpha * aligner_loss
+            return_dict["loss_aligner"] = self.aligner_loss_alpha * aligner_loss
+
+        if self.binary_alignment_loss_alpha > 0 and alignment_hard is not None:
+            binary_alignment_loss = self._binary_alignment_loss(alignment_hard, alignment_soft)
+            loss = loss + self.binary_alignment_loss_alpha * binary_alignment_loss
+            if binary_loss_weight:
+                return_dict["loss_binary_alignment"] = (
+                    self.binary_alignment_loss_alpha * binary_alignment_loss * binary_loss_weight
+                )
+            else:
+                return_dict["loss_binary_alignment"] = self.binary_alignment_loss_alpha * binary_alignment_loss
+
+        return_dict["loss"] = loss
+        return return_dict
diff --git a/TTS/tts/layers/tacotron/__init__.py b/TTS/tts/layers/tacotron/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/TTS/tts/layers/tacotron/__pycache__/__init__.cpython-37.pyc b/TTS/tts/layers/tacotron/__pycache__/__init__.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..bf893129dce28a22f5906222f4b9cdb0cd3c48ab
Binary files /dev/null and b/TTS/tts/layers/tacotron/__pycache__/__init__.cpython-37.pyc differ
diff --git a/TTS/tts/layers/tacotron/__pycache__/__init__.cpython-38.pyc b/TTS/tts/layers/tacotron/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..666afddbd777337a480cd32b32dbb93eab97d5f9
Binary files /dev/null and b/TTS/tts/layers/tacotron/__pycache__/__init__.cpython-38.pyc differ
diff --git a/TTS/tts/layers/tacotron/__pycache__/__init__.cpython-39.pyc b/TTS/tts/layers/tacotron/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..3048460f5a5fa9b13577f5e9ebd115b0587e167b
Binary files /dev/null and b/TTS/tts/layers/tacotron/__pycache__/__init__.cpython-39.pyc differ
diff --git a/TTS/tts/layers/tacotron/__pycache__/attentions.cpython-37.pyc b/TTS/tts/layers/tacotron/__pycache__/attentions.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6f4eb9485a1be0010ab93815f3bf9facdcd78450
Binary files /dev/null and b/TTS/tts/layers/tacotron/__pycache__/attentions.cpython-37.pyc differ
diff --git a/TTS/tts/layers/tacotron/__pycache__/attentions.cpython-38.pyc b/TTS/tts/layers/tacotron/__pycache__/attentions.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..55c773006000d5855477eeab9817a9e59511ad85
Binary files /dev/null and b/TTS/tts/layers/tacotron/__pycache__/attentions.cpython-38.pyc differ
diff --git a/TTS/tts/layers/tacotron/__pycache__/attentions.cpython-39.pyc b/TTS/tts/layers/tacotron/__pycache__/attentions.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..983e9d98bc5df21459ba77144fcd9a79fe127542
Binary files /dev/null and b/TTS/tts/layers/tacotron/__pycache__/attentions.cpython-39.pyc differ
diff --git a/TTS/tts/layers/tacotron/__pycache__/capacitron_layers.cpython-37.pyc b/TTS/tts/layers/tacotron/__pycache__/capacitron_layers.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..71bca63e57bcea0ce21b1d966e544802be1082eb
Binary files /dev/null and b/TTS/tts/layers/tacotron/__pycache__/capacitron_layers.cpython-37.pyc differ
diff --git a/TTS/tts/layers/tacotron/__pycache__/capacitron_layers.cpython-38.pyc b/TTS/tts/layers/tacotron/__pycache__/capacitron_layers.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..14c84ef8e71e644668fc652b212f6088cb2b89b9
Binary files /dev/null and b/TTS/tts/layers/tacotron/__pycache__/capacitron_layers.cpython-38.pyc differ
diff --git a/TTS/tts/layers/tacotron/__pycache__/capacitron_layers.cpython-39.pyc b/TTS/tts/layers/tacotron/__pycache__/capacitron_layers.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..949a729fa0ee754415e20a2351e7c46990c90efd
Binary files /dev/null and b/TTS/tts/layers/tacotron/__pycache__/capacitron_layers.cpython-39.pyc differ
diff --git a/TTS/tts/layers/tacotron/__pycache__/common_layers.cpython-37.pyc b/TTS/tts/layers/tacotron/__pycache__/common_layers.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..2f70843758746f8c9ea0a1609531861faac76bfd
Binary files /dev/null and b/TTS/tts/layers/tacotron/__pycache__/common_layers.cpython-37.pyc differ
diff --git a/TTS/tts/layers/tacotron/__pycache__/common_layers.cpython-38.pyc b/TTS/tts/layers/tacotron/__pycache__/common_layers.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..9237aed1ab61b599d8920172c688746d2473c787
Binary files /dev/null and b/TTS/tts/layers/tacotron/__pycache__/common_layers.cpython-38.pyc differ
diff --git a/TTS/tts/layers/tacotron/__pycache__/common_layers.cpython-39.pyc b/TTS/tts/layers/tacotron/__pycache__/common_layers.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..fc810d634381733e5bb9b9d0ac4b0609e71e4300
Binary files /dev/null and b/TTS/tts/layers/tacotron/__pycache__/common_layers.cpython-39.pyc differ
diff --git a/TTS/tts/layers/tacotron/__pycache__/gst_layers.cpython-37.pyc b/TTS/tts/layers/tacotron/__pycache__/gst_layers.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7476e227bcc808813b1aab4ac33cc8a876efd3a0
Binary files /dev/null and b/TTS/tts/layers/tacotron/__pycache__/gst_layers.cpython-37.pyc differ
diff --git a/TTS/tts/layers/tacotron/__pycache__/gst_layers.cpython-38.pyc b/TTS/tts/layers/tacotron/__pycache__/gst_layers.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c3e940f11d01e6bc6de2d5f44e1737bd569ca66c
Binary files /dev/null and b/TTS/tts/layers/tacotron/__pycache__/gst_layers.cpython-38.pyc differ
diff --git a/TTS/tts/layers/tacotron/__pycache__/gst_layers.cpython-39.pyc b/TTS/tts/layers/tacotron/__pycache__/gst_layers.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ffc3a73b796a08c8723b513b5666d75e507c0cf0
Binary files /dev/null and b/TTS/tts/layers/tacotron/__pycache__/gst_layers.cpython-39.pyc differ
diff --git a/TTS/tts/layers/tacotron/__pycache__/tacotron2.cpython-37.pyc b/TTS/tts/layers/tacotron/__pycache__/tacotron2.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..710c6206221e4d0772ab702ef4ae906fd9c6761d
Binary files /dev/null and b/TTS/tts/layers/tacotron/__pycache__/tacotron2.cpython-37.pyc differ
diff --git a/TTS/tts/layers/tacotron/__pycache__/tacotron2.cpython-38.pyc b/TTS/tts/layers/tacotron/__pycache__/tacotron2.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6c46d91e2d82f8e0161b88c8086f687c21546ded
Binary files /dev/null and b/TTS/tts/layers/tacotron/__pycache__/tacotron2.cpython-38.pyc differ
diff --git a/TTS/tts/layers/tacotron/__pycache__/tacotron2.cpython-39.pyc b/TTS/tts/layers/tacotron/__pycache__/tacotron2.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..78a118e2de7cb727ba8afadc8f53b37792cb4158
Binary files /dev/null and b/TTS/tts/layers/tacotron/__pycache__/tacotron2.cpython-39.pyc differ
diff --git a/TTS/tts/layers/tacotron/attentions.py b/TTS/tts/layers/tacotron/attentions.py
new file mode 100644
index 0000000000000000000000000000000000000000..d8a90d72010066c1e3e09fd195c25954282e7526
--- /dev/null
+++ b/TTS/tts/layers/tacotron/attentions.py
@@ -0,0 +1,487 @@
+import torch
+from scipy.stats import betabinom
+from torch import nn
+from torch.nn import functional as F
+
+from TTS.tts.layers.tacotron.common_layers import Linear
+
+
+class LocationLayer(nn.Module):
+    """Layers for Location Sensitive Attention: a bias-free 1D convolution over two
+    attention-weight channels followed by a bias-free `Linear` projection.
+
+    Args:
+        attention_dim (int): size passed to the `Linear` projection as its second argument.
+        attention_n_filters (int, optional): number of filters in convolution. Defaults to 32.
+        attention_kernel_size (int, optional): kernel size of convolution filter. Defaults to 31.
+    """
+
+    def __init__(self, attention_dim, attention_n_filters=32, attention_kernel_size=31):
+        super().__init__()
+        # (k - 1) // 2 padding with stride 1 keeps the sequence length unchanged for odd kernel sizes.
+        same_padding = (attention_kernel_size - 1) // 2
+        self.location_conv1d = nn.Conv1d(
+            2, attention_n_filters, attention_kernel_size, stride=1, padding=same_padding, bias=False
+        )
+        self.location_dense = Linear(attention_n_filters, attention_dim, bias=False, init_gain="tanh")
+
+    def forward(self, attention_cat):
+        """Run the convolution over the stacked weights, then project every time step.
+
+        Shapes:
+            attention_cat: [B, 2, C]
+        """
+        conv_features = self.location_conv1d(attention_cat)
+        # move the filter axis last so the projection acts on it
+        projected = self.location_dense(conv_features.transpose(1, 2))
+        return projected
+
+
+class GravesAttention(nn.Module):
+    """Graves Attention as in ref1 with updates from ref2.
+    ref1: https://arxiv.org/abs/1910.10288
+    ref2: https://arxiv.org/pdf/1906.01083.pdf
+
+    Args:
+        query_dim (int): number of channels in query tensor.
+        K (int): number of Gaussian heads to be used for computing attention.
+    """
+
+    COEF = 0.3989422917366028  # numpy.sqrt(1/(2*numpy.pi))
+
+    def __init__(self, query_dim, K):
+        super().__init__()
+        self._mask_value = 1e-8
+        self.K = K
+        self.eps = 1e-5
+        self.J = None  # cached position grid, built lazily by `init_states`
+        self.N_a = nn.Sequential(
+            nn.Linear(query_dim, query_dim, bias=True), nn.ReLU(), nn.Linear(query_dim, 3 * K, bias=True)
+        )
+        self.attention_weights = None
+        self.mu_prev = None
+        self.init_layers()
+
+    def init_layers(self):
+        torch.nn.init.constant_(self.N_a[2].bias[(2 * self.K) : (3 * self.K)], 1.0)  # bias mean
+        torch.nn.init.constant_(self.N_a[2].bias[self.K : (2 * self.K)], 10)  # bias std
+
+    def init_states(self, inputs):
+        """Reset the per-utterance state for `inputs` ([B, T_in, C_encoder]) and refresh the grid `J`."""
+        # Rebuild the cached grid when it is missing, too short, or on a different device than `inputs`.
+        if self.J is None or inputs.shape[1] + 1 > self.J.shape[-1] or self.J.device != inputs.device:
+            self.J = torch.arange(0, inputs.shape[1] + 2.0).to(inputs.device) + 0.5
+        self.attention_weights = torch.zeros(inputs.shape[0], inputs.shape[1]).to(inputs.device)
+        self.mu_prev = torch.zeros(inputs.shape[0], self.K).to(inputs.device)
+
+    # pylint: disable=R0201
+    # pylint: disable=unused-argument
+    def preprocess_inputs(self, inputs):
+        return None
+
+    def forward(self, query, inputs, processed_inputs, mask):
+        """
+        Shapes:
+            query: [B, C_attention_rnn]
+            inputs: [B, T_in, C_encoder]
+            processed_inputs: place_holder
+            mask: [B, T_in]
+        """
+        gbk_t = self.N_a(query)
+        gbk_t = gbk_t.view(gbk_t.size(0), -1, self.K)
+
+        # attention model parameters
+        # each B x K
+        g_t = gbk_t[:, 0, :]
+        b_t = gbk_t[:, 1, :]
+        k_t = gbk_t[:, 2, :]
+
+        # dropout to decorrelate attention heads
+        g_t = torch.nn.functional.dropout(g_t, p=0.5, training=self.training)
+
+        # attention GMM parameters
+        sig_t = torch.nn.functional.softplus(b_t) + self.eps
+
+        mu_t = self.mu_prev + torch.nn.functional.softplus(k_t)
+        g_t = torch.softmax(g_t, dim=-1) + self.eps
+
+        j = self.J[: inputs.size(1) + 1]
+        # attention weights
+        # NOTE(review): a logistic CDF would be 1 / (1 + exp(.)); sigmoid(.) is used here -- confirm it is intended.
+        phi_t = g_t.unsqueeze(-1) * (1 / (1 + torch.sigmoid((mu_t.unsqueeze(-1) - j) / sig_t.unsqueeze(-1))))
+
+        # discretize attention weights
+        alpha_t = torch.sum(phi_t, 1)
+        alpha_t = alpha_t[:, 1:] - alpha_t[:, :-1]
+        alpha_t[alpha_t == 0] = 1e-8
+
+        # apply masking
+        if mask is not None:
+            alpha_t.data.masked_fill_(~mask, self._mask_value)
+
+        context = torch.bmm(alpha_t.unsqueeze(1), inputs).squeeze(1)
+        self.attention_weights = alpha_t
+        self.mu_prev = mu_t
+        return context
+
+
+class OriginalAttention(nn.Module):
+    """Bahdanau Attention with various optional modifications.
+    - Location sensitive attention: https://arxiv.org/abs/1712.05884
+    - Forward Attention: https://arxiv.org/abs/1807.06736 + state masking at inference
+    - Using sigmoid instead of softmax normalization
+    - Attention windowing at inference time
+
+    Note:
+        Location Sensitive Attention extends the additive attention mechanism
+    to use cumulative attention weights from previous decoder time steps with the current time step features.
+
+        Forward attention computes most probable monotonic alignment. The modified attention probabilities at each
+    timestep are computed recursively by the forward algorithm.
+
+        Transition agent in the forward attention explicitly gates the attention mechanism whether to move forward or
+    stay at each decoder timestep.
+
+        Attention windowing is an inductive prior that prevents the model from attending to previous and future timesteps
+    beyond a certain window.
+
+    Args:
+        query_dim (int): number of channels in the query tensor.
+        embedding_dim (int): number of channels in the value tensor. In general, the value tensor is the output of the encoder layer.
+        attention_dim (int): number of channels of the inner attention layers.
+        location_attention (bool): enable/disable location sensitive attention.
+        attention_location_n_filters (int): number of location attention filters.
+        attention_location_kernel_size (int): filter size of location attention convolution layer.
+        windowing (bool): enable/disable attention windowing in eval mode. The window is fixed by `init_win_idx` (2 steps back, 6 steps ahead).
+        norm (str): normalization method applied to the attention weights. 'softmax' or 'sigmoid'
+        forward_attn (bool): enable/disable forward attention.
+        trans_agent (bool): enable/disable transition agent in the forward attention.
+        forward_attn_mask (bool): enable/disable an explicit masking in forward attention. It is only applied in eval mode.
+    """
+
+    # Pylint gets confused by PyTorch conventions here
+    # pylint: disable=attribute-defined-outside-init
+    def __init__(
+        self,
+        query_dim,
+        embedding_dim,
+        attention_dim,
+        location_attention,
+        attention_location_n_filters,
+        attention_location_kernel_size,
+        windowing,
+        norm,
+        forward_attn,
+        trans_agent,
+        forward_attn_mask,
+    ):
+        super().__init__()
+        self.query_layer = Linear(query_dim, attention_dim, bias=False, init_gain="tanh")
+        self.inputs_layer = Linear(embedding_dim, attention_dim, bias=False, init_gain="tanh")
+        self.v = Linear(attention_dim, 1, bias=True)
+        if trans_agent:
+            self.ta = nn.Linear(query_dim + embedding_dim, 1, bias=True)
+        if location_attention:
+            self.location_layer = LocationLayer(
+                attention_dim, attention_location_n_filters, attention_location_kernel_size
+            )
+        self._mask_value = -float("inf")
+        self.windowing = windowing
+        self.win_idx = None
+        self.norm = norm
+        self.forward_attn = forward_attn
+        self.trans_agent = trans_agent
+        self.forward_attn_mask = forward_attn_mask
+        self.location_attention = location_attention
+
+    def init_win_idx(self):
+        """Reset the inference window: no step attended yet (-1), keep 2 steps back and 6 steps ahead."""
+        self.win_idx = -1
+        self.win_back = 2
+        self.win_front = 6
+
+    def init_forward_attn(self, inputs):
+        """Reset the forward-attention state: `alpha` peaks on the first input step and `u` is 0.5."""
+        B, T = inputs.shape[0], inputs.shape[1]
+        self.alpha = torch.cat([torch.ones([B, 1]), torch.zeros([B, T])[:, :-1] + 1e-7], dim=1).to(inputs.device)
+        self.u = (0.5 * torch.ones([B, 1])).to(inputs.device)
+
+    def init_location_attention(self, inputs):
+        """Zero the cumulative attention weights."""
+        B, T = inputs.size(0), inputs.size(1)
+        self.attention_weights_cum = torch.zeros([B, T], device=inputs.device)
+
+    def init_states(self, inputs):
+        """Reset the state kept between decoder steps for the encoder outputs `inputs` ([B, T, C])."""
+        B, T = inputs.size(0), inputs.size(1)
+        self.attention_weights = torch.zeros([B, T], device=inputs.device)
+        if self.location_attention:
+            self.init_location_attention(inputs)
+        if self.forward_attn:
+            self.init_forward_attn(inputs)
+        if self.windowing:
+            self.init_win_idx()
+
+    def preprocess_inputs(self, inputs):
+        return self.inputs_layer(inputs)
+
+    def update_location_attention(self, alignments):
+        self.attention_weights_cum += alignments
+
+    def get_location_attention(self, query, processed_inputs):
+        attention_cat = torch.cat((self.attention_weights.unsqueeze(1), self.attention_weights_cum.unsqueeze(1)), dim=1)
+        processed_query = self.query_layer(query.unsqueeze(1))
+        processed_attention_weights = self.location_layer(attention_cat)
+        energies = self.v(torch.tanh(processed_query + processed_attention_weights + processed_inputs))
+        energies = energies.squeeze(-1)
+        return energies, processed_query
+
+    def get_attention(self, query, processed_inputs):
+        processed_query = self.query_layer(query.unsqueeze(1))
+        energies = self.v(torch.tanh(processed_query + processed_inputs))
+        energies = energies.squeeze(-1)
+        return energies, processed_query
+
+    def apply_windowing(self, attention, inputs):
+        back_win = self.win_idx - self.win_back
+        front_win = self.win_idx + self.win_front
+        if back_win > 0:
+            attention[:, :back_win] = -float("inf")
+        if front_win < inputs.shape[1]:
+            attention[:, front_win:] = -float("inf")
+        # first step trick (win_idx == -1): raise position 0 to the maximum so the argmax below selects it.
+        if self.win_idx == -1:
+            attention[:, 0] = attention.max()
+        # Update the window
+        self.win_idx = torch.argmax(attention, 1).long()[0].item()
+        return attention
+
+    def apply_forward_attention(self, alignment):
+        # forward attention
+        fwd_shifted_alpha = F.pad(self.alpha[:, :-1].clone().to(alignment.device), (1, 0, 0, 0))
+        # compute transition potentials
+        alpha = ((1 - self.u) * self.alpha + self.u * fwd_shifted_alpha + 1e-8) * alignment
+        # force incremental alignment
+        if not self.training and self.forward_attn_mask:
+            _, n = fwd_shifted_alpha.max(1)
+            val, _ = alpha.max(1)
+            for b in range(alignment.shape[0]):
+                peak = int(n[b])
+                alpha[b, peak + 3 :] = 0
+                alpha[b, : max(peak - 1, 0)] = 0  # ignore all previous states to prevent repetition.
+                if peak >= 2:  # a negative index would wrap around and put mass on the last input step
+                    alpha[b, peak - 2] = 0.01 * val[b]  # smoothing factor for the prev step
+        # renormalize attention weights
+        alpha = alpha / alpha.sum(dim=1, keepdim=True)
+        return alpha
+
+    def forward(self, query, inputs, processed_inputs, mask):
+        """
+        shapes:
+            query: [B, C_attn_rnn]
+            inputs: [B, T_en, D_en]
+            processed_inputs: [B, T_en, D_attn]
+            mask: [B, T_en]
+        """
+        if self.location_attention:
+            attention, _ = self.get_location_attention(query, processed_inputs)
+        else:
+            attention, _ = self.get_attention(query, processed_inputs)
+        # apply masking
+        if mask is not None:
+            attention.data.masked_fill_(~mask, self._mask_value)
+        # apply windowing - only in eval mode
+        if not self.training and self.windowing:
+            attention = self.apply_windowing(attention, inputs)
+
+        # normalize attention values
+        if self.norm == "softmax":
+            alignment = torch.softmax(attention, dim=-1)
+        elif self.norm == "sigmoid":
+            gate = torch.sigmoid(attention)  # computed once and reused for the normalisation
+            alignment = gate / gate.sum(dim=1, keepdim=True)
+        else:
+            raise ValueError("Unknown value for attention norm type")
+
+        if self.location_attention:
+            self.update_location_attention(alignment)
+
+        # apply forward attention if enabled
+        if self.forward_attn:
+            alignment = self.apply_forward_attention(alignment)
+            self.alpha = alignment
+
+        context = torch.bmm(alignment.unsqueeze(1), inputs).squeeze(1)
+        self.attention_weights = alignment
+
+        # compute transition agent
+        if self.forward_attn and self.trans_agent:
+            ta_input = torch.cat([context, query.squeeze(1)], dim=-1)
+            self.u = torch.sigmoid(self.ta(ta_input))
+        return context
+
+
+class MonotonicDynamicConvolutionAttention(nn.Module):
+    """Dynamic convolution attention from
+    https://arxiv.org/pdf/1910.10288.pdf
+
+    query -> linear -> tanh -> linear ->|
+                                        |                                            mask values
+                                        v                                              |    |
+               atten_w(t-1) -|-> conv1d_dynamic -> linear -|-> tanh -> + -> softmax -> * -> * -> context
+                             |-> conv1d_static  -> linear -|           |
+                             |-> conv1d_prior   -> log ----------------|
+
+    query: attention rnn output.
+
+    Note:
+        Dynamic convolution attention is an alternative to the location sensitive attention with
+    dynamically computed convolution filters from the previous attention scores and a set of
+    constraints to keep the attention alignment diagonal.
+        DCA is sensitive to mixed precision training and might cause unstable training.
+
+    Args:
+        query_dim (int): number of channels in the query tensor.
+        embedding_dim (int): number of channels in the value tensor. Accepted but not used by this class.
+        attention_dim (int): number of channels of the inner attention layers.
+        static_filter_dim (int): number of channels in the convolution layer computing the static filters.
+        static_kernel_size (int): kernel size for the convolution layer computing the static filters.
+        dynamic_filter_dim (int): number of channels in the convolution layer computing the dynamic filters.
+        dynamic_kernel_size (int): kernel size for the convolution layer computing the dynamic filters.
+        prior_filter_len (int, optional): number of taps of the beta-binomial prior filter. Defaults to 11 from the paper.
+        alpha (float, optional): first shape parameter passed to `betabinom.pmf`. Defaults to 0.1 from the paper.
+        beta (float, optional): second shape parameter passed to `betabinom.pmf`. Defaults to 0.9 from the paper.
+    """
+
+    def __init__(
+        self,
+        query_dim,
+        embedding_dim,  # pylint: disable=unused-argument
+        attention_dim,
+        static_filter_dim,
+        static_kernel_size,
+        dynamic_filter_dim,
+        dynamic_kernel_size,
+        prior_filter_len=11,
+        alpha=0.1,
+        beta=0.9,
+    ):
+        super().__init__()
+        self._mask_value = 1e-8
+        self.dynamic_filter_dim = dynamic_filter_dim
+        self.dynamic_kernel_size = dynamic_kernel_size
+        self.prior_filter_len = prior_filter_len
+        self.attention_weights = None
+        # query path: linear -> tanh -> linear yields `dynamic_filter_dim` kernels per batch item
+        self.query_layer = nn.Linear(query_dim, attention_dim)
+        self.key_layer = nn.Linear(attention_dim, dynamic_filter_dim * dynamic_kernel_size, bias=False)
+        # static path: fixed convolution over the previous attention weights
+        self.static_filter_conv = nn.Conv1d(
+            in_channels=1,
+            out_channels=static_filter_dim,
+            kernel_size=static_kernel_size,
+            padding=(static_kernel_size - 1) // 2,
+            bias=False,
+        )
+        self.static_filter_layer = nn.Linear(static_filter_dim, attention_dim, bias=False)
+        self.dynamic_filter_layer = nn.Linear(dynamic_filter_dim, attention_dim)
+        self.v = nn.Linear(attention_dim, 1, bias=False)
+        # beta-binomial pmf over `prior_filter_len` taps, registered flipped as a buffer
+        prior_pmf = betabinom.pmf(range(prior_filter_len), prior_filter_len - 1, alpha, beta)
+        self.register_buffer("prior", torch.FloatTensor(prior_pmf).flip(0))
+
+    # pylint: disable=unused-argument
+    def forward(self, query, inputs, processed_inputs, mask):
+        """Compute the context for one decoder step and store the new attention weights.
+
+        query: [B, C_attn_rnn]
+        inputs: [B, T_en, D_en]
+        processed_inputs: place holder.
+        mask: [B, T_en]
+        context (returned): [B, D_en]
+        """
+        batch_size = query.size(0)
+        prev_weights = self.attention_weights
+        # prior term: left-padded convolution of the previous weights with the prior, taken in the log domain
+        padded_prev = F.pad(prev_weights.unsqueeze(1), (self.prior_filter_len - 1, 0))
+        log_prior = torch.log(F.conv1d(padded_prev, self.prior.view(1, 1, -1)).clamp_min_(1e-6)).squeeze(1)
+
+        # dynamic term: kernels predicted from the query, applied with one convolution group per batch item
+        kernels = self.key_layer(torch.tanh(self.query_layer(query))).view(-1, 1, self.dynamic_kernel_size)
+        dynamic_feats = F.conv1d(
+            prev_weights.unsqueeze(0), kernels, padding=(self.dynamic_kernel_size - 1) // 2, groups=batch_size
+        )
+        dynamic_feats = dynamic_feats.view(batch_size, self.dynamic_filter_dim, -1).transpose(1, 2)
+
+        # static term
+        static_feats = self.static_filter_conv(prev_weights.unsqueeze(1)).transpose(1, 2)
+
+        # combine both filter paths, score every input step and add the log prior
+        hidden = torch.tanh(self.static_filter_layer(static_feats) + self.dynamic_filter_layer(dynamic_feats))
+        energies = self.v(hidden).squeeze(-1) + log_prior
+
+        # compute attention weights
+        weights = F.softmax(energies, dim=-1)
+        # apply masking
+        if mask is not None:
+            weights.data.masked_fill_(~mask, self._mask_value)
+        self.attention_weights = weights
+        # compute context
+        return torch.bmm(weights.unsqueeze(1), inputs).squeeze(1)
+
+    def preprocess_inputs(self, inputs):  # pylint: disable=no-self-use
+        return None
+
+    def init_states(self, inputs):
+        """Start a new sequence with all attention mass on the first input step."""
+        start = torch.zeros([inputs.size(0), inputs.size(1)], device=inputs.device)
+        start[:, 0] = 1.0
+        self.attention_weights = start
+
+
+def init_attn(
+    attn_type,
+    query_dim,
+    embedding_dim,
+    attention_dim,
+    location_attention,
+    attention_location_n_filters,
+    attention_location_kernel_size,
+    windowing,
+    norm,
+    forward_attn,
+    trans_agent,
+    forward_attn_mask,
+    attn_K,
+):
+    """Build the attention module selected by `attn_type`.
+
+    `"original"` receives every `OriginalAttention` option, `"graves"` only `query_dim` and `attn_K`, and
+    `"dynamic_convolution"` the three dimension arguments plus fixed filter and prior settings.
+
+    Raises:
+        RuntimeError: if `attn_type` is none of the three names above.
+    """
+    if attn_type not in ("original", "graves", "dynamic_convolution"):
+        raise RuntimeError(f" [!] Given Attention Type '{attn_type}' is not exist.")
+
+    if attn_type == "graves":
+        return GravesAttention(query_dim, attn_K)
+
+    if attn_type == "dynamic_convolution":
+        # filter and prior hyper-parameters are fixed here rather than exposed to the caller
+        dca_settings = dict(
+            static_filter_dim=8,
+            static_kernel_size=21,
+            dynamic_filter_dim=8,
+            dynamic_kernel_size=21,
+            prior_filter_len=11,
+            alpha=0.1,
+            beta=0.9,
+        )
+        return MonotonicDynamicConvolutionAttention(query_dim, embedding_dim, attention_dim, **dca_settings)
+
+    original_args = (query_dim, embedding_dim, attention_dim, location_attention)
+    original_args += (attention_location_n_filters, attention_location_kernel_size, windowing, norm)
+    original_args += (forward_attn, trans_agent, forward_attn_mask)
+    return OriginalAttention(*original_args)
diff --git a/TTS/tts/layers/tacotron/capacitron_layers.py b/TTS/tts/layers/tacotron/capacitron_layers.py
new file mode 100644
index 0000000000000000000000000000000000000000..68321358d2edb32776298583b95cf277ac61762a
--- /dev/null
+++ b/TTS/tts/layers/tacotron/capacitron_layers.py
@@ -0,0 +1,206 @@
+import torch
+from torch import nn
+from torch.distributions.multivariate_normal import MultivariateNormal as MVN
+from torch.nn import functional as F
+
+
+class CapacitronVAE(nn.Module):
+    """Effective Use of Variational Embedding Capacity for prosody transfer.
+
+    See https://arxiv.org/abs/1906.03402
+
+    With a reference spectrogram, the module encodes it (optionally joined with a text
+    summary and a speaker embedding), predicts the parameters of an approximate
+    posterior and samples the latent embedding from it. Without a reference, the
+    embedding is sampled from a zero-mean, identity-covariance prior.
+
+    Args:
+        num_mel (int): number of mel channels of the reference spectrograms.
+        capacitron_VAE_embedding_dim (int): size of the sampled latent embedding.
+        encoder_output_dim (int): feature size of the text inputs given to the text summary LSTM.
+        reference_encoder_out_dim (int): output size of the reference encoder.
+        speaker_embedding_dim (int, optional): size of the speaker embedding that is concatenated
+            to the reference encoding. ``None`` means no speaker embedding is expected.
+        text_summary_embedding_dim (int, optional): size of the text summary that is concatenated
+            to the reference encoding. ``None`` means no text summary network is created.
+    """
+
+    def __init__(
+        self,
+        num_mel,
+        capacitron_VAE_embedding_dim,
+        encoder_output_dim=256,
+        reference_encoder_out_dim=128,
+        speaker_embedding_dim=None,
+        text_summary_embedding_dim=None,
+    ):
+        super().__init__()
+        # Init distributions. The prior is a plain attribute built from CPU tensors.
+        self.prior_distribution = MVN(
+            torch.zeros(capacitron_VAE_embedding_dim), torch.eye(capacitron_VAE_embedding_dim)
+        )
+        self.approximate_posterior_distribution = None
+        # the reference encoder output size is reference_encoder_out_dim
+        self.encoder = ReferenceEncoder(num_mel, out_dim=reference_encoder_out_dim)
+
+        # Init beta, the lagrange-like term for the KL distribution.
+        # log(e - 1) is the value whose softplus equals 1.0.
+        self.beta = torch.nn.Parameter(torch.log(torch.exp(torch.Tensor([1.0])) - 1), requires_grad=True)
+        mlp_input_dimension = reference_encoder_out_dim
+
+        if text_summary_embedding_dim is not None:
+            self.text_summary_net = TextSummary(text_summary_embedding_dim, encoder_output_dim=encoder_output_dim)
+            mlp_input_dimension += text_summary_embedding_dim
+        if speaker_embedding_dim is not None:
+            # TODO: Test a multispeaker model!
+            mlp_input_dimension += speaker_embedding_dim
+        self.post_encoder_mlp = PostEncoderMLP(mlp_input_dimension, capacitron_VAE_embedding_dim)
+
+    def forward(self, reference_mel_info=None, text_info=None, speaker_embedding=None):
+        """Sample a latent embedding from the posterior (with reference) or the prior (without).
+
+        Args:
+            reference_mel_info: pair ``(mels, lengths)`` with mels ``[batch_size, num_frames, num_mels]``
+                and lengths ``[batch_size]``. ``None`` selects sampling from the prior.
+            text_info: optional pair ``(text_inputs, input_lengths)`` with text_inputs
+                ``[batch_size, num_characters, num_embedding]``. Only used with a reference.
+            speaker_embedding: optional tensor holding one embedding per batch item; it is
+                flattened to ``[batch_size, -1]``. Only used with a reference.
+
+        Returns:
+            tuple: ``(VAE_embedding, approximate_posterior_distribution, prior_distribution, beta)``.
+            ``VAE_embedding`` has an extra axis inserted at position 1. The posterior is the
+            one stored by the most recent call that had a reference (``None`` if there was none).
+        """
+        # Use reference
+        if reference_mel_info is not None:
+            reference_mels = reference_mel_info[0]  # [batch_size, num_frames, num_mels]
+            mel_lengths = reference_mel_info[1]  # [batch_size]
+            enc_out = self.encoder(reference_mels, mel_lengths)
+
+            # concat speaker_embedding and/or text summary embedding
+            if text_info is not None:
+                text_inputs = text_info[0]  # [batch_size, num_characters, num_embedding]
+                input_lengths = text_info[1]
+                text_summary_out = self.text_summary_net(text_inputs, input_lengths).to(reference_mels.device)
+                enc_out = torch.cat([enc_out, text_summary_out], dim=-1)
+            if speaker_embedding is not None:
+                # Flatten to [batch_size, speaker_dim] using the known batch size. A bare
+                # torch.squeeze() would also drop the batch axis when batch_size == 1 and
+                # make the concatenation below fail.
+                speaker_embedding = speaker_embedding.reshape(enc_out.size(0), -1)
+                enc_out = torch.cat([enc_out, speaker_embedding], dim=-1)
+
+            # Feed the output of the ref encoder and information about text/speaker into
+            # an MLP to produce the parameters for the approximate posterior distribution
+            mu, sigma = self.post_encoder_mlp(enc_out)
+            # convert to cpu because prior_distribution was created on cpu
+            mu = mu.cpu()
+            sigma = sigma.cpu()
+
+            # Sample from the posterior: z ~ q(z|x)
+            # NOTE(review): MultivariateNormal's second positional argument is the covariance
+            # matrix, so `sigma` acts as a per-dimension variance here — confirm intent.
+            self.approximate_posterior_distribution = MVN(mu, torch.diag_embed(sigma))
+            VAE_embedding = self.approximate_posterior_distribution.rsample()
+        # Infer from the model, bypasses encoding
+        else:
+            # Sample from the prior: z ~ p(z); add a leading batch axis of size 1
+            VAE_embedding = self.prior_distribution.sample().unsqueeze(0)
+
+        # reshape to [batch_size, 1, capacitron_VAE_embedding_dim]
+        return VAE_embedding.unsqueeze(1), self.approximate_posterior_distribution, self.prior_distribution, self.beta
+
+
+class ReferenceEncoder(nn.Module):
+    """Turn padded mel spectrograms into one fixed-size prosody vector per utterance.
+
+    Six stride-2 Conv2d -> BatchNorm2d -> ReLU stages shrink the spectrogram; after each
+    stage the frames beyond an utterance's valid length are zeroed. The flattened result
+    is run through a unidirectional LSTM whose final hidden state is returned.
+
+    inputs: mel spectrograms [batch_size, num_spec_frames, num_mel]
+    outputs: [batch_size, out_dim]
+    """
+
+    def __init__(self, num_mel, out_dim):
+        super().__init__()
+        self.num_mel = num_mel
+        channels = [1, 32, 32, 64, 64, 128, 128]
+        n_stages = len(channels) - 1
+        conv_stack = []
+        for c_in, c_out in zip(channels[:-1], channels[1:]):
+            conv_stack.append(
+                nn.Conv2d(in_channels=c_in, out_channels=c_out, kernel_size=(3, 3), stride=(2, 2), padding=(2, 2))
+            )
+        self.convs = nn.ModuleList(conv_stack)
+        # NOTE(review): only this module's own flag is cleared here; the sub-modules created
+        # around it keep their defaults. Looks unintentional — confirm before relying on it.
+        self.training = False
+        self.bns = nn.ModuleList([nn.BatchNorm2d(num_features=c_out) for c_out in channels[1:]])
+
+        # mel axis size left after all conv stages (kernel 3, stride 2, padding 2)
+        lstm_input_size = channels[-1] * self.calculate_post_conv_height(num_mel, 3, 2, 2, n_stages)
+        self.recurrence = nn.LSTM(
+            input_size=lstm_input_size, hidden_size=out_dim, batch_first=True, bidirectional=False
+        )
+
+    def forward(self, inputs, input_lengths):
+        """Encode ``inputs`` given the number of valid frames per batch item in ``input_lengths``."""
+        n_batch = inputs.size(0)
+        feats = inputs.view(n_batch, 1, -1, self.num_mel)  # [batch_size, 1, num_frames, num_mel]
+        lengths = input_lengths.float()  # [batch_size]
+        for conv, bn in zip(self.convs, self.bns):
+            feats = F.relu(bn(conv(feats)))
+
+            # With kernel_size=3, stride=2 and padding=2, an input with L valid frames gives
+            # ceil(L / 2) + 1 valid output frames, e.g. L = 5:
+            #   0 0 x x x x x 0 0   (0 = zero padding added by the convolution)
+            # windows of width 3 moved in steps of 2 touch valid frames 4 times -> 4 outputs.
+            # Batch items are zero padded to a common length, so everything after an item's
+            # own valid length is zeroed again; otherwise the convolutions would keep
+            # producing values from padding.
+            lengths = torch.ceil((lengths / 2).float()).to(dtype=torch.int64) + 1  # [batch_size]
+            width = feats.size(2)
+            positions = torch.arange(width).to(inputs.device)
+            keep = positions.unsqueeze(0) < lengths.unsqueeze(1)  # [batch_size, width]
+            feats = feats * keep[:, None, :, None]  # broadcast over channels and mel axis
+
+        # [batch_size, post_conv_width, num_channels==128, post_conv_height]
+        feats = feats.transpose(1, 2)
+        steps = feats.size(1)
+        # [batch_size, post_conv_width, num_channels*post_conv_height]
+        feats = feats.contiguous().view(n_batch, steps, -1)
+
+        # Packing makes the LSTM stop at each item's last valid step, so the final
+        # hidden state is the output at that step.
+        packed = nn.utils.rnn.pack_padded_sequence(
+            feats, lengths.tolist(), batch_first=True, enforce_sorted=False
+        )
+        self.recurrence.flatten_parameters()
+        _, (final_hidden, _) = self.recurrence(packed)
+        summary = final_hidden[-1]
+
+        return summary.to(inputs.device)  # [batch_size, out_dim]
+
+    @staticmethod
+    def calculate_post_conv_height(height, kernel_size, stride, pad, n_convs):
+        """Size of the mel axis after ``n_convs`` convolutions with fixed kernel/stride/pad."""
+        remaining = n_convs
+        while remaining > 0:
+            height = (height + 2 * pad - kernel_size) // stride + 1
+            remaining -= 1
+        return height
+
+
+class TextSummary(nn.Module):
+    """Summarise a padded batch of text features into one fixed-size vector per item.
+
+    Args:
+        embedding_dim (int): size of the summary vector (LSTM hidden size).
+        encoder_output_dim (int): feature size of the incoming text features (LSTM input size).
+    """
+
+    def __init__(self, embedding_dim, encoder_output_dim):
+        super().__init__()
+        self.lstm = nn.LSTM(
+            input_size=encoder_output_dim,
+            hidden_size=embedding_dim,
+            batch_first=True,
+            bidirectional=False,
+        )
+
+    def forward(self, inputs, input_lengths):
+        """Return the LSTM hidden state at each item's last valid step.
+
+        ``inputs`` is batch-first and padded; ``input_lengths`` holds the valid length of
+        every item. Packing makes the LSTM skip the padded steps.
+        """
+        lengths = input_lengths.tolist()
+        packed = nn.utils.rnn.pack_padded_sequence(inputs, lengths, batch_first=True, enforce_sorted=False)
+        self.lstm.flatten_parameters()
+        _, (final_hidden, _) = self.lstm(packed)
+        return final_hidden[-1]
+
+
+class PostEncoderMLP(nn.Module):
+    """Two-layer MLP producing a mean and a positive scale vector of size ``hidden_size`` each.
+
+    Args:
+        input_size (int): feature size of the input.
+        hidden_size (int): width of the hidden layer and of each of the two outputs.
+    """
+
+    def __init__(self, input_size, hidden_size):
+        super().__init__()
+        self.hidden_size = hidden_size
+        # hidden layer -> tanh -> output layer; the output is twice as wide because it
+        # carries the mean in its first half and the scale in its second half
+        self.net = nn.Sequential(
+            nn.Linear(input_size, hidden_size),
+            nn.Tanh(),
+            nn.Linear(hidden_size, hidden_size * 2),
+        )
+        self.softplus = nn.Softplus()
+
+    def forward(self, _input):
+        """Return ``(mu, sigma)`` sliced from the MLP output along dimension 1."""
+        half = self.hidden_size
+        raw = self.net(_input)
+        # the mean is used as is
+        mu = raw[:, :half]
+        # the standard deviation must be positive, hence the softplus
+        sigma = self.softplus(raw[:, half:])
+        return mu, sigma
diff --git a/TTS/tts/layers/tacotron/common_layers.py b/TTS/tts/layers/tacotron/common_layers.py
new file mode 100644
index 0000000000000000000000000000000000000000..f78ff1e75f6c23eb1a0fe827247a1127bc8f9958
--- /dev/null
+++ b/TTS/tts/layers/tacotron/common_layers.py
@@ -0,0 +1,119 @@
+import torch
+from torch import nn
+from torch.nn import functional as F
+
+
+class Linear(nn.Module):
+    """Wrapper around ``torch.nn.Linear`` whose weight is Xavier-uniform initialised.
+
+    Args:
+        in_features (int): size of the last dimension of the input tensor.
+        out_features (int): size of the last dimension of the output tensor.
+        bias (bool, optional): whether the wrapped layer has a bias term. Defaults to True.
+        init_gain (str, optional): nonlinearity name handed to ``torch.nn.init.calculate_gain``
+            to scale the weight initialization. Defaults to 'linear'.
+    """
+
+    def __init__(self, in_features, out_features, bias=True, init_gain="linear"):
+        super().__init__()
+        self.linear_layer = torch.nn.Linear(in_features, out_features, bias=bias)
+        self._init_w(init_gain)
+
+    def _init_w(self, init_gain):
+        """Re-initialise the weight matrix in place; the bias is not touched."""
+        gain = torch.nn.init.calculate_gain(init_gain)
+        torch.nn.init.xavier_uniform_(self.linear_layer.weight, gain=gain)
+
+    def forward(self, x):
+        """Apply the wrapped linear layer to ``x``."""
+        projected = self.linear_layer(x)
+        return projected
+
+
+class LinearBN(nn.Module):
+    """Xavier-initialised linear layer followed by 1-d batch normalization.
+
+    x -> linear -> BN -> o
+
+    Args:
+        in_features (int): size of the last dimension of the input tensor.
+        out_features (int): size of the last dimension of the output tensor.
+        bias (bool, optional): whether the linear layer has a bias term. Defaults to True.
+        init_gain (str, optional): nonlinearity name used to pick the gain of the weight
+            initialization. Defaults to 'linear'.
+    """
+
+    def __init__(self, in_features, out_features, bias=True, init_gain="linear"):
+        super().__init__()
+        self.linear_layer = torch.nn.Linear(in_features, out_features, bias=bias)
+        self.batch_normalization = nn.BatchNorm1d(out_features, momentum=0.1, eps=1e-5)
+        self._init_w(init_gain)
+
+    def _init_w(self, init_gain):
+        """Re-initialise the linear weight in place with the gain of ``init_gain``."""
+        gain = torch.nn.init.calculate_gain(init_gain)
+        torch.nn.init.xavier_uniform_(self.linear_layer.weight, gain=gain)
+
+    def forward(self, x):
+        """
+        Shapes:
+            x: [T, B, C] or [B, C]
+        """
+        out = self.linear_layer(x)
+        is_sequence = out.dim() == 3
+        if is_sequence:
+            # BatchNorm1d wants channels in dimension 1: [T, B, C] -> [B, C, T]
+            out = out.permute(1, 2, 0)
+        out = self.batch_normalization(out)
+        if is_sequence:
+            # back to [T, B, C]
+            out = out.permute(2, 0, 1)
+        return out
+
+
+class Prenet(nn.Module):
+    """Tacotron specific Prenet with an optional Batch Normalization.
+
+    Note:
+        Prenet with BN improves the model performance significantly especially
+    if it is enabled after learning a diagonal attention alignment with the original
+    prenet. However, if the target dataset is high quality then it also works from
+    the start. It is also suggested to disable dropout if BN is in use.
+
+        prenet_type == "original"
+            x -> [linear -> ReLU -> Dropout]xN -> o
+
+        prenet_type == "bn"
+            x -> [linear -> BN -> ReLU -> Dropout]xN -> o
+
+    Args:
+        in_features (int): number of channels in the input tensor.
+        prenet_type (str, optional): prenet type "original" or "bn". Defaults to "original".
+        prenet_dropout (bool, optional): apply dropout (fixed rate 0.5) after every block. Defaults to True.
+        dropout_at_inference (bool, optional): use dropout at inference. It leads to a better quality for some models.
+        out_features (list, optional): List of output channels for each prenet block.
+            It also defines number of the prenet blocks based on the length of argument list.
+            Defaults to [256, 256].
+        bias (bool, optional): enable/disable bias in prenet linear layers. Defaults to True.
+
+    Raises:
+        ValueError: if ``prenet_type`` is neither "original" nor "bn".
+    """
+
+    # pylint: disable=dangerous-default-value
+    def __init__(
+        self,
+        in_features,
+        prenet_type="original",
+        prenet_dropout=True,
+        dropout_at_inference=False,
+        out_features=[256, 256],
+        bias=True,
+    ):
+        super().__init__()
+        self.prenet_type = prenet_type
+        self.prenet_dropout = prenet_dropout
+        self.dropout_at_inference = dropout_at_inference
+        # Pick the block type up front so that an unsupported name fails here instead of
+        # leaving the module without `linear_layers` and failing later in forward().
+        if prenet_type == "bn":
+            block_cls = LinearBN
+        elif prenet_type == "original":
+            block_cls = Linear
+        else:
+            raise ValueError(f" [!] Unknown prenet type '{prenet_type}'. Use 'original' or 'bn'.")
+        # copy so that tuples are accepted and the (shared) default list is never touched
+        out_sizes = list(out_features)
+        # every block consumes the output of the previous one
+        in_sizes = [in_features] + out_sizes[:-1]
+        self.linear_layers = nn.ModuleList(
+            [block_cls(in_size, out_size, bias=bias) for (in_size, out_size) in zip(in_sizes, out_sizes)]
+        )
+
+    def forward(self, x):
+        """Run ``x`` through all blocks; ReLU follows every block, dropout only if enabled."""
+        for linear in self.linear_layers:
+            if self.prenet_dropout:
+                # dropout stays active in eval mode when dropout_at_inference is set
+                x = F.dropout(F.relu(linear(x)), p=0.5, training=self.training or self.dropout_at_inference)
+            else:
+                x = F.relu(linear(x))
+        return x
diff --git a/TTS/tts/layers/tacotron/gst_layers.py b/TTS/tts/layers/tacotron/gst_layers.py
new file mode 100644
index 0000000000000000000000000000000000000000..ec622e4db80eb7f0e319bc11df950086b9562f41
--- /dev/null
+++ b/TTS/tts/layers/tacotron/gst_layers.py
@@ -0,0 +1,151 @@
+import torch
+import torch.nn.functional as F
+from torch import nn
+
+
+class GST(nn.Module):
+    """Global Style Token Module for factorizing prosody in speech.
+
+    A reference encoder summarises the input spectrogram and a style token layer
+    turns that summary (optionally joined with a speaker embedding) into the style
+    embedding.
+
+    See https://arxiv.org/pdf/1803.09017"""
+
+    def __init__(self, num_mel, num_heads, num_style_tokens, gst_embedding_dim, embedded_speaker_dim=None):
+        super().__init__()
+        self.encoder = ReferenceEncoder(num_mel, gst_embedding_dim)
+        self.style_token_layer = StyleTokenLayer(num_heads, num_style_tokens, gst_embedding_dim, embedded_speaker_dim)
+
+    def forward(self, inputs, speaker_embedding=None):
+        """Return the style embedding for ``inputs``.
+
+        When ``speaker_embedding`` is given it is concatenated to the reference
+        encoding along the last dimension before the style token layer is applied.
+        """
+        reference = self.encoder(inputs)
+        if speaker_embedding is None:
+            return self.style_token_layer(reference)
+        conditioned = torch.cat([reference, speaker_embedding], dim=-1)
+        return self.style_token_layer(conditioned)
+
+
+class ReferenceEncoder(nn.Module):
+    """Turn mel spectrograms into one fixed-size prosody vector per utterance.
+
+    Six stride-2 Conv2d -> BatchNorm2d -> ReLU stages shrink the spectrogram, the
+    result is flattened per time step and fed to a GRU whose final hidden state is
+    returned.
+
+    inputs: mel spectrograms [batch_size, num_spec_frames, num_mel]
+    outputs: [batch_size, embedding_dim // 2]
+    """
+
+    def __init__(self, num_mel, embedding_dim):
+        super().__init__()
+        self.num_mel = num_mel
+        channels = [1, 32, 32, 64, 64, 128, 128]
+        n_stages = len(channels) - 1
+        conv_stack = []
+        for c_in, c_out in zip(channels[:-1], channels[1:]):
+            conv_stack.append(
+                nn.Conv2d(in_channels=c_in, out_channels=c_out, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
+            )
+        self.convs = nn.ModuleList(conv_stack)
+        self.bns = nn.ModuleList([nn.BatchNorm2d(num_features=c_out) for c_out in channels[1:]])
+
+        # mel axis size left after all conv stages (kernel 3, stride 2, padding 1)
+        gru_input_size = channels[-1] * self.calculate_post_conv_height(num_mel, 3, 2, 1, n_stages)
+        self.recurrence = nn.GRU(input_size=gru_input_size, hidden_size=embedding_dim // 2, batch_first=True)
+
+    def forward(self, inputs):
+        """Encode ``inputs`` into the GRU's final hidden state."""
+        n_batch = inputs.size(0)
+        # [batch_size, num_channels==1, num_frames, num_mel]
+        feats = inputs.view(n_batch, 1, -1, self.num_mel)
+        for conv, bn in zip(self.convs, self.bns):
+            feats = F.relu(bn(conv(feats)))
+
+        # [batch_size, post_conv_width, num_channels==128, post_conv_height]
+        feats = feats.transpose(1, 2)
+        steps = feats.size(1)
+        # [batch_size, post_conv_width, num_channels*post_conv_height]
+        feats = feats.contiguous().view(n_batch, steps, -1)
+        self.recurrence.flatten_parameters()
+        # the second return value is the final hidden state: [1, batch_size, hidden_size]
+        _, final_hidden = self.recurrence(feats)
+
+        return final_hidden.squeeze(0)
+
+    @staticmethod
+    def calculate_post_conv_height(height, kernel_size, stride, pad, n_convs):
+        """Size of the mel axis after ``n_convs`` convolutions with fixed kernel/stride/pad."""
+        remaining = n_convs
+        while remaining > 0:
+            height = (height + 2 * pad - kernel_size) // stride + 1
+            remaining -= 1
+        return height
+
+
+class StyleTokenLayer(nn.Module):
+    """NN Module attending to a bank of learned style tokens with the prosody encoding as query.
+
+    Args:
+        num_heads (int): number of attention heads.
+        num_style_tokens (int): number of learned style tokens.
+        gst_embedding_dim (int): size of the produced style embedding.
+        d_vector_dim (int, optional): extra query width added when given (and non-zero).
+    """
+
+    def __init__(self, num_heads, num_style_tokens, gst_embedding_dim, d_vector_dim=None):
+        super().__init__()
+
+        query_dim = gst_embedding_dim // 2
+        if d_vector_dim:
+            query_dim += d_vector_dim
+        self.query_dim = query_dim
+
+        self.key_dim = gst_embedding_dim // num_heads
+        # allocated uninitialised, then filled from N(0, 0.5^2) right below
+        token_bank = torch.FloatTensor(num_style_tokens, self.key_dim)
+        self.style_tokens = nn.Parameter(token_bank)
+        nn.init.normal_(self.style_tokens, mean=0, std=0.5)
+        self.attention = MultiHeadAttention(
+            query_dim=self.query_dim, key_dim=self.key_dim, num_units=gst_embedding_dim, num_heads=num_heads
+        )
+
+    def forward(self, inputs):
+        """Attend over the tanh-squashed tokens using ``inputs`` ([batch_size, query_dim]) as query."""
+        n_batch = inputs.size(0)
+        # [batch_size, 1, query_dim]
+        query = inputs.unsqueeze(1)
+        # [batch_size, num_style_tokens, key_dim]; the same tokens are shared by every item
+        keys = torch.tanh(self.style_tokens).unsqueeze(0).expand(n_batch, -1, -1)
+        return self.attention(query, keys)
+
+
+class MultiHeadAttention(nn.Module):
+    """Scaled dot-product attention with several heads; values are projected from the keys.
+
+    input:
+        query --- [N, T_q, query_dim]
+        key --- [N, T_k, key_dim]
+    output:
+        out --- [N, T_q, num_units]
+    """
+
+    def __init__(self, query_dim, key_dim, num_units, num_heads):
+        super().__init__()
+        self.num_units = num_units
+        self.num_heads = num_heads
+        self.key_dim = key_dim
+
+        self.W_query = nn.Linear(in_features=query_dim, out_features=num_units, bias=False)
+        self.W_key = nn.Linear(in_features=key_dim, out_features=num_units, bias=False)
+        self.W_value = nn.Linear(in_features=key_dim, out_features=num_units, bias=False)
+
+    def _split_heads(self, projected):
+        """Cut the last dimension into heads and stack them in front: [h, N, T, num_units/h]."""
+        head_size = self.num_units // self.num_heads
+        return torch.stack(torch.split(projected, head_size, dim=2), dim=0)
+
+    def forward(self, query, key):
+        """Attend from ``query`` over ``key``; ``key`` also provides the values."""
+        q_heads = self._split_heads(self.W_query(query))  # [h, N, T_q, num_units/h]
+        k_heads = self._split_heads(self.W_key(key))  # [h, N, T_k, num_units/h]
+        v_heads = self._split_heads(self.W_value(key))  # [h, N, T_k, num_units/h]
+
+        # weights = softmax(QK^T / sqrt(key_dim)) over the key positions
+        logits = torch.matmul(q_heads, k_heads.transpose(2, 3)) / (self.key_dim**0.5)  # [h, N, T_q, T_k]
+        weights = F.softmax(logits, dim=3)
+
+        # weighted sum of the values per head, then heads side by side again
+        attended = torch.matmul(weights, v_heads)  # [h, N, T_q, num_units/h]
+        return torch.cat(attended.unbind(dim=0), dim=2)  # [N, T_q, num_units]
diff --git a/TTS/tts/layers/tacotron/tacotron.py b/TTS/tts/layers/tacotron/tacotron.py
new file mode 100644
index 0000000000000000000000000000000000000000..bddaf449c112a99458c9047c5c07df592e935972
--- /dev/null
+++ b/TTS/tts/layers/tacotron/tacotron.py
@@ -0,0 +1,504 @@
+# coding: utf-8
+# adapted from https://github.com/r9y9/tacotron_pytorch
+
+import torch
+from torch import nn
+
+from .attentions import init_attn
+from .common_layers import Prenet
+
+
+class BatchNormConv1d(nn.Module):
+    r"""Explicit zero padding -> bias-free Conv1d -> BatchNorm1d -> optional activation.
+
+    Args:
+        in_channels: number of input channels
+        out_channels: number of output channels
+        kernel_size: kernel size of conv filters
+        stride: stride of conv filters
+        padding: padding handed to ``nn.ConstantPad1d`` (applied before the convolution)
+        activation: module applied after the batch normalization, or ``None``
+
+    Shapes:
+        - input: (B, in_channels, T)
+        - output: (B, out_channels, T_out)
+    """
+
+    def __init__(self, in_channels, out_channels, kernel_size, stride, padding, activation=None):
+        super().__init__()
+        self.padding = padding
+        # padding is done by a separate layer, so the convolution itself uses padding=0
+        self.padder = nn.ConstantPad1d(padding, 0)
+        self.conv1d = nn.Conv1d(
+            in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=0, bias=False
+        )
+        # Values copied from tensorflow's defaults.
+        # NOTE(review): PyTorch's `momentum` weights the NEW batch statistic, which is the
+        # opposite of the TF convention — confirm 0.99 is really intended here.
+        self.bn = nn.BatchNorm1d(out_channels, momentum=0.99, eps=1e-3)
+        self.activation = activation
+        # self.init_layers()
+
+    def init_layers(self):
+        """Xavier-initialise the conv weight with the gain matching the activation.
+
+        Raises:
+            RuntimeError: if the activation is not ``None``, ReLU or Tanh.
+        """
+        act = self.activation
+        if act is None:
+            w_gain = "linear"
+        elif isinstance(act, torch.nn.ReLU):
+            w_gain = "relu"
+        elif isinstance(act, torch.nn.Tanh):
+            w_gain = "tanh"
+        else:
+            raise RuntimeError("Unknown activation function")
+        gain = torch.nn.init.calculate_gain(w_gain)
+        torch.nn.init.xavier_uniform_(self.conv1d.weight, gain=gain)
+
+    def forward(self, x):
+        """Pad, convolve and normalise ``x``; apply the activation when one is set."""
+        normed = self.bn(self.conv1d(self.padder(x)))
+        if self.activation is None:
+            return normed
+        return self.activation(normed)
+
+
+class Highway(nn.Module):
+    r"""Highway layer as explained in https://arxiv.org/abs/1505.00387
+
+    Computes ``relu(H(x)) * t + x * (1 - t)`` with the gate ``t = sigmoid(T(x))``.
+    The gate bias starts at -1 and the transform bias at 0.
+
+    Args:
+        in_features (int): size of each input sample
+        out_feature (int): size of each output sample
+
+    Shapes:
+        - input: (B, *, H_in)
+        - output: (B, *, H_out)
+    """
+
+    # TODO: Try GLU layer
+    def __init__(self, in_features, out_feature):
+        super().__init__()
+        # transform branch
+        self.H = nn.Linear(in_features, out_feature)
+        self.H.bias.data.zero_()
+        # gate branch
+        self.T = nn.Linear(in_features, out_feature)
+        self.T.bias.data.fill_(-1)
+        self.relu = nn.ReLU()
+        self.sigmoid = nn.Sigmoid()
+        # self.init_layers()
+
+    def init_layers(self):
+        """Xavier-initialise both weights with the gain of their activation (relu / sigmoid)."""
+        for layer, nonlinearity in ((self.H, "relu"), (self.T, "sigmoid")):
+            torch.nn.init.xavier_uniform_(layer.weight, gain=torch.nn.init.calculate_gain(nonlinearity))
+
+    def forward(self, inputs):
+        """Blend the transformed input with the untouched input according to the gate."""
+        transformed = self.relu(self.H(inputs))
+        gate = self.sigmoid(self.T(inputs))
+        return transformed * gate + inputs * (1.0 - gate)
+
+
+class CBHG(nn.Module):
+    """CBHG module: a recurrent neural network composed of:
+    - 1-d convolution banks
+    - Highway networks + residual connections
+    - Bidirectional gated recurrent units
+
+    Args:
+        in_features (int): number of input channels.
+        K (int): max filter size in conv bank; kernel sizes 1..K are used.
+        conv_bank_features (int): output channels of every conv bank layer.
+        conv_projections (list): conv channel sizes for conv projections. The last entry
+            must equal ``in_features`` because the projection output is added to the input.
+        highway_features (int): feature size of the highway layers. A bias-free linear layer
+            maps the projections to this size when it differs from ``conv_projections[-1]``.
+        gru_features (int): hidden size of each GRU direction.
+        num_highways (int): number of highways layers
+
+    Shapes:
+        - input: (B, C, T_in)
+        - output: (B, T_in, gru_features*2)
+    """
+
+    # pylint: disable=dangerous-default-value
+    def __init__(
+        self,
+        in_features,
+        K=16,
+        conv_bank_features=128,
+        conv_projections=[128, 128],
+        highway_features=128,
+        gru_features=128,
+        num_highways=4,
+    ):
+        super().__init__()
+        self.in_features = in_features
+        self.conv_bank_features = conv_bank_features
+        self.highway_features = highway_features
+        self.gru_features = gru_features
+        self.conv_projections = conv_projections
+        self.relu = nn.ReLU()
+        # list of conv1d bank with filter size k=1...K
+        # padding [(k - 1) // 2, k // 2] adds k - 1 frames in total, so with stride 1
+        # every bank keeps the input length
+        # TODO: try dilational layers instead
+        self.conv1d_banks = nn.ModuleList(
+            [
+                BatchNormConv1d(
+                    in_features,
+                    conv_bank_features,
+                    kernel_size=k,
+                    stride=1,
+                    padding=[(k - 1) // 2, k // 2],
+                    activation=self.relu,
+                )
+                for k in range(1, K + 1)
+            ]
+        )
+        # TODO: try average pooling OR larger kernel size
+        # the first projection consumes the concatenated bank outputs, every following one
+        # the output of its predecessor
+        out_features = [K * conv_bank_features] + conv_projections[:-1]
+        # ReLU after every projection except the last one
+        activations = [self.relu] * (len(conv_projections) - 1)
+        activations += [None]
+        # setup conv1d projection layers
+        layer_set = []
+        for (in_size, out_size, ac) in zip(out_features, conv_projections, activations):
+            layer = BatchNormConv1d(in_size, out_size, kernel_size=3, stride=1, padding=[1, 1], activation=ac)
+            layer_set.append(layer)
+        self.conv1d_projections = nn.ModuleList(layer_set)
+        # setup Highway layers
+        if self.highway_features != conv_projections[-1]:
+            self.pre_highway = nn.Linear(conv_projections[-1], highway_features, bias=False)
+        self.highways = nn.ModuleList([Highway(highway_features, highway_features) for _ in range(num_highways)])
+        # bi-directional GRU layer. Its input is the highway output, so the input size must
+        # be highway_features (it used to be gru_features, which only worked when both
+        # sizes were equal).
+        self.gru = nn.GRU(highway_features, gru_features, 1, batch_first=True, bidirectional=True)
+
+    def forward(self, inputs):
+        """Run the conv banks, projections, highways and GRU on ``inputs`` (B, in_features, T_in)."""
+        # (B, in_features, T_in)
+        x = inputs
+        # (B, conv_bank_features*K, T_in)
+        # Concat conv1d bank outputs
+        outs = []
+        for conv1d in self.conv1d_banks:
+            out = conv1d(x)
+            outs.append(out)
+        x = torch.cat(outs, dim=1)
+        assert x.size(1) == self.conv_bank_features * len(self.conv1d_banks)
+        for conv1d in self.conv1d_projections:
+            x = conv1d(x)
+        # Residual connection; out-of-place so the projection output stays intact for autograd
+        x = x + inputs
+        x = x.transpose(1, 2)
+        if self.highway_features != self.conv_projections[-1]:
+            x = self.pre_highway(x)
+        # TODO: try residual scaling as in Deep Voice 3
+        # TODO: try plain residual layers
+        for highway in self.highways:
+            x = highway(x)
+        # (B, T_in, gru_features*2)
+        # TODO: replace GRU with convolution as in Deep Voice 3
+        self.gru.flatten_parameters()
+        outputs, _ = self.gru(x)
+        return outputs
+
+
+class EncoderCBHG(nn.Module):
+    r"""CBHG configured for the encoder: 128 input channels, 16 conv banks, 4 highway layers."""
+
+    def __init__(self):
+        super().__init__()
+        encoder_settings = {
+            "K": 16,
+            "conv_bank_features": 128,
+            "conv_projections": [128, 128],
+            "highway_features": 128,
+            "gru_features": 128,
+            "num_highways": 4,
+        }
+        self.cbhg = CBHG(128, **encoder_settings)
+
+    def forward(self, x):
+        """Delegate to the wrapped CBHG."""
+        encoded = self.cbhg(x)
+        return encoded
+
+
+class Encoder(nn.Module):
+    r"""Encoder made of a Prenet (256 -> 128 channels) followed by the encoder CBHG.
+    Args:
+        in_features (int): feature size of the input embeddings.
+
+    Shapes:
+        - inputs: (B, T, D_in)
+        - outputs: (B, T, 128 * 2)
+    """
+
+    def __init__(self, in_features):
+        super().__init__()
+        self.prenet = Prenet(in_features, out_features=[256, 128])
+        self.cbhg = EncoderCBHG()
+
+    def forward(self, inputs):
+        """Apply the prenet, move channels to dimension 1 and run the CBHG."""
+        # B x T x prenet_dim
+        prenet_out = self.prenet(inputs)
+        # the CBHG gets B x prenet_dim x T
+        channels_first = prenet_out.transpose(1, 2)
+        return self.cbhg(channels_first)
+
+
+class PostCBHG(nn.Module):
+    """CBHG configured for post-processing: ``mel_dim`` input channels, 8 conv banks and
+    projections of 256 and ``mel_dim`` channels."""
+
+    def __init__(self, mel_dim):
+        super().__init__()
+        postnet_settings = {
+            "K": 8,
+            "conv_bank_features": 128,
+            "conv_projections": [256, mel_dim],
+            "highway_features": 128,
+            "gru_features": 128,
+            "num_highways": 4,
+        }
+        self.cbhg = CBHG(mel_dim, **postnet_settings)
+
+    def forward(self, x):
+        """Delegate to the wrapped CBHG."""
+        processed = self.cbhg(x)
+        return processed
+
+
+class Decoder(nn.Module):
+    """Tacotron decoder.
+
+    Args:
+        in_channels (int): number of input channels.
+        frame_channels (int): number of feature frame channels.
+        r (int): number of outputs per time step (reduction rate).
+        memory_size (int): size of the past window. if <= 0 memory_size = r
+        attn_type (string): type of attention used in decoder.
+        attn_windowing (bool): if true, define an attention window centered to maximum
+            attention response. It provides more robust attention alignment especially
+            at interence time.
+        attn_norm (string): attention normalization function. 'sigmoid' or 'softmax'.
+        prenet_type (string): 'original' or 'bn'.
+        prenet_dropout (float): prenet dropout rate.
+        forward_attn (bool): if true, use forward attention method. https://arxiv.org/abs/1807.06736
+        trans_agent (bool): if true, use transition agent. https://arxiv.org/abs/1807.06736
+        forward_attn_mask (bool): if true, mask attention values smaller than a threshold.
+        location_attn (bool): if true, use location sensitive attention.
+        attn_K (int): number of attention heads for GravesAttention.
+        separate_stopnet (bool): if true, detach stopnet input to prevent gradient flow.
+        d_vector_dim (int): size of speaker embedding vector, for multi-speaker training.
+        max_decoder_steps (int): Maximum number of steps allowed for the decoder. Defaults to 500.
+    """
+
+    # Pylint gets confused by PyTorch conventions here
+    # pylint: disable=attribute-defined-outside-init
+
+    def __init__(
+        self,
+        in_channels,
+        frame_channels,
+        r,
+        memory_size,
+        attn_type,
+        attn_windowing,
+        attn_norm,
+        prenet_type,
+        prenet_dropout,
+        forward_attn,
+        trans_agent,
+        forward_attn_mask,
+        location_attn,
+        attn_K,
+        separate_stopnet,
+        max_decoder_steps,
+    ):
+        super().__init__()
+        self.r_init = r
+        self.r = r
+        self.in_channels = in_channels
+        self.max_decoder_steps = max_decoder_steps
+        self.use_memory_queue = memory_size > 0
+        self.memory_size = memory_size if memory_size > 0 else r
+        self.frame_channels = frame_channels
+        self.separate_stopnet = separate_stopnet
+        self.query_dim = 256
+        # memory -> |Prenet| -> processed_memory
+        prenet_dim = frame_channels * self.memory_size if self.use_memory_queue else frame_channels
+        self.prenet = Prenet(prenet_dim, prenet_type, prenet_dropout, out_features=[256, 128])
+        # processed_inputs, processed_memory -> |Attention| -> Attention, attention, RNN_State
+        # attention_rnn generates queries for the attention mechanism
+        self.attention_rnn = nn.GRUCell(in_channels + 128, self.query_dim)
+        self.attention = init_attn(
+            attn_type=attn_type,
+            query_dim=self.query_dim,
+            embedding_dim=in_channels,
+            attention_dim=128,
+            location_attention=location_attn,
+            attention_location_n_filters=32,
+            attention_location_kernel_size=31,
+            windowing=attn_windowing,
+            norm=attn_norm,
+            forward_attn=forward_attn,
+            trans_agent=trans_agent,
+            forward_attn_mask=forward_attn_mask,
+            attn_K=attn_K,
+        )
+        # (processed_memory | attention context) -> |Linear| -> decoder_RNN_input
+        self.project_to_decoder_in = nn.Linear(256 + in_channels, 256)
+        # decoder_RNN_input -> |RNN| -> RNN_state
+        self.decoder_rnns = nn.ModuleList([nn.GRUCell(256, 256) for _ in range(2)])
+        # RNN_state -> |Linear| -> mel_spec
+        self.proj_to_mel = nn.Linear(256, frame_channels * self.r_init)
+        # learn init values instead of zero init.
+        self.stopnet = StopNet(256 + frame_channels * self.r_init)
+
+    def set_r(self, new_r):
+        """Set the reduction rate `r` used by the following decoding steps.
+
+        Only `self.r` changes; `self.r_init`, which sized the output projection
+        and the stopnet in `__init__`, is left untouched.
+
+        Args:
+            new_r: new number of frames to keep per decoder step.
+        """
+        self.r = new_r
+
+    def _reshape_memory(self, memory):
+        """
+        Reshape the spectrograms for given 'r'
+        """
+        # Grouping multiple frames if necessary
+        if memory.size(-1) == self.frame_channels:
+            memory = memory.view(memory.shape[0], memory.size(1) // self.r, -1)
+        # Time first (T_decoder, B, frame_channels)
+        memory = memory.transpose(0, 1)
+        return memory
+
+    def _init_states(self, inputs):
+        """
+        Initialization of decoder states
+        """
+        B = inputs.size(0)
+        # go frame as zeros matrix
+        if self.use_memory_queue:
+            self.memory_input = torch.zeros(1, device=inputs.device).repeat(B, self.frame_channels * self.memory_size)
+        else:
+            self.memory_input = torch.zeros(1, device=inputs.device).repeat(B, self.frame_channels)
+        # decoder states
+        self.attention_rnn_hidden = torch.zeros(1, device=inputs.device).repeat(B, 256)
+        self.decoder_rnn_hiddens = [
+            torch.zeros(1, device=inputs.device).repeat(B, 256) for idx in range(len(self.decoder_rnns))
+        ]
+        self.context_vec = inputs.data.new(B, self.in_channels).zero_()
+        # cache attention inputs
+        self.processed_inputs = self.attention.preprocess_inputs(inputs)
+
+    def _parse_outputs(self, outputs, attentions, stop_tokens):
+        # Back to batch first
+        attentions = torch.stack(attentions).transpose(0, 1)
+        stop_tokens = torch.stack(stop_tokens).transpose(0, 1)
+        outputs = torch.stack(outputs).transpose(0, 1).contiguous()
+        outputs = outputs.view(outputs.size(0), -1, self.frame_channels)
+        outputs = outputs.transpose(1, 2)
+        return outputs, attentions, stop_tokens
+
+    def decode(self, inputs, mask=None):
+        # Prenet
+        processed_memory = self.prenet(self.memory_input)
+        # Attention RNN
+        self.attention_rnn_hidden = self.attention_rnn(
+            torch.cat((processed_memory, self.context_vec), -1), self.attention_rnn_hidden
+        )
+        self.context_vec = self.attention(self.attention_rnn_hidden, inputs, self.processed_inputs, mask)
+        # Concat RNN output and attention context vector
+        decoder_input = self.project_to_decoder_in(torch.cat((self.attention_rnn_hidden, self.context_vec), -1))
+
+        # Pass through the decoder RNNs
+        for idx, decoder_rnn in enumerate(self.decoder_rnns):
+            self.decoder_rnn_hiddens[idx] = decoder_rnn(decoder_input, self.decoder_rnn_hiddens[idx])
+            # Residual connection
+            decoder_input = self.decoder_rnn_hiddens[idx] + decoder_input
+        decoder_output = decoder_input
+
+        # predict mel vectors from decoder vectors
+        output = self.proj_to_mel(decoder_output)
+        # output = torch.sigmoid(output)
+        # predict stop token
+        stopnet_input = torch.cat([decoder_output, output], -1)
+        if self.separate_stopnet:
+            stop_token = self.stopnet(stopnet_input.detach())
+        else:
+            stop_token = self.stopnet(stopnet_input)
+        output = output[:, : self.r * self.frame_channels]
+        return output, stop_token, self.attention.attention_weights
+
+    def _update_memory_input(self, new_memory):
+        if self.use_memory_queue:
+            if self.memory_size > self.r:
+                # memory queue size is larger than number of frames per decoder iter
+                self.memory_input = torch.cat(
+                    [new_memory, self.memory_input[:, : (self.memory_size - self.r) * self.frame_channels].clone()],
+                    dim=-1,
+                )
+            else:
+                # memory queue size smaller than number of frames per decoder iter
+                self.memory_input = new_memory[:, : self.memory_size * self.frame_channels]
+        else:
+            # use only the last frame prediction
+            # assert new_memory.shape[-1] == self.r * self.frame_channels
+            self.memory_input = new_memory[:, self.frame_channels * (self.r - 1) :]
+
+    def forward(self, inputs, memory, mask):
+        """
+        Args:
+            inputs: Encoder outputs.
+            memory: Decoder memory (autoregression. If None (at eval-time),
+              decoder outputs are used as decoder inputs. If None, it uses the last
+              output as the input.
+            mask: Attention mask for sequence padding.
+
+        Shapes:
+            - inputs: (B, T, D_out_enc)
+            - memory: (B, T_mel, D_mel)
+        """
+        # Run greedy decoding if memory is None
+        memory = self._reshape_memory(memory)
+        outputs = []
+        attentions = []
+        stop_tokens = []
+        t = 0
+        self._init_states(inputs)
+        self.attention.init_states(inputs)
+        while len(outputs) < memory.size(0):
+            if t > 0:
+                new_memory = memory[t - 1]
+                self._update_memory_input(new_memory)
+
+            output, stop_token, attention = self.decode(inputs, mask)
+            outputs += [output]
+            attentions += [attention]
+            stop_tokens += [stop_token.squeeze(1)]
+            t += 1
+        return self._parse_outputs(outputs, attentions, stop_tokens)
+
+    def inference(self, inputs):
+        """
+        Args:
+            inputs: encoder outputs.
+        Shapes:
+            - inputs: batch x time x encoder_out_dim
+        """
+        outputs = []
+        attentions = []
+        stop_tokens = []
+        t = 0
+        self._init_states(inputs)
+        self.attention.init_states(inputs)
+        while True:
+            if t > 0:
+                new_memory = outputs[-1]
+                self._update_memory_input(new_memory)
+            output, stop_token, attention = self.decode(inputs, None)
+            stop_token = torch.sigmoid(stop_token.data)
+            outputs += [output]
+            attentions += [attention]
+            stop_tokens += [stop_token]
+            t += 1
+            if t > inputs.shape[1] / 4 and (stop_token > 0.6 or attention[:, -1].item() > 0.6):
+                break
+            if t > self.max_decoder_steps:
+                print("   | > Decoder stopped with 'max_decoder_steps")
+                break
+        return self._parse_outputs(outputs, attentions, stop_tokens)
+
+
+class StopNet(nn.Module):
+    r"""Stopnet signalling decoder to stop inference.
+    Args:
+        in_features (int): feature dimension of input.
+    """
+
+    def __init__(self, in_features):
+        super().__init__()
+        self.dropout = nn.Dropout(0.1)
+        self.linear = nn.Linear(in_features, 1)
+        torch.nn.init.xavier_uniform_(self.linear.weight, gain=torch.nn.init.calculate_gain("linear"))
+
+    def forward(self, inputs):
+        outputs = self.dropout(inputs)
+        outputs = self.linear(outputs)
+        return outputs
diff --git a/TTS/tts/layers/tacotron/tacotron2.py b/TTS/tts/layers/tacotron/tacotron2.py
new file mode 100644
index 0000000000000000000000000000000000000000..c79b70997249efc94cbac630bcc7d6c571f5743e
--- /dev/null
+++ b/TTS/tts/layers/tacotron/tacotron2.py
@@ -0,0 +1,414 @@
+import torch
+from torch import nn
+from torch.nn import functional as F
+
+from .attentions import init_attn
+from .common_layers import Linear, Prenet
+
+
+# pylint: disable=no-value-for-parameter
+# pylint: disable=unexpected-keyword-arg
+class ConvBNBlock(nn.Module):
+    r"""Convolutions with Batch Normalization and non-linear activation.
+
+    Args:
+        in_channels (int): number of input channels.
+        out_channels (int): number of output channels.
+        kernel_size (int): convolution kernel size.
+        activation (str): 'relu', 'tanh', None (linear).
+
+    Shapes:
+        - input: (B, C_in, T)
+        - output: (B, C_out, T)
+    """
+
+    def __init__(self, in_channels, out_channels, kernel_size, activation=None):
+        super().__init__()
+        assert (kernel_size - 1) % 2 == 0
+        padding = (kernel_size - 1) // 2
+        self.convolution1d = nn.Conv1d(in_channels, out_channels, kernel_size, padding=padding)
+        self.batch_normalization = nn.BatchNorm1d(out_channels, momentum=0.1, eps=1e-5)
+        self.dropout = nn.Dropout(p=0.5)
+        if activation == "relu":
+            self.activation = nn.ReLU()
+        elif activation == "tanh":
+            self.activation = nn.Tanh()
+        else:
+            self.activation = nn.Identity()
+
+    def forward(self, x):
+        o = self.convolution1d(x)
+        o = self.batch_normalization(o)
+        o = self.activation(o)
+        o = self.dropout(o)
+        return o
+
+
+class Postnet(nn.Module):
+    r"""Tacotron2 Postnet
+
+    Args:
+        in_out_channels (int): number of output channels.
+
+    Shapes:
+        - input: (B, C_in, T)
+        - output: (B, C_in, T)
+    """
+
+    def __init__(self, in_out_channels, num_convs=5):
+        super().__init__()
+        self.convolutions = nn.ModuleList()
+        self.convolutions.append(ConvBNBlock(in_out_channels, 512, kernel_size=5, activation="tanh"))
+        for _ in range(1, num_convs - 1):
+            self.convolutions.append(ConvBNBlock(512, 512, kernel_size=5, activation="tanh"))
+        self.convolutions.append(ConvBNBlock(512, in_out_channels, kernel_size=5, activation=None))
+
+    def forward(self, x):
+        o = x
+        for layer in self.convolutions:
+            o = layer(o)
+        return o
+
+
+class Encoder(nn.Module):
+    r"""Tacotron2 Encoder
+
+    Args:
+        in_out_channels (int): number of input and output channels.
+
+    Shapes:
+        - input: (B, C_in, T)
+        - output: (B, C_in, T)
+    """
+
+    def __init__(self, in_out_channels=512):
+        super().__init__()
+        self.convolutions = nn.ModuleList()
+        for _ in range(3):
+            self.convolutions.append(ConvBNBlock(in_out_channels, in_out_channels, 5, "relu"))
+        self.lstm = nn.LSTM(
+            in_out_channels, int(in_out_channels / 2), num_layers=1, batch_first=True, bias=True, bidirectional=True
+        )
+        self.rnn_state = None
+
+    def forward(self, x, input_lengths):
+        o = x
+        for layer in self.convolutions:
+            o = layer(o)
+        o = o.transpose(1, 2)
+        o = nn.utils.rnn.pack_padded_sequence(o, input_lengths.cpu(), batch_first=True)
+        self.lstm.flatten_parameters()
+        o, _ = self.lstm(o)
+        o, _ = nn.utils.rnn.pad_packed_sequence(o, batch_first=True)
+        return o
+
+    def inference(self, x):
+        o = x
+        for layer in self.convolutions:
+            o = layer(o)
+        o = o.transpose(1, 2)
+        # self.lstm.flatten_parameters()
+        o, _ = self.lstm(o)
+        return o
+
+
+# adapted from https://github.com/NVIDIA/tacotron2/
+class Decoder(nn.Module):
+    """Tacotron2 decoder. We don't use Zoneout but Dropout between RNN layers.
+
+    Args:
+        in_channels (int): number of input channels.
+        frame_channels (int): number of feature frame channels.
+        r (int): number of outputs per time step (reduction rate).
+        memory_size (int): size of the past window. if <= 0 memory_size = r
+        attn_type (string): type of attention used in decoder.
+        attn_win (bool): if true, define an attention window centered to maximum
+            attention response. It provides more robust attention alignment especially
+            at interence time.
+        attn_norm (string): attention normalization function. 'sigmoid' or 'softmax'.
+        prenet_type (string): 'original' or 'bn'.
+        prenet_dropout (float): prenet dropout rate.
+        forward_attn (bool): if true, use forward attention method. https://arxiv.org/abs/1807.06736
+        trans_agent (bool): if true, use transition agent. https://arxiv.org/abs/1807.06736
+        forward_attn_mask (bool): if true, mask attention values smaller than a threshold.
+        location_attn (bool): if true, use location sensitive attention.
+        attn_K (int): number of attention heads for GravesAttention.
+        separate_stopnet (bool): if true, detach stopnet input to prevent gradient flow.
+        max_decoder_steps (int): Maximum number of steps allowed for the decoder. Defaults to 10000.
+    """
+
+    # Pylint gets confused by PyTorch conventions here
+    # pylint: disable=attribute-defined-outside-init
+    def __init__(
+        self,
+        in_channels,
+        frame_channels,
+        r,
+        attn_type,
+        attn_win,
+        attn_norm,
+        prenet_type,
+        prenet_dropout,
+        forward_attn,
+        trans_agent,
+        forward_attn_mask,
+        location_attn,
+        attn_K,
+        separate_stopnet,
+        max_decoder_steps,
+    ):
+        super().__init__()
+        self.frame_channels = frame_channels
+        self.r_init = r
+        self.r = r
+        self.encoder_embedding_dim = in_channels
+        self.separate_stopnet = separate_stopnet
+        self.max_decoder_steps = max_decoder_steps
+        self.stop_threshold = 0.5
+
+        # model dimensions
+        self.query_dim = 1024
+        self.decoder_rnn_dim = 1024
+        self.prenet_dim = 256
+        self.attn_dim = 128
+        self.p_attention_dropout = 0.1
+        self.p_decoder_dropout = 0.1
+
+        # memory -> |Prenet| -> processed_memory
+        prenet_dim = self.frame_channels
+        self.prenet = Prenet(
+            prenet_dim, prenet_type, prenet_dropout, out_features=[self.prenet_dim, self.prenet_dim], bias=False
+        )
+
+        self.attention_rnn = nn.LSTMCell(self.prenet_dim + in_channels, self.query_dim, bias=True)
+
+        self.attention = init_attn(
+            attn_type=attn_type,
+            query_dim=self.query_dim,
+            embedding_dim=in_channels,
+            attention_dim=128,
+            location_attention=location_attn,
+            attention_location_n_filters=32,
+            attention_location_kernel_size=31,
+            windowing=attn_win,
+            norm=attn_norm,
+            forward_attn=forward_attn,
+            trans_agent=trans_agent,
+            forward_attn_mask=forward_attn_mask,
+            attn_K=attn_K,
+        )
+
+        self.decoder_rnn = nn.LSTMCell(self.query_dim + in_channels, self.decoder_rnn_dim, bias=True)
+
+        self.linear_projection = Linear(self.decoder_rnn_dim + in_channels, self.frame_channels * self.r_init)
+
+        self.stopnet = nn.Sequential(
+            nn.Dropout(0.1),
+            Linear(self.decoder_rnn_dim + self.frame_channels * self.r_init, 1, bias=True, init_gain="sigmoid"),
+        )
+        self.memory_truncated = None
+
+    def set_r(self, new_r):
+        self.r = new_r
+
+    def get_go_frame(self, inputs):
+        B = inputs.size(0)
+        memory = torch.zeros(1, device=inputs.device).repeat(B, self.frame_channels * self.r)
+        return memory
+
+    def _init_states(self, inputs, mask, keep_states=False):
+        B = inputs.size(0)
+        # T = inputs.size(1)
+        if not keep_states:
+            self.query = torch.zeros(1, device=inputs.device).repeat(B, self.query_dim)
+            self.attention_rnn_cell_state = torch.zeros(1, device=inputs.device).repeat(B, self.query_dim)
+            self.decoder_hidden = torch.zeros(1, device=inputs.device).repeat(B, self.decoder_rnn_dim)
+            self.decoder_cell = torch.zeros(1, device=inputs.device).repeat(B, self.decoder_rnn_dim)
+            self.context = torch.zeros(1, device=inputs.device).repeat(B, self.encoder_embedding_dim)
+        self.inputs = inputs
+        self.processed_inputs = self.attention.preprocess_inputs(inputs)
+        self.mask = mask
+
+    def _reshape_memory(self, memory):
+        """
+        Reshape the spectrograms for given 'r'
+        """
+        # Grouping multiple frames if necessary
+        if memory.size(-1) == self.frame_channels:
+            memory = memory.view(memory.shape[0], memory.size(1) // self.r, -1)
+        # Time first (T_decoder, B, frame_channels)
+        memory = memory.transpose(0, 1)
+        return memory
+
+    def _parse_outputs(self, outputs, stop_tokens, alignments):
+        alignments = torch.stack(alignments).transpose(0, 1)
+        stop_tokens = torch.stack(stop_tokens).transpose(0, 1)
+        outputs = torch.stack(outputs).transpose(0, 1).contiguous()
+        outputs = outputs.view(outputs.size(0), -1, self.frame_channels)
+        outputs = outputs.transpose(1, 2)
+        return outputs, stop_tokens, alignments
+
+    def _update_memory(self, memory):
+        if len(memory.shape) == 2:
+            return memory[:, self.frame_channels * (self.r - 1) :]
+        return memory[:, :, self.frame_channels * (self.r - 1) :]
+
+    def decode(self, memory):
+        """
+        shapes:
+           - memory: B x r * self.frame_channels
+        """
+        # self.context: B x D_en
+        # query_input: B x D_en + (r * self.frame_channels)
+        query_input = torch.cat((memory, self.context), -1)
+        # self.query and self.attention_rnn_cell_state : B x D_attn_rnn
+        self.query, self.attention_rnn_cell_state = self.attention_rnn(
+            query_input, (self.query, self.attention_rnn_cell_state)
+        )
+        self.query = F.dropout(self.query, self.p_attention_dropout, self.training)
+        self.attention_rnn_cell_state = F.dropout(
+            self.attention_rnn_cell_state, self.p_attention_dropout, self.training
+        )
+        # B x D_en
+        self.context = self.attention(self.query, self.inputs, self.processed_inputs, self.mask)
+        # B x (D_en + D_attn_rnn)
+        decoder_rnn_input = torch.cat((self.query, self.context), -1)
+        # self.decoder_hidden and self.decoder_cell: B x D_decoder_rnn
+        self.decoder_hidden, self.decoder_cell = self.decoder_rnn(
+            decoder_rnn_input, (self.decoder_hidden, self.decoder_cell)
+        )
+        self.decoder_hidden = F.dropout(self.decoder_hidden, self.p_decoder_dropout, self.training)
+        # B x (D_decoder_rnn + D_en)
+        decoder_hidden_context = torch.cat((self.decoder_hidden, self.context), dim=1)
+        # B x (self.r * self.frame_channels)
+        decoder_output = self.linear_projection(decoder_hidden_context)
+        # B x (D_decoder_rnn + (self.r * self.frame_channels))
+        stopnet_input = torch.cat((self.decoder_hidden, decoder_output), dim=1)
+        if self.separate_stopnet:
+            stop_token = self.stopnet(stopnet_input.detach())
+        else:
+            stop_token = self.stopnet(stopnet_input)
+        # select outputs for the reduction rate self.r
+        decoder_output = decoder_output[:, : self.r * self.frame_channels]
+        return decoder_output, self.attention.attention_weights, stop_token
+
+    def forward(self, inputs, memories, mask):
+        r"""Train Decoder with teacher forcing.
+        Args:
+            inputs: Encoder outputs.
+            memories: Feature frames for teacher-forcing.
+            mask: Attention mask for sequence padding.
+
+        Shapes:
+            - inputs: (B, T, D_out_enc)
+            - memory: (B, T_mel, D_mel)
+            - outputs: (B, T_mel, D_mel)
+            - alignments: (B, T_in, T_out)
+            - stop_tokens: (B, T_out)
+        """
+        memory = self.get_go_frame(inputs).unsqueeze(0)
+        memories = self._reshape_memory(memories)
+        memories = torch.cat((memory, memories), dim=0)
+        memories = self._update_memory(memories)
+        memories = self.prenet(memories)
+
+        self._init_states(inputs, mask=mask)
+        self.attention.init_states(inputs)
+
+        outputs, stop_tokens, alignments = [], [], []
+        while len(outputs) < memories.size(0) - 1:
+            memory = memories[len(outputs)]
+            decoder_output, attention_weights, stop_token = self.decode(memory)
+            outputs += [decoder_output.squeeze(1)]
+            stop_tokens += [stop_token.squeeze(1)]
+            alignments += [attention_weights]
+
+        outputs, stop_tokens, alignments = self._parse_outputs(outputs, stop_tokens, alignments)
+        return outputs, alignments, stop_tokens
+
+    def inference(self, inputs):
+        r"""Decoder inference without teacher forcing and use
+        Stopnet to stop decoder.
+        Args:
+            inputs: Encoder outputs.
+
+        Shapes:
+            - inputs: (B, T, D_out_enc)
+            - outputs: (B, T_mel, D_mel)
+            - alignments: (B, T_in, T_out)
+            - stop_tokens: (B, T_out)
+        """
+        memory = self.get_go_frame(inputs)
+        memory = self._update_memory(memory)
+
+        self._init_states(inputs, mask=None)
+        self.attention.init_states(inputs)
+
+        outputs, stop_tokens, alignments, t = [], [], [], 0
+        while True:
+            memory = self.prenet(memory)
+            decoder_output, alignment, stop_token = self.decode(memory)
+            stop_token = torch.sigmoid(stop_token.data)
+            outputs += [decoder_output.squeeze(1)]
+            stop_tokens += [stop_token]
+            alignments += [alignment]
+
+            if stop_token > self.stop_threshold and t > inputs.shape[0] // 2:
+                break
+            if len(outputs) == self.max_decoder_steps:
+                print(f"   > Decoder stopped with `max_decoder_steps` {self.max_decoder_steps}")
+                break
+
+            memory = self._update_memory(decoder_output)
+            t += 1
+
+        outputs, stop_tokens, alignments = self._parse_outputs(outputs, stop_tokens, alignments)
+
+        return outputs, alignments, stop_tokens
+
+    def inference_truncated(self, inputs):
+        """
+        Preserve decoder states for continuous inference
+        """
+        if self.memory_truncated is None:
+            self.memory_truncated = self.get_go_frame(inputs)
+            self._init_states(inputs, mask=None, keep_states=False)
+        else:
+            self._init_states(inputs, mask=None, keep_states=True)
+
+        self.attention.init_states(inputs)
+        outputs, stop_tokens, alignments, t = [], [], [], 0
+        while True:
+            memory = self.prenet(self.memory_truncated)
+            decoder_output, alignment, stop_token = self.decode(memory)
+            stop_token = torch.sigmoid(stop_token.data)
+            outputs += [decoder_output.squeeze(1)]
+            stop_tokens += [stop_token]
+            alignments += [alignment]
+
+            if stop_token > 0.7:
+                break
+            if len(outputs) == self.max_decoder_steps:
+                print("   | > Decoder stopped with 'max_decoder_steps")
+                break
+
+            self.memory_truncated = decoder_output
+            t += 1
+
+        outputs, stop_tokens, alignments = self._parse_outputs(outputs, stop_tokens, alignments)
+
+        return outputs, alignments, stop_tokens
+
+    def inference_step(self, inputs, t, memory=None):
+        """
+        For debug purposes
+        """
+        if t == 0:
+            memory = self.get_go_frame(inputs)
+            self._init_states(inputs, mask=None)
+
+        memory = self.prenet(memory)
+        decoder_output, stop_token, alignment = self.decode(memory)
+        stop_token = torch.sigmoid(stop_token.data)
+        memory = decoder_output
+        return decoder_output, stop_token, alignment
diff --git a/TTS/tts/layers/vits/discriminator.py b/TTS/tts/layers/vits/discriminator.py
new file mode 100644
index 0000000000000000000000000000000000000000..148f283c9010e522c49ad2595860ab859ba6aa48
--- /dev/null
+++ b/TTS/tts/layers/vits/discriminator.py
@@ -0,0 +1,89 @@
+import torch
+from torch import nn
+from torch.nn.modules.conv import Conv1d
+
+from TTS.vocoder.models.hifigan_discriminator import DiscriminatorP, MultiPeriodDiscriminator
+
+
+class DiscriminatorS(torch.nn.Module):
+    """HiFiGAN Scale Discriminator. Channel sizes are different from the original HiFiGAN.
+
+    Args:
+        use_spectral_norm (bool): if `True` swith to spectral norm instead of weight norm.
+    """
+
+    def __init__(self, use_spectral_norm=False):
+        super().__init__()
+        norm_f = nn.utils.spectral_norm if use_spectral_norm else nn.utils.weight_norm
+        self.convs = nn.ModuleList(
+            [
+                norm_f(Conv1d(1, 16, 15, 1, padding=7)),
+                norm_f(Conv1d(16, 64, 41, 4, groups=4, padding=20)),
+                norm_f(Conv1d(64, 256, 41, 4, groups=16, padding=20)),
+                norm_f(Conv1d(256, 1024, 41, 4, groups=64, padding=20)),
+                norm_f(Conv1d(1024, 1024, 41, 4, groups=256, padding=20)),
+                norm_f(Conv1d(1024, 1024, 5, 1, padding=2)),
+            ]
+        )
+        self.conv_post = norm_f(Conv1d(1024, 1, 3, 1, padding=1))
+
+    def forward(self, x):
+        """
+        Args:
+            x (Tensor): input waveform.
+
+        Returns:
+            Tensor: discriminator scores.
+            List[Tensor]: list of features from the convolutiona layers.
+        """
+        feat = []
+        for l in self.convs:
+            x = l(x)
+            x = torch.nn.functional.leaky_relu(x, 0.1)
+            feat.append(x)
+        x = self.conv_post(x)
+        feat.append(x)
+        x = torch.flatten(x, 1, -1)
+        return x, feat
+
+
+class VitsDiscriminator(nn.Module):
+    """VITS discriminator wrapping one Scale Discriminator and a stack of Period Discriminator.
+
+    ::
+        waveform -> ScaleDiscriminator() -> scores_sd, feats_sd --> append() -> scores, feats
+               |--> MultiPeriodDiscriminator() -> scores_mpd, feats_mpd ^
+
+    Args:
+        use_spectral_norm (bool): if `True` swith to spectral norm instead of weight norm.
+    """
+
+    def __init__(self, periods=(2, 3, 5, 7, 11), use_spectral_norm=False):
+        super().__init__()
+        self.nets = nn.ModuleList()
+        self.nets.append(DiscriminatorS(use_spectral_norm=use_spectral_norm))
+        self.nets.extend([DiscriminatorP(i, use_spectral_norm=use_spectral_norm) for i in periods])
+
+    def forward(self, x, x_hat=None):
+        """
+        Args:
+            x (Tensor): ground truth waveform.
+            x_hat (Tensor): predicted waveform.
+
+        Returns:
+            List[Tensor]: discriminator scores.
+            List[List[Tensor]]: list of list of features from each layers of each discriminator.
+        """
+        x_scores = []
+        x_hat_scores = [] if x_hat is not None else None
+        x_feats = []
+        x_hat_feats = [] if x_hat is not None else None
+        for net in self.nets:
+            x_score, x_feat = net(x)
+            x_scores.append(x_score)
+            x_feats.append(x_feat)
+            if x_hat is not None:
+                x_hat_score, x_hat_feat = net(x_hat)
+                x_hat_scores.append(x_hat_score)
+                x_hat_feats.append(x_hat_feat)
+        return x_scores, x_feats, x_hat_scores, x_hat_feats
diff --git a/TTS/tts/layers/vits/networks.py b/TTS/tts/layers/vits/networks.py
new file mode 100644
index 0000000000000000000000000000000000000000..f97b584fe6ed311127a8c01a089b159946219cb2
--- /dev/null
+++ b/TTS/tts/layers/vits/networks.py
@@ -0,0 +1,288 @@
+import math
+
+import torch
+from torch import nn
+
+from TTS.tts.layers.glow_tts.glow import WN
+from TTS.tts.layers.glow_tts.transformer import RelativePositionTransformer
+from TTS.tts.utils.helpers import sequence_mask
+
+LRELU_SLOPE = 0.1
+
+
+def convert_pad_shape(pad_shape):
+    l = pad_shape[::-1]
+    pad_shape = [item for sublist in l for item in sublist]
+    return pad_shape
+
+
+def init_weights(m, mean=0.0, std=0.01):
+    classname = m.__class__.__name__
+    if classname.find("Conv") != -1:
+        m.weight.data.normal_(mean, std)
+
+
+def get_padding(kernel_size, dilation=1):
+    return int((kernel_size * dilation - dilation) / 2)
+
+
+class TextEncoder(nn.Module):
+    def __init__(
+        self,
+        n_vocab: int,
+        out_channels: int,
+        hidden_channels: int,
+        hidden_channels_ffn: int,
+        num_heads: int,
+        num_layers: int,
+        kernel_size: int,
+        dropout_p: float,
+        language_emb_dim: int = None,
+    ):
+        """Text Encoder for VITS model.
+
+        Args:
+            n_vocab (int): Number of characters for the embedding layer.
+            out_channels (int): Number of channels for the output.
+            hidden_channels (int): Number of channels for the hidden layers.
+            hidden_channels_ffn (int): Number of channels for the convolutional layers.
+            num_heads (int): Number of attention heads for the Transformer layers.
+            num_layers (int): Number of Transformer layers.
+            kernel_size (int): Kernel size for the FFN layers in Transformer network.
+            dropout_p (float): Dropout rate for the Transformer layers.
+        """
+        super().__init__()
+        self.out_channels = out_channels
+        self.hidden_channels = hidden_channels
+
+        self.emb = nn.Embedding(n_vocab, hidden_channels)
+
+        nn.init.normal_(self.emb.weight, 0.0, hidden_channels**-0.5)
+
+        if language_emb_dim:
+            hidden_channels += language_emb_dim
+
+        self.encoder = RelativePositionTransformer(
+            in_channels=hidden_channels,
+            out_channels=hidden_channels,
+            hidden_channels=hidden_channels,
+            hidden_channels_ffn=hidden_channels_ffn,
+            num_heads=num_heads,
+            num_layers=num_layers,
+            kernel_size=kernel_size,
+            dropout_p=dropout_p,
+            layer_norm_type="2",
+            rel_attn_window_size=4,
+        )
+
+        self.proj = nn.Conv1d(hidden_channels, out_channels * 2, 1)
+
+    def forward(self, x, x_lengths, lang_emb=None):
+        """
+        Shapes:
+            - x: :math:`[B, T]`
+            - x_length: :math:`[B]`
+        """
+        assert x.shape[0] == x_lengths.shape[0]
+        x = self.emb(x) * math.sqrt(self.hidden_channels)  # [b, t, h]
+
+        # concat the lang emb in embedding chars
+        if lang_emb is not None:
+            x = torch.cat((x, lang_emb.transpose(2, 1).expand(x.size(0), x.size(1), -1)), dim=-1)
+
+        x = torch.transpose(x, 1, -1)  # [b, h, t]
+        x_mask = torch.unsqueeze(sequence_mask(x_lengths, x.size(2)), 1).to(x.dtype)  # [b, 1, t]
+
+        x = self.encoder(x * x_mask, x_mask)
+        stats = self.proj(x) * x_mask
+
+        m, logs = torch.split(stats, self.out_channels, dim=1)
+        return x, m, logs, x_mask
+
+
+class ResidualCouplingBlock(nn.Module):
+    def __init__(
+        self,
+        channels,
+        hidden_channels,
+        kernel_size,
+        dilation_rate,
+        num_layers,
+        dropout_p=0,
+        cond_channels=0,
+        mean_only=False,
+    ):
+        assert channels % 2 == 0, "channels should be divisible by 2"
+        super().__init__()
+        self.half_channels = channels // 2
+        self.mean_only = mean_only
+        # input layer
+        self.pre = nn.Conv1d(self.half_channels, hidden_channels, 1)
+        # coupling layers
+        self.enc = WN(
+            hidden_channels,
+            hidden_channels,
+            kernel_size,
+            dilation_rate,
+            num_layers,
+            dropout_p=dropout_p,
+            c_in_channels=cond_channels,
+        )
+        # output layer
+        # Initializing last layer to 0 makes the affine coupling layers
+        # do nothing at first.  This helps with training stability
+        self.post = nn.Conv1d(hidden_channels, self.half_channels * (2 - mean_only), 1)
+        self.post.weight.data.zero_()
+        self.post.bias.data.zero_()
+
+    def forward(self, x, x_mask, g=None, reverse=False):
+        """
+        Note:
+            Set `reverse` to True for inference.
+
+        Shapes:
+            - x: :math:`[B, C, T]`
+            - x_mask: :math:`[B, 1, T]`
+            - g: :math:`[B, C, 1]`
+        """
+        x0, x1 = torch.split(x, [self.half_channels] * 2, 1)
+        h = self.pre(x0) * x_mask
+        h = self.enc(h, x_mask, g=g)
+        stats = self.post(h) * x_mask
+        if not self.mean_only:
+            m, log_scale = torch.split(stats, [self.half_channels] * 2, 1)
+        else:
+            m = stats
+            log_scale = torch.zeros_like(m)
+
+        if not reverse:
+            x1 = m + x1 * torch.exp(log_scale) * x_mask
+            x = torch.cat([x0, x1], 1)
+            logdet = torch.sum(log_scale, [1, 2])
+            return x, logdet
+        else:
+            x1 = (x1 - m) * torch.exp(-log_scale) * x_mask
+            x = torch.cat([x0, x1], 1)
+            return x
+
+
+class ResidualCouplingBlocks(nn.Module):
+    def __init__(
+        self,
+        channels: int,
+        hidden_channels: int,
+        kernel_size: int,
+        dilation_rate: int,
+        num_layers: int,
+        num_flows=4,
+        cond_channels=0,
+    ):
+        """Redisual Coupling blocks for VITS flow layers.
+
+        Args:
+            channels (int): Number of input and output tensor channels.
+            hidden_channels (int): Number of hidden network channels.
+            kernel_size (int): Kernel size of the WaveNet layers.
+            dilation_rate (int): Dilation rate of the WaveNet layers.
+            num_layers (int): Number of the WaveNet layers.
+            num_flows (int, optional): Number of Residual Coupling blocks. Defaults to 4.
+            cond_channels (int, optional): Number of channels of the conditioning tensor. Defaults to 0.
+        """
+        super().__init__()
+        self.channels = channels
+        self.hidden_channels = hidden_channels
+        self.kernel_size = kernel_size
+        self.dilation_rate = dilation_rate
+        self.num_layers = num_layers
+        self.num_flows = num_flows
+        self.cond_channels = cond_channels
+
+        self.flows = nn.ModuleList()
+        for _ in range(num_flows):
+            self.flows.append(
+                ResidualCouplingBlock(
+                    channels,
+                    hidden_channels,
+                    kernel_size,
+                    dilation_rate,
+                    num_layers,
+                    cond_channels=cond_channels,
+                    mean_only=True,
+                )
+            )
+
+    def forward(self, x, x_mask, g=None, reverse=False):
+        """
+        Note:
+            Set `reverse` to True for inference.
+
+        Shapes:
+            - x: :math:`[B, C, T]`
+            - x_mask: :math:`[B, 1, T]`
+            - g: :math:`[B, C, 1]`
+        """
+        if not reverse:
+            for flow in self.flows:
+                x, _ = flow(x, x_mask, g=g, reverse=reverse)
+                x = torch.flip(x, [1])
+        else:
+            for flow in reversed(self.flows):
+                x = torch.flip(x, [1])
+                x = flow(x, x_mask, g=g, reverse=reverse)
+        return x
+
+
+class PosteriorEncoder(nn.Module):
+    def __init__(
+        self,
+        in_channels: int,
+        out_channels: int,
+        hidden_channels: int,
+        kernel_size: int,
+        dilation_rate: int,
+        num_layers: int,
+        cond_channels=0,
+    ):
+        """Posterior Encoder of VITS model.
+
+        ::
+            x -> conv1x1() -> WaveNet() (non-causal) -> conv1x1() -> split() -> [m, s] -> sample(m, s) -> z
+
+        Args:
+            in_channels (int): Number of input tensor channels.
+            out_channels (int): Number of output tensor channels.
+            hidden_channels (int): Number of hidden channels.
+            kernel_size (int): Kernel size of the WaveNet convolution layers.
+            dilation_rate (int): Dilation rate of the WaveNet layers.
+            num_layers (int): Number of the WaveNet layers.
+            cond_channels (int, optional): Number of conditioning tensor channels. Defaults to 0.
+        """
+        super().__init__()
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+        self.hidden_channels = hidden_channels
+        self.kernel_size = kernel_size
+        self.dilation_rate = dilation_rate
+        self.num_layers = num_layers
+        self.cond_channels = cond_channels
+
+        self.pre = nn.Conv1d(in_channels, hidden_channels, 1)
+        self.enc = WN(
+            hidden_channels, hidden_channels, kernel_size, dilation_rate, num_layers, c_in_channels=cond_channels
+        )
+        self.proj = nn.Conv1d(hidden_channels, out_channels * 2, 1)
+
+    def forward(self, x, x_lengths, g=None):
+        """
+        Shapes:
+            - x: :math:`[B, C, T]`
+            - x_lengths: :math:`[B, 1]`
+            - g: :math:`[B, C, 1]`
+        """
+        x_mask = torch.unsqueeze(sequence_mask(x_lengths, x.size(2)), 1).to(x.dtype)
+        x = self.pre(x) * x_mask
+        x = self.enc(x, x_mask, g=g)
+        stats = self.proj(x) * x_mask
+        mean, log_scale = torch.split(stats, self.out_channels, dim=1)
+        z = (mean + torch.randn_like(mean) * torch.exp(log_scale)) * x_mask
+        return z, mean, log_scale, x_mask
diff --git a/TTS/tts/layers/vits/stochastic_duration_predictor.py b/TTS/tts/layers/vits/stochastic_duration_predictor.py
new file mode 100644
index 0000000000000000000000000000000000000000..738ee341e649dfaf62059735c2620cb6ae1a2b1f
--- /dev/null
+++ b/TTS/tts/layers/vits/stochastic_duration_predictor.py
@@ -0,0 +1,294 @@
+import math
+
+import torch
+from torch import nn
+from torch.nn import functional as F
+
+from TTS.tts.layers.generic.normalization import LayerNorm2
+from TTS.tts.layers.vits.transforms import piecewise_rational_quadratic_transform
+
+
+class DilatedDepthSeparableConv(nn.Module):
+    def __init__(self, channels, kernel_size, num_layers, dropout_p=0.0) -> torch.tensor:
+        """Dilated Depth-wise Separable Convolution module.
+
+        ::
+            x |-> DDSConv(x) -> LayerNorm(x) -> GeLU(x) -> Conv1x1(x) -> LayerNorm(x) -> GeLU(x) -> + -> o
+              |-------------------------------------------------------------------------------------^
+
+        Args:
+            channels ([type]): [description]
+            kernel_size ([type]): [description]
+            num_layers ([type]): [description]
+            dropout_p (float, optional): [description]. Defaults to 0.0.
+
+        Returns:
+            torch.tensor: Network output masked by the input sequence mask.
+        """
+        super().__init__()
+        self.num_layers = num_layers
+
+        self.convs_sep = nn.ModuleList()
+        self.convs_1x1 = nn.ModuleList()
+        self.norms_1 = nn.ModuleList()
+        self.norms_2 = nn.ModuleList()
+        for i in range(num_layers):
+            dilation = kernel_size**i
+            padding = (kernel_size * dilation - dilation) // 2
+            self.convs_sep.append(
+                nn.Conv1d(channels, channels, kernel_size, groups=channels, dilation=dilation, padding=padding)
+            )
+            self.convs_1x1.append(nn.Conv1d(channels, channels, 1))
+            self.norms_1.append(LayerNorm2(channels))
+            self.norms_2.append(LayerNorm2(channels))
+        self.dropout = nn.Dropout(dropout_p)
+
+    def forward(self, x, x_mask, g=None):
+        """
+        Shapes:
+            - x: :math:`[B, C, T]`
+            - x_mask: :math:`[B, 1, T]`
+        """
+        if g is not None:
+            x = x + g
+        for i in range(self.num_layers):
+            y = self.convs_sep[i](x * x_mask)
+            y = self.norms_1[i](y)
+            y = F.gelu(y)
+            y = self.convs_1x1[i](y)
+            y = self.norms_2[i](y)
+            y = F.gelu(y)
+            y = self.dropout(y)
+            x = x + y
+        return x * x_mask
+
+
+class ElementwiseAffine(nn.Module):
+    """Element-wise affine transform like no-population stats BatchNorm alternative.
+
+    Args:
+        channels (int): Number of input tensor channels.
+    """
+
+    def __init__(self, channels):
+        super().__init__()
+        self.translation = nn.Parameter(torch.zeros(channels, 1))
+        self.log_scale = nn.Parameter(torch.zeros(channels, 1))
+
+    def forward(self, x, x_mask, reverse=False, **kwargs):  # pylint: disable=unused-argument
+        if not reverse:
+            y = (x * torch.exp(self.log_scale) + self.translation) * x_mask
+            logdet = torch.sum(self.log_scale * x_mask, [1, 2])
+            return y, logdet
+        x = (x - self.translation) * torch.exp(-self.log_scale) * x_mask
+        return x
+
+
+class ConvFlow(nn.Module):
+    """Dilated depth separable convolutional based spline flow.
+
+    Args:
+        in_channels (int): Number of input tensor channels.
+        hidden_channels (int): Number of in network channels.
+        kernel_size (int): Convolutional kernel size.
+        num_layers (int): Number of convolutional layers.
+        num_bins (int, optional): Number of spline bins. Defaults to 10.
+        tail_bound (float, optional): Tail bound for PRQT. Defaults to 5.0.
+    """
+
+    def __init__(
+        self,
+        in_channels: int,
+        hidden_channels: int,
+        kernel_size: int,
+        num_layers: int,
+        num_bins=10,
+        tail_bound=5.0,
+    ):
+        super().__init__()
+        self.num_bins = num_bins
+        self.tail_bound = tail_bound
+        self.hidden_channels = hidden_channels
+        self.half_channels = in_channels // 2
+
+        self.pre = nn.Conv1d(self.half_channels, hidden_channels, 1)
+        self.convs = DilatedDepthSeparableConv(hidden_channels, kernel_size, num_layers, dropout_p=0.0)
+        self.proj = nn.Conv1d(hidden_channels, self.half_channels * (num_bins * 3 - 1), 1)
+        self.proj.weight.data.zero_()
+        self.proj.bias.data.zero_()
+
+    def forward(self, x, x_mask, g=None, reverse=False):
+        x0, x1 = torch.split(x, [self.half_channels] * 2, 1)
+        h = self.pre(x0)
+        h = self.convs(h, x_mask, g=g)
+        h = self.proj(h) * x_mask
+
+        b, c, t = x0.shape
+        h = h.reshape(b, c, -1, t).permute(0, 1, 3, 2)  # [b, cx?, t] -> [b, c, t, ?]
+
+        unnormalized_widths = h[..., : self.num_bins] / math.sqrt(self.hidden_channels)
+        unnormalized_heights = h[..., self.num_bins : 2 * self.num_bins] / math.sqrt(self.hidden_channels)
+        unnormalized_derivatives = h[..., 2 * self.num_bins :]
+
+        x1, logabsdet = piecewise_rational_quadratic_transform(
+            x1,
+            unnormalized_widths,
+            unnormalized_heights,
+            unnormalized_derivatives,
+            inverse=reverse,
+            tails="linear",
+            tail_bound=self.tail_bound,
+        )
+
+        x = torch.cat([x0, x1], 1) * x_mask
+        logdet = torch.sum(logabsdet * x_mask, [1, 2])
+        if not reverse:
+            return x, logdet
+        return x
+
+
+class StochasticDurationPredictor(nn.Module):
+    """Stochastic duration predictor with Spline Flows.
+
+    It applies Variational Dequantization and Variational Data Augmentation.
+
+    Paper:
+        SDP: https://arxiv.org/pdf/2106.06103.pdf
+        Spline Flow: https://arxiv.org/abs/1906.04032
+
+    ::
+        ## Inference
+
+        x -> TextCondEncoder() -> Flow() -> dr_hat
+        noise ----------------------^
+
+        ## Training
+                                                                              |---------------------|
+        x -> TextCondEncoder() -> + -> PosteriorEncoder() -> split() -> z_u, z_v -> (d - z_u) -> concat() -> Flow() -> noise
+        d -> DurCondEncoder()  -> ^                                                    |
+        |------------------------------------------------------------------------------|
+
+    Args:
+        in_channels (int): Number of input tensor channels.
+        hidden_channels (int): Number of hidden channels.
+        kernel_size (int): Kernel size of convolutional layers.
+        dropout_p (float): Dropout rate.
+        num_flows (int, optional): Number of flow blocks. Defaults to 4.
+        cond_channels (int, optional): Number of channels of conditioning tensor. Defaults to 0.
+        language_emb_dim (int, optional): Number of language embedding channels. When non-zero it is added
+            to `in_channels` and a `cond_lang` projection is created. Defaults to 0.
+    """
+
+    def __init__(
+        self,
+        in_channels: int,
+        hidden_channels: int,
+        kernel_size: int,
+        dropout_p: float,
+        num_flows=4,
+        cond_channels=0,
+        language_emb_dim=0,
+    ):
+        super().__init__()
+
+        # add language embedding dim in the input
+        if language_emb_dim:
+            in_channels += language_emb_dim
+
+        # condition encoder text
+        self.pre = nn.Conv1d(in_channels, hidden_channels, 1)
+        self.convs = DilatedDepthSeparableConv(hidden_channels, kernel_size, num_layers=3, dropout_p=dropout_p)
+        self.proj = nn.Conv1d(hidden_channels, hidden_channels, 1)
+
+        # flow layers (`forward` runs them on the duration path in training and reversed at inference)
+        self.flows = nn.ModuleList()
+        self.flows.append(ElementwiseAffine(2))
+        self.flows += [ConvFlow(2, hidden_channels, kernel_size, num_layers=3) for _ in range(num_flows)]
+
+        # condition encoder duration
+        self.post_pre = nn.Conv1d(1, hidden_channels, 1)
+        self.post_convs = DilatedDepthSeparableConv(hidden_channels, kernel_size, num_layers=3, dropout_p=dropout_p)
+        self.post_proj = nn.Conv1d(hidden_channels, hidden_channels, 1)
+
+        # posterior encoder flows (`forward` runs them on the noise, training only)
+        self.post_flows = nn.ModuleList()
+        self.post_flows.append(ElementwiseAffine(2))
+        self.post_flows += [ConvFlow(2, hidden_channels, kernel_size, num_layers=3) for _ in range(num_flows)]
+
+        # `self.cond` / `self.cond_lang` only exist when the matching channel count is non-zero;
+        # `forward` uses them only when `g` / `lang_emb` is passed.
+        if cond_channels != 0 and cond_channels is not None:
+            self.cond = nn.Conv1d(cond_channels, hidden_channels, 1)
+
+        if language_emb_dim != 0 and language_emb_dim is not None:
+            self.cond_lang = nn.Conv1d(language_emb_dim, hidden_channels, 1)
+
+    def forward(self, x, x_mask, dr=None, g=None, lang_emb=None, reverse=False, noise_scale=1.0):
+        """Compute the training loss (`reverse=False`) or sample log-durations (`reverse=True`).
+
+        Args:
+            x (Tensor): Text-side input to the condition encoder.
+            x_mask (Tensor): Sequence mask applied throughout.
+            dr (Tensor, optional): Durations; required (asserted) when `reverse` is False.
+            g (Tensor, optional): Conditioning tensor, projected by `self.cond` and added to `x`.
+            lang_emb (Tensor, optional): Language embedding, projected by `self.cond_lang` and added to `x`.
+            reverse (bool): False computes the negative log-likelihood, True samples.
+            noise_scale (float): Multiplier of the random noise that seeds the reverse pass.
+
+        Shapes:
+            - x: :math:`[B, C, T]`
+            - x_mask: :math:`[B, 1, T]`
+            - dr: :math:`[B, 1, T]`
+            - g: :math:`[B, C]`
+              NOTE(review): `self.cond` is a Conv1d and its output is added to `[B, C, T]`, so `g` is
+              presumably `[B, C, 1]` here - confirm against the caller.
+
+        Returns:
+            Tensor: when `reverse` is False, `nll_flow_layers + nll_posterior_encoder`, each summed over
+            dims 1 and 2; when `reverse` is True, `logw`, the first channel of the inverted flow output.
+        """
+        # condition encoder text
+        x = self.pre(x)
+        if g is not None:
+            x = x + self.cond(g)
+
+        if lang_emb is not None:
+            x = x + self.cond_lang(lang_emb)
+
+        x = self.convs(x, x_mask)
+        x = self.proj(x) * x_mask
+
+        if not reverse:
+            flows = self.flows
+            assert dr is not None
+
+            # condition encoder duration
+            h = self.post_pre(dr)
+            h = self.post_convs(h, x_mask)
+            h = self.post_proj(h) * x_mask
+            # two noise channels per step; they become z_u and z_v after the posterior flows
+            noise = torch.randn(dr.size(0), 2, dr.size(2)).to(device=x.device, dtype=x.dtype) * x_mask
+            z_q = noise
+
+            # posterior encoder
+            logdet_tot_q = 0.0
+            for idx, flow in enumerate(self.post_flows):
+                z_q, logdet_q = flow(z_q, x_mask, g=(x + h))
+                logdet_tot_q = logdet_tot_q + logdet_q
+                # no channel flip after the first flow (index 0, the ElementwiseAffine)
+                if idx > 0:
+                    z_q = torch.flip(z_q, [1])
+
+            z_u, z_v = torch.split(z_q, [1, 1], 1)
+            # squash z_u into (0, 1) and subtract it from the durations
+            u = torch.sigmoid(z_u) * x_mask
+            z0 = (dr - u) * x_mask
+
+            # posterior encoder - neg log likelihood
+            # logsigmoid(z) + logsigmoid(-z) is the log-derivative of the sigmoid applied above
+            logdet_tot_q += torch.sum((F.logsigmoid(z_u) + F.logsigmoid(-z_u)) * x_mask, [1, 2])
+            nll_posterior_encoder = (
+                torch.sum(-0.5 * (math.log(2 * math.pi) + (noise**2)) * x_mask, [1, 2]) - logdet_tot_q
+            )
+
+            # move to log-domain; the clamp keeps the argument of log strictly positive
+            z0 = torch.log(torch.clamp_min(z0, 1e-5)) * x_mask
+            logdet_tot = torch.sum(-z0, [1, 2])
+            z = torch.cat([z0, z_v], 1)
+
+            # flow layers
+            for idx, flow in enumerate(flows):
+                z, logdet = flow(z, x_mask, g=x, reverse=reverse)
+                logdet_tot = logdet_tot + logdet
+                if idx > 0:
+                    z = torch.flip(z, [1])
+
+            # flow layers - neg log likelihood
+            nll_flow_layers = torch.sum(0.5 * (math.log(2 * math.pi) + (z**2)) * x_mask, [1, 2]) - logdet_tot
+            return nll_flow_layers + nll_posterior_encoder
+
+        # inference: run the flows backwards, starting from scaled random noise
+        flows = list(reversed(self.flows))
+        # drops the second-to-last entry of the reversed list, i.e. the first ConvFlow in `self.flows`
+        flows = flows[:-2] + [flows[-1]]  # remove a useless vflow
+        z = torch.randn(x.size(0), 2, x.size(2)).to(device=x.device, dtype=x.dtype) * noise_scale
+        for flow in flows:
+            z = torch.flip(z, [1])
+            z = flow(z, x_mask, g=x, reverse=reverse)
+
+        # only the first channel is returned; the second one is discarded
+        z0, _ = torch.split(z, [1, 1], 1)
+        logw = z0
+        return logw
diff --git a/TTS/tts/layers/vits/transforms.py b/TTS/tts/layers/vits/transforms.py
new file mode 100644
index 0000000000000000000000000000000000000000..c1505554488fb18010b82bd97c88b28c7d4547e1
--- /dev/null
+++ b/TTS/tts/layers/vits/transforms.py
@@ -0,0 +1,203 @@
+# adopted from https://github.com/bayesiains/nflows
+
+import numpy as np
+import torch
+from torch.nn import functional as F
+
+DEFAULT_MIN_BIN_WIDTH = 1e-3
+DEFAULT_MIN_BIN_HEIGHT = 1e-3
+DEFAULT_MIN_DERIVATIVE = 1e-3
+
+
+def piecewise_rational_quadratic_transform(
+    inputs,
+    unnormalized_widths,
+    unnormalized_heights,
+    unnormalized_derivatives,
+    inverse=False,
+    tails=None,
+    tail_bound=1.0,
+    min_bin_width=DEFAULT_MIN_BIN_WIDTH,
+    min_bin_height=DEFAULT_MIN_BIN_HEIGHT,
+    min_derivative=DEFAULT_MIN_DERIVATIVE,
+):
+
+    if tails is None:
+        spline_fn = rational_quadratic_spline
+        spline_kwargs = {}
+    else:
+        spline_fn = unconstrained_rational_quadratic_spline
+        spline_kwargs = {"tails": tails, "tail_bound": tail_bound}
+
+    outputs, logabsdet = spline_fn(
+        inputs=inputs,
+        unnormalized_widths=unnormalized_widths,
+        unnormalized_heights=unnormalized_heights,
+        unnormalized_derivatives=unnormalized_derivatives,
+        inverse=inverse,
+        min_bin_width=min_bin_width,
+        min_bin_height=min_bin_height,
+        min_derivative=min_derivative,
+        **spline_kwargs,
+    )
+    return outputs, logabsdet
+
+
+def searchsorted(bin_locations, inputs, eps=1e-6):
+    bin_locations[..., -1] += eps
+    return torch.sum(inputs[..., None] >= bin_locations, dim=-1) - 1
+
+
+def unconstrained_rational_quadratic_spline(
+    inputs,
+    unnormalized_widths,
+    unnormalized_heights,
+    unnormalized_derivatives,
+    inverse=False,
+    tails="linear",
+    tail_bound=1.0,
+    min_bin_width=DEFAULT_MIN_BIN_WIDTH,
+    min_bin_height=DEFAULT_MIN_BIN_HEIGHT,
+    min_derivative=DEFAULT_MIN_DERIVATIVE,
+):
+    inside_interval_mask = (inputs >= -tail_bound) & (inputs <= tail_bound)
+    outside_interval_mask = ~inside_interval_mask
+
+    outputs = torch.zeros_like(inputs)
+    logabsdet = torch.zeros_like(inputs)
+
+    if tails == "linear":
+        unnormalized_derivatives = F.pad(unnormalized_derivatives, pad=(1, 1))
+        constant = np.log(np.exp(1 - min_derivative) - 1)
+        unnormalized_derivatives[..., 0] = constant
+        unnormalized_derivatives[..., -1] = constant
+
+        outputs[outside_interval_mask] = inputs[outside_interval_mask]
+        logabsdet[outside_interval_mask] = 0
+    else:
+        raise RuntimeError("{} tails are not implemented.".format(tails))
+
+    outputs[inside_interval_mask], logabsdet[inside_interval_mask] = rational_quadratic_spline(
+        inputs=inputs[inside_interval_mask],
+        unnormalized_widths=unnormalized_widths[inside_interval_mask, :],
+        unnormalized_heights=unnormalized_heights[inside_interval_mask, :],
+        unnormalized_derivatives=unnormalized_derivatives[inside_interval_mask, :],
+        inverse=inverse,
+        left=-tail_bound,
+        right=tail_bound,
+        bottom=-tail_bound,
+        top=tail_bound,
+        min_bin_width=min_bin_width,
+        min_bin_height=min_bin_height,
+        min_derivative=min_derivative,
+    )
+
+    return outputs, logabsdet
+
+
+def rational_quadratic_spline(
+    inputs,
+    unnormalized_widths,
+    unnormalized_heights,
+    unnormalized_derivatives,
+    inverse=False,
+    left=0.0,
+    right=1.0,
+    bottom=0.0,
+    top=1.0,
+    min_bin_width=DEFAULT_MIN_BIN_WIDTH,
+    min_bin_height=DEFAULT_MIN_BIN_HEIGHT,
+    min_derivative=DEFAULT_MIN_DERIVATIVE,
+):
+    """Element-wise rational-quadratic spline mapping `[left, right]` onto `[bottom, top]`.
+
+    Args:
+        inputs (Tensor): Values to transform; all must lie inside `[left, right]`.
+        unnormalized_widths (Tensor): Raw bin widths; softmax-normalised over the last dimension.
+        unnormalized_heights (Tensor): Raw bin heights; softmax-normalised over the last dimension.
+        unnormalized_derivatives (Tensor): Raw knot derivatives; mapped through softplus.
+        inverse (bool): If True, invert the spline instead of applying it.
+        left, right (float): Bounds that the cumulative widths are rescaled to.
+        bottom, top (float): Bounds that the cumulative heights are rescaled to.
+        min_bin_width, min_bin_height (float): Lower bounds on the normalised bin sizes.
+        min_derivative (float): Lower bound added to every knot derivative.
+
+    Returns:
+        Tuple `(outputs, logabsdet)`. In inverse mode the log-determinant is negated.
+
+    Raises:
+        ValueError: If any input is outside `[left, right]`, or if `min_bin_width * num_bins` or
+            `min_bin_height * num_bins` exceeds 1.
+    """
+    # NOTE(review): `torch.min` / `torch.max` over all elements raise on an empty `inputs` tensor.
+    if torch.min(inputs) < left or torch.max(inputs) > right:
+        raise ValueError("Input to a transform is not within its domain")
+
+    num_bins = unnormalized_widths.shape[-1]
+
+    if min_bin_width * num_bins > 1.0:
+        raise ValueError("Minimal bin width too large for the number of bins")
+    if min_bin_height * num_bins > 1.0:
+        raise ValueError("Minimal bin height too large for the number of bins")
+
+    # Bin widths: softmax, floor at `min_bin_width`, then cumulative edges rescaled to [left, right].
+    widths = F.softmax(unnormalized_widths, dim=-1)
+    widths = min_bin_width + (1 - min_bin_width * num_bins) * widths
+    cumwidths = torch.cumsum(widths, dim=-1)
+    cumwidths = F.pad(cumwidths, pad=(1, 0), mode="constant", value=0.0)
+    cumwidths = (right - left) * cumwidths + left
+    # Pin the outer edges exactly, then recompute the widths from the final edges.
+    cumwidths[..., 0] = left
+    cumwidths[..., -1] = right
+    widths = cumwidths[..., 1:] - cumwidths[..., :-1]
+
+    # Knot derivatives are kept at or above `min_derivative`.
+    derivatives = min_derivative + F.softplus(unnormalized_derivatives)
+
+    # Bin heights: same construction as the widths, rescaled to [bottom, top].
+    heights = F.softmax(unnormalized_heights, dim=-1)
+    heights = min_bin_height + (1 - min_bin_height * num_bins) * heights
+    cumheights = torch.cumsum(heights, dim=-1)
+    cumheights = F.pad(cumheights, pad=(1, 0), mode="constant", value=0.0)
+    cumheights = (top - bottom) * cumheights + bottom
+    cumheights[..., 0] = bottom
+    cumheights[..., -1] = top
+    heights = cumheights[..., 1:] - cumheights[..., :-1]
+
+    # Locate the bin of every input: along the height axis when inverting, the width axis otherwise.
+    if inverse:
+        bin_idx = searchsorted(cumheights, inputs)[..., None]
+    else:
+        bin_idx = searchsorted(cumwidths, inputs)[..., None]
+
+    # Gather the parameters of the selected bin for every input.
+    input_cumwidths = cumwidths.gather(-1, bin_idx)[..., 0]
+    input_bin_widths = widths.gather(-1, bin_idx)[..., 0]
+
+    input_cumheights = cumheights.gather(-1, bin_idx)[..., 0]
+    delta = heights / widths
+    input_delta = delta.gather(-1, bin_idx)[..., 0]
+
+    # Derivative at the lower knot of the bin, and at the upper knot (index shifted by one).
+    input_derivatives = derivatives.gather(-1, bin_idx)[..., 0]
+    input_derivatives_plus_one = derivatives[..., 1:].gather(-1, bin_idx)[..., 0]
+
+    input_heights = heights.gather(-1, bin_idx)[..., 0]
+
+    if inverse:
+        # Coefficients of the quadratic a*root^2 + b*root + c = 0 whose root is the
+        # relative position inside the bin.
+        a = (inputs - input_cumheights) * (
+            input_derivatives + input_derivatives_plus_one - 2 * input_delta
+        ) + input_heights * (input_delta - input_derivatives)
+        b = input_heights * input_derivatives - (inputs - input_cumheights) * (
+            input_derivatives + input_derivatives_plus_one - 2 * input_delta
+        )
+        c = -input_delta * (inputs - input_cumheights)
+
+        discriminant = b.pow(2) - 4 * a * c
+        assert (discriminant >= 0).all()
+
+        # Root written as 2c / (-b - sqrt(disc)) rather than the textbook (-b + sqrt(disc)) / 2a.
+        root = (2 * c) / (-b - torch.sqrt(discriminant))
+        outputs = root * input_bin_widths + input_cumwidths
+
+        theta_one_minus_theta = root * (1 - root)
+        denominator = input_delta + (
+            (input_derivatives + input_derivatives_plus_one - 2 * input_delta) * theta_one_minus_theta
+        )
+        derivative_numerator = input_delta.pow(2) * (
+            input_derivatives_plus_one * root.pow(2)
+            + 2 * input_delta * theta_one_minus_theta
+            + input_derivatives * (1 - root).pow(2)
+        )
+        logabsdet = torch.log(derivative_numerator) - 2 * torch.log(denominator)
+
+        # The log-determinant of the inverse is the negative of the forward one.
+        return outputs, -logabsdet
+    else:
+        # theta is the relative position of the input inside its bin.
+        theta = (inputs - input_cumwidths) / input_bin_widths
+        theta_one_minus_theta = theta * (1 - theta)
+
+        numerator = input_heights * (input_delta * theta.pow(2) + input_derivatives * theta_one_minus_theta)
+        denominator = input_delta + (
+            (input_derivatives + input_derivatives_plus_one - 2 * input_delta) * theta_one_minus_theta
+        )
+        outputs = input_cumheights + numerator / denominator
+
+        derivative_numerator = input_delta.pow(2) * (
+            input_derivatives_plus_one * theta.pow(2)
+            + 2 * input_delta * theta_one_minus_theta
+            + input_derivatives * (1 - theta).pow(2)
+        )
+        logabsdet = torch.log(derivative_numerator) - 2 * torch.log(denominator)
+
+        return outputs, logabsdet
diff --git a/TTS/tts/models/__init__.py b/TTS/tts/models/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..d76a3bebee652f44a65f4a3d919ae2c3971d82f8
--- /dev/null
+++ b/TTS/tts/models/__init__.py
@@ -0,0 +1,14 @@
+from typing import Dict, List, Union
+
+from TTS.utils.generic_utils import find_module
+
+
+def setup_model(config: "Coqpit", samples: Union[List[List], List[Dict]] = None) -> "BaseTTS":
+    print(" > Using model: {}".format(config.model))
+    # fetch the right model implementation.
+    if "base_model" in config and config["base_model"] is not None:
+        MyModel = find_module("TTS.tts.models", config.base_model.lower())
+    else:
+        MyModel = find_module("TTS.tts.models", config.model.lower())
+    model = MyModel.init_from_config(config, samples)
+    return model
diff --git a/TTS/tts/models/__pycache__/__init__.cpython-37.pyc b/TTS/tts/models/__pycache__/__init__.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..797dc94aef77bfb62d5ec74818a58ecee3438d26
Binary files /dev/null and b/TTS/tts/models/__pycache__/__init__.cpython-37.pyc differ
diff --git a/TTS/tts/models/__pycache__/__init__.cpython-38.pyc b/TTS/tts/models/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ac1b678e634fafb001d0f5bb95421ca63c3b64a7
Binary files /dev/null and b/TTS/tts/models/__pycache__/__init__.cpython-38.pyc differ
diff --git a/TTS/tts/models/__pycache__/__init__.cpython-39.pyc b/TTS/tts/models/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..78bdf274eb3fd034a669f055bdf0a36890de98e8
Binary files /dev/null and b/TTS/tts/models/__pycache__/__init__.cpython-39.pyc differ
diff --git a/TTS/tts/models/__pycache__/base_tacotron.cpython-37.pyc b/TTS/tts/models/__pycache__/base_tacotron.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c5c0a93abe024a71be8749c84837edd1e28aa585
Binary files /dev/null and b/TTS/tts/models/__pycache__/base_tacotron.cpython-37.pyc differ
diff --git a/TTS/tts/models/__pycache__/base_tacotron.cpython-38.pyc b/TTS/tts/models/__pycache__/base_tacotron.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f9ef389d1bfcc3d787786036bf0e016f85169349
Binary files /dev/null and b/TTS/tts/models/__pycache__/base_tacotron.cpython-38.pyc differ
diff --git a/TTS/tts/models/__pycache__/base_tacotron.cpython-39.pyc b/TTS/tts/models/__pycache__/base_tacotron.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7094c889ffaef59e202d4a1c3c3d1ad575960ea1
Binary files /dev/null and b/TTS/tts/models/__pycache__/base_tacotron.cpython-39.pyc differ
diff --git a/TTS/tts/models/__pycache__/base_tts.cpython-37.pyc b/TTS/tts/models/__pycache__/base_tts.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..13f5a88e43d307432945d0234facea84269a1eb9
Binary files /dev/null and b/TTS/tts/models/__pycache__/base_tts.cpython-37.pyc differ
diff --git a/TTS/tts/models/__pycache__/base_tts.cpython-38.pyc b/TTS/tts/models/__pycache__/base_tts.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5a98ee6c6526f62457e3245aae5026f7f0720e45
Binary files /dev/null and b/TTS/tts/models/__pycache__/base_tts.cpython-38.pyc differ
diff --git a/TTS/tts/models/__pycache__/base_tts.cpython-39.pyc b/TTS/tts/models/__pycache__/base_tts.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6414b16077ec8c95b1776efae864ea5fd37ac988
Binary files /dev/null and b/TTS/tts/models/__pycache__/base_tts.cpython-39.pyc differ
diff --git a/TTS/tts/models/__pycache__/tacotron2.cpython-37.pyc b/TTS/tts/models/__pycache__/tacotron2.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..98196c85c2df5589841b14c30b099d584e16230e
Binary files /dev/null and b/TTS/tts/models/__pycache__/tacotron2.cpython-37.pyc differ
diff --git a/TTS/tts/models/__pycache__/tacotron2.cpython-38.pyc b/TTS/tts/models/__pycache__/tacotron2.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8d28bd6922225097f42f56e1bc20d8610cd232d9
Binary files /dev/null and b/TTS/tts/models/__pycache__/tacotron2.cpython-38.pyc differ
diff --git a/TTS/tts/models/__pycache__/tacotron2.cpython-39.pyc b/TTS/tts/models/__pycache__/tacotron2.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..33b558d7da3ed2377d8d66322498d6ce9ad48934
Binary files /dev/null and b/TTS/tts/models/__pycache__/tacotron2.cpython-39.pyc differ
diff --git a/TTS/tts/models/align_tts.py b/TTS/tts/models/align_tts.py
new file mode 100644
index 0000000000000000000000000000000000000000..4fdaa596f72e7f0fa899daf4aa3f6be4f5629fd2
--- /dev/null
+++ b/TTS/tts/models/align_tts.py
@@ -0,0 +1,449 @@
+from dataclasses import dataclass, field
+from typing import Dict, List, Union
+
+import torch
+from coqpit import Coqpit
+from torch import nn
+
+from TTS.tts.layers.align_tts.mdn import MDNBlock
+from TTS.tts.layers.feed_forward.decoder import Decoder
+from TTS.tts.layers.feed_forward.duration_predictor import DurationPredictor
+from TTS.tts.layers.feed_forward.encoder import Encoder
+from TTS.tts.layers.generic.pos_encoding import PositionalEncoding
+from TTS.tts.models.base_tts import BaseTTS
+from TTS.tts.utils.helpers import generate_path, maximum_path, sequence_mask
+from TTS.tts.utils.speakers import SpeakerManager
+from TTS.tts.utils.text.tokenizer import TTSTokenizer
+from TTS.tts.utils.visual import plot_alignment, plot_spectrogram
+from TTS.utils.io import load_fsspec
+
+
+@dataclass
+class AlignTTSArgs(Coqpit):
+    """Architecture arguments consumed by :class:`AlignTTS`.
+
+    Args:
+        num_chars (int):
+            number of rows in the input character embedding table.
+        out_channels (int):
+            number of output tensor channels. It is equal to the expected spectrogram size.
+        hidden_channels (int):
+            number of channels in all the model layers.
+        hidden_channels_dp (int):
+            number of channels in duration predictor network.
+        encoder_type (str), decoder_type (str):
+            architecture names handed to ``Encoder`` and ``Decoder``. Default to "fftransformer".
+        encoder_params (dict), decoder_params (dict):
+            keyword settings handed to ``Encoder`` and ``Decoder`` together with the matching type.
+        length_scale (float, optional):
+            factor multiplied into the predicted durations at inference, so >1 lengthens (slows) and
+            <1 shortens (speeds up) the output. Defaults to 1.0.
+        num_speakers (int, optional):
+            number of speakers for multi-speaker training. Defaults to 0.
+        use_speaker_embedding (bool, optional):
+            presumably enables a learned speaker embedding; read outside this file. Defaults to False.
+        use_d_vector_file (bool, optional):
+            presumably enables externally computed speaker vectors; read outside this file. Defaults to False.
+        d_vector_dim (int, optional):
+            presumably the size of the external speaker vectors. Defaults to 0.
+    """
+
+    num_chars: int = None
+    out_channels: int = 80
+    hidden_channels: int = 256
+    hidden_channels_dp: int = 256
+    encoder_type: str = "fftransformer"
+    encoder_params: dict = field(
+        default_factory=lambda: {"hidden_channels_ffn": 1024, "num_heads": 2, "num_layers": 6, "dropout_p": 0.1}
+    )
+    decoder_type: str = "fftransformer"
+    decoder_params: dict = field(
+        default_factory=lambda: {"hidden_channels_ffn": 1024, "num_heads": 2, "num_layers": 6, "dropout_p": 0.1}
+    )
+    length_scale: float = 1.0
+    num_speakers: int = 0
+    use_speaker_embedding: bool = False
+    use_d_vector_file: bool = False
+    d_vector_dim: int = 0
+
+
+class AlignTTS(BaseTTS):
+    """AlignTTS with modified duration predictor.
+    https://arxiv.org/pdf/2003.01950.pdf
+
+    Encoder -> DurationPredictor -> Decoder
+
+    Check :class:`AlignTTSArgs` for the class arguments.
+
+    Paper Abstract:
+        Targeting at both high efficiency and performance, we propose AlignTTS to predict the
+        mel-spectrum in parallel. AlignTTS is based on a Feed-Forward Transformer which generates mel-spectrum from a
+        sequence of characters, and the duration of each character is determined by a duration predictor. Instead of
+        adopting the attention mechanism in Transformer TTS to align text to mel-spectrum, the alignment loss is presented
+        to consider all possible alignments in training by use of dynamic programming. Experiments on the LJSpeech dataset
+        show that our model achieves not only state-of-the-art performance which outperforms Transformer TTS by 0.03 in mean
+        opinion score (MOS), but also a high efficiency which is more than 50 times faster than real-time.
+
+    Note:
+        Original model uses a separate character embedding layer for duration predictor. However, it causes the
+        duration predictor to overfit and prevents learning higher level interactions among characters. Therefore,
+        we predict durations based on encoder outputs which has higher level information about input characters. This
+        enables training without phases as in the original paper.
+
+        Original model uses Transformers in encoder and decoder layers. However, here you can set the architecture
+        differently based on your requirements using ```encoder_type``` and ```decoder_type``` parameters.
+
+    Examples:
+        >>> from TTS.tts.configs.align_tts_config import AlignTTSConfig
+        >>> config = AlignTTSConfig()
+        >>> model = AlignTTS(config)
+
+    """
+
+    # pylint: disable=dangerous-default-value
+
+    def __init__(
+        self,
+        config: "AlignTTSConfig",
+        ap: "AudioProcessor" = None,
+        tokenizer: "TTSTokenizer" = None,
+        speaker_manager: SpeakerManager = None,
+    ):
+
+        super().__init__(config, ap, tokenizer, speaker_manager)
+        self.speaker_manager = speaker_manager
+        self.phase = -1
+        self.length_scale = (
+            float(config.model_args.length_scale)
+            if isinstance(config.model_args.length_scale, int)
+            else config.model_args.length_scale
+        )
+
+        self.emb = nn.Embedding(self.config.model_args.num_chars, self.config.model_args.hidden_channels)
+
+        self.embedded_speaker_dim = 0
+        self.init_multispeaker(config)
+
+        self.pos_encoder = PositionalEncoding(config.model_args.hidden_channels)
+        self.encoder = Encoder(
+            config.model_args.hidden_channels,
+            config.model_args.hidden_channels,
+            config.model_args.encoder_type,
+            config.model_args.encoder_params,
+            self.embedded_speaker_dim,
+        )
+        self.decoder = Decoder(
+            config.model_args.out_channels,
+            config.model_args.hidden_channels,
+            config.model_args.decoder_type,
+            config.model_args.decoder_params,
+        )
+        self.duration_predictor = DurationPredictor(config.model_args.hidden_channels_dp)
+
+        self.mod_layer = nn.Conv1d(config.model_args.hidden_channels, config.model_args.hidden_channels, 1)
+
+        self.mdn_block = MDNBlock(config.model_args.hidden_channels, 2 * config.model_args.out_channels)
+
+        if self.embedded_speaker_dim > 0 and self.embedded_speaker_dim != config.model_args.hidden_channels:
+            self.proj_g = nn.Conv1d(self.embedded_speaker_dim, config.model_args.hidden_channels, 1)
+
+    @staticmethod
+    def compute_log_probs(mu, log_sigma, y):
+        # pylint: disable=protected-access, c-extension-no-member
+        y = y.transpose(1, 2).unsqueeze(1)  # [B, 1, T1, D]
+        mu = mu.transpose(1, 2).unsqueeze(2)  # [B, T2, 1, D]
+        log_sigma = log_sigma.transpose(1, 2).unsqueeze(2)  # [B, T2, 1, D]
+        expanded_y, expanded_mu = torch.broadcast_tensors(y, mu)
+        exponential = -0.5 * torch.mean(
+            torch._C._nn.mse_loss(expanded_y, expanded_mu, 0) / torch.pow(log_sigma.exp(), 2), dim=-1
+        )  # B, L, T
+        logp = exponential - 0.5 * log_sigma.mean(dim=-1)
+        return logp
+
+    def compute_align_path(self, mu, log_sigma, y, x_mask, y_mask):
+        # find the max alignment path
+        attn_mask = torch.unsqueeze(x_mask, -1) * torch.unsqueeze(y_mask, 2)
+        log_p = self.compute_log_probs(mu, log_sigma, y)
+        # [B, T_en, T_dec]
+        attn = maximum_path(log_p, attn_mask.squeeze(1)).unsqueeze(1)
+        dr_mas = torch.sum(attn, -1)
+        return dr_mas.squeeze(1), log_p
+
+    @staticmethod
+    def generate_attn(dr, x_mask, y_mask=None):
+        # compute decode mask from the durations
+        if y_mask is None:
+            y_lengths = dr.sum(1).long()
+            y_lengths[y_lengths < 1] = 1
+            y_mask = torch.unsqueeze(sequence_mask(y_lengths, None), 1).to(dr.dtype)
+        attn_mask = torch.unsqueeze(x_mask, -1) * torch.unsqueeze(y_mask, 2)
+        attn = generate_path(dr, attn_mask.squeeze(1)).to(dr.dtype)
+        return attn
+
+    def expand_encoder_outputs(self, en, dr, x_mask, y_mask):
+        """Generate attention alignment map from durations and
+        expand encoder outputs
+
+        Examples::
+            - encoder output: [a,b,c,d]
+            - durations: [1, 3, 2, 1]
+
+            - expanded: [a, b, b, b, c, c, d]
+            - attention map: [[0, 0, 0, 0, 0, 0, 1],
+                             [0, 0, 0, 0, 1, 1, 0],
+                             [0, 1, 1, 1, 0, 0, 0],
+                             [1, 0, 0, 0, 0, 0, 0]]
+        """
+        attn = self.generate_attn(dr, x_mask, y_mask)
+        o_en_ex = torch.matmul(attn.squeeze(1).transpose(1, 2), en.transpose(1, 2)).transpose(1, 2)
+        return o_en_ex, attn
+
+    def format_durations(self, o_dr_log, x_mask):
+        """Turn log-durations into rounded frame counts, with every entry raised to at least 1."""
+        scaled = (torch.exp(o_dr_log) - 1) * x_mask * self.length_scale
+        scaled[scaled < 1] = 1.0
+        return torch.round(scaled)
+
+    @staticmethod
+    def _concat_speaker_embedding(o_en, g):
+        g_exp = g.expand(-1, -1, o_en.size(-1))  # [B, C, T_en]
+        o_en = torch.cat([o_en, g_exp], 1)
+        return o_en
+
+    def _sum_speaker_embedding(self, x, g):
+        # project g to decoder dim.
+        if hasattr(self, "proj_g"):
+            g = self.proj_g(g)
+
+        return x + g
+
+    def _forward_encoder(self, x, x_lengths, g=None):
+        if hasattr(self, "emb_g"):
+            g = nn.functional.normalize(self.speaker_embedding(g))  # NOTE(review): guarded by `emb_g` - confirm
+
+        if g is not None:
+            g = g.unsqueeze(-1)  # add a trailing time axis so g can be expanded over T later
+
+        # embed the input ids: [B, T] -> [B, T, C]
+        x_emb = self.emb(x)
+        # channels first for the encoder: [B, C, T]
+        x_emb = torch.transpose(x_emb, 1, -1)
+
+        # sequence mask built from x_lengths, with an axis inserted at dim 1 and cast to the dtype of x
+        x_mask = torch.unsqueeze(sequence_mask(x_lengths, x.shape[1]), 1).to(x.dtype)
+
+        # encoder pass (g is not passed to the encoder here)
+        o_en = self.encoder(x_emb, x_mask)
+
+        # duration predictor input: encoder output with g concatenated on dim 1 when g is given
+        if g is not None:
+            o_en_dp = self._concat_speaker_embedding(o_en, g)
+        else:
+            o_en_dp = o_en
+        return o_en, o_en_dp, x_mask, g
+
+    def _forward_decoder(self, o_en, o_en_dp, dr, x_mask, y_lengths, g):
+        y_mask = torch.unsqueeze(sequence_mask(y_lengths, None), 1).to(o_en_dp.dtype)
+        # expand o_en with durations
+        o_en_ex, attn = self.expand_encoder_outputs(o_en, dr, x_mask, y_mask)
+        # positional encoding
+        if hasattr(self, "pos_encoder"):
+            o_en_ex = self.pos_encoder(o_en_ex, y_mask)
+        # speaker embedding
+        if g is not None:
+            o_en_ex = self._sum_speaker_embedding(o_en_ex, g)
+        # decoder pass
+        o_de = self.decoder(o_en_ex, y_mask, g=g)
+        return o_de, attn.transpose(1, 2)
+
+    def _forward_mdn(self, o_en, y, y_lengths, x_mask):
+        # MAS potentials and alignment
+        mu, log_sigma = self.mdn_block(o_en)
+        y_mask = torch.unsqueeze(sequence_mask(y_lengths, None), 1).to(o_en.dtype)
+        dr_mas, logp = self.compute_align_path(mu, log_sigma, y, x_mask, y_mask)
+        return dr_mas, mu, log_sigma, logp
+
+    def forward(
+        self, x, x_lengths, y, y_lengths, aux_input={"d_vectors": None}, phase=None
+    ):  # pylint: disable=unused-argument
+        """Run the training forward pass for the given AlignTTS ``phase``.
+
+        Phases 0-2 leave ``durations_log`` as None; phase 0 skips the decoder, so
+        ``model_outputs`` is None there. Phase 3 and any other value run every sub-network.
+        Shapes:
+            - x: :math:`[B, T_max]`, x_lengths: :math:`[B]`, y_lengths: :math:`[B]`
+            - g (``aux_input["d_vectors"]``): :math:`[B, C]`
+        """
+        y = y.transpose(1, 2)
+        g = aux_input["d_vectors"] if "d_vectors" in aux_input else None
+        o_de, o_dr_log, dr_mas_log, attn, mu, log_sigma, logp = None, None, None, None, None, None, None
+        if phase == 0:
+            # train encoder and MDN
+            o_en, o_en_dp, x_mask, g = self._forward_encoder(x, x_lengths, g)
+            dr_mas, mu, log_sigma, logp = self._forward_mdn(o_en, y, y_lengths, x_mask)
+            y_mask = torch.unsqueeze(sequence_mask(y_lengths, None), 1).to(o_en_dp.dtype)
+            attn = self.generate_attn(dr_mas, x_mask, y_mask)
+        elif phase == 1:
+            # train decoder
+            o_en, o_en_dp, x_mask, g = self._forward_encoder(x, x_lengths, g)
+            dr_mas, _, _, _ = self._forward_mdn(o_en, y, y_lengths, x_mask)
+            o_de, attn = self._forward_decoder(o_en.detach(), o_en_dp.detach(), dr_mas.detach(), x_mask, y_lengths, g=g)
+        elif phase == 2:
+            # train the whole except duration predictor
+            o_en, o_en_dp, x_mask, g = self._forward_encoder(x, x_lengths, g)
+            dr_mas, mu, log_sigma, logp = self._forward_mdn(o_en, y, y_lengths, x_mask)
+            o_de, attn = self._forward_decoder(o_en, o_en_dp, dr_mas, x_mask, y_lengths, g=g)
+        elif phase == 3:
+            # train duration predictor (fed detached encoder outputs, like the default branch, not raw ids)
+            o_en, o_en_dp, x_mask, g = self._forward_encoder(x, x_lengths, g)
+            o_dr_log = self.duration_predictor(o_en_dp.detach(), x_mask)
+            dr_mas, mu, log_sigma, logp = self._forward_mdn(o_en, y, y_lengths, x_mask)
+            o_de, attn = self._forward_decoder(o_en, o_en_dp, dr_mas, x_mask, y_lengths, g=g)
+            o_dr_log = o_dr_log.squeeze(1)
+        else:
+            o_en, o_en_dp, x_mask, g = self._forward_encoder(x, x_lengths, g)
+            o_dr_log = self.duration_predictor(o_en_dp.detach(), x_mask)
+            dr_mas, mu, log_sigma, logp = self._forward_mdn(o_en, y, y_lengths, x_mask)
+            o_de, attn = self._forward_decoder(o_en, o_en_dp, dr_mas, x_mask, y_lengths, g=g)
+            o_dr_log = o_dr_log.squeeze(1)
+        dr_mas_log = torch.log(dr_mas + 1).squeeze(1)
+        outputs = {
+            "model_outputs": o_de.transpose(1, 2) if o_de is not None else None,  # phase 0 has no decoder output
+            "alignments": attn,
+            "durations_log": o_dr_log,
+            "durations_mas_log": dr_mas_log,
+            "mu": mu,
+            "log_sigma": log_sigma,
+            "logp": logp,
+        }
+        return outputs
+
+    @torch.no_grad()
+    def inference(self, x, aux_input={"d_vectors": None}):  # pylint: disable=unused-argument
+        """
+        Shapes:
+            - x: :math:`[B, T_max]`
+            - x_lengths: :math:`[B]`
+            - g: :math:`[B, C]`
+        """
+        g = aux_input["d_vectors"] if "d_vectors" in aux_input else None
+        x_lengths = torch.tensor(x.shape[1:2]).to(x.device)
+        # pad input to prevent dropping the last word
+        # x = torch.nn.functional.pad(x, pad=(0, 5), mode='constant', value=0)
+        o_en, o_en_dp, x_mask, g = self._forward_encoder(x, x_lengths, g)
+        # o_dr_log = self.duration_predictor(x, x_mask)
+        o_dr_log = self.duration_predictor(o_en_dp, x_mask)
+        # duration predictor pass
+        o_dr = self.format_durations(o_dr_log, x_mask).squeeze(1)
+        y_lengths = o_dr.sum(1)
+        o_de, attn = self._forward_decoder(o_en, o_en_dp, o_dr, x_mask, y_lengths, g=g)
+        outputs = {"model_outputs": o_de.transpose(1, 2), "alignments": attn}
+        return outputs
+
+    def train_step(self, batch: dict, criterion: nn.Module):
+        text_input = batch["text_input"]
+        text_lengths = batch["text_lengths"]
+        mel_input = batch["mel_input"]
+        mel_lengths = batch["mel_lengths"]
+        d_vectors = batch["d_vectors"]
+        speaker_ids = batch["speaker_ids"]
+
+        aux_input = {"d_vectors": d_vectors, "speaker_ids": speaker_ids}
+        outputs = self.forward(text_input, text_lengths, mel_input, mel_lengths, aux_input, self.phase)
+        loss_dict = criterion(
+            outputs["logp"],
+            outputs["model_outputs"],
+            mel_input,
+            mel_lengths,
+            outputs["durations_log"],
+            outputs["durations_mas_log"],
+            text_lengths,
+            phase=self.phase,
+        )
+
+        return outputs, loss_dict
+
+    def _create_logs(self, batch, outputs, ap):  # pylint: disable=no-self-use
+        model_outputs = outputs["model_outputs"]
+        alignments = outputs["alignments"]
+        mel_input = batch["mel_input"]
+
+        pred_spec = model_outputs[0].data.cpu().numpy()
+        gt_spec = mel_input[0].data.cpu().numpy()
+        align_img = alignments[0].data.cpu().numpy()
+
+        figures = {
+            "prediction": plot_spectrogram(pred_spec, ap, output_fig=False),
+            "ground_truth": plot_spectrogram(gt_spec, ap, output_fig=False),
+            "alignment": plot_alignment(align_img, output_fig=False),
+        }
+
+        # Sample audio
+        train_audio = ap.inv_melspectrogram(pred_spec.T)
+        return figures, {"audio": train_audio}
+
+    def train_log(
+        self, batch: dict, outputs: dict, logger: "Logger", assets: dict, steps: int
+    ) -> None:  # pylint: disable=no-self-use
+        figures, audios = self._create_logs(batch, outputs, self.ap)
+        logger.train_figures(steps, figures)
+        logger.train_audios(steps, audios, self.ap.sample_rate)
+
+    def eval_step(self, batch: dict, criterion: nn.Module):
+        return self.train_step(batch, criterion)
+
+    def eval_log(self, batch: dict, outputs: dict, logger: "Logger", assets: dict, steps: int) -> None:
+        figures, audios = self._create_logs(batch, outputs, self.ap)
+        logger.eval_figures(steps, figures)
+        logger.eval_audios(steps, audios, self.ap.sample_rate)
+
+    def load_checkpoint(
+        self, config, checkpoint_path, eval=False, cache=False
+    ):  # pylint: disable=unused-argument, redefined-builtin
+        state = load_fsspec(checkpoint_path, map_location=torch.device("cpu"), cache=cache)
+        self.load_state_dict(state["model"])
+        if eval:
+            self.eval()
+            assert not self.training
+
+    def get_criterion(self):
+        from TTS.tts.layers.losses import AlignTTSLoss  # pylint: disable=import-outside-toplevel
+
+        return AlignTTSLoss(self.config)
+
+    @staticmethod
+    def _set_phase(config, global_step):
+        """Pick the AlignTTS training phase for ``global_step``.
+
+        Returns the index of the last entry of ``config.phase_start_steps`` that is
+        strictly below ``global_step`` (0 when there is none), or ``None`` when
+        ``config.phase_start_steps`` is not a list.
+        """
+        start_steps = config.phase_start_steps
+        if not isinstance(start_steps, list):
+            return None
+        phase = 0
+        for idx, start in enumerate(start_steps):
+            if start < global_step:
+                phase = idx
+        return phase
+
+    def on_epoch_start(self, trainer):
+        """Set AlignTTS training phase on epoch start."""
+        self.phase = self._set_phase(trainer.config, trainer.total_steps_done)
+
+    @staticmethod
+    def init_from_config(config: "AlignTTSConfig", samples: Union[List[List], List[Dict]] = None):
+        """Initiate model from config
+
+        Args:
+            config (AlignTTSConfig): Model config.
+            samples (Union[List[List], List[Dict]]): Training samples to parse speaker ids for training.
+                Defaults to None.
+        """
+        from TTS.utils.audio import AudioProcessor
+
+        ap = AudioProcessor.init_from_config(config)
+        tokenizer, new_config = TTSTokenizer.init_from_config(config)
+        speaker_manager = SpeakerManager.init_from_config(config, samples)
+        return AlignTTS(new_config, ap, tokenizer, speaker_manager)
diff --git a/TTS/tts/models/base_tacotron.py b/TTS/tts/models/base_tacotron.py
new file mode 100644
index 0000000000000000000000000000000000000000..4aaf5261111275f4b85fa66c9a83817fc815ddfe
--- /dev/null
+++ b/TTS/tts/models/base_tacotron.py
@@ -0,0 +1,300 @@
+import copy
+from abc import abstractmethod
+from typing import Dict, Tuple
+
+import torch
+from coqpit import Coqpit
+from torch import nn
+
+from TTS.tts.layers.losses import TacotronLoss
+from TTS.tts.models.base_tts import BaseTTS
+from TTS.tts.utils.helpers import sequence_mask
+from TTS.tts.utils.speakers import SpeakerManager
+from TTS.tts.utils.synthesis import synthesis
+from TTS.tts.utils.text.tokenizer import TTSTokenizer
+from TTS.tts.utils.visual import plot_alignment, plot_spectrogram
+from TTS.utils.generic_utils import format_aux_input
+from TTS.utils.io import load_fsspec
+from TTS.utils.training import gradual_training_scheduler
+
+
+class BaseTacotron(BaseTTS):
+    """Base class shared by Tacotron and Tacotron2"""
+
+    def __init__(
+        self,
+        config: "TacotronConfig",
+        ap: "AudioProcessor",
+        tokenizer: "TTSTokenizer",
+        speaker_manager: SpeakerManager = None,
+    ):
+        super().__init__(config, ap, tokenizer, speaker_manager)
+
+        # pass all config fields as class attributes
+        for key in config:
+            setattr(self, key, config[key])
+
+        # layers
+        self.embedding = None
+        self.encoder = None
+        self.decoder = None
+        self.postnet = None
+
+        # init tensors
+        self.embedded_speakers = None
+        self.embedded_speakers_projected = None
+
+        # global style token
+        if self.gst and self.use_gst:
+            self.decoder_in_features += self.gst.gst_embedding_dim  # add gst embedding dim
+            self.gst_layer = None
+
+        # Capacitron
+        if self.capacitron_vae and self.use_capacitron_vae:
+            self.decoder_in_features += self.capacitron_vae.capacitron_VAE_embedding_dim  # add capacitron embedding dim
+            self.capacitron_vae_layer = None
+
+        # additional layers
+        self.decoder_backward = None
+        self.coarse_decoder = None
+
+    @staticmethod
+    def _format_aux_input(aux_input: Dict) -> Dict:
+        """Set missing fields to their default values"""
+        if aux_input:
+            return format_aux_input({"d_vectors": None, "speaker_ids": None}, aux_input)
+        return None
+
+    #############################
+    # INIT FUNCTIONS
+    #############################
+
+    def _init_backward_decoder(self):
+        """Init the backward decoder for Forward-Backward decoding."""
+        self.decoder_backward = copy.deepcopy(self.decoder)
+
+    def _init_coarse_decoder(self):
+        """Init the coarse decoder for Double-Decoder Consistency."""
+        self.coarse_decoder = copy.deepcopy(self.decoder)
+        self.coarse_decoder.r_init = self.ddc_r
+        self.coarse_decoder.set_r(self.ddc_r)
+
+    #############################
+    # CORE FUNCTIONS
+    #############################
+
+    @abstractmethod
+    def forward(self):
+        pass
+
+    @abstractmethod
+    def inference(self):
+        pass
+
+    def load_checkpoint(
+        self, config, checkpoint_path, eval=False, cache=False
+    ):  # pylint: disable=unused-argument, redefined-builtin
+        """Load model weights from a checkpoint and set the decoder reduction rate ``r``.
+
+        Args:
+            config (Coqpit): model configuration. ``config.r`` is used only when the checkpoint lacks both "r" and "config".
+            checkpoint_path (str): path to checkpoint file.
+            eval (bool, optional): if True, switch the model to eval mode and print ``r``. Defaults to False.
+            cache (bool, optional): If True, cache the file locally for subsequent calls. It is cached under `get_user_data_dir()/tts_cache`. Defaults to False.
+        """
+        state = load_fsspec(checkpoint_path, map_location=torch.device("cpu"), cache=cache)
+        self.load_state_dict(state["model"])
+        # TODO: set r in run-time by taking it from the new config
+        if "r" in state:
+            # first choice: r stored at the top level of the checkpoint (older checkpoints)
+            self.decoder.set_r(state["r"])
+        elif "config" in state:
+            # second choice: r from the config saved inside the checkpoint at training time
+            self.decoder.set_r(state["config"]["r"])
+        else:
+            # last resort: r from the config passed by the caller
+            self.decoder.set_r(config.r)
+        if eval:
+            self.eval()
+            print(f" > Model's reduction rate `r` is set to: {self.decoder.r}")
+            assert not self.training
+
+    def get_criterion(self) -> nn.Module:
+        """Get the model criterion used in training."""
+        return TacotronLoss(self.config)
+
+    @staticmethod
+    def init_from_config(config: Coqpit):
+        """Build the audio processor, tokenizer and speaker manager from ``config`` and return the model."""
+        from TTS.utils.audio import AudioProcessor  # pylint: disable=import-outside-toplevel
+
+        ap = AudioProcessor.init_from_config(config)
+        tokenizer, new_config = TTSTokenizer.init_from_config(config)  # returns a pair, as AlignTTS unpacks it
+        speaker_manager = SpeakerManager.init_from_config(config)
+        return BaseTacotron(new_config, ap, tokenizer, speaker_manager)
+
+    ##########################
+    # TEST AND LOG FUNCTIONS #
+    ##########################
+
+    def test_run(self, assets: Dict) -> Tuple[Dict, Dict]:
+        """Generic test run for `tts` models used by `Trainer`.
+
+        You can override this for a different behaviour.
+
+        Args:
+            assets (dict): A dict of training assets. Not read by this implementation.
+
+        Returns:
+            Dict: ``{"figures": ..., "audios": ...}`` (one dict, although the signature is annotated as a tuple).
+        """
+        print(" | > Synthesizing test sentences.")
+        test_audios = {}
+        test_figures = {}
+        test_sentences = self.config.test_sentences
+        aux_inputs = self._get_test_aux_input()
+        for idx, sen in enumerate(test_sentences):
+            outputs_dict = synthesis(
+                self,
+                sen,
+                self.config,
+                "cuda" in str(next(self.parameters()).device),
+                speaker_id=aux_inputs["speaker_id"],
+                d_vector=aux_inputs["d_vector"],
+                style_wav=aux_inputs["style_wav"],
+                use_griffin_lim=True,
+                do_trim_silence=False,
+            )
+            test_audios["{}-audio".format(idx)] = outputs_dict["wav"]
+            test_figures["{}-prediction".format(idx)] = plot_spectrogram(
+                outputs_dict["outputs"]["model_outputs"], self.ap, output_fig=False
+            )
+            test_figures["{}-alignment".format(idx)] = plot_alignment(
+                outputs_dict["outputs"]["alignments"], output_fig=False
+            )
+        return {"figures": test_figures, "audios": test_audios}
+
+    def test_log(
+        self, outputs: dict, logger: "Logger", assets: dict, steps: int  # pylint: disable=unused-argument
+    ) -> None:
+        logger.test_audios(steps, outputs["audios"], self.ap.sample_rate)
+        logger.test_figures(steps, outputs["figures"])
+
+    #############################
+    # COMMON COMPUTE FUNCTIONS
+    #############################
+
+    def compute_masks(self, text_lengths, mel_lengths):
+        """Compute masks  against sequence paddings."""
+        # B x T_in_max (boolean)
+        input_mask = sequence_mask(text_lengths)
+        output_mask = None
+        if mel_lengths is not None:
+            max_len = mel_lengths.max()
+            r = self.decoder.r
+            max_len = max_len + (r - (max_len % r)) if max_len % r > 0 else max_len
+            output_mask = sequence_mask(mel_lengths, max_len=max_len)
+        return input_mask, output_mask
+
+    def _backward_pass(self, mel_specs, encoder_outputs, mask):
+        """Run backwards decoder"""
+        decoder_outputs_b, alignments_b, _ = self.decoder_backward(
+            encoder_outputs, torch.flip(mel_specs, dims=(1,)), mask
+        )
+        decoder_outputs_b = decoder_outputs_b.transpose(1, 2).contiguous()
+        return decoder_outputs_b, alignments_b
+
+    def _coarse_decoder_pass(self, mel_specs, encoder_outputs, alignments, input_mask):
+        """Double Decoder Consistency"""
+        T = mel_specs.shape[1]
+        if T % self.coarse_decoder.r > 0:
+            padding_size = self.coarse_decoder.r - (T % self.coarse_decoder.r)
+            mel_specs = torch.nn.functional.pad(mel_specs, (0, 0, 0, padding_size, 0, 0))
+        decoder_outputs_backward, alignments_backward, _ = self.coarse_decoder(
+            encoder_outputs.detach(), mel_specs, input_mask
+        )
+        # scale_factor = self.decoder.r_init / self.decoder.r
+        alignments_backward = torch.nn.functional.interpolate(
+            alignments_backward.transpose(1, 2),
+            size=alignments.shape[1],
+            mode="nearest",
+        ).transpose(1, 2)
+        decoder_outputs_backward = decoder_outputs_backward.transpose(1, 2)
+        decoder_outputs_backward = decoder_outputs_backward[:, :T, :]
+        return decoder_outputs_backward, alignments_backward
+
+    #############################
+    # EMBEDDING FUNCTIONS
+    #############################
+
+    def compute_gst(self, inputs, style_input, speaker_embedding=None):
+        """Compute global style token"""
+        if isinstance(style_input, dict):
+            # multiply each style token with a weight
+            query = torch.zeros(1, 1, self.gst.gst_embedding_dim // 2).type_as(inputs)
+            if speaker_embedding is not None:
+                query = torch.cat([query, speaker_embedding.reshape(1, 1, -1)], dim=-1)
+
+            _GST = torch.tanh(self.gst_layer.style_token_layer.style_tokens)
+            gst_outputs = torch.zeros(1, 1, self.gst.gst_embedding_dim).type_as(inputs)
+            for k_token, v_amplifier in style_input.items():
+                key = _GST[int(k_token)].unsqueeze(0).expand(1, -1, -1)
+                gst_outputs_att = self.gst_layer.style_token_layer.attention(query, key)
+                gst_outputs = gst_outputs + gst_outputs_att * v_amplifier
+        elif style_input is None:
+            # ignore style token and return zero tensor
+            gst_outputs = torch.zeros(1, 1, self.gst.gst_embedding_dim).type_as(inputs)
+        else:
+            # compute style tokens
+            gst_outputs = self.gst_layer(style_input, speaker_embedding)  # pylint: disable=not-callable
+        inputs = self._concat_speaker_embedding(inputs, gst_outputs)
+        return inputs
+
+    def compute_capacitron_VAE_embedding(self, inputs, reference_mel_info, text_info=None, speaker_embedding=None):
+        """Capacitron Variational Autoencoder"""
+        (VAE_outputs, posterior_distribution, prior_distribution, capacitron_beta,) = self.capacitron_vae_layer(
+            reference_mel_info,
+            text_info,
+            speaker_embedding,  # pylint: disable=not-callable
+        )
+
+        VAE_outputs = VAE_outputs.to(inputs.device)
+        encoder_output = self._concat_speaker_embedding(
+            inputs, VAE_outputs
+        )  # concatenate to the output of the basic tacotron encoder
+        return (
+            encoder_output,
+            posterior_distribution,
+            prior_distribution,
+            capacitron_beta,
+        )
+
+    @staticmethod
+    def _add_speaker_embedding(outputs, embedded_speakers):
+        embedded_speakers_ = embedded_speakers.expand(outputs.size(0), outputs.size(1), -1)
+        outputs = outputs + embedded_speakers_
+        return outputs
+
+    @staticmethod
+    def _concat_speaker_embedding(outputs, embedded_speakers):
+        embedded_speakers_ = embedded_speakers.expand(outputs.size(0), outputs.size(1), -1)
+        outputs = torch.cat([outputs, embedded_speakers_], dim=-1)
+        return outputs
+
+    #############################
+    # CALLBACKS
+    #############################
+
+    def on_epoch_start(self, trainer):
+        """Callback for setting values wrt gradual training schedule.
+
+        Args:
+            trainer (TrainerTTS): TTS trainer object that is used to train this model.
+        """
+        if self.gradual_training:
+            r, trainer.config.batch_size = gradual_training_scheduler(trainer.total_steps_done, trainer.config)
+            trainer.config.r = r
+            self.decoder.set_r(r)
+            if trainer.config.bidirectional_decoder:
+                trainer.model.decoder_backward.set_r(r)
+            print(f"\n > Number of output frames: {self.decoder.r}")
diff --git a/TTS/tts/models/base_tts.py b/TTS/tts/models/base_tts.py
new file mode 100644
index 0000000000000000000000000000000000000000..d2222acba796928ef984b8bd70595917095841ad
--- /dev/null
+++ b/TTS/tts/models/base_tts.py
@@ -0,0 +1,432 @@
+import os
+import random
+from typing import Dict, List, Tuple, Union
+
+import torch
+import torch.distributed as dist
+from coqpit import Coqpit
+from torch import nn
+from torch.utils.data import DataLoader
+from torch.utils.data.sampler import WeightedRandomSampler
+from trainer.torch import DistributedSampler, DistributedSamplerWrapper
+
+from TTS.model import BaseTrainerModel
+from TTS.tts.datasets.dataset import TTSDataset
+from TTS.tts.utils.data import get_length_balancer_weights
+from TTS.tts.utils.languages import LanguageManager, get_language_balancer_weights
+from TTS.tts.utils.speakers import SpeakerManager, get_speaker_balancer_weights, get_speaker_manager
+from TTS.tts.utils.synthesis import synthesis
+from TTS.tts.utils.visual import plot_alignment, plot_spectrogram
+
+# pylint: skip-file
+
+
+class BaseTTS(BaseTrainerModel):
+    """Base `tts` class. Every new `tts` model must inherit this.
+
+    It defines common `tts` specific functions on top of `Model` implementation.
+    """
+
+    def __init__(
+        self,
+        config: Coqpit,
+        ap: "AudioProcessor",
+        tokenizer: "TTSTokenizer",
+        speaker_manager: SpeakerManager = None,
+        language_manager: LanguageManager = None,
+    ):
+        """Keep references to the config and the helper objects, then resolve the model args.
+
+        Args:
+            config (Coqpit): A "*Config" or "*Args" object, see `_set_model_args`.
+            ap (AudioProcessor): Stored as `self.ap`.
+            tokenizer (TTSTokenizer): Stored as `self.tokenizer`. May be None.
+            speaker_manager (SpeakerManager, optional): Stored as `self.speaker_manager`. Defaults to None.
+            language_manager (LanguageManager, optional): Stored as `self.language_manager`. Defaults to None.
+        """
+        super().__init__()
+        self.config, self.ap, self.tokenizer = config, ap, tokenizer
+        self.speaker_manager, self.language_manager = speaker_manager, language_manager
+        # must come last: `_set_model_args` reads `self.config` and `self.tokenizer`
+        self._set_model_args(config)
+
+    def _set_model_args(self, config: Coqpit):
+        """Setup model args based on the config type (`ModelConfig` or `ModelArgs`).
+
+        `ModelArgs` has all the fields required to initialize the model architecture.
+
+        `ModelConfig` has all the fields required for training, inference and contains `ModelArgs`.
+
+        If the config is for training with a name like "*Config", then the model args are embedded in the
+        config.model_args
+
+        If the config is for the model with a name like "*Args", then we assign them directly.
+
+        Args:
+            config (Coqpit): Either a "*Config" or a "*Args" object. The kind is decided from its class name.
+
+        Raises:
+            ValueError: If the class name of `config` contains neither "Config" nor "Args".
+        """
+        # don't use isinstance not to import recursively
+        if "Config" in config.__class__.__name__:
+            # vocabulary size stored in `model_args` when present, else the top-level config field
+            config_num_chars = (
+                self.config.model_args.num_chars if hasattr(self.config, "model_args") else self.config.num_chars
+            )
+            # a tokenizer, when given, overrides the configured vocabulary size
+            num_chars = config_num_chars if self.tokenizer is None else self.tokenizer.characters.num_chars
+            if "characters" in config:
+                self.config.num_chars = num_chars
+                if hasattr(self.config, "model_args"):
+                    config.model_args.num_chars = num_chars
+                    self.args = self.config.model_args
+                # NOTE(review): `self.args` is not assigned here when the config has no `model_args` - confirm
+                # that this is intended.
+            else:
+                self.config = config
+                self.args = config.model_args
+        elif "Args" in config.__class__.__name__:
+            self.args = config
+        else:
+            raise ValueError("config must be either a *Config or *Args")
+
+    def init_multispeaker(self, config: Coqpit, data: List = None):
+        """Set up the multi-speaker attributes of the model.
+
+        Depending on the config one of the following happens:
+
+        1. `config.use_speaker_embedding` and `config.use_d_vector_file` are both False: no embedding size is set
+        and no layer is created.
+        2. `config.use_d_vector_file` is True: only `self.embedded_speaker_dim` is set, to `config.d_vector_dim`
+        or to 512 when that field is missing or None.
+        3. Only `config.use_speaker_embedding` is True: the size is set as above and an `nn.Embedding` of that
+        size is created as `self.speaker_embedding`.
+
+        `self.num_speakers` comes from the speaker manager when there is one, else from `config.num_speakers`
+        when the config has that field.
+
+        You can override this function for new models.
+
+        Args:
+            config (Coqpit): Model configuration.
+            data (List, optional): Not read by this implementation. Defaults to None.
+        """
+        # number of speakers: the speaker manager takes precedence over the config value
+        manager = self.speaker_manager
+        if manager is not None:
+            self.num_speakers = manager.num_speakers
+        elif hasattr(config, "num_speakers"):
+            self.num_speakers = config.num_speakers
+
+        # final speaker embedding size
+        if config.use_speaker_embedding or config.use_d_vector_file:
+            embedding_dim = 512
+            if "d_vector_dim" in config and config.d_vector_dim is not None:
+                embedding_dim = config.d_vector_dim
+            self.embedded_speaker_dim = embedding_dim
+
+        # trainable speaker lookup table, only when no external d-vectors are used
+        if config.use_speaker_embedding and not config.use_d_vector_file:
+            print(" > Init speaker_embedding layer.")
+            speaker_table = nn.Embedding(self.num_speakers, self.embedded_speaker_dim)
+            speaker_table.weight.data.normal_(0, 0.3)
+            self.speaker_embedding = speaker_table
+
+    def get_aux_input(self, **kwargs) -> Dict:
+        """Prepare and return `aux_input` used by `forward()`
+
+        This base implementation does not read `kwargs` and returns every auxiliary input set to None.
+        """
+        aux_keys = ("speaker_id", "style_wav", "d_vector", "language_id")
+        return dict.fromkeys(aux_keys)
+
+    def get_aux_input_from_test_setences(self, sentence_info):
+        """Build the auxiliary inputs for one entry of the test sentences.
+
+        Args:
+            sentence_info (Union[str, List]): Either the text itself or a list laid out as `[text]`,
+                `[text, speaker_name]`, `[text, speaker_name, style_wav]` or
+                `[text, speaker_name, style_wav, language_name]`. A list of any other length leaves all four
+                values at None.
+
+        Returns:
+            Dict: `text`, `speaker_id`, `style_wav`, `d_vector` and `language_id`. Values that do not apply, or
+                whose manager is missing, stay None.
+        """
+        config = self.config.model_args if hasattr(self.config, "model_args") else self.config
+
+        # extract speaker and language info
+        text, speaker_name, style_wav, language_name = None, None, None, None
+        if isinstance(sentence_info, list):
+            if 1 <= len(sentence_info) <= 4:
+                # pad the missing trailing fields with None
+                text, speaker_name, style_wav, language_name = (list(sentence_info) + [None] * 3)[:4]
+        else:
+            text = sentence_info
+
+        # get speaker id/d_vector
+        speaker_id, d_vector, language_id = None, None, None
+        # `__init__` always creates the manager attributes, possibly as None, so `hasattr` alone cannot tell
+        # whether a manager is really available.
+        speaker_manager = getattr(self, "speaker_manager", None)
+        if speaker_manager is not None:
+            if config.use_d_vector_file:
+                if speaker_name is None:
+                    d_vector = speaker_manager.get_random_embedding()
+                else:
+                    d_vector = speaker_manager.get_d_vector_by_name(speaker_name)
+            elif config.use_speaker_embedding:
+                if speaker_name is None:
+                    speaker_id = speaker_manager.get_random_id()
+                else:
+                    speaker_id = speaker_manager.name_to_id[speaker_name]
+
+        # get language id
+        language_manager = getattr(self, "language_manager", None)
+        if language_manager is not None and config.use_language_embedding and language_name is not None:
+            language_id = language_manager.name_to_id[language_name]
+
+        return {
+            "text": text,
+            "speaker_id": speaker_id,
+            "style_wav": style_wav,
+            "d_vector": d_vector,
+            "language_id": language_id,
+        }
+
+    def format_batch(self, batch: Dict) -> Dict:
+        """Generic batch formatting for `TTSDataset`.
+
+        Besides renaming the batch entries, it derives per-token durations from the attention masks (when they
+        are given) and regroups the stop targets by the reduction factor `self.config.r`.
+
+        You must override this if you use a custom dataset.
+
+        Args:
+            batch (Dict): Batch to format. It must provide every key read below (`token_id`, `token_id_lengths`,
+                `speaker_names`, `linear`, `mel`, `mel_lengths`, `stop_targets`, `item_idxs`, `d_vectors`,
+                `speaker_ids`, `attns`, `waveform`, `pitch`, `language_ids`).
+
+        Returns:
+            Dict: The formatted batch. `durations` is None when `batch["attns"]` is None. `max_text_length` and
+                `max_spec_length` are Python floats.
+        """
+        # setup input batch
+        text_input = batch["token_id"]
+        text_lengths = batch["token_id_lengths"]
+        speaker_names = batch["speaker_names"]
+        linear_input = batch["linear"]
+        mel_input = batch["mel"]
+        mel_lengths = batch["mel_lengths"]
+        stop_targets = batch["stop_targets"]
+        item_idx = batch["item_idxs"]
+        d_vectors = batch["d_vectors"]
+        speaker_ids = batch["speaker_ids"]
+        attn_mask = batch["attns"]
+        waveform = batch["waveform"]
+        pitch = batch["pitch"]
+        language_ids = batch["language_ids"]
+        max_text_length = torch.max(text_lengths.float())
+        max_spec_length = torch.max(mel_lengths.float())
+
+        # compute durations from attention masks
+        durations = None
+        if attn_mask is not None:
+            # one row per batch item, one column per position of dim 2 of the attention masks
+            durations = torch.zeros(attn_mask.shape[0], attn_mask.shape[2])
+            for idx, am in enumerate(attn_mask):
+                # compute raw durations
+                # after cropping to the item's text and spectrogram lengths, take for every position of the
+                # last axis the index of the maximum along axis 1
+                c_idxs = am[:, : text_lengths[idx], : mel_lengths[idx]].max(1)[1]
+                # c_idxs, counts = torch.unique_consecutive(c_idxs, return_counts=True)
+                # how many times each index was selected
+                c_idxs, counts = torch.unique(c_idxs, return_counts=True)
+                # indices that were never selected keep a duration of 1
+                dur = torch.ones([text_lengths[idx]]).to(counts.dtype)
+                dur[c_idxs] = counts
+                # smooth the durations and set any 0 duration to 1
+                # by cutting off from the largest duration indices.
+                extra_frames = dur.sum() - mel_lengths[idx]
+                largest_idxs = torch.argsort(-dur)[:extra_frames]
+                dur[largest_idxs] -= 1
+                assert (
+                    dur.sum() == mel_lengths[idx]
+                ), f" [!] total duration {dur.sum()} vs spectrogram length {mel_lengths[idx]}"
+                durations[idx, : text_lengths[idx]] = dur
+
+        # set stop targets wrt reduction factor
+        # split dim 1 into `stop_targets.size(1) // r` groups
+        stop_targets = stop_targets.view(text_input.shape[0], stop_targets.size(1) // self.config.r, -1)
+        # a group becomes 1.0 when the sum of its targets is positive, else 0.0
+        stop_targets = (stop_targets.sum(2) > 0.0).unsqueeze(2).float().squeeze(2)
+        stop_target_lengths = torch.divide(mel_lengths, self.config.r).ceil_()
+
+        return {
+            "text_input": text_input,
+            "text_lengths": text_lengths,
+            "speaker_names": speaker_names,
+            "mel_input": mel_input,
+            "mel_lengths": mel_lengths,
+            "linear_input": linear_input,
+            "stop_targets": stop_targets,
+            "stop_target_lengths": stop_target_lengths,
+            "attn_mask": attn_mask,
+            "durations": durations,
+            "speaker_ids": speaker_ids,
+            "d_vectors": d_vectors,
+            "max_text_length": float(max_text_length),
+            "max_spec_length": float(max_spec_length),
+            "item_idx": item_idx,
+            "waveform": waveform,
+            "pitch": pitch,
+            "language_ids": language_ids,
+        }
+
+    def get_sampler(self, config: Coqpit, dataset: TTSDataset, num_gpus=1):
+        """Create the sampler used by the data loader.
+
+        The language, speaker and length balancer weights are each enabled by their `use_*_weighted_sampler`
+        config flag, scaled by the matching `*_weighted_sampler_alpha` (1.0 when missing) and summed. With more
+        than one GPU the result is wrapped for distributed training.
+
+        Args:
+            config (Coqpit): Config holding the optional sampler flags and alphas.
+            dataset (TTSDataset): Dataset whose `samples` are weighted.
+            num_gpus (int): Number of GPUs used for training. Defaults to 1.
+
+        Returns:
+            The sampler, or None when no weighted sampler is enabled and `num_gpus` is not larger than 1.
+        """
+
+        def _accumulate(total, extra):
+            # the first enabled balancer starts the sum, later ones are added in place
+            if total is None:
+                return extra
+            total += extra
+            return total
+
+        samples = dataset.samples
+        weights = None
+
+        if getattr(config, "use_language_weighted_sampler", False):
+            alpha = getattr(config, "language_weighted_sampler_alpha", 1.0)
+            print(" > Using Language weighted sampler with alpha:", alpha)
+            weights = _accumulate(weights, get_language_balancer_weights(samples) * alpha)
+
+        if getattr(config, "use_speaker_weighted_sampler", False):
+            alpha = getattr(config, "speaker_weighted_sampler_alpha", 1.0)
+            print(" > Using Speaker weighted sampler with alpha:", alpha)
+            weights = _accumulate(weights, get_speaker_balancer_weights(samples) * alpha)
+
+        if getattr(config, "use_length_weighted_sampler", False):
+            alpha = getattr(config, "length_weighted_sampler_alpha", 1.0)
+            print(" > Using Length weighted sampler with alpha:", alpha)
+            weights = _accumulate(weights, get_length_balancer_weights(samples) * alpha)
+
+        sampler = None if weights is None else WeightedRandomSampler(weights, len(weights))
+
+        # sampler for DDP
+        if num_gpus > 1:
+            # wrap an already defined sampler, otherwise fall back to a plain distributed sampler
+            sampler = DistributedSampler(dataset) if sampler is None else DistributedSamplerWrapper(sampler)
+
+        return sampler
+
+    def get_data_loader(
+        self,
+        config: Coqpit,
+        assets: Dict,
+        is_eval: bool,
+        samples: Union[List[Dict], List[List]],
+        verbose: bool,
+        num_gpus: int,
+        rank: int = None,
+    ) -> "DataLoader":
+        """Create the `DataLoader` over a `TTSDataset` built from `samples`.
+
+        Args:
+            config (Coqpit): Config providing the dataset and loader options read below.
+            assets (Dict): Not read by this implementation.
+            is_eval (bool): Selects the evaluation settings (eval batch size and worker count, no batch grouping,
+                no noise augmentation).
+            samples (Union[List[Dict], List[List]]): Data samples handed to the dataset.
+            verbose (bool): Forwarded to the dataset.
+            num_gpus (int): Number of GPUs. Values above 1 enable the DDP barrier and distributed samplers.
+            rank (int, optional): Not read by this implementation. Defaults to None.
+
+        Returns:
+            DataLoader: The loader, or None when `is_eval` is set and `config.run_eval` is falsy.
+        """
+        if is_eval and not config.run_eval:
+            loader = None
+        else:
+            # setup multi-speaker attributes
+            if hasattr(self, "speaker_manager") and self.speaker_manager is not None:
+                if hasattr(config, "model_args"):
+                    speaker_id_mapping = (
+                        self.speaker_manager.name_to_id if config.model_args.use_speaker_embedding else None
+                    )
+                    d_vector_mapping = self.speaker_manager.embeddings if config.model_args.use_d_vector_file else None
+                    config.use_d_vector_file = config.model_args.use_d_vector_file
+                else:
+                    speaker_id_mapping = self.speaker_manager.name_to_id if config.use_speaker_embedding else None
+                    d_vector_mapping = self.speaker_manager.embeddings if config.use_d_vector_file else None
+            else:
+                speaker_id_mapping = None
+                d_vector_mapping = None
+
+            # setup multi-lingual attributes
+            if hasattr(self, "language_manager") and self.language_manager is not None:
+                language_id_mapping = self.language_manager.name_to_id if self.args.use_language_embedding else None
+            else:
+                language_id_mapping = None
+
+            # init dataloader
+            dataset = TTSDataset(
+                outputs_per_step=config.r if "r" in config else 1,
+                compute_linear_spec=config.model.lower() == "tacotron" or config.compute_linear_spec,
+                compute_f0=config.get("compute_f0", False),
+                f0_cache_path=config.get("f0_cache_path", None),
+                samples=samples,
+                ap=self.ap,
+                return_wav=config.return_wav if "return_wav" in config else False,
+                batch_group_size=0 if is_eval else config.batch_group_size * config.batch_size,
+                min_text_len=config.min_text_len,
+                max_text_len=config.max_text_len,
+                min_audio_len=config.min_audio_len,
+                max_audio_len=config.max_audio_len,
+                phoneme_cache_path=config.phoneme_cache_path,
+                precompute_num_workers=config.precompute_num_workers,
+                use_noise_augment=False if is_eval else config.use_noise_augment,
+                verbose=verbose,
+                speaker_id_mapping=speaker_id_mapping,
+                d_vector_mapping=d_vector_mapping if config.use_d_vector_file else None,
+                tokenizer=self.tokenizer,
+                start_by_longest=config.start_by_longest,
+                language_id_mapping=language_id_mapping,
+            )
+
+            # wait all the DDP process to be ready
+            if num_gpus > 1:
+                dist.barrier()
+
+            # sort input sequences from short to long
+            dataset.preprocess_samples()
+
+            # get samplers
+            sampler = self.get_sampler(config, dataset, num_gpus)
+
+            loader = DataLoader(
+                dataset,
+                batch_size=config.eval_batch_size if is_eval else config.batch_size,
+                # `DataLoader` rejects `shuffle=True` together with a sampler, so shuffle only when there is no
+                # other sampler
+                shuffle=sampler is None,
+                collate_fn=dataset.collate_fn,
+                drop_last=False,  # setting this False might cause issues in AMP training.
+                sampler=sampler,
+                num_workers=config.num_eval_loader_workers if is_eval else config.num_loader_workers,
+                pin_memory=False,
+            )
+        return loader
+
+    def _get_test_aux_input(
+        self,
+    ) -> Dict:
+        """Randomly pick the speaker inputs shared by all test sentences.
+
+        Returns:
+            Dict: `speaker_id` is a one-element list sampled from the speaker manager ids when
+                `self.config.use_speaker_embedding` is set, else None. `d_vector` is a one-element tuple holding
+                a one-element list with a sampled embedding when `self.config.use_d_vector_file` is set, else
+                None. `style_wav` is always None.
+        """
+        d_vector = None
+        if self.config.use_d_vector_file:
+            embeddings = self.speaker_manager.embeddings
+            candidates = sorted(embeddings[name]["embedding"] for name in embeddings)
+            d_vector = (random.sample(candidates, 1),)
+
+        speaker_id = None
+        if self.config.use_speaker_embedding:
+            speaker_id = random.sample(sorted(self.speaker_manager.name_to_id.values()), 1)
+
+        return {
+            "speaker_id": speaker_id,
+            "d_vector": d_vector,
+            "style_wav": None,  # TODO: handle GST style input
+        }
+
+    def test_run(self, assets: Dict) -> Tuple[Dict, Dict]:
+        """Generic test run for `tts` models used by `Trainer`.
+
+        Every sentence of `self.config.test_sentences` is synthesized with the same auxiliary inputs, and the
+        audio, the predicted spectrogram plot and the alignment plot are collected under keys prefixed with the
+        sentence index.
+
+        You can override this for a different behaviour.
+
+        Args:
+            assets (dict): A dict of training assets. Not read by this implementation.
+
+        Returns:
+            Tuple[Dict, Dict]: Test figures and audios to be projected to Tensorboard.
+        """
+        print(" | > Synthesizing test sentences.")
+        test_figures, test_audios = {}, {}
+        sentences = self.config.test_sentences
+        aux_inputs = self._get_test_aux_input()
+        for idx, sen in enumerate(sentences):
+            # run on CUDA when the model parameters live on a CUDA device
+            use_cuda = "cuda" in str(next(self.parameters()).device)
+            outputs_dict = synthesis(
+                self,
+                sen,
+                self.config,
+                use_cuda,
+                speaker_id=aux_inputs["speaker_id"],
+                d_vector=aux_inputs["d_vector"],
+                style_wav=aux_inputs["style_wav"],
+                use_griffin_lim=True,
+                do_trim_silence=False,
+            )
+            test_audios[f"{idx}-audio"] = outputs_dict["wav"]
+            model_outputs = outputs_dict["outputs"]
+            test_figures[f"{idx}-prediction"] = plot_spectrogram(
+                model_outputs["model_outputs"], self.ap, output_fig=False
+            )
+            test_figures[f"{idx}-alignment"] = plot_alignment(model_outputs["alignments"], output_fig=False)
+        return test_figures, test_audios
+
+    def on_init_start(self, trainer):
+        """Save `speakers.pth` and `language_ids.json` at the beginning of the training. Also update both paths.
+
+        For each manager that is available, its ids are written below `trainer.output_path`, the matching path
+        field of `trainer.config` (and of `trainer.config.model_args` when present) is updated and the config is
+        saved again as `config.json`.
+
+        Args:
+            trainer: Trainer object providing `output_path` and `config`.
+        """
+        speaker_manager = self.speaker_manager
+        if speaker_manager is not None:
+            speakers_path = os.path.join(trainer.output_path, "speakers.pth")
+            speaker_manager.save_ids_to_file(speakers_path)
+            trainer.config.speakers_file = speakers_path
+            # some models don't have `model_args` set
+            if hasattr(trainer.config, "model_args"):
+                trainer.config.model_args.speakers_file = speakers_path
+            trainer.config.save_json(os.path.join(trainer.output_path, "config.json"))
+            print(f" > `speakers.pth` is saved to {speakers_path}.")
+            print(" > `speakers_file` is updated in the config.json.")
+
+        language_manager = getattr(self, "language_manager", None)
+        if language_manager is not None:
+            language_ids_path = os.path.join(trainer.output_path, "language_ids.json")
+            language_manager.save_ids_to_file(language_ids_path)
+            trainer.config.language_ids_file = language_ids_path
+            if hasattr(trainer.config, "model_args"):
+                trainer.config.model_args.language_ids_file = language_ids_path
+            trainer.config.save_json(os.path.join(trainer.output_path, "config.json"))
+            print(f" > `language_ids.json` is saved to {language_ids_path}.")
+            print(" > `language_ids_file` is updated in the config.json.")
diff --git a/TTS/tts/models/forward_tts.py b/TTS/tts/models/forward_tts.py
new file mode 100644
index 0000000000000000000000000000000000000000..c1132df22cabe171c63606ae2a853d6dd56f6a84
--- /dev/null
+++ b/TTS/tts/models/forward_tts.py
@@ -0,0 +1,742 @@
+from dataclasses import dataclass, field
+from typing import Dict, List, Tuple, Union
+
+import torch
+from coqpit import Coqpit
+from torch import nn
+from torch.cuda.amp.autocast_mode import autocast
+
+from TTS.tts.layers.feed_forward.decoder import Decoder
+from TTS.tts.layers.feed_forward.encoder import Encoder
+from TTS.tts.layers.generic.aligner import AlignmentNetwork
+from TTS.tts.layers.generic.pos_encoding import PositionalEncoding
+from TTS.tts.layers.glow_tts.duration_predictor import DurationPredictor
+from TTS.tts.models.base_tts import BaseTTS
+from TTS.tts.utils.helpers import average_over_durations, generate_path, maximum_path, sequence_mask
+from TTS.tts.utils.speakers import SpeakerManager
+from TTS.tts.utils.text.tokenizer import TTSTokenizer
+from TTS.tts.utils.visual import plot_alignment, plot_avg_pitch, plot_spectrogram
+from TTS.utils.io import load_fsspec
+
+
+@dataclass
+class ForwardTTSArgs(Coqpit):
+    """ForwardTTS Model arguments.
+
+    Args:
+
+        num_chars (int):
+            Number of characters in the vocabulary. Defaults to None.
+
+        out_channels (int):
+            Number of output channels. Defaults to 80.
+
+        hidden_channels (int):
+            Number of base hidden channels of the model. Defaults to 384.
+
+        use_aligner (bool):
+            Whether to use aligner network to learn the text to speech alignment or use pre-computed durations.
+            If set False, durations should be computed by `TTS/bin/compute_attention_masks.py` and path to the
+            pre-computed durations must be provided to `config.datasets[0].meta_file_attn_mask`. Defaults to True.
+
+        use_pitch (bool):
+            Use pitch predictor to learn the pitch. Defaults to True.
+
+        duration_predictor_hidden_channels (int):
+            Number of hidden channels in the duration predictor. Defaults to 256.
+
+        duration_predictor_dropout_p (float):
+            Dropout rate for the duration predictor. Defaults to 0.1.
+
+        duration_predictor_kernel_size (int):
+            Kernel size of conv layers in the duration predictor. Defaults to 3.
+
+        pitch_predictor_hidden_channels (int):
+            Number of hidden channels in the pitch predictor. Defaults to 256.
+
+        pitch_predictor_dropout_p (float):
+            Dropout rate for the pitch predictor. Defaults to 0.1.
+
+        pitch_predictor_kernel_size (int):
+            Kernel size of conv layers in the pitch predictor. Defaults to 3.
+
+        pitch_embedding_kernel_size (int):
+            Kernel size of the projection layer in the pitch predictor. Defaults to 3.
+
+        positional_encoding (bool):
+            Whether to use positional encoding. Defaults to True.
+
+        positional_encoding_use_scale (bool):
+            Whether to use a learnable scale coeff in the positional encoding. Defaults to True.
+            Note that the field is declared below under the misspelled name `poisitonal_encoding_use_scale`.
+
+        length_scale (int):
+            Length scale that multiplies the predicted durations. Larger values result slower speech. Defaults to 1.
+
+        encoder_type (str):
+            Type of the encoder module. One of the encoders available in :class:`TTS.tts.layers.feed_forward.encoder`.
+            Defaults to `fftransformer` as in the paper.
+
+        encoder_params (dict):
+            Parameters of the encoder module. Defaults to ```{"hidden_channels_ffn": 1024, "num_heads": 1, "num_layers": 6, "dropout_p": 0.1}```
+
+        decoder_type (str):
+            Type of the decoder module. One of the decoders available in :class:`TTS.tts.layers.feed_forward.decoder`.
+            Defaults to `fftransformer` as in the paper.
+
+        decoder_params (dict):
+            Parameters of the decoder module. Defaults to ```{"hidden_channels_ffn": 1024, "num_heads": 1, "num_layers": 6, "dropout_p": 0.1}```
+
+        detach_duration_predictor (bool):
+            Detach the input to the duration predictor from the earlier computation graph so that the duration loss
+            does not pass to the earlier layers. Defaults to False.
+
+        max_duration (int):
+            Maximum duration accepted by the model. Defaults to 75.
+
+        num_speakers (int):
+            Number of speakers for the speaker embedding layer. Defaults to 1.
+
+        use_speaker_embedding (bool):
+            Enable/Disable the use of a speaker embedding layer for multi-speaker training. Defaults to False.
+
+        speakers_file (str):
+            Path to the speaker mapping file for the Speaker Manager. Defaults to None.
+
+        speaker_embedding_channels (int):
+            Number of speaker embedding channels. Defaults to 256.
+            NOTE(review): no field with this name is declared below - confirm whether it is still supported.
+
+        use_d_vector_file (bool):
+            Enable/Disable the use of d-vectors for multi-speaker training. Defaults to False.
+
+        d_vector_dim (int):
+            Number of d-vector channels. Defaults to None.
+
+        d_vector_file (str):
+            Presumably the path to the file holding the d-vectors - TODO confirm. Defaults to None.
+
+    """
+
+    num_chars: int = None
+    out_channels: int = 80
+    hidden_channels: int = 384
+    use_aligner: bool = True
+    use_pitch: bool = True
+    pitch_predictor_hidden_channels: int = 256
+    pitch_predictor_kernel_size: int = 3
+    pitch_predictor_dropout_p: float = 0.1
+    pitch_embedding_kernel_size: int = 3
+    duration_predictor_hidden_channels: int = 256
+    duration_predictor_kernel_size: int = 3
+    duration_predictor_dropout_p: float = 0.1
+    positional_encoding: bool = True
+    # NOTE(review): misspelled ("poisitonal"); kept as is because the name is part of the config interface
+    poisitonal_encoding_use_scale: bool = True
+    length_scale: int = 1
+    encoder_type: str = "fftransformer"
+    encoder_params: dict = field(
+        default_factory=lambda: {"hidden_channels_ffn": 1024, "num_heads": 1, "num_layers": 6, "dropout_p": 0.1}
+    )
+    decoder_type: str = "fftransformer"
+    decoder_params: dict = field(
+        default_factory=lambda: {"hidden_channels_ffn": 1024, "num_heads": 1, "num_layers": 6, "dropout_p": 0.1}
+    )
+    detach_duration_predictor: bool = False
+    max_duration: int = 75
+    num_speakers: int = 1
+    use_speaker_embedding: bool = False
+    speakers_file: str = None
+    use_d_vector_file: bool = False
+    d_vector_dim: int = None
+    d_vector_file: str = None
+
+
+class ForwardTTS(BaseTTS):
+    """General forward TTS model implementation that uses an encoder-decoder architecture with an optional alignment
+    network and a pitch predictor.
+
+    If the alignment network is used, the model learns the text-to-speech alignment
+    from the data instead of using pre-computed durations.
+
+    If the pitch predictor is used, the model trains a pitch predictor that predicts average pitch value for each
+    input character as in the FastPitch model.
+
+    `ForwardTTS` can be configured to one of these architectures,
+
+        - FastPitch
+        - SpeedySpeech
+        - FastSpeech
+        - TODO: FastSpeech2 (requires average speech energy predictor)
+
+    Args:
+        config (Coqpit): Model coqpit class.
+        speaker_manager (SpeakerManager): Speaker manager for multi-speaker training. Only used for multi-speaker models.
+            Defaults to None.
+
+    Examples:
+        >>> from TTS.tts.models.fast_pitch import ForwardTTS, ForwardTTSArgs
+        >>> config = ForwardTTSArgs()
+        >>> model = ForwardTTS(config)
+    """
+
+    # pylint: disable=dangerous-default-value
+    def __init__(
+        self,
+        config: Coqpit,
+        ap: "AudioProcessor" = None,
+        tokenizer: "TTSTokenizer" = None,
+        speaker_manager: SpeakerManager = None,
+    ):
+        """Build the layers of the model from `self.args`.
+
+        Args:
+            config (Coqpit): Model coqpit class.
+            ap (AudioProcessor, optional): Forwarded to `BaseTTS`. Defaults to None.
+            tokenizer (TTSTokenizer, optional): Forwarded to `BaseTTS`. Defaults to None.
+            speaker_manager (SpeakerManager, optional): Speaker manager for multi-speaker training.
+                Defaults to None.
+        """
+        super().__init__(config, ap, tokenizer, speaker_manager)
+        # `BaseTTS.__init__` has already called this once with the same config
+        self._set_model_args(config)
+
+        # sets `self.embedded_speaker_dim`, which sizes the layers below
+        self.init_multispeaker(config)
+
+        self.max_duration = self.args.max_duration
+        self.use_aligner = self.args.use_aligner
+        self.use_pitch = self.args.use_pitch
+        self.binary_loss_weight = 0.0
+
+        # an int length scale is converted to float, any other type is kept as given
+        self.length_scale = (
+            float(self.args.length_scale) if isinstance(self.args.length_scale, int) else self.args.length_scale
+        )
+
+        self.emb = nn.Embedding(self.args.num_chars, self.args.hidden_channels)
+
+        self.encoder = Encoder(
+            self.args.hidden_channels,
+            self.args.hidden_channels,
+            self.args.encoder_type,
+            self.args.encoder_params,
+            self.embedded_speaker_dim,
+        )
+
+        # NOTE(review): `poisitonal_encoding_use_scale` from the args is not passed on here - confirm intended
+        if self.args.positional_encoding:
+            self.pos_encoder = PositionalEncoding(self.args.hidden_channels)
+
+        self.decoder = Decoder(
+            self.args.out_channels,
+            self.args.hidden_channels,
+            self.args.decoder_type,
+            self.args.decoder_params,
+        )
+
+        # the predictor input is widened by the speaker embedding size
+        self.duration_predictor = DurationPredictor(
+            self.args.hidden_channels + self.embedded_speaker_dim,
+            self.args.duration_predictor_hidden_channels,
+            self.args.duration_predictor_kernel_size,
+            self.args.duration_predictor_dropout_p,
+        )
+
+        if self.args.use_pitch:
+            # the pitch predictor reuses the `DurationPredictor` class with its own hyper-parameters
+            self.pitch_predictor = DurationPredictor(
+                self.args.hidden_channels + self.embedded_speaker_dim,
+                self.args.pitch_predictor_hidden_channels,
+                self.args.pitch_predictor_kernel_size,
+                self.args.pitch_predictor_dropout_p,
+            )
+            # maps the single pitch channel to `hidden_channels`; padding is (kernel_size - 1) / 2 cut to int
+            self.pitch_emb = nn.Conv1d(
+                1,
+                self.args.hidden_channels,
+                kernel_size=self.args.pitch_embedding_kernel_size,
+                padding=int((self.args.pitch_embedding_kernel_size - 1) / 2),
+            )
+
+        if self.args.use_aligner:
+            self.aligner = AlignmentNetwork(
+                in_query_channels=self.args.out_channels, in_key_channels=self.args.hidden_channels
+            )
+
+    def init_multispeaker(self, config: Coqpit):
+        """Init for multi-speaker training.
+
+        `self.embedded_speaker_dim` starts at 0. With d-vectors it becomes `config.d_vector_dim` and a 1x1
+        `nn.Conv1d` projection `self.proj_g` is added when the d-vector size differs from the hidden size.
+        With speaker embeddings only, the lookup table `self.emb_g` is created instead.
+
+        Args:
+            config (Coqpit): Model configuration.
+
+        Raises:
+            ValueError: If a multi-speaker option is enabled but no speaker manager was provided.
+        """
+        self.embedded_speaker_dim = 0
+
+        if self.speaker_manager is None:
+            # a multi-speaker model cannot be initialized without a speaker manager
+            if config.use_d_vector_file or config.use_speaker_embedding:
+                raise ValueError(
+                    " > SpeakerManager is not provided. You must provide the SpeakerManager before initializing a multi-speaker model."
+                )
+        else:
+            # set number of speakers
+            self.num_speakers = self.speaker_manager.num_speakers
+
+        if config.use_d_vector_file:
+            # init d-vector embedding
+            self.embedded_speaker_dim = config.d_vector_dim
+            if self.args.d_vector_dim != self.args.hidden_channels:
+                self.proj_g = nn.Conv1d(self.args.d_vector_dim, self.args.hidden_channels, 1)
+        elif config.use_speaker_embedding:
+            # init speaker embedding layer
+            print(" > Init speaker_embedding layer.")
+            self.emb_g = nn.Embedding(self.num_speakers, self.args.hidden_channels)
+            nn.init.uniform_(self.emb_g.weight, -0.1, 0.1)
+
+    @staticmethod
+    def generate_attn(dr, x_mask, y_mask=None):
+        """Generate an attention mask from the durations.
+
+        When `y_mask` is None it is built with `sequence_mask` from the durations summed over dim 1, using a
+        length of at least 1 for every item.
+
+        Shapes
+           - dr: :math:`(B, T_{en})`
+           - x_mask: :math:`(B, T_{en})`
+           - y_mask: :math:`(B, T_{de})`
+        """
+        if y_mask is None:
+            # compute decode mask from the durations
+            y_lengths = torch.clamp(dr.sum(1).long(), min=1)
+            y_mask = sequence_mask(y_lengths, None).unsqueeze(1).to(dr.dtype)
+        attn_mask = x_mask.unsqueeze(-1) * y_mask.unsqueeze(2)
+        return generate_path(dr, attn_mask.squeeze(1)).to(dr.dtype)
+
+    def expand_encoder_outputs(self, en, dr, x_mask, y_mask):
+        """Generate attention alignment map from durations and
+        expand encoder outputs
+
+        Returns:
+            The expanded encoder outputs and the attention map returned by `generate_attn`.
+
+        Shapes:
+            - en: :math:`(B, D_{en}, T_{en})`
+            - dr: :math:`(B, T_{en})`
+            - x_mask: :math:`(B, T_{en})`
+            - y_mask: :math:`(B, T_{de})`
+
+        Examples::
+
+            encoder output: [a,b,c,d]
+            durations: [1, 3, 2, 1]
+
+            expanded: [a, b, b, b, c, c, d]
+            attention map: [[0, 0, 0, 0, 0, 0, 1],
+                            [0, 0, 0, 0, 1, 1, 0],
+                            [0, 1, 1, 1, 0, 0, 0],
+                            [1, 0, 0, 0, 0, 0, 0]]
+        """
+        attn = self.generate_attn(dr, x_mask, y_mask)
+        # drop dim 1 of the map and swap its last two axes so it can multiply the transposed encoder outputs
+        attn_t = attn.squeeze(1).transpose(1, 2).to(en.dtype)
+        en_t = en.transpose(1, 2)
+        o_en_ex = torch.matmul(attn_t, en_t).transpose(1, 2)
+        return o_en_ex, attn
+
+    def format_durations(self, o_dr_log, x_mask):
+        """Turn log-scale duration predictions into whole frame counts.
+
+        Steps, in order:
+        1. Undo the log scaling (``exp(x) - 1``).
+        2. Apply the input mask and ``self.length_scale`` (speed adjustment).
+        3. Raise every value below 1 to exactly 1.
+        4. Round to the nearest integer value.
+
+        Args:
+            o_dr_log: Log scale durations.
+            x_mask: Input text mask.
+
+        Returns:
+            Rounded durations with the same shape as the broadcast of the inputs.
+        """
+        linear = torch.exp(o_dr_log) - 1
+        scaled = linear * x_mask * self.length_scale
+        # positions zeroed by the mask also end up with a duration of one frame
+        floored = torch.clamp(scaled, min=1.0)
+        return torch.round(floored)
+
+    def _forward_encoder(
+        self, x: torch.LongTensor, x_mask: torch.FloatTensor, g: torch.FloatTensor = None
+    ) -> Tuple[torch.FloatTensor, torch.FloatTensor, torch.FloatTensor, torch.FloatTensor]:
+        """Encoding forward pass.
+
+        1. Embed speaker IDs if a speaker embedding layer (``emb_g``) exists.
+        2. Project the conditioning vector with ``proj_g`` if that layer exists.
+        3. Embed character sequences.
+        4. Run the encoder network.
+        5. Sum encoder outputs and speaker embeddings.
+
+        Args:
+            x (torch.LongTensor): Input sequence IDs.
+            x_mask (torch.FloatTensor): Input sequence mask.
+            g (torch.FloatTensor, optional): Conditioning vectors. In general speaker embeddings or,
+                when ``emb_g`` exists, speaker IDs. Defaults to None.
+
+        Returns:
+            Tuple[torch.tensor, torch.tensor, torch.tensor, torch.tensor]:
+                encoder output, input sequence mask, speaker embeddings (with a trailing time axis,
+                or None), character embeddings
+
+        Shapes:
+            - x: :math:`(B, T_{en})`
+            - x_mask: :math:`(B, 1, T_{en})`
+            - g: :math:`(B, C)`
+        """
+        if hasattr(self, "emb_g"):
+            g = self.emb_g(g)  # speaker IDs -> embedding vectors
+        if g is not None:
+            g = g.unsqueeze(-1)  # add a time axis so it broadcasts over the encoder output
+            # `proj_g` is only created when the d-vector size differs from the hidden size;
+            # without it the sum below cannot broadcast.
+            if hasattr(self, "proj_g"):
+                g = self.proj_g(g)
+        # [B, T, C]
+        x_emb = self.emb(x)
+        # encoder pass
+        o_en = self.encoder(torch.transpose(x_emb, 1, -1), x_mask)
+        # speaker conditioning
+        # TODO: try different ways of conditioning
+        if g is not None:
+            o_en = o_en + g
+        return o_en, x_mask, g, x_emb
+
+    def _forward_decoder(
+        self,
+        o_en: torch.FloatTensor,
+        dr: torch.IntTensor,
+        x_mask: torch.FloatTensor,
+        y_lengths: torch.IntTensor,
+        g: torch.FloatTensor,
+    ) -> Tuple[torch.FloatTensor, torch.FloatTensor]:
+        """Decoding forward pass.
+
+        The output mask is built from ``y_lengths``, the encoder output is stretched
+        with the durations, positional encoding is applied when the model has a
+        ``pos_encoder``, and the result is fed to the decoder together with ``g``.
+
+        Args:
+            o_en (torch.FloatTensor): Encoder output.
+            dr (torch.IntTensor): Ground truth durations or alignment network durations.
+            x_mask (torch.IntTensor): Input sequence mask.
+            y_lengths (torch.IntTensor): Output sequence lengths.
+            g (torch.FloatTensor): Conditioning vectors. In general speaker embeddings.
+
+        Returns:
+            Tuple[torch.FloatTensor, torch.FloatTensor]: Decoder output, attention map from durations
+            (both with axes 1 and 2 swapped before returning).
+        """
+        frame_mask = sequence_mask(y_lengths, None).unsqueeze(1).to(o_en.dtype)
+        expanded, attn = self.expand_encoder_outputs(o_en, dr, x_mask, frame_mask)
+        if hasattr(self, "pos_encoder"):
+            expanded = self.pos_encoder(expanded, frame_mask)
+        decoded = self.decoder(expanded, frame_mask, g=g)
+        return decoded.transpose(1, 2), attn.transpose(1, 2)
+
+    def _forward_pitch_predictor(
+        self,
+        o_en: torch.FloatTensor,
+        x_mask: torch.IntTensor,
+        pitch: torch.FloatTensor = None,
+        dr: torch.IntTensor = None,
+    ) -> Union[
+        Tuple[torch.FloatTensor, torch.FloatTensor],
+        Tuple[torch.FloatTensor, torch.FloatTensor, torch.FloatTensor],
+    ]:
+        """Pitch predictor forward pass.
+
+        1. Predict pitch from encoder outputs.
+        2. If ground truth ``pitch`` is given (training), average it over the durations ``dr``
+           and embed the averaged values.
+        3. Otherwise (inference), embed the predicted pitch.
+
+        Args:
+            o_en (torch.FloatTensor): Encoder output.
+            x_mask (torch.IntTensor): Input sequence mask.
+            pitch (torch.FloatTensor, optional): Ground truth pitch values. Defaults to None.
+            dr (torch.IntTensor, optional): Ground truth durations. Defaults to None.
+
+        Returns:
+            When ``pitch`` is given: ``(pitch embedding, pitch prediction, averaged ground truth pitch)``.
+            When ``pitch`` is None: ``(pitch embedding, pitch prediction)``.
+            Callers must unpack according to whether they passed ``pitch``.
+
+        Shapes:
+            - o_en: :math:`(B, C, T_{en})`
+            - x_mask: :math:`(B, 1, T_{en})`
+            - pitch: :math:`(B, 1, T_{de})`
+            - dr: :math:`(B, T_{en})`
+        """
+        o_pitch = self.pitch_predictor(o_en, x_mask)
+        if pitch is not None:
+            # training: condition on the ground truth pitch, averaged per input token
+            avg_pitch = average_over_durations(pitch, dr)
+            o_pitch_emb = self.pitch_emb(avg_pitch)
+            return o_pitch_emb, o_pitch, avg_pitch
+        # inference: condition on the model's own prediction
+        o_pitch_emb = self.pitch_emb(o_pitch)
+        return o_pitch_emb, o_pitch
+
+    def _forward_aligner(
+        self, x: torch.FloatTensor, y: torch.FloatTensor, x_mask: torch.IntTensor, y_mask: torch.IntTensor
+    ) -> Tuple[torch.IntTensor, torch.FloatTensor, torch.FloatTensor, torch.FloatTensor]:
+        """Aligner forward pass.
+
+        The alignment network scores every (input, output) pair, ``maximum_path`` turns
+        the soft scores into a hard alignment restricted to the valid region, and the
+        per-token durations are the row sums of that hard alignment.
+
+        Args:
+            x (torch.FloatTensor): Input sequence.
+            y (torch.FloatTensor): Output sequence.
+            x_mask (torch.IntTensor): Input sequence mask.
+            y_mask (torch.IntTensor): Output sequence mask.
+
+        Returns:
+            Tuple[torch.IntTensor, torch.FloatTensor, torch.FloatTensor, torch.FloatTensor]:
+                Durations from the hard alignment map, soft alignment potentials, log scale alignment potentials,
+                hard alignment map.
+
+        Shapes:
+            - x: :math:`[B, T_en, C_en]`
+            - y: :math:`[B, T_de, C_de]`
+            - x_mask: :math:`[B, 1, T_en]`
+            - y_mask: :math:`[B, 1, T_de]`
+
+            - o_alignment_dur: :math:`[B, T_en]`
+            - alignment_soft: :math:`[B, T_en, T_de]`
+            - alignment_logprob: :math:`[B, 1, T_de, T_en]`
+            - alignment_mas: :math:`[B, T_en, T_de]`
+        """
+        # region where both the input token and the output frame are valid
+        valid_pairs = x_mask.unsqueeze(-1) * y_mask.unsqueeze(2)
+        soft, logprob = self.aligner(y.transpose(1, 2), x.transpose(1, 2), x_mask, None)
+        # drop the singleton axis and put the input axis first
+        soft = soft.squeeze(1).transpose(1, 2)
+        hard = maximum_path(soft.contiguous(), valid_pairs.squeeze(1).contiguous())
+        durations = torch.sum(hard, -1).int()
+        return durations, soft, logprob, hard
+
+    def _set_speaker_input(self, aux_input: Dict):
+        """Pick the speaker conditioning input out of ``aux_input``.
+
+        Args:
+            aux_input (Dict): May contain ``"d_vectors"`` and/or ``"speaker_ids"``. ``None`` is
+                treated like an empty dict.
+
+        Returns:
+            The speaker IDs if given, otherwise the d-vectors, otherwise None.
+
+        Raises:
+            ValueError: If both d-vectors and speaker IDs are given, or if speaker IDs are given
+                but the model has no ``emb_g`` speaker embedding layer.
+        """
+        # a missing dict means "no speaker information", same as missing keys
+        if aux_input is None:
+            aux_input = {}
+        d_vectors = aux_input.get("d_vectors", None)
+        speaker_ids = aux_input.get("speaker_ids", None)
+
+        if d_vectors is not None and speaker_ids is not None:
+            raise ValueError("[!] Cannot use d-vectors and speaker-ids together.")
+
+        if speaker_ids is not None and not hasattr(self, "emb_g"):
+            raise ValueError("[!] Cannot use speaker-ids without enabling speaker embedding.")
+
+        g = speaker_ids if speaker_ids is not None else d_vectors
+        return g
+
+    def forward(
+        self,
+        x: torch.LongTensor,
+        x_lengths: torch.LongTensor,
+        y_lengths: torch.LongTensor,
+        y: torch.FloatTensor = None,
+        dr: torch.IntTensor = None,
+        pitch: torch.FloatTensor = None,
+        aux_input: Dict = {"d_vectors": None, "speaker_ids": None},  # pylint: disable=unused-argument
+    ) -> Dict:
+        """Model's forward pass.
+
+        Runs the encoder, the duration predictor, optionally the aligner and the pitch
+        predictor, and finally the decoder. When the aligner is enabled, its durations
+        replace ``dr`` for the rest of the pass.
+
+        Args:
+            x (torch.LongTensor): Input character sequences.
+            x_lengths (torch.LongTensor): Input sequence lengths.
+            y_lengths (torch.LongTensor): Output sequence lengths.
+            y (torch.FloatTensor): Spectrogram frames. Only used when the alignment network is on. Defaults to None.
+            dr (torch.IntTensor): Character durations over the spectrogram frames. Only used when the alignment network is off. Defaults to None.
+            pitch (torch.FloatTensor): Pitch values for each spectrogram frame. Only used when the pitch predictor is on. Defaults to None.
+            aux_input (Dict): Auxiliary model inputs for multi-speaker training. Defaults to `{"d_vectors": None, "speaker_ids": None}`.
+
+        Returns:
+            Dict: Model outputs, predicted durations, alignment tensors, pitch tensors and both masks.
+
+        Shapes:
+            - x: :math:`[B, T_max]`
+            - x_lengths: :math:`[B]`
+            - y_lengths: :math:`[B]`
+            - dr: :math:`[B, T_max]`
+            - g: :math:`[B, C]`
+            - pitch: :math:`[B, 1, T]`
+        """
+        g = self._set_speaker_input(aux_input)
+        # sequence masks with a singleton axis inserted at position 1
+        y_mask = sequence_mask(y_lengths, None).unsqueeze(1).float()
+        x_mask = sequence_mask(x_lengths, x.shape[1]).unsqueeze(1).float()
+        o_en, x_mask, g, x_emb = self._forward_encoder(x, x_mask, g)
+        # optionally cut the gradient path from the duration predictor to the encoder
+        dp_input = o_en.detach() if self.args.detach_duration_predictor else o_en
+        o_dr_log = self.duration_predictor(dp_input, x_mask)
+        o_dr = torch.clamp(torch.exp(o_dr_log) - 1, 0, self.max_duration)
+        # alignment implied by the predicted durations (used for visualization)
+        o_attn = self.generate_attn(o_dr.squeeze(1), x_mask)
+        # aligner outputs stay None when the alignment network is off
+        o_alignment_dur = alignment_soft = alignment_logprob = alignment_mas = None
+        if self.use_aligner:
+            aligner_out = self._forward_aligner(x_emb, y, x_mask, y_mask)
+            o_alignment_dur, alignment_soft, alignment_logprob, alignment_mas = aligner_out
+            alignment_soft = alignment_soft.transpose(1, 2)
+            alignment_mas = alignment_mas.transpose(1, 2)
+            dr = o_alignment_dur
+        # pitch conditioning is added onto the encoder output
+        o_pitch = avg_pitch = None
+        if self.args.use_pitch:
+            o_pitch_emb, o_pitch, avg_pitch = self._forward_pitch_predictor(o_en, x_mask, pitch, dr)
+            o_en = o_en + o_pitch_emb
+        # TODO: maybe pass speaker embedding (g) too
+        o_de, attn = self._forward_decoder(o_en, dr, x_mask, y_lengths, g=None)
+        return {
+            "model_outputs": o_de,  # [B, T, C]
+            "durations_log": o_dr_log.squeeze(1),  # [B, T]
+            "durations": o_dr.squeeze(1),  # [B, T]
+            "attn_durations": o_attn,  # for visualization [B, T_en, T_de']
+            "pitch_avg": o_pitch,
+            "pitch_avg_gt": avg_pitch,
+            "alignments": attn,  # [B, T_de, T_en]
+            "alignment_soft": alignment_soft,
+            "alignment_mas": alignment_mas,
+            "o_alignment_dur": o_alignment_dur,
+            "alignment_logprob": alignment_logprob,
+            "x_mask": x_mask,
+            "y_mask": y_mask,
+        }
+
+    @torch.no_grad()
+    def inference(self, x, aux_input={"d_vectors": None, "speaker_ids": None}):  # pylint: disable=unused-argument
+        """Model's inference pass.
+
+        Durations come from the duration predictor (formatted by ``format_durations``)
+        and the decoder is always called without speaker conditioning.
+
+        Args:
+            x (torch.LongTensor): Input character sequence.
+            aux_input (Dict): Auxiliary model inputs. Defaults to `{"d_vectors": None, "speaker_ids": None}`.
+
+        Returns:
+            Dict with ``model_outputs``, ``alignments``, ``pitch`` and ``durations_log``.
+
+        Shapes:
+            - x: [B, T_max]
+            - g: [B, C]
+        """
+        g = self._set_speaker_input(aux_input)
+        # a single length entry equal to the padded input length
+        x_lengths = torch.tensor(x.shape[1:2]).to(x.device)
+        x_mask = sequence_mask(x_lengths, x.shape[1]).unsqueeze(1).to(x.dtype).float()
+        o_en, x_mask, g, _ = self._forward_encoder(x, x_mask, g)
+        # predicted durations decide the output length
+        o_dr_log = self.duration_predictor(o_en, x_mask)
+        o_dr = self.format_durations(o_dr_log, x_mask).squeeze(1)
+        y_lengths = o_dr.sum(1)
+        o_pitch = None
+        if self.args.use_pitch:
+            pitch_emb, o_pitch = self._forward_pitch_predictor(o_en, x_mask)
+            o_en = o_en + pitch_emb
+        o_de, attn = self._forward_decoder(o_en, o_dr, x_mask, y_lengths, g=None)
+        return {
+            "model_outputs": o_de,
+            "alignments": attn,
+            "pitch": o_pitch,
+            "durations_log": o_dr_log,
+        }
+
+    def train_step(self, batch: dict, criterion: nn.Module):
+        """Run one forward pass on ``batch`` and compute the losses.
+
+        Args:
+            batch (dict): Must provide ``text_input``, ``text_lengths``, ``mel_input``, ``mel_lengths``,
+                ``d_vectors``, ``speaker_ids`` and ``durations``; ``pitch`` is read only when pitch is enabled.
+            criterion (nn.Module): Loss module called with keyword arguments.
+
+        Returns:
+            The forward outputs and the loss dict, which additionally carries ``duration_error``.
+        """
+        text_input = batch["text_input"]
+        text_lengths = batch["text_lengths"]
+        mel_input = batch["mel_input"]
+        mel_lengths = batch["mel_lengths"]
+        pitch = None
+        if self.args.use_pitch:
+            pitch = batch["pitch"]
+        aux_input = {"d_vectors": batch["d_vectors"], "speaker_ids": batch["speaker_ids"]}
+        durations = batch["durations"]
+
+        outputs = self.forward(
+            text_input, text_lengths, mel_lengths, y=mel_input, dr=durations, pitch=pitch, aux_input=aux_input
+        )
+        # with the aligner on, its durations are the target for the duration predictor
+        if self.use_aligner:
+            durations = outputs["o_alignment_dur"]
+        # losses are computed with autocast disabled
+        with autocast(enabled=False):
+            loss_inputs = {
+                "decoder_output": outputs["model_outputs"],
+                "decoder_target": mel_input,
+                "decoder_output_lens": mel_lengths,
+                "dur_output": outputs["durations_log"],
+                "dur_target": durations,
+                "pitch_output": outputs["pitch_avg"] if self.use_pitch else None,
+                "pitch_target": outputs["pitch_avg_gt"] if self.use_pitch else None,
+                "input_lens": text_lengths,
+                "alignment_logprob": outputs["alignment_logprob"] if self.use_aligner else None,
+                "alignment_soft": outputs["alignment_soft"],
+                "alignment_hard": outputs["alignment_mas"],
+                "binary_loss_weight": self.binary_loss_weight,
+            }
+            loss_dict = criterion(**loss_inputs)
+            # mean absolute duration error per input token
+            abs_error = torch.abs(durations - outputs["durations"]).sum()
+            loss_dict["duration_error"] = abs_error / text_lengths.sum()
+
+        return outputs, loss_dict
+
+    def _create_logs(self, batch, outputs, ap):
+        """Create common logger outputs.
+
+        Only the first item of the batch is plotted. Returns a dict of figures and a
+        dict holding one audio sample under the key ``"audio"``.
+        """
+
+        def _to_numpy(tensor):
+            return tensor.data.cpu().numpy()
+
+        model_outputs = outputs["model_outputs"]
+        alignments = outputs["alignments"]
+        mel_input = batch["mel_input"]
+
+        pred_spec = _to_numpy(model_outputs[0])
+        gt_spec = _to_numpy(mel_input[0])
+        align_img = _to_numpy(alignments[0])
+
+        figures = {}
+        figures["prediction"] = plot_spectrogram(pred_spec, ap, output_fig=False)
+        figures["ground_truth"] = plot_spectrogram(gt_spec, ap, output_fig=False)
+        figures["alignment"] = plot_alignment(align_img, output_fig=False)
+
+        # pitch curves: ground truth average vs. prediction, labelled with the decoded text
+        if self.args.use_pitch:
+            pitch_avg = abs(_to_numpy(outputs["pitch_avg_gt"][0, 0]))
+            pitch_avg_hat = abs(_to_numpy(outputs["pitch_avg"][0, 0]))
+            chars = self.tokenizer.decode(_to_numpy(batch["text_input"][0]))
+            figures.update(
+                {
+                    "pitch_ground_truth": plot_avg_pitch(pitch_avg, chars, output_fig=False),
+                    "pitch_avg_predicted": plot_avg_pitch(pitch_avg_hat, chars, output_fig=False),
+                }
+            )
+
+        # alignment derived from the predicted durations
+        if "attn_durations" in outputs:
+            alignments_hat = _to_numpy(outputs["attn_durations"][0])
+            figures["alignment_hat"] = plot_alignment(alignments_hat.T, output_fig=False)
+
+        # audio sample inverted from the predicted spectrogram
+        train_audio = ap.inv_melspectrogram(pred_spec.T)
+        return figures, {"audio": train_audio}
+
+    def train_log(
+        self, batch: dict, outputs: dict, logger: "Logger", assets: dict, steps: int
+    ) -> None:  # pylint: disable=no-self-use
+        """Send this step's training figures and audio sample to ``logger``."""
+        step_figures, step_audios = self._create_logs(batch, outputs, self.ap)
+        logger.train_figures(steps, step_figures)
+        sample_rate = self.ap.sample_rate
+        logger.train_audios(steps, step_audios, sample_rate)
+
+    def eval_step(self, batch: dict, criterion: nn.Module):
+        """Evaluate one batch; this is the same computation as ``train_step``."""
+        step_result = self.train_step(batch, criterion)
+        return step_result
+
+    def eval_log(self, batch: dict, outputs: dict, logger: "Logger", assets: dict, steps: int) -> None:
+        """Send this step's evaluation figures and audio sample to ``logger``."""
+        step_figures, step_audios = self._create_logs(batch, outputs, self.ap)
+        logger.eval_figures(steps, step_figures)
+        sample_rate = self.ap.sample_rate
+        logger.eval_audios(steps, step_audios, sample_rate)
+
+    def load_checkpoint(
+        self, config, checkpoint_path, eval=False, cache=False
+    ):  # pylint: disable=unused-argument, redefined-builtin
+        """Load model weights from ``checkpoint_path`` onto the CPU.
+
+        The weights are read from the ``"model"`` entry of the loaded checkpoint.
+        With ``eval=True`` the model is also switched to evaluation mode.
+        """
+        cpu = torch.device("cpu")
+        checkpoint = load_fsspec(checkpoint_path, map_location=cpu, cache=cache)
+        self.load_state_dict(checkpoint["model"])
+        if not eval:
+            return
+        self.eval()
+        assert not self.training
+
+    def get_criterion(self):
+        """Build the loss module for this model from ``self.config``."""
+        from TTS.tts.layers.losses import ForwardTTSLoss  # pylint: disable=import-outside-toplevel
+
+        criterion = ForwardTTSLoss(self.config)
+        return criterion
+
+    def on_train_step_start(self, trainer):
+        """Schedule binary loss weight.
+
+        The weight grows linearly with ``trainer.epochs_done`` and saturates at 1.0 once
+        ``config.binary_loss_warmup_epochs`` epochs are done. A warm-up length of zero
+        (or less) means no warm-up: the weight is 1.0 from the start.
+
+        Args:
+            trainer: Object exposing ``epochs_done``.
+        """
+        warmup_epochs = self.config.binary_loss_warmup_epochs
+        if warmup_epochs <= 0:
+            # avoid dividing by zero when warm-up is disabled
+            self.binary_loss_weight = 1.0
+            return
+        self.binary_loss_weight = min(trainer.epochs_done / warmup_epochs, 1.0) * 1.0
+
+    @staticmethod
+    def init_from_config(config: "ForwardTTSConfig", samples: Union[List[List], List[Dict]] = None):
+        """Build a model together with its audio processor, tokenizer and speaker manager.
+
+        The model is constructed with the config returned by the tokenizer
+        initialisation rather than with the ``config`` passed in.
+
+        Args:
+            config (ForwardTTSConfig): Model config.
+            samples (Union[List[List], List[Dict]]): Training samples to parse speaker ids for training.
+                Defaults to None.
+        """
+        from TTS.utils.audio import AudioProcessor
+
+        audio_processor = AudioProcessor.init_from_config(config)
+        text_tokenizer, tokenizer_config = TTSTokenizer.init_from_config(config)
+        speakers = SpeakerManager.init_from_config(config, samples)
+        return ForwardTTS(tokenizer_config, audio_processor, text_tokenizer, speakers)
diff --git a/TTS/tts/models/glow_tts.py b/TTS/tts/models/glow_tts.py
new file mode 100644
index 0000000000000000000000000000000000000000..cc241c43b8d9d70529dc5175e0357a337a54737c
--- /dev/null
+++ b/TTS/tts/models/glow_tts.py
@@ -0,0 +1,558 @@
+import math
+from typing import Dict, List, Tuple, Union
+
+import torch
+from coqpit import Coqpit
+from torch import nn
+from torch.cuda.amp.autocast_mode import autocast
+from torch.nn import functional as F
+
+from TTS.tts.configs.glow_tts_config import GlowTTSConfig
+from TTS.tts.layers.glow_tts.decoder import Decoder
+from TTS.tts.layers.glow_tts.encoder import Encoder
+from TTS.tts.models.base_tts import BaseTTS
+from TTS.tts.utils.helpers import generate_path, maximum_path, sequence_mask
+from TTS.tts.utils.speakers import SpeakerManager
+from TTS.tts.utils.synthesis import synthesis
+from TTS.tts.utils.text.tokenizer import TTSTokenizer
+from TTS.tts.utils.visual import plot_alignment, plot_spectrogram
+from TTS.utils.io import load_fsspec
+
+
+class GlowTTS(BaseTTS):
+    """GlowTTS model.
+
+    Paper::
+        https://arxiv.org/abs/2005.11129
+
+    Paper abstract::
+        Recently, text-to-speech (TTS) models such as FastSpeech and ParaNet have been proposed to generate
+        mel-spectrograms from text in parallel. Despite the advantage, the parallel TTS models cannot be trained
+        without guidance from autoregressive TTS models as their external aligners. In this work, we propose Glow-TTS,
+        a flow-based generative model for parallel TTS that does not require any external aligner. By combining the
+        properties of flows and dynamic programming, the proposed model searches for the most probable monotonic
+        alignment between text and the latent representation of speech on its own. We demonstrate that enforcing hard
+        monotonic alignments enables robust TTS, which generalizes to long utterances, and employing generative flows
+        enables fast, diverse, and controllable speech synthesis. Glow-TTS obtains an order-of-magnitude speed-up over
+        the autoregressive model, Tacotron 2, at synthesis with comparable speech quality. We further show that our
+        model can be easily extended to a multi-speaker setting.
+
+    Check :class:`TTS.tts.configs.glow_tts_config.GlowTTSConfig` for class arguments.
+
+    Examples:
+        Init only model layers.
+
+        >>> from TTS.tts.configs.glow_tts_config import GlowTTSConfig
+        >>> from TTS.tts.models.glow_tts import GlowTTS
+        >>> config = GlowTTSConfig(num_chars=2)
+        >>> model = GlowTTS(config)
+
+        Fully init a model ready for action. All the class attributes and class members
+        (e.g. Tokenizer, AudioProcessor, etc.) are initialized internally based on config values.
+
+        >>> from TTS.tts.configs.glow_tts_config import GlowTTSConfig
+        >>> from TTS.tts.models.glow_tts import GlowTTS
+        >>> config = GlowTTSConfig()
+        >>> model = GlowTTS.init_from_config(config, verbose=False)
+    """
+
+    def __init__(
+        self,
+        config: GlowTTSConfig,
+        ap: "AudioProcessor" = None,
+        tokenizer: "TTSTokenizer" = None,
+        speaker_manager: SpeakerManager = None,
+    ):
+        """Build the GlowTTS encoder and decoder from ``config``.
+
+        Args:
+            config (GlowTTSConfig): Model configuration; every field is also copied onto ``self``.
+            ap (AudioProcessor, optional): Forwarded to the base class. Defaults to None.
+            tokenizer (TTSTokenizer, optional): Forwarded to the base class. Defaults to None.
+            speaker_manager (SpeakerManager, optional): Forwarded to the base class. Defaults to None.
+        """
+
+        super().__init__(config, ap, tokenizer, speaker_manager)
+
+        # pass all config fields to `self`
+        # for fewer code change
+        self.config = config
+        for key in config:
+            setattr(self, key, config[key])
+
+        self.decoder_output_dim = config.out_channels
+
+        # init multi-speaker layers if necessary
+        # NOTE: must run before the encoder/decoder are built, because it sets
+        # `self.c_in_channels`, which both constructors receive below.
+        self.init_multispeaker(config)
+
+        # data-dependent initialization is requested whenever a positive step count is configured
+        self.run_data_dep_init = config.data_dep_init_steps > 0
+        self.encoder = Encoder(
+            self.num_chars,
+            out_channels=self.out_channels,
+            hidden_channels=self.hidden_channels_enc,
+            hidden_channels_dp=self.hidden_channels_dp,
+            encoder_type=self.encoder_type,
+            encoder_params=self.encoder_params,
+            mean_only=self.mean_only,
+            use_prenet=self.use_encoder_prenet,
+            dropout_p_dp=self.dropout_p_dp,
+            c_in_channels=self.c_in_channels,
+        )
+
+        self.decoder = Decoder(
+            self.out_channels,
+            self.hidden_channels_dec,
+            self.kernel_size_dec,
+            self.dilation_rate,
+            self.num_flow_blocks_dec,
+            self.num_block_layers,
+            dropout_p=self.dropout_p_dec,
+            num_splits=self.num_splits,
+            num_squeeze=self.num_squeeze,
+            sigmoid_scale=self.sigmoid_scale,
+            c_in_channels=self.c_in_channels,
+        )
+
+    def init_multispeaker(self, config: Coqpit):
+        """Init speaker embedding layer if `use_speaker_embedding` is True and set the expected speaker embedding
+        vector dimension to the encoder layer channel size. If model uses d-vectors, then it only sets
+        speaker embedding vector dimension to the d-vector dimension from the config (512 when the config
+        does not provide one).
+
+        Sets ``self.embedded_speaker_dim`` and ``self.c_in_channels``; may set ``self.num_speakers``
+        and ``self.emb_g``.
+
+        Args:
+            config (Coqpit): Model configuration.
+
+        Raises:
+            AssertionError: If d-vectors are used and the resolved d-vector dimension differs from
+                the speaker manager's ``embedding_dim``.
+        """
+        self.embedded_speaker_dim = 0
+        # set number of speakers - a speaker manager, when present, overrides the value copied from the config
+        if self.speaker_manager is not None:
+            self.num_speakers = self.speaker_manager.num_speakers
+        # set ultimate speaker embedding size
+        if config.use_d_vector_file:
+            self.embedded_speaker_dim = (
+                config.d_vector_dim if "d_vector_dim" in config and config.d_vector_dim is not None else 512
+            )
+            if self.speaker_manager is not None:
+                # compare the resolved dimension, so the 512 fallback is validated too
+                assert (
+                    self.embedded_speaker_dim == self.speaker_manager.embedding_dim
+                ), " [!] d-vector dimension mismatch b/w config and speaker manager."
+        # init speaker embedding layer
+        if config.use_speaker_embedding and not config.use_d_vector_file:
+            print(" > Init speaker_embedding layer.")
+            self.embedded_speaker_dim = self.hidden_channels_enc
+            self.emb_g = nn.Embedding(self.num_speakers, self.hidden_channels_enc)
+            nn.init.uniform_(self.emb_g.weight, -0.1, 0.1)
+        # set conditioning dimensions
+        self.c_in_channels = self.embedded_speaker_dim
+
+    @staticmethod
+    def compute_outputs(attn, o_mean, o_log_scale, x_mask):
+        """Project the encoder statistics onto the output frames with the given alignment map.
+
+        Returns the aligned means, the aligned log scales and the masked
+        ``log(1 + duration)`` of every input position, where the duration is the
+        sum of ``attn`` over its last axis.
+        """
+        # [b, t', t]: one row per output frame
+        frame_to_token = attn.squeeze(1).transpose(1, 2)
+        # [b, t', t] x [b, t, d] -> [b, t', d] -> [b, d, t']
+        y_mean = (frame_to_token @ o_mean.transpose(1, 2)).transpose(1, 2)
+        y_log_scale = (frame_to_token @ o_log_scale.transpose(1, 2)).transpose(1, 2)
+        # total duration per input position, in the log(1 + x) domain
+        o_attn_dur = torch.log(1 + attn.sum(-1)) * x_mask
+        return y_mean, y_log_scale, o_attn_dur
+
+    def unlock_act_norm_layers(self):
+        """Call ``set_ddi(True)`` on every decoder flow that provides it (data-dependent initialization on)."""
+        for flow in self.decoder.flows:
+            if not getattr(flow, "set_ddi", False):
+                continue
+            flow.set_ddi(True)
+
+    def lock_act_norm_layers(self):
+        """Call ``set_ddi(False)`` on every decoder flow that provides it (data-dependent initialization off)."""
+        for flow in self.decoder.flows:
+            if not getattr(flow, "set_ddi", False):
+                continue
+            flow.set_ddi(False)
+
+    def _set_speaker_input(self, aux_input: Dict):
+        """Pick the speaker conditioning input out of ``aux_input``.
+
+        ``None`` is accepted and treated as "no speaker information". Speaker IDs
+        take the place of d-vectors when given; supplying both, or supplying IDs
+        to a model without an ``emb_g`` layer, raises ``ValueError``.
+        """
+        d_vectors = speaker_ids = None
+        if aux_input is not None:
+            d_vectors = aux_input.get("d_vectors", None)
+            speaker_ids = aux_input.get("speaker_ids", None)
+
+        if speaker_ids is None:
+            return d_vectors
+        if d_vectors is not None:
+            raise ValueError("[!] Cannot use d-vectors and speaker-ids together.")
+        if not hasattr(self, "emb_g"):
+            raise ValueError("[!] Cannot use speaker-ids without enabling speaker embedding.")
+        return speaker_ids
+
+    def _speaker_embedding(self, aux_input: Dict) -> Union[torch.tensor, None]:
+        """Return the normalized speaker conditioning vector with a trailing singleton axis, or None.
+
+        With an ``emb_g`` layer the input is looked up as speaker IDs (a scalar ID is
+        first given a batch axis); otherwise the input is used directly as d-vectors.
+        """
+        g = self._set_speaker_input(aux_input)
+        if g is None:
+            return None
+        if hasattr(self, "emb_g"):
+            # a 0-dim tensor holds a single speaker ID
+            if g.dim() == 0:
+                g = g.unsqueeze(0)
+            g = self.emb_g(g)
+        return F.normalize(g).unsqueeze(-1)  # [b, h, 1]
+
+    def forward(
+        self, x, x_lengths, y, y_lengths=None, aux_input={"d_vectors": None, "speaker_ids": None}
+    ):  # pylint: disable=dangerous-default-value
+        """Training forward pass: encode the text, run the flow decoder on the target
+        frames and search for the most likely monotonic alignment between the two.
+
+        Args:
+            x (torch.Tensor):
+                Input text sequence ids. :math:`[B, T_en]`
+
+            x_lengths (torch.Tensor):
+                Lengths of input text sequences. :math:`[B]`
+
+            y (torch.Tensor):
+                Target mel-spectrogram frames. :math:`[B, T_de, C_mel]`
+
+            y_lengths (torch.Tensor):
+                Lengths of target mel-spectrogram frames. :math:`[B]`
+
+            aux_input (Dict):
+                Auxiliary inputs. `d_vectors` is speaker embedding vectors for a multi-speaker model.
+                :math:`[B, D_vec]`. `speaker_ids` is speaker ids for a multi-speaker model using speaker-embedding
+                layer. :math:`B`
+
+        Returns:
+            Dict:
+                - z: :math: `[B, T_de, C]`
+                - logdet: :math:`B`
+                - y_mean: :math:`[B, T_de, C]`
+                - y_log_scale: :math:`[B, T_de, C]`
+                - alignments: :math:`[B, T_en, T_de]`
+                - durations_log: :math:`[B, T_en, 1]`
+                - total_durations_log: :math:`[B, T_en, 1]`
+        """
+        # channel-first layout for the decoder: [B, T, C] -> [B, C, T]
+        y = y.transpose(1, 2)
+        y_max_length = y.size(2)
+        g = self._speaker_embedding(aux_input)
+        o_mean, o_log_scale, o_dur_log, x_mask = self.encoder(x, x_lengths, g=g)
+        # trim frames that do not fit the squeeze factor; lengths are updated accordingly
+        y, y_lengths, y_max_length, attn = self.preprocess(y, y_lengths, y_max_length, None)
+        y_mask = sequence_mask(y_lengths, y_max_length).unsqueeze(1).to(x_mask.dtype)
+        # [B, 1, T_en, T_de]
+        attn_mask = x_mask.unsqueeze(-1) * y_mask.unsqueeze(2)
+        z, logdet = self.decoder(y, y_mask, g=g, reverse=False)
+        # alignment search; no gradients flow through it
+        with torch.no_grad():
+            inv_var = torch.exp(-2 * o_log_scale)
+            const_term = torch.sum(-0.5 * math.log(2 * math.pi) - o_log_scale, [1]).unsqueeze(-1)  # [b, t, 1]
+            quad_term = torch.matmul(inv_var.transpose(1, 2), -0.5 * (z**2))  # [b, t, d] x [b, d, t'] = [b, t, t']
+            cross_term = torch.matmul((o_mean * inv_var).transpose(1, 2), z)  # [b, t, d] x [b, d, t'] = [b, t, t']
+            mean_term = torch.sum(-0.5 * (o_mean**2) * inv_var, [1]).unsqueeze(-1)  # [b, t, 1]
+            logp = const_term + quad_term + cross_term + mean_term  # [b, t, t']
+            attn = maximum_path(logp, attn_mask.squeeze(1)).unsqueeze(1).detach()
+        y_mean, y_log_scale, o_attn_dur = self.compute_outputs(attn, o_mean, o_log_scale, x_mask)
+        attn = attn.squeeze(1).permute(0, 2, 1)
+        return {
+            "z": z.transpose(1, 2),
+            "logdet": logdet,
+            "y_mean": y_mean.transpose(1, 2),
+            "y_log_scale": y_log_scale.transpose(1, 2),
+            "alignments": attn,
+            "durations_log": o_dur_log.transpose(1, 2),
+            "total_durations_log": o_attn_dur.transpose(1, 2),
+        }
+
+    @torch.no_grad()
+    def inference_with_MAS(
+        self, x, x_lengths, y=None, y_lengths=None, aux_input={"d_vectors": None, "speaker_ids": None}
+    ):  # pylint: disable=dangerous-default-value
+        """
+        It's similar to the teacher forcing in Tacotron.
+        It was proposed in: https://arxiv.org/abs/2104.05557
+
+        The target frames ``y`` are only used to find the alignment; the returned
+        ``model_outputs`` are produced by running the decoder in reverse on the aligned
+        encoder means. ``y`` is required even though it defaults to None.
+
+        Shapes:
+            - x: :math:`[B, T]`
+            - x_lenghts: :math:`B`
+            - y: :math:`[B, T, C]`
+            - y_lengths: :math:`B`
+            - g: :math:`[B, C] or B`
+        """
+        y = y.transpose(1, 2)
+        y_max_length = y.size(2)
+        # norm speaker embeddings
+        g = self._speaker_embedding(aux_input)
+        # embedding pass
+        o_mean, o_log_scale, o_dur_log, x_mask = self.encoder(x, x_lengths, g=g)
+        # drop residual frames wrt num_squeeze and set y_lengths.
+        y, y_lengths, y_max_length, attn = self.preprocess(y, y_lengths, y_max_length, None)
+        # create masks
+        y_mask = torch.unsqueeze(sequence_mask(y_lengths, y_max_length), 1).to(x_mask.dtype)
+        attn_mask = torch.unsqueeze(x_mask, -1) * torch.unsqueeze(y_mask, 2)
+        # decoder pass
+        z, logdet = self.decoder(y, y_mask, g=g, reverse=False)
+        # find the alignment path between z and encoder output
+        o_scale = torch.exp(-2 * o_log_scale)
+        logp1 = torch.sum(-0.5 * math.log(2 * math.pi) - o_log_scale, [1]).unsqueeze(-1)  # [b, t, 1]
+        logp2 = torch.matmul(o_scale.transpose(1, 2), -0.5 * (z**2))  # [b, t, d] x [b, d, t'] = [b, t, t']
+        logp3 = torch.matmul((o_mean * o_scale).transpose(1, 2), z)  # [b, t, d] x [b, d, t'] = [b, t, t']
+        logp4 = torch.sum(-0.5 * (o_mean**2) * o_scale, [1]).unsqueeze(-1)  # [b, t, 1]
+        logp = logp1 + logp2 + logp3 + logp4  # [b, t, t']
+        attn = maximum_path(logp, attn_mask.squeeze(1)).unsqueeze(1).detach()
+
+        y_mean, y_log_scale, o_attn_dur = self.compute_outputs(attn, o_mean, o_log_scale, x_mask)
+        attn = attn.squeeze(1).permute(0, 2, 1)
+
+        # get predicted aligned distribution
+        z = y_mean * y_mask
+
+        # reverse the decoder and predict using the aligned distribution
+        y, logdet = self.decoder(z, y_mask, g=g, reverse=True)
+        outputs = {
+            # return the decoded frames; the latent `z` was returned here before,
+            # which left the reverse decoder pass unused
+            "model_outputs": y.transpose(1, 2),
+            "logdet": logdet,
+            "y_mean": y_mean.transpose(1, 2),
+            "y_log_scale": y_log_scale.transpose(1, 2),
+            "alignments": attn,
+            "durations_log": o_dur_log.transpose(1, 2),
+            "total_durations_log": o_attn_dur.transpose(1, 2),
+        }
+        return outputs
+
+    @torch.no_grad()
+    def decoder_inference(
+        self, y, y_lengths=None, aux_input={"d_vectors": None, "speaker_ids": None}
+    ):  # pylint: disable=dangerous-default-value
+        """Pass `y` through the decoder forward and then in reverse, returning the reconstructed `y`.
+        Shapes:
+            - y: :math:`[B, T, C]`
+            - y_lengths: :math:`B`
+            - g: :math:`[B, C] or B`
+        """
+        y = y.transpose(1, 2)
+        y_max_length = y.size(2)
+        g = self._speaker_embedding(aux_input)
+        y_mask = torch.unsqueeze(sequence_mask(y_lengths, y_max_length), 1).to(y.dtype)
+        # decoder pass
+        z, logdet = self.decoder(y, y_mask, g=g, reverse=False)
+        # reverse decoder and predict
+        y, logdet = self.decoder(z, y_mask, g=g, reverse=True)  # `logdet` now holds the reverse-pass value
+        outputs = {}
+        outputs["model_outputs"] = y.transpose(1, 2)
+        outputs["logdet"] = logdet
+        return outputs
+
+    @torch.no_grad()
+    def inference(
+        self, x, aux_input={"x_lengths": None, "d_vectors": None, "speaker_ids": None}
+    ):  # pylint: disable=dangerous-default-value
+        x_lengths = aux_input["x_lengths"]  # text lengths travel inside `aux_input`, not as an argument
+        g = self._speaker_embedding(aux_input)
+        # embedding pass
+        o_mean, o_log_scale, o_dur_log, x_mask = self.encoder(x, x_lengths, g=g)
+        # compute output durations: exp(log-duration) - 1, scaled by `length_scale`, rounded up to at least 1
+        w = (torch.exp(o_dur_log) - 1) * x_mask * self.length_scale
+        w_ceil = torch.clamp_min(torch.ceil(w), 1)
+        y_lengths = torch.clamp_min(torch.sum(w_ceil, [1, 2]), 1).long()
+        y_max_length = None
+        # compute masks
+        y_mask = torch.unsqueeze(sequence_mask(y_lengths, y_max_length), 1).to(x_mask.dtype)
+        attn_mask = torch.unsqueeze(x_mask, -1) * torch.unsqueeze(y_mask, 2)
+        # expand the rounded durations into an alignment with `generate_path`
+        attn = generate_path(w_ceil.squeeze(1), attn_mask.squeeze(1)).unsqueeze(1)
+        y_mean, y_log_scale, o_attn_dur = self.compute_outputs(attn, o_mean, o_log_scale, x_mask)
+        # sample z around the aligned mean; `inference_noise_scale` scales the random term
+        z = (y_mean + torch.exp(y_log_scale) * torch.randn_like(y_mean) * self.inference_noise_scale) * y_mask
+        # decoder pass
+        y, logdet = self.decoder(z, y_mask, g=g, reverse=True)
+        attn = attn.squeeze(1).permute(0, 2, 1)
+        outputs = {
+            "model_outputs": y.transpose(1, 2),
+            "logdet": logdet,
+            "y_mean": y_mean.transpose(1, 2),
+            "y_log_scale": y_log_scale.transpose(1, 2),
+            "alignments": attn,
+            "durations_log": o_dur_log.transpose(1, 2),
+            "total_durations_log": o_attn_dur.transpose(1, 2),
+        }
+        return outputs
+
+    def train_step(self, batch: dict, criterion: nn.Module):
+        """A single training step. Forward pass and loss computation. Run data-dependent initialization for the
+        first `config.data_dep_init_steps` steps; such a step returns `(None, None)`.
+
+        Args:
+            batch (dict): Reads `text_input`, `text_lengths`, `mel_input`, `mel_lengths`, `d_vectors`, `speaker_ids`.
+            criterion (nn.Module): Loss called on the forward outputs plus `mel_lengths` and `text_lengths`.
+        """
+        text_input = batch["text_input"]
+        text_lengths = batch["text_lengths"]
+        mel_input = batch["mel_input"]
+        mel_lengths = batch["mel_lengths"]
+        d_vectors = batch["d_vectors"]
+        speaker_ids = batch["speaker_ids"]
+
+        if self.run_data_dep_init and self.training:
+            # compute data-dependent initialization of activation norm layers
+            self.unlock_act_norm_layers()
+            with torch.no_grad():
+                _ = self.forward(
+                    text_input,
+                    text_lengths,
+                    mel_input,
+                    mel_lengths,
+                    aux_input={"d_vectors": d_vectors, "speaker_ids": speaker_ids},
+                )
+            outputs = None  # an init step yields neither outputs nor losses
+            loss_dict = None
+            self.lock_act_norm_layers()
+        else:
+            # normal training step
+            outputs = self.forward(
+                text_input,
+                text_lengths,
+                mel_input,
+                mel_lengths,
+                aux_input={"d_vectors": d_vectors, "speaker_ids": speaker_ids},
+            )
+
+            with autocast(enabled=False):  # avoid mixed_precision in criterion
+                loss_dict = criterion(
+                    outputs["z"].float(),
+                    outputs["y_mean"].float(),
+                    outputs["y_log_scale"].float(),
+                    outputs["logdet"].float(),
+                    mel_lengths,
+                    outputs["durations_log"].float(),
+                    outputs["total_durations_log"].float(),
+                    text_lengths,
+                )
+        return outputs, loss_dict
+
+    def _create_logs(self, batch, outputs, ap):  # figures and audio for the first item of the batch
+        alignments = outputs["alignments"]
+        text_input = batch["text_input"][:1] if batch["text_input"] is not None else None
+        text_lengths = batch["text_lengths"]
+        mel_input = batch["mel_input"]
+        d_vectors = batch["d_vectors"][:1] if batch["d_vectors"] is not None else None
+        speaker_ids = batch["speaker_ids"][:1] if batch["speaker_ids"] is not None else None
+
+        # model runs reverse flow to predict spectrograms
+        pred_outputs = self.inference(
+            text_input,
+            aux_input={"x_lengths": text_lengths[:1], "d_vectors": d_vectors, "speaker_ids": speaker_ids},
+        )
+        model_outputs = pred_outputs["model_outputs"]
+
+        pred_spec = model_outputs[0].data.cpu().numpy()  # from the fresh `inference` call above
+        gt_spec = mel_input[0].data.cpu().numpy()
+        align_img = alignments[0].data.cpu().numpy()  # from the `outputs` passed in, not from `pred_outputs`
+
+        figures = {
+            "prediction": plot_spectrogram(pred_spec, ap, output_fig=False),
+            "ground_truth": plot_spectrogram(gt_spec, ap, output_fig=False),
+            "alignment": plot_alignment(align_img, output_fig=False),
+        }
+
+        # Sample audio
+        train_audio = ap.inv_melspectrogram(pred_spec.T)
+        return figures, {"audio": train_audio}
+
+    def train_log(
+        self, batch: dict, outputs: dict, logger: "Logger", assets: dict, steps: int
+    ) -> None:  # pylint: disable=no-self-use
+        figures, audios = self._create_logs(batch, outputs, self.ap)  # `assets` is not used here
+        logger.train_figures(steps, figures)
+        logger.train_audios(steps, audios, self.ap.sample_rate)
+
+    @torch.no_grad()
+    def eval_step(self, batch: dict, criterion: nn.Module):
+        return self.train_step(batch, criterion)  # same computation as a training step, under `no_grad`
+
+    def eval_log(self, batch: dict, outputs: dict, logger: "Logger", assets: dict, steps: int) -> None:
+        figures, audios = self._create_logs(batch, outputs, self.ap)  # `assets` is not used here
+        logger.eval_figures(steps, figures)
+        logger.eval_audios(steps, audios, self.ap.sample_rate)
+
+    @torch.no_grad()
+    def test_run(self, assets: Dict) -> Tuple[Dict, Dict]:
+        """Generic test run for `tts` models used by `Trainer`.
+
+        You can override this for a different behaviour.
+
+        Returns:
+            Tuple[Dict, Dict]: Test figures and audios to be projected to Tensorboard.
+        """
+        print(" | > Synthesizing test sentences.")
+        test_audios = {}
+        test_figures = {}
+        test_sentences = self.config.test_sentences
+        aux_inputs = self._get_test_aux_input()
+        if len(test_sentences) == 0:
+            print(" | [!] No test sentences provided.")
+        else:
+            for idx, sen in enumerate(test_sentences):
+                outputs = synthesis(
+                    self,
+                    sen,
+                    self.config,
+                    "cuda" in str(next(self.parameters()).device),  # True when the first parameter is on a CUDA device
+                    speaker_id=aux_inputs["speaker_id"],
+                    d_vector=aux_inputs["d_vector"],
+                    style_wav=aux_inputs["style_wav"],
+                    use_griffin_lim=True,
+                    do_trim_silence=False,
+                )
+
+                test_audios["{}-audio".format(idx)] = outputs["wav"]  # keys are prefixed with the sentence index
+                test_figures["{}-prediction".format(idx)] = plot_spectrogram(
+                    outputs["outputs"]["model_outputs"], self.ap, output_fig=False
+                )
+                test_figures["{}-alignment".format(idx)] = plot_alignment(outputs["alignments"], output_fig=False)
+        return test_figures, test_audios
+
+    def preprocess(self, y, y_lengths, y_max_length, attn=None):  # trim to a multiple of `num_squeeze`
+        if y_max_length is not None:
+            y_max_length = (y_max_length // self.num_squeeze) * self.num_squeeze
+            y = y[:, :, :y_max_length]  # cut along the last axis of `y`
+            if attn is not None:
+                attn = attn[:, :, :, :y_max_length]
+        y_lengths = torch.div(y_lengths, self.num_squeeze, rounding_mode="floor") * self.num_squeeze  # always applied
+        return y, y_lengths, y_max_length, attn
+
+    def store_inverse(self):  # thin delegate; called by `load_checkpoint` when `eval=True`
+        self.decoder.store_inverse()  # presumably caches inverse weights for reverse passes — TODO confirm
+
+    def load_checkpoint(
+        self, config, checkpoint_path, eval=False
+    ):  # pylint: disable=unused-argument, redefined-builtin
+        state = load_fsspec(checkpoint_path, map_location=torch.device("cpu"))  # request CPU placement on load
+        self.load_state_dict(state["model"])  # weights are read from the "model" entry of the checkpoint
+        if eval:
+            self.eval()
+            self.store_inverse()
+            assert not self.training
+
+    @staticmethod
+    def get_criterion():  # returns a new `GlowTTSLoss` instance on every call
+        from TTS.tts.layers.losses import GlowTTSLoss  # pylint: disable=import-outside-toplevel
+
+        return GlowTTSLoss()
+
+    def on_train_step_start(self, trainer):
+        """Decide on every training step whether to enable or disable data-dependent initialization."""
+        self.run_data_dep_init = trainer.total_steps_done < self.data_dep_init_steps
+
+    @staticmethod
+    def init_from_config(config: "GlowTTSConfig", samples: Union[List[List], List[Dict]] = None, verbose=True):
+        """Initiate model from config
+
+        Args:
+            config (GlowTTSConfig): Model config.
+            samples (Union[List[List], List[Dict]]): Training samples to parse speaker ids for training.
+                Defaults to None.
+            verbose (bool): If True, print init messages. Defaults to True.
+        """
+        from TTS.utils.audio import AudioProcessor
+
+        ap = AudioProcessor.init_from_config(config, verbose)
+        tokenizer, new_config = TTSTokenizer.init_from_config(config)  # the model gets `new_config`, not `config`
+        speaker_manager = SpeakerManager.init_from_config(config, samples)
+        return GlowTTS(new_config, ap, tokenizer, speaker_manager)
diff --git a/TTS/tts/models/tacotron.py b/TTS/tts/models/tacotron.py
new file mode 100644
index 0000000000000000000000000000000000000000..7bfa6ba5e4d2ae502a89fb04621e8be0ac771e2f
--- /dev/null
+++ b/TTS/tts/models/tacotron.py
@@ -0,0 +1,410 @@
+# coding: utf-8
+
+from typing import Dict, List, Tuple, Union
+
+import torch
+from torch import nn
+from torch.cuda.amp.autocast_mode import autocast
+from trainer.trainer_utils import get_optimizer, get_scheduler
+
+from TTS.tts.layers.tacotron.capacitron_layers import CapacitronVAE
+from TTS.tts.layers.tacotron.gst_layers import GST
+from TTS.tts.layers.tacotron.tacotron import Decoder, Encoder, PostCBHG
+from TTS.tts.models.base_tacotron import BaseTacotron
+from TTS.tts.utils.measures import alignment_diagonal_score
+from TTS.tts.utils.speakers import SpeakerManager
+from TTS.tts.utils.text.tokenizer import TTSTokenizer
+from TTS.tts.utils.visual import plot_alignment, plot_spectrogram
+from TTS.utils.capacitron_optimizer import CapacitronOptimizer
+
+
+class Tacotron(BaseTacotron):
+    """Tacotron as in https://arxiv.org/abs/1703.10135
+    It's an autoregressive encoder-attention-decoder-postnet architecture.
+    Check `TacotronConfig` for the arguments.
+
+    Args:
+        config (TacotronConfig): Configuration for the Tacotron model.
+        speaker_manager (SpeakerManager): Speaker manager to handle multi-speaker settings. Only use if the model is
+            a multi-speaker model. Defaults to None.
+    """
+
+    def __init__(
+        self,
+        config: "TacotronConfig",
+        ap: "AudioProcessor" = None,
+        tokenizer: "TTSTokenizer" = None,
+        speaker_manager: SpeakerManager = None,
+    ):
+        """Copy the config fields onto `self` and build the embedding, encoder, decoder and postnet layers."""
+        super().__init__(config, ap, tokenizer, speaker_manager)
+
+        # pass all config fields to `self`
+        # to keep code changes minimal
+        for key in config:
+            setattr(self, key, config[key])
+
+        # set speaker embedding channel size for determining `in_channels` for the connected layers.
+        # `init_multispeaker` needs to be called once more in training to initialize the speaker embedding layer based
+        # on the number of speakers inferred from the dataset.
+        if self.use_speaker_embedding or self.use_d_vector_file:
+            self.init_multispeaker(config)
+            self.decoder_in_features += self.embedded_speaker_dim  # add speaker embedding dim
+
+        if self.use_gst:
+            self.decoder_in_features += self.gst.gst_embedding_dim
+
+        if self.use_capacitron_vae:
+            self.decoder_in_features += self.capacitron_vae.capacitron_VAE_embedding_dim
+
+        # embedding layer
+        self.embedding = nn.Embedding(self.num_chars, 256, padding_idx=0)
+        self.embedding.weight.data.normal_(0, 0.3)
+
+        # base model layers
+        self.encoder = Encoder(self.encoder_in_features)
+        self.decoder = Decoder(
+            self.decoder_in_features,
+            self.decoder_output_dim,
+            self.r,
+            self.memory_size,
+            self.attention_type,
+            self.windowing,
+            self.attention_norm,
+            self.prenet_type,
+            self.prenet_dropout,
+            self.use_forward_attn,
+            self.transition_agent,
+            self.forward_attn_mask,
+            self.location_attn,
+            self.attention_heads,
+            self.separate_stopnet,
+            self.max_decoder_steps,
+        )
+        self.postnet = PostCBHG(self.decoder_output_dim)
+        self.last_linear = nn.Linear(self.postnet.cbhg.gru_features * 2, self.out_channels)
+
+        # setup prenet dropout
+        self.decoder.prenet.dropout_at_inference = self.prenet_dropout_at_inference
+
+        # global style token layers
+        if self.gst and self.use_gst:
+            self.gst_layer = GST(
+                num_mel=self.decoder_output_dim,
+                num_heads=self.gst.gst_num_heads,
+                num_style_tokens=self.gst.gst_num_style_tokens,
+                gst_embedding_dim=self.gst.gst_embedding_dim,
+            )
+
+        # Capacitron layers
+        if self.capacitron_vae and self.use_capacitron_vae:
+            self.capacitron_vae_layer = CapacitronVAE(
+                num_mel=self.decoder_output_dim,
+                encoder_output_dim=self.encoder_in_features,
+                capacitron_VAE_embedding_dim=self.capacitron_vae.capacitron_VAE_embedding_dim,
+                speaker_embedding_dim=self.embedded_speaker_dim
+                if self.use_speaker_embedding and self.capacitron_vae.capacitron_use_speaker_embedding
+                else None,
+                text_summary_embedding_dim=self.capacitron_vae.capacitron_text_summary_embedding_dim
+                if self.capacitron_vae.capacitron_use_text_summary_embeddings
+                else None,
+            )
+
+        # backward pass decoder
+        if self.bidirectional_decoder:
+            self._init_backward_decoder()
+        # setup DDC: a second decoder built like the main one, but with reduction factor `ddc_r`
+        if self.double_decoder_consistency:
+            self.coarse_decoder = Decoder(
+                self.decoder_in_features,
+                self.decoder_output_dim,
+                self.ddc_r,
+                self.memory_size,
+                self.attention_type,
+                self.windowing,
+                self.attention_norm,
+                self.prenet_type,
+                self.prenet_dropout,
+                self.use_forward_attn,
+                self.transition_agent,
+                self.forward_attn_mask,
+                self.location_attn,
+                self.attention_heads,
+                self.separate_stopnet,
+                self.max_decoder_steps,
+            )
+
+    def forward(  # pylint: disable=dangerous-default-value
+        self, text, text_lengths, mel_specs=None, mel_lengths=None, aux_input={"speaker_ids": None, "d_vectors": None}
+    ):
+        """
+        Shapes:
+            text: [B, T_in]
+            text_lengths: [B]
+            mel_specs: [B, T_out, C]
+            mel_lengths: [B]
+            aux_input: 'speaker_ids': [B, 1] and  'd_vectors':[B, C]
+        """
+        aux_input = self._format_aux_input(aux_input)
+        outputs = {"alignments_backward": None, "decoder_outputs_backward": None}
+        inputs = self.embedding(text)
+        input_mask, output_mask = self.compute_masks(text_lengths, mel_lengths)
+        # B x T_in x encoder_in_features
+        encoder_outputs = self.encoder(inputs)
+        # sequence masking
+        encoder_outputs = encoder_outputs * input_mask.unsqueeze(2).expand_as(encoder_outputs)
+        # global style token
+        if self.gst and self.use_gst:
+            # B x gst_dim
+            encoder_outputs = self.compute_gst(encoder_outputs, mel_specs)
+        # speaker embedding
+        if self.use_speaker_embedding or self.use_d_vector_file:
+            if not self.use_d_vector_file:
+                # B x 1 x speaker_embed_dim
+                embedded_speakers = self.speaker_embedding(aux_input["speaker_ids"])[:, None]
+            else:
+                # B x 1 x speaker_embed_dim
+                embedded_speakers = torch.unsqueeze(aux_input["d_vectors"], 1)
+            encoder_outputs = self._concat_speaker_embedding(encoder_outputs, embedded_speakers)
+        # Capacitron (NOTE(review): `embedded_speakers` is only bound if the speaker branch above ran)
+        if self.capacitron_vae and self.use_capacitron_vae:
+            # B x capacitron_VAE_embedding_dim
+            encoder_outputs, *capacitron_vae_outputs = self.compute_capacitron_VAE_embedding(
+                encoder_outputs,
+                reference_mel_info=[mel_specs, mel_lengths],
+                text_info=[inputs, text_lengths]
+                if self.capacitron_vae.capacitron_use_text_summary_embeddings
+                else None,
+                speaker_embedding=embedded_speakers if self.capacitron_vae.capacitron_use_speaker_embedding else None,
+            )
+        else:
+            capacitron_vae_outputs = None
+        # decoder_outputs: B x decoder_in_features x T_out
+        # alignments: B x T_in x encoder_in_features
+        # stop_tokens: B x T_in
+        decoder_outputs, alignments, stop_tokens = self.decoder(encoder_outputs, mel_specs, input_mask)
+        # sequence masking
+        if output_mask is not None:
+            decoder_outputs = decoder_outputs * output_mask.unsqueeze(1).expand_as(decoder_outputs)
+        # B x T_out x decoder_in_features
+        postnet_outputs = self.postnet(decoder_outputs)
+        # sequence masking
+        if output_mask is not None:
+            postnet_outputs = postnet_outputs * output_mask.unsqueeze(2).expand_as(postnet_outputs)
+        # B x T_out x postnet_dim
+        postnet_outputs = self.last_linear(postnet_outputs)
+        # B x T_out x decoder_in_features
+        decoder_outputs = decoder_outputs.transpose(1, 2).contiguous()
+        if self.bidirectional_decoder:
+            decoder_outputs_backward, alignments_backward = self._backward_pass(mel_specs, encoder_outputs, input_mask)
+            outputs["alignments_backward"] = alignments_backward
+            outputs["decoder_outputs_backward"] = decoder_outputs_backward
+        if self.double_decoder_consistency:  # writes the same two keys, so it overrides the bidirectional results
+            decoder_outputs_backward, alignments_backward = self._coarse_decoder_pass(
+                mel_specs, encoder_outputs, alignments, input_mask
+            )
+            outputs["alignments_backward"] = alignments_backward
+            outputs["decoder_outputs_backward"] = decoder_outputs_backward
+        outputs.update(
+            {
+                "model_outputs": postnet_outputs,
+                "decoder_outputs": decoder_outputs,
+                "alignments": alignments,
+                "stop_tokens": stop_tokens,
+                "capacitron_vae_outputs": capacitron_vae_outputs,
+            }
+        )
+        return outputs
+
+    @torch.no_grad()
+    def inference(self, text_input, aux_input=None):  # synthesis from text only; no target mels are passed
+        aux_input = self._format_aux_input(aux_input)
+        inputs = self.embedding(text_input)
+        encoder_outputs = self.encoder(inputs)
+        if self.gst and self.use_gst:
+            # B x gst_dim
+            encoder_outputs = self.compute_gst(encoder_outputs, aux_input["style_mel"], aux_input["d_vectors"])
+        if self.capacitron_vae and self.use_capacitron_vae:
+            if aux_input["style_text"] is not None:
+                style_text_embedding = self.embedding(aux_input["style_text"])
+                style_text_length = torch.tensor([style_text_embedding.size(1)], dtype=torch.int64).to(
+                    encoder_outputs.device
+                )  # pylint: disable=not-callable
+            reference_mel_length = (
+                torch.tensor([aux_input["style_mel"].size(1)], dtype=torch.int64).to(encoder_outputs.device)
+                if aux_input["style_mel"] is not None
+                else None
+            )  # pylint: disable=not-callable
+            # B x capacitron_VAE_embedding_dim
+            encoder_outputs, *_ = self.compute_capacitron_VAE_embedding(
+                encoder_outputs,
+                reference_mel_info=[aux_input["style_mel"], reference_mel_length]
+                if aux_input["style_mel"] is not None
+                else None,
+                text_info=[style_text_embedding, style_text_length] if aux_input["style_text"] is not None else None,
+                speaker_embedding=aux_input["d_vectors"]
+                if self.capacitron_vae.capacitron_use_speaker_embedding
+                else None,
+            )
+        if self.num_speakers > 1:  # NOTE(review): forward() gates on use_speaker_embedding/use_d_vector_file
+            if not self.use_d_vector_file:
+                # B x 1 x speaker_embed_dim
+                embedded_speakers = self.speaker_embedding(aux_input["speaker_ids"])
+                # reshape embedded_speakers
+                if embedded_speakers.ndim == 1:
+                    embedded_speakers = embedded_speakers[None, None, :]
+                elif embedded_speakers.ndim == 2:
+                    embedded_speakers = embedded_speakers[None, :]
+            else:
+                # B x 1 x speaker_embed_dim
+                embedded_speakers = torch.unsqueeze(aux_input["d_vectors"], 1)
+            encoder_outputs = self._concat_speaker_embedding(encoder_outputs, embedded_speakers)
+        decoder_outputs, alignments, stop_tokens = self.decoder.inference(encoder_outputs)
+        postnet_outputs = self.postnet(decoder_outputs)
+        postnet_outputs = self.last_linear(postnet_outputs)
+        decoder_outputs = decoder_outputs.transpose(1, 2)
+        outputs = {
+            "model_outputs": postnet_outputs,
+            "decoder_outputs": decoder_outputs,
+            "alignments": alignments,
+            "stop_tokens": stop_tokens,
+        }
+        return outputs
+
+    def before_backward_pass(self, loss_dict, optimizer) -> None:
+        # Capacitron-specific training operations, extracted from the trainer:
+        # backprop the beta loss on its own, then run the optimizer's `first_step`
+        if self.use_capacitron_vae:
+            loss_dict["capacitron_vae_beta_loss"].backward()
+            optimizer.first_step()  # presumably the `CapacitronOptimizer` from `get_optimizer` — confirm
+
+    def train_step(self, batch: Dict, criterion: torch.nn.Module) -> Tuple[Dict, Dict]:
+        """Perform a single training step by fetching the right set of samples from the batch.
+
+        Args:
+            batch ([Dict]): A dictionary of input tensors.
+            criterion ([torch.nn.Module]): Callable criterion to compute model loss.
+        """
+        text_input = batch["text_input"]
+        text_lengths = batch["text_lengths"]
+        mel_input = batch["mel_input"]
+        mel_lengths = batch["mel_lengths"]
+        linear_input = batch["linear_input"]
+        stop_targets = batch["stop_targets"]
+        stop_target_lengths = batch["stop_target_lengths"]
+        speaker_ids = batch["speaker_ids"]
+        d_vectors = batch["d_vectors"]
+
+        aux_input = {"speaker_ids": speaker_ids, "d_vectors": d_vectors}
+        outputs = self.forward(text_input, text_lengths, mel_input, mel_lengths, aux_input)
+
+        # set the [alignment] lengths wrt reduction factor for guided attention
+        if mel_lengths.max() % self.decoder.r != 0:  # pad amount comes from the longest item and is added to all
+            alignment_lengths = (
+                mel_lengths + (self.decoder.r - (mel_lengths.max() % self.decoder.r))
+            ) // self.decoder.r
+        else:
+            alignment_lengths = mel_lengths // self.decoder.r
+
+        # compute loss
+        with autocast(enabled=False):  # use float32 for the criterion
+            loss_dict = criterion(
+                outputs["model_outputs"].float(),
+                outputs["decoder_outputs"].float(),
+                mel_input.float(),
+                linear_input.float(),
+                outputs["stop_tokens"].float(),
+                stop_targets.float(),
+                stop_target_lengths,
+                outputs["capacitron_vae_outputs"] if self.capacitron_vae else None,
+                mel_lengths,
+                None if outputs["decoder_outputs_backward"] is None else outputs["decoder_outputs_backward"].float(),
+                outputs["alignments"].float(),
+                alignment_lengths,
+                None if outputs["alignments_backward"] is None else outputs["alignments_backward"].float(),
+                text_lengths,
+            )
+
+        # compute alignment error (the lower the better )
+        align_error = 1 - alignment_diagonal_score(outputs["alignments"])
+        loss_dict["align_error"] = align_error  # stored in the loss dict next to the criterion's entries
+        return outputs, loss_dict
+
+    def get_optimizer(self) -> List:  # NOTE(review): annotated `List`, but a single optimizer object is returned
+        if self.use_capacitron_vae:
+            return CapacitronOptimizer(self.config, self.named_parameters())
+        return get_optimizer(self.config.optimizer, self.config.optimizer_params, self.config.lr, self)
+
+    def get_scheduler(self, optimizer: object):
+        opt = optimizer.primary_optimizer if self.use_capacitron_vae else optimizer  # schedule the primary one only
+        return get_scheduler(self.config.lr_scheduler, self.config.lr_scheduler_params, opt)
+
+    def before_gradient_clipping(self):  # does nothing unless `use_capacitron_vae` is set
+        if self.use_capacitron_vae:
+            # Capacitron model specific gradient clipping
+            model_params_to_clip = []
+            for name, param in self.named_parameters():
+                if param.requires_grad:
+                    if name != "capacitron_vae_layer.beta":  # beta is the only trainable parameter left unclipped
+                        model_params_to_clip.append(param)
+            torch.nn.utils.clip_grad_norm_(model_params_to_clip, self.capacitron_vae.capacitron_grad_clip)
+
+    def _create_logs(self, batch, outputs, ap):  # figures and audio for the first item of the batch
+        postnet_outputs = outputs["model_outputs"]
+        decoder_outputs = outputs["decoder_outputs"]
+        alignments = outputs["alignments"]
+        alignments_backward = outputs["alignments_backward"]
+        mel_input = batch["mel_input"]
+        linear_input = batch["linear_input"]
+
+        pred_linear_spec = postnet_outputs[0].data.cpu().numpy()
+        pred_mel_spec = decoder_outputs[0].data.cpu().numpy()
+        gt_linear_spec = linear_input[0].data.cpu().numpy()
+        gt_mel_spec = mel_input[0].data.cpu().numpy()
+        align_img = alignments[0].data.cpu().numpy()
+
+        figures = {
+            "pred_linear_spec": plot_spectrogram(pred_linear_spec, ap, output_fig=False),
+            "real_linear_spec": plot_spectrogram(gt_linear_spec, ap, output_fig=False),
+            "pred_mel_spec": plot_spectrogram(pred_mel_spec, ap, output_fig=False),
+            "real_mel_spec": plot_spectrogram(gt_mel_spec, ap, output_fig=False),
+            "alignment": plot_alignment(align_img, output_fig=False),
+        }
+
+        if self.bidirectional_decoder or self.double_decoder_consistency:  # both fill `alignments_backward`
+            figures["alignment_backward"] = plot_alignment(alignments_backward[0].data.cpu().numpy(), output_fig=False)
+
+        # Sample audio from the predicted linear spectrogram
+        audio = ap.inv_spectrogram(pred_linear_spec.T)
+        return figures, {"audio": audio}
+
+    def train_log(
+        self, batch: dict, outputs: dict, logger: "Logger", assets: dict, steps: int
+    ) -> None:  # pylint: disable=no-self-use
+        figures, audios = self._create_logs(batch, outputs, self.ap)  # `assets` is not used here
+        logger.train_figures(steps, figures)
+        logger.train_audios(steps, audios, self.ap.sample_rate)
+
+    def eval_step(self, batch: dict, criterion: nn.Module):  # no `no_grad` here; any such guard is the caller's
+        return self.train_step(batch, criterion)
+
+    def eval_log(self, batch: dict, outputs: dict, logger: "Logger", assets: dict, steps: int) -> None:
+        figures, audios = self._create_logs(batch, outputs, self.ap)  # `assets` is not used here
+        logger.eval_figures(steps, figures)
+        logger.eval_audios(steps, audios, self.ap.sample_rate)
+
+    @staticmethod
+    def init_from_config(config: "TacotronConfig", samples: Union[List[List], List[Dict]] = None):
+        """Initiate model from config
+
+        Args:
+            config (TacotronConfig): Model config.
+            samples (Union[List[List], List[Dict]]): Training samples to parse speaker ids for training.
+                Defaults to None.
+        """
+        from TTS.utils.audio import AudioProcessor
+
+        ap = AudioProcessor.init_from_config(config)
+        tokenizer, new_config = TTSTokenizer.init_from_config(config)  # the model gets `new_config`, not `config`
+        speaker_manager = SpeakerManager.init_from_config(config, samples)
+        return Tacotron(new_config, ap, tokenizer, speaker_manager)
diff --git a/TTS/tts/models/tacotron2.py b/TTS/tts/models/tacotron2.py
new file mode 100644
index 0000000000000000000000000000000000000000..95d339f17d54f7130e2bd5d435620df41d100b6d
--- /dev/null
+++ b/TTS/tts/models/tacotron2.py
@@ -0,0 +1,434 @@
+# coding: utf-8
+
+from typing import Dict, List, Union
+
+import torch
+from torch import nn
+from torch.cuda.amp.autocast_mode import autocast
+from trainer.trainer_utils import get_optimizer, get_scheduler
+
+from TTS.tts.layers.tacotron.capacitron_layers import CapacitronVAE
+from TTS.tts.layers.tacotron.gst_layers import GST
+from TTS.tts.layers.tacotron.tacotron2 import Decoder, Encoder, Postnet
+from TTS.tts.models.base_tacotron import BaseTacotron
+from TTS.tts.utils.measures import alignment_diagonal_score
+from TTS.tts.utils.speakers import SpeakerManager
+from TTS.tts.utils.text.tokenizer import TTSTokenizer
+from TTS.tts.utils.visual import plot_alignment, plot_spectrogram
+from TTS.utils.capacitron_optimizer import CapacitronOptimizer
+
+
+class Tacotron2(BaseTacotron):
+    """Tacotron2 model implementation inherited from :class:`TTS.tts.models.base_tacotron.BaseTacotron`.
+
+    Paper::
+        https://arxiv.org/abs/1712.05884
+
+    Paper abstract::
+        This paper describes Tacotron 2, a neural network architecture for speech synthesis directly from text.
+        The system is composed of a recurrent sequence-to-sequence feature prediction network that maps character
+        embeddings to mel-scale spectrograms, followed by a modified WaveNet model acting as a vocoder to synthesize
+        timedomain waveforms from those spectrograms. Our model achieves a mean opinion score (MOS) of 4.53 comparable
+        to a MOS of 4.58 for professionally recorded speech. To validate our design choices, we present ablation
+        studies of key components of our system and evaluate the impact of using mel spectrograms as the input to
+        WaveNet instead of linguistic, duration, and F0 features. We further demonstrate that using a compact acoustic
+        intermediate representation enables significant simplification of the WaveNet architecture.
+
+    Check :class:`TTS.tts.configs.tacotron2_config.Tacotron2Config` for model arguments.
+
+    Args:
+        config (Tacotron2Config):
+            Configuration for the Tacotron2 model.
+        ap (AudioProcessor):
+            Audio processor passed on to the base class; used here when creating logs. Defaults to None.
+        tokenizer (TTSTokenizer):
+            Tokenizer passed on to the base class. Defaults to None.
+        speaker_manager (SpeakerManager):
+            Speaker manager for multi-speaker training. Use only for multi-speaker training. Defaults to None.
+    """
+
+    def __init__(
+        self,
+        config: "Tacotron2Config",
+        ap: "AudioProcessor" = None,
+        tokenizer: "TTSTokenizer" = None,
+        speaker_manager: SpeakerManager = None,
+    ):
+
+        super().__init__(config, ap, tokenizer, speaker_manager)
+
+        self.decoder_output_dim = config.out_channels
+
+        # pass all config fields to `self`
+        # for fewer code change
+        for key in config:
+            setattr(self, key, config[key])
+
+        # The decoder input width grows by the size of every embedding that gets concatenated
+        # to the encoder outputs (speaker, GST, Capacitron VAE).
+
+        # init multi-speaker layers
+        if self.use_speaker_embedding or self.use_d_vector_file:
+            self.init_multispeaker(config)
+            self.decoder_in_features += self.embedded_speaker_dim  # add speaker embedding dim
+
+        # NOTE(review): this reads `self.gst` whenever `use_gst` is set, while the layer creation
+        # below additionally checks that `self.gst` is truthy.
+        if self.use_gst:
+            self.decoder_in_features += self.gst.gst_embedding_dim
+
+        if self.use_capacitron_vae:
+            self.decoder_in_features += self.capacitron_vae.capacitron_VAE_embedding_dim
+
+        # embedding layer
+        # (embedding size is fixed to 512 and index 0 is the padding index)
+        self.embedding = nn.Embedding(self.num_chars, 512, padding_idx=0)
+
+        # base model layers
+        self.encoder = Encoder(self.encoder_in_features)
+
+        self.decoder = Decoder(
+            self.decoder_in_features,
+            self.decoder_output_dim,
+            self.r,
+            self.attention_type,
+            self.attention_win,
+            self.attention_norm,
+            self.prenet_type,
+            self.prenet_dropout,
+            self.use_forward_attn,
+            self.transition_agent,
+            self.forward_attn_mask,
+            self.location_attn,
+            self.attention_heads,
+            self.separate_stopnet,
+            self.max_decoder_steps,
+        )
+        self.postnet = Postnet(self.out_channels)
+
+        # setup prenet dropout
+        self.decoder.prenet.dropout_at_inference = self.prenet_dropout_at_inference
+
+        # global style token layers
+        if self.gst and self.use_gst:
+            self.gst_layer = GST(
+                num_mel=self.decoder_output_dim,
+                num_heads=self.gst.gst_num_heads,
+                num_style_tokens=self.gst.gst_num_style_tokens,
+                gst_embedding_dim=self.gst.gst_embedding_dim,
+            )
+
+        # Capacitron VAE Layers
+        # (speaker and text-summary dims are passed only when the matching config switch is on)
+        if self.capacitron_vae and self.use_capacitron_vae:
+            self.capacitron_vae_layer = CapacitronVAE(
+                num_mel=self.decoder_output_dim,
+                encoder_output_dim=self.encoder_in_features,
+                capacitron_VAE_embedding_dim=self.capacitron_vae.capacitron_VAE_embedding_dim,
+                speaker_embedding_dim=self.embedded_speaker_dim
+                if self.capacitron_vae.capacitron_use_speaker_embedding
+                else None,
+                text_summary_embedding_dim=self.capacitron_vae.capacitron_text_summary_embedding_dim
+                if self.capacitron_vae.capacitron_use_text_summary_embeddings
+                else None,
+            )
+
+        # backward pass decoder
+        if self.bidirectional_decoder:
+            self._init_backward_decoder()
+        # setup DDC
+        # (second decoder built with the same arguments as the main one, except the reduction
+        # factor, which is `ddc_r` instead of `r`)
+        if self.double_decoder_consistency:
+            self.coarse_decoder = Decoder(
+                self.decoder_in_features,
+                self.decoder_output_dim,
+                self.ddc_r,
+                self.attention_type,
+                self.attention_win,
+                self.attention_norm,
+                self.prenet_type,
+                self.prenet_dropout,
+                self.use_forward_attn,
+                self.transition_agent,
+                self.forward_attn_mask,
+                self.location_attn,
+                self.attention_heads,
+                self.separate_stopnet,
+                self.max_decoder_steps,
+            )
+
+    @staticmethod
+    def shape_outputs(mel_outputs, mel_outputs_postnet, alignments):
+        """Final reshape of the model output tensors.
+
+        Swaps dimensions 1 and 2 of both mel tensors; `alignments` is returned untouched.
+        """
+        mel_outputs = mel_outputs.transpose(1, 2)
+        mel_outputs_postnet = mel_outputs_postnet.transpose(1, 2)
+        return mel_outputs, mel_outputs_postnet, alignments
+
+    def forward(  # pylint: disable=dangerous-default-value
+        self, text, text_lengths, mel_specs=None, mel_lengths=None, aux_input={"speaker_ids": None, "d_vectors": None}
+    ):
+        """Forward pass for training with Teacher Forcing.
+
+        Shapes:
+            text: :math:`[B, T_in]`
+            text_lengths: :math:`[B]`
+            mel_specs: :math:`[B, T_out, C]`
+            mel_lengths: :math:`[B]`
+            aux_input: 'speaker_ids': :math:`[B, 1]` and  'd_vectors': :math:`[B, C]`
+
+        Returns:
+            Dict: with the keys `model_outputs` (post-net outputs), `decoder_outputs`, `alignments`,
+                `stop_tokens`, `capacitron_vae_outputs` (None unless Capacitron is enabled), and
+                `alignments_backward` / `decoder_outputs_backward` (None unless the bidirectional
+                decoder or double decoder consistency is enabled).
+        """
+        aux_input = self._format_aux_input(aux_input)
+        outputs = {"alignments_backward": None, "decoder_outputs_backward": None}
+        # compute mask for padding
+        # B x T_in_max (boolean)
+        input_mask, output_mask = self.compute_masks(text_lengths, mel_lengths)
+        # B x D_embed x T_in_max
+        embedded_inputs = self.embedding(text).transpose(1, 2)
+        # B x T_in_max x D_en
+        encoder_outputs = self.encoder(embedded_inputs, text_lengths)
+        if self.gst and self.use_gst:
+            # B x gst_dim
+            encoder_outputs = self.compute_gst(encoder_outputs, mel_specs)
+
+        # speaker conditioning: a learned embedding looked up by speaker id, or an external d-vector
+        if self.use_speaker_embedding or self.use_d_vector_file:
+            if not self.use_d_vector_file:
+                # B x 1 x speaker_embed_dim
+                embedded_speakers = self.speaker_embedding(aux_input["speaker_ids"])[:, None]
+            else:
+                # B x 1 x speaker_embed_dim
+                embedded_speakers = torch.unsqueeze(aux_input["d_vectors"], 1)
+            encoder_outputs = self._concat_speaker_embedding(encoder_outputs, embedded_speakers)
+
+        # capacitron
+        if self.capacitron_vae and self.use_capacitron_vae:
+            # B x capacitron_VAE_embedding_dim
+            # the first returned item replaces the encoder outputs; the rest is kept for the loss
+            encoder_outputs, *capacitron_vae_outputs = self.compute_capacitron_VAE_embedding(
+                encoder_outputs,
+                reference_mel_info=[mel_specs, mel_lengths],
+                text_info=[embedded_inputs.transpose(1, 2), text_lengths]
+                if self.capacitron_vae.capacitron_use_text_summary_embeddings
+                else None,
+                speaker_embedding=embedded_speakers if self.capacitron_vae.capacitron_use_speaker_embedding else None,
+            )
+        else:
+            capacitron_vae_outputs = None
+
+        # zero out the encoder outputs at padded input positions
+        encoder_outputs = encoder_outputs * input_mask.unsqueeze(2).expand_as(encoder_outputs)
+
+        # B x mel_dim x T_out -- B x T_out//r x T_in -- B x T_out//r
+        decoder_outputs, alignments, stop_tokens = self.decoder(encoder_outputs, mel_specs, input_mask)
+        # sequence masking
+        # NOTE(review): guarded by `mel_lengths` here but by `output_mask` below; presumably
+        # `compute_masks` returns no output mask exactly when `mel_lengths` is None - confirm.
+        if mel_lengths is not None:
+            decoder_outputs = decoder_outputs * output_mask.unsqueeze(1).expand_as(decoder_outputs)
+        # B x mel_dim x T_out
+        # the post-net output is added to the decoder output (residual connection)
+        postnet_outputs = self.postnet(decoder_outputs)
+        postnet_outputs = decoder_outputs + postnet_outputs
+        # sequence masking
+        if output_mask is not None:
+            postnet_outputs = postnet_outputs * output_mask.unsqueeze(1).expand_as(postnet_outputs)
+        # B x T_out x mel_dim -- B x T_out x mel_dim -- B x T_out//r x T_in
+        decoder_outputs, postnet_outputs, alignments = self.shape_outputs(decoder_outputs, postnet_outputs, alignments)
+        if self.bidirectional_decoder:
+            decoder_outputs_backward, alignments_backward = self._backward_pass(mel_specs, encoder_outputs, input_mask)
+            outputs["alignments_backward"] = alignments_backward
+            outputs["decoder_outputs_backward"] = decoder_outputs_backward
+        # the coarse-decoder results are stored under the same "backward" keys, so they
+        # overwrite the bidirectional-decoder results when both options are enabled
+        if self.double_decoder_consistency:
+            decoder_outputs_backward, alignments_backward = self._coarse_decoder_pass(
+                mel_specs, encoder_outputs, alignments, input_mask
+            )
+            outputs["alignments_backward"] = alignments_backward
+            outputs["decoder_outputs_backward"] = decoder_outputs_backward
+        outputs.update(
+            {
+                "model_outputs": postnet_outputs,
+                "decoder_outputs": decoder_outputs,
+                "alignments": alignments,
+                "stop_tokens": stop_tokens,
+                "capacitron_vae_outputs": capacitron_vae_outputs,
+            }
+        )
+        return outputs
+
+    @torch.no_grad()
+    def inference(self, text, aux_input=None):
+        """Forward pass for inference with no Teacher-Forcing.
+
+        Args:
+            text: Input token ids.
+            aux_input: Optional auxiliary inputs. After `_format_aux_input`, the keys read here are
+                `style_mel`, `style_text`, `d_vectors` and `speaker_ids`.
+
+        Shapes:
+           text: :math:`[B, T_in]`
+
+        Returns:
+            Dict: with the keys `model_outputs` (post-net outputs), `decoder_outputs`, `alignments`
+                and `stop_tokens`.
+        """
+        aux_input = self._format_aux_input(aux_input)
+        embedded_inputs = self.embedding(text).transpose(1, 2)
+        encoder_outputs = self.encoder.inference(embedded_inputs)
+
+        if self.gst and self.use_gst:
+            # B x gst_dim
+            encoder_outputs = self.compute_gst(encoder_outputs, aux_input["style_mel"], aux_input["d_vectors"])
+
+        if self.capacitron_vae and self.use_capacitron_vae:
+            # `style_text_embedding` / `style_text_length` only exist when a style text is given;
+            # the call below reads them under the same condition
+            if aux_input["style_text"] is not None:
+                style_text_embedding = self.embedding(aux_input["style_text"])
+                style_text_length = torch.tensor([style_text_embedding.size(1)], dtype=torch.int64).to(
+                    encoder_outputs.device
+                )  # pylint: disable=not-callable
+            reference_mel_length = (
+                torch.tensor([aux_input["style_mel"].size(1)], dtype=torch.int64).to(encoder_outputs.device)
+                if aux_input["style_mel"] is not None
+                else None
+            )  # pylint: disable=not-callable
+            # B x capacitron_VAE_embedding_dim
+            encoder_outputs, *_ = self.compute_capacitron_VAE_embedding(
+                encoder_outputs,
+                reference_mel_info=[aux_input["style_mel"], reference_mel_length]
+                if aux_input["style_mel"] is not None
+                else None,
+                text_info=[style_text_embedding, style_text_length] if aux_input["style_text"] is not None else None,
+                speaker_embedding=aux_input["d_vectors"]
+                if self.capacitron_vae.capacitron_use_speaker_embedding
+                else None,
+            )
+
+        # NOTE(review): speaker conditioning is keyed on `num_speakers > 1` here, whereas `forward`
+        # keys it on `use_speaker_embedding or use_d_vector_file`.
+        if self.num_speakers > 1:
+            if not self.use_d_vector_file:
+                embedded_speakers = self.speaker_embedding(aux_input["speaker_ids"])[None]
+                # reshape embedded_speakers
+                if embedded_speakers.ndim == 1:
+                    embedded_speakers = embedded_speakers[None, None, :]
+                elif embedded_speakers.ndim == 2:
+                    embedded_speakers = embedded_speakers[None, :]
+            else:
+                embedded_speakers = aux_input["d_vectors"]
+
+            encoder_outputs = self._concat_speaker_embedding(encoder_outputs, embedded_speakers)
+
+        decoder_outputs, alignments, stop_tokens = self.decoder.inference(encoder_outputs)
+        # the post-net output is added to the decoder output (residual connection)
+        postnet_outputs = self.postnet(decoder_outputs)
+        postnet_outputs = decoder_outputs + postnet_outputs
+        decoder_outputs, postnet_outputs, alignments = self.shape_outputs(decoder_outputs, postnet_outputs, alignments)
+        outputs = {
+            "model_outputs": postnet_outputs,
+            "decoder_outputs": decoder_outputs,
+            "alignments": alignments,
+            "stop_tokens": stop_tokens,
+        }
+        return outputs
+
+    def before_backward_pass(self, loss_dict, optimizer) -> None:
+        """Capacitron-only hook: backpropagate the beta loss and take the optimizer's first step.
+
+        Does nothing unless `use_capacitron_vae` is set.
+
+        Args:
+            loss_dict: Loss dictionary; `capacitron_vae_beta_loss` is read from it.
+            optimizer: Optimizer exposing `first_step()`.
+        """
+        # Extracting custom training specific operations for capacitron
+        # from the trainer
+        if self.use_capacitron_vae:
+            loss_dict["capacitron_vae_beta_loss"].backward()
+            optimizer.first_step()
+
+    def train_step(self, batch: Dict, criterion: torch.nn.Module):
+        """A single training step. Forward pass and loss computation.
+
+        Args:
+            batch ([Dict]): A dictionary of input tensors.
+            criterion ([type]): Callable criterion to compute model loss.
+
+        Returns:
+            Tuple: the model outputs dictionary and the loss dictionary, the latter extended
+                with `align_error`.
+        """
+        text_input = batch["text_input"]
+        text_lengths = batch["text_lengths"]
+        mel_input = batch["mel_input"]
+        mel_lengths = batch["mel_lengths"]
+        stop_targets = batch["stop_targets"]
+        stop_target_lengths = batch["stop_target_lengths"]
+        speaker_ids = batch["speaker_ids"]
+        d_vectors = batch["d_vectors"]
+
+        aux_input = {"speaker_ids": speaker_ids, "d_vectors": d_vectors}
+        outputs = self.forward(text_input, text_lengths, mel_input, mel_lengths, aux_input)
+
+        # set the [alignment] lengths wrt reduction factor for guided attention
+        # when the longest mel length is not a multiple of `r`, every length is first increased by
+        # the amount that rounds the longest one up to the next multiple of `r`
+        if mel_lengths.max() % self.decoder.r != 0:
+            alignment_lengths = (
+                mel_lengths + (self.decoder.r - (mel_lengths.max() % self.decoder.r))
+            ) // self.decoder.r
+        else:
+            alignment_lengths = mel_lengths // self.decoder.r
+
+        # compute loss
+        with autocast(enabled=False):  # use float32 for the criterion
+            loss_dict = criterion(
+                outputs["model_outputs"].float(),
+                outputs["decoder_outputs"].float(),
+                mel_input.float(),
+                None,
+                outputs["stop_tokens"].float(),
+                stop_targets.float(),
+                stop_target_lengths,
+                outputs["capacitron_vae_outputs"] if self.capacitron_vae else None,
+                mel_lengths,
+                None if outputs["decoder_outputs_backward"] is None else outputs["decoder_outputs_backward"].float(),
+                outputs["alignments"].float(),
+                alignment_lengths,
+                None if outputs["alignments_backward"] is None else outputs["alignments_backward"].float(),
+                text_lengths,
+            )
+
+        # compute alignment error (the lower the better )
+        align_error = 1 - alignment_diagonal_score(outputs["alignments"])
+        loss_dict["align_error"] = align_error
+        return outputs, loss_dict
+
+    # NOTE(review): annotated `-> List`, but both branches return a single optimizer object.
+    def get_optimizer(self) -> List:
+        """Return a `CapacitronOptimizer` when Capacitron is enabled, else the optimizer named in the config."""
+        if self.use_capacitron_vae:
+            return CapacitronOptimizer(self.config, self.named_parameters())
+        return get_optimizer(self.config.optimizer, self.config.optimizer_params, self.config.lr, self)
+
+    def get_scheduler(self, optimizer: object):
+        """Build the LR scheduler from the config.
+
+        With Capacitron enabled the scheduler is attached to `optimizer.primary_optimizer`
+        instead of `optimizer` itself.
+        """
+        opt = optimizer.primary_optimizer if self.use_capacitron_vae else optimizer
+        return get_scheduler(self.config.lr_scheduler, self.config.lr_scheduler_params, opt)
+
+    def before_gradient_clipping(self):
+        """Capacitron-only hook: clip the gradient norm of every trainable parameter except
+        `capacitron_vae_layer.beta`, using `capacitron_vae.capacitron_grad_clip` as the max norm.
+
+        Does nothing unless `use_capacitron_vae` is set.
+        """
+        if self.use_capacitron_vae:
+            # Capacitron model specific gradient clipping
+            model_params_to_clip = []
+            for name, param in self.named_parameters():
+                if param.requires_grad:
+                    if name != "capacitron_vae_layer.beta":
+                        model_params_to_clip.append(param)
+            torch.nn.utils.clip_grad_norm_(model_params_to_clip, self.capacitron_vae.capacitron_grad_clip)
+
+    def _create_logs(self, batch, outputs, ap):
+        """Create dashboard log information.
+
+        Only the first item of the batch is plotted and synthesized.
+
+        Returns:
+            Tuple: a dict of figures (`prediction`, `ground_truth`, `alignment` and, when a
+                backward/coarse decoder is enabled, `alignment_backward`) and a dict holding the
+                sample under the key `audio`.
+        """
+        postnet_outputs = outputs["model_outputs"]
+        alignments = outputs["alignments"]
+        alignments_backward = outputs["alignments_backward"]
+        mel_input = batch["mel_input"]
+
+        pred_spec = postnet_outputs[0].data.cpu().numpy()
+        gt_spec = mel_input[0].data.cpu().numpy()
+        align_img = alignments[0].data.cpu().numpy()
+
+        figures = {
+            "prediction": plot_spectrogram(pred_spec, ap, output_fig=False),
+            "ground_truth": plot_spectrogram(gt_spec, ap, output_fig=False),
+            "alignment": plot_alignment(align_img, output_fig=False),
+        }
+
+        if self.bidirectional_decoder or self.double_decoder_consistency:
+            figures["alignment_backward"] = plot_alignment(alignments_backward[0].data.cpu().numpy(), output_fig=False)
+
+        # Sample audio
+        # (the predicted mel spectrogram is transposed before being inverted to a waveform)
+        audio = ap.inv_melspectrogram(pred_spec.T)
+        return figures, {"audio": audio}
+
+    def train_log(
+        self, batch: dict, outputs: dict, logger: "Logger", assets: dict, steps: int
+    ) -> None:  # pylint: disable=no-self-use
+        """Log training progress."""
+        figures, audios = self._create_logs(batch, outputs, self.ap)
+        logger.train_figures(steps, figures)
+        logger.train_audios(steps, audios, self.ap.sample_rate)
+
+    def eval_step(self, batch: dict, criterion: nn.Module):
+        """Evaluation step; delegates to `train_step` and returns its result."""
+        return self.train_step(batch, criterion)
+
+    def eval_log(self, batch: dict, outputs: dict, logger: "Logger", assets: dict, steps: int) -> None:
+        """Log evaluation progress."""
+        figures, audios = self._create_logs(batch, outputs, self.ap)
+        logger.eval_figures(steps, figures)
+        logger.eval_audios(steps, audios, self.ap.sample_rate)
+
+    @staticmethod
+    def init_from_config(config: "Tacotron2Config", samples: Union[List[List], List[Dict]] = None):
+        """Initiate model from config
+
+        Args:
+            config (Tacotron2Config): Model config.
+            samples (Union[List[List], List[Dict]]): Training samples to parse speaker ids for training.
+                Defaults to None.
+
+        Returns:
+            Tacotron2: Model built with the config object returned by the tokenizer initialization.
+        """
+        from TTS.utils.audio import AudioProcessor
+
+        ap = AudioProcessor.init_from_config(config)
+        # the tokenizer returns its own config object; it is used for everything built afterwards
+        tokenizer, new_config = TTSTokenizer.init_from_config(config)
+        speaker_manager = SpeakerManager.init_from_config(new_config, samples)
+        return Tacotron2(new_config, ap, tokenizer, speaker_manager)
diff --git a/TTS/tts/models/vits.py b/TTS/tts/models/vits.py
new file mode 100644
index 0000000000000000000000000000000000000000..4959d7baa153ec145cfc4deafc74f7ce654d9d9a
--- /dev/null
+++ b/TTS/tts/models/vits.py
@@ -0,0 +1,1803 @@
+import math
+import os
+from dataclasses import dataclass, field, replace
+from itertools import chain
+from typing import Dict, List, Tuple, Union
+
+import numpy as np
+import torch
+import torch.distributed as dist
+import torchaudio
+from coqpit import Coqpit
+from librosa.filters import mel as librosa_mel_fn
+from torch import nn
+from torch.cuda.amp.autocast_mode import autocast
+from torch.nn import functional as F
+from torch.utils.data import DataLoader
+from torch.utils.data.sampler import WeightedRandomSampler
+from trainer.torch import DistributedSampler, DistributedSamplerWrapper
+from trainer.trainer_utils import get_optimizer, get_scheduler
+
+from TTS.tts.configs.shared_configs import CharactersConfig
+from TTS.tts.datasets.dataset import TTSDataset, _parse_sample
+from TTS.tts.layers.glow_tts.duration_predictor import DurationPredictor
+from TTS.tts.layers.vits.discriminator import VitsDiscriminator
+from TTS.tts.layers.vits.networks import PosteriorEncoder, ResidualCouplingBlocks, TextEncoder
+from TTS.tts.layers.vits.stochastic_duration_predictor import StochasticDurationPredictor
+from TTS.tts.models.base_tts import BaseTTS
+from TTS.tts.utils.helpers import generate_path, maximum_path, rand_segments, segment, sequence_mask
+from TTS.tts.utils.languages import LanguageManager
+from TTS.tts.utils.speakers import SpeakerManager
+from TTS.tts.utils.synthesis import synthesis
+from TTS.tts.utils.text.characters import BaseCharacters, _characters, _pad, _phonemes, _punctuations
+from TTS.tts.utils.text.tokenizer import TTSTokenizer
+from TTS.tts.utils.visual import plot_alignment
+from TTS.utils.io import load_fsspec
+from TTS.utils.samplers import BucketBatchSampler
+from TTS.vocoder.models.hifigan_generator import HifiganGenerator
+from TTS.vocoder.utils.generic_utils import plot_results
+
+##############################
+# IO / Feature extraction
+##############################
+
+# pylint: disable=global-statement
+hann_window = {}
+mel_basis = {}
+
+
+@torch.no_grad()
+def weights_reset(m: nn.Module):
+    # check if the current module has reset_parameters and if it is reset the weight
+    reset_parameters = getattr(m, "reset_parameters", None)
+    if callable(reset_parameters):
+        m.reset_parameters()
+
+
+def get_module_weights_sum(mdl: nn.Module):
+    dict_sums = {}
+    for name, w in mdl.named_parameters():
+        if "weight" in name:
+            value = w.data.sum().item()
+            dict_sums[name] = value
+    return dict_sums
+
+
+def load_audio(file_path):
+    """Load the audio file normalized in [-1, 1]
+
+    Return Shapes:
+        - x: :math:`[1, T]`
+    """
+    x, sr = torchaudio.load(file_path)
+    assert (x > 1).sum() + (x < -1).sum() == 0
+    return x, sr
+
+
+def _amp_to_db(x, C=1, clip_val=1e-5):
+    return torch.log(torch.clamp(x, min=clip_val) * C)
+
+
+def _db_to_amp(x, C=1):
+    return torch.exp(x) / C
+
+
+def amp_to_db(magnitudes):
+    output = _amp_to_db(magnitudes)
+    return output
+
+
+def db_to_amp(magnitudes):
+    output = _db_to_amp(magnitudes)
+    return output
+
+
+def wav_to_spec(y, n_fft, hop_length, win_length, center=False):
+    """
+    Args Shapes:
+        - y : :math:`[B, 1, T]`
+
+    Return Shapes:
+        - spec : :math:`[B,C,T]`
+    """
+    y = y.squeeze(1)
+
+    if torch.min(y) < -1.0:
+        print("min value is ", torch.min(y))
+    if torch.max(y) > 1.0:
+        print("max value is ", torch.max(y))
+
+    global hann_window
+    dtype_device = str(y.dtype) + "_" + str(y.device)
+    wnsize_dtype_device = str(win_length) + "_" + dtype_device
+    if wnsize_dtype_device not in hann_window:
+        hann_window[wnsize_dtype_device] = torch.hann_window(win_length).to(dtype=y.dtype, device=y.device)
+
+    y = torch.nn.functional.pad(
+        y.unsqueeze(1),
+        (int((n_fft - hop_length) / 2), int((n_fft - hop_length) / 2)),
+        mode="reflect",
+    )
+    y = y.squeeze(1)
+
+    spec = torch.stft(
+        y,
+        n_fft,
+        hop_length=hop_length,
+        win_length=win_length,
+        window=hann_window[wnsize_dtype_device],
+        center=center,
+        pad_mode="reflect",
+        normalized=False,
+        onesided=True,
+    )
+
+    spec = torch.sqrt(spec.pow(2).sum(-1) + 1e-6)
+    return spec
+
+
+def spec_to_mel(spec, n_fft, num_mels, sample_rate, fmin, fmax):
+    """
+    Args Shapes:
+        - spec : :math:`[B,C,T]`
+
+    Return Shapes:
+        - mel : :math:`[B,C,T]`
+    """
+    global mel_basis
+    dtype_device = str(spec.dtype) + "_" + str(spec.device)
+    fmax_dtype_device = str(fmax) + "_" + dtype_device
+    if fmax_dtype_device not in mel_basis:
+        mel = librosa_mel_fn(sample_rate, n_fft, num_mels, fmin, fmax)
+        mel_basis[fmax_dtype_device] = torch.from_numpy(mel).to(dtype=spec.dtype, device=spec.device)
+    mel = torch.matmul(mel_basis[fmax_dtype_device], spec)
+    mel = amp_to_db(mel)
+    return mel
+
+
+def wav_to_mel(y, n_fft, num_mels, sample_rate, hop_length, win_length, fmin, fmax, center=False):
+    """
+    Args Shapes:
+        - y : :math:`[B, 1, T]`
+
+    Return Shapes:
+        - spec : :math:`[B,C,T]`
+    """
+    y = y.squeeze(1)
+
+    if torch.min(y) < -1.0:
+        print("min value is ", torch.min(y))
+    if torch.max(y) > 1.0:
+        print("max value is ", torch.max(y))
+
+    global mel_basis, hann_window
+    dtype_device = str(y.dtype) + "_" + str(y.device)
+    fmax_dtype_device = str(fmax) + "_" + dtype_device
+    wnsize_dtype_device = str(win_length) + "_" + dtype_device
+    if fmax_dtype_device not in mel_basis:
+        mel = librosa_mel_fn(sample_rate, n_fft, num_mels, fmin, fmax)
+        mel_basis[fmax_dtype_device] = torch.from_numpy(mel).to(dtype=y.dtype, device=y.device)
+    if wnsize_dtype_device not in hann_window:
+        hann_window[wnsize_dtype_device] = torch.hann_window(win_length).to(dtype=y.dtype, device=y.device)
+
+    y = torch.nn.functional.pad(
+        y.unsqueeze(1),
+        (int((n_fft - hop_length) / 2), int((n_fft - hop_length) / 2)),
+        mode="reflect",
+    )
+    y = y.squeeze(1)
+
+    spec = torch.stft(
+        y,
+        n_fft,
+        hop_length=hop_length,
+        win_length=win_length,
+        window=hann_window[wnsize_dtype_device],
+        center=center,
+        pad_mode="reflect",
+        normalized=False,
+        onesided=True,
+    )
+
+    spec = torch.sqrt(spec.pow(2).sum(-1) + 1e-6)
+    spec = torch.matmul(mel_basis[fmax_dtype_device], spec)
+    spec = amp_to_db(spec)
+    return spec
+
+
+#############################
+# CONFIGS
+#############################
+
+
+@dataclass
+class VitsAudioConfig(Coqpit):
+    """Audio settings for VITS, declared as a Coqpit dataclass.
+
+    NOTE(review): the field meanings below are inferred from the field names and from the
+    parameters of the feature-extraction functions in this file; confirm against the call sites.
+
+    Args:
+        fft_size (int): Presumably the FFT size (`n_fft`). Defaults to 1024.
+        sample_rate (int): Audio sampling rate. Defaults to 22050.
+        win_length (int): Presumably the STFT window length in samples. Defaults to 1024.
+        hop_length (int): Presumably the STFT hop length in samples. Defaults to 256.
+        num_mels (int): Number of mel bands. Defaults to 80.
+        mel_fmin (int): Presumably the lowest mel filterbank frequency. Defaults to 0.
+        mel_fmax (int): Presumably the highest mel filterbank frequency. Defaults to None.
+    """
+
+    fft_size: int = 1024
+    sample_rate: int = 22050
+    win_length: int = 1024
+    hop_length: int = 256
+    num_mels: int = 80
+    mel_fmin: int = 0
+    mel_fmax: int = None
+
+
+##############################
+# DATASET
+##############################
+
+
+def get_attribute_balancer_weights(items: list, attr_name: str, multi_dict: dict = None):
+    """Create inverse frequency weights for balancing the dataset.
+    Use `multi_dict` to scale relative weights."""
+    attr_names_samples = np.array([item[attr_name] for item in items])
+    unique_attr_names = np.unique(attr_names_samples).tolist()
+    attr_idx = [unique_attr_names.index(l) for l in attr_names_samples]
+    attr_count = np.array([len(np.where(attr_names_samples == l)[0]) for l in unique_attr_names])
+    weight_attr = 1.0 / attr_count
+    dataset_samples_weight = np.array([weight_attr[l] for l in attr_idx])
+    dataset_samples_weight = dataset_samples_weight / np.linalg.norm(dataset_samples_weight)
+    if multi_dict is not None:
+        # check if all keys are in the multi_dict
+        for k in multi_dict:
+            assert k in unique_attr_names, f"{k} not in {unique_attr_names}"
+        # scale weights
+        multiplier_samples = np.array([multi_dict.get(item[attr_name], 1.0) for item in items])
+        dataset_samples_weight *= multiplier_samples
+    return (
+        torch.from_numpy(dataset_samples_weight).float(),
+        unique_attr_names,
+        np.unique(dataset_samples_weight).tolist(),
+    )
+
+
+class VitsDataset(TTSDataset):
+    def __init__(self, model_args, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.pad_id = self.tokenizer.characters.pad_id
+        self.model_args = model_args
+
+    def __getitem__(self, idx):
+        item = self.samples[idx]
+        raw_text = item["text"]
+
+        wav, _ = load_audio(item["audio_file"])
+        if self.model_args.encoder_sample_rate is not None:
+            if wav.size(1) % self.model_args.encoder_sample_rate != 0:
+                wav = wav[:, : -int(wav.size(1) % self.model_args.encoder_sample_rate)]
+
+        wav_filename = os.path.basename(item["audio_file"])
+
+        token_ids = self.get_token_ids(idx, item["text"])
+
+        # after phonemization the text length may change
+        # this is a shameful 🤭 hack to prevent longer phonemes
+        # TODO: find a better fix
+        if len(token_ids) > self.max_text_len or wav.shape[1] < self.min_audio_len:
+            self.rescue_item_idx += 1
+            return self.__getitem__(self.rescue_item_idx)
+
+        return {
+            "raw_text": raw_text,
+            "token_ids": token_ids,
+            "token_len": len(token_ids),
+            "wav": wav,
+            "wav_file": wav_filename,
+            "speaker_name": item["speaker_name"],
+            "language_name": item["language"],
+            "audio_unique_name": item["audio_unique_name"],
+        }
+
+    @property
+    def lengths(self):
+        lens = []
+        for item in self.samples:
+            _, wav_file, *_ = _parse_sample(item)
+            audio_len = os.path.getsize(wav_file) / 16 * 8  # assuming 16bit audio
+            lens.append(audio_len)
+        return lens
+
+    def collate_fn(self, batch):
+        """
+        Return Shapes:
+            - tokens: :math:`[B, T]`
+            - token_lens :math:`[B]`
+            - token_rel_lens :math:`[B]`
+            - waveform: :math:`[B, 1, T]`
+            - waveform_lens: :math:`[B]`
+            - waveform_rel_lens: :math:`[B]`
+            - speaker_names: :math:`[B]`
+            - language_names: :math:`[B]`
+            - audiofile_paths: :math:`[B]`
+            - raw_texts: :math:`[B]`
+            - audio_unique_names: :math:`[B]`
+        """
+        # convert list of dicts to dict of lists
+        B = len(batch)
+        batch = {k: [dic[k] for dic in batch] for k in batch[0]}
+
+        _, ids_sorted_decreasing = torch.sort(
+            torch.LongTensor([x.size(1) for x in batch["wav"]]), dim=0, descending=True
+        )
+
+        max_text_len = max([len(x) for x in batch["token_ids"]])
+        token_lens = torch.LongTensor(batch["token_len"])
+        token_rel_lens = token_lens / token_lens.max()
+
+        wav_lens = [w.shape[1] for w in batch["wav"]]
+        wav_lens = torch.LongTensor(wav_lens)
+        wav_lens_max = torch.max(wav_lens)
+        wav_rel_lens = wav_lens / wav_lens_max
+
+        token_padded = torch.LongTensor(B, max_text_len)
+        wav_padded = torch.FloatTensor(B, 1, wav_lens_max)
+        token_padded = token_padded.zero_() + self.pad_id
+        wav_padded = wav_padded.zero_() + self.pad_id
+        for i in range(len(ids_sorted_decreasing)):
+            token_ids = batch["token_ids"][i]
+            token_padded[i, : batch["token_len"][i]] = torch.LongTensor(token_ids)
+
+            wav = batch["wav"][i]
+            wav_padded[i, :, : wav.size(1)] = torch.FloatTensor(wav)
+
+        return {
+            "tokens": token_padded,
+            "token_lens": token_lens,
+            "token_rel_lens": token_rel_lens,
+            "waveform": wav_padded,  # (B x T)
+            "waveform_lens": wav_lens,  # (B)
+            "waveform_rel_lens": wav_rel_lens,
+            "speaker_names": batch["speaker_name"],
+            "language_names": batch["language_name"],
+            "audio_files": batch["wav_file"],
+            "raw_text": batch["raw_text"],
+            "audio_unique_names": batch["audio_unique_name"],
+        }
+
+
+##############################
+# MODEL DEFINITION
+##############################
+
+
+@dataclass
+class VitsArgs(Coqpit):
+    """VITS model arguments.
+
+    Args:
+
+        num_chars (int):
+            Number of characters in the vocabulary. Defaults to 100.
+
+        out_channels (int):
+            Number of output channels of the decoder. Defaults to 513.
+
+        spec_segment_size (int):
+            Decoder input segment size. Defaults to 32 `(32 * hoplength = waveform length)`.
+
+        hidden_channels (int):
+            Number of hidden channels of the model. Defaults to 192.
+
+        hidden_channels_ffn_text_encoder (int):
+            Number of hidden channels of the feed-forward layers of the text encoder transformer. Defaults to 768.
+
+        num_heads_text_encoder (int):
+            Number of attention heads of the text encoder transformer. Defaults to 2.
+
+        num_layers_text_encoder (int):
+            Number of transformer layers in the text encoder. Defaults to 6.
+
+        kernel_size_text_encoder (int):
+            Kernel size of the text encoder transformer FFN layers. Defaults to 3.
+
+        dropout_p_text_encoder (float):
+            Dropout rate of the text encoder. Defaults to 0.1.
+
+        dropout_p_duration_predictor (float):
+            Dropout rate of the duration predictor. Defaults to 0.5.
+
+        kernel_size_posterior_encoder (int):
+            Kernel size of the posterior encoder's WaveNet layers. Defaults to 5.
+
+        dilation_rate_posterior_encoder (int):
+            Dilation rate of the posterior encoder's WaveNet layers. Defaults to 1.
+
+        num_layers_posterior_encoder (int):
+            Number of posterior encoder's WaveNet layers. Defaults to 16.
+
+        kernel_size_flow (int):
+            Kernel size of the Residual Coupling layers of the flow network. Defaults to 5.
+
+        dilation_rate_flow (int):
+            Dilation rate of the Residual Coupling WaveNet layers of the flow network. Defaults to 1.
+
+        num_layers_flow (int):
+            Number of Residual Coupling WaveNet layers of the flow network. Defaults to 4.
+
+        resblock_type_decoder (str):
+            Type of the residual block in the decoder network. Defaults to "1".
+
+        resblock_kernel_sizes_decoder (List[int]):
+            Kernel sizes of the residual blocks in the decoder network. Defaults to `[3, 7, 11]`.
+
+        resblock_dilation_sizes_decoder (List[List[int]]):
+            Dilation sizes of the residual blocks in the decoder network. Defaults to `[[1, 3, 5], [1, 3, 5], [1, 3, 5]]`.
+
+        upsample_rates_decoder (List[int]):
+            Upsampling rates for each consecutive upsampling layer in the decoder network. The product of these
+            values must be equal to the hop length used for computing spectrograms. Defaults to `[8, 8, 2, 2]`.
+
+        upsample_initial_channel_decoder (int):
+            Number of hidden channels of the first upsampling convolution layer of the decoder network. Defaults to 512.
+
+        upsample_kernel_sizes_decoder (List[int]):
+            Kernel sizes for each upsampling layer of the decoder network. Defaults to `[16, 16, 4, 4]`.
+
+        periods_multi_period_discriminator (List[int]):
+            Periods values for Vits Multi-Period Discriminator. Defaults to `[2, 3, 5, 7, 11]`.
+
+        use_sdp (bool):
+            Use Stochastic Duration Predictor. Defaults to True.
+
+        noise_scale (float):
+            Noise scale used for the sample noise tensor in training. Defaults to 1.0.
+
+        inference_noise_scale (float):
+            Noise scale used for the sample noise tensor in inference. Defaults to 0.667.
+
+        length_scale (float):
+            Scale factor for the predicted duration values. Smaller values result in faster speech. Defaults to 1.
+
+        noise_scale_dp (float):
+            Noise scale used by the Stochastic Duration Predictor sample noise in training. Defaults to 1.0.
+
+        inference_noise_scale_dp (float):
+            Noise scale for the Stochastic Duration Predictor in inference. Defaults to 1.0.
+
+        max_inference_len (int):
+            Maximum inference length to limit the memory use. Defaults to None.
+
+        init_discriminator (bool):
+            Initialize the discriminator network if set True. Set False for inference. Defaults to True.
+
+        use_spectral_norm_disriminator (bool):
+            Use spectral normalization over weight norm in the discriminator. Defaults to False.
+
+        use_speaker_embedding (bool):
+            Enable/Disable speaker embedding for multi-speaker models. Defaults to False.
+
+        num_speakers (int):
+            Number of speakers for the speaker embedding layer. Defaults to 0.
+
+        speakers_file (str):
+            Path to the speaker mapping file for the Speaker Manager. Defaults to None.
+
+        speaker_embedding_channels (int):
+            Number of speaker embedding channels. Defaults to 256.
+
+        use_d_vector_file (bool):
+            Enable/Disable the use of d-vectors for multi-speaker training. Defaults to False.
+
+        d_vector_file (str):
+            Path to the file including pre-computed speaker embeddings. Defaults to None.
+
+        d_vector_dim (int):
+            Number of d-vector channels. Defaults to 0.
+
+        detach_dp_input (bool):
+            Detach duration predictor's input from the network for stopping the gradients. Defaults to True.
+
+        use_language_embedding (bool):
+            Enable/Disable language embedding for multilingual models. Defaults to False.
+
+        embedded_language_dim (int):
+            Number of language embedding channels. Defaults to 4.
+
+        num_languages (int):
+            Number of languages for the language embedding layer. Defaults to 0.
+
+        language_ids_file (str):
+            Path to the language mapping file for the Language Manager. Defaults to None.
+
+        use_speaker_encoder_as_loss (bool):
+            Enable/Disable Speaker Consistency Loss (SCL). Defaults to False.
+
+        speaker_encoder_config_path (str):
+            Path to the speaker encoder config file, to use for SCL. Defaults to "".
+
+        speaker_encoder_model_path (str):
+            Path to the speaker encoder checkpoint file, to use for SCL. Defaults to "".
+
+        condition_dp_on_speaker (bool):
+            Condition the duration predictor on the speaker embedding. Defaults to True.
+
+        freeze_encoder (bool):
+            Freeze the encoder weights during training. Defaults to False.
+
+        freeze_DP (bool):
+            Freeze the duration predictor weights during training. Defaults to False.
+
+        freeze_PE (bool):
+            Freeze the posterior encoder weights during training. Defaults to False.
+
+        freeze_flow_decoder (bool):
+            Freeze the flow network weights during training. Defaults to False.
+
+        freeze_waveform_decoder (bool):
+            Freeze the waveform decoder weights during training. Defaults to False.
+
+        encoder_sample_rate (int):
+            If not None this sample rate will be used for training the Posterior Encoder,
+            flow, text_encoder and duration predictor. The decoder part (vocoder) will be
+            trained with the `config.audio.sample_rate`. Defaults to None.
+
+        interpolate_z (bool):
+            If `encoder_sample_rate` is not None and this parameter is True, linear interpolation
+            will be used to upsample the latent variable z from the sampling rate `encoder_sample_rate`
+            to the `config.audio.sample_rate`. If it is False you will need to add extra
+            `upsample_rates_decoder` to match the shape. Defaults to True.
+
+        reinit_DP (bool):
+            Re-initialize the duration predictor weights in `on_init_end`. Defaults to False.
+
+        reinit_text_encoder (bool):
+            Re-initialize the text encoder weights in `on_init_end`. Defaults to False.
+
+    """
+
+    num_chars: int = 100
+    out_channels: int = 513
+    spec_segment_size: int = 32
+    hidden_channels: int = 192
+    hidden_channels_ffn_text_encoder: int = 768
+    num_heads_text_encoder: int = 2
+    num_layers_text_encoder: int = 6
+    kernel_size_text_encoder: int = 3
+    dropout_p_text_encoder: float = 0.1
+    dropout_p_duration_predictor: float = 0.5
+    kernel_size_posterior_encoder: int = 5
+    dilation_rate_posterior_encoder: int = 1
+    num_layers_posterior_encoder: int = 16
+    kernel_size_flow: int = 5
+    dilation_rate_flow: int = 1
+    num_layers_flow: int = 4
+    resblock_type_decoder: str = "1"
+    resblock_kernel_sizes_decoder: List[int] = field(default_factory=lambda: [3, 7, 11])
+    resblock_dilation_sizes_decoder: List[List[int]] = field(default_factory=lambda: [[1, 3, 5], [1, 3, 5], [1, 3, 5]])
+    upsample_rates_decoder: List[int] = field(default_factory=lambda: [8, 8, 2, 2])
+    upsample_initial_channel_decoder: int = 512
+    upsample_kernel_sizes_decoder: List[int] = field(default_factory=lambda: [16, 16, 4, 4])
+    periods_multi_period_discriminator: List[int] = field(default_factory=lambda: [2, 3, 5, 7, 11])
+    use_sdp: bool = True
+    noise_scale: float = 1.0
+    inference_noise_scale: float = 0.667
+    length_scale: float = 1
+    noise_scale_dp: float = 1.0
+    inference_noise_scale_dp: float = 1.0
+    max_inference_len: int = None
+    init_discriminator: bool = True
+    use_spectral_norm_disriminator: bool = False
+    use_speaker_embedding: bool = False
+    num_speakers: int = 0
+    speakers_file: str = None
+    d_vector_file: str = None
+    speaker_embedding_channels: int = 256
+    use_d_vector_file: bool = False
+    d_vector_dim: int = 0
+    detach_dp_input: bool = True
+    use_language_embedding: bool = False
+    embedded_language_dim: int = 4
+    num_languages: int = 0
+    language_ids_file: str = None
+    use_speaker_encoder_as_loss: bool = False
+    speaker_encoder_config_path: str = ""
+    speaker_encoder_model_path: str = ""
+    condition_dp_on_speaker: bool = True
+    freeze_encoder: bool = False
+    freeze_DP: bool = False
+    freeze_PE: bool = False
+    freeze_flow_decoder: bool = False
+    freeze_waveform_decoder: bool = False
+    encoder_sample_rate: int = None
+    interpolate_z: bool = True
+    reinit_DP: bool = False
+    reinit_text_encoder: bool = False
+
+
+class Vits(BaseTTS):
+    """VITS TTS model
+
+    Paper::
+        https://arxiv.org/pdf/2106.06103.pdf
+
+    Paper Abstract::
+        Several recent end-to-end text-to-speech (TTS) models enabling single-stage training and parallel
+        sampling have been proposed, but their sample quality does not match that of two-stage TTS systems.
+        In this work, we present a parallel end-to-end TTS method that generates more natural sounding audio than
+        current two-stage models. Our method adopts variational inference augmented with normalizing flows and
+        an adversarial training process, which improves the expressive power of generative modeling. We also propose a
+        stochastic duration predictor to synthesize speech with diverse rhythms from input text. With the
+        uncertainty modeling over latent variables and the stochastic duration predictor, our method expresses the
+        natural one-to-many relationship in which a text input can be spoken in multiple ways
+        with different pitches and rhythms. A subjective human evaluation (mean opinion score, or MOS)
+        on the LJ Speech, a single speaker dataset, shows that our method outperforms the best publicly
+        available TTS systems and achieves a MOS comparable to ground truth.
+
+    Check :class:`TTS.tts.configs.vits_config.VitsConfig` for class arguments.
+
+    Examples:
+        >>> from TTS.tts.configs.vits_config import VitsConfig
+        >>> from TTS.tts.models.vits import Vits
+        >>> config = VitsConfig()
+        >>> model = Vits(config)
+    """
+
+    def __init__(
+        self,
+        config: Coqpit,
+        ap: "AudioProcessor" = None,
+        tokenizer: "TTSTokenizer" = None,
+        speaker_manager: SpeakerManager = None,
+        language_manager: LanguageManager = None,
+    ):
+        """Build the VITS sub-networks from the model arguments.
+
+        Args:
+            config (Coqpit): Model configuration. Forwarded to the parent constructor and to the
+                multi-speaker / multilingual initializers.
+            ap (AudioProcessor, optional): Forwarded to the parent constructor. Defaults to None.
+            tokenizer (TTSTokenizer, optional): Forwarded to the parent constructor. Defaults to None.
+            speaker_manager (SpeakerManager, optional): Forwarded to the parent constructor. Defaults to None.
+            language_manager (LanguageManager, optional): Forwarded to the parent constructor. Defaults to None.
+        """
+
+        super().__init__(config, ap, tokenizer, speaker_manager, language_manager)
+
+        # These calls set `self.embedded_speaker_dim` and `self.embedded_language_dim`,
+        # which the sub-networks below read, so they must run first.
+        self.init_multispeaker(config)
+        self.init_multilingual(config)
+        self.init_upsampling()
+
+        # copy the scalar settings used at training / inference time onto the model
+        self.length_scale = self.args.length_scale
+        self.noise_scale = self.args.noise_scale
+        self.inference_noise_scale = self.args.inference_noise_scale
+        self.inference_noise_scale_dp = self.args.inference_noise_scale_dp
+        self.noise_scale_dp = self.args.noise_scale_dp
+        self.max_inference_len = self.args.max_inference_len
+        self.spec_segment_size = self.args.spec_segment_size
+
+        self.text_encoder = TextEncoder(
+            self.args.num_chars,
+            self.args.hidden_channels,
+            self.args.hidden_channels,
+            self.args.hidden_channels_ffn_text_encoder,
+            self.args.num_heads_text_encoder,
+            self.args.num_layers_text_encoder,
+            self.args.kernel_size_text_encoder,
+            self.args.dropout_p_text_encoder,
+            language_emb_dim=self.embedded_language_dim,
+        )
+
+        self.posterior_encoder = PosteriorEncoder(
+            self.args.out_channels,
+            self.args.hidden_channels,
+            self.args.hidden_channels,
+            kernel_size=self.args.kernel_size_posterior_encoder,
+            dilation_rate=self.args.dilation_rate_posterior_encoder,
+            num_layers=self.args.num_layers_posterior_encoder,
+            cond_channels=self.embedded_speaker_dim,
+        )
+
+        self.flow = ResidualCouplingBlocks(
+            self.args.hidden_channels,
+            self.args.hidden_channels,
+            kernel_size=self.args.kernel_size_flow,
+            dilation_rate=self.args.dilation_rate_flow,
+            num_layers=self.args.num_layers_flow,
+            cond_channels=self.embedded_speaker_dim,
+        )
+
+        # The stochastic predictor is speaker-conditioned only when `condition_dp_on_speaker` is set;
+        # the deterministic predictor below always receives `embedded_speaker_dim`.
+        if self.args.use_sdp:
+            self.duration_predictor = StochasticDurationPredictor(
+                self.args.hidden_channels,
+                192,
+                3,
+                self.args.dropout_p_duration_predictor,
+                4,
+                cond_channels=self.embedded_speaker_dim if self.args.condition_dp_on_speaker else 0,
+                language_emb_dim=self.embedded_language_dim,
+            )
+        else:
+            self.duration_predictor = DurationPredictor(
+                self.args.hidden_channels,
+                256,
+                3,
+                self.args.dropout_p_duration_predictor,
+                cond_channels=self.embedded_speaker_dim,
+                language_emb_dim=self.embedded_language_dim,
+            )
+
+        self.waveform_decoder = HifiganGenerator(
+            self.args.hidden_channels,
+            1,
+            self.args.resblock_type_decoder,
+            self.args.resblock_dilation_sizes_decoder,
+            self.args.resblock_kernel_sizes_decoder,
+            self.args.upsample_kernel_sizes_decoder,
+            self.args.upsample_initial_channel_decoder,
+            self.args.upsample_rates_decoder,
+            inference_padding=0,
+            cond_channels=self.embedded_speaker_dim,
+            conv_pre_weight_norm=False,
+            conv_post_weight_norm=False,
+            conv_post_bias=False,
+        )
+
+        # the discriminator is only created when `init_discriminator` is set
+        if self.args.init_discriminator:
+            self.disc = VitsDiscriminator(
+                periods=self.args.periods_multi_period_discriminator,
+                use_spectral_norm=self.args.use_spectral_norm_disriminator,
+            )
+
+    @property
+    def device(self):
+        return next(self.parameters()).device
+
+    def init_multispeaker(self, config: Coqpit):
+        """Initialize multi-speaker modules of a model. A model can be trained either with a speaker embedding layer
+        or with external `d_vectors` computed from a speaker encoder model.
+
+        You must provide a `speaker_manager` at initialization to set up the multi-speaker modules.
+
+        Args:
+            config (Coqpit): Model configuration.
+            data (List, optional): Dataset items to infer number of speakers. Defaults to None.
+        """
+        self.embedded_speaker_dim = 0
+        self.num_speakers = self.args.num_speakers
+        self.audio_transform = None
+
+        if self.speaker_manager:
+            self.num_speakers = self.speaker_manager.num_speakers
+
+        if self.args.use_speaker_embedding:
+            self._init_speaker_embedding()
+
+        if self.args.use_d_vector_file:
+            self._init_d_vector()
+
+        # TODO: make this a function
+        if self.args.use_speaker_encoder_as_loss:
+            if self.speaker_manager.encoder is None and (
+                not self.args.speaker_encoder_model_path or not self.args.speaker_encoder_config_path
+            ):
+                raise RuntimeError(
+                    " [!] To use the speaker consistency loss (SCL) you need to specify speaker_encoder_model_path and speaker_encoder_config_path !!"
+                )
+
+            self.speaker_manager.encoder.eval()
+            print(" > External Speaker Encoder Loaded !!")
+
+            if (
+                hasattr(self.speaker_manager.encoder, "audio_config")
+                and self.config.audio.sample_rate != self.speaker_manager.encoder.audio_config["sample_rate"]
+            ):
+                self.audio_transform = torchaudio.transforms.Resample(
+                    orig_freq=self.config.audio.sample_rate,
+                    new_freq=self.speaker_manager.encoder.audio_config["sample_rate"],
+                )
+
+    def _init_speaker_embedding(self):
+        # pylint: disable=attribute-defined-outside-init
+        if self.num_speakers > 0:
+            print(" > initialization of speaker-embedding layers.")
+            self.embedded_speaker_dim = self.args.speaker_embedding_channels
+            self.emb_g = nn.Embedding(self.num_speakers, self.embedded_speaker_dim)
+
+    def _init_d_vector(self):
+        # pylint: disable=attribute-defined-outside-init
+        if hasattr(self, "emb_g"):
+            raise ValueError("[!] Speaker embedding layer already initialized before d_vector settings.")
+        self.embedded_speaker_dim = self.args.d_vector_dim
+
+    def init_multilingual(self, config: Coqpit):
+        """Initialize multilingual modules of a model.
+
+        Args:
+            config (Coqpit): Model configuration.
+        """
+        if self.args.language_ids_file is not None:
+            self.language_manager = LanguageManager(language_ids_file_path=config.language_ids_file)
+
+        if self.args.use_language_embedding and self.language_manager:
+            print(" > initialization of language-embedding layers.")
+            self.num_languages = self.language_manager.num_languages
+            self.embedded_language_dim = self.args.embedded_language_dim
+            self.emb_l = nn.Embedding(self.num_languages, self.embedded_language_dim)
+            torch.nn.init.xavier_uniform_(self.emb_l.weight)
+        else:
+            self.embedded_language_dim = 0
+
+    def init_upsampling(self):
+        """
+        Initialize upsampling modules of a model.
+        """
+        if self.args.encoder_sample_rate:
+            self.interpolate_factor = self.config.audio["sample_rate"] / self.args.encoder_sample_rate
+            self.audio_resampler = torchaudio.transforms.Resample(
+                orig_freq=self.config.audio["sample_rate"], new_freq=self.args.encoder_sample_rate
+            )  # pylint: disable=W0201
+
+    def on_epoch_start(self, trainer):  # pylint: disable=W0613
+        """Freeze layers at the beginning of an epoch"""
+        self._freeze_layers()
+        # set the device of speaker encoder
+        if self.args.use_speaker_encoder_as_loss:
+            self.speaker_manager.encoder = self.speaker_manager.encoder.to(self.device)
+
+    def on_init_end(self, trainer):  # pylint: disable=W0613
+        """Reinit layes if needed"""
+        if self.args.reinit_DP:
+            before_dict = get_module_weights_sum(self.duration_predictor)
+            # Applies weights_reset recursively to every submodule of the duration predictor
+            self.duration_predictor.apply(fn=weights_reset)
+            after_dict = get_module_weights_sum(self.duration_predictor)
+            for key, value in after_dict.items():
+                if value == before_dict[key]:
+                    raise RuntimeError(" [!] The weights of Duration Predictor was not reinit check it !")
+            print(" > Duration Predictor was reinit.")
+
+        if self.args.reinit_text_encoder:
+            before_dict = get_module_weights_sum(self.text_encoder)
+            # Applies weights_reset recursively to every submodule of the duration predictor
+            self.text_encoder.apply(fn=weights_reset)
+            after_dict = get_module_weights_sum(self.text_encoder)
+            for key, value in after_dict.items():
+                if value == before_dict[key]:
+                    raise RuntimeError(" [!] The weights of Text Encoder was not reinit check it !")
+            print(" > Text Encoder was reinit.")
+
+    def get_aux_input(self, aux_input: Dict):
+        sid, g, lid, _ = self._set_cond_input(aux_input)
+        return {"speaker_ids": sid, "style_wav": None, "d_vectors": g, "language_ids": lid}
+
+    def _freeze_layers(self):
+        if self.args.freeze_encoder:
+            for param in self.text_encoder.parameters():
+                param.requires_grad = False
+
+            if hasattr(self, "emb_l"):
+                for param in self.emb_l.parameters():
+                    param.requires_grad = False
+
+        if self.args.freeze_PE:
+            for param in self.posterior_encoder.parameters():
+                param.requires_grad = False
+
+        if self.args.freeze_DP:
+            for param in self.duration_predictor.parameters():
+                param.requires_grad = False
+
+        if self.args.freeze_flow_decoder:
+            for param in self.flow.parameters():
+                param.requires_grad = False
+
+        if self.args.freeze_waveform_decoder:
+            for param in self.waveform_decoder.parameters():
+                param.requires_grad = False
+
+    @staticmethod
+    def _set_cond_input(aux_input: Dict):
+        """Set the speaker conditioning input based on the multi-speaker mode."""
+        sid, g, lid, durations = None, None, None, None
+        if "speaker_ids" in aux_input and aux_input["speaker_ids"] is not None:
+            sid = aux_input["speaker_ids"]
+            if sid.ndim == 0:
+                sid = sid.unsqueeze_(0)
+        if "d_vectors" in aux_input and aux_input["d_vectors"] is not None:
+            g = F.normalize(aux_input["d_vectors"]).unsqueeze(-1)
+            if g.ndim == 2:
+                g = g.unsqueeze_(0)
+
+        if "language_ids" in aux_input and aux_input["language_ids"] is not None:
+            lid = aux_input["language_ids"]
+            if lid.ndim == 0:
+                lid = lid.unsqueeze_(0)
+
+        if "durations" in aux_input and aux_input["durations"] is not None:
+            durations = aux_input["durations"]
+
+        return sid, g, lid, durations
+
+    def _set_speaker_input(self, aux_input: Dict):
+        d_vectors = aux_input.get("d_vectors", None)
+        speaker_ids = aux_input.get("speaker_ids", None)
+
+        if d_vectors is not None and speaker_ids is not None:
+            raise ValueError("[!] Cannot use d-vectors and speaker-ids together.")
+
+        if speaker_ids is not None and not hasattr(self, "emb_g"):
+            raise ValueError("[!] Cannot use speaker-ids without enabling speaker embedding.")
+
+        g = speaker_ids if speaker_ids is not None else d_vectors
+        return g
+
+    def forward_mas(self, outputs, z_p, m_p, logs_p, x, x_mask, y_mask, g, lang_emb):
+        # find the alignment path
+        attn_mask = torch.unsqueeze(x_mask, -1) * torch.unsqueeze(y_mask, 2)
+        with torch.no_grad():
+            o_scale = torch.exp(-2 * logs_p)
+            logp1 = torch.sum(-0.5 * math.log(2 * math.pi) - logs_p, [1]).unsqueeze(-1)  # [b, t, 1]
+            logp2 = torch.einsum("klm, kln -> kmn", [o_scale, -0.5 * (z_p**2)])
+            logp3 = torch.einsum("klm, kln -> kmn", [m_p * o_scale, z_p])
+            logp4 = torch.sum(-0.5 * (m_p**2) * o_scale, [1]).unsqueeze(-1)  # [b, t, 1]
+            logp = logp2 + logp3 + logp1 + logp4
+            attn = maximum_path(logp, attn_mask.squeeze(1)).unsqueeze(1).detach()  # [b, 1, t, t']
+
+        # duration predictor
+        attn_durations = attn.sum(3)
+        if self.args.use_sdp:
+            loss_duration = self.duration_predictor(
+                x.detach() if self.args.detach_dp_input else x,
+                x_mask,
+                attn_durations,
+                g=g.detach() if self.args.detach_dp_input and g is not None else g,
+                lang_emb=lang_emb.detach() if self.args.detach_dp_input and lang_emb is not None else lang_emb,
+            )
+            loss_duration = loss_duration / torch.sum(x_mask)
+        else:
+            attn_log_durations = torch.log(attn_durations + 1e-6) * x_mask
+            log_durations = self.duration_predictor(
+                x.detach() if self.args.detach_dp_input else x,
+                x_mask,
+                g=g.detach() if self.args.detach_dp_input and g is not None else g,
+                lang_emb=lang_emb.detach() if self.args.detach_dp_input and lang_emb is not None else lang_emb,
+            )
+            loss_duration = torch.sum((log_durations - attn_log_durations) ** 2, [1, 2]) / torch.sum(x_mask)
+        outputs["loss_duration"] = loss_duration
+        return outputs, attn
+
+    def upsampling_z(self, z, slice_ids=None, y_lengths=None, y_mask=None):
+        spec_segment_size = self.spec_segment_size
+        if self.args.encoder_sample_rate:
+            # recompute the slices and spec_segment_size if needed
+            slice_ids = slice_ids * int(self.interpolate_factor) if slice_ids is not None else slice_ids
+            spec_segment_size = spec_segment_size * int(self.interpolate_factor)
+            # interpolate z if needed
+            if self.args.interpolate_z:
+                z = torch.nn.functional.interpolate(z, scale_factor=[self.interpolate_factor], mode="linear").squeeze(0)
+                # recompute the mask if needed
+                if y_lengths is not None and y_mask is not None:
+                    y_mask = (
+                        sequence_mask(y_lengths * self.interpolate_factor, None).to(y_mask.dtype).unsqueeze(1)
+                    )  # [B, 1, T_dec_resampled]
+
+        return z, spec_segment_size, slice_ids, y_mask
+
+    def forward(  # pylint: disable=dangerous-default-value
+        self,
+        x: torch.tensor,
+        x_lengths: torch.tensor,
+        y: torch.tensor,
+        y_lengths: torch.tensor,
+        waveform: torch.tensor,
+        aux_input={"d_vectors": None, "speaker_ids": None, "language_ids": None},
+    ) -> Dict:
+        """Forward pass of the model.
+
+        Args:
+            x (torch.tensor): Batch of input character sequence IDs.
+            x_lengths (torch.tensor): Batch of input character sequence lengths.
+            y (torch.tensor): Batch of input spectrograms.
+            y_lengths (torch.tensor): Batch of input spectrogram lengths.
+            waveform (torch.tensor): Batch of ground truth waveforms per sample.
+            aux_input (dict, optional): Auxiliary inputs for multi-speaker and multi-lingual training.
+                Defaults to {"d_vectors": None, "speaker_ids": None, "language_ids": None}.
+
+        Returns:
+            Dict: model outputs keyed by the output name. Besides the entries listed under
+            "Return Shapes" it holds `loss_duration` (set by `forward_mas`) and `slice_ids`
+            (start positions of the decoded segments).
+
+        Shapes:
+            - x: :math:`[B, T_seq]`
+            - x_lengths: :math:`[B]`
+            - y: :math:`[B, C, T_spec]`
+            - y_lengths: :math:`[B]`
+            - waveform: :math:`[B, 1, T_wav]`
+            - d_vectors: :math:`[B, C]` (a trailing singleton dimension is appended by `_set_cond_input`)
+            - speaker_ids: :math:`[B]`
+            - language_ids: :math:`[B]`
+
+        Return Shapes:
+            - model_outputs: :math:`[B, 1, T_wav]`
+            - alignments: :math:`[B, T_seq, T_dec]`
+            - z: :math:`[B, C, T_dec]`
+            - z_p: :math:`[B, C, T_dec]`
+            - m_p: :math:`[B, C, T_dec]`
+            - logs_p: :math:`[B, C, T_dec]`
+            - m_q: :math:`[B, C, T_dec]`
+            - logs_q: :math:`[B, C, T_dec]`
+            - waveform_seg: :math:`[B, 1, spec_seg_size * hop_length]`
+            - gt_spk_emb: :math:`[B, 1, speaker_encoder.proj_dim]`
+            - syn_spk_emb: :math:`[B, 1, speaker_encoder.proj_dim]`
+        """
+        outputs = {}
+        sid, g, lid, _ = self._set_cond_input(aux_input)
+        # speaker embedding (replaces any d-vector conditioning when speaker ids are given)
+        if self.args.use_speaker_embedding and sid is not None:
+            g = self.emb_g(sid).unsqueeze(-1)  # [b, h, 1]
+
+        # language embedding
+        lang_emb = None
+        if self.args.use_language_embedding and lid is not None:
+            lang_emb = self.emb_l(lid).unsqueeze(-1)
+
+        x, m_p, logs_p, x_mask = self.text_encoder(x, x_lengths, lang_emb=lang_emb)
+
+        # posterior encoder
+        z, m_q, logs_q, y_mask = self.posterior_encoder(y, y_lengths, g=g)
+
+        # flow layers
+        z_p = self.flow(z, y_mask, g=g)
+
+        # duration predictor
+        outputs, attn = self.forward_mas(outputs, z_p, m_p, logs_p, x, x_mask, y_mask, g=g, lang_emb=lang_emb)
+
+        # expand prior
+        m_p = torch.einsum("klmn, kjm -> kjn", [attn, m_p])
+        logs_p = torch.einsum("klmn, kjm -> kjn", [attn, logs_p])
+
+        # select a random feature segment for the waveform decoder
+        z_slice, slice_ids = rand_segments(z, y_lengths, self.spec_segment_size, let_short_samples=True, pad_short=True)
+
+        # interpolate z if needed
+        z_slice, spec_segment_size, slice_ids, _ = self.upsampling_z(z_slice, slice_ids=slice_ids)
+
+        o = self.waveform_decoder(z_slice, g=g)
+
+        # cut the matching ground-truth waveform segment (frame indices scaled by the hop length)
+        wav_seg = segment(
+            waveform,
+            slice_ids * self.config.audio.hop_length,
+            spec_segment_size * self.config.audio.hop_length,
+            pad_short=True,
+        )
+
+        if self.args.use_speaker_encoder_as_loss and self.speaker_manager.encoder is not None:
+            # concate generated and GT waveforms
+            wavs_batch = torch.cat((wav_seg, o), dim=0)
+
+            # resample audio to speaker encoder sample_rate
+            # pylint: disable=W0105
+            if self.audio_transform is not None:
+                wavs_batch = self.audio_transform(wavs_batch)
+
+            pred_embs = self.speaker_manager.encoder.forward(wavs_batch, l2_norm=True)
+
+            # split generated and GT speaker embeddings (GT first, matching the concatenation order)
+            gt_spk_emb, syn_spk_emb = torch.chunk(pred_embs, 2, dim=0)
+        else:
+            gt_spk_emb, syn_spk_emb = None, None
+
+        outputs.update(
+            {
+                "model_outputs": o,
+                "alignments": attn.squeeze(1),
+                "m_p": m_p,
+                "logs_p": logs_p,
+                "z": z,
+                "z_p": z_p,
+                "m_q": m_q,
+                "logs_q": logs_q,
+                "waveform_seg": wav_seg,
+                "gt_spk_emb": gt_spk_emb,
+                "syn_spk_emb": syn_spk_emb,
+                "slice_ids": slice_ids,
+            }
+        )
+        return outputs
+
+    @staticmethod
+    def _set_x_lengths(x, aux_input):
+        if "x_lengths" in aux_input and aux_input["x_lengths"] is not None:
+            return aux_input["x_lengths"]
+        return torch.tensor(x.shape[1:2]).to(x.device)
+
+    @torch.no_grad()
+    def inference(
+        self,
+        x,
+        aux_input={"x_lengths": None, "d_vectors": None, "speaker_ids": None, "language_ids": None, "durations": None},
+    ):  # pylint: disable=dangerous-default-value
+        """
+        Run the text-to-waveform inference pass with gradients disabled.
+
+        Durations come from the duration predictor unless `aux_input["durations"]` is given. They are rounded
+        up, used to expand the text-encoder statistics along the time axis, and the latent sampled from those
+        statistics is passed through the flow in reverse and decoded to a waveform.
+
+        Note:
+            To run in batch mode, provide `x_lengths` else model assumes that the batch size is 1.
+
+        Args:
+            x (Tensor): Input token IDs.
+            aux_input (dict): Conditioning inputs consumed by `_set_cond_input()` and `_set_x_lengths()`.
+                The default is a shared dict object; this method itself never writes to it.
+
+        Shapes:
+            - x: :math:`[B, T_seq]`
+            - x_lengths: :math:`[B]`
+            - d_vectors: :math:`[B, C]`
+            - speaker_ids: :math:`[B]`
+
+        Return Shapes:
+            - model_outputs: :math:`[B, 1, T_wav]`
+            - alignments: :math:`[B, T_seq, T_dec]`
+            - z: :math:`[B, C, T_dec]`
+            - z_p: :math:`[B, C, T_dec]`
+            - m_p: :math:`[B, C, T_dec]`
+            - logs_p: :math:`[B, C, T_dec]`
+
+        The returned dict additionally holds `durations` (the rounded-up durations) and `y_mask`.
+        """
+        # unpack the conditioning inputs and resolve the input lengths
+        sid, g, lid, durations = self._set_cond_input(aux_input)
+        x_lengths = self._set_x_lengths(x, aux_input)
+
+        # speaker embedding
+        if self.args.use_speaker_embedding and sid is not None:
+            g = self.emb_g(sid).unsqueeze(-1)
+
+        # language embedding
+        lang_emb = None
+        if self.args.use_language_embedding and lid is not None:
+            lang_emb = self.emb_l(lid).unsqueeze(-1)
+
+        x, m_p, logs_p, x_mask = self.text_encoder(x, x_lengths, lang_emb=lang_emb)
+
+        # predict log-durations unless the caller supplied explicit durations
+        if durations is None:
+            if self.args.use_sdp:
+                logw = self.duration_predictor(
+                    x,
+                    x_mask,
+                    g=g if self.args.condition_dp_on_speaker else None,
+                    reverse=True,
+                    noise_scale=self.inference_noise_scale_dp,
+                    lang_emb=lang_emb,
+                )
+            else:
+                logw = self.duration_predictor(
+                    x, x_mask, g=g if self.args.condition_dp_on_speaker else None, lang_emb=lang_emb
+                )
+            # back to the linear domain, masked and scaled by `length_scale`
+            w = torch.exp(logw) * x_mask * self.length_scale
+        else:
+            assert durations.shape[-1] == x.shape[-1]
+            # NOTE(review): a leading dim is added here; presumably `durations` arrives as [1, T_seq] - confirm
+            w = durations.unsqueeze(0)
+
+        # round durations up; every item gets an output length of at least 1
+        w_ceil = torch.ceil(w)
+        y_lengths = torch.clamp_min(torch.sum(w_ceil, [1, 2]), 1).long()
+        y_mask = sequence_mask(y_lengths, None).to(x_mask.dtype).unsqueeze(1)  # [B, 1, T_dec]
+
+        attn_mask = x_mask * y_mask.transpose(1, 2)  # [B, 1, T_enc] * [B, T_dec, 1]
+        attn = generate_path(w_ceil.squeeze(1), attn_mask.squeeze(1).transpose(1, 2))
+
+        # expand the prior statistics along the alignment path
+        m_p = torch.matmul(attn.transpose(1, 2), m_p.transpose(1, 2)).transpose(1, 2)
+        logs_p = torch.matmul(attn.transpose(1, 2), logs_p.transpose(1, 2)).transpose(1, 2)
+
+        # sample the latent around `m_p`; `inference_noise_scale` scales the added noise
+        z_p = m_p + torch.randn_like(m_p) * torch.exp(logs_p) * self.inference_noise_scale
+        z = self.flow(z_p, y_mask, g=g, reverse=True)
+
+        # upsampling if needed
+        z, _, _, y_mask = self.upsampling_z(z, y_lengths=y_lengths, y_mask=y_mask)
+
+        # the masked latent is truncated to `max_inference_len` frames before decoding
+        o = self.waveform_decoder((z * y_mask)[:, :, : self.max_inference_len], g=g)
+
+        outputs = {
+            "model_outputs": o,
+            "alignments": attn.squeeze(1),
+            "durations": w_ceil,
+            "z": z,
+            "z_p": z_p,
+            "m_p": m_p,
+            "logs_p": logs_p,
+            "y_mask": y_mask,
+        }
+        return outputs
+
+    @torch.no_grad()
+    def inference_voice_conversion(
+        self, reference_wav, speaker_id=None, d_vector=None, reference_speaker_id=None, reference_d_vector=None
+    ):
+        """Inference for voice conversion
+
+        Args:
+            reference_wav (Tensor): Reference wavform. Tensor of shape [B, T]
+            speaker_id (Tensor): speaker_id of the target speaker. Tensor of shape [B]
+            d_vector (Tensor): d_vector embedding of target speaker. Tensor of shape `[B, C]`
+            reference_speaker_id (Tensor): speaker_id of the reference_wav speaker. Tensor of shape [B]
+            reference_d_vector (Tensor): d_vector embedding of the reference_wav speaker. Tensor of shape `[B, C]`
+        """
+        # compute spectrograms
+        y = wav_to_spec(
+            reference_wav,
+            self.config.audio.fft_size,
+            self.config.audio.hop_length,
+            self.config.audio.win_length,
+            center=False,
+        )
+        y_lengths = torch.tensor([y.size(-1)]).to(y.device)
+        speaker_cond_src = reference_speaker_id if reference_speaker_id is not None else reference_d_vector
+        speaker_cond_tgt = speaker_id if speaker_id is not None else d_vector
+        wav, _, _ = self.voice_conversion(y, y_lengths, speaker_cond_src, speaker_cond_tgt)
+        return wav
+
+    def voice_conversion(self, y, y_lengths, speaker_cond_src, speaker_cond_tgt):
+        """Forward pass for voice conversion
+
+        TODO: create an end-point for voice conversion
+
+        Args:
+            y (Tensor): Reference spectrograms. Tensor of shape [B, T, C]
+            y_lengths (Tensor): Length of each reference spectrogram. Tensor of shape [B]
+            speaker_cond_src (Tensor): Reference speaker ID. Tensor of shape [B,]
+            speaker_cond_tgt (Tensor): Target speaker ID. Tensor of shape [B,]
+        """
+        assert self.num_speakers > 0, "num_speakers have to be larger than 0."
+        # speaker embedding
+        if self.args.use_speaker_embedding and not self.args.use_d_vector_file:
+            g_src = self.emb_g(speaker_cond_src).unsqueeze(-1)
+            g_tgt = self.emb_g(speaker_cond_tgt).unsqueeze(-1)
+        elif not self.args.use_speaker_embedding and self.args.use_d_vector_file:
+            g_src = F.normalize(speaker_cond_src).unsqueeze(-1)
+            g_tgt = F.normalize(speaker_cond_tgt).unsqueeze(-1)
+        else:
+            raise RuntimeError(" [!] Voice conversion is only supported on multi-speaker models.")
+
+        z, _, _, y_mask = self.posterior_encoder(y, y_lengths, g=g_src)
+        z_p = self.flow(z, y_mask, g=g_src)
+        z_hat = self.flow(z_p, y_mask, g=g_tgt, reverse=True)
+        o_hat = self.waveform_decoder(z_hat * y_mask, g=g_tgt)
+        return o_hat, y_mask, (z, z_p, z_hat)
+
+    def train_step(self, batch: dict, criterion: nn.Module, optimizer_idx: int) -> Tuple[Dict, Dict]:
+        """Perform a single training step. Run the model forward pass and compute losses.
+
+        Args:
+            batch (Dict): Input tensors.
+            criterion (nn.Module): Loss layer designed for the model.
+            optimizer_idx (int): Index of optimizer to use. 0 for the generator and 1 for the discriminator networks.
+
+        Returns:
+            Tuple[Dict, Dict]: Model ouputs and computed losses.
+        """
+
+        spec_lens = batch["spec_lens"]
+
+        if optimizer_idx == 0:
+            tokens = batch["tokens"]
+            token_lenghts = batch["token_lens"]
+            spec = batch["spec"]
+
+            d_vectors = batch["d_vectors"]
+            speaker_ids = batch["speaker_ids"]
+            language_ids = batch["language_ids"]
+            waveform = batch["waveform"]
+
+            # generator pass
+            outputs = self.forward(
+                tokens,
+                token_lenghts,
+                spec,
+                spec_lens,
+                waveform,
+                aux_input={"d_vectors": d_vectors, "speaker_ids": speaker_ids, "language_ids": language_ids},
+            )
+
+            # cache tensors for the generator pass
+            self.model_outputs_cache = outputs  # pylint: disable=attribute-defined-outside-init
+
+            # compute scores and features
+            scores_disc_fake, _, scores_disc_real, _ = self.disc(
+                outputs["model_outputs"].detach(), outputs["waveform_seg"]
+            )
+
+            # compute loss
+            with autocast(enabled=False):  # use float32 for the criterion
+                loss_dict = criterion[optimizer_idx](
+                    scores_disc_real,
+                    scores_disc_fake,
+                )
+            return outputs, loss_dict
+
+        if optimizer_idx == 1:
+            mel = batch["mel"]
+
+            # compute melspec segment
+            with autocast(enabled=False):
+
+                if self.args.encoder_sample_rate:
+                    spec_segment_size = self.spec_segment_size * int(self.interpolate_factor)
+                else:
+                    spec_segment_size = self.spec_segment_size
+
+                mel_slice = segment(
+                    mel.float(), self.model_outputs_cache["slice_ids"], spec_segment_size, pad_short=True
+                )
+                mel_slice_hat = wav_to_mel(
+                    y=self.model_outputs_cache["model_outputs"].float(),
+                    n_fft=self.config.audio.fft_size,
+                    sample_rate=self.config.audio.sample_rate,
+                    num_mels=self.config.audio.num_mels,
+                    hop_length=self.config.audio.hop_length,
+                    win_length=self.config.audio.win_length,
+                    fmin=self.config.audio.mel_fmin,
+                    fmax=self.config.audio.mel_fmax,
+                    center=False,
+                )
+
+            # compute discriminator scores and features
+            scores_disc_fake, feats_disc_fake, _, feats_disc_real = self.disc(
+                self.model_outputs_cache["model_outputs"], self.model_outputs_cache["waveform_seg"]
+            )
+
+            # compute losses
+            with autocast(enabled=False):  # use float32 for the criterion
+                loss_dict = criterion[optimizer_idx](
+                    mel_slice_hat=mel_slice.float(),
+                    mel_slice=mel_slice_hat.float(),
+                    z_p=self.model_outputs_cache["z_p"].float(),
+                    logs_q=self.model_outputs_cache["logs_q"].float(),
+                    m_p=self.model_outputs_cache["m_p"].float(),
+                    logs_p=self.model_outputs_cache["logs_p"].float(),
+                    z_len=spec_lens,
+                    scores_disc_fake=scores_disc_fake,
+                    feats_disc_fake=feats_disc_fake,
+                    feats_disc_real=feats_disc_real,
+                    loss_duration=self.model_outputs_cache["loss_duration"],
+                    use_speaker_encoder_as_loss=self.args.use_speaker_encoder_as_loss,
+                    gt_spk_emb=self.model_outputs_cache["gt_spk_emb"],
+                    syn_spk_emb=self.model_outputs_cache["syn_spk_emb"],
+                )
+
+            return self.model_outputs_cache, loss_dict
+
+        raise ValueError(" [!] Unexpected `optimizer_idx`.")
+
+    def _log(self, ap, batch, outputs, name_prefix="train"):  # pylint: disable=unused-argument,no-self-use
+        y_hat = outputs[1]["model_outputs"]
+        y = outputs[1]["waveform_seg"]
+        figures = plot_results(y_hat, y, ap, name_prefix)
+        sample_voice = y_hat[0].squeeze(0).detach().cpu().numpy()
+        audios = {f"{name_prefix}/audio": sample_voice}
+
+        alignments = outputs[1]["alignments"]
+        align_img = alignments[0].data.cpu().numpy().T
+
+        figures.update(
+            {
+                "alignment": plot_alignment(align_img, output_fig=False),
+            }
+        )
+        return figures, audios
+
+    def train_log(
+        self, batch: dict, outputs: dict, logger: "Logger", assets: dict, steps: int
+    ):  # pylint: disable=no-self-use
+        """Create visualizations and waveform examples.
+
+        For example, here you can plot spectrograms and generate sample sample waveforms from these spectrograms to
+        be projected onto Tensorboard.
+
+        Args:
+            ap (AudioProcessor): audio processor used at training.
+            batch (Dict): Model inputs used at the previous training step.
+            outputs (Dict): Model outputs generated at the previoud training step.
+
+        Returns:
+            Tuple[Dict, np.ndarray]: training plots and output waveform.
+        """
+        figures, audios = self._log(self.ap, batch, outputs, "train")
+        logger.train_figures(steps, figures)
+        logger.train_audios(steps, audios, self.ap.sample_rate)
+
+    @torch.no_grad()
+    def eval_step(self, batch: dict, criterion: nn.Module, optimizer_idx: int):
+        return self.train_step(batch, criterion, optimizer_idx)
+
+    def eval_log(self, batch: dict, outputs: dict, logger: "Logger", assets: dict, steps: int) -> None:
+        figures, audios = self._log(self.ap, batch, outputs, "eval")
+        logger.eval_figures(steps, figures)
+        logger.eval_audios(steps, audios, self.ap.sample_rate)
+
+    def get_aux_input_from_test_sentences(self, sentence_info):
+        if hasattr(self.config, "model_args"):
+            config = self.config.model_args
+        else:
+            config = self.config
+
+        # extract speaker and language info
+        text, speaker_name, style_wav, language_name = None, None, None, None
+
+        if isinstance(sentence_info, list):
+            if len(sentence_info) == 1:
+                text = sentence_info[0]
+            elif len(sentence_info) == 2:
+                text, speaker_name = sentence_info
+            elif len(sentence_info) == 3:
+                text, speaker_name, style_wav = sentence_info
+            elif len(sentence_info) == 4:
+                text, speaker_name, style_wav, language_name = sentence_info
+        else:
+            text = sentence_info
+
+        # get speaker  id/d_vector
+        speaker_id, d_vector, language_id = None, None, None
+        if hasattr(self, "speaker_manager"):
+            if config.use_d_vector_file:
+                if speaker_name is None:
+                    d_vector = self.speaker_manager.get_random_embedding()
+                else:
+                    d_vector = self.speaker_manager.get_mean_embedding(speaker_name, num_samples=None, randomize=False)
+            elif config.use_speaker_embedding:
+                if speaker_name is None:
+                    speaker_id = self.speaker_manager.get_random_id()
+                else:
+                    speaker_id = self.speaker_manager.name_to_id[speaker_name]
+
+        # get language id
+        if hasattr(self, "language_manager") and config.use_language_embedding and language_name is not None:
+            language_id = self.language_manager.name_to_id[language_name]
+
+        return {
+            "text": text,
+            "speaker_id": speaker_id,
+            "style_wav": style_wav,
+            "d_vector": d_vector,
+            "language_id": language_id,
+            "language_name": language_name,
+        }
+
+    @torch.no_grad()
+    def test_run(self, assets) -> Tuple[Dict, Dict]:
+        """Generic test run for `tts` models used by `Trainer`.
+
+        You can override this for a different behaviour.
+
+        Returns:
+            Tuple[Dict, Dict]: Test figures and audios to be projected to Tensorboard.
+        """
+        print(" | > Synthesizing test sentences.")
+        test_audios = {}
+        test_figures = {}
+        test_sentences = self.config.test_sentences
+        for idx, s_info in enumerate(test_sentences):
+            aux_inputs = self.get_aux_input_from_test_sentences(s_info)
+            wav, alignment, _, _ = synthesis(
+                self,
+                aux_inputs["text"],
+                self.config,
+                "cuda" in str(next(self.parameters()).device),
+                speaker_id=aux_inputs["speaker_id"],
+                d_vector=aux_inputs["d_vector"],
+                style_wav=aux_inputs["style_wav"],
+                language_id=aux_inputs["language_id"],
+                use_griffin_lim=True,
+                do_trim_silence=False,
+            ).values()
+            test_audios["{}-audio".format(idx)] = wav
+            test_figures["{}-alignment".format(idx)] = plot_alignment(alignment.T, output_fig=False)
+        return {"figures": test_figures, "audios": test_audios}
+
+    def test_log(
+        self, outputs: dict, logger: "Logger", assets: dict, steps: int  # pylint: disable=unused-argument
+    ) -> None:
+        logger.test_audios(steps, outputs["audios"], self.ap.sample_rate)
+        logger.test_figures(steps, outputs["figures"])
+
+    def format_batch(self, batch: Dict) -> Dict:
+        """Compute speaker, langugage IDs and d_vector for the batch if necessary."""
+        speaker_ids = None
+        language_ids = None
+        d_vectors = None
+
+        # get numerical speaker ids from speaker names
+        if self.speaker_manager is not None and self.speaker_manager.name_to_id and self.args.use_speaker_embedding:
+            speaker_ids = [self.speaker_manager.name_to_id[sn] for sn in batch["speaker_names"]]
+
+        if speaker_ids is not None:
+            speaker_ids = torch.LongTensor(speaker_ids)
+            batch["speaker_ids"] = speaker_ids
+
+        # get d_vectors from audio file names
+        if self.speaker_manager is not None and self.speaker_manager.embeddings and self.args.use_d_vector_file:
+            d_vector_mapping = self.speaker_manager.embeddings
+            d_vectors = [d_vector_mapping[w]["embedding"] for w in batch["audio_unique_names"]]
+            d_vectors = torch.FloatTensor(d_vectors)
+
+        # get language ids from language names
+        if self.language_manager is not None and self.language_manager.name_to_id and self.args.use_language_embedding:
+            language_ids = [self.language_manager.name_to_id[ln] for ln in batch["language_names"]]
+
+        if language_ids is not None:
+            language_ids = torch.LongTensor(language_ids)
+
+        batch["language_ids"] = language_ids
+        batch["d_vectors"] = d_vectors
+        batch["speaker_ids"] = speaker_ids
+        return batch
+
+    def format_batch_on_device(self, batch):
+        """Compute spectrograms on the device.
+
+        Adds `spec`, `mel`, `spec_lens` and `mel_lens` to `batch`, computed from `batch["waveform"]` and
+        `batch["waveform_rel_lens"]`. When `self.args.encoder_sample_rate` is set, `spec` is computed from
+        the resampled waveform while `mel` is computed from the original one, and the two are trimmed so
+        that their frame counts differ exactly by `self.interpolate_factor`.
+
+        Returns:
+            The same `batch` object with the new entries; padding frames of `spec` and `mel` are zeroed.
+
+        Raises:
+            AssertionError: If the frame counts or frame lengths of `spec` and `mel` do not line up.
+        """
+        ac = self.config.audio
+
+        # the encoder input is resampled only when a separate encoder sample rate is configured
+        if self.args.encoder_sample_rate:
+            wav = self.audio_resampler(batch["waveform"])
+        else:
+            wav = batch["waveform"]
+
+        # compute spectrograms
+        batch["spec"] = wav_to_spec(wav, ac.fft_size, ac.hop_length, ac.win_length, center=False)
+
+        if self.args.encoder_sample_rate:
+            # recompute spec with high sampling rate to the loss
+            spec_mel = wav_to_spec(batch["waveform"], ac.fft_size, ac.hop_length, ac.win_length, center=False)
+            # remove extra stft frames if needed
+            # whichever of the two spectrograms is too long is cut along its last axis
+            if spec_mel.size(2) > int(batch["spec"].size(2) * self.interpolate_factor):
+                spec_mel = spec_mel[:, :, : int(batch["spec"].size(2) * self.interpolate_factor)]
+            else:
+                batch["spec"] = batch["spec"][:, :, : int(spec_mel.size(2) / self.interpolate_factor)]
+        else:
+            spec_mel = batch["spec"]
+
+        batch["mel"] = spec_to_mel(
+            spec=spec_mel,
+            n_fft=ac.fft_size,
+            num_mels=ac.num_mels,
+            sample_rate=ac.sample_rate,
+            fmin=ac.mel_fmin,
+            fmax=ac.mel_fmax,
+        )
+
+        # sanity check: frame counts of `spec` and `mel` must agree (up to the interpolation factor)
+        if self.args.encoder_sample_rate:
+            assert batch["spec"].shape[2] == int(
+                batch["mel"].shape[2] / self.interpolate_factor
+            ), f"{batch['spec'].shape[2]}, {batch['mel'].shape[2]}"
+        else:
+            assert batch["spec"].shape[2] == batch["mel"].shape[2], f"{batch['spec'].shape[2]}, {batch['mel'].shape[2]}"
+
+        # compute spectrogram frame lengths
+        # relative waveform lengths are scaled by the padded frame count and truncated to int
+        batch["spec_lens"] = (batch["spec"].shape[2] * batch["waveform_rel_lens"]).int()
+        batch["mel_lens"] = (batch["mel"].shape[2] * batch["waveform_rel_lens"]).int()
+
+        # NOTE(review): only the sum of the differences is checked, so opposite-sign
+        # mismatches could cancel out - confirm whether an element-wise check is intended
+        if self.args.encoder_sample_rate:
+            assert (batch["spec_lens"] - (batch["mel_lens"] / self.interpolate_factor).int()).sum() == 0
+        else:
+            assert (batch["spec_lens"] - batch["mel_lens"]).sum() == 0
+
+        # zero the padding frames
+        batch["spec"] = batch["spec"] * sequence_mask(batch["spec_lens"]).unsqueeze(1)
+        batch["mel"] = batch["mel"] * sequence_mask(batch["mel_lens"]).unsqueeze(1)
+        return batch
+
+    def get_sampler(self, config: Coqpit, dataset: TTSDataset, num_gpus=1, is_eval=False):
+        weights = None
+        data_items = dataset.samples
+        if getattr(config, "use_weighted_sampler", False):
+            for attr_name, alpha in config.weighted_sampler_attrs.items():
+                print(f" > Using weighted sampler for attribute '{attr_name}' with alpha '{alpha}'")
+                multi_dict = config.weighted_sampler_multipliers.get(attr_name, None)
+                print(multi_dict)
+                weights, attr_names, attr_weights = get_attribute_balancer_weights(
+                    attr_name=attr_name, items=data_items, multi_dict=multi_dict
+                )
+                weights = weights * alpha
+                print(f" > Attribute weights for '{attr_names}' \n | > {attr_weights}")
+
+        # input_audio_lenghts = [os.path.getsize(x["audio_file"]) for x in data_items]
+
+        if weights is not None:
+            w_sampler = WeightedRandomSampler(weights, len(weights))
+            batch_sampler = BucketBatchSampler(
+                w_sampler,
+                data=data_items,
+                batch_size=config.eval_batch_size if is_eval else config.batch_size,
+                sort_key=lambda x: os.path.getsize(x["audio_file"]),
+                drop_last=True,
+            )
+        else:
+            batch_sampler = None
+        # sampler for DDP
+        if batch_sampler is None:
+            batch_sampler = DistributedSampler(dataset) if num_gpus > 1 else None
+        else:  # If a sampler is already defined use this sampler and DDP sampler together
+            batch_sampler = (
+                DistributedSamplerWrapper(batch_sampler) if num_gpus > 1 else batch_sampler
+            )  # TODO: check batch_sampler with multi-gpu
+        return batch_sampler
+
+    def get_data_loader(
+        self,
+        config: Coqpit,
+        assets: Dict,
+        is_eval: bool,
+        samples: Union[List[Dict], List[List]],
+        verbose: bool,
+        num_gpus: int,
+        rank: int = None,
+    ) -> "DataLoader":
+        if is_eval and not config.run_eval:
+            loader = None
+        else:
+            # init dataloader
+            dataset = VitsDataset(
+                model_args=self.args,
+                samples=samples,
+                batch_group_size=0 if is_eval else config.batch_group_size * config.batch_size,
+                min_text_len=config.min_text_len,
+                max_text_len=config.max_text_len,
+                min_audio_len=config.min_audio_len,
+                max_audio_len=config.max_audio_len,
+                phoneme_cache_path=config.phoneme_cache_path,
+                precompute_num_workers=config.precompute_num_workers,
+                verbose=verbose,
+                tokenizer=self.tokenizer,
+                start_by_longest=config.start_by_longest,
+            )
+
+            # wait all the DDP process to be ready
+            if num_gpus > 1:
+                dist.barrier()
+
+            # sort input sequences from short to long
+            dataset.preprocess_samples()
+
+            # get samplers
+            sampler = self.get_sampler(config, dataset, num_gpus)
+            if sampler is None:
+                loader = DataLoader(
+                    dataset,
+                    batch_size=config.eval_batch_size if is_eval else config.batch_size,
+                    shuffle=False,  # shuffle is done in the dataset.
+                    collate_fn=dataset.collate_fn,
+                    drop_last=False,  # setting this False might cause issues in AMP training.
+                    num_workers=config.num_eval_loader_workers if is_eval else config.num_loader_workers,
+                    pin_memory=False,
+                )
+            else:
+                loader = DataLoader(
+                    dataset,
+                    batch_sampler=sampler,
+                    collate_fn=dataset.collate_fn,
+                    num_workers=config.num_eval_loader_workers if is_eval else config.num_loader_workers,
+                    pin_memory=False,
+                )
+        return loader
+
+    def get_optimizer(self) -> List:
+        """Initiate and return the GAN optimizers based on the config parameters.
+        It returnes 2 optimizers in a list. First one is for the generator and the second one is for the discriminator.
+        Returns:
+            List: optimizers.
+        """
+        # select generator parameters
+        optimizer0 = get_optimizer(self.config.optimizer, self.config.optimizer_params, self.config.lr_disc, self.disc)
+
+        gen_parameters = chain(params for k, params in self.named_parameters() if not k.startswith("disc."))
+        optimizer1 = get_optimizer(
+            self.config.optimizer, self.config.optimizer_params, self.config.lr_gen, parameters=gen_parameters
+        )
+        return [optimizer0, optimizer1]
+
+    def get_lr(self) -> List:
+        """Set the initial learning rates for each optimizer.
+
+        Returns:
+            List: learning rates for each optimizer.
+        """
+        return [self.config.lr_disc, self.config.lr_gen]
+
+    def get_scheduler(self, optimizer) -> List:
+        """Set the schedulers for each optimizer.
+
+        Args:
+            optimizer (List[`torch.optim.Optimizer`]): List of optimizers.
+
+        Returns:
+            List: Schedulers, one for each optimizer.
+        """
+        scheduler_G = get_scheduler(self.config.lr_scheduler_gen, self.config.lr_scheduler_gen_params, optimizer[0])
+        scheduler_D = get_scheduler(self.config.lr_scheduler_disc, self.config.lr_scheduler_disc_params, optimizer[1])
+        return [scheduler_D, scheduler_G]
+
+    def get_criterion(self):
+        """Get criterions for each optimizer. The index in the output list matches the optimizer idx used in
+        `train_step()`"""
+        from TTS.tts.layers.losses import (  # pylint: disable=import-outside-toplevel
+            VitsDiscriminatorLoss,
+            VitsGeneratorLoss,
+        )
+
+        return [VitsDiscriminatorLoss(self.config), VitsGeneratorLoss(self.config)]
+
+    def load_checkpoint(
+        self, config, checkpoint_path, eval=False, strict=True, cache=False
+    ):  # pylint: disable=unused-argument, redefined-builtin
+        """Load the model checkpoint and setup for training or inference"""
+        state = load_fsspec(checkpoint_path, map_location=torch.device("cpu"), cache=cache)
+        # compat band-aid for the pre-trained models to not use the encoder baked into the model
+        # TODO: consider baking the speaker encoder into the model and call it from there.
+        # as it is probably easier for model distribution.
+        state["model"] = {k: v for k, v in state["model"].items() if "speaker_encoder" not in k}
+
+        if self.args.encoder_sample_rate is not None and eval:
+            # audio resampler is not used in inference time
+            self.audio_resampler = None
+
+        # handle fine-tuning from a checkpoint with additional speakers
+        if hasattr(self, "emb_g") and state["model"]["emb_g.weight"].shape != self.emb_g.weight.shape:
+            num_new_speakers = self.emb_g.weight.shape[0] - state["model"]["emb_g.weight"].shape[0]
+            print(f" > Loading checkpoint with {num_new_speakers} additional speakers.")
+            emb_g = state["model"]["emb_g.weight"]
+            new_row = torch.randn(num_new_speakers, emb_g.shape[1])
+            emb_g = torch.cat([emb_g, new_row], axis=0)
+            state["model"]["emb_g.weight"] = emb_g
+        # load the model weights
+        self.load_state_dict(state["model"], strict=strict)
+
+        if eval:
+            self.eval()
+            assert not self.training
+
+    @staticmethod
+    def init_from_config(config: "VitsConfig", samples: Union[List[List], List[Dict]] = None, verbose=True):
+        """Initiate model from config
+
+        Args:
+            config (VitsConfig): Model config.
+            samples (Union[List[List], List[Dict]]): Training samples to parse speaker ids for training.
+                Defaults to None.
+        """
+        from TTS.utils.audio import AudioProcessor
+
+        upsample_rate = torch.prod(torch.as_tensor(config.model_args.upsample_rates_decoder)).item()
+
+        if not config.model_args.encoder_sample_rate:
+            assert (
+                upsample_rate == config.audio.hop_length
+            ), f" [!] Product of upsample rates must be equal to the hop length - {upsample_rate} vs {config.audio.hop_length}"
+        else:
+            encoder_to_vocoder_upsampling_factor = config.audio.sample_rate / config.model_args.encoder_sample_rate
+            effective_hop_length = config.audio.hop_length * encoder_to_vocoder_upsampling_factor
+            assert (
+                upsample_rate == effective_hop_length
+            ), f" [!] Product of upsample rates must be equal to the hop length - {upsample_rate} vs {effective_hop_length}"
+
+        ap = AudioProcessor.init_from_config(config, verbose=verbose)
+        tokenizer, new_config = TTSTokenizer.init_from_config(config)
+        speaker_manager = SpeakerManager.init_from_config(config, samples)
+        language_manager = LanguageManager.init_from_config(config)
+
+        if config.model_args.speaker_encoder_model_path:
+            speaker_manager.init_encoder(
+                config.model_args.speaker_encoder_model_path, config.model_args.speaker_encoder_config_path
+            )
+        return Vits(new_config, ap, tokenizer, speaker_manager, language_manager)
+
+
+##################################
+# VITS CHARACTERS
+##################################
+
+
+class VitsCharacters(BaseCharacters):
+    """Characters class for VITs model for compatibility with pre-trained models"""
+
+    def __init__(
+        self,
+        graphemes: str = _characters,
+        punctuations: str = _punctuations,
+        pad: str = _pad,
+        ipa_characters: str = _phonemes,
+    ) -> None:
+        if ipa_characters is not None:
+            graphemes += ipa_characters
+        super().__init__(graphemes, punctuations, pad, None, None, "<BLNK>", is_unique=False, is_sorted=True)
+
+    def _create_vocab(self):
+        self._vocab = [self._pad] + list(self._punctuations) + list(self._characters) + [self._blank]
+        self._char_to_id = {char: idx for idx, char in enumerate(self.vocab)}
+        # pylint: disable=unnecessary-comprehension
+        self._id_to_char = {idx: char for idx, char in enumerate(self.vocab)}
+
+    @staticmethod
+    def init_from_config(config: Coqpit):
+        if config.characters is not None:
+            _pad = config.characters["pad"]
+            _punctuations = config.characters["punctuations"]
+            _letters = config.characters["characters"]
+            _letters_ipa = config.characters["phonemes"]
+            return (
+                VitsCharacters(graphemes=_letters, ipa_characters=_letters_ipa, punctuations=_punctuations, pad=_pad),
+                config,
+            )
+        characters = VitsCharacters()
+        new_config = replace(config, characters=characters.to_config())
+        return characters, new_config
+
+    def to_config(self) -> "CharactersConfig":
+        return CharactersConfig(
+            characters=self._characters,
+            punctuations=self._punctuations,
+            pad=self._pad,
+            eos=None,
+            bos=None,
+            blank=self._blank,
+            is_unique=False,
+            is_sorted=True,
+        )
diff --git a/TTS/tts/utils/.DS_Store b/TTS/tts/utils/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..faae9a64b1c36dac8832fd9caaa713d4f6a08147
Binary files /dev/null and b/TTS/tts/utils/.DS_Store differ
diff --git a/TTS/tts/utils/__init__.py b/TTS/tts/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/TTS/tts/utils/__pycache__/__init__.cpython-310.pyc b/TTS/tts/utils/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b494638da101654a8317ed5d942a248e6fa7feec
Binary files /dev/null and b/TTS/tts/utils/__pycache__/__init__.cpython-310.pyc differ
diff --git a/TTS/tts/utils/__pycache__/__init__.cpython-37.pyc b/TTS/tts/utils/__pycache__/__init__.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..fb96192386058f619fa6d058a469692395ee0526
Binary files /dev/null and b/TTS/tts/utils/__pycache__/__init__.cpython-37.pyc differ
diff --git a/TTS/tts/utils/__pycache__/__init__.cpython-38.pyc b/TTS/tts/utils/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0b38224e6c82ea98f8559b31f8a00b6a4a082ceb
Binary files /dev/null and b/TTS/tts/utils/__pycache__/__init__.cpython-38.pyc differ
diff --git a/TTS/tts/utils/__pycache__/__init__.cpython-39.pyc b/TTS/tts/utils/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7a2fe6ce9a342974fd442c45109367172dcd0262
Binary files /dev/null and b/TTS/tts/utils/__pycache__/__init__.cpython-39.pyc differ
diff --git a/TTS/tts/utils/__pycache__/data.cpython-37.pyc b/TTS/tts/utils/__pycache__/data.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..96836b7d6ef09e6fb9ddedc1e49839e62935e4f9
Binary files /dev/null and b/TTS/tts/utils/__pycache__/data.cpython-37.pyc differ
diff --git a/TTS/tts/utils/__pycache__/data.cpython-38.pyc b/TTS/tts/utils/__pycache__/data.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..03d852bf21c18cceddc13452d9d3fc6940a5f6aa
Binary files /dev/null and b/TTS/tts/utils/__pycache__/data.cpython-38.pyc differ
diff --git a/TTS/tts/utils/__pycache__/data.cpython-39.pyc b/TTS/tts/utils/__pycache__/data.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c6b5c9c1939544cd5ae2d64208e01eaeeaf0736d
Binary files /dev/null and b/TTS/tts/utils/__pycache__/data.cpython-39.pyc differ
diff --git a/TTS/tts/utils/__pycache__/helpers.cpython-310.pyc b/TTS/tts/utils/__pycache__/helpers.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0c02b3e27c025068938c31d2e3f84805bae8c5db
Binary files /dev/null and b/TTS/tts/utils/__pycache__/helpers.cpython-310.pyc differ
diff --git a/TTS/tts/utils/__pycache__/helpers.cpython-37.pyc b/TTS/tts/utils/__pycache__/helpers.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..27f83094d627e704a0259efcd159628b6f06b76a
Binary files /dev/null and b/TTS/tts/utils/__pycache__/helpers.cpython-37.pyc differ
diff --git a/TTS/tts/utils/__pycache__/helpers.cpython-38.pyc b/TTS/tts/utils/__pycache__/helpers.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..993ae35088f88bb2f9fbcb4e8784bdd22a9a209c
Binary files /dev/null and b/TTS/tts/utils/__pycache__/helpers.cpython-38.pyc differ
diff --git a/TTS/tts/utils/__pycache__/helpers.cpython-39.pyc b/TTS/tts/utils/__pycache__/helpers.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..3861947944c2652aad15105db03450cc59ac4529
Binary files /dev/null and b/TTS/tts/utils/__pycache__/helpers.cpython-39.pyc differ
diff --git a/TTS/tts/utils/__pycache__/languages.cpython-37.pyc b/TTS/tts/utils/__pycache__/languages.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a1b0f5ae5c4afb5e30f8270d4727b1a4d098ed65
Binary files /dev/null and b/TTS/tts/utils/__pycache__/languages.cpython-37.pyc differ
diff --git a/TTS/tts/utils/__pycache__/languages.cpython-38.pyc b/TTS/tts/utils/__pycache__/languages.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..2ff56613f0d59bc9159d967e99236517e9276773
Binary files /dev/null and b/TTS/tts/utils/__pycache__/languages.cpython-38.pyc differ
diff --git a/TTS/tts/utils/__pycache__/languages.cpython-39.pyc b/TTS/tts/utils/__pycache__/languages.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e91d89ac7c1ccbf23a9a79e29acee53f5152476a
Binary files /dev/null and b/TTS/tts/utils/__pycache__/languages.cpython-39.pyc differ
diff --git a/TTS/tts/utils/__pycache__/managers.cpython-37.pyc b/TTS/tts/utils/__pycache__/managers.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..1bf02500272146d9050001a941472e5b56f54429
Binary files /dev/null and b/TTS/tts/utils/__pycache__/managers.cpython-37.pyc differ
diff --git a/TTS/tts/utils/__pycache__/managers.cpython-38.pyc b/TTS/tts/utils/__pycache__/managers.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..678a1b05ba369fd81906d14f94186f1bd49063b7
Binary files /dev/null and b/TTS/tts/utils/__pycache__/managers.cpython-38.pyc differ
diff --git a/TTS/tts/utils/__pycache__/managers.cpython-39.pyc b/TTS/tts/utils/__pycache__/managers.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..57e23897197dc7bc1f2b1b7434d390557a5e2965
Binary files /dev/null and b/TTS/tts/utils/__pycache__/managers.cpython-39.pyc differ
diff --git a/TTS/tts/utils/__pycache__/measures.cpython-37.pyc b/TTS/tts/utils/__pycache__/measures.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a2c3a698786ba40dcc5f9f713e6870498b424d03
Binary files /dev/null and b/TTS/tts/utils/__pycache__/measures.cpython-37.pyc differ
diff --git a/TTS/tts/utils/__pycache__/measures.cpython-38.pyc b/TTS/tts/utils/__pycache__/measures.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7dcee8a5fe78a57ec56440a6cac793067d3405e7
Binary files /dev/null and b/TTS/tts/utils/__pycache__/measures.cpython-38.pyc differ
diff --git a/TTS/tts/utils/__pycache__/measures.cpython-39.pyc b/TTS/tts/utils/__pycache__/measures.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d0b67b104d82c660e09e1fefee0b7e7a90c8ed54
Binary files /dev/null and b/TTS/tts/utils/__pycache__/measures.cpython-39.pyc differ
diff --git a/TTS/tts/utils/__pycache__/speakers.cpython-37.pyc b/TTS/tts/utils/__pycache__/speakers.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..242fb433ea1708cbd448d3396305722707444098
Binary files /dev/null and b/TTS/tts/utils/__pycache__/speakers.cpython-37.pyc differ
diff --git a/TTS/tts/utils/__pycache__/speakers.cpython-38.pyc b/TTS/tts/utils/__pycache__/speakers.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e49b879bb2844b7e774c3a5f3af2e3d7a4606864
Binary files /dev/null and b/TTS/tts/utils/__pycache__/speakers.cpython-38.pyc differ
diff --git a/TTS/tts/utils/__pycache__/speakers.cpython-39.pyc b/TTS/tts/utils/__pycache__/speakers.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ee7f9cc3f568c8e8fddcd9179df59e0a5dfeda72
Binary files /dev/null and b/TTS/tts/utils/__pycache__/speakers.cpython-39.pyc differ
diff --git a/TTS/tts/utils/__pycache__/ssim.cpython-37.pyc b/TTS/tts/utils/__pycache__/ssim.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5ff3af8fe7899ea554c32ff5744537e35d3df8ca
Binary files /dev/null and b/TTS/tts/utils/__pycache__/ssim.cpython-37.pyc differ
diff --git a/TTS/tts/utils/__pycache__/ssim.cpython-38.pyc b/TTS/tts/utils/__pycache__/ssim.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..3deafd07789a8dee1bb3fe3fda6c68a77616d897
Binary files /dev/null and b/TTS/tts/utils/__pycache__/ssim.cpython-38.pyc differ
diff --git a/TTS/tts/utils/__pycache__/ssim.cpython-39.pyc b/TTS/tts/utils/__pycache__/ssim.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..81edef0d332e43a0539ae04ebfde50c9e7a26fef
Binary files /dev/null and b/TTS/tts/utils/__pycache__/ssim.cpython-39.pyc differ
diff --git a/TTS/tts/utils/__pycache__/synthesis.cpython-37.pyc b/TTS/tts/utils/__pycache__/synthesis.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7e89ee1dd7bef803f760c7b81f58b92e547b5945
Binary files /dev/null and b/TTS/tts/utils/__pycache__/synthesis.cpython-37.pyc differ
diff --git a/TTS/tts/utils/__pycache__/synthesis.cpython-38.pyc b/TTS/tts/utils/__pycache__/synthesis.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..030cd6bf031ceff7b695140b899f41591bad8133
Binary files /dev/null and b/TTS/tts/utils/__pycache__/synthesis.cpython-38.pyc differ
diff --git a/TTS/tts/utils/__pycache__/synthesis.cpython-39.pyc b/TTS/tts/utils/__pycache__/synthesis.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..66c03b5160055e09335a8a2610572625465f00ef
Binary files /dev/null and b/TTS/tts/utils/__pycache__/synthesis.cpython-39.pyc differ
diff --git a/TTS/tts/utils/__pycache__/visual.cpython-310.pyc b/TTS/tts/utils/__pycache__/visual.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7cf99b688289efbe0ce70b3ea3da151c7569788b
Binary files /dev/null and b/TTS/tts/utils/__pycache__/visual.cpython-310.pyc differ
diff --git a/TTS/tts/utils/__pycache__/visual.cpython-37.pyc b/TTS/tts/utils/__pycache__/visual.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..3250c42bca80c8303b391192a72b5d5786f84aac
Binary files /dev/null and b/TTS/tts/utils/__pycache__/visual.cpython-37.pyc differ
diff --git a/TTS/tts/utils/__pycache__/visual.cpython-38.pyc b/TTS/tts/utils/__pycache__/visual.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..aaa76106b315496a46b49d6215d4231ac5c9f849
Binary files /dev/null and b/TTS/tts/utils/__pycache__/visual.cpython-38.pyc differ
diff --git a/TTS/tts/utils/__pycache__/visual.cpython-39.pyc b/TTS/tts/utils/__pycache__/visual.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..77a33f17bbc34a59f725966d189ecf54a2d8a331
Binary files /dev/null and b/TTS/tts/utils/__pycache__/visual.cpython-39.pyc differ
diff --git a/TTS/tts/utils/data.py b/TTS/tts/utils/data.py
new file mode 100644
index 0000000000000000000000000000000000000000..22e46b683adfc7f6c7c8a57fb5b697e422cd915c
--- /dev/null
+++ b/TTS/tts/utils/data.py
@@ -0,0 +1,79 @@
+import bisect
+
+import numpy as np
+import torch
+
+
+def _pad_data(x, length):
+    _pad = 0
+    assert x.ndim == 1
+    return np.pad(x, (0, length - x.shape[0]), mode="constant", constant_values=_pad)
+
+
+def prepare_data(inputs):
+    max_len = max((len(x) for x in inputs))
+    return np.stack([_pad_data(x, max_len) for x in inputs])
+
+
+def _pad_tensor(x, length):
+    _pad = 0.0
+    assert x.ndim == 2
+    x = np.pad(x, [[0, 0], [0, length - x.shape[1]]], mode="constant", constant_values=_pad)
+    return x
+
+
+def prepare_tensor(inputs, out_steps):
+    max_len = max((x.shape[1] for x in inputs))
+    remainder = max_len % out_steps
+    pad_len = max_len + (out_steps - remainder) if remainder > 0 else max_len
+    return np.stack([_pad_tensor(x, pad_len) for x in inputs])
+
+
+def _pad_stop_target(x: np.ndarray, length: int, pad_val=1) -> np.ndarray:
+    """Pad stop target array.
+
+    Args:
+        x (np.ndarray): Stop target array.
+        length (int): Length after padding.
+        pad_val (int, optional): Padding value. Defaults to 1.
+
+    Returns:
+        np.ndarray: Padded stop target array.
+    """
+    assert x.ndim == 1
+    return np.pad(x, (0, length - x.shape[0]), mode="constant", constant_values=pad_val)
+
+
+def prepare_stop_target(inputs, out_steps):
+    """Pad row vectors with 1."""
+    max_len = max((x.shape[0] for x in inputs))
+    remainder = max_len % out_steps
+    pad_len = max_len + (out_steps - remainder) if remainder > 0 else max_len
+    return np.stack([_pad_stop_target(x, pad_len) for x in inputs])
+
+
+def pad_per_step(inputs, pad_len):
+    return np.pad(inputs, [[0, 0], [0, 0], [0, pad_len]], mode="constant", constant_values=0.0)
+
+
+def get_length_balancer_weights(items: list, num_buckets=10):
+    # get all durations
+    audio_lengths = np.array([item["audio_length"] for item in items])
+    # create the $num_buckets buckets classes based in the dataset max and min length
+    max_length = int(max(audio_lengths))
+    min_length = int(min(audio_lengths))
+    step = int((max_length - min_length) / num_buckets) + 1
+    buckets_classes = [i + step for i in range(min_length, (max_length - step) + num_buckets + 1, step)]
+    # add each sample in their respective length bucket
+    buckets_names = np.array(
+        [buckets_classes[bisect.bisect_left(buckets_classes, item["audio_length"])] for item in items]
+    )
+    # count and compute the weights_bucket for each sample
+    unique_buckets_names = np.unique(buckets_names).tolist()
+    bucket_ids = [unique_buckets_names.index(l) for l in buckets_names]
+    bucket_count = np.array([len(np.where(buckets_names == l)[0]) for l in unique_buckets_names])
+    weight_bucket = 1.0 / bucket_count
+    dataset_samples_weight = np.array([weight_bucket[l] for l in bucket_ids])
+    # normalize
+    dataset_samples_weight = dataset_samples_weight / np.linalg.norm(dataset_samples_weight)
+    return torch.from_numpy(dataset_samples_weight).float()
diff --git a/TTS/tts/utils/helpers.py b/TTS/tts/utils/helpers.py
new file mode 100644
index 0000000000000000000000000000000000000000..b62004c8edbdbb2b3d5743ab030d518177851d5f
--- /dev/null
+++ b/TTS/tts/utils/helpers.py
@@ -0,0 +1,238 @@
+import numpy as np
+import torch
+from torch.nn import functional as F
+
+try:
+    from TTS.tts.utils.monotonic_align.core import maximum_path_c
+
+    CYTHON = True
+except ModuleNotFoundError:
+    CYTHON = False
+
+
+class StandardScaler:
+    """StandardScaler for mean-scale normalization with the given mean and scale values."""
+
+    def __init__(self, mean: np.ndarray = None, scale: np.ndarray = None) -> None:
+        self.mean_ = mean
+        self.scale_ = scale
+
+    def set_stats(self, mean, scale):
+        self.mean_ = mean
+        self.scale_ = scale
+
+    def reset_stats(self):
+        delattr(self, "mean_")
+        delattr(self, "scale_")
+
+    def transform(self, X):
+        X = np.asarray(X)
+        X -= self.mean_
+        X /= self.scale_
+        return X
+
+    def inverse_transform(self, X):
+        X = np.asarray(X)
+        X *= self.scale_
+        X += self.mean_
+        return X
+
+
+# from https://gist.github.com/jihunchoi/f1434a77df9db1bb337417854b398df1
+def sequence_mask(sequence_length, max_len=None):
+    """Create a sequence mask for filtering padding in a sequence tensor.
+
+    Args:
+        sequence_length (torch.tensor): Sequence lengths.
+        max_len (int, Optional): Maximum sequence length. Defaults to None.
+
+    Shapes:
+        - mask: :math:`[B, T_max]`
+    """
+    if max_len is None:
+        max_len = sequence_length.data.max()
+    seq_range = torch.arange(max_len, dtype=sequence_length.dtype, device=sequence_length.device)
+    # B x T_max
+    mask = seq_range.unsqueeze(0) < sequence_length.unsqueeze(1)
+    return mask
+
+
+def segment(x: torch.tensor, segment_indices: torch.tensor, segment_size=4, pad_short=False):
+    """Segment each sample in a batch based on the provided segment indices
+
+    Args:
+        x (torch.tensor): Input tensor.
+        segment_indices (torch.tensor): Segment indices.
+        segment_size (int): Expected output segment size.
+        pad_short (bool): Pad the end of input tensor with zeros if shorter than the segment size.
+    """
+    # pad the input tensor if it is shorter than the segment size
+    if pad_short and x.shape[-1] < segment_size:
+        x = torch.nn.functional.pad(x, (0, segment_size - x.size(2)))
+
+    segments = torch.zeros_like(x[:, :, :segment_size])
+
+    for i in range(x.size(0)):
+        index_start = segment_indices[i]
+        index_end = index_start + segment_size
+        x_i = x[i]
+        if pad_short and index_end >= x.size(2):
+            # pad the sample if it is shorter than the segment size
+            x_i = torch.nn.functional.pad(x_i, (0, (index_end + 1) - x.size(2)))
+        segments[i] = x_i[:, index_start:index_end]
+    return segments
+
+
+def rand_segments(
+    x: torch.tensor, x_lengths: torch.tensor = None, segment_size=4, let_short_samples=False, pad_short=False
+):
+    """Create random segments based on the input lengths.
+
+    Args:
+        x (torch.tensor): Input tensor.
+        x_lengths (torch.tensor, optional): Input lengths. If None, the full length `T` is used for every sample.
+        segment_size (int): Expected output segment size.
+        let_short_samples (bool): Allow shorter samples than the segment size.
+        pad_short (bool): Pad the end of input tensor with zeros if shorter than the segment size.
+
+    Shapes:
+        - x: :math:`[B, C, T]`
+        - x_lengths: :math:`[B]`
+    """
+    B, _, T = x.size()
+    if pad_short and T < segment_size:
+        x = torch.nn.functional.pad(x, (0, segment_size - T))
+        T = segment_size
+    if x_lengths is None:  # no lengths given: every sample spans the whole (possibly padded) time axis
+        _x_lenghts = torch.full((B,), T, dtype=torch.long, device=x.device)
+    else:  # clone so the in-place clamp below never touches the caller's tensor
+        _x_lenghts = x_lengths.clone()
+    len_diff = _x_lenghts - segment_size
+    if let_short_samples:
+        _x_lenghts[len_diff < 0] = segment_size
+        len_diff = _x_lenghts - segment_size
+    else:
+        assert all(
+            len_diff > 0
+        ), f" [!] At least one sample is shorter than the segment size ({segment_size}). \n {_x_lenghts}"
+    segment_indices = (torch.rand([B]).type_as(x) * (len_diff + 1)).long()
+    ret = segment(x, segment_indices, segment_size, pad_short=pad_short)
+    return ret, segment_indices
+
+
+def average_over_durations(values, durs):
+    """Average values over durations.
+
+    Shapes:
+        - values: :math:`[B, 1, T_de]`
+        - durs: :math:`[B, T_en]`
+        - avg: :math:`[B, 1, T_en]`
+    """
+    durs_cums_ends = torch.cumsum(durs, dim=1).long()
+    durs_cums_starts = torch.nn.functional.pad(durs_cums_ends[:, :-1], (1, 0))
+    values_nonzero_cums = torch.nn.functional.pad(torch.cumsum(values != 0.0, dim=2), (1, 0))
+    values_cums = torch.nn.functional.pad(torch.cumsum(values, dim=2), (1, 0))
+
+    bs, l = durs_cums_ends.size()
+    n_formants = values.size(1)
+    dcs = durs_cums_starts[:, None, :].expand(bs, n_formants, l)
+    dce = durs_cums_ends[:, None, :].expand(bs, n_formants, l)
+
+    values_sums = (torch.gather(values_cums, 2, dce) - torch.gather(values_cums, 2, dcs)).float()
+    values_nelems = (torch.gather(values_nonzero_cums, 2, dce) - torch.gather(values_nonzero_cums, 2, dcs)).float()
+
+    avg = torch.where(values_nelems == 0.0, values_nelems, values_sums / values_nelems)
+    return avg
+
+
+def convert_pad_shape(pad_shape):
+    """Flatten per-dimension ``[before, after]`` pairs into one list, last listed dimension first."""
+    flattened = [bound for pair in pad_shape[::-1] for bound in pair]
+    return flattened
+
+
+def generate_path(duration, mask):
+    """Build an alignment path from durations: row i is set on the frames in [cumsum[i-1], cumsum[i]), then masked.
+    Shapes:
+        - duration: :math:`[B, T_en]`
+        - mask: :math:`[B, T_en, T_de]`
+        - path: :math:`[B, T_en, T_de]`
+    """
+    device = duration.device
+    b, t_x, t_y = mask.shape
+    cum_duration = torch.cumsum(duration, 1)
+    path = torch.zeros(b, t_x, t_y, dtype=mask.dtype).to(device=device)  # NOTE: unused, `path` is rebound below
+
+    cum_duration_flat = cum_duration.view(b * t_x)
+    path = sequence_mask(cum_duration_flat, t_y).to(mask.dtype)  # row i marks every frame < cumsum[i]
+    path = path.view(b, t_x, t_y)
+    path = path - F.pad(path, convert_pad_shape([[0, 0], [1, 0], [0, 0]]))[:, :-1]  # subtract row i-1 from row i
+    path = path * mask
+    return path
+
+
+def maximum_path(value, mask):
+    if CYTHON:
+        return maximum_path_cython(value, mask)
+    return maximum_path_numpy(value, mask)
+
+
+def maximum_path_cython(value, mask):
+    """Cython optimised version.
+    Shapes:
+        - value: :math:`[B, T_en, T_de]`
+        - mask: :math:`[B, T_en, T_de]`
+    """
+    value = value * mask
+    device = value.device
+    dtype = value.dtype
+    value = value.data.cpu().numpy().astype(np.float32)
+    path = np.zeros_like(value).astype(np.int32)
+    mask = mask.data.cpu().numpy()
+
+    t_x_max = mask.sum(1)[:, 0].astype(np.int32)
+    t_y_max = mask.sum(2)[:, 0].astype(np.int32)
+    maximum_path_c(path, value, t_x_max, t_y_max)
+    return torch.from_numpy(path).to(device=device, dtype=dtype)
+
+
+def maximum_path_numpy(value, mask, max_neg_val=None):
+    """
+    Monotonic alignment search (numpy implementation; about 4 times faster than the torch version).
+    Returns the 0/1 best-path tensor with the shape, device and dtype of `value`.
+    value: [b, t_x, t_y] scores to maximise along the path
+    mask: [b, t_x, t_y] non-zero where a position is valid
+    """
+    if max_neg_val is None:
+        max_neg_val = -np.inf  # Patch for Sphinx complaint
+    value = value * mask
+
+    device = value.device
+    dtype = value.dtype
+    value = value.cpu().detach().numpy()
+    mask = mask.cpu().detach().numpy().astype(bool)  # builtin bool: the `np.bool` alias was removed in NumPy 1.24
+
+    b, t_x, t_y = value.shape
+    direction = np.zeros(value.shape, dtype=np.int64)
+    v = np.zeros((b, t_x), dtype=np.float32)
+    x_range = np.arange(t_x, dtype=np.float32).reshape(1, -1)
+    for j in range(t_y):  # forward pass: best cumulative score per row, remembering stay (1) vs. advance (0)
+        v0 = np.pad(v, [[0, 0], [1, 0]], mode="constant", constant_values=max_neg_val)[:, :-1]
+        v1 = v
+        max_mask = v1 >= v0
+        v_max = np.where(max_mask, v1, v0)
+        direction[:, :, j] = max_mask
+
+        index_mask = x_range <= j
+        v = np.where(index_mask, v_max + value[:, :, j], max_neg_val)
+    direction = np.where(mask, direction, 1)
+
+    path = np.zeros(value.shape, dtype=np.float32)
+    index = mask[:, :, 0].sum(1).astype(np.int64) - 1
+    index_range = np.arange(b)
+    for j in reversed(range(t_y)):  # backtrack from the last valid row, following the stored directions
+        path[index_range, index, j] = 1
+        index = index + direction[index_range, index, j] - 1
+    path = path * mask.astype(np.float32)
+    path = torch.from_numpy(path).to(device=device, dtype=dtype)
+    return path
diff --git a/TTS/tts/utils/languages.py b/TTS/tts/utils/languages.py
new file mode 100644
index 0000000000000000000000000000000000000000..1e1836b32ce2010ad55a0253849f2e59c61dad82
--- /dev/null
+++ b/TTS/tts/utils/languages.py
@@ -0,0 +1,125 @@
+import os
+from typing import Any, Dict, List
+
+import fsspec
+import numpy as np
+import torch
+from coqpit import Coqpit
+
+from TTS.config import check_config_and_model_args
+from TTS.tts.utils.managers import BaseIDManager
+
+
+class LanguageManager(BaseIDManager):
+    """Manage the languages for multi-lingual 🐸TTS models. Load a datafile and parse the information
+    in a way that can be queried by language.
+
+    Args:
+        language_ids_file_path (str, optional): Path to the metafile that maps language names to ids used by
+        TTS models. Defaults to "".
+        config (Coqpit, optional): Coqpit config that contains the language information in the datasets field.
+        Defaults to None.
+
+    Examples:
+        >>> manager = LanguageManager(language_ids_file_path=language_ids_file_path)
+        >>> language_id_mapper = manager.name_to_id
+    """
+
+    def __init__(
+        self,
+        language_ids_file_path: str = "",
+        config: Coqpit = None,
+    ):
+        super().__init__(id_file_path=language_ids_file_path)
+
+        if config:
+            self.set_language_ids_from_config(config)
+
+    @property
+    def num_languages(self) -> int:
+        return len(list(self.name_to_id.keys()))
+
+    @property
+    def language_names(self) -> List:
+        return list(self.name_to_id.keys())
+
+    @staticmethod
+    def parse_language_ids_from_config(c: Coqpit) -> Dict:
+        """Build the language-name-to-ID mapping from the datasets listed in the config.
+
+        Args:
+            c (Coqpit): Config whose `datasets` entries each carry a "language" key (ValueError otherwise).
+
+        Returns:
+            Dict: Language name to ID mapping; IDs follow the sorted order of the names.
+        """
+        languages = set({})
+        for dataset in c.datasets:
+            if "language" in dataset:
+                languages.add(dataset["language"])
+            else:
+                raise ValueError(f"Dataset {dataset['name']} has no language specified.")
+        return {name: i for i, name in enumerate(sorted(list(languages)))}
+
+    def set_language_ids_from_config(self, c: Coqpit) -> None:
+        """Set language IDs from config samples.
+
+        Args:
+            c (Coqpit): Config.
+        """
+        self.name_to_id = self.parse_language_ids_from_config(c)
+
+    @staticmethod
+    def parse_ids_from_data(items: List, parse_key: str) -> Any:
+        raise NotImplementedError
+
+    def set_ids_from_data(self, items: List, parse_key: str) -> Any:
+        raise NotImplementedError
+
+    def save_ids_to_file(self, file_path: str) -> None:
+        """Save language IDs to a json file.
+
+        Args:
+            file_path (str): Path to the output file.
+        """
+        self._save_json(file_path, self.name_to_id)
+
+    @staticmethod
+    def init_from_config(config: Coqpit) -> "LanguageManager":
+        """Initialize the language manager from a Coqpit config; returns None when language embedding is not enabled.
+
+        Args:
+            config (Coqpit): Coqpit config. A non-empty `language_ids_file` takes precedence over `datasets`.
+        """
+        language_manager = None
+        if check_config_and_model_args(config, "use_language_embedding", True):
+            if config.get("language_ids_file", None):  # return here so the file-based IDs are not overwritten below
+                return LanguageManager(language_ids_file_path=config.language_ids_file)
+            language_manager = LanguageManager(config=config)
+        return language_manager
+
+
+def _set_file_path(path):
+    """Find `language_ids.json` in the parent directory of `path` or, failing that, directly under `path`.
+    Intended to band-aid the different paths returned in restored and continued training. Returns None if absent."""
+    path_restore = os.path.join(os.path.dirname(path), "language_ids.json")
+    path_continue = os.path.join(path, "language_ids.json")
+    fs = fsspec.get_mapper(path).fs
+    if fs.exists(path_restore):
+        return path_restore
+    if fs.exists(path_continue):
+        return path_continue
+    return None
+
+
+def get_language_balancer_weights(items: list):
+    """Return one sampling weight per item, inversely proportional to its language's frequency (L2-normalised)."""
+    names = np.array([sample["language"] for sample in items])
+    # `inverse` maps each sample to its language's slot; `counts` holds that language's sample count
+    _, inverse, counts = np.unique(names, return_inverse=True, return_counts=True)
+    per_language = 1.0 / counts
+    # look up the weight of each sample's language
+    per_sample = per_language[inverse]
+    # scale the weight vector to unit L2 norm
+    per_sample = per_sample / np.linalg.norm(per_sample)
+    return torch.from_numpy(per_sample).float()
diff --git a/TTS/tts/utils/managers.py b/TTS/tts/utils/managers.py
new file mode 100644
index 0000000000000000000000000000000000000000..46d999a228e2eb87505c13996ddbce2e3ed1962b
--- /dev/null
+++ b/TTS/tts/utils/managers.py
@@ -0,0 +1,380 @@
+import json
+import random
+from typing import Any, Dict, List, Tuple, Union
+
+import fsspec
+import numpy as np
+import torch
+
+from TTS.config import load_config
+from TTS.encoder.utils.generic_utils import setup_encoder_model
+from TTS.utils.audio import AudioProcessor
+
+
+def load_file(path: str):
+    """Load an object from a `.json` or `.pth` file opened through fsspec.
+
+    Args:
+        path (str): Path to the file; the extension selects the loader.
+
+    Returns:
+        The parsed JSON content, or the object restored by `torch.load` on CPU.
+
+    Raises:
+        ValueError: If the path ends with neither ".json" nor ".pth".
+    """
+    is_json = path.endswith(".json")
+    if not is_json and not path.endswith(".pth"):
+        raise ValueError("Unsupported file type")
+    if is_json:
+        with fsspec.open(path, "r") as handle:
+            return json.load(handle)
+    # NOTE: torch.load unpickles the file content; only load trusted files.
+    with fsspec.open(path, "rb") as handle:
+        return torch.load(handle, map_location="cpu")
+
+
+def save_file(obj: Any, path: str):
+    """Write `obj` to a `.json` or `.pth` file opened through fsspec.
+
+    Args:
+        obj (Any): Object to store; dumped as indented JSON or with `torch.save`.
+        path (str): Output path; the extension selects the writer.
+
+    Raises:
+        ValueError: If the path ends with neither ".json" nor ".pth".
+    """
+    if path.endswith(".json"):
+        mode, dump = "w", lambda handle: json.dump(obj, handle, indent=4)
+    elif path.endswith(".pth"):
+        mode, dump = "wb", lambda handle: torch.save(obj, handle)
+    else:
+        raise ValueError("Unsupported file type")
+    with fsspec.open(path, mode) as handle:
+        dump(handle)
+
+
+class BaseIDManager:
+    """Base `ID` Manager class. Every new `ID` manager must inherit this.
+    It defines common `ID` manager specific functions.
+
+    The manager holds a single mapping, `name_to_id`, which is either loaded from a
+    file or parsed from data samples.
+    """
+
+    def __init__(self, id_file_path: str = ""):
+        """
+        Args:
+            id_file_path (str, optional): File to load the IDs from. Nothing is loaded
+                when empty. Defaults to "".
+        """
+        self.name_to_id = {}
+
+        if id_file_path:
+            self.load_ids_from_file(id_file_path)
+
+    @staticmethod
+    def _load_json(json_file_path: str) -> Dict:
+        """Read and parse a json file opened through fsspec."""
+        with fsspec.open(json_file_path, "r") as f:
+            return json.load(f)
+
+    @staticmethod
+    def _save_json(json_file_path: str, data: dict) -> None:
+        """Write `data` as indented json to a file opened through fsspec."""
+        with fsspec.open(json_file_path, "w") as f:
+            json.dump(data, f, indent=4)
+
+    def set_ids_from_data(self, items: List, parse_key: str) -> None:
+        """Set IDs from data samples.
+
+        Args:
+            items (List): Data sampled returned by `load_tts_samples()`.
+            parse_key (str): The key of each sample that holds the name to be mapped to an ID.
+        """
+        self.name_to_id = self.parse_ids_from_data(items, parse_key=parse_key)
+
+    def load_ids_from_file(self, file_path: str) -> None:
+        """Set IDs from a file.
+
+        Args:
+            file_path (str): Path to the file.
+        """
+        self.name_to_id = load_file(file_path)
+
+    def save_ids_to_file(self, file_path: str) -> None:
+        """Save IDs to a file.
+
+        Args:
+            file_path (str): Path to the output file.
+        """
+        save_file(self.name_to_id, file_path)
+
+    def get_random_id(self) -> Any:
+        """Get a random ID.
+
+        Returns:
+            Any: The value of a randomly picked entry of `name_to_id`, or None when
+                no IDs are set.
+        """
+        if self.name_to_id:
+            return self.name_to_id[random.choices(list(self.name_to_id.keys()))[0]]
+
+        return None
+
+    @staticmethod
+    def parse_ids_from_data(items: List, parse_key: str) -> Dict:
+        """Parse IDs from data samples returned by `load_tts_samples()`.
+
+        Args:
+            items (list): Data sampled returned by `load_tts_samples()`.
+            parse_key (str): The key to being used to parse the data.
+        Returns:
+            Dict: Mapping from each unique name to its index in the sorted list of names.
+        """
+        classes = sorted({item[parse_key] for item in items})
+        ids = {name: i for i, name in enumerate(classes)}
+        return ids
+
+
+class EmbeddingManager(BaseIDManager):
+    """Base `Embedding` Manager class. Every new `Embedding` manager must inherit this.
+    It defines common `Embedding` manager specific functions.
+
+    It expects embeddings files in the following format:
+
+    ::
+
+        {
+            'audio_file_key':{
+                'name': 'category_name',
+                'embedding': [<embedding_values>]
+            },
+            ...
+        }
+
+    `audio_file_key` is a unique key to the audio file in the dataset. It can be the path to the file or any other unique key.
+    `embedding` is the embedding vector of the audio file.
+    `name` can be name of the speaker of the audio file.
+    """
+
+    def __init__(
+        self,
+        embedding_file_path: Union[str, List[str]] = "",
+        id_file_path: str = "",
+        encoder_model_path: str = "",
+        encoder_config_path: str = "",
+        use_cuda: bool = False,
+    ):
+        """
+        Args:
+            embedding_file_path (Union[str, List[str]], optional): One embedding file or a list of
+                them to load. Defaults to "".
+            id_file_path (str, optional): ID file handed to `BaseIDManager`. Defaults to "".
+            encoder_model_path (str, optional): Encoder checkpoint path. Defaults to "".
+            encoder_config_path (str, optional): Encoder config path. The encoder is only
+                initialized when both encoder paths are given. Defaults to "".
+            use_cuda (bool, optional): Move encoder inputs to CUDA. Defaults to False.
+        """
+        super().__init__(id_file_path=id_file_path)
+
+        self.embeddings = {}
+        self.embeddings_by_names = {}
+        self.clip_ids = []
+        self.encoder = None
+        self.encoder_ap = None
+        # Set by `init_encoder()`; declared here so the attributes always exist.
+        self.encoder_config = None
+        self.encoder_criterion = None
+        self.use_cuda = use_cuda
+
+        if embedding_file_path:
+            if isinstance(embedding_file_path, list):
+                self.load_embeddings_from_list_of_files(embedding_file_path)
+            else:
+                self.load_embeddings_from_file(embedding_file_path)
+
+        if encoder_model_path and encoder_config_path:
+            self.init_encoder(encoder_model_path, encoder_config_path, use_cuda)
+
+    @property
+    def num_embeddings(self):
+        """Get number of embeddings."""
+        return len(self.embeddings)
+
+    @property
+    def num_names(self):
+        """Get number of unique names that have embeddings."""
+        return len(self.embeddings_by_names)
+
+    @property
+    def embedding_dim(self):
+        """Dimensionality of embeddings. If embeddings are not loaded, returns zero."""
+        if self.embeddings:
+            # All entries are assumed to share one dimensionality; inspect the first one.
+            return len(next(iter(self.embeddings.values()))["embedding"])
+        return 0
+
+    @property
+    def embedding_names(self):
+        """Get embedding names."""
+        return list(self.embeddings_by_names.keys())
+
+    def save_embeddings_to_file(self, file_path: str) -> None:
+        """Save embeddings to a file.
+
+        Args:
+            file_path (str): Path to the output file.
+        """
+        save_file(self.embeddings, file_path)
+
+    @staticmethod
+    def _group_embeddings_by_name(embeddings: Dict) -> Dict:
+        """Group the embedding vectors of `embeddings` by the `name` of their entry."""
+        grouped = {}
+        for entry in embeddings.values():
+            grouped.setdefault(entry["name"], []).append(entry["embedding"])
+        return grouped
+
+    @staticmethod
+    def read_embeddings_from_file(file_path: str):
+        """Load embeddings from a file.
+
+        Args:
+            file_path (str): Path to the file.
+
+        Returns:
+            Tuple: `(name_to_id, clip_ids, embeddings, embeddings_by_names)` where `name_to_id`
+                enumerates the sorted names, `clip_ids` are the sorted keys of the file,
+                `embeddings` is the loaded content and `embeddings_by_names` groups the
+                embedding vectors by name.
+        """
+        embeddings = load_file(file_path)
+        speakers = sorted({x["name"] for x in embeddings.values()})
+        name_to_id = {name: i for i, name in enumerate(speakers)}
+        # Dict keys are already unique; sorting them keeps the clip order deterministic
+        # (wrapping the sorted keys in a set, as before, discarded that order).
+        clip_ids = sorted(embeddings.keys())
+        # cache embeddings_by_names for fast inference using a bigger speakers.json
+        embeddings_by_names = EmbeddingManager._group_embeddings_by_name(embeddings)
+        return name_to_id, clip_ids, embeddings, embeddings_by_names
+
+    def load_embeddings_from_file(self, file_path: str) -> None:
+        """Load embeddings from a file.
+
+        Args:
+            file_path (str): Path to the target file.
+        """
+        self.name_to_id, self.clip_ids, self.embeddings, self.embeddings_by_names = self.read_embeddings_from_file(
+            file_path
+        )
+
+    def load_embeddings_from_list_of_files(self, file_paths: List[str]) -> None:
+        """Load embeddings from a list of files and don't allow duplicate keys.
+
+        Args:
+            file_paths (List[str]): List of paths to the target files.
+
+        Raises:
+            ValueError: If a clip key appears in more than one file.
+        """
+        self.name_to_id = {}
+        self.clip_ids = []
+        self.embeddings_by_names = {}
+        self.embeddings = {}
+        for file_path in file_paths:
+            _, clip_ids, embeddings, embeddings_by_names = self.read_embeddings_from_file(file_path)
+            # check colliding keys
+            duplicates = set(self.embeddings.keys()) & set(embeddings.keys())
+            if duplicates:
+                raise ValueError(f" [!] Duplicate embedding names <{duplicates}> in {file_path}")
+            # store values
+            self.clip_ids.extend(clip_ids)
+            # A name may occur in several files: extend its list instead of replacing it.
+            for name, vectors in embeddings_by_names.items():
+                self.embeddings_by_names.setdefault(name, []).extend(vectors)
+            self.embeddings.update(embeddings)
+        # Per-file IDs all start at zero and would collide once merged, so the IDs are
+        # assigned over the sorted union of names, as done for a single file.
+        self.name_to_id = {name: i for i, name in enumerate(sorted(self.embeddings_by_names))}
+
+    def get_embedding_by_clip(self, clip_idx: str) -> List:
+        """Get embedding by clip ID.
+
+        Args:
+            clip_idx (str): Target clip ID.
+
+        Returns:
+            List: embedding as a list.
+        """
+        return self.embeddings[clip_idx]["embedding"]
+
+    def get_embeddings_by_name(self, idx: str) -> List[List]:
+        """Get all embeddings of a speaker.
+
+        Args:
+            idx (str): Target name.
+
+        Returns:
+            List[List]: all the embeddings of the given speaker.
+        """
+        return self.embeddings_by_names[idx]
+
+    def get_embeddings_by_names(self) -> Dict:
+        """Get all embeddings by names.
+
+        The grouping is rebuilt from `self.embeddings` on every call.
+
+        Returns:
+            Dict: all the embeddings of each speaker.
+        """
+        return self._group_embeddings_by_name(self.embeddings)
+
+    def get_mean_embedding(self, idx: str, num_samples: int = None, randomize: bool = False) -> np.ndarray:
+        """Get mean embedding of a idx.
+
+        Args:
+            idx (str): Target name.
+            num_samples (int, optional): Number of samples to be averaged. Defaults to None.
+            randomize (bool, optional): Pick random `num_samples` of embeddings (drawn with
+                replacement by `random.choices`). Defaults to False.
+
+        Returns:
+            np.ndarray: Mean embedding.
+        """
+        embeddings = self.get_embeddings_by_name(idx)
+        if num_samples is None:
+            embeddings = np.stack(embeddings).mean(0)
+        else:
+            assert len(embeddings) >= num_samples, f" [!] {idx} has number of samples < {num_samples}"
+            if randomize:
+                embeddings = np.stack(random.choices(embeddings, k=num_samples)).mean(0)
+            else:
+                embeddings = np.stack(embeddings[:num_samples]).mean(0)
+        return embeddings
+
+    def get_random_embedding(self) -> Any:
+        """Get a random embedding.
+
+        Returns:
+            Any: The `embedding` field of a randomly picked entry, or None when no
+                embeddings are loaded.
+        """
+        if self.embeddings:
+            return self.embeddings[random.choices(list(self.embeddings.keys()))[0]]["embedding"]
+
+        return None
+
+    def get_clips(self) -> List:
+        """Get the sorted clip keys of the loaded embeddings."""
+        return sorted(self.embeddings.keys())
+
+    def init_encoder(self, model_path: str, config_path: str, use_cuda=False) -> None:
+        """Initialize a speaker encoder model.
+
+        Args:
+            model_path (str): Model file path.
+            config_path (str): Model config file path.
+            use_cuda (bool, optional): Use CUDA. Defaults to False.
+        """
+        self.use_cuda = use_cuda
+        self.encoder_config = load_config(config_path)
+        self.encoder = setup_encoder_model(self.encoder_config)
+        self.encoder_criterion = self.encoder.load_checkpoint(
+            self.encoder_config, model_path, eval=True, use_cuda=use_cuda
+        )
+        self.encoder_ap = AudioProcessor(**self.encoder_config.audio)
+
+    def compute_embedding_from_clip(self, wav_file: Union[str, List[str]]) -> list:
+        """Compute a embedding from a given audio file.
+
+        Requires the encoder to be initialized with `init_encoder()`.
+
+        Args:
+            wav_file (Union[str, List[str]]): Target file path, or a list of paths whose
+                embeddings are averaged.
+
+        Returns:
+            list: Computed embedding.
+        """
+
+        def _compute(wav_file: str):
+            waveform = self.encoder_ap.load_wav(wav_file, sr=self.encoder_ap.sample_rate)
+            # The encoder gets the raw waveform only when its config sets `use_torch_spec`;
+            # otherwise the mel spectrogram is computed here.
+            if not self.encoder_config.model_params.get("use_torch_spec", False):
+                m_input = self.encoder_ap.melspectrogram(waveform)
+                m_input = torch.from_numpy(m_input)
+            else:
+                m_input = torch.from_numpy(waveform)
+
+            if self.use_cuda:
+                m_input = m_input.cuda()
+            m_input = m_input.unsqueeze(0)
+            embedding = self.encoder.compute_embedding(m_input)
+            return embedding
+
+        if isinstance(wav_file, list):
+            # compute the mean embedding
+            embeddings = None
+            for wf in wav_file:
+                embedding = _compute(wf)
+                if embeddings is None:
+                    embeddings = embedding
+                else:
+                    embeddings += embedding
+            return (embeddings / len(wav_file))[0].tolist()
+        embedding = _compute(wav_file)
+        return embedding[0].tolist()
+
+    def compute_embeddings(self, feats: Union[torch.Tensor, np.ndarray]) -> List:
+        """Compute embedding from features.
+
+        Args:
+            feats (Union[torch.Tensor, np.ndarray]): Input features. A 2-dimensional input
+                gets a leading dimension added before it is passed to the encoder.
+
+        Returns:
+            List: computed embedding, as returned by the encoder's `compute_embedding()`.
+        """
+        if isinstance(feats, np.ndarray):
+            feats = torch.from_numpy(feats)
+        if feats.ndim == 2:
+            feats = feats.unsqueeze(0)
+        if self.use_cuda:
+            feats = feats.cuda()
+        return self.encoder.compute_embedding(feats)
diff --git a/TTS/tts/utils/measures.py b/TTS/tts/utils/measures.py
new file mode 100644
index 0000000000000000000000000000000000000000..90e862e1190bdb8443933580b3ff47321f70cecd
--- /dev/null
+++ b/TTS/tts/utils/measures.py
@@ -0,0 +1,15 @@
+def alignment_diagonal_score(alignments, binary=False):
+    """
+    Score how diagonal a batch of alignment predictions is. Useful as a
+    measure of the alignment consistency of a model.
+    Args:
+        alignments (torch.Tensor): batch of alignments.
+        binary (bool): if True, every positive maximum counts as 1, i.e. the
+        attention is treated as a binary mask.
+    Shape:
+        - alignments : :math:`[B, T_de, T_en]`
+    Returns:
+        float: maxima over dim 1, averaged over dim 1 of the result and then over the batch.
+    """
+    peaks, _ = alignments.max(dim=1)
+    if binary:
+        peaks[peaks > 0] = 1
+    per_sample = peaks.mean(dim=1)
+    return per_sample.mean(dim=0).item()
diff --git a/TTS/tts/utils/monotonic_align/__init__.py b/TTS/tts/utils/monotonic_align/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/TTS/tts/utils/monotonic_align/__pycache__/__init__.cpython-310.pyc b/TTS/tts/utils/monotonic_align/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..790e2a324abb338cfe781fea0243f738a1cb11a1
Binary files /dev/null and b/TTS/tts/utils/monotonic_align/__pycache__/__init__.cpython-310.pyc differ
diff --git a/TTS/tts/utils/monotonic_align/__pycache__/__init__.cpython-37.pyc b/TTS/tts/utils/monotonic_align/__pycache__/__init__.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..2cf886c4b18f470a68204e5f142bc84423459341
Binary files /dev/null and b/TTS/tts/utils/monotonic_align/__pycache__/__init__.cpython-37.pyc differ
diff --git a/TTS/tts/utils/monotonic_align/__pycache__/__init__.cpython-38.pyc b/TTS/tts/utils/monotonic_align/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..9a721030dde414c954c8c9b027c889757f17a9b7
Binary files /dev/null and b/TTS/tts/utils/monotonic_align/__pycache__/__init__.cpython-38.pyc differ
diff --git a/TTS/tts/utils/monotonic_align/__pycache__/__init__.cpython-39.pyc b/TTS/tts/utils/monotonic_align/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..3953b5ef58746849365ef4a0c2c6fa25ae08e5bf
Binary files /dev/null and b/TTS/tts/utils/monotonic_align/__pycache__/__init__.cpython-39.pyc differ
diff --git a/TTS/tts/utils/monotonic_align/core.c b/TTS/tts/utils/monotonic_align/core.c
new file mode 100644
index 0000000000000000000000000000000000000000..7b02e1337c5f5bea7a1eaeab55a000dd87b3d56f
--- /dev/null
+++ b/TTS/tts/utils/monotonic_align/core.c
@@ -0,0 +1,23398 @@
+/* Generated by Cython 0.29.28 */
+
+/* BEGIN: Cython Metadata
+{
+    "distutils": {
+        "depends": [],
+        "name": "TTS.tts.utils.monotonic_align.core",
+        "sources": [
+            "TTS/tts/utils/monotonic_align/core.pyx"
+        ]
+    },
+    "module_name": "TTS.tts.utils.monotonic_align.core"
+}
+END: Cython Metadata */
+
+#ifndef PY_SSIZE_T_CLEAN
+#define PY_SSIZE_T_CLEAN
+#endif /* PY_SSIZE_T_CLEAN */
+#include "Python.h"
+#ifndef Py_PYTHON_H
+    #error Python headers needed to compile C extensions, please install development version of Python.
+#elif PY_VERSION_HEX < 0x02060000 || (0x03000000 <= PY_VERSION_HEX && PY_VERSION_HEX < 0x03030000)
+    #error Cython requires Python 2.6+ or Python 3.3+.
+#else
+#define CYTHON_ABI "0_29_28"
+#define CYTHON_HEX_VERSION 0x001D1CF0
+#define CYTHON_FUTURE_DIVISION 1
+#include <stddef.h>
+#ifndef offsetof
+  #define offsetof(type, member) ( (size_t) & ((type*)0) -> member )
+#endif
+#if !defined(WIN32) && !defined(MS_WINDOWS)
+  #ifndef __stdcall
+    #define __stdcall
+  #endif
+  #ifndef __cdecl
+    #define __cdecl
+  #endif
+  #ifndef __fastcall
+    #define __fastcall
+  #endif
+#endif
+#ifndef DL_IMPORT
+  #define DL_IMPORT(t) t
+#endif
+#ifndef DL_EXPORT
+  #define DL_EXPORT(t) t
+#endif
+#define __PYX_COMMA ,
+#ifndef HAVE_LONG_LONG
+  #if PY_VERSION_HEX >= 0x02070000
+    #define HAVE_LONG_LONG
+  #endif
+#endif
+#ifndef PY_LONG_LONG
+  #define PY_LONG_LONG LONG_LONG
+#endif
+#ifndef Py_HUGE_VAL
+  #define Py_HUGE_VAL HUGE_VAL
+#endif
+#ifdef PYPY_VERSION
+  #define CYTHON_COMPILING_IN_PYPY 1
+  #define CYTHON_COMPILING_IN_PYSTON 0
+  #define CYTHON_COMPILING_IN_CPYTHON 0
+  #undef CYTHON_USE_TYPE_SLOTS
+  #define CYTHON_USE_TYPE_SLOTS 0
+  #undef CYTHON_USE_PYTYPE_LOOKUP
+  #define CYTHON_USE_PYTYPE_LOOKUP 0
+  #if PY_VERSION_HEX < 0x03050000
+    #undef CYTHON_USE_ASYNC_SLOTS
+    #define CYTHON_USE_ASYNC_SLOTS 0
+  #elif !defined(CYTHON_USE_ASYNC_SLOTS)
+    #define CYTHON_USE_ASYNC_SLOTS 1
+  #endif
+  #undef CYTHON_USE_PYLIST_INTERNALS
+  #define CYTHON_USE_PYLIST_INTERNALS 0
+  #undef CYTHON_USE_UNICODE_INTERNALS
+  #define CYTHON_USE_UNICODE_INTERNALS 0
+  #undef CYTHON_USE_UNICODE_WRITER
+  #define CYTHON_USE_UNICODE_WRITER 0
+  #undef CYTHON_USE_PYLONG_INTERNALS
+  #define CYTHON_USE_PYLONG_INTERNALS 0
+  #undef CYTHON_AVOID_BORROWED_REFS
+  #define CYTHON_AVOID_BORROWED_REFS 1
+  #undef CYTHON_ASSUME_SAFE_MACROS
+  #define CYTHON_ASSUME_SAFE_MACROS 0
+  #undef CYTHON_UNPACK_METHODS
+  #define CYTHON_UNPACK_METHODS 0
+  #undef CYTHON_FAST_THREAD_STATE
+  #define CYTHON_FAST_THREAD_STATE 0
+  #undef CYTHON_FAST_PYCALL
+  #define CYTHON_FAST_PYCALL 0
+  #undef CYTHON_PEP489_MULTI_PHASE_INIT
+  #define CYTHON_PEP489_MULTI_PHASE_INIT 0
+  #undef CYTHON_USE_TP_FINALIZE
+  #define CYTHON_USE_TP_FINALIZE 0
+  #undef CYTHON_USE_DICT_VERSIONS
+  #define CYTHON_USE_DICT_VERSIONS 0
+  #undef CYTHON_USE_EXC_INFO_STACK
+  #define CYTHON_USE_EXC_INFO_STACK 0
+#elif defined(PYSTON_VERSION)
+  #define CYTHON_COMPILING_IN_PYPY 0
+  #define CYTHON_COMPILING_IN_PYSTON 1
+  #define CYTHON_COMPILING_IN_CPYTHON 0
+  #ifndef CYTHON_USE_TYPE_SLOTS
+    #define CYTHON_USE_TYPE_SLOTS 1
+  #endif
+  #undef CYTHON_USE_PYTYPE_LOOKUP
+  #define CYTHON_USE_PYTYPE_LOOKUP 0
+  #undef CYTHON_USE_ASYNC_SLOTS
+  #define CYTHON_USE_ASYNC_SLOTS 0
+  #undef CYTHON_USE_PYLIST_INTERNALS
+  #define CYTHON_USE_PYLIST_INTERNALS 0
+  #ifndef CYTHON_USE_UNICODE_INTERNALS
+    #define CYTHON_USE_UNICODE_INTERNALS 1
+  #endif
+  #undef CYTHON_USE_UNICODE_WRITER
+  #define CYTHON_USE_UNICODE_WRITER 0
+  #undef CYTHON_USE_PYLONG_INTERNALS
+  #define CYTHON_USE_PYLONG_INTERNALS 0
+  #ifndef CYTHON_AVOID_BORROWED_REFS
+    #define CYTHON_AVOID_BORROWED_REFS 0
+  #endif
+  #ifndef CYTHON_ASSUME_SAFE_MACROS
+    #define CYTHON_ASSUME_SAFE_MACROS 1
+  #endif
+  #ifndef CYTHON_UNPACK_METHODS
+    #define CYTHON_UNPACK_METHODS 1
+  #endif
+  #undef CYTHON_FAST_THREAD_STATE
+  #define CYTHON_FAST_THREAD_STATE 0
+  #undef CYTHON_FAST_PYCALL
+  #define CYTHON_FAST_PYCALL 0
+  #undef CYTHON_PEP489_MULTI_PHASE_INIT
+  #define CYTHON_PEP489_MULTI_PHASE_INIT 0
+  #undef CYTHON_USE_TP_FINALIZE
+  #define CYTHON_USE_TP_FINALIZE 0
+  #undef CYTHON_USE_DICT_VERSIONS
+  #define CYTHON_USE_DICT_VERSIONS 0
+  #undef CYTHON_USE_EXC_INFO_STACK
+  #define CYTHON_USE_EXC_INFO_STACK 0
+#else
+  #define CYTHON_COMPILING_IN_PYPY 0
+  #define CYTHON_COMPILING_IN_PYSTON 0
+  #define CYTHON_COMPILING_IN_CPYTHON 1
+  #ifndef CYTHON_USE_TYPE_SLOTS
+    #define CYTHON_USE_TYPE_SLOTS 1
+  #endif
+  #if PY_VERSION_HEX < 0x02070000
+    #undef CYTHON_USE_PYTYPE_LOOKUP
+    #define CYTHON_USE_PYTYPE_LOOKUP 0
+  #elif !defined(CYTHON_USE_PYTYPE_LOOKUP)
+    #define CYTHON_USE_PYTYPE_LOOKUP 1
+  #endif
+  #if PY_MAJOR_VERSION < 3
+    #undef CYTHON_USE_ASYNC_SLOTS
+    #define CYTHON_USE_ASYNC_SLOTS 0
+  #elif !defined(CYTHON_USE_ASYNC_SLOTS)
+    #define CYTHON_USE_ASYNC_SLOTS 1
+  #endif
+  #if PY_VERSION_HEX < 0x02070000
+    #undef CYTHON_USE_PYLONG_INTERNALS
+    #define CYTHON_USE_PYLONG_INTERNALS 0
+  #elif !defined(CYTHON_USE_PYLONG_INTERNALS)
+    #define CYTHON_USE_PYLONG_INTERNALS 1
+  #endif
+  #ifndef CYTHON_USE_PYLIST_INTERNALS
+    #define CYTHON_USE_PYLIST_INTERNALS 1
+  #endif
+  #ifndef CYTHON_USE_UNICODE_INTERNALS
+    #define CYTHON_USE_UNICODE_INTERNALS 1
+  #endif
+  #if PY_VERSION_HEX < 0x030300F0 || PY_VERSION_HEX >= 0x030B00A2
+    #undef CYTHON_USE_UNICODE_WRITER
+    #define CYTHON_USE_UNICODE_WRITER 0
+  #elif !defined(CYTHON_USE_UNICODE_WRITER)
+    #define CYTHON_USE_UNICODE_WRITER 1
+  #endif
+  #ifndef CYTHON_AVOID_BORROWED_REFS
+    #define CYTHON_AVOID_BORROWED_REFS 0
+  #endif
+  #ifndef CYTHON_ASSUME_SAFE_MACROS
+    #define CYTHON_ASSUME_SAFE_MACROS 1
+  #endif
+  #ifndef CYTHON_UNPACK_METHODS
+    #define CYTHON_UNPACK_METHODS 1
+  #endif
+  #if PY_VERSION_HEX >= 0x030B00A4
+    #undef CYTHON_FAST_THREAD_STATE
+    #define CYTHON_FAST_THREAD_STATE 0
+  #elif !defined(CYTHON_FAST_THREAD_STATE)
+    #define CYTHON_FAST_THREAD_STATE 1
+  #endif
+  #ifndef CYTHON_FAST_PYCALL
+    #define CYTHON_FAST_PYCALL (PY_VERSION_HEX < 0x030B00A1)
+  #endif
+  #ifndef CYTHON_PEP489_MULTI_PHASE_INIT
+    #define CYTHON_PEP489_MULTI_PHASE_INIT (PY_VERSION_HEX >= 0x03050000)
+  #endif
+  #ifndef CYTHON_USE_TP_FINALIZE
+    #define CYTHON_USE_TP_FINALIZE (PY_VERSION_HEX >= 0x030400a1)
+  #endif
+  #ifndef CYTHON_USE_DICT_VERSIONS
+    #define CYTHON_USE_DICT_VERSIONS (PY_VERSION_HEX >= 0x030600B1)
+  #endif
+  #if PY_VERSION_HEX >= 0x030B00A4
+    #undef CYTHON_USE_EXC_INFO_STACK
+    #define CYTHON_USE_EXC_INFO_STACK 0
+  #elif !defined(CYTHON_USE_EXC_INFO_STACK)
+    #define CYTHON_USE_EXC_INFO_STACK (PY_VERSION_HEX >= 0x030700A3)
+  #endif
+#endif
+#if !defined(CYTHON_FAST_PYCCALL)
+#define CYTHON_FAST_PYCCALL  (CYTHON_FAST_PYCALL && PY_VERSION_HEX >= 0x030600B1)
+#endif
+#if CYTHON_USE_PYLONG_INTERNALS
+  #if PY_MAJOR_VERSION < 3
+    #include "longintrepr.h"
+  #endif
+  #undef SHIFT
+  #undef BASE
+  #undef MASK
+  #ifdef SIZEOF_VOID_P
+    enum { __pyx_check_sizeof_voidp = 1 / (int)(SIZEOF_VOID_P == sizeof(void*)) };
+  #endif
+#endif
+#ifndef __has_attribute
+  #define __has_attribute(x) 0
+#endif
+#ifndef __has_cpp_attribute
+  #define __has_cpp_attribute(x) 0
+#endif
+#ifndef CYTHON_RESTRICT
+  #if defined(__GNUC__)
+    #define CYTHON_RESTRICT __restrict__
+  #elif defined(_MSC_VER) && _MSC_VER >= 1400
+    #define CYTHON_RESTRICT __restrict
+  #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+    #define CYTHON_RESTRICT restrict
+  #else
+    #define CYTHON_RESTRICT
+  #endif
+#endif
+#ifndef CYTHON_UNUSED
+# if defined(__GNUC__)
+#   if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
+#     define CYTHON_UNUSED __attribute__ ((__unused__))
+#   else
+#     define CYTHON_UNUSED
+#   endif
+# elif defined(__ICC) || (defined(__INTEL_COMPILER) && !defined(_MSC_VER))
+#   define CYTHON_UNUSED __attribute__ ((__unused__))
+# else
+#   define CYTHON_UNUSED
+# endif
+#endif
+#ifndef CYTHON_MAYBE_UNUSED_VAR
+#  if defined(__cplusplus)
+     template<class T> void CYTHON_MAYBE_UNUSED_VAR( const T& ) { }
+#  else
+#    define CYTHON_MAYBE_UNUSED_VAR(x) (void)(x)
+#  endif
+#endif
+#ifndef CYTHON_NCP_UNUSED
+# if CYTHON_COMPILING_IN_CPYTHON
+#  define CYTHON_NCP_UNUSED
+# else
+#  define CYTHON_NCP_UNUSED CYTHON_UNUSED
+# endif
+#endif
+#define __Pyx_void_to_None(void_result) ((void)(void_result), Py_INCREF(Py_None), Py_None)
+#ifdef _MSC_VER
+    #ifndef _MSC_STDINT_H_
+        #if _MSC_VER < 1300
+           typedef unsigned char     uint8_t;
+           typedef unsigned int      uint32_t;
+        #else
+           typedef unsigned __int8   uint8_t;
+           typedef unsigned __int32  uint32_t;
+        #endif
+    #endif
+#else
+   #include <stdint.h>
+#endif
+#ifndef CYTHON_FALLTHROUGH
+  #if defined(__cplusplus) && __cplusplus >= 201103L
+    #if __has_cpp_attribute(fallthrough)
+      #define CYTHON_FALLTHROUGH [[fallthrough]]
+    #elif __has_cpp_attribute(clang::fallthrough)
+      #define CYTHON_FALLTHROUGH [[clang::fallthrough]]
+    #elif __has_cpp_attribute(gnu::fallthrough)
+      #define CYTHON_FALLTHROUGH [[gnu::fallthrough]]
+    #endif
+  #endif
+  #ifndef CYTHON_FALLTHROUGH
+    #if __has_attribute(fallthrough)
+      #define CYTHON_FALLTHROUGH __attribute__((fallthrough))
+    #else
+      #define CYTHON_FALLTHROUGH
+    #endif
+  #endif
+  #if defined(__clang__ ) && defined(__apple_build_version__)
+    #if __apple_build_version__ < 7000000
+      #undef  CYTHON_FALLTHROUGH
+      #define CYTHON_FALLTHROUGH
+    #endif
+  #endif
+#endif
+
+#ifndef CYTHON_INLINE
+  #if defined(__clang__)
+    #define CYTHON_INLINE __inline__ __attribute__ ((__unused__))
+  #elif defined(__GNUC__)
+    #define CYTHON_INLINE __inline__
+  #elif defined(_MSC_VER)
+    #define CYTHON_INLINE __inline
+  #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+    #define CYTHON_INLINE inline
+  #else
+    #define CYTHON_INLINE
+  #endif
+#endif
+
+#if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX < 0x02070600 && !defined(Py_OptimizeFlag)
+  #define Py_OptimizeFlag 0
+#endif
+#define __PYX_BUILD_PY_SSIZE_T "n"
+#define CYTHON_FORMAT_SSIZE_T "z"
+#if PY_MAJOR_VERSION < 3
+  #define __Pyx_BUILTIN_MODULE_NAME "__builtin__"
+  #define __Pyx_PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\
+          PyCode_New(a+k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)
+  #define __Pyx_DefaultClassType PyClass_Type
+#else
+  #define __Pyx_BUILTIN_MODULE_NAME "builtins"
+  #define __Pyx_DefaultClassType PyType_Type
+#if PY_VERSION_HEX >= 0x030B00A1
+    /* Builds a code object by creating an empty one with PyCode_NewEmpty() and then
+     * calling its "replace" method with the requested co_* fields as keyword arguments.
+     * Returns a new reference, or NULL on failure. An exception that was pending on
+     * entry is fetched up front and restored before returning.
+     * NOTE: this file is generated by Cython; a manual change here is lost on regeneration. */
+    static CYTHON_INLINE PyCodeObject* __Pyx_PyCode_New(int a, int k, int l, int s, int f,
+                                                    PyObject *code, PyObject *c, PyObject* n, PyObject *v,
+                                                    PyObject *fv, PyObject *cell, PyObject* fn,
+                                                    PyObject *name, int fline, PyObject *lnos) {
+        PyObject *kwds=NULL, *argcount=NULL, *posonlyargcount=NULL, *kwonlyargcount=NULL;
+        PyObject *nlocals=NULL, *stacksize=NULL, *flags=NULL, *replace=NULL, *call_result=NULL, *empty=NULL;
+        const char *fn_cstr=NULL;
+        const char *name_cstr=NULL;
+        PyCodeObject* co=NULL;
+        PyObject *type, *value, *traceback;
+        PyErr_Fetch(&type, &value, &traceback);
+        /* Collect the keyword arguments for co.replace(); every failure jumps to cleanup. */
+        if (!(kwds=PyDict_New())) goto end;
+        if (!(argcount=PyLong_FromLong(a))) goto end;
+        if (PyDict_SetItemString(kwds, "co_argcount", argcount) != 0) goto end;
+        if (!(posonlyargcount=PyLong_FromLong(0))) goto end;
+        if (PyDict_SetItemString(kwds, "co_posonlyargcount", posonlyargcount) != 0) goto end;
+        if (!(kwonlyargcount=PyLong_FromLong(k))) goto end;
+        if (PyDict_SetItemString(kwds, "co_kwonlyargcount", kwonlyargcount) != 0) goto end;
+        if (!(nlocals=PyLong_FromLong(l))) goto end;
+        if (PyDict_SetItemString(kwds, "co_nlocals", nlocals) != 0) goto end;
+        if (!(stacksize=PyLong_FromLong(s))) goto end;
+        if (PyDict_SetItemString(kwds, "co_stacksize", stacksize) != 0) goto end;
+        if (!(flags=PyLong_FromLong(f))) goto end;
+        if (PyDict_SetItemString(kwds, "co_flags", flags) != 0) goto end;
+        if (PyDict_SetItemString(kwds, "co_code", code) != 0) goto end;
+        if (PyDict_SetItemString(kwds, "co_consts", c) != 0) goto end;
+        if (PyDict_SetItemString(kwds, "co_names", n) != 0) goto end;
+        if (PyDict_SetItemString(kwds, "co_varnames", v) != 0) goto end;
+        if (PyDict_SetItemString(kwds, "co_freevars", fv) != 0) goto end;
+        if (PyDict_SetItemString(kwds, "co_cellvars", cell) != 0) goto end;
+        if (PyDict_SetItemString(kwds, "co_linetable", lnos) != 0) goto end;
+        if (!(fn_cstr=PyUnicode_AsUTF8AndSize(fn, NULL))) goto end;
+        if (!(name_cstr=PyUnicode_AsUTF8AndSize(name, NULL))) goto end;
+        if (!(co = PyCode_NewEmpty(fn_cstr, name_cstr, fline))) goto end;
+        if (!(replace = PyObject_GetAttrString((PyObject*)co, "replace"))) goto cleanup_code_too;
+        if (!(empty = PyTuple_New(0))) goto cleanup_code_too; // unfortunately __pyx_empty_tuple isn't available here
+        if (!(call_result = PyObject_Call(replace, empty, kwds))) goto cleanup_code_too;
+        /* Success: drop the empty template and hand out the replaced code object. */
+        Py_XDECREF((PyObject*)co);
+        co = (PyCodeObject*)call_result;
+        call_result = NULL;
+        /* Only reachable through the goto above: discard the template on failure. */
+        if (0) {
+            cleanup_code_too:
+            Py_XDECREF((PyObject*)co);
+            co = NULL;
+        }
+        end:
+        Py_XDECREF(kwds);
+        Py_XDECREF(argcount);
+        Py_XDECREF(posonlyargcount);
+        Py_XDECREF(kwonlyargcount);
+        Py_XDECREF(nlocals);
+        Py_XDECREF(stacksize);
+        /* `flags` is owned here like the other PyLong temporaries (the dict holds its
+         * own reference); it was missing from this cleanup list and leaked. */
+        Py_XDECREF(flags);
+        Py_XDECREF(replace);
+        Py_XDECREF(call_result);
+        Py_XDECREF(empty);
+        if (type) {
+            PyErr_Restore(type, value, traceback);
+        }
+        return co;
+    }
+#else
+  #define __Pyx_PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\
+          PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)
+#endif
+  #define __Pyx_DefaultClassType PyType_Type
+#endif
+#ifndef Py_TPFLAGS_CHECKTYPES
+  #define Py_TPFLAGS_CHECKTYPES 0
+#endif
+#ifndef Py_TPFLAGS_HAVE_INDEX
+  #define Py_TPFLAGS_HAVE_INDEX 0
+#endif
+#ifndef Py_TPFLAGS_HAVE_NEWBUFFER
+  #define Py_TPFLAGS_HAVE_NEWBUFFER 0
+#endif
+#ifndef Py_TPFLAGS_HAVE_FINALIZE
+  #define Py_TPFLAGS_HAVE_FINALIZE 0
+#endif
+#ifndef METH_STACKLESS
+  #define METH_STACKLESS 0
+#endif
+#if PY_VERSION_HEX <= 0x030700A3 || !defined(METH_FASTCALL)
+  #ifndef METH_FASTCALL
+     #define METH_FASTCALL 0x80
+  #endif
+  typedef PyObject *(*__Pyx_PyCFunctionFast) (PyObject *self, PyObject *const *args, Py_ssize_t nargs);
+  typedef PyObject *(*__Pyx_PyCFunctionFastWithKeywords) (PyObject *self, PyObject *const *args,
+                                                          Py_ssize_t nargs, PyObject *kwnames);
+#else
+  #define __Pyx_PyCFunctionFast _PyCFunctionFast
+  #define __Pyx_PyCFunctionFastWithKeywords _PyCFunctionFastWithKeywords
+#endif
+#if CYTHON_FAST_PYCCALL
+#define __Pyx_PyFastCFunction_Check(func)\
+    ((PyCFunction_Check(func) && (METH_FASTCALL == (PyCFunction_GET_FLAGS(func) & ~(METH_CLASS | METH_STATIC | METH_COEXIST | METH_KEYWORDS | METH_STACKLESS)))))
+#else
+#define __Pyx_PyFastCFunction_Check(func) 0
+#endif
+#if CYTHON_COMPILING_IN_PYPY && !defined(PyObject_Malloc)
+  #define PyObject_Malloc(s)   PyMem_Malloc(s)
+  #define PyObject_Free(p)     PyMem_Free(p)
+  #define PyObject_Realloc(p)  PyMem_Realloc(p)
+#endif
+#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030400A1
+  #define PyMem_RawMalloc(n)           PyMem_Malloc(n)
+  #define PyMem_RawRealloc(p, n)       PyMem_Realloc(p, n)
+  #define PyMem_RawFree(p)             PyMem_Free(p)
+#endif
+#if CYTHON_COMPILING_IN_PYSTON
+  #define __Pyx_PyCode_HasFreeVars(co)  PyCode_HasFreeVars(co)
+  #define __Pyx_PyFrame_SetLineNumber(frame, lineno) PyFrame_SetLineNumber(frame, lineno)
+#else
+  #define __Pyx_PyCode_HasFreeVars(co)  (PyCode_GetNumFree(co) > 0)
+  #define __Pyx_PyFrame_SetLineNumber(frame, lineno)  (frame)->f_lineno = (lineno)
+#endif
+#if !CYTHON_FAST_THREAD_STATE || PY_VERSION_HEX < 0x02070000
+  #define __Pyx_PyThreadState_Current PyThreadState_GET()
+#elif PY_VERSION_HEX >= 0x03060000
+  #define __Pyx_PyThreadState_Current _PyThreadState_UncheckedGet()
+#elif PY_VERSION_HEX >= 0x03000000
+  #define __Pyx_PyThreadState_Current PyThreadState_GET()
+#else
+  #define __Pyx_PyThreadState_Current _PyThreadState_Current
+#endif
+/* Backport of the PyThread_tss_* thread-specific-storage API on top of the
+ * older PyThread_*_key functions.  Only compiled when PY_VERSION_HEX is below
+ * 0x030700A2 and neither PyThread_tss_create nor Py_tss_NEEDS_INIT is already
+ * defined as a macro. */
+#if PY_VERSION_HEX < 0x030700A2 && !defined(PyThread_tss_create) && !defined(Py_tss_NEEDS_INIT)
+#include "pythread.h"
+#define Py_tss_NEEDS_INIT 0
+typedef int Py_tss_t;
+/* Create a new key and store it in *key.  Returns 0 on success, or -1 when
+ * PyThread_create_key() reports failure; *key is left unchanged in that case. */
+static CYTHON_INLINE int PyThread_tss_create(Py_tss_t *key) {
+  int new_key = PyThread_create_key();
+  if (new_key == -1) {
+    return -1;
+  }
+  *key = new_key;
+  return 0;
+}
+/* Allocate storage for a key and mark it as not yet created.  Returns NULL
+ * when the allocation fails instead of writing through a NULL pointer. */
+static CYTHON_INLINE Py_tss_t * PyThread_tss_alloc(void) {
+  Py_tss_t *key = (Py_tss_t *)PyObject_Malloc(sizeof(Py_tss_t));
+  if (key) {
+    *key = Py_tss_NEEDS_INIT;
+  }
+  return key;
+}
+/* Release storage obtained from PyThread_tss_alloc(). */
+static CYTHON_INLINE void PyThread_tss_free(Py_tss_t *key) {
+  PyObject_Free(key);
+}
+/* Non-zero when *key differs from the Py_tss_NEEDS_INIT marker. */
+static CYTHON_INLINE int PyThread_tss_is_created(Py_tss_t *key) {
+  return *key != Py_tss_NEEDS_INIT;
+}
+/* Delete the underlying key and reset *key to the "not created" marker. */
+static CYTHON_INLINE void PyThread_tss_delete(Py_tss_t *key) {
+  PyThread_delete_key(*key);
+  *key = Py_tss_NEEDS_INIT;
+}
+/* Associate value with the key; returns PyThread_set_key_value()'s result. */
+static CYTHON_INLINE int PyThread_tss_set(Py_tss_t *key, void *value) {
+  return PyThread_set_key_value(*key, value);
+}
+/* Return the value stored for the key via PyThread_get_key_value(). */
+static CYTHON_INLINE void * PyThread_tss_get(Py_tss_t *key) {
+  return PyThread_get_key_value(*key);
+}
+#endif
+/* Create a dict for roughly n entries.  With the presizing API available,
+ * n <= 8 still uses plain PyDict_New(); otherwise PyDict_New() is always used.
+ * The argument is parenthesized so expression arguments compare correctly. */
+#if CYTHON_COMPILING_IN_CPYTHON || defined(_PyDict_NewPresized)
+#define __Pyx_PyDict_NewPresized(n)  (((n) <= 8) ? PyDict_New() : _PyDict_NewPresized(n))
+#else
+#define __Pyx_PyDict_NewPresized(n)  PyDict_New()
+#endif
+#if PY_MAJOR_VERSION >= 3 || CYTHON_FUTURE_DIVISION
+  #define __Pyx_PyNumber_Divide(x,y)         PyNumber_TrueDivide(x,y)
+  #define __Pyx_PyNumber_InPlaceDivide(x,y)  PyNumber_InPlaceTrueDivide(x,y)
+#else
+  #define __Pyx_PyNumber_Divide(x,y)         PyNumber_Divide(x,y)
+  #define __Pyx_PyNumber_InPlaceDivide(x,y)  PyNumber_InPlaceDivide(x,y)
+#endif
+/* Dict lookup with a string key.  On CPython >= 3.5 with unicode internals
+ * enabled, the hash field stored in the key object is passed along to
+ * _PyDict_GetItem_KnownHash; otherwise plain PyDict_GetItem is used.
+ * `name` is parenthesized so the cast applies to the whole argument. */
+#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030500A1 && CYTHON_USE_UNICODE_INTERNALS
+#define __Pyx_PyDict_GetItemStr(dict, name)  _PyDict_GetItem_KnownHash(dict, name, ((PyASCIIObject *) (name))->hash)
+#else
+#define __Pyx_PyDict_GetItemStr(dict, name)  PyDict_GetItem(dict, name)
+#endif
+#if PY_VERSION_HEX > 0x03030000 && defined(PyUnicode_KIND)
+  #define CYTHON_PEP393_ENABLED 1
+  #if defined(PyUnicode_IS_READY)
+  #define __Pyx_PyUnicode_READY(op)       (likely(PyUnicode_IS_READY(op)) ?\
+                                              0 : _PyUnicode_Ready((PyObject *)(op)))
+  #else
+  #define __Pyx_PyUnicode_READY(op)       (0)
+  #endif
+  #define __Pyx_PyUnicode_GET_LENGTH(u)   PyUnicode_GET_LENGTH(u)
+  #define __Pyx_PyUnicode_READ_CHAR(u, i) PyUnicode_READ_CHAR(u, i)
+  #define __Pyx_PyUnicode_MAX_CHAR_VALUE(u)   PyUnicode_MAX_CHAR_VALUE(u)
+  #define __Pyx_PyUnicode_KIND(u)         PyUnicode_KIND(u)
+  #define __Pyx_PyUnicode_DATA(u)         PyUnicode_DATA(u)
+  #define __Pyx_PyUnicode_READ(k, d, i)   PyUnicode_READ(k, d, i)
+  #define __Pyx_PyUnicode_WRITE(k, d, i, ch)  PyUnicode_WRITE(k, d, i, ch)
+  #if defined(PyUnicode_IS_READY) && defined(PyUnicode_GET_SIZE)
+  #if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x03090000
+  #define __Pyx_PyUnicode_IS_TRUE(u)      (0 != (likely(PyUnicode_IS_READY(u)) ? PyUnicode_GET_LENGTH(u) : ((PyCompactUnicodeObject *)(u))->wstr_length))
+  #else
+  #define __Pyx_PyUnicode_IS_TRUE(u)      (0 != (likely(PyUnicode_IS_READY(u)) ? PyUnicode_GET_LENGTH(u) : PyUnicode_GET_SIZE(u)))
+  #endif
+  #else
+  #define __Pyx_PyUnicode_IS_TRUE(u)      (0 != PyUnicode_GET_LENGTH(u))
+  #endif
+#else
+  #define CYTHON_PEP393_ENABLED 0
+  #define PyUnicode_1BYTE_KIND  1
+  #define PyUnicode_2BYTE_KIND  2
+  #define PyUnicode_4BYTE_KIND  4
+  #define __Pyx_PyUnicode_READY(op)       (0)
+  #define __Pyx_PyUnicode_GET_LENGTH(u)   PyUnicode_GET_SIZE(u)
+  #define __Pyx_PyUnicode_READ_CHAR(u, i) ((Py_UCS4)(PyUnicode_AS_UNICODE(u)[i]))
+  #define __Pyx_PyUnicode_MAX_CHAR_VALUE(u)   ((sizeof(Py_UNICODE) == 2) ? 65535 : 1114111)
+  #define __Pyx_PyUnicode_KIND(u)         (sizeof(Py_UNICODE))
+  #define __Pyx_PyUnicode_DATA(u)         ((void*)PyUnicode_AS_UNICODE(u))
+  #define __Pyx_PyUnicode_READ(k, d, i)   ((void)(k), (Py_UCS4)(((Py_UNICODE*)d)[i]))
+  #define __Pyx_PyUnicode_WRITE(k, d, i, ch)  (((void)(k)), ((Py_UNICODE*)d)[i] = ch)
+  #define __Pyx_PyUnicode_IS_TRUE(u)      (0 != PyUnicode_GET_SIZE(u))
+#endif
+#if CYTHON_COMPILING_IN_PYPY
+  #define __Pyx_PyUnicode_Concat(a, b)      PyNumber_Add(a, b)
+  #define __Pyx_PyUnicode_ConcatSafe(a, b)  PyNumber_Add(a, b)
+#else
+  #define __Pyx_PyUnicode_Concat(a, b)      PyUnicode_Concat(a, b)
+  #define __Pyx_PyUnicode_ConcatSafe(a, b)  ((unlikely((a) == Py_None) || unlikely((b) == Py_None)) ?\
+      PyNumber_Add(a, b) : __Pyx_PyUnicode_Concat(a, b))
+#endif
+#if CYTHON_COMPILING_IN_PYPY && !defined(PyUnicode_Contains)
+  #define PyUnicode_Contains(u, s)  PySequence_Contains(u, s)
+#endif
+#if CYTHON_COMPILING_IN_PYPY && !defined(PyByteArray_Check)
+  #define PyByteArray_Check(obj)  PyObject_TypeCheck(obj, &PyByteArray_Type)
+#endif
+#if CYTHON_COMPILING_IN_PYPY && !defined(PyObject_Format)
+  #define PyObject_Format(obj, fmt)  PyObject_CallMethod(obj, "__format__", "O", fmt)
+#endif
+#define __Pyx_PyString_FormatSafe(a, b)   ((unlikely((a) == Py_None || (PyString_Check(b) && !PyString_CheckExact(b)))) ? PyNumber_Remainder(a, b) : __Pyx_PyString_Format(a, b))
+#define __Pyx_PyUnicode_FormatSafe(a, b)  ((unlikely((a) == Py_None || (PyUnicode_Check(b) && !PyUnicode_CheckExact(b)))) ? PyNumber_Remainder(a, b) : PyUnicode_Format(a, b))
+#if PY_MAJOR_VERSION >= 3
+  #define __Pyx_PyString_Format(a, b)  PyUnicode_Format(a, b)
+#else
+  #define __Pyx_PyString_Format(a, b)  PyString_Format(a, b)
+#endif
+#if PY_MAJOR_VERSION < 3 && !defined(PyObject_ASCII)
+  #define PyObject_ASCII(o)            PyObject_Repr(o)
+#endif
+#if PY_MAJOR_VERSION >= 3
+  #define PyBaseString_Type            PyUnicode_Type
+  #define PyStringObject               PyUnicodeObject
+  #define PyString_Type                PyUnicode_Type
+  #define PyString_Check               PyUnicode_Check
+  #define PyString_CheckExact          PyUnicode_CheckExact
+#ifndef PyObject_Unicode
+  #define PyObject_Unicode             PyObject_Str
+#endif
+#endif
+#if PY_MAJOR_VERSION >= 3
+  #define __Pyx_PyBaseString_Check(obj) PyUnicode_Check(obj)
+  #define __Pyx_PyBaseString_CheckExact(obj) PyUnicode_CheckExact(obj)
+#else
+  #define __Pyx_PyBaseString_Check(obj) (PyString_Check(obj) || PyUnicode_Check(obj))
+  #define __Pyx_PyBaseString_CheckExact(obj) (PyString_CheckExact(obj) || PyUnicode_CheckExact(obj))
+#endif
+#ifndef PySet_CheckExact
+  #define PySet_CheckExact(obj)        (Py_TYPE(obj) == &PySet_Type)
+#endif
+#if PY_VERSION_HEX >= 0x030900A4
+  #define __Pyx_SET_REFCNT(obj, refcnt) Py_SET_REFCNT(obj, refcnt)
+  #define __Pyx_SET_SIZE(obj, size) Py_SET_SIZE(obj, size)
+#else
+  #define __Pyx_SET_REFCNT(obj, refcnt) Py_REFCNT(obj) = (refcnt)
+  #define __Pyx_SET_SIZE(obj, size) Py_SIZE(obj) = (size)
+#endif
+#if CYTHON_ASSUME_SAFE_MACROS
+  #define __Pyx_PySequence_SIZE(seq)  Py_SIZE(seq)
+#else
+  #define __Pyx_PySequence_SIZE(seq)  PySequence_Size(seq)
+#endif
+#if PY_MAJOR_VERSION >= 3
+  #define PyIntObject                  PyLongObject
+  #define PyInt_Type                   PyLong_Type
+  #define PyInt_Check(op)              PyLong_Check(op)
+  #define PyInt_CheckExact(op)         PyLong_CheckExact(op)
+  #define PyInt_FromString             PyLong_FromString
+  #define PyInt_FromUnicode            PyLong_FromUnicode
+  #define PyInt_FromLong               PyLong_FromLong
+  #define PyInt_FromSize_t             PyLong_FromSize_t
+  #define PyInt_FromSsize_t            PyLong_FromSsize_t
+  #define PyInt_AsLong                 PyLong_AsLong
+  #define PyInt_AS_LONG                PyLong_AS_LONG
+  #define PyInt_AsSsize_t              PyLong_AsSsize_t
+  #define PyInt_AsUnsignedLongMask     PyLong_AsUnsignedLongMask
+  #define PyInt_AsUnsignedLongLongMask PyLong_AsUnsignedLongLongMask
+  #define PyNumber_Int                 PyNumber_Long
+#endif
+#if PY_MAJOR_VERSION >= 3
+  #define PyBoolObject                 PyLongObject
+#endif
+#if PY_MAJOR_VERSION >= 3 && CYTHON_COMPILING_IN_PYPY
+  #ifndef PyUnicode_InternFromString
+    #define PyUnicode_InternFromString(s) PyUnicode_FromString(s)
+  #endif
+#endif
+#if PY_VERSION_HEX < 0x030200A4
+  typedef long Py_hash_t;
+  #define __Pyx_PyInt_FromHash_t PyInt_FromLong
+  #define __Pyx_PyInt_AsHash_t   __Pyx_PyIndex_AsHash_t
+#else
+  #define __Pyx_PyInt_FromHash_t PyInt_FromSsize_t
+  #define __Pyx_PyInt_AsHash_t   __Pyx_PyIndex_AsSsize_t
+#endif
+#if PY_MAJOR_VERSION >= 3
+  #define __Pyx_PyMethod_New(func, self, klass) ((self) ? ((void)(klass), PyMethod_New(func, self)) : __Pyx_NewRef(func))
+#else
+  #define __Pyx_PyMethod_New(func, self, klass) PyMethod_New(func, self, klass)
+#endif
+#if CYTHON_USE_ASYNC_SLOTS
+  #if PY_VERSION_HEX >= 0x030500B1
+    #define __Pyx_PyAsyncMethodsStruct PyAsyncMethods
+    #define __Pyx_PyType_AsAsync(obj) (Py_TYPE(obj)->tp_as_async)
+  #else
+    #define __Pyx_PyType_AsAsync(obj) ((__Pyx_PyAsyncMethodsStruct*) (Py_TYPE(obj)->tp_reserved))
+  #endif
+#else
+  #define __Pyx_PyType_AsAsync(obj) NULL
+#endif
+/* Fallback definition, used when __Pyx_PyAsyncMethodsStruct has not already
+ * been #defined as an alias: a struct with three unaryfunc slots
+ * (am_await, am_aiter, am_anext). */
+#ifndef __Pyx_PyAsyncMethodsStruct
+    typedef struct {
+        unaryfunc am_await;
+        unaryfunc am_aiter;
+        unaryfunc am_anext;
+    } __Pyx_PyAsyncMethodsStruct;
+#endif
+
+#if defined(WIN32) || defined(MS_WINDOWS)
+  #define _USE_MATH_DEFINES
+#endif
+#include <math.h>
+/* __PYX_NAN() yields a float NaN.  The NAN macro from <math.h> is used when
+ * defined; otherwise every byte of a float is set to 0xFF (an all-ones bit
+ * pattern, which is a NaN under IEEE 754).  The fallback is declared with an
+ * explicit (void) parameter list so it is a proper prototype in C. */
+#ifdef NAN
+#define __PYX_NAN() ((float) NAN)
+#else
+static CYTHON_INLINE float __PYX_NAN(void) {
+  float value;
+  memset(&value, 0xFF, sizeof(value));
+  return value;
+}
+#endif
+#if defined(__CYGWIN__) && defined(_LDBL_EQ_DBL)
+#define __Pyx_truncl trunc
+#else
+#define __Pyx_truncl truncl
+#endif
+
+#define __PYX_MARK_ERR_POS(f_index, lineno) \
+    { __pyx_filename = __pyx_f[f_index]; (void)__pyx_filename; __pyx_lineno = lineno; (void)__pyx_lineno; __pyx_clineno = __LINE__; (void)__pyx_clineno; }
+#define __PYX_ERR(f_index, lineno, Ln_error) \
+    { __PYX_MARK_ERR_POS(f_index, lineno) goto Ln_error; }
+
+#ifndef __PYX_EXTERN_C
+  #ifdef __cplusplus
+    #define __PYX_EXTERN_C extern "C"
+  #else
+    #define __PYX_EXTERN_C extern
+  #endif
+#endif
+
+#define __PYX_HAVE__TTS__tts__utils__monotonic_align__core
+#define __PYX_HAVE_API__TTS__tts__utils__monotonic_align__core
+/* Early includes */
+#include <string.h>
+#include <stdio.h>
+#include "numpy/arrayobject.h"
+#include "numpy/ndarrayobject.h"
+#include "numpy/ndarraytypes.h"
+#include "numpy/arrayscalars.h"
+#include "numpy/ufuncobject.h"
+
+    /* NumPy API declarations from "numpy/__init__.pxd" */
+    
+#include "pythread.h"
+#include <stdlib.h>
+#include "pystate.h"
+#ifdef _OPENMP
+#include <omp.h>
+#endif /* _OPENMP */
+
+#if defined(PYREX_WITHOUT_ASSERTIONS) && !defined(CYTHON_WITHOUT_ASSERTIONS)
+#define CYTHON_WITHOUT_ASSERTIONS
+#endif
+
+/* One row of the module's string table.  NOTE(review): field meanings are
+ * inferred from their names only (target object slot, C string, length,
+ * encoding name, and unicode/str/intern flags) -- confirm against the code
+ * that consumes the table. */
+typedef struct {PyObject **p; const char *s; const Py_ssize_t n; const char* encoding;
+                const char is_unicode; const char is_str; const char intern; } __Pyx_StringTabEntry;
+
+#define __PYX_DEFAULT_STRING_ENCODING_IS_ASCII 0
+#define __PYX_DEFAULT_STRING_ENCODING_IS_UTF8 0
+#define __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT (PY_MAJOR_VERSION >= 3 && __PYX_DEFAULT_STRING_ENCODING_IS_UTF8)
+#define __PYX_DEFAULT_STRING_ENCODING ""
+#define __Pyx_PyObject_FromString __Pyx_PyBytes_FromString
+#define __Pyx_PyObject_FromStringAndSize __Pyx_PyBytes_FromStringAndSize
+/* Cast helpers.  The argument is parenthesized so that the cast applies to
+ * the whole expression passed in, not just its first operand. */
+#define __Pyx_uchar_cast(c) ((unsigned char)(c))
+#define __Pyx_long_cast(x) ((long)(x))
+/* Non-zero when value v of integer type `type` is representable as a
+ * Py_ssize_t.  Narrower types always fit; wider types are range-checked
+ * against PY_SSIZE_T_MAX (and PY_SSIZE_T_MIN when is_signed); same-width
+ * types only need the upper-bound check when unsigned.  The comparisons are
+ * kept in the original "a < b || a == b" form. */
+#define __Pyx_fits_Py_ssize_t(v, type, is_signed)  (\
+    (sizeof(type) < sizeof(Py_ssize_t))  ||\
+    (sizeof(type) > sizeof(Py_ssize_t) &&\
+          likely((v) < (type)PY_SSIZE_T_MAX ||\
+                 (v) == (type)PY_SSIZE_T_MAX)  &&\
+          (!(is_signed) || likely((v) > (type)PY_SSIZE_T_MIN ||\
+                                (v) == (type)PY_SSIZE_T_MIN)))  ||\
+    (sizeof(type) == sizeof(Py_ssize_t) &&\
+          ((is_signed) || likely((v) < (type)PY_SSIZE_T_MAX ||\
+                               (v) == (type)PY_SSIZE_T_MAX)))  )
+/* Index bounds check done as one unsigned comparison: both arguments are
+ * converted to size_t first, so a negative i becomes a very large value and
+ * fails the test whenever limit is non-negative. */
+static CYTHON_INLINE int __Pyx_is_valid_index(Py_ssize_t i, Py_ssize_t limit) {
+    const size_t index_u = (size_t) i;
+    const size_t limit_u = (size_t) limit;
+    return index_u < limit_u;
+}
+/* __Pyx_sst_abs(value): absolute value for a Py_ssize_t-sized integer.  Picks
+ * the first available implementation: std::abs (C++11), abs/labs when int or
+ * long is at least as wide as size_t, _abs64 on MSVC, llabs on C99,
+ * __builtin_llabs on GCC, and finally a plain conditional expression whose
+ * argument is parenthesized so expression arguments negate correctly. */
+#if defined (__cplusplus) && __cplusplus >= 201103L
+    #include <cstdlib>
+    #define __Pyx_sst_abs(value) std::abs(value)
+#elif SIZEOF_INT >= SIZEOF_SIZE_T
+    #define __Pyx_sst_abs(value) abs(value)
+#elif SIZEOF_LONG >= SIZEOF_SIZE_T
+    #define __Pyx_sst_abs(value) labs(value)
+#elif defined (_MSC_VER)
+    #define __Pyx_sst_abs(value) ((Py_ssize_t)_abs64(value))
+#elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+    #define __Pyx_sst_abs(value) llabs(value)
+#elif defined (__GNUC__)
+    #define __Pyx_sst_abs(value) __builtin_llabs(value)
+#else
+    #define __Pyx_sst_abs(value) (((value) < 0) ? -(value) : (value))
+#endif
+static CYTHON_INLINE const char* __Pyx_PyObject_AsString(PyObject*);
+static CYTHON_INLINE const char* __Pyx_PyObject_AsStringAndSize(PyObject*, Py_ssize_t* length);
+#define __Pyx_PyByteArray_FromString(s) PyByteArray_FromStringAndSize((const char*)s, strlen((const char*)s))
+#define __Pyx_PyByteArray_FromStringAndSize(s, l) PyByteArray_FromStringAndSize((const char*)s, l)
+#define __Pyx_PyBytes_FromString        PyBytes_FromString
+#define __Pyx_PyBytes_FromStringAndSize PyBytes_FromStringAndSize
+static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char*);
+#if PY_MAJOR_VERSION < 3
+    #define __Pyx_PyStr_FromString        __Pyx_PyBytes_FromString
+    #define __Pyx_PyStr_FromStringAndSize __Pyx_PyBytes_FromStringAndSize
+#else
+    #define __Pyx_PyStr_FromString        __Pyx_PyUnicode_FromString
+    #define __Pyx_PyStr_FromStringAndSize __Pyx_PyUnicode_FromStringAndSize
+#endif
+#define __Pyx_PyBytes_AsWritableString(s)     ((char*) PyBytes_AS_STRING(s))
+#define __Pyx_PyBytes_AsWritableSString(s)    ((signed char*) PyBytes_AS_STRING(s))
+#define __Pyx_PyBytes_AsWritableUString(s)    ((unsigned char*) PyBytes_AS_STRING(s))
+#define __Pyx_PyBytes_AsString(s)     ((const char*) PyBytes_AS_STRING(s))
+#define __Pyx_PyBytes_AsSString(s)    ((const signed char*) PyBytes_AS_STRING(s))
+#define __Pyx_PyBytes_AsUString(s)    ((const unsigned char*) PyBytes_AS_STRING(s))
+#define __Pyx_PyObject_AsWritableString(s)    ((char*) __Pyx_PyObject_AsString(s))
+#define __Pyx_PyObject_AsWritableSString(s)    ((signed char*) __Pyx_PyObject_AsString(s))
+#define __Pyx_PyObject_AsWritableUString(s)    ((unsigned char*) __Pyx_PyObject_AsString(s))
+#define __Pyx_PyObject_AsSString(s)    ((const signed char*) __Pyx_PyObject_AsString(s))
+#define __Pyx_PyObject_AsUString(s)    ((const unsigned char*) __Pyx_PyObject_AsString(s))
+#define __Pyx_PyObject_FromCString(s)  __Pyx_PyObject_FromString((const char*)s)
+#define __Pyx_PyBytes_FromCString(s)   __Pyx_PyBytes_FromString((const char*)s)
+#define __Pyx_PyByteArray_FromCString(s)   __Pyx_PyByteArray_FromString((const char*)s)
+#define __Pyx_PyStr_FromCString(s)     __Pyx_PyStr_FromString((const char*)s)
+#define __Pyx_PyUnicode_FromCString(s) __Pyx_PyUnicode_FromString((const char*)s)
+/* Count the Py_UNICODE units in u that precede the first zero unit. */
+static CYTHON_INLINE size_t __Pyx_Py_UNICODE_strlen(const Py_UNICODE *u) {
+    size_t length = 0;
+    while (u[length] != 0) {
+        length++;
+    }
+    return length;
+}
+#define __Pyx_PyUnicode_FromUnicode(u)       PyUnicode_FromUnicode(u, __Pyx_Py_UNICODE_strlen(u))
+#define __Pyx_PyUnicode_FromUnicodeAndLength PyUnicode_FromUnicode
+#define __Pyx_PyUnicode_AsUnicode            PyUnicode_AsUnicode
+#define __Pyx_NewRef(obj) (Py_INCREF(obj), obj)
+#define __Pyx_Owned_Py_None(b) __Pyx_NewRef(Py_None)
+static CYTHON_INLINE PyObject * __Pyx_PyBool_FromLong(long b);
+static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject*);
+static CYTHON_INLINE int __Pyx_PyObject_IsTrueAndDecref(PyObject*);
+static CYTHON_INLINE PyObject* __Pyx_PyNumber_IntOrLong(PyObject* x);
+#define __Pyx_PySequence_Tuple(obj)\
+    (likely(PyTuple_CheckExact(obj)) ? __Pyx_NewRef(obj) : PySequence_Tuple(obj))
+static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject*);
+static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t);
+static CYTHON_INLINE Py_hash_t __Pyx_PyIndex_AsHash_t(PyObject*);
+#if CYTHON_ASSUME_SAFE_MACROS
+#define __pyx_PyFloat_AsDouble(x) (PyFloat_CheckExact(x) ? PyFloat_AS_DOUBLE(x) : PyFloat_AsDouble(x))
+#else
+#define __pyx_PyFloat_AsDouble(x) PyFloat_AsDouble(x)
+#endif
+#define __pyx_PyFloat_AsFloat(x) ((float) __pyx_PyFloat_AsDouble(x))
+#if PY_MAJOR_VERSION >= 3
+#define __Pyx_PyNumber_Int(x) (PyLong_CheckExact(x) ? __Pyx_NewRef(x) : PyNumber_Long(x))
+#else
+#define __Pyx_PyNumber_Int(x) (PyInt_CheckExact(x) ? __Pyx_NewRef(x) : PyNumber_Int(x))
+#endif
+#define __Pyx_PyNumber_Float(x) (PyFloat_CheckExact(x) ? __Pyx_NewRef(x) : PyNumber_Float(x))
+/* Python 2 only, and only when the module was compiled with
+ * c_string_encoding=ascii: check that sys.getdefaultencoding() is compatible
+ * with ASCII and record the result in __Pyx_sys_getdefaultencoding_not_ascii. */
+#if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII
+static int __Pyx_sys_getdefaultencoding_not_ascii;
+/* Returns 0 on success and -1 on failure.  On failure a Python exception is
+ * expected to be set, either by the failing C-API call or by the explicit
+ * PyErr_Format below. */
+static int __Pyx_init_sys_getdefaultencoding_params(void) {
+    PyObject* sys;
+    PyObject* default_encoding = NULL;
+    PyObject* ascii_chars_u = NULL;
+    PyObject* ascii_chars_b = NULL;
+    const char* default_encoding_c;
+    sys = PyImport_ImportModule("sys");
+    if (!sys) goto bad;
+    default_encoding = PyObject_CallMethod(sys, (char*) "getdefaultencoding", NULL);
+    /* sys is only needed for the call above; release it before any error exit. */
+    Py_DECREF(sys);
+    if (!default_encoding) goto bad;
+    default_encoding_c = PyBytes_AsString(default_encoding);
+    if (!default_encoding_c) goto bad;
+    if (strcmp(default_encoding_c, "ascii") == 0) {
+        __Pyx_sys_getdefaultencoding_not_ascii = 0;
+    } else {
+        /* Round-trip the 128 ASCII code points through the default encoding
+         * and require the encoded bytes to match the input exactly. */
+        char ascii_chars[128];
+        int c;
+        for (c = 0; c < 128; c++) {
+            ascii_chars[c] = c;
+        }
+        __Pyx_sys_getdefaultencoding_not_ascii = 1;
+        ascii_chars_u = PyUnicode_DecodeASCII(ascii_chars, 128, NULL);
+        if (!ascii_chars_u) goto bad;
+        ascii_chars_b = PyUnicode_AsEncodedString(ascii_chars_u, default_encoding_c, NULL);
+        if (!ascii_chars_b || !PyBytes_Check(ascii_chars_b) || memcmp(ascii_chars, PyBytes_AS_STRING(ascii_chars_b), 128) != 0) {
+            PyErr_Format(
+                PyExc_ValueError,
+                "This module compiled with c_string_encoding=ascii, but default encoding '%.200s' is not a superset of ascii.",
+                default_encoding_c);
+            goto bad;
+        }
+        Py_DECREF(ascii_chars_u);
+        Py_DECREF(ascii_chars_b);
+    }
+    Py_DECREF(default_encoding);
+    return 0;
+bad:
+    /* Shared cleanup: XDECREF tolerates the references that are still NULL. */
+    Py_XDECREF(default_encoding);
+    Py_XDECREF(ascii_chars_u);
+    Py_XDECREF(ascii_chars_b);
+    return -1;
+}
+#endif
+/* __Pyx_PyUnicode_FromStringAndSize: decode a C buffer to a unicode object.
+ * Uses UTF-8 when the "default" encoding mode is active on Python 3;
+ * otherwise decodes with __PYX_DEFAULT_STRING_ENCODING. */
+#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT && PY_MAJOR_VERSION >= 3
+#define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_DecodeUTF8(c_str, size, NULL)
+#else
+#define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_Decode(c_str, size, __PYX_DEFAULT_STRING_ENCODING, NULL)
+#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT
+static char* __PYX_DEFAULT_STRING_ENCODING;
+/* Copy the name returned by sys.getdefaultencoding() into a malloc'ed buffer
+ * stored in __PYX_DEFAULT_STRING_ENCODING.  Returns 0 on success, -1 on
+ * failure.  No free() of that buffer appears in this function. */
+static int __Pyx_init_sys_getdefaultencoding_params(void) {
+    PyObject* sys;
+    PyObject* default_encoding = NULL;
+    char* default_encoding_c;
+    sys = PyImport_ImportModule("sys");
+    if (!sys) goto bad;
+    default_encoding = PyObject_CallMethod(sys, (char*) (const char*) "getdefaultencoding", NULL);
+    Py_DECREF(sys);
+    if (!default_encoding) goto bad;
+    default_encoding_c = PyBytes_AsString(default_encoding);
+    if (!default_encoding_c) goto bad;
+    /* +1 for the terminating NUL copied by strcpy below. */
+    __PYX_DEFAULT_STRING_ENCODING = (char*) malloc(strlen(default_encoding_c) + 1);
+    if (!__PYX_DEFAULT_STRING_ENCODING) goto bad;
+    strcpy(__PYX_DEFAULT_STRING_ENCODING, default_encoding_c);
+    Py_DECREF(default_encoding);
+    return 0;
+bad:
+    Py_XDECREF(default_encoding);
+    return -1;
+}
+#endif
+#endif
+
+
+/* Test for GCC > 2.95 */
+#if defined(__GNUC__)     && (__GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95)))
+  #define likely(x)   __builtin_expect(!!(x), 1)
+  #define unlikely(x) __builtin_expect(!!(x), 0)
+#else /* !__GNUC__ or GCC < 2.95 */
+  #define likely(x)   (x)
+  #define unlikely(x) (x)
+#endif /* __GNUC__ */
+/* No-op: accepts a pointer and discards it.  NOTE(review): presumably used to
+ * silence "may be used uninitialized" diagnostics -- confirm at call sites. */
+static CYTHON_INLINE void __Pyx_pretend_to_initialize(void* ptr) { (void)ptr; }
+
+static PyObject *__pyx_m = NULL;
+static PyObject *__pyx_d;
+static PyObject *__pyx_b;
+static PyObject *__pyx_cython_runtime = NULL;
+static PyObject *__pyx_empty_tuple;
+static PyObject *__pyx_empty_bytes;
+static PyObject *__pyx_empty_unicode;
+static int __pyx_lineno;
+static int __pyx_clineno = 0;
+static const char * __pyx_cfilenm= __FILE__;
+static const char *__pyx_filename;
+
+/* Header.proto */
+#if !defined(CYTHON_CCOMPLEX)
+  #if defined(__cplusplus)
+    #define CYTHON_CCOMPLEX 1
+  #elif defined(_Complex_I)
+    #define CYTHON_CCOMPLEX 1
+  #else
+    #define CYTHON_CCOMPLEX 0
+  #endif
+#endif
+#if CYTHON_CCOMPLEX
+  #ifdef __cplusplus
+    #include <complex>
+  #else
+    #include <complex.h>
+  #endif
+#endif
+#if CYTHON_CCOMPLEX && !defined(__cplusplus) && defined(__sun__) && defined(__GNUC__)
+  #undef _Complex_I
+  #define _Complex_I 1.0fj
+#endif
+
+
+static const char *__pyx_f[] = {
+  "TTS/tts/utils/monotonic_align/core.pyx",
+  "__init__.pxd",
+  "stringsource",
+  "type.pxd",
+};
+/* NoFastGil.proto */
+#define __Pyx_PyGILState_Ensure PyGILState_Ensure
+#define __Pyx_PyGILState_Release PyGILState_Release
+#define __Pyx_FastGIL_Remember()
+#define __Pyx_FastGIL_Forget()
+#define __Pyx_FastGilFuncInit()
+
+/* MemviewSliceStruct.proto */
+struct __pyx_memoryview_obj;
+/* A memoryview slice: a pointer to the memoryview object, a data pointer, and
+ * shape/strides/suboffsets arrays with room for 8 entries each. */
+typedef struct {
+  struct __pyx_memoryview_obj *memview;
+  char *data;
+  Py_ssize_t shape[8];
+  Py_ssize_t strides[8];
+  Py_ssize_t suboffsets[8];
+} __Pyx_memviewslice;
+/* First entry of the slice's shape array.  The argument is parenthesized so
+ * that expression arguments (e.g. a dereference) expand correctly. */
+#define __Pyx_MemoryView_Len(m)  ((m).shape[0])
+
+/* Atomics.proto */
+#include <pythread.h>
+#ifndef CYTHON_ATOMICS
+    #define CYTHON_ATOMICS 1
+#endif
+#define __pyx_atomic_int_type int
+/* Select the __sync builtins on GCC >= 4.1.2, except on i386.  The version
+ * test accepts every major version >= 5 outright and only inspects the
+ * minor/patch level for GCC 4; the patch-level macro is spelled
+ * __GNUC_PATCHLEVEL__ (an undefined name would silently evaluate to 0). */
+#if CYTHON_ATOMICS && (__GNUC__ >= 5 || (__GNUC__ == 4 &&\
+                    (__GNUC_MINOR__ > 1 ||\
+                    (__GNUC_MINOR__ == 1 && __GNUC_PATCHLEVEL__ >= 2)))) &&\
+                    !defined(__i386__)
+    #define __pyx_atomic_incr_aligned(value, lock) __sync_fetch_and_add(value, 1)
+    #define __pyx_atomic_decr_aligned(value, lock) __sync_fetch_and_sub(value, 1)
+    #ifdef __PYX_DEBUG_ATOMICS
+        #warning "Using GNU atomics"
+    #endif
+/* The MSVC and Intel branches below are disabled by their trailing "&& 0". */
+#elif CYTHON_ATOMICS && defined(_MSC_VER) && 0
+    #include <Windows.h>
+    #undef __pyx_atomic_int_type
+    #define __pyx_atomic_int_type LONG
+    #define __pyx_atomic_incr_aligned(value, lock) InterlockedIncrement(value)
+    #define __pyx_atomic_decr_aligned(value, lock) InterlockedDecrement(value)
+    #ifdef __PYX_DEBUG_ATOMICS
+        #pragma message ("Using MSVC atomics")
+    #endif
+#elif CYTHON_ATOMICS && (defined(__ICC) || defined(__INTEL_COMPILER)) && 0
+    #define __pyx_atomic_incr_aligned(value, lock) _InterlockedIncrement(value)
+    #define __pyx_atomic_decr_aligned(value, lock) _InterlockedDecrement(value)
+    #ifdef __PYX_DEBUG_ATOMICS
+        #warning "Using Intel atomics"
+    #endif
+#else
+    /* No supported atomic primitive: force CYTHON_ATOMICS to 0 so the
+     * lock-based acquisition-count helpers are selected below. */
+    #undef CYTHON_ATOMICS
+    #define CYTHON_ATOMICS 0
+    #ifdef __PYX_DEBUG_ATOMICS
+        #warning "Not using atomics"
+    #endif
+#endif
+typedef volatile __pyx_atomic_int_type __pyx_atomic_int;
+#if CYTHON_ATOMICS
+    #define __pyx_add_acquisition_count(memview)\
+             __pyx_atomic_incr_aligned(__pyx_get_slice_count_pointer(memview), memview->lock)
+    #define __pyx_sub_acquisition_count(memview)\
+            __pyx_atomic_decr_aligned(__pyx_get_slice_count_pointer(memview), memview->lock)
+#else
+    #define __pyx_add_acquisition_count(memview)\
+            __pyx_add_acquisition_count_locked(__pyx_get_slice_count_pointer(memview), memview->lock)
+    #define __pyx_sub_acquisition_count(memview)\
+            __pyx_sub_acquisition_count_locked(__pyx_get_slice_count_pointer(memview), memview->lock)
+#endif
+
+/* ForceInitThreads.proto */
+#ifndef __PYX_FORCE_INIT_THREADS
+  #define __PYX_FORCE_INIT_THREADS 0
+#endif
+
+/* BufferFormatStructs.proto */
+/* True when casting -1 to `type` gives a positive value, i.e. the type is
+ * unsigned. */
+#define IS_UNSIGNED(type) (((type) -1) > 0)
+struct __Pyx_StructField_;
+#define __PYX_BUF_FLAGS_PACKED_STRUCT (1 << 0)
+/* Description of one type.  NOTE(review): field meanings are inferred from
+ * their names (type name, optional struct field list, size, per-dimension
+ * array sizes with room for 8 entries, dimension count, type group code,
+ * signedness, flags) -- confirm against the buffer-format parser. */
+typedef struct {
+  const char* name;
+  struct __Pyx_StructField_* fields;
+  size_t size;
+  size_t arraysize[8];
+  int ndim;
+  char typegroup;
+  char is_unsigned;
+  int flags;
+} __Pyx_TypeInfo;
+/* One struct member: a pointer to its type description, its name, and its
+ * offset. */
+typedef struct __Pyx_StructField_ {
+  __Pyx_TypeInfo* type;
+  const char* name;
+  size_t offset;
+} __Pyx_StructField;
+/* Stack element pairing a field pointer with a parent offset.
+ * NOTE(review): presumably used while walking nested structs -- confirm. */
+typedef struct {
+  __Pyx_StructField* field;
+  size_t parent_offset;
+} __Pyx_BufFmt_StackElem;
+/* State carried while checking a buffer format string.  NOTE(review): the
+ * role of individual fields is not visible in this section -- see the
+ * functions that take a __Pyx_BufFmt_Context*. */
+typedef struct {
+  __Pyx_StructField root;
+  __Pyx_BufFmt_StackElem* head;
+  size_t fmt_offset;
+  size_t new_count, enc_count;
+  size_t struct_alignment;
+  int is_complex;
+  char enc_type;
+  char new_packmode;
+  char enc_packmode;
+  char is_valid_array;
+} __Pyx_BufFmt_Context;
+
+
+/* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":690
+ * # in Cython to enable them only on the right systems.
+ * 
+ * ctypedef npy_int8       int8_t             # <<<<<<<<<<<<<<
+ * ctypedef npy_int16      int16_t
+ * ctypedef npy_int32      int32_t
+ */
+typedef npy_int8 __pyx_t_5numpy_int8_t;
+
+/* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":691
+ * 
+ * ctypedef npy_int8       int8_t
+ * ctypedef npy_int16      int16_t             # <<<<<<<<<<<<<<
+ * ctypedef npy_int32      int32_t
+ * ctypedef npy_int64      int64_t
+ */
+typedef npy_int16 __pyx_t_5numpy_int16_t;
+
+/* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":692
+ * ctypedef npy_int8       int8_t
+ * ctypedef npy_int16      int16_t
+ * ctypedef npy_int32      int32_t             # <<<<<<<<<<<<<<
+ * ctypedef npy_int64      int64_t
+ * #ctypedef npy_int96      int96_t
+ */
+typedef npy_int32 __pyx_t_5numpy_int32_t;
+
+/* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":693
+ * ctypedef npy_int16      int16_t
+ * ctypedef npy_int32      int32_t
+ * ctypedef npy_int64      int64_t             # <<<<<<<<<<<<<<
+ * #ctypedef npy_int96      int96_t
+ * #ctypedef npy_int128     int128_t
+ */
+typedef npy_int64 __pyx_t_5numpy_int64_t;
+
+/* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":697
+ * #ctypedef npy_int128     int128_t
+ * 
+ * ctypedef npy_uint8      uint8_t             # <<<<<<<<<<<<<<
+ * ctypedef npy_uint16     uint16_t
+ * ctypedef npy_uint32     uint32_t
+ */
+typedef npy_uint8 __pyx_t_5numpy_uint8_t;
+
+/* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":698
+ * 
+ * ctypedef npy_uint8      uint8_t
+ * ctypedef npy_uint16     uint16_t             # <<<<<<<<<<<<<<
+ * ctypedef npy_uint32     uint32_t
+ * ctypedef npy_uint64     uint64_t
+ */
+typedef npy_uint16 __pyx_t_5numpy_uint16_t;
+
+/* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":699
+ * ctypedef npy_uint8      uint8_t
+ * ctypedef npy_uint16     uint16_t
+ * ctypedef npy_uint32     uint32_t             # <<<<<<<<<<<<<<
+ * ctypedef npy_uint64     uint64_t
+ * #ctypedef npy_uint96     uint96_t
+ */
+typedef npy_uint32 __pyx_t_5numpy_uint32_t;
+
+/* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":700
+ * ctypedef npy_uint16     uint16_t
+ * ctypedef npy_uint32     uint32_t
+ * ctypedef npy_uint64     uint64_t             # <<<<<<<<<<<<<<
+ * #ctypedef npy_uint96     uint96_t
+ * #ctypedef npy_uint128    uint128_t
+ */
+typedef npy_uint64 __pyx_t_5numpy_uint64_t;
+
+/* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":704
+ * #ctypedef npy_uint128    uint128_t
+ * 
+ * ctypedef npy_float32    float32_t             # <<<<<<<<<<<<<<
+ * ctypedef npy_float64    float64_t
+ * #ctypedef npy_float80    float80_t
+ */
+typedef npy_float32 __pyx_t_5numpy_float32_t;
+
+/* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":705
+ * 
+ * ctypedef npy_float32    float32_t
+ * ctypedef npy_float64    float64_t             # <<<<<<<<<<<<<<
+ * #ctypedef npy_float80    float80_t
+ * #ctypedef npy_float128   float128_t
+ */
+typedef npy_float64 __pyx_t_5numpy_float64_t;
+
+/* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":714
+ * # The int types are mapped a bit surprising --
+ * # numpy.int corresponds to 'l' and numpy.long to 'q'
+ * ctypedef npy_long       int_t             # <<<<<<<<<<<<<<
+ * ctypedef npy_longlong   long_t
+ * ctypedef npy_longlong   longlong_t
+ */
+typedef npy_long __pyx_t_5numpy_int_t;
+
+/* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":715
+ * # numpy.int corresponds to 'l' and numpy.long to 'q'
+ * ctypedef npy_long       int_t
+ * ctypedef npy_longlong   long_t             # <<<<<<<<<<<<<<
+ * ctypedef npy_longlong   longlong_t
+ * 
+ */
+typedef npy_longlong __pyx_t_5numpy_long_t;
+
+/* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":716
+ * ctypedef npy_long       int_t
+ * ctypedef npy_longlong   long_t
+ * ctypedef npy_longlong   longlong_t             # <<<<<<<<<<<<<<
+ * 
+ * ctypedef npy_ulong      uint_t
+ */
+typedef npy_longlong __pyx_t_5numpy_longlong_t;
+
+/* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":718
+ * ctypedef npy_longlong   longlong_t
+ * 
+ * ctypedef npy_ulong      uint_t             # <<<<<<<<<<<<<<
+ * ctypedef npy_ulonglong  ulong_t
+ * ctypedef npy_ulonglong  ulonglong_t
+ */
+typedef npy_ulong __pyx_t_5numpy_uint_t;
+
+/* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":719
+ * 
+ * ctypedef npy_ulong      uint_t
+ * ctypedef npy_ulonglong  ulong_t             # <<<<<<<<<<<<<<
+ * ctypedef npy_ulonglong  ulonglong_t
+ * 
+ */
+typedef npy_ulonglong __pyx_t_5numpy_ulong_t;
+
+/* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":720
+ * ctypedef npy_ulong      uint_t
+ * ctypedef npy_ulonglong  ulong_t
+ * ctypedef npy_ulonglong  ulonglong_t             # <<<<<<<<<<<<<<
+ * 
+ * ctypedef npy_intp       intp_t
+ */
+typedef npy_ulonglong __pyx_t_5numpy_ulonglong_t;
+
+/* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":722
+ * ctypedef npy_ulonglong  ulonglong_t
+ * 
+ * ctypedef npy_intp       intp_t             # <<<<<<<<<<<<<<
+ * ctypedef npy_uintp      uintp_t
+ * 
+ */
+typedef npy_intp __pyx_t_5numpy_intp_t;  /* numpy intp_t: alias of npy_intp */
+
+/* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":723
+ * 
+ * ctypedef npy_intp       intp_t
+ * ctypedef npy_uintp      uintp_t             # <<<<<<<<<<<<<<
+ * 
+ * ctypedef npy_double     float_t
+ */
+typedef npy_uintp __pyx_t_5numpy_uintp_t;  /* numpy uintp_t: alias of npy_uintp */
+
+/* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":725
+ * ctypedef npy_uintp      uintp_t
+ * 
+ * ctypedef npy_double     float_t             # <<<<<<<<<<<<<<
+ * ctypedef npy_double     double_t
+ * ctypedef npy_longdouble longdouble_t
+ */
+typedef npy_double __pyx_t_5numpy_float_t;  /* numpy float_t: declared as npy_double, i.e. the same type as double_t */
+
+/* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":726
+ * 
+ * ctypedef npy_double     float_t
+ * ctypedef npy_double     double_t             # <<<<<<<<<<<<<<
+ * ctypedef npy_longdouble longdouble_t
+ * 
+ */
+typedef npy_double __pyx_t_5numpy_double_t;  /* numpy double_t: alias of npy_double */
+
+/* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":727
+ * ctypedef npy_double     float_t
+ * ctypedef npy_double     double_t
+ * ctypedef npy_longdouble longdouble_t             # <<<<<<<<<<<<<<
+ * 
+ * ctypedef npy_cfloat      cfloat_t
+ */
+typedef npy_longdouble __pyx_t_5numpy_longdouble_t;  /* numpy longdouble_t: alias of npy_longdouble */
+/* Declarations.proto: representation of the single-precision complex type. With CYTHON_CCOMPLEX it is std::complex<float> (C++) or float _Complex (C); otherwise a plain {real, imag} struct. */
+#if CYTHON_CCOMPLEX
+  #ifdef __cplusplus
+    typedef ::std::complex< float > __pyx_t_float_complex;
+  #else
+    typedef float _Complex __pyx_t_float_complex;
+  #endif
+#else
+    typedef struct { float real, imag; } __pyx_t_float_complex;
+#endif
+static CYTHON_INLINE __pyx_t_float_complex __pyx_t_float_complex_from_parts(float, float);  /* constructor from two floats; prototype only */
+
+/* Declarations.proto: representation of the double-precision complex type. With CYTHON_CCOMPLEX it is std::complex<double> (C++) or double _Complex (C); otherwise a plain {real, imag} struct. */
+#if CYTHON_CCOMPLEX
+  #ifdef __cplusplus
+    typedef ::std::complex< double > __pyx_t_double_complex;
+  #else
+    typedef double _Complex __pyx_t_double_complex;
+  #endif
+#else
+    typedef struct { double real, imag; } __pyx_t_double_complex;
+#endif
+static CYTHON_INLINE __pyx_t_double_complex __pyx_t_double_complex_from_parts(double, double);  /* constructor from two doubles; prototype only */
+
+
+/*--- Type declarations ---*/
+struct __pyx_array_obj;
+struct __pyx_MemviewEnum_obj;
+struct __pyx_memoryview_obj;
+struct __pyx_memoryviewslice_obj;
+
+/* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":729
+ * ctypedef npy_longdouble longdouble_t
+ * 
+ * ctypedef npy_cfloat      cfloat_t             # <<<<<<<<<<<<<<
+ * ctypedef npy_cdouble     cdouble_t
+ * ctypedef npy_clongdouble clongdouble_t
+ */
+typedef npy_cfloat __pyx_t_5numpy_cfloat_t;  /* numpy cfloat_t: alias of npy_cfloat */
+
+/* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":730
+ * 
+ * ctypedef npy_cfloat      cfloat_t
+ * ctypedef npy_cdouble     cdouble_t             # <<<<<<<<<<<<<<
+ * ctypedef npy_clongdouble clongdouble_t
+ * 
+ */
+typedef npy_cdouble __pyx_t_5numpy_cdouble_t;  /* numpy cdouble_t: alias of npy_cdouble */
+
+/* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":731
+ * ctypedef npy_cfloat      cfloat_t
+ * ctypedef npy_cdouble     cdouble_t
+ * ctypedef npy_clongdouble clongdouble_t             # <<<<<<<<<<<<<<
+ * 
+ * ctypedef npy_cdouble     complex_t
+ */
+typedef npy_clongdouble __pyx_t_5numpy_clongdouble_t;  /* numpy clongdouble_t: alias of npy_clongdouble */
+
+/* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":733
+ * ctypedef npy_clongdouble clongdouble_t
+ * 
+ * ctypedef npy_cdouble     complex_t             # <<<<<<<<<<<<<<
+ * 
+ * cdef inline object PyArray_MultiIterNew1(a):
+ */
+typedef npy_cdouble __pyx_t_5numpy_complex_t;  /* numpy complex_t: declared as npy_cdouble, i.e. the same type as cdouble_t */
+struct __pyx_opt_args_3TTS_3tts_5utils_15monotonic_align_4core_maximum_path_c;
+
+/* "TTS/tts/utils/monotonic_align/core.pyx":42
+ * @cython.boundscheck(False)
+ * @cython.wraparound(False)
+ * cpdef void maximum_path_c(int[:,:,::1] paths, float[:,:,::1] values, int[::1] t_xs, int[::1] t_ys, float max_neg_val=-1e9) nogil:             # <<<<<<<<<<<<<<
+ *   cdef int b = values.shape[0]
+ * 
+ */
+struct __pyx_opt_args_3TTS_3tts_5utils_15monotonic_align_4core_maximum_path_c {  /* carrier for the optional arguments of maximum_path_c */
+  int __pyx_n;  /* NOTE(review): presumably the number of optional arguments actually supplied -- confirm at the call sites */
+  float max_neg_val;  /* value for the optional max_neg_val parameter (declared with default -1e9 in core.pyx) */
+};
+
+/* "View.MemoryView":105
+ * 
+ * @cname("__pyx_array")
+ * cdef class array:             # <<<<<<<<<<<<<<
+ * 
+ *     cdef:
+ */
+struct __pyx_array_obj {  /* instance layout of the View.MemoryView "array" cdef class */
+  PyObject_HEAD
+  struct __pyx_vtabstruct_array *__pyx_vtab;  /* table of C-level methods for this class (holds get_memview) */
+  char *data;
+  Py_ssize_t len;
+  char *format;
+  int ndim;
+  Py_ssize_t *_shape;
+  Py_ssize_t *_strides;
+  Py_ssize_t itemsize;
+  PyObject *mode;
+  PyObject *_format;
+  void (*callback_free_data)(void *);  /* C callback taking one untyped pointer; NOTE(review): presumably a custom deallocator for data -- confirm */
+  int free_data;
+  int dtype_is_object;
+};
+
+
+/* "View.MemoryView":279
+ * 
+ * @cname('__pyx_MemviewEnum')
+ * cdef class Enum(object):             # <<<<<<<<<<<<<<
+ *     cdef object name
+ *     def __init__(self, name):
+ */
+struct __pyx_MemviewEnum_obj {  /* instance layout of the View.MemoryView "Enum" cdef class */
+  PyObject_HEAD
+  PyObject *name;  /* storage for the class's "cdef object name" attribute */
+};
+
+
+/* "View.MemoryView":330
+ * 
+ * @cname('__pyx_memoryview')
+ * cdef class memoryview(object):             # <<<<<<<<<<<<<<
+ * 
+ *     cdef object obj
+ */
+struct __pyx_memoryview_obj {  /* instance layout of the View.MemoryView "memoryview" cdef class */
+  PyObject_HEAD
+  struct __pyx_vtabstruct_memoryview *__pyx_vtab;  /* table of C-level methods for this class */
+  PyObject *obj;  /* storage for the class's "cdef object obj" attribute */
+  PyObject *_size;
+  PyObject *_array_interface;
+  PyThread_type_lock lock;
+  __pyx_atomic_int acquisition_count[2];
+  __pyx_atomic_int *acquisition_count_aligned_p;  /* the pointer that __pyx_get_slice_count_pointer() returns */
+  Py_buffer view;
+  int flags;
+  int dtype_is_object;
+  __Pyx_TypeInfo *typeinfo;
+};
+
+
+/* "View.MemoryView":965
+ * 
+ * @cname('__pyx_memoryviewslice')
+ * cdef class _memoryviewslice(memoryview):             # <<<<<<<<<<<<<<
+ *     "Internal class for passing memoryview slices to Python"
+ * 
+ */
+struct __pyx_memoryviewslice_obj {  /* instance layout of the "_memoryviewslice" cdef class */
+  struct __pyx_memoryview_obj __pyx_base;  /* base-class layout embedded first: _memoryviewslice derives from memoryview */
+  __Pyx_memviewslice from_slice;
+  PyObject *from_object;
+  PyObject *(*to_object_func)(char *);  /* C callback: raw item pointer in, Python object out */
+  int (*to_dtype_func)(char *, PyObject *);  /* C callback: raw item pointer plus Python object in, int status out */
+};
+
+
+
+/* "View.MemoryView":105
+ * 
+ * @cname("__pyx_array")
+ * cdef class array:             # <<<<<<<<<<<<<<
+ * 
+ *     cdef:
+ */
+
+struct __pyx_vtabstruct_array {  /* C-level method table for the "array" cdef class */
+  PyObject *(*get_memview)(struct __pyx_array_obj *);
+};
+static struct __pyx_vtabstruct_array *__pyx_vtabptr_array;  /* file-local pointer to that table; declared without an initializer */
+
+
+/* "View.MemoryView":330
+ * 
+ * @cname('__pyx_memoryview')
+ * cdef class memoryview(object):             # <<<<<<<<<<<<<<
+ * 
+ *     cdef object obj
+ */
+
+struct __pyx_vtabstruct_memoryview {  /* C-level method table for the "memoryview" cdef class; every entry takes the instance as first argument */
+  char *(*get_item_pointer)(struct __pyx_memoryview_obj *, PyObject *);
+  PyObject *(*is_slice)(struct __pyx_memoryview_obj *, PyObject *);
+  PyObject *(*setitem_slice_assignment)(struct __pyx_memoryview_obj *, PyObject *, PyObject *);
+  PyObject *(*setitem_slice_assign_scalar)(struct __pyx_memoryview_obj *, struct __pyx_memoryview_obj *, PyObject *);
+  PyObject *(*setitem_indexed)(struct __pyx_memoryview_obj *, PyObject *, PyObject *);
+  PyObject *(*convert_item_to_object)(struct __pyx_memoryview_obj *, char *);
+  PyObject *(*assign_item_from_object)(struct __pyx_memoryview_obj *, char *, PyObject *);
+};
+static struct __pyx_vtabstruct_memoryview *__pyx_vtabptr_memoryview;  /* file-local pointer to that table; declared without an initializer */
+
+
+/* "View.MemoryView":965
+ * 
+ * @cname('__pyx_memoryviewslice')
+ * cdef class _memoryviewslice(memoryview):             # <<<<<<<<<<<<<<
+ *     "Internal class for passing memoryview slices to Python"
+ * 
+ */
+
+struct __pyx_vtabstruct__memoryviewslice {  /* C-level method table for the "_memoryviewslice" cdef class */
+  struct __pyx_vtabstruct_memoryview __pyx_base;  /* the memoryview table embedded as the only member: no extra methods are added */
+};
+static struct __pyx_vtabstruct__memoryviewslice *__pyx_vtabptr__memoryviewslice;  /* file-local pointer to that table; declared without an initializer */
+
+/* --- Runtime support code (head) --- */
+/* Refnanny.proto: reference-count bookkeeping macros. With CYTHON_REFNANNY set, INCREF/DECREF/GOTREF/GIVEREF are routed through the __Pyx_RefNanny callback table together with __LINE__; otherwise they reduce to Py_INCREF/Py_DECREF or to nothing. */
+#ifndef CYTHON_REFNANNY
+  #define CYTHON_REFNANNY 0
+#endif
+#if CYTHON_REFNANNY
+  typedef struct {
+    void (*INCREF)(void*, PyObject*, int);
+    void (*DECREF)(void*, PyObject*, int);
+    void (*GOTREF)(void*, PyObject*, int);
+    void (*GIVEREF)(void*, PyObject*, int);
+    void* (*SetupContext)(const char*, int, const char*);
+    void (*FinishContext)(void**);
+  } __Pyx_RefNannyAPIStruct;
+  static __Pyx_RefNannyAPIStruct *__Pyx_RefNanny = NULL;
+  static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname);
+  #define __Pyx_RefNannyDeclarations void *__pyx_refnanny = NULL;
+#ifdef WITH_THREAD  /* threaded builds: take the GIL around SetupContext when the caller asks for it */
+  #define __Pyx_RefNannySetupContext(name, acquire_gil)\
+          if (acquire_gil) {\
+              PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure();\
+              __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__);\
+              PyGILState_Release(__pyx_gilstate_save);\
+          } else {\
+              __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__);\
+          }
+#else  /* no thread support: acquire_gil is ignored */
+  #define __Pyx_RefNannySetupContext(name, acquire_gil)\
+          __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__)
+#endif
+  #define __Pyx_RefNannyFinishContext()\
+          __Pyx_RefNanny->FinishContext(&__pyx_refnanny)
+  #define __Pyx_INCREF(r)  __Pyx_RefNanny->INCREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
+  #define __Pyx_DECREF(r)  __Pyx_RefNanny->DECREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
+  #define __Pyx_GOTREF(r)  __Pyx_RefNanny->GOTREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
+  #define __Pyx_GIVEREF(r) __Pyx_RefNanny->GIVEREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
+  #define __Pyx_XINCREF(r)  do { if((r) != NULL) {__Pyx_INCREF(r); }} while(0)
+  #define __Pyx_XDECREF(r)  do { if((r) != NULL) {__Pyx_DECREF(r); }} while(0)
+  #define __Pyx_XGOTREF(r)  do { if((r) != NULL) {__Pyx_GOTREF(r); }} while(0)
+  #define __Pyx_XGIVEREF(r) do { if((r) != NULL) {__Pyx_GIVEREF(r);}} while(0)
+#else  /* refnanny disabled: context macros expand to nothing, GOTREF/GIVEREF are no-ops */
+  #define __Pyx_RefNannyDeclarations
+  #define __Pyx_RefNannySetupContext(name, acquire_gil)
+  #define __Pyx_RefNannyFinishContext()
+  #define __Pyx_INCREF(r) Py_INCREF(r)
+  #define __Pyx_DECREF(r) Py_DECREF(r)
+  #define __Pyx_GOTREF(r)
+  #define __Pyx_GIVEREF(r)
+  #define __Pyx_XINCREF(r) Py_XINCREF(r)
+  #define __Pyx_XDECREF(r) Py_XDECREF(r)
+  #define __Pyx_XGOTREF(r)
+  #define __Pyx_XGIVEREF(r)
+#endif
+#define __Pyx_XDECREF_SET(r, v) do {\
+        PyObject *tmp = (PyObject *) r;\
+        r = v; __Pyx_XDECREF(tmp);\
+    } while (0)  /* r receives v before the previous value is released; the previous value may be NULL */
+#define __Pyx_DECREF_SET(r, v) do {\
+        PyObject *tmp = (PyObject *) r;\
+        r = v; __Pyx_DECREF(tmp);\
+    } while (0)  /* same ordering, but the previous value goes through __Pyx_DECREF with no NULL check */
+#define __Pyx_CLEAR(r)    do { PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);} while(0)  /* r is set to NULL first, then the old value is released */
+#define __Pyx_XCLEAR(r)   do { if((r) != NULL) {PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);}} while(0)  /* as __Pyx_CLEAR, skipped entirely when r is already NULL */
+
+/* PyObjectGetAttrStr.proto: attribute lookup helper -- a dedicated inline function when CYTHON_USE_TYPE_SLOTS is on, otherwise a direct alias for PyObject_GetAttr. */
+#if CYTHON_USE_TYPE_SLOTS
+static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStr(PyObject* obj, PyObject* attr_name);
+#else
+#define __Pyx_PyObject_GetAttrStr(o,n) PyObject_GetAttr(o,n)
+#endif
+
+/* GetBuiltinName.proto */
+static PyObject *__Pyx_GetBuiltinName(PyObject *name);
+
+/* MemviewSliceInit.proto */
+#define __Pyx_BUF_MAX_NDIMS %(BUF_MAX_NDIMS)d
+#define __Pyx_MEMVIEW_DIRECT   1
+#define __Pyx_MEMVIEW_PTR      2
+#define __Pyx_MEMVIEW_FULL     4
+#define __Pyx_MEMVIEW_CONTIG   8
+#define __Pyx_MEMVIEW_STRIDED  16
+#define __Pyx_MEMVIEW_FOLLOW   32
+#define __Pyx_IS_C_CONTIG 1
+#define __Pyx_IS_F_CONTIG 2
+static int __Pyx_init_memviewslice(
+                struct __pyx_memoryview_obj *memview,
+                int ndim,
+                __Pyx_memviewslice *memviewslice,
+                int memview_is_new_reference);
+static CYTHON_INLINE int __pyx_add_acquisition_count_locked(
+    __pyx_atomic_int *acquisition_count, PyThread_type_lock lock);
+static CYTHON_INLINE int __pyx_sub_acquisition_count_locked(
+    __pyx_atomic_int *acquisition_count, PyThread_type_lock lock);
+#define __pyx_get_slice_count_pointer(memview) (memview->acquisition_count_aligned_p)
+#define __pyx_get_slice_count(memview) (*__pyx_get_slice_count_pointer(memview))
+#define __PYX_INC_MEMVIEW(slice, have_gil) __Pyx_INC_MEMVIEW(slice, have_gil, __LINE__)
+#define __PYX_XDEC_MEMVIEW(slice, have_gil) __Pyx_XDEC_MEMVIEW(slice, have_gil, __LINE__)
+static CYTHON_INLINE void __Pyx_INC_MEMVIEW(__Pyx_memviewslice *, int, int);
+static CYTHON_INLINE void __Pyx_XDEC_MEMVIEW(__Pyx_memviewslice *, int, int);
+
+/* RaiseArgTupleInvalid.proto */
+static void __Pyx_RaiseArgtupleInvalid(const char* func_name, int exact,
+    Py_ssize_t num_min, Py_ssize_t num_max, Py_ssize_t num_found);
+
+/* RaiseDoubleKeywords.proto */
+static void __Pyx_RaiseDoubleKeywordsError(const char* func_name, PyObject* kw_name);
+
+/* ParseKeywords.proto */
+static int __Pyx_ParseOptionalKeywords(PyObject *kwds, PyObject **argnames[],\
+    PyObject *kwds2, PyObject *values[], Py_ssize_t num_pos_args,\
+    const char* function_name);
+
+/* None.proto */
+static CYTHON_INLINE void __Pyx_RaiseUnboundLocalError(const char *varname);
+
+/* GetTopmostException.proto */
+#if CYTHON_USE_EXC_INFO_STACK
+static _PyErr_StackItem * __Pyx_PyErr_GetTopmostException(PyThreadState *tstate);
+#endif
+
+/* PyThreadStateGet.proto: with CYTHON_FAST_THREAD_STATE a function declares a local __pyx_tstate, assigns it from __Pyx_PyThreadState_Current and reads the pending exception type from it; otherwise the declare/assign macros are empty and PyErr_Occurred() is used. */
+#if CYTHON_FAST_THREAD_STATE
+#define __Pyx_PyThreadState_declare  PyThreadState *__pyx_tstate;
+#define __Pyx_PyThreadState_assign  __pyx_tstate = __Pyx_PyThreadState_Current;
+#define __Pyx_PyErr_Occurred()  __pyx_tstate->curexc_type
+#else
+#define __Pyx_PyThreadState_declare
+#define __Pyx_PyThreadState_assign
+#define __Pyx_PyErr_Occurred()  PyErr_Occurred()
+#endif
+
+/* SaveResetException.proto: save/reset of the exception-info triple. The fast variants pass the local __pyx_tstate to inline helpers; the fallback maps straight onto PyErr_GetExcInfo / PyErr_SetExcInfo. */
+#if CYTHON_FAST_THREAD_STATE
+#define __Pyx_ExceptionSave(type, value, tb)  __Pyx__ExceptionSave(__pyx_tstate, type, value, tb)
+static CYTHON_INLINE void __Pyx__ExceptionSave(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb);
+#define __Pyx_ExceptionReset(type, value, tb)  __Pyx__ExceptionReset(__pyx_tstate, type, value, tb)
+static CYTHON_INLINE void __Pyx__ExceptionReset(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb);
+#else
+#define __Pyx_ExceptionSave(type, value, tb)   PyErr_GetExcInfo(type, value, tb)
+#define __Pyx_ExceptionReset(type, value, tb)  PyErr_SetExcInfo(type, value, tb)
+#endif
+
+/* PyErrExceptionMatches.proto: exception-match test; the fast variant needs a __pyx_tstate in scope at the call site, the fallback is PyErr_ExceptionMatches. */
+#if CYTHON_FAST_THREAD_STATE
+#define __Pyx_PyErr_ExceptionMatches(err) __Pyx_PyErr_ExceptionMatchesInState(__pyx_tstate, err)
+static CYTHON_INLINE int __Pyx_PyErr_ExceptionMatchesInState(PyThreadState* tstate, PyObject* err);
+#else
+#define __Pyx_PyErr_ExceptionMatches(err)  PyErr_ExceptionMatches(err)
+#endif
+
+/* GetException.proto: __Pyx_GetException is a macro that injects the local __pyx_tstate under CYTHON_FAST_THREAD_STATE, and an ordinary three-argument function otherwise. */
+#if CYTHON_FAST_THREAD_STATE
+#define __Pyx_GetException(type, value, tb)  __Pyx__GetException(__pyx_tstate, type, value, tb)
+static int __Pyx__GetException(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb);
+#else
+static int __Pyx_GetException(PyObject **type, PyObject **value, PyObject **tb);
+#endif
+
+/* PyObjectCall.proto: generic call helper -- a dedicated inline function when compiling for CPython, otherwise a direct alias for PyObject_Call. */
+#if CYTHON_COMPILING_IN_CPYTHON
+static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw);
+#else
+#define __Pyx_PyObject_Call(func, arg, kw) PyObject_Call(func, arg, kw)
+#endif
+
+/* PyErrFetchRestore.proto: fetch/restore/clear of the pending exception. Fast variants work on an explicit thread state (the local __pyx_tstate, or PyThreadState_GET() for the *WithState forms); the fallback maps every name onto the public PyErr_* API. */
+#if CYTHON_FAST_THREAD_STATE
+#define __Pyx_PyErr_Clear() __Pyx_ErrRestore(NULL, NULL, NULL)
+#define __Pyx_ErrRestoreWithState(type, value, tb)  __Pyx_ErrRestoreInState(PyThreadState_GET(), type, value, tb)
+#define __Pyx_ErrFetchWithState(type, value, tb)    __Pyx_ErrFetchInState(PyThreadState_GET(), type, value, tb)
+#define __Pyx_ErrRestore(type, value, tb)  __Pyx_ErrRestoreInState(__pyx_tstate, type, value, tb)
+#define __Pyx_ErrFetch(type, value, tb)    __Pyx_ErrFetchInState(__pyx_tstate, type, value, tb)
+static CYTHON_INLINE void __Pyx_ErrRestoreInState(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb);
+static CYTHON_INLINE void __Pyx_ErrFetchInState(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb);
+#if CYTHON_COMPILING_IN_CPYTHON
+#define __Pyx_PyErr_SetNone(exc) (Py_INCREF(exc), __Pyx_ErrRestore((exc), NULL, NULL))  /* increments exc, then restores it with NULL value and traceback */
+#else
+#define __Pyx_PyErr_SetNone(exc) PyErr_SetNone(exc)
+#endif
+#else  /* no fast thread state: the tstate argument of the *InState forms is dropped */
+#define __Pyx_PyErr_Clear() PyErr_Clear()
+#define __Pyx_PyErr_SetNone(exc) PyErr_SetNone(exc)
+#define __Pyx_ErrRestoreWithState(type, value, tb)  PyErr_Restore(type, value, tb)
+#define __Pyx_ErrFetchWithState(type, value, tb)  PyErr_Fetch(type, value, tb)
+#define __Pyx_ErrRestoreInState(tstate, type, value, tb)  PyErr_Restore(type, value, tb)
+#define __Pyx_ErrFetchInState(tstate, type, value, tb)  PyErr_Fetch(type, value, tb)
+#define __Pyx_ErrRestore(type, value, tb)  PyErr_Restore(type, value, tb)
+#define __Pyx_ErrFetch(type, value, tb)  PyErr_Fetch(type, value, tb)
+#endif
+
+/* RaiseException.proto */
+static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject *cause);
+
+/* ArgTypeTest.proto: argument type check. Evaluates to 1 without a function call when the object's type is exactly `type`, or when it is None and none_allowed is set; every other case is delegated to __Pyx__ArgTypeTest. The two fast tests are joined with a non-short-circuit `|`. */
+#define __Pyx_ArgTypeTest(obj, type, none_allowed, name, exact)\
+    ((likely((Py_TYPE(obj) == type) | (none_allowed && (obj == Py_None)))) ? 1 :\
+        __Pyx__ArgTypeTest(obj, type, name, exact))
+static int __Pyx__ArgTypeTest(PyObject *obj, PyTypeObject *type, const char *name, int exact);
+
+/* PyCFunctionFastCall.proto: fast call of C functions. Without CYTHON_FAST_PYCCALL the name still exists, but expands to an assert(0) that yields NULL. */
+#if CYTHON_FAST_PYCCALL
+static CYTHON_INLINE PyObject *__Pyx_PyCFunction_FastCall(PyObject *func, PyObject **args, Py_ssize_t nargs);
+#else
+#define __Pyx_PyCFunction_FastCall(func, args, nargs)  (assert(0), NULL)
+#endif
+
+/* PyFunctionFastCall.proto: fast call of Python functions plus frame-layout helpers; everything here exists only under CYTHON_FAST_PYCALL. NOTE(review): the initializer macro below is spelled __Pxy_ (not __Pyx_); users must match that spelling. */
+#if CYTHON_FAST_PYCALL
+#define __Pyx_PyFunction_FastCall(func, args, nargs)\
+    __Pyx_PyFunction_FastCallDict((func), (args), (nargs), NULL)
+#if 1 || PY_VERSION_HEX < 0x030600B1  /* the leading "1 ||" makes this branch unconditional: the file's own FastCallDict prototype is always used */
+static PyObject *__Pyx_PyFunction_FastCallDict(PyObject *func, PyObject **args, Py_ssize_t nargs, PyObject *kwargs);
+#else
+#define __Pyx_PyFunction_FastCallDict(func, args, nargs, kwargs) _PyFunction_FastCallDict(func, args, nargs, kwargs)
+#endif
+#define __Pyx_BUILD_ASSERT_EXPR(cond)\
+    (sizeof(char [1 - 2*!(cond)]) - 1)  /* array length is -1 (a compile error) when cond is false; the expression is 0 when cond holds */
+#ifndef Py_MEMBER_SIZE
+#define Py_MEMBER_SIZE(type, member) sizeof(((type *)0)->member)
+#endif
+#if CYTHON_FAST_PYCALL  /* already true here: this sits inside the outer CYTHON_FAST_PYCALL block */
+  static size_t __pyx_pyframe_localsplus_offset = 0;
+  #include "frameobject.h"
+  #define __Pxy_PyFrame_Initialize_Offsets()\
+    ((void)__Pyx_BUILD_ASSERT_EXPR(sizeof(PyFrameObject) == offsetof(PyFrameObject, f_localsplus) + Py_MEMBER_SIZE(PyFrameObject, f_localsplus)),\
+     (void)(__pyx_pyframe_localsplus_offset = ((size_t)PyFrame_Type.tp_basicsize) - Py_MEMBER_SIZE(PyFrameObject, f_localsplus)))
+  #define __Pyx_PyFrame_GetLocalsplus(frame)\
+    (assert(__pyx_pyframe_localsplus_offset), (PyObject **)(((char *)(frame)) + __pyx_pyframe_localsplus_offset))
+#endif // CYTHON_FAST_PYCALL
+#endif
+
+/* PyObjectCall2Args.proto */
+static CYTHON_UNUSED PyObject* __Pyx_PyObject_Call2Args(PyObject* function, PyObject* arg1, PyObject* arg2);
+
+/* PyObjectCallMethO.proto */
+#if CYTHON_COMPILING_IN_CPYTHON
+static CYTHON_INLINE PyObject* __Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg);
+#endif
+
+/* PyObjectCallOneArg.proto */
+static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg);
+
+/* IncludeStringH.proto */
+#include <string.h>
+
+/* BytesEquals.proto */
+static CYTHON_INLINE int __Pyx_PyBytes_Equals(PyObject* s1, PyObject* s2, int equals);
+
+/* UnicodeEquals.proto */
+static CYTHON_INLINE int __Pyx_PyUnicode_Equals(PyObject* s1, PyObject* s2, int equals);
+
+/* StrEquals.proto: __Pyx_PyString_Equals is the unicode comparison on Python 3 and the bytes comparison on Python 2. */
+#if PY_MAJOR_VERSION >= 3
+#define __Pyx_PyString_Equals __Pyx_PyUnicode_Equals
+#else
+#define __Pyx_PyString_Equals __Pyx_PyBytes_Equals
+#endif
+
+/* DivInt[Py_ssize_t].proto */
+static CYTHON_INLINE Py_ssize_t __Pyx_div_Py_ssize_t(Py_ssize_t, Py_ssize_t);
+
+/* UnaryNegOverflows.proto: true when x is negative and, converted to unsigned long, equals its own negation -- the case where -x cannot be represented. The comparison is carried out at unsigned long width. */
+#define UNARY_NEG_WOULD_OVERFLOW(x)\
+        (((x) < 0) & ((unsigned long)(x) == 0-(unsigned long)(x)))
+
+static CYTHON_UNUSED int __pyx_array_getbuffer(PyObject *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags); /*proto*/
+static PyObject *__pyx_array_get_memview(struct __pyx_array_obj *); /*proto*/
+/* GetAttr.proto */
+static CYTHON_INLINE PyObject *__Pyx_GetAttr(PyObject *, PyObject *);
+
+/* GetItemInt.proto: integer indexing. Each macro takes the *_Fast route when the index fits in Py_ssize_t; otherwise the list/tuple forms set IndexError and yield NULL, and the generic form falls back to __Pyx_GetItemInt_Generic with the index converted by to_py_func. */
+#define __Pyx_GetItemInt(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck)\
+    (__Pyx_fits_Py_ssize_t(i, type, is_signed) ?\
+    __Pyx_GetItemInt_Fast(o, (Py_ssize_t)i, is_list, wraparound, boundscheck) :\
+    (is_list ? (PyErr_SetString(PyExc_IndexError, "list index out of range"), (PyObject*)NULL) :\
+               __Pyx_GetItemInt_Generic(o, to_py_func(i))))
+#define __Pyx_GetItemInt_List(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck)\
+    (__Pyx_fits_Py_ssize_t(i, type, is_signed) ?\
+    __Pyx_GetItemInt_List_Fast(o, (Py_ssize_t)i, wraparound, boundscheck) :\
+    (PyErr_SetString(PyExc_IndexError, "list index out of range"), (PyObject*)NULL))
+static CYTHON_INLINE PyObject *__Pyx_GetItemInt_List_Fast(PyObject *o, Py_ssize_t i,
+                                                              int wraparound, int boundscheck);
+#define __Pyx_GetItemInt_Tuple(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck)\
+    (__Pyx_fits_Py_ssize_t(i, type, is_signed) ?\
+    __Pyx_GetItemInt_Tuple_Fast(o, (Py_ssize_t)i, wraparound, boundscheck) :\
+    (PyErr_SetString(PyExc_IndexError, "tuple index out of range"), (PyObject*)NULL))
+static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Tuple_Fast(PyObject *o, Py_ssize_t i,
+                                                              int wraparound, int boundscheck);
+static PyObject *__Pyx_GetItemInt_Generic(PyObject *o, PyObject* j);
+static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Fast(PyObject *o, Py_ssize_t i,
+                                                     int is_list, int wraparound, int boundscheck);
+
+/* ObjectGetItem.proto: subscript helper -- a dedicated inline function when CYTHON_USE_TYPE_SLOTS is on, otherwise a direct alias for PyObject_GetItem. */
+#if CYTHON_USE_TYPE_SLOTS
+static CYTHON_INLINE PyObject *__Pyx_PyObject_GetItem(PyObject *obj, PyObject* key);
+#else
+#define __Pyx_PyObject_GetItem(obj, key)  PyObject_GetItem(obj, key)
+#endif
+
+/* decode_c_string_utf16.proto */
+static CYTHON_INLINE PyObject *__Pyx_PyUnicode_DecodeUTF16(const char *s, Py_ssize_t size, const char *errors) {
+    int order_hint = 0;  /* 0 = native byte order in PyUnicode_DecodeUTF16's convention */
+    return PyUnicode_DecodeUTF16(s, size, errors, &order_hint);
+}
+static CYTHON_INLINE PyObject *__Pyx_PyUnicode_DecodeUTF16LE(const char *s, Py_ssize_t size, const char *errors) {
+    int order_hint = -1;  /* -1 = little-endian in PyUnicode_DecodeUTF16's convention */
+    return PyUnicode_DecodeUTF16(s, size, errors, &order_hint);
+}
+static CYTHON_INLINE PyObject *__Pyx_PyUnicode_DecodeUTF16BE(const char *s, Py_ssize_t size, const char *errors) {
+    int order_hint = 1;  /* 1 = big-endian in PyUnicode_DecodeUTF16's convention */
+    return PyUnicode_DecodeUTF16(s, size, errors, &order_hint);
+}
+
+/* decode_c_string.proto */
+static CYTHON_INLINE PyObject* __Pyx_decode_c_string(
+         const char* cstring, Py_ssize_t start, Py_ssize_t stop,
+         const char* encoding, const char* errors,
+         PyObject* (*decode_func)(const char *s, Py_ssize_t size, const char *errors));
+
+/* GetAttr3.proto */
+static CYTHON_INLINE PyObject *__Pyx_GetAttr3(PyObject *, PyObject *, PyObject *);
+
+/* PyDictVersioning.proto: lookup caching keyed on a dict's ma_version_tag. Active only when both CYTHON_USE_DICT_VERSIONS and CYTHON_USE_TYPE_SLOTS are set; otherwise the version is a constant 0, the cache update is empty and every lookup is performed directly. */
+#if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_TYPE_SLOTS
+#define __PYX_DICT_VERSION_INIT  ((PY_UINT64_T) -1)
+#define __PYX_GET_DICT_VERSION(dict)  (((PyDictObject*)(dict))->ma_version_tag)
+#define __PYX_UPDATE_DICT_CACHE(dict, value, cache_var, version_var)\
+    (version_var) = __PYX_GET_DICT_VERSION(dict);\
+    (cache_var) = (value);
+#define __PYX_PY_DICT_LOOKUP_IF_MODIFIED(VAR, DICT, LOOKUP) {\
+    static PY_UINT64_T __pyx_dict_version = 0;\
+    static PyObject *__pyx_dict_cached_value = NULL;\
+    if (likely(__PYX_GET_DICT_VERSION(DICT) == __pyx_dict_version)) {\
+        (VAR) = __pyx_dict_cached_value;\
+    } else {\
+        (VAR) = __pyx_dict_cached_value = (LOOKUP);\
+        __pyx_dict_version = __PYX_GET_DICT_VERSION(DICT);\
+    }\
+}
+static CYTHON_INLINE PY_UINT64_T __Pyx_get_tp_dict_version(PyObject *obj);
+static CYTHON_INLINE PY_UINT64_T __Pyx_get_object_dict_version(PyObject *obj);
+static CYTHON_INLINE int __Pyx_object_dict_version_matches(PyObject* obj, PY_UINT64_T tp_dict_version, PY_UINT64_T obj_dict_version);
+#else
+#define __PYX_GET_DICT_VERSION(dict)  (0)
+#define __PYX_UPDATE_DICT_CACHE(dict, value, cache_var, version_var)
+#define __PYX_PY_DICT_LOOKUP_IF_MODIFIED(VAR, DICT, LOOKUP)  (VAR) = (LOOKUP);
+#endif
+
+/* GetModuleGlobalName.proto: module-global name lookup. With CYTHON_USE_DICT_VERSIONS each expansion of the cached macro keeps its own static version/value pair for the module dict __pyx_d; the Uncached form uses throwaway locals. Without dict versions both forms are the same plain call. */
+#if CYTHON_USE_DICT_VERSIONS
+#define __Pyx_GetModuleGlobalName(var, name)  {\
+    static PY_UINT64_T __pyx_dict_version = 0;\
+    static PyObject *__pyx_dict_cached_value = NULL;\
+    (var) = (likely(__pyx_dict_version == __PYX_GET_DICT_VERSION(__pyx_d))) ?\
+        (likely(__pyx_dict_cached_value) ? __Pyx_NewRef(__pyx_dict_cached_value) : __Pyx_GetBuiltinName(name)) :\
+        __Pyx__GetModuleGlobalName(name, &__pyx_dict_version, &__pyx_dict_cached_value);\
+}
+#define __Pyx_GetModuleGlobalNameUncached(var, name)  {\
+    PY_UINT64_T __pyx_dict_version;\
+    PyObject *__pyx_dict_cached_value;\
+    (var) = __Pyx__GetModuleGlobalName(name, &__pyx_dict_version, &__pyx_dict_cached_value);\
+}
+static PyObject *__Pyx__GetModuleGlobalName(PyObject *name, PY_UINT64_T *dict_version, PyObject **dict_cached_value);
+#else
+#define __Pyx_GetModuleGlobalName(var, name)  (var) = __Pyx__GetModuleGlobalName(name)
+#define __Pyx_GetModuleGlobalNameUncached(var, name)  (var) = __Pyx__GetModuleGlobalName(name)
+static CYTHON_INLINE PyObject *__Pyx__GetModuleGlobalName(PyObject *name);
+#endif
+
+/* RaiseTooManyValuesToUnpack.proto */
+static CYTHON_INLINE void __Pyx_RaiseTooManyValuesError(Py_ssize_t expected);
+
+/* RaiseNeedMoreValuesToUnpack.proto */
+static CYTHON_INLINE void __Pyx_RaiseNeedMoreValuesError(Py_ssize_t index);
+
+/* RaiseNoneIterError.proto */
+static CYTHON_INLINE void __Pyx_RaiseNoneNotIterableError(void);
+
+/* ExtTypeTest.proto */
+static CYTHON_INLINE int __Pyx_TypeTest(PyObject *obj, PyTypeObject *type);
+
+/* SwapException.proto: __Pyx_ExceptionSwap is a macro that injects the local __pyx_tstate under CYTHON_FAST_THREAD_STATE, and an ordinary three-argument inline function otherwise. */
+#if CYTHON_FAST_THREAD_STATE
+#define __Pyx_ExceptionSwap(type, value, tb)  __Pyx__ExceptionSwap(__pyx_tstate, type, value, tb)
+static CYTHON_INLINE void __Pyx__ExceptionSwap(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb);
+#else
+static CYTHON_INLINE void __Pyx_ExceptionSwap(PyObject **type, PyObject **value, PyObject **tb);
+#endif
+
+/* Import.proto */
+static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level);
+
+/* FastTypeChecks.proto: subtype and exception-match checks. On CPython they go through the file's own __Pyx_IsSubtype / __Pyx_PyErr_GivenExceptionMatches* helpers; elsewhere they map onto PyObject_TypeCheck and PyErr_GivenExceptionMatches. */
+#if CYTHON_COMPILING_IN_CPYTHON
+#define __Pyx_TypeCheck(obj, type) __Pyx_IsSubtype(Py_TYPE(obj), (PyTypeObject *)type)
+static CYTHON_INLINE int __Pyx_IsSubtype(PyTypeObject *a, PyTypeObject *b);
+static CYTHON_INLINE int __Pyx_PyErr_GivenExceptionMatches(PyObject *err, PyObject *type);
+static CYTHON_INLINE int __Pyx_PyErr_GivenExceptionMatches2(PyObject *err, PyObject *type1, PyObject *type2);
+#else
+#define __Pyx_TypeCheck(obj, type) PyObject_TypeCheck(obj, (PyTypeObject *)type)
+#define __Pyx_PyErr_GivenExceptionMatches(err, type) PyErr_GivenExceptionMatches(err, type)
+#define __Pyx_PyErr_GivenExceptionMatches2(err, type1, type2) (PyErr_GivenExceptionMatches(err, type1) || PyErr_GivenExceptionMatches(err, type2))
+#endif
+#define __Pyx_PyException_Check(obj) __Pyx_TypeCheck(obj, PyExc_Exception)  /* defined for both branches above */
+
+static CYTHON_UNUSED int __pyx_memoryview_getbuffer(PyObject *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags); /*proto*/
+/* ListCompAppend.proto: append x to list. When list internals may be touched, an item is stored in place whenever the list has any unused allocated slot; otherwise PyList_Append does the work. Returns what PyList_Append returns, or 0 on the in-place path. */
+#if CYTHON_USE_PYLIST_INTERNALS && CYTHON_ASSUME_SAFE_MACROS
+static CYTHON_INLINE int __Pyx_ListComp_Append(PyObject* list, PyObject* x) {
+    const Py_ssize_t used = Py_SIZE(list);
+    const Py_ssize_t capacity = ((PyListObject*) list)->allocated;
+    if (likely(capacity > used)) {  /* spare slot available: store in place, no reallocation */
+        Py_INCREF(x);
+        PyList_SET_ITEM(list, used, x);
+        __Pyx_SET_SIZE(list, used + 1);
+        return 0;
+    }
+    return PyList_Append(list, x);
+}
+#else
+#define __Pyx_ListComp_Append(L,x) PyList_Append(L,x)
+#endif
+
+/* PyIntBinop.proto: addition of an object and a constant. Outside PyPy a dedicated function is declared; on PyPy the macro ignores intval and zerodivision_check and dispatches to PyNumber_InPlaceAdd or PyNumber_Add depending on inplace. */
+#if !CYTHON_COMPILING_IN_PYPY
+static PyObject* __Pyx_PyInt_AddObjC(PyObject *op1, PyObject *op2, long intval, int inplace, int zerodivision_check);
+#else
+#define __Pyx_PyInt_AddObjC(op1, op2, intval, inplace, zerodivision_check)\
+    (inplace ? PyNumber_InPlaceAdd(op1, op2) : PyNumber_Add(op1, op2))
+#endif
+
+/* ListExtend.proto: extend list L with the items of v. On CPython this calls _PyList_Extend and maps its result to 0 / -1; elsewhere it assigns v to the empty slice at the far end of L. */
+static CYTHON_INLINE int __Pyx_PyList_Extend(PyObject* L, PyObject* v) {
+#if CYTHON_COMPILING_IN_CPYTHON
+    /* A non-NULL result is an owned reference that only signals success, so it is dropped straight away. */
+    PyObject* extend_result = _PyList_Extend((PyListObject*)L, v);
+    const int status = unlikely(extend_result == NULL) ? -1 : 0;
+    Py_XDECREF(extend_result);
+    return status;
+#else
+    return PyList_SetSlice(L, PY_SSIZE_T_MAX, PY_SSIZE_T_MAX, v);
+#endif
+}
+
+/* ListAppend.proto: append x to list. The in-place store is used only when a spare slot exists AND the list is more than half full; every other case goes through PyList_Append. Returns what PyList_Append returns, or 0 on the in-place path. */
+#if CYTHON_USE_PYLIST_INTERNALS && CYTHON_ASSUME_SAFE_MACROS
+static CYTHON_INLINE int __Pyx_PyList_Append(PyObject* list, PyObject* x) {
+    const Py_ssize_t used = Py_SIZE(list);
+    const Py_ssize_t capacity = ((PyListObject*) list)->allocated;
+    if (likely(capacity > used) & likely(used > (capacity >> 1))) {  /* both tests are always evaluated (bitwise &) */
+        Py_INCREF(x);
+        PyList_SET_ITEM(list, used, x);
+        __Pyx_SET_SIZE(list, used + 1);
+        return 0;
+    }
+    return PyList_Append(list, x);
+}
+#else
+#define __Pyx_PyList_Append(L,x) PyList_Append(L,x)
+#endif
+
+/* DivInt[long].proto */
+static CYTHON_INLINE long __Pyx_div_long(long, long);
+
+/* ImportFrom.proto */
+static PyObject* __Pyx_ImportFrom(PyObject* module, PyObject* name);
+
+/* HasAttr.proto */
+static CYTHON_INLINE int __Pyx_HasAttr(PyObject *, PyObject *);
+
+/* PyObject_GenericGetAttrNoDict.proto: a dedicated helper is declared only for Python older than 3.7 with type slots and type lookup enabled; in every other configuration the name is an alias for PyObject_GenericGetAttr. */
+#if CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP && PY_VERSION_HEX < 0x03070000
+static CYTHON_INLINE PyObject* __Pyx_PyObject_GenericGetAttrNoDict(PyObject* obj, PyObject* attr_name);
+#else
+#define __Pyx_PyObject_GenericGetAttrNoDict PyObject_GenericGetAttr
+#endif
+
+/* PyObject_GenericGetAttr.proto: a dedicated helper is declared only for Python older than 3.7 with type slots and type lookup enabled; in every other configuration the name is an alias for PyObject_GenericGetAttr. */
+#if CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP && PY_VERSION_HEX < 0x03070000
+static PyObject* __Pyx_PyObject_GenericGetAttr(PyObject* obj, PyObject* attr_name);
+#else
+#define __Pyx_PyObject_GenericGetAttr PyObject_GenericGetAttr
+#endif
+
+/* SetVTable.proto */
+static int __Pyx_SetVtable(PyObject *dict, void *vtable);
+
+/* PyObjectGetAttrStrNoError.proto */
+static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStrNoError(PyObject* obj, PyObject* attr_name);
+
+/* SetupReduce.proto */
+static int __Pyx_setup_reduce(PyObject* type_obj);
+
+/* TypeImport.proto: size-check policy enum and prototype for importing an extension type by module and class name. The __PYX_HAVE_RT_ImportType_proto guard keeps the enum from being declared twice. */
+#ifndef __PYX_HAVE_RT_ImportType_proto
+#define __PYX_HAVE_RT_ImportType_proto
+enum __Pyx_ImportType_CheckSize {
+   __Pyx_ImportType_CheckSize_Error = 0,
+   __Pyx_ImportType_CheckSize_Warn = 1,
+   __Pyx_ImportType_CheckSize_Ignore = 2
+};
+static PyTypeObject *__Pyx_ImportType(PyObject* module, const char *module_name, const char *class_name, size_t size, enum __Pyx_ImportType_CheckSize check_size);
+#endif
+
+/* CLineInTraceback.proto: when CYTHON_CLINE_IN_TRACEBACK is defined at compile time, the macro yields c_line if that setting is non-zero and 0 otherwise (tstate is unused); when it is not defined, a function makes the decision. */
+#ifdef CYTHON_CLINE_IN_TRACEBACK
+#define __Pyx_CLineForTraceback(tstate, c_line)  (((CYTHON_CLINE_IN_TRACEBACK)) ? c_line : 0)
+#else
+static int __Pyx_CLineForTraceback(PyThreadState *tstate, int c_line);
+#endif
+
+/* CodeObjectCache.proto: file-local cache that associates integer code-line keys with PyCodeObject pointers, plus the prototypes that search and fill it. */
+typedef struct {
+    PyCodeObject* code_object;
+    int code_line;  /* the key passed to the find/insert prototypes below */
+} __Pyx_CodeObjectCacheEntry;
+struct __Pyx_CodeObjectCache {
+    int count;
+    int max_count;
+    __Pyx_CodeObjectCacheEntry* entries;
+};
+static struct __Pyx_CodeObjectCache __pyx_code_cache = {0,0,NULL};  /* starts with count 0, max_count 0 and no entries array */
+static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line);
+static PyCodeObject *__pyx_find_code_object(int code_line);
+static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object);
+
+/* AddTraceback.proto */
+static void __Pyx_AddTraceback(const char *funcname, int c_line,
+                               int py_line, const char *filename);
+
+#if PY_MAJOR_VERSION < 3  /* Python 2 builds: buffer acquire/release are separate Cython functions */
+    static int __Pyx_GetBuffer(PyObject *obj, Py_buffer *view, int flags);
+    static void __Pyx_ReleaseBuffer(Py_buffer *view);
+#else  /* Python 3+ builds: both names alias the CPython buffer API directly */
+    #define __Pyx_GetBuffer PyObject_GetBuffer
+    #define __Pyx_ReleaseBuffer PyBuffer_Release
+#endif
+
+
+/* BufferStructDeclare.proto: three structs -- per-dimension info, a Py_buffer paired with a refcount, and a view struct that combines a pointer to the latter with a data pointer and up to 8 dimension records. */
+typedef struct {
+  Py_ssize_t shape, strides, suboffsets;  /* one value each per dimension; presumably mirror the Py_buffer fields of the same names -- confirm */
+} __Pyx_Buf_DimInfo;
+typedef struct {
+  size_t refcount;
+  Py_buffer pybuffer;
+} __Pyx_Buffer;
+typedef struct {
+  __Pyx_Buffer *rcbuffer;
+  char *data;
+  __Pyx_Buf_DimInfo diminfo[8];  /* fixed-size array: room for 8 dimensions */
+} __Pyx_LocalBuf_ND;
+
+/* MemviewSliceIsContig.proto */
+static int __pyx_memviewslice_is_contig(const __Pyx_memviewslice mvs, char order, int ndim);
+
+/* OverlappingSlices.proto */
+static int __pyx_slices_overlap(__Pyx_memviewslice *slice1,
+                                __Pyx_memviewslice *slice2,
+                                int ndim, size_t itemsize);
+
+/* Capsule.proto */
+static CYTHON_INLINE PyObject *__pyx_capsule_create(void *p, const char *sig);
+
+/* IsLittleEndian.proto */
+static CYTHON_INLINE int __Pyx_Is_Little_Endian(void);
+
+/* BufferFormatCheck.proto */
+static const char* __Pyx_BufFmt_CheckString(__Pyx_BufFmt_Context* ctx, const char* ts);
+static void __Pyx_BufFmt_Init(__Pyx_BufFmt_Context* ctx,
+                              __Pyx_BufFmt_StackElem* stack,
+                              __Pyx_TypeInfo* type);
+
+/* TypeInfoCompare.proto */
+static int __pyx_typeinfo_cmp(__Pyx_TypeInfo *a, __Pyx_TypeInfo *b);
+
+/* MemviewSliceValidateAndInit.proto */
+static int __Pyx_ValidateAndInit_memviewslice(
+                int *axes_specs,
+                int c_or_f_flag,
+                int buf_flags,
+                int ndim,
+                __Pyx_TypeInfo *dtype,
+                __Pyx_BufFmt_StackElem stack[],
+                __Pyx_memviewslice *memviewslice,
+                PyObject *original_obj);
+
+/* ObjectToMemviewSlice.proto */
+static CYTHON_INLINE __Pyx_memviewslice __Pyx_PyObject_to_MemoryviewSlice_d_d_dc_int(PyObject *, int writable_flag);
+
+/* ObjectToMemviewSlice.proto */
+static CYTHON_INLINE __Pyx_memviewslice __Pyx_PyObject_to_MemoryviewSlice_d_d_dc_float(PyObject *, int writable_flag);
+
+/* ObjectToMemviewSlice.proto */
+static CYTHON_INLINE __Pyx_memviewslice __Pyx_PyObject_to_MemoryviewSlice_dc_int(PyObject *, int writable_flag);
+
+/* GCCDiagnostics.proto: defines __Pyx_HAS_GCC_DIAGNOSTIC when the compiler identifies as GCC-compatible with version 4.6 or newer. */
+#if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6))
+#define __Pyx_HAS_GCC_DIAGNOSTIC
+#endif
+
+/* RealImag.proto: getters and setters for the real/imaginary parts of a complex value, selected by CYTHON_CCOMPLEX and by whether this is compiled as C++. */
+#if CYTHON_CCOMPLEX
+  #ifdef __cplusplus
+    #define __Pyx_CREAL(z) ((z).real())
+    #define __Pyx_CIMAG(z) ((z).imag())
+  #else
+    #define __Pyx_CREAL(z) (__real__(z))
+    #define __Pyx_CIMAG(z) (__imag__(z))
+  #endif
+#else  /* CYTHON_CCOMPLEX off: read the .real / .imag members directly */
+    #define __Pyx_CREAL(z) ((z).real)
+    #define __Pyx_CIMAG(z) ((z).imag)
+#endif
+#if defined(__cplusplus) && CYTHON_CCOMPLEX\
+        && (defined(_WIN32) || defined(__clang__) || (defined(__GNUC__) && (__GNUC__ >= 5 || __GNUC__ == 4 && __GNUC_MINOR__ >= 4 )) || __cplusplus >= 201103)
+    #define __Pyx_SET_CREAL(z,x) ((z).real(x))
+    #define __Pyx_SET_CIMAG(z,y) ((z).imag(y))
+#else  /* otherwise assign through the getter macro, which therefore has to expand to an lvalue here */
+    #define __Pyx_SET_CREAL(z,x) __Pyx_CREAL(z) = (x)
+    #define __Pyx_SET_CIMAG(z,y) __Pyx_CIMAG(z) = (y)
+#endif
+
+/* Arithmetic.proto: float-complex arithmetic -- plain operators and library calls when CYTHON_CCOMPLEX is set, otherwise helper functions operating on __pyx_t_float_complex. */
+#if CYTHON_CCOMPLEX
+    #define __Pyx_c_eq_float(a, b)   ((a)==(b))
+    #define __Pyx_c_sum_float(a, b)  ((a)+(b))
+    #define __Pyx_c_diff_float(a, b) ((a)-(b))
+    #define __Pyx_c_prod_float(a, b) ((a)*(b))
+    #define __Pyx_c_quot_float(a, b) ((a)/(b))
+    #define __Pyx_c_neg_float(a)     (-(a))
+  #ifdef __cplusplus  /* C++: conj/abs/pow come from namespace std */
+    #define __Pyx_c_is_zero_float(z) ((z)==(float)0)
+    #define __Pyx_c_conj_float(z)    (::std::conj(z))
+    #if 1  /* constant condition: abs/pow are always provided */
+        #define __Pyx_c_abs_float(z)     (::std::abs(z))
+        #define __Pyx_c_pow_float(a, b)  (::std::pow(a, b))
+    #endif
+  #else  /* C: use the float-suffixed functions conjf/cabsf/cpowf */
+    #define __Pyx_c_is_zero_float(z) ((z)==0)
+    #define __Pyx_c_conj_float(z)    (conjf(z))
+    #if 1  /* constant condition: abs/pow are always provided */
+        #define __Pyx_c_abs_float(z)     (cabsf(z))
+        #define __Pyx_c_pow_float(a, b)  (cpowf(a, b))
+    #endif
+ #endif
+#else  /* CYTHON_CCOMPLEX off: the same operations as functions; only declared here */
+    static CYTHON_INLINE int __Pyx_c_eq_float(__pyx_t_float_complex, __pyx_t_float_complex);
+    static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_sum_float(__pyx_t_float_complex, __pyx_t_float_complex);
+    static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_diff_float(__pyx_t_float_complex, __pyx_t_float_complex);
+    static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_prod_float(__pyx_t_float_complex, __pyx_t_float_complex);
+    static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_quot_float(__pyx_t_float_complex, __pyx_t_float_complex);
+    static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_neg_float(__pyx_t_float_complex);
+    static CYTHON_INLINE int __Pyx_c_is_zero_float(__pyx_t_float_complex);
+    static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_conj_float(__pyx_t_float_complex);
+    #if 1  /* constant condition: abs/pow are always declared */
+        static CYTHON_INLINE float __Pyx_c_abs_float(__pyx_t_float_complex);
+        static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_pow_float(__pyx_t_float_complex, __pyx_t_float_complex);
+    #endif
+#endif
+
+/* Arithmetic.proto: double-complex arithmetic -- plain operators and library calls when CYTHON_CCOMPLEX is set, otherwise helper functions operating on __pyx_t_double_complex. */
+#if CYTHON_CCOMPLEX
+    #define __Pyx_c_eq_double(a, b)   ((a)==(b))
+    #define __Pyx_c_sum_double(a, b)  ((a)+(b))
+    #define __Pyx_c_diff_double(a, b) ((a)-(b))
+    #define __Pyx_c_prod_double(a, b) ((a)*(b))
+    #define __Pyx_c_quot_double(a, b) ((a)/(b))
+    #define __Pyx_c_neg_double(a)     (-(a))
+  #ifdef __cplusplus  /* C++: conj/abs/pow come from namespace std */
+    #define __Pyx_c_is_zero_double(z) ((z)==(double)0)
+    #define __Pyx_c_conj_double(z)    (::std::conj(z))
+    #if 1  /* constant condition: abs/pow are always provided */
+        #define __Pyx_c_abs_double(z)     (::std::abs(z))
+        #define __Pyx_c_pow_double(a, b)  (::std::pow(a, b))
+    #endif
+  #else  /* C: use the double-precision functions conj/cabs/cpow */
+    #define __Pyx_c_is_zero_double(z) ((z)==0)
+    #define __Pyx_c_conj_double(z)    (conj(z))
+    #if 1  /* constant condition: abs/pow are always provided */
+        #define __Pyx_c_abs_double(z)     (cabs(z))
+        #define __Pyx_c_pow_double(a, b)  (cpow(a, b))
+    #endif
+ #endif
+#else  /* CYTHON_CCOMPLEX off: the same operations as functions; only declared here */
+    static CYTHON_INLINE int __Pyx_c_eq_double(__pyx_t_double_complex, __pyx_t_double_complex);
+    static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_sum_double(__pyx_t_double_complex, __pyx_t_double_complex);
+    static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_diff_double(__pyx_t_double_complex, __pyx_t_double_complex);
+    static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_prod_double(__pyx_t_double_complex, __pyx_t_double_complex);
+    static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_quot_double(__pyx_t_double_complex, __pyx_t_double_complex);
+    static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_neg_double(__pyx_t_double_complex);
+    static CYTHON_INLINE int __Pyx_c_is_zero_double(__pyx_t_double_complex);
+    static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_conj_double(__pyx_t_double_complex);
+    #if 1  /* constant condition: abs/pow are always declared */
+        static CYTHON_INLINE double __Pyx_c_abs_double(__pyx_t_double_complex);
+        static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_pow_double(__pyx_t_double_complex, __pyx_t_double_complex);
+    #endif
+#endif
+
+/* MemviewSliceCopyTemplate.proto */
+static __Pyx_memviewslice
+__pyx_memoryview_copy_new_contig(const __Pyx_memviewslice *from_mvs,
+                                 const char *mode, int ndim,
+                                 size_t sizeof_dtype, int contig_flag,
+                                 int dtype_is_object);
+
+/* CIntToPy.proto */
+static CYTHON_INLINE PyObject* __Pyx_PyInt_From_int(int value);
+
+/* CIntFromPy.proto */
+static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *);
+
+/* CIntToPy.proto */
+static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value);
+
+/* CIntFromPy.proto */
+static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *);
+
+/* CIntFromPy.proto */
+static CYTHON_INLINE char __Pyx_PyInt_As_char(PyObject *);
+
+/* CheckBinaryVersion.proto */
+static int __Pyx_check_binary_version(void);
+
+/* InitStrings.proto */
+static int __Pyx_InitStrings(__Pyx_StringTabEntry *t);
+
+static PyObject *__pyx_array_get_memview(struct __pyx_array_obj *__pyx_v_self); /* proto*/
+static char *__pyx_memoryview_get_item_pointer(struct __pyx_memoryview_obj *__pyx_v_self, PyObject *__pyx_v_index); /* proto*/
+static PyObject *__pyx_memoryview_is_slice(struct __pyx_memoryview_obj *__pyx_v_self, PyObject *__pyx_v_obj); /* proto*/
+static PyObject *__pyx_memoryview_setitem_slice_assignment(struct __pyx_memoryview_obj *__pyx_v_self, PyObject *__pyx_v_dst, PyObject *__pyx_v_src); /* proto*/
+static PyObject *__pyx_memoryview_setitem_slice_assign_scalar(struct __pyx_memoryview_obj *__pyx_v_self, struct __pyx_memoryview_obj *__pyx_v_dst, PyObject *__pyx_v_value); /* proto*/
+static PyObject *__pyx_memoryview_setitem_indexed(struct __pyx_memoryview_obj *__pyx_v_self, PyObject *__pyx_v_index, PyObject *__pyx_v_value); /* proto*/
+static PyObject *__pyx_memoryview_convert_item_to_object(struct __pyx_memoryview_obj *__pyx_v_self, char *__pyx_v_itemp); /* proto*/
+static PyObject *__pyx_memoryview_assign_item_from_object(struct __pyx_memoryview_obj *__pyx_v_self, char *__pyx_v_itemp, PyObject *__pyx_v_value); /* proto*/
+static PyObject *__pyx_memoryviewslice_convert_item_to_object(struct __pyx_memoryviewslice_obj *__pyx_v_self, char *__pyx_v_itemp); /* proto*/
+static PyObject *__pyx_memoryviewslice_assign_item_from_object(struct __pyx_memoryviewslice_obj *__pyx_v_self, char *__pyx_v_itemp, PyObject *__pyx_v_value); /* proto*/
+
+/* Module declarations from 'cython.view' */
+
+/* Module declarations from 'cython' */
+
+/* Module declarations from 'cpython.buffer' */
+
+/* Module declarations from 'libc.string' */
+
+/* Module declarations from 'libc.stdio' */
+
+/* Module declarations from '__builtin__' */
+
+/* Module declarations from 'cpython.type' */
+static PyTypeObject *__pyx_ptype_7cpython_4type_type = 0;
+
+/* Module declarations from 'cpython' */
+
+/* Module declarations from 'cpython.object' */
+
+/* Module declarations from 'cpython.ref' */
+
+/* Module declarations from 'cpython.mem' */
+
+/* Module declarations from 'numpy' */
+
+/* Module declarations from 'numpy' */
+static PyTypeObject *__pyx_ptype_5numpy_dtype = 0;
+static PyTypeObject *__pyx_ptype_5numpy_flatiter = 0;
+static PyTypeObject *__pyx_ptype_5numpy_broadcast = 0;
+static PyTypeObject *__pyx_ptype_5numpy_ndarray = 0;
+static PyTypeObject *__pyx_ptype_5numpy_generic = 0;
+static PyTypeObject *__pyx_ptype_5numpy_number = 0;
+static PyTypeObject *__pyx_ptype_5numpy_integer = 0;
+static PyTypeObject *__pyx_ptype_5numpy_signedinteger = 0;
+static PyTypeObject *__pyx_ptype_5numpy_unsignedinteger = 0;
+static PyTypeObject *__pyx_ptype_5numpy_inexact = 0;
+static PyTypeObject *__pyx_ptype_5numpy_floating = 0;
+static PyTypeObject *__pyx_ptype_5numpy_complexfloating = 0;
+static PyTypeObject *__pyx_ptype_5numpy_flexible = 0;
+static PyTypeObject *__pyx_ptype_5numpy_character = 0;
+static PyTypeObject *__pyx_ptype_5numpy_ufunc = 0;
+
+/* Module declarations from 'TTS.tts.utils.monotonic_align.core' */
+static PyTypeObject *__pyx_array_type = 0;
+static PyTypeObject *__pyx_MemviewEnum_type = 0;
+static PyTypeObject *__pyx_memoryview_type = 0;
+static PyTypeObject *__pyx_memoryviewslice_type = 0;
+static PyObject *generic = 0;
+static PyObject *strided = 0;
+static PyObject *indirect = 0;
+static PyObject *contiguous = 0;
+static PyObject *indirect_contiguous = 0;
+static int __pyx_memoryview_thread_locks_used;
+static PyThread_type_lock __pyx_memoryview_thread_locks[8];
+static void __pyx_f_3TTS_3tts_5utils_15monotonic_align_4core_maximum_path_each(__Pyx_memviewslice, __Pyx_memviewslice, int, int, float); /*proto*/
+static void __pyx_f_3TTS_3tts_5utils_15monotonic_align_4core_maximum_path_c(__Pyx_memviewslice, __Pyx_memviewslice, __Pyx_memviewslice, __Pyx_memviewslice, int __pyx_skip_dispatch, struct __pyx_opt_args_3TTS_3tts_5utils_15monotonic_align_4core_maximum_path_c *__pyx_optional_args); /*proto*/
+static struct __pyx_array_obj *__pyx_array_new(PyObject *, Py_ssize_t, char *, char *, char *); /*proto*/
+static void *__pyx_align_pointer(void *, size_t); /*proto*/
+static PyObject *__pyx_memoryview_new(PyObject *, int, int, __Pyx_TypeInfo *); /*proto*/
+static CYTHON_INLINE int __pyx_memoryview_check(PyObject *); /*proto*/
+static PyObject *_unellipsify(PyObject *, int); /*proto*/
+static PyObject *assert_direct_dimensions(Py_ssize_t *, int); /*proto*/
+static struct __pyx_memoryview_obj *__pyx_memview_slice(struct __pyx_memoryview_obj *, PyObject *); /*proto*/
+static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *, Py_ssize_t, Py_ssize_t, Py_ssize_t, int, int, int *, Py_ssize_t, Py_ssize_t, Py_ssize_t, int, int, int, int); /*proto*/
+static char *__pyx_pybuffer_index(Py_buffer *, char *, Py_ssize_t, Py_ssize_t); /*proto*/
+static int __pyx_memslice_transpose(__Pyx_memviewslice *); /*proto*/
+static PyObject *__pyx_memoryview_fromslice(__Pyx_memviewslice, int, PyObject *(*)(char *), int (*)(char *, PyObject *), int); /*proto*/
+static __Pyx_memviewslice *__pyx_memoryview_get_slice_from_memoryview(struct __pyx_memoryview_obj *, __Pyx_memviewslice *); /*proto*/
+static void __pyx_memoryview_slice_copy(struct __pyx_memoryview_obj *, __Pyx_memviewslice *); /*proto*/
+static PyObject *__pyx_memoryview_copy_object(struct __pyx_memoryview_obj *); /*proto*/
+static PyObject *__pyx_memoryview_copy_object_from_slice(struct __pyx_memoryview_obj *, __Pyx_memviewslice *); /*proto*/
+static Py_ssize_t abs_py_ssize_t(Py_ssize_t); /*proto*/
+static char __pyx_get_best_slice_order(__Pyx_memviewslice *, int); /*proto*/
+static void _copy_strided_to_strided(char *, Py_ssize_t *, char *, Py_ssize_t *, Py_ssize_t *, Py_ssize_t *, int, size_t); /*proto*/
+static void copy_strided_to_strided(__Pyx_memviewslice *, __Pyx_memviewslice *, int, size_t); /*proto*/
+static Py_ssize_t __pyx_memoryview_slice_get_size(__Pyx_memviewslice *, int); /*proto*/
+static Py_ssize_t __pyx_fill_contig_strides_array(Py_ssize_t *, Py_ssize_t *, Py_ssize_t, int, char); /*proto*/
+static void *__pyx_memoryview_copy_data_to_temp(__Pyx_memviewslice *, __Pyx_memviewslice *, char, int); /*proto*/
+static int __pyx_memoryview_err_extents(int, Py_ssize_t, Py_ssize_t); /*proto*/
+static int __pyx_memoryview_err_dim(PyObject *, char *, int); /*proto*/
+static int __pyx_memoryview_err(PyObject *, char *); /*proto*/
+static int __pyx_memoryview_copy_contents(__Pyx_memviewslice, __Pyx_memviewslice, int, int, int); /*proto*/
+static void __pyx_memoryview_broadcast_leading(__Pyx_memviewslice *, int, int); /*proto*/
+static void __pyx_memoryview_refcount_copying(__Pyx_memviewslice *, int, int, int); /*proto*/
+static void __pyx_memoryview_refcount_objects_in_slice_with_gil(char *, Py_ssize_t *, Py_ssize_t *, int, int); /*proto*/
+static void __pyx_memoryview_refcount_objects_in_slice(char *, Py_ssize_t *, Py_ssize_t *, int, int); /*proto*/
+static void __pyx_memoryview_slice_assign_scalar(__Pyx_memviewslice *, int, size_t, void *, int); /*proto*/
+static void __pyx_memoryview__slice_assign_scalar(char *, Py_ssize_t *, Py_ssize_t *, int, size_t, void *); /*proto*/
+static PyObject *__pyx_unpickle_Enum__set_state(struct __pyx_MemviewEnum_obj *, PyObject *); /*proto*/
+static __Pyx_TypeInfo __Pyx_TypeInfo_int = { "int", NULL, sizeof(int), { 0 }, 0, IS_UNSIGNED(int) ? 'U' : 'I', IS_UNSIGNED(int), 0 };
+static __Pyx_TypeInfo __Pyx_TypeInfo_float = { "float", NULL, sizeof(float), { 0 }, 0, 'R', 0, 0 };
+#define __Pyx_MODULE_NAME "TTS.tts.utils.monotonic_align.core"
+extern int __pyx_module_is_main_TTS__tts__utils__monotonic_align__core;
+int __pyx_module_is_main_TTS__tts__utils__monotonic_align__core = 0;
+
+/* Implementation of 'TTS.tts.utils.monotonic_align.core' */
+static PyObject *__pyx_builtin_range;
+static PyObject *__pyx_builtin_ImportError;
+static PyObject *__pyx_builtin_ValueError;
+static PyObject *__pyx_builtin_MemoryError;
+static PyObject *__pyx_builtin_enumerate;
+static PyObject *__pyx_builtin_TypeError;
+static PyObject *__pyx_builtin_Ellipsis;
+static PyObject *__pyx_builtin_id;
+static PyObject *__pyx_builtin_IndexError;
+static const char __pyx_k_O[] = "O";
+static const char __pyx_k_c[] = "c";
+static const char __pyx_k_id[] = "id";
+static const char __pyx_k_np[] = "np";
+static const char __pyx_k_new[] = "__new__";
+static const char __pyx_k_obj[] = "obj";
+static const char __pyx_k_base[] = "base";
+static const char __pyx_k_dict[] = "__dict__";
+static const char __pyx_k_main[] = "__main__";
+static const char __pyx_k_mode[] = "mode";
+static const char __pyx_k_name[] = "name";
+static const char __pyx_k_ndim[] = "ndim";
+static const char __pyx_k_pack[] = "pack";
+static const char __pyx_k_size[] = "size";
+static const char __pyx_k_step[] = "step";
+static const char __pyx_k_stop[] = "stop";
+static const char __pyx_k_t_xs[] = "t_xs";
+static const char __pyx_k_t_ys[] = "t_ys";
+static const char __pyx_k_test[] = "__test__";
+static const char __pyx_k_ASCII[] = "ASCII";
+static const char __pyx_k_class[] = "__class__";
+static const char __pyx_k_error[] = "error";
+static const char __pyx_k_flags[] = "flags";
+static const char __pyx_k_numpy[] = "numpy";
+static const char __pyx_k_paths[] = "paths";
+static const char __pyx_k_range[] = "range";
+static const char __pyx_k_shape[] = "shape";
+static const char __pyx_k_start[] = "start";
+static const char __pyx_k_encode[] = "encode";
+static const char __pyx_k_format[] = "format";
+static const char __pyx_k_import[] = "__import__";
+static const char __pyx_k_name_2[] = "__name__";
+static const char __pyx_k_pickle[] = "pickle";
+static const char __pyx_k_reduce[] = "__reduce__";
+static const char __pyx_k_struct[] = "struct";
+static const char __pyx_k_unpack[] = "unpack";
+static const char __pyx_k_update[] = "update";
+static const char __pyx_k_values[] = "values";
+static const char __pyx_k_fortran[] = "fortran";
+static const char __pyx_k_memview[] = "memview";
+static const char __pyx_k_Ellipsis[] = "Ellipsis";
+static const char __pyx_k_getstate[] = "__getstate__";
+static const char __pyx_k_itemsize[] = "itemsize";
+static const char __pyx_k_pyx_type[] = "__pyx_type";
+static const char __pyx_k_setstate[] = "__setstate__";
+static const char __pyx_k_TypeError[] = "TypeError";
+static const char __pyx_k_enumerate[] = "enumerate";
+static const char __pyx_k_pyx_state[] = "__pyx_state";
+static const char __pyx_k_reduce_ex[] = "__reduce_ex__";
+static const char __pyx_k_IndexError[] = "IndexError";
+static const char __pyx_k_ValueError[] = "ValueError";
+static const char __pyx_k_pyx_result[] = "__pyx_result";
+static const char __pyx_k_pyx_vtable[] = "__pyx_vtable__";
+static const char __pyx_k_ImportError[] = "ImportError";
+static const char __pyx_k_MemoryError[] = "MemoryError";
+static const char __pyx_k_PickleError[] = "PickleError";
+static const char __pyx_k_max_neg_val[] = "max_neg_val";
+static const char __pyx_k_pyx_checksum[] = "__pyx_checksum";
+static const char __pyx_k_stringsource[] = "stringsource";
+static const char __pyx_k_pyx_getbuffer[] = "__pyx_getbuffer";
+static const char __pyx_k_reduce_cython[] = "__reduce_cython__";
+static const char __pyx_k_View_MemoryView[] = "View.MemoryView";
+static const char __pyx_k_allocate_buffer[] = "allocate_buffer";
+static const char __pyx_k_dtype_is_object[] = "dtype_is_object";
+static const char __pyx_k_pyx_PickleError[] = "__pyx_PickleError";
+static const char __pyx_k_setstate_cython[] = "__setstate_cython__";
+static const char __pyx_k_pyx_unpickle_Enum[] = "__pyx_unpickle_Enum";
+static const char __pyx_k_cline_in_traceback[] = "cline_in_traceback";
+static const char __pyx_k_strided_and_direct[] = "<strided and direct>";
+static const char __pyx_k_strided_and_indirect[] = "<strided and indirect>";
+static const char __pyx_k_contiguous_and_direct[] = "<contiguous and direct>";
+static const char __pyx_k_MemoryView_of_r_object[] = "<MemoryView of %r object>";
+static const char __pyx_k_MemoryView_of_r_at_0x_x[] = "<MemoryView of %r at 0x%x>";
+static const char __pyx_k_contiguous_and_indirect[] = "<contiguous and indirect>";
+static const char __pyx_k_Cannot_index_with_type_s[] = "Cannot index with type '%s'";
+static const char __pyx_k_Invalid_shape_in_axis_d_d[] = "Invalid shape in axis %d: %d.";
+static const char __pyx_k_itemsize_0_for_cython_array[] = "itemsize <= 0 for cython.array";
+static const char __pyx_k_unable_to_allocate_array_data[] = "unable to allocate array data.";
+static const char __pyx_k_strided_and_direct_or_indirect[] = "<strided and direct or indirect>";
+static const char __pyx_k_numpy_core_multiarray_failed_to[] = "numpy.core.multiarray failed to import";
+static const char __pyx_k_Buffer_view_does_not_expose_stri[] = "Buffer view does not expose strides";
+static const char __pyx_k_Can_only_create_a_buffer_that_is[] = "Can only create a buffer that is contiguous in memory.";
+static const char __pyx_k_Cannot_assign_to_read_only_memor[] = "Cannot assign to read-only memoryview";
+static const char __pyx_k_Cannot_create_writable_memory_vi[] = "Cannot create writable memory view from read-only memoryview";
+static const char __pyx_k_Empty_shape_tuple_for_cython_arr[] = "Empty shape tuple for cython.array";
+static const char __pyx_k_Incompatible_checksums_s_vs_0xb0[] = "Incompatible checksums (%s vs 0xb068931 = (name))";
+static const char __pyx_k_Indirect_dimensions_not_supporte[] = "Indirect dimensions not supported";
+static const char __pyx_k_Invalid_mode_expected_c_or_fortr[] = "Invalid mode, expected 'c' or 'fortran', got %s";
+static const char __pyx_k_Out_of_bounds_on_buffer_access_a[] = "Out of bounds on buffer access (axis %d)";
+static const char __pyx_k_Unable_to_convert_item_to_object[] = "Unable to convert item to object";
+static const char __pyx_k_got_differing_extents_in_dimensi[] = "got differing extents in dimension %d (got %d and %d)";
+static const char __pyx_k_no_default___reduce___due_to_non[] = "no default __reduce__ due to non-trivial __cinit__";
+static const char __pyx_k_numpy_core_umath_failed_to_impor[] = "numpy.core.umath failed to import";
+static const char __pyx_k_unable_to_allocate_shape_and_str[] = "unable to allocate shape and strides.";
+static PyObject *__pyx_n_s_ASCII;
+static PyObject *__pyx_kp_s_Buffer_view_does_not_expose_stri;
+static PyObject *__pyx_kp_s_Can_only_create_a_buffer_that_is;
+static PyObject *__pyx_kp_s_Cannot_assign_to_read_only_memor;
+static PyObject *__pyx_kp_s_Cannot_create_writable_memory_vi;
+static PyObject *__pyx_kp_s_Cannot_index_with_type_s;
+static PyObject *__pyx_n_s_Ellipsis;
+static PyObject *__pyx_kp_s_Empty_shape_tuple_for_cython_arr;
+static PyObject *__pyx_n_s_ImportError;
+static PyObject *__pyx_kp_s_Incompatible_checksums_s_vs_0xb0;
+static PyObject *__pyx_n_s_IndexError;
+static PyObject *__pyx_kp_s_Indirect_dimensions_not_supporte;
+static PyObject *__pyx_kp_s_Invalid_mode_expected_c_or_fortr;
+static PyObject *__pyx_kp_s_Invalid_shape_in_axis_d_d;
+static PyObject *__pyx_n_s_MemoryError;
+static PyObject *__pyx_kp_s_MemoryView_of_r_at_0x_x;
+static PyObject *__pyx_kp_s_MemoryView_of_r_object;
+static PyObject *__pyx_n_b_O;
+static PyObject *__pyx_kp_s_Out_of_bounds_on_buffer_access_a;
+static PyObject *__pyx_n_s_PickleError;
+static PyObject *__pyx_n_s_TypeError;
+static PyObject *__pyx_kp_s_Unable_to_convert_item_to_object;
+static PyObject *__pyx_n_s_ValueError;
+static PyObject *__pyx_n_s_View_MemoryView;
+static PyObject *__pyx_n_s_allocate_buffer;
+static PyObject *__pyx_n_s_base;
+static PyObject *__pyx_n_s_c;
+static PyObject *__pyx_n_u_c;
+static PyObject *__pyx_n_s_class;
+static PyObject *__pyx_n_s_cline_in_traceback;
+static PyObject *__pyx_kp_s_contiguous_and_direct;
+static PyObject *__pyx_kp_s_contiguous_and_indirect;
+static PyObject *__pyx_n_s_dict;
+static PyObject *__pyx_n_s_dtype_is_object;
+static PyObject *__pyx_n_s_encode;
+static PyObject *__pyx_n_s_enumerate;
+static PyObject *__pyx_n_s_error;
+static PyObject *__pyx_n_s_flags;
+static PyObject *__pyx_n_s_format;
+static PyObject *__pyx_n_s_fortran;
+static PyObject *__pyx_n_u_fortran;
+static PyObject *__pyx_n_s_getstate;
+static PyObject *__pyx_kp_s_got_differing_extents_in_dimensi;
+static PyObject *__pyx_n_s_id;
+static PyObject *__pyx_n_s_import;
+static PyObject *__pyx_n_s_itemsize;
+static PyObject *__pyx_kp_s_itemsize_0_for_cython_array;
+static PyObject *__pyx_n_s_main;
+static PyObject *__pyx_n_s_max_neg_val;
+static PyObject *__pyx_n_s_memview;
+static PyObject *__pyx_n_s_mode;
+static PyObject *__pyx_n_s_name;
+static PyObject *__pyx_n_s_name_2;
+static PyObject *__pyx_n_s_ndim;
+static PyObject *__pyx_n_s_new;
+static PyObject *__pyx_kp_s_no_default___reduce___due_to_non;
+static PyObject *__pyx_n_s_np;
+static PyObject *__pyx_n_s_numpy;
+static PyObject *__pyx_kp_u_numpy_core_multiarray_failed_to;
+static PyObject *__pyx_kp_u_numpy_core_umath_failed_to_impor;
+static PyObject *__pyx_n_s_obj;
+static PyObject *__pyx_n_s_pack;
+static PyObject *__pyx_n_s_paths;
+static PyObject *__pyx_n_s_pickle;
+static PyObject *__pyx_n_s_pyx_PickleError;
+static PyObject *__pyx_n_s_pyx_checksum;
+static PyObject *__pyx_n_s_pyx_getbuffer;
+static PyObject *__pyx_n_s_pyx_result;
+static PyObject *__pyx_n_s_pyx_state;
+static PyObject *__pyx_n_s_pyx_type;
+static PyObject *__pyx_n_s_pyx_unpickle_Enum;
+static PyObject *__pyx_n_s_pyx_vtable;
+static PyObject *__pyx_n_s_range;
+static PyObject *__pyx_n_s_reduce;
+static PyObject *__pyx_n_s_reduce_cython;
+static PyObject *__pyx_n_s_reduce_ex;
+static PyObject *__pyx_n_s_setstate;
+static PyObject *__pyx_n_s_setstate_cython;
+static PyObject *__pyx_n_s_shape;
+static PyObject *__pyx_n_s_size;
+static PyObject *__pyx_n_s_start;
+static PyObject *__pyx_n_s_step;
+static PyObject *__pyx_n_s_stop;
+static PyObject *__pyx_kp_s_strided_and_direct;
+static PyObject *__pyx_kp_s_strided_and_direct_or_indirect;
+static PyObject *__pyx_kp_s_strided_and_indirect;
+static PyObject *__pyx_kp_s_stringsource;
+static PyObject *__pyx_n_s_struct;
+static PyObject *__pyx_n_s_t_xs;
+static PyObject *__pyx_n_s_t_ys;
+static PyObject *__pyx_n_s_test;
+static PyObject *__pyx_kp_s_unable_to_allocate_array_data;
+static PyObject *__pyx_kp_s_unable_to_allocate_shape_and_str;
+static PyObject *__pyx_n_s_unpack;
+static PyObject *__pyx_n_s_update;
+static PyObject *__pyx_n_s_values;
+static PyObject *__pyx_pf_3TTS_3tts_5utils_15monotonic_align_4core_maximum_path_c(CYTHON_UNUSED PyObject *__pyx_self, __Pyx_memviewslice __pyx_v_paths, __Pyx_memviewslice __pyx_v_values, __Pyx_memviewslice __pyx_v_t_xs, __Pyx_memviewslice __pyx_v_t_ys, float __pyx_v_max_neg_val); /* proto */
+static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array___cinit__(struct __pyx_array_obj *__pyx_v_self, PyObject *__pyx_v_shape, Py_ssize_t __pyx_v_itemsize, PyObject *__pyx_v_format, PyObject *__pyx_v_mode, int __pyx_v_allocate_buffer); /* proto */
+static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array_2__getbuffer__(struct __pyx_array_obj *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags); /* proto */
+static void __pyx_array___pyx_pf_15View_dot_MemoryView_5array_4__dealloc__(struct __pyx_array_obj *__pyx_v_self); /* proto */
+static PyObject *__pyx_pf_15View_dot_MemoryView_5array_7memview___get__(struct __pyx_array_obj *__pyx_v_self); /* proto */
+static Py_ssize_t __pyx_array___pyx_pf_15View_dot_MemoryView_5array_6__len__(struct __pyx_array_obj *__pyx_v_self); /* proto */
+static PyObject *__pyx_array___pyx_pf_15View_dot_MemoryView_5array_8__getattr__(struct __pyx_array_obj *__pyx_v_self, PyObject *__pyx_v_attr); /* proto */
+static PyObject *__pyx_array___pyx_pf_15View_dot_MemoryView_5array_10__getitem__(struct __pyx_array_obj *__pyx_v_self, PyObject *__pyx_v_item); /* proto */
+static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array_12__setitem__(struct __pyx_array_obj *__pyx_v_self, PyObject *__pyx_v_item, PyObject *__pyx_v_value); /* proto */
+static PyObject *__pyx_pf___pyx_array___reduce_cython__(CYTHON_UNUSED struct __pyx_array_obj *__pyx_v_self); /* proto */
+static PyObject *__pyx_pf___pyx_array_2__setstate_cython__(CYTHON_UNUSED struct __pyx_array_obj *__pyx_v_self, CYTHON_UNUSED PyObject *__pyx_v___pyx_state); /* proto */
+static int __pyx_MemviewEnum___pyx_pf_15View_dot_MemoryView_4Enum___init__(struct __pyx_MemviewEnum_obj *__pyx_v_self, PyObject *__pyx_v_name); /* proto */
+static PyObject *__pyx_MemviewEnum___pyx_pf_15View_dot_MemoryView_4Enum_2__repr__(struct __pyx_MemviewEnum_obj *__pyx_v_self); /* proto */
+static PyObject *__pyx_pf___pyx_MemviewEnum___reduce_cython__(struct __pyx_MemviewEnum_obj *__pyx_v_self); /* proto */
+static PyObject *__pyx_pf___pyx_MemviewEnum_2__setstate_cython__(struct __pyx_MemviewEnum_obj *__pyx_v_self, PyObject *__pyx_v___pyx_state); /* proto */
+static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview___cinit__(struct __pyx_memoryview_obj *__pyx_v_self, PyObject *__pyx_v_obj, int __pyx_v_flags, int __pyx_v_dtype_is_object); /* proto */
+static void __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_2__dealloc__(struct __pyx_memoryview_obj *__pyx_v_self); /* proto */
+static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_4__getitem__(struct __pyx_memoryview_obj *__pyx_v_self, PyObject *__pyx_v_index); /* proto */
+static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_6__setitem__(struct __pyx_memoryview_obj *__pyx_v_self, PyObject *__pyx_v_index, PyObject *__pyx_v_value); /* proto */
+static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_8__getbuffer__(struct __pyx_memoryview_obj *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags); /* proto */
+static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_1T___get__(struct __pyx_memoryview_obj *__pyx_v_self); /* proto */
+static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_4base___get__(struct __pyx_memoryview_obj *__pyx_v_self); /* proto */
+static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_5shape___get__(struct __pyx_memoryview_obj *__pyx_v_self); /* proto */
+static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_7strides___get__(struct __pyx_memoryview_obj *__pyx_v_self); /* proto */
+static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_10suboffsets___get__(struct __pyx_memoryview_obj *__pyx_v_self); /* proto */
+static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_4ndim___get__(struct __pyx_memoryview_obj *__pyx_v_self); /* proto */
+static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_8itemsize___get__(struct __pyx_memoryview_obj *__pyx_v_self); /* proto */
+static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_6nbytes___get__(struct __pyx_memoryview_obj *__pyx_v_self); /* proto */
+static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_4size___get__(struct __pyx_memoryview_obj *__pyx_v_self); /* proto */
+static Py_ssize_t __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_10__len__(struct __pyx_memoryview_obj *__pyx_v_self); /* proto */
+static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_12__repr__(struct __pyx_memoryview_obj *__pyx_v_self); /* proto */
+static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_14__str__(struct __pyx_memoryview_obj *__pyx_v_self); /* proto */
+static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_16is_c_contig(struct __pyx_memoryview_obj *__pyx_v_self); /* proto */
+static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_18is_f_contig(struct __pyx_memoryview_obj *__pyx_v_self); /* proto */
+static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_20copy(struct __pyx_memoryview_obj *__pyx_v_self); /* proto */
+static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_22copy_fortran(struct __pyx_memoryview_obj *__pyx_v_self); /* proto */
+static PyObject *__pyx_pf___pyx_memoryview___reduce_cython__(CYTHON_UNUSED struct __pyx_memoryview_obj *__pyx_v_self); /* proto */
+static PyObject *__pyx_pf___pyx_memoryview_2__setstate_cython__(CYTHON_UNUSED struct __pyx_memoryview_obj *__pyx_v_self, CYTHON_UNUSED PyObject *__pyx_v___pyx_state); /* proto */
+static void __pyx_memoryviewslice___pyx_pf_15View_dot_MemoryView_16_memoryviewslice___dealloc__(struct __pyx_memoryviewslice_obj *__pyx_v_self); /* proto */
+static PyObject *__pyx_pf_15View_dot_MemoryView_16_memoryviewslice_4base___get__(struct __pyx_memoryviewslice_obj *__pyx_v_self); /* proto */
+static PyObject *__pyx_pf___pyx_memoryviewslice___reduce_cython__(CYTHON_UNUSED struct __pyx_memoryviewslice_obj *__pyx_v_self); /* proto */
+static PyObject *__pyx_pf___pyx_memoryviewslice_2__setstate_cython__(CYTHON_UNUSED struct __pyx_memoryviewslice_obj *__pyx_v_self, CYTHON_UNUSED PyObject *__pyx_v___pyx_state); /* proto */
+static PyObject *__pyx_pf_15View_dot_MemoryView___pyx_unpickle_Enum(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v___pyx_type, long __pyx_v___pyx_checksum, PyObject *__pyx_v___pyx_state); /* proto */
+static PyObject *__pyx_tp_new_array(PyTypeObject *t, PyObject *a, PyObject *k); /*proto*/
+static PyObject *__pyx_tp_new_Enum(PyTypeObject *t, PyObject *a, PyObject *k); /*proto*/
+static PyObject *__pyx_tp_new_memoryview(PyTypeObject *t, PyObject *a, PyObject *k); /*proto*/
+static PyObject *__pyx_tp_new__memoryviewslice(PyTypeObject *t, PyObject *a, PyObject *k); /*proto*/
+static PyObject *__pyx_int_0; /* module-level object slots; declared here without initialisers (presumably filled during module init, not visible in this section) */
+static PyObject *__pyx_int_1;
+static PyObject *__pyx_int_184977713;
+static PyObject *__pyx_int_neg_1;
+static float __pyx_k_; /* default used for max_neg_val of maximum_path_c when the caller omits it; assigned outside this section (the .pyx signature shows -1e9) -- TODO confirm */
+static PyObject *__pyx_tuple__2;
+static PyObject *__pyx_tuple__3;
+static PyObject *__pyx_tuple__4;
+static PyObject *__pyx_tuple__5;
+static PyObject *__pyx_tuple__6;
+static PyObject *__pyx_tuple__7;
+static PyObject *__pyx_tuple__8;
+static PyObject *__pyx_tuple__9;
+static PyObject *__pyx_slice__18;
+static PyObject *__pyx_tuple__10;
+static PyObject *__pyx_tuple__11;
+static PyObject *__pyx_tuple__12;
+static PyObject *__pyx_tuple__13;
+static PyObject *__pyx_tuple__14;
+static PyObject *__pyx_tuple__15;
+static PyObject *__pyx_tuple__16;
+static PyObject *__pyx_tuple__17;
+static PyObject *__pyx_tuple__19;
+static PyObject *__pyx_tuple__20;
+static PyObject *__pyx_tuple__21;
+static PyObject *__pyx_tuple__22;
+static PyObject *__pyx_tuple__23;
+static PyObject *__pyx_tuple__24;
+static PyObject *__pyx_tuple__25;
+static PyObject *__pyx_tuple__26;
+static PyObject *__pyx_tuple__27;
+static PyObject *__pyx_codeobj__28;
+/* Late includes */
+
+/* "TTS/tts/utils/monotonic_align/core.pyx":11
+ * @cython.boundscheck(False)
+ * @cython.wraparound(False)
+ * cdef void maximum_path_each(int[:,::1] path, float[:,::1] value, int t_x, int t_y, float max_neg_val) nogil:             # <<<<<<<<<<<<<<
+ *   cdef int x
+ *   cdef int y
+ */
+
+static void __pyx_f_3TTS_3tts_5utils_15monotonic_align_4core_maximum_path_each(__Pyx_memviewslice __pyx_v_path, __Pyx_memviewslice __pyx_v_value, int __pyx_v_t_x, int __pyx_v_t_y, float __pyx_v_max_neg_val) { /* Two passes over one 2-D slice: (1) forward pass rewrites value[x, y] in place as max(value[x, y-1], value[x-1, y-1]) + value[x, y]; (2) backward pass walks y from t_y-1 down to 0 and sets path[index, y] = 1. Element accesses are raw pointer arithmetic with no bounds checks. */
+  int __pyx_v_x; /* row index (dimension 0, addressed through strides[0]) */
+  int __pyx_v_y; /* column index (dimension 1, addressed as a plain element offset) */
+  float __pyx_v_v_prev; /* value[x-1, y-1], or a boundary constant when x == 0 */
+  float __pyx_v_v_cur; /* value[x, y-1], or max_neg_val when x == y */
+  int __pyx_v_index; /* row currently being marked during the backward pass */
+  int __pyx_t_1; /* the __pyx_t_* variables are generated scratch temporaries */
+  int __pyx_t_2;
+  int __pyx_t_3;
+  long __pyx_t_4;
+  int __pyx_t_5;
+  long __pyx_t_6;
+  long __pyx_t_7;
+  int __pyx_t_8;
+  Py_ssize_t __pyx_t_9;
+  Py_ssize_t __pyx_t_10;
+  float __pyx_t_11;
+  float __pyx_t_12;
+  float __pyx_t_13;
+  Py_ssize_t __pyx_t_14;
+  Py_ssize_t __pyx_t_15;
+  int __pyx_t_16;
+
+  /* "TTS/tts/utils/monotonic_align/core.pyx":17
+ *   cdef float v_cur
+ *   cdef float tmp
+ *   cdef int index = t_x - 1             # <<<<<<<<<<<<<<
+ * 
+ *   for y in range(t_y):
+ */
+  __pyx_v_index = (__pyx_v_t_x - 1); /* the backward pass starts at the last row */
+
+  /* "TTS/tts/utils/monotonic_align/core.pyx":19
+ *   cdef int index = t_x - 1
+ * 
+ *   for y in range(t_y):             # <<<<<<<<<<<<<<
+ *     for x in range(max(0, t_x + y - t_y), min(t_x, y + 1)):
+ *       if x == y:
+ */
+  __pyx_t_1 = __pyx_v_t_y;
+  __pyx_t_2 = __pyx_t_1;
+  for (__pyx_t_3 = 0; __pyx_t_3 < __pyx_t_2; __pyx_t_3+=1) { /* forward pass: y = 0 .. t_y-1 */
+    __pyx_v_y = __pyx_t_3;
+
+    /* "TTS/tts/utils/monotonic_align/core.pyx":20
+ * 
+ *   for y in range(t_y):
+ *     for x in range(max(0, t_x + y - t_y), min(t_x, y + 1)):             # <<<<<<<<<<<<<<
+ *       if x == y:
+ *         v_cur = max_neg_val
+ */
+    __pyx_t_4 = (__pyx_v_y + 1);
+    __pyx_t_5 = __pyx_v_t_x;
+    if (((__pyx_t_4 < __pyx_t_5) != 0)) { /* this if/else selects min(t_x, y + 1): the exclusive upper bound for x */
+      __pyx_t_6 = __pyx_t_4;
+    } else {
+      __pyx_t_6 = __pyx_t_5;
+    }
+    __pyx_t_4 = __pyx_t_6;
+    __pyx_t_5 = ((__pyx_v_t_x + __pyx_v_y) - __pyx_v_t_y); /* lower-bound candidate; clamped to >= 0 by the if/else below */
+    __pyx_t_6 = 0;
+    if (((__pyx_t_5 > __pyx_t_6) != 0)) {
+      __pyx_t_7 = __pyx_t_5;
+    } else {
+      __pyx_t_7 = __pyx_t_6;
+    }
+    __pyx_t_6 = __pyx_t_4;
+    for (__pyx_t_5 = __pyx_t_7; __pyx_t_5 < __pyx_t_6; __pyx_t_5+=1) { /* x runs from max(0, t_x + y - t_y) up to, but excluding, min(t_x, y + 1) */
+      __pyx_v_x = __pyx_t_5;
+
+      /* "TTS/tts/utils/monotonic_align/core.pyx":21
+ *   for y in range(t_y):
+ *     for x in range(max(0, t_x + y - t_y), min(t_x, y + 1)):
+ *       if x == y:             # <<<<<<<<<<<<<<
+ *         v_cur = max_neg_val
+ *       else:
+ */
+      __pyx_t_8 = ((__pyx_v_x == __pyx_v_y) != 0);
+      if (__pyx_t_8) {
+
+        /* "TTS/tts/utils/monotonic_align/core.pyx":22
+ *     for x in range(max(0, t_x + y - t_y), min(t_x, y + 1)):
+ *       if x == y:
+ *         v_cur = max_neg_val             # <<<<<<<<<<<<<<
+ *       else:
+ *         v_cur = value[x, y-1]
+ */
+        __pyx_v_v_cur = __pyx_v_max_neg_val; /* column y-1 was only updated for rows < y, so row x == y takes the sentinel instead of value[x, y-1] */
+
+        /* "TTS/tts/utils/monotonic_align/core.pyx":21
+ *   for y in range(t_y):
+ *     for x in range(max(0, t_x + y - t_y), min(t_x, y + 1)):
+ *       if x == y:             # <<<<<<<<<<<<<<
+ *         v_cur = max_neg_val
+ *       else:
+ */
+        goto __pyx_L7;
+      }
+
+      /* "TTS/tts/utils/monotonic_align/core.pyx":24
+ *         v_cur = max_neg_val
+ *       else:
+ *         v_cur = value[x, y-1]             # <<<<<<<<<<<<<<
+ *       if x == 0:
+ *         if y == 0:
+ */
+      /*else*/ { /* here 0 <= x < y (x != y and x < y + 1), so y - 1 >= 0 */
+        __pyx_t_9 = __pyx_v_x;
+        __pyx_t_10 = (__pyx_v_y - 1);
+        __pyx_v_v_cur = (*((float *) ( /* dim=1 */ ((char *) (((float *) ( /* dim=0 */ (__pyx_v_value.data + __pyx_t_9 * __pyx_v_value.strides[0]) )) + __pyx_t_10)) )));
+      }
+      __pyx_L7:;
+
+      /* "TTS/tts/utils/monotonic_align/core.pyx":25
+ *       else:
+ *         v_cur = value[x, y-1]
+ *       if x == 0:             # <<<<<<<<<<<<<<
+ *         if y == 0:
+ *           v_prev = 0.
+ */
+      __pyx_t_8 = ((__pyx_v_x == 0) != 0);
+      if (__pyx_t_8) {
+
+        /* "TTS/tts/utils/monotonic_align/core.pyx":26
+ *         v_cur = value[x, y-1]
+ *       if x == 0:
+ *         if y == 0:             # <<<<<<<<<<<<<<
+ *           v_prev = 0.
+ *         else:
+ */
+        __pyx_t_8 = ((__pyx_v_y == 0) != 0);
+        if (__pyx_t_8) {
+
+          /* "TTS/tts/utils/monotonic_align/core.pyx":27
+ *       if x == 0:
+ *         if y == 0:
+ *           v_prev = 0.             # <<<<<<<<<<<<<<
+ *         else:
+ *           v_prev = max_neg_val
+ */
+          __pyx_v_v_prev = 0.; /* cell (0, 0): the predecessor term contributes 0 */
+
+          /* "TTS/tts/utils/monotonic_align/core.pyx":26
+ *         v_cur = value[x, y-1]
+ *       if x == 0:
+ *         if y == 0:             # <<<<<<<<<<<<<<
+ *           v_prev = 0.
+ *         else:
+ */
+          goto __pyx_L9;
+        }
+
+        /* "TTS/tts/utils/monotonic_align/core.pyx":29
+ *           v_prev = 0.
+ *         else:
+ *           v_prev = max_neg_val             # <<<<<<<<<<<<<<
+ *       else:
+ *         v_prev = value[x-1, y-1]
+ */
+        /*else*/ {
+          __pyx_v_v_prev = __pyx_v_max_neg_val; /* x == 0 with y > 0: there is no row x-1, use the sentinel */
+        }
+        __pyx_L9:;
+
+        /* "TTS/tts/utils/monotonic_align/core.pyx":25
+ *       else:
+ *         v_cur = value[x, y-1]
+ *       if x == 0:             # <<<<<<<<<<<<<<
+ *         if y == 0:
+ *           v_prev = 0.
+ */
+        goto __pyx_L8;
+      }
+
+      /* "TTS/tts/utils/monotonic_align/core.pyx":31
+ *           v_prev = max_neg_val
+ *       else:
+ *         v_prev = value[x-1, y-1]             # <<<<<<<<<<<<<<
+ *       value[x, y] = max(v_cur, v_prev) + value[x, y]
+ * 
+ */
+      /*else*/ { /* x >= 1 here and x <= y from the loop bounds, so x-1 and y-1 are both >= 0 */
+        __pyx_t_10 = (__pyx_v_x - 1);
+        __pyx_t_9 = (__pyx_v_y - 1);
+        __pyx_v_v_prev = (*((float *) ( /* dim=1 */ ((char *) (((float *) ( /* dim=0 */ (__pyx_v_value.data + __pyx_t_10 * __pyx_v_value.strides[0]) )) + __pyx_t_9)) )));
+      }
+      __pyx_L8:;
+
+      /* "TTS/tts/utils/monotonic_align/core.pyx":32
+ *       else:
+ *         v_prev = value[x-1, y-1]
+ *       value[x, y] = max(v_cur, v_prev) + value[x, y]             # <<<<<<<<<<<<<<
+ * 
+ *   for y in range(t_y - 1, -1, -1):
+ */
+      __pyx_t_11 = __pyx_v_v_prev;
+      __pyx_t_12 = __pyx_v_v_cur;
+      if (((__pyx_t_11 > __pyx_t_12) != 0)) { /* max(v_cur, v_prev); when they compare equal the else-branch picks v_cur */
+        __pyx_t_13 = __pyx_t_11;
+      } else {
+        __pyx_t_13 = __pyx_t_12;
+      }
+      __pyx_t_9 = __pyx_v_x;
+      __pyx_t_10 = __pyx_v_y;
+      __pyx_t_14 = __pyx_v_x;
+      __pyx_t_15 = __pyx_v_y;
+      *((float *) ( /* dim=1 */ ((char *) (((float *) ( /* dim=0 */ (__pyx_v_value.data + __pyx_t_14 * __pyx_v_value.strides[0]) )) + __pyx_t_15)) )) = (__pyx_t_13 + (*((float *) ( /* dim=1 */ ((char *) (((float *) ( /* dim=0 */ (__pyx_v_value.data + __pyx_t_9 * __pyx_v_value.strides[0]) )) + __pyx_t_10)) ))));
+    }
+  }
+
+  /* "TTS/tts/utils/monotonic_align/core.pyx":34
+ *       value[x, y] = max(v_cur, v_prev) + value[x, y]
+ * 
+ *   for y in range(t_y - 1, -1, -1):             # <<<<<<<<<<<<<<
+ *     path[index, y] = 1
+ *     if index != 0 and (index == y or value[index, y-1] < value[index-1, y-1]):
+ */
+  for (__pyx_t_1 = (__pyx_v_t_y - 1); __pyx_t_1 > -1; __pyx_t_1-=1) { /* backward pass: y = t_y-1 .. 0 */
+    __pyx_v_y = __pyx_t_1;
+
+    /* "TTS/tts/utils/monotonic_align/core.pyx":35
+ * 
+ *   for y in range(t_y - 1, -1, -1):
+ *     path[index, y] = 1             # <<<<<<<<<<<<<<
+ *     if index != 0 and (index == y or value[index, y-1] < value[index-1, y-1]):
+ *       index = index - 1
+ */
+    __pyx_t_10 = __pyx_v_index;
+    __pyx_t_9 = __pyx_v_y;
+    *((int *) ( /* dim=1 */ ((char *) (((int *) ( /* dim=0 */ (__pyx_v_path.data + __pyx_t_10 * __pyx_v_path.strides[0]) )) + __pyx_t_9)) )) = 1;
+
+    /* "TTS/tts/utils/monotonic_align/core.pyx":36
+ *   for y in range(t_y - 1, -1, -1):
+ *     path[index, y] = 1
+ *     if index != 0 and (index == y or value[index, y-1] < value[index-1, y-1]):             # <<<<<<<<<<<<<<
+ *       index = index - 1
+ * 
+ */
+    __pyx_t_16 = ((__pyx_v_index != 0) != 0); /* the condition is evaluated with short-circuiting; the result lands in __pyx_t_8 */
+    if (__pyx_t_16) {
+    } else {
+      __pyx_t_8 = __pyx_t_16; /* index == 0: condition is false, stay on row 0 */
+      goto __pyx_L13_bool_binop_done;
+    }
+    __pyx_t_16 = ((__pyx_v_index == __pyx_v_y) != 0);
+    if (!__pyx_t_16) {
+    } else {
+      __pyx_t_8 = __pyx_t_16; /* index == y: condition is true without reading value */
+      goto __pyx_L13_bool_binop_done;
+    }
+    __pyx_t_9 = __pyx_v_index;
+    __pyx_t_10 = (__pyx_v_y - 1); /* NOTE(review): reached whenever index != 0 and index != y; if that happens at y == 0 this reads column -1 unchecked -- looks like it relies on t_x <= t_y, confirm against callers */
+    __pyx_t_15 = (__pyx_v_index - 1);
+    __pyx_t_14 = (__pyx_v_y - 1);
+    __pyx_t_16 = (((*((float *) ( /* dim=1 */ ((char *) (((float *) ( /* dim=0 */ (__pyx_v_value.data + __pyx_t_9 * __pyx_v_value.strides[0]) )) + __pyx_t_10)) ))) < (*((float *) ( /* dim=1 */ ((char *) (((float *) ( /* dim=0 */ (__pyx_v_value.data + __pyx_t_15 * __pyx_v_value.strides[0]) )) + __pyx_t_14)) )))) != 0);
+    __pyx_t_8 = __pyx_t_16;
+    __pyx_L13_bool_binop_done:;
+    if (__pyx_t_8) {
+
+      /* "TTS/tts/utils/monotonic_align/core.pyx":37
+ *     path[index, y] = 1
+ *     if index != 0 and (index == y or value[index, y-1] < value[index-1, y-1]):
+ *       index = index - 1             # <<<<<<<<<<<<<<
+ * 
+ * 
+ */
+      __pyx_v_index = (__pyx_v_index - 1); /* move one row up before handling column y-1 */
+
+      /* "TTS/tts/utils/monotonic_align/core.pyx":36
+ *   for y in range(t_y - 1, -1, -1):
+ *     path[index, y] = 1
+ *     if index != 0 and (index == y or value[index, y-1] < value[index-1, y-1]):             # <<<<<<<<<<<<<<
+ *       index = index - 1
+ * 
+ */
+    }
+  }
+
+  /* "TTS/tts/utils/monotonic_align/core.pyx":11
+ * @cython.boundscheck(False)
+ * @cython.wraparound(False)
+ * cdef void maximum_path_each(int[:,::1] path, float[:,::1] value, int t_x, int t_y, float max_neg_val) nogil:             # <<<<<<<<<<<<<<
+ *   cdef int x
+ *   cdef int y
+ */
+
+  /* function exit code */
+}
+
+/* "TTS/tts/utils/monotonic_align/core.pyx":42
+ * @cython.boundscheck(False)
+ * @cython.wraparound(False)
+ * cpdef void maximum_path_c(int[:,:,::1] paths, float[:,:,::1] values, int[::1] t_xs, int[::1] t_ys, float max_neg_val=-1e9) nogil:             # <<<<<<<<<<<<<<
+ *   cdef int b = values.shape[0]
+ * 
+ */
+
+static PyObject *__pyx_pw_3TTS_3tts_5utils_15monotonic_align_4core_1maximum_path_c(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
+static void __pyx_f_3TTS_3tts_5utils_15monotonic_align_4core_maximum_path_c(__Pyx_memviewslice __pyx_v_paths, __Pyx_memviewslice __pyx_v_values, __Pyx_memviewslice __pyx_v_t_xs, __Pyx_memviewslice __pyx_v_t_ys, CYTHON_UNUSED int __pyx_skip_dispatch, struct __pyx_opt_args_3TTS_3tts_5utils_15monotonic_align_4core_maximum_path_c *__pyx_optional_args) { /* C-level entry point of the cpdef function: for every index i of the leading dimension it calls maximum_path_each(paths[i], values[i], t_xs[i], t_ys[i], max_neg_val). The loop runs with the GIL released and is an OpenMP parallel-for when _OPENMP is defined. */
+  float __pyx_v_max_neg_val = __pyx_k_; /* default; replaced below if the caller supplied max_neg_val */
+  CYTHON_UNUSED int __pyx_v_b; /* number of entries along dimension 0 of values */
+  int __pyx_v_i;
+  int __pyx_t_1;
+  int __pyx_t_2;
+  int __pyx_t_3;
+  __Pyx_memviewslice __pyx_t_4 = { 0, 0, { 0 }, { 0 }, { 0 } }; /* scratch slice that holds paths[i] */
+  __Pyx_memviewslice __pyx_t_5 = { 0, 0, { 0 }, { 0 }, { 0 } }; /* scratch slice that holds values[i] */
+  Py_ssize_t __pyx_t_6;
+  Py_ssize_t __pyx_t_7;
+  if (__pyx_optional_args) {
+    if (__pyx_optional_args->__pyx_n > 0) { /* __pyx_n > 0: the optional argument was passed */
+      __pyx_v_max_neg_val = __pyx_optional_args->max_neg_val;
+    }
+  }
+
+  /* "TTS/tts/utils/monotonic_align/core.pyx":43
+ * @cython.wraparound(False)
+ * cpdef void maximum_path_c(int[:,:,::1] paths, float[:,:,::1] values, int[::1] t_xs, int[::1] t_ys, float max_neg_val=-1e9) nogil:
+ *   cdef int b = values.shape[0]             # <<<<<<<<<<<<<<
+ * 
+ *   cdef int i
+ */
+  __pyx_v_b = (__pyx_v_values.shape[0]); /* NOTE(review): shape[0] is stored into an int; shape is presumably Py_ssize_t, so this narrows -- confirm */
+
+  /* "TTS/tts/utils/monotonic_align/core.pyx":46
+ * 
+ *   cdef int i
+ *   for i in prange(b, nogil=True):             # <<<<<<<<<<<<<<
+ *     maximum_path_each(paths[i], values[i], t_xs[i], t_ys[i], max_neg_val)
+ */
+  {
+      #ifdef WITH_THREAD
+      PyThreadState *_save;
+      Py_UNBLOCK_THREADS /* releases the GIL for the loop below */
+      __Pyx_FastGIL_Remember();
+      #endif
+      /*try:*/ {
+        __pyx_t_1 = __pyx_v_b;
+        if ((1 == 0)) abort(); /* generated zero-step guard; the step is the literal 1, so this never fires */
+        {
+            #if ((defined(__APPLE__) || defined(__OSX__)) && (defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95)))))
+                #undef likely
+                #undef unlikely
+                #define likely(x)   (x)
+                #define unlikely(x) (x)
+            #endif
+            __pyx_t_3 = (__pyx_t_1 - 0 + 1 - 1/abs(1)) / 1; /* iteration count; with start 0 and step 1 this equals __pyx_t_1 (b) */
+            if (__pyx_t_3 > 0) /* nothing is executed when the leading dimension is empty */
+            {
+                #ifdef _OPENMP
+                #pragma omp parallel private(__pyx_t_6, __pyx_t_7) firstprivate(__pyx_t_4, __pyx_t_5)
+                #endif /* _OPENMP */
+                {
+                    #ifdef _OPENMP
+                    #pragma omp for firstprivate(__pyx_v_i) lastprivate(__pyx_v_i)
+                    #endif /* _OPENMP */
+                    for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_3; __pyx_t_2++){
+                        {
+                            __pyx_v_i = (int)(0 + 1 * __pyx_t_2); /* i = start + step * iteration number */
+
+                            /* "TTS/tts/utils/monotonic_align/core.pyx":47
+ *   cdef int i
+ *   for i in prange(b, nogil=True):
+ *     maximum_path_each(paths[i], values[i], t_xs[i], t_ys[i], max_neg_val)             # <<<<<<<<<<<<<<
+ */
+                            __pyx_t_4.data = __pyx_v_paths.data; /* build the 2-D slice paths[i] in __pyx_t_4: share data and memview, then offset by i and drop dimension 0 */
+                            __pyx_t_4.memview = __pyx_v_paths.memview;
+                            __PYX_INC_MEMVIEW(&__pyx_t_4, 0); /* presumably takes a reference on the shared memoryview for the temporary -- macro body not visible here */
+                            {
+    Py_ssize_t __pyx_tmp_idx = __pyx_v_i;
+    Py_ssize_t __pyx_tmp_stride = __pyx_v_paths.strides[0];
+        __pyx_t_4.data += __pyx_tmp_idx * __pyx_tmp_stride; /* advance to entry i along dimension 0 */
+}
+
+__pyx_t_4.shape[0] = __pyx_v_paths.shape[1]; /* dimensions 1 and 2 of paths become dimensions 0 and 1 of the slice */
+__pyx_t_4.strides[0] = __pyx_v_paths.strides[1];
+    __pyx_t_4.suboffsets[0] = -1;
+
+__pyx_t_4.shape[1] = __pyx_v_paths.shape[2];
+__pyx_t_4.strides[1] = __pyx_v_paths.strides[2];
+    __pyx_t_4.suboffsets[1] = -1;
+
+__pyx_t_5.data = __pyx_v_values.data; /* same construction for values[i] in __pyx_t_5 */
+                            __pyx_t_5.memview = __pyx_v_values.memview;
+                            __PYX_INC_MEMVIEW(&__pyx_t_5, 0);
+                            {
+    Py_ssize_t __pyx_tmp_idx = __pyx_v_i;
+    Py_ssize_t __pyx_tmp_stride = __pyx_v_values.strides[0];
+        __pyx_t_5.data += __pyx_tmp_idx * __pyx_tmp_stride;
+}
+
+__pyx_t_5.shape[0] = __pyx_v_values.shape[1];
+__pyx_t_5.strides[0] = __pyx_v_values.strides[1];
+    __pyx_t_5.suboffsets[0] = -1;
+
+__pyx_t_5.shape[1] = __pyx_v_values.shape[2];
+__pyx_t_5.strides[1] = __pyx_v_values.strides[2];
+    __pyx_t_5.suboffsets[1] = -1;
+
+__pyx_t_6 = __pyx_v_i; /* element offsets used to read t_xs[i] and t_ys[i] directly, without bounds checks */
+                            __pyx_t_7 = __pyx_v_i;
+                            __pyx_f_3TTS_3tts_5utils_15monotonic_align_4core_maximum_path_each(__pyx_t_4, __pyx_t_5, (*((int *) ( /* dim=0 */ ((char *) (((int *) __pyx_v_t_xs.data) + __pyx_t_6)) ))), (*((int *) ( /* dim=0 */ ((char *) (((int *) __pyx_v_t_ys.data) + __pyx_t_7)) ))), __pyx_v_max_neg_val);
+                            __PYX_XDEC_MEMVIEW(&__pyx_t_4, 0); /* presumably drops the temporary's reference; the fields are then cleared for the next iteration */
+                            __pyx_t_4.memview = NULL;
+                            __pyx_t_4.data = NULL;
+                            __PYX_XDEC_MEMVIEW(&__pyx_t_5, 0);
+                            __pyx_t_5.memview = NULL;
+                            __pyx_t_5.data = NULL;
+                        }
+                    }
+                }
+            }
+        }
+        #if ((defined(__APPLE__) || defined(__OSX__)) && (defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95)))))
+            #undef likely
+            #undef unlikely
+            #define likely(x)   __builtin_expect(!!(x), 1)
+            #define unlikely(x) __builtin_expect(!!(x), 0)
+        #endif
+      }
+
+      /* "TTS/tts/utils/monotonic_align/core.pyx":46
+ * 
+ *   cdef int i
+ *   for i in prange(b, nogil=True):             # <<<<<<<<<<<<<<
+ *     maximum_path_each(paths[i], values[i], t_xs[i], t_ys[i], max_neg_val)
+ */
+      /*finally:*/ {
+        /*normal exit:*/{
+          #ifdef WITH_THREAD
+          __Pyx_FastGIL_Forget();
+          Py_BLOCK_THREADS /* re-acquires the GIL before returning */
+          #endif
+          goto __pyx_L5;
+        }
+        __pyx_L5:;
+      }
+  }
+
+  /* "TTS/tts/utils/monotonic_align/core.pyx":42
+ * @cython.boundscheck(False)
+ * @cython.wraparound(False)
+ * cpdef void maximum_path_c(int[:,:,::1] paths, float[:,:,::1] values, int[::1] t_xs, int[::1] t_ys, float max_neg_val=-1e9) nogil:             # <<<<<<<<<<<<<<
+ *   cdef int b = values.shape[0]
+ * 
+ */
+
+  /* function exit code */
+}
+
+/* Python wrapper */
+static PyObject *__pyx_pw_3TTS_3tts_5utils_15monotonic_align_4core_1maximum_path_c(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
+static PyObject *__pyx_pw_3TTS_3tts_5utils_15monotonic_align_4core_1maximum_path_c(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { /* Unpacks (paths, values, t_xs, t_ys[, max_neg_val]) from positional and keyword arguments, converts them to C values and forwards to the __pyx_pf_ implementation. On any failure it adds a traceback entry and returns NULL. */
+  __Pyx_memviewslice __pyx_v_paths = { 0, 0, { 0 }, { 0 }, { 0 } };
+  __Pyx_memviewslice __pyx_v_values = { 0, 0, { 0 }, { 0 }, { 0 } };
+  __Pyx_memviewslice __pyx_v_t_xs = { 0, 0, { 0 }, { 0 }, { 0 } };
+  __Pyx_memviewslice __pyx_v_t_ys = { 0, 0, { 0 }, { 0 }, { 0 } };
+  float __pyx_v_max_neg_val;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  PyObject *__pyx_r = 0;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("maximum_path_c (wrapper)", 0);
+  {
+    static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_paths,&__pyx_n_s_values,&__pyx_n_s_t_xs,&__pyx_n_s_t_ys,&__pyx_n_s_max_neg_val,0};
+    PyObject* values[5] = {0,0,0,0,0}; /* one slot per parameter; a slot left at 0 means "not supplied" */
+    if (unlikely(__pyx_kwds)) { /* a keyword dict was passed: merge positional and keyword arguments */
+      Py_ssize_t kw_args;
+      const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args);
+      switch (pos_args) { /* each case falls through, so all lower-numbered positional arguments are copied too */
+        case  5: values[4] = PyTuple_GET_ITEM(__pyx_args, 4);
+        CYTHON_FALLTHROUGH;
+        case  4: values[3] = PyTuple_GET_ITEM(__pyx_args, 3);
+        CYTHON_FALLTHROUGH;
+        case  3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
+        CYTHON_FALLTHROUGH;
+        case  2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
+        CYTHON_FALLTHROUGH;
+        case  1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
+        CYTHON_FALLTHROUGH;
+        case  0: break;
+        default: goto __pyx_L5_argtuple_error; /* more than five positional arguments */
+      }
+      kw_args = PyDict_Size(__pyx_kwds);
+      switch (pos_args) { /* starting at the first slot not filled positionally, look each remaining name up in the keyword dict */
+        case  0:
+        if (likely((values[0] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_paths)) != 0)) kw_args--;
+        else goto __pyx_L5_argtuple_error;
+        CYTHON_FALLTHROUGH;
+        case  1:
+        if (likely((values[1] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_values)) != 0)) kw_args--;
+        else {
+          __Pyx_RaiseArgtupleInvalid("maximum_path_c", 0, 4, 5, 1); __PYX_ERR(0, 42, __pyx_L3_error)
+        }
+        CYTHON_FALLTHROUGH;
+        case  2:
+        if (likely((values[2] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_t_xs)) != 0)) kw_args--;
+        else {
+          __Pyx_RaiseArgtupleInvalid("maximum_path_c", 0, 4, 5, 2); __PYX_ERR(0, 42, __pyx_L3_error)
+        }
+        CYTHON_FALLTHROUGH;
+        case  3:
+        if (likely((values[3] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_t_ys)) != 0)) kw_args--;
+        else {
+          __Pyx_RaiseArgtupleInvalid("maximum_path_c", 0, 4, 5, 3); __PYX_ERR(0, 42, __pyx_L3_error)
+        }
+        CYTHON_FALLTHROUGH;
+        case  4:
+        if (kw_args > 0) { /* max_neg_val is optional: a missing keyword is not an error */
+          PyObject* value = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_max_neg_val);
+          if (value) { values[4] = value; kw_args--; }
+        }
+      }
+      if (unlikely(kw_args > 0)) { /* keywords remain unaccounted for; the helper's negative return is treated as an error */
+        if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "maximum_path_c") < 0)) __PYX_ERR(0, 42, __pyx_L3_error)
+      }
+    } else { /* positional-only call: exactly 4 or 5 arguments are accepted */
+      switch (PyTuple_GET_SIZE(__pyx_args)) {
+        case  5: values[4] = PyTuple_GET_ITEM(__pyx_args, 4);
+        CYTHON_FALLTHROUGH;
+        case  4: values[3] = PyTuple_GET_ITEM(__pyx_args, 3);
+        values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
+        values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
+        values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
+        break;
+        default: goto __pyx_L5_argtuple_error;
+      }
+    }
+    __pyx_v_paths = __Pyx_PyObject_to_MemoryviewSlice_d_d_dc_int(values[0], PyBUF_WRITABLE); if (unlikely(!__pyx_v_paths.memview)) __PYX_ERR(0, 42, __pyx_L3_error) /* all four buffers are requested with PyBUF_WRITABLE, including t_xs and t_ys; a NULL memview marks a failed conversion */
+    __pyx_v_values = __Pyx_PyObject_to_MemoryviewSlice_d_d_dc_float(values[1], PyBUF_WRITABLE); if (unlikely(!__pyx_v_values.memview)) __PYX_ERR(0, 42, __pyx_L3_error)
+    __pyx_v_t_xs = __Pyx_PyObject_to_MemoryviewSlice_dc_int(values[2], PyBUF_WRITABLE); if (unlikely(!__pyx_v_t_xs.memview)) __PYX_ERR(0, 42, __pyx_L3_error)
+    __pyx_v_t_ys = __Pyx_PyObject_to_MemoryviewSlice_dc_int(values[3], PyBUF_WRITABLE); if (unlikely(!__pyx_v_t_ys.memview)) __PYX_ERR(0, 42, __pyx_L3_error)
+    if (values[4]) {
+      __pyx_v_max_neg_val = __pyx_PyFloat_AsFloat(values[4]); if (unlikely((__pyx_v_max_neg_val == (float)-1) && PyErr_Occurred())) __PYX_ERR(0, 42, __pyx_L3_error) /* -1 together with a pending exception signals a conversion failure */
+    } else {
+      __pyx_v_max_neg_val = __pyx_k_; /* argument omitted: use the module-level default */
+    }
+  }
+  goto __pyx_L4_argument_unpacking_done;
+  __pyx_L5_argtuple_error:;
+  __Pyx_RaiseArgtupleInvalid("maximum_path_c", 0, 4, 5, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 42, __pyx_L3_error)
+  __pyx_L3_error:; /* common failure exit: record the traceback entry and return NULL */
+  __Pyx_AddTraceback("TTS.tts.utils.monotonic_align.core.maximum_path_c", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __Pyx_RefNannyFinishContext();
+  return NULL;
+  __pyx_L4_argument_unpacking_done:;
+  __pyx_r = __pyx_pf_3TTS_3tts_5utils_15monotonic_align_4core_maximum_path_c(__pyx_self, __pyx_v_paths, __pyx_v_values, __pyx_v_t_xs, __pyx_v_t_ys, __pyx_v_max_neg_val);
+
+  /* function exit code */
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+static PyObject *__pyx_pf_3TTS_3tts_5utils_15monotonic_align_4core_maximum_path_c(CYTHON_UNUSED PyObject *__pyx_self, __Pyx_memviewslice __pyx_v_paths, __Pyx_memviewslice __pyx_v_values, __Pyx_memviewslice __pyx_v_t_xs, __Pyx_memviewslice __pyx_v_t_ys, float __pyx_v_max_neg_val) { /* Python-visible implementation: checks that all four slices are bound, calls the C-level maximum_path_c with max_neg_val passed explicitly, and returns the result of __Pyx_void_to_None (NULL on error). The four slices are released on every exit path. */
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  struct __pyx_opt_args_3TTS_3tts_5utils_15monotonic_align_4core_maximum_path_c __pyx_t_1;
+  PyObject *__pyx_t_2 = NULL;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("maximum_path_c", 0);
+  __Pyx_XDECREF(__pyx_r);
+  if (unlikely(!__pyx_v_paths.memview)) { __Pyx_RaiseUnboundLocalError("paths"); __PYX_ERR(0, 42, __pyx_L1_error) }
+  if (unlikely(!__pyx_v_values.memview)) { __Pyx_RaiseUnboundLocalError("values"); __PYX_ERR(0, 42, __pyx_L1_error) }
+  if (unlikely(!__pyx_v_t_xs.memview)) { __Pyx_RaiseUnboundLocalError("t_xs"); __PYX_ERR(0, 42, __pyx_L1_error) }
+  if (unlikely(!__pyx_v_t_ys.memview)) { __Pyx_RaiseUnboundLocalError("t_ys"); __PYX_ERR(0, 42, __pyx_L1_error) }
+  __pyx_t_1.__pyx_n = 1; /* one optional argument is being supplied, so the callee uses max_neg_val from this struct rather than its own default */
+  __pyx_t_1.max_neg_val = __pyx_v_max_neg_val;
+  __pyx_f_3TTS_3tts_5utils_15monotonic_align_4core_maximum_path_c(__pyx_v_paths, __pyx_v_values, __pyx_v_t_xs, __pyx_v_t_ys, 0, &__pyx_t_1); 
+  __pyx_t_2 = __Pyx_void_to_None(NULL); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 42, __pyx_L1_error) /* the C function returns void; this produces the object handed back to Python */
+  __Pyx_GOTREF(__pyx_t_2);
+  __pyx_r = __pyx_t_2;
+  __pyx_t_2 = 0; /* ownership moved to __pyx_r */
+  goto __pyx_L0;
+
+  /* function exit code */
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_2);
+  __Pyx_AddTraceback("TTS.tts.utils.monotonic_align.core.maximum_path_c", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = NULL;
+  __pyx_L0:; /* shared exit for success and failure: release the argument slices, then return */
+  __PYX_XDEC_MEMVIEW(&__pyx_v_paths, 1);
+  __PYX_XDEC_MEMVIEW(&__pyx_v_values, 1);
+  __PYX_XDEC_MEMVIEW(&__pyx_v_t_xs, 1);
+  __PYX_XDEC_MEMVIEW(&__pyx_v_t_ys, 1);
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":735
+ * ctypedef npy_cdouble     complex_t
+ * 
+ * cdef inline object PyArray_MultiIterNew1(a):             # <<<<<<<<<<<<<<
+ *     return PyArray_MultiIterNew(1, <void*>a)
+ * 
+ */
+
+static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew1(PyObject *__pyx_v_a) { /* Inline helper from numpy's .pxd: returns PyArray_MultiIterNew(1, a). On a NULL result it records a traceback entry and returns 0. */
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  PyObject *__pyx_t_1 = NULL;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("PyArray_MultiIterNew1", 0);
+
+  /* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":736
+ * 
+ * cdef inline object PyArray_MultiIterNew1(a):
+ *     return PyArray_MultiIterNew(1, <void*>a)             # <<<<<<<<<<<<<<
+ * 
+ * cdef inline object PyArray_MultiIterNew2(a, b):
+ */
+  __Pyx_XDECREF(__pyx_r);
+  __pyx_t_1 = PyArray_MultiIterNew(1, ((void *)__pyx_v_a)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 736, __pyx_L1_error) /* first argument is the number of objects that follow */
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_r = __pyx_t_1;
+  __pyx_t_1 = 0; /* ownership moved to __pyx_r */
+  goto __pyx_L0;
+
+  /* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":735
+ * ctypedef npy_cdouble     complex_t
+ * 
+ * cdef inline object PyArray_MultiIterNew1(a):             # <<<<<<<<<<<<<<
+ *     return PyArray_MultiIterNew(1, <void*>a)
+ * 
+ */
+
+  /* function exit code */
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_1);
+  __Pyx_AddTraceback("numpy.PyArray_MultiIterNew1", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = 0;
+  __pyx_L0:;
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":738
+ *     return PyArray_MultiIterNew(1, <void*>a)
+ * 
+ * cdef inline object PyArray_MultiIterNew2(a, b):             # <<<<<<<<<<<<<<
+ *     return PyArray_MultiIterNew(2, <void*>a, <void*>b)
+ * 
+ */
+
+static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew2(PyObject *__pyx_v_a, PyObject *__pyx_v_b) { /* Inline helper from numpy's .pxd: returns PyArray_MultiIterNew(2, a, b). On a NULL result it records a traceback entry and returns 0. */
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  PyObject *__pyx_t_1 = NULL;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("PyArray_MultiIterNew2", 0);
+
+  /* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":739
+ * 
+ * cdef inline object PyArray_MultiIterNew2(a, b):
+ *     return PyArray_MultiIterNew(2, <void*>a, <void*>b)             # <<<<<<<<<<<<<<
+ * 
+ * cdef inline object PyArray_MultiIterNew3(a, b, c):
+ */
+  __Pyx_XDECREF(__pyx_r);
+  __pyx_t_1 = PyArray_MultiIterNew(2, ((void *)__pyx_v_a), ((void *)__pyx_v_b)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 739, __pyx_L1_error) /* first argument is the number of objects that follow */
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_r = __pyx_t_1;
+  __pyx_t_1 = 0; /* ownership moved to __pyx_r */
+  goto __pyx_L0;
+
+  /* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":738
+ *     return PyArray_MultiIterNew(1, <void*>a)
+ * 
+ * cdef inline object PyArray_MultiIterNew2(a, b):             # <<<<<<<<<<<<<<
+ *     return PyArray_MultiIterNew(2, <void*>a, <void*>b)
+ * 
+ */
+
+  /* function exit code */
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_1);
+  __Pyx_AddTraceback("numpy.PyArray_MultiIterNew2", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = 0;
+  __pyx_L0:;
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":741
+ *     return PyArray_MultiIterNew(2, <void*>a, <void*>b)
+ * 
+ * cdef inline object PyArray_MultiIterNew3(a, b, c):             # <<<<<<<<<<<<<<
+ *     return PyArray_MultiIterNew(3, <void*>a, <void*>b, <void*> c)
+ * 
+ */
+
+static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew3(PyObject *__pyx_v_a, PyObject *__pyx_v_b, PyObject *__pyx_v_c) { /* Inline helper from numpy's .pxd: returns PyArray_MultiIterNew(3, a, b, c). On a NULL result it records a traceback entry and returns 0. */
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  PyObject *__pyx_t_1 = NULL;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("PyArray_MultiIterNew3", 0);
+
+  /* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":742
+ * 
+ * cdef inline object PyArray_MultiIterNew3(a, b, c):
+ *     return PyArray_MultiIterNew(3, <void*>a, <void*>b, <void*> c)             # <<<<<<<<<<<<<<
+ * 
+ * cdef inline object PyArray_MultiIterNew4(a, b, c, d):
+ */
+  __Pyx_XDECREF(__pyx_r);
+  __pyx_t_1 = PyArray_MultiIterNew(3, ((void *)__pyx_v_a), ((void *)__pyx_v_b), ((void *)__pyx_v_c)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 742, __pyx_L1_error) /* first argument is the number of objects that follow */
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_r = __pyx_t_1;
+  __pyx_t_1 = 0; /* ownership moved to __pyx_r */
+  goto __pyx_L0;
+
+  /* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":741
+ *     return PyArray_MultiIterNew(2, <void*>a, <void*>b)
+ * 
+ * cdef inline object PyArray_MultiIterNew3(a, b, c):             # <<<<<<<<<<<<<<
+ *     return PyArray_MultiIterNew(3, <void*>a, <void*>b, <void*> c)
+ * 
+ */
+
+  /* function exit code */
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_1);
+  __Pyx_AddTraceback("numpy.PyArray_MultiIterNew3", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = 0;
+  __pyx_L0:;
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":744
+ *     return PyArray_MultiIterNew(3, <void*>a, <void*>b, <void*> c)
+ * 
+ * cdef inline object PyArray_MultiIterNew4(a, b, c, d):             # <<<<<<<<<<<<<<
+ *     return PyArray_MultiIterNew(4, <void*>a, <void*>b, <void*>c, <void*> d)
+ * 
+ */
+/* Cython-generated C body of the inline helper PyArray_MultiIterNew4 quoted above: passes a, b, c and d (each cast to a void pointer) to PyArray_MultiIterNew with a count of 4 and returns that object. When the call yields NULL, the __PYX_ERR line names __pyx_L1_error, where a traceback entry is added and 0 (NULL) is returned. */
+static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew4(PyObject *__pyx_v_a, PyObject *__pyx_v_b, PyObject *__pyx_v_c, PyObject *__pyx_v_d) {
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  PyObject *__pyx_t_1 = NULL;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("PyArray_MultiIterNew4", 0);
+
+  /* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":745
+ * 
+ * cdef inline object PyArray_MultiIterNew4(a, b, c, d):
+ *     return PyArray_MultiIterNew(4, <void*>a, <void*>b, <void*>c, <void*> d)             # <<<<<<<<<<<<<<
+ * 
+ * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e):
+ */
+  __Pyx_XDECREF(__pyx_r);
+  __pyx_t_1 = PyArray_MultiIterNew(4, ((void *)__pyx_v_a), ((void *)__pyx_v_b), ((void *)__pyx_v_c), ((void *)__pyx_v_d)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 745, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_r = __pyx_t_1;
+  __pyx_t_1 = 0;
+  goto __pyx_L0;
+
+  /* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":744
+ *     return PyArray_MultiIterNew(3, <void*>a, <void*>b, <void*> c)
+ * 
+ * cdef inline object PyArray_MultiIterNew4(a, b, c, d):             # <<<<<<<<<<<<<<
+ *     return PyArray_MultiIterNew(4, <void*>a, <void*>b, <void*>c, <void*> d)
+ * 
+ */
+
+  /* function exit code: __pyx_L1_error drops the temporary, adds a traceback entry and sets the result to 0 (NULL); __pyx_L0 is the shared return path (the success path jumps straight to it) */
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_1);
+  __Pyx_AddTraceback("numpy.PyArray_MultiIterNew4", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = 0;
+  __pyx_L0:;
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":747
+ *     return PyArray_MultiIterNew(4, <void*>a, <void*>b, <void*>c, <void*> d)
+ * 
+ * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e):             # <<<<<<<<<<<<<<
+ *     return PyArray_MultiIterNew(5, <void*>a, <void*>b, <void*>c, <void*> d, <void*> e)
+ * 
+ */
+/* Cython-generated C body of the inline helper PyArray_MultiIterNew5 quoted above: passes a, b, c, d and e (each cast to a void pointer) to PyArray_MultiIterNew with a count of 5 and returns that object. When the call yields NULL, the __PYX_ERR line names __pyx_L1_error, where a traceback entry is added and 0 (NULL) is returned. */
+static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew5(PyObject *__pyx_v_a, PyObject *__pyx_v_b, PyObject *__pyx_v_c, PyObject *__pyx_v_d, PyObject *__pyx_v_e) {
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  PyObject *__pyx_t_1 = NULL;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("PyArray_MultiIterNew5", 0);
+
+  /* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":748
+ * 
+ * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e):
+ *     return PyArray_MultiIterNew(5, <void*>a, <void*>b, <void*>c, <void*> d, <void*> e)             # <<<<<<<<<<<<<<
+ * 
+ * cdef inline tuple PyDataType_SHAPE(dtype d):
+ */
+  __Pyx_XDECREF(__pyx_r);
+  __pyx_t_1 = PyArray_MultiIterNew(5, ((void *)__pyx_v_a), ((void *)__pyx_v_b), ((void *)__pyx_v_c), ((void *)__pyx_v_d), ((void *)__pyx_v_e)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 748, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_r = __pyx_t_1;
+  __pyx_t_1 = 0;
+  goto __pyx_L0;
+
+  /* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":747
+ *     return PyArray_MultiIterNew(4, <void*>a, <void*>b, <void*>c, <void*> d)
+ * 
+ * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e):             # <<<<<<<<<<<<<<
+ *     return PyArray_MultiIterNew(5, <void*>a, <void*>b, <void*>c, <void*> d, <void*> e)
+ * 
+ */
+
+  /* function exit code: __pyx_L1_error drops the temporary, adds a traceback entry and sets the result to 0 (NULL); __pyx_L0 is the shared return path (the success path jumps straight to it) */
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_1);
+  __Pyx_AddTraceback("numpy.PyArray_MultiIterNew5", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = 0;
+  __pyx_L0:;
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":750
+ *     return PyArray_MultiIterNew(5, <void*>a, <void*>b, <void*>c, <void*> d, <void*> e)
+ * 
+ * cdef inline tuple PyDataType_SHAPE(dtype d):             # <<<<<<<<<<<<<<
+ *     if PyDataType_HASSUBARRAY(d):
+ *         return <tuple>d.subarray.shape
+ */
+/* Cython-generated C body of the inline helper PyDataType_SHAPE quoted above: when PyDataType_HASSUBARRAY(d) is non-zero it returns d->subarray->shape, otherwise __pyx_empty_tuple. Either object is INCREF'd before being returned; the function has no error path. */
+static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyDataType_SHAPE(PyArray_Descr *__pyx_v_d) {
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  int __pyx_t_1;
+  __Pyx_RefNannySetupContext("PyDataType_SHAPE", 0);
+
+  /* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":751
+ * 
+ * cdef inline tuple PyDataType_SHAPE(dtype d):
+ *     if PyDataType_HASSUBARRAY(d):             # <<<<<<<<<<<<<<
+ *         return <tuple>d.subarray.shape
+ *     else:
+ */
+  __pyx_t_1 = (PyDataType_HASSUBARRAY(__pyx_v_d) != 0);
+  if (__pyx_t_1) {
+
+    /* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":752
+ * cdef inline tuple PyDataType_SHAPE(dtype d):
+ *     if PyDataType_HASSUBARRAY(d):
+ *         return <tuple>d.subarray.shape             # <<<<<<<<<<<<<<
+ *     else:
+ *         return ()
+ */
+    __Pyx_XDECREF(__pyx_r);
+    __Pyx_INCREF(((PyObject*)__pyx_v_d->subarray->shape));
+    __pyx_r = ((PyObject*)__pyx_v_d->subarray->shape);
+    goto __pyx_L0;
+
+    /* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":751
+ * 
+ * cdef inline tuple PyDataType_SHAPE(dtype d):
+ *     if PyDataType_HASSUBARRAY(d):             # <<<<<<<<<<<<<<
+ *         return <tuple>d.subarray.shape
+ *     else:
+ */
+  }
+
+  /* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":754
+ *         return <tuple>d.subarray.shape
+ *     else:
+ *         return ()             # <<<<<<<<<<<<<<
+ * 
+ * 
+ */
+  /*else*/ {
+    __Pyx_XDECREF(__pyx_r);
+    __Pyx_INCREF(__pyx_empty_tuple);
+    __pyx_r = __pyx_empty_tuple;
+    goto __pyx_L0;
+  }
+
+  /* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":750
+ *     return PyArray_MultiIterNew(5, <void*>a, <void*>b, <void*>c, <void*> d, <void*> e)
+ * 
+ * cdef inline tuple PyDataType_SHAPE(dtype d):             # <<<<<<<<<<<<<<
+ *     if PyDataType_HASSUBARRAY(d):
+ *         return <tuple>d.subarray.shape
+ */
+
+  /* function exit code: both branches jump to __pyx_L0, the single return path; there is no error label in this function */
+  __pyx_L0:;
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":929
+ *     int _import_umath() except -1
+ * 
+ * cdef inline void set_array_base(ndarray arr, object base):             # <<<<<<<<<<<<<<
+ *     Py_INCREF(base) # important to do this before stealing the reference below!
+ *     PyArray_SetBaseObject(arr, base)
+ */
+/* Cython-generated C body of the inline helper set_array_base quoted above: INCREFs base, then hands it to PyArray_SetBaseObject(arr, base); per the quoted source comment the INCREF must come first because the call steals a reference. NOTE(review): the call's return value is discarded by the (void) cast, so a failure would not be reported here; this mirrors the quoted .pxd source. */
+static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_arr, PyObject *__pyx_v_base) {
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("set_array_base", 0);
+
+  /* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":930
+ * 
+ * cdef inline void set_array_base(ndarray arr, object base):
+ *     Py_INCREF(base) # important to do this before stealing the reference below!             # <<<<<<<<<<<<<<
+ *     PyArray_SetBaseObject(arr, base)
+ * 
+ */
+  Py_INCREF(__pyx_v_base);
+
+  /* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":931
+ * cdef inline void set_array_base(ndarray arr, object base):
+ *     Py_INCREF(base) # important to do this before stealing the reference below!
+ *     PyArray_SetBaseObject(arr, base)             # <<<<<<<<<<<<<<
+ * 
+ * cdef inline object get_array_base(ndarray arr):
+ */
+  (void)(PyArray_SetBaseObject(__pyx_v_arr, __pyx_v_base));
+
+  /* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":929
+ *     int _import_umath() except -1
+ * 
+ * cdef inline void set_array_base(ndarray arr, object base):             # <<<<<<<<<<<<<<
+ *     Py_INCREF(base) # important to do this before stealing the reference below!
+ *     PyArray_SetBaseObject(arr, base)
+ */
+
+  /* function exit code: void function, so there is no result and no error label */
+  __Pyx_RefNannyFinishContext();
+}
+
+/* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":933
+ *     PyArray_SetBaseObject(arr, base)
+ * 
+ * cdef inline object get_array_base(ndarray arr):             # <<<<<<<<<<<<<<
+ *     base = PyArray_BASE(arr)
+ *     if base is NULL:
+ */
+/* Cython-generated C body of the inline helper get_array_base quoted above: reads PyArray_BASE(arr) and returns Py_None when that pointer is NULL, otherwise the base object itself. The returned object is INCREF'd in both cases; the function has no error path. */
+static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__pyx_v_arr) {
+  PyObject *__pyx_v_base;
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  int __pyx_t_1;
+  __Pyx_RefNannySetupContext("get_array_base", 0);
+
+  /* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":934
+ * 
+ * cdef inline object get_array_base(ndarray arr):
+ *     base = PyArray_BASE(arr)             # <<<<<<<<<<<<<<
+ *     if base is NULL:
+ *         return None
+ */
+  __pyx_v_base = PyArray_BASE(__pyx_v_arr);
+
+  /* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":935
+ * cdef inline object get_array_base(ndarray arr):
+ *     base = PyArray_BASE(arr)
+ *     if base is NULL:             # <<<<<<<<<<<<<<
+ *         return None
+ *     return <object>base
+ */
+  __pyx_t_1 = ((__pyx_v_base == NULL) != 0);
+  if (__pyx_t_1) {
+
+    /* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":936
+ *     base = PyArray_BASE(arr)
+ *     if base is NULL:
+ *         return None             # <<<<<<<<<<<<<<
+ *     return <object>base
+ * 
+ */
+    __Pyx_XDECREF(__pyx_r);
+    __pyx_r = Py_None; __Pyx_INCREF(Py_None);
+    goto __pyx_L0;
+
+    /* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":935
+ * cdef inline object get_array_base(ndarray arr):
+ *     base = PyArray_BASE(arr)
+ *     if base is NULL:             # <<<<<<<<<<<<<<
+ *         return None
+ *     return <object>base
+ */
+  }
+
+  /* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":937
+ *     if base is NULL:
+ *         return None
+ *     return <object>base             # <<<<<<<<<<<<<<
+ * 
+ * # Versions of the import_* functions which are more suitable for
+ */
+  __Pyx_XDECREF(__pyx_r);
+  __Pyx_INCREF(((PyObject *)__pyx_v_base));
+  __pyx_r = ((PyObject *)__pyx_v_base);
+  goto __pyx_L0;
+
+  /* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":933
+ *     PyArray_SetBaseObject(arr, base)
+ * 
+ * cdef inline object get_array_base(ndarray arr):             # <<<<<<<<<<<<<<
+ *     base = PyArray_BASE(arr)
+ *     if base is NULL:
+ */
+
+  /* function exit code: both return statements jump to __pyx_L0, the single return path; there is no error label in this function */
+  __pyx_L0:;
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":941
+ * # Versions of the import_* functions which are more suitable for
+ * # Cython code.
+ * cdef inline int import_array() except -1:             # <<<<<<<<<<<<<<
+ *     try:
+ *         __pyx_import_array()
+ */
+/* Cython-generated C body of the inline helper import_array quoted above: calls _import_array() inside a goto-based try/except. Returns 0 when the call does not return -1. When it returns -1 and the pending exception matches Exception, an ImportError built from __pyx_tuple__2 is raised in its place (presumably carrying the "numpy.core.multiarray failed to import" message from the quoted source; the tuple is defined outside this view). Every failure path returns -1. */
+static CYTHON_INLINE int __pyx_f_5numpy_import_array(void) {
+  int __pyx_r;
+  __Pyx_RefNannyDeclarations
+  PyObject *__pyx_t_1 = NULL;
+  PyObject *__pyx_t_2 = NULL;
+  PyObject *__pyx_t_3 = NULL;
+  int __pyx_t_4;
+  PyObject *__pyx_t_5 = NULL;
+  PyObject *__pyx_t_6 = NULL;
+  PyObject *__pyx_t_7 = NULL;
+  PyObject *__pyx_t_8 = NULL;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("import_array", 0);
+
+  /* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":942
+ * # Cython code.
+ * cdef inline int import_array() except -1:
+ *     try:             # <<<<<<<<<<<<<<
+ *         __pyx_import_array()
+ *     except Exception:
+ */
+  {
+    __Pyx_PyThreadState_declare
+    __Pyx_PyThreadState_assign
+    __Pyx_ExceptionSave(&__pyx_t_1, &__pyx_t_2, &__pyx_t_3);
+    __Pyx_XGOTREF(__pyx_t_1);
+    __Pyx_XGOTREF(__pyx_t_2);
+    __Pyx_XGOTREF(__pyx_t_3);
+    /*try:*/ {
+
+      /* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":943
+ * cdef inline int import_array() except -1:
+ *     try:
+ *         __pyx_import_array()             # <<<<<<<<<<<<<<
+ *     except Exception:
+ *         raise ImportError("numpy.core.multiarray failed to import")
+ */
+      __pyx_t_4 = _import_array(); if (unlikely(__pyx_t_4 == ((int)-1))) __PYX_ERR(1, 943, __pyx_L3_error)
+
+      /* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":942
+ * # Cython code.
+ * cdef inline int import_array() except -1:
+ *     try:             # <<<<<<<<<<<<<<
+ *         __pyx_import_array()
+ *     except Exception:
+ */
+    }
+    __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0;
+    __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0;
+    __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0;
+    goto __pyx_L8_try_end;
+    __pyx_L3_error:;
+
+    /* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":944
+ *     try:
+ *         __pyx_import_array()
+ *     except Exception:             # <<<<<<<<<<<<<<
+ *         raise ImportError("numpy.core.multiarray failed to import")
+ * 
+ */
+    __pyx_t_4 = __Pyx_PyErr_ExceptionMatches(((PyObject *)(&((PyTypeObject*)PyExc_Exception)[0])));
+    if (__pyx_t_4) {
+      __Pyx_AddTraceback("numpy.import_array", __pyx_clineno, __pyx_lineno, __pyx_filename);
+      if (__Pyx_GetException(&__pyx_t_5, &__pyx_t_6, &__pyx_t_7) < 0) __PYX_ERR(1, 944, __pyx_L5_except_error)
+      __Pyx_GOTREF(__pyx_t_5);
+      __Pyx_GOTREF(__pyx_t_6);
+      __Pyx_GOTREF(__pyx_t_7);
+
+      /* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":945
+ *         __pyx_import_array()
+ *     except Exception:
+ *         raise ImportError("numpy.core.multiarray failed to import")             # <<<<<<<<<<<<<<
+ * 
+ * cdef inline int import_umath() except -1:
+ */
+      __pyx_t_8 = __Pyx_PyObject_Call(__pyx_builtin_ImportError, __pyx_tuple__2, NULL); if (unlikely(!__pyx_t_8)) __PYX_ERR(1, 945, __pyx_L5_except_error)
+      __Pyx_GOTREF(__pyx_t_8);
+      __Pyx_Raise(__pyx_t_8, 0, 0, 0);
+      __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
+      __PYX_ERR(1, 945, __pyx_L5_except_error)
+    }
+    goto __pyx_L5_except_error;
+    __pyx_L5_except_error:;
+    /* every failure inside the try/except lands here: the exception state saved by __Pyx_ExceptionSave at try entry is handed back via __Pyx_ExceptionReset before jumping to the function-level error exit */
+    /* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":942
+ * # Cython code.
+ * cdef inline int import_array() except -1:
+ *     try:             # <<<<<<<<<<<<<<
+ *         __pyx_import_array()
+ *     except Exception:
+ */
+    __Pyx_XGIVEREF(__pyx_t_1);
+    __Pyx_XGIVEREF(__pyx_t_2);
+    __Pyx_XGIVEREF(__pyx_t_3);
+    __Pyx_ExceptionReset(__pyx_t_1, __pyx_t_2, __pyx_t_3);
+    goto __pyx_L1_error;
+    __pyx_L8_try_end:;
+  }
+
+  /* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":941
+ * # Versions of the import_* functions which are more suitable for
+ * # Cython code.
+ * cdef inline int import_array() except -1:             # <<<<<<<<<<<<<<
+ *     try:
+ *         __pyx_import_array()
+ */
+
+  /* function exit code: the success path sets the result to 0 and skips to __pyx_L0; __pyx_L1_error drops temporaries t_5..t_8, adds a traceback entry and sets the result to -1 */
+  __pyx_r = 0;
+  goto __pyx_L0;
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_5);
+  __Pyx_XDECREF(__pyx_t_6);
+  __Pyx_XDECREF(__pyx_t_7);
+  __Pyx_XDECREF(__pyx_t_8);
+  __Pyx_AddTraceback("numpy.import_array", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = -1;
+  __pyx_L0:;
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":947
+ *         raise ImportError("numpy.core.multiarray failed to import")
+ * 
+ * cdef inline int import_umath() except -1:             # <<<<<<<<<<<<<<
+ *     try:
+ *         _import_umath()
+ */
+/* Cython-generated C body of the inline helper import_umath quoted above: calls _import_umath() inside a goto-based try/except. Returns 0 when the call does not return -1. When it returns -1 and the pending exception matches Exception, an ImportError built from __pyx_tuple__3 is raised in its place (presumably carrying the "numpy.core.umath failed to import" message from the quoted source; the tuple is defined outside this view). Every failure path returns -1. */
+static CYTHON_INLINE int __pyx_f_5numpy_import_umath(void) {
+  int __pyx_r;
+  __Pyx_RefNannyDeclarations
+  PyObject *__pyx_t_1 = NULL;
+  PyObject *__pyx_t_2 = NULL;
+  PyObject *__pyx_t_3 = NULL;
+  int __pyx_t_4;
+  PyObject *__pyx_t_5 = NULL;
+  PyObject *__pyx_t_6 = NULL;
+  PyObject *__pyx_t_7 = NULL;
+  PyObject *__pyx_t_8 = NULL;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("import_umath", 0);
+
+  /* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":948
+ * 
+ * cdef inline int import_umath() except -1:
+ *     try:             # <<<<<<<<<<<<<<
+ *         _import_umath()
+ *     except Exception:
+ */
+  {
+    __Pyx_PyThreadState_declare
+    __Pyx_PyThreadState_assign
+    __Pyx_ExceptionSave(&__pyx_t_1, &__pyx_t_2, &__pyx_t_3);
+    __Pyx_XGOTREF(__pyx_t_1);
+    __Pyx_XGOTREF(__pyx_t_2);
+    __Pyx_XGOTREF(__pyx_t_3);
+    /*try:*/ {
+
+      /* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":949
+ * cdef inline int import_umath() except -1:
+ *     try:
+ *         _import_umath()             # <<<<<<<<<<<<<<
+ *     except Exception:
+ *         raise ImportError("numpy.core.umath failed to import")
+ */
+      __pyx_t_4 = _import_umath(); if (unlikely(__pyx_t_4 == ((int)-1))) __PYX_ERR(1, 949, __pyx_L3_error)
+
+      /* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":948
+ * 
+ * cdef inline int import_umath() except -1:
+ *     try:             # <<<<<<<<<<<<<<
+ *         _import_umath()
+ *     except Exception:
+ */
+    }
+    __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0;
+    __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0;
+    __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0;
+    goto __pyx_L8_try_end;
+    __pyx_L3_error:;
+
+    /* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":950
+ *     try:
+ *         _import_umath()
+ *     except Exception:             # <<<<<<<<<<<<<<
+ *         raise ImportError("numpy.core.umath failed to import")
+ * 
+ */
+    __pyx_t_4 = __Pyx_PyErr_ExceptionMatches(((PyObject *)(&((PyTypeObject*)PyExc_Exception)[0])));
+    if (__pyx_t_4) {
+      __Pyx_AddTraceback("numpy.import_umath", __pyx_clineno, __pyx_lineno, __pyx_filename);
+      if (__Pyx_GetException(&__pyx_t_5, &__pyx_t_6, &__pyx_t_7) < 0) __PYX_ERR(1, 950, __pyx_L5_except_error)
+      __Pyx_GOTREF(__pyx_t_5);
+      __Pyx_GOTREF(__pyx_t_6);
+      __Pyx_GOTREF(__pyx_t_7);
+
+      /* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":951
+ *         _import_umath()
+ *     except Exception:
+ *         raise ImportError("numpy.core.umath failed to import")             # <<<<<<<<<<<<<<
+ * 
+ * cdef inline int import_ufunc() except -1:
+ */
+      __pyx_t_8 = __Pyx_PyObject_Call(__pyx_builtin_ImportError, __pyx_tuple__3, NULL); if (unlikely(!__pyx_t_8)) __PYX_ERR(1, 951, __pyx_L5_except_error)
+      __Pyx_GOTREF(__pyx_t_8);
+      __Pyx_Raise(__pyx_t_8, 0, 0, 0);
+      __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
+      __PYX_ERR(1, 951, __pyx_L5_except_error)
+    }
+    goto __pyx_L5_except_error;
+    __pyx_L5_except_error:;
+    /* every failure inside the try/except lands here: the exception state saved by __Pyx_ExceptionSave at try entry is handed back via __Pyx_ExceptionReset before jumping to the function-level error exit */
+    /* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":948
+ * 
+ * cdef inline int import_umath() except -1:
+ *     try:             # <<<<<<<<<<<<<<
+ *         _import_umath()
+ *     except Exception:
+ */
+    __Pyx_XGIVEREF(__pyx_t_1);
+    __Pyx_XGIVEREF(__pyx_t_2);
+    __Pyx_XGIVEREF(__pyx_t_3);
+    __Pyx_ExceptionReset(__pyx_t_1, __pyx_t_2, __pyx_t_3);
+    goto __pyx_L1_error;
+    __pyx_L8_try_end:;
+  }
+
+  /* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":947
+ *         raise ImportError("numpy.core.multiarray failed to import")
+ * 
+ * cdef inline int import_umath() except -1:             # <<<<<<<<<<<<<<
+ *     try:
+ *         _import_umath()
+ */
+
+  /* function exit code: the success path sets the result to 0 and skips to __pyx_L0; __pyx_L1_error drops temporaries t_5..t_8, adds a traceback entry and sets the result to -1 */
+  __pyx_r = 0;
+  goto __pyx_L0;
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_5);
+  __Pyx_XDECREF(__pyx_t_6);
+  __Pyx_XDECREF(__pyx_t_7);
+  __Pyx_XDECREF(__pyx_t_8);
+  __Pyx_AddTraceback("numpy.import_umath", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = -1;
+  __pyx_L0:;
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":953
+ *         raise ImportError("numpy.core.umath failed to import")
+ * 
+ * cdef inline int import_ufunc() except -1:             # <<<<<<<<<<<<<<
+ *     try:
+ *         _import_umath()
+ */
+/* Cython-generated C body of the inline helper import_ufunc quoted above. Like import_umath it calls _import_umath() and, on failure with an exception matching Exception, raises an ImportError built from the same __pyx_tuple__3; only the traceback name "numpy.import_ufunc" and the recorded .pxd line numbers differ. Returns 0 on success and -1 on every failure path. */
+static CYTHON_INLINE int __pyx_f_5numpy_import_ufunc(void) {
+  int __pyx_r;
+  __Pyx_RefNannyDeclarations
+  PyObject *__pyx_t_1 = NULL;
+  PyObject *__pyx_t_2 = NULL;
+  PyObject *__pyx_t_3 = NULL;
+  int __pyx_t_4;
+  PyObject *__pyx_t_5 = NULL;
+  PyObject *__pyx_t_6 = NULL;
+  PyObject *__pyx_t_7 = NULL;
+  PyObject *__pyx_t_8 = NULL;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("import_ufunc", 0);
+
+  /* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":954
+ * 
+ * cdef inline int import_ufunc() except -1:
+ *     try:             # <<<<<<<<<<<<<<
+ *         _import_umath()
+ *     except Exception:
+ */
+  {
+    __Pyx_PyThreadState_declare
+    __Pyx_PyThreadState_assign
+    __Pyx_ExceptionSave(&__pyx_t_1, &__pyx_t_2, &__pyx_t_3);
+    __Pyx_XGOTREF(__pyx_t_1);
+    __Pyx_XGOTREF(__pyx_t_2);
+    __Pyx_XGOTREF(__pyx_t_3);
+    /*try:*/ {
+
+      /* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":955
+ * cdef inline int import_ufunc() except -1:
+ *     try:
+ *         _import_umath()             # <<<<<<<<<<<<<<
+ *     except Exception:
+ *         raise ImportError("numpy.core.umath failed to import")
+ */
+      __pyx_t_4 = _import_umath(); if (unlikely(__pyx_t_4 == ((int)-1))) __PYX_ERR(1, 955, __pyx_L3_error)
+
+      /* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":954
+ * 
+ * cdef inline int import_ufunc() except -1:
+ *     try:             # <<<<<<<<<<<<<<
+ *         _import_umath()
+ *     except Exception:
+ */
+    }
+    __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0;
+    __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0;
+    __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0;
+    goto __pyx_L8_try_end;
+    __pyx_L3_error:;
+
+    /* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":956
+ *     try:
+ *         _import_umath()
+ *     except Exception:             # <<<<<<<<<<<<<<
+ *         raise ImportError("numpy.core.umath failed to import")
+ * 
+ */
+    __pyx_t_4 = __Pyx_PyErr_ExceptionMatches(((PyObject *)(&((PyTypeObject*)PyExc_Exception)[0])));
+    if (__pyx_t_4) {
+      __Pyx_AddTraceback("numpy.import_ufunc", __pyx_clineno, __pyx_lineno, __pyx_filename);
+      if (__Pyx_GetException(&__pyx_t_5, &__pyx_t_6, &__pyx_t_7) < 0) __PYX_ERR(1, 956, __pyx_L5_except_error)
+      __Pyx_GOTREF(__pyx_t_5);
+      __Pyx_GOTREF(__pyx_t_6);
+      __Pyx_GOTREF(__pyx_t_7);
+
+      /* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":957
+ *         _import_umath()
+ *     except Exception:
+ *         raise ImportError("numpy.core.umath failed to import")             # <<<<<<<<<<<<<<
+ * 
+ * cdef extern from *:
+ */
+      __pyx_t_8 = __Pyx_PyObject_Call(__pyx_builtin_ImportError, __pyx_tuple__3, NULL); if (unlikely(!__pyx_t_8)) __PYX_ERR(1, 957, __pyx_L5_except_error)
+      __Pyx_GOTREF(__pyx_t_8);
+      __Pyx_Raise(__pyx_t_8, 0, 0, 0);
+      __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
+      __PYX_ERR(1, 957, __pyx_L5_except_error)
+    }
+    goto __pyx_L5_except_error;
+    __pyx_L5_except_error:;
+    /* every failure inside the try/except lands here: the exception state saved by __Pyx_ExceptionSave at try entry is handed back via __Pyx_ExceptionReset before jumping to the function-level error exit */
+    /* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":954
+ * 
+ * cdef inline int import_ufunc() except -1:
+ *     try:             # <<<<<<<<<<<<<<
+ *         _import_umath()
+ *     except Exception:
+ */
+    __Pyx_XGIVEREF(__pyx_t_1);
+    __Pyx_XGIVEREF(__pyx_t_2);
+    __Pyx_XGIVEREF(__pyx_t_3);
+    __Pyx_ExceptionReset(__pyx_t_1, __pyx_t_2, __pyx_t_3);
+    goto __pyx_L1_error;
+    __pyx_L8_try_end:;
+  }
+
+  /* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":953
+ *         raise ImportError("numpy.core.umath failed to import")
+ * 
+ * cdef inline int import_ufunc() except -1:             # <<<<<<<<<<<<<<
+ *     try:
+ *         _import_umath()
+ */
+
+  /* function exit code: the success path sets the result to 0 and skips to __pyx_L0; __pyx_L1_error drops temporaries t_5..t_8, adds a traceback entry and sets the result to -1 */
+  __pyx_r = 0;
+  goto __pyx_L0;
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_5);
+  __Pyx_XDECREF(__pyx_t_6);
+  __Pyx_XDECREF(__pyx_t_7);
+  __Pyx_XDECREF(__pyx_t_8);
+  __Pyx_AddTraceback("numpy.import_ufunc", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = -1;
+  __pyx_L0:;
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":967
+ * 
+ * 
+ * cdef inline bint is_timedelta64_object(object obj):             # <<<<<<<<<<<<<<
+ *     """
+ *     Cython equivalent of `isinstance(obj, np.timedelta64)`
+ */
+/* Cython-generated C body of the inline helper is_timedelta64_object quoted above: returns the result of PyObject_TypeCheck(obj, &PyTimedeltaArrType_Type) as an int. No error path exists. */
+static CYTHON_INLINE int __pyx_f_5numpy_is_timedelta64_object(PyObject *__pyx_v_obj) {
+  int __pyx_r;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("is_timedelta64_object", 0);
+
+  /* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":979
+ *     bool
+ *     """
+ *     return PyObject_TypeCheck(obj, &PyTimedeltaArrType_Type)             # <<<<<<<<<<<<<<
+ * 
+ * 
+ */
+  __pyx_r = PyObject_TypeCheck(__pyx_v_obj, (&PyTimedeltaArrType_Type));
+  goto __pyx_L0;
+
+  /* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":967
+ * 
+ * 
+ * cdef inline bint is_timedelta64_object(object obj):             # <<<<<<<<<<<<<<
+ *     """
+ *     Cython equivalent of `isinstance(obj, np.timedelta64)`
+ */
+
+  /* function exit code: single return path, no error label */
+  __pyx_L0:;
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":982
+ * 
+ * 
+ * cdef inline bint is_datetime64_object(object obj):             # <<<<<<<<<<<<<<
+ *     """
+ *     Cython equivalent of `isinstance(obj, np.datetime64)`
+ */
+/* Cython-generated C body of the inline helper is_datetime64_object quoted above: returns the result of PyObject_TypeCheck(obj, &PyDatetimeArrType_Type) as an int. No error path exists. */
+static CYTHON_INLINE int __pyx_f_5numpy_is_datetime64_object(PyObject *__pyx_v_obj) {
+  int __pyx_r;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("is_datetime64_object", 0);
+
+  /* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":994
+ *     bool
+ *     """
+ *     return PyObject_TypeCheck(obj, &PyDatetimeArrType_Type)             # <<<<<<<<<<<<<<
+ * 
+ * 
+ */
+  __pyx_r = PyObject_TypeCheck(__pyx_v_obj, (&PyDatetimeArrType_Type));
+  goto __pyx_L0;
+
+  /* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":982
+ * 
+ * 
+ * cdef inline bint is_datetime64_object(object obj):             # <<<<<<<<<<<<<<
+ *     """
+ *     Cython equivalent of `isinstance(obj, np.datetime64)`
+ */
+
+  /* function exit code: single return path, no error label */
+  __pyx_L0:;
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":997
+ * 
+ * 
+ * cdef inline npy_datetime get_datetime64_value(object obj) nogil:             # <<<<<<<<<<<<<<
+ *     """
+ *     returns the int64 value underlying scalar numpy datetime64 object
+ */
+/* Cython-generated C body of the inline helper get_datetime64_value quoted above: casts obj to a PyDatetimeScalarObject pointer and returns its obval field. NOTE(review): no type check is visible in this function, so callers presumably must pass a datetime64 scalar; confirm at call sites. */
+static CYTHON_INLINE npy_datetime __pyx_f_5numpy_get_datetime64_value(PyObject *__pyx_v_obj) {
+  npy_datetime __pyx_r;
+
+  /* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":1004
+ *     also needed.  That can be found using `get_datetime64_unit`.
+ *     """
+ *     return (<PyDatetimeScalarObject*>obj).obval             # <<<<<<<<<<<<<<
+ * 
+ * 
+ */
+  __pyx_r = ((PyDatetimeScalarObject *)__pyx_v_obj)->obval;
+  goto __pyx_L0;
+
+  /* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":997
+ * 
+ * 
+ * cdef inline npy_datetime get_datetime64_value(object obj) nogil:             # <<<<<<<<<<<<<<
+ *     """
+ *     returns the int64 value underlying scalar numpy datetime64 object
+ */
+
+  /* function exit code: single return path, no error label and no RefNanny context in this function */
+  __pyx_L0:;
+  return __pyx_r;
+}
+
+/* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":1007
+ * 
+ * 
+ * cdef inline npy_timedelta get_timedelta64_value(object obj) nogil:             # <<<<<<<<<<<<<<
+ *     """
+ *     returns the int64 value underlying scalar numpy timedelta64 object
+ */
+
+static CYTHON_INLINE npy_timedelta __pyx_f_5numpy_get_timedelta64_value(PyObject *__pyx_v_obj) {
+  /* C form of numpy's `get_timedelta64_value` (numpy/__init__.pxd:1007-1011),
+   * declared nogil in the .pxd. Reads the `obval` field of a timedelta64
+   * scalar. The cast is unchecked: no type test is performed here.
+   */
+  PyTimedeltaScalarObject *td_scalar = (PyTimedeltaScalarObject *)__pyx_v_obj;
+  return td_scalar->obval;
+}
+
+/* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":1014
+ * 
+ * 
+ * cdef inline NPY_DATETIMEUNIT get_datetime64_unit(object obj) nogil:             # <<<<<<<<<<<<<<
+ *     """
+ *     returns the unit part of the dtype for a numpy datetime64 object.
+ */
+
+static CYTHON_INLINE NPY_DATETIMEUNIT __pyx_f_5numpy_get_datetime64_unit(PyObject *__pyx_v_obj) {
+  /* C form of numpy's `get_datetime64_unit` (numpy/__init__.pxd:1014-1018),
+   * declared nogil in the .pxd. Reads `obmeta.base` from a datetime64 scalar
+   * and converts it to NPY_DATETIMEUNIT. The object cast is unchecked.
+   */
+  PyDatetimeScalarObject *dt_scalar = (PyDatetimeScalarObject *)__pyx_v_obj;
+  return (NPY_DATETIMEUNIT)dt_scalar->obmeta.base;
+}
+
+/* "View.MemoryView":122
+ *         cdef bint dtype_is_object
+ * 
+ *     def __cinit__(array self, tuple shape, Py_ssize_t itemsize, format not None,             # <<<<<<<<<<<<<<
+ *                   mode="c", bint allocate_buffer=True):
+ * 
+ */
+
+/* Python wrapper
+ *
+ * Argument-unpacking front end for array.__cinit__. Collects the five
+ * parameters (shape, itemsize, format, mode, allocate_buffer) from the
+ * positional tuple and the optional keyword dict, converts itemsize and
+ * allocate_buffer to C values, checks shape and format, and forwards to the
+ * implementation function. Three to five arguments are accepted; mode
+ * defaults to "c" and allocate_buffer to true.
+ * Returns the implementation's result, or -1 with an exception set when
+ * unpacking or validation fails.
+ */
+static int __pyx_array___cinit__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
+static int __pyx_array___cinit__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
+  PyObject *__pyx_v_shape = 0;
+  Py_ssize_t __pyx_v_itemsize;
+  PyObject *__pyx_v_format = 0;
+  PyObject *__pyx_v_mode = 0;
+  int __pyx_v_allocate_buffer;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  int __pyx_r;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("__cinit__ (wrapper)", 0);
+  {
+    static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_shape,&__pyx_n_s_itemsize,&__pyx_n_s_format,&__pyx_n_s_mode,&__pyx_n_s_allocate_buffer,0};
+    PyObject* values[5] = {0,0,0,0,0}; /* one slot per parameter, in signature order */
+    values[3] = ((PyObject *)__pyx_n_s_c); /* pre-load the default for mode (mode="c" in the .pyx signature) */
+    if (unlikely(__pyx_kwds)) { /* keyword dict present: mix positional and keyword arguments */
+      Py_ssize_t kw_args;
+      const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args);
+      switch (pos_args) { /* copy the positionals; each case falls through to the lower indices */
+        case  5: values[4] = PyTuple_GET_ITEM(__pyx_args, 4);
+        CYTHON_FALLTHROUGH;
+        case  4: values[3] = PyTuple_GET_ITEM(__pyx_args, 3);
+        CYTHON_FALLTHROUGH;
+        case  3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
+        CYTHON_FALLTHROUGH;
+        case  2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
+        CYTHON_FALLTHROUGH;
+        case  1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
+        CYTHON_FALLTHROUGH;
+        case  0: break;
+        default: goto __pyx_L5_argtuple_error;
+      }
+      kw_args = PyDict_Size(__pyx_kwds); /* decremented for every keyword matched below */
+      switch (pos_args) { /* look up by keyword every parameter not supplied positionally */
+        case  0:
+        if (likely((values[0] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_shape)) != 0)) kw_args--;
+        else goto __pyx_L5_argtuple_error;
+        CYTHON_FALLTHROUGH;
+        case  1:
+        if (likely((values[1] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_itemsize)) != 0)) kw_args--;
+        else {
+          __Pyx_RaiseArgtupleInvalid("__cinit__", 0, 3, 5, 1); __PYX_ERR(2, 122, __pyx_L3_error)
+        }
+        CYTHON_FALLTHROUGH;
+        case  2:
+        if (likely((values[2] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_format)) != 0)) kw_args--;
+        else {
+          __Pyx_RaiseArgtupleInvalid("__cinit__", 0, 3, 5, 2); __PYX_ERR(2, 122, __pyx_L3_error)
+        }
+        CYTHON_FALLTHROUGH;
+        case  3: /* mode is optional: a missing keyword keeps the default */
+        if (kw_args > 0) {
+          PyObject* value = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_mode);
+          if (value) { values[3] = value; kw_args--; }
+        }
+        CYTHON_FALLTHROUGH;
+        case  4: /* allocate_buffer is optional: a missing keyword leaves values[4] NULL */
+        if (kw_args > 0) {
+          PyObject* value = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_allocate_buffer);
+          if (value) { values[4] = value; kw_args--; }
+        }
+      }
+      if (unlikely(kw_args > 0)) { /* keywords left unmatched are handed to the generic keyword parser */
+        if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "__cinit__") < 0)) __PYX_ERR(2, 122, __pyx_L3_error)
+      }
+    } else { /* positional-only call: exactly 3, 4 or 5 arguments are accepted */
+      switch (PyTuple_GET_SIZE(__pyx_args)) {
+        case  5: values[4] = PyTuple_GET_ITEM(__pyx_args, 4);
+        CYTHON_FALLTHROUGH;
+        case  4: values[3] = PyTuple_GET_ITEM(__pyx_args, 3);
+        CYTHON_FALLTHROUGH;
+        case  3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
+        values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
+        values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
+        break;
+        default: goto __pyx_L5_argtuple_error;
+      }
+    }
+    __pyx_v_shape = ((PyObject*)values[0]);
+    __pyx_v_itemsize = __Pyx_PyIndex_AsSsize_t(values[1]); if (unlikely((__pyx_v_itemsize == (Py_ssize_t)-1) && PyErr_Occurred())) __PYX_ERR(2, 122, __pyx_L3_error)
+    __pyx_v_format = values[2];
+    __pyx_v_mode = values[3];
+    if (values[4]) { /* allocate_buffer supplied: convert by truthiness */
+      __pyx_v_allocate_buffer = __Pyx_PyObject_IsTrue(values[4]); if (unlikely((__pyx_v_allocate_buffer == (int)-1) && PyErr_Occurred())) __PYX_ERR(2, 123, __pyx_L3_error)
+    } else {
+
+      /* "View.MemoryView":123
+ * 
+ *     def __cinit__(array self, tuple shape, Py_ssize_t itemsize, format not None,
+ *                   mode="c", bint allocate_buffer=True):             # <<<<<<<<<<<<<<
+ * 
+ *         cdef int idx
+ */
+      __pyx_v_allocate_buffer = ((int)1); /* default: allocate_buffer=True */
+    }
+  }
+  goto __pyx_L4_argument_unpacking_done;
+  __pyx_L5_argtuple_error:; /* wrong positional count: raise, then fall through to the shared error exit */
+  __Pyx_RaiseArgtupleInvalid("__cinit__", 0, 3, 5, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(2, 122, __pyx_L3_error)
+  __pyx_L3_error:; /* unpacking failed: record the traceback frame and return -1 */
+  __Pyx_AddTraceback("View.MemoryView.array.__cinit__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __Pyx_RefNannyFinishContext();
+  return -1;
+  __pyx_L4_argument_unpacking_done:;
+  if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_shape), (&PyTuple_Type), 1, "shape", 1))) __PYX_ERR(2, 122, __pyx_L1_error) /* shape is declared `tuple`; NOTE(review): the flag arguments presumably permit None - confirm against __Pyx_ArgTypeTest */
+  if (unlikely(((PyObject *)__pyx_v_format) == Py_None)) { /* enforces `format not None` from the signature */
+    PyErr_Format(PyExc_TypeError, "Argument '%.200s' must not be None", "format"); __PYX_ERR(2, 122, __pyx_L1_error)
+  }
+  __pyx_r = __pyx_array___pyx_pf_15View_dot_MemoryView_5array___cinit__(((struct __pyx_array_obj *)__pyx_v_self), __pyx_v_shape, __pyx_v_itemsize, __pyx_v_format, __pyx_v_mode, __pyx_v_allocate_buffer);
+
+  /* "View.MemoryView":122
+ *         cdef bint dtype_is_object
+ * 
+ *     def __cinit__(array self, tuple shape, Py_ssize_t itemsize, format not None,             # <<<<<<<<<<<<<<
+ *                   mode="c", bint allocate_buffer=True):
+ * 
+ */
+
+  /* function exit code */
+  goto __pyx_L0;
+  __pyx_L1_error:; /* validation of shape/format failed after unpacking */
+  __pyx_r = -1;
+  __pyx_L0:;
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array___cinit__(struct __pyx_array_obj *__pyx_v_self, PyObject *__pyx_v_shape, Py_ssize_t __pyx_v_itemsize, PyObject *__pyx_v_format, PyObject *__pyx_v_mode, int __pyx_v_allocate_buffer) { /* Body of array.__cinit__.
+ *
+ * Stores ndim and itemsize, converts format to bytes when it is not already
+ * bytes, allocates one block that holds both shape and strides, copies and
+ * validates every dimension of shape, selects the memory order from mode
+ * ('c' or 'fortran'), and, when allocate_buffer is true, mallocs the data
+ * buffer (filling it with Py_None references when format == b'O').
+ *
+ * Raises ValueError for an empty shape, itemsize <= 0, a dimension <= 0 or
+ * an unknown mode; MemoryError when an allocation fails.
+ * Returns 0 on success, -1 with a Python exception set on failure.
+ */
+  int __pyx_v_idx;
+  Py_ssize_t __pyx_v_i;
+  Py_ssize_t __pyx_v_dim;
+  PyObject **__pyx_v_p;
+  char __pyx_v_order;
+  int __pyx_r;
+  __Pyx_RefNannyDeclarations
+  Py_ssize_t __pyx_t_1;
+  int __pyx_t_2;
+  PyObject *__pyx_t_3 = NULL;
+  int __pyx_t_4;
+  PyObject *__pyx_t_5 = NULL;
+  PyObject *__pyx_t_6 = NULL;
+  char *__pyx_t_7;
+  int __pyx_t_8;
+  Py_ssize_t __pyx_t_9;
+  PyObject *__pyx_t_10 = NULL;
+  Py_ssize_t __pyx_t_11;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("__cinit__", 0);
+  __Pyx_INCREF(__pyx_v_format); /* own reference: format may be rebound below and is released at __pyx_L0 */
+
+  /* "View.MemoryView":129
+ *         cdef PyObject **p
+ * 
+ *         self.ndim = <int> len(shape)             # <<<<<<<<<<<<<<
+ *         self.itemsize = itemsize
+ * 
+ */
+  if (unlikely(__pyx_v_shape == Py_None)) { /* len(None) must raise TypeError, as in Python */
+    PyErr_SetString(PyExc_TypeError, "object of type 'NoneType' has no len()");
+    __PYX_ERR(2, 129, __pyx_L1_error)
+  }
+  __pyx_t_1 = PyTuple_GET_SIZE(__pyx_v_shape); if (unlikely(__pyx_t_1 == ((Py_ssize_t)-1))) __PYX_ERR(2, 129, __pyx_L1_error)
+  __pyx_v_self->ndim = ((int)__pyx_t_1); /* tuple length narrowed to int by the <int> cast in the .pyx */
+
+  /* "View.MemoryView":130
+ * 
+ *         self.ndim = <int> len(shape)
+ *         self.itemsize = itemsize             # <<<<<<<<<<<<<<
+ * 
+ *         if not self.ndim:
+ */
+  __pyx_v_self->itemsize = __pyx_v_itemsize;
+
+  /* "View.MemoryView":132
+ *         self.itemsize = itemsize
+ * 
+ *         if not self.ndim:             # <<<<<<<<<<<<<<
+ *             raise ValueError("Empty shape tuple for cython.array")
+ * 
+ */
+  __pyx_t_2 = ((!(__pyx_v_self->ndim != 0)) != 0);
+  if (unlikely(__pyx_t_2)) {
+
+    /* "View.MemoryView":133
+ * 
+ *         if not self.ndim:
+ *             raise ValueError("Empty shape tuple for cython.array")             # <<<<<<<<<<<<<<
+ * 
+ *         if itemsize <= 0:
+ */
+    __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__4, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(2, 133, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_3);
+    __Pyx_Raise(__pyx_t_3, 0, 0, 0);
+    __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+    __PYX_ERR(2, 133, __pyx_L1_error)
+
+    /* "View.MemoryView":132
+ *         self.itemsize = itemsize
+ * 
+ *         if not self.ndim:             # <<<<<<<<<<<<<<
+ *             raise ValueError("Empty shape tuple for cython.array")
+ * 
+ */
+  }
+
+  /* "View.MemoryView":135
+ *             raise ValueError("Empty shape tuple for cython.array")
+ * 
+ *         if itemsize <= 0:             # <<<<<<<<<<<<<<
+ *             raise ValueError("itemsize <= 0 for cython.array")
+ * 
+ */
+  __pyx_t_2 = ((__pyx_v_itemsize <= 0) != 0);
+  if (unlikely(__pyx_t_2)) {
+
+    /* "View.MemoryView":136
+ * 
+ *         if itemsize <= 0:
+ *             raise ValueError("itemsize <= 0 for cython.array")             # <<<<<<<<<<<<<<
+ * 
+ *         if not isinstance(format, bytes):
+ */
+    __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__5, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(2, 136, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_3);
+    __Pyx_Raise(__pyx_t_3, 0, 0, 0);
+    __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+    __PYX_ERR(2, 136, __pyx_L1_error)
+
+    /* "View.MemoryView":135
+ *             raise ValueError("Empty shape tuple for cython.array")
+ * 
+ *         if itemsize <= 0:             # <<<<<<<<<<<<<<
+ *             raise ValueError("itemsize <= 0 for cython.array")
+ * 
+ */
+  }
+
+  /* "View.MemoryView":138
+ *             raise ValueError("itemsize <= 0 for cython.array")
+ * 
+ *         if not isinstance(format, bytes):             # <<<<<<<<<<<<<<
+ *             format = format.encode('ASCII')
+ *         self._format = format  # keep a reference to the byte string
+ */
+  __pyx_t_2 = PyBytes_Check(__pyx_v_format); 
+  __pyx_t_4 = ((!(__pyx_t_2 != 0)) != 0);
+  if (__pyx_t_4) {
+
+    /* "View.MemoryView":139
+ * 
+ *         if not isinstance(format, bytes):
+ *             format = format.encode('ASCII')             # <<<<<<<<<<<<<<
+ *         self._format = format  # keep a reference to the byte string
+ *         self.format = self._format
+ */
+    __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_format, __pyx_n_s_encode); if (unlikely(!__pyx_t_5)) __PYX_ERR(2, 139, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_5);
+    __pyx_t_6 = NULL;
+    if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_5))) { /* bound method: split into function + self so it can be called directly */
+      __pyx_t_6 = PyMethod_GET_SELF(__pyx_t_5);
+      if (likely(__pyx_t_6)) {
+        PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_5);
+        __Pyx_INCREF(__pyx_t_6);
+        __Pyx_INCREF(function);
+        __Pyx_DECREF_SET(__pyx_t_5, function);
+      }
+    }
+    __pyx_t_3 = (__pyx_t_6) ? __Pyx_PyObject_Call2Args(__pyx_t_5, __pyx_t_6, __pyx_n_s_ASCII) : __Pyx_PyObject_CallOneArg(__pyx_t_5, __pyx_n_s_ASCII);
+    __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0;
+    if (unlikely(!__pyx_t_3)) __PYX_ERR(2, 139, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_3);
+    __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+    __Pyx_DECREF_SET(__pyx_v_format, __pyx_t_3); /* format now refers to the result of encode('ASCII') */
+    __pyx_t_3 = 0;
+
+    /* "View.MemoryView":138
+ *             raise ValueError("itemsize <= 0 for cython.array")
+ * 
+ *         if not isinstance(format, bytes):             # <<<<<<<<<<<<<<
+ *             format = format.encode('ASCII')
+ *         self._format = format  # keep a reference to the byte string
+ */
+  }
+
+  /* "View.MemoryView":140
+ *         if not isinstance(format, bytes):
+ *             format = format.encode('ASCII')
+ *         self._format = format  # keep a reference to the byte string             # <<<<<<<<<<<<<<
+ *         self.format = self._format
+ * 
+ */
+  if (!(likely(PyBytes_CheckExact(__pyx_v_format))||((__pyx_v_format) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "bytes", Py_TYPE(__pyx_v_format)->tp_name), 0))) __PYX_ERR(2, 140, __pyx_L1_error) /* only exact bytes or None pass; anything else (including a bytes subclass) raises TypeError */
+  __pyx_t_3 = __pyx_v_format;
+  __Pyx_INCREF(__pyx_t_3);
+  __Pyx_GIVEREF(__pyx_t_3);
+  __Pyx_GOTREF(__pyx_v_self->_format);
+  __Pyx_DECREF(__pyx_v_self->_format); /* release the previous value of the attribute before overwriting it */
+  __pyx_v_self->_format = ((PyObject*)__pyx_t_3);
+  __pyx_t_3 = 0;
+
+  /* "View.MemoryView":141
+ *             format = format.encode('ASCII')
+ *         self._format = format  # keep a reference to the byte string
+ *         self.format = self._format             # <<<<<<<<<<<<<<
+ * 
+ * 
+ */
+  if (unlikely(__pyx_v_self->_format == Py_None)) {
+    PyErr_SetString(PyExc_TypeError, "expected bytes, NoneType found");
+    __PYX_ERR(2, 141, __pyx_L1_error)
+  }
+  __pyx_t_7 = __Pyx_PyBytes_AsWritableString(__pyx_v_self->_format); if (unlikely((!__pyx_t_7) && PyErr_Occurred())) __PYX_ERR(2, 141, __pyx_L1_error)
+  __pyx_v_self->format = __pyx_t_7; /* char* obtained from the _format bytes object stored just above */
+
+  /* "View.MemoryView":144
+ * 
+ * 
+ *         self._shape = <Py_ssize_t *> PyObject_Malloc(sizeof(Py_ssize_t)*self.ndim*2)             # <<<<<<<<<<<<<<
+ *         self._strides = self._shape + self.ndim
+ * 
+ */
+  __pyx_v_self->_shape = ((Py_ssize_t *)PyObject_Malloc((((sizeof(Py_ssize_t)) * __pyx_v_self->ndim) * 2))); /* one allocation of 2 * ndim entries */
+
+  /* "View.MemoryView":145
+ * 
+ *         self._shape = <Py_ssize_t *> PyObject_Malloc(sizeof(Py_ssize_t)*self.ndim*2)
+ *         self._strides = self._shape + self.ndim             # <<<<<<<<<<<<<<
+ * 
+ *         if not self._shape:
+ */
+  __pyx_v_self->_strides = (__pyx_v_self->_shape + __pyx_v_self->ndim); /* strides start ndim entries into the same block; computed before the NULL check below */
+
+  /* "View.MemoryView":147
+ *         self._strides = self._shape + self.ndim
+ * 
+ *         if not self._shape:             # <<<<<<<<<<<<<<
+ *             raise MemoryError("unable to allocate shape and strides.")
+ * 
+ */
+  __pyx_t_4 = ((!(__pyx_v_self->_shape != 0)) != 0);
+  if (unlikely(__pyx_t_4)) {
+
+    /* "View.MemoryView":148
+ * 
+ *         if not self._shape:
+ *             raise MemoryError("unable to allocate shape and strides.")             # <<<<<<<<<<<<<<
+ * 
+ * 
+ */
+    __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_MemoryError, __pyx_tuple__6, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(2, 148, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_3);
+    __Pyx_Raise(__pyx_t_3, 0, 0, 0);
+    __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+    __PYX_ERR(2, 148, __pyx_L1_error)
+
+    /* "View.MemoryView":147
+ *         self._strides = self._shape + self.ndim
+ * 
+ *         if not self._shape:             # <<<<<<<<<<<<<<
+ *             raise MemoryError("unable to allocate shape and strides.")
+ * 
+ */
+  }
+
+  /* "View.MemoryView":151
+ * 
+ * 
+ *         for idx, dim in enumerate(shape):             # <<<<<<<<<<<<<<
+ *             if dim <= 0:
+ *                 raise ValueError("Invalid shape in axis %d: %d." % (idx, dim))
+ */
+  __pyx_t_8 = 0; /* enumerate() counter */
+  __pyx_t_3 = __pyx_v_shape; __Pyx_INCREF(__pyx_t_3); __pyx_t_1 = 0; /* hold the tuple for the duration of the loop; __pyx_t_1 is the item index */
+  for (;;) {
+    if (__pyx_t_1 >= PyTuple_GET_SIZE(__pyx_t_3)) break;
+    #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS
+    __pyx_t_5 = PyTuple_GET_ITEM(__pyx_t_3, __pyx_t_1); __Pyx_INCREF(__pyx_t_5); __pyx_t_1++; if (unlikely(0 < 0)) __PYX_ERR(2, 151, __pyx_L1_error) /* "0 < 0" is never true, so this error branch cannot fire */
+    #else
+    __pyx_t_5 = PySequence_ITEM(__pyx_t_3, __pyx_t_1); __pyx_t_1++; if (unlikely(!__pyx_t_5)) __PYX_ERR(2, 151, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_5);
+    #endif
+    __pyx_t_9 = __Pyx_PyIndex_AsSsize_t(__pyx_t_5); if (unlikely((__pyx_t_9 == (Py_ssize_t)-1) && PyErr_Occurred())) __PYX_ERR(2, 151, __pyx_L1_error)
+    __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+    __pyx_v_dim = __pyx_t_9;
+    __pyx_v_idx = __pyx_t_8;
+    __pyx_t_8 = (__pyx_t_8 + 1);
+
+    /* "View.MemoryView":152
+ * 
+ *         for idx, dim in enumerate(shape):
+ *             if dim <= 0:             # <<<<<<<<<<<<<<
+ *                 raise ValueError("Invalid shape in axis %d: %d." % (idx, dim))
+ *             self._shape[idx] = dim
+ */
+    __pyx_t_4 = ((__pyx_v_dim <= 0) != 0);
+    if (unlikely(__pyx_t_4)) {
+
+      /* "View.MemoryView":153
+ *         for idx, dim in enumerate(shape):
+ *             if dim <= 0:
+ *                 raise ValueError("Invalid shape in axis %d: %d." % (idx, dim))             # <<<<<<<<<<<<<<
+ *             self._shape[idx] = dim
+ * 
+ */
+      __pyx_t_5 = __Pyx_PyInt_From_int(__pyx_v_idx); if (unlikely(!__pyx_t_5)) __PYX_ERR(2, 153, __pyx_L1_error)
+      __Pyx_GOTREF(__pyx_t_5);
+      __pyx_t_6 = PyInt_FromSsize_t(__pyx_v_dim); if (unlikely(!__pyx_t_6)) __PYX_ERR(2, 153, __pyx_L1_error)
+      __Pyx_GOTREF(__pyx_t_6);
+      __pyx_t_10 = PyTuple_New(2); if (unlikely(!__pyx_t_10)) __PYX_ERR(2, 153, __pyx_L1_error)
+      __Pyx_GOTREF(__pyx_t_10);
+      __Pyx_GIVEREF(__pyx_t_5);
+      PyTuple_SET_ITEM(__pyx_t_10, 0, __pyx_t_5);
+      __Pyx_GIVEREF(__pyx_t_6);
+      PyTuple_SET_ITEM(__pyx_t_10, 1, __pyx_t_6);
+      __pyx_t_5 = 0; /* both references now belong to the (idx, dim) tuple */
+      __pyx_t_6 = 0;
+      __pyx_t_6 = __Pyx_PyString_Format(__pyx_kp_s_Invalid_shape_in_axis_d_d, __pyx_t_10); if (unlikely(!__pyx_t_6)) __PYX_ERR(2, 153, __pyx_L1_error)
+      __Pyx_GOTREF(__pyx_t_6);
+      __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0;
+      __pyx_t_10 = __Pyx_PyObject_CallOneArg(__pyx_builtin_ValueError, __pyx_t_6); if (unlikely(!__pyx_t_10)) __PYX_ERR(2, 153, __pyx_L1_error)
+      __Pyx_GOTREF(__pyx_t_10);
+      __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
+      __Pyx_Raise(__pyx_t_10, 0, 0, 0);
+      __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0;
+      __PYX_ERR(2, 153, __pyx_L1_error)
+
+      /* "View.MemoryView":152
+ * 
+ *         for idx, dim in enumerate(shape):
+ *             if dim <= 0:             # <<<<<<<<<<<<<<
+ *                 raise ValueError("Invalid shape in axis %d: %d." % (idx, dim))
+ *             self._shape[idx] = dim
+ */
+    }
+
+    /* "View.MemoryView":154
+ *             if dim <= 0:
+ *                 raise ValueError("Invalid shape in axis %d: %d." % (idx, dim))
+ *             self._shape[idx] = dim             # <<<<<<<<<<<<<<
+ * 
+ *         cdef char order
+ */
+    (__pyx_v_self->_shape[__pyx_v_idx]) = __pyx_v_dim;
+
+    /* "View.MemoryView":151
+ * 
+ * 
+ *         for idx, dim in enumerate(shape):             # <<<<<<<<<<<<<<
+ *             if dim <= 0:
+ *                 raise ValueError("Invalid shape in axis %d: %d." % (idx, dim))
+ */
+  }
+  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; /* loop finished: drop the reference to the shape tuple */
+
+  /* "View.MemoryView":157
+ * 
+ *         cdef char order
+ *         if mode == 'fortran':             # <<<<<<<<<<<<<<
+ *             order = b'F'
+ *             self.mode = u'fortran'
+ */
+  __pyx_t_4 = (__Pyx_PyString_Equals(__pyx_v_mode, __pyx_n_s_fortran, Py_EQ)); if (unlikely(__pyx_t_4 < 0)) __PYX_ERR(2, 157, __pyx_L1_error)
+  if (__pyx_t_4) {
+
+    /* "View.MemoryView":158
+ *         cdef char order
+ *         if mode == 'fortran':
+ *             order = b'F'             # <<<<<<<<<<<<<<
+ *             self.mode = u'fortran'
+ *         elif mode == 'c':
+ */
+    __pyx_v_order = 'F';
+
+    /* "View.MemoryView":159
+ *         if mode == 'fortran':
+ *             order = b'F'
+ *             self.mode = u'fortran'             # <<<<<<<<<<<<<<
+ *         elif mode == 'c':
+ *             order = b'C'
+ */
+    __Pyx_INCREF(__pyx_n_u_fortran);
+    __Pyx_GIVEREF(__pyx_n_u_fortran);
+    __Pyx_GOTREF(__pyx_v_self->mode);
+    __Pyx_DECREF(__pyx_v_self->mode);
+    __pyx_v_self->mode = __pyx_n_u_fortran; /* the attribute stores the unicode constant, not the caller's object */
+
+    /* "View.MemoryView":157
+ * 
+ *         cdef char order
+ *         if mode == 'fortran':             # <<<<<<<<<<<<<<
+ *             order = b'F'
+ *             self.mode = u'fortran'
+ */
+    goto __pyx_L10;
+  }
+
+  /* "View.MemoryView":160
+ *             order = b'F'
+ *             self.mode = u'fortran'
+ *         elif mode == 'c':             # <<<<<<<<<<<<<<
+ *             order = b'C'
+ *             self.mode = u'c'
+ */
+  __pyx_t_4 = (__Pyx_PyString_Equals(__pyx_v_mode, __pyx_n_s_c, Py_EQ)); if (unlikely(__pyx_t_4 < 0)) __PYX_ERR(2, 160, __pyx_L1_error)
+  if (likely(__pyx_t_4)) {
+
+    /* "View.MemoryView":161
+ *             self.mode = u'fortran'
+ *         elif mode == 'c':
+ *             order = b'C'             # <<<<<<<<<<<<<<
+ *             self.mode = u'c'
+ *         else:
+ */
+    __pyx_v_order = 'C';
+
+    /* "View.MemoryView":162
+ *         elif mode == 'c':
+ *             order = b'C'
+ *             self.mode = u'c'             # <<<<<<<<<<<<<<
+ *         else:
+ *             raise ValueError("Invalid mode, expected 'c' or 'fortran', got %s" % mode)
+ */
+    __Pyx_INCREF(__pyx_n_u_c);
+    __Pyx_GIVEREF(__pyx_n_u_c);
+    __Pyx_GOTREF(__pyx_v_self->mode);
+    __Pyx_DECREF(__pyx_v_self->mode);
+    __pyx_v_self->mode = __pyx_n_u_c;
+
+    /* "View.MemoryView":160
+ *             order = b'F'
+ *             self.mode = u'fortran'
+ *         elif mode == 'c':             # <<<<<<<<<<<<<<
+ *             order = b'C'
+ *             self.mode = u'c'
+ */
+    goto __pyx_L10;
+  }
+
+  /* "View.MemoryView":164
+ *             self.mode = u'c'
+ *         else:
+ *             raise ValueError("Invalid mode, expected 'c' or 'fortran', got %s" % mode)             # <<<<<<<<<<<<<<
+ * 
+ *         self.len = fill_contig_strides_array(self._shape, self._strides,
+ */
+  /*else*/ {
+    __pyx_t_3 = __Pyx_PyString_FormatSafe(__pyx_kp_s_Invalid_mode_expected_c_or_fortr, __pyx_v_mode); if (unlikely(!__pyx_t_3)) __PYX_ERR(2, 164, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_3);
+    __pyx_t_10 = __Pyx_PyObject_CallOneArg(__pyx_builtin_ValueError, __pyx_t_3); if (unlikely(!__pyx_t_10)) __PYX_ERR(2, 164, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_10);
+    __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+    __Pyx_Raise(__pyx_t_10, 0, 0, 0);
+    __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0;
+    __PYX_ERR(2, 164, __pyx_L1_error)
+  }
+  __pyx_L10:; /* join point of the 'fortran' and 'c' branches; order is set on both */
+
+  /* "View.MemoryView":166
+ *             raise ValueError("Invalid mode, expected 'c' or 'fortran', got %s" % mode)
+ * 
+ *         self.len = fill_contig_strides_array(self._shape, self._strides,             # <<<<<<<<<<<<<<
+ *                                              itemsize, self.ndim, order)
+ * 
+ */
+  __pyx_v_self->len = __pyx_fill_contig_strides_array(__pyx_v_self->_shape, __pyx_v_self->_strides, __pyx_v_itemsize, __pyx_v_self->ndim, __pyx_v_order); /* NOTE(review): the result is used below as the malloc size, so presumably the total byte count - confirm against fill_contig_strides_array */
+
+  /* "View.MemoryView":169
+ *                                              itemsize, self.ndim, order)
+ * 
+ *         self.free_data = allocate_buffer             # <<<<<<<<<<<<<<
+ *         self.dtype_is_object = format == b'O'
+ *         if allocate_buffer:
+ */
+  __pyx_v_self->free_data = __pyx_v_allocate_buffer;
+
+  /* "View.MemoryView":170
+ * 
+ *         self.free_data = allocate_buffer
+ *         self.dtype_is_object = format == b'O'             # <<<<<<<<<<<<<<
+ *         if allocate_buffer:
+ * 
+ */
+  __pyx_t_10 = PyObject_RichCompare(__pyx_v_format, __pyx_n_b_O, Py_EQ); __Pyx_XGOTREF(__pyx_t_10); if (unlikely(!__pyx_t_10)) __PYX_ERR(2, 170, __pyx_L1_error)
+  __pyx_t_4 = __Pyx_PyObject_IsTrue(__pyx_t_10); if (unlikely((__pyx_t_4 == (int)-1) && PyErr_Occurred())) __PYX_ERR(2, 170, __pyx_L1_error)
+  __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0;
+  __pyx_v_self->dtype_is_object = __pyx_t_4;
+
+  /* "View.MemoryView":171
+ *         self.free_data = allocate_buffer
+ *         self.dtype_is_object = format == b'O'
+ *         if allocate_buffer:             # <<<<<<<<<<<<<<
+ * 
+ * 
+ */
+  __pyx_t_4 = (__pyx_v_allocate_buffer != 0);
+  if (__pyx_t_4) {
+
+    /* "View.MemoryView":174
+ * 
+ * 
+ *             self.data = <char *>malloc(self.len)             # <<<<<<<<<<<<<<
+ *             if not self.data:
+ *                 raise MemoryError("unable to allocate array data.")
+ */
+    __pyx_v_self->data = ((char *)malloc(__pyx_v_self->len)); /* plain malloc: the buffer contents are uninitialised at this point */
+
+    /* "View.MemoryView":175
+ * 
+ *             self.data = <char *>malloc(self.len)
+ *             if not self.data:             # <<<<<<<<<<<<<<
+ *                 raise MemoryError("unable to allocate array data.")
+ * 
+ */
+    __pyx_t_4 = ((!(__pyx_v_self->data != 0)) != 0);
+    if (unlikely(__pyx_t_4)) {
+
+      /* "View.MemoryView":176
+ *             self.data = <char *>malloc(self.len)
+ *             if not self.data:
+ *                 raise MemoryError("unable to allocate array data.")             # <<<<<<<<<<<<<<
+ * 
+ *             if self.dtype_is_object:
+ */
+      __pyx_t_10 = __Pyx_PyObject_Call(__pyx_builtin_MemoryError, __pyx_tuple__7, NULL); if (unlikely(!__pyx_t_10)) __PYX_ERR(2, 176, __pyx_L1_error)
+      __Pyx_GOTREF(__pyx_t_10);
+      __Pyx_Raise(__pyx_t_10, 0, 0, 0);
+      __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0;
+      __PYX_ERR(2, 176, __pyx_L1_error)
+
+      /* "View.MemoryView":175
+ * 
+ *             self.data = <char *>malloc(self.len)
+ *             if not self.data:             # <<<<<<<<<<<<<<
+ *                 raise MemoryError("unable to allocate array data.")
+ * 
+ */
+    }
+
+    /* "View.MemoryView":178
+ *                 raise MemoryError("unable to allocate array data.")
+ * 
+ *             if self.dtype_is_object:             # <<<<<<<<<<<<<<
+ *                 p = <PyObject **> self.data
+ *                 for i in range(self.len / itemsize):
+ */
+    __pyx_t_4 = (__pyx_v_self->dtype_is_object != 0);
+    if (__pyx_t_4) {
+
+      /* "View.MemoryView":179
+ * 
+ *             if self.dtype_is_object:
+ *                 p = <PyObject **> self.data             # <<<<<<<<<<<<<<
+ *                 for i in range(self.len / itemsize):
+ *                     p[i] = Py_None
+ */
+      __pyx_v_p = ((PyObject **)__pyx_v_self->data); /* view the buffer as an array of object pointers */
+
+      /* "View.MemoryView":180
+ *             if self.dtype_is_object:
+ *                 p = <PyObject **> self.data
+ *                 for i in range(self.len / itemsize):             # <<<<<<<<<<<<<<
+ *                     p[i] = Py_None
+ *                     Py_INCREF(Py_None)
+ */
+      if (unlikely(__pyx_v_itemsize == 0)) { /* generic division guard; itemsize <= 0 already raised ValueError earlier in this function */
+        PyErr_SetString(PyExc_ZeroDivisionError, "integer division or modulo by zero");
+        __PYX_ERR(2, 180, __pyx_L1_error)
+      }
+      else if (sizeof(Py_ssize_t) == sizeof(long) && (!(((Py_ssize_t)-1) > 0)) && unlikely(__pyx_v_itemsize == (Py_ssize_t)-1)  && unlikely(UNARY_NEG_WOULD_OVERFLOW(__pyx_v_self->len))) {
+        PyErr_SetString(PyExc_OverflowError, "value too large to perform division");
+        __PYX_ERR(2, 180, __pyx_L1_error)
+      }
+      __pyx_t_1 = __Pyx_div_Py_ssize_t(__pyx_v_self->len, __pyx_v_itemsize); /* number of slots to initialise: len / itemsize */
+      __pyx_t_9 = __pyx_t_1;
+      for (__pyx_t_11 = 0; __pyx_t_11 < __pyx_t_9; __pyx_t_11+=1) {
+        __pyx_v_i = __pyx_t_11;
+
+        /* "View.MemoryView":181
+ *                 p = <PyObject **> self.data
+ *                 for i in range(self.len / itemsize):
+ *                     p[i] = Py_None             # <<<<<<<<<<<<<<
+ *                     Py_INCREF(Py_None)
+ * 
+ */
+        (__pyx_v_p[__pyx_v_i]) = Py_None;
+
+        /* "View.MemoryView":182
+ *                 for i in range(self.len / itemsize):
+ *                     p[i] = Py_None
+ *                     Py_INCREF(Py_None)             # <<<<<<<<<<<<<<
+ * 
+ *     @cname('getbuffer')
+ */
+        Py_INCREF(Py_None); /* one reference per slot that now holds Py_None */
+      }
+
+      /* "View.MemoryView":178
+ *                 raise MemoryError("unable to allocate array data.")
+ * 
+ *             if self.dtype_is_object:             # <<<<<<<<<<<<<<
+ *                 p = <PyObject **> self.data
+ *                 for i in range(self.len / itemsize):
+ */
+    }
+
+    /* "View.MemoryView":171
+ *         self.free_data = allocate_buffer
+ *         self.dtype_is_object = format == b'O'
+ *         if allocate_buffer:             # <<<<<<<<<<<<<<
+ * 
+ * 
+ */
+  }
+
+  /* "View.MemoryView":122
+ *         cdef bint dtype_is_object
+ * 
+ *     def __cinit__(array self, tuple shape, Py_ssize_t itemsize, format not None,             # <<<<<<<<<<<<<<
+ *                   mode="c", bint allocate_buffer=True):
+ * 
+ */
+
+  /* function exit code
+ *
+ * Success returns 0. The error label releases any live temporaries, adds a
+ * traceback frame and returns -1. Both paths drop the local reference to
+ * format. The error path does not free _shape or data here.
+ * NOTE(review): presumably the type's dealloc releases them - confirm.
+ */
+  __pyx_r = 0;
+  goto __pyx_L0;
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_3);
+  __Pyx_XDECREF(__pyx_t_5);
+  __Pyx_XDECREF(__pyx_t_6);
+  __Pyx_XDECREF(__pyx_t_10);
+  __Pyx_AddTraceback("View.MemoryView.array.__cinit__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = -1;
+  __pyx_L0:;
+  __Pyx_XDECREF(__pyx_v_format); /* balances the INCREF at entry (or releases the encoded replacement) */
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "View.MemoryView":185
+ * 
+ *     @cname('getbuffer')
+ *     def __getbuffer__(self, Py_buffer *info, int flags):             # <<<<<<<<<<<<<<
+ *         cdef int bufmode = -1
+ *         if self.mode == u"c":
+ */
+
+/* Python wrapper */
+static CYTHON_UNUSED int __pyx_array_getbuffer(PyObject *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags); /*proto*/
+static CYTHON_UNUSED int __pyx_array_getbuffer(PyObject *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags) {
+  /* Thin entry point for array.__getbuffer__: casts self to the array
+   * struct and hands info and flags unchanged to the implementation,
+   * returning whatever status it reports.
+   */
+  int status;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("__getbuffer__ (wrapper)", 0);
+
+  status = __pyx_array___pyx_pf_15View_dot_MemoryView_5array_2__getbuffer__(
+      (struct __pyx_array_obj *)__pyx_v_self, __pyx_v_info, __pyx_v_flags);
+
+  __Pyx_RefNannyFinishContext();
+  return status;
+}
+
+static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array_2__getbuffer__(struct __pyx_array_obj *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags) { /* Implementation of array.__getbuffer__.
+ * Fills *info from the array's fields (data, len, ndim, _shape, _strides,
+ * itemsize, format) after checking the requested flags against the array's
+ * mode. Returns 0 on success. Returns -1 with a Python exception set when
+ * info is NULL (BufferError), when the contiguity check fails (ValueError),
+ * or when a mode comparison itself fails.
+ */
+  int __pyx_v_bufmode;
+  int __pyx_r;
+  __Pyx_RefNannyDeclarations
+  int __pyx_t_1;
+  int __pyx_t_2;
+  PyObject *__pyx_t_3 = NULL;
+  char *__pyx_t_4;
+  Py_ssize_t __pyx_t_5;
+  int __pyx_t_6;
+  Py_ssize_t *__pyx_t_7;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  if (__pyx_v_info == NULL) { /* reject a NULL view before info is dereferenced below */
+    PyErr_SetString(PyExc_BufferError, "PyObject_GetBuffer: view==NULL argument is obsolete");
+    return -1;
+  }
+  __Pyx_RefNannySetupContext("__getbuffer__", 0);
+  __pyx_v_info->obj = Py_None; __Pyx_INCREF(Py_None); /* placeholder owner: replaced by self near the end, released on the error path */
+  __Pyx_GIVEREF(__pyx_v_info->obj);
+
+  /* "View.MemoryView":186
+ *     @cname('getbuffer')
+ *     def __getbuffer__(self, Py_buffer *info, int flags):
+ *         cdef int bufmode = -1             # <<<<<<<<<<<<<<
+ *         if self.mode == u"c":
+ *             bufmode = PyBUF_C_CONTIGUOUS | PyBUF_ANY_CONTIGUOUS
+ */
+  __pyx_v_bufmode = -1; /* keeps this value when mode equals neither "c" nor "fortran" */
+
+  /* "View.MemoryView":187
+ *     def __getbuffer__(self, Py_buffer *info, int flags):
+ *         cdef int bufmode = -1
+ *         if self.mode == u"c":             # <<<<<<<<<<<<<<
+ *             bufmode = PyBUF_C_CONTIGUOUS | PyBUF_ANY_CONTIGUOUS
+ *         elif self.mode == u"fortran":
+ */
+  __pyx_t_1 = (__Pyx_PyUnicode_Equals(__pyx_v_self->mode, __pyx_n_u_c, Py_EQ)); if (unlikely(__pyx_t_1 < 0)) __PYX_ERR(2, 187, __pyx_L1_error)
+  __pyx_t_2 = (__pyx_t_1 != 0);
+  if (__pyx_t_2) {
+
+    /* "View.MemoryView":188
+ *         cdef int bufmode = -1
+ *         if self.mode == u"c":
+ *             bufmode = PyBUF_C_CONTIGUOUS | PyBUF_ANY_CONTIGUOUS             # <<<<<<<<<<<<<<
+ *         elif self.mode == u"fortran":
+ *             bufmode = PyBUF_F_CONTIGUOUS | PyBUF_ANY_CONTIGUOUS
+ */
+    __pyx_v_bufmode = (PyBUF_C_CONTIGUOUS | PyBUF_ANY_CONTIGUOUS);
+
+    /* "View.MemoryView":187
+ *     def __getbuffer__(self, Py_buffer *info, int flags):
+ *         cdef int bufmode = -1
+ *         if self.mode == u"c":             # <<<<<<<<<<<<<<
+ *             bufmode = PyBUF_C_CONTIGUOUS | PyBUF_ANY_CONTIGUOUS
+ *         elif self.mode == u"fortran":
+ */
+    goto __pyx_L3;
+  }
+
+  /* "View.MemoryView":189
+ *         if self.mode == u"c":
+ *             bufmode = PyBUF_C_CONTIGUOUS | PyBUF_ANY_CONTIGUOUS
+ *         elif self.mode == u"fortran":             # <<<<<<<<<<<<<<
+ *             bufmode = PyBUF_F_CONTIGUOUS | PyBUF_ANY_CONTIGUOUS
+ *         if not (flags & bufmode):
+ */
+  __pyx_t_2 = (__Pyx_PyUnicode_Equals(__pyx_v_self->mode, __pyx_n_u_fortran, Py_EQ)); if (unlikely(__pyx_t_2 < 0)) __PYX_ERR(2, 189, __pyx_L1_error)
+  __pyx_t_1 = (__pyx_t_2 != 0);
+  if (__pyx_t_1) {
+
+    /* "View.MemoryView":190
+ *             bufmode = PyBUF_C_CONTIGUOUS | PyBUF_ANY_CONTIGUOUS
+ *         elif self.mode == u"fortran":
+ *             bufmode = PyBUF_F_CONTIGUOUS | PyBUF_ANY_CONTIGUOUS             # <<<<<<<<<<<<<<
+ *         if not (flags & bufmode):
+ *             raise ValueError("Can only create a buffer that is contiguous in memory.")
+ */
+    __pyx_v_bufmode = (PyBUF_F_CONTIGUOUS | PyBUF_ANY_CONTIGUOUS);
+
+    /* "View.MemoryView":189
+ *         if self.mode == u"c":
+ *             bufmode = PyBUF_C_CONTIGUOUS | PyBUF_ANY_CONTIGUOUS
+ *         elif self.mode == u"fortran":             # <<<<<<<<<<<<<<
+ *             bufmode = PyBUF_F_CONTIGUOUS | PyBUF_ANY_CONTIGUOUS
+ *         if not (flags & bufmode):
+ */
+  }
+  __pyx_L3:;
+
+  /* "View.MemoryView":191
+ *         elif self.mode == u"fortran":
+ *             bufmode = PyBUF_F_CONTIGUOUS | PyBUF_ANY_CONTIGUOUS
+ *         if not (flags & bufmode):             # <<<<<<<<<<<<<<
+ *             raise ValueError("Can only create a buffer that is contiguous in memory.")
+ *         info.buf = self.data
+ */
+  __pyx_t_1 = ((!((__pyx_v_flags & __pyx_v_bufmode) != 0)) != 0); /* true when flags shares no bit with bufmode; NOTE(review): if bufmode is still -1, any nonzero flags passes on two's-complement targets */
+  if (unlikely(__pyx_t_1)) {
+
+    /* "View.MemoryView":192
+ *             bufmode = PyBUF_F_CONTIGUOUS | PyBUF_ANY_CONTIGUOUS
+ *         if not (flags & bufmode):
+ *             raise ValueError("Can only create a buffer that is contiguous in memory.")             # <<<<<<<<<<<<<<
+ *         info.buf = self.data
+ *         info.len = self.len
+ */
+    __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__8, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(2, 192, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_3);
+    __Pyx_Raise(__pyx_t_3, 0, 0, 0);
+    __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+    __PYX_ERR(2, 192, __pyx_L1_error)
+
+    /* "View.MemoryView":191
+ *         elif self.mode == u"fortran":
+ *             bufmode = PyBUF_F_CONTIGUOUS | PyBUF_ANY_CONTIGUOUS
+ *         if not (flags & bufmode):             # <<<<<<<<<<<<<<
+ *             raise ValueError("Can only create a buffer that is contiguous in memory.")
+ *         info.buf = self.data
+ */
+  }
+
+  /* "View.MemoryView":193
+ *         if not (flags & bufmode):
+ *             raise ValueError("Can only create a buffer that is contiguous in memory.")
+ *         info.buf = self.data             # <<<<<<<<<<<<<<
+ *         info.len = self.len
+ *         info.ndim = self.ndim
+ */
+  __pyx_t_4 = __pyx_v_self->data;
+  __pyx_v_info->buf = __pyx_t_4;
+
+  /* "View.MemoryView":194
+ *             raise ValueError("Can only create a buffer that is contiguous in memory.")
+ *         info.buf = self.data
+ *         info.len = self.len             # <<<<<<<<<<<<<<
+ *         info.ndim = self.ndim
+ *         info.shape = self._shape
+ */
+  __pyx_t_5 = __pyx_v_self->len;
+  __pyx_v_info->len = __pyx_t_5;
+
+  /* "View.MemoryView":195
+ *         info.buf = self.data
+ *         info.len = self.len
+ *         info.ndim = self.ndim             # <<<<<<<<<<<<<<
+ *         info.shape = self._shape
+ *         info.strides = self._strides
+ */
+  __pyx_t_6 = __pyx_v_self->ndim;
+  __pyx_v_info->ndim = __pyx_t_6;
+
+  /* "View.MemoryView":196
+ *         info.len = self.len
+ *         info.ndim = self.ndim
+ *         info.shape = self._shape             # <<<<<<<<<<<<<<
+ *         info.strides = self._strides
+ *         info.suboffsets = NULL
+ */
+  __pyx_t_7 = __pyx_v_self->_shape; /* pointer is shared with the array object, not copied */
+  __pyx_v_info->shape = __pyx_t_7;
+
+  /* "View.MemoryView":197
+ *         info.ndim = self.ndim
+ *         info.shape = self._shape
+ *         info.strides = self._strides             # <<<<<<<<<<<<<<
+ *         info.suboffsets = NULL
+ *         info.itemsize = self.itemsize
+ */
+  __pyx_t_7 = __pyx_v_self->_strides; /* pointer is shared with the array object, not copied */
+  __pyx_v_info->strides = __pyx_t_7;
+
+  /* "View.MemoryView":198
+ *         info.shape = self._shape
+ *         info.strides = self._strides
+ *         info.suboffsets = NULL             # <<<<<<<<<<<<<<
+ *         info.itemsize = self.itemsize
+ *         info.readonly = 0
+ */
+  __pyx_v_info->suboffsets = NULL;
+
+  /* "View.MemoryView":199
+ *         info.strides = self._strides
+ *         info.suboffsets = NULL
+ *         info.itemsize = self.itemsize             # <<<<<<<<<<<<<<
+ *         info.readonly = 0
+ * 
+ */
+  __pyx_t_5 = __pyx_v_self->itemsize;
+  __pyx_v_info->itemsize = __pyx_t_5;
+
+  /* "View.MemoryView":200
+ *         info.suboffsets = NULL
+ *         info.itemsize = self.itemsize
+ *         info.readonly = 0             # <<<<<<<<<<<<<<
+ * 
+ *         if flags & PyBUF_FORMAT:
+ */
+  __pyx_v_info->readonly = 0; /* set unconditionally: the export is never marked read-only here */
+
+  /* "View.MemoryView":202
+ *         info.readonly = 0
+ * 
+ *         if flags & PyBUF_FORMAT:             # <<<<<<<<<<<<<<
+ *             info.format = self.format
+ *         else:
+ */
+  __pyx_t_1 = ((__pyx_v_flags & PyBUF_FORMAT) != 0);
+  if (__pyx_t_1) {
+
+    /* "View.MemoryView":203
+ * 
+ *         if flags & PyBUF_FORMAT:
+ *             info.format = self.format             # <<<<<<<<<<<<<<
+ *         else:
+ *             info.format = NULL
+ */
+    __pyx_t_4 = __pyx_v_self->format;
+    __pyx_v_info->format = __pyx_t_4;
+
+    /* "View.MemoryView":202
+ *         info.readonly = 0
+ * 
+ *         if flags & PyBUF_FORMAT:             # <<<<<<<<<<<<<<
+ *             info.format = self.format
+ *         else:
+ */
+    goto __pyx_L5;
+  }
+
+  /* "View.MemoryView":205
+ *             info.format = self.format
+ *         else:
+ *             info.format = NULL             # <<<<<<<<<<<<<<
+ * 
+ *         info.obj = self
+ */
+  /*else*/ {
+    __pyx_v_info->format = NULL;
+  }
+  __pyx_L5:;
+
+  /* "View.MemoryView":207
+ *             info.format = NULL
+ * 
+ *         info.obj = self             # <<<<<<<<<<<<<<
+ * 
+ *     __pyx_getbuffer = capsule(<void *> &__pyx_array_getbuffer, "getbuffer(obj, view, flags)")
+ */
+  __Pyx_INCREF(((PyObject *)__pyx_v_self)); /* the view holds its own reference to the exporting array */
+  __Pyx_GIVEREF(((PyObject *)__pyx_v_self));
+  __Pyx_GOTREF(__pyx_v_info->obj);
+  __Pyx_DECREF(__pyx_v_info->obj); /* drops the previous info->obj (the Py_None placeholder set at function entry) */
+  __pyx_v_info->obj = ((PyObject *)__pyx_v_self);
+
+  /* "View.MemoryView":185
+ * 
+ *     @cname('getbuffer')
+ *     def __getbuffer__(self, Py_buffer *info, int flags):             # <<<<<<<<<<<<<<
+ *         cdef int bufmode = -1
+ *         if self.mode == u"c":
+ */
+
+  /* function exit code: success sets the result to 0 and jumps to __pyx_L0;
+   * the error label releases the temporary, records a traceback, sets the
+   * result to -1 and clears info->obj so the caller is left with no owner.
+   */
+  __pyx_r = 0;
+  goto __pyx_L0;
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_3);
+  __Pyx_AddTraceback("View.MemoryView.array.__getbuffer__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = -1;
+  if (__pyx_v_info->obj != NULL) {
+    __Pyx_GOTREF(__pyx_v_info->obj);
+    __Pyx_DECREF(__pyx_v_info->obj); __pyx_v_info->obj = 0;
+  }
+  goto __pyx_L2;
+  __pyx_L0:;
+  if (__pyx_v_info->obj == Py_None) { /* success path: release the owner only if it is still Py_None */
+    __Pyx_GOTREF(__pyx_v_info->obj);
+    __Pyx_DECREF(__pyx_v_info->obj); __pyx_v_info->obj = 0;
+  }
+  __pyx_L2:;
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "View.MemoryView":211
+ *     __pyx_getbuffer = capsule(<void *> &__pyx_array_getbuffer, "getbuffer(obj, view, flags)")
+ * 
+ *     def __dealloc__(array self):             # <<<<<<<<<<<<<<
+ *         if self.callback_free_data != NULL:
+ *             self.callback_free_data(self.data)
+ */
+
+/* Python wrapper for array.__dealloc__.
+ * Casts self to struct __pyx_array_obj * and calls the implementation
+ * function; nothing is returned.
+ */
+static void __pyx_array___dealloc__(PyObject *__pyx_v_self); /*proto*/
+static void __pyx_array___dealloc__(PyObject *__pyx_v_self) {
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("__dealloc__ (wrapper)", 0);
+  __pyx_array___pyx_pf_15View_dot_MemoryView_5array_4__dealloc__(((struct __pyx_array_obj *)__pyx_v_self));
+
+  /* function exit code */
+  __Pyx_RefNannyFinishContext();
+}
+
+static void __pyx_array___pyx_pf_15View_dot_MemoryView_5array_4__dealloc__(struct __pyx_array_obj *__pyx_v_self) { /* Implementation of array.__dealloc__.
+ * Releases the data buffer through exactly one of two routes: the
+ * callback_free_data hook when it is set, otherwise free() when free_data is
+ * true (neither runs when both are unset). _shape is then always passed to
+ * PyObject_Free.
+ */
+  __Pyx_RefNannyDeclarations
+  int __pyx_t_1;
+  __Pyx_RefNannySetupContext("__dealloc__", 0);
+
+  /* "View.MemoryView":212
+ * 
+ *     def __dealloc__(array self):
+ *         if self.callback_free_data != NULL:             # <<<<<<<<<<<<<<
+ *             self.callback_free_data(self.data)
+ *         elif self.free_data:
+ */
+  __pyx_t_1 = ((__pyx_v_self->callback_free_data != NULL) != 0);
+  if (__pyx_t_1) {
+
+    /* "View.MemoryView":213
+ *     def __dealloc__(array self):
+ *         if self.callback_free_data != NULL:
+ *             self.callback_free_data(self.data)             # <<<<<<<<<<<<<<
+ *         elif self.free_data:
+ *             if self.dtype_is_object:
+ */
+    __pyx_v_self->callback_free_data(__pyx_v_self->data); /* the hook takes precedence: free_data is not consulted on this path */
+
+    /* "View.MemoryView":212
+ * 
+ *     def __dealloc__(array self):
+ *         if self.callback_free_data != NULL:             # <<<<<<<<<<<<<<
+ *             self.callback_free_data(self.data)
+ *         elif self.free_data:
+ */
+    goto __pyx_L3;
+  }
+
+  /* "View.MemoryView":214
+ *         if self.callback_free_data != NULL:
+ *             self.callback_free_data(self.data)
+ *         elif self.free_data:             # <<<<<<<<<<<<<<
+ *             if self.dtype_is_object:
+ *                 refcount_objects_in_slice(self.data, self._shape,
+ */
+  __pyx_t_1 = (__pyx_v_self->free_data != 0);
+  if (__pyx_t_1) {
+
+    /* "View.MemoryView":215
+ *             self.callback_free_data(self.data)
+ *         elif self.free_data:
+ *             if self.dtype_is_object:             # <<<<<<<<<<<<<<
+ *                 refcount_objects_in_slice(self.data, self._shape,
+ *                                           self._strides, self.ndim, False)
+ */
+    __pyx_t_1 = (__pyx_v_self->dtype_is_object != 0);
+    if (__pyx_t_1) {
+
+      /* "View.MemoryView":216
+ *         elif self.free_data:
+ *             if self.dtype_is_object:
+ *                 refcount_objects_in_slice(self.data, self._shape,             # <<<<<<<<<<<<<<
+ *                                           self._strides, self.ndim, False)
+ *             free(self.data)
+ */
+      __pyx_memoryview_refcount_objects_in_slice(__pyx_v_self->data, __pyx_v_self->_shape, __pyx_v_self->_strides, __pyx_v_self->ndim, 0); /* final 0 is the Cython-level False; presumably selects decref of the stored objects - TODO confirm against the helper */
+
+      /* "View.MemoryView":215
+ *             self.callback_free_data(self.data)
+ *         elif self.free_data:
+ *             if self.dtype_is_object:             # <<<<<<<<<<<<<<
+ *                 refcount_objects_in_slice(self.data, self._shape,
+ *                                           self._strides, self.ndim, False)
+ */
+    }
+
+    /* "View.MemoryView":218
+ *                 refcount_objects_in_slice(self.data, self._shape,
+ *                                           self._strides, self.ndim, False)
+ *             free(self.data)             # <<<<<<<<<<<<<<
+ *         PyObject_Free(self._shape)
+ * 
+ */
+    free(__pyx_v_self->data);
+
+    /* "View.MemoryView":214
+ *         if self.callback_free_data != NULL:
+ *             self.callback_free_data(self.data)
+ *         elif self.free_data:             # <<<<<<<<<<<<<<
+ *             if self.dtype_is_object:
+ *                 refcount_objects_in_slice(self.data, self._shape,
+ */
+  }
+  __pyx_L3:;
+
+  /* "View.MemoryView":219
+ *                                           self._strides, self.ndim, False)
+ *             free(self.data)
+ *         PyObject_Free(self._shape)             # <<<<<<<<<<<<<<
+ * 
+ *     @property
+ */
+  PyObject_Free(__pyx_v_self->_shape); /* runs on every path; NOTE(review): _strides is not freed separately here - presumably it shares this allocation, confirm in __cinit__ */
+
+  /* "View.MemoryView":211
+ *     __pyx_getbuffer = capsule(<void *> &__pyx_array_getbuffer, "getbuffer(obj, view, flags)")
+ * 
+ *     def __dealloc__(array self):             # <<<<<<<<<<<<<<
+ *         if self.callback_free_data != NULL:
+ *             self.callback_free_data(self.data)
+ */
+
+  /* function exit code */
+  __Pyx_RefNannyFinishContext();
+}
+
+/* "View.MemoryView":222
+ * 
+ *     @property
+ *     def memview(self):             # <<<<<<<<<<<<<<
+ *         return self.get_memview()
+ * 
+ */
+
+/* Python wrapper for the getter of the array.memview property.
+ * Casts self to struct __pyx_array_obj * and returns whatever the
+ * implementation function returns.
+ */
+static PyObject *__pyx_pw_15View_dot_MemoryView_5array_7memview_1__get__(PyObject *__pyx_v_self); /*proto*/
+static PyObject *__pyx_pw_15View_dot_MemoryView_5array_7memview_1__get__(PyObject *__pyx_v_self) {
+  PyObject *__pyx_r = 0;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("__get__ (wrapper)", 0);
+  __pyx_r = __pyx_pf_15View_dot_MemoryView_5array_7memview___get__(((struct __pyx_array_obj *)__pyx_v_self));
+
+  /* function exit code */
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+static PyObject *__pyx_pf_15View_dot_MemoryView_5array_7memview___get__(struct __pyx_array_obj *__pyx_v_self) { /* Getter body for array.memview.
+ * Calls get_memview through the object's vtable and returns its result.
+ * Returns NULL after recording a traceback when that call returns NULL.
+ */
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  PyObject *__pyx_t_1 = NULL;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("__get__", 0);
+
+  /* "View.MemoryView":223
+ *     @property
+ *     def memview(self):
+ *         return self.get_memview()             # <<<<<<<<<<<<<<
+ * 
+ *     @cname('get_memview')
+ */
+  __Pyx_XDECREF(__pyx_r);
+  __pyx_t_1 = ((struct __pyx_vtabstruct_array *)__pyx_v_self->__pyx_vtab)->get_memview(__pyx_v_self); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 223, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_r = __pyx_t_1;
+  __pyx_t_1 = 0; /* ownership moved to __pyx_r, so the temporary is cleared */
+  goto __pyx_L0;
+
+  /* "View.MemoryView":222
+ * 
+ *     @property
+ *     def memview(self):             # <<<<<<<<<<<<<<
+ *         return self.get_memview()
+ * 
+ */
+
+  /* function exit code: the error label is reached only through __PYX_ERR above */
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_1);
+  __Pyx_AddTraceback("View.MemoryView.array.memview.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = NULL;
+  __pyx_L0:;
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "View.MemoryView":226
+ * 
+ *     @cname('get_memview')
+ *     cdef get_memview(self):             # <<<<<<<<<<<<<<
+ *         flags =  PyBUF_ANY_CONTIGUOUS|PyBUF_FORMAT|PyBUF_WRITABLE
+ *         return  memoryview(self, flags, self.dtype_is_object)
+ */
+
+static PyObject *__pyx_array_get_memview(struct __pyx_array_obj *__pyx_v_self) { /* Implementation of array.get_memview (cname 'get_memview').
+ * Builds the argument tuple (self, flags, dtype_is_object) with
+ * flags = PyBUF_ANY_CONTIGUOUS | PyBUF_FORMAT | PyBUF_WRITABLE and calls the
+ * memoryview type with it. Returns the call result, or 0 (NULL) after
+ * recording a traceback if any step fails.
+ */
+  int __pyx_v_flags;
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  PyObject *__pyx_t_1 = NULL;
+  PyObject *__pyx_t_2 = NULL;
+  PyObject *__pyx_t_3 = NULL;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("get_memview", 0);
+
+  /* "View.MemoryView":227
+ *     @cname('get_memview')
+ *     cdef get_memview(self):
+ *         flags =  PyBUF_ANY_CONTIGUOUS|PyBUF_FORMAT|PyBUF_WRITABLE             # <<<<<<<<<<<<<<
+ *         return  memoryview(self, flags, self.dtype_is_object)
+ * 
+ */
+  __pyx_v_flags = ((PyBUF_ANY_CONTIGUOUS | PyBUF_FORMAT) | PyBUF_WRITABLE);
+
+  /* "View.MemoryView":228
+ *     cdef get_memview(self):
+ *         flags =  PyBUF_ANY_CONTIGUOUS|PyBUF_FORMAT|PyBUF_WRITABLE
+ *         return  memoryview(self, flags, self.dtype_is_object)             # <<<<<<<<<<<<<<
+ * 
+ *     def __len__(self):
+ */
+  __Pyx_XDECREF(__pyx_r);
+  __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_flags); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 228, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_2 = __Pyx_PyBool_FromLong(__pyx_v_self->dtype_is_object); if (unlikely(!__pyx_t_2)) __PYX_ERR(2, 228, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_2);
+  __pyx_t_3 = PyTuple_New(3); if (unlikely(!__pyx_t_3)) __PYX_ERR(2, 228, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_3);
+  __Pyx_INCREF(((PyObject *)__pyx_v_self)); /* extra reference that the tuple slot takes over below */
+  __Pyx_GIVEREF(((PyObject *)__pyx_v_self));
+  PyTuple_SET_ITEM(__pyx_t_3, 0, ((PyObject *)__pyx_v_self));
+  __Pyx_GIVEREF(__pyx_t_1);
+  PyTuple_SET_ITEM(__pyx_t_3, 1, __pyx_t_1);
+  __Pyx_GIVEREF(__pyx_t_2);
+  PyTuple_SET_ITEM(__pyx_t_3, 2, __pyx_t_2);
+  __pyx_t_1 = 0; /* cleared so the error path's XDECREF does not release a reference now held by the tuple */
+  __pyx_t_2 = 0; /* same reason; the slot is reused for the call result on the next line */
+  __pyx_t_2 = __Pyx_PyObject_Call(((PyObject *)__pyx_memoryview_type), __pyx_t_3, NULL); if (unlikely(!__pyx_t_2)) __PYX_ERR(2, 228, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_2);
+  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+  __pyx_r = __pyx_t_2;
+  __pyx_t_2 = 0;
+  goto __pyx_L0;
+
+  /* "View.MemoryView":226
+ * 
+ *     @cname('get_memview')
+ *     cdef get_memview(self):             # <<<<<<<<<<<<<<
+ *         flags =  PyBUF_ANY_CONTIGUOUS|PyBUF_FORMAT|PyBUF_WRITABLE
+ *         return  memoryview(self, flags, self.dtype_is_object)
+ */
+
+  /* function exit code: the error label releases whichever temporaries are still non-NULL */
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_1);
+  __Pyx_XDECREF(__pyx_t_2);
+  __Pyx_XDECREF(__pyx_t_3);
+  __Pyx_AddTraceback("View.MemoryView.array.get_memview", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = 0;
+  __pyx_L0:;
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "View.MemoryView":230
+ *         return  memoryview(self, flags, self.dtype_is_object)
+ * 
+ *     def __len__(self):             # <<<<<<<<<<<<<<
+ *         return self._shape[0]
+ * 
+ */
+
+/* Python wrapper for array.__len__.
+ * Casts self to struct __pyx_array_obj * and returns the Py_ssize_t produced
+ * by the implementation function.
+ */
+static Py_ssize_t __pyx_array___len__(PyObject *__pyx_v_self); /*proto*/
+static Py_ssize_t __pyx_array___len__(PyObject *__pyx_v_self) {
+  Py_ssize_t __pyx_r;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("__len__ (wrapper)", 0);
+  __pyx_r = __pyx_array___pyx_pf_15View_dot_MemoryView_5array_6__len__(((struct __pyx_array_obj *)__pyx_v_self));
+
+  /* function exit code */
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+static Py_ssize_t __pyx_array___pyx_pf_15View_dot_MemoryView_5array_6__len__(struct __pyx_array_obj *__pyx_v_self) { /* Implementation of array.__len__.
+ * Returns the first entry of _shape. There is no error path in this function.
+ */
+  Py_ssize_t __pyx_r;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("__len__", 0);
+
+  /* "View.MemoryView":231
+ * 
+ *     def __len__(self):
+ *         return self._shape[0]             # <<<<<<<<<<<<<<
+ * 
+ *     def __getattr__(self, attr):
+ */
+  __pyx_r = (__pyx_v_self->_shape[0]); /* read unconditionally; NOTE(review): assumes _shape has at least one entry - confirm in __cinit__ */
+  goto __pyx_L0;
+
+  /* "View.MemoryView":230
+ *         return  memoryview(self, flags, self.dtype_is_object)
+ * 
+ *     def __len__(self):             # <<<<<<<<<<<<<<
+ *         return self._shape[0]
+ * 
+ */
+
+  /* function exit code */
+  __pyx_L0:;
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "View.MemoryView":233
+ *         return self._shape[0]
+ * 
+ *     def __getattr__(self, attr):             # <<<<<<<<<<<<<<
+ *         return getattr(self.memview, attr)
+ * 
+ */
+
+/* Python wrapper for array.__getattr__.
+ * Casts self to struct __pyx_array_obj *, passes attr through, and returns
+ * the implementation function's result.
+ */
+static PyObject *__pyx_array___getattr__(PyObject *__pyx_v_self, PyObject *__pyx_v_attr); /*proto*/
+static PyObject *__pyx_array___getattr__(PyObject *__pyx_v_self, PyObject *__pyx_v_attr) {
+  PyObject *__pyx_r = 0;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("__getattr__ (wrapper)", 0);
+  __pyx_r = __pyx_array___pyx_pf_15View_dot_MemoryView_5array_8__getattr__(((struct __pyx_array_obj *)__pyx_v_self), ((PyObject *)__pyx_v_attr));
+
+  /* function exit code */
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+static PyObject *__pyx_array___pyx_pf_15View_dot_MemoryView_5array_8__getattr__(struct __pyx_array_obj *__pyx_v_self, PyObject *__pyx_v_attr) { /* Implementation of array.__getattr__.
+ * Fetches the memview attribute of self, then looks attr up on that object
+ * and returns the result. Returns NULL after recording a traceback when
+ * either lookup fails.
+ */
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  PyObject *__pyx_t_1 = NULL;
+  PyObject *__pyx_t_2 = NULL;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("__getattr__", 0);
+
+  /* "View.MemoryView":234
+ * 
+ *     def __getattr__(self, attr):
+ *         return getattr(self.memview, attr)             # <<<<<<<<<<<<<<
+ * 
+ *     def __getitem__(self, item):
+ */
+  __Pyx_XDECREF(__pyx_r);
+  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_memview); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 234, __pyx_L1_error) /* self.memview, per the Cython source echoed above */
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_2 = __Pyx_GetAttr(__pyx_t_1, __pyx_v_attr); if (unlikely(!__pyx_t_2)) __PYX_ERR(2, 234, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_2);
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; /* the memview object is only needed for the lookup */
+  __pyx_r = __pyx_t_2;
+  __pyx_t_2 = 0;
+  goto __pyx_L0;
+
+  /* "View.MemoryView":233
+ *         return self._shape[0]
+ * 
+ *     def __getattr__(self, attr):             # <<<<<<<<<<<<<<
+ *         return getattr(self.memview, attr)
+ * 
+ */
+
+  /* function exit code: the error label releases whichever temporaries are still non-NULL */
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_1);
+  __Pyx_XDECREF(__pyx_t_2);
+  __Pyx_AddTraceback("View.MemoryView.array.__getattr__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = NULL;
+  __pyx_L0:;
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "View.MemoryView":236
+ *         return getattr(self.memview, attr)
+ * 
+ *     def __getitem__(self, item):             # <<<<<<<<<<<<<<
+ *         return self.memview[item]
+ * 
+ */
+
+/* Python wrapper for array.__getitem__.
+ * Casts self to struct __pyx_array_obj *, passes item through, and returns
+ * the implementation function's result.
+ */
+static PyObject *__pyx_array___getitem__(PyObject *__pyx_v_self, PyObject *__pyx_v_item); /*proto*/
+static PyObject *__pyx_array___getitem__(PyObject *__pyx_v_self, PyObject *__pyx_v_item) {
+  PyObject *__pyx_r = 0;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("__getitem__ (wrapper)", 0);
+  __pyx_r = __pyx_array___pyx_pf_15View_dot_MemoryView_5array_10__getitem__(((struct __pyx_array_obj *)__pyx_v_self), ((PyObject *)__pyx_v_item));
+
+  /* function exit code */
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+static PyObject *__pyx_array___pyx_pf_15View_dot_MemoryView_5array_10__getitem__(struct __pyx_array_obj *__pyx_v_self, PyObject *__pyx_v_item) { /* Implementation of array.__getitem__.
+ * Fetches the memview attribute of self and indexes it with item, returning
+ * the result. Returns NULL after recording a traceback when the attribute
+ * lookup or the indexing fails.
+ */
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  PyObject *__pyx_t_1 = NULL;
+  PyObject *__pyx_t_2 = NULL;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("__getitem__", 0);
+
+  /* "View.MemoryView":237
+ * 
+ *     def __getitem__(self, item):
+ *         return self.memview[item]             # <<<<<<<<<<<<<<
+ * 
+ *     def __setitem__(self, item, value):
+ */
+  __Pyx_XDECREF(__pyx_r);
+  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_memview); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 237, __pyx_L1_error) /* self.memview, per the Cython source echoed above */
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_2 = __Pyx_PyObject_GetItem(__pyx_t_1, __pyx_v_item); if (unlikely(!__pyx_t_2)) __PYX_ERR(2, 237, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_2);
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; /* the memview object is only needed for the indexing call */
+  __pyx_r = __pyx_t_2;
+  __pyx_t_2 = 0;
+  goto __pyx_L0;
+
+  /* "View.MemoryView":236
+ *         return getattr(self.memview, attr)
+ * 
+ *     def __getitem__(self, item):             # <<<<<<<<<<<<<<
+ *         return self.memview[item]
+ * 
+ */
+
+  /* function exit code: the error label releases whichever temporaries are still non-NULL */
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_1);
+  __Pyx_XDECREF(__pyx_t_2);
+  __Pyx_AddTraceback("View.MemoryView.array.__getitem__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = NULL;
+  __pyx_L0:;
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "View.MemoryView":239
+ *         return self.memview[item]
+ * 
+ *     def __setitem__(self, item, value):             # <<<<<<<<<<<<<<
+ *         self.memview[item] = value
+ * 
+ */
+
+/* Python wrapper for array.__setitem__.
+ * Casts self to struct __pyx_array_obj *, passes item and value through, and
+ * returns the implementation function's int result.
+ */
+static int __pyx_array___setitem__(PyObject *__pyx_v_self, PyObject *__pyx_v_item, PyObject *__pyx_v_value); /*proto*/
+static int __pyx_array___setitem__(PyObject *__pyx_v_self, PyObject *__pyx_v_item, PyObject *__pyx_v_value) {
+  int __pyx_r;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("__setitem__ (wrapper)", 0);
+  __pyx_r = __pyx_array___pyx_pf_15View_dot_MemoryView_5array_12__setitem__(((struct __pyx_array_obj *)__pyx_v_self), ((PyObject *)__pyx_v_item), ((PyObject *)__pyx_v_value));
+
+  /* function exit code */
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array_12__setitem__(struct __pyx_array_obj *__pyx_v_self, PyObject *__pyx_v_item, PyObject *__pyx_v_value) { /* Implementation of array.__setitem__.
+ * Fetches the memview attribute of self and assigns value at item through
+ * PyObject_SetItem. Returns 0 on success, or -1 after recording a traceback
+ * when the attribute lookup or the assignment fails.
+ */
+  int __pyx_r;
+  __Pyx_RefNannyDeclarations
+  PyObject *__pyx_t_1 = NULL;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("__setitem__", 0);
+
+  /* "View.MemoryView":240
+ * 
+ *     def __setitem__(self, item, value):
+ *         self.memview[item] = value             # <<<<<<<<<<<<<<
+ * 
+ * 
+ */
+  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_memview); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 240, __pyx_L1_error) /* self.memview, per the Cython source echoed above */
+  __Pyx_GOTREF(__pyx_t_1);
+  if (unlikely(PyObject_SetItem(__pyx_t_1, __pyx_v_item, __pyx_v_value) < 0)) __PYX_ERR(2, 240, __pyx_L1_error) /* on failure the memview temporary is released at the error label */
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+
+  /* "View.MemoryView":239
+ *         return self.memview[item]
+ * 
+ *     def __setitem__(self, item, value):             # <<<<<<<<<<<<<<
+ *         self.memview[item] = value
+ * 
+ */
+
+  /* function exit code: 0 on the fall-through path, -1 via the error label */
+  __pyx_r = 0;
+  goto __pyx_L0;
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_1);
+  __Pyx_AddTraceback("View.MemoryView.array.__setitem__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = -1;
+  __pyx_L0:;
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "(tree fragment)":1
+ * def __reduce_cython__(self):             # <<<<<<<<<<<<<<
+ *     raise TypeError("no default __reduce__ due to non-trivial __cinit__")
+ * def __setstate_cython__(self, __pyx_state):
+ */
+
+/* Python wrapper for array.__reduce_cython__.
+ * Ignores the unused second argument, casts self to struct __pyx_array_obj *
+ * and returns the implementation function's result.
+ */
+static PyObject *__pyx_pw___pyx_array_1__reduce_cython__(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused); /*proto*/
+static PyObject *__pyx_pw___pyx_array_1__reduce_cython__(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused) {
+  PyObject *__pyx_r = 0;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("__reduce_cython__ (wrapper)", 0);
+  __pyx_r = __pyx_pf___pyx_array___reduce_cython__(((struct __pyx_array_obj *)__pyx_v_self));
+
+  /* function exit code */
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+static PyObject *__pyx_pf___pyx_array___reduce_cython__(CYTHON_UNUSED struct __pyx_array_obj *__pyx_v_self) { /* Implementation of array.__reduce_cython__.
+ * Never succeeds: it constructs a TypeError, raises it, and always returns
+ * NULL after recording a traceback. self is not used.
+ */
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  PyObject *__pyx_t_1 = NULL;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("__reduce_cython__", 0);
+
+  /* "(tree fragment)":2
+ * def __reduce_cython__(self):
+ *     raise TypeError("no default __reduce__ due to non-trivial __cinit__")             # <<<<<<<<<<<<<<
+ * def __setstate_cython__(self, __pyx_state):
+ *     raise TypeError("no default __reduce__ due to non-trivial __cinit__")
+ */
+  __pyx_t_1 = __Pyx_PyObject_Call(__pyx_builtin_TypeError, __pyx_tuple__9, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 2, __pyx_L1_error) /* __pyx_tuple__9 presumably holds the message echoed above - defined outside this view */
+  __Pyx_GOTREF(__pyx_t_1);
+  __Pyx_Raise(__pyx_t_1, 0, 0, 0);
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __PYX_ERR(2, 2, __pyx_L1_error)
+
+  /* "(tree fragment)":1
+ * def __reduce_cython__(self):             # <<<<<<<<<<<<<<
+ *     raise TypeError("no default __reduce__ due to non-trivial __cinit__")
+ * def __setstate_cython__(self, __pyx_state):
+ */
+
+  /* function exit code: there is no success label, every path ends here with NULL */
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_1);
+  __Pyx_AddTraceback("View.MemoryView.array.__reduce_cython__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = NULL;
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "(tree fragment)":3
+ * def __reduce_cython__(self):
+ *     raise TypeError("no default __reduce__ due to non-trivial __cinit__")
+ * def __setstate_cython__(self, __pyx_state):             # <<<<<<<<<<<<<<
+ *     raise TypeError("no default __reduce__ due to non-trivial __cinit__")
+ */
+
+/* Python wrapper for array.__setstate_cython__.
+ * Casts self to struct __pyx_array_obj *, passes __pyx_state through, and
+ * returns the implementation function's result.
+ */
+static PyObject *__pyx_pw___pyx_array_3__setstate_cython__(PyObject *__pyx_v_self, PyObject *__pyx_v___pyx_state); /*proto*/
+static PyObject *__pyx_pw___pyx_array_3__setstate_cython__(PyObject *__pyx_v_self, PyObject *__pyx_v___pyx_state) {
+  PyObject *__pyx_r = 0;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("__setstate_cython__ (wrapper)", 0);
+  __pyx_r = __pyx_pf___pyx_array_2__setstate_cython__(((struct __pyx_array_obj *)__pyx_v_self), ((PyObject *)__pyx_v___pyx_state));
+
+  /* function exit code */
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+static PyObject *__pyx_pf___pyx_array_2__setstate_cython__(CYTHON_UNUSED struct __pyx_array_obj *__pyx_v_self, CYTHON_UNUSED PyObject *__pyx_v___pyx_state) { /* Implementation of array.__setstate_cython__.
+ * Never succeeds: it constructs a TypeError, raises it, and always returns
+ * NULL after recording a traceback. Neither self nor __pyx_state is used.
+ */
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  PyObject *__pyx_t_1 = NULL;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("__setstate_cython__", 0);
+
+  /* "(tree fragment)":4
+ *     raise TypeError("no default __reduce__ due to non-trivial __cinit__")
+ * def __setstate_cython__(self, __pyx_state):
+ *     raise TypeError("no default __reduce__ due to non-trivial __cinit__")             # <<<<<<<<<<<<<<
+ */
+  __pyx_t_1 = __Pyx_PyObject_Call(__pyx_builtin_TypeError, __pyx_tuple__10, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 4, __pyx_L1_error) /* __pyx_tuple__10 presumably holds the message echoed above - defined outside this view */
+  __Pyx_GOTREF(__pyx_t_1);
+  __Pyx_Raise(__pyx_t_1, 0, 0, 0);
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __PYX_ERR(2, 4, __pyx_L1_error)
+
+  /* "(tree fragment)":3
+ * def __reduce_cython__(self):
+ *     raise TypeError("no default __reduce__ due to non-trivial __cinit__")
+ * def __setstate_cython__(self, __pyx_state):             # <<<<<<<<<<<<<<
+ *     raise TypeError("no default __reduce__ due to non-trivial __cinit__")
+ */
+
+  /* function exit code: there is no success label, every path ends here with NULL */
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_1);
+  __Pyx_AddTraceback("View.MemoryView.array.__setstate_cython__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = NULL;
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "View.MemoryView":244
+ * 
+ * @cname("__pyx_array_new")
+ * cdef array array_cwrapper(tuple shape, Py_ssize_t itemsize, char *format,             # <<<<<<<<<<<<<<
+ *                           char *mode, char *buf):
+ *     cdef array result
+ */
+
+static struct __pyx_array_obj *__pyx_array_new(PyObject *__pyx_v_shape, Py_ssize_t __pyx_v_itemsize, char *__pyx_v_format, char *__pyx_v_mode, char *__pyx_v_buf) {
+  struct __pyx_array_obj *__pyx_v_result = 0;
+  struct __pyx_array_obj *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  int __pyx_t_1;
+  PyObject *__pyx_t_2 = NULL;
+  PyObject *__pyx_t_3 = NULL;
+  PyObject *__pyx_t_4 = NULL;
+  PyObject *__pyx_t_5 = NULL;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("array_cwrapper", 0);
+  /* C-level helper (Cython name: array_cwrapper). Calls the array type with (shape, itemsize, format as bytes, mode decoded as ASCII) and returns a new reference to the result, or 0 with a traceback entry on failure. When buf is non-NULL the call also passes allocate_buffer=False and buf is then stored in result->data. */
+  /* "View.MemoryView":248
+ *     cdef array result
+ * 
+ *     if buf == NULL:             # <<<<<<<<<<<<<<
+ *         result = array(shape, itemsize, format, mode.decode('ASCII'))
+ *     else:
+ */
+  __pyx_t_1 = ((__pyx_v_buf == NULL) != 0);
+  if (__pyx_t_1) {
+
+    /* "View.MemoryView":249
+ * 
+ *     if buf == NULL:
+ *         result = array(shape, itemsize, format, mode.decode('ASCII'))             # <<<<<<<<<<<<<<
+ *     else:
+ *         result = array(shape, itemsize, format, mode.decode('ASCII'),
+ */
+    __pyx_t_2 = PyInt_FromSsize_t(__pyx_v_itemsize); if (unlikely(!__pyx_t_2)) __PYX_ERR(2, 249, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_2);
+    __pyx_t_3 = __Pyx_PyBytes_FromString(__pyx_v_format); if (unlikely(!__pyx_t_3)) __PYX_ERR(2, 249, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_3);
+    __pyx_t_4 = __Pyx_decode_c_string(__pyx_v_mode, 0, strlen(__pyx_v_mode), NULL, NULL, PyUnicode_DecodeASCII); if (unlikely(!__pyx_t_4)) __PYX_ERR(2, 249, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_4);
+    __pyx_t_5 = PyTuple_New(4); if (unlikely(!__pyx_t_5)) __PYX_ERR(2, 249, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_5);
+    __Pyx_INCREF(__pyx_v_shape); /* shape is a borrowed argument; take a reference for the tuple slot filled below */
+    __Pyx_GIVEREF(__pyx_v_shape);
+    PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_v_shape);
+    __Pyx_GIVEREF(__pyx_t_2);
+    PyTuple_SET_ITEM(__pyx_t_5, 1, __pyx_t_2);
+    __Pyx_GIVEREF(__pyx_t_3);
+    PyTuple_SET_ITEM(__pyx_t_5, 2, __pyx_t_3);
+    __Pyx_GIVEREF(__pyx_t_4);
+    PyTuple_SET_ITEM(__pyx_t_5, 3, __pyx_t_4);
+    __pyx_t_2 = 0; /* the temps now belong to the tuple; clearing them keeps the error path's XDECREF calls from releasing them a second time */
+    __pyx_t_3 = 0;
+    __pyx_t_4 = 0;
+    __pyx_t_4 = __Pyx_PyObject_Call(((PyObject *)__pyx_array_type), __pyx_t_5, NULL); if (unlikely(!__pyx_t_4)) __PYX_ERR(2, 249, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_4);
+    __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+    __pyx_v_result = ((struct __pyx_array_obj *)__pyx_t_4);
+    __pyx_t_4 = 0;
+
+    /* "View.MemoryView":248
+ *     cdef array result
+ * 
+ *     if buf == NULL:             # <<<<<<<<<<<<<<
+ *         result = array(shape, itemsize, format, mode.decode('ASCII'))
+ *     else:
+ */
+    goto __pyx_L3;
+  }
+
+  /* "View.MemoryView":251
+ *         result = array(shape, itemsize, format, mode.decode('ASCII'))
+ *     else:
+ *         result = array(shape, itemsize, format, mode.decode('ASCII'),             # <<<<<<<<<<<<<<
+ *                        allocate_buffer=False)
+ *         result.data = buf
+ */
+  /*else: buf is non-NULL*/ {
+    __pyx_t_4 = PyInt_FromSsize_t(__pyx_v_itemsize); if (unlikely(!__pyx_t_4)) __PYX_ERR(2, 251, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_4);
+    __pyx_t_5 = __Pyx_PyBytes_FromString(__pyx_v_format); if (unlikely(!__pyx_t_5)) __PYX_ERR(2, 251, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_5);
+    __pyx_t_3 = __Pyx_decode_c_string(__pyx_v_mode, 0, strlen(__pyx_v_mode), NULL, NULL, PyUnicode_DecodeASCII); if (unlikely(!__pyx_t_3)) __PYX_ERR(2, 251, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_3);
+    __pyx_t_2 = PyTuple_New(4); if (unlikely(!__pyx_t_2)) __PYX_ERR(2, 251, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_2);
+    __Pyx_INCREF(__pyx_v_shape);
+    __Pyx_GIVEREF(__pyx_v_shape);
+    PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_v_shape);
+    __Pyx_GIVEREF(__pyx_t_4);
+    PyTuple_SET_ITEM(__pyx_t_2, 1, __pyx_t_4);
+    __Pyx_GIVEREF(__pyx_t_5);
+    PyTuple_SET_ITEM(__pyx_t_2, 2, __pyx_t_5);
+    __Pyx_GIVEREF(__pyx_t_3);
+    PyTuple_SET_ITEM(__pyx_t_2, 3, __pyx_t_3);
+    __pyx_t_4 = 0;
+    __pyx_t_5 = 0;
+    __pyx_t_3 = 0;
+
+    /* "View.MemoryView":252
+ *     else:
+ *         result = array(shape, itemsize, format, mode.decode('ASCII'),
+ *                        allocate_buffer=False)             # <<<<<<<<<<<<<<
+ *         result.data = buf
+ * 
+ */
+    __pyx_t_3 = __Pyx_PyDict_NewPresized(1); if (unlikely(!__pyx_t_3)) __PYX_ERR(2, 252, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_3);
+    if (PyDict_SetItem(__pyx_t_3, __pyx_n_s_allocate_buffer, Py_False) < 0) __PYX_ERR(2, 252, __pyx_L1_error)
+
+    /* "View.MemoryView":251
+ *         result = array(shape, itemsize, format, mode.decode('ASCII'))
+ *     else:
+ *         result = array(shape, itemsize, format, mode.decode('ASCII'),             # <<<<<<<<<<<<<<
+ *                        allocate_buffer=False)
+ *         result.data = buf
+ */
+    __pyx_t_5 = __Pyx_PyObject_Call(((PyObject *)__pyx_array_type), __pyx_t_2, __pyx_t_3); if (unlikely(!__pyx_t_5)) __PYX_ERR(2, 251, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_5);
+    __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+    __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+    __pyx_v_result = ((struct __pyx_array_obj *)__pyx_t_5);
+    __pyx_t_5 = 0;
+
+    /* "View.MemoryView":253
+ *         result = array(shape, itemsize, format, mode.decode('ASCII'),
+ *                        allocate_buffer=False)
+ *         result.data = buf             # <<<<<<<<<<<<<<
+ * 
+ *     return result
+ */
+    __pyx_v_result->data = __pyx_v_buf; /* the caller's pointer is stored as-is; nothing is copied here */
+  }
+  __pyx_L3:;
+
+  /* "View.MemoryView":255
+ *         result.data = buf
+ * 
+ *     return result             # <<<<<<<<<<<<<<
+ * 
+ * 
+ */
+  __Pyx_XDECREF(((PyObject *)__pyx_r));
+  __Pyx_INCREF(((PyObject *)__pyx_v_result)); /* the return value gets its own reference; the local one is released at __pyx_L0 */
+  __pyx_r = __pyx_v_result;
+  goto __pyx_L0;
+
+  /* "View.MemoryView":244
+ * 
+ * @cname("__pyx_array_new")
+ * cdef array array_cwrapper(tuple shape, Py_ssize_t itemsize, char *format,             # <<<<<<<<<<<<<<
+ *                           char *mode, char *buf):
+ *     cdef array result
+ */
+
+  /* function exit code: the error label releases any live temps, records a traceback entry and returns 0; both paths then drop the local result reference at __pyx_L0 */
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_2);
+  __Pyx_XDECREF(__pyx_t_3);
+  __Pyx_XDECREF(__pyx_t_4);
+  __Pyx_XDECREF(__pyx_t_5);
+  __Pyx_AddTraceback("View.MemoryView.array_cwrapper", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = 0;
+  __pyx_L0:;
+  __Pyx_XDECREF((PyObject *)__pyx_v_result);
+  __Pyx_XGIVEREF((PyObject *)__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "View.MemoryView":281
+ * cdef class Enum(object):
+ *     cdef object name
+ *     def __init__(self, name):             # <<<<<<<<<<<<<<
+ *         self.name = name
+ *     def __repr__(self):
+ */
+
+/* Python wrapper for Enum.__init__: unpacks exactly one argument, "name" (positional or keyword), and forwards it to the implementation function. Returns -1 with a traceback entry if unpacking fails. */
+static int __pyx_MemviewEnum___init__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
+static int __pyx_MemviewEnum___init__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
+  PyObject *__pyx_v_name = 0;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  int __pyx_r;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("__init__ (wrapper)", 0);
+  {
+    static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_name,0};
+    PyObject* values[1] = {0};
+    if (unlikely(__pyx_kwds)) { /* keyword dict present: combine positional and keyword arguments */
+      Py_ssize_t kw_args;
+      const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args);
+      switch (pos_args) {
+        case  1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
+        CYTHON_FALLTHROUGH;
+        case  0: break;
+        default: goto __pyx_L5_argtuple_error;
+      }
+      kw_args = PyDict_Size(__pyx_kwds);
+      switch (pos_args) {
+        case  0:
+        if (likely((values[0] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_name)) != 0)) kw_args--; /* with no positional argument, "name" must come from the keyword dict */
+        else goto __pyx_L5_argtuple_error;
+      }
+      if (unlikely(kw_args > 0)) { /* keywords remain that were not consumed above; let the helper handle them */
+        if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "__init__") < 0)) __PYX_ERR(2, 281, __pyx_L3_error)
+      }
+    } else if (PyTuple_GET_SIZE(__pyx_args) != 1) {
+      goto __pyx_L5_argtuple_error;
+    } else {
+      values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
+    }
+    __pyx_v_name = values[0];
+  }
+  goto __pyx_L4_argument_unpacking_done;
+  __pyx_L5_argtuple_error:;
+  __Pyx_RaiseArgtupleInvalid("__init__", 1, 1, 1, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(2, 281, __pyx_L3_error)
+  __pyx_L3_error:;
+  __Pyx_AddTraceback("View.MemoryView.Enum.__init__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __Pyx_RefNannyFinishContext();
+  return -1;
+  __pyx_L4_argument_unpacking_done:;
+  __pyx_r = __pyx_MemviewEnum___pyx_pf_15View_dot_MemoryView_4Enum___init__(((struct __pyx_MemviewEnum_obj *)__pyx_v_self), __pyx_v_name);
+
+  /* function exit code: the implementation's status is returned unchanged */
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+static int __pyx_MemviewEnum___pyx_pf_15View_dot_MemoryView_4Enum___init__(struct __pyx_MemviewEnum_obj *__pyx_v_self, PyObject *__pyx_v_name) {
+  int __pyx_r;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("__init__", 0);
+  /* Implementation of Enum.__init__: replaces self->name with name and always returns 0. */
+  /* "View.MemoryView":282
+ *     cdef object name
+ *     def __init__(self, name):
+ *         self.name = name             # <<<<<<<<<<<<<<
+ *     def __repr__(self):
+ *         return self.name
+ */
+  __Pyx_INCREF(__pyx_v_name); /* take a reference to the new value before the old one is released */
+  __Pyx_GIVEREF(__pyx_v_name);
+  __Pyx_GOTREF(__pyx_v_self->name);
+  __Pyx_DECREF(__pyx_v_self->name);
+  __pyx_v_self->name = __pyx_v_name;
+
+  /* "View.MemoryView":281
+ * cdef class Enum(object):
+ *     cdef object name
+ *     def __init__(self, name):             # <<<<<<<<<<<<<<
+ *         self.name = name
+ *     def __repr__(self):
+ */
+
+  /* function exit code: there is no failure path in this function */
+  __pyx_r = 0;
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "View.MemoryView":283
+ *     def __init__(self, name):
+ *         self.name = name
+ *     def __repr__(self):             # <<<<<<<<<<<<<<
+ *         return self.name
+ * 
+ */
+
+/* Python wrapper for Enum.__repr__: casts self to struct __pyx_MemviewEnum_obj * and forwards to the implementation function. */
+static PyObject *__pyx_MemviewEnum___repr__(PyObject *__pyx_v_self); /*proto*/
+static PyObject *__pyx_MemviewEnum___repr__(PyObject *__pyx_v_self) {
+  PyObject *__pyx_r = 0;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("__repr__ (wrapper)", 0);
+  __pyx_r = __pyx_MemviewEnum___pyx_pf_15View_dot_MemoryView_4Enum_2__repr__(((struct __pyx_MemviewEnum_obj *)__pyx_v_self));
+
+  /* function exit code: the implementation's result is returned unchanged */
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+static PyObject *__pyx_MemviewEnum___pyx_pf_15View_dot_MemoryView_4Enum_2__repr__(struct __pyx_MemviewEnum_obj *__pyx_v_self) {
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("__repr__", 0);
+  /* Implementation of Enum.__repr__: returns a new reference to self->name exactly as stored; no conversion or type check is done here. */
+  /* "View.MemoryView":284
+ *         self.name = name
+ *     def __repr__(self):
+ *         return self.name             # <<<<<<<<<<<<<<
+ * 
+ * cdef generic = Enum("<strided and direct or indirect>")
+ */
+  __Pyx_XDECREF(__pyx_r);
+  __Pyx_INCREF(__pyx_v_self->name);
+  __pyx_r = __pyx_v_self->name;
+  goto __pyx_L0;
+
+  /* "View.MemoryView":283
+ *     def __init__(self, name):
+ *         self.name = name
+ *     def __repr__(self):             # <<<<<<<<<<<<<<
+ *         return self.name
+ * 
+ */
+
+  /* function exit code: there is no error label, so only the success path reaches here */
+  __pyx_L0:;
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "(tree fragment)":1
+ * def __reduce_cython__(self):             # <<<<<<<<<<<<<<
+ *     cdef tuple state
+ *     cdef object _dict
+ */
+
+/* Python wrapper for Enum.__reduce_cython__: ignores the second (unused) argument, casts self to struct __pyx_MemviewEnum_obj * and forwards to the implementation function. */
+static PyObject *__pyx_pw___pyx_MemviewEnum_1__reduce_cython__(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused); /*proto*/
+static PyObject *__pyx_pw___pyx_MemviewEnum_1__reduce_cython__(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused) {
+  PyObject *__pyx_r = 0;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("__reduce_cython__ (wrapper)", 0);
+  __pyx_r = __pyx_pf___pyx_MemviewEnum___reduce_cython__(((struct __pyx_MemviewEnum_obj *)__pyx_v_self));
+
+  /* function exit code: the implementation's result is returned unchanged */
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+static PyObject *__pyx_pf___pyx_MemviewEnum___reduce_cython__(struct __pyx_MemviewEnum_obj *__pyx_v_self) {
+  PyObject *__pyx_v_state = 0;
+  PyObject *__pyx_v__dict = 0;
+  int __pyx_v_use_setstate;
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  PyObject *__pyx_t_1 = NULL;
+  int __pyx_t_2;
+  int __pyx_t_3;
+  PyObject *__pyx_t_4 = NULL;
+  PyObject *__pyx_t_5 = NULL;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("__reduce_cython__", 0);
+  /* Implementation of Enum.__reduce_cython__. Builds state = (self.name,), appending the instance __dict__ when it is not None, then returns either the 3-tuple (__pyx_unpickle_Enum, (type(self), 0xb068931, None), state) or the 2-tuple (__pyx_unpickle_Enum, (type(self), 0xb068931, state)). Returns NULL with a traceback entry on failure. */
+  /* "(tree fragment)":5
+ *     cdef object _dict
+ *     cdef bint use_setstate
+ *     state = (self.name,)             # <<<<<<<<<<<<<<
+ *     _dict = getattr(self, '__dict__', None)
+ *     if _dict is not None:
+ */
+  __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 5, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_1);
+  __Pyx_INCREF(__pyx_v_self->name);
+  __Pyx_GIVEREF(__pyx_v_self->name);
+  PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_v_self->name);
+  __pyx_v_state = ((PyObject*)__pyx_t_1);
+  __pyx_t_1 = 0;
+
+  /* "(tree fragment)":6
+ *     cdef bint use_setstate
+ *     state = (self.name,)
+ *     _dict = getattr(self, '__dict__', None)             # <<<<<<<<<<<<<<
+ *     if _dict is not None:
+ *         state += (_dict,)
+ */
+  __pyx_t_1 = __Pyx_GetAttr3(((PyObject *)__pyx_v_self), __pyx_n_s_dict, Py_None); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 6, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_v__dict = __pyx_t_1;
+  __pyx_t_1 = 0;
+
+  /* "(tree fragment)":7
+ *     state = (self.name,)
+ *     _dict = getattr(self, '__dict__', None)
+ *     if _dict is not None:             # <<<<<<<<<<<<<<
+ *         state += (_dict,)
+ *         use_setstate = True
+ */
+  __pyx_t_2 = (__pyx_v__dict != Py_None);
+  __pyx_t_3 = (__pyx_t_2 != 0);
+  if (__pyx_t_3) {
+
+    /* "(tree fragment)":8
+ *     _dict = getattr(self, '__dict__', None)
+ *     if _dict is not None:
+ *         state += (_dict,)             # <<<<<<<<<<<<<<
+ *         use_setstate = True
+ *     else:
+ */
+    __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 8, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_1);
+    __Pyx_INCREF(__pyx_v__dict);
+    __Pyx_GIVEREF(__pyx_v__dict);
+    PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_v__dict);
+    __pyx_t_4 = PyNumber_InPlaceAdd(__pyx_v_state, __pyx_t_1); if (unlikely(!__pyx_t_4)) __PYX_ERR(2, 8, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_4);
+    __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+    __Pyx_DECREF_SET(__pyx_v_state, ((PyObject*)__pyx_t_4)); /* state now refers to the result of the in-place add */
+    __pyx_t_4 = 0;
+
+    /* "(tree fragment)":9
+ *     if _dict is not None:
+ *         state += (_dict,)
+ *         use_setstate = True             # <<<<<<<<<<<<<<
+ *     else:
+ *         use_setstate = self.name is not None
+ */
+    __pyx_v_use_setstate = 1;
+
+    /* "(tree fragment)":7
+ *     state = (self.name,)
+ *     _dict = getattr(self, '__dict__', None)
+ *     if _dict is not None:             # <<<<<<<<<<<<<<
+ *         state += (_dict,)
+ *         use_setstate = True
+ */
+    goto __pyx_L3;
+  }
+
+  /* "(tree fragment)":11
+ *         use_setstate = True
+ *     else:
+ *         use_setstate = self.name is not None             # <<<<<<<<<<<<<<
+ *     if use_setstate:
+ *         return __pyx_unpickle_Enum, (type(self), 0xb068931, None), state
+ */
+  /*else: no instance __dict__*/ {
+    __pyx_t_3 = (__pyx_v_self->name != Py_None);
+    __pyx_v_use_setstate = __pyx_t_3;
+  }
+  __pyx_L3:;
+
+  /* "(tree fragment)":12
+ *     else:
+ *         use_setstate = self.name is not None
+ *     if use_setstate:             # <<<<<<<<<<<<<<
+ *         return __pyx_unpickle_Enum, (type(self), 0xb068931, None), state
+ *     else:
+ */
+  __pyx_t_3 = (__pyx_v_use_setstate != 0);
+  if (__pyx_t_3) {
+
+    /* "(tree fragment)":13
+ *         use_setstate = self.name is not None
+ *     if use_setstate:
+ *         return __pyx_unpickle_Enum, (type(self), 0xb068931, None), state             # <<<<<<<<<<<<<<
+ *     else:
+ *         return __pyx_unpickle_Enum, (type(self), 0xb068931, state)
+ */
+    __Pyx_XDECREF(__pyx_r);
+    __Pyx_GetModuleGlobalName(__pyx_t_4, __pyx_n_s_pyx_unpickle_Enum); if (unlikely(!__pyx_t_4)) __PYX_ERR(2, 13, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_4);
+    __pyx_t_1 = PyTuple_New(3); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 13, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_1);
+    __Pyx_INCREF(((PyObject *)Py_TYPE(((PyObject *)__pyx_v_self))));
+    __Pyx_GIVEREF(((PyObject *)Py_TYPE(((PyObject *)__pyx_v_self))));
+    PyTuple_SET_ITEM(__pyx_t_1, 0, ((PyObject *)Py_TYPE(((PyObject *)__pyx_v_self))));
+    __Pyx_INCREF(__pyx_int_184977713); /* 184977713 is the decimal form of the 0xb068931 shown in the echoed source line */
+    __Pyx_GIVEREF(__pyx_int_184977713);
+    PyTuple_SET_ITEM(__pyx_t_1, 1, __pyx_int_184977713);
+    __Pyx_INCREF(Py_None);
+    __Pyx_GIVEREF(Py_None);
+    PyTuple_SET_ITEM(__pyx_t_1, 2, Py_None);
+    __pyx_t_5 = PyTuple_New(3); if (unlikely(!__pyx_t_5)) __PYX_ERR(2, 13, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_5);
+    __Pyx_GIVEREF(__pyx_t_4);
+    PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_t_4);
+    __Pyx_GIVEREF(__pyx_t_1);
+    PyTuple_SET_ITEM(__pyx_t_5, 1, __pyx_t_1);
+    __Pyx_INCREF(__pyx_v_state);
+    __Pyx_GIVEREF(__pyx_v_state);
+    PyTuple_SET_ITEM(__pyx_t_5, 2, __pyx_v_state);
+    __pyx_t_4 = 0;
+    __pyx_t_1 = 0;
+    __pyx_r = __pyx_t_5;
+    __pyx_t_5 = 0;
+    goto __pyx_L0;
+
+    /* "(tree fragment)":12
+ *     else:
+ *         use_setstate = self.name is not None
+ *     if use_setstate:             # <<<<<<<<<<<<<<
+ *         return __pyx_unpickle_Enum, (type(self), 0xb068931, None), state
+ *     else:
+ */
+  }
+
+  /* "(tree fragment)":15
+ *         return __pyx_unpickle_Enum, (type(self), 0xb068931, None), state
+ *     else:
+ *         return __pyx_unpickle_Enum, (type(self), 0xb068931, state)             # <<<<<<<<<<<<<<
+ * def __setstate_cython__(self, __pyx_state):
+ *     __pyx_unpickle_Enum__set_state(self, __pyx_state)
+ */
+  /*else: state goes inside the argument tuple and the result has two items*/ {
+    __Pyx_XDECREF(__pyx_r);
+    __Pyx_GetModuleGlobalName(__pyx_t_5, __pyx_n_s_pyx_unpickle_Enum); if (unlikely(!__pyx_t_5)) __PYX_ERR(2, 15, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_5);
+    __pyx_t_1 = PyTuple_New(3); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 15, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_1);
+    __Pyx_INCREF(((PyObject *)Py_TYPE(((PyObject *)__pyx_v_self))));
+    __Pyx_GIVEREF(((PyObject *)Py_TYPE(((PyObject *)__pyx_v_self))));
+    PyTuple_SET_ITEM(__pyx_t_1, 0, ((PyObject *)Py_TYPE(((PyObject *)__pyx_v_self))));
+    __Pyx_INCREF(__pyx_int_184977713);
+    __Pyx_GIVEREF(__pyx_int_184977713);
+    PyTuple_SET_ITEM(__pyx_t_1, 1, __pyx_int_184977713);
+    __Pyx_INCREF(__pyx_v_state);
+    __Pyx_GIVEREF(__pyx_v_state);
+    PyTuple_SET_ITEM(__pyx_t_1, 2, __pyx_v_state);
+    __pyx_t_4 = PyTuple_New(2); if (unlikely(!__pyx_t_4)) __PYX_ERR(2, 15, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_4);
+    __Pyx_GIVEREF(__pyx_t_5);
+    PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_t_5);
+    __Pyx_GIVEREF(__pyx_t_1);
+    PyTuple_SET_ITEM(__pyx_t_4, 1, __pyx_t_1);
+    __pyx_t_5 = 0;
+    __pyx_t_1 = 0;
+    __pyx_r = __pyx_t_4;
+    __pyx_t_4 = 0;
+    goto __pyx_L0;
+  }
+
+  /* "(tree fragment)":1
+ * def __reduce_cython__(self):             # <<<<<<<<<<<<<<
+ *     cdef tuple state
+ *     cdef object _dict
+ */
+
+  /* function exit code: the error label releases live temps, records a traceback entry and sets the result to NULL; both paths then release the state and _dict locals at __pyx_L0 */
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_1);
+  __Pyx_XDECREF(__pyx_t_4);
+  __Pyx_XDECREF(__pyx_t_5);
+  __Pyx_AddTraceback("View.MemoryView.Enum.__reduce_cython__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = NULL;
+  __pyx_L0:;
+  __Pyx_XDECREF(__pyx_v_state);
+  __Pyx_XDECREF(__pyx_v__dict);
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "(tree fragment)":16
+ *     else:
+ *         return __pyx_unpickle_Enum, (type(self), 0xb068931, state)
+ * def __setstate_cython__(self, __pyx_state):             # <<<<<<<<<<<<<<
+ *     __pyx_unpickle_Enum__set_state(self, __pyx_state)
+ */
+
+/* Python wrapper for Enum.__setstate_cython__: casts self to struct __pyx_MemviewEnum_obj * and forwards both arguments to the implementation function. */
+static PyObject *__pyx_pw___pyx_MemviewEnum_3__setstate_cython__(PyObject *__pyx_v_self, PyObject *__pyx_v___pyx_state); /*proto*/
+static PyObject *__pyx_pw___pyx_MemviewEnum_3__setstate_cython__(PyObject *__pyx_v_self, PyObject *__pyx_v___pyx_state) {
+  PyObject *__pyx_r = 0;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("__setstate_cython__ (wrapper)", 0);
+  __pyx_r = __pyx_pf___pyx_MemviewEnum_2__setstate_cython__(((struct __pyx_MemviewEnum_obj *)__pyx_v_self), ((PyObject *)__pyx_v___pyx_state));
+
+  /* function exit code: the implementation's result is returned unchanged */
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+static PyObject *__pyx_pf___pyx_MemviewEnum_2__setstate_cython__(struct __pyx_MemviewEnum_obj *__pyx_v_self, PyObject *__pyx_v___pyx_state) {
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  PyObject *__pyx_t_1 = NULL;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("__setstate_cython__", 0);
+  /* Implementation of Enum.__setstate_cython__: checks that __pyx_state is an exact tuple or None, passes it to __pyx_unpickle_Enum__set_state and returns None. Returns NULL with a traceback entry if the check or the call fails. */
+  /* "(tree fragment)":17
+ *         return __pyx_unpickle_Enum, (type(self), 0xb068931, state)
+ * def __setstate_cython__(self, __pyx_state):
+ *     __pyx_unpickle_Enum__set_state(self, __pyx_state)             # <<<<<<<<<<<<<<
+ */
+  if (!(likely(PyTuple_CheckExact(__pyx_v___pyx_state))||((__pyx_v___pyx_state) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "tuple", Py_TYPE(__pyx_v___pyx_state)->tp_name), 0))) __PYX_ERR(2, 17, __pyx_L1_error)
+  __pyx_t_1 = __pyx_unpickle_Enum__set_state(__pyx_v_self, ((PyObject*)__pyx_v___pyx_state)); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 17, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_1);
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; /* the helper's return value is not used; only its success matters */
+
+  /* "(tree fragment)":16
+ *     else:
+ *         return __pyx_unpickle_Enum, (type(self), 0xb068931, state)
+ * def __setstate_cython__(self, __pyx_state):             # <<<<<<<<<<<<<<
+ *     __pyx_unpickle_Enum__set_state(self, __pyx_state)
+ */
+
+  /* function exit code: success returns a new reference to None; the error label records a traceback entry and returns NULL */
+  __pyx_r = Py_None; __Pyx_INCREF(Py_None);
+  goto __pyx_L0;
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_1);
+  __Pyx_AddTraceback("View.MemoryView.Enum.__setstate_cython__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = NULL;
+  __pyx_L0:;
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "View.MemoryView":298
+ * 
+ * @cname('__pyx_align_pointer')
+ * cdef void *align_pointer(void *memory, size_t alignment) nogil:             # <<<<<<<<<<<<<<
+ *     "Align pointer memory on a given boundary"
+ *     cdef Py_intptr_t aligned_p = <Py_intptr_t> memory
+ */
+
+static void *__pyx_align_pointer(void *__pyx_v_memory, size_t __pyx_v_alignment) {
+  Py_intptr_t __pyx_v_aligned_p;
+  size_t __pyx_v_offset;
+  void *__pyx_r;
+  int __pyx_t_1;
+  /* Returns memory advanced to the next multiple of alignment, or unchanged when the remainder is already zero. NOTE(review): no guard against alignment == 0 is visible in this function, and that value would make the modulo below undefined; confirm callers never pass 0. */
+  /* "View.MemoryView":300
+ * cdef void *align_pointer(void *memory, size_t alignment) nogil:
+ *     "Align pointer memory on a given boundary"
+ *     cdef Py_intptr_t aligned_p = <Py_intptr_t> memory             # <<<<<<<<<<<<<<
+ *     cdef size_t offset
+ * 
+ */
+  __pyx_v_aligned_p = ((Py_intptr_t)__pyx_v_memory);
+
+  /* "View.MemoryView":304
+ * 
+ *     with cython.cdivision(True):
+ *         offset = aligned_p % alignment             # <<<<<<<<<<<<<<
+ * 
+ *     if offset > 0:
+ */
+  __pyx_v_offset = (__pyx_v_aligned_p % __pyx_v_alignment); /* NOTE(review): the operands are Py_intptr_t and size_t, so this modulo mixes a signed and an unsigned type */
+
+  /* "View.MemoryView":306
+ *         offset = aligned_p % alignment
+ * 
+ *     if offset > 0:             # <<<<<<<<<<<<<<
+ *         aligned_p += alignment - offset
+ * 
+ */
+  __pyx_t_1 = ((__pyx_v_offset > 0) != 0);
+  if (__pyx_t_1) {
+
+    /* "View.MemoryView":307
+ * 
+ *     if offset > 0:
+ *         aligned_p += alignment - offset             # <<<<<<<<<<<<<<
+ * 
+ *     return <void *> aligned_p
+ */
+    __pyx_v_aligned_p = (__pyx_v_aligned_p + (__pyx_v_alignment - __pyx_v_offset)); /* step forward by the distance remaining to the next multiple */
+
+    /* "View.MemoryView":306
+ *         offset = aligned_p % alignment
+ * 
+ *     if offset > 0:             # <<<<<<<<<<<<<<
+ *         aligned_p += alignment - offset
+ * 
+ */
+  }
+
+  /* "View.MemoryView":309
+ *         aligned_p += alignment - offset
+ * 
+ *     return <void *> aligned_p             # <<<<<<<<<<<<<<
+ * 
+ * 
+ */
+  __pyx_r = ((void *)__pyx_v_aligned_p);
+  goto __pyx_L0;
+
+  /* "View.MemoryView":298
+ * 
+ * @cname('__pyx_align_pointer')
+ * cdef void *align_pointer(void *memory, size_t alignment) nogil:             # <<<<<<<<<<<<<<
+ *     "Align pointer memory on a given boundary"
+ *     cdef Py_intptr_t aligned_p = <Py_intptr_t> memory
+ */
+
+  /* function exit code: no Python objects or error paths are involved; the computed pointer is returned */
+  __pyx_L0:;
+  return __pyx_r;
+}
+
+/* "View.MemoryView":345
+ *     cdef __Pyx_TypeInfo *typeinfo
+ * 
+ *     def __cinit__(memoryview self, object obj, int flags, bint dtype_is_object=False):             # <<<<<<<<<<<<<<
+ *         self.obj = obj
+ *         self.flags = flags
+ */
+
+/* Python wrapper for memoryview.__cinit__: unpacks obj, flags (converted to a C int) and the optional dtype_is_object (converted to a truth value, 0 when omitted), then forwards them to the implementation function. Returns -1 with a traceback entry if unpacking or conversion fails. */
+static int __pyx_memoryview___cinit__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
+static int __pyx_memoryview___cinit__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
+  PyObject *__pyx_v_obj = 0;
+  int __pyx_v_flags;
+  int __pyx_v_dtype_is_object;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  int __pyx_r;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("__cinit__ (wrapper)", 0);
+  {
+    static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_obj,&__pyx_n_s_flags,&__pyx_n_s_dtype_is_object,0};
+    PyObject* values[3] = {0,0,0};
+    if (unlikely(__pyx_kwds)) { /* keyword dict present: combine positional and keyword arguments */
+      Py_ssize_t kw_args;
+      const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args);
+      switch (pos_args) {
+        case  3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
+        CYTHON_FALLTHROUGH;
+        case  2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
+        CYTHON_FALLTHROUGH;
+        case  1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
+        CYTHON_FALLTHROUGH;
+        case  0: break;
+        default: goto __pyx_L5_argtuple_error;
+      }
+      kw_args = PyDict_Size(__pyx_kwds);
+      switch (pos_args) { /* fill each slot not supplied positionally from the keyword dict, starting at the first missing one */
+        case  0:
+        if (likely((values[0] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_obj)) != 0)) kw_args--;
+        else goto __pyx_L5_argtuple_error;
+        CYTHON_FALLTHROUGH;
+        case  1:
+        if (likely((values[1] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_flags)) != 0)) kw_args--;
+        else {
+          __Pyx_RaiseArgtupleInvalid("__cinit__", 0, 2, 3, 1); __PYX_ERR(2, 345, __pyx_L3_error)
+        }
+        CYTHON_FALLTHROUGH;
+        case  2:
+        if (kw_args > 0) { /* dtype_is_object is optional, so a missing key is not an error */
+          PyObject* value = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_dtype_is_object);
+          if (value) { values[2] = value; kw_args--; }
+        }
+      }
+      if (unlikely(kw_args > 0)) { /* keywords remain that were not consumed above; let the helper handle them */
+        if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "__cinit__") < 0)) __PYX_ERR(2, 345, __pyx_L3_error)
+      }
+    } else { /* no keyword dict: only 2 or 3 positional arguments are accepted */
+      switch (PyTuple_GET_SIZE(__pyx_args)) {
+        case  3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
+        CYTHON_FALLTHROUGH;
+        case  2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
+        values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
+        break;
+        default: goto __pyx_L5_argtuple_error;
+      }
+    }
+    __pyx_v_obj = values[0];
+    __pyx_v_flags = __Pyx_PyInt_As_int(values[1]); if (unlikely((__pyx_v_flags == (int)-1) && PyErr_Occurred())) __PYX_ERR(2, 345, __pyx_L3_error)
+    if (values[2]) {
+      __pyx_v_dtype_is_object = __Pyx_PyObject_IsTrue(values[2]); if (unlikely((__pyx_v_dtype_is_object == (int)-1) && PyErr_Occurred())) __PYX_ERR(2, 345, __pyx_L3_error)
+    } else {
+      __pyx_v_dtype_is_object = ((int)0); /* default used when dtype_is_object was not supplied */
+    }
+  }
+  goto __pyx_L4_argument_unpacking_done;
+  __pyx_L5_argtuple_error:;
+  __Pyx_RaiseArgtupleInvalid("__cinit__", 0, 2, 3, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(2, 345, __pyx_L3_error)
+  __pyx_L3_error:;
+  __Pyx_AddTraceback("View.MemoryView.memoryview.__cinit__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __Pyx_RefNannyFinishContext();
+  return -1;
+  __pyx_L4_argument_unpacking_done:;
+  __pyx_r = __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview___cinit__(((struct __pyx_memoryview_obj *)__pyx_v_self), __pyx_v_obj, __pyx_v_flags, __pyx_v_dtype_is_object);
+
+  /* function exit code: the implementation's status is returned unchanged */
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview___cinit__(struct __pyx_memoryview_obj *__pyx_v_self, PyObject *__pyx_v_obj, int __pyx_v_flags, int __pyx_v_dtype_is_object) {
+  int __pyx_r;
+  __Pyx_RefNannyDeclarations
+  int __pyx_t_1;
+  int __pyx_t_2;
+  int __pyx_t_3;
+  int __pyx_t_4;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("__cinit__", 0);
+
+  /* "View.MemoryView":346
+ * 
+ *     def __cinit__(memoryview self, object obj, int flags, bint dtype_is_object=False):
+ *         self.obj = obj             # <<<<<<<<<<<<<<
+ *         self.flags = flags
+ *         if type(self) is memoryview or obj is not None:
+ */
+  __Pyx_INCREF(__pyx_v_obj);
+  __Pyx_GIVEREF(__pyx_v_obj);
+  __Pyx_GOTREF(__pyx_v_self->obj);
+  __Pyx_DECREF(__pyx_v_self->obj);
+  __pyx_v_self->obj = __pyx_v_obj;
+
+  /* "View.MemoryView":347
+ *     def __cinit__(memoryview self, object obj, int flags, bint dtype_is_object=False):
+ *         self.obj = obj
+ *         self.flags = flags             # <<<<<<<<<<<<<<
+ *         if type(self) is memoryview or obj is not None:
+ *             __Pyx_GetBuffer(obj, &self.view, flags)
+ */
+  __pyx_v_self->flags = __pyx_v_flags;
+
+  /* "View.MemoryView":348
+ *         self.obj = obj
+ *         self.flags = flags
+ *         if type(self) is memoryview or obj is not None:             # <<<<<<<<<<<<<<
+ *             __Pyx_GetBuffer(obj, &self.view, flags)
+ *             if <PyObject *> self.view.obj == NULL:
+ */
+  __pyx_t_2 = (((PyObject *)Py_TYPE(((PyObject *)__pyx_v_self))) == ((PyObject *)__pyx_memoryview_type));
+  __pyx_t_3 = (__pyx_t_2 != 0);
+  if (!__pyx_t_3) {
+  } else {
+    __pyx_t_1 = __pyx_t_3;
+    goto __pyx_L4_bool_binop_done;
+  }
+  __pyx_t_3 = (__pyx_v_obj != Py_None);
+  __pyx_t_2 = (__pyx_t_3 != 0);
+  __pyx_t_1 = __pyx_t_2;
+  __pyx_L4_bool_binop_done:;
+  if (__pyx_t_1) {
+
+    /* "View.MemoryView":349
+ *         self.flags = flags
+ *         if type(self) is memoryview or obj is not None:
+ *             __Pyx_GetBuffer(obj, &self.view, flags)             # <<<<<<<<<<<<<<
+ *             if <PyObject *> self.view.obj == NULL:
+ *                 (<__pyx_buffer *> &self.view).obj = Py_None
+ */
+    __pyx_t_4 = __Pyx_GetBuffer(__pyx_v_obj, (&__pyx_v_self->view), __pyx_v_flags); if (unlikely(__pyx_t_4 == ((int)-1))) __PYX_ERR(2, 349, __pyx_L1_error)
+
+    /* "View.MemoryView":350
+ *         if type(self) is memoryview or obj is not None:
+ *             __Pyx_GetBuffer(obj, &self.view, flags)
+ *             if <PyObject *> self.view.obj == NULL:             # <<<<<<<<<<<<<<
+ *                 (<__pyx_buffer *> &self.view).obj = Py_None
+ *                 Py_INCREF(Py_None)
+ */
+    __pyx_t_1 = ((((PyObject *)__pyx_v_self->view.obj) == NULL) != 0);
+    if (__pyx_t_1) {
+
+      /* "View.MemoryView":351
+ *             __Pyx_GetBuffer(obj, &self.view, flags)
+ *             if <PyObject *> self.view.obj == NULL:
+ *                 (<__pyx_buffer *> &self.view).obj = Py_None             # <<<<<<<<<<<<<<
+ *                 Py_INCREF(Py_None)
+ * 
+ */
+      ((Py_buffer *)(&__pyx_v_self->view))->obj = Py_None;
+
+      /* "View.MemoryView":352
+ *             if <PyObject *> self.view.obj == NULL:
+ *                 (<__pyx_buffer *> &self.view).obj = Py_None
+ *                 Py_INCREF(Py_None)             # <<<<<<<<<<<<<<
+ * 
+ *         global __pyx_memoryview_thread_locks_used
+ */
+      Py_INCREF(Py_None);
+
+      /* "View.MemoryView":350
+ *         if type(self) is memoryview or obj is not None:
+ *             __Pyx_GetBuffer(obj, &self.view, flags)
+ *             if <PyObject *> self.view.obj == NULL:             # <<<<<<<<<<<<<<
+ *                 (<__pyx_buffer *> &self.view).obj = Py_None
+ *                 Py_INCREF(Py_None)
+ */
+    }
+
+    /* "View.MemoryView":348
+ *         self.obj = obj
+ *         self.flags = flags
+ *         if type(self) is memoryview or obj is not None:             # <<<<<<<<<<<<<<
+ *             __Pyx_GetBuffer(obj, &self.view, flags)
+ *             if <PyObject *> self.view.obj == NULL:
+ */
+  }
+
+  /* "View.MemoryView":355
+ * 
+ *         global __pyx_memoryview_thread_locks_used
+ *         if __pyx_memoryview_thread_locks_used < THREAD_LOCKS_PREALLOCATED:             # <<<<<<<<<<<<<<
+ *             self.lock = __pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used]
+ *             __pyx_memoryview_thread_locks_used += 1
+ */
+  __pyx_t_1 = ((__pyx_memoryview_thread_locks_used < 8) != 0);
+  if (__pyx_t_1) {
+
+    /* "View.MemoryView":356
+ *         global __pyx_memoryview_thread_locks_used
+ *         if __pyx_memoryview_thread_locks_used < THREAD_LOCKS_PREALLOCATED:
+ *             self.lock = __pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used]             # <<<<<<<<<<<<<<
+ *             __pyx_memoryview_thread_locks_used += 1
+ *         if self.lock is NULL:
+ */
+    __pyx_v_self->lock = (__pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used]);
+
+    /* "View.MemoryView":357
+ *         if __pyx_memoryview_thread_locks_used < THREAD_LOCKS_PREALLOCATED:
+ *             self.lock = __pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used]
+ *             __pyx_memoryview_thread_locks_used += 1             # <<<<<<<<<<<<<<
+ *         if self.lock is NULL:
+ *             self.lock = PyThread_allocate_lock()
+ */
+    __pyx_memoryview_thread_locks_used = (__pyx_memoryview_thread_locks_used + 1);
+
+    /* "View.MemoryView":355
+ * 
+ *         global __pyx_memoryview_thread_locks_used
+ *         if __pyx_memoryview_thread_locks_used < THREAD_LOCKS_PREALLOCATED:             # <<<<<<<<<<<<<<
+ *             self.lock = __pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used]
+ *             __pyx_memoryview_thread_locks_used += 1
+ */
+  }
+
+  /* "View.MemoryView":358
+ *             self.lock = __pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used]
+ *             __pyx_memoryview_thread_locks_used += 1
+ *         if self.lock is NULL:             # <<<<<<<<<<<<<<
+ *             self.lock = PyThread_allocate_lock()
+ *             if self.lock is NULL:
+ */
+  __pyx_t_1 = ((__pyx_v_self->lock == NULL) != 0);
+  if (__pyx_t_1) {
+
+    /* "View.MemoryView":359
+ *             __pyx_memoryview_thread_locks_used += 1
+ *         if self.lock is NULL:
+ *             self.lock = PyThread_allocate_lock()             # <<<<<<<<<<<<<<
+ *             if self.lock is NULL:
+ *                 raise MemoryError
+ */
+    __pyx_v_self->lock = PyThread_allocate_lock();
+
+    /* "View.MemoryView":360
+ *         if self.lock is NULL:
+ *             self.lock = PyThread_allocate_lock()
+ *             if self.lock is NULL:             # <<<<<<<<<<<<<<
+ *                 raise MemoryError
+ * 
+ */
+    __pyx_t_1 = ((__pyx_v_self->lock == NULL) != 0);
+    if (unlikely(__pyx_t_1)) {
+
+      /* "View.MemoryView":361
+ *             self.lock = PyThread_allocate_lock()
+ *             if self.lock is NULL:
+ *                 raise MemoryError             # <<<<<<<<<<<<<<
+ * 
+ *         if flags & PyBUF_FORMAT:
+ */
+      PyErr_NoMemory(); __PYX_ERR(2, 361, __pyx_L1_error)
+
+      /* "View.MemoryView":360
+ *         if self.lock is NULL:
+ *             self.lock = PyThread_allocate_lock()
+ *             if self.lock is NULL:             # <<<<<<<<<<<<<<
+ *                 raise MemoryError
+ * 
+ */
+    }
+
+    /* "View.MemoryView":358
+ *             self.lock = __pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used]
+ *             __pyx_memoryview_thread_locks_used += 1
+ *         if self.lock is NULL:             # <<<<<<<<<<<<<<
+ *             self.lock = PyThread_allocate_lock()
+ *             if self.lock is NULL:
+ */
+  }
+
+  /* "View.MemoryView":363
+ *                 raise MemoryError
+ * 
+ *         if flags & PyBUF_FORMAT:             # <<<<<<<<<<<<<<
+ *             self.dtype_is_object = (self.view.format[0] == b'O' and self.view.format[1] == b'\0')
+ *         else:
+ */
+  __pyx_t_1 = ((__pyx_v_flags & PyBUF_FORMAT) != 0);
+  if (__pyx_t_1) {
+
+    /* "View.MemoryView":364
+ * 
+ *         if flags & PyBUF_FORMAT:
+ *             self.dtype_is_object = (self.view.format[0] == b'O' and self.view.format[1] == b'\0')             # <<<<<<<<<<<<<<
+ *         else:
+ *             self.dtype_is_object = dtype_is_object
+ */
+    __pyx_t_2 = (((__pyx_v_self->view.format[0]) == 'O') != 0);
+    if (__pyx_t_2) {
+    } else {
+      __pyx_t_1 = __pyx_t_2;
+      goto __pyx_L11_bool_binop_done;
+    }
+    __pyx_t_2 = (((__pyx_v_self->view.format[1]) == '\x00') != 0);
+    __pyx_t_1 = __pyx_t_2;
+    __pyx_L11_bool_binop_done:;
+    __pyx_v_self->dtype_is_object = __pyx_t_1;
+
+    /* "View.MemoryView":363
+ *                 raise MemoryError
+ * 
+ *         if flags & PyBUF_FORMAT:             # <<<<<<<<<<<<<<
+ *             self.dtype_is_object = (self.view.format[0] == b'O' and self.view.format[1] == b'\0')
+ *         else:
+ */
+    goto __pyx_L10;
+  }
+
+  /* "View.MemoryView":366
+ *             self.dtype_is_object = (self.view.format[0] == b'O' and self.view.format[1] == b'\0')
+ *         else:
+ *             self.dtype_is_object = dtype_is_object             # <<<<<<<<<<<<<<
+ * 
+ *         self.acquisition_count_aligned_p = <__pyx_atomic_int *> align_pointer(
+ */
+  /*else*/ {
+    __pyx_v_self->dtype_is_object = __pyx_v_dtype_is_object;
+  }
+  __pyx_L10:;
+
+  /* "View.MemoryView":368
+ *             self.dtype_is_object = dtype_is_object
+ * 
+ *         self.acquisition_count_aligned_p = <__pyx_atomic_int *> align_pointer(             # <<<<<<<<<<<<<<
+ *                   <void *> &self.acquisition_count[0], sizeof(__pyx_atomic_int))
+ *         self.typeinfo = NULL
+ */
+  __pyx_v_self->acquisition_count_aligned_p = ((__pyx_atomic_int *)__pyx_align_pointer(((void *)(&(__pyx_v_self->acquisition_count[0]))), (sizeof(__pyx_atomic_int))));
+
+  /* "View.MemoryView":370
+ *         self.acquisition_count_aligned_p = <__pyx_atomic_int *> align_pointer(
+ *                   <void *> &self.acquisition_count[0], sizeof(__pyx_atomic_int))
+ *         self.typeinfo = NULL             # <<<<<<<<<<<<<<
+ * 
+ *     def __dealloc__(memoryview self):
+ */
+  __pyx_v_self->typeinfo = NULL;
+
+  /* "View.MemoryView":345
+ *     cdef __Pyx_TypeInfo *typeinfo
+ * 
+ *     def __cinit__(memoryview self, object obj, int flags, bint dtype_is_object=False):             # <<<<<<<<<<<<<<
+ *         self.obj = obj
+ *         self.flags = flags
+ */
+
+  /* function exit code */
+  __pyx_r = 0;
+  goto __pyx_L0;
+  __pyx_L1_error:;
+  __Pyx_AddTraceback("View.MemoryView.memoryview.__cinit__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = -1;
+  __pyx_L0:;
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "View.MemoryView":372
+ *         self.typeinfo = NULL
+ * 
+ *     def __dealloc__(memoryview self):             # <<<<<<<<<<<<<<
+ *         if self.obj is not None:
+ *             __Pyx_ReleaseBuffer(&self.view)
+ */
+
+/* Python wrapper for memoryview.__dealloc__: casts self to struct __pyx_memoryview_obj* and forwards to the implementation function. */
+static void __pyx_memoryview___dealloc__(PyObject *__pyx_v_self); /*proto*/
+static void __pyx_memoryview___dealloc__(PyObject *__pyx_v_self) {
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("__dealloc__ (wrapper)", 0);
+  __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_2__dealloc__(((struct __pyx_memoryview_obj *)__pyx_v_self));
+
+  /* function exit code: void function, only the RefNanny context is closed */
+  __Pyx_RefNannyFinishContext();
+}
+
+static void __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_2__dealloc__(struct __pyx_memoryview_obj *__pyx_v_self) { /* memoryview.__dealloc__ body: releases or clears self->view, then recycles or frees self->lock */
+  int __pyx_v_i; /* loop variable `i` of the .pyx source */
+  __Pyx_RefNannyDeclarations
+  int __pyx_t_1;
+  int __pyx_t_2;
+  int __pyx_t_3;
+  int __pyx_t_4;
+  int __pyx_t_5;
+  PyThread_type_lock __pyx_t_6;
+  PyThread_type_lock __pyx_t_7;
+  __Pyx_RefNannySetupContext("__dealloc__", 0);
+
+  /* "View.MemoryView":373
+ * 
+ *     def __dealloc__(memoryview self):
+ *         if self.obj is not None:             # <<<<<<<<<<<<<<
+ *             __Pyx_ReleaseBuffer(&self.view)
+ *         elif (<__pyx_buffer *> &self.view).obj == Py_None:
+ */
+  __pyx_t_1 = (__pyx_v_self->obj != Py_None);
+  __pyx_t_2 = (__pyx_t_1 != 0);
+  if (__pyx_t_2) {
+
+    /* "View.MemoryView":374
+ *     def __dealloc__(memoryview self):
+ *         if self.obj is not None:
+ *             __Pyx_ReleaseBuffer(&self.view)             # <<<<<<<<<<<<<<
+ *         elif (<__pyx_buffer *> &self.view).obj == Py_None:
+ * 
+ */
+    __Pyx_ReleaseBuffer((&__pyx_v_self->view)); /* self->obj is not None: hand self->view to __Pyx_ReleaseBuffer */
+
+    /* "View.MemoryView":373
+ * 
+ *     def __dealloc__(memoryview self):
+ *         if self.obj is not None:             # <<<<<<<<<<<<<<
+ *             __Pyx_ReleaseBuffer(&self.view)
+ *         elif (<__pyx_buffer *> &self.view).obj == Py_None:
+ */
+    goto __pyx_L3; /* skip the elif branch below */
+  }
+
+  /* "View.MemoryView":375
+ *         if self.obj is not None:
+ *             __Pyx_ReleaseBuffer(&self.view)
+ *         elif (<__pyx_buffer *> &self.view).obj == Py_None:             # <<<<<<<<<<<<<<
+ * 
+ *             (<__pyx_buffer *> &self.view).obj = NULL
+ */
+  __pyx_t_2 = ((((Py_buffer *)(&__pyx_v_self->view))->obj == Py_None) != 0);
+  if (__pyx_t_2) {
+
+    /* "View.MemoryView":377
+ *         elif (<__pyx_buffer *> &self.view).obj == Py_None:
+ * 
+ *             (<__pyx_buffer *> &self.view).obj = NULL             # <<<<<<<<<<<<<<
+ *             Py_DECREF(Py_None)
+ * 
+ */
+    ((Py_buffer *)(&__pyx_v_self->view))->obj = NULL; /* view.obj holds None (as __cinit__ stores when it finds view.obj NULL): clear the field */
+
+    /* "View.MemoryView":378
+ * 
+ *             (<__pyx_buffer *> &self.view).obj = NULL
+ *             Py_DECREF(Py_None)             # <<<<<<<<<<<<<<
+ * 
+ *         cdef int i
+ */
+    Py_DECREF(Py_None); /* drop the reference to None that view.obj was holding; pairs with the Py_INCREF(Py_None) in __cinit__ */
+
+    /* "View.MemoryView":375
+ *         if self.obj is not None:
+ *             __Pyx_ReleaseBuffer(&self.view)
+ *         elif (<__pyx_buffer *> &self.view).obj == Py_None:             # <<<<<<<<<<<<<<
+ * 
+ *             (<__pyx_buffer *> &self.view).obj = NULL
+ */
+  }
+  __pyx_L3:;
+
+  /* "View.MemoryView":382
+ *         cdef int i
+ *         global __pyx_memoryview_thread_locks_used
+ *         if self.lock != NULL:             # <<<<<<<<<<<<<<
+ *             for i in range(__pyx_memoryview_thread_locks_used):
+ *                 if __pyx_memoryview_thread_locks[i] is self.lock:
+ */
+  __pyx_t_2 = ((__pyx_v_self->lock != NULL) != 0);
+  if (__pyx_t_2) {
+
+    /* "View.MemoryView":383
+ *         global __pyx_memoryview_thread_locks_used
+ *         if self.lock != NULL:
+ *             for i in range(__pyx_memoryview_thread_locks_used):             # <<<<<<<<<<<<<<
+ *                 if __pyx_memoryview_thread_locks[i] is self.lock:
+ *                     __pyx_memoryview_thread_locks_used -= 1
+ */
+    __pyx_t_3 = __pyx_memoryview_thread_locks_used; /* loop bound is captured once, before the body decrements the global counter */
+    __pyx_t_4 = __pyx_t_3;
+    for (__pyx_t_5 = 0; __pyx_t_5 < __pyx_t_4; __pyx_t_5+=1) {
+      __pyx_v_i = __pyx_t_5;
+
+      /* "View.MemoryView":384
+ *         if self.lock != NULL:
+ *             for i in range(__pyx_memoryview_thread_locks_used):
+ *                 if __pyx_memoryview_thread_locks[i] is self.lock:             # <<<<<<<<<<<<<<
+ *                     __pyx_memoryview_thread_locks_used -= 1
+ *                     if i != __pyx_memoryview_thread_locks_used:
+ */
+      __pyx_t_2 = (((__pyx_memoryview_thread_locks[__pyx_v_i]) == __pyx_v_self->lock) != 0); /* is self->lock the lock stored in slot i of the global array? */
+      if (__pyx_t_2) {
+
+        /* "View.MemoryView":385
+ *             for i in range(__pyx_memoryview_thread_locks_used):
+ *                 if __pyx_memoryview_thread_locks[i] is self.lock:
+ *                     __pyx_memoryview_thread_locks_used -= 1             # <<<<<<<<<<<<<<
+ *                     if i != __pyx_memoryview_thread_locks_used:
+ *                         __pyx_memoryview_thread_locks[i], __pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used] = (
+ */
+        __pyx_memoryview_thread_locks_used = (__pyx_memoryview_thread_locks_used - 1); /* one fewer slot in use; the counter now indexes the last slot that was in use */
+
+        /* "View.MemoryView":386
+ *                 if __pyx_memoryview_thread_locks[i] is self.lock:
+ *                     __pyx_memoryview_thread_locks_used -= 1
+ *                     if i != __pyx_memoryview_thread_locks_used:             # <<<<<<<<<<<<<<
+ *                         __pyx_memoryview_thread_locks[i], __pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used] = (
+ *                             __pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used], __pyx_memoryview_thread_locks[i])
+ */
+        __pyx_t_2 = ((__pyx_v_i != __pyx_memoryview_thread_locks_used) != 0); /* no swap needed when slot i already is that last slot */
+        if (__pyx_t_2) {
+
+          /* "View.MemoryView":388
+ *                     if i != __pyx_memoryview_thread_locks_used:
+ *                         __pyx_memoryview_thread_locks[i], __pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used] = (
+ *                             __pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used], __pyx_memoryview_thread_locks[i])             # <<<<<<<<<<<<<<
+ *                     break
+ *             else:
+ */
+          __pyx_t_6 = (__pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used]); /* read both slots into temporaries first ... */
+          __pyx_t_7 = (__pyx_memoryview_thread_locks[__pyx_v_i]);
+
+          /* "View.MemoryView":387
+ *                     __pyx_memoryview_thread_locks_used -= 1
+ *                     if i != __pyx_memoryview_thread_locks_used:
+ *                         __pyx_memoryview_thread_locks[i], __pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used] = (             # <<<<<<<<<<<<<<
+ *                             __pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used], __pyx_memoryview_thread_locks[i])
+ *                     break
+ */
+          (__pyx_memoryview_thread_locks[__pyx_v_i]) = __pyx_t_6; /* ... then store them swapped, so this lock ends up just past the in-use slots */
+          (__pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used]) = __pyx_t_7;
+
+          /* "View.MemoryView":386
+ *                 if __pyx_memoryview_thread_locks[i] is self.lock:
+ *                     __pyx_memoryview_thread_locks_used -= 1
+ *                     if i != __pyx_memoryview_thread_locks_used:             # <<<<<<<<<<<<<<
+ *                         __pyx_memoryview_thread_locks[i], __pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used] = (
+ *                             __pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used], __pyx_memoryview_thread_locks[i])
+ */
+        }
+
+        /* "View.MemoryView":389
+ *                         __pyx_memoryview_thread_locks[i], __pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used] = (
+ *                             __pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used], __pyx_memoryview_thread_locks[i])
+ *                     break             # <<<<<<<<<<<<<<
+ *             else:
+ *                 PyThread_free_lock(self.lock)
+ */
+        goto __pyx_L6_break; /* `break`: also jumps over the for-else block, so a matched lock is not freed */
+
+        /* "View.MemoryView":384
+ *         if self.lock != NULL:
+ *             for i in range(__pyx_memoryview_thread_locks_used):
+ *                 if __pyx_memoryview_thread_locks[i] is self.lock:             # <<<<<<<<<<<<<<
+ *                     __pyx_memoryview_thread_locks_used -= 1
+ *                     if i != __pyx_memoryview_thread_locks_used:
+ */
+      }
+    }
+    /*else: the loop finished without `break`, so no scanned slot equals self->lock*/ {
+
+      /* "View.MemoryView":391
+ *                     break
+ *             else:
+ *                 PyThread_free_lock(self.lock)             # <<<<<<<<<<<<<<
+ * 
+ *     cdef char *get_item_pointer(memoryview self, object index) except NULL:
+ */
+      PyThread_free_lock(__pyx_v_self->lock); /* lock was not found among the in-use array slots: free it */
+    }
+    __pyx_L6_break:;
+
+    /* "View.MemoryView":382
+ *         cdef int i
+ *         global __pyx_memoryview_thread_locks_used
+ *         if self.lock != NULL:             # <<<<<<<<<<<<<<
+ *             for i in range(__pyx_memoryview_thread_locks_used):
+ *                 if __pyx_memoryview_thread_locks[i] is self.lock:
+ */
+  }
+
+  /* "View.MemoryView":372
+ *         self.typeinfo = NULL
+ * 
+ *     def __dealloc__(memoryview self):             # <<<<<<<<<<<<<<
+ *         if self.obj is not None:
+ *             __Pyx_ReleaseBuffer(&self.view)
+ */
+
+  /* function exit code: void function with no error label; only the RefNanny context is closed */
+  __Pyx_RefNannyFinishContext();
+}
+
+/* "View.MemoryView":393
+ *                 PyThread_free_lock(self.lock)
+ * 
+ *     cdef char *get_item_pointer(memoryview self, object index) except NULL:             # <<<<<<<<<<<<<<
+ *         cdef Py_ssize_t dim
+ *         cdef char *itemp = <char *> self.view.buf
+ */
+
+static char *__pyx_memoryview_get_item_pointer(struct __pyx_memoryview_obj *__pyx_v_self, PyObject *__pyx_v_index) { /* starts at self->view.buf and applies __pyx_pybuffer_index once per element of index; returns the final pointer, or NULL with a traceback entry added on error */
+  Py_ssize_t __pyx_v_dim;
+  char *__pyx_v_itemp;
+  PyObject *__pyx_v_idx = NULL;
+  char *__pyx_r;
+  __Pyx_RefNannyDeclarations
+  Py_ssize_t __pyx_t_1;
+  PyObject *__pyx_t_2 = NULL;
+  Py_ssize_t __pyx_t_3;
+  PyObject *(*__pyx_t_4)(PyObject *);
+  PyObject *__pyx_t_5 = NULL;
+  Py_ssize_t __pyx_t_6;
+  char *__pyx_t_7;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("get_item_pointer", 0);
+
+  /* "View.MemoryView":395
+ *     cdef char *get_item_pointer(memoryview self, object index) except NULL:
+ *         cdef Py_ssize_t dim
+ *         cdef char *itemp = <char *> self.view.buf             # <<<<<<<<<<<<<<
+ * 
+ *         for dim, idx in enumerate(index):
+ */
+  __pyx_v_itemp = ((char *)__pyx_v_self->view.buf);
+
+  /* "View.MemoryView":397
+ *         cdef char *itemp = <char *> self.view.buf
+ * 
+ *         for dim, idx in enumerate(index):             # <<<<<<<<<<<<<<
+ *             itemp = pybuffer_index(&self.view, itemp, idx, dim)
+ * 
+ */
+  __pyx_t_1 = 0; /* enumerate() counter; copied into dim on each iteration */
+  if (likely(PyList_CheckExact(__pyx_v_index)) || PyTuple_CheckExact(__pyx_v_index)) { /* exact list or tuple: iterate by position */
+    __pyx_t_2 = __pyx_v_index; __Pyx_INCREF(__pyx_t_2); __pyx_t_3 = 0; /* t_2 = the sequence itself (own reference), t_3 = next position */
+    __pyx_t_4 = NULL; /* a NULL iternext pointer selects the positional path inside the loop */
+  } else { /* any other object: obtain an iterator and its tp_iternext slot */
+    __pyx_t_3 = -1; __pyx_t_2 = PyObject_GetIter(__pyx_v_index); if (unlikely(!__pyx_t_2)) __PYX_ERR(2, 397, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_2);
+    __pyx_t_4 = Py_TYPE(__pyx_t_2)->tp_iternext; if (unlikely(!__pyx_t_4)) __PYX_ERR(2, 397, __pyx_L1_error)
+  }
+  for (;;) { /* one pass per element of index; left through the `break` statements below */
+    if (likely(!__pyx_t_4)) { /* positional path (list or tuple) */
+      if (likely(PyList_CheckExact(__pyx_t_2))) {
+        if (__pyx_t_3 >= PyList_GET_SIZE(__pyx_t_2)) break; /* size is re-read on every pass */
+        #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS
+        __pyx_t_5 = PyList_GET_ITEM(__pyx_t_2, __pyx_t_3); __Pyx_INCREF(__pyx_t_5); __pyx_t_3++; if (unlikely(0 < 0)) __PYX_ERR(2, 397, __pyx_L1_error) /* the `0 < 0` test is constant-false, so this error jump is never taken */
+        #else
+        __pyx_t_5 = PySequence_ITEM(__pyx_t_2, __pyx_t_3); __pyx_t_3++; if (unlikely(!__pyx_t_5)) __PYX_ERR(2, 397, __pyx_L1_error)
+        __Pyx_GOTREF(__pyx_t_5);
+        #endif
+      } else {
+        if (__pyx_t_3 >= PyTuple_GET_SIZE(__pyx_t_2)) break;
+        #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS
+        __pyx_t_5 = PyTuple_GET_ITEM(__pyx_t_2, __pyx_t_3); __Pyx_INCREF(__pyx_t_5); __pyx_t_3++; if (unlikely(0 < 0)) __PYX_ERR(2, 397, __pyx_L1_error) /* constant-false check, as in the list branch */
+        #else
+        __pyx_t_5 = PySequence_ITEM(__pyx_t_2, __pyx_t_3); __pyx_t_3++; if (unlikely(!__pyx_t_5)) __PYX_ERR(2, 397, __pyx_L1_error)
+        __Pyx_GOTREF(__pyx_t_5);
+        #endif
+      }
+    } else { /* iterator path */
+      __pyx_t_5 = __pyx_t_4(__pyx_t_2);
+      if (unlikely(!__pyx_t_5)) { /* NULL result: either exhausted or an exception is pending */
+        PyObject* exc_type = PyErr_Occurred();
+        if (exc_type) {
+          if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); /* StopIteration is cleared and treated as normal end of iteration */
+          else __PYX_ERR(2, 397, __pyx_L1_error)
+        }
+        break;
+      }
+      __Pyx_GOTREF(__pyx_t_5);
+    }
+    __Pyx_XDECREF_SET(__pyx_v_idx, __pyx_t_5); /* idx = current element; NOTE(review): the macro presumably releases the previous idx value - confirm in the macro definition */
+    __pyx_t_5 = 0; /* temp is zeroed so the error label's XDECREF of t_5 does not touch the object now held by idx */
+    __pyx_v_dim = __pyx_t_1;
+    __pyx_t_1 = (__pyx_t_1 + 1);
+
+    /* "View.MemoryView":398
+ * 
+ *         for dim, idx in enumerate(index):
+ *             itemp = pybuffer_index(&self.view, itemp, idx, dim)             # <<<<<<<<<<<<<<
+ * 
+ *         return itemp
+ */
+    __pyx_t_6 = __Pyx_PyIndex_AsSsize_t(__pyx_v_idx); if (unlikely((__pyx_t_6 == (Py_ssize_t)-1) && PyErr_Occurred())) __PYX_ERR(2, 398, __pyx_L1_error)
+    __pyx_t_7 = __pyx_pybuffer_index((&__pyx_v_self->view), __pyx_v_itemp, __pyx_t_6, __pyx_v_dim); if (unlikely(__pyx_t_7 == ((char *)NULL))) __PYX_ERR(2, 398, __pyx_L1_error)
+    __pyx_v_itemp = __pyx_t_7; /* itemp becomes the pointer returned by pybuffer_index for (idx, dim); a NULL return was treated as an error above */
+
+    /* "View.MemoryView":397
+ *         cdef char *itemp = <char *> self.view.buf
+ * 
+ *         for dim, idx in enumerate(index):             # <<<<<<<<<<<<<<
+ *             itemp = pybuffer_index(&self.view, itemp, idx, dim)
+ * 
+ */
+  }
+  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; /* release the sequence/iterator reference taken before the loop */
+
+  /* "View.MemoryView":400
+ *             itemp = pybuffer_index(&self.view, itemp, idx, dim)
+ * 
+ *         return itemp             # <<<<<<<<<<<<<<
+ * 
+ * 
+ */
+  __pyx_r = __pyx_v_itemp;
+  goto __pyx_L0;
+
+  /* "View.MemoryView":393
+ *                 PyThread_free_lock(self.lock)
+ * 
+ *     cdef char *get_item_pointer(memoryview self, object index) except NULL:             # <<<<<<<<<<<<<<
+ *         cdef Py_ssize_t dim
+ *         cdef char *itemp = <char *> self.view.buf
+ */
+
+  /* function exit code: the error path releases the temporaries, records a traceback entry and returns NULL; both paths release idx */
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_2);
+  __Pyx_XDECREF(__pyx_t_5);
+  __Pyx_AddTraceback("View.MemoryView.memoryview.get_item_pointer", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = NULL;
+  __pyx_L0:;
+  __Pyx_XDECREF(__pyx_v_idx);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "View.MemoryView":403
+ * 
+ * 
+ *     def __getitem__(memoryview self, object index):             # <<<<<<<<<<<<<<
+ *         if index is Ellipsis:
+ *             return self
+ */
+
+/* Python wrapper for memoryview.__getitem__: casts self to struct __pyx_memoryview_obj* and returns whatever the implementation function returns. */
+static PyObject *__pyx_memoryview___getitem__(PyObject *__pyx_v_self, PyObject *__pyx_v_index); /*proto*/
+static PyObject *__pyx_memoryview___getitem__(PyObject *__pyx_v_self, PyObject *__pyx_v_index) {
+  PyObject *__pyx_r = 0;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("__getitem__ (wrapper)", 0);
+  __pyx_r = __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_4__getitem__(((struct __pyx_memoryview_obj *)__pyx_v_self), ((PyObject *)__pyx_v_index));
+
+  /* function exit code: the implementation's result is passed through unchanged */
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_4__getitem__(struct __pyx_memoryview_obj *__pyx_v_self, PyObject *__pyx_v_index) { /* memoryview.__getitem__ body: Ellipsis returns self; otherwise the index is normalised by _unellipsify and either a slice object or a single converted item is returned; NULL on error */
+  PyObject *__pyx_v_have_slices = NULL;
+  PyObject *__pyx_v_indices = NULL;
+  char *__pyx_v_itemp;
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  int __pyx_t_1;
+  int __pyx_t_2;
+  PyObject *__pyx_t_3 = NULL;
+  PyObject *__pyx_t_4 = NULL;
+  PyObject *__pyx_t_5 = NULL;
+  char *__pyx_t_6;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("__getitem__", 0);
+
+  /* "View.MemoryView":404
+ * 
+ *     def __getitem__(memoryview self, object index):
+ *         if index is Ellipsis:             # <<<<<<<<<<<<<<
+ *             return self
+ * 
+ */
+  __pyx_t_1 = (__pyx_v_index == __pyx_builtin_Ellipsis); /* pointer identity, matching `is` in the .pyx source */
+  __pyx_t_2 = (__pyx_t_1 != 0);
+  if (__pyx_t_2) {
+
+    /* "View.MemoryView":405
+ *     def __getitem__(memoryview self, object index):
+ *         if index is Ellipsis:
+ *             return self             # <<<<<<<<<<<<<<
+ * 
+ *         have_slices, indices = _unellipsify(index, self.view.ndim)
+ */
+    __Pyx_XDECREF(__pyx_r);
+    __Pyx_INCREF(((PyObject *)__pyx_v_self)); /* the caller receives its own reference to self */
+    __pyx_r = ((PyObject *)__pyx_v_self);
+    goto __pyx_L0;
+
+    /* "View.MemoryView":404
+ * 
+ *     def __getitem__(memoryview self, object index):
+ *         if index is Ellipsis:             # <<<<<<<<<<<<<<
+ *             return self
+ * 
+ */
+  }
+
+  /* "View.MemoryView":407
+ *             return self
+ * 
+ *         have_slices, indices = _unellipsify(index, self.view.ndim)             # <<<<<<<<<<<<<<
+ * 
+ *         cdef char *itemp
+ */
+  __pyx_t_3 = _unellipsify(__pyx_v_index, __pyx_v_self->view.ndim); if (unlikely(!__pyx_t_3)) __PYX_ERR(2, 407, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_3);
+  if (likely(__pyx_t_3 != Py_None)) { /* unpack the result into exactly two values */
+    PyObject* sequence = __pyx_t_3;
+    Py_ssize_t size = __Pyx_PySequence_SIZE(sequence);
+    if (unlikely(size != 2)) {
+      if (size > 2) __Pyx_RaiseTooManyValuesError(2);
+      else if (size >= 0) __Pyx_RaiseNeedMoreValuesError(size); /* a negative size raises nothing new here; NOTE(review): presumably the size call already set an exception - confirm */
+      __PYX_ERR(2, 407, __pyx_L1_error)
+    }
+    #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS
+    __pyx_t_4 = PyTuple_GET_ITEM(sequence, 0); 
+    __pyx_t_5 = PyTuple_GET_ITEM(sequence, 1); 
+    __Pyx_INCREF(__pyx_t_4);
+    __Pyx_INCREF(__pyx_t_5);
+    #else
+    __pyx_t_4 = PySequence_ITEM(sequence, 0); if (unlikely(!__pyx_t_4)) __PYX_ERR(2, 407, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_4);
+    __pyx_t_5 = PySequence_ITEM(sequence, 1); if (unlikely(!__pyx_t_5)) __PYX_ERR(2, 407, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_5);
+    #endif
+    __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; /* the two items are held separately now; release the container */
+  } else {
+    __Pyx_RaiseNoneNotIterableError(); __PYX_ERR(2, 407, __pyx_L1_error)
+  }
+  __pyx_v_have_slices = __pyx_t_4; /* references move from the temporaries to the named locals ... */
+  __pyx_t_4 = 0; /* ... and the temporaries are zeroed so the error label does not release them a second time */
+  __pyx_v_indices = __pyx_t_5;
+  __pyx_t_5 = 0;
+
+  /* "View.MemoryView":410
+ * 
+ *         cdef char *itemp
+ *         if have_slices:             # <<<<<<<<<<<<<<
+ *             return memview_slice(self, indices)
+ *         else:
+ */
+  __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_v_have_slices); if (unlikely(__pyx_t_2 < 0)) __PYX_ERR(2, 410, __pyx_L1_error)
+  if (__pyx_t_2) {
+
+    /* "View.MemoryView":411
+ *         cdef char *itemp
+ *         if have_slices:
+ *             return memview_slice(self, indices)             # <<<<<<<<<<<<<<
+ *         else:
+ *             itemp = self.get_item_pointer(indices)
+ */
+    __Pyx_XDECREF(__pyx_r);
+    __pyx_t_3 = ((PyObject *)__pyx_memview_slice(__pyx_v_self, __pyx_v_indices)); if (unlikely(!__pyx_t_3)) __PYX_ERR(2, 411, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_3);
+    __pyx_r = __pyx_t_3; /* the result of __pyx_memview_slice becomes the return value */
+    __pyx_t_3 = 0;
+    goto __pyx_L0;
+
+    /* "View.MemoryView":410
+ * 
+ *         cdef char *itemp
+ *         if have_slices:             # <<<<<<<<<<<<<<
+ *             return memview_slice(self, indices)
+ *         else:
+ */
+  }
+
+  /* "View.MemoryView":413
+ *             return memview_slice(self, indices)
+ *         else:
+ *             itemp = self.get_item_pointer(indices)             # <<<<<<<<<<<<<<
+ *             return self.convert_item_to_object(itemp)
+ * 
+ */
+  /*else: have_slices is false, so a single item is looked up*/ {
+    __pyx_t_6 = ((struct __pyx_vtabstruct_memoryview *)__pyx_v_self->__pyx_vtab)->get_item_pointer(__pyx_v_self, __pyx_v_indices); if (unlikely(__pyx_t_6 == ((char *)NULL))) __PYX_ERR(2, 413, __pyx_L1_error)
+    __pyx_v_itemp = __pyx_t_6; /* both calls in this branch are dispatched through self->__pyx_vtab */
+
+    /* "View.MemoryView":414
+ *         else:
+ *             itemp = self.get_item_pointer(indices)
+ *             return self.convert_item_to_object(itemp)             # <<<<<<<<<<<<<<
+ * 
+ *     def __setitem__(memoryview self, object index, object value):
+ */
+    __Pyx_XDECREF(__pyx_r);
+    __pyx_t_3 = ((struct __pyx_vtabstruct_memoryview *)__pyx_v_self->__pyx_vtab)->convert_item_to_object(__pyx_v_self, __pyx_v_itemp); if (unlikely(!__pyx_t_3)) __PYX_ERR(2, 414, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_3);
+    __pyx_r = __pyx_t_3;
+    __pyx_t_3 = 0;
+    goto __pyx_L0;
+  }
+
+  /* "View.MemoryView":403
+ * 
+ * 
+ *     def __getitem__(memoryview self, object index):             # <<<<<<<<<<<<<<
+ *         if index is Ellipsis:
+ *             return self
+ */
+
+  /* function exit code: the error path releases the temporaries, records a traceback entry and returns NULL; both paths release have_slices and indices */
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_3);
+  __Pyx_XDECREF(__pyx_t_4);
+  __Pyx_XDECREF(__pyx_t_5);
+  __Pyx_AddTraceback("View.MemoryView.memoryview.__getitem__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = NULL;
+  __pyx_L0:;
+  __Pyx_XDECREF(__pyx_v_have_slices);
+  __Pyx_XDECREF(__pyx_v_indices);
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "View.MemoryView":416
+ *             return self.convert_item_to_object(itemp)
+ * 
+ *     def __setitem__(memoryview self, object index, object value):             # <<<<<<<<<<<<<<
+ *         if self.view.readonly:
+ *             raise TypeError("Cannot assign to read-only memoryview")
+ */
+
+/* Python wrapper for memoryview.__setitem__: casts self to struct __pyx_memoryview_obj* and returns the implementation function's int result. */
+static int __pyx_memoryview___setitem__(PyObject *__pyx_v_self, PyObject *__pyx_v_index, PyObject *__pyx_v_value); /*proto*/
+static int __pyx_memoryview___setitem__(PyObject *__pyx_v_self, PyObject *__pyx_v_index, PyObject *__pyx_v_value) {
+  int __pyx_r;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("__setitem__ (wrapper)", 0);
+  __pyx_r = __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_6__setitem__(((struct __pyx_memoryview_obj *)__pyx_v_self), ((PyObject *)__pyx_v_index), ((PyObject *)__pyx_v_value));
+
+  /* function exit code: the implementation's result is passed through unchanged */
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_6__setitem__(struct __pyx_memoryview_obj *__pyx_v_self, PyObject *__pyx_v_index, PyObject *__pyx_v_value) { /* Implements memoryview.__setitem__(index, value): rejects read-only views, then dispatches to slice assignment, scalar broadcast-style assignment, or single-item assignment. Returns 0 on success and -1 (after adding a traceback entry) on error. */
+  PyObject *__pyx_v_have_slices = NULL;
+  PyObject *__pyx_v_obj = NULL;
+  int __pyx_r;
+  __Pyx_RefNannyDeclarations
+  int __pyx_t_1;
+  PyObject *__pyx_t_2 = NULL;
+  PyObject *__pyx_t_3 = NULL;
+  PyObject *__pyx_t_4 = NULL;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("__setitem__", 0);
+  __Pyx_INCREF(__pyx_v_index); /* index is rebound further down, so hold an owned reference here; it is released at __pyx_L0 */
+
+  /* "View.MemoryView":417
+ * 
+ *     def __setitem__(memoryview self, object index, object value):
+ *         if self.view.readonly:             # <<<<<<<<<<<<<<
+ *             raise TypeError("Cannot assign to read-only memoryview")
+ * 
+ */
+  __pyx_t_1 = (__pyx_v_self->view.readonly != 0);
+  if (unlikely(__pyx_t_1)) {
+
+    /* "View.MemoryView":418
+ *     def __setitem__(memoryview self, object index, object value):
+ *         if self.view.readonly:
+ *             raise TypeError("Cannot assign to read-only memoryview")             # <<<<<<<<<<<<<<
+ * 
+ *         have_slices, index = _unellipsify(index, self.view.ndim)
+ */
+    __pyx_t_2 = __Pyx_PyObject_Call(__pyx_builtin_TypeError, __pyx_tuple__11, NULL); if (unlikely(!__pyx_t_2)) __PYX_ERR(2, 418, __pyx_L1_error) /* build the TypeError instance from the pre-built argument tuple */
+    __Pyx_GOTREF(__pyx_t_2);
+    __Pyx_Raise(__pyx_t_2, 0, 0, 0);
+    __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+    __PYX_ERR(2, 418, __pyx_L1_error)
+
+    /* "View.MemoryView":417
+ * 
+ *     def __setitem__(memoryview self, object index, object value):
+ *         if self.view.readonly:             # <<<<<<<<<<<<<<
+ *             raise TypeError("Cannot assign to read-only memoryview")
+ * 
+ */
+  }
+
+  /* "View.MemoryView":420
+ *             raise TypeError("Cannot assign to read-only memoryview")
+ * 
+ *         have_slices, index = _unellipsify(index, self.view.ndim)             # <<<<<<<<<<<<<<
+ * 
+ *         if have_slices:
+ */
+  __pyx_t_2 = _unellipsify(__pyx_v_index, __pyx_v_self->view.ndim); if (unlikely(!__pyx_t_2)) __PYX_ERR(2, 420, __pyx_L1_error) /* result is unpacked below as a 2-item sequence: (have_slices, index) */
+  __Pyx_GOTREF(__pyx_t_2);
+  if (likely(__pyx_t_2 != Py_None)) {
+    PyObject* sequence = __pyx_t_2;
+    Py_ssize_t size = __Pyx_PySequence_SIZE(sequence);
+    if (unlikely(size != 2)) { /* anything other than exactly two items is an unpacking error */
+      if (size > 2) __Pyx_RaiseTooManyValuesError(2);
+      else if (size >= 0) __Pyx_RaiseNeedMoreValuesError(size);
+      __PYX_ERR(2, 420, __pyx_L1_error)
+    }
+    #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS
+    __pyx_t_3 = PyTuple_GET_ITEM(sequence, 0); 
+    __pyx_t_4 = PyTuple_GET_ITEM(sequence, 1); 
+    __Pyx_INCREF(__pyx_t_3);
+    __Pyx_INCREF(__pyx_t_4);
+    #else
+    __pyx_t_3 = PySequence_ITEM(sequence, 0); if (unlikely(!__pyx_t_3)) __PYX_ERR(2, 420, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_3);
+    __pyx_t_4 = PySequence_ITEM(sequence, 1); if (unlikely(!__pyx_t_4)) __PYX_ERR(2, 420, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_4);
+    #endif
+    __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; /* both items are now held in t_3/t_4, so the sequence itself can be dropped */
+  } else {
+    __Pyx_RaiseNoneNotIterableError(); __PYX_ERR(2, 420, __pyx_L1_error)
+  }
+  __pyx_v_have_slices = __pyx_t_3; /* reference moves from the temporary to the local */
+  __pyx_t_3 = 0;
+  __Pyx_DECREF_SET(__pyx_v_index, __pyx_t_4); /* rebind index to the value returned by _unellipsify, releasing the previous one */
+  __pyx_t_4 = 0;
+
+  /* "View.MemoryView":422
+ *         have_slices, index = _unellipsify(index, self.view.ndim)
+ * 
+ *         if have_slices:             # <<<<<<<<<<<<<<
+ *             obj = self.is_slice(value)
+ *             if obj:
+ */
+  __pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_v_have_slices); if (unlikely(__pyx_t_1 < 0)) __PYX_ERR(2, 422, __pyx_L1_error) /* a negative truth-test result signals an error */
+  if (__pyx_t_1) {
+
+    /* "View.MemoryView":423
+ * 
+ *         if have_slices:
+ *             obj = self.is_slice(value)             # <<<<<<<<<<<<<<
+ *             if obj:
+ *                 self.setitem_slice_assignment(self[index], obj)
+ */
+    __pyx_t_2 = ((struct __pyx_vtabstruct_memoryview *)__pyx_v_self->__pyx_vtab)->is_slice(__pyx_v_self, __pyx_v_value); if (unlikely(!__pyx_t_2)) __PYX_ERR(2, 423, __pyx_L1_error) /* dispatched through the vtable */
+    __Pyx_GOTREF(__pyx_t_2);
+    __pyx_v_obj = __pyx_t_2;
+    __pyx_t_2 = 0;
+
+    /* "View.MemoryView":424
+ *         if have_slices:
+ *             obj = self.is_slice(value)
+ *             if obj:             # <<<<<<<<<<<<<<
+ *                 self.setitem_slice_assignment(self[index], obj)
+ *             else:
+ */
+    __pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_v_obj); if (unlikely(__pyx_t_1 < 0)) __PYX_ERR(2, 424, __pyx_L1_error)
+    if (__pyx_t_1) {
+
+      /* "View.MemoryView":425
+ *             obj = self.is_slice(value)
+ *             if obj:
+ *                 self.setitem_slice_assignment(self[index], obj)             # <<<<<<<<<<<<<<
+ *             else:
+ *                 self.setitem_slice_assign_scalar(self[index], value)
+ */
+      __pyx_t_2 = __Pyx_PyObject_GetItem(((PyObject *)__pyx_v_self), __pyx_v_index); if (unlikely(!__pyx_t_2)) __PYX_ERR(2, 425, __pyx_L1_error) /* self[index]: the destination passed to the copy below */
+      __Pyx_GOTREF(__pyx_t_2);
+      __pyx_t_4 = ((struct __pyx_vtabstruct_memoryview *)__pyx_v_self->__pyx_vtab)->setitem_slice_assignment(__pyx_v_self, __pyx_t_2, __pyx_v_obj); if (unlikely(!__pyx_t_4)) __PYX_ERR(2, 425, __pyx_L1_error)
+      __Pyx_GOTREF(__pyx_t_4);
+      __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+      __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; /* the call's return value is not used */
+
+      /* "View.MemoryView":424
+ *         if have_slices:
+ *             obj = self.is_slice(value)
+ *             if obj:             # <<<<<<<<<<<<<<
+ *                 self.setitem_slice_assignment(self[index], obj)
+ *             else:
+ */
+      goto __pyx_L5;
+    }
+
+    /* "View.MemoryView":427
+ *                 self.setitem_slice_assignment(self[index], obj)
+ *             else:
+ *                 self.setitem_slice_assign_scalar(self[index], value)             # <<<<<<<<<<<<<<
+ *         else:
+ *             self.setitem_indexed(index, value)
+ */
+    /*else*/ {
+      __pyx_t_4 = __Pyx_PyObject_GetItem(((PyObject *)__pyx_v_self), __pyx_v_index); if (unlikely(!__pyx_t_4)) __PYX_ERR(2, 427, __pyx_L1_error)
+      __Pyx_GOTREF(__pyx_t_4);
+      if (!(likely(((__pyx_t_4) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_4, __pyx_memoryview_type))))) __PYX_ERR(2, 427, __pyx_L1_error) /* self[index] must be None or a memoryview before the struct cast on the next line */
+      __pyx_t_2 = ((struct __pyx_vtabstruct_memoryview *)__pyx_v_self->__pyx_vtab)->setitem_slice_assign_scalar(__pyx_v_self, ((struct __pyx_memoryview_obj *)__pyx_t_4), __pyx_v_value); if (unlikely(!__pyx_t_2)) __PYX_ERR(2, 427, __pyx_L1_error)
+      __Pyx_GOTREF(__pyx_t_2);
+      __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
+      __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+    }
+    __pyx_L5:;
+
+    /* "View.MemoryView":422
+ *         have_slices, index = _unellipsify(index, self.view.ndim)
+ * 
+ *         if have_slices:             # <<<<<<<<<<<<<<
+ *             obj = self.is_slice(value)
+ *             if obj:
+ */
+    goto __pyx_L4;
+  }
+
+  /* "View.MemoryView":429
+ *                 self.setitem_slice_assign_scalar(self[index], value)
+ *         else:
+ *             self.setitem_indexed(index, value)             # <<<<<<<<<<<<<<
+ * 
+ *     cdef is_slice(self, obj):
+ */
+  /*else*/ {
+    __pyx_t_2 = ((struct __pyx_vtabstruct_memoryview *)__pyx_v_self->__pyx_vtab)->setitem_indexed(__pyx_v_self, __pyx_v_index, __pyx_v_value); if (unlikely(!__pyx_t_2)) __PYX_ERR(2, 429, __pyx_L1_error) /* no slices in the index: plain single-item assignment */
+    __Pyx_GOTREF(__pyx_t_2);
+    __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+  }
+  __pyx_L4:;
+
+  /* "View.MemoryView":416
+ *             return self.convert_item_to_object(itemp)
+ * 
+ *     def __setitem__(memoryview self, object index, object value):             # <<<<<<<<<<<<<<
+ *         if self.view.readonly:
+ *             raise TypeError("Cannot assign to read-only memoryview")
+ */
+
+  /* function exit code */
+  __pyx_r = 0; /* success */
+  goto __pyx_L0;
+  __pyx_L1_error:; /* error exit: release any live temporaries, record a traceback entry, report -1 */
+  __Pyx_XDECREF(__pyx_t_2);
+  __Pyx_XDECREF(__pyx_t_3);
+  __Pyx_XDECREF(__pyx_t_4);
+  __Pyx_AddTraceback("View.MemoryView.memoryview.__setitem__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = -1;
+  __pyx_L0:; /* common exit shared by success and error: release the owned locals */
+  __Pyx_XDECREF(__pyx_v_have_slices);
+  __Pyx_XDECREF(__pyx_v_obj);
+  __Pyx_XDECREF(__pyx_v_index);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "View.MemoryView":431
+ *             self.setitem_indexed(index, value)
+ * 
+ *     cdef is_slice(self, obj):             # <<<<<<<<<<<<<<
+ *         if not isinstance(obj, memoryview):
+ *             try:
+ */
+
+static PyObject *__pyx_memoryview_is_slice(struct __pyx_memoryview_obj *__pyx_v_self, PyObject *__pyx_v_obj) { /* Returns obj as a memoryview: obj itself when it already is one, otherwise a new memoryview constructed from it; returns None when that construction raises TypeError, and NULL for any other error. */
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  int __pyx_t_1;
+  int __pyx_t_2;
+  PyObject *__pyx_t_3 = NULL;
+  PyObject *__pyx_t_4 = NULL;
+  PyObject *__pyx_t_5 = NULL;
+  PyObject *__pyx_t_6 = NULL;
+  PyObject *__pyx_t_7 = NULL;
+  PyObject *__pyx_t_8 = NULL;
+  int __pyx_t_9;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("is_slice", 0);
+  __Pyx_INCREF(__pyx_v_obj); /* obj may be rebound below, so hold an owned reference; released at __pyx_L0 */
+
+  /* "View.MemoryView":432
+ * 
+ *     cdef is_slice(self, obj):
+ *         if not isinstance(obj, memoryview):             # <<<<<<<<<<<<<<
+ *             try:
+ *                 obj = memoryview(obj, self.flags & ~PyBUF_WRITABLE | PyBUF_ANY_CONTIGUOUS,
+ */
+  __pyx_t_1 = __Pyx_TypeCheck(__pyx_v_obj, __pyx_memoryview_type); 
+  __pyx_t_2 = ((!(__pyx_t_1 != 0)) != 0); /* true when obj is NOT a memoryview */
+  if (__pyx_t_2) {
+
+    /* "View.MemoryView":433
+ *     cdef is_slice(self, obj):
+ *         if not isinstance(obj, memoryview):
+ *             try:             # <<<<<<<<<<<<<<
+ *                 obj = memoryview(obj, self.flags & ~PyBUF_WRITABLE | PyBUF_ANY_CONTIGUOUS,
+ *                                  self.dtype_is_object)
+ */
+    {
+      __Pyx_PyThreadState_declare
+      __Pyx_PyThreadState_assign
+      __Pyx_ExceptionSave(&__pyx_t_3, &__pyx_t_4, &__pyx_t_5); /* remember the exception state on entry to the try; it is put back via __Pyx_ExceptionReset on both except exits */
+      __Pyx_XGOTREF(__pyx_t_3);
+      __Pyx_XGOTREF(__pyx_t_4);
+      __Pyx_XGOTREF(__pyx_t_5);
+      /*try:*/ {
+
+        /* "View.MemoryView":434
+ *         if not isinstance(obj, memoryview):
+ *             try:
+ *                 obj = memoryview(obj, self.flags & ~PyBUF_WRITABLE | PyBUF_ANY_CONTIGUOUS,             # <<<<<<<<<<<<<<
+ *                                  self.dtype_is_object)
+ *             except TypeError:
+ */
+        __pyx_t_6 = __Pyx_PyInt_From_int(((__pyx_v_self->flags & (~PyBUF_WRITABLE)) | PyBUF_ANY_CONTIGUOUS)); if (unlikely(!__pyx_t_6)) __PYX_ERR(2, 434, __pyx_L4_error) /* self.flags with PyBUF_WRITABLE cleared and PyBUF_ANY_CONTIGUOUS set */
+        __Pyx_GOTREF(__pyx_t_6);
+
+        /* "View.MemoryView":435
+ *             try:
+ *                 obj = memoryview(obj, self.flags & ~PyBUF_WRITABLE | PyBUF_ANY_CONTIGUOUS,
+ *                                  self.dtype_is_object)             # <<<<<<<<<<<<<<
+ *             except TypeError:
+ *                 return None
+ */
+        __pyx_t_7 = __Pyx_PyBool_FromLong(__pyx_v_self->dtype_is_object); if (unlikely(!__pyx_t_7)) __PYX_ERR(2, 435, __pyx_L4_error)
+        __Pyx_GOTREF(__pyx_t_7);
+
+        /* "View.MemoryView":434
+ *         if not isinstance(obj, memoryview):
+ *             try:
+ *                 obj = memoryview(obj, self.flags & ~PyBUF_WRITABLE | PyBUF_ANY_CONTIGUOUS,             # <<<<<<<<<<<<<<
+ *                                  self.dtype_is_object)
+ *             except TypeError:
+ */
+        __pyx_t_8 = PyTuple_New(3); if (unlikely(!__pyx_t_8)) __PYX_ERR(2, 434, __pyx_L4_error) /* positional arguments: (obj, flags, dtype_is_object) */
+        __Pyx_GOTREF(__pyx_t_8);
+        __Pyx_INCREF(__pyx_v_obj); /* extra reference that the tuple slot takes over on the SET_ITEM below */
+        __Pyx_GIVEREF(__pyx_v_obj);
+        PyTuple_SET_ITEM(__pyx_t_8, 0, __pyx_v_obj);
+        __Pyx_GIVEREF(__pyx_t_6);
+        PyTuple_SET_ITEM(__pyx_t_8, 1, __pyx_t_6);
+        __Pyx_GIVEREF(__pyx_t_7);
+        PyTuple_SET_ITEM(__pyx_t_8, 2, __pyx_t_7);
+        __pyx_t_6 = 0; /* references now belong to the tuple; clear the temporaries so cleanup does not release them again */
+        __pyx_t_7 = 0;
+        __pyx_t_7 = __Pyx_PyObject_Call(((PyObject *)__pyx_memoryview_type), __pyx_t_8, NULL); if (unlikely(!__pyx_t_7)) __PYX_ERR(2, 434, __pyx_L4_error)
+        __Pyx_GOTREF(__pyx_t_7);
+        __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
+        __Pyx_DECREF_SET(__pyx_v_obj, __pyx_t_7); /* rebind obj to the new memoryview, releasing the original object reference */
+        __pyx_t_7 = 0;
+
+        /* "View.MemoryView":433
+ *     cdef is_slice(self, obj):
+ *         if not isinstance(obj, memoryview):
+ *             try:             # <<<<<<<<<<<<<<
+ *                 obj = memoryview(obj, self.flags & ~PyBUF_WRITABLE | PyBUF_ANY_CONTIGUOUS,
+ *                                  self.dtype_is_object)
+ */
+      }
+      __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0; /* try body succeeded: the saved exception state is simply dropped */
+      __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0;
+      __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0;
+      goto __pyx_L9_try_end;
+      __pyx_L4_error:; /* try body failed: release whatever temporaries were live at the failure point */
+      __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0;
+      __Pyx_XDECREF(__pyx_t_7); __pyx_t_7 = 0;
+      __Pyx_XDECREF(__pyx_t_8); __pyx_t_8 = 0;
+
+      /* "View.MemoryView":436
+ *                 obj = memoryview(obj, self.flags & ~PyBUF_WRITABLE | PyBUF_ANY_CONTIGUOUS,
+ *                                  self.dtype_is_object)
+ *             except TypeError:             # <<<<<<<<<<<<<<
+ *                 return None
+ * 
+ */
+      __pyx_t_9 = __Pyx_PyErr_ExceptionMatches(__pyx_builtin_TypeError); /* only TypeError is handled; anything else falls through to __pyx_L6_except_error */
+      if (__pyx_t_9) {
+        __Pyx_AddTraceback("View.MemoryView.memoryview.is_slice", __pyx_clineno, __pyx_lineno, __pyx_filename);
+        if (__Pyx_GetException(&__pyx_t_7, &__pyx_t_8, &__pyx_t_6) < 0) __PYX_ERR(2, 436, __pyx_L6_except_error)
+        __Pyx_GOTREF(__pyx_t_7);
+        __Pyx_GOTREF(__pyx_t_8);
+        __Pyx_GOTREF(__pyx_t_6);
+
+        /* "View.MemoryView":437
+ *                                  self.dtype_is_object)
+ *             except TypeError:
+ *                 return None             # <<<<<<<<<<<<<<
+ * 
+ *         return obj
+ */
+        __Pyx_XDECREF(__pyx_r);
+        __pyx_r = Py_None; __Pyx_INCREF(Py_None);
+        __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; /* the fetched exception objects are discarded: the TypeError is swallowed */
+        __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
+        __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
+        goto __pyx_L7_except_return;
+      }
+      goto __pyx_L6_except_error;
+      __pyx_L6_except_error:;
+
+      /* "View.MemoryView":433
+ *     cdef is_slice(self, obj):
+ *         if not isinstance(obj, memoryview):
+ *             try:             # <<<<<<<<<<<<<<
+ *                 obj = memoryview(obj, self.flags & ~PyBUF_WRITABLE | PyBUF_ANY_CONTIGUOUS,
+ *                                  self.dtype_is_object)
+ */
+      __Pyx_XGIVEREF(__pyx_t_3);
+      __Pyx_XGIVEREF(__pyx_t_4);
+      __Pyx_XGIVEREF(__pyx_t_5);
+      __Pyx_ExceptionReset(__pyx_t_3, __pyx_t_4, __pyx_t_5); /* put back the state saved at try entry, then propagate the error */
+      goto __pyx_L1_error;
+      __pyx_L7_except_return:;
+      __Pyx_XGIVEREF(__pyx_t_3);
+      __Pyx_XGIVEREF(__pyx_t_4);
+      __Pyx_XGIVEREF(__pyx_t_5);
+      __Pyx_ExceptionReset(__pyx_t_3, __pyx_t_4, __pyx_t_5); /* put back the state saved at try entry, then return the None set above */
+      goto __pyx_L0;
+      __pyx_L9_try_end:;
+    }
+
+    /* "View.MemoryView":432
+ * 
+ *     cdef is_slice(self, obj):
+ *         if not isinstance(obj, memoryview):             # <<<<<<<<<<<<<<
+ *             try:
+ *                 obj = memoryview(obj, self.flags & ~PyBUF_WRITABLE | PyBUF_ANY_CONTIGUOUS,
+ */
+  }
+
+  /* "View.MemoryView":439
+ *                 return None
+ * 
+ *         return obj             # <<<<<<<<<<<<<<
+ * 
+ *     cdef setitem_slice_assignment(self, dst, src):
+ */
+  __Pyx_XDECREF(__pyx_r);
+  __Pyx_INCREF(__pyx_v_obj); /* the caller receives its own reference; the local one is released at __pyx_L0 */
+  __pyx_r = __pyx_v_obj;
+  goto __pyx_L0;
+
+  /* "View.MemoryView":431
+ *             self.setitem_indexed(index, value)
+ * 
+ *     cdef is_slice(self, obj):             # <<<<<<<<<<<<<<
+ *         if not isinstance(obj, memoryview):
+ *             try:
+ */
+
+  /* function exit code */
+  __pyx_L1_error:; /* error exit: release temporaries, record a traceback entry, return NULL */
+  __Pyx_XDECREF(__pyx_t_6);
+  __Pyx_XDECREF(__pyx_t_7);
+  __Pyx_XDECREF(__pyx_t_8);
+  __Pyx_AddTraceback("View.MemoryView.memoryview.is_slice", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = 0;
+  __pyx_L0:;
+  __Pyx_XDECREF(__pyx_v_obj);
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "View.MemoryView":441
+ *         return obj
+ * 
+ *     cdef setitem_slice_assignment(self, dst, src):             # <<<<<<<<<<<<<<
+ *         cdef __Pyx_memviewslice dst_slice
+ *         cdef __Pyx_memviewslice src_slice
+ */
+
+static PyObject *__pyx_memoryview_setitem_slice_assignment(struct __pyx_memoryview_obj *__pyx_v_self, PyObject *__pyx_v_dst, PyObject *__pyx_v_src) { /* Copies the contents of src into dst by obtaining a slice struct for each and calling __pyx_memoryview_copy_contents. Returns None on success and NULL (with a traceback entry) on error. */
+  __Pyx_memviewslice __pyx_v_dst_slice;
+  __Pyx_memviewslice __pyx_v_src_slice;
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  __Pyx_memviewslice *__pyx_t_1;
+  __Pyx_memviewslice *__pyx_t_2;
+  PyObject *__pyx_t_3 = NULL;
+  int __pyx_t_4;
+  int __pyx_t_5;
+  int __pyx_t_6;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("setitem_slice_assignment", 0);
+
+  /* "View.MemoryView":445
+ *         cdef __Pyx_memviewslice src_slice
+ * 
+ *         memoryview_copy_contents(get_slice_from_memview(src, &src_slice)[0],             # <<<<<<<<<<<<<<
+ *                                  get_slice_from_memview(dst, &dst_slice)[0],
+ *                                  src.ndim, dst.ndim, self.dtype_is_object)
+ */
+  if (!(likely(((__pyx_v_src) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_src, __pyx_memoryview_type))))) __PYX_ERR(2, 445, __pyx_L1_error) /* src must be None or a memoryview before the struct cast on the next line */
+  __pyx_t_1 = __pyx_memoryview_get_slice_from_memoryview(((struct __pyx_memoryview_obj *)__pyx_v_src), (&__pyx_v_src_slice)); if (unlikely(__pyx_t_1 == ((__Pyx_memviewslice *)NULL))) __PYX_ERR(2, 445, __pyx_L1_error) /* a NULL result is treated as an error */
+
+  /* "View.MemoryView":446
+ * 
+ *         memoryview_copy_contents(get_slice_from_memview(src, &src_slice)[0],
+ *                                  get_slice_from_memview(dst, &dst_slice)[0],             # <<<<<<<<<<<<<<
+ *                                  src.ndim, dst.ndim, self.dtype_is_object)
+ * 
+ */
+  if (!(likely(((__pyx_v_dst) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_dst, __pyx_memoryview_type))))) __PYX_ERR(2, 446, __pyx_L1_error) /* same check for dst */
+  __pyx_t_2 = __pyx_memoryview_get_slice_from_memoryview(((struct __pyx_memoryview_obj *)__pyx_v_dst), (&__pyx_v_dst_slice)); if (unlikely(__pyx_t_2 == ((__Pyx_memviewslice *)NULL))) __PYX_ERR(2, 446, __pyx_L1_error)
+
+  /* "View.MemoryView":447
+ *         memoryview_copy_contents(get_slice_from_memview(src, &src_slice)[0],
+ *                                  get_slice_from_memview(dst, &dst_slice)[0],
+ *                                  src.ndim, dst.ndim, self.dtype_is_object)             # <<<<<<<<<<<<<<
+ * 
+ *     cdef setitem_slice_assign_scalar(self, memoryview dst, value):
+ */
+  __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_src, __pyx_n_s_ndim); if (unlikely(!__pyx_t_3)) __PYX_ERR(2, 447, __pyx_L1_error) /* src and dst are untyped objects here, so ndim is read by attribute lookup and converted to a C int */
+  __Pyx_GOTREF(__pyx_t_3);
+  __pyx_t_4 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_4 == (int)-1) && PyErr_Occurred())) __PYX_ERR(2, 447, __pyx_L1_error) /* -1 is only an error when an exception is also set */
+  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+  __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_dst, __pyx_n_s_ndim); if (unlikely(!__pyx_t_3)) __PYX_ERR(2, 447, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_3);
+  __pyx_t_5 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_5 == (int)-1) && PyErr_Occurred())) __PYX_ERR(2, 447, __pyx_L1_error)
+  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+
+  /* "View.MemoryView":445
+ *         cdef __Pyx_memviewslice src_slice
+ * 
+ *         memoryview_copy_contents(get_slice_from_memview(src, &src_slice)[0],             # <<<<<<<<<<<<<<
+ *                                  get_slice_from_memview(dst, &dst_slice)[0],
+ *                                  src.ndim, dst.ndim, self.dtype_is_object)
+ */
+  __pyx_t_6 = __pyx_memoryview_copy_contents((__pyx_t_1[0]), (__pyx_t_2[0]), __pyx_t_4, __pyx_t_5, __pyx_v_self->dtype_is_object); if (unlikely(__pyx_t_6 == ((int)-1))) __PYX_ERR(2, 445, __pyx_L1_error) /* slice structs are passed by value; a return of -1 is treated as an error */
+
+  /* "View.MemoryView":441
+ *         return obj
+ * 
+ *     cdef setitem_slice_assignment(self, dst, src):             # <<<<<<<<<<<<<<
+ *         cdef __Pyx_memviewslice dst_slice
+ *         cdef __Pyx_memviewslice src_slice
+ */
+
+  /* function exit code */
+  __pyx_r = Py_None; __Pyx_INCREF(Py_None);
+  goto __pyx_L0;
+  __pyx_L1_error:; /* error exit: release the temporary, record a traceback entry, return NULL */
+  __Pyx_XDECREF(__pyx_t_3);
+  __Pyx_AddTraceback("View.MemoryView.memoryview.setitem_slice_assignment", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = 0;
+  __pyx_L0:;
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "View.MemoryView":449
+ *                                  src.ndim, dst.ndim, self.dtype_is_object)
+ * 
+ *     cdef setitem_slice_assign_scalar(self, memoryview dst, value):             # <<<<<<<<<<<<<<
+ *         cdef int array[128]
+ *         cdef void *tmp = NULL
+ */
+
+static PyObject *__pyx_memoryview_setitem_slice_assign_scalar(struct __pyx_memoryview_obj *__pyx_v_self, struct __pyx_memoryview_obj *__pyx_v_dst, PyObject *__pyx_v_value) { /* Converts value into a single item held in a scratch buffer and hands that buffer to __pyx_memoryview_slice_assign_scalar for the slice dst. Returns None on success and NULL (with a traceback entry) on error. */
+  int __pyx_v_array[0x80]; /* stack scratch buffer of 128 ints, used when one item fits in it */
+  void *__pyx_v_tmp;
+  void *__pyx_v_item;
+  __Pyx_memviewslice *__pyx_v_dst_slice;
+  __Pyx_memviewslice __pyx_v_tmp_slice;
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  __Pyx_memviewslice *__pyx_t_1;
+  int __pyx_t_2;
+  PyObject *__pyx_t_3 = NULL;
+  int __pyx_t_4;
+  int __pyx_t_5;
+  char const *__pyx_t_6;
+  PyObject *__pyx_t_7 = NULL;
+  PyObject *__pyx_t_8 = NULL;
+  PyObject *__pyx_t_9 = NULL;
+  PyObject *__pyx_t_10 = NULL;
+  PyObject *__pyx_t_11 = NULL;
+  PyObject *__pyx_t_12 = NULL;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("setitem_slice_assign_scalar", 0);
+
+  /* "View.MemoryView":451
+ *     cdef setitem_slice_assign_scalar(self, memoryview dst, value):
+ *         cdef int array[128]
+ *         cdef void *tmp = NULL             # <<<<<<<<<<<<<<
+ *         cdef void *item
+ * 
+ */
+  __pyx_v_tmp = NULL; /* stays NULL unless the heap fallback below is taken */
+
+  /* "View.MemoryView":456
+ *         cdef __Pyx_memviewslice *dst_slice
+ *         cdef __Pyx_memviewslice tmp_slice
+ *         dst_slice = get_slice_from_memview(dst, &tmp_slice)             # <<<<<<<<<<<<<<
+ * 
+ *         if <size_t>self.view.itemsize > sizeof(array):
+ */
+  __pyx_t_1 = __pyx_memoryview_get_slice_from_memoryview(__pyx_v_dst, (&__pyx_v_tmp_slice)); if (unlikely(__pyx_t_1 == ((__Pyx_memviewslice *)NULL))) __PYX_ERR(2, 456, __pyx_L1_error) /* a NULL result is treated as an error */
+  __pyx_v_dst_slice = __pyx_t_1;
+
+  /* "View.MemoryView":458
+ *         dst_slice = get_slice_from_memview(dst, &tmp_slice)
+ * 
+ *         if <size_t>self.view.itemsize > sizeof(array):             # <<<<<<<<<<<<<<
+ *             tmp = PyMem_Malloc(self.view.itemsize)
+ *             if tmp == NULL:
+ */
+  __pyx_t_2 = ((((size_t)__pyx_v_self->view.itemsize) > (sizeof(__pyx_v_array))) != 0); /* does one item exceed the stack buffer's size in bytes? */
+  if (__pyx_t_2) {
+
+    /* "View.MemoryView":459
+ * 
+ *         if <size_t>self.view.itemsize > sizeof(array):
+ *             tmp = PyMem_Malloc(self.view.itemsize)             # <<<<<<<<<<<<<<
+ *             if tmp == NULL:
+ *                 raise MemoryError
+ */
+    __pyx_v_tmp = PyMem_Malloc(__pyx_v_self->view.itemsize); /* heap buffer for an oversized item; freed in the finally section below */
+
+    /* "View.MemoryView":460
+ *         if <size_t>self.view.itemsize > sizeof(array):
+ *             tmp = PyMem_Malloc(self.view.itemsize)
+ *             if tmp == NULL:             # <<<<<<<<<<<<<<
+ *                 raise MemoryError
+ *             item = tmp
+ */
+    __pyx_t_2 = ((__pyx_v_tmp == NULL) != 0);
+    if (unlikely(__pyx_t_2)) {
+
+      /* "View.MemoryView":461
+ *             tmp = PyMem_Malloc(self.view.itemsize)
+ *             if tmp == NULL:
+ *                 raise MemoryError             # <<<<<<<<<<<<<<
+ *             item = tmp
+ *         else:
+ */
+      PyErr_NoMemory(); __PYX_ERR(2, 461, __pyx_L1_error) /* allocation failed, so there is nothing to free: go straight to the function error exit */
+
+      /* "View.MemoryView":460
+ *         if <size_t>self.view.itemsize > sizeof(array):
+ *             tmp = PyMem_Malloc(self.view.itemsize)
+ *             if tmp == NULL:             # <<<<<<<<<<<<<<
+ *                 raise MemoryError
+ *             item = tmp
+ */
+    }
+
+    /* "View.MemoryView":462
+ *             if tmp == NULL:
+ *                 raise MemoryError
+ *             item = tmp             # <<<<<<<<<<<<<<
+ *         else:
+ *             item = <void *> array
+ */
+    __pyx_v_item = __pyx_v_tmp;
+
+    /* "View.MemoryView":458
+ *         dst_slice = get_slice_from_memview(dst, &tmp_slice)
+ * 
+ *         if <size_t>self.view.itemsize > sizeof(array):             # <<<<<<<<<<<<<<
+ *             tmp = PyMem_Malloc(self.view.itemsize)
+ *             if tmp == NULL:
+ */
+    goto __pyx_L3;
+  }
+
+  /* "View.MemoryView":464
+ *             item = tmp
+ *         else:
+ *             item = <void *> array             # <<<<<<<<<<<<<<
+ * 
+ *         try:
+ */
+  /*else*/ {
+    __pyx_v_item = ((void *)__pyx_v_array); /* item fits: use the stack buffer and leave tmp as NULL */
+  }
+  __pyx_L3:;
+
+  /* "View.MemoryView":466
+ *             item = <void *> array
+ * 
+ *         try:             # <<<<<<<<<<<<<<
+ *             if self.dtype_is_object:
+ *                 (<PyObject **> item)[0] = <PyObject *> value
+ */
+  /*try:*/ {
+
+    /* "View.MemoryView":467
+ * 
+ *         try:
+ *             if self.dtype_is_object:             # <<<<<<<<<<<<<<
+ *                 (<PyObject **> item)[0] = <PyObject *> value
+ *             else:
+ */
+    __pyx_t_2 = (__pyx_v_self->dtype_is_object != 0);
+    if (__pyx_t_2) {
+
+      /* "View.MemoryView":468
+ *         try:
+ *             if self.dtype_is_object:
+ *                 (<PyObject **> item)[0] = <PyObject *> value             # <<<<<<<<<<<<<<
+ *             else:
+ *                 self.assign_item_from_object(<char *> item, value)
+ */
+      (((PyObject **)__pyx_v_item)[0]) = ((PyObject *)__pyx_v_value); /* object dtype: the item is the raw object pointer; no INCREF is done at this point */
+
+      /* "View.MemoryView":467
+ * 
+ *         try:
+ *             if self.dtype_is_object:             # <<<<<<<<<<<<<<
+ *                 (<PyObject **> item)[0] = <PyObject *> value
+ *             else:
+ */
+      goto __pyx_L8;
+    }
+
+    /* "View.MemoryView":470
+ *                 (<PyObject **> item)[0] = <PyObject *> value
+ *             else:
+ *                 self.assign_item_from_object(<char *> item, value)             # <<<<<<<<<<<<<<
+ * 
+ * 
+ */
+    /*else*/ {
+      __pyx_t_3 = ((struct __pyx_vtabstruct_memoryview *)__pyx_v_self->__pyx_vtab)->assign_item_from_object(__pyx_v_self, ((char *)__pyx_v_item), __pyx_v_value); if (unlikely(!__pyx_t_3)) __PYX_ERR(2, 470, __pyx_L6_error) /* errors inside the try go to __pyx_L6_error so the finally section still runs */
+      __Pyx_GOTREF(__pyx_t_3);
+      __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+    }
+    __pyx_L8:;
+
+    /* "View.MemoryView":474
+ * 
+ * 
+ *             if self.view.suboffsets != NULL:             # <<<<<<<<<<<<<<
+ *                 assert_direct_dimensions(self.view.suboffsets, self.view.ndim)
+ *             slice_assign_scalar(dst_slice, dst.view.ndim, self.view.itemsize,
+ */
+    __pyx_t_2 = ((__pyx_v_self->view.suboffsets != NULL) != 0);
+    if (__pyx_t_2) {
+
+      /* "View.MemoryView":475
+ * 
+ *             if self.view.suboffsets != NULL:
+ *                 assert_direct_dimensions(self.view.suboffsets, self.view.ndim)             # <<<<<<<<<<<<<<
+ *             slice_assign_scalar(dst_slice, dst.view.ndim, self.view.itemsize,
+ *                                 item, self.dtype_is_object)
+ */
+      __pyx_t_3 = assert_direct_dimensions(__pyx_v_self->view.suboffsets, __pyx_v_self->view.ndim); if (unlikely(!__pyx_t_3)) __PYX_ERR(2, 475, __pyx_L6_error) /* a NULL return aborts the assignment via the finally section */
+      __Pyx_GOTREF(__pyx_t_3);
+      __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+
+      /* "View.MemoryView":474
+ * 
+ * 
+ *             if self.view.suboffsets != NULL:             # <<<<<<<<<<<<<<
+ *                 assert_direct_dimensions(self.view.suboffsets, self.view.ndim)
+ *             slice_assign_scalar(dst_slice, dst.view.ndim, self.view.itemsize,
+ */
+    }
+
+    /* "View.MemoryView":476
+ *             if self.view.suboffsets != NULL:
+ *                 assert_direct_dimensions(self.view.suboffsets, self.view.ndim)
+ *             slice_assign_scalar(dst_slice, dst.view.ndim, self.view.itemsize,             # <<<<<<<<<<<<<<
+ *                                 item, self.dtype_is_object)
+ *         finally:
+ */
+    __pyx_memoryview_slice_assign_scalar(__pyx_v_dst_slice, __pyx_v_dst->view.ndim, __pyx_v_self->view.itemsize, __pyx_v_item, __pyx_v_self->dtype_is_object); /* no result is checked here */
+  }
+
+  /* "View.MemoryView":479
+ *                                 item, self.dtype_is_object)
+ *         finally:
+ *             PyMem_Free(tmp)             # <<<<<<<<<<<<<<
+ * 
+ *     cdef setitem_indexed(self, index, value):
+ */
+  /*finally:*/ {
+    /*normal exit:*/{
+      PyMem_Free(__pyx_v_tmp); /* tmp is NULL when the stack buffer was used */
+      goto __pyx_L7;
+    }
+    __pyx_L6_error:;
+    /*exception exit:*/{
+      __Pyx_PyThreadState_declare
+      __Pyx_PyThreadState_assign
+      __pyx_t_7 = 0; __pyx_t_8 = 0; __pyx_t_9 = 0; __pyx_t_10 = 0; __pyx_t_11 = 0; __pyx_t_12 = 0;
+      __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0;
+      if (PY_MAJOR_VERSION >= 3) __Pyx_ExceptionSwap(&__pyx_t_10, &__pyx_t_11, &__pyx_t_12);
+      if ((PY_MAJOR_VERSION < 3) || unlikely(__Pyx_GetException(&__pyx_t_7, &__pyx_t_8, &__pyx_t_9) < 0)) __Pyx_ErrFetch(&__pyx_t_7, &__pyx_t_8, &__pyx_t_9); /* hold the pending exception in t_7..t_9 while the finally body runs */
+      __Pyx_XGOTREF(__pyx_t_7);
+      __Pyx_XGOTREF(__pyx_t_8);
+      __Pyx_XGOTREF(__pyx_t_9);
+      __Pyx_XGOTREF(__pyx_t_10);
+      __Pyx_XGOTREF(__pyx_t_11);
+      __Pyx_XGOTREF(__pyx_t_12);
+      __pyx_t_4 = __pyx_lineno; __pyx_t_5 = __pyx_clineno; __pyx_t_6 = __pyx_filename; /* save the error position; it is written back after the finally body */
+      {
+        PyMem_Free(__pyx_v_tmp); /* the finally body, duplicated for the exception path */
+      }
+      if (PY_MAJOR_VERSION >= 3) {
+        __Pyx_XGIVEREF(__pyx_t_10);
+        __Pyx_XGIVEREF(__pyx_t_11);
+        __Pyx_XGIVEREF(__pyx_t_12);
+        __Pyx_ExceptionReset(__pyx_t_10, __pyx_t_11, __pyx_t_12);
+      }
+      __Pyx_XGIVEREF(__pyx_t_7);
+      __Pyx_XGIVEREF(__pyx_t_8);
+      __Pyx_XGIVEREF(__pyx_t_9);
+      __Pyx_ErrRestore(__pyx_t_7, __pyx_t_8, __pyx_t_9); /* re-raise the held exception */
+      __pyx_t_7 = 0; __pyx_t_8 = 0; __pyx_t_9 = 0; __pyx_t_10 = 0; __pyx_t_11 = 0; __pyx_t_12 = 0;
+      __pyx_lineno = __pyx_t_4; __pyx_clineno = __pyx_t_5; __pyx_filename = __pyx_t_6;
+      goto __pyx_L1_error;
+    }
+    __pyx_L7:;
+  }
+
+  /* "View.MemoryView":449
+ *                                  src.ndim, dst.ndim, self.dtype_is_object)
+ * 
+ *     cdef setitem_slice_assign_scalar(self, memoryview dst, value):             # <<<<<<<<<<<<<<
+ *         cdef int array[128]
+ *         cdef void *tmp = NULL
+ */
+
+  /* function exit code */
+  __pyx_r = Py_None; __Pyx_INCREF(Py_None);
+  goto __pyx_L0;
+  __pyx_L1_error:; /* error exit: release the temporary, record a traceback entry, return NULL */
+  __Pyx_XDECREF(__pyx_t_3);
+  __Pyx_AddTraceback("View.MemoryView.memoryview.setitem_slice_assign_scalar", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = 0;
+  __pyx_L0:;
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "View.MemoryView":481
+ *             PyMem_Free(tmp)
+ * 
+ *     cdef setitem_indexed(self, index, value):             # <<<<<<<<<<<<<<
+ *         cdef char *itemp = self.get_item_pointer(index)
+ *         self.assign_item_from_object(itemp, value)
+ */
+
+static PyObject *__pyx_memoryview_setitem_indexed(struct __pyx_memoryview_obj *__pyx_v_self, PyObject *__pyx_v_index, PyObject *__pyx_v_value) { /* Stores value at the single item addressed by index: resolves the item pointer, then writes value through assign_item_from_object. Returns None on success and NULL (with a traceback entry) on error. */
+  char *__pyx_v_itemp;
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  char *__pyx_t_1;
+  PyObject *__pyx_t_2 = NULL;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("setitem_indexed", 0);
+
+  /* "View.MemoryView":482
+ * 
+ *     cdef setitem_indexed(self, index, value):
+ *         cdef char *itemp = self.get_item_pointer(index)             # <<<<<<<<<<<<<<
+ *         self.assign_item_from_object(itemp, value)
+ * 
+ */
+  __pyx_t_1 = ((struct __pyx_vtabstruct_memoryview *)__pyx_v_self->__pyx_vtab)->get_item_pointer(__pyx_v_self, __pyx_v_index); if (unlikely(__pyx_t_1 == ((char *)NULL))) __PYX_ERR(2, 482, __pyx_L1_error) /* dispatched through the vtable; a NULL pointer is treated as an error */
+  __pyx_v_itemp = __pyx_t_1;
+
+  /* "View.MemoryView":483
+ *     cdef setitem_indexed(self, index, value):
+ *         cdef char *itemp = self.get_item_pointer(index)
+ *         self.assign_item_from_object(itemp, value)             # <<<<<<<<<<<<<<
+ * 
+ *     cdef convert_item_to_object(self, char *itemp):
+ */
+  __pyx_t_2 = ((struct __pyx_vtabstruct_memoryview *)__pyx_v_self->__pyx_vtab)->assign_item_from_object(__pyx_v_self, __pyx_v_itemp, __pyx_v_value); if (unlikely(!__pyx_t_2)) __PYX_ERR(2, 483, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_2);
+  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; /* the call's return value is not used */
+
+  /* "View.MemoryView":481
+ *             PyMem_Free(tmp)
+ * 
+ *     cdef setitem_indexed(self, index, value):             # <<<<<<<<<<<<<<
+ *         cdef char *itemp = self.get_item_pointer(index)
+ *         self.assign_item_from_object(itemp, value)
+ */
+
+  /* function exit code */
+  __pyx_r = Py_None; __Pyx_INCREF(Py_None);
+  goto __pyx_L0;
+  __pyx_L1_error:; /* error exit: release the temporary, record a traceback entry, return NULL */
+  __Pyx_XDECREF(__pyx_t_2);
+  __Pyx_AddTraceback("View.MemoryView.memoryview.setitem_indexed", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = 0;
+  __pyx_L0:;
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "View.MemoryView":485
+ *         self.assign_item_from_object(itemp, value)
+ * 
+ *     cdef convert_item_to_object(self, char *itemp):             # <<<<<<<<<<<<<<
+ *         """Only used if instantiated manually by the user, or if Cython doesn't
+ *         know how to convert the type"""
+ */
+
+static PyObject *__pyx_memoryview_convert_item_to_object(struct __pyx_memoryview_obj *__pyx_v_self, char *__pyx_v_itemp) { /* Decodes the item at itemp: copies view.itemsize bytes into a bytes object and runs struct.unpack(view.format, bytes). Returns a new reference (result[0] for a one-character format, else the whole result), or NULL on error. */
+  PyObject *__pyx_v_struct = NULL;
+  PyObject *__pyx_v_bytesitem = 0;
+  PyObject *__pyx_v_result = NULL;
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  PyObject *__pyx_t_1 = NULL;
+  PyObject *__pyx_t_2 = NULL;
+  PyObject *__pyx_t_3 = NULL;
+  PyObject *__pyx_t_4 = NULL;
+  PyObject *__pyx_t_5 = NULL;
+  PyObject *__pyx_t_6 = NULL;
+  PyObject *__pyx_t_7 = NULL;
+  int __pyx_t_8;
+  PyObject *__pyx_t_9 = NULL;
+  size_t __pyx_t_10;
+  int __pyx_t_11;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("convert_item_to_object", 0);
+
+  /* "View.MemoryView":488
+ *         """Only used if instantiated manually by the user, or if Cython doesn't
+ *         know how to convert the type"""
+ *         import struct             # <<<<<<<<<<<<<<
+ *         cdef bytes bytesitem
+ * 
+ */
+  __pyx_t_1 = __Pyx_Import(__pyx_n_s_struct, 0, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 488, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_v_struct = __pyx_t_1; /* ownership of the module reference moves to the local; released at __pyx_L0 */
+  __pyx_t_1 = 0;
+
+  /* "View.MemoryView":491
+ *         cdef bytes bytesitem
+ * 
+ *         bytesitem = itemp[:self.view.itemsize]             # <<<<<<<<<<<<<<
+ *         try:
+ *             result = struct.unpack(self.view.format, bytesitem)
+ */
+  __pyx_t_1 = __Pyx_PyBytes_FromStringAndSize(__pyx_v_itemp + 0, __pyx_v_self->view.itemsize - 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 491, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_v_bytesitem = ((PyObject*)__pyx_t_1);
+  __pyx_t_1 = 0;
+
+  /* "View.MemoryView":492
+ * 
+ *         bytesitem = itemp[:self.view.itemsize]
+ *         try:             # <<<<<<<<<<<<<<
+ *             result = struct.unpack(self.view.format, bytesitem)
+ *         except struct.error:
+ */
+  {
+    __Pyx_PyThreadState_declare
+    __Pyx_PyThreadState_assign
+    __Pyx_ExceptionSave(&__pyx_t_2, &__pyx_t_3, &__pyx_t_4); /* saved state is handed back via __Pyx_ExceptionReset on both exits of this try block */
+    __Pyx_XGOTREF(__pyx_t_2);
+    __Pyx_XGOTREF(__pyx_t_3);
+    __Pyx_XGOTREF(__pyx_t_4);
+    /*try:*/ {
+
+      /* "View.MemoryView":493
+ *         bytesitem = itemp[:self.view.itemsize]
+ *         try:
+ *             result = struct.unpack(self.view.format, bytesitem)             # <<<<<<<<<<<<<<
+ *         except struct.error:
+ *             raise ValueError("Unable to convert item to object")
+ */
+      __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_struct, __pyx_n_s_unpack); if (unlikely(!__pyx_t_5)) __PYX_ERR(2, 493, __pyx_L3_error)
+      __Pyx_GOTREF(__pyx_t_5);
+      __pyx_t_6 = __Pyx_PyBytes_FromString(__pyx_v_self->view.format); if (unlikely(!__pyx_t_6)) __PYX_ERR(2, 493, __pyx_L3_error)
+      __Pyx_GOTREF(__pyx_t_6);
+      __pyx_t_7 = NULL;
+      __pyx_t_8 = 0; /* becomes 1 when a bound self is extracted below and must be passed as an extra leading argument */
+      if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_5))) { /* bound method: split it into its self object and underlying function */
+        __pyx_t_7 = PyMethod_GET_SELF(__pyx_t_5);
+        if (likely(__pyx_t_7)) {
+          PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_5);
+          __Pyx_INCREF(__pyx_t_7);
+          __Pyx_INCREF(function);
+          __Pyx_DECREF_SET(__pyx_t_5, function);
+          __pyx_t_8 = 1;
+        }
+      }
+      #if CYTHON_FAST_PYCALL
+      if (PyFunction_Check(__pyx_t_5)) {
+        PyObject *__pyx_temp[3] = {__pyx_t_7, __pyx_t_6, __pyx_v_bytesitem}; /* slot 0 is the optional self; the call below starts at slot 1 when there is none */
+        __pyx_t_1 = __Pyx_PyFunction_FastCall(__pyx_t_5, __pyx_temp+1-__pyx_t_8, 2+__pyx_t_8); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 493, __pyx_L3_error)
+        __Pyx_XDECREF(__pyx_t_7); __pyx_t_7 = 0;
+        __Pyx_GOTREF(__pyx_t_1);
+        __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
+      } else
+      #endif
+      #if CYTHON_FAST_PYCCALL
+      if (__Pyx_PyFastCFunction_Check(__pyx_t_5)) {
+        PyObject *__pyx_temp[3] = {__pyx_t_7, __pyx_t_6, __pyx_v_bytesitem}; /* same argument layout as the PyFunction fast path above */
+        __pyx_t_1 = __Pyx_PyCFunction_FastCall(__pyx_t_5, __pyx_temp+1-__pyx_t_8, 2+__pyx_t_8); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 493, __pyx_L3_error)
+        __Pyx_XDECREF(__pyx_t_7); __pyx_t_7 = 0;
+        __Pyx_GOTREF(__pyx_t_1);
+        __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
+      } else
+      #endif
+      { /* generic path: build an argument tuple and call through __Pyx_PyObject_Call */
+        __pyx_t_9 = PyTuple_New(2+__pyx_t_8); if (unlikely(!__pyx_t_9)) __PYX_ERR(2, 493, __pyx_L3_error)
+        __Pyx_GOTREF(__pyx_t_9);
+        if (__pyx_t_7) {
+          __Pyx_GIVEREF(__pyx_t_7); PyTuple_SET_ITEM(__pyx_t_9, 0, __pyx_t_7); __pyx_t_7 = NULL;
+        }
+        __Pyx_GIVEREF(__pyx_t_6);
+        PyTuple_SET_ITEM(__pyx_t_9, 0+__pyx_t_8, __pyx_t_6); /* the tuple takes over this reference; __pyx_t_6 is cleared below without a DECREF */
+        __Pyx_INCREF(__pyx_v_bytesitem); /* extra reference for the tuple; the local keeps its own */
+        __Pyx_GIVEREF(__pyx_v_bytesitem);
+        PyTuple_SET_ITEM(__pyx_t_9, 1+__pyx_t_8, __pyx_v_bytesitem);
+        __pyx_t_6 = 0;
+        __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_5, __pyx_t_9, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 493, __pyx_L3_error)
+        __Pyx_GOTREF(__pyx_t_1);
+        __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0;
+      }
+      __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+      __pyx_v_result = __pyx_t_1;
+      __pyx_t_1 = 0;
+
+      /* "View.MemoryView":492
+ * 
+ *         bytesitem = itemp[:self.view.itemsize]
+ *         try:             # <<<<<<<<<<<<<<
+ *             result = struct.unpack(self.view.format, bytesitem)
+ *         except struct.error:
+ */
+    }
+
+    /* "View.MemoryView":497
+ *             raise ValueError("Unable to convert item to object")
+ *         else:
+ *             if len(self.view.format) == 1:             # <<<<<<<<<<<<<<
+ *                 return result[0]
+ *             return result
+ */
+    /*else:*/ {
+      __pyx_t_10 = strlen(__pyx_v_self->view.format); 
+      __pyx_t_11 = ((__pyx_t_10 == 1) != 0); /* one-character format string: return result[0] instead of the whole result */
+      if (__pyx_t_11) {
+
+        /* "View.MemoryView":498
+ *         else:
+ *             if len(self.view.format) == 1:
+ *                 return result[0]             # <<<<<<<<<<<<<<
+ *             return result
+ * 
+ */
+        __Pyx_XDECREF(__pyx_r);
+        __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_result, 0, long, 1, __Pyx_PyInt_From_long, 0, 0, 1); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 498, __pyx_L5_except_error)
+        __Pyx_GOTREF(__pyx_t_1);
+        __pyx_r = __pyx_t_1;
+        __pyx_t_1 = 0;
+        goto __pyx_L6_except_return;
+
+        /* "View.MemoryView":497
+ *             raise ValueError("Unable to convert item to object")
+ *         else:
+ *             if len(self.view.format) == 1:             # <<<<<<<<<<<<<<
+ *                 return result[0]
+ *             return result
+ */
+      }
+
+      /* "View.MemoryView":499
+ *             if len(self.view.format) == 1:
+ *                 return result[0]
+ *             return result             # <<<<<<<<<<<<<<
+ * 
+ *     cdef assign_item_from_object(self, char *itemp, object value):
+ */
+      __Pyx_XDECREF(__pyx_r);
+      __Pyx_INCREF(__pyx_v_result); /* the return value needs its own reference; the local's is dropped at __pyx_L0 */
+      __pyx_r = __pyx_v_result;
+      goto __pyx_L6_except_return;
+    }
+    __pyx_L3_error:; /* reached when a step of the try body failed; release the temporaries it may have left set */
+    __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0;
+    __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0;
+    __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0;
+    __Pyx_XDECREF(__pyx_t_7); __pyx_t_7 = 0;
+    __Pyx_XDECREF(__pyx_t_9); __pyx_t_9 = 0;
+
+    /* "View.MemoryView":494
+ *         try:
+ *             result = struct.unpack(self.view.format, bytesitem)
+ *         except struct.error:             # <<<<<<<<<<<<<<
+ *             raise ValueError("Unable to convert item to object")
+ *         else:
+ */
+    __Pyx_ErrFetch(&__pyx_t_1, &__pyx_t_5, &__pyx_t_9); /* take the pending error so its type can be compared with struct.error */
+    __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_struct, __pyx_n_s_error); if (unlikely(!__pyx_t_6)) __PYX_ERR(2, 494, __pyx_L5_except_error)
+    __Pyx_GOTREF(__pyx_t_6);
+    __pyx_t_8 = __Pyx_PyErr_GivenExceptionMatches(__pyx_t_1, __pyx_t_6);
+    __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
+    __Pyx_ErrRestore(__pyx_t_1, __pyx_t_5, __pyx_t_9); /* put the error back regardless of the match result */
+    __pyx_t_1 = 0; __pyx_t_5 = 0; __pyx_t_9 = 0;
+    if (__pyx_t_8) {
+      __Pyx_AddTraceback("View.MemoryView.memoryview.convert_item_to_object", __pyx_clineno, __pyx_lineno, __pyx_filename);
+      if (__Pyx_GetException(&__pyx_t_9, &__pyx_t_5, &__pyx_t_1) < 0) __PYX_ERR(2, 494, __pyx_L5_except_error)
+      __Pyx_GOTREF(__pyx_t_9);
+      __Pyx_GOTREF(__pyx_t_5);
+      __Pyx_GOTREF(__pyx_t_1);
+
+      /* "View.MemoryView":495
+ *             result = struct.unpack(self.view.format, bytesitem)
+ *         except struct.error:
+ *             raise ValueError("Unable to convert item to object")             # <<<<<<<<<<<<<<
+ *         else:
+ *             if len(self.view.format) == 1:
+ */
+      __pyx_t_6 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__12, NULL); if (unlikely(!__pyx_t_6)) __PYX_ERR(2, 495, __pyx_L5_except_error)
+      __Pyx_GOTREF(__pyx_t_6);
+      __Pyx_Raise(__pyx_t_6, 0, 0, 0); /* raise the ValueError from inside the struct.error handler */
+      __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
+      __PYX_ERR(2, 495, __pyx_L5_except_error)
+    }
+    goto __pyx_L5_except_error; /* not a struct.error: the restored error propagates as it was */
+    __pyx_L5_except_error:;
+
+    /* "View.MemoryView":492
+ * 
+ *         bytesitem = itemp[:self.view.itemsize]
+ *         try:             # <<<<<<<<<<<<<<
+ *             result = struct.unpack(self.view.format, bytesitem)
+ *         except struct.error:
+ */
+    __Pyx_XGIVEREF(__pyx_t_2);
+    __Pyx_XGIVEREF(__pyx_t_3);
+    __Pyx_XGIVEREF(__pyx_t_4);
+    __Pyx_ExceptionReset(__pyx_t_2, __pyx_t_3, __pyx_t_4); /* error exit of the try block: hand back the state saved by __Pyx_ExceptionSave */
+    goto __pyx_L1_error;
+    __pyx_L6_except_return:;
+    __Pyx_XGIVEREF(__pyx_t_2);
+    __Pyx_XGIVEREF(__pyx_t_3);
+    __Pyx_XGIVEREF(__pyx_t_4);
+    __Pyx_ExceptionReset(__pyx_t_2, __pyx_t_3, __pyx_t_4); /* return exit of the try block: same hand-back, then skip the error cleanup */
+    goto __pyx_L0;
+  }
+
+  /* "View.MemoryView":485
+ *         self.assign_item_from_object(itemp, value)
+ * 
+ *     cdef convert_item_to_object(self, char *itemp):             # <<<<<<<<<<<<<<
+ *         """Only used if instantiated manually by the user, or if Cython doesn't
+ *         know how to convert the type"""
+ */
+
+  /* function exit code */
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_1);
+  __Pyx_XDECREF(__pyx_t_5);
+  __Pyx_XDECREF(__pyx_t_6);
+  __Pyx_XDECREF(__pyx_t_7);
+  __Pyx_XDECREF(__pyx_t_9);
+  __Pyx_AddTraceback("View.MemoryView.memoryview.convert_item_to_object", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = 0; /* NULL return signals the error to the caller */
+  __pyx_L0:; /* common exit: locals are released on both the success and the error path */
+  __Pyx_XDECREF(__pyx_v_struct);
+  __Pyx_XDECREF(__pyx_v_bytesitem);
+  __Pyx_XDECREF(__pyx_v_result);
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "View.MemoryView":501
+ *             return result
+ * 
+ *     cdef assign_item_from_object(self, char *itemp, object value):             # <<<<<<<<<<<<<<
+ *         """Only used if instantiated manually by the user, or if Cython doesn't
+ *         know how to convert the type"""
+ */
+
+static PyObject *__pyx_memoryview_assign_item_from_object(struct __pyx_memoryview_obj *__pyx_v_self, char *__pyx_v_itemp, PyObject *__pyx_v_value) { /* Encodes value with struct.pack(view.format, ...) and copies the packed bytes to itemp one byte at a time. Returns a new reference to None, or NULL on error. */
+  PyObject *__pyx_v_struct = NULL;
+  char __pyx_v_c;
+  PyObject *__pyx_v_bytesvalue = 0;
+  Py_ssize_t __pyx_v_i;
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  PyObject *__pyx_t_1 = NULL;
+  int __pyx_t_2;
+  int __pyx_t_3;
+  PyObject *__pyx_t_4 = NULL;
+  PyObject *__pyx_t_5 = NULL;
+  PyObject *__pyx_t_6 = NULL;
+  int __pyx_t_7;
+  PyObject *__pyx_t_8 = NULL;
+  Py_ssize_t __pyx_t_9;
+  PyObject *__pyx_t_10 = NULL;
+  char *__pyx_t_11;
+  char *__pyx_t_12;
+  char *__pyx_t_13;
+  char *__pyx_t_14;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("assign_item_from_object", 0);
+
+  /* "View.MemoryView":504
+ *         """Only used if instantiated manually by the user, or if Cython doesn't
+ *         know how to convert the type"""
+ *         import struct             # <<<<<<<<<<<<<<
+ *         cdef char c
+ *         cdef bytes bytesvalue
+ */
+  __pyx_t_1 = __Pyx_Import(__pyx_n_s_struct, 0, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 504, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_v_struct = __pyx_t_1; /* ownership of the module reference moves to the local; released at __pyx_L0 */
+  __pyx_t_1 = 0;
+
+  /* "View.MemoryView":509
+ *         cdef Py_ssize_t i
+ * 
+ *         if isinstance(value, tuple):             # <<<<<<<<<<<<<<
+ *             bytesvalue = struct.pack(self.view.format, *value)
+ *         else:
+ */
+  __pyx_t_2 = PyTuple_Check(__pyx_v_value); /* a tuple value is expanded into separate pack() arguments; anything else is passed as one argument */
+  __pyx_t_3 = (__pyx_t_2 != 0);
+  if (__pyx_t_3) {
+
+    /* "View.MemoryView":510
+ * 
+ *         if isinstance(value, tuple):
+ *             bytesvalue = struct.pack(self.view.format, *value)             # <<<<<<<<<<<<<<
+ *         else:
+ *             bytesvalue = struct.pack(self.view.format, value)
+ */
+    __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_struct, __pyx_n_s_pack); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 510, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_1);
+    __pyx_t_4 = __Pyx_PyBytes_FromString(__pyx_v_self->view.format); if (unlikely(!__pyx_t_4)) __PYX_ERR(2, 510, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_4);
+    __pyx_t_5 = PyTuple_New(1); if (unlikely(!__pyx_t_5)) __PYX_ERR(2, 510, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_5);
+    __Pyx_GIVEREF(__pyx_t_4);
+    PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_t_4); /* the 1-tuple takes over the reference to the format bytes */
+    __pyx_t_4 = 0;
+    __pyx_t_4 = __Pyx_PySequence_Tuple(__pyx_v_value); if (unlikely(!__pyx_t_4)) __PYX_ERR(2, 510, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_4);
+    __pyx_t_6 = PyNumber_Add(__pyx_t_5, __pyx_t_4); if (unlikely(!__pyx_t_6)) __PYX_ERR(2, 510, __pyx_L1_error) /* call args = (format,) + tuple(value) */
+    __Pyx_GOTREF(__pyx_t_6);
+    __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+    __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
+    __pyx_t_4 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_t_6, NULL); if (unlikely(!__pyx_t_4)) __PYX_ERR(2, 510, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_4);
+    __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+    __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
+    if (!(likely(PyBytes_CheckExact(__pyx_t_4))||((__pyx_t_4) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "bytes", Py_TYPE(__pyx_t_4)->tp_name), 0))) __PYX_ERR(2, 510, __pyx_L1_error) /* the pack() result must be exactly bytes or None, otherwise TypeError */
+    __pyx_v_bytesvalue = ((PyObject*)__pyx_t_4);
+    __pyx_t_4 = 0;
+
+    /* "View.MemoryView":509
+ *         cdef Py_ssize_t i
+ * 
+ *         if isinstance(value, tuple):             # <<<<<<<<<<<<<<
+ *             bytesvalue = struct.pack(self.view.format, *value)
+ *         else:
+ */
+    goto __pyx_L3;
+  }
+
+  /* "View.MemoryView":512
+ *             bytesvalue = struct.pack(self.view.format, *value)
+ *         else:
+ *             bytesvalue = struct.pack(self.view.format, value)             # <<<<<<<<<<<<<<
+ * 
+ *         for i, c in enumerate(bytesvalue):
+ */
+  /*else*/ {
+    __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_struct, __pyx_n_s_pack); if (unlikely(!__pyx_t_6)) __PYX_ERR(2, 512, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_6);
+    __pyx_t_1 = __Pyx_PyBytes_FromString(__pyx_v_self->view.format); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 512, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_1);
+    __pyx_t_5 = NULL;
+    __pyx_t_7 = 0; /* becomes 1 when a bound self is extracted below and must be passed as an extra leading argument */
+    if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_6))) { /* bound method: split it into its self object and underlying function */
+      __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_6);
+      if (likely(__pyx_t_5)) {
+        PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_6);
+        __Pyx_INCREF(__pyx_t_5);
+        __Pyx_INCREF(function);
+        __Pyx_DECREF_SET(__pyx_t_6, function);
+        __pyx_t_7 = 1;
+      }
+    }
+    #if CYTHON_FAST_PYCALL
+    if (PyFunction_Check(__pyx_t_6)) {
+      PyObject *__pyx_temp[3] = {__pyx_t_5, __pyx_t_1, __pyx_v_value}; /* slot 0 is the optional self; the call below starts at slot 1 when there is none */
+      __pyx_t_4 = __Pyx_PyFunction_FastCall(__pyx_t_6, __pyx_temp+1-__pyx_t_7, 2+__pyx_t_7); if (unlikely(!__pyx_t_4)) __PYX_ERR(2, 512, __pyx_L1_error)
+      __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0;
+      __Pyx_GOTREF(__pyx_t_4);
+      __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+    } else
+    #endif
+    #if CYTHON_FAST_PYCCALL
+    if (__Pyx_PyFastCFunction_Check(__pyx_t_6)) {
+      PyObject *__pyx_temp[3] = {__pyx_t_5, __pyx_t_1, __pyx_v_value}; /* same argument layout as the PyFunction fast path above */
+      __pyx_t_4 = __Pyx_PyCFunction_FastCall(__pyx_t_6, __pyx_temp+1-__pyx_t_7, 2+__pyx_t_7); if (unlikely(!__pyx_t_4)) __PYX_ERR(2, 512, __pyx_L1_error)
+      __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0;
+      __Pyx_GOTREF(__pyx_t_4);
+      __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+    } else
+    #endif
+    { /* generic path: build an argument tuple and call through __Pyx_PyObject_Call */
+      __pyx_t_8 = PyTuple_New(2+__pyx_t_7); if (unlikely(!__pyx_t_8)) __PYX_ERR(2, 512, __pyx_L1_error)
+      __Pyx_GOTREF(__pyx_t_8);
+      if (__pyx_t_5) {
+        __Pyx_GIVEREF(__pyx_t_5); PyTuple_SET_ITEM(__pyx_t_8, 0, __pyx_t_5); __pyx_t_5 = NULL;
+      }
+      __Pyx_GIVEREF(__pyx_t_1);
+      PyTuple_SET_ITEM(__pyx_t_8, 0+__pyx_t_7, __pyx_t_1); /* the tuple takes over this reference; __pyx_t_1 is cleared below without a DECREF */
+      __Pyx_INCREF(__pyx_v_value); /* extra reference for the tuple; the caller's reference to value is untouched */
+      __Pyx_GIVEREF(__pyx_v_value);
+      PyTuple_SET_ITEM(__pyx_t_8, 1+__pyx_t_7, __pyx_v_value);
+      __pyx_t_1 = 0;
+      __pyx_t_4 = __Pyx_PyObject_Call(__pyx_t_6, __pyx_t_8, NULL); if (unlikely(!__pyx_t_4)) __PYX_ERR(2, 512, __pyx_L1_error)
+      __Pyx_GOTREF(__pyx_t_4);
+      __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
+    }
+    __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
+    if (!(likely(PyBytes_CheckExact(__pyx_t_4))||((__pyx_t_4) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "bytes", Py_TYPE(__pyx_t_4)->tp_name), 0))) __PYX_ERR(2, 512, __pyx_L1_error) /* the pack() result must be exactly bytes or None, otherwise TypeError */
+    __pyx_v_bytesvalue = ((PyObject*)__pyx_t_4);
+    __pyx_t_4 = 0;
+  }
+  __pyx_L3:;
+
+  /* "View.MemoryView":514
+ *             bytesvalue = struct.pack(self.view.format, value)
+ * 
+ *         for i, c in enumerate(bytesvalue):             # <<<<<<<<<<<<<<
+ *             itemp[i] = c
+ * 
+ */
+  __pyx_t_9 = 0; /* enumerate() index counter */
+  if (unlikely(__pyx_v_bytesvalue == Py_None)) { /* None is accepted by the type checks above but cannot be iterated */
+    PyErr_SetString(PyExc_TypeError, "'NoneType' is not iterable");
+    __PYX_ERR(2, 514, __pyx_L1_error)
+  }
+  __Pyx_INCREF(__pyx_v_bytesvalue); /* hold a reference to the bytes object for the duration of the loop */
+  __pyx_t_10 = __pyx_v_bytesvalue;
+  __pyx_t_12 = PyBytes_AS_STRING(__pyx_t_10); /* start of the bytes object's character data */
+  __pyx_t_13 = (__pyx_t_12 + PyBytes_GET_SIZE(__pyx_t_10)); /* one past the last byte */
+  for (__pyx_t_14 = __pyx_t_12; __pyx_t_14 < __pyx_t_13; __pyx_t_14++) {
+    __pyx_t_11 = __pyx_t_14;
+    __pyx_v_c = (__pyx_t_11[0]);
+
+    /* "View.MemoryView":515
+ * 
+ *         for i, c in enumerate(bytesvalue):
+ *             itemp[i] = c             # <<<<<<<<<<<<<<
+ * 
+ *     @cname('getbuffer')
+ */
+    __pyx_v_i = __pyx_t_9;
+
+    /* "View.MemoryView":514
+ *             bytesvalue = struct.pack(self.view.format, value)
+ * 
+ *         for i, c in enumerate(bytesvalue):             # <<<<<<<<<<<<<<
+ *             itemp[i] = c
+ * 
+ */
+    __pyx_t_9 = (__pyx_t_9 + 1);
+
+    /* "View.MemoryView":515
+ * 
+ *         for i, c in enumerate(bytesvalue):
+ *             itemp[i] = c             # <<<<<<<<<<<<<<
+ * 
+ *     @cname('getbuffer')
+ */
+    (__pyx_v_itemp[__pyx_v_i]) = __pyx_v_c; /* NOTE(review): writes as many bytes as pack() returned; no check against view.itemsize is visible in this function - confirm the format always packs to itemsize bytes */
+  }
+  __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0;
+
+  /* "View.MemoryView":501
+ *             return result
+ * 
+ *     cdef assign_item_from_object(self, char *itemp, object value):             # <<<<<<<<<<<<<<
+ *         """Only used if instantiated manually by the user, or if Cython doesn't
+ *         know how to convert the type"""
+ */
+
+  /* function exit code */
+  __pyx_r = Py_None; __Pyx_INCREF(Py_None); /* success: return a new reference to None */
+  goto __pyx_L0;
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_1);
+  __Pyx_XDECREF(__pyx_t_4);
+  __Pyx_XDECREF(__pyx_t_5);
+  __Pyx_XDECREF(__pyx_t_6);
+  __Pyx_XDECREF(__pyx_t_8);
+  __Pyx_XDECREF(__pyx_t_10);
+  __Pyx_AddTraceback("View.MemoryView.memoryview.assign_item_from_object", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = 0; /* NULL return signals the error to the caller */
+  __pyx_L0:; /* common exit: locals are released on both the success and the error path */
+  __Pyx_XDECREF(__pyx_v_struct);
+  __Pyx_XDECREF(__pyx_v_bytesvalue);
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "View.MemoryView":518
+ * 
+ *     @cname('getbuffer')
+ *     def __getbuffer__(self, Py_buffer *info, int flags):             # <<<<<<<<<<<<<<
+ *         if flags & PyBUF_WRITABLE and self.view.readonly:
+ *             raise ValueError("Cannot create writable memory view from read-only memoryview")
+ */
+
+/* Python wrapper */
+static CYTHON_UNUSED int __pyx_memoryview_getbuffer(PyObject *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags); /*proto*/
+static CYTHON_UNUSED int __pyx_memoryview_getbuffer(PyObject *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags) { /* Wrapper for __getbuffer__: casts self to the memoryview struct, forwards all three arguments to the implementation function, and returns its result unchanged. */
+  int __pyx_r;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("__getbuffer__ (wrapper)", 0);
+  __pyx_r = __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_8__getbuffer__(((struct __pyx_memoryview_obj *)__pyx_v_self), ((Py_buffer *)__pyx_v_info), ((int)__pyx_v_flags));
+
+  /* function exit code */
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_8__getbuffer__(struct __pyx_memoryview_obj *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags) { /* Fills *info from self->view: shape, strides, suboffsets and format are copied only when the matching PyBUF_* flag is set (else NULL); buf, ndim, itemsize, len and readonly are always copied; info->obj becomes a new reference to self. Returns 0 on success, -1 on error. */
+  int __pyx_r;
+  __Pyx_RefNannyDeclarations
+  int __pyx_t_1;
+  int __pyx_t_2;
+  PyObject *__pyx_t_3 = NULL;
+  Py_ssize_t *__pyx_t_4;
+  char *__pyx_t_5;
+  void *__pyx_t_6;
+  int __pyx_t_7;
+  Py_ssize_t __pyx_t_8;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  if (__pyx_v_info == NULL) { /* reject a NULL view with BufferError before info is dereferenced */
+    PyErr_SetString(PyExc_BufferError, "PyObject_GetBuffer: view==NULL argument is obsolete");
+    return -1;
+  }
+  __Pyx_RefNannySetupContext("__getbuffer__", 0);
+  __pyx_v_info->obj = Py_None; __Pyx_INCREF(Py_None); /* placeholder owner; replaced by self at the end of the success path */
+  __Pyx_GIVEREF(__pyx_v_info->obj);
+
+  /* "View.MemoryView":519
+ *     @cname('getbuffer')
+ *     def __getbuffer__(self, Py_buffer *info, int flags):
+ *         if flags & PyBUF_WRITABLE and self.view.readonly:             # <<<<<<<<<<<<<<
+ *             raise ValueError("Cannot create writable memory view from read-only memoryview")
+ * 
+ */
+  __pyx_t_2 = ((__pyx_v_flags & PyBUF_WRITABLE) != 0); /* left operand of the `and` */
+  if (__pyx_t_2) {
+  } else {
+    __pyx_t_1 = __pyx_t_2; /* short-circuit: left operand is false, so the right operand is not evaluated */
+    goto __pyx_L4_bool_binop_done;
+  }
+  __pyx_t_2 = (__pyx_v_self->view.readonly != 0);
+  __pyx_t_1 = __pyx_t_2;
+  __pyx_L4_bool_binop_done:;
+  if (unlikely(__pyx_t_1)) { /* writable access requested on a read-only view */
+
+    /* "View.MemoryView":520
+ *     def __getbuffer__(self, Py_buffer *info, int flags):
+ *         if flags & PyBUF_WRITABLE and self.view.readonly:
+ *             raise ValueError("Cannot create writable memory view from read-only memoryview")             # <<<<<<<<<<<<<<
+ * 
+ *         if flags & PyBUF_ND:
+ */
+    __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__13, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(2, 520, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_3);
+    __Pyx_Raise(__pyx_t_3, 0, 0, 0);
+    __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+    __PYX_ERR(2, 520, __pyx_L1_error)
+
+    /* "View.MemoryView":519
+ *     @cname('getbuffer')
+ *     def __getbuffer__(self, Py_buffer *info, int flags):
+ *         if flags & PyBUF_WRITABLE and self.view.readonly:             # <<<<<<<<<<<<<<
+ *             raise ValueError("Cannot create writable memory view from read-only memoryview")
+ * 
+ */
+  }
+
+  /* "View.MemoryView":522
+ *             raise ValueError("Cannot create writable memory view from read-only memoryview")
+ * 
+ *         if flags & PyBUF_ND:             # <<<<<<<<<<<<<<
+ *             info.shape = self.view.shape
+ *         else:
+ */
+  __pyx_t_1 = ((__pyx_v_flags & PyBUF_ND) != 0);
+  if (__pyx_t_1) {
+
+    /* "View.MemoryView":523
+ * 
+ *         if flags & PyBUF_ND:
+ *             info.shape = self.view.shape             # <<<<<<<<<<<<<<
+ *         else:
+ *             info.shape = NULL
+ */
+    __pyx_t_4 = __pyx_v_self->view.shape;
+    __pyx_v_info->shape = __pyx_t_4; /* pointer copy: info shares the array owned by self->view */
+
+    /* "View.MemoryView":522
+ *             raise ValueError("Cannot create writable memory view from read-only memoryview")
+ * 
+ *         if flags & PyBUF_ND:             # <<<<<<<<<<<<<<
+ *             info.shape = self.view.shape
+ *         else:
+ */
+    goto __pyx_L6;
+  }
+
+  /* "View.MemoryView":525
+ *             info.shape = self.view.shape
+ *         else:
+ *             info.shape = NULL             # <<<<<<<<<<<<<<
+ * 
+ *         if flags & PyBUF_STRIDES:
+ */
+  /*else*/ {
+    __pyx_v_info->shape = NULL;
+  }
+  __pyx_L6:;
+
+  /* "View.MemoryView":527
+ *             info.shape = NULL
+ * 
+ *         if flags & PyBUF_STRIDES:             # <<<<<<<<<<<<<<
+ *             info.strides = self.view.strides
+ *         else:
+ */
+  __pyx_t_1 = ((__pyx_v_flags & PyBUF_STRIDES) != 0);
+  if (__pyx_t_1) {
+
+    /* "View.MemoryView":528
+ * 
+ *         if flags & PyBUF_STRIDES:
+ *             info.strides = self.view.strides             # <<<<<<<<<<<<<<
+ *         else:
+ *             info.strides = NULL
+ */
+    __pyx_t_4 = __pyx_v_self->view.strides;
+    __pyx_v_info->strides = __pyx_t_4; /* pointer copy: info shares the array owned by self->view */
+
+    /* "View.MemoryView":527
+ *             info.shape = NULL
+ * 
+ *         if flags & PyBUF_STRIDES:             # <<<<<<<<<<<<<<
+ *             info.strides = self.view.strides
+ *         else:
+ */
+    goto __pyx_L7;
+  }
+
+  /* "View.MemoryView":530
+ *             info.strides = self.view.strides
+ *         else:
+ *             info.strides = NULL             # <<<<<<<<<<<<<<
+ * 
+ *         if flags & PyBUF_INDIRECT:
+ */
+  /*else*/ {
+    __pyx_v_info->strides = NULL;
+  }
+  __pyx_L7:;
+
+  /* "View.MemoryView":532
+ *             info.strides = NULL
+ * 
+ *         if flags & PyBUF_INDIRECT:             # <<<<<<<<<<<<<<
+ *             info.suboffsets = self.view.suboffsets
+ *         else:
+ */
+  __pyx_t_1 = ((__pyx_v_flags & PyBUF_INDIRECT) != 0);
+  if (__pyx_t_1) {
+
+    /* "View.MemoryView":533
+ * 
+ *         if flags & PyBUF_INDIRECT:
+ *             info.suboffsets = self.view.suboffsets             # <<<<<<<<<<<<<<
+ *         else:
+ *             info.suboffsets = NULL
+ */
+    __pyx_t_4 = __pyx_v_self->view.suboffsets;
+    __pyx_v_info->suboffsets = __pyx_t_4; /* pointer copy: info shares the array owned by self->view */
+
+    /* "View.MemoryView":532
+ *             info.strides = NULL
+ * 
+ *         if flags & PyBUF_INDIRECT:             # <<<<<<<<<<<<<<
+ *             info.suboffsets = self.view.suboffsets
+ *         else:
+ */
+    goto __pyx_L8;
+  }
+
+  /* "View.MemoryView":535
+ *             info.suboffsets = self.view.suboffsets
+ *         else:
+ *             info.suboffsets = NULL             # <<<<<<<<<<<<<<
+ * 
+ *         if flags & PyBUF_FORMAT:
+ */
+  /*else*/ {
+    __pyx_v_info->suboffsets = NULL;
+  }
+  __pyx_L8:;
+
+  /* "View.MemoryView":537
+ *             info.suboffsets = NULL
+ * 
+ *         if flags & PyBUF_FORMAT:             # <<<<<<<<<<<<<<
+ *             info.format = self.view.format
+ *         else:
+ */
+  __pyx_t_1 = ((__pyx_v_flags & PyBUF_FORMAT) != 0);
+  if (__pyx_t_1) {
+
+    /* "View.MemoryView":538
+ * 
+ *         if flags & PyBUF_FORMAT:
+ *             info.format = self.view.format             # <<<<<<<<<<<<<<
+ *         else:
+ *             info.format = NULL
+ */
+    __pyx_t_5 = __pyx_v_self->view.format;
+    __pyx_v_info->format = __pyx_t_5; /* pointer copy: info shares the string owned by self->view */
+
+    /* "View.MemoryView":537
+ *             info.suboffsets = NULL
+ * 
+ *         if flags & PyBUF_FORMAT:             # <<<<<<<<<<<<<<
+ *             info.format = self.view.format
+ *         else:
+ */
+    goto __pyx_L9;
+  }
+
+  /* "View.MemoryView":540
+ *             info.format = self.view.format
+ *         else:
+ *             info.format = NULL             # <<<<<<<<<<<<<<
+ * 
+ *         info.buf = self.view.buf
+ */
+  /*else*/ {
+    __pyx_v_info->format = NULL;
+  }
+  __pyx_L9:;
+
+  /* "View.MemoryView":542
+ *             info.format = NULL
+ * 
+ *         info.buf = self.view.buf             # <<<<<<<<<<<<<<
+ *         info.ndim = self.view.ndim
+ *         info.itemsize = self.view.itemsize
+ */
+  __pyx_t_6 = __pyx_v_self->view.buf; /* from here on the fields are copied regardless of flags */
+  __pyx_v_info->buf = __pyx_t_6;
+
+  /* "View.MemoryView":543
+ * 
+ *         info.buf = self.view.buf
+ *         info.ndim = self.view.ndim             # <<<<<<<<<<<<<<
+ *         info.itemsize = self.view.itemsize
+ *         info.len = self.view.len
+ */
+  __pyx_t_7 = __pyx_v_self->view.ndim;
+  __pyx_v_info->ndim = __pyx_t_7;
+
+  /* "View.MemoryView":544
+ *         info.buf = self.view.buf
+ *         info.ndim = self.view.ndim
+ *         info.itemsize = self.view.itemsize             # <<<<<<<<<<<<<<
+ *         info.len = self.view.len
+ *         info.readonly = self.view.readonly
+ */
+  __pyx_t_8 = __pyx_v_self->view.itemsize;
+  __pyx_v_info->itemsize = __pyx_t_8;
+
+  /* "View.MemoryView":545
+ *         info.ndim = self.view.ndim
+ *         info.itemsize = self.view.itemsize
+ *         info.len = self.view.len             # <<<<<<<<<<<<<<
+ *         info.readonly = self.view.readonly
+ *         info.obj = self
+ */
+  __pyx_t_8 = __pyx_v_self->view.len;
+  __pyx_v_info->len = __pyx_t_8;
+
+  /* "View.MemoryView":546
+ *         info.itemsize = self.view.itemsize
+ *         info.len = self.view.len
+ *         info.readonly = self.view.readonly             # <<<<<<<<<<<<<<
+ *         info.obj = self
+ * 
+ */
+  __pyx_t_1 = __pyx_v_self->view.readonly;
+  __pyx_v_info->readonly = __pyx_t_1;
+
+  /* "View.MemoryView":547
+ *         info.len = self.view.len
+ *         info.readonly = self.view.readonly
+ *         info.obj = self             # <<<<<<<<<<<<<<
+ * 
+ *     __pyx_getbuffer = capsule(<void *> &__pyx_memoryview_getbuffer, "getbuffer(obj, view, flags)")
+ */
+  __Pyx_INCREF(((PyObject *)__pyx_v_self)); /* info->obj will hold its own reference to self */
+  __Pyx_GIVEREF(((PyObject *)__pyx_v_self));
+  __Pyx_GOTREF(__pyx_v_info->obj);
+  __Pyx_DECREF(__pyx_v_info->obj); /* drop the placeholder None reference taken at function entry */
+  __pyx_v_info->obj = ((PyObject *)__pyx_v_self);
+
+  /* "View.MemoryView":518
+ * 
+ *     @cname('getbuffer')
+ *     def __getbuffer__(self, Py_buffer *info, int flags):             # <<<<<<<<<<<<<<
+ *         if flags & PyBUF_WRITABLE and self.view.readonly:
+ *             raise ValueError("Cannot create writable memory view from read-only memoryview")
+ */
+
+  /* function exit code */
+  __pyx_r = 0;
+  goto __pyx_L0;
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_3);
+  __Pyx_AddTraceback("View.MemoryView.memoryview.__getbuffer__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = -1;
+  if (__pyx_v_info->obj != NULL) { /* error path: release whatever info->obj currently holds and leave it NULL */
+    __Pyx_GOTREF(__pyx_v_info->obj);
+    __Pyx_DECREF(__pyx_v_info->obj); __pyx_v_info->obj = 0;
+  }
+  goto __pyx_L2;
+  __pyx_L0:;
+  if (__pyx_v_info->obj == Py_None) { /* success path: if obj is still the None placeholder, drop it and leave obj NULL */
+    __Pyx_GOTREF(__pyx_v_info->obj);
+    __Pyx_DECREF(__pyx_v_info->obj); __pyx_v_info->obj = 0;
+  }
+  __pyx_L2:;
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "View.MemoryView":553
+ * 
+ *     @property
+ *     def T(self):             # <<<<<<<<<<<<<<
+ *         cdef _memoryviewslice result = memoryview_copy(self)
+ *         transpose_memslice(&result.from_slice)
+ */
+
+/* Python wrapper */
+static PyObject *__pyx_pw_15View_dot_MemoryView_10memoryview_1T_1__get__(PyObject *__pyx_v_self); /*proto*/
+static PyObject *__pyx_pw_15View_dot_MemoryView_10memoryview_1T_1__get__(PyObject *__pyx_v_self) { /* Wrapper for the T property getter: casts self to the memoryview struct, forwards to the implementation function, and returns its result unchanged. */
+  PyObject *__pyx_r = 0;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("__get__ (wrapper)", 0);
+  __pyx_r = __pyx_pf_15View_dot_MemoryView_10memoryview_1T___get__(((struct __pyx_memoryview_obj *)__pyx_v_self));
+
+  /* function exit code */
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_1T___get__(struct __pyx_memoryview_obj *__pyx_v_self) { /* Implementation of the T property getter: obtains a copy of self from __pyx_memoryview_copy_object, passes the copy's from_slice to __pyx_memslice_transpose, and returns the copy as a new reference, or NULL on error. */
+  struct __pyx_memoryviewslice_obj *__pyx_v_result = 0;
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  PyObject *__pyx_t_1 = NULL;
+  int __pyx_t_2;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("__get__", 0);
+
+  /* "View.MemoryView":554
+ *     @property
+ *     def T(self):
+ *         cdef _memoryviewslice result = memoryview_copy(self)             # <<<<<<<<<<<<<<
+ *         transpose_memslice(&result.from_slice)
+ *         return result
+ */
+  __pyx_t_1 = __pyx_memoryview_copy_object(__pyx_v_self); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 554, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_1);
+  if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_memoryviewslice_type))))) __PYX_ERR(2, 554, __pyx_L1_error) /* the copy must be None or pass the _memoryviewslice type test. NOTE(review): None passes here, yet result->from_slice is used below without a None check - confirm the copy helper never returns None */
+  __pyx_v_result = ((struct __pyx_memoryviewslice_obj *)__pyx_t_1);
+  __pyx_t_1 = 0;
+
+  /* "View.MemoryView":555
+ *     def T(self):
+ *         cdef _memoryviewslice result = memoryview_copy(self)
+ *         transpose_memslice(&result.from_slice)             # <<<<<<<<<<<<<<
+ *         return result
+ * 
+ */
+  __pyx_t_2 = __pyx_memslice_transpose((&__pyx_v_result->from_slice)); if (unlikely(__pyx_t_2 == ((int)0))) __PYX_ERR(2, 555, __pyx_L1_error) /* a return value of 0 is treated as failure */
+
+  /* "View.MemoryView":556
+ *         cdef _memoryviewslice result = memoryview_copy(self)
+ *         transpose_memslice(&result.from_slice)
+ *         return result             # <<<<<<<<<<<<<<
+ * 
+ *     @property
+ */
+  __Pyx_XDECREF(__pyx_r);
+  __Pyx_INCREF(((PyObject *)__pyx_v_result)); /* the return value needs its own reference; the local's is dropped at __pyx_L0 */
+  __pyx_r = ((PyObject *)__pyx_v_result);
+  goto __pyx_L0;
+
+  /* "View.MemoryView":553
+ * 
+ *     @property
+ *     def T(self):             # <<<<<<<<<<<<<<
+ *         cdef _memoryviewslice result = memoryview_copy(self)
+ *         transpose_memslice(&result.from_slice)
+ */
+
+  /* function exit code */
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_1);
+  __Pyx_AddTraceback("View.MemoryView.memoryview.T.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = NULL; /* NULL return signals the error to the caller */
+  __pyx_L0:; /* common exit for the success and error paths */
+  __Pyx_XDECREF((PyObject *)__pyx_v_result);
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "View.MemoryView":559
+ * 
+ *     @property
+ *     def base(self):             # <<<<<<<<<<<<<<
+ *         return self.obj
+ * 
+ */
+
+/* Python wrapper for memoryview.base.__get__: casts self to struct __pyx_memoryview_obj * and forwards to the __pyx_pf_ implementation. */
+static PyObject *__pyx_pw_15View_dot_MemoryView_10memoryview_4base_1__get__(PyObject *__pyx_v_self); /*proto*/
+static PyObject *__pyx_pw_15View_dot_MemoryView_10memoryview_4base_1__get__(PyObject *__pyx_v_self) {
+  PyObject *__pyx_r = 0;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("__get__ (wrapper)", 0);
+  __pyx_r = __pyx_pf_15View_dot_MemoryView_10memoryview_4base___get__(((struct __pyx_memoryview_obj *)__pyx_v_self));
+
+  /* function exit code: finish the RefNanny context and return the implementation's result unchanged */
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_4base___get__(struct __pyx_memoryview_obj *__pyx_v_self) { /* Implements memoryview.base.__get__: returns a new reference to self->obj; this function has no error path. */
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("__get__", 0);
+
+  /* "View.MemoryView":560
+ *     @property
+ *     def base(self):
+ *         return self.obj             # <<<<<<<<<<<<<<
+ * 
+ *     @property
+ */
+  __Pyx_XDECREF(__pyx_r);
+  __Pyx_INCREF(__pyx_v_self->obj); /* extra reference that is handed to the caller via __pyx_r */
+  __pyx_r = __pyx_v_self->obj;
+  goto __pyx_L0;
+
+  /* "View.MemoryView":559
+ * 
+ *     @property
+ *     def base(self):             # <<<<<<<<<<<<<<
+ *         return self.obj
+ * 
+ */
+
+  /* function exit code: single exit label, no error handling is needed here */
+  __pyx_L0:;
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "View.MemoryView":563
+ * 
+ *     @property
+ *     def shape(self):             # <<<<<<<<<<<<<<
+ *         return tuple([length for length in self.view.shape[:self.view.ndim]])
+ * 
+ */
+
+/* Python wrapper for memoryview.shape.__get__: casts self to struct __pyx_memoryview_obj * and forwards to the __pyx_pf_ implementation. */
+static PyObject *__pyx_pw_15View_dot_MemoryView_10memoryview_5shape_1__get__(PyObject *__pyx_v_self); /*proto*/
+static PyObject *__pyx_pw_15View_dot_MemoryView_10memoryview_5shape_1__get__(PyObject *__pyx_v_self) {
+  PyObject *__pyx_r = 0;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("__get__ (wrapper)", 0);
+  __pyx_r = __pyx_pf_15View_dot_MemoryView_10memoryview_5shape___get__(((struct __pyx_memoryview_obj *)__pyx_v_self));
+
+  /* function exit code: finish the RefNanny context and return the implementation's result unchanged */
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_5shape___get__(struct __pyx_memoryview_obj *__pyx_v_self) { /* Implements memoryview.shape.__get__: returns a tuple built from the first view.ndim entries of view.shape (NULL on error). */
+  Py_ssize_t __pyx_v_length;
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  PyObject *__pyx_t_1 = NULL;
+  Py_ssize_t *__pyx_t_2;
+  Py_ssize_t *__pyx_t_3;
+  Py_ssize_t *__pyx_t_4;
+  PyObject *__pyx_t_5 = NULL;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("__get__", 0);
+
+  /* "View.MemoryView":564
+ *     @property
+ *     def shape(self):
+ *         return tuple([length for length in self.view.shape[:self.view.ndim]])             # <<<<<<<<<<<<<<
+ * 
+ *     @property
+ */
+  __Pyx_XDECREF(__pyx_r);
+  __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 564, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_3 = (__pyx_v_self->view.shape + __pyx_v_self->view.ndim); /* one-past-the-end pointer for the first ndim shape entries */
+  for (__pyx_t_4 = __pyx_v_self->view.shape; __pyx_t_4 < __pyx_t_3; __pyx_t_4++) {
+    __pyx_t_2 = __pyx_t_4;
+    __pyx_v_length = (__pyx_t_2[0]);
+    __pyx_t_5 = PyInt_FromSsize_t(__pyx_v_length); if (unlikely(!__pyx_t_5)) __PYX_ERR(2, 564, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_5);
+    if (unlikely(__Pyx_ListComp_Append(__pyx_t_1, (PyObject*)__pyx_t_5))) __PYX_ERR(2, 564, __pyx_L1_error) /* a non-zero return is treated as failure */
+    __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+  }
+  __pyx_t_5 = PyList_AsTuple(((PyObject*)__pyx_t_1)); if (unlikely(!__pyx_t_5)) __PYX_ERR(2, 564, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_5);
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; /* the intermediate list is no longer needed once the tuple exists */
+  __pyx_r = __pyx_t_5;
+  __pyx_t_5 = 0;
+  goto __pyx_L0;
+
+  /* "View.MemoryView":563
+ * 
+ *     @property
+ *     def shape(self):             # <<<<<<<<<<<<<<
+ *         return tuple([length for length in self.view.shape[:self.view.ndim]])
+ * 
+ */
+
+  /* function exit code: the error path releases any live temporaries, records a traceback entry and returns NULL */
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_1);
+  __Pyx_XDECREF(__pyx_t_5);
+  __Pyx_AddTraceback("View.MemoryView.memoryview.shape.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = NULL;
+  __pyx_L0:;
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "View.MemoryView":567
+ * 
+ *     @property
+ *     def strides(self):             # <<<<<<<<<<<<<<
+ *         if self.view.strides == NULL:
+ * 
+ */
+
+/* Python wrapper for memoryview.strides.__get__: casts self to struct __pyx_memoryview_obj * and forwards to the __pyx_pf_ implementation. */
+static PyObject *__pyx_pw_15View_dot_MemoryView_10memoryview_7strides_1__get__(PyObject *__pyx_v_self); /*proto*/
+static PyObject *__pyx_pw_15View_dot_MemoryView_10memoryview_7strides_1__get__(PyObject *__pyx_v_self) {
+  PyObject *__pyx_r = 0;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("__get__ (wrapper)", 0);
+  __pyx_r = __pyx_pf_15View_dot_MemoryView_10memoryview_7strides___get__(((struct __pyx_memoryview_obj *)__pyx_v_self));
+
+  /* function exit code: finish the RefNanny context and return the implementation's result unchanged */
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_7strides___get__(struct __pyx_memoryview_obj *__pyx_v_self) { /* Implements memoryview.strides.__get__: raises ValueError when view.strides is NULL, otherwise returns a tuple of the first view.ndim strides (NULL on error). */
+  Py_ssize_t __pyx_v_stride;
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  int __pyx_t_1;
+  PyObject *__pyx_t_2 = NULL;
+  Py_ssize_t *__pyx_t_3;
+  Py_ssize_t *__pyx_t_4;
+  Py_ssize_t *__pyx_t_5;
+  PyObject *__pyx_t_6 = NULL;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("__get__", 0);
+
+  /* "View.MemoryView":568
+ *     @property
+ *     def strides(self):
+ *         if self.view.strides == NULL:             # <<<<<<<<<<<<<<
+ * 
+ *             raise ValueError("Buffer view does not expose strides")
+ */
+  __pyx_t_1 = ((__pyx_v_self->view.strides == NULL) != 0);
+  if (unlikely(__pyx_t_1)) {
+
+    /* "View.MemoryView":570
+ *         if self.view.strides == NULL:
+ * 
+ *             raise ValueError("Buffer view does not expose strides")             # <<<<<<<<<<<<<<
+ * 
+ *         return tuple([stride for stride in self.view.strides[:self.view.ndim]])
+ */
+    __pyx_t_2 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__14, NULL); if (unlikely(!__pyx_t_2)) __PYX_ERR(2, 570, __pyx_L1_error) /* __pyx_tuple__14 holds the constructor arguments; presumably the message quoted above */
+    __Pyx_GOTREF(__pyx_t_2);
+    __Pyx_Raise(__pyx_t_2, 0, 0, 0);
+    __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+    __PYX_ERR(2, 570, __pyx_L1_error) /* unconditional: this branch always leaves through the error exit */
+
+    /* "View.MemoryView":568
+ *     @property
+ *     def strides(self):
+ *         if self.view.strides == NULL:             # <<<<<<<<<<<<<<
+ * 
+ *             raise ValueError("Buffer view does not expose strides")
+ */
+  }
+
+  /* "View.MemoryView":572
+ *             raise ValueError("Buffer view does not expose strides")
+ * 
+ *         return tuple([stride for stride in self.view.strides[:self.view.ndim]])             # <<<<<<<<<<<<<<
+ * 
+ *     @property
+ */
+  __Pyx_XDECREF(__pyx_r);
+  __pyx_t_2 = PyList_New(0); if (unlikely(!__pyx_t_2)) __PYX_ERR(2, 572, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_2);
+  __pyx_t_4 = (__pyx_v_self->view.strides + __pyx_v_self->view.ndim); /* one-past-the-end pointer for the first ndim stride entries */
+  for (__pyx_t_5 = __pyx_v_self->view.strides; __pyx_t_5 < __pyx_t_4; __pyx_t_5++) {
+    __pyx_t_3 = __pyx_t_5;
+    __pyx_v_stride = (__pyx_t_3[0]);
+    __pyx_t_6 = PyInt_FromSsize_t(__pyx_v_stride); if (unlikely(!__pyx_t_6)) __PYX_ERR(2, 572, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_6);
+    if (unlikely(__Pyx_ListComp_Append(__pyx_t_2, (PyObject*)__pyx_t_6))) __PYX_ERR(2, 572, __pyx_L1_error) /* a non-zero return is treated as failure */
+    __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
+  }
+  __pyx_t_6 = PyList_AsTuple(((PyObject*)__pyx_t_2)); if (unlikely(!__pyx_t_6)) __PYX_ERR(2, 572, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_6);
+  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; /* the intermediate list is no longer needed once the tuple exists */
+  __pyx_r = __pyx_t_6;
+  __pyx_t_6 = 0;
+  goto __pyx_L0;
+
+  /* "View.MemoryView":567
+ * 
+ *     @property
+ *     def strides(self):             # <<<<<<<<<<<<<<
+ *         if self.view.strides == NULL:
+ * 
+ */
+
+  /* function exit code: the error path releases any live temporaries, records a traceback entry and returns NULL */
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_2);
+  __Pyx_XDECREF(__pyx_t_6);
+  __Pyx_AddTraceback("View.MemoryView.memoryview.strides.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = NULL;
+  __pyx_L0:;
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "View.MemoryView":575
+ * 
+ *     @property
+ *     def suboffsets(self):             # <<<<<<<<<<<<<<
+ *         if self.view.suboffsets == NULL:
+ *             return (-1,) * self.view.ndim
+ */
+
+/* Python wrapper for memoryview.suboffsets.__get__: casts self to struct __pyx_memoryview_obj * and forwards to the __pyx_pf_ implementation. */
+static PyObject *__pyx_pw_15View_dot_MemoryView_10memoryview_10suboffsets_1__get__(PyObject *__pyx_v_self); /*proto*/
+static PyObject *__pyx_pw_15View_dot_MemoryView_10memoryview_10suboffsets_1__get__(PyObject *__pyx_v_self) {
+  PyObject *__pyx_r = 0;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("__get__ (wrapper)", 0);
+  __pyx_r = __pyx_pf_15View_dot_MemoryView_10memoryview_10suboffsets___get__(((struct __pyx_memoryview_obj *)__pyx_v_self));
+
+  /* function exit code: finish the RefNanny context and return the implementation's result unchanged */
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_10suboffsets___get__(struct __pyx_memoryview_obj *__pyx_v_self) { /* Implements memoryview.suboffsets.__get__: returns __pyx_tuple__15 repeated view.ndim times when view.suboffsets is NULL, otherwise a tuple of the first view.ndim suboffsets (NULL on error). */
+  Py_ssize_t __pyx_v_suboffset;
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  int __pyx_t_1;
+  PyObject *__pyx_t_2 = NULL;
+  PyObject *__pyx_t_3 = NULL;
+  Py_ssize_t *__pyx_t_4;
+  Py_ssize_t *__pyx_t_5;
+  Py_ssize_t *__pyx_t_6;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("__get__", 0);
+
+  /* "View.MemoryView":576
+ *     @property
+ *     def suboffsets(self):
+ *         if self.view.suboffsets == NULL:             # <<<<<<<<<<<<<<
+ *             return (-1,) * self.view.ndim
+ * 
+ */
+  __pyx_t_1 = ((__pyx_v_self->view.suboffsets == NULL) != 0);
+  if (__pyx_t_1) {
+
+    /* "View.MemoryView":577
+ *     def suboffsets(self):
+ *         if self.view.suboffsets == NULL:
+ *             return (-1,) * self.view.ndim             # <<<<<<<<<<<<<<
+ * 
+ *         return tuple([suboffset for suboffset in self.view.suboffsets[:self.view.ndim]])
+ */
+    __Pyx_XDECREF(__pyx_r);
+    __pyx_t_2 = __Pyx_PyInt_From_int(__pyx_v_self->view.ndim); if (unlikely(!__pyx_t_2)) __PYX_ERR(2, 577, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_2);
+    __pyx_t_3 = PyNumber_Multiply(__pyx_tuple__15, __pyx_t_2); if (unlikely(!__pyx_t_3)) __PYX_ERR(2, 577, __pyx_L1_error) /* __pyx_tuple__15 is presumably the constant (-1,) from the source line above */
+    __Pyx_GOTREF(__pyx_t_3);
+    __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+    __pyx_r = __pyx_t_3;
+    __pyx_t_3 = 0;
+    goto __pyx_L0;
+
+    /* "View.MemoryView":576
+ *     @property
+ *     def suboffsets(self):
+ *         if self.view.suboffsets == NULL:             # <<<<<<<<<<<<<<
+ *             return (-1,) * self.view.ndim
+ * 
+ */
+  }
+
+  /* "View.MemoryView":579
+ *             return (-1,) * self.view.ndim
+ * 
+ *         return tuple([suboffset for suboffset in self.view.suboffsets[:self.view.ndim]])             # <<<<<<<<<<<<<<
+ * 
+ *     @property
+ */
+  __Pyx_XDECREF(__pyx_r);
+  __pyx_t_3 = PyList_New(0); if (unlikely(!__pyx_t_3)) __PYX_ERR(2, 579, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_3);
+  __pyx_t_5 = (__pyx_v_self->view.suboffsets + __pyx_v_self->view.ndim); /* one-past-the-end pointer for the first ndim suboffset entries */
+  for (__pyx_t_6 = __pyx_v_self->view.suboffsets; __pyx_t_6 < __pyx_t_5; __pyx_t_6++) {
+    __pyx_t_4 = __pyx_t_6;
+    __pyx_v_suboffset = (__pyx_t_4[0]);
+    __pyx_t_2 = PyInt_FromSsize_t(__pyx_v_suboffset); if (unlikely(!__pyx_t_2)) __PYX_ERR(2, 579, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_2);
+    if (unlikely(__Pyx_ListComp_Append(__pyx_t_3, (PyObject*)__pyx_t_2))) __PYX_ERR(2, 579, __pyx_L1_error) /* a non-zero return is treated as failure */
+    __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+  }
+  __pyx_t_2 = PyList_AsTuple(((PyObject*)__pyx_t_3)); if (unlikely(!__pyx_t_2)) __PYX_ERR(2, 579, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_2);
+  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; /* the intermediate list is no longer needed once the tuple exists */
+  __pyx_r = __pyx_t_2;
+  __pyx_t_2 = 0;
+  goto __pyx_L0;
+
+  /* "View.MemoryView":575
+ * 
+ *     @property
+ *     def suboffsets(self):             # <<<<<<<<<<<<<<
+ *         if self.view.suboffsets == NULL:
+ *             return (-1,) * self.view.ndim
+ */
+
+  /* function exit code: the error path releases any live temporaries, records a traceback entry and returns NULL */
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_2);
+  __Pyx_XDECREF(__pyx_t_3);
+  __Pyx_AddTraceback("View.MemoryView.memoryview.suboffsets.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = NULL;
+  __pyx_L0:;
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "View.MemoryView":582
+ * 
+ *     @property
+ *     def ndim(self):             # <<<<<<<<<<<<<<
+ *         return self.view.ndim
+ * 
+ */
+
+/* Python wrapper for memoryview.ndim.__get__: casts self to struct __pyx_memoryview_obj * and forwards to the __pyx_pf_ implementation. */
+static PyObject *__pyx_pw_15View_dot_MemoryView_10memoryview_4ndim_1__get__(PyObject *__pyx_v_self); /*proto*/
+static PyObject *__pyx_pw_15View_dot_MemoryView_10memoryview_4ndim_1__get__(PyObject *__pyx_v_self) {
+  PyObject *__pyx_r = 0;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("__get__ (wrapper)", 0);
+  __pyx_r = __pyx_pf_15View_dot_MemoryView_10memoryview_4ndim___get__(((struct __pyx_memoryview_obj *)__pyx_v_self));
+
+  /* function exit code: finish the RefNanny context and return the implementation's result unchanged */
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_4ndim___get__(struct __pyx_memoryview_obj *__pyx_v_self) { /* Implements memoryview.ndim.__get__: returns view.ndim converted with __Pyx_PyInt_From_int (NULL on error). */
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  PyObject *__pyx_t_1 = NULL;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("__get__", 0);
+
+  /* "View.MemoryView":583
+ *     @property
+ *     def ndim(self):
+ *         return self.view.ndim             # <<<<<<<<<<<<<<
+ * 
+ *     @property
+ */
+  __Pyx_XDECREF(__pyx_r);
+  __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_self->view.ndim); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 583, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_r = __pyx_t_1;
+  __pyx_t_1 = 0; /* the reference has moved to __pyx_r */
+  goto __pyx_L0;
+
+  /* "View.MemoryView":582
+ * 
+ *     @property
+ *     def ndim(self):             # <<<<<<<<<<<<<<
+ *         return self.view.ndim
+ * 
+ */
+
+  /* function exit code: the error path releases the temporary, records a traceback entry and returns NULL */
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_1);
+  __Pyx_AddTraceback("View.MemoryView.memoryview.ndim.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = NULL;
+  __pyx_L0:;
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "View.MemoryView":586
+ * 
+ *     @property
+ *     def itemsize(self):             # <<<<<<<<<<<<<<
+ *         return self.view.itemsize
+ * 
+ */
+
+/* Python wrapper for memoryview.itemsize.__get__: casts self to struct __pyx_memoryview_obj * and forwards to the __pyx_pf_ implementation. */
+static PyObject *__pyx_pw_15View_dot_MemoryView_10memoryview_8itemsize_1__get__(PyObject *__pyx_v_self); /*proto*/
+static PyObject *__pyx_pw_15View_dot_MemoryView_10memoryview_8itemsize_1__get__(PyObject *__pyx_v_self) {
+  PyObject *__pyx_r = 0;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("__get__ (wrapper)", 0);
+  __pyx_r = __pyx_pf_15View_dot_MemoryView_10memoryview_8itemsize___get__(((struct __pyx_memoryview_obj *)__pyx_v_self));
+
+  /* function exit code: finish the RefNanny context and return the implementation's result unchanged */
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_8itemsize___get__(struct __pyx_memoryview_obj *__pyx_v_self) { /* Implements memoryview.itemsize.__get__: returns view.itemsize converted with PyInt_FromSsize_t (NULL on error). */
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  PyObject *__pyx_t_1 = NULL;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("__get__", 0);
+
+  /* "View.MemoryView":587
+ *     @property
+ *     def itemsize(self):
+ *         return self.view.itemsize             # <<<<<<<<<<<<<<
+ * 
+ *     @property
+ */
+  __Pyx_XDECREF(__pyx_r);
+  __pyx_t_1 = PyInt_FromSsize_t(__pyx_v_self->view.itemsize); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 587, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_r = __pyx_t_1;
+  __pyx_t_1 = 0; /* the reference has moved to __pyx_r */
+  goto __pyx_L0;
+
+  /* "View.MemoryView":586
+ * 
+ *     @property
+ *     def itemsize(self):             # <<<<<<<<<<<<<<
+ *         return self.view.itemsize
+ * 
+ */
+
+  /* function exit code: the error path releases the temporary, records a traceback entry and returns NULL */
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_1);
+  __Pyx_AddTraceback("View.MemoryView.memoryview.itemsize.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = NULL;
+  __pyx_L0:;
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "View.MemoryView":590
+ * 
+ *     @property
+ *     def nbytes(self):             # <<<<<<<<<<<<<<
+ *         return self.size * self.view.itemsize
+ * 
+ */
+
+/* Python wrapper for memoryview.nbytes.__get__: casts self to struct __pyx_memoryview_obj * and forwards to the __pyx_pf_ implementation. */
+static PyObject *__pyx_pw_15View_dot_MemoryView_10memoryview_6nbytes_1__get__(PyObject *__pyx_v_self); /*proto*/
+static PyObject *__pyx_pw_15View_dot_MemoryView_10memoryview_6nbytes_1__get__(PyObject *__pyx_v_self) {
+  PyObject *__pyx_r = 0;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("__get__ (wrapper)", 0);
+  __pyx_r = __pyx_pf_15View_dot_MemoryView_10memoryview_6nbytes___get__(((struct __pyx_memoryview_obj *)__pyx_v_self));
+
+  /* function exit code: finish the RefNanny context and return the implementation's result unchanged */
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_6nbytes___get__(struct __pyx_memoryview_obj *__pyx_v_self) { /* Implements memoryview.nbytes.__get__: multiplies the attribute fetched via __pyx_n_s_size (self.size in the Cython source) by view.itemsize (NULL on error). */
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  PyObject *__pyx_t_1 = NULL;
+  PyObject *__pyx_t_2 = NULL;
+  PyObject *__pyx_t_3 = NULL;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("__get__", 0);
+
+  /* "View.MemoryView":591
+ *     @property
+ *     def nbytes(self):
+ *         return self.size * self.view.itemsize             # <<<<<<<<<<<<<<
+ * 
+ *     @property
+ */
+  __Pyx_XDECREF(__pyx_r);
+  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_size); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 591, __pyx_L1_error) /* generic attribute lookup on self rather than a direct call to the size getter */
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_2 = PyInt_FromSsize_t(__pyx_v_self->view.itemsize); if (unlikely(!__pyx_t_2)) __PYX_ERR(2, 591, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_2);
+  __pyx_t_3 = PyNumber_Multiply(__pyx_t_1, __pyx_t_2); if (unlikely(!__pyx_t_3)) __PYX_ERR(2, 591, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_3);
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+  __pyx_r = __pyx_t_3;
+  __pyx_t_3 = 0; /* the reference has moved to __pyx_r */
+  goto __pyx_L0;
+
+  /* "View.MemoryView":590
+ * 
+ *     @property
+ *     def nbytes(self):             # <<<<<<<<<<<<<<
+ *         return self.size * self.view.itemsize
+ * 
+ */
+
+  /* function exit code: the error path releases any live temporaries, records a traceback entry and returns NULL */
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_1);
+  __Pyx_XDECREF(__pyx_t_2);
+  __Pyx_XDECREF(__pyx_t_3);
+  __Pyx_AddTraceback("View.MemoryView.memoryview.nbytes.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = NULL;
+  __pyx_L0:;
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "View.MemoryView":594
+ * 
+ *     @property
+ *     def size(self):             # <<<<<<<<<<<<<<
+ *         if self._size is None:
+ *             result = 1
+ */
+
+/* Python wrapper for memoryview.size.__get__: casts self to struct __pyx_memoryview_obj * and forwards to the __pyx_pf_ implementation. */
+static PyObject *__pyx_pw_15View_dot_MemoryView_10memoryview_4size_1__get__(PyObject *__pyx_v_self); /*proto*/
+static PyObject *__pyx_pw_15View_dot_MemoryView_10memoryview_4size_1__get__(PyObject *__pyx_v_self) {
+  PyObject *__pyx_r = 0;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("__get__ (wrapper)", 0);
+  __pyx_r = __pyx_pf_15View_dot_MemoryView_10memoryview_4size___get__(((struct __pyx_memoryview_obj *)__pyx_v_self));
+
+  /* function exit code: finish the RefNanny context and return the implementation's result unchanged */
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_4size___get__(struct __pyx_memoryview_obj *__pyx_v_self) { /* Implements memoryview.size.__get__: when self->_size is None, computes the product of the first view.ndim shape entries and stores it in self->_size; always returns self->_size (NULL on error). */
+  PyObject *__pyx_v_result = NULL;
+  PyObject *__pyx_v_length = NULL;
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  int __pyx_t_1;
+  int __pyx_t_2;
+  Py_ssize_t *__pyx_t_3;
+  Py_ssize_t *__pyx_t_4;
+  Py_ssize_t *__pyx_t_5;
+  PyObject *__pyx_t_6 = NULL;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("__get__", 0);
+
+  /* "View.MemoryView":595
+ *     @property
+ *     def size(self):
+ *         if self._size is None:             # <<<<<<<<<<<<<<
+ *             result = 1
+ * 
+ */
+  __pyx_t_1 = (__pyx_v_self->_size == Py_None); /* identity comparison against None: true until a value has been stored below */
+  __pyx_t_2 = (__pyx_t_1 != 0);
+  if (__pyx_t_2) {
+
+    /* "View.MemoryView":596
+ *     def size(self):
+ *         if self._size is None:
+ *             result = 1             # <<<<<<<<<<<<<<
+ * 
+ *             for length in self.view.shape[:self.view.ndim]:
+ */
+    __Pyx_INCREF(__pyx_int_1);
+    __pyx_v_result = __pyx_int_1;
+
+    /* "View.MemoryView":598
+ *             result = 1
+ * 
+ *             for length in self.view.shape[:self.view.ndim]:             # <<<<<<<<<<<<<<
+ *                 result *= length
+ * 
+ */
+    __pyx_t_4 = (__pyx_v_self->view.shape + __pyx_v_self->view.ndim); /* one-past-the-end pointer for the first ndim shape entries */
+    for (__pyx_t_5 = __pyx_v_self->view.shape; __pyx_t_5 < __pyx_t_4; __pyx_t_5++) {
+      __pyx_t_3 = __pyx_t_5;
+      __pyx_t_6 = PyInt_FromSsize_t((__pyx_t_3[0])); if (unlikely(!__pyx_t_6)) __PYX_ERR(2, 598, __pyx_L1_error)
+      __Pyx_GOTREF(__pyx_t_6);
+      __Pyx_XDECREF_SET(__pyx_v_length, __pyx_t_6);
+      __pyx_t_6 = 0;
+
+      /* "View.MemoryView":599
+ * 
+ *             for length in self.view.shape[:self.view.ndim]:
+ *                 result *= length             # <<<<<<<<<<<<<<
+ * 
+ *             self._size = result
+ */
+      __pyx_t_6 = PyNumber_InPlaceMultiply(__pyx_v_result, __pyx_v_length); if (unlikely(!__pyx_t_6)) __PYX_ERR(2, 599, __pyx_L1_error) /* the product is accumulated as a Python object, not as a C integer */
+      __Pyx_GOTREF(__pyx_t_6);
+      __Pyx_DECREF_SET(__pyx_v_result, __pyx_t_6);
+      __pyx_t_6 = 0;
+    }
+
+    /* "View.MemoryView":601
+ *                 result *= length
+ * 
+ *             self._size = result             # <<<<<<<<<<<<<<
+ * 
+ *         return self._size
+ */
+    __Pyx_INCREF(__pyx_v_result); /* self->_size takes its own reference; the local one is dropped at function exit */
+    __Pyx_GIVEREF(__pyx_v_result);
+    __Pyx_GOTREF(__pyx_v_self->_size);
+    __Pyx_DECREF(__pyx_v_self->_size); /* release the previous value (None on this path) */
+    __pyx_v_self->_size = __pyx_v_result;
+
+    /* "View.MemoryView":595
+ *     @property
+ *     def size(self):
+ *         if self._size is None:             # <<<<<<<<<<<<<<
+ *             result = 1
+ * 
+ */
+  }
+
+  /* "View.MemoryView":603
+ *             self._size = result
+ * 
+ *         return self._size             # <<<<<<<<<<<<<<
+ * 
+ *     def __len__(self):
+ */
+  __Pyx_XDECREF(__pyx_r);
+  __Pyx_INCREF(__pyx_v_self->_size); /* extra reference that is handed to the caller via __pyx_r */
+  __pyx_r = __pyx_v_self->_size;
+  goto __pyx_L0;
+
+  /* "View.MemoryView":594
+ * 
+ *     @property
+ *     def size(self):             # <<<<<<<<<<<<<<
+ *         if self._size is None:
+ *             result = 1
+ */
+
+  /* function exit code: the error path releases the temporary, records a traceback entry and returns NULL; both paths drop the local result and length references */
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_6);
+  __Pyx_AddTraceback("View.MemoryView.memoryview.size.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = NULL;
+  __pyx_L0:;
+  __Pyx_XDECREF(__pyx_v_result);
+  __Pyx_XDECREF(__pyx_v_length);
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "View.MemoryView":605
+ *         return self._size
+ * 
+ *     def __len__(self):             # <<<<<<<<<<<<<<
+ *         if self.view.ndim >= 1:
+ *             return self.view.shape[0]
+ */
+
+/* Python wrapper for memoryview.__len__: casts self to struct __pyx_memoryview_obj * and forwards to the __pyx_pf_ implementation; returns a Py_ssize_t, not an object. */
+static Py_ssize_t __pyx_memoryview___len__(PyObject *__pyx_v_self); /*proto*/
+static Py_ssize_t __pyx_memoryview___len__(PyObject *__pyx_v_self) {
+  Py_ssize_t __pyx_r;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("__len__ (wrapper)", 0);
+  __pyx_r = __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_10__len__(((struct __pyx_memoryview_obj *)__pyx_v_self));
+
+  /* function exit code: finish the RefNanny context and return the implementation's result unchanged */
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+static Py_ssize_t __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_10__len__(struct __pyx_memoryview_obj *__pyx_v_self) { /* Implements memoryview.__len__: returns view.shape[0] when view.ndim >= 1, otherwise 0; this function has no error path. */
+  Py_ssize_t __pyx_r;
+  __Pyx_RefNannyDeclarations
+  int __pyx_t_1;
+  __Pyx_RefNannySetupContext("__len__", 0);
+
+  /* "View.MemoryView":606
+ * 
+ *     def __len__(self):
+ *         if self.view.ndim >= 1:             # <<<<<<<<<<<<<<
+ *             return self.view.shape[0]
+ * 
+ */
+  __pyx_t_1 = ((__pyx_v_self->view.ndim >= 1) != 0);
+  if (__pyx_t_1) {
+
+    /* "View.MemoryView":607
+ *     def __len__(self):
+ *         if self.view.ndim >= 1:
+ *             return self.view.shape[0]             # <<<<<<<<<<<<<<
+ * 
+ *         return 0
+ */
+    __pyx_r = (__pyx_v_self->view.shape[0]); /* length is the extent of the first dimension */
+    goto __pyx_L0;
+
+    /* "View.MemoryView":606
+ * 
+ *     def __len__(self):
+ *         if self.view.ndim >= 1:             # <<<<<<<<<<<<<<
+ *             return self.view.shape[0]
+ * 
+ */
+  }
+
+  /* "View.MemoryView":609
+ *             return self.view.shape[0]
+ * 
+ *         return 0             # <<<<<<<<<<<<<<
+ * 
+ *     def __repr__(self):
+ */
+  __pyx_r = 0; /* reached only when view.ndim < 1 */
+  goto __pyx_L0;
+
+  /* "View.MemoryView":605
+ *         return self._size
+ * 
+ *     def __len__(self):             # <<<<<<<<<<<<<<
+ *         if self.view.ndim >= 1:
+ *             return self.view.shape[0]
+ */
+
+  /* function exit code: single exit label, no error handling is needed here */
+  __pyx_L0:;
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "View.MemoryView":611
+ *         return 0
+ * 
+ *     def __repr__(self):             # <<<<<<<<<<<<<<
+ *         return "<MemoryView of %r at 0x%x>" % (self.base.__class__.__name__,
+ *                                                id(self))
+ */
+
+/* Python wrapper for memoryview.__repr__: casts self to struct __pyx_memoryview_obj * and forwards to the __pyx_pf_ implementation. */
+static PyObject *__pyx_memoryview___repr__(PyObject *__pyx_v_self); /*proto*/
+static PyObject *__pyx_memoryview___repr__(PyObject *__pyx_v_self) {
+  PyObject *__pyx_r = 0;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("__repr__ (wrapper)", 0);
+  __pyx_r = __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_12__repr__(((struct __pyx_memoryview_obj *)__pyx_v_self));
+
+  /* function exit code: finish the RefNanny context and return the implementation's result unchanged */
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_12__repr__(struct __pyx_memoryview_obj *__pyx_v_self) { /* Implements memoryview.__repr__: formats __pyx_kp_s_MemoryView_of_r_at_0x_x with a 2-tuple of the base object's class name and id(self) (NULL on error). */
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  PyObject *__pyx_t_1 = NULL;
+  PyObject *__pyx_t_2 = NULL;
+  PyObject *__pyx_t_3 = NULL;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("__repr__", 0);
+
+  /* "View.MemoryView":612
+ * 
+ *     def __repr__(self):
+ *         return "<MemoryView of %r at 0x%x>" % (self.base.__class__.__name__,             # <<<<<<<<<<<<<<
+ *                                                id(self))
+ * 
+ */
+  __Pyx_XDECREF(__pyx_r);
+  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_base); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 612, __pyx_L1_error) /* three chained attribute lookups follow, per the source: base, then __class__, then __name__ */
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_class); if (unlikely(!__pyx_t_2)) __PYX_ERR(2, 612, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_2);
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_name_2); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 612, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_1);
+  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+
+  /* "View.MemoryView":613
+ *     def __repr__(self):
+ *         return "<MemoryView of %r at 0x%x>" % (self.base.__class__.__name__,
+ *                                                id(self))             # <<<<<<<<<<<<<<
+ * 
+ *     def __str__(self):
+ */
+  __pyx_t_2 = __Pyx_PyObject_CallOneArg(__pyx_builtin_id, ((PyObject *)__pyx_v_self)); if (unlikely(!__pyx_t_2)) __PYX_ERR(2, 613, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_2);
+
+  /* "View.MemoryView":612
+ * 
+ *     def __repr__(self):
+ *         return "<MemoryView of %r at 0x%x>" % (self.base.__class__.__name__,             # <<<<<<<<<<<<<<
+ *                                                id(self))
+ * 
+ */
+  __pyx_t_3 = PyTuple_New(2); if (unlikely(!__pyx_t_3)) __PYX_ERR(2, 612, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_3);
+  __Pyx_GIVEREF(__pyx_t_1);
+  PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_t_1); /* PyTuple_SET_ITEM takes over the reference, so the temporary is cleared below without a DECREF */
+  __Pyx_GIVEREF(__pyx_t_2);
+  PyTuple_SET_ITEM(__pyx_t_3, 1, __pyx_t_2);
+  __pyx_t_1 = 0;
+  __pyx_t_2 = 0;
+  __pyx_t_2 = __Pyx_PyString_Format(__pyx_kp_s_MemoryView_of_r_at_0x_x, __pyx_t_3); if (unlikely(!__pyx_t_2)) __PYX_ERR(2, 612, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_2);
+  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; /* the argument tuple is no longer needed once the string exists */
+  __pyx_r = __pyx_t_2;
+  __pyx_t_2 = 0;
+  goto __pyx_L0;
+
+  /* "View.MemoryView":611
+ *         return 0
+ * 
+ *     def __repr__(self):             # <<<<<<<<<<<<<<
+ *         return "<MemoryView of %r at 0x%x>" % (self.base.__class__.__name__,
+ *                                                id(self))
+ */
+
+  /* function exit code: the error path releases any live temporaries, records a traceback entry and returns NULL */
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_1);
+  __Pyx_XDECREF(__pyx_t_2);
+  __Pyx_XDECREF(__pyx_t_3);
+  __Pyx_AddTraceback("View.MemoryView.memoryview.__repr__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = NULL;
+  __pyx_L0:;
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "View.MemoryView":615
+ *                                                id(self))
+ * 
+ *     def __str__(self):             # <<<<<<<<<<<<<<
+ *         return "<MemoryView of %r object>" % (self.base.__class__.__name__,)
+ * 
+ */
+
+/* Python wrapper for memoryview.__str__: casts self to struct __pyx_memoryview_obj * and forwards to the __pyx_pf_ implementation. */
+static PyObject *__pyx_memoryview___str__(PyObject *__pyx_v_self); /*proto*/
+static PyObject *__pyx_memoryview___str__(PyObject *__pyx_v_self) {
+  PyObject *__pyx_r = 0;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("__str__ (wrapper)", 0);
+  __pyx_r = __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_14__str__(((struct __pyx_memoryview_obj *)__pyx_v_self));
+
+  /* function exit code: finish the RefNanny context and return the implementation's result unchanged */
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_14__str__(struct __pyx_memoryview_obj *__pyx_v_self) { /* body of memoryview.__str__: returns the formatted object, or NULL after adding a traceback entry */
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  PyObject *__pyx_t_1 = NULL;
+  PyObject *__pyx_t_2 = NULL;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("__str__", 0);
+
+  /* "View.MemoryView":616
+ * 
+ *     def __str__(self):
+ *         return "<MemoryView of %r object>" % (self.base.__class__.__name__,)             # <<<<<<<<<<<<<<
+ * 
+ * 
+ */
+  __Pyx_XDECREF(__pyx_r);
+  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_base); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 616, __pyx_L1_error) /* t_1 = self.base */
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_class); if (unlikely(!__pyx_t_2)) __PYX_ERR(2, 616, __pyx_L1_error) /* t_2 = self.base.__class__ */
+  __Pyx_GOTREF(__pyx_t_2);
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_name_2); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 616, __pyx_L1_error) /* t_1 = self.base.__class__.__name__ */
+  __Pyx_GOTREF(__pyx_t_1);
+  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+  __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) __PYX_ERR(2, 616, __pyx_L1_error) /* 1-tuple holding the operand of the % format */
+  __Pyx_GOTREF(__pyx_t_2);
+  __Pyx_GIVEREF(__pyx_t_1);
+  PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_t_1); /* the tuple takes over t_1's reference (PyTuple_SET_ITEM steals it) */
+  __pyx_t_1 = 0; /* cleared so the error path's XDECREF cannot release the tuple's item a second time */
+  __pyx_t_1 = __Pyx_PyString_Format(__pyx_kp_s_MemoryView_of_r_object, __pyx_t_2); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 616, __pyx_L1_error) /* apply the format constant to the tuple */
+  __Pyx_GOTREF(__pyx_t_1);
+  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+  __pyx_r = __pyx_t_1; /* hand the formatted object over to the return slot */
+  __pyx_t_1 = 0;
+  goto __pyx_L0;
+
+  /* "View.MemoryView":615
+ *                                                id(self))
+ * 
+ *     def __str__(self):             # <<<<<<<<<<<<<<
+ *         return "<MemoryView of %r object>" % (self.base.__class__.__name__,)
+ * 
+ */
+
+  /* function exit code: on error release any temporaries still held, add a traceback entry and return NULL */
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_1);
+  __Pyx_XDECREF(__pyx_t_2);
+  __Pyx_AddTraceback("View.MemoryView.memoryview.__str__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = NULL;
+  __pyx_L0:;
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "View.MemoryView":619
+ * 
+ * 
+ *     def is_c_contig(self):             # <<<<<<<<<<<<<<
+ *         cdef __Pyx_memviewslice *mslice
+ *         cdef __Pyx_memviewslice tmp
+ */
+
+/* Python wrapper */
+static PyObject *__pyx_memoryview_is_c_contig(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused); /* proto: Python-level entry point for memoryview.is_c_contig() */
+static PyObject *__pyx_memoryview_is_c_contig(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused) {
+  PyObject *__pyx_r = 0;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("is_c_contig (wrapper)", 0);
+  __pyx_r = __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_16is_c_contig(((struct __pyx_memoryview_obj *)__pyx_v_self)); /* the second argument is never read; cast self and delegate, passing the result (NULL included) through */
+
+  /* function exit code */
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_16is_c_contig(struct __pyx_memoryview_obj *__pyx_v_self) { /* body of memoryview.is_c_contig(): returns a Python bool built from the 'C' contiguity check, or NULL on error */
+  __Pyx_memviewslice *__pyx_v_mslice;
+  __Pyx_memviewslice __pyx_v_tmp; /* stack scratch slice whose address is handed to the slice helper below */
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  __Pyx_memviewslice *__pyx_t_1;
+  PyObject *__pyx_t_2 = NULL;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("is_c_contig", 0);
+
+  /* "View.MemoryView":622
+ *         cdef __Pyx_memviewslice *mslice
+ *         cdef __Pyx_memviewslice tmp
+ *         mslice = get_slice_from_memview(self, &tmp)             # <<<<<<<<<<<<<<
+ *         return slice_is_contig(mslice[0], 'C', self.view.ndim)
+ * 
+ */
+  __pyx_t_1 = __pyx_memoryview_get_slice_from_memoryview(__pyx_v_self, (&__pyx_v_tmp)); if (unlikely(__pyx_t_1 == ((__Pyx_memviewslice *)NULL))) __PYX_ERR(2, 622, __pyx_L1_error) /* a NULL slice pointer is treated as failure */
+  __pyx_v_mslice = __pyx_t_1;
+
+  /* "View.MemoryView":623
+ *         cdef __Pyx_memviewslice tmp
+ *         mslice = get_slice_from_memview(self, &tmp)
+ *         return slice_is_contig(mslice[0], 'C', self.view.ndim)             # <<<<<<<<<<<<<<
+ * 
+ *     def is_f_contig(self):
+ */
+  __Pyx_XDECREF(__pyx_r);
+  __pyx_t_2 = __Pyx_PyBool_FromLong(__pyx_memviewslice_is_contig((__pyx_v_mslice[0]), 'C', __pyx_v_self->view.ndim)); if (unlikely(!__pyx_t_2)) __PYX_ERR(2, 623, __pyx_L1_error) /* the slice is passed by value together with the order character and self->view.ndim */
+  __Pyx_GOTREF(__pyx_t_2);
+  __pyx_r = __pyx_t_2; /* hand the bool object over to the return slot */
+  __pyx_t_2 = 0;
+  goto __pyx_L0;
+
+  /* "View.MemoryView":619
+ * 
+ * 
+ *     def is_c_contig(self):             # <<<<<<<<<<<<<<
+ *         cdef __Pyx_memviewslice *mslice
+ *         cdef __Pyx_memviewslice tmp
+ */
+
+  /* function exit code: on error release the temporary, add a traceback entry and return NULL */
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_2);
+  __Pyx_AddTraceback("View.MemoryView.memoryview.is_c_contig", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = NULL;
+  __pyx_L0:;
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "View.MemoryView":625
+ *         return slice_is_contig(mslice[0], 'C', self.view.ndim)
+ * 
+ *     def is_f_contig(self):             # <<<<<<<<<<<<<<
+ *         cdef __Pyx_memviewslice *mslice
+ *         cdef __Pyx_memviewslice tmp
+ */
+
+/* Python wrapper */
+static PyObject *__pyx_memoryview_is_f_contig(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused); /* proto: Python-level entry point for memoryview.is_f_contig() */
+static PyObject *__pyx_memoryview_is_f_contig(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused) {
+  PyObject *__pyx_r = 0;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("is_f_contig (wrapper)", 0);
+  __pyx_r = __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_18is_f_contig(((struct __pyx_memoryview_obj *)__pyx_v_self)); /* the second argument is never read; cast self and delegate, passing the result (NULL included) through */
+
+  /* function exit code */
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_18is_f_contig(struct __pyx_memoryview_obj *__pyx_v_self) { /* body of memoryview.is_f_contig(): returns a Python bool built from the 'F' contiguity check, or NULL on error */
+  __Pyx_memviewslice *__pyx_v_mslice;
+  __Pyx_memviewslice __pyx_v_tmp; /* stack scratch slice whose address is handed to the slice helper below */
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  __Pyx_memviewslice *__pyx_t_1;
+  PyObject *__pyx_t_2 = NULL;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("is_f_contig", 0);
+
+  /* "View.MemoryView":628
+ *         cdef __Pyx_memviewslice *mslice
+ *         cdef __Pyx_memviewslice tmp
+ *         mslice = get_slice_from_memview(self, &tmp)             # <<<<<<<<<<<<<<
+ *         return slice_is_contig(mslice[0], 'F', self.view.ndim)
+ * 
+ */
+  __pyx_t_1 = __pyx_memoryview_get_slice_from_memoryview(__pyx_v_self, (&__pyx_v_tmp)); if (unlikely(__pyx_t_1 == ((__Pyx_memviewslice *)NULL))) __PYX_ERR(2, 628, __pyx_L1_error) /* a NULL slice pointer is treated as failure */
+  __pyx_v_mslice = __pyx_t_1;
+
+  /* "View.MemoryView":629
+ *         cdef __Pyx_memviewslice tmp
+ *         mslice = get_slice_from_memview(self, &tmp)
+ *         return slice_is_contig(mslice[0], 'F', self.view.ndim)             # <<<<<<<<<<<<<<
+ * 
+ *     def copy(self):
+ */
+  __Pyx_XDECREF(__pyx_r);
+  __pyx_t_2 = __Pyx_PyBool_FromLong(__pyx_memviewslice_is_contig((__pyx_v_mslice[0]), 'F', __pyx_v_self->view.ndim)); if (unlikely(!__pyx_t_2)) __PYX_ERR(2, 629, __pyx_L1_error) /* same check as is_c_contig but with order character 'F' */
+  __Pyx_GOTREF(__pyx_t_2);
+  __pyx_r = __pyx_t_2; /* hand the bool object over to the return slot */
+  __pyx_t_2 = 0;
+  goto __pyx_L0;
+
+  /* "View.MemoryView":625
+ *         return slice_is_contig(mslice[0], 'C', self.view.ndim)
+ * 
+ *     def is_f_contig(self):             # <<<<<<<<<<<<<<
+ *         cdef __Pyx_memviewslice *mslice
+ *         cdef __Pyx_memviewslice tmp
+ */
+
+  /* function exit code: on error release the temporary, add a traceback entry and return NULL */
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_2);
+  __Pyx_AddTraceback("View.MemoryView.memoryview.is_f_contig", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = NULL;
+  __pyx_L0:;
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "View.MemoryView":631
+ *         return slice_is_contig(mslice[0], 'F', self.view.ndim)
+ * 
+ *     def copy(self):             # <<<<<<<<<<<<<<
+ *         cdef __Pyx_memviewslice mslice
+ *         cdef int flags = self.flags & ~PyBUF_F_CONTIGUOUS
+ */
+
+/* Python wrapper */
+static PyObject *__pyx_memoryview_copy(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused); /* proto: Python-level entry point for memoryview.copy() */
+static PyObject *__pyx_memoryview_copy(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused) {
+  PyObject *__pyx_r = 0;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("copy (wrapper)", 0);
+  __pyx_r = __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_20copy(((struct __pyx_memoryview_obj *)__pyx_v_self)); /* the second argument is never read; cast self and delegate, passing the result (NULL included) through */
+
+  /* function exit code */
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_20copy(struct __pyx_memoryview_obj *__pyx_v_self) { /* body of memoryview.copy(): requests a "c"-order contiguous copy of self's slice and returns the object built from it, or NULL on error */
+  __Pyx_memviewslice __pyx_v_mslice;
+  int __pyx_v_flags;
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  __Pyx_memviewslice __pyx_t_1;
+  PyObject *__pyx_t_2 = NULL;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("copy", 0);
+
+  /* "View.MemoryView":633
+ *     def copy(self):
+ *         cdef __Pyx_memviewslice mslice
+ *         cdef int flags = self.flags & ~PyBUF_F_CONTIGUOUS             # <<<<<<<<<<<<<<
+ * 
+ *         slice_copy(self, &mslice)
+ */
+  __pyx_v_flags = (__pyx_v_self->flags & (~PyBUF_F_CONTIGUOUS)); /* start from self's flags with the PyBUF_F_CONTIGUOUS bits masked out */
+
+  /* "View.MemoryView":635
+ *         cdef int flags = self.flags & ~PyBUF_F_CONTIGUOUS
+ * 
+ *         slice_copy(self, &mslice)             # <<<<<<<<<<<<<<
+ *         mslice = slice_copy_contig(&mslice, "c", self.view.ndim,
+ *                                    self.view.itemsize,
+ */
+  __pyx_memoryview_slice_copy(__pyx_v_self, (&__pyx_v_mslice)); /* helper defined elsewhere; presumably fills mslice with self's slice description - confirm */
+
+  /* "View.MemoryView":636
+ * 
+ *         slice_copy(self, &mslice)
+ *         mslice = slice_copy_contig(&mslice, "c", self.view.ndim,             # <<<<<<<<<<<<<<
+ *                                    self.view.itemsize,
+ *                                    flags|PyBUF_C_CONTIGUOUS,
+ */
+  __pyx_t_1 = __pyx_memoryview_copy_new_contig((&__pyx_v_mslice), ((char *)"c"), __pyx_v_self->view.ndim, __pyx_v_self->view.itemsize, (__pyx_v_flags | PyBUF_C_CONTIGUOUS), __pyx_v_self->dtype_is_object); if (unlikely(PyErr_Occurred())) __PYX_ERR(2, 636, __pyx_L1_error) /* the helper returns a struct by value, so failure is detected through PyErr_Occurred() rather than a NULL result */
+  __pyx_v_mslice = __pyx_t_1; /* mslice now holds the helper's result instead of self's slice */
+
+  /* "View.MemoryView":641
+ *                                    self.dtype_is_object)
+ * 
+ *         return memoryview_copy_from_slice(self, &mslice)             # <<<<<<<<<<<<<<
+ * 
+ *     def copy_fortran(self):
+ */
+  __Pyx_XDECREF(__pyx_r);
+  __pyx_t_2 = __pyx_memoryview_copy_object_from_slice(__pyx_v_self, (&__pyx_v_mslice)); if (unlikely(!__pyx_t_2)) __PYX_ERR(2, 641, __pyx_L1_error) /* build the Python-level result object from the copied slice */
+  __Pyx_GOTREF(__pyx_t_2);
+  __pyx_r = __pyx_t_2; /* hand the new object over to the return slot */
+  __pyx_t_2 = 0;
+  goto __pyx_L0;
+
+  /* "View.MemoryView":631
+ *         return slice_is_contig(mslice[0], 'F', self.view.ndim)
+ * 
+ *     def copy(self):             # <<<<<<<<<<<<<<
+ *         cdef __Pyx_memviewslice mslice
+ *         cdef int flags = self.flags & ~PyBUF_F_CONTIGUOUS
+ */
+
+  /* function exit code: on error release the temporary, add a traceback entry and return NULL */
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_2);
+  __Pyx_AddTraceback("View.MemoryView.memoryview.copy", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = NULL;
+  __pyx_L0:;
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "View.MemoryView":643
+ *         return memoryview_copy_from_slice(self, &mslice)
+ * 
+ *     def copy_fortran(self):             # <<<<<<<<<<<<<<
+ *         cdef __Pyx_memviewslice src, dst
+ *         cdef int flags = self.flags & ~PyBUF_C_CONTIGUOUS
+ */
+
+/* Python wrapper */
+static PyObject *__pyx_memoryview_copy_fortran(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused); /* proto: Python-level entry point for memoryview.copy_fortran() */
+static PyObject *__pyx_memoryview_copy_fortran(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused) {
+  PyObject *__pyx_r = 0;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("copy_fortran (wrapper)", 0);
+  __pyx_r = __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_22copy_fortran(((struct __pyx_memoryview_obj *)__pyx_v_self)); /* the second argument is never read; cast self and delegate, passing the result (NULL included) through */
+
+  /* function exit code */
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_22copy_fortran(struct __pyx_memoryview_obj *__pyx_v_self) { /* body of memoryview.copy_fortran(): requests a "fortran"-order contiguous copy of self's slice and returns the object built from it, or NULL on error */
+  __Pyx_memviewslice __pyx_v_src;
+  __Pyx_memviewslice __pyx_v_dst;
+  int __pyx_v_flags;
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  __Pyx_memviewslice __pyx_t_1;
+  PyObject *__pyx_t_2 = NULL;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("copy_fortran", 0);
+
+  /* "View.MemoryView":645
+ *     def copy_fortran(self):
+ *         cdef __Pyx_memviewslice src, dst
+ *         cdef int flags = self.flags & ~PyBUF_C_CONTIGUOUS             # <<<<<<<<<<<<<<
+ * 
+ *         slice_copy(self, &src)
+ */
+  __pyx_v_flags = (__pyx_v_self->flags & (~PyBUF_C_CONTIGUOUS)); /* start from self's flags with the PyBUF_C_CONTIGUOUS bits masked out */
+
+  /* "View.MemoryView":647
+ *         cdef int flags = self.flags & ~PyBUF_C_CONTIGUOUS
+ * 
+ *         slice_copy(self, &src)             # <<<<<<<<<<<<<<
+ *         dst = slice_copy_contig(&src, "fortran", self.view.ndim,
+ *                                 self.view.itemsize,
+ */
+  __pyx_memoryview_slice_copy(__pyx_v_self, (&__pyx_v_src)); /* helper defined elsewhere; presumably fills src with self's slice description - confirm */
+
+  /* "View.MemoryView":648
+ * 
+ *         slice_copy(self, &src)
+ *         dst = slice_copy_contig(&src, "fortran", self.view.ndim,             # <<<<<<<<<<<<<<
+ *                                 self.view.itemsize,
+ *                                 flags|PyBUF_F_CONTIGUOUS,
+ */
+  __pyx_t_1 = __pyx_memoryview_copy_new_contig((&__pyx_v_src), ((char *)"fortran"), __pyx_v_self->view.ndim, __pyx_v_self->view.itemsize, (__pyx_v_flags | PyBUF_F_CONTIGUOUS), __pyx_v_self->dtype_is_object); if (unlikely(PyErr_Occurred())) __PYX_ERR(2, 648, __pyx_L1_error) /* the helper returns a struct by value, so failure is detected through PyErr_Occurred() rather than a NULL result */
+  __pyx_v_dst = __pyx_t_1; /* unlike copy(), the result is kept in a separate dst slice and src is left as filled above */
+
+  /* "View.MemoryView":653
+ *                                 self.dtype_is_object)
+ * 
+ *         return memoryview_copy_from_slice(self, &dst)             # <<<<<<<<<<<<<<
+ * 
+ * 
+ */
+  __Pyx_XDECREF(__pyx_r);
+  __pyx_t_2 = __pyx_memoryview_copy_object_from_slice(__pyx_v_self, (&__pyx_v_dst)); if (unlikely(!__pyx_t_2)) __PYX_ERR(2, 653, __pyx_L1_error) /* build the Python-level result object from the copied slice */
+  __Pyx_GOTREF(__pyx_t_2);
+  __pyx_r = __pyx_t_2; /* hand the new object over to the return slot */
+  __pyx_t_2 = 0;
+  goto __pyx_L0;
+
+  /* "View.MemoryView":643
+ *         return memoryview_copy_from_slice(self, &mslice)
+ * 
+ *     def copy_fortran(self):             # <<<<<<<<<<<<<<
+ *         cdef __Pyx_memviewslice src, dst
+ *         cdef int flags = self.flags & ~PyBUF_C_CONTIGUOUS
+ */
+
+  /* function exit code: on error release the temporary, add a traceback entry and return NULL */
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_2);
+  __Pyx_AddTraceback("View.MemoryView.memoryview.copy_fortran", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = NULL;
+  __pyx_L0:;
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "(tree fragment)":1
+ * def __reduce_cython__(self):             # <<<<<<<<<<<<<<
+ *     raise TypeError("no default __reduce__ due to non-trivial __cinit__")
+ * def __setstate_cython__(self, __pyx_state):
+ */
+
+/* Python wrapper */
+static PyObject *__pyx_pw___pyx_memoryview_1__reduce_cython__(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused); /* proto: Python-level entry point for memoryview.__reduce_cython__() */
+static PyObject *__pyx_pw___pyx_memoryview_1__reduce_cython__(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused) {
+  PyObject *__pyx_r = 0;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("__reduce_cython__ (wrapper)", 0);
+  __pyx_r = __pyx_pf___pyx_memoryview___reduce_cython__(((struct __pyx_memoryview_obj *)__pyx_v_self)); /* the second argument is never read; cast self and delegate, passing the result through */
+
+  /* function exit code */
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+static PyObject *__pyx_pf___pyx_memoryview___reduce_cython__(CYTHON_UNUSED struct __pyx_memoryview_obj *__pyx_v_self) { /* body of __reduce_cython__: has no success path - it raises TypeError and returns NULL; self is not read */
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  PyObject *__pyx_t_1 = NULL;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("__reduce_cython__", 0);
+
+  /* "(tree fragment)":2
+ * def __reduce_cython__(self):
+ *     raise TypeError("no default __reduce__ due to non-trivial __cinit__")             # <<<<<<<<<<<<<<
+ * def __setstate_cython__(self, __pyx_state):
+ *     raise TypeError("no default __reduce__ due to non-trivial __cinit__")
+ */
+  __pyx_t_1 = __Pyx_PyObject_Call(__pyx_builtin_TypeError, __pyx_tuple__16, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 2, __pyx_L1_error) /* construct the TypeError; __pyx_tuple__16 presumably holds the message shown above - confirm */
+  __Pyx_GOTREF(__pyx_t_1);
+  __Pyx_Raise(__pyx_t_1, 0, 0, 0);
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; /* drop the local reference once the exception has been raised */
+  __PYX_ERR(2, 2, __pyx_L1_error) /* unconditional: control is directed to the error label below */
+
+  /* "(tree fragment)":1
+ * def __reduce_cython__(self):             # <<<<<<<<<<<<<<
+ *     raise TypeError("no default __reduce__ due to non-trivial __cinit__")
+ * def __setstate_cython__(self, __pyx_state):
+ */
+
+  /* function exit code: the only exit; there is no __pyx_L0 success label in this function */
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_1);
+  __Pyx_AddTraceback("View.MemoryView.memoryview.__reduce_cython__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = NULL;
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "(tree fragment)":3
+ * def __reduce_cython__(self):
+ *     raise TypeError("no default __reduce__ due to non-trivial __cinit__")
+ * def __setstate_cython__(self, __pyx_state):             # <<<<<<<<<<<<<<
+ *     raise TypeError("no default __reduce__ due to non-trivial __cinit__")
+ */
+
+/* Python wrapper */
+static PyObject *__pyx_pw___pyx_memoryview_3__setstate_cython__(PyObject *__pyx_v_self, PyObject *__pyx_v___pyx_state); /* proto: Python-level entry point for memoryview.__setstate_cython__(state) */
+static PyObject *__pyx_pw___pyx_memoryview_3__setstate_cython__(PyObject *__pyx_v_self, PyObject *__pyx_v___pyx_state) {
+  PyObject *__pyx_r = 0;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("__setstate_cython__ (wrapper)", 0);
+  __pyx_r = __pyx_pf___pyx_memoryview_2__setstate_cython__(((struct __pyx_memoryview_obj *)__pyx_v_self), ((PyObject *)__pyx_v___pyx_state)); /* cast self, forward the state object as-is, and pass the result through */
+
+  /* function exit code */
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+static PyObject *__pyx_pf___pyx_memoryview_2__setstate_cython__(CYTHON_UNUSED struct __pyx_memoryview_obj *__pyx_v_self, CYTHON_UNUSED PyObject *__pyx_v___pyx_state) { /* body of __setstate_cython__: has no success path - it raises TypeError and returns NULL; neither argument is read */
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  PyObject *__pyx_t_1 = NULL;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("__setstate_cython__", 0);
+
+  /* "(tree fragment)":4
+ *     raise TypeError("no default __reduce__ due to non-trivial __cinit__")
+ * def __setstate_cython__(self, __pyx_state):
+ *     raise TypeError("no default __reduce__ due to non-trivial __cinit__")             # <<<<<<<<<<<<<<
+ */
+  __pyx_t_1 = __Pyx_PyObject_Call(__pyx_builtin_TypeError, __pyx_tuple__17, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 4, __pyx_L1_error) /* construct the TypeError; __pyx_tuple__17 presumably holds the message shown above - confirm */
+  __Pyx_GOTREF(__pyx_t_1);
+  __Pyx_Raise(__pyx_t_1, 0, 0, 0);
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; /* drop the local reference once the exception has been raised */
+  __PYX_ERR(2, 4, __pyx_L1_error) /* unconditional: control is directed to the error label below */
+
+  /* "(tree fragment)":3
+ * def __reduce_cython__(self):
+ *     raise TypeError("no default __reduce__ due to non-trivial __cinit__")
+ * def __setstate_cython__(self, __pyx_state):             # <<<<<<<<<<<<<<
+ *     raise TypeError("no default __reduce__ due to non-trivial __cinit__")
+ */
+
+  /* function exit code: the only exit; there is no __pyx_L0 success label in this function */
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_1);
+  __Pyx_AddTraceback("View.MemoryView.memoryview.__setstate_cython__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = NULL;
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "View.MemoryView":657
+ * 
+ * @cname('__pyx_memoryview_new')
+ * cdef memoryview_cwrapper(object o, int flags, bint dtype_is_object, __Pyx_TypeInfo *typeinfo):             # <<<<<<<<<<<<<<
+ *     cdef memoryview result = memoryview(o, flags, dtype_is_object)
+ *     result.typeinfo = typeinfo
+ */
+
+static PyObject *__pyx_memoryview_new(PyObject *__pyx_v_o, int __pyx_v_flags, int __pyx_v_dtype_is_object, __Pyx_TypeInfo *__pyx_v_typeinfo) { /* C-level constructor: calls the memoryview type with (o, flags, dtype_is_object), stores typeinfo on the result and returns it; returns 0 on error */
+  struct __pyx_memoryview_obj *__pyx_v_result = 0;
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  PyObject *__pyx_t_1 = NULL;
+  PyObject *__pyx_t_2 = NULL;
+  PyObject *__pyx_t_3 = NULL;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("memoryview_cwrapper", 0);
+
+  /* "View.MemoryView":658
+ * @cname('__pyx_memoryview_new')
+ * cdef memoryview_cwrapper(object o, int flags, bint dtype_is_object, __Pyx_TypeInfo *typeinfo):
+ *     cdef memoryview result = memoryview(o, flags, dtype_is_object)             # <<<<<<<<<<<<<<
+ *     result.typeinfo = typeinfo
+ *     return result
+ */
+  __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_flags); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 658, __pyx_L1_error) /* box the C int flags as a Python object */
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_2 = __Pyx_PyBool_FromLong(__pyx_v_dtype_is_object); if (unlikely(!__pyx_t_2)) __PYX_ERR(2, 658, __pyx_L1_error) /* box dtype_is_object as a Python bool */
+  __Pyx_GOTREF(__pyx_t_2);
+  __pyx_t_3 = PyTuple_New(3); if (unlikely(!__pyx_t_3)) __PYX_ERR(2, 658, __pyx_L1_error) /* positional-argument tuple for the constructor call */
+  __Pyx_GOTREF(__pyx_t_3);
+  __Pyx_INCREF(__pyx_v_o); /* PyTuple_SET_ITEM steals a reference, so take one for the tuple; the caller's reference to o is untouched */
+  __Pyx_GIVEREF(__pyx_v_o);
+  PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_v_o);
+  __Pyx_GIVEREF(__pyx_t_1);
+  PyTuple_SET_ITEM(__pyx_t_3, 1, __pyx_t_1);
+  __Pyx_GIVEREF(__pyx_t_2);
+  PyTuple_SET_ITEM(__pyx_t_3, 2, __pyx_t_2);
+  __pyx_t_1 = 0; /* both temporaries now belong to the tuple; clear them so the error path does not release them again */
+  __pyx_t_2 = 0;
+  __pyx_t_2 = __Pyx_PyObject_Call(((PyObject *)__pyx_memoryview_type), __pyx_t_3, NULL); if (unlikely(!__pyx_t_2)) __PYX_ERR(2, 658, __pyx_L1_error) /* memoryview(o, flags, dtype_is_object) */
+  __Pyx_GOTREF(__pyx_t_2);
+  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+  __pyx_v_result = ((struct __pyx_memoryview_obj *)__pyx_t_2); /* the local variable takes over the reference held by t_2 */
+  __pyx_t_2 = 0;
+
+  /* "View.MemoryView":659
+ * cdef memoryview_cwrapper(object o, int flags, bint dtype_is_object, __Pyx_TypeInfo *typeinfo):
+ *     cdef memoryview result = memoryview(o, flags, dtype_is_object)
+ *     result.typeinfo = typeinfo             # <<<<<<<<<<<<<<
+ *     return result
+ * 
+ */
+  __pyx_v_result->typeinfo = __pyx_v_typeinfo; /* plain pointer store; the typeinfo pointer is not copied or reference-counted here */
+
+  /* "View.MemoryView":660
+ *     cdef memoryview result = memoryview(o, flags, dtype_is_object)
+ *     result.typeinfo = typeinfo
+ *     return result             # <<<<<<<<<<<<<<
+ * 
+ * @cname('__pyx_memoryview_check')
+ */
+  __Pyx_XDECREF(__pyx_r);
+  __Pyx_INCREF(((PyObject *)__pyx_v_result)); /* extra reference for the return value; the local's own reference is dropped at __pyx_L0 */
+  __pyx_r = ((PyObject *)__pyx_v_result);
+  goto __pyx_L0;
+
+  /* "View.MemoryView":657
+ * 
+ * @cname('__pyx_memoryview_new')
+ * cdef memoryview_cwrapper(object o, int flags, bint dtype_is_object, __Pyx_TypeInfo *typeinfo):             # <<<<<<<<<<<<<<
+ *     cdef memoryview result = memoryview(o, flags, dtype_is_object)
+ *     result.typeinfo = typeinfo
+ */
+
+  /* function exit code: the error path releases temporaries and sets the result to 0; both paths then release the local result variable */
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_1);
+  __Pyx_XDECREF(__pyx_t_2);
+  __Pyx_XDECREF(__pyx_t_3);
+  __Pyx_AddTraceback("View.MemoryView.memoryview_cwrapper", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = 0;
+  __pyx_L0:;
+  __Pyx_XDECREF((PyObject *)__pyx_v_result);
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "View.MemoryView":663
+ * 
+ * @cname('__pyx_memoryview_check')
+ * cdef inline bint memoryview_check(object o):             # <<<<<<<<<<<<<<
+ *     return isinstance(o, memoryview)
+ * 
+ */
+
+static CYTHON_INLINE int __pyx_memoryview_check(PyObject *__pyx_v_o) { /* C-level isinstance(o, memoryview): returns the int result of the type check against __pyx_memoryview_type; has no error path */
+  int __pyx_r;
+  __Pyx_RefNannyDeclarations
+  int __pyx_t_1;
+  __Pyx_RefNannySetupContext("memoryview_check", 0);
+
+  /* "View.MemoryView":664
+ * @cname('__pyx_memoryview_check')
+ * cdef inline bint memoryview_check(object o):
+ *     return isinstance(o, memoryview)             # <<<<<<<<<<<<<<
+ * 
+ * cdef tuple _unellipsify(object index, int ndim):
+ */
+  __pyx_t_1 = __Pyx_TypeCheck(__pyx_v_o, __pyx_memoryview_type); /* o is only inspected; no reference is taken or released here */
+  __pyx_r = __pyx_t_1;
+  goto __pyx_L0;
+
+  /* "View.MemoryView":663
+ * 
+ * @cname('__pyx_memoryview_check')
+ * cdef inline bint memoryview_check(object o):             # <<<<<<<<<<<<<<
+ *     return isinstance(o, memoryview)
+ * 
+ */
+
+  /* function exit code */
+  __pyx_L0:;
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "View.MemoryView":666
+ *     return isinstance(o, memoryview)
+ * 
+ * cdef tuple _unellipsify(object index, int ndim):             # <<<<<<<<<<<<<<
+ *     """
+ *     Replace all ellipses with full slices and fill incomplete indices with
+ */
+
+static PyObject *_unellipsify(PyObject *__pyx_v_index, int __pyx_v_ndim) {
+  PyObject *__pyx_v_tup = NULL;
+  PyObject *__pyx_v_result = NULL;
+  int __pyx_v_have_slices;
+  int __pyx_v_seen_ellipsis;
+  CYTHON_UNUSED PyObject *__pyx_v_idx = NULL;
+  PyObject *__pyx_v_item = NULL;
+  Py_ssize_t __pyx_v_nslices;
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  int __pyx_t_1;
+  int __pyx_t_2;
+  PyObject *__pyx_t_3 = NULL;
+  PyObject *__pyx_t_4 = NULL;
+  Py_ssize_t __pyx_t_5;
+  PyObject *(*__pyx_t_6)(PyObject *);
+  PyObject *__pyx_t_7 = NULL;
+  Py_ssize_t __pyx_t_8;
+  int __pyx_t_9;
+  int __pyx_t_10;
+  PyObject *__pyx_t_11 = NULL;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("_unellipsify", 0);
+
+  /* "View.MemoryView":671
+ *     full slices.
+ *     """
+ *     if not isinstance(index, tuple):             # <<<<<<<<<<<<<<
+ *         tup = (index,)
+ *     else:
+ */
+  __pyx_t_1 = PyTuple_Check(__pyx_v_index); 
+  __pyx_t_2 = ((!(__pyx_t_1 != 0)) != 0);
+  if (__pyx_t_2) {
+
+    /* "View.MemoryView":672
+ *     """
+ *     if not isinstance(index, tuple):
+ *         tup = (index,)             # <<<<<<<<<<<<<<
+ *     else:
+ *         tup = index
+ */
+    __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) __PYX_ERR(2, 672, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_3);
+    __Pyx_INCREF(__pyx_v_index);
+    __Pyx_GIVEREF(__pyx_v_index);
+    PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_v_index);
+    __pyx_v_tup = __pyx_t_3;
+    __pyx_t_3 = 0;
+
+    /* "View.MemoryView":671
+ *     full slices.
+ *     """
+ *     if not isinstance(index, tuple):             # <<<<<<<<<<<<<<
+ *         tup = (index,)
+ *     else:
+ */
+    goto __pyx_L3;
+  }
+
+  /* "View.MemoryView":674
+ *         tup = (index,)
+ *     else:
+ *         tup = index             # <<<<<<<<<<<<<<
+ * 
+ *     result = []
+ */
+  /*else*/ {
+    __Pyx_INCREF(__pyx_v_index);
+    __pyx_v_tup = __pyx_v_index;
+  }
+  __pyx_L3:;
+
+  /* "View.MemoryView":676
+ *         tup = index
+ * 
+ *     result = []             # <<<<<<<<<<<<<<
+ *     have_slices = False
+ *     seen_ellipsis = False
+ */
+  __pyx_t_3 = PyList_New(0); if (unlikely(!__pyx_t_3)) __PYX_ERR(2, 676, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_3);
+  __pyx_v_result = ((PyObject*)__pyx_t_3);
+  __pyx_t_3 = 0;
+
+  /* "View.MemoryView":677
+ * 
+ *     result = []
+ *     have_slices = False             # <<<<<<<<<<<<<<
+ *     seen_ellipsis = False
+ *     for idx, item in enumerate(tup):
+ */
+  __pyx_v_have_slices = 0;
+
+  /* "View.MemoryView":678
+ *     result = []
+ *     have_slices = False
+ *     seen_ellipsis = False             # <<<<<<<<<<<<<<
+ *     for idx, item in enumerate(tup):
+ *         if item is Ellipsis:
+ */
+  __pyx_v_seen_ellipsis = 0;
+
+  /* "View.MemoryView":679
+ *     have_slices = False
+ *     seen_ellipsis = False
+ *     for idx, item in enumerate(tup):             # <<<<<<<<<<<<<<
+ *         if item is Ellipsis:
+ *             if not seen_ellipsis:
+ */
+  __Pyx_INCREF(__pyx_int_0);
+  __pyx_t_3 = __pyx_int_0;
+  if (likely(PyList_CheckExact(__pyx_v_tup)) || PyTuple_CheckExact(__pyx_v_tup)) {
+    __pyx_t_4 = __pyx_v_tup; __Pyx_INCREF(__pyx_t_4); __pyx_t_5 = 0;
+    __pyx_t_6 = NULL;
+  } else {
+    __pyx_t_5 = -1; __pyx_t_4 = PyObject_GetIter(__pyx_v_tup); if (unlikely(!__pyx_t_4)) __PYX_ERR(2, 679, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_4);
+    __pyx_t_6 = Py_TYPE(__pyx_t_4)->tp_iternext; if (unlikely(!__pyx_t_6)) __PYX_ERR(2, 679, __pyx_L1_error)
+  }
+  for (;;) {
+    if (likely(!__pyx_t_6)) {
+      if (likely(PyList_CheckExact(__pyx_t_4))) {
+        if (__pyx_t_5 >= PyList_GET_SIZE(__pyx_t_4)) break;
+        #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS
+        __pyx_t_7 = PyList_GET_ITEM(__pyx_t_4, __pyx_t_5); __Pyx_INCREF(__pyx_t_7); __pyx_t_5++; if (unlikely(0 < 0)) __PYX_ERR(2, 679, __pyx_L1_error)
+        #else
+        __pyx_t_7 = PySequence_ITEM(__pyx_t_4, __pyx_t_5); __pyx_t_5++; if (unlikely(!__pyx_t_7)) __PYX_ERR(2, 679, __pyx_L1_error)
+        __Pyx_GOTREF(__pyx_t_7);
+        #endif
+      } else {
+        if (__pyx_t_5 >= PyTuple_GET_SIZE(__pyx_t_4)) break;
+        #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS
+        __pyx_t_7 = PyTuple_GET_ITEM(__pyx_t_4, __pyx_t_5); __Pyx_INCREF(__pyx_t_7); __pyx_t_5++; if (unlikely(0 < 0)) __PYX_ERR(2, 679, __pyx_L1_error)
+        #else
+        __pyx_t_7 = PySequence_ITEM(__pyx_t_4, __pyx_t_5); __pyx_t_5++; if (unlikely(!__pyx_t_7)) __PYX_ERR(2, 679, __pyx_L1_error)
+        __Pyx_GOTREF(__pyx_t_7);
+        #endif
+      }
+    } else {
+      __pyx_t_7 = __pyx_t_6(__pyx_t_4);
+      if (unlikely(!__pyx_t_7)) {
+        PyObject* exc_type = PyErr_Occurred();
+        if (exc_type) {
+          if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear();
+          else __PYX_ERR(2, 679, __pyx_L1_error)
+        }
+        break;
+      }
+      __Pyx_GOTREF(__pyx_t_7);
+    }
+    __Pyx_XDECREF_SET(__pyx_v_item, __pyx_t_7);
+    __pyx_t_7 = 0;
+    __Pyx_INCREF(__pyx_t_3);
+    __Pyx_XDECREF_SET(__pyx_v_idx, __pyx_t_3);
+    __pyx_t_7 = __Pyx_PyInt_AddObjC(__pyx_t_3, __pyx_int_1, 1, 0, 0); if (unlikely(!__pyx_t_7)) __PYX_ERR(2, 679, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_7);
+    __Pyx_DECREF(__pyx_t_3);
+    __pyx_t_3 = __pyx_t_7;
+    __pyx_t_7 = 0;
+
+    /* "View.MemoryView":680
+ *     seen_ellipsis = False
+ *     for idx, item in enumerate(tup):
+ *         if item is Ellipsis:             # <<<<<<<<<<<<<<
+ *             if not seen_ellipsis:
+ *                 result.extend([slice(None)] * (ndim - len(tup) + 1))
+ */
+    __pyx_t_2 = (__pyx_v_item == __pyx_builtin_Ellipsis);
+    __pyx_t_1 = (__pyx_t_2 != 0);
+    if (__pyx_t_1) {
+
+      /* "View.MemoryView":681
+ *     for idx, item in enumerate(tup):
+ *         if item is Ellipsis:
+ *             if not seen_ellipsis:             # <<<<<<<<<<<<<<
+ *                 result.extend([slice(None)] * (ndim - len(tup) + 1))
+ *                 seen_ellipsis = True
+ */
+      __pyx_t_1 = ((!(__pyx_v_seen_ellipsis != 0)) != 0);
+      if (__pyx_t_1) {
+
+        /* "View.MemoryView":682
+ *         if item is Ellipsis:
+ *             if not seen_ellipsis:
+ *                 result.extend([slice(None)] * (ndim - len(tup) + 1))             # <<<<<<<<<<<<<<
+ *                 seen_ellipsis = True
+ *             else:
+ */
+        __pyx_t_8 = PyObject_Length(__pyx_v_tup); if (unlikely(__pyx_t_8 == ((Py_ssize_t)-1))) __PYX_ERR(2, 682, __pyx_L1_error)
+        __pyx_t_7 = PyList_New(1 * ((((__pyx_v_ndim - __pyx_t_8) + 1)<0) ? 0:((__pyx_v_ndim - __pyx_t_8) + 1))); if (unlikely(!__pyx_t_7)) __PYX_ERR(2, 682, __pyx_L1_error)
+        __Pyx_GOTREF(__pyx_t_7);
+        { Py_ssize_t __pyx_temp;
+          for (__pyx_temp=0; __pyx_temp < ((__pyx_v_ndim - __pyx_t_8) + 1); __pyx_temp++) {
+            __Pyx_INCREF(__pyx_slice__18);
+            __Pyx_GIVEREF(__pyx_slice__18);
+            PyList_SET_ITEM(__pyx_t_7, __pyx_temp, __pyx_slice__18);
+          }
+        }
+        __pyx_t_9 = __Pyx_PyList_Extend(__pyx_v_result, __pyx_t_7); if (unlikely(__pyx_t_9 == ((int)-1))) __PYX_ERR(2, 682, __pyx_L1_error)
+        __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
+
+        /* "View.MemoryView":683
+ *             if not seen_ellipsis:
+ *                 result.extend([slice(None)] * (ndim - len(tup) + 1))
+ *                 seen_ellipsis = True             # <<<<<<<<<<<<<<
+ *             else:
+ *                 result.append(slice(None))
+ */
+        __pyx_v_seen_ellipsis = 1;
+
+        /* "View.MemoryView":681
+ *     for idx, item in enumerate(tup):
+ *         if item is Ellipsis:
+ *             if not seen_ellipsis:             # <<<<<<<<<<<<<<
+ *                 result.extend([slice(None)] * (ndim - len(tup) + 1))
+ *                 seen_ellipsis = True
+ */
+        goto __pyx_L7;
+      }
+
+      /* "View.MemoryView":685
+ *                 seen_ellipsis = True
+ *             else:
+ *                 result.append(slice(None))             # <<<<<<<<<<<<<<
+ *             have_slices = True
+ *         else:
+ */
+      /*else*/ {
+        __pyx_t_9 = __Pyx_PyList_Append(__pyx_v_result, __pyx_slice__18); if (unlikely(__pyx_t_9 == ((int)-1))) __PYX_ERR(2, 685, __pyx_L1_error)
+      }
+      __pyx_L7:;
+
+      /* "View.MemoryView":686
+ *             else:
+ *                 result.append(slice(None))
+ *             have_slices = True             # <<<<<<<<<<<<<<
+ *         else:
+ *             if not isinstance(item, slice) and not PyIndex_Check(item):
+ */
+      __pyx_v_have_slices = 1;
+
+      /* "View.MemoryView":680
+ *     seen_ellipsis = False
+ *     for idx, item in enumerate(tup):
+ *         if item is Ellipsis:             # <<<<<<<<<<<<<<
+ *             if not seen_ellipsis:
+ *                 result.extend([slice(None)] * (ndim - len(tup) + 1))
+ */
+      goto __pyx_L6;
+    }
+
+    /* "View.MemoryView":688
+ *             have_slices = True
+ *         else:
+ *             if not isinstance(item, slice) and not PyIndex_Check(item):             # <<<<<<<<<<<<<<
+ *                 raise TypeError("Cannot index with type '%s'" % type(item))
+ * 
+ */
+    /*else*/ {
+      __pyx_t_2 = PySlice_Check(__pyx_v_item); 
+      __pyx_t_10 = ((!(__pyx_t_2 != 0)) != 0);
+      if (__pyx_t_10) {
+      } else {
+        __pyx_t_1 = __pyx_t_10;
+        goto __pyx_L9_bool_binop_done;
+      }
+      __pyx_t_10 = ((!(PyIndex_Check(__pyx_v_item) != 0)) != 0);
+      __pyx_t_1 = __pyx_t_10;
+      __pyx_L9_bool_binop_done:;
+      if (unlikely(__pyx_t_1)) {
+
+        /* "View.MemoryView":689
+ *         else:
+ *             if not isinstance(item, slice) and not PyIndex_Check(item):
+ *                 raise TypeError("Cannot index with type '%s'" % type(item))             # <<<<<<<<<<<<<<
+ * 
+ *             have_slices = have_slices or isinstance(item, slice)
+ */
+        __pyx_t_7 = __Pyx_PyString_FormatSafe(__pyx_kp_s_Cannot_index_with_type_s, ((PyObject *)Py_TYPE(__pyx_v_item))); if (unlikely(!__pyx_t_7)) __PYX_ERR(2, 689, __pyx_L1_error)
+        __Pyx_GOTREF(__pyx_t_7);
+        __pyx_t_11 = __Pyx_PyObject_CallOneArg(__pyx_builtin_TypeError, __pyx_t_7); if (unlikely(!__pyx_t_11)) __PYX_ERR(2, 689, __pyx_L1_error)
+        __Pyx_GOTREF(__pyx_t_11);
+        __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
+        __Pyx_Raise(__pyx_t_11, 0, 0, 0);
+        __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0;
+        __PYX_ERR(2, 689, __pyx_L1_error)
+
+        /* "View.MemoryView":688
+ *             have_slices = True
+ *         else:
+ *             if not isinstance(item, slice) and not PyIndex_Check(item):             # <<<<<<<<<<<<<<
+ *                 raise TypeError("Cannot index with type '%s'" % type(item))
+ * 
+ */
+      }
+
+      /* "View.MemoryView":691
+ *                 raise TypeError("Cannot index with type '%s'" % type(item))
+ * 
+ *             have_slices = have_slices or isinstance(item, slice)             # <<<<<<<<<<<<<<
+ *             result.append(item)
+ * 
+ */
+      __pyx_t_10 = (__pyx_v_have_slices != 0);
+      if (!__pyx_t_10) {
+      } else {
+        __pyx_t_1 = __pyx_t_10;
+        goto __pyx_L11_bool_binop_done;
+      }
+      __pyx_t_10 = PySlice_Check(__pyx_v_item); 
+      __pyx_t_2 = (__pyx_t_10 != 0);
+      __pyx_t_1 = __pyx_t_2;
+      __pyx_L11_bool_binop_done:;
+      __pyx_v_have_slices = __pyx_t_1;
+
+      /* "View.MemoryView":692
+ * 
+ *             have_slices = have_slices or isinstance(item, slice)
+ *             result.append(item)             # <<<<<<<<<<<<<<
+ * 
+ *     nslices = ndim - len(result)
+ */
+      __pyx_t_9 = __Pyx_PyList_Append(__pyx_v_result, __pyx_v_item); if (unlikely(__pyx_t_9 == ((int)-1))) __PYX_ERR(2, 692, __pyx_L1_error)
+    }
+    __pyx_L6:;
+
+    /* "View.MemoryView":679
+ *     have_slices = False
+ *     seen_ellipsis = False
+ *     for idx, item in enumerate(tup):             # <<<<<<<<<<<<<<
+ *         if item is Ellipsis:
+ *             if not seen_ellipsis:
+ */
+  }
+  __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
+  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+
+  /* "View.MemoryView":694
+ *             result.append(item)
+ * 
+ *     nslices = ndim - len(result)             # <<<<<<<<<<<<<<
+ *     if nslices:
+ *         result.extend([slice(None)] * nslices)
+ */
+  __pyx_t_5 = PyList_GET_SIZE(__pyx_v_result); if (unlikely(__pyx_t_5 == ((Py_ssize_t)-1))) __PYX_ERR(2, 694, __pyx_L1_error)
+  __pyx_v_nslices = (__pyx_v_ndim - __pyx_t_5);
+
+  /* "View.MemoryView":695
+ * 
+ *     nslices = ndim - len(result)
+ *     if nslices:             # <<<<<<<<<<<<<<
+ *         result.extend([slice(None)] * nslices)
+ * 
+ */
+  __pyx_t_1 = (__pyx_v_nslices != 0);
+  if (__pyx_t_1) {
+
+    /* "View.MemoryView":696
+ *     nslices = ndim - len(result)
+ *     if nslices:
+ *         result.extend([slice(None)] * nslices)             # <<<<<<<<<<<<<<
+ * 
+ *     return have_slices or nslices, tuple(result)
+ */
+    __pyx_t_3 = PyList_New(1 * ((__pyx_v_nslices<0) ? 0:__pyx_v_nslices)); if (unlikely(!__pyx_t_3)) __PYX_ERR(2, 696, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_3);
+    { Py_ssize_t __pyx_temp;
+      for (__pyx_temp=0; __pyx_temp < __pyx_v_nslices; __pyx_temp++) {
+        __Pyx_INCREF(__pyx_slice__18);
+        __Pyx_GIVEREF(__pyx_slice__18);
+        PyList_SET_ITEM(__pyx_t_3, __pyx_temp, __pyx_slice__18);
+      }
+    }
+    __pyx_t_9 = __Pyx_PyList_Extend(__pyx_v_result, __pyx_t_3); if (unlikely(__pyx_t_9 == ((int)-1))) __PYX_ERR(2, 696, __pyx_L1_error)
+    __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+
+    /* "View.MemoryView":695
+ * 
+ *     nslices = ndim - len(result)
+ *     if nslices:             # <<<<<<<<<<<<<<
+ *         result.extend([slice(None)] * nslices)
+ * 
+ */
+  }
+
+  /* "View.MemoryView":698
+ *         result.extend([slice(None)] * nslices)
+ * 
+ *     return have_slices or nslices, tuple(result)             # <<<<<<<<<<<<<<
+ * 
+ * cdef assert_direct_dimensions(Py_ssize_t *suboffsets, int ndim):
+ */
+  __Pyx_XDECREF(__pyx_r);
+  if (!__pyx_v_have_slices) {
+  } else {
+    __pyx_t_4 = __Pyx_PyBool_FromLong(__pyx_v_have_slices); if (unlikely(!__pyx_t_4)) __PYX_ERR(2, 698, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_4);
+    __pyx_t_3 = __pyx_t_4;
+    __pyx_t_4 = 0;
+    goto __pyx_L14_bool_binop_done;
+  }
+  __pyx_t_4 = PyInt_FromSsize_t(__pyx_v_nslices); if (unlikely(!__pyx_t_4)) __PYX_ERR(2, 698, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_4);
+  __pyx_t_3 = __pyx_t_4;
+  __pyx_t_4 = 0;
+  __pyx_L14_bool_binop_done:;
+  __pyx_t_4 = PyList_AsTuple(__pyx_v_result); if (unlikely(!__pyx_t_4)) __PYX_ERR(2, 698, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_4);
+  __pyx_t_11 = PyTuple_New(2); if (unlikely(!__pyx_t_11)) __PYX_ERR(2, 698, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_11);
+  __Pyx_GIVEREF(__pyx_t_3);
+  PyTuple_SET_ITEM(__pyx_t_11, 0, __pyx_t_3);
+  __Pyx_GIVEREF(__pyx_t_4);
+  PyTuple_SET_ITEM(__pyx_t_11, 1, __pyx_t_4);
+  __pyx_t_3 = 0;
+  __pyx_t_4 = 0;
+  __pyx_r = ((PyObject*)__pyx_t_11);
+  __pyx_t_11 = 0;
+  goto __pyx_L0;
+
+  /* "View.MemoryView":666
+ *     return isinstance(o, memoryview)
+ * 
+ * cdef tuple _unellipsify(object index, int ndim):             # <<<<<<<<<<<<<<
+ *     """
+ *     Replace all ellipses with full slices and fill incomplete indices with
+ */
+
+  /* function exit code */
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_3);
+  __Pyx_XDECREF(__pyx_t_4);
+  __Pyx_XDECREF(__pyx_t_7);
+  __Pyx_XDECREF(__pyx_t_11);
+  __Pyx_AddTraceback("View.MemoryView._unellipsify", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = 0;
+  __pyx_L0:;
+  __Pyx_XDECREF(__pyx_v_tup);
+  __Pyx_XDECREF(__pyx_v_result);
+  __Pyx_XDECREF(__pyx_v_idx);
+  __Pyx_XDECREF(__pyx_v_item);
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "View.MemoryView":700
+ *     return have_slices or nslices, tuple(result)
+ * 
+ * cdef assert_direct_dimensions(Py_ssize_t *suboffsets, int ndim):             # <<<<<<<<<<<<<<
+ *     for suboffset in suboffsets[:ndim]:
+ *         if suboffset >= 0:
+ */
+
+static PyObject *assert_direct_dimensions(Py_ssize_t *__pyx_v_suboffsets, int __pyx_v_ndim) {
+  /*
+   * C translation of View.MemoryView lines 700-703:
+   *
+   *   cdef assert_direct_dimensions(Py_ssize_t *suboffsets, int ndim):
+   *       for suboffset in suboffsets[:ndim]:
+   *           if suboffset >= 0:
+   *               raise ValueError("Indirect dimensions not supported")
+   *
+   * Walks entries [0, ndim) of `suboffsets` and raises ValueError at the
+   * first entry that is >= 0.
+   *
+   * Parameters:
+   *   __pyx_v_suboffsets  array that is read at indices 0 .. ndim-1.
+   *   __pyx_v_ndim        number of leading entries to inspect.
+   *
+   * Returns:
+   *   Py_None (reference count incremented) when no inspected entry is >= 0;
+   *   0 (NULL) after raising and adding a traceback entry otherwise.
+   */
+  PyObject *__pyx_r = NULL;
+  PyObject *__pyx_t_exc = NULL;
+  Py_ssize_t *__pyx_t_cursor;
+  Py_ssize_t *__pyx_t_limit;
+  __Pyx_RefNannyDeclarations
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("assert_direct_dimensions", 0);
+
+  /* "View.MemoryView":701 -- for suboffset in suboffsets[:ndim]: */
+  __pyx_t_limit = (__pyx_v_suboffsets + __pyx_v_ndim);
+  __pyx_t_cursor = __pyx_v_suboffsets;
+  while (__pyx_t_cursor < __pyx_t_limit) {
+
+    /* "View.MemoryView":702 -- if suboffset >= 0: */
+    if (unlikely(((__pyx_t_cursor[0]) >= 0) != 0)) {
+
+      /* "View.MemoryView":703 -- raise ValueError("Indirect dimensions not supported")
+       * NOTE(review): __pyx_tuple__19 is presumably the pre-built argument
+       * tuple holding that message; it is defined outside this function. */
+      __pyx_t_exc = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__19, NULL); if (unlikely(!__pyx_t_exc)) __PYX_ERR(2, 703, __pyx_L1_error)
+      __Pyx_GOTREF(__pyx_t_exc);
+      __Pyx_Raise(__pyx_t_exc, 0, 0, 0);
+      /* Drop the local reference before jumping to the error exit. */
+      __Pyx_DECREF(__pyx_t_exc); __pyx_t_exc = 0;
+      __PYX_ERR(2, 703, __pyx_L1_error)
+    }
+    __pyx_t_cursor++;
+  }
+
+  /* Normal exit: no inspected entry was >= 0, so return None. */
+  __pyx_r = Py_None; __Pyx_INCREF(Py_None);
+  goto __pyx_L0;
+
+  /* Error exit: release any pending temporary and add a traceback entry. */
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_exc);
+  __Pyx_AddTraceback("View.MemoryView.assert_direct_dimensions", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = 0;
+
+  /* Common exit shared by both paths. */
+  __pyx_L0:;
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "View.MemoryView":710
+ * 
+ * @cname('__pyx_memview_slice')
+ * cdef memoryview memview_slice(memoryview memview, object indices):             # <<<<<<<<<<<<<<
+ *     cdef int new_ndim = 0, suboffset_dim = -1, dim
+ *     cdef bint negative_step
+ */
+
+static struct __pyx_memoryview_obj *__pyx_memview_slice(struct __pyx_memoryview_obj *__pyx_v_memview, PyObject *__pyx_v_indices) { /* Applies each entry of `indices` to `memview`, one dimension at a time, and returns the memoryview built from the result; returns 0 (NULL) after adding a traceback entry if any step fails. */
+  int __pyx_v_new_ndim; /* index of the next dimension to be written into dst */
+  int __pyx_v_suboffset_dim; /* handed to slice_memviewslice through p_suboffset_dim */
+  int __pyx_v_dim; /* position of the current entry within indices */
+  __Pyx_memviewslice __pyx_v_src; /* filled by slice_copy only when memview is not a _memoryviewslice */
+  __Pyx_memviewslice __pyx_v_dst; /* slice description assembled by this function */
+  __Pyx_memviewslice *__pyx_v_p_src; /* points at either memviewsliceobj->from_slice or the local src */
+  struct __pyx_memoryviewslice_obj *__pyx_v_memviewsliceobj = 0; /* stays NULL unless memview is a _memoryviewslice; released at __pyx_L0 */
+  __Pyx_memviewslice *__pyx_v_p_dst;
+  int *__pyx_v_p_suboffset_dim;
+  Py_ssize_t __pyx_v_start;
+  Py_ssize_t __pyx_v_stop;
+  Py_ssize_t __pyx_v_step;
+  int __pyx_v_have_start;
+  int __pyx_v_have_stop;
+  int __pyx_v_have_step;
+  PyObject *__pyx_v_index = NULL; /* current entry of indices; owned reference, released at __pyx_L0 */
+  struct __pyx_memoryview_obj *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  int __pyx_t_1;
+  int __pyx_t_2;
+  PyObject *__pyx_t_3 = NULL;
+  struct __pyx_memoryview_obj *__pyx_t_4;
+  char *__pyx_t_5;
+  int __pyx_t_6;
+  Py_ssize_t __pyx_t_7;
+  PyObject *(*__pyx_t_8)(PyObject *);
+  PyObject *__pyx_t_9 = NULL;
+  Py_ssize_t __pyx_t_10;
+  int __pyx_t_11;
+  Py_ssize_t __pyx_t_12;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("memview_slice", 0);
+
+  /* "View.MemoryView":711
+ * @cname('__pyx_memview_slice')
+ * cdef memoryview memview_slice(memoryview memview, object indices):
+ *     cdef int new_ndim = 0, suboffset_dim = -1, dim             # <<<<<<<<<<<<<<
+ *     cdef bint negative_step
+ *     cdef __Pyx_memviewslice src, dst
+ */
+  __pyx_v_new_ndim = 0;
+  __pyx_v_suboffset_dim = -1;
+
+  /* "View.MemoryView":718
+ * 
+ * 
+ *     memset(&dst, 0, sizeof(dst))             # <<<<<<<<<<<<<<
+ * 
+ *     cdef _memoryviewslice memviewsliceobj
+ */
+  (void)(memset((&__pyx_v_dst), 0, (sizeof(__pyx_v_dst)))); /* start from an all-zero destination slice */
+
+  /* "View.MemoryView":722
+ *     cdef _memoryviewslice memviewsliceobj
+ * 
+ *     assert memview.view.ndim > 0             # <<<<<<<<<<<<<<
+ * 
+ *     if isinstance(memview, _memoryviewslice):
+ */
+  #ifndef CYTHON_WITHOUT_ASSERTIONS
+  if (unlikely(!Py_OptimizeFlag)) { /* the assertion is evaluated only when Py_OptimizeFlag is 0 */
+    if (unlikely(!((__pyx_v_memview->view.ndim > 0) != 0))) {
+      PyErr_SetNone(PyExc_AssertionError);
+      __PYX_ERR(2, 722, __pyx_L1_error)
+    }
+  }
+  #endif
+
+  /* "View.MemoryView":724
+ *     assert memview.view.ndim > 0
+ * 
+ *     if isinstance(memview, _memoryviewslice):             # <<<<<<<<<<<<<<
+ *         memviewsliceobj = memview
+ *         p_src = &memviewsliceobj.from_slice
+ */
+  __pyx_t_1 = __Pyx_TypeCheck(((PyObject *)__pyx_v_memview), __pyx_memoryviewslice_type); 
+  __pyx_t_2 = (__pyx_t_1 != 0);
+  if (__pyx_t_2) {
+
+    /* "View.MemoryView":725
+ * 
+ *     if isinstance(memview, _memoryviewslice):
+ *         memviewsliceobj = memview             # <<<<<<<<<<<<<<
+ *         p_src = &memviewsliceobj.from_slice
+ *     else:
+ */
+    if (!(likely(((((PyObject *)__pyx_v_memview)) == Py_None) || likely(__Pyx_TypeTest(((PyObject *)__pyx_v_memview), __pyx_memoryviewslice_type))))) __PYX_ERR(2, 725, __pyx_L1_error) /* cast check: passes for None or when __Pyx_TypeTest succeeds */
+    __pyx_t_3 = ((PyObject *)__pyx_v_memview);
+    __Pyx_INCREF(__pyx_t_3); /* memviewsliceobj holds its own reference, dropped at __pyx_L0 */
+    __pyx_v_memviewsliceobj = ((struct __pyx_memoryviewslice_obj *)__pyx_t_3);
+    __pyx_t_3 = 0;
+
+    /* "View.MemoryView":726
+ *     if isinstance(memview, _memoryviewslice):
+ *         memviewsliceobj = memview
+ *         p_src = &memviewsliceobj.from_slice             # <<<<<<<<<<<<<<
+ *     else:
+ *         slice_copy(memview, &src)
+ */
+    __pyx_v_p_src = (&__pyx_v_memviewsliceobj->from_slice); /* use the slice stored in the object; the local src is left untouched */
+
+    /* "View.MemoryView":724
+ *     assert memview.view.ndim > 0
+ * 
+ *     if isinstance(memview, _memoryviewslice):             # <<<<<<<<<<<<<<
+ *         memviewsliceobj = memview
+ *         p_src = &memviewsliceobj.from_slice
+ */
+    goto __pyx_L3;
+  }
+
+  /* "View.MemoryView":728
+ *         p_src = &memviewsliceobj.from_slice
+ *     else:
+ *         slice_copy(memview, &src)             # <<<<<<<<<<<<<<
+ *         p_src = &src
+ * 
+ */
+  /*else*/ {
+    __pyx_memoryview_slice_copy(__pyx_v_memview, (&__pyx_v_src)); /* any other memoryview: populate the local src via slice_copy */
+
+    /* "View.MemoryView":729
+ *     else:
+ *         slice_copy(memview, &src)
+ *         p_src = &src             # <<<<<<<<<<<<<<
+ * 
+ * 
+ */
+    __pyx_v_p_src = (&__pyx_v_src);
+  }
+  __pyx_L3:;
+
+  /* "View.MemoryView":735
+ * 
+ * 
+ *     dst.memview = p_src.memview             # <<<<<<<<<<<<<<
+ *     dst.data = p_src.data
+ * 
+ */
+  __pyx_t_4 = __pyx_v_p_src->memview;
+  __pyx_v_dst.memview = __pyx_t_4; /* plain pointer copy; no reference count is adjusted here */
+
+  /* "View.MemoryView":736
+ * 
+ *     dst.memview = p_src.memview
+ *     dst.data = p_src.data             # <<<<<<<<<<<<<<
+ * 
+ * 
+ */
+  __pyx_t_5 = __pyx_v_p_src->data;
+  __pyx_v_dst.data = __pyx_t_5; /* dst starts at the same data pointer as the source */
+
+  /* "View.MemoryView":741
+ * 
+ * 
+ *     cdef __Pyx_memviewslice *p_dst = &dst             # <<<<<<<<<<<<<<
+ *     cdef int *p_suboffset_dim = &suboffset_dim
+ *     cdef Py_ssize_t start, stop, step
+ */
+  __pyx_v_p_dst = (&__pyx_v_dst);
+
+  /* "View.MemoryView":742
+ * 
+ *     cdef __Pyx_memviewslice *p_dst = &dst
+ *     cdef int *p_suboffset_dim = &suboffset_dim             # <<<<<<<<<<<<<<
+ *     cdef Py_ssize_t start, stop, step
+ *     cdef bint have_start, have_stop, have_step
+ */
+  __pyx_v_p_suboffset_dim = (&__pyx_v_suboffset_dim);
+
+  /* "View.MemoryView":746
+ *     cdef bint have_start, have_stop, have_step
+ * 
+ *     for dim, index in enumerate(indices):             # <<<<<<<<<<<<<<
+ *         if PyIndex_Check(index):
+ *             slice_memviewslice(
+ */
+  __pyx_t_6 = 0; /* enumerate() counter; copied into dim on every iteration */
+  if (likely(PyList_CheckExact(__pyx_v_indices)) || PyTuple_CheckExact(__pyx_v_indices)) { /* exact list/tuple: iterate by position with __pyx_t_7 */
+    __pyx_t_3 = __pyx_v_indices; __Pyx_INCREF(__pyx_t_3); __pyx_t_7 = 0;
+    __pyx_t_8 = NULL; /* NULL iternext marks the positional fast path */
+  } else {
+    __pyx_t_7 = -1; __pyx_t_3 = PyObject_GetIter(__pyx_v_indices); if (unlikely(!__pyx_t_3)) __PYX_ERR(2, 746, __pyx_L1_error) /* anything else: go through the iterator protocol */
+    __Pyx_GOTREF(__pyx_t_3);
+    __pyx_t_8 = Py_TYPE(__pyx_t_3)->tp_iternext; if (unlikely(!__pyx_t_8)) __PYX_ERR(2, 746, __pyx_L1_error)
+  }
+  for (;;) {
+    if (likely(!__pyx_t_8)) {
+      if (likely(PyList_CheckExact(__pyx_t_3))) {
+        if (__pyx_t_7 >= PyList_GET_SIZE(__pyx_t_3)) break; /* size is re-read on every pass */
+        #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS
+        __pyx_t_9 = PyList_GET_ITEM(__pyx_t_3, __pyx_t_7); __Pyx_INCREF(__pyx_t_9); __pyx_t_7++; if (unlikely(0 < 0)) __PYX_ERR(2, 746, __pyx_L1_error)
+        #else
+        __pyx_t_9 = PySequence_ITEM(__pyx_t_3, __pyx_t_7); __pyx_t_7++; if (unlikely(!__pyx_t_9)) __PYX_ERR(2, 746, __pyx_L1_error)
+        __Pyx_GOTREF(__pyx_t_9);
+        #endif
+      } else {
+        if (__pyx_t_7 >= PyTuple_GET_SIZE(__pyx_t_3)) break;
+        #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS
+        __pyx_t_9 = PyTuple_GET_ITEM(__pyx_t_3, __pyx_t_7); __Pyx_INCREF(__pyx_t_9); __pyx_t_7++; if (unlikely(0 < 0)) __PYX_ERR(2, 746, __pyx_L1_error)
+        #else
+        __pyx_t_9 = PySequence_ITEM(__pyx_t_3, __pyx_t_7); __pyx_t_7++; if (unlikely(!__pyx_t_9)) __PYX_ERR(2, 746, __pyx_L1_error)
+        __Pyx_GOTREF(__pyx_t_9);
+        #endif
+      }
+    } else {
+      __pyx_t_9 = __pyx_t_8(__pyx_t_3);
+      if (unlikely(!__pyx_t_9)) { /* NULL from iternext: either exhaustion or an error */
+        PyObject* exc_type = PyErr_Occurred();
+        if (exc_type) {
+          if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); /* StopIteration just ends the loop */
+          else __PYX_ERR(2, 746, __pyx_L1_error) /* any other pending exception propagates */
+        }
+        break;
+      }
+      __Pyx_GOTREF(__pyx_t_9);
+    }
+    __Pyx_XDECREF_SET(__pyx_v_index, __pyx_t_9); /* index takes over the new reference; the previous entry is released */
+    __pyx_t_9 = 0;
+    __pyx_v_dim = __pyx_t_6;
+    __pyx_t_6 = (__pyx_t_6 + 1);
+
+    /* "View.MemoryView":747
+ * 
+ *     for dim, index in enumerate(indices):
+ *         if PyIndex_Check(index):             # <<<<<<<<<<<<<<
+ *             slice_memviewslice(
+ *                 p_dst, p_src.shape[dim], p_src.strides[dim], p_src.suboffsets[dim],
+ */
+    __pyx_t_2 = (PyIndex_Check(__pyx_v_index) != 0); /* branch 1: entry passes PyIndex_Check */
+    if (__pyx_t_2) {
+
+      /* "View.MemoryView":751
+ *                 p_dst, p_src.shape[dim], p_src.strides[dim], p_src.suboffsets[dim],
+ *                 dim, new_ndim, p_suboffset_dim,
+ *                 index, 0, 0, # start, stop, step             # <<<<<<<<<<<<<<
+ *                 0, 0, 0, # have_{start,stop,step}
+ *                 False)
+ */
+      __pyx_t_10 = __Pyx_PyIndex_AsSsize_t(__pyx_v_index); if (unlikely((__pyx_t_10 == (Py_ssize_t)-1) && PyErr_Occurred())) __PYX_ERR(2, 751, __pyx_L1_error) /* -1 is an error only if an exception is pending */
+
+      /* "View.MemoryView":748
+ *     for dim, index in enumerate(indices):
+ *         if PyIndex_Check(index):
+ *             slice_memviewslice(             # <<<<<<<<<<<<<<
+ *                 p_dst, p_src.shape[dim], p_src.strides[dim], p_src.suboffsets[dim],
+ *                 dim, new_ndim, p_suboffset_dim,
+ */
+      __pyx_t_11 = __pyx_memoryview_slice_memviewslice(__pyx_v_p_dst, (__pyx_v_p_src->shape[__pyx_v_dim]), (__pyx_v_p_src->strides[__pyx_v_dim]), (__pyx_v_p_src->suboffsets[__pyx_v_dim]), __pyx_v_dim, __pyx_v_new_ndim, __pyx_v_p_suboffset_dim, __pyx_t_10, 0, 0, 0, 0, 0, 0); if (unlikely(__pyx_t_11 == ((int)-1))) __PYX_ERR(2, 748, __pyx_L1_error) /* index goes in the start slot, final argument is 0 (False in the Cython source); new_ndim is not advanced in this branch */
+
+      /* "View.MemoryView":747
+ * 
+ *     for dim, index in enumerate(indices):
+ *         if PyIndex_Check(index):             # <<<<<<<<<<<<<<
+ *             slice_memviewslice(
+ *                 p_dst, p_src.shape[dim], p_src.strides[dim], p_src.suboffsets[dim],
+ */
+      goto __pyx_L6;
+    }
+
+    /* "View.MemoryView":754
+ *                 0, 0, 0, # have_{start,stop,step}
+ *                 False)
+ *         elif index is None:             # <<<<<<<<<<<<<<
+ *             p_dst.shape[new_ndim] = 1
+ *             p_dst.strides[new_ndim] = 0
+ */
+    __pyx_t_2 = (__pyx_v_index == Py_None); /* branch 2: None writes a dimension of extent 1, stride 0, suboffset -1 into dst */
+    __pyx_t_1 = (__pyx_t_2 != 0);
+    if (__pyx_t_1) {
+
+      /* "View.MemoryView":755
+ *                 False)
+ *         elif index is None:
+ *             p_dst.shape[new_ndim] = 1             # <<<<<<<<<<<<<<
+ *             p_dst.strides[new_ndim] = 0
+ *             p_dst.suboffsets[new_ndim] = -1
+ */
+      (__pyx_v_p_dst->shape[__pyx_v_new_ndim]) = 1;
+
+      /* "View.MemoryView":756
+ *         elif index is None:
+ *             p_dst.shape[new_ndim] = 1
+ *             p_dst.strides[new_ndim] = 0             # <<<<<<<<<<<<<<
+ *             p_dst.suboffsets[new_ndim] = -1
+ *             new_ndim += 1
+ */
+      (__pyx_v_p_dst->strides[__pyx_v_new_ndim]) = 0;
+
+      /* "View.MemoryView":757
+ *             p_dst.shape[new_ndim] = 1
+ *             p_dst.strides[new_ndim] = 0
+ *             p_dst.suboffsets[new_ndim] = -1             # <<<<<<<<<<<<<<
+ *             new_ndim += 1
+ *         else:
+ */
+      (__pyx_v_p_dst->suboffsets[__pyx_v_new_ndim]) = -1L;
+
+      /* "View.MemoryView":758
+ *             p_dst.strides[new_ndim] = 0
+ *             p_dst.suboffsets[new_ndim] = -1
+ *             new_ndim += 1             # <<<<<<<<<<<<<<
+ *         else:
+ *             start = index.start or 0
+ */
+      __pyx_v_new_ndim = (__pyx_v_new_ndim + 1);
+
+      /* "View.MemoryView":754
+ *                 0, 0, 0, # have_{start,stop,step}
+ *                 False)
+ *         elif index is None:             # <<<<<<<<<<<<<<
+ *             p_dst.shape[new_ndim] = 1
+ *             p_dst.strides[new_ndim] = 0
+ */
+      goto __pyx_L6;
+    }
+
+    /* "View.MemoryView":760
+ *             new_ndim += 1
+ *         else:
+ *             start = index.start or 0             # <<<<<<<<<<<<<<
+ *             stop = index.stop or 0
+ *             step = index.step or 0
+ */
+    /*else*/ { /* branch 3: every other entry has its start/stop/step attributes read */
+      __pyx_t_9 = __Pyx_PyObject_GetAttrStr(__pyx_v_index, __pyx_n_s_start); if (unlikely(!__pyx_t_9)) __PYX_ERR(2, 760, __pyx_L1_error)
+      __Pyx_GOTREF(__pyx_t_9);
+      __pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_t_9); if (unlikely(__pyx_t_1 < 0)) __PYX_ERR(2, 760, __pyx_L1_error)
+      if (!__pyx_t_1) { /* falsy attribute (e.g. None or 0): `or 0` yields 0 below */
+        __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0;
+      } else {
+        __pyx_t_12 = __Pyx_PyIndex_AsSsize_t(__pyx_t_9); if (unlikely((__pyx_t_12 == (Py_ssize_t)-1) && PyErr_Occurred())) __PYX_ERR(2, 760, __pyx_L1_error)
+        __pyx_t_10 = __pyx_t_12;
+        __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0;
+        goto __pyx_L7_bool_binop_done;
+      }
+      __pyx_t_10 = 0;
+      __pyx_L7_bool_binop_done:;
+      __pyx_v_start = __pyx_t_10;
+
+      /* "View.MemoryView":761
+ *         else:
+ *             start = index.start or 0
+ *             stop = index.stop or 0             # <<<<<<<<<<<<<<
+ *             step = index.step or 0
+ * 
+ */
+      __pyx_t_9 = __Pyx_PyObject_GetAttrStr(__pyx_v_index, __pyx_n_s_stop); if (unlikely(!__pyx_t_9)) __PYX_ERR(2, 761, __pyx_L1_error)
+      __Pyx_GOTREF(__pyx_t_9);
+      __pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_t_9); if (unlikely(__pyx_t_1 < 0)) __PYX_ERR(2, 761, __pyx_L1_error)
+      if (!__pyx_t_1) {
+        __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0;
+      } else {
+        __pyx_t_12 = __Pyx_PyIndex_AsSsize_t(__pyx_t_9); if (unlikely((__pyx_t_12 == (Py_ssize_t)-1) && PyErr_Occurred())) __PYX_ERR(2, 761, __pyx_L1_error)
+        __pyx_t_10 = __pyx_t_12;
+        __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0;
+        goto __pyx_L9_bool_binop_done;
+      }
+      __pyx_t_10 = 0;
+      __pyx_L9_bool_binop_done:;
+      __pyx_v_stop = __pyx_t_10;
+
+      /* "View.MemoryView":762
+ *             start = index.start or 0
+ *             stop = index.stop or 0
+ *             step = index.step or 0             # <<<<<<<<<<<<<<
+ * 
+ *             have_start = index.start is not None
+ */
+      __pyx_t_9 = __Pyx_PyObject_GetAttrStr(__pyx_v_index, __pyx_n_s_step); if (unlikely(!__pyx_t_9)) __PYX_ERR(2, 762, __pyx_L1_error)
+      __Pyx_GOTREF(__pyx_t_9);
+      __pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_t_9); if (unlikely(__pyx_t_1 < 0)) __PYX_ERR(2, 762, __pyx_L1_error)
+      if (!__pyx_t_1) {
+        __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0;
+      } else {
+        __pyx_t_12 = __Pyx_PyIndex_AsSsize_t(__pyx_t_9); if (unlikely((__pyx_t_12 == (Py_ssize_t)-1) && PyErr_Occurred())) __PYX_ERR(2, 762, __pyx_L1_error)
+        __pyx_t_10 = __pyx_t_12;
+        __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0;
+        goto __pyx_L11_bool_binop_done;
+      }
+      __pyx_t_10 = 0;
+      __pyx_L11_bool_binop_done:;
+      __pyx_v_step = __pyx_t_10;
+
+      /* "View.MemoryView":764
+ *             step = index.step or 0
+ * 
+ *             have_start = index.start is not None             # <<<<<<<<<<<<<<
+ *             have_stop = index.stop is not None
+ *             have_step = index.step is not None
+ */
+      __pyx_t_9 = __Pyx_PyObject_GetAttrStr(__pyx_v_index, __pyx_n_s_start); if (unlikely(!__pyx_t_9)) __PYX_ERR(2, 764, __pyx_L1_error) /* attribute is fetched a second time for the None test */
+      __Pyx_GOTREF(__pyx_t_9);
+      __pyx_t_1 = (__pyx_t_9 != Py_None); /* identity test: an explicit 0 still counts as "given" */
+      __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0;
+      __pyx_v_have_start = __pyx_t_1;
+
+      /* "View.MemoryView":765
+ * 
+ *             have_start = index.start is not None
+ *             have_stop = index.stop is not None             # <<<<<<<<<<<<<<
+ *             have_step = index.step is not None
+ * 
+ */
+      __pyx_t_9 = __Pyx_PyObject_GetAttrStr(__pyx_v_index, __pyx_n_s_stop); if (unlikely(!__pyx_t_9)) __PYX_ERR(2, 765, __pyx_L1_error)
+      __Pyx_GOTREF(__pyx_t_9);
+      __pyx_t_1 = (__pyx_t_9 != Py_None);
+      __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0;
+      __pyx_v_have_stop = __pyx_t_1;
+
+      /* "View.MemoryView":766
+ *             have_start = index.start is not None
+ *             have_stop = index.stop is not None
+ *             have_step = index.step is not None             # <<<<<<<<<<<<<<
+ * 
+ *             slice_memviewslice(
+ */
+      __pyx_t_9 = __Pyx_PyObject_GetAttrStr(__pyx_v_index, __pyx_n_s_step); if (unlikely(!__pyx_t_9)) __PYX_ERR(2, 766, __pyx_L1_error)
+      __Pyx_GOTREF(__pyx_t_9);
+      __pyx_t_1 = (__pyx_t_9 != Py_None);
+      __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0;
+      __pyx_v_have_step = __pyx_t_1;
+
+      /* "View.MemoryView":768
+ *             have_step = index.step is not None
+ * 
+ *             slice_memviewslice(             # <<<<<<<<<<<<<<
+ *                 p_dst, p_src.shape[dim], p_src.strides[dim], p_src.suboffsets[dim],
+ *                 dim, new_ndim, p_suboffset_dim,
+ */
+      __pyx_t_11 = __pyx_memoryview_slice_memviewslice(__pyx_v_p_dst, (__pyx_v_p_src->shape[__pyx_v_dim]), (__pyx_v_p_src->strides[__pyx_v_dim]), (__pyx_v_p_src->suboffsets[__pyx_v_dim]), __pyx_v_dim, __pyx_v_new_ndim, __pyx_v_p_suboffset_dim, __pyx_v_start, __pyx_v_stop, __pyx_v_step, __pyx_v_have_start, __pyx_v_have_stop, __pyx_v_have_step, 1); if (unlikely(__pyx_t_11 == ((int)-1))) __PYX_ERR(2, 768, __pyx_L1_error) /* same helper as branch 1, but with the final argument 1 (True in the Cython source) */
+
+      /* "View.MemoryView":774
+ *                 have_start, have_stop, have_step,
+ *                 True)
+ *             new_ndim += 1             # <<<<<<<<<<<<<<
+ * 
+ *     if isinstance(memview, _memoryviewslice):
+ */
+      __pyx_v_new_ndim = (__pyx_v_new_ndim + 1); /* unlike branch 1, this branch advances the output dimension */
+    }
+    __pyx_L6:;
+
+    /* "View.MemoryView":746
+ *     cdef bint have_start, have_stop, have_step
+ * 
+ *     for dim, index in enumerate(indices):             # <<<<<<<<<<<<<<
+ *         if PyIndex_Check(index):
+ *             slice_memviewslice(
+ */
+  }
+  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; /* release the sequence/iterator reference taken before the loop */
+
+  /* "View.MemoryView":776
+ *             new_ndim += 1
+ * 
+ *     if isinstance(memview, _memoryviewslice):             # <<<<<<<<<<<<<<
+ *         return memoryview_fromslice(dst, new_ndim,
+ *                                     memviewsliceobj.to_object_func,
+ */
+  __pyx_t_1 = __Pyx_TypeCheck(((PyObject *)__pyx_v_memview), __pyx_memoryviewslice_type); 
+  __pyx_t_2 = (__pyx_t_1 != 0);
+  if (__pyx_t_2) {
+
+    /* "View.MemoryView":777
+ * 
+ *     if isinstance(memview, _memoryviewslice):
+ *         return memoryview_fromslice(dst, new_ndim,             # <<<<<<<<<<<<<<
+ *                                     memviewsliceobj.to_object_func,
+ *                                     memviewsliceobj.to_dtype_func,
+ */
+    __Pyx_XDECREF(((PyObject *)__pyx_r));
+
+    /* "View.MemoryView":778
+ *     if isinstance(memview, _memoryviewslice):
+ *         return memoryview_fromslice(dst, new_ndim,
+ *                                     memviewsliceobj.to_object_func,             # <<<<<<<<<<<<<<
+ *                                     memviewsliceobj.to_dtype_func,
+ *                                     memview.dtype_is_object)
+ */
+    if (unlikely(!__pyx_v_memviewsliceobj)) { __Pyx_RaiseUnboundLocalError("memviewsliceobj"); __PYX_ERR(2, 778, __pyx_L1_error) } /* guard against the local never having been assigned */
+
+    /* "View.MemoryView":779
+ *         return memoryview_fromslice(dst, new_ndim,
+ *                                     memviewsliceobj.to_object_func,
+ *                                     memviewsliceobj.to_dtype_func,             # <<<<<<<<<<<<<<
+ *                                     memview.dtype_is_object)
+ *     else:
+ */
+    if (unlikely(!__pyx_v_memviewsliceobj)) { __Pyx_RaiseUnboundLocalError("memviewsliceobj"); __PYX_ERR(2, 779, __pyx_L1_error) }
+
+    /* "View.MemoryView":777
+ * 
+ *     if isinstance(memview, _memoryviewslice):
+ *         return memoryview_fromslice(dst, new_ndim,             # <<<<<<<<<<<<<<
+ *                                     memviewsliceobj.to_object_func,
+ *                                     memviewsliceobj.to_dtype_func,
+ */
+    __pyx_t_3 = __pyx_memoryview_fromslice(__pyx_v_dst, __pyx_v_new_ndim, __pyx_v_memviewsliceobj->to_object_func, __pyx_v_memviewsliceobj->to_dtype_func, __pyx_v_memview->dtype_is_object); if (unlikely(!__pyx_t_3)) __PYX_ERR(2, 777, __pyx_L1_error) /* forwards the source object's to_object_func / to_dtype_func */
+    __Pyx_GOTREF(__pyx_t_3);
+    if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_memoryview_type))))) __PYX_ERR(2, 777, __pyx_L1_error) /* result must be None or pass the memoryview type test */
+    __pyx_r = ((struct __pyx_memoryview_obj *)__pyx_t_3);
+    __pyx_t_3 = 0; /* ownership moved to __pyx_r */
+    goto __pyx_L0;
+
+    /* "View.MemoryView":776
+ *             new_ndim += 1
+ * 
+ *     if isinstance(memview, _memoryviewslice):             # <<<<<<<<<<<<<<
+ *         return memoryview_fromslice(dst, new_ndim,
+ *                                     memviewsliceobj.to_object_func,
+ */
+  }
+
+  /* "View.MemoryView":782
+ *                                     memview.dtype_is_object)
+ *     else:
+ *         return memoryview_fromslice(dst, new_ndim, NULL, NULL,             # <<<<<<<<<<<<<<
+ *                                     memview.dtype_is_object)
+ * 
+ */
+  /*else*/ {
+    __Pyx_XDECREF(((PyObject *)__pyx_r));
+
+    /* "View.MemoryView":783
+ *     else:
+ *         return memoryview_fromslice(dst, new_ndim, NULL, NULL,
+ *                                     memview.dtype_is_object)             # <<<<<<<<<<<<<<
+ * 
+ * 
+ */
+    __pyx_t_3 = __pyx_memoryview_fromslice(__pyx_v_dst, __pyx_v_new_ndim, NULL, NULL, __pyx_v_memview->dtype_is_object); if (unlikely(!__pyx_t_3)) __PYX_ERR(2, 782, __pyx_L1_error) /* both conversion-function arguments are NULL in this case */
+    __Pyx_GOTREF(__pyx_t_3);
+
+    /* "View.MemoryView":782
+ *                                     memview.dtype_is_object)
+ *     else:
+ *         return memoryview_fromslice(dst, new_ndim, NULL, NULL,             # <<<<<<<<<<<<<<
+ *                                     memview.dtype_is_object)
+ * 
+ */
+    if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_memoryview_type))))) __PYX_ERR(2, 782, __pyx_L1_error)
+    __pyx_r = ((struct __pyx_memoryview_obj *)__pyx_t_3);
+    __pyx_t_3 = 0;
+    goto __pyx_L0;
+  }
+
+  /* "View.MemoryView":710
+ * 
+ * @cname('__pyx_memview_slice')
+ * cdef memoryview memview_slice(memoryview memview, object indices):             # <<<<<<<<<<<<<<
+ *     cdef int new_ndim = 0, suboffset_dim = -1, dim
+ *     cdef bint negative_step
+ */
+
+  /* function exit code */
+  __pyx_L1_error:; /* error exit: drop whichever object temporaries are still live */
+  __Pyx_XDECREF(__pyx_t_3);
+  __Pyx_XDECREF(__pyx_t_9);
+  __Pyx_AddTraceback("View.MemoryView.memview_slice", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = 0;
+  __pyx_L0:; /* common exit: release the locals that own references */
+  __Pyx_XDECREF((PyObject *)__pyx_v_memviewsliceobj);
+  __Pyx_XDECREF(__pyx_v_index);
+  __Pyx_XGIVEREF((PyObject *)__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "View.MemoryView":807
+ * 
+ * @cname('__pyx_memoryview_slice_memviewslice')
+ * cdef int slice_memviewslice(             # <<<<<<<<<<<<<<
+ *         __Pyx_memviewslice *dst,
+ *         Py_ssize_t shape, Py_ssize_t stride, Py_ssize_t suboffset,
+ */
+
+static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, Py_ssize_t __pyx_v_shape, Py_ssize_t __pyx_v_stride, Py_ssize_t __pyx_v_suboffset, int __pyx_v_dim, int __pyx_v_new_ndim, int *__pyx_v_suboffset_dim, Py_ssize_t __pyx_v_start, Py_ssize_t __pyx_v_stop, Py_ssize_t __pyx_v_step, int __pyx_v_have_start, int __pyx_v_have_stop, int __pyx_v_have_step, int __pyx_v_is_slice) {
+  /*
+   * Apply one index or slice (taken along source axis `dim`) to `dst`.
+   *
+   * C rendering of "View.MemoryView":807 `slice_memviewslice`.
+   *
+   *   - is_slice == 0: `start` is a plain index.  A negative value is wrapped
+   *     once by adding `shape`; anything still outside [0, shape) is reported
+   *     through __pyx_memoryview_err_dim as an IndexError.
+   *   - is_slice != 0: start/stop/step are normalised (defaults filled in,
+   *     bounds clamped), the resulting extent is computed, and the
+   *     strides/shape/suboffsets entries at `new_ndim` are written into `dst`.
+   *
+   * In both cases the offset `start * stride` is then folded either into
+   * dst->data or into the suboffset slot recorded in suboffset_dim[0], and a
+   * non-negative `suboffset` is handled last.
+   *
+   * Returns 0 on success.  On failure a traceback entry is added and -1 is
+   * returned.
+   */
+  Py_ssize_t new_extent;
+  Py_ssize_t byte_offset;
+  int step_is_negative;
+  int err_status;
+  int target_dim;
+  /* These three names are assigned by the __PYX_ERR macro. */
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+
+  if (__pyx_v_is_slice) {
+    /* "View.MemoryView":835  negative_step = have_step != 0 and step < 0 */
+    step_is_negative = (__pyx_v_have_step && (__pyx_v_step < 0));
+
+    /* "View.MemoryView":837  an explicit step of zero is rejected */
+    if (__pyx_v_have_step && (__pyx_v_step == 0)) {
+      err_status = __pyx_memoryview_err_dim(__pyx_builtin_ValueError, ((char *)"Step may not be zero (axis %d)"), __pyx_v_dim);
+      if (unlikely(err_status == ((int)-1))) __PYX_ERR(2, 838, __pyx_L1_error)
+    }
+
+    /* "View.MemoryView":841-855  default or clamp `start` */
+    if (!__pyx_v_have_start) {
+      __pyx_v_start = step_is_negative ? (__pyx_v_shape - 1) : 0;
+    } else if (__pyx_v_start < 0) {
+      __pyx_v_start += __pyx_v_shape;
+      if (__pyx_v_start < 0) {
+        __pyx_v_start = 0;
+      }
+    } else if (__pyx_v_start >= __pyx_v_shape) {
+      __pyx_v_start = step_is_negative ? (__pyx_v_shape - 1) : __pyx_v_shape;
+    }
+
+    /* "View.MemoryView":857-868  default or clamp `stop` */
+    if (!__pyx_v_have_stop) {
+      __pyx_v_stop = step_is_negative ? -1 : __pyx_v_shape;
+    } else if (__pyx_v_stop < 0) {
+      __pyx_v_stop += __pyx_v_shape;
+      if (__pyx_v_stop < 0) {
+        __pyx_v_stop = 0;
+      }
+    } else if (__pyx_v_stop > __pyx_v_shape) {
+      __pyx_v_stop = __pyx_v_shape;
+    }
+
+    /* "View.MemoryView":870  a missing step means 1 */
+    if (!__pyx_v_have_step) {
+      __pyx_v_step = 1;
+    }
+
+    /* "View.MemoryView":875-881
+     * C (truncating) division, bumped by one when there is a remainder,
+     * then floored at zero so an empty slice has extent 0.
+     */
+    new_extent = (__pyx_v_stop - __pyx_v_start) / __pyx_v_step;
+    if (((__pyx_v_stop - __pyx_v_start) - (__pyx_v_step * new_extent)) != 0) {
+      new_extent = new_extent + 1;
+    }
+    if (new_extent < 0) {
+      new_extent = 0;
+    }
+
+    /* "View.MemoryView":884-886  publish the new dimension into dst */
+    __pyx_v_dst->strides[__pyx_v_new_ndim] = __pyx_v_stride * __pyx_v_step;
+    __pyx_v_dst->shape[__pyx_v_new_ndim] = new_extent;
+    __pyx_v_dst->suboffsets[__pyx_v_new_ndim] = __pyx_v_suboffset;
+  } else {
+    /* "View.MemoryView":829-832  plain index: wrap once, then bounds-check */
+    if (__pyx_v_start < 0) {
+      __pyx_v_start += __pyx_v_shape;
+    }
+    if ((__pyx_v_start < 0) || (__pyx_v_start >= __pyx_v_shape)) {
+      err_status = __pyx_memoryview_err_dim(__pyx_builtin_IndexError, ((char *)"Index out of bounds (axis %d)"), __pyx_v_dim);
+      if (unlikely(err_status == ((int)-1))) __PYX_ERR(2, 832, __pyx_L1_error)
+    }
+  }
+
+  /* "View.MemoryView":889-892
+   * Fold the start offset into the data pointer, or into the suboffset slot
+   * named by suboffset_dim[0] when that slot index is non-negative.
+   */
+  byte_offset = __pyx_v_start * __pyx_v_stride;
+  target_dim = __pyx_v_suboffset_dim[0];
+  if (target_dim < 0) {
+    __pyx_v_dst->data += byte_offset;
+  } else {
+    __pyx_v_dst->suboffsets[target_dim] += byte_offset;
+  }
+
+  /* "View.MemoryView":894-902  handling of a non-negative suboffset */
+  if (__pyx_v_suboffset >= 0) {
+    if (__pyx_v_is_slice) {
+      /* Record which output dimension now carries the suboffset. */
+      __pyx_v_suboffset_dim[0] = __pyx_v_new_ndim;
+    } else if (__pyx_v_new_ndim == 0) {
+      /* Follow the stored pointer, then apply the suboffset. */
+      __pyx_v_dst->data = (((char **)__pyx_v_dst->data)[0]) + __pyx_v_suboffset;
+    } else {
+      err_status = __pyx_memoryview_err_dim(__pyx_builtin_IndexError, ((char *)"All dimensions preceding dimension %d must be indexed and not sliced"), __pyx_v_dim);
+      if (unlikely(err_status == ((int)-1))) __PYX_ERR(2, 899, __pyx_L1_error)
+    }
+  }
+
+  /* "View.MemoryView":904  return 0 */
+  return 0;
+
+  /* function exit code (error path only) */
+  __pyx_L1_error:;
+  {
+    /* The GIL is acquired around the traceback call when WITH_THREAD is set. */
+    #ifdef WITH_THREAD
+    PyGILState_STATE __pyx_gilstate_save = __Pyx_PyGILState_Ensure();
+    #endif
+    __Pyx_AddTraceback("View.MemoryView.slice_memviewslice", __pyx_clineno, __pyx_lineno, __pyx_filename);
+    #ifdef WITH_THREAD
+    __Pyx_PyGILState_Release(__pyx_gilstate_save);
+    #endif
+  }
+  return -1;
+}
+
+/* "View.MemoryView":910
+ * 
+ * @cname('__pyx_pybuffer_index')
+ * cdef char *pybuffer_index(Py_buffer *view, char *bufp, Py_ssize_t index,             # <<<<<<<<<<<<<<
+ *                           Py_ssize_t dim) except NULL:
+ *     cdef Py_ssize_t shape, stride, suboffset = -1
+ */
+
+static char *__pyx_pybuffer_index(Py_buffer *__pyx_v_view, char *__pyx_v_bufp, Py_ssize_t __pyx_v_index, Py_ssize_t __pyx_v_dim) {
+  Py_ssize_t __pyx_v_shape;
+  Py_ssize_t __pyx_v_stride;
+  Py_ssize_t __pyx_v_suboffset;
+  Py_ssize_t __pyx_v_itemsize;
+  char *__pyx_v_resultp;
+  char *__pyx_r;
+  __Pyx_RefNannyDeclarations
+  Py_ssize_t __pyx_t_1;
+  int __pyx_t_2;
+  PyObject *__pyx_t_3 = NULL;
+  PyObject *__pyx_t_4 = NULL;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("pybuffer_index", 0);
+
+  /* "View.MemoryView":912
+ * cdef char *pybuffer_index(Py_buffer *view, char *bufp, Py_ssize_t index,
+ *                           Py_ssize_t dim) except NULL:
+ *     cdef Py_ssize_t shape, stride, suboffset = -1             # <<<<<<<<<<<<<<
+ *     cdef Py_ssize_t itemsize = view.itemsize
+ *     cdef char *resultp
+ */
+  __pyx_v_suboffset = -1L;
+
+  /* "View.MemoryView":913
+ *                           Py_ssize_t dim) except NULL:
+ *     cdef Py_ssize_t shape, stride, suboffset = -1
+ *     cdef Py_ssize_t itemsize = view.itemsize             # <<<<<<<<<<<<<<
+ *     cdef char *resultp
+ * 
+ */
+  __pyx_t_1 = __pyx_v_view->itemsize;
+  __pyx_v_itemsize = __pyx_t_1;
+
+  /* "View.MemoryView":916
+ *     cdef char *resultp
+ * 
+ *     if view.ndim == 0:             # <<<<<<<<<<<<<<
+ *         shape = view.len / itemsize
+ *         stride = itemsize
+ */
+  __pyx_t_2 = ((__pyx_v_view->ndim == 0) != 0);
+  if (__pyx_t_2) {
+
+    /* "View.MemoryView":917
+ * 
+ *     if view.ndim == 0:
+ *         shape = view.len / itemsize             # <<<<<<<<<<<<<<
+ *         stride = itemsize
+ *     else:
+ */
+    if (unlikely(__pyx_v_itemsize == 0)) {
+      PyErr_SetString(PyExc_ZeroDivisionError, "integer division or modulo by zero");
+      __PYX_ERR(2, 917, __pyx_L1_error)
+    }
+    else if (sizeof(Py_ssize_t) == sizeof(long) && (!(((Py_ssize_t)-1) > 0)) && unlikely(__pyx_v_itemsize == (Py_ssize_t)-1)  && unlikely(UNARY_NEG_WOULD_OVERFLOW(__pyx_v_view->len))) {
+      PyErr_SetString(PyExc_OverflowError, "value too large to perform division");
+      __PYX_ERR(2, 917, __pyx_L1_error)
+    }
+    __pyx_v_shape = __Pyx_div_Py_ssize_t(__pyx_v_view->len, __pyx_v_itemsize);
+
+    /* "View.MemoryView":918
+ *     if view.ndim == 0:
+ *         shape = view.len / itemsize
+ *         stride = itemsize             # <<<<<<<<<<<<<<
+ *     else:
+ *         shape = view.shape[dim]
+ */
+    __pyx_v_stride = __pyx_v_itemsize;
+
+    /* "View.MemoryView":916
+ *     cdef char *resultp
+ * 
+ *     if view.ndim == 0:             # <<<<<<<<<<<<<<
+ *         shape = view.len / itemsize
+ *         stride = itemsize
+ */
+    goto __pyx_L3;
+  }
+
+  /* "View.MemoryView":920
+ *         stride = itemsize
+ *     else:
+ *         shape = view.shape[dim]             # <<<<<<<<<<<<<<
+ *         stride = view.strides[dim]
+ *         if view.suboffsets != NULL:
+ */
+  /*else*/ {
+    __pyx_v_shape = (__pyx_v_view->shape[__pyx_v_dim]);
+
+    /* "View.MemoryView":921
+ *     else:
+ *         shape = view.shape[dim]
+ *         stride = view.strides[dim]             # <<<<<<<<<<<<<<
+ *         if view.suboffsets != NULL:
+ *             suboffset = view.suboffsets[dim]
+ */
+    __pyx_v_stride = (__pyx_v_view->strides[__pyx_v_dim]);
+
+    /* "View.MemoryView":922
+ *         shape = view.shape[dim]
+ *         stride = view.strides[dim]
+ *         if view.suboffsets != NULL:             # <<<<<<<<<<<<<<
+ *             suboffset = view.suboffsets[dim]
+ * 
+ */
+    __pyx_t_2 = ((__pyx_v_view->suboffsets != NULL) != 0);
+    if (__pyx_t_2) {
+
+      /* "View.MemoryView":923
+ *         stride = view.strides[dim]
+ *         if view.suboffsets != NULL:
+ *             suboffset = view.suboffsets[dim]             # <<<<<<<<<<<<<<
+ * 
+ *     if index < 0:
+ */
+      __pyx_v_suboffset = (__pyx_v_view->suboffsets[__pyx_v_dim]);
+
+      /* "View.MemoryView":922
+ *         shape = view.shape[dim]
+ *         stride = view.strides[dim]
+ *         if view.suboffsets != NULL:             # <<<<<<<<<<<<<<
+ *             suboffset = view.suboffsets[dim]
+ * 
+ */
+    }
+  }
+  __pyx_L3:;
+
+  /* "View.MemoryView":925
+ *             suboffset = view.suboffsets[dim]
+ * 
+ *     if index < 0:             # <<<<<<<<<<<<<<
+ *         index += view.shape[dim]
+ *         if index < 0:
+ */
+  __pyx_t_2 = ((__pyx_v_index < 0) != 0);
+  if (__pyx_t_2) {
+
+    /* "View.MemoryView":926
+ * 
+ *     if index < 0:
+ *         index += view.shape[dim]             # <<<<<<<<<<<<<<
+ *         if index < 0:
+ *             raise IndexError("Out of bounds on buffer access (axis %d)" % dim)
+ */
+    __pyx_v_index = (__pyx_v_index + (__pyx_v_view->shape[__pyx_v_dim]));
+
+    /* "View.MemoryView":927
+ *     if index < 0:
+ *         index += view.shape[dim]
+ *         if index < 0:             # <<<<<<<<<<<<<<
+ *             raise IndexError("Out of bounds on buffer access (axis %d)" % dim)
+ * 
+ */
+    __pyx_t_2 = ((__pyx_v_index < 0) != 0);
+    if (unlikely(__pyx_t_2)) {
+
+      /* "View.MemoryView":928
+ *         index += view.shape[dim]
+ *         if index < 0:
+ *             raise IndexError("Out of bounds on buffer access (axis %d)" % dim)             # <<<<<<<<<<<<<<
+ * 
+ *     if index >= shape:
+ */
+      __pyx_t_3 = PyInt_FromSsize_t(__pyx_v_dim); if (unlikely(!__pyx_t_3)) __PYX_ERR(2, 928, __pyx_L1_error)
+      __Pyx_GOTREF(__pyx_t_3);
+      __pyx_t_4 = __Pyx_PyString_Format(__pyx_kp_s_Out_of_bounds_on_buffer_access_a, __pyx_t_3); if (unlikely(!__pyx_t_4)) __PYX_ERR(2, 928, __pyx_L1_error)
+      __Pyx_GOTREF(__pyx_t_4);
+      __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+      __pyx_t_3 = __Pyx_PyObject_CallOneArg(__pyx_builtin_IndexError, __pyx_t_4); if (unlikely(!__pyx_t_3)) __PYX_ERR(2, 928, __pyx_L1_error)
+      __Pyx_GOTREF(__pyx_t_3);
+      __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
+      __Pyx_Raise(__pyx_t_3, 0, 0, 0);
+      __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+      __PYX_ERR(2, 928, __pyx_L1_error)
+
+      /* "View.MemoryView":927
+ *     if index < 0:
+ *         index += view.shape[dim]
+ *         if index < 0:             # <<<<<<<<<<<<<<
+ *             raise IndexError("Out of bounds on buffer access (axis %d)" % dim)
+ * 
+ */
+    }
+
+    /* "View.MemoryView":925
+ *             suboffset = view.suboffsets[dim]
+ * 
+ *     if index < 0:             # <<<<<<<<<<<<<<
+ *         index += view.shape[dim]
+ *         if index < 0:
+ */
+  }
+
+  /* "View.MemoryView":930
+ *             raise IndexError("Out of bounds on buffer access (axis %d)" % dim)
+ * 
+ *     if index >= shape:             # <<<<<<<<<<<<<<
+ *         raise IndexError("Out of bounds on buffer access (axis %d)" % dim)
+ * 
+ */
+  __pyx_t_2 = ((__pyx_v_index >= __pyx_v_shape) != 0);
+  if (unlikely(__pyx_t_2)) {
+
+    /* "View.MemoryView":931
+ * 
+ *     if index >= shape:
+ *         raise IndexError("Out of bounds on buffer access (axis %d)" % dim)             # <<<<<<<<<<<<<<
+ * 
+ *     resultp = bufp + index * stride
+ */
+    __pyx_t_3 = PyInt_FromSsize_t(__pyx_v_dim); if (unlikely(!__pyx_t_3)) __PYX_ERR(2, 931, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_3);
+    __pyx_t_4 = __Pyx_PyString_Format(__pyx_kp_s_Out_of_bounds_on_buffer_access_a, __pyx_t_3); if (unlikely(!__pyx_t_4)) __PYX_ERR(2, 931, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_4);
+    __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+    __pyx_t_3 = __Pyx_PyObject_CallOneArg(__pyx_builtin_IndexError, __pyx_t_4); if (unlikely(!__pyx_t_3)) __PYX_ERR(2, 931, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_3);
+    __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
+    __Pyx_Raise(__pyx_t_3, 0, 0, 0);
+    __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+    __PYX_ERR(2, 931, __pyx_L1_error)
+
+    /* "View.MemoryView":930
+ *             raise IndexError("Out of bounds on buffer access (axis %d)" % dim)
+ * 
+ *     if index >= shape:             # <<<<<<<<<<<<<<
+ *         raise IndexError("Out of bounds on buffer access (axis %d)" % dim)
+ * 
+ */
+  }
+
+  /* "View.MemoryView":933
+ *         raise IndexError("Out of bounds on buffer access (axis %d)" % dim)
+ * 
+ *     resultp = bufp + index * stride             # <<<<<<<<<<<<<<
+ *     if suboffset >= 0:
+ *         resultp = (<char **> resultp)[0] + suboffset
+ */
+  __pyx_v_resultp = (__pyx_v_bufp + (__pyx_v_index * __pyx_v_stride));
+
+  /* "View.MemoryView":934
+ * 
+ *     resultp = bufp + index * stride
+ *     if suboffset >= 0:             # <<<<<<<<<<<<<<
+ *         resultp = (<char **> resultp)[0] + suboffset
+ * 
+ */
+  __pyx_t_2 = ((__pyx_v_suboffset >= 0) != 0);
+  if (__pyx_t_2) {
+
+    /* "View.MemoryView":935
+ *     resultp = bufp + index * stride
+ *     if suboffset >= 0:
+ *         resultp = (<char **> resultp)[0] + suboffset             # <<<<<<<<<<<<<<
+ * 
+ *     return resultp
+ */
+    __pyx_v_resultp = ((((char **)__pyx_v_resultp)[0]) + __pyx_v_suboffset);
+
+    /* "View.MemoryView":934
+ * 
+ *     resultp = bufp + index * stride
+ *     if suboffset >= 0:             # <<<<<<<<<<<<<<
+ *         resultp = (<char **> resultp)[0] + suboffset
+ * 
+ */
+  }
+
+  /* "View.MemoryView":937
+ *         resultp = (<char **> resultp)[0] + suboffset
+ * 
+ *     return resultp             # <<<<<<<<<<<<<<
+ * 
+ * 
+ */
+  __pyx_r = __pyx_v_resultp;
+  goto __pyx_L0;
+
+  /* "View.MemoryView":910
+ * 
+ * @cname('__pyx_pybuffer_index')
+ * cdef char *pybuffer_index(Py_buffer *view, char *bufp, Py_ssize_t index,             # <<<<<<<<<<<<<<
+ *                           Py_ssize_t dim) except NULL:
+ *     cdef Py_ssize_t shape, stride, suboffset = -1
+ */
+
+  /* function exit code */
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_3);
+  __Pyx_XDECREF(__pyx_t_4);
+  __Pyx_AddTraceback("View.MemoryView.pybuffer_index", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = NULL;
+  __pyx_L0:;
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "View.MemoryView":943
+ * 
+ * @cname('__pyx_memslice_transpose')
+ * cdef int transpose_memslice(__Pyx_memviewslice *memslice) nogil except 0:             # <<<<<<<<<<<<<<
+ *     cdef int ndim = memslice.memview.view.ndim
+ * 
+ *
+ * Review notes (generated C for the Cython source quoted above):
+ *   Purpose: reverses, in place, the order of the first ndim entries of
+ *     memslice->shape and memslice->strides, where ndim is read from
+ *     memslice->memview->view.ndim.
+ *   Parameter: __pyx_v_memslice - the slice descriptor that is modified.
+ *   Returns: 1 on success, 0 when the error label is reached.
+ *   Errors: for each swapped pair (i, j), if suboffsets[i] >= 0 or
+ *     suboffsets[j] >= 0, __pyx_memoryview_err is called with ValueError;
+ *     the error label is taken only when that call returns -1.
+ *   Caveat: the suboffset check runs after the swap of the same pair, and
+ *     the error path does not undo swaps that were already performed.
+ */
+
+static int __pyx_memslice_transpose(__Pyx_memviewslice *__pyx_v_memslice) {
+  int __pyx_v_ndim;
+  Py_ssize_t *__pyx_v_shape;
+  Py_ssize_t *__pyx_v_strides;
+  int __pyx_v_i;
+  int __pyx_v_j;
+  int __pyx_r;
+  int __pyx_t_1;
+  Py_ssize_t *__pyx_t_2;
+  long __pyx_t_3;
+  long __pyx_t_4;
+  Py_ssize_t __pyx_t_5;
+  Py_ssize_t __pyx_t_6;
+  int __pyx_t_7;
+  int __pyx_t_8;
+  int __pyx_t_9;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+
+  /* "View.MemoryView":944
+ * @cname('__pyx_memslice_transpose')
+ * cdef int transpose_memslice(__Pyx_memviewslice *memslice) nogil except 0:
+ *     cdef int ndim = memslice.memview.view.ndim             # <<<<<<<<<<<<<<
+ * 
+ *     cdef Py_ssize_t *shape = memslice.shape
+ */
+  __pyx_t_1 = __pyx_v_memslice->memview->view.ndim;
+  __pyx_v_ndim = __pyx_t_1;
+
+  /* "View.MemoryView":946
+ *     cdef int ndim = memslice.memview.view.ndim
+ * 
+ *     cdef Py_ssize_t *shape = memslice.shape             # <<<<<<<<<<<<<<
+ *     cdef Py_ssize_t *strides = memslice.strides
+ * 
+ */
+  __pyx_t_2 = __pyx_v_memslice->shape;
+  __pyx_v_shape = __pyx_t_2;
+
+  /* "View.MemoryView":947
+ * 
+ *     cdef Py_ssize_t *shape = memslice.shape
+ *     cdef Py_ssize_t *strides = memslice.strides             # <<<<<<<<<<<<<<
+ * 
+ * 
+ */
+  __pyx_t_2 = __pyx_v_memslice->strides;
+  __pyx_v_strides = __pyx_t_2;
+
+  /* "View.MemoryView":951
+ * 
+ *     cdef int i, j
+ *     for i in range(ndim / 2):             # <<<<<<<<<<<<<<
+ *         j = ndim - 1 - i
+ *         strides[i], strides[j] = strides[j], strides[i]
+ *
+ * Note: the loop bound is computed once, via __Pyx_div_long(ndim, 2),
+ * before the loop starts; __pyx_t_1 is reused here as the loop counter.
+ */
+  __pyx_t_3 = __Pyx_div_long(__pyx_v_ndim, 2);
+  __pyx_t_4 = __pyx_t_3;
+  for (__pyx_t_1 = 0; __pyx_t_1 < __pyx_t_4; __pyx_t_1+=1) {
+    __pyx_v_i = __pyx_t_1;
+
+    /* "View.MemoryView":952
+ *     cdef int i, j
+ *     for i in range(ndim / 2):
+ *         j = ndim - 1 - i             # <<<<<<<<<<<<<<
+ *         strides[i], strides[j] = strides[j], strides[i]
+ *         shape[i], shape[j] = shape[j], shape[i]
+ */
+    __pyx_v_j = ((__pyx_v_ndim - 1) - __pyx_v_i);
+
+    /* "View.MemoryView":953
+ *     for i in range(ndim / 2):
+ *         j = ndim - 1 - i
+ *         strides[i], strides[j] = strides[j], strides[i]             # <<<<<<<<<<<<<<
+ *         shape[i], shape[j] = shape[j], shape[i]
+ * 
+ *
+ * Note: both values are read into temporaries before either is written,
+ * which implements the tuple swap of the Cython source.
+ */
+    __pyx_t_5 = (__pyx_v_strides[__pyx_v_j]);
+    __pyx_t_6 = (__pyx_v_strides[__pyx_v_i]);
+    (__pyx_v_strides[__pyx_v_i]) = __pyx_t_5;
+    (__pyx_v_strides[__pyx_v_j]) = __pyx_t_6;
+
+    /* "View.MemoryView":954
+ *         j = ndim - 1 - i
+ *         strides[i], strides[j] = strides[j], strides[i]
+ *         shape[i], shape[j] = shape[j], shape[i]             # <<<<<<<<<<<<<<
+ * 
+ *         if memslice.suboffsets[i] >= 0 or memslice.suboffsets[j] >= 0:
+ */
+    __pyx_t_6 = (__pyx_v_shape[__pyx_v_j]);
+    __pyx_t_5 = (__pyx_v_shape[__pyx_v_i]);
+    (__pyx_v_shape[__pyx_v_i]) = __pyx_t_6;
+    (__pyx_v_shape[__pyx_v_j]) = __pyx_t_5;
+
+    /* "View.MemoryView":956
+ *         shape[i], shape[j] = shape[j], shape[i]
+ * 
+ *         if memslice.suboffsets[i] >= 0 or memslice.suboffsets[j] >= 0:             # <<<<<<<<<<<<<<
+ *             _err(ValueError, "Cannot transpose memoryview with indirect dimensions")
+ * 
+ *
+ * Note: short-circuit "or" - suboffsets[j] is only examined when the
+ * test on suboffsets[i] is false; the result ends up in __pyx_t_7.
+ */
+    __pyx_t_8 = (((__pyx_v_memslice->suboffsets[__pyx_v_i]) >= 0) != 0);
+    if (!__pyx_t_8) {
+    } else {
+      __pyx_t_7 = __pyx_t_8;
+      goto __pyx_L6_bool_binop_done;
+    }
+    __pyx_t_8 = (((__pyx_v_memslice->suboffsets[__pyx_v_j]) >= 0) != 0);
+    __pyx_t_7 = __pyx_t_8;
+    __pyx_L6_bool_binop_done:;
+    if (__pyx_t_7) {
+
+      /* "View.MemoryView":957
+ * 
+ *         if memslice.suboffsets[i] >= 0 or memslice.suboffsets[j] >= 0:
+ *             _err(ValueError, "Cannot transpose memoryview with indirect dimensions")             # <<<<<<<<<<<<<<
+ * 
+ *     return 1
+ *
+ * Note: only a return value of -1 from __pyx_memoryview_err diverts to
+ * the error label; presumably it always returns -1 after setting the
+ * exception - TODO confirm against its definition elsewhere in the file.
+ */
+      __pyx_t_9 = __pyx_memoryview_err(__pyx_builtin_ValueError, ((char *)"Cannot transpose memoryview with indirect dimensions")); if (unlikely(__pyx_t_9 == ((int)-1))) __PYX_ERR(2, 957, __pyx_L1_error)
+
+      /* "View.MemoryView":956
+ *         shape[i], shape[j] = shape[j], shape[i]
+ * 
+ *         if memslice.suboffsets[i] >= 0 or memslice.suboffsets[j] >= 0:             # <<<<<<<<<<<<<<
+ *             _err(ValueError, "Cannot transpose memoryview with indirect dimensions")
+ * 
+ */
+    }
+  }
+
+  /* "View.MemoryView":959
+ *             _err(ValueError, "Cannot transpose memoryview with indirect dimensions")
+ * 
+ *     return 1             # <<<<<<<<<<<<<<
+ * 
+ * 
+ */
+  __pyx_r = 1;
+  goto __pyx_L0;
+
+  /* "View.MemoryView":943
+ * 
+ * @cname('__pyx_memslice_transpose')
+ * cdef int transpose_memslice(__Pyx_memviewslice *memslice) nogil except 0:             # <<<<<<<<<<<<<<
+ *     cdef int ndim = memslice.memview.view.ndim
+ * 
+ */
+
+  /* function exit code
+   * Error path: when WITH_THREAD is defined the GIL state is acquired
+   * around __Pyx_AddTraceback and released afterwards (the Cython source
+   * declares this function nogil); the function then returns 0.
+   */
+  __pyx_L1_error:;
+  {
+    #ifdef WITH_THREAD
+    PyGILState_STATE __pyx_gilstate_save = __Pyx_PyGILState_Ensure();
+    #endif
+    __Pyx_AddTraceback("View.MemoryView.transpose_memslice", __pyx_clineno, __pyx_lineno, __pyx_filename);
+    #ifdef WITH_THREAD
+    __Pyx_PyGILState_Release(__pyx_gilstate_save);
+    #endif
+  }
+  __pyx_r = 0;
+  __pyx_L0:;
+  return __pyx_r;
+}
+
+/* "View.MemoryView":976
+ *     cdef int (*to_dtype_func)(char *, object) except 0
+ * 
+ *     def __dealloc__(self):             # <<<<<<<<<<<<<<
+ *         __PYX_XDEC_MEMVIEW(&self.from_slice, 1)
+ * 
+ */
+
+/* Python wrapper
+ * Entry point for _memoryviewslice.__dealloc__: casts the generic
+ * PyObject pointer to struct __pyx_memoryviewslice_obj and forwards to
+ * the typed implementation. Returns nothing and has no error path.
+ */
+static void __pyx_memoryviewslice___dealloc__(PyObject *__pyx_v_self); /*proto*/
+static void __pyx_memoryviewslice___dealloc__(PyObject *__pyx_v_self) {
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("__dealloc__ (wrapper)", 0);
+  __pyx_memoryviewslice___pyx_pf_15View_dot_MemoryView_16_memoryviewslice___dealloc__(((struct __pyx_memoryviewslice_obj *)__pyx_v_self));
+
+  /* function exit code */
+  __Pyx_RefNannyFinishContext();
+}
+
+static void __pyx_memoryviewslice___pyx_pf_15View_dot_MemoryView_16_memoryviewslice___dealloc__(struct __pyx_memoryviewslice_obj *__pyx_v_self) {
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("__dealloc__", 0);
+
+  /* "View.MemoryView":977
+ * 
+ *     def __dealloc__(self):
+ *         __PYX_XDEC_MEMVIEW(&self.from_slice, 1)             # <<<<<<<<<<<<<<
+ * 
+ *     cdef convert_item_to_object(self, char *itemp):
+ *
+ * Typed implementation of __dealloc__: its only action is to apply
+ * __PYX_XDEC_MEMVIEW to the embedded from_slice field with second
+ * argument 1. NOTE(review): presumably this drops the reference the
+ * slice object holds on its source memoryview - the macro is defined
+ * outside this chunk, confirm there.
+ */
+  __PYX_XDEC_MEMVIEW((&__pyx_v_self->from_slice), 1);
+
+  /* "View.MemoryView":976
+ *     cdef int (*to_dtype_func)(char *, object) except 0
+ * 
+ *     def __dealloc__(self):             # <<<<<<<<<<<<<<
+ *         __PYX_XDEC_MEMVIEW(&self.from_slice, 1)
+ * 
+ */
+
+  /* function exit code */
+  __Pyx_RefNannyFinishContext();
+}
+
+/* "View.MemoryView":979
+ *         __PYX_XDEC_MEMVIEW(&self.from_slice, 1)
+ * 
+ *     cdef convert_item_to_object(self, char *itemp):             # <<<<<<<<<<<<<<
+ *         if self.to_object_func != NULL:
+ *             return self.to_object_func(itemp)
+ *
+ * Review notes (generated C for the Cython source quoted above):
+ *   Purpose: produce a Python object for the item at itemp.
+ *   Dispatch: when self->to_object_func is non-NULL it is called with
+ *     itemp; otherwise the call goes to
+ *     __pyx_memoryview_convert_item_to_object with self cast to
+ *     struct __pyx_memoryview_obj.
+ *   Returns: the object produced by whichever callee ran, handed to the
+ *     caller without an extra INCREF/DECREF here.
+ *   Errors: a NULL result from either callee jumps to __pyx_L1_error,
+ *     which adds a traceback entry and returns 0 (NULL).
+ */
+
+static PyObject *__pyx_memoryviewslice_convert_item_to_object(struct __pyx_memoryviewslice_obj *__pyx_v_self, char *__pyx_v_itemp) {
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  int __pyx_t_1;
+  PyObject *__pyx_t_2 = NULL;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("convert_item_to_object", 0);
+
+  /* "View.MemoryView":980
+ * 
+ *     cdef convert_item_to_object(self, char *itemp):
+ *         if self.to_object_func != NULL:             # <<<<<<<<<<<<<<
+ *             return self.to_object_func(itemp)
+ *         else:
+ */
+  __pyx_t_1 = ((__pyx_v_self->to_object_func != NULL) != 0);
+  if (__pyx_t_1) {
+
+    /* "View.MemoryView":981
+ *     cdef convert_item_to_object(self, char *itemp):
+ *         if self.to_object_func != NULL:
+ *             return self.to_object_func(itemp)             # <<<<<<<<<<<<<<
+ *         else:
+ *             return memoryview.convert_item_to_object(self, itemp)
+ *
+ * Note: ownership moves from the temporary __pyx_t_2 to __pyx_r; the
+ * temporary is zeroed so the error-path XDECREF cannot touch it.
+ */
+    __Pyx_XDECREF(__pyx_r);
+    __pyx_t_2 = __pyx_v_self->to_object_func(__pyx_v_itemp); if (unlikely(!__pyx_t_2)) __PYX_ERR(2, 981, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_2);
+    __pyx_r = __pyx_t_2;
+    __pyx_t_2 = 0;
+    goto __pyx_L0;
+
+    /* "View.MemoryView":980
+ * 
+ *     cdef convert_item_to_object(self, char *itemp):
+ *         if self.to_object_func != NULL:             # <<<<<<<<<<<<<<
+ *             return self.to_object_func(itemp)
+ *         else:
+ */
+  }
+
+  /* "View.MemoryView":983
+ *             return self.to_object_func(itemp)
+ *         else:
+ *             return memoryview.convert_item_to_object(self, itemp)             # <<<<<<<<<<<<<<
+ * 
+ *     cdef assign_item_from_object(self, char *itemp, object value):
+ */
+  /*else*/ {
+    __Pyx_XDECREF(__pyx_r);
+    __pyx_t_2 = __pyx_memoryview_convert_item_to_object(((struct __pyx_memoryview_obj *)__pyx_v_self), __pyx_v_itemp); if (unlikely(!__pyx_t_2)) __PYX_ERR(2, 983, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_2);
+    __pyx_r = __pyx_t_2;
+    __pyx_t_2 = 0;
+    goto __pyx_L0;
+  }
+
+  /* "View.MemoryView":979
+ *         __PYX_XDEC_MEMVIEW(&self.from_slice, 1)
+ * 
+ *     cdef convert_item_to_object(self, char *itemp):             # <<<<<<<<<<<<<<
+ *         if self.to_object_func != NULL:
+ *             return self.to_object_func(itemp)
+ */
+
+  /* function exit code */
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_2);
+  __Pyx_AddTraceback("View.MemoryView._memoryviewslice.convert_item_to_object", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = 0;
+  __pyx_L0:;
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "View.MemoryView":985
+ *             return memoryview.convert_item_to_object(self, itemp)
+ * 
+ *     cdef assign_item_from_object(self, char *itemp, object value):             # <<<<<<<<<<<<<<
+ *         if self.to_dtype_func != NULL:
+ *             self.to_dtype_func(itemp, value)
+ *
+ * Review notes (generated C for the Cython source quoted above):
+ *   Purpose: store the Python object value into the item at itemp.
+ *   Dispatch: when self->to_dtype_func is non-NULL it is called with
+ *     (itemp, value); otherwise the call goes to
+ *     __pyx_memoryview_assign_item_from_object with self cast to
+ *     struct __pyx_memoryview_obj.
+ *   Returns: Py_None with a new reference on success.
+ *   Errors: a return of 0 from to_dtype_func, or NULL from the base
+ *     implementation, jumps to __pyx_L1_error, which adds a traceback
+ *     entry and returns 0 (NULL).
+ */
+
+static PyObject *__pyx_memoryviewslice_assign_item_from_object(struct __pyx_memoryviewslice_obj *__pyx_v_self, char *__pyx_v_itemp, PyObject *__pyx_v_value) {
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  int __pyx_t_1;
+  int __pyx_t_2;
+  PyObject *__pyx_t_3 = NULL;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("assign_item_from_object", 0);
+
+  /* "View.MemoryView":986
+ * 
+ *     cdef assign_item_from_object(self, char *itemp, object value):
+ *         if self.to_dtype_func != NULL:             # <<<<<<<<<<<<<<
+ *             self.to_dtype_func(itemp, value)
+ *         else:
+ */
+  __pyx_t_1 = ((__pyx_v_self->to_dtype_func != NULL) != 0);
+  if (__pyx_t_1) {
+
+    /* "View.MemoryView":987
+ *     cdef assign_item_from_object(self, char *itemp, object value):
+ *         if self.to_dtype_func != NULL:
+ *             self.to_dtype_func(itemp, value)             # <<<<<<<<<<<<<<
+ *         else:
+ *             memoryview.assign_item_from_object(self, itemp, value)
+ *
+ * Note: 0 is the error sentinel here, matching the "except 0" in the
+ * to_dtype_func declaration quoted at "View.MemoryView":976.
+ */
+    __pyx_t_2 = __pyx_v_self->to_dtype_func(__pyx_v_itemp, __pyx_v_value); if (unlikely(__pyx_t_2 == ((int)0))) __PYX_ERR(2, 987, __pyx_L1_error)
+
+    /* "View.MemoryView":986
+ * 
+ *     cdef assign_item_from_object(self, char *itemp, object value):
+ *         if self.to_dtype_func != NULL:             # <<<<<<<<<<<<<<
+ *             self.to_dtype_func(itemp, value)
+ *         else:
+ */
+    goto __pyx_L3;
+  }
+
+  /* "View.MemoryView":989
+ *             self.to_dtype_func(itemp, value)
+ *         else:
+ *             memoryview.assign_item_from_object(self, itemp, value)             # <<<<<<<<<<<<<<
+ * 
+ *     @property
+ *
+ * Note: the object returned by the base implementation is only checked
+ * for NULL and then released; its value is not used.
+ */
+  /*else*/ {
+    __pyx_t_3 = __pyx_memoryview_assign_item_from_object(((struct __pyx_memoryview_obj *)__pyx_v_self), __pyx_v_itemp, __pyx_v_value); if (unlikely(!__pyx_t_3)) __PYX_ERR(2, 989, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_3);
+    __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+  }
+  __pyx_L3:;
+
+  /* "View.MemoryView":985
+ *             return memoryview.convert_item_to_object(self, itemp)
+ * 
+ *     cdef assign_item_from_object(self, char *itemp, object value):             # <<<<<<<<<<<<<<
+ *         if self.to_dtype_func != NULL:
+ *             self.to_dtype_func(itemp, value)
+ */
+
+  /* function exit code */
+  __pyx_r = Py_None; __Pyx_INCREF(Py_None);
+  goto __pyx_L0;
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_3);
+  __Pyx_AddTraceback("View.MemoryView._memoryviewslice.assign_item_from_object", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = 0;
+  __pyx_L0:;
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "View.MemoryView":992
+ * 
+ *     @property
+ *     def base(self):             # <<<<<<<<<<<<<<
+ *         return self.from_object
+ * 
+ */
+
+/* Python wrapper
+ * Getter entry point for the _memoryviewslice.base property: casts the
+ * generic PyObject pointer to struct __pyx_memoryviewslice_obj, forwards
+ * to the typed implementation and returns its result unchanged.
+ */
+static PyObject *__pyx_pw_15View_dot_MemoryView_16_memoryviewslice_4base_1__get__(PyObject *__pyx_v_self); /*proto*/
+static PyObject *__pyx_pw_15View_dot_MemoryView_16_memoryviewslice_4base_1__get__(PyObject *__pyx_v_self) {
+  PyObject *__pyx_r = 0;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("__get__ (wrapper)", 0);
+  __pyx_r = __pyx_pf_15View_dot_MemoryView_16_memoryviewslice_4base___get__(((struct __pyx_memoryviewslice_obj *)__pyx_v_self));
+
+  /* function exit code */
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+static PyObject *__pyx_pf_15View_dot_MemoryView_16_memoryviewslice_4base___get__(struct __pyx_memoryviewslice_obj *__pyx_v_self) {
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("__get__", 0);
+
+  /* "View.MemoryView":993
+ *     @property
+ *     def base(self):
+ *         return self.from_object             # <<<<<<<<<<<<<<
+ * 
+ *     __pyx_getbuffer = capsule(<void *> &__pyx_memoryview_getbuffer, "getbuffer(obj, view, flags)")
+ *
+ * Typed implementation of the base getter: returns self->from_object
+ * after incrementing its reference count, so the caller receives its
+ * own reference. There is no error path in this function.
+ */
+  __Pyx_XDECREF(__pyx_r);
+  __Pyx_INCREF(__pyx_v_self->from_object);
+  __pyx_r = __pyx_v_self->from_object;
+  goto __pyx_L0;
+
+  /* "View.MemoryView":992
+ * 
+ *     @property
+ *     def base(self):             # <<<<<<<<<<<<<<
+ *         return self.from_object
+ * 
+ */
+
+  /* function exit code */
+  __pyx_L0:;
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "(tree fragment)":1
+ * def __reduce_cython__(self):             # <<<<<<<<<<<<<<
+ *     raise TypeError("no default __reduce__ due to non-trivial __cinit__")
+ * def __setstate_cython__(self, __pyx_state):
+ */
+
+/* Python wrapper
+ * Entry point for _memoryviewslice.__reduce_cython__: the second
+ * argument is marked CYTHON_UNUSED and ignored; self is cast to
+ * struct __pyx_memoryviewslice_obj and passed to the typed
+ * implementation, whose result is returned unchanged.
+ */
+static PyObject *__pyx_pw___pyx_memoryviewslice_1__reduce_cython__(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused); /*proto*/
+static PyObject *__pyx_pw___pyx_memoryviewslice_1__reduce_cython__(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused) {
+  PyObject *__pyx_r = 0;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("__reduce_cython__ (wrapper)", 0);
+  __pyx_r = __pyx_pf___pyx_memoryviewslice___reduce_cython__(((struct __pyx_memoryviewslice_obj *)__pyx_v_self));
+
+  /* function exit code */
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+static PyObject *__pyx_pf___pyx_memoryviewslice___reduce_cython__(CYTHON_UNUSED struct __pyx_memoryviewslice_obj *__pyx_v_self) {
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  PyObject *__pyx_t_1 = NULL;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("__reduce_cython__", 0);
+
+  /* "(tree fragment)":2
+ * def __reduce_cython__(self):
+ *     raise TypeError("no default __reduce__ due to non-trivial __cinit__")             # <<<<<<<<<<<<<<
+ * def __setstate_cython__(self, __pyx_state):
+ *     raise TypeError("no default __reduce__ due to non-trivial __cinit__")
+ *
+ * Typed implementation of __reduce_cython__: it never succeeds. A
+ * TypeError instance is built by calling the builtin with the
+ * pre-built argument tuple __pyx_tuple__20 and raised; every path,
+ * including a failure to build the exception, reaches __pyx_L1_error
+ * and the function returns NULL. self is unused.
+ */
+  __pyx_t_1 = __Pyx_PyObject_Call(__pyx_builtin_TypeError, __pyx_tuple__20, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 2, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_1);
+  __Pyx_Raise(__pyx_t_1, 0, 0, 0);
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __PYX_ERR(2, 2, __pyx_L1_error)
+
+  /* "(tree fragment)":1
+ * def __reduce_cython__(self):             # <<<<<<<<<<<<<<
+ *     raise TypeError("no default __reduce__ due to non-trivial __cinit__")
+ * def __setstate_cython__(self, __pyx_state):
+ */
+
+  /* function exit code */
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_1);
+  __Pyx_AddTraceback("View.MemoryView._memoryviewslice.__reduce_cython__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = NULL;
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "(tree fragment)":3
+ * def __reduce_cython__(self):
+ *     raise TypeError("no default __reduce__ due to non-trivial __cinit__")
+ * def __setstate_cython__(self, __pyx_state):             # <<<<<<<<<<<<<<
+ *     raise TypeError("no default __reduce__ due to non-trivial __cinit__")
+ */
+
+/* Python wrapper
+ * Entry point for _memoryviewslice.__setstate_cython__: casts self to
+ * struct __pyx_memoryviewslice_obj, passes it together with the state
+ * object to the typed implementation and returns that result unchanged.
+ */
+static PyObject *__pyx_pw___pyx_memoryviewslice_3__setstate_cython__(PyObject *__pyx_v_self, PyObject *__pyx_v___pyx_state); /*proto*/
+static PyObject *__pyx_pw___pyx_memoryviewslice_3__setstate_cython__(PyObject *__pyx_v_self, PyObject *__pyx_v___pyx_state) {
+  PyObject *__pyx_r = 0;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("__setstate_cython__ (wrapper)", 0);
+  __pyx_r = __pyx_pf___pyx_memoryviewslice_2__setstate_cython__(((struct __pyx_memoryviewslice_obj *)__pyx_v_self), ((PyObject *)__pyx_v___pyx_state));
+
+  /* function exit code */
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+static PyObject *__pyx_pf___pyx_memoryviewslice_2__setstate_cython__(CYTHON_UNUSED struct __pyx_memoryviewslice_obj *__pyx_v_self, CYTHON_UNUSED PyObject *__pyx_v___pyx_state) {
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  PyObject *__pyx_t_1 = NULL;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("__setstate_cython__", 0);
+
+  /* "(tree fragment)":4
+ *     raise TypeError("no default __reduce__ due to non-trivial __cinit__")
+ * def __setstate_cython__(self, __pyx_state):
+ *     raise TypeError("no default __reduce__ due to non-trivial __cinit__")             # <<<<<<<<<<<<<<
+ *
+ * Typed implementation of __setstate_cython__: it never succeeds. A
+ * TypeError instance is built by calling the builtin with the
+ * pre-built argument tuple __pyx_tuple__21 and raised; every path,
+ * including a failure to build the exception, reaches __pyx_L1_error
+ * and the function returns NULL. Both parameters are unused.
+ */
+  __pyx_t_1 = __Pyx_PyObject_Call(__pyx_builtin_TypeError, __pyx_tuple__21, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 4, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_1);
+  __Pyx_Raise(__pyx_t_1, 0, 0, 0);
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __PYX_ERR(2, 4, __pyx_L1_error)
+
+  /* "(tree fragment)":3
+ * def __reduce_cython__(self):
+ *     raise TypeError("no default __reduce__ due to non-trivial __cinit__")
+ * def __setstate_cython__(self, __pyx_state):             # <<<<<<<<<<<<<<
+ *     raise TypeError("no default __reduce__ due to non-trivial __cinit__")
+ */
+
+  /* function exit code */
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_1);
+  __Pyx_AddTraceback("View.MemoryView._memoryviewslice.__setstate_cython__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = NULL;
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "View.MemoryView":999
+ * 
+ * @cname('__pyx_memoryview_fromslice')
+ * cdef memoryview_fromslice(__Pyx_memviewslice memviewslice,             # <<<<<<<<<<<<<<
+ *                           int ndim,
+ *                           object (*to_object_func)(char *),
+ */
+
+static PyObject *__pyx_memoryview_fromslice(__Pyx_memviewslice __pyx_v_memviewslice, int __pyx_v_ndim, PyObject *(*__pyx_v_to_object_func)(char *), int (*__pyx_v_to_dtype_func)(char *, PyObject *), int __pyx_v_dtype_is_object) {
+  struct __pyx_memoryviewslice_obj *__pyx_v_result = 0;
+  Py_ssize_t __pyx_v_suboffset;
+  PyObject *__pyx_v_length = NULL;
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  int __pyx_t_1;
+  PyObject *__pyx_t_2 = NULL;
+  PyObject *__pyx_t_3 = NULL;
+  __Pyx_TypeInfo *__pyx_t_4;
+  Py_buffer __pyx_t_5;
+  Py_ssize_t *__pyx_t_6;
+  Py_ssize_t *__pyx_t_7;
+  Py_ssize_t *__pyx_t_8;
+  Py_ssize_t __pyx_t_9;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("memoryview_fromslice", 0);
+
+  /* "View.MemoryView":1007
+ *     cdef _memoryviewslice result
+ * 
+ *     if <PyObject *> memviewslice.memview == Py_None:             # <<<<<<<<<<<<<<
+ *         return None
+ * 
+ */
+  __pyx_t_1 = ((((PyObject *)__pyx_v_memviewslice.memview) == Py_None) != 0);
+  if (__pyx_t_1) {
+
+    /* "View.MemoryView":1008
+ * 
+ *     if <PyObject *> memviewslice.memview == Py_None:
+ *         return None             # <<<<<<<<<<<<<<
+ * 
+ * 
+ */
+    __Pyx_XDECREF(__pyx_r);
+    __pyx_r = Py_None; __Pyx_INCREF(Py_None);
+    goto __pyx_L0;
+
+    /* "View.MemoryView":1007
+ *     cdef _memoryviewslice result
+ * 
+ *     if <PyObject *> memviewslice.memview == Py_None:             # <<<<<<<<<<<<<<
+ *         return None
+ * 
+ */
+  }
+
+  /* "View.MemoryView":1013
+ * 
+ * 
+ *     result = _memoryviewslice(None, 0, dtype_is_object)             # <<<<<<<<<<<<<<
+ * 
+ *     result.from_slice = memviewslice
+ */
+  __pyx_t_2 = __Pyx_PyBool_FromLong(__pyx_v_dtype_is_object); if (unlikely(!__pyx_t_2)) __PYX_ERR(2, 1013, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_2);
+  __pyx_t_3 = PyTuple_New(3); if (unlikely(!__pyx_t_3)) __PYX_ERR(2, 1013, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_3);
+  __Pyx_INCREF(Py_None);
+  __Pyx_GIVEREF(Py_None);
+  PyTuple_SET_ITEM(__pyx_t_3, 0, Py_None);
+  __Pyx_INCREF(__pyx_int_0);
+  __Pyx_GIVEREF(__pyx_int_0);
+  PyTuple_SET_ITEM(__pyx_t_3, 1, __pyx_int_0);
+  __Pyx_GIVEREF(__pyx_t_2);
+  PyTuple_SET_ITEM(__pyx_t_3, 2, __pyx_t_2);
+  __pyx_t_2 = 0;
+  __pyx_t_2 = __Pyx_PyObject_Call(((PyObject *)__pyx_memoryviewslice_type), __pyx_t_3, NULL); if (unlikely(!__pyx_t_2)) __PYX_ERR(2, 1013, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_2);
+  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+  __pyx_v_result = ((struct __pyx_memoryviewslice_obj *)__pyx_t_2);
+  __pyx_t_2 = 0;
+
+  /* "View.MemoryView":1015
+ *     result = _memoryviewslice(None, 0, dtype_is_object)
+ * 
+ *     result.from_slice = memviewslice             # <<<<<<<<<<<<<<
+ *     __PYX_INC_MEMVIEW(&memviewslice, 1)
+ * 
+ */
+  __pyx_v_result->from_slice = __pyx_v_memviewslice;
+
+  /* "View.MemoryView":1016
+ * 
+ *     result.from_slice = memviewslice
+ *     __PYX_INC_MEMVIEW(&memviewslice, 1)             # <<<<<<<<<<<<<<
+ * 
+ *     result.from_object = (<memoryview> memviewslice.memview).base
+ */
+  __PYX_INC_MEMVIEW((&__pyx_v_memviewslice), 1);
+
+  /* "View.MemoryView":1018
+ *     __PYX_INC_MEMVIEW(&memviewslice, 1)
+ * 
+ *     result.from_object = (<memoryview> memviewslice.memview).base             # <<<<<<<<<<<<<<
+ *     result.typeinfo = memviewslice.memview.typeinfo
+ * 
+ */
+  __pyx_t_2 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_memviewslice.memview), __pyx_n_s_base); if (unlikely(!__pyx_t_2)) __PYX_ERR(2, 1018, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_2);
+  __Pyx_GIVEREF(__pyx_t_2);
+  __Pyx_GOTREF(__pyx_v_result->from_object);
+  __Pyx_DECREF(__pyx_v_result->from_object);
+  __pyx_v_result->from_object = __pyx_t_2;
+  __pyx_t_2 = 0;
+
+  /* "View.MemoryView":1019
+ * 
+ *     result.from_object = (<memoryview> memviewslice.memview).base
+ *     result.typeinfo = memviewslice.memview.typeinfo             # <<<<<<<<<<<<<<
+ * 
+ *     result.view = memviewslice.memview.view
+ */
+  __pyx_t_4 = __pyx_v_memviewslice.memview->typeinfo;
+  __pyx_v_result->__pyx_base.typeinfo = __pyx_t_4;
+
+  /* "View.MemoryView":1021
+ *     result.typeinfo = memviewslice.memview.typeinfo
+ * 
+ *     result.view = memviewslice.memview.view             # <<<<<<<<<<<<<<
+ *     result.view.buf = <void *> memviewslice.data
+ *     result.view.ndim = ndim
+ */
+  __pyx_t_5 = __pyx_v_memviewslice.memview->view;
+  __pyx_v_result->__pyx_base.view = __pyx_t_5;
+
+  /* "View.MemoryView":1022
+ * 
+ *     result.view = memviewslice.memview.view
+ *     result.view.buf = <void *> memviewslice.data             # <<<<<<<<<<<<<<
+ *     result.view.ndim = ndim
+ *     (<__pyx_buffer *> &result.view).obj = Py_None
+ */
+  __pyx_v_result->__pyx_base.view.buf = ((void *)__pyx_v_memviewslice.data);
+
+  /* "View.MemoryView":1023
+ *     result.view = memviewslice.memview.view
+ *     result.view.buf = <void *> memviewslice.data
+ *     result.view.ndim = ndim             # <<<<<<<<<<<<<<
+ *     (<__pyx_buffer *> &result.view).obj = Py_None
+ *     Py_INCREF(Py_None)
+ */
+  __pyx_v_result->__pyx_base.view.ndim = __pyx_v_ndim;
+
+  /* "View.MemoryView":1024
+ *     result.view.buf = <void *> memviewslice.data
+ *     result.view.ndim = ndim
+ *     (<__pyx_buffer *> &result.view).obj = Py_None             # <<<<<<<<<<<<<<
+ *     Py_INCREF(Py_None)
+ * 
+ */
+  ((Py_buffer *)(&__pyx_v_result->__pyx_base.view))->obj = Py_None;
+
+  /* "View.MemoryView":1025
+ *     result.view.ndim = ndim
+ *     (<__pyx_buffer *> &result.view).obj = Py_None
+ *     Py_INCREF(Py_None)             # <<<<<<<<<<<<<<
+ * 
+ *     if (<memoryview>memviewslice.memview).flags & PyBUF_WRITABLE:
+ */
+  Py_INCREF(Py_None);
+
+  /* "View.MemoryView":1027
+ *     Py_INCREF(Py_None)
+ * 
+ *     if (<memoryview>memviewslice.memview).flags & PyBUF_WRITABLE:             # <<<<<<<<<<<<<<
+ *         result.flags = PyBUF_RECORDS
+ *     else:
+ */
+  __pyx_t_1 = ((((struct __pyx_memoryview_obj *)__pyx_v_memviewslice.memview)->flags & PyBUF_WRITABLE) != 0);
+  if (__pyx_t_1) {
+
+    /* "View.MemoryView":1028
+ * 
+ *     if (<memoryview>memviewslice.memview).flags & PyBUF_WRITABLE:
+ *         result.flags = PyBUF_RECORDS             # <<<<<<<<<<<<<<
+ *     else:
+ *         result.flags = PyBUF_RECORDS_RO
+ */
+    __pyx_v_result->__pyx_base.flags = PyBUF_RECORDS;
+
+    /* "View.MemoryView":1027
+ *     Py_INCREF(Py_None)
+ * 
+ *     if (<memoryview>memviewslice.memview).flags & PyBUF_WRITABLE:             # <<<<<<<<<<<<<<
+ *         result.flags = PyBUF_RECORDS
+ *     else:
+ */
+    goto __pyx_L4;
+  }
+
+  /* "View.MemoryView":1030
+ *         result.flags = PyBUF_RECORDS
+ *     else:
+ *         result.flags = PyBUF_RECORDS_RO             # <<<<<<<<<<<<<<
+ * 
+ *     result.view.shape = <Py_ssize_t *> result.from_slice.shape
+ */
+  /*else*/ {
+    __pyx_v_result->__pyx_base.flags = PyBUF_RECORDS_RO;
+  }
+  __pyx_L4:;
+
+  /* "View.MemoryView":1032
+ *         result.flags = PyBUF_RECORDS_RO
+ * 
+ *     result.view.shape = <Py_ssize_t *> result.from_slice.shape             # <<<<<<<<<<<<<<
+ *     result.view.strides = <Py_ssize_t *> result.from_slice.strides
+ * 
+ */
+  __pyx_v_result->__pyx_base.view.shape = ((Py_ssize_t *)__pyx_v_result->from_slice.shape);
+
+  /* "View.MemoryView":1033
+ * 
+ *     result.view.shape = <Py_ssize_t *> result.from_slice.shape
+ *     result.view.strides = <Py_ssize_t *> result.from_slice.strides             # <<<<<<<<<<<<<<
+ * 
+ * 
+ */
+  __pyx_v_result->__pyx_base.view.strides = ((Py_ssize_t *)__pyx_v_result->from_slice.strides);
+
+  /* "View.MemoryView":1036
+ * 
+ * 
+ *     result.view.suboffsets = NULL             # <<<<<<<<<<<<<<
+ *     for suboffset in result.from_slice.suboffsets[:ndim]:
+ *         if suboffset >= 0:
+ */
+  __pyx_v_result->__pyx_base.view.suboffsets = NULL;
+
+  /* "View.MemoryView":1037
+ * 
+ *     result.view.suboffsets = NULL
+ *     for suboffset in result.from_slice.suboffsets[:ndim]:             # <<<<<<<<<<<<<<
+ *         if suboffset >= 0:
+ *             result.view.suboffsets = <Py_ssize_t *> result.from_slice.suboffsets
+ */
+  __pyx_t_7 = (__pyx_v_result->from_slice.suboffsets + __pyx_v_ndim);
+  for (__pyx_t_8 = __pyx_v_result->from_slice.suboffsets; __pyx_t_8 < __pyx_t_7; __pyx_t_8++) {
+    __pyx_t_6 = __pyx_t_8;
+    __pyx_v_suboffset = (__pyx_t_6[0]);
+
+    /* "View.MemoryView":1038
+ *     result.view.suboffsets = NULL
+ *     for suboffset in result.from_slice.suboffsets[:ndim]:
+ *         if suboffset >= 0:             # <<<<<<<<<<<<<<
+ *             result.view.suboffsets = <Py_ssize_t *> result.from_slice.suboffsets
+ *             break
+ */
+    __pyx_t_1 = ((__pyx_v_suboffset >= 0) != 0);
+    if (__pyx_t_1) {
+
+      /* "View.MemoryView":1039
+ *     for suboffset in result.from_slice.suboffsets[:ndim]:
+ *         if suboffset >= 0:
+ *             result.view.suboffsets = <Py_ssize_t *> result.from_slice.suboffsets             # <<<<<<<<<<<<<<
+ *             break
+ * 
+ */
+      __pyx_v_result->__pyx_base.view.suboffsets = ((Py_ssize_t *)__pyx_v_result->from_slice.suboffsets);
+
+      /* "View.MemoryView":1040
+ *         if suboffset >= 0:
+ *             result.view.suboffsets = <Py_ssize_t *> result.from_slice.suboffsets
+ *             break             # <<<<<<<<<<<<<<
+ * 
+ *     result.view.len = result.view.itemsize
+ */
+      goto __pyx_L6_break;
+
+      /* "View.MemoryView":1038
+ *     result.view.suboffsets = NULL
+ *     for suboffset in result.from_slice.suboffsets[:ndim]:
+ *         if suboffset >= 0:             # <<<<<<<<<<<<<<
+ *             result.view.suboffsets = <Py_ssize_t *> result.from_slice.suboffsets
+ *             break
+ */
+    }
+  }
+  __pyx_L6_break:;
+
+  /* "View.MemoryView":1042
+ *             break
+ * 
+ *     result.view.len = result.view.itemsize             # <<<<<<<<<<<<<<
+ *     for length in result.view.shape[:ndim]:
+ *         result.view.len *= length
+ */
+  __pyx_t_9 = __pyx_v_result->__pyx_base.view.itemsize;
+  __pyx_v_result->__pyx_base.view.len = __pyx_t_9;
+
+  /* "View.MemoryView":1043
+ * 
+ *     result.view.len = result.view.itemsize
+ *     for length in result.view.shape[:ndim]:             # <<<<<<<<<<<<<<
+ *         result.view.len *= length
+ * 
+ */
+  __pyx_t_7 = (__pyx_v_result->__pyx_base.view.shape + __pyx_v_ndim);
+  for (__pyx_t_8 = __pyx_v_result->__pyx_base.view.shape; __pyx_t_8 < __pyx_t_7; __pyx_t_8++) {
+    __pyx_t_6 = __pyx_t_8;
+    __pyx_t_2 = PyInt_FromSsize_t((__pyx_t_6[0])); if (unlikely(!__pyx_t_2)) __PYX_ERR(2, 1043, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_2);
+    __Pyx_XDECREF_SET(__pyx_v_length, __pyx_t_2);
+    __pyx_t_2 = 0;
+
+    /* "View.MemoryView":1044
+ *     result.view.len = result.view.itemsize
+ *     for length in result.view.shape[:ndim]:
+ *         result.view.len *= length             # <<<<<<<<<<<<<<
+ * 
+ *     result.to_object_func = to_object_func
+ */
+    __pyx_t_2 = PyInt_FromSsize_t(__pyx_v_result->__pyx_base.view.len); if (unlikely(!__pyx_t_2)) __PYX_ERR(2, 1044, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_2);
+    __pyx_t_3 = PyNumber_InPlaceMultiply(__pyx_t_2, __pyx_v_length); if (unlikely(!__pyx_t_3)) __PYX_ERR(2, 1044, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_3);
+    __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+    __pyx_t_9 = __Pyx_PyIndex_AsSsize_t(__pyx_t_3); if (unlikely((__pyx_t_9 == (Py_ssize_t)-1) && PyErr_Occurred())) __PYX_ERR(2, 1044, __pyx_L1_error)
+    __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+    __pyx_v_result->__pyx_base.view.len = __pyx_t_9;
+  }
+
+  /* "View.MemoryView":1046
+ *         result.view.len *= length
+ * 
+ *     result.to_object_func = to_object_func             # <<<<<<<<<<<<<<
+ *     result.to_dtype_func = to_dtype_func
+ * 
+ */
+  __pyx_v_result->to_object_func = __pyx_v_to_object_func;
+
+  /* "View.MemoryView":1047
+ * 
+ *     result.to_object_func = to_object_func
+ *     result.to_dtype_func = to_dtype_func             # <<<<<<<<<<<<<<
+ * 
+ *     return result
+ */
+  __pyx_v_result->to_dtype_func = __pyx_v_to_dtype_func;
+
+  /* "View.MemoryView":1049
+ *     result.to_dtype_func = to_dtype_func
+ * 
+ *     return result             # <<<<<<<<<<<<<<
+ * 
+ * @cname('__pyx_memoryview_get_slice_from_memoryview')
+ */
+  __Pyx_XDECREF(__pyx_r);
+  __Pyx_INCREF(((PyObject *)__pyx_v_result));
+  __pyx_r = ((PyObject *)__pyx_v_result);
+  goto __pyx_L0;
+
+  /* "View.MemoryView":999
+ * 
+ * @cname('__pyx_memoryview_fromslice')
+ * cdef memoryview_fromslice(__Pyx_memviewslice memviewslice,             # <<<<<<<<<<<<<<
+ *                           int ndim,
+ *                           object (*to_object_func)(char *),
+ */
+
+  /* function exit code */
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_2);
+  __Pyx_XDECREF(__pyx_t_3);
+  __Pyx_AddTraceback("View.MemoryView.memoryview_fromslice", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = 0;
+  __pyx_L0:;
+  __Pyx_XDECREF((PyObject *)__pyx_v_result);
+  __Pyx_XDECREF(__pyx_v_length);
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "View.MemoryView":1052
+ * 
+ * @cname('__pyx_memoryview_get_slice_from_memoryview')
+ * cdef __Pyx_memviewslice *get_slice_from_memview(memoryview memview,             # <<<<<<<<<<<<<<
+ *                                                    __Pyx_memviewslice *mslice) except NULL:
+ *     cdef _memoryviewslice obj
+ *
+ * Purpose: obtain a slice descriptor for `memview`.
+ *   - When `memview` passes the type check against __pyx_memoryviewslice_type,
+ *     the address of its embedded `from_slice` member is returned and `mslice`
+ *     is not written.
+ *   - Otherwise `mslice` is filled by __pyx_memoryview_slice_copy() and
+ *     `mslice` itself is returned.
+ * Parameters:
+ *   __pyx_v_memview  memoryview object to describe.
+ *   __pyx_v_mslice   caller-provided scratch descriptor, used only in the
+ *                    second case above.
+ * Returns: pointer to a __Pyx_memviewslice; NULL is produced only on the
+ * __pyx_L1_error path, after a traceback entry has been added.
+ */
+
+static __Pyx_memviewslice *__pyx_memoryview_get_slice_from_memoryview(struct __pyx_memoryview_obj *__pyx_v_memview, __Pyx_memviewslice *__pyx_v_mslice) {
+  struct __pyx_memoryviewslice_obj *__pyx_v_obj = 0;  /* typed alias of memview; released with XDECREF at exit */
+  __Pyx_memviewslice *__pyx_r;  /* return value */
+  __Pyx_RefNannyDeclarations
+  int __pyx_t_1;
+  int __pyx_t_2;
+  PyObject *__pyx_t_3 = NULL;  /* temporary carrying the reference that is moved into __pyx_v_obj */
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("get_slice_from_memview", 0);
+
+  /* "View.MemoryView":1055
+ *                                                    __Pyx_memviewslice *mslice) except NULL:
+ *     cdef _memoryviewslice obj
+ *     if isinstance(memview, _memoryviewslice):             # <<<<<<<<<<<<<<
+ *         obj = memview
+ *         return &obj.from_slice
+ *
+ * The type-check result is normalized to 0/1 in __pyx_t_2 before branching.
+ */
+  __pyx_t_1 = __Pyx_TypeCheck(((PyObject *)__pyx_v_memview), __pyx_memoryviewslice_type); 
+  __pyx_t_2 = (__pyx_t_1 != 0);
+  if (__pyx_t_2) {
+
+    /* "View.MemoryView":1056
+ *     cdef _memoryviewslice obj
+ *     if isinstance(memview, _memoryviewslice):
+ *         obj = memview             # <<<<<<<<<<<<<<
+ *         return &obj.from_slice
+ *     else:
+ *
+ * Typed assignment: the guard accepts Py_None or an object that passes
+ * __Pyx_TypeTest; anything else is routed through __PYX_ERR, which is given
+ * the __pyx_L1_error label.  The INCREF below gives __pyx_v_obj its own
+ * reference; the temporary is zeroed once ownership has moved.
+ */
+    if (!(likely(((((PyObject *)__pyx_v_memview)) == Py_None) || likely(__Pyx_TypeTest(((PyObject *)__pyx_v_memview), __pyx_memoryviewslice_type))))) __PYX_ERR(2, 1056, __pyx_L1_error)
+    __pyx_t_3 = ((PyObject *)__pyx_v_memview);
+    __Pyx_INCREF(__pyx_t_3);
+    __pyx_v_obj = ((struct __pyx_memoryviewslice_obj *)__pyx_t_3);
+    __pyx_t_3 = 0;
+
+    /* "View.MemoryView":1057
+ *     if isinstance(memview, _memoryviewslice):
+ *         obj = memview
+ *         return &obj.from_slice             # <<<<<<<<<<<<<<
+ *     else:
+ *         slice_copy(memview, mslice)
+ *
+ * The result points into the object itself.  The reference held by
+ * __pyx_v_obj is dropped at __pyx_L0, so the pointer stays usable only while
+ * something else (the caller's `memview`) keeps the object alive.
+ */
+    __pyx_r = (&__pyx_v_obj->from_slice);
+    goto __pyx_L0;
+
+    /* "View.MemoryView":1055
+ *                                                    __Pyx_memviewslice *mslice) except NULL:
+ *     cdef _memoryviewslice obj
+ *     if isinstance(memview, _memoryviewslice):             # <<<<<<<<<<<<<<
+ *         obj = memview
+ *         return &obj.from_slice
+ */
+  }
+
+  /* "View.MemoryView":1059
+ *         return &obj.from_slice
+ *     else:
+ *         slice_copy(memview, mslice)             # <<<<<<<<<<<<<<
+ *         return mslice
+ * 
+ *
+ * Not a _memoryviewslice: populate the caller's scratch descriptor instead.
+ */
+  /*else*/ {
+    __pyx_memoryview_slice_copy(__pyx_v_memview, __pyx_v_mslice);
+
+    /* "View.MemoryView":1060
+ *     else:
+ *         slice_copy(memview, mslice)
+ *         return mslice             # <<<<<<<<<<<<<<
+ * 
+ * @cname('__pyx_memoryview_slice_copy')
+ */
+    __pyx_r = __pyx_v_mslice;
+    goto __pyx_L0;
+  }
+
+  /* "View.MemoryView":1052
+ * 
+ * @cname('__pyx_memoryview_get_slice_from_memoryview')
+ * cdef __Pyx_memviewslice *get_slice_from_memview(memoryview memview,             # <<<<<<<<<<<<<<
+ *                                                    __Pyx_memviewslice *mslice) except NULL:
+ *     cdef _memoryviewslice obj
+ */
+
+  /* function exit code
+ *
+ * __pyx_L1_error: release any live temporary, add a traceback entry and set
+ *                 the result to NULL, then fall through to the common exit.
+ * __pyx_L0:       common exit; drops the function's own reference to obj
+ *                 (a no-op when obj was never assigned, hence XDECREF).
+ */
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_3);
+  __Pyx_AddTraceback("View.MemoryView.get_slice_from_memview", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = NULL;
+  __pyx_L0:;
+  __Pyx_XDECREF((PyObject *)__pyx_v_obj);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "View.MemoryView":1063
+ *
+ * @cname('__pyx_memoryview_slice_copy')
+ * cdef void slice_copy(memoryview memview, __Pyx_memviewslice *dst)
+ *
+ * Fill `dst` with a description of the buffer exposed by `memview`:
+ *   - dst->memview     the memoryview pointer itself (plain pointer copy, no
+ *                      reference count change happens in this function),
+ *   - dst->data        memview->view.buf cast to char *,
+ *   - dst->shape[i], dst->strides[i], dst->suboffsets[i] for each of the
+ *     memview->view.ndim dimensions.
+ * When the view has no suboffsets array, every suboffset slot is set to -1.
+ */
+
+static void __pyx_memoryview_slice_copy(struct __pyx_memoryview_obj *__pyx_v_memview, __Pyx_memviewslice *__pyx_v_dst) {
+  Py_ssize_t *view_shape;
+  Py_ssize_t *view_strides;
+  Py_ssize_t *view_suboffsets;
+  int axis;
+  int axis_count;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("slice_copy", 0);
+
+  /* Cache the three per-dimension arrays of the source view. */
+  view_shape = __pyx_v_memview->view.shape;
+  view_strides = __pyx_v_memview->view.strides;
+  view_suboffsets = __pyx_v_memview->view.suboffsets;
+
+  /* Scalar members of the descriptor. */
+  __pyx_v_dst->memview = ((struct __pyx_memoryview_obj *)__pyx_v_memview);
+  __pyx_v_dst->data = ((char *)__pyx_v_memview->view.buf);
+
+  /* Per-dimension members; the dimension count is read once, up front. */
+  axis_count = __pyx_v_memview->view.ndim;
+  for (axis = 0; axis < axis_count; ++axis) {
+    (__pyx_v_dst->shape[axis]) = (view_shape[axis]);
+    (__pyx_v_dst->strides[axis]) = (view_strides[axis]);
+    /* A missing suboffsets array is represented as -1 in every slot. */
+    (__pyx_v_dst->suboffsets[axis]) = (view_suboffsets != 0) ? (view_suboffsets[axis]) : -1L;
+  }
+
+  __Pyx_RefNannyFinishContext();
+}
+
+/* "View.MemoryView":1080
+ * 
+ * @cname('__pyx_memoryview_copy_object')
+ * cdef memoryview_copy(memoryview memview):             # <<<<<<<<<<<<<<
+ *     "Create a new memoryview object"
+ *     cdef __Pyx_memviewslice memviewslice
+ *
+ * Purpose: build a new memoryview object from `memview`.  The view's
+ * description is first copied into a stack-local __Pyx_memviewslice with
+ * __pyx_memoryview_slice_copy(), then the actual construction is delegated to
+ * __pyx_memoryview_copy_object_from_slice().
+ * Returns: the object produced by the delegate (handed to the caller without
+ * an additional INCREF), or 0 after adding a traceback entry when the
+ * delegate returned NULL.
+ */
+
+static PyObject *__pyx_memoryview_copy_object(struct __pyx_memoryview_obj *__pyx_v_memview) {
+  __Pyx_memviewslice __pyx_v_memviewslice;  /* stack-local descriptor, filled by slice_copy below */
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  PyObject *__pyx_t_1 = NULL;  /* holds the delegate's result until it is moved into __pyx_r */
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("memoryview_copy", 0);
+
+  /* "View.MemoryView":1083
+ *     "Create a new memoryview object"
+ *     cdef __Pyx_memviewslice memviewslice
+ *     slice_copy(memview, &memviewslice)             # <<<<<<<<<<<<<<
+ *     return memoryview_copy_from_slice(memview, &memviewslice)
+ * 
+ */
+  __pyx_memoryview_slice_copy(__pyx_v_memview, (&__pyx_v_memviewslice));
+
+  /* "View.MemoryView":1084
+ *     cdef __Pyx_memviewslice memviewslice
+ *     slice_copy(memview, &memviewslice)
+ *     return memoryview_copy_from_slice(memview, &memviewslice)             # <<<<<<<<<<<<<<
+ * 
+ * @cname('__pyx_memoryview_copy_object_from_slice')
+ *
+ * A NULL result from the delegate is routed through __PYX_ERR, which is
+ * given the __pyx_L1_error label.  On success the temporary is zeroed after
+ * its value has been moved into __pyx_r, so the error-path XDECREF cannot
+ * release the returned object.
+ */
+  __Pyx_XDECREF(__pyx_r);
+  __pyx_t_1 = __pyx_memoryview_copy_object_from_slice(__pyx_v_memview, (&__pyx_v_memviewslice)); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 1084, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_r = __pyx_t_1;
+  __pyx_t_1 = 0;
+  goto __pyx_L0;
+
+  /* "View.MemoryView":1080
+ * 
+ * @cname('__pyx_memoryview_copy_object')
+ * cdef memoryview_copy(memoryview memview):             # <<<<<<<<<<<<<<
+ *     "Create a new memoryview object"
+ *     cdef __Pyx_memviewslice memviewslice
+ */
+
+  /* function exit code
+ *
+ * __pyx_L1_error: release the temporary, add a traceback entry, return 0.
+ * __pyx_L0:       common exit shared by the success and error paths.
+ */
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_1);
+  __Pyx_AddTraceback("View.MemoryView.memoryview_copy", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = 0;
+  __pyx_L0:;
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "View.MemoryView":1087
+ * 
+ * @cname('__pyx_memoryview_copy_object_from_slice')
+ * cdef memoryview_copy_from_slice(memoryview memview, __Pyx_memviewslice *memviewslice):             # <<<<<<<<<<<<<<
+ *     """
+ *     Create a new memoryview object from a given memoryview object and slice.
+ *
+ * Purpose: construct a new memoryview object via __pyx_memoryview_fromslice()
+ * using the slice descriptor `*memviewslice` (passed by value) together with
+ * the dimension count and dtype_is_object flag taken from `memview`.
+ * The element conversion callbacks are inherited from `memview` when it
+ * passes the type check against __pyx_memoryviewslice_type; otherwise both
+ * callbacks are NULL.
+ * Returns: the object produced by __pyx_memoryview_fromslice(), or 0 after
+ * adding a traceback entry when that call returned NULL.
+ */
+
+static PyObject *__pyx_memoryview_copy_object_from_slice(struct __pyx_memoryview_obj *__pyx_v_memview, __Pyx_memviewslice *__pyx_v_memviewslice) {
+  PyObject *(*__pyx_v_to_object_func)(char *);  /* callback: char * -> PyObject * */
+  int (*__pyx_v_to_dtype_func)(char *, PyObject *);  /* callback: (char *, PyObject *) -> int */
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  int __pyx_t_1;
+  int __pyx_t_2;
+  PyObject *(*__pyx_t_3)(char *);
+  int (*__pyx_t_4)(char *, PyObject *);
+  PyObject *__pyx_t_5 = NULL;  /* holds the new object until it is moved into __pyx_r */
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("memoryview_copy_from_slice", 0);
+
+  /* "View.MemoryView":1094
+ *     cdef int (*to_dtype_func)(char *, object) except 0
+ * 
+ *     if isinstance(memview, _memoryviewslice):             # <<<<<<<<<<<<<<
+ *         to_object_func = (<_memoryviewslice> memview).to_object_func
+ *         to_dtype_func = (<_memoryviewslice> memview).to_dtype_func
+ */
+  __pyx_t_1 = __Pyx_TypeCheck(((PyObject *)__pyx_v_memview), __pyx_memoryviewslice_type); 
+  __pyx_t_2 = (__pyx_t_1 != 0);
+  if (__pyx_t_2) {
+
+    /* "View.MemoryView":1095
+ * 
+ *     if isinstance(memview, _memoryviewslice):
+ *         to_object_func = (<_memoryviewslice> memview).to_object_func             # <<<<<<<<<<<<<<
+ *         to_dtype_func = (<_memoryviewslice> memview).to_dtype_func
+ *     else:
+ *
+ * The cast is unchecked here; it relies on the type check performed just
+ * above.
+ */
+    __pyx_t_3 = ((struct __pyx_memoryviewslice_obj *)__pyx_v_memview)->to_object_func;
+    __pyx_v_to_object_func = __pyx_t_3;
+
+    /* "View.MemoryView":1096
+ *     if isinstance(memview, _memoryviewslice):
+ *         to_object_func = (<_memoryviewslice> memview).to_object_func
+ *         to_dtype_func = (<_memoryviewslice> memview).to_dtype_func             # <<<<<<<<<<<<<<
+ *     else:
+ *         to_object_func = NULL
+ */
+    __pyx_t_4 = ((struct __pyx_memoryviewslice_obj *)__pyx_v_memview)->to_dtype_func;
+    __pyx_v_to_dtype_func = __pyx_t_4;
+
+    /* "View.MemoryView":1094
+ *     cdef int (*to_dtype_func)(char *, object) except 0
+ * 
+ *     if isinstance(memview, _memoryviewslice):             # <<<<<<<<<<<<<<
+ *         to_object_func = (<_memoryviewslice> memview).to_object_func
+ *         to_dtype_func = (<_memoryviewslice> memview).to_dtype_func
+ */
+    goto __pyx_L3;
+  }
+
+  /* "View.MemoryView":1098
+ *         to_dtype_func = (<_memoryviewslice> memview).to_dtype_func
+ *     else:
+ *         to_object_func = NULL             # <<<<<<<<<<<<<<
+ *         to_dtype_func = NULL
+ * 
+ *
+ * Not a _memoryviewslice: no conversion callbacks are forwarded.
+ */
+  /*else*/ {
+    __pyx_v_to_object_func = NULL;
+
+    /* "View.MemoryView":1099
+ *     else:
+ *         to_object_func = NULL
+ *         to_dtype_func = NULL             # <<<<<<<<<<<<<<
+ * 
+ *     return memoryview_fromslice(memviewslice[0], memview.view.ndim,
+ */
+    __pyx_v_to_dtype_func = NULL;
+  }
+  __pyx_L3:;
+
+  /* "View.MemoryView":1101
+ *         to_dtype_func = NULL
+ * 
+ *     return memoryview_fromslice(memviewslice[0], memview.view.ndim,             # <<<<<<<<<<<<<<
+ *                                 to_object_func, to_dtype_func,
+ *                                 memview.dtype_is_object)
+ */
+  __Pyx_XDECREF(__pyx_r);
+
+  /* "View.MemoryView":1103
+ *     return memoryview_fromslice(memviewslice[0], memview.view.ndim,
+ *                                 to_object_func, to_dtype_func,
+ *                                 memview.dtype_is_object)             # <<<<<<<<<<<<<<
+ * 
+ * 
+ *
+ * The descriptor is passed by value (memviewslice[0]).  A NULL result is
+ * routed through __PYX_ERR, which is given the __pyx_L1_error label; the
+ * error is attributed to source line 1101, where the return statement
+ * starts.  On success the temporary is zeroed after its value has been moved
+ * into __pyx_r.
+ */
+  __pyx_t_5 = __pyx_memoryview_fromslice((__pyx_v_memviewslice[0]), __pyx_v_memview->view.ndim, __pyx_v_to_object_func, __pyx_v_to_dtype_func, __pyx_v_memview->dtype_is_object); if (unlikely(!__pyx_t_5)) __PYX_ERR(2, 1101, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_5);
+  __pyx_r = __pyx_t_5;
+  __pyx_t_5 = 0;
+  goto __pyx_L0;
+
+  /* "View.MemoryView":1087
+ * 
+ * @cname('__pyx_memoryview_copy_object_from_slice')
+ * cdef memoryview_copy_from_slice(memoryview memview, __Pyx_memviewslice *memviewslice):             # <<<<<<<<<<<<<<
+ *     """
+ *     Create a new memoryview object from a given memoryview object and slice.
+ */
+
+  /* function exit code
+ *
+ * __pyx_L1_error: release the temporary, add a traceback entry, return 0.
+ * __pyx_L0:       common exit shared by the success and error paths.
+ */
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_5);
+  __Pyx_AddTraceback("View.MemoryView.memoryview_copy_from_slice", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = 0;
+  __pyx_L0:;
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "View.MemoryView":1109
+ *
+ * cdef Py_ssize_t abs_py_ssize_t(Py_ssize_t arg) nogil
+ *
+ * Magnitude of a Py_ssize_t: a negative argument is negated, any other value
+ * is returned unchanged.  Calls nothing and touches no Python state.
+ */
+
+static Py_ssize_t abs_py_ssize_t(Py_ssize_t __pyx_v_arg) {
+  return (__pyx_v_arg < 0) ? (-__pyx_v_arg) : __pyx_v_arg;
+}
+
+/* "View.MemoryView":1116
+ *
+ * @cname('__pyx_get_best_slice_order')
+ * cdef char get_best_order(__Pyx_memviewslice *mslice, int ndim) nogil
+ *
+ * Figure out the best memory access order for a given slice.
+ *
+ * Two strides are sampled from the first `ndim` dimensions of `mslice`:
+ *   - the stride of the LAST dimension whose extent exceeds 1, and
+ *   - the stride of the FIRST dimension whose extent exceeds 1.
+ * Either one stays 0 when no dimension has an extent above 1.
+ * Returns 'C' when the magnitude of the former is less than or equal to the
+ * magnitude of the latter, and 'F' otherwise.
+ */
+
+static char __pyx_get_best_slice_order(__Pyx_memviewslice *__pyx_v_mslice, int __pyx_v_ndim) {
+  Py_ssize_t trailing_stride = 0;  /* c_stride in the .pyx source */
+  Py_ssize_t leading_stride = 0;   /* f_stride in the .pyx source */
+  int axis;
+
+  /* Scan from the last dimension towards the first. */
+  for (axis = __pyx_v_ndim - 1; axis >= 0; --axis) {
+    if ((__pyx_v_mslice->shape[axis]) > 1) {
+      trailing_stride = (__pyx_v_mslice->strides[axis]);
+      break;
+    }
+  }
+
+  /* Scan from the first dimension towards the last. */
+  for (axis = 0; axis < __pyx_v_ndim; ++axis) {
+    if ((__pyx_v_mslice->shape[axis]) > 1) {
+      leading_stride = (__pyx_v_mslice->strides[axis]);
+      break;
+    }
+  }
+
+  /* Ties go to 'C'. */
+  if (abs_py_ssize_t(trailing_stride) <= abs_py_ssize_t(leading_stride)) {
+    return 'C';
+  }
+  return 'F';
+}
+
+/* "View.MemoryView":1140
+ *
+ * @cython.cdivision(True)
+ * cdef void _copy_strided_to_strided(char *src_data, Py_ssize_t *src_strides,
+ *                                    char *dst_data, Py_ssize_t *dst_strides,
+ *                                    Py_ssize_t *src_shape, Py_ssize_t *dst_shape,
+ *                                    int ndim, size_t itemsize)
+ *
+ * Recursively copy an `ndim`-dimensional strided region from src_data to
+ * dst_data, `itemsize` bytes per element.
+ *
+ *   ndim == 1, both strides positive and equal to itemsize:
+ *       one memcpy of itemsize * dst_shape[0] bytes.
+ *   ndim == 1, any other strides:
+ *       dst_shape[0] element-sized memcpy calls, advancing each pointer by
+ *       its own stride.
+ *   ndim != 1:
+ *       dst_shape[0] recursive calls on the remaining dimensions, advancing
+ *       each pointer by its own outermost stride between calls.
+ *
+ * Iteration counts come from dst_shape only; src_shape is merely forwarded to
+ * the recursive calls.
+ * NOTE(review): assumes ndim >= 1 on entry - confirm against callers.
+ */
+
+static void _copy_strided_to_strided(char *__pyx_v_src_data, Py_ssize_t *__pyx_v_src_strides, char *__pyx_v_dst_data, Py_ssize_t *__pyx_v_dst_strides, Py_ssize_t *__pyx_v_src_shape, Py_ssize_t *__pyx_v_dst_shape, int __pyx_v_ndim, size_t __pyx_v_itemsize) {
+  const Py_ssize_t count = (__pyx_v_dst_shape[0]);
+  const Py_ssize_t src_step = (__pyx_v_src_strides[0]);
+  const Py_ssize_t dst_step = (__pyx_v_dst_strides[0]);
+  Py_ssize_t k;
+
+  if (__pyx_v_ndim != 1) {
+    /* Peel off the outermost dimension and recurse on the rest. */
+    for (k = 0; k < count; ++k) {
+      _copy_strided_to_strided(__pyx_v_src_data, (__pyx_v_src_strides + 1), __pyx_v_dst_data, (__pyx_v_dst_strides + 1), (__pyx_v_src_shape + 1), (__pyx_v_dst_shape + 1), (__pyx_v_ndim - 1), __pyx_v_itemsize);
+      __pyx_v_src_data = (__pyx_v_src_data + src_step);
+      __pyx_v_dst_data = (__pyx_v_dst_data + dst_step);
+    }
+    return;
+  }
+
+  /* Innermost dimension: both sides densely packed, so one bulk copy does it. */
+  if ((src_step > 0) && (dst_step > 0) &&
+      (((size_t)src_step) == __pyx_v_itemsize) &&
+      (__pyx_v_itemsize == ((size_t)dst_step))) {
+    (void)(memcpy(__pyx_v_dst_data, __pyx_v_src_data, (__pyx_v_itemsize * count)));
+    return;
+  }
+
+  /* Innermost dimension, general strides: copy one element at a time. */
+  for (k = 0; k < count; ++k) {
+    (void)(memcpy(__pyx_v_dst_data, __pyx_v_src_data, __pyx_v_itemsize));
+    __pyx_v_src_data = (__pyx_v_src_data + src_step);
+    __pyx_v_dst_data = (__pyx_v_dst_data + dst_step);
+  }
+}
+
+/* "View.MemoryView":1170
+ *             dst_data += dst_stride
+ * 
+ * cdef void copy_strided_to_strided(__Pyx_memviewslice *src,             # <<<<<<<<<<<<<<
+ *                                   __Pyx_memviewslice *dst,
+ *                                   int ndim, size_t itemsize) nogil:
+ */
+
+static void copy_strided_to_strided(__Pyx_memviewslice *__pyx_v_src, __Pyx_memviewslice *__pyx_v_dst, int __pyx_v_ndim, size_t __pyx_v_itemsize) {
+
+  /* "View.MemoryView":1173
+ *                                   __Pyx_memviewslice *dst,
+ *                                   int ndim, size_t itemsize) nogil:
+ *     _copy_strided_to_strided(src.data, src.strides, dst.data, dst.strides,             # <<<<<<<<<<<<<<
+ *                              src.shape, dst.shape, ndim, itemsize)
+ * 
+ */
+  _copy_strided_to_strided(__pyx_v_src->data, __pyx_v_src->strides, __pyx_v_dst->data, __pyx_v_dst->strides, __pyx_v_src->shape, __pyx_v_dst->shape, __pyx_v_ndim, __pyx_v_itemsize);
+
+  /* "View.MemoryView":1170
+ *             dst_data += dst_stride
+ * 
+ * cdef void copy_strided_to_strided(__Pyx_memviewslice *src,             # <<<<<<<<<<<<<<
+ *                                   __Pyx_memviewslice *dst,
+ *                                   int ndim, size_t itemsize) nogil:
+ */
+
+  /* function exit code */
+}
+
+/* "View.MemoryView":1177
+ * 
+ * @cname('__pyx_memoryview_slice_get_size')
+ * cdef Py_ssize_t slice_get_size(__Pyx_memviewslice *src, int ndim) nogil:             # <<<<<<<<<<<<<<
+ *     "Return the size of the memory occupied by the slice in number of bytes"
+ *     cdef Py_ssize_t shape, size = src.memview.view.itemsize
+ */
+
+static Py_ssize_t __pyx_memoryview_slice_get_size(__Pyx_memviewslice *__pyx_v_src, int __pyx_v_ndim) {
+  Py_ssize_t __pyx_v_shape;
+  Py_ssize_t __pyx_v_size;
+  Py_ssize_t __pyx_r;
+  Py_ssize_t __pyx_t_1;
+  Py_ssize_t *__pyx_t_2;
+  Py_ssize_t *__pyx_t_3;
+  Py_ssize_t *__pyx_t_4;
+
+  /* "View.MemoryView":1179
+ * cdef Py_ssize_t slice_get_size(__Pyx_memviewslice *src, int ndim) nogil:
+ *     "Return the size of the memory occupied by the slice in number of bytes"
+ *     cdef Py_ssize_t shape, size = src.memview.view.itemsize             # <<<<<<<<<<<<<<
+ * 
+ *     for shape in src.shape[:ndim]:
+ */
+  __pyx_t_1 = __pyx_v_src->memview->view.itemsize;
+  __pyx_v_size = __pyx_t_1;
+
+  /* "View.MemoryView":1181
+ *     cdef Py_ssize_t shape, size = src.memview.view.itemsize
+ * 
+ *     for shape in src.shape[:ndim]:             # <<<<<<<<<<<<<<
+ *         size *= shape
+ * 
+ */
+  __pyx_t_3 = (__pyx_v_src->shape + __pyx_v_ndim);
+  for (__pyx_t_4 = __pyx_v_src->shape; __pyx_t_4 < __pyx_t_3; __pyx_t_4++) {
+    __pyx_t_2 = __pyx_t_4;
+    __pyx_v_shape = (__pyx_t_2[0]);
+
+    /* "View.MemoryView":1182
+ * 
+ *     for shape in src.shape[:ndim]:
+ *         size *= shape             # <<<<<<<<<<<<<<
+ * 
+ *     return size
+ */
+    __pyx_v_size = (__pyx_v_size * __pyx_v_shape);
+  }
+
+  /* "View.MemoryView":1184
+ *         size *= shape
+ * 
+ *     return size             # <<<<<<<<<<<<<<
+ * 
+ * @cname('__pyx_fill_contig_strides_array')
+ */
+  __pyx_r = __pyx_v_size;
+  goto __pyx_L0;
+
+  /* "View.MemoryView":1177
+ * 
+ * @cname('__pyx_memoryview_slice_get_size')
+ * cdef Py_ssize_t slice_get_size(__Pyx_memviewslice *src, int ndim) nogil:             # <<<<<<<<<<<<<<
+ *     "Return the size of the memory occupied by the slice in number of bytes"
+ *     cdef Py_ssize_t shape, size = src.memview.view.itemsize
+ */
+
+  /* function exit code */
+  __pyx_L0:;
+  return __pyx_r;
+}
+
+/* "View.MemoryView":1187
+ * 
+ * @cname('__pyx_fill_contig_strides_array')
+ * cdef Py_ssize_t fill_contig_strides_array(             # <<<<<<<<<<<<<<
+ *                 Py_ssize_t *shape, Py_ssize_t *strides, Py_ssize_t stride,
+ *                 int ndim, char order) nogil:
+ */
+
+static Py_ssize_t __pyx_fill_contig_strides_array(Py_ssize_t *__pyx_v_shape, Py_ssize_t *__pyx_v_strides, Py_ssize_t __pyx_v_stride, int __pyx_v_ndim, char __pyx_v_order) {
+  int __pyx_v_idx;
+  Py_ssize_t __pyx_r;
+  int __pyx_t_1;
+  int __pyx_t_2;
+  int __pyx_t_3;
+  int __pyx_t_4;
+
+  /* "View.MemoryView":1196
+ *     cdef int idx
+ * 
+ *     if order == 'F':             # <<<<<<<<<<<<<<
+ *         for idx in range(ndim):
+ *             strides[idx] = stride
+ */
+  __pyx_t_1 = ((__pyx_v_order == 'F') != 0);
+  if (__pyx_t_1) {
+
+    /* "View.MemoryView":1197
+ * 
+ *     if order == 'F':
+ *         for idx in range(ndim):             # <<<<<<<<<<<<<<
+ *             strides[idx] = stride
+ *             stride *= shape[idx]
+ */
+    __pyx_t_2 = __pyx_v_ndim;
+    __pyx_t_3 = __pyx_t_2;
+    for (__pyx_t_4 = 0; __pyx_t_4 < __pyx_t_3; __pyx_t_4+=1) {
+      __pyx_v_idx = __pyx_t_4;
+
+      /* "View.MemoryView":1198
+ *     if order == 'F':
+ *         for idx in range(ndim):
+ *             strides[idx] = stride             # <<<<<<<<<<<<<<
+ *             stride *= shape[idx]
+ *     else:
+ */
+      (__pyx_v_strides[__pyx_v_idx]) = __pyx_v_stride;
+
+      /* "View.MemoryView":1199
+ *         for idx in range(ndim):
+ *             strides[idx] = stride
+ *             stride *= shape[idx]             # <<<<<<<<<<<<<<
+ *     else:
+ *         for idx in range(ndim - 1, -1, -1):
+ */
+      __pyx_v_stride = (__pyx_v_stride * (__pyx_v_shape[__pyx_v_idx]));
+    }
+
+    /* "View.MemoryView":1196
+ *     cdef int idx
+ * 
+ *     if order == 'F':             # <<<<<<<<<<<<<<
+ *         for idx in range(ndim):
+ *             strides[idx] = stride
+ */
+    goto __pyx_L3;
+  }
+
+  /* "View.MemoryView":1201
+ *             stride *= shape[idx]
+ *     else:
+ *         for idx in range(ndim - 1, -1, -1):             # <<<<<<<<<<<<<<
+ *             strides[idx] = stride
+ *             stride *= shape[idx]
+ */
+  /*else*/ {
+    for (__pyx_t_2 = (__pyx_v_ndim - 1); __pyx_t_2 > -1; __pyx_t_2-=1) {
+      __pyx_v_idx = __pyx_t_2;
+
+      /* "View.MemoryView":1202
+ *     else:
+ *         for idx in range(ndim - 1, -1, -1):
+ *             strides[idx] = stride             # <<<<<<<<<<<<<<
+ *             stride *= shape[idx]
+ * 
+ */
+      (__pyx_v_strides[__pyx_v_idx]) = __pyx_v_stride;
+
+      /* "View.MemoryView":1203
+ *         for idx in range(ndim - 1, -1, -1):
+ *             strides[idx] = stride
+ *             stride *= shape[idx]             # <<<<<<<<<<<<<<
+ * 
+ *     return stride
+ */
+      __pyx_v_stride = (__pyx_v_stride * (__pyx_v_shape[__pyx_v_idx]));
+    }
+  }
+  __pyx_L3:;
+
+  /* "View.MemoryView":1205
+ *             stride *= shape[idx]
+ * 
+ *     return stride             # <<<<<<<<<<<<<<
+ * 
+ * @cname('__pyx_memoryview_copy_data_to_temp')
+ */
+  __pyx_r = __pyx_v_stride;
+  goto __pyx_L0;
+
+  /* "View.MemoryView":1187
+ * 
+ * @cname('__pyx_fill_contig_strides_array')
+ * cdef Py_ssize_t fill_contig_strides_array(             # <<<<<<<<<<<<<<
+ *                 Py_ssize_t *shape, Py_ssize_t *strides, Py_ssize_t stride,
+ *                 int ndim, char order) nogil:
+ */
+
+  /* function exit code */
+  __pyx_L0:;
+  return __pyx_r;
+}
+
+/* "View.MemoryView":1208
+ * 
+ * @cname('__pyx_memoryview_copy_data_to_temp')
+ * cdef void *copy_data_to_temp(__Pyx_memviewslice *src,             # <<<<<<<<<<<<<<
+ *                              __Pyx_memviewslice *tmpslice,
+ *                              char order,
+ */
+
+static void *__pyx_memoryview_copy_data_to_temp(__Pyx_memviewslice *__pyx_v_src, __Pyx_memviewslice *__pyx_v_tmpslice, char __pyx_v_order, int __pyx_v_ndim) {
+  int __pyx_v_i;
+  void *__pyx_v_result;
+  size_t __pyx_v_itemsize;
+  size_t __pyx_v_size;
+  void *__pyx_r;
+  Py_ssize_t __pyx_t_1;
+  int __pyx_t_2;
+  int __pyx_t_3;
+  struct __pyx_memoryview_obj *__pyx_t_4;
+  int __pyx_t_5;
+  int __pyx_t_6;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+
+  /* "View.MemoryView":1219
+ *     cdef void *result
+ * 
+ *     cdef size_t itemsize = src.memview.view.itemsize             # <<<<<<<<<<<<<<
+ *     cdef size_t size = slice_get_size(src, ndim)
+ * 
+ */
+  __pyx_t_1 = __pyx_v_src->memview->view.itemsize;
+  __pyx_v_itemsize = __pyx_t_1;
+
+  /* "View.MemoryView":1220
+ * 
+ *     cdef size_t itemsize = src.memview.view.itemsize
+ *     cdef size_t size = slice_get_size(src, ndim)             # <<<<<<<<<<<<<<
+ * 
+ *     result = malloc(size)
+ */
+  __pyx_v_size = __pyx_memoryview_slice_get_size(__pyx_v_src, __pyx_v_ndim);
+
+  /* "View.MemoryView":1222
+ *     cdef size_t size = slice_get_size(src, ndim)
+ * 
+ *     result = malloc(size)             # <<<<<<<<<<<<<<
+ *     if not result:
+ *         _err(MemoryError, NULL)
+ */
+  __pyx_v_result = malloc(__pyx_v_size);
+
+  /* "View.MemoryView":1223
+ * 
+ *     result = malloc(size)
+ *     if not result:             # <<<<<<<<<<<<<<
+ *         _err(MemoryError, NULL)
+ * 
+ */
+  __pyx_t_2 = ((!(__pyx_v_result != 0)) != 0);
+  if (__pyx_t_2) {
+
+    /* "View.MemoryView":1224
+ *     result = malloc(size)
+ *     if not result:
+ *         _err(MemoryError, NULL)             # <<<<<<<<<<<<<<
+ * 
+ * 
+ */
+    __pyx_t_3 = __pyx_memoryview_err(__pyx_builtin_MemoryError, NULL); if (unlikely(__pyx_t_3 == ((int)-1))) __PYX_ERR(2, 1224, __pyx_L1_error)
+
+    /* "View.MemoryView":1223
+ * 
+ *     result = malloc(size)
+ *     if not result:             # <<<<<<<<<<<<<<
+ *         _err(MemoryError, NULL)
+ * 
+ */
+  }
+
+  /* "View.MemoryView":1227
+ * 
+ * 
+ *     tmpslice.data = <char *> result             # <<<<<<<<<<<<<<
+ *     tmpslice.memview = src.memview
+ *     for i in range(ndim):
+ */
+  __pyx_v_tmpslice->data = ((char *)__pyx_v_result);
+
+  /* "View.MemoryView":1228
+ * 
+ *     tmpslice.data = <char *> result
+ *     tmpslice.memview = src.memview             # <<<<<<<<<<<<<<
+ *     for i in range(ndim):
+ *         tmpslice.shape[i] = src.shape[i]
+ */
+  __pyx_t_4 = __pyx_v_src->memview;
+  __pyx_v_tmpslice->memview = __pyx_t_4;
+
+  /* "View.MemoryView":1229
+ *     tmpslice.data = <char *> result
+ *     tmpslice.memview = src.memview
+ *     for i in range(ndim):             # <<<<<<<<<<<<<<
+ *         tmpslice.shape[i] = src.shape[i]
+ *         tmpslice.suboffsets[i] = -1
+ */
+  __pyx_t_3 = __pyx_v_ndim;
+  __pyx_t_5 = __pyx_t_3;
+  for (__pyx_t_6 = 0; __pyx_t_6 < __pyx_t_5; __pyx_t_6+=1) {
+    __pyx_v_i = __pyx_t_6;
+
+    /* "View.MemoryView":1230
+ *     tmpslice.memview = src.memview
+ *     for i in range(ndim):
+ *         tmpslice.shape[i] = src.shape[i]             # <<<<<<<<<<<<<<
+ *         tmpslice.suboffsets[i] = -1
+ * 
+ */
+    (__pyx_v_tmpslice->shape[__pyx_v_i]) = (__pyx_v_src->shape[__pyx_v_i]);
+
+    /* "View.MemoryView":1231
+ *     for i in range(ndim):
+ *         tmpslice.shape[i] = src.shape[i]
+ *         tmpslice.suboffsets[i] = -1             # <<<<<<<<<<<<<<
+ * 
+ *     fill_contig_strides_array(&tmpslice.shape[0], &tmpslice.strides[0], itemsize,
+ */
+    (__pyx_v_tmpslice->suboffsets[__pyx_v_i]) = -1L;
+  }
+
+  /* "View.MemoryView":1233
+ *         tmpslice.suboffsets[i] = -1
+ * 
+ *     fill_contig_strides_array(&tmpslice.shape[0], &tmpslice.strides[0], itemsize,             # <<<<<<<<<<<<<<
+ *                               ndim, order)
+ * 
+ */
+  (void)(__pyx_fill_contig_strides_array((&(__pyx_v_tmpslice->shape[0])), (&(__pyx_v_tmpslice->strides[0])), __pyx_v_itemsize, __pyx_v_ndim, __pyx_v_order));
+
+  /* "View.MemoryView":1237
+ * 
+ * 
+ *     for i in range(ndim):             # <<<<<<<<<<<<<<
+ *         if tmpslice.shape[i] == 1:
+ *             tmpslice.strides[i] = 0
+ */
+  __pyx_t_3 = __pyx_v_ndim;
+  __pyx_t_5 = __pyx_t_3;
+  for (__pyx_t_6 = 0; __pyx_t_6 < __pyx_t_5; __pyx_t_6+=1) {
+    __pyx_v_i = __pyx_t_6;
+
+    /* "View.MemoryView":1238
+ * 
+ *     for i in range(ndim):
+ *         if tmpslice.shape[i] == 1:             # <<<<<<<<<<<<<<
+ *             tmpslice.strides[i] = 0
+ * 
+ */
+    __pyx_t_2 = (((__pyx_v_tmpslice->shape[__pyx_v_i]) == 1) != 0);
+    if (__pyx_t_2) {
+
+      /* "View.MemoryView":1239
+ *     for i in range(ndim):
+ *         if tmpslice.shape[i] == 1:
+ *             tmpslice.strides[i] = 0             # <<<<<<<<<<<<<<
+ * 
+ *     if slice_is_contig(src[0], order, ndim):
+ */
+      (__pyx_v_tmpslice->strides[__pyx_v_i]) = 0;
+
+      /* "View.MemoryView":1238
+ * 
+ *     for i in range(ndim):
+ *         if tmpslice.shape[i] == 1:             # <<<<<<<<<<<<<<
+ *             tmpslice.strides[i] = 0
+ * 
+ */
+    }
+  }
+
+  /* "View.MemoryView":1241
+ *             tmpslice.strides[i] = 0
+ * 
+ *     if slice_is_contig(src[0], order, ndim):             # <<<<<<<<<<<<<<
+ *         memcpy(result, src.data, size)
+ *     else:
+ */
+  __pyx_t_2 = (__pyx_memviewslice_is_contig((__pyx_v_src[0]), __pyx_v_order, __pyx_v_ndim) != 0);
+  if (__pyx_t_2) {
+
+    /* "View.MemoryView":1242
+ * 
+ *     if slice_is_contig(src[0], order, ndim):
+ *         memcpy(result, src.data, size)             # <<<<<<<<<<<<<<
+ *     else:
+ *         copy_strided_to_strided(src, tmpslice, ndim, itemsize)
+ */
+    (void)(memcpy(__pyx_v_result, __pyx_v_src->data, __pyx_v_size));
+
+    /* "View.MemoryView":1241
+ *             tmpslice.strides[i] = 0
+ * 
+ *     if slice_is_contig(src[0], order, ndim):             # <<<<<<<<<<<<<<
+ *         memcpy(result, src.data, size)
+ *     else:
+ */
+    goto __pyx_L9;
+  }
+
+  /* "View.MemoryView":1244
+ *         memcpy(result, src.data, size)
+ *     else:
+ *         copy_strided_to_strided(src, tmpslice, ndim, itemsize)             # <<<<<<<<<<<<<<
+ * 
+ *     return result
+ */
+  /*else*/ {
+    copy_strided_to_strided(__pyx_v_src, __pyx_v_tmpslice, __pyx_v_ndim, __pyx_v_itemsize);
+  }
+  __pyx_L9:;
+
+  /* "View.MemoryView":1246
+ *         copy_strided_to_strided(src, tmpslice, ndim, itemsize)
+ * 
+ *     return result             # <<<<<<<<<<<<<<
+ * 
+ * 
+ */
+  __pyx_r = __pyx_v_result;
+  goto __pyx_L0;
+
+  /* "View.MemoryView":1208
+ * 
+ * @cname('__pyx_memoryview_copy_data_to_temp')
+ * cdef void *copy_data_to_temp(__Pyx_memviewslice *src,             # <<<<<<<<<<<<<<
+ *                              __Pyx_memviewslice *tmpslice,
+ *                              char order,
+ */
+
+  /* function exit code */
+  __pyx_L1_error:;
+  {
+    #ifdef WITH_THREAD
+    PyGILState_STATE __pyx_gilstate_save = __Pyx_PyGILState_Ensure();
+    #endif
+    __Pyx_AddTraceback("View.MemoryView.copy_data_to_temp", __pyx_clineno, __pyx_lineno, __pyx_filename);
+    #ifdef WITH_THREAD
+    __Pyx_PyGILState_Release(__pyx_gilstate_save);
+    #endif
+  }
+  __pyx_r = NULL;
+  __pyx_L0:;
+  return __pyx_r;
+}
+
+/* "View.MemoryView":1251
+ * 
+ * @cname('__pyx_memoryview_err_extents')
+ * cdef int _err_extents(int i, Py_ssize_t extent1,             # <<<<<<<<<<<<<<
+ *                              Py_ssize_t extent2) except -1 with gil:
+ *     raise ValueError("got differing extents in dimension %d (got %d and %d)" %
+ */
+
+static int __pyx_memoryview_err_extents(int __pyx_v_i, Py_ssize_t __pyx_v_extent1, Py_ssize_t __pyx_v_extent2) {
+  int __pyx_r;
+  __Pyx_RefNannyDeclarations
+  PyObject *__pyx_t_1 = NULL;
+  PyObject *__pyx_t_2 = NULL;
+  PyObject *__pyx_t_3 = NULL;
+  PyObject *__pyx_t_4 = NULL;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  #ifdef WITH_THREAD
+  PyGILState_STATE __pyx_gilstate_save = __Pyx_PyGILState_Ensure();
+  #endif
+  __Pyx_RefNannySetupContext("_err_extents", 0);
+
+  /* "View.MemoryView":1254
+ *                              Py_ssize_t extent2) except -1 with gil:
+ *     raise ValueError("got differing extents in dimension %d (got %d and %d)" %
+ *                                                         (i, extent1, extent2))             # <<<<<<<<<<<<<<
+ * 
+ * @cname('__pyx_memoryview_err_dim')
+ */
+  __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_i); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 1254, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_2 = PyInt_FromSsize_t(__pyx_v_extent1); if (unlikely(!__pyx_t_2)) __PYX_ERR(2, 1254, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_2);
+  __pyx_t_3 = PyInt_FromSsize_t(__pyx_v_extent2); if (unlikely(!__pyx_t_3)) __PYX_ERR(2, 1254, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_3);
+  __pyx_t_4 = PyTuple_New(3); if (unlikely(!__pyx_t_4)) __PYX_ERR(2, 1254, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_4);
+  __Pyx_GIVEREF(__pyx_t_1);
+  PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_t_1);
+  __Pyx_GIVEREF(__pyx_t_2);
+  PyTuple_SET_ITEM(__pyx_t_4, 1, __pyx_t_2);
+  __Pyx_GIVEREF(__pyx_t_3);
+  PyTuple_SET_ITEM(__pyx_t_4, 2, __pyx_t_3);
+  __pyx_t_1 = 0;
+  __pyx_t_2 = 0;
+  __pyx_t_3 = 0;
+
+  /* "View.MemoryView":1253
+ * cdef int _err_extents(int i, Py_ssize_t extent1,
+ *                              Py_ssize_t extent2) except -1 with gil:
+ *     raise ValueError("got differing extents in dimension %d (got %d and %d)" %             # <<<<<<<<<<<<<<
+ *                                                         (i, extent1, extent2))
+ * 
+ */
+  __pyx_t_3 = __Pyx_PyString_Format(__pyx_kp_s_got_differing_extents_in_dimensi, __pyx_t_4); if (unlikely(!__pyx_t_3)) __PYX_ERR(2, 1253, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_3);
+  __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
+  __pyx_t_4 = __Pyx_PyObject_CallOneArg(__pyx_builtin_ValueError, __pyx_t_3); if (unlikely(!__pyx_t_4)) __PYX_ERR(2, 1253, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_4);
+  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+  __Pyx_Raise(__pyx_t_4, 0, 0, 0);
+  __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
+  __PYX_ERR(2, 1253, __pyx_L1_error)
+
+  /* "View.MemoryView":1251
+ * 
+ * @cname('__pyx_memoryview_err_extents')
+ * cdef int _err_extents(int i, Py_ssize_t extent1,             # <<<<<<<<<<<<<<
+ *                              Py_ssize_t extent2) except -1 with gil:
+ *     raise ValueError("got differing extents in dimension %d (got %d and %d)" %
+ */
+
+  /* function exit code */
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_1);
+  __Pyx_XDECREF(__pyx_t_2);
+  __Pyx_XDECREF(__pyx_t_3);
+  __Pyx_XDECREF(__pyx_t_4);
+  __Pyx_AddTraceback("View.MemoryView._err_extents", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = -1;
+  __Pyx_RefNannyFinishContext();
+  #ifdef WITH_THREAD
+  __Pyx_PyGILState_Release(__pyx_gilstate_save);
+  #endif
+  return __pyx_r;
+}
+
+/* "View.MemoryView":1257
+ * 
+ * @cname('__pyx_memoryview_err_dim')
+ * cdef int _err_dim(object error, char *msg, int dim) except -1 with gil:             # <<<<<<<<<<<<<<
+ *     raise error(msg.decode('ascii') % dim)
+ * 
+ */
+
+static int __pyx_memoryview_err_dim(PyObject *__pyx_v_error, char *__pyx_v_msg, int __pyx_v_dim) {
+  int __pyx_r;
+  __Pyx_RefNannyDeclarations
+  PyObject *__pyx_t_1 = NULL;
+  PyObject *__pyx_t_2 = NULL;
+  PyObject *__pyx_t_3 = NULL;
+  PyObject *__pyx_t_4 = NULL;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  #ifdef WITH_THREAD
+  PyGILState_STATE __pyx_gilstate_save = __Pyx_PyGILState_Ensure();
+  #endif
+  __Pyx_RefNannySetupContext("_err_dim", 0);
+  __Pyx_INCREF(__pyx_v_error);
+
+  /* "View.MemoryView":1258
+ * @cname('__pyx_memoryview_err_dim')
+ * cdef int _err_dim(object error, char *msg, int dim) except -1 with gil:
+ *     raise error(msg.decode('ascii') % dim)             # <<<<<<<<<<<<<<
+ * 
+ * @cname('__pyx_memoryview_err')
+ */
+  __pyx_t_2 = __Pyx_decode_c_string(__pyx_v_msg, 0, strlen(__pyx_v_msg), NULL, NULL, PyUnicode_DecodeASCII); if (unlikely(!__pyx_t_2)) __PYX_ERR(2, 1258, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_2);
+  __pyx_t_3 = __Pyx_PyInt_From_int(__pyx_v_dim); if (unlikely(!__pyx_t_3)) __PYX_ERR(2, 1258, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_3);
+  __pyx_t_4 = PyUnicode_Format(__pyx_t_2, __pyx_t_3); if (unlikely(!__pyx_t_4)) __PYX_ERR(2, 1258, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_4);
+  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+  __Pyx_INCREF(__pyx_v_error);
+  __pyx_t_3 = __pyx_v_error; __pyx_t_2 = NULL;
+  if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_3))) {
+    __pyx_t_2 = PyMethod_GET_SELF(__pyx_t_3);
+    if (likely(__pyx_t_2)) {
+      PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_3);
+      __Pyx_INCREF(__pyx_t_2);
+      __Pyx_INCREF(function);
+      __Pyx_DECREF_SET(__pyx_t_3, function);
+    }
+  }
+  __pyx_t_1 = (__pyx_t_2) ? __Pyx_PyObject_Call2Args(__pyx_t_3, __pyx_t_2, __pyx_t_4) : __Pyx_PyObject_CallOneArg(__pyx_t_3, __pyx_t_4);
+  __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0;
+  __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
+  if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 1258, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_1);
+  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+  __Pyx_Raise(__pyx_t_1, 0, 0, 0);
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __PYX_ERR(2, 1258, __pyx_L1_error)
+
+  /* "View.MemoryView":1257
+ * 
+ * @cname('__pyx_memoryview_err_dim')
+ * cdef int _err_dim(object error, char *msg, int dim) except -1 with gil:             # <<<<<<<<<<<<<<
+ *     raise error(msg.decode('ascii') % dim)
+ * 
+ */
+
+  /* function exit code */
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_1);
+  __Pyx_XDECREF(__pyx_t_2);
+  __Pyx_XDECREF(__pyx_t_3);
+  __Pyx_XDECREF(__pyx_t_4);
+  __Pyx_AddTraceback("View.MemoryView._err_dim", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = -1;
+  __Pyx_XDECREF(__pyx_v_error);
+  __Pyx_RefNannyFinishContext();
+  #ifdef WITH_THREAD
+  __Pyx_PyGILState_Release(__pyx_gilstate_save);
+  #endif
+  return __pyx_r;
+}
+
+/* "View.MemoryView":1261
+ * 
+ * @cname('__pyx_memoryview_err')
+ * cdef int _err(object error, char *msg) except -1 with gil:             # <<<<<<<<<<<<<<
+ *     if msg != NULL:
+ *         raise error(msg.decode('ascii'))
+ */
+
+static int __pyx_memoryview_err(PyObject *__pyx_v_error, char *__pyx_v_msg) {
+  int __pyx_r;
+  __Pyx_RefNannyDeclarations
+  int __pyx_t_1;
+  PyObject *__pyx_t_2 = NULL;
+  PyObject *__pyx_t_3 = NULL;
+  PyObject *__pyx_t_4 = NULL;
+  PyObject *__pyx_t_5 = NULL;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  #ifdef WITH_THREAD
+  PyGILState_STATE __pyx_gilstate_save = __Pyx_PyGILState_Ensure();
+  #endif
+  __Pyx_RefNannySetupContext("_err", 0);
+  __Pyx_INCREF(__pyx_v_error);
+
+  /* "View.MemoryView":1262
+ * @cname('__pyx_memoryview_err')
+ * cdef int _err(object error, char *msg) except -1 with gil:
+ *     if msg != NULL:             # <<<<<<<<<<<<<<
+ *         raise error(msg.decode('ascii'))
+ *     else:
+ */
+  __pyx_t_1 = ((__pyx_v_msg != NULL) != 0);
+  if (unlikely(__pyx_t_1)) {
+
+    /* "View.MemoryView":1263
+ * cdef int _err(object error, char *msg) except -1 with gil:
+ *     if msg != NULL:
+ *         raise error(msg.decode('ascii'))             # <<<<<<<<<<<<<<
+ *     else:
+ *         raise error
+ */
+    __pyx_t_3 = __Pyx_decode_c_string(__pyx_v_msg, 0, strlen(__pyx_v_msg), NULL, NULL, PyUnicode_DecodeASCII); if (unlikely(!__pyx_t_3)) __PYX_ERR(2, 1263, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_3);
+    __Pyx_INCREF(__pyx_v_error);
+    __pyx_t_4 = __pyx_v_error; __pyx_t_5 = NULL;
+    if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_4))) {
+      __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_4);
+      if (likely(__pyx_t_5)) {
+        PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_4);
+        __Pyx_INCREF(__pyx_t_5);
+        __Pyx_INCREF(function);
+        __Pyx_DECREF_SET(__pyx_t_4, function);
+      }
+    }
+    __pyx_t_2 = (__pyx_t_5) ? __Pyx_PyObject_Call2Args(__pyx_t_4, __pyx_t_5, __pyx_t_3) : __Pyx_PyObject_CallOneArg(__pyx_t_4, __pyx_t_3);
+    __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0;
+    __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+    if (unlikely(!__pyx_t_2)) __PYX_ERR(2, 1263, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_2);
+    __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
+    __Pyx_Raise(__pyx_t_2, 0, 0, 0);
+    __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+    __PYX_ERR(2, 1263, __pyx_L1_error)
+
+    /* "View.MemoryView":1262
+ * @cname('__pyx_memoryview_err')
+ * cdef int _err(object error, char *msg) except -1 with gil:
+ *     if msg != NULL:             # <<<<<<<<<<<<<<
+ *         raise error(msg.decode('ascii'))
+ *     else:
+ */
+  }
+
+  /* "View.MemoryView":1265
+ *         raise error(msg.decode('ascii'))
+ *     else:
+ *         raise error             # <<<<<<<<<<<<<<
+ * 
+ * @cname('__pyx_memoryview_copy_contents')
+ */
+  /*else*/ {
+    __Pyx_Raise(__pyx_v_error, 0, 0, 0);
+    __PYX_ERR(2, 1265, __pyx_L1_error)
+  }
+
+  /* "View.MemoryView":1261
+ * 
+ * @cname('__pyx_memoryview_err')
+ * cdef int _err(object error, char *msg) except -1 with gil:             # <<<<<<<<<<<<<<
+ *     if msg != NULL:
+ *         raise error(msg.decode('ascii'))
+ */
+
+  /* function exit code */
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_2);
+  __Pyx_XDECREF(__pyx_t_3);
+  __Pyx_XDECREF(__pyx_t_4);
+  __Pyx_XDECREF(__pyx_t_5);
+  __Pyx_AddTraceback("View.MemoryView._err", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = -1;
+  __Pyx_XDECREF(__pyx_v_error);
+  __Pyx_RefNannyFinishContext();
+  #ifdef WITH_THREAD
+  __Pyx_PyGILState_Release(__pyx_gilstate_save);
+  #endif
+  return __pyx_r;
+}
+
+/* "View.MemoryView":1268
+ * 
+ * @cname('__pyx_memoryview_copy_contents')
+ * cdef int memoryview_copy_contents(__Pyx_memviewslice src,             # <<<<<<<<<<<<<<
+ *                                   __Pyx_memviewslice dst,
+ *                                   int src_ndim, int dst_ndim,
+ */
+
+static int __pyx_memoryview_copy_contents(__Pyx_memviewslice __pyx_v_src, __Pyx_memviewslice __pyx_v_dst, int __pyx_v_src_ndim, int __pyx_v_dst_ndim, int __pyx_v_dtype_is_object) {
+  void *__pyx_v_tmpdata;
+  size_t __pyx_v_itemsize;
+  int __pyx_v_i;
+  char __pyx_v_order;
+  int __pyx_v_broadcasting;
+  int __pyx_v_direct_copy;
+  __Pyx_memviewslice __pyx_v_tmp;
+  int __pyx_v_ndim;
+  int __pyx_r;
+  Py_ssize_t __pyx_t_1;
+  int __pyx_t_2;
+  int __pyx_t_3;
+  int __pyx_t_4;
+  int __pyx_t_5;
+  int __pyx_t_6;
+  void *__pyx_t_7;
+  int __pyx_t_8;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+
+  /* "View.MemoryView":1276
+ *     Check for overlapping memory and verify the shapes.
+ *     """
+ *     cdef void *tmpdata = NULL             # <<<<<<<<<<<<<<
+ *     cdef size_t itemsize = src.memview.view.itemsize
+ *     cdef int i
+ */
+  __pyx_v_tmpdata = NULL;
+
+  /* "View.MemoryView":1277
+ *     """
+ *     cdef void *tmpdata = NULL
+ *     cdef size_t itemsize = src.memview.view.itemsize             # <<<<<<<<<<<<<<
+ *     cdef int i
+ *     cdef char order = get_best_order(&src, src_ndim)
+ */
+  __pyx_t_1 = __pyx_v_src.memview->view.itemsize;
+  __pyx_v_itemsize = __pyx_t_1;
+
+  /* "View.MemoryView":1279
+ *     cdef size_t itemsize = src.memview.view.itemsize
+ *     cdef int i
+ *     cdef char order = get_best_order(&src, src_ndim)             # <<<<<<<<<<<<<<
+ *     cdef bint broadcasting = False
+ *     cdef bint direct_copy = False
+ */
+  __pyx_v_order = __pyx_get_best_slice_order((&__pyx_v_src), __pyx_v_src_ndim);
+
+  /* "View.MemoryView":1280
+ *     cdef int i
+ *     cdef char order = get_best_order(&src, src_ndim)
+ *     cdef bint broadcasting = False             # <<<<<<<<<<<<<<
+ *     cdef bint direct_copy = False
+ *     cdef __Pyx_memviewslice tmp
+ */
+  __pyx_v_broadcasting = 0;
+
+  /* "View.MemoryView":1281
+ *     cdef char order = get_best_order(&src, src_ndim)
+ *     cdef bint broadcasting = False
+ *     cdef bint direct_copy = False             # <<<<<<<<<<<<<<
+ *     cdef __Pyx_memviewslice tmp
+ * 
+ */
+  __pyx_v_direct_copy = 0;
+
+  /* "View.MemoryView":1284
+ *     cdef __Pyx_memviewslice tmp
+ * 
+ *     if src_ndim < dst_ndim:             # <<<<<<<<<<<<<<
+ *         broadcast_leading(&src, src_ndim, dst_ndim)
+ *     elif dst_ndim < src_ndim:
+ */
+  __pyx_t_2 = ((__pyx_v_src_ndim < __pyx_v_dst_ndim) != 0);
+  if (__pyx_t_2) {
+
+    /* "View.MemoryView":1285
+ * 
+ *     if src_ndim < dst_ndim:
+ *         broadcast_leading(&src, src_ndim, dst_ndim)             # <<<<<<<<<<<<<<
+ *     elif dst_ndim < src_ndim:
+ *         broadcast_leading(&dst, dst_ndim, src_ndim)
+ */
+    __pyx_memoryview_broadcast_leading((&__pyx_v_src), __pyx_v_src_ndim, __pyx_v_dst_ndim);
+
+    /* "View.MemoryView":1284
+ *     cdef __Pyx_memviewslice tmp
+ * 
+ *     if src_ndim < dst_ndim:             # <<<<<<<<<<<<<<
+ *         broadcast_leading(&src, src_ndim, dst_ndim)
+ *     elif dst_ndim < src_ndim:
+ */
+    goto __pyx_L3;
+  }
+
+  /* "View.MemoryView":1286
+ *     if src_ndim < dst_ndim:
+ *         broadcast_leading(&src, src_ndim, dst_ndim)
+ *     elif dst_ndim < src_ndim:             # <<<<<<<<<<<<<<
+ *         broadcast_leading(&dst, dst_ndim, src_ndim)
+ * 
+ */
+  __pyx_t_2 = ((__pyx_v_dst_ndim < __pyx_v_src_ndim) != 0);
+  if (__pyx_t_2) {
+
+    /* "View.MemoryView":1287
+ *         broadcast_leading(&src, src_ndim, dst_ndim)
+ *     elif dst_ndim < src_ndim:
+ *         broadcast_leading(&dst, dst_ndim, src_ndim)             # <<<<<<<<<<<<<<
+ * 
+ *     cdef int ndim = max(src_ndim, dst_ndim)
+ */
+    __pyx_memoryview_broadcast_leading((&__pyx_v_dst), __pyx_v_dst_ndim, __pyx_v_src_ndim);
+
+    /* "View.MemoryView":1286
+ *     if src_ndim < dst_ndim:
+ *         broadcast_leading(&src, src_ndim, dst_ndim)
+ *     elif dst_ndim < src_ndim:             # <<<<<<<<<<<<<<
+ *         broadcast_leading(&dst, dst_ndim, src_ndim)
+ * 
+ */
+  }
+  __pyx_L3:;
+
+  /* "View.MemoryView":1289
+ *         broadcast_leading(&dst, dst_ndim, src_ndim)
+ * 
+ *     cdef int ndim = max(src_ndim, dst_ndim)             # <<<<<<<<<<<<<<
+ * 
+ *     for i in range(ndim):
+ */
+  __pyx_t_3 = __pyx_v_dst_ndim;
+  __pyx_t_4 = __pyx_v_src_ndim;
+  if (((__pyx_t_3 > __pyx_t_4) != 0)) {
+    __pyx_t_5 = __pyx_t_3;
+  } else {
+    __pyx_t_5 = __pyx_t_4;
+  }
+  __pyx_v_ndim = __pyx_t_5;
+
+  /* "View.MemoryView":1291
+ *     cdef int ndim = max(src_ndim, dst_ndim)
+ * 
+ *     for i in range(ndim):             # <<<<<<<<<<<<<<
+ *         if src.shape[i] != dst.shape[i]:
+ *             if src.shape[i] == 1:
+ */
+  __pyx_t_5 = __pyx_v_ndim;
+  __pyx_t_3 = __pyx_t_5;
+  for (__pyx_t_4 = 0; __pyx_t_4 < __pyx_t_3; __pyx_t_4+=1) {
+    __pyx_v_i = __pyx_t_4;
+
+    /* "View.MemoryView":1292
+ * 
+ *     for i in range(ndim):
+ *         if src.shape[i] != dst.shape[i]:             # <<<<<<<<<<<<<<
+ *             if src.shape[i] == 1:
+ *                 broadcasting = True
+ */
+    __pyx_t_2 = (((__pyx_v_src.shape[__pyx_v_i]) != (__pyx_v_dst.shape[__pyx_v_i])) != 0);
+    if (__pyx_t_2) {
+
+      /* "View.MemoryView":1293
+ *     for i in range(ndim):
+ *         if src.shape[i] != dst.shape[i]:
+ *             if src.shape[i] == 1:             # <<<<<<<<<<<<<<
+ *                 broadcasting = True
+ *                 src.strides[i] = 0
+ */
+      __pyx_t_2 = (((__pyx_v_src.shape[__pyx_v_i]) == 1) != 0);
+      if (__pyx_t_2) {
+
+        /* "View.MemoryView":1294
+ *         if src.shape[i] != dst.shape[i]:
+ *             if src.shape[i] == 1:
+ *                 broadcasting = True             # <<<<<<<<<<<<<<
+ *                 src.strides[i] = 0
+ *             else:
+ */
+        __pyx_v_broadcasting = 1;
+
+        /* "View.MemoryView":1295
+ *             if src.shape[i] == 1:
+ *                 broadcasting = True
+ *                 src.strides[i] = 0             # <<<<<<<<<<<<<<
+ *             else:
+ *                 _err_extents(i, dst.shape[i], src.shape[i])
+ */
+        (__pyx_v_src.strides[__pyx_v_i]) = 0;
+
+        /* "View.MemoryView":1293
+ *     for i in range(ndim):
+ *         if src.shape[i] != dst.shape[i]:
+ *             if src.shape[i] == 1:             # <<<<<<<<<<<<<<
+ *                 broadcasting = True
+ *                 src.strides[i] = 0
+ */
+        goto __pyx_L7;
+      }
+
+      /* "View.MemoryView":1297
+ *                 src.strides[i] = 0
+ *             else:
+ *                 _err_extents(i, dst.shape[i], src.shape[i])             # <<<<<<<<<<<<<<
+ * 
+ *         if src.suboffsets[i] >= 0:
+ */
+      /*else*/ {
+        __pyx_t_6 = __pyx_memoryview_err_extents(__pyx_v_i, (__pyx_v_dst.shape[__pyx_v_i]), (__pyx_v_src.shape[__pyx_v_i])); if (unlikely(__pyx_t_6 == ((int)-1))) __PYX_ERR(2, 1297, __pyx_L1_error)
+      }
+      __pyx_L7:;
+
+      /* "View.MemoryView":1292
+ * 
+ *     for i in range(ndim):
+ *         if src.shape[i] != dst.shape[i]:             # <<<<<<<<<<<<<<
+ *             if src.shape[i] == 1:
+ *                 broadcasting = True
+ */
+    }
+
+    /* "View.MemoryView":1299
+ *                 _err_extents(i, dst.shape[i], src.shape[i])
+ * 
+ *         if src.suboffsets[i] >= 0:             # <<<<<<<<<<<<<<
+ *             _err_dim(ValueError, "Dimension %d is not direct", i)
+ * 
+ */
+    __pyx_t_2 = (((__pyx_v_src.suboffsets[__pyx_v_i]) >= 0) != 0);
+    if (__pyx_t_2) {
+
+      /* "View.MemoryView":1300
+ * 
+ *         if src.suboffsets[i] >= 0:
+ *             _err_dim(ValueError, "Dimension %d is not direct", i)             # <<<<<<<<<<<<<<
+ * 
+ *     if slices_overlap(&src, &dst, ndim, itemsize):
+ */
+      __pyx_t_6 = __pyx_memoryview_err_dim(__pyx_builtin_ValueError, ((char *)"Dimension %d is not direct"), __pyx_v_i); if (unlikely(__pyx_t_6 == ((int)-1))) __PYX_ERR(2, 1300, __pyx_L1_error)
+
+      /* "View.MemoryView":1299
+ *                 _err_extents(i, dst.shape[i], src.shape[i])
+ * 
+ *         if src.suboffsets[i] >= 0:             # <<<<<<<<<<<<<<
+ *             _err_dim(ValueError, "Dimension %d is not direct", i)
+ * 
+ */
+    }
+  }
+
+  /* "View.MemoryView":1302
+ *             _err_dim(ValueError, "Dimension %d is not direct", i)
+ * 
+ *     if slices_overlap(&src, &dst, ndim, itemsize):             # <<<<<<<<<<<<<<
+ * 
+ *         if not slice_is_contig(src, order, ndim):
+ */
+  __pyx_t_2 = (__pyx_slices_overlap((&__pyx_v_src), (&__pyx_v_dst), __pyx_v_ndim, __pyx_v_itemsize) != 0);
+  if (__pyx_t_2) {
+
+    /* "View.MemoryView":1304
+ *     if slices_overlap(&src, &dst, ndim, itemsize):
+ * 
+ *         if not slice_is_contig(src, order, ndim):             # <<<<<<<<<<<<<<
+ *             order = get_best_order(&dst, ndim)
+ * 
+ */
+    __pyx_t_2 = ((!(__pyx_memviewslice_is_contig(__pyx_v_src, __pyx_v_order, __pyx_v_ndim) != 0)) != 0);
+    if (__pyx_t_2) {
+
+      /* "View.MemoryView":1305
+ * 
+ *         if not slice_is_contig(src, order, ndim):
+ *             order = get_best_order(&dst, ndim)             # <<<<<<<<<<<<<<
+ * 
+ *         tmpdata = copy_data_to_temp(&src, &tmp, order, ndim)
+ */
+      __pyx_v_order = __pyx_get_best_slice_order((&__pyx_v_dst), __pyx_v_ndim);
+
+      /* "View.MemoryView":1304
+ *     if slices_overlap(&src, &dst, ndim, itemsize):
+ * 
+ *         if not slice_is_contig(src, order, ndim):             # <<<<<<<<<<<<<<
+ *             order = get_best_order(&dst, ndim)
+ * 
+ */
+    }
+
+    /* "View.MemoryView":1307
+ *             order = get_best_order(&dst, ndim)
+ * 
+ *         tmpdata = copy_data_to_temp(&src, &tmp, order, ndim)             # <<<<<<<<<<<<<<
+ *         src = tmp
+ * 
+ */
+    __pyx_t_7 = __pyx_memoryview_copy_data_to_temp((&__pyx_v_src), (&__pyx_v_tmp), __pyx_v_order, __pyx_v_ndim); if (unlikely(__pyx_t_7 == ((void *)NULL))) __PYX_ERR(2, 1307, __pyx_L1_error)
+    __pyx_v_tmpdata = __pyx_t_7;
+
+    /* "View.MemoryView":1308
+ * 
+ *         tmpdata = copy_data_to_temp(&src, &tmp, order, ndim)
+ *         src = tmp             # <<<<<<<<<<<<<<
+ * 
+ *     if not broadcasting:
+ */
+    __pyx_v_src = __pyx_v_tmp;
+
+    /* "View.MemoryView":1302
+ *             _err_dim(ValueError, "Dimension %d is not direct", i)
+ * 
+ *     if slices_overlap(&src, &dst, ndim, itemsize):             # <<<<<<<<<<<<<<
+ * 
+ *         if not slice_is_contig(src, order, ndim):
+ */
+  }
+
+  /* "View.MemoryView":1310
+ *         src = tmp
+ * 
+ *     if not broadcasting:             # <<<<<<<<<<<<<<
+ * 
+ * 
+ */
+  __pyx_t_2 = ((!(__pyx_v_broadcasting != 0)) != 0);
+  if (__pyx_t_2) {
+
+    /* "View.MemoryView":1313
+ * 
+ * 
+ *         if slice_is_contig(src, 'C', ndim):             # <<<<<<<<<<<<<<
+ *             direct_copy = slice_is_contig(dst, 'C', ndim)
+ *         elif slice_is_contig(src, 'F', ndim):
+ */
+    __pyx_t_2 = (__pyx_memviewslice_is_contig(__pyx_v_src, 'C', __pyx_v_ndim) != 0);
+    if (__pyx_t_2) {
+
+      /* "View.MemoryView":1314
+ * 
+ *         if slice_is_contig(src, 'C', ndim):
+ *             direct_copy = slice_is_contig(dst, 'C', ndim)             # <<<<<<<<<<<<<<
+ *         elif slice_is_contig(src, 'F', ndim):
+ *             direct_copy = slice_is_contig(dst, 'F', ndim)
+ */
+      __pyx_v_direct_copy = __pyx_memviewslice_is_contig(__pyx_v_dst, 'C', __pyx_v_ndim);
+
+      /* "View.MemoryView":1313
+ * 
+ * 
+ *         if slice_is_contig(src, 'C', ndim):             # <<<<<<<<<<<<<<
+ *             direct_copy = slice_is_contig(dst, 'C', ndim)
+ *         elif slice_is_contig(src, 'F', ndim):
+ */
+      goto __pyx_L12;
+    }
+
+    /* "View.MemoryView":1315
+ *         if slice_is_contig(src, 'C', ndim):
+ *             direct_copy = slice_is_contig(dst, 'C', ndim)
+ *         elif slice_is_contig(src, 'F', ndim):             # <<<<<<<<<<<<<<
+ *             direct_copy = slice_is_contig(dst, 'F', ndim)
+ * 
+ */
+    __pyx_t_2 = (__pyx_memviewslice_is_contig(__pyx_v_src, 'F', __pyx_v_ndim) != 0);
+    if (__pyx_t_2) {
+
+      /* "View.MemoryView":1316
+ *             direct_copy = slice_is_contig(dst, 'C', ndim)
+ *         elif slice_is_contig(src, 'F', ndim):
+ *             direct_copy = slice_is_contig(dst, 'F', ndim)             # <<<<<<<<<<<<<<
+ * 
+ *         if direct_copy:
+ */
+      __pyx_v_direct_copy = __pyx_memviewslice_is_contig(__pyx_v_dst, 'F', __pyx_v_ndim);
+
+      /* "View.MemoryView":1315
+ *         if slice_is_contig(src, 'C', ndim):
+ *             direct_copy = slice_is_contig(dst, 'C', ndim)
+ *         elif slice_is_contig(src, 'F', ndim):             # <<<<<<<<<<<<<<
+ *             direct_copy = slice_is_contig(dst, 'F', ndim)
+ * 
+ */
+    }
+    __pyx_L12:;
+
+    /* "View.MemoryView":1318
+ *             direct_copy = slice_is_contig(dst, 'F', ndim)
+ * 
+ *         if direct_copy:             # <<<<<<<<<<<<<<
+ * 
+ *             refcount_copying(&dst, dtype_is_object, ndim, False)
+ */
+    __pyx_t_2 = (__pyx_v_direct_copy != 0);
+    if (__pyx_t_2) {
+
+      /* "View.MemoryView":1320
+ *         if direct_copy:
+ * 
+ *             refcount_copying(&dst, dtype_is_object, ndim, False)             # <<<<<<<<<<<<<<
+ *             memcpy(dst.data, src.data, slice_get_size(&src, ndim))
+ *             refcount_copying(&dst, dtype_is_object, ndim, True)
+ */
+      __pyx_memoryview_refcount_copying((&__pyx_v_dst), __pyx_v_dtype_is_object, __pyx_v_ndim, 0);
+
+      /* "View.MemoryView":1321
+ * 
+ *             refcount_copying(&dst, dtype_is_object, ndim, False)
+ *             memcpy(dst.data, src.data, slice_get_size(&src, ndim))             # <<<<<<<<<<<<<<
+ *             refcount_copying(&dst, dtype_is_object, ndim, True)
+ *             free(tmpdata)
+ */
+      (void)(memcpy(__pyx_v_dst.data, __pyx_v_src.data, __pyx_memoryview_slice_get_size((&__pyx_v_src), __pyx_v_ndim)));
+
+      /* "View.MemoryView":1322
+ *             refcount_copying(&dst, dtype_is_object, ndim, False)
+ *             memcpy(dst.data, src.data, slice_get_size(&src, ndim))
+ *             refcount_copying(&dst, dtype_is_object, ndim, True)             # <<<<<<<<<<<<<<
+ *             free(tmpdata)
+ *             return 0
+ */
+      __pyx_memoryview_refcount_copying((&__pyx_v_dst), __pyx_v_dtype_is_object, __pyx_v_ndim, 1);
+
+      /* "View.MemoryView":1323
+ *             memcpy(dst.data, src.data, slice_get_size(&src, ndim))
+ *             refcount_copying(&dst, dtype_is_object, ndim, True)
+ *             free(tmpdata)             # <<<<<<<<<<<<<<
+ *             return 0
+ * 
+ */
+      free(__pyx_v_tmpdata);
+
+      /* "View.MemoryView":1324
+ *             refcount_copying(&dst, dtype_is_object, ndim, True)
+ *             free(tmpdata)
+ *             return 0             # <<<<<<<<<<<<<<
+ * 
+ *     if order == 'F' == get_best_order(&dst, ndim):
+ */
+      __pyx_r = 0;
+      goto __pyx_L0;
+
+      /* "View.MemoryView":1318
+ *             direct_copy = slice_is_contig(dst, 'F', ndim)
+ * 
+ *         if direct_copy:             # <<<<<<<<<<<<<<
+ * 
+ *             refcount_copying(&dst, dtype_is_object, ndim, False)
+ */
+    }
+
+    /* "View.MemoryView":1310
+ *         src = tmp
+ * 
+ *     if not broadcasting:             # <<<<<<<<<<<<<<
+ * 
+ * 
+ */
+  }
+
+  /* "View.MemoryView":1326
+ *             return 0
+ * 
+ *     if order == 'F' == get_best_order(&dst, ndim):             # <<<<<<<<<<<<<<
+ * 
+ * 
+ */
+  __pyx_t_2 = (__pyx_v_order == 'F');
+  if (__pyx_t_2) {
+    __pyx_t_2 = ('F' == __pyx_get_best_slice_order((&__pyx_v_dst), __pyx_v_ndim));
+  }
+  __pyx_t_8 = (__pyx_t_2 != 0);
+  if (__pyx_t_8) {
+
+    /* "View.MemoryView":1329
+ * 
+ * 
+ *         transpose_memslice(&src)             # <<<<<<<<<<<<<<
+ *         transpose_memslice(&dst)
+ * 
+ */
+    __pyx_t_5 = __pyx_memslice_transpose((&__pyx_v_src)); if (unlikely(__pyx_t_5 == ((int)0))) __PYX_ERR(2, 1329, __pyx_L1_error)
+
+    /* "View.MemoryView":1330
+ * 
+ *         transpose_memslice(&src)
+ *         transpose_memslice(&dst)             # <<<<<<<<<<<<<<
+ * 
+ *     refcount_copying(&dst, dtype_is_object, ndim, False)
+ */
+    __pyx_t_5 = __pyx_memslice_transpose((&__pyx_v_dst)); if (unlikely(__pyx_t_5 == ((int)0))) __PYX_ERR(2, 1330, __pyx_L1_error)
+
+    /* "View.MemoryView":1326
+ *             return 0
+ * 
+ *     if order == 'F' == get_best_order(&dst, ndim):             # <<<<<<<<<<<<<<
+ * 
+ * 
+ */
+  }
+
+  /* "View.MemoryView":1332
+ *         transpose_memslice(&dst)
+ * 
+ *     refcount_copying(&dst, dtype_is_object, ndim, False)             # <<<<<<<<<<<<<<
+ *     copy_strided_to_strided(&src, &dst, ndim, itemsize)
+ *     refcount_copying(&dst, dtype_is_object, ndim, True)
+ */
+  __pyx_memoryview_refcount_copying((&__pyx_v_dst), __pyx_v_dtype_is_object, __pyx_v_ndim, 0);
+
+  /* "View.MemoryView":1333
+ * 
+ *     refcount_copying(&dst, dtype_is_object, ndim, False)
+ *     copy_strided_to_strided(&src, &dst, ndim, itemsize)             # <<<<<<<<<<<<<<
+ *     refcount_copying(&dst, dtype_is_object, ndim, True)
+ * 
+ */
+  copy_strided_to_strided((&__pyx_v_src), (&__pyx_v_dst), __pyx_v_ndim, __pyx_v_itemsize);
+
+  /* "View.MemoryView":1334
+ *     refcount_copying(&dst, dtype_is_object, ndim, False)
+ *     copy_strided_to_strided(&src, &dst, ndim, itemsize)
+ *     refcount_copying(&dst, dtype_is_object, ndim, True)             # <<<<<<<<<<<<<<
+ * 
+ *     free(tmpdata)
+ */
+  __pyx_memoryview_refcount_copying((&__pyx_v_dst), __pyx_v_dtype_is_object, __pyx_v_ndim, 1);
+
+  /* "View.MemoryView":1336
+ *     refcount_copying(&dst, dtype_is_object, ndim, True)
+ * 
+ *     free(tmpdata)             # <<<<<<<<<<<<<<
+ *     return 0
+ * 
+ */
+  free(__pyx_v_tmpdata);
+
+  /* "View.MemoryView":1337
+ * 
+ *     free(tmpdata)
+ *     return 0             # <<<<<<<<<<<<<<
+ * 
+ * @cname('__pyx_memoryview_broadcast_leading')
+ */
+  __pyx_r = 0;
+  goto __pyx_L0;
+
+  /* "View.MemoryView":1268
+ * 
+ * @cname('__pyx_memoryview_copy_contents')
+ * cdef int memoryview_copy_contents(__Pyx_memviewslice src,             # <<<<<<<<<<<<<<
+ *                                   __Pyx_memviewslice dst,
+ *                                   int src_ndim, int dst_ndim,
+ */
+
+  /* function exit code */
+  __pyx_L1_error:;
+  {
+    #ifdef WITH_THREAD
+    PyGILState_STATE __pyx_gilstate_save = __Pyx_PyGILState_Ensure();
+    #endif
+    __Pyx_AddTraceback("View.MemoryView.memoryview_copy_contents", __pyx_clineno, __pyx_lineno, __pyx_filename);
+    #ifdef WITH_THREAD
+    __Pyx_PyGILState_Release(__pyx_gilstate_save);
+    #endif
+  }
+  __pyx_r = -1;
+  __pyx_L0:;
+  return __pyx_r;
+}
+
+/* "View.MemoryView":1340
+ * 
+ * @cname('__pyx_memoryview_broadcast_leading')
+ * cdef void broadcast_leading(__Pyx_memviewslice *mslice,             # <<<<<<<<<<<<<<
+ *                             int ndim,
+ *                             int ndim_other) nogil:
+ */
+
+static void __pyx_memoryview_broadcast_leading(__Pyx_memviewslice *__pyx_v_mslice, int __pyx_v_ndim, int __pyx_v_ndim_other) {
+  /* Shift the first `ndim` shape/strides/suboffsets entries of `mslice`
+   * up by (ndim_other - ndim) positions, then overwrite the vacated
+   * leading entries with placeholder dimensions of extent 1.
+   *
+   *   __pyx_v_mslice     - slice descriptor, edited in place
+   *   __pyx_v_ndim       - number of entries currently in use
+   *   __pyx_v_ndim_other - number of entries to pad out to
+   *
+   * Returns nothing. Each loop runs zero times when its bound is not
+   * positive.
+   */
+  const int shift = __pyx_v_ndim_other - __pyx_v_ndim;
+  int from;
+  int lead;
+
+  /* Iterate from the last used entry down to the first; with a positive
+   * shift every entry is therefore read before a later (lower-index)
+   * iteration can write over it. */
+  for (from = __pyx_v_ndim - 1; from >= 0; --from) {
+    const int to = from + shift;
+
+    __pyx_v_mslice->shape[to] = __pyx_v_mslice->shape[from];
+    __pyx_v_mslice->strides[to] = __pyx_v_mslice->strides[from];
+    __pyx_v_mslice->suboffsets[to] = __pyx_v_mslice->suboffsets[from];
+  }
+
+  /* Leading placeholders: extent 1, whatever stride entry 0 holds at the
+   * time of the assignment, and a suboffset of -1. */
+  for (lead = 0; lead < shift; ++lead) {
+    __pyx_v_mslice->shape[lead] = 1;
+    __pyx_v_mslice->strides[lead] = __pyx_v_mslice->strides[0];
+    __pyx_v_mslice->suboffsets[lead] = -1L;
+  }
+}
+
+/* "View.MemoryView":1362
+ * 
+ * @cname('__pyx_memoryview_refcount_copying')
+ * cdef void refcount_copying(__Pyx_memviewslice *dst, bint dtype_is_object,             # <<<<<<<<<<<<<<
+ *                            int ndim, bint inc) nogil:
+ * 
+ */
+
+static void __pyx_memoryview_refcount_copying(__Pyx_memviewslice *__pyx_v_dst, int __pyx_v_dtype_is_object, int __pyx_v_ndim, int __pyx_v_inc) {
+  /* Adjust reference counts for the elements of `dst`, but only when the
+   * caller says the slice holds objects.
+   *
+   *   __pyx_v_dst             - slice whose data/shape/strides are forwarded
+   *   __pyx_v_dtype_is_object - zero means there is nothing to do
+   *   __pyx_v_ndim            - number of dimensions to walk
+   *   __pyx_v_inc             - forwarded unchanged to the GIL-acquiring helper
+   *
+   * Returns nothing.
+   */
+  if (!__pyx_v_dtype_is_object) {
+    return;
+  }
+
+  __pyx_memoryview_refcount_objects_in_slice_with_gil(
+      __pyx_v_dst->data,
+      __pyx_v_dst->shape,
+      __pyx_v_dst->strides,
+      __pyx_v_ndim,
+      __pyx_v_inc);
+}
+
+/* "View.MemoryView":1371
+ * 
+ * @cname('__pyx_memoryview_refcount_objects_in_slice_with_gil')
+ * cdef void refcount_objects_in_slice_with_gil(char *data, Py_ssize_t *shape,             # <<<<<<<<<<<<<<
+ *                                              Py_ssize_t *strides, int ndim,
+ *                                              bint inc) with gil:
+ */
+
+static void __pyx_memoryview_refcount_objects_in_slice_with_gil(char *__pyx_v_data, Py_ssize_t *__pyx_v_shape, Py_ssize_t *__pyx_v_strides, int __pyx_v_ndim, int __pyx_v_inc) {
+  /* Wrapper that forwards all five arguments, unchanged, to
+   * __pyx_memoryview_refcount_objects_in_slice. When WITH_THREAD is
+   * defined the call is bracketed by __Pyx_PyGILState_Ensure() /
+   * __Pyx_PyGILState_Release(); without WITH_THREAD it is a plain
+   * pass-through. Returns nothing.
+   */
+  __Pyx_RefNannyDeclarations
+  #ifdef WITH_THREAD
+  /* Acquire the GIL state first; it is handed back at the very end. */
+  PyGILState_STATE __pyx_gilstate_save = __Pyx_PyGILState_Ensure();
+  #endif
+  __Pyx_RefNannySetupContext("refcount_objects_in_slice_with_gil", 0);
+
+  /* "View.MemoryView":1374
+   *     refcount_objects_in_slice(data, shape, strides, ndim, inc)
+   *
+   * The actual work: delegate to the helper that walks the slice.
+   */
+  __pyx_memoryview_refcount_objects_in_slice(__pyx_v_data, __pyx_v_shape, __pyx_v_strides, __pyx_v_ndim, __pyx_v_inc);
+
+  /* function exit code: close the RefNanny context before releasing the
+   * GIL state saved on entry. */
+  __Pyx_RefNannyFinishContext();
+  #ifdef WITH_THREAD
+  __Pyx_PyGILState_Release(__pyx_gilstate_save);
+  #endif
+}
+
+/* "View.MemoryView":1377
+ * 
+ * @cname('__pyx_memoryview_refcount_objects_in_slice')
+ * cdef void refcount_objects_in_slice(char *data, Py_ssize_t *shape,             # <<<<<<<<<<<<<<
+ *                                     Py_ssize_t *strides, int ndim, bint inc):
+ *     cdef Py_ssize_t i
+ */
+
+static void __pyx_memoryview_refcount_objects_in_slice(char *__pyx_v_data, Py_ssize_t *__pyx_v_shape, Py_ssize_t *__pyx_v_strides, int __pyx_v_ndim, int __pyx_v_inc) {
+  /* Recursively walk a strided block of PyObject* slots and bump or drop
+   * the reference count of the object stored in each innermost slot.
+   *
+   *   __pyx_v_data    - address of the first slot at this nesting level
+   *   __pyx_v_shape   - extents, outermost dimension first
+   *   __pyx_v_strides - byte steps matching __pyx_v_shape
+   *   __pyx_v_ndim    - dimensions left to descend; 1 means innermost
+   *   __pyx_v_inc     - non-zero: Py_INCREF each object, zero: Py_DECREF
+   *
+   * Returns nothing.
+   */
+  Py_ssize_t remaining;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("refcount_objects_in_slice", 0);
+
+  /* The extent is sampled once up front; the stride is re-read from
+   * strides[0] on every step. */
+  for (remaining = __pyx_v_shape[0]; remaining > 0; --remaining) {
+    if (__pyx_v_ndim != 1) {
+      /* Outer dimension: descend with the head of shape/strides removed. */
+      __pyx_memoryview_refcount_objects_in_slice(
+          __pyx_v_data,
+          __pyx_v_shape + 1,
+          __pyx_v_strides + 1,
+          __pyx_v_ndim - 1,
+          __pyx_v_inc);
+    } else if (__pyx_v_inc) {
+      Py_INCREF((((PyObject **)__pyx_v_data)[0]));
+    } else {
+      Py_DECREF((((PyObject **)__pyx_v_data)[0]));
+    }
+
+    /* Step to the next slot (or sub-block) along this dimension. */
+    __pyx_v_data += __pyx_v_strides[0];
+  }
+
+  __Pyx_RefNannyFinishContext();
+}
+
+/* "View.MemoryView":1397
+ * 
+ * @cname('__pyx_memoryview_slice_assign_scalar')
+ * cdef void slice_assign_scalar(__Pyx_memviewslice *dst, int ndim,             # <<<<<<<<<<<<<<
+ *                               size_t itemsize, void *item,
+ *                               bint dtype_is_object) nogil:
+ */
+
+static void __pyx_memoryview_slice_assign_scalar(__Pyx_memviewslice *__pyx_v_dst, int __pyx_v_ndim, size_t __pyx_v_itemsize, void *__pyx_v_item, int __pyx_v_dtype_is_object) {
+  /* Fill every element of `dst` with a copy of the `itemsize` bytes at
+   * `item`, with a reference-count pass before and after the fill.
+   *
+   *   __pyx_v_dst             - destination slice
+   *   __pyx_v_ndim            - number of dimensions of the slice
+   *   __pyx_v_itemsize        - bytes copied into each element
+   *   __pyx_v_item            - source bytes for the scalar
+   *   __pyx_v_dtype_is_object - forwarded to the reference-count passes,
+   *                             which do nothing when it is zero
+   *
+   * Returns nothing.
+   */
+  const int drop_refs = 0;  /* `inc` flag for the pass before the fill */
+  const int take_refs = 1;  /* `inc` flag for the pass after the fill */
+
+  __pyx_memoryview_refcount_copying(__pyx_v_dst, __pyx_v_dtype_is_object, __pyx_v_ndim, drop_refs);
+
+  __pyx_memoryview__slice_assign_scalar(
+      __pyx_v_dst->data,
+      __pyx_v_dst->shape,
+      __pyx_v_dst->strides,
+      __pyx_v_ndim,
+      __pyx_v_itemsize,
+      __pyx_v_item);
+
+  __pyx_memoryview_refcount_copying(__pyx_v_dst, __pyx_v_dtype_is_object, __pyx_v_ndim, take_refs);
+}
+
+/* "View.MemoryView":1407
+ * 
+ * @cname('__pyx_memoryview__slice_assign_scalar')
+ * cdef void _slice_assign_scalar(char *data, Py_ssize_t *shape,             # <<<<<<<<<<<<<<
+ *                               Py_ssize_t *strides, int ndim,
+ *                               size_t itemsize, void *item) nogil:
+ */
+
+static void __pyx_memoryview__slice_assign_scalar(char *__pyx_v_data, Py_ssize_t *__pyx_v_shape, Py_ssize_t *__pyx_v_strides, int __pyx_v_ndim, size_t __pyx_v_itemsize, void *__pyx_v_item) {
+  /* Recursive worker that writes `itemsize` bytes from `item` into every
+   * element of a strided block.
+   *
+   *   __pyx_v_data     - address of the first element at this level
+   *   __pyx_v_shape    - extents, outermost dimension first
+   *   __pyx_v_strides  - byte steps matching __pyx_v_shape
+   *   __pyx_v_ndim     - dimensions left to descend; 1 means innermost
+   *   __pyx_v_itemsize - bytes copied per element
+   *   __pyx_v_item     - source bytes
+   *
+   * Returns nothing. Nothing is written when shape[0] is not positive.
+   */
+  const Py_ssize_t step = __pyx_v_strides[0];
+  const Py_ssize_t count = __pyx_v_shape[0];
+  Py_ssize_t k;
+
+  for (k = 0; k < count; ++k, __pyx_v_data += step) {
+    if (__pyx_v_ndim == 1) {
+      /* Innermost dimension: copy the scalar's bytes into this element. */
+      (void)(memcpy(__pyx_v_data, __pyx_v_item, __pyx_v_itemsize));
+    } else {
+      /* Outer dimension: fill the sub-block that starts at this position. */
+      __pyx_memoryview__slice_assign_scalar(
+          __pyx_v_data,
+          __pyx_v_shape + 1,
+          __pyx_v_strides + 1,
+          __pyx_v_ndim - 1,
+          __pyx_v_itemsize,
+          __pyx_v_item);
+    }
+  }
+}
+
+/* "(tree fragment)":1
+ * def __pyx_unpickle_Enum(__pyx_type, long __pyx_checksum, __pyx_state):             # <<<<<<<<<<<<<<
+ *     cdef object __pyx_PickleError
+ *     cdef object __pyx_result
+ */
+
+/* Python wrapper */
+/* Forward declaration of the wrapper function defined immediately below,
+ * so the method-table entry can take its address. */
+static PyObject *__pyx_pw_15View_dot_MemoryView_1__pyx_unpickle_Enum(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
+/* Method-definition record for the wrapper: exposed under the name
+ * "__pyx_unpickle_Enum", accepts positional and keyword arguments
+ * (METH_VARARGS|METH_KEYWORDS), and carries no docstring (last field 0). */
+static PyMethodDef __pyx_mdef_15View_dot_MemoryView_1__pyx_unpickle_Enum = {"__pyx_unpickle_Enum", (PyCFunction)(void*)(PyCFunctionWithKeywords)__pyx_pw_15View_dot_MemoryView_1__pyx_unpickle_Enum, METH_VARARGS|METH_KEYWORDS, 0};
+static PyObject *__pyx_pw_15View_dot_MemoryView_1__pyx_unpickle_Enum(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
+  PyObject *__pyx_v___pyx_type = 0;
+  long __pyx_v___pyx_checksum;
+  PyObject *__pyx_v___pyx_state = 0;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  PyObject *__pyx_r = 0;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("__pyx_unpickle_Enum (wrapper)", 0);
+  {
+    static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_pyx_type,&__pyx_n_s_pyx_checksum,&__pyx_n_s_pyx_state,0};
+    PyObject* values[3] = {0,0,0};
+    if (unlikely(__pyx_kwds)) {
+      Py_ssize_t kw_args;
+      const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args);
+      switch (pos_args) {
+        case  3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
+        CYTHON_FALLTHROUGH;
+        case  2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
+        CYTHON_FALLTHROUGH;
+        case  1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
+        CYTHON_FALLTHROUGH;
+        case  0: break;
+        default: goto __pyx_L5_argtuple_error;
+      }
+      kw_args = PyDict_Size(__pyx_kwds);
+      switch (pos_args) {
+        case  0:
+        if (likely((values[0] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_pyx_type)) != 0)) kw_args--;
+        else goto __pyx_L5_argtuple_error;
+        CYTHON_FALLTHROUGH;
+        case  1:
+        if (likely((values[1] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_pyx_checksum)) != 0)) kw_args--;
+        else {
+          __Pyx_RaiseArgtupleInvalid("__pyx_unpickle_Enum", 1, 3, 3, 1); __PYX_ERR(2, 1, __pyx_L3_error)
+        }
+        CYTHON_FALLTHROUGH;
+        case  2:
+        if (likely((values[2] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_pyx_state)) != 0)) kw_args--;
+        else {
+          __Pyx_RaiseArgtupleInvalid("__pyx_unpickle_Enum", 1, 3, 3, 2); __PYX_ERR(2, 1, __pyx_L3_error)
+        }
+      }
+      if (unlikely(kw_args > 0)) {
+        if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "__pyx_unpickle_Enum") < 0)) __PYX_ERR(2, 1, __pyx_L3_error)
+      }
+    } else if (PyTuple_GET_SIZE(__pyx_args) != 3) {
+      goto __pyx_L5_argtuple_error;
+    } else {
+      values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
+      values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
+      values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
+    }
+    __pyx_v___pyx_type = values[0];
+    __pyx_v___pyx_checksum = __Pyx_PyInt_As_long(values[1]); if (unlikely((__pyx_v___pyx_checksum == (long)-1) && PyErr_Occurred())) __PYX_ERR(2, 1, __pyx_L3_error)
+    __pyx_v___pyx_state = values[2];
+  }
+  goto __pyx_L4_argument_unpacking_done;
+  __pyx_L5_argtuple_error:;
+  __Pyx_RaiseArgtupleInvalid("__pyx_unpickle_Enum", 1, 3, 3, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(2, 1, __pyx_L3_error)
+  __pyx_L3_error:;
+  __Pyx_AddTraceback("View.MemoryView.__pyx_unpickle_Enum", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __Pyx_RefNannyFinishContext();
+  return NULL;
+  __pyx_L4_argument_unpacking_done:;
+  __pyx_r = __pyx_pf_15View_dot_MemoryView___pyx_unpickle_Enum(__pyx_self, __pyx_v___pyx_type, __pyx_v___pyx_checksum, __pyx_v___pyx_state);
+
+  /* function exit code */
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+static PyObject *__pyx_pf_15View_dot_MemoryView___pyx_unpickle_Enum(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v___pyx_type, long __pyx_v___pyx_checksum, PyObject *__pyx_v___pyx_state) {
+  PyObject *__pyx_v___pyx_PickleError = 0;
+  PyObject *__pyx_v___pyx_result = 0;
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  int __pyx_t_1;
+  PyObject *__pyx_t_2 = NULL;
+  PyObject *__pyx_t_3 = NULL;
+  PyObject *__pyx_t_4 = NULL;
+  PyObject *__pyx_t_5 = NULL;
+  int __pyx_t_6;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("__pyx_unpickle_Enum", 0);
+
+  /* "(tree fragment)":4
+ *     cdef object __pyx_PickleError
+ *     cdef object __pyx_result
+ *     if __pyx_checksum != 0xb068931:             # <<<<<<<<<<<<<<
+ *         from pickle import PickleError as __pyx_PickleError
+ *         raise __pyx_PickleError("Incompatible checksums (%s vs 0xb068931 = (name))" % __pyx_checksum)
+ */
+  __pyx_t_1 = ((__pyx_v___pyx_checksum != 0xb068931) != 0);
+  if (__pyx_t_1) {
+
+    /* "(tree fragment)":5
+ *     cdef object __pyx_result
+ *     if __pyx_checksum != 0xb068931:
+ *         from pickle import PickleError as __pyx_PickleError             # <<<<<<<<<<<<<<
+ *         raise __pyx_PickleError("Incompatible checksums (%s vs 0xb068931 = (name))" % __pyx_checksum)
+ *     __pyx_result = Enum.__new__(__pyx_type)
+ */
+    __pyx_t_2 = PyList_New(1); if (unlikely(!__pyx_t_2)) __PYX_ERR(2, 5, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_2);
+    __Pyx_INCREF(__pyx_n_s_PickleError);
+    __Pyx_GIVEREF(__pyx_n_s_PickleError);
+    PyList_SET_ITEM(__pyx_t_2, 0, __pyx_n_s_PickleError);
+    __pyx_t_3 = __Pyx_Import(__pyx_n_s_pickle, __pyx_t_2, 0); if (unlikely(!__pyx_t_3)) __PYX_ERR(2, 5, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_3);
+    __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+    __pyx_t_2 = __Pyx_ImportFrom(__pyx_t_3, __pyx_n_s_PickleError); if (unlikely(!__pyx_t_2)) __PYX_ERR(2, 5, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_2);
+    __Pyx_INCREF(__pyx_t_2);
+    __pyx_v___pyx_PickleError = __pyx_t_2;
+    __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+    __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+
+    /* "(tree fragment)":6
+ *     if __pyx_checksum != 0xb068931:
+ *         from pickle import PickleError as __pyx_PickleError
+ *         raise __pyx_PickleError("Incompatible checksums (%s vs 0xb068931 = (name))" % __pyx_checksum)             # <<<<<<<<<<<<<<
+ *     __pyx_result = Enum.__new__(__pyx_type)
+ *     if __pyx_state is not None:
+ */
+    __pyx_t_2 = __Pyx_PyInt_From_long(__pyx_v___pyx_checksum); if (unlikely(!__pyx_t_2)) __PYX_ERR(2, 6, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_2);
+    __pyx_t_4 = __Pyx_PyString_Format(__pyx_kp_s_Incompatible_checksums_s_vs_0xb0, __pyx_t_2); if (unlikely(!__pyx_t_4)) __PYX_ERR(2, 6, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_4);
+    __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+    __Pyx_INCREF(__pyx_v___pyx_PickleError);
+    __pyx_t_2 = __pyx_v___pyx_PickleError; __pyx_t_5 = NULL;
+    if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_2))) {
+      __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_2);
+      if (likely(__pyx_t_5)) {
+        PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_2);
+        __Pyx_INCREF(__pyx_t_5);
+        __Pyx_INCREF(function);
+        __Pyx_DECREF_SET(__pyx_t_2, function);
+      }
+    }
+    __pyx_t_3 = (__pyx_t_5) ? __Pyx_PyObject_Call2Args(__pyx_t_2, __pyx_t_5, __pyx_t_4) : __Pyx_PyObject_CallOneArg(__pyx_t_2, __pyx_t_4);
+    __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0;
+    __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
+    if (unlikely(!__pyx_t_3)) __PYX_ERR(2, 6, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_3);
+    __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+    __Pyx_Raise(__pyx_t_3, 0, 0, 0);
+    __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+    __PYX_ERR(2, 6, __pyx_L1_error)
+
+    /* "(tree fragment)":4
+ *     cdef object __pyx_PickleError
+ *     cdef object __pyx_result
+ *     if __pyx_checksum != 0xb068931:             # <<<<<<<<<<<<<<
+ *         from pickle import PickleError as __pyx_PickleError
+ *         raise __pyx_PickleError("Incompatible checksums (%s vs 0xb068931 = (name))" % __pyx_checksum)
+ */
+  }
+
+  /* "(tree fragment)":7
+ *         from pickle import PickleError as __pyx_PickleError
+ *         raise __pyx_PickleError("Incompatible checksums (%s vs 0xb068931 = (name))" % __pyx_checksum)
+ *     __pyx_result = Enum.__new__(__pyx_type)             # <<<<<<<<<<<<<<
+ *     if __pyx_state is not None:
+ *         __pyx_unpickle_Enum__set_state(<Enum> __pyx_result, __pyx_state)
+ */
+  __pyx_t_2 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_MemviewEnum_type), __pyx_n_s_new); if (unlikely(!__pyx_t_2)) __PYX_ERR(2, 7, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_2);
+  __pyx_t_4 = NULL;
+  if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_2))) {
+    __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_2);
+    if (likely(__pyx_t_4)) {
+      PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_2);
+      __Pyx_INCREF(__pyx_t_4);
+      __Pyx_INCREF(function);
+      __Pyx_DECREF_SET(__pyx_t_2, function);
+    }
+  }
+  __pyx_t_3 = (__pyx_t_4) ? __Pyx_PyObject_Call2Args(__pyx_t_2, __pyx_t_4, __pyx_v___pyx_type) : __Pyx_PyObject_CallOneArg(__pyx_t_2, __pyx_v___pyx_type);
+  __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0;
+  if (unlikely(!__pyx_t_3)) __PYX_ERR(2, 7, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_3);
+  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+  __pyx_v___pyx_result = __pyx_t_3;
+  __pyx_t_3 = 0;
+
+  /* "(tree fragment)":8
+ *         raise __pyx_PickleError("Incompatible checksums (%s vs 0xb068931 = (name))" % __pyx_checksum)
+ *     __pyx_result = Enum.__new__(__pyx_type)
+ *     if __pyx_state is not None:             # <<<<<<<<<<<<<<
+ *         __pyx_unpickle_Enum__set_state(<Enum> __pyx_result, __pyx_state)
+ *     return __pyx_result
+ */
+  __pyx_t_1 = (__pyx_v___pyx_state != Py_None);
+  __pyx_t_6 = (__pyx_t_1 != 0);
+  if (__pyx_t_6) {
+
+    /* "(tree fragment)":9
+ *     __pyx_result = Enum.__new__(__pyx_type)
+ *     if __pyx_state is not None:
+ *         __pyx_unpickle_Enum__set_state(<Enum> __pyx_result, __pyx_state)             # <<<<<<<<<<<<<<
+ *     return __pyx_result
+ * cdef __pyx_unpickle_Enum__set_state(Enum __pyx_result, tuple __pyx_state):
+ */
+    if (!(likely(PyTuple_CheckExact(__pyx_v___pyx_state))||((__pyx_v___pyx_state) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "tuple", Py_TYPE(__pyx_v___pyx_state)->tp_name), 0))) __PYX_ERR(2, 9, __pyx_L1_error)
+    __pyx_t_3 = __pyx_unpickle_Enum__set_state(((struct __pyx_MemviewEnum_obj *)__pyx_v___pyx_result), ((PyObject*)__pyx_v___pyx_state)); if (unlikely(!__pyx_t_3)) __PYX_ERR(2, 9, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_3);
+    __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+
+    /* "(tree fragment)":8
+ *         raise __pyx_PickleError("Incompatible checksums (%s vs 0xb068931 = (name))" % __pyx_checksum)
+ *     __pyx_result = Enum.__new__(__pyx_type)
+ *     if __pyx_state is not None:             # <<<<<<<<<<<<<<
+ *         __pyx_unpickle_Enum__set_state(<Enum> __pyx_result, __pyx_state)
+ *     return __pyx_result
+ */
+  }
+
+  /* "(tree fragment)":10
+ *     if __pyx_state is not None:
+ *         __pyx_unpickle_Enum__set_state(<Enum> __pyx_result, __pyx_state)
+ *     return __pyx_result             # <<<<<<<<<<<<<<
+ * cdef __pyx_unpickle_Enum__set_state(Enum __pyx_result, tuple __pyx_state):
+ *     __pyx_result.name = __pyx_state[0]
+ */
+  __Pyx_XDECREF(__pyx_r);
+  __Pyx_INCREF(__pyx_v___pyx_result);
+  __pyx_r = __pyx_v___pyx_result;
+  goto __pyx_L0;
+
+  /* "(tree fragment)":1
+ * def __pyx_unpickle_Enum(__pyx_type, long __pyx_checksum, __pyx_state):             # <<<<<<<<<<<<<<
+ *     cdef object __pyx_PickleError
+ *     cdef object __pyx_result
+ */
+
+  /* function exit code */
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_2);
+  __Pyx_XDECREF(__pyx_t_3);
+  __Pyx_XDECREF(__pyx_t_4);
+  __Pyx_XDECREF(__pyx_t_5);
+  __Pyx_AddTraceback("View.MemoryView.__pyx_unpickle_Enum", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = NULL;
+  __pyx_L0:;
+  __Pyx_XDECREF(__pyx_v___pyx_PickleError);
+  __Pyx_XDECREF(__pyx_v___pyx_result);
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "(tree fragment)":11
+ *         __pyx_unpickle_Enum__set_state(<Enum> __pyx_result, __pyx_state)
+ *     return __pyx_result
+ * cdef __pyx_unpickle_Enum__set_state(Enum __pyx_result, tuple __pyx_state):             # <<<<<<<<<<<<<<
+ *     __pyx_result.name = __pyx_state[0]
+ *     if len(__pyx_state) > 1 and hasattr(__pyx_result, '__dict__'):
+ */
+
+static PyObject *__pyx_unpickle_Enum__set_state(struct __pyx_MemviewEnum_obj *__pyx_v___pyx_result, PyObject *__pyx_v___pyx_state) {
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  PyObject *__pyx_t_1 = NULL;
+  int __pyx_t_2;
+  Py_ssize_t __pyx_t_3;
+  int __pyx_t_4;
+  int __pyx_t_5;
+  PyObject *__pyx_t_6 = NULL;
+  PyObject *__pyx_t_7 = NULL;
+  PyObject *__pyx_t_8 = NULL;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("__pyx_unpickle_Enum__set_state", 0);
+
+  /* "(tree fragment)":12
+ *     return __pyx_result
+ * cdef __pyx_unpickle_Enum__set_state(Enum __pyx_result, tuple __pyx_state):
+ *     __pyx_result.name = __pyx_state[0]             # <<<<<<<<<<<<<<
+ *     if len(__pyx_state) > 1 and hasattr(__pyx_result, '__dict__'):
+ *         __pyx_result.__dict__.update(__pyx_state[1])
+ */
+  if (unlikely(__pyx_v___pyx_state == Py_None)) {
+    PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable");
+    __PYX_ERR(2, 12, __pyx_L1_error)
+  }
+  __pyx_t_1 = __Pyx_GetItemInt_Tuple(__pyx_v___pyx_state, 0, long, 1, __Pyx_PyInt_From_long, 0, 0, 1); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 12, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_1);
+  __Pyx_GIVEREF(__pyx_t_1);
+  __Pyx_GOTREF(__pyx_v___pyx_result->name);
+  __Pyx_DECREF(__pyx_v___pyx_result->name);
+  __pyx_v___pyx_result->name = __pyx_t_1;
+  __pyx_t_1 = 0;
+
+  /* "(tree fragment)":13
+ * cdef __pyx_unpickle_Enum__set_state(Enum __pyx_result, tuple __pyx_state):
+ *     __pyx_result.name = __pyx_state[0]
+ *     if len(__pyx_state) > 1 and hasattr(__pyx_result, '__dict__'):             # <<<<<<<<<<<<<<
+ *         __pyx_result.__dict__.update(__pyx_state[1])
+ */
+  if (unlikely(__pyx_v___pyx_state == Py_None)) {
+    PyErr_SetString(PyExc_TypeError, "object of type 'NoneType' has no len()");
+    __PYX_ERR(2, 13, __pyx_L1_error)
+  }
+  __pyx_t_3 = PyTuple_GET_SIZE(__pyx_v___pyx_state); if (unlikely(__pyx_t_3 == ((Py_ssize_t)-1))) __PYX_ERR(2, 13, __pyx_L1_error)
+  __pyx_t_4 = ((__pyx_t_3 > 1) != 0);
+  if (__pyx_t_4) {
+  } else {
+    __pyx_t_2 = __pyx_t_4;
+    goto __pyx_L4_bool_binop_done;
+  }
+  __pyx_t_4 = __Pyx_HasAttr(((PyObject *)__pyx_v___pyx_result), __pyx_n_s_dict); if (unlikely(__pyx_t_4 == ((int)-1))) __PYX_ERR(2, 13, __pyx_L1_error)
+  __pyx_t_5 = (__pyx_t_4 != 0);
+  __pyx_t_2 = __pyx_t_5;
+  __pyx_L4_bool_binop_done:;
+  if (__pyx_t_2) {
+
+    /* "(tree fragment)":14
+ *     __pyx_result.name = __pyx_state[0]
+ *     if len(__pyx_state) > 1 and hasattr(__pyx_result, '__dict__'):
+ *         __pyx_result.__dict__.update(__pyx_state[1])             # <<<<<<<<<<<<<<
+ */
+    __pyx_t_6 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v___pyx_result), __pyx_n_s_dict); if (unlikely(!__pyx_t_6)) __PYX_ERR(2, 14, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_6);
+    __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_update); if (unlikely(!__pyx_t_7)) __PYX_ERR(2, 14, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_7);
+    __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
+    if (unlikely(__pyx_v___pyx_state == Py_None)) {
+      PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable");
+      __PYX_ERR(2, 14, __pyx_L1_error)
+    }
+    __pyx_t_6 = __Pyx_GetItemInt_Tuple(__pyx_v___pyx_state, 1, long, 1, __Pyx_PyInt_From_long, 0, 0, 1); if (unlikely(!__pyx_t_6)) __PYX_ERR(2, 14, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_6);
+    __pyx_t_8 = NULL;
+    if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_7))) {
+      __pyx_t_8 = PyMethod_GET_SELF(__pyx_t_7);
+      if (likely(__pyx_t_8)) {
+        PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_7);
+        __Pyx_INCREF(__pyx_t_8);
+        __Pyx_INCREF(function);
+        __Pyx_DECREF_SET(__pyx_t_7, function);
+      }
+    }
+    __pyx_t_1 = (__pyx_t_8) ? __Pyx_PyObject_Call2Args(__pyx_t_7, __pyx_t_8, __pyx_t_6) : __Pyx_PyObject_CallOneArg(__pyx_t_7, __pyx_t_6);
+    __Pyx_XDECREF(__pyx_t_8); __pyx_t_8 = 0;
+    __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
+    if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 14, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_1);
+    __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
+    __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+
+    /* "(tree fragment)":13
+ * cdef __pyx_unpickle_Enum__set_state(Enum __pyx_result, tuple __pyx_state):
+ *     __pyx_result.name = __pyx_state[0]
+ *     if len(__pyx_state) > 1 and hasattr(__pyx_result, '__dict__'):             # <<<<<<<<<<<<<<
+ *         __pyx_result.__dict__.update(__pyx_state[1])
+ */
+  }
+
+  /* "(tree fragment)":11
+ *         __pyx_unpickle_Enum__set_state(<Enum> __pyx_result, __pyx_state)
+ *     return __pyx_result
+ * cdef __pyx_unpickle_Enum__set_state(Enum __pyx_result, tuple __pyx_state):             # <<<<<<<<<<<<<<
+ *     __pyx_result.name = __pyx_state[0]
+ *     if len(__pyx_state) > 1 and hasattr(__pyx_result, '__dict__'):
+ */
+
+  /* function exit code */
+  __pyx_r = Py_None; __Pyx_INCREF(Py_None);
+  goto __pyx_L0;
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_1);
+  __Pyx_XDECREF(__pyx_t_6);
+  __Pyx_XDECREF(__pyx_t_7);
+  __Pyx_XDECREF(__pyx_t_8);
+  __Pyx_AddTraceback("View.MemoryView.__pyx_unpickle_Enum__set_state", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = 0;
+  __pyx_L0:;
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+static struct __pyx_vtabstruct_array __pyx_vtable_array;
+
+static PyObject *__pyx_tp_new_array(PyTypeObject *t, PyObject *a, PyObject *k) {
+  struct __pyx_array_obj *p;
+  PyObject *o;
+  if (likely((t->tp_flags & Py_TPFLAGS_IS_ABSTRACT) == 0)) {
+    o = (*t->tp_alloc)(t, 0);
+  } else {
+    o = (PyObject *) PyBaseObject_Type.tp_new(t, __pyx_empty_tuple, 0);
+  }
+  if (unlikely(!o)) return 0;
+  p = ((struct __pyx_array_obj *)o);
+  p->__pyx_vtab = __pyx_vtabptr_array;
+  p->mode = ((PyObject*)Py_None); Py_INCREF(Py_None);
+  p->_format = ((PyObject*)Py_None); Py_INCREF(Py_None);
+  if (unlikely(__pyx_array___cinit__(o, a, k) < 0)) goto bad;
+  return o;
+  bad:
+  Py_DECREF(o); o = 0;
+  return NULL;
+}
+
+static void __pyx_tp_dealloc_array(PyObject *o) {
+  struct __pyx_array_obj *p = (struct __pyx_array_obj *)o;
+  #if CYTHON_USE_TP_FINALIZE
+  if (unlikely(PyType_HasFeature(Py_TYPE(o), Py_TPFLAGS_HAVE_FINALIZE) && Py_TYPE(o)->tp_finalize) && (!PyType_IS_GC(Py_TYPE(o)) || !_PyGC_FINALIZED(o))) {
+    if (PyObject_CallFinalizerFromDealloc(o)) return;
+  }
+  #endif
+  {
+    PyObject *etype, *eval, *etb;
+    PyErr_Fetch(&etype, &eval, &etb);
+    __Pyx_SET_REFCNT(o, Py_REFCNT(o) + 1);
+    __pyx_array___dealloc__(o);
+    __Pyx_SET_REFCNT(o, Py_REFCNT(o) - 1);
+    PyErr_Restore(etype, eval, etb);
+  }
+  Py_CLEAR(p->mode);
+  Py_CLEAR(p->_format);
+  (*Py_TYPE(o)->tp_free)(o);
+}
+static PyObject *__pyx_sq_item_array(PyObject *o, Py_ssize_t i) {
+  PyObject *r;
+  PyObject *x = PyInt_FromSsize_t(i); if(!x) return 0;
+  r = Py_TYPE(o)->tp_as_mapping->mp_subscript(o, x);
+  Py_DECREF(x);
+  return r;
+}
+
+static int __pyx_mp_ass_subscript_array(PyObject *o, PyObject *i, PyObject *v) {
+  if (v) {
+    return __pyx_array___setitem__(o, i, v);
+  }
+  else {
+    PyErr_Format(PyExc_NotImplementedError,
+      "Subscript deletion not supported by %.200s", Py_TYPE(o)->tp_name);
+    return -1;
+  }
+}
+
+static PyObject *__pyx_tp_getattro_array(PyObject *o, PyObject *n) {
+  PyObject *v = __Pyx_PyObject_GenericGetAttr(o, n);
+  if (!v && PyErr_ExceptionMatches(PyExc_AttributeError)) {
+    PyErr_Clear();
+    v = __pyx_array___getattr__(o, n);
+  }
+  return v;
+}
+
+static PyObject *__pyx_getprop___pyx_array_memview(PyObject *o, CYTHON_UNUSED void *x) {
+  return __pyx_pw_15View_dot_MemoryView_5array_7memview_1__get__(o);
+}
+
+static PyMethodDef __pyx_methods_array[] = {
+  {"__getattr__", (PyCFunction)__pyx_array___getattr__, METH_O|METH_COEXIST, 0},
+  {"__reduce_cython__", (PyCFunction)__pyx_pw___pyx_array_1__reduce_cython__, METH_NOARGS, 0},
+  {"__setstate_cython__", (PyCFunction)__pyx_pw___pyx_array_3__setstate_cython__, METH_O, 0},
+  {0, 0, 0, 0}
+};
+
+static struct PyGetSetDef __pyx_getsets_array[] = {
+  {(char *)"memview", __pyx_getprop___pyx_array_memview, 0, (char *)0, 0},
+  {0, 0, 0, 0, 0}
+};
+
+static PySequenceMethods __pyx_tp_as_sequence_array = {
+  __pyx_array___len__, /*sq_length*/
+  0, /*sq_concat*/
+  0, /*sq_repeat*/
+  __pyx_sq_item_array, /*sq_item*/
+  0, /*sq_slice*/
+  0, /*sq_ass_item*/
+  0, /*sq_ass_slice*/
+  0, /*sq_contains*/
+  0, /*sq_inplace_concat*/
+  0, /*sq_inplace_repeat*/
+};
+
+static PyMappingMethods __pyx_tp_as_mapping_array = {
+  __pyx_array___len__, /*mp_length*/
+  __pyx_array___getitem__, /*mp_subscript*/
+  __pyx_mp_ass_subscript_array, /*mp_ass_subscript*/
+};
+
+static PyBufferProcs __pyx_tp_as_buffer_array = {
+  #if PY_MAJOR_VERSION < 3
+  0, /*bf_getreadbuffer*/
+  #endif
+  #if PY_MAJOR_VERSION < 3
+  0, /*bf_getwritebuffer*/
+  #endif
+  #if PY_MAJOR_VERSION < 3
+  0, /*bf_getsegcount*/
+  #endif
+  #if PY_MAJOR_VERSION < 3
+  0, /*bf_getcharbuffer*/
+  #endif
+  __pyx_array_getbuffer, /*bf_getbuffer*/
+  0, /*bf_releasebuffer*/
+};
+
+static PyTypeObject __pyx_type___pyx_array = {
+  PyVarObject_HEAD_INIT(0, 0)
+  "TTS.tts.utils.monotonic_align.core.array", /*tp_name*/
+  sizeof(struct __pyx_array_obj), /*tp_basicsize*/
+  0, /*tp_itemsize*/
+  __pyx_tp_dealloc_array, /*tp_dealloc*/
+  #if PY_VERSION_HEX < 0x030800b4
+  0, /*tp_print*/
+  #endif
+  #if PY_VERSION_HEX >= 0x030800b4
+  0, /*tp_vectorcall_offset*/
+  #endif
+  0, /*tp_getattr*/
+  0, /*tp_setattr*/
+  #if PY_MAJOR_VERSION < 3
+  0, /*tp_compare*/
+  #endif
+  #if PY_MAJOR_VERSION >= 3
+  0, /*tp_as_async*/
+  #endif
+  0, /*tp_repr*/
+  0, /*tp_as_number*/
+  &__pyx_tp_as_sequence_array, /*tp_as_sequence*/
+  &__pyx_tp_as_mapping_array, /*tp_as_mapping*/
+  0, /*tp_hash*/
+  0, /*tp_call*/
+  0, /*tp_str*/
+  __pyx_tp_getattro_array, /*tp_getattro*/
+  0, /*tp_setattro*/
+  &__pyx_tp_as_buffer_array, /*tp_as_buffer*/
+  Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_BASETYPE, /*tp_flags*/
+  0, /*tp_doc*/
+  0, /*tp_traverse*/
+  0, /*tp_clear*/
+  0, /*tp_richcompare*/
+  0, /*tp_weaklistoffset*/
+  0, /*tp_iter*/
+  0, /*tp_iternext*/
+  __pyx_methods_array, /*tp_methods*/
+  0, /*tp_members*/
+  __pyx_getsets_array, /*tp_getset*/
+  0, /*tp_base*/
+  0, /*tp_dict*/
+  0, /*tp_descr_get*/
+  0, /*tp_descr_set*/
+  0, /*tp_dictoffset*/
+  0, /*tp_init*/
+  0, /*tp_alloc*/
+  __pyx_tp_new_array, /*tp_new*/
+  0, /*tp_free*/
+  0, /*tp_is_gc*/
+  0, /*tp_bases*/
+  0, /*tp_mro*/
+  0, /*tp_cache*/
+  0, /*tp_subclasses*/
+  0, /*tp_weaklist*/
+  0, /*tp_del*/
+  0, /*tp_version_tag*/
+  #if PY_VERSION_HEX >= 0x030400a1
+  0, /*tp_finalize*/
+  #endif
+  #if PY_VERSION_HEX >= 0x030800b1 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07030800)
+  0, /*tp_vectorcall*/
+  #endif
+  #if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000
+  0, /*tp_print*/
+  #endif
+  #if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX >= 0x03090000
+  0, /*tp_pypy_flags*/
+  #endif
+};
+
+static PyObject *__pyx_tp_new_Enum(PyTypeObject *t, CYTHON_UNUSED PyObject *a, CYTHON_UNUSED PyObject *k) {
+  struct __pyx_MemviewEnum_obj *p;
+  PyObject *o;
+  if (likely((t->tp_flags & Py_TPFLAGS_IS_ABSTRACT) == 0)) {
+    o = (*t->tp_alloc)(t, 0);
+  } else {
+    o = (PyObject *) PyBaseObject_Type.tp_new(t, __pyx_empty_tuple, 0);
+  }
+  if (unlikely(!o)) return 0;
+  p = ((struct __pyx_MemviewEnum_obj *)o);
+  p->name = Py_None; Py_INCREF(Py_None);
+  return o;
+}
+
+static void __pyx_tp_dealloc_Enum(PyObject *o) {
+  struct __pyx_MemviewEnum_obj *p = (struct __pyx_MemviewEnum_obj *)o;
+  #if CYTHON_USE_TP_FINALIZE
+  if (unlikely(PyType_HasFeature(Py_TYPE(o), Py_TPFLAGS_HAVE_FINALIZE) && Py_TYPE(o)->tp_finalize) && !_PyGC_FINALIZED(o)) {
+    if (PyObject_CallFinalizerFromDealloc(o)) return;
+  }
+  #endif
+  PyObject_GC_UnTrack(o);
+  Py_CLEAR(p->name);
+  (*Py_TYPE(o)->tp_free)(o);
+}
+
+static int __pyx_tp_traverse_Enum(PyObject *o, visitproc v, void *a) {
+  int e;
+  struct __pyx_MemviewEnum_obj *p = (struct __pyx_MemviewEnum_obj *)o;
+  if (p->name) {
+    e = (*v)(p->name, a); if (e) return e;
+  }
+  return 0;
+}
+
+static int __pyx_tp_clear_Enum(PyObject *o) {
+  PyObject* tmp;
+  struct __pyx_MemviewEnum_obj *p = (struct __pyx_MemviewEnum_obj *)o;
+  tmp = ((PyObject*)p->name);
+  p->name = Py_None; Py_INCREF(Py_None);
+  Py_XDECREF(tmp);
+  return 0;
+}
+
+static PyMethodDef __pyx_methods_Enum[] = {
+  {"__reduce_cython__", (PyCFunction)__pyx_pw___pyx_MemviewEnum_1__reduce_cython__, METH_NOARGS, 0},
+  {"__setstate_cython__", (PyCFunction)__pyx_pw___pyx_MemviewEnum_3__setstate_cython__, METH_O, 0},
+  {0, 0, 0, 0}
+};
+
+static PyTypeObject __pyx_type___pyx_MemviewEnum = {
+  PyVarObject_HEAD_INIT(0, 0)
+  "TTS.tts.utils.monotonic_align.core.Enum", /*tp_name*/
+  sizeof(struct __pyx_MemviewEnum_obj), /*tp_basicsize*/
+  0, /*tp_itemsize*/
+  __pyx_tp_dealloc_Enum, /*tp_dealloc*/
+  #if PY_VERSION_HEX < 0x030800b4
+  0, /*tp_print*/
+  #endif
+  #if PY_VERSION_HEX >= 0x030800b4
+  0, /*tp_vectorcall_offset*/
+  #endif
+  0, /*tp_getattr*/
+  0, /*tp_setattr*/
+  #if PY_MAJOR_VERSION < 3
+  0, /*tp_compare*/
+  #endif
+  #if PY_MAJOR_VERSION >= 3
+  0, /*tp_as_async*/
+  #endif
+  __pyx_MemviewEnum___repr__, /*tp_repr*/
+  0, /*tp_as_number*/
+  0, /*tp_as_sequence*/
+  0, /*tp_as_mapping*/
+  0, /*tp_hash*/
+  0, /*tp_call*/
+  0, /*tp_str*/
+  0, /*tp_getattro*/
+  0, /*tp_setattro*/
+  0, /*tp_as_buffer*/
+  Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_BASETYPE|Py_TPFLAGS_HAVE_GC, /*tp_flags*/
+  0, /*tp_doc*/
+  __pyx_tp_traverse_Enum, /*tp_traverse*/
+  __pyx_tp_clear_Enum, /*tp_clear*/
+  0, /*tp_richcompare*/
+  0, /*tp_weaklistoffset*/
+  0, /*tp_iter*/
+  0, /*tp_iternext*/
+  __pyx_methods_Enum, /*tp_methods*/
+  0, /*tp_members*/
+  0, /*tp_getset*/
+  0, /*tp_base*/
+  0, /*tp_dict*/
+  0, /*tp_descr_get*/
+  0, /*tp_descr_set*/
+  0, /*tp_dictoffset*/
+  __pyx_MemviewEnum___init__, /*tp_init*/
+  0, /*tp_alloc*/
+  __pyx_tp_new_Enum, /*tp_new*/
+  0, /*tp_free*/
+  0, /*tp_is_gc*/
+  0, /*tp_bases*/
+  0, /*tp_mro*/
+  0, /*tp_cache*/
+  0, /*tp_subclasses*/
+  0, /*tp_weaklist*/
+  0, /*tp_del*/
+  0, /*tp_version_tag*/
+  #if PY_VERSION_HEX >= 0x030400a1
+  0, /*tp_finalize*/
+  #endif
+  #if PY_VERSION_HEX >= 0x030800b1 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07030800)
+  0, /*tp_vectorcall*/
+  #endif
+  #if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000
+  0, /*tp_print*/
+  #endif
+  #if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX >= 0x03090000
+  0, /*tp_pypy_flags*/
+  #endif
+};
+static struct __pyx_vtabstruct_memoryview __pyx_vtable_memoryview;
+
+static PyObject *__pyx_tp_new_memoryview(PyTypeObject *t, PyObject *a, PyObject *k) {
+  struct __pyx_memoryview_obj *p;
+  PyObject *o;
+  if (likely((t->tp_flags & Py_TPFLAGS_IS_ABSTRACT) == 0)) {
+    o = (*t->tp_alloc)(t, 0);
+  } else {
+    o = (PyObject *) PyBaseObject_Type.tp_new(t, __pyx_empty_tuple, 0);
+  }
+  if (unlikely(!o)) return 0;
+  p = ((struct __pyx_memoryview_obj *)o);
+  p->__pyx_vtab = __pyx_vtabptr_memoryview;
+  p->obj = Py_None; Py_INCREF(Py_None);
+  p->_size = Py_None; Py_INCREF(Py_None);
+  p->_array_interface = Py_None; Py_INCREF(Py_None);
+  p->view.obj = NULL;
+  if (unlikely(__pyx_memoryview___cinit__(o, a, k) < 0)) goto bad;
+  return o;
+  bad:
+  Py_DECREF(o); o = 0;
+  return NULL;
+}
+
+static void __pyx_tp_dealloc_memoryview(PyObject *o) {
+  struct __pyx_memoryview_obj *p = (struct __pyx_memoryview_obj *)o;
+  #if CYTHON_USE_TP_FINALIZE
+  if (unlikely(PyType_HasFeature(Py_TYPE(o), Py_TPFLAGS_HAVE_FINALIZE) && Py_TYPE(o)->tp_finalize) && !_PyGC_FINALIZED(o)) {
+    if (PyObject_CallFinalizerFromDealloc(o)) return;
+  }
+  #endif
+  PyObject_GC_UnTrack(o);
+  {
+    PyObject *etype, *eval, *etb;
+    PyErr_Fetch(&etype, &eval, &etb);
+    __Pyx_SET_REFCNT(o, Py_REFCNT(o) + 1);
+    __pyx_memoryview___dealloc__(o);
+    __Pyx_SET_REFCNT(o, Py_REFCNT(o) - 1);
+    PyErr_Restore(etype, eval, etb);
+  }
+  Py_CLEAR(p->obj);
+  Py_CLEAR(p->_size);
+  Py_CLEAR(p->_array_interface);
+  (*Py_TYPE(o)->tp_free)(o);
+}
+
+static int __pyx_tp_traverse_memoryview(PyObject *o, visitproc v, void *a) {
+  int e;
+  struct __pyx_memoryview_obj *p = (struct __pyx_memoryview_obj *)o;
+  if (p->obj) {
+    e = (*v)(p->obj, a); if (e) return e;
+  }
+  if (p->_size) {
+    e = (*v)(p->_size, a); if (e) return e;
+  }
+  if (p->_array_interface) {
+    e = (*v)(p->_array_interface, a); if (e) return e;
+  }
+  if (p->view.obj) {
+    e = (*v)(p->view.obj, a); if (e) return e;
+  }
+  return 0;
+}
+
+static int __pyx_tp_clear_memoryview(PyObject *o) {
+  PyObject* tmp;
+  struct __pyx_memoryview_obj *p = (struct __pyx_memoryview_obj *)o;
+  tmp = ((PyObject*)p->obj);
+  p->obj = Py_None; Py_INCREF(Py_None);
+  Py_XDECREF(tmp);
+  tmp = ((PyObject*)p->_size);
+  p->_size = Py_None; Py_INCREF(Py_None);
+  Py_XDECREF(tmp);
+  tmp = ((PyObject*)p->_array_interface);
+  p->_array_interface = Py_None; Py_INCREF(Py_None);
+  Py_XDECREF(tmp);
+  Py_CLEAR(p->view.obj);
+  return 0;
+}
+static PyObject *__pyx_sq_item_memoryview(PyObject *o, Py_ssize_t i) {
+  PyObject *r;
+  PyObject *x = PyInt_FromSsize_t(i); if(!x) return 0;
+  r = Py_TYPE(o)->tp_as_mapping->mp_subscript(o, x);
+  Py_DECREF(x);
+  return r;
+}
+
+static int __pyx_mp_ass_subscript_memoryview(PyObject *o, PyObject *i, PyObject *v) {
+  if (v) {
+    return __pyx_memoryview___setitem__(o, i, v);
+  }
+  else {
+    PyErr_Format(PyExc_NotImplementedError,
+      "Subscript deletion not supported by %.200s", Py_TYPE(o)->tp_name);
+    return -1;
+  }
+}
+
+static PyObject *__pyx_getprop___pyx_memoryview_T(PyObject *o, CYTHON_UNUSED void *x) {
+  return __pyx_pw_15View_dot_MemoryView_10memoryview_1T_1__get__(o);
+}
+
+static PyObject *__pyx_getprop___pyx_memoryview_base(PyObject *o, CYTHON_UNUSED void *x) {
+  return __pyx_pw_15View_dot_MemoryView_10memoryview_4base_1__get__(o);
+}
+
+static PyObject *__pyx_getprop___pyx_memoryview_shape(PyObject *o, CYTHON_UNUSED void *x) {
+  return __pyx_pw_15View_dot_MemoryView_10memoryview_5shape_1__get__(o);
+}
+
+static PyObject *__pyx_getprop___pyx_memoryview_strides(PyObject *o, CYTHON_UNUSED void *x) {
+  return __pyx_pw_15View_dot_MemoryView_10memoryview_7strides_1__get__(o);
+}
+
+static PyObject *__pyx_getprop___pyx_memoryview_suboffsets(PyObject *o, CYTHON_UNUSED void *x) {
+  return __pyx_pw_15View_dot_MemoryView_10memoryview_10suboffsets_1__get__(o);
+}
+
+static PyObject *__pyx_getprop___pyx_memoryview_ndim(PyObject *o, CYTHON_UNUSED void *x) {
+  return __pyx_pw_15View_dot_MemoryView_10memoryview_4ndim_1__get__(o);
+}
+
+static PyObject *__pyx_getprop___pyx_memoryview_itemsize(PyObject *o, CYTHON_UNUSED void *x) {
+  return __pyx_pw_15View_dot_MemoryView_10memoryview_8itemsize_1__get__(o);
+}
+
+static PyObject *__pyx_getprop___pyx_memoryview_nbytes(PyObject *o, CYTHON_UNUSED void *x) {
+  return __pyx_pw_15View_dot_MemoryView_10memoryview_6nbytes_1__get__(o);
+}
+
+static PyObject *__pyx_getprop___pyx_memoryview_size(PyObject *o, CYTHON_UNUSED void *x) {
+  return __pyx_pw_15View_dot_MemoryView_10memoryview_4size_1__get__(o);
+}
+
+static PyMethodDef __pyx_methods_memoryview[] = {
+  {"is_c_contig", (PyCFunction)__pyx_memoryview_is_c_contig, METH_NOARGS, 0},
+  {"is_f_contig", (PyCFunction)__pyx_memoryview_is_f_contig, METH_NOARGS, 0},
+  {"copy", (PyCFunction)__pyx_memoryview_copy, METH_NOARGS, 0},
+  {"copy_fortran", (PyCFunction)__pyx_memoryview_copy_fortran, METH_NOARGS, 0},
+  {"__reduce_cython__", (PyCFunction)__pyx_pw___pyx_memoryview_1__reduce_cython__, METH_NOARGS, 0},
+  {"__setstate_cython__", (PyCFunction)__pyx_pw___pyx_memoryview_3__setstate_cython__, METH_O, 0},
+  {0, 0, 0, 0}
+};
+
+static struct PyGetSetDef __pyx_getsets_memoryview[] = { /* attribute table of the memoryview type; installed as tp_getset in __pyx_type___pyx_memoryview */
+  {(char *)"T", __pyx_getprop___pyx_memoryview_T, 0, (char *)0, 0}, /* each entry: name, getter, setter, doc, closure; setter is 0 throughout, so no attribute here has a setter */
+  {(char *)"base", __pyx_getprop___pyx_memoryview_base, 0, (char *)0, 0},
+  {(char *)"shape", __pyx_getprop___pyx_memoryview_shape, 0, (char *)0, 0},
+  {(char *)"strides", __pyx_getprop___pyx_memoryview_strides, 0, (char *)0, 0},
+  {(char *)"suboffsets", __pyx_getprop___pyx_memoryview_suboffsets, 0, (char *)0, 0},
+  {(char *)"ndim", __pyx_getprop___pyx_memoryview_ndim, 0, (char *)0, 0},
+  {(char *)"itemsize", __pyx_getprop___pyx_memoryview_itemsize, 0, (char *)0, 0},
+  {(char *)"nbytes", __pyx_getprop___pyx_memoryview_nbytes, 0, (char *)0, 0},
+  {(char *)"size", __pyx_getprop___pyx_memoryview_size, 0, (char *)0, 0},
+  {0, 0, 0, 0, 0} /* all-zero sentinel terminates the table */
+};
+
+static PySequenceMethods __pyx_tp_as_sequence_memoryview = { /* sequence protocol of the memoryview type; only length and item access are provided */
+  __pyx_memoryview___len__, /*sq_length*/
+  0, /*sq_concat*/
+  0, /*sq_repeat*/
+  __pyx_sq_item_memoryview, /*sq_item*/
+  0, /*sq_slice*/
+  0, /*sq_ass_item*/
+  0, /*sq_ass_slice*/
+  0, /*sq_contains*/
+  0, /*sq_inplace_concat*/
+  0, /*sq_inplace_repeat*/
+};
+
+static PyMappingMethods __pyx_tp_as_mapping_memoryview = { /* mapping protocol of the memoryview type: length, subscript read, subscript assignment */
+  __pyx_memoryview___len__, /*mp_length*/ /* same function that serves sq_length in the sequence table */
+  __pyx_memoryview___getitem__, /*mp_subscript*/
+  __pyx_mp_ass_subscript_memoryview, /*mp_ass_subscript*/
+};
+
+static PyBufferProcs __pyx_tp_as_buffer_memoryview = { /* buffer protocol of the memoryview type */
+  #if PY_MAJOR_VERSION < 3 /* the four slots guarded like this are only emitted for Python 2 builds, and are left empty */
+  0, /*bf_getreadbuffer*/
+  #endif
+  #if PY_MAJOR_VERSION < 3
+  0, /*bf_getwritebuffer*/
+  #endif
+  #if PY_MAJOR_VERSION < 3
+  0, /*bf_getsegcount*/
+  #endif
+  #if PY_MAJOR_VERSION < 3
+  0, /*bf_getcharbuffer*/
+  #endif
+  __pyx_memoryview_getbuffer, /*bf_getbuffer*/
+  0, /*bf_releasebuffer*/ /* no release hook is registered */
+};
+
+static PyTypeObject __pyx_type___pyx_memoryview = { /* static type object for the Cython memoryview class; slots are positional, so their order must not change */
+  PyVarObject_HEAD_INIT(0, 0)
+  "TTS.tts.utils.monotonic_align.core.memoryview", /*tp_name*/
+  sizeof(struct __pyx_memoryview_obj), /*tp_basicsize*/
+  0, /*tp_itemsize*/
+  __pyx_tp_dealloc_memoryview, /*tp_dealloc*/
+  #if PY_VERSION_HEX < 0x030800b4 /* this position is tp_print before 3.8.0b4 and tp_vectorcall_offset from then on */
+  0, /*tp_print*/
+  #endif
+  #if PY_VERSION_HEX >= 0x030800b4
+  0, /*tp_vectorcall_offset*/
+  #endif
+  0, /*tp_getattr*/
+  0, /*tp_setattr*/
+  #if PY_MAJOR_VERSION < 3 /* this position is tp_compare on Python 2 and tp_as_async on Python 3 */
+  0, /*tp_compare*/
+  #endif
+  #if PY_MAJOR_VERSION >= 3
+  0, /*tp_as_async*/
+  #endif
+  __pyx_memoryview___repr__, /*tp_repr*/
+  0, /*tp_as_number*/
+  &__pyx_tp_as_sequence_memoryview, /*tp_as_sequence*/
+  &__pyx_tp_as_mapping_memoryview, /*tp_as_mapping*/
+  0, /*tp_hash*/
+  0, /*tp_call*/
+  __pyx_memoryview___str__, /*tp_str*/
+  0, /*tp_getattro*/
+  0, /*tp_setattro*/
+  &__pyx_tp_as_buffer_memoryview, /*tp_as_buffer*/
+  Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_BASETYPE|Py_TPFLAGS_HAVE_GC, /*tp_flags*/
+  0, /*tp_doc*/
+  __pyx_tp_traverse_memoryview, /*tp_traverse*/
+  __pyx_tp_clear_memoryview, /*tp_clear*/
+  0, /*tp_richcompare*/
+  0, /*tp_weaklistoffset*/
+  0, /*tp_iter*/
+  0, /*tp_iternext*/
+  __pyx_methods_memoryview, /*tp_methods*/
+  0, /*tp_members*/
+  __pyx_getsets_memoryview, /*tp_getset*/
+  0, /*tp_base*/
+  0, /*tp_dict*/
+  0, /*tp_descr_get*/
+  0, /*tp_descr_set*/
+  0, /*tp_dictoffset*/
+  0, /*tp_init*/
+  0, /*tp_alloc*/
+  __pyx_tp_new_memoryview, /*tp_new*/
+  0, /*tp_free*/
+  0, /*tp_is_gc*/
+  0, /*tp_bases*/
+  0, /*tp_mro*/
+  0, /*tp_cache*/
+  0, /*tp_subclasses*/
+  0, /*tp_weaklist*/
+  0, /*tp_del*/
+  0, /*tp_version_tag*/
+  #if PY_VERSION_HEX >= 0x030400a1 /* the remaining slots are only emitted for the interpreter versions named in each guard */
+  0, /*tp_finalize*/
+  #endif
+  #if PY_VERSION_HEX >= 0x030800b1 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07030800)
+  0, /*tp_vectorcall*/
+  #endif
+  #if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000
+  0, /*tp_print*/
+  #endif
+  #if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX >= 0x03090000
+  0, /*tp_pypy_flags*/
+  #endif
+};
+static struct __pyx_vtabstruct__memoryviewslice __pyx_vtable__memoryviewslice; /* file-scope storage for the _memoryviewslice vtable; presumably what __pyx_vtabptr__memoryviewslice points at - populated outside this section, confirm */
+
+static PyObject *__pyx_tp_new__memoryviewslice(PyTypeObject *t, PyObject *a, PyObject *k) { /* tp_new for _memoryviewslice */
+  PyObject *self = __pyx_tp_new_memoryview(t, a, k); /* the base tp_new builds the memoryview part */
+  struct __pyx_memoryviewslice_obj *slice = (struct __pyx_memoryviewslice_obj *)self;
+  if (unlikely(self == NULL)) return NULL; /* base construction failed; nothing has been dereferenced yet */
+  slice->__pyx_base.__pyx_vtab = (struct __pyx_vtabstruct_memoryview *)__pyx_vtabptr__memoryviewslice; /* switch to the subclass vtable */
+  Py_INCREF(Py_None);
+  slice->from_object = Py_None; /* from_object starts out as an owned reference to None */
+  slice->from_slice.memview = NULL;
+  return self;
+}
+
+static void __pyx_tp_dealloc__memoryviewslice(PyObject *o) { /* tp_dealloc for _memoryviewslice: optional finalizer, __dealloc__, drop from_object, then chain to the memoryview dealloc */
+  struct __pyx_memoryviewslice_obj *p = (struct __pyx_memoryviewslice_obj *)o;
+  #if CYTHON_USE_TP_FINALIZE
+  if (unlikely(PyType_HasFeature(Py_TYPE(o), Py_TPFLAGS_HAVE_FINALIZE) && Py_TYPE(o)->tp_finalize) && !_PyGC_FINALIZED(o)) { /* type has a finalizer that has not run for this object yet */
+    if (PyObject_CallFinalizerFromDealloc(o)) return; /* a nonzero result stops the dealloc here and leaves the object untouched */
+  }
+  #endif
+  PyObject_GC_UnTrack(o); /* take the object out of GC tracking before tearing it down */
+  {
+    PyObject *etype, *eval, *etb;
+    PyErr_Fetch(&etype, &eval, &etb); /* stash any pending exception while __dealloc__ runs */
+    __Pyx_SET_REFCNT(o, Py_REFCNT(o) + 1); /* temporarily raise the refcount for the duration of the __dealloc__ call */
+    __pyx_memoryviewslice___dealloc__(o);
+    __Pyx_SET_REFCNT(o, Py_REFCNT(o) - 1); /* undo the temporary bump */
+    PyErr_Restore(etype, eval, etb); /* put the stashed exception back */
+  }
+  Py_CLEAR(p->from_object);
+  PyObject_GC_Track(o); /* re-track before delegating; presumably because the base dealloc untracks again - confirm */
+  __pyx_tp_dealloc_memoryview(o); /* the base-class dealloc finishes the teardown */
+}
+
+static int __pyx_tp_traverse__memoryviewslice(PyObject *o, visitproc v, void *a) { /* tp_traverse for _memoryviewslice */
+  struct __pyx_memoryviewslice_obj *slice = (struct __pyx_memoryviewslice_obj *)o;
+  int rc = __pyx_tp_traverse_memoryview(o, v, a); /* base-class references are visited first */
+  if (rc != 0) return rc; /* a nonzero visitor result is passed straight back */
+  if (slice->from_object != NULL) {
+    rc = v(slice->from_object, a); /* then the one extra reference this subclass holds */
+  }
+  return rc; /* still 0 here unless the from_object visit reported otherwise */
+}
+
+static int __pyx_tp_clear__memoryviewslice(PyObject *o) { /* tp_clear for _memoryviewslice */
+  struct __pyx_memoryviewslice_obj *slice = (struct __pyx_memoryviewslice_obj *)o;
+  PyObject *previous;
+  __pyx_tp_clear_memoryview(o); /* the base class drops its references first */
+  previous = (PyObject *)slice->from_object;
+  Py_INCREF(Py_None); slice->from_object = Py_None; /* swap in None before the old value is released */
+  Py_XDECREF(previous);
+  __PYX_XDEC_MEMVIEW(&slice->from_slice, 1);
+  return 0;
+}
+
+static PyObject *__pyx_getprop___pyx_memoryviewslice_base(PyObject *self, CYTHON_UNUSED void *closure) { /* getset getter behind _memoryviewslice's "base" attribute */
+  return __pyx_pw_15View_dot_MemoryView_16_memoryviewslice_4base_1__get__(self); /* closure is ignored; forward to the property wrapper */
+}
+
+static PyMethodDef __pyx_methods__memoryviewslice[] = { /* method table of _memoryviewslice; installed as tp_methods in __pyx_type___pyx_memoryviewslice */
+  {"__reduce_cython__", (PyCFunction)__pyx_pw___pyx_memoryviewslice_1__reduce_cython__, METH_NOARGS, 0}, /* each entry: name, C function, calling-convention flags, docstring (0 = none) */
+  {"__setstate_cython__", (PyCFunction)__pyx_pw___pyx_memoryviewslice_3__setstate_cython__, METH_O, 0},
+  {0, 0, 0, 0} /* all-zero sentinel terminates the table */
+};
+
+static struct PyGetSetDef __pyx_getsets__memoryviewslice[] = { /* attribute table of _memoryviewslice; installed as tp_getset in __pyx_type___pyx_memoryviewslice */
+  {(char *)"base", __pyx_getprop___pyx_memoryviewslice_base, 0, (char *)0, 0}, /* getter only: the setter field is 0 */
+  {0, 0, 0, 0, 0} /* all-zero sentinel terminates the table */
+};
+
+static PyTypeObject __pyx_type___pyx_memoryviewslice = { /* static type object for _memoryviewslice; slots are positional, so their order must not change */
+  PyVarObject_HEAD_INIT(0, 0)
+  "TTS.tts.utils.monotonic_align.core._memoryviewslice", /*tp_name*/
+  sizeof(struct __pyx_memoryviewslice_obj), /*tp_basicsize*/
+  0, /*tp_itemsize*/
+  __pyx_tp_dealloc__memoryviewslice, /*tp_dealloc*/
+  #if PY_VERSION_HEX < 0x030800b4 /* this position is tp_print before 3.8.0b4 and tp_vectorcall_offset from then on */
+  0, /*tp_print*/
+  #endif
+  #if PY_VERSION_HEX >= 0x030800b4
+  0, /*tp_vectorcall_offset*/
+  #endif
+  0, /*tp_getattr*/
+  0, /*tp_setattr*/
+  #if PY_MAJOR_VERSION < 3 /* this position is tp_compare on Python 2 and tp_as_async on Python 3 */
+  0, /*tp_compare*/
+  #endif
+  #if PY_MAJOR_VERSION >= 3
+  0, /*tp_as_async*/
+  #endif
+  #if CYTHON_COMPILING_IN_PYPY /* under PyPy the memoryview repr is set explicitly; otherwise the slot is left empty */
+  __pyx_memoryview___repr__, /*tp_repr*/
+  #else
+  0, /*tp_repr*/
+  #endif
+  0, /*tp_as_number*/
+  0, /*tp_as_sequence*/
+  0, /*tp_as_mapping*/
+  0, /*tp_hash*/
+  0, /*tp_call*/
+  #if CYTHON_COMPILING_IN_PYPY /* same PyPy-only treatment for str */
+  __pyx_memoryview___str__, /*tp_str*/
+  #else
+  0, /*tp_str*/
+  #endif
+  0, /*tp_getattro*/
+  0, /*tp_setattro*/
+  0, /*tp_as_buffer*/
+  Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_BASETYPE|Py_TPFLAGS_HAVE_GC, /*tp_flags*/
+  "Internal class for passing memoryview slices to Python", /*tp_doc*/
+  __pyx_tp_traverse__memoryviewslice, /*tp_traverse*/
+  __pyx_tp_clear__memoryviewslice, /*tp_clear*/
+  0, /*tp_richcompare*/
+  0, /*tp_weaklistoffset*/
+  0, /*tp_iter*/
+  0, /*tp_iternext*/
+  __pyx_methods__memoryviewslice, /*tp_methods*/
+  0, /*tp_members*/
+  __pyx_getsets__memoryviewslice, /*tp_getset*/
+  0, /*tp_base*/ /* left 0 in this initializer; presumably assigned during module init - confirm */
+  0, /*tp_dict*/
+  0, /*tp_descr_get*/
+  0, /*tp_descr_set*/
+  0, /*tp_dictoffset*/
+  0, /*tp_init*/
+  0, /*tp_alloc*/
+  __pyx_tp_new__memoryviewslice, /*tp_new*/
+  0, /*tp_free*/
+  0, /*tp_is_gc*/
+  0, /*tp_bases*/
+  0, /*tp_mro*/
+  0, /*tp_cache*/
+  0, /*tp_subclasses*/
+  0, /*tp_weaklist*/
+  0, /*tp_del*/
+  0, /*tp_version_tag*/
+  #if PY_VERSION_HEX >= 0x030400a1 /* the remaining slots are only emitted for the interpreter versions named in each guard */
+  0, /*tp_finalize*/
+  #endif
+  #if PY_VERSION_HEX >= 0x030800b1 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07030800)
+  0, /*tp_vectorcall*/
+  #endif
+  #if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000
+  0, /*tp_print*/
+  #endif
+  #if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX >= 0x03090000
+  0, /*tp_pypy_flags*/
+  #endif
+};
+
+static PyMethodDef __pyx_methods[] = { /* module-level function table; referenced as m_methods by __pyx_moduledef */
+  {"maximum_path_c", (PyCFunction)(void*)(PyCFunctionWithKeywords)__pyx_pw_3TTS_3tts_5utils_15monotonic_align_4core_1maximum_path_c, METH_VARARGS|METH_KEYWORDS, 0}, /* takes positional and keyword arguments; cast via void* to fit the PyCFunction field */
+  {0, 0, 0, 0} /* all-zero sentinel terminates the table */
+};
+
+#if PY_MAJOR_VERSION >= 3 /* the module definition below is only emitted for Python 3 */
+#if CYTHON_PEP489_MULTI_PHASE_INIT /* multi-phase init: declare the create/exec hooks and the slot table that names them */
+static PyObject* __pyx_pymod_create(PyObject *spec, PyModuleDef *def); /*proto*/
+static int __pyx_pymod_exec_core(PyObject* module); /*proto*/
+static PyModuleDef_Slot __pyx_moduledef_slots[] = {
+  {Py_mod_create, (void*)__pyx_pymod_create},
+  {Py_mod_exec, (void*)__pyx_pymod_exec_core},
+  {0, NULL} /* sentinel terminating the slot table */
+};
+#endif
+
+static struct PyModuleDef __pyx_moduledef = { /* definition of the "core" extension module */
+    PyModuleDef_HEAD_INIT,
+    "core",
+    0, /* m_doc */
+  #if CYTHON_PEP489_MULTI_PHASE_INIT /* m_size is 0 with multi-phase init, -1 otherwise */
+    0, /* m_size */
+  #else
+    -1, /* m_size */
+  #endif
+    __pyx_methods /* m_methods */,
+  #if CYTHON_PEP489_MULTI_PHASE_INIT /* this field carries the slot table with multi-phase init, NULL otherwise */
+    __pyx_moduledef_slots, /* m_slots */
+  #else
+    NULL, /* m_reload */
+  #endif
+    NULL, /* m_traverse */
+    NULL, /* m_clear */
+    NULL /* m_free */
+};
+#endif
+#ifndef CYTHON_SMALL_CODE /* default for the attribute macro placed on the init helpers that follow; a definition supplied earlier wins */
+#if defined(__clang__) /* clang: expands to nothing */
+    #define CYTHON_SMALL_CODE
+#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) /* GCC 4.3 or newer: mark the function cold */
+    #define CYTHON_SMALL_CODE __attribute__((cold))
+#else /* any other compiler: expands to nothing */
+    #define CYTHON_SMALL_CODE
+#endif
+#endif
+
+static __Pyx_StringTabEntry __pyx_string_tab[] = { /* table of string constants handed to __Pyx_InitStrings; columns: target object slot, C string, sizeof that string, then four flags (presumably encoding, is_unicode, is_str, intern - confirm against __Pyx_StringTabEntry) */
+  {&__pyx_n_s_ASCII, __pyx_k_ASCII, sizeof(__pyx_k_ASCII), 0, 0, 1, 1},
+  {&__pyx_kp_s_Buffer_view_does_not_expose_stri, __pyx_k_Buffer_view_does_not_expose_stri, sizeof(__pyx_k_Buffer_view_does_not_expose_stri), 0, 0, 1, 0},
+  {&__pyx_kp_s_Can_only_create_a_buffer_that_is, __pyx_k_Can_only_create_a_buffer_that_is, sizeof(__pyx_k_Can_only_create_a_buffer_that_is), 0, 0, 1, 0},
+  {&__pyx_kp_s_Cannot_assign_to_read_only_memor, __pyx_k_Cannot_assign_to_read_only_memor, sizeof(__pyx_k_Cannot_assign_to_read_only_memor), 0, 0, 1, 0},
+  {&__pyx_kp_s_Cannot_create_writable_memory_vi, __pyx_k_Cannot_create_writable_memory_vi, sizeof(__pyx_k_Cannot_create_writable_memory_vi), 0, 0, 1, 0},
+  {&__pyx_kp_s_Cannot_index_with_type_s, __pyx_k_Cannot_index_with_type_s, sizeof(__pyx_k_Cannot_index_with_type_s), 0, 0, 1, 0},
+  {&__pyx_n_s_Ellipsis, __pyx_k_Ellipsis, sizeof(__pyx_k_Ellipsis), 0, 0, 1, 1},
+  {&__pyx_kp_s_Empty_shape_tuple_for_cython_arr, __pyx_k_Empty_shape_tuple_for_cython_arr, sizeof(__pyx_k_Empty_shape_tuple_for_cython_arr), 0, 0, 1, 0},
+  {&__pyx_n_s_ImportError, __pyx_k_ImportError, sizeof(__pyx_k_ImportError), 0, 0, 1, 1},
+  {&__pyx_kp_s_Incompatible_checksums_s_vs_0xb0, __pyx_k_Incompatible_checksums_s_vs_0xb0, sizeof(__pyx_k_Incompatible_checksums_s_vs_0xb0), 0, 0, 1, 0},
+  {&__pyx_n_s_IndexError, __pyx_k_IndexError, sizeof(__pyx_k_IndexError), 0, 0, 1, 1},
+  {&__pyx_kp_s_Indirect_dimensions_not_supporte, __pyx_k_Indirect_dimensions_not_supporte, sizeof(__pyx_k_Indirect_dimensions_not_supporte), 0, 0, 1, 0},
+  {&__pyx_kp_s_Invalid_mode_expected_c_or_fortr, __pyx_k_Invalid_mode_expected_c_or_fortr, sizeof(__pyx_k_Invalid_mode_expected_c_or_fortr), 0, 0, 1, 0},
+  {&__pyx_kp_s_Invalid_shape_in_axis_d_d, __pyx_k_Invalid_shape_in_axis_d_d, sizeof(__pyx_k_Invalid_shape_in_axis_d_d), 0, 0, 1, 0},
+  {&__pyx_n_s_MemoryError, __pyx_k_MemoryError, sizeof(__pyx_k_MemoryError), 0, 0, 1, 1},
+  {&__pyx_kp_s_MemoryView_of_r_at_0x_x, __pyx_k_MemoryView_of_r_at_0x_x, sizeof(__pyx_k_MemoryView_of_r_at_0x_x), 0, 0, 1, 0},
+  {&__pyx_kp_s_MemoryView_of_r_object, __pyx_k_MemoryView_of_r_object, sizeof(__pyx_k_MemoryView_of_r_object), 0, 0, 1, 0},
+  {&__pyx_n_b_O, __pyx_k_O, sizeof(__pyx_k_O), 0, 0, 0, 1}, /* the only _b_ entry; its flag pattern differs from the _s_ and _u_ rows */
+  {&__pyx_kp_s_Out_of_bounds_on_buffer_access_a, __pyx_k_Out_of_bounds_on_buffer_access_a, sizeof(__pyx_k_Out_of_bounds_on_buffer_access_a), 0, 0, 1, 0},
+  {&__pyx_n_s_PickleError, __pyx_k_PickleError, sizeof(__pyx_k_PickleError), 0, 0, 1, 1},
+  {&__pyx_n_s_TypeError, __pyx_k_TypeError, sizeof(__pyx_k_TypeError), 0, 0, 1, 1},
+  {&__pyx_kp_s_Unable_to_convert_item_to_object, __pyx_k_Unable_to_convert_item_to_object, sizeof(__pyx_k_Unable_to_convert_item_to_object), 0, 0, 1, 0},
+  {&__pyx_n_s_ValueError, __pyx_k_ValueError, sizeof(__pyx_k_ValueError), 0, 0, 1, 1},
+  {&__pyx_n_s_View_MemoryView, __pyx_k_View_MemoryView, sizeof(__pyx_k_View_MemoryView), 0, 0, 1, 1},
+  {&__pyx_n_s_allocate_buffer, __pyx_k_allocate_buffer, sizeof(__pyx_k_allocate_buffer), 0, 0, 1, 1},
+  {&__pyx_n_s_base, __pyx_k_base, sizeof(__pyx_k_base), 0, 0, 1, 1},
+  {&__pyx_n_s_c, __pyx_k_c, sizeof(__pyx_k_c), 0, 0, 1, 1},
+  {&__pyx_n_u_c, __pyx_k_c, sizeof(__pyx_k_c), 0, 1, 0, 1}, /* same C string as the row above, registered a second time with the _u_ flag pattern */
+  {&__pyx_n_s_class, __pyx_k_class, sizeof(__pyx_k_class), 0, 0, 1, 1},
+  {&__pyx_n_s_cline_in_traceback, __pyx_k_cline_in_traceback, sizeof(__pyx_k_cline_in_traceback), 0, 0, 1, 1},
+  {&__pyx_kp_s_contiguous_and_direct, __pyx_k_contiguous_and_direct, sizeof(__pyx_k_contiguous_and_direct), 0, 0, 1, 0},
+  {&__pyx_kp_s_contiguous_and_indirect, __pyx_k_contiguous_and_indirect, sizeof(__pyx_k_contiguous_and_indirect), 0, 0, 1, 0},
+  {&__pyx_n_s_dict, __pyx_k_dict, sizeof(__pyx_k_dict), 0, 0, 1, 1},
+  {&__pyx_n_s_dtype_is_object, __pyx_k_dtype_is_object, sizeof(__pyx_k_dtype_is_object), 0, 0, 1, 1},
+  {&__pyx_n_s_encode, __pyx_k_encode, sizeof(__pyx_k_encode), 0, 0, 1, 1},
+  {&__pyx_n_s_enumerate, __pyx_k_enumerate, sizeof(__pyx_k_enumerate), 0, 0, 1, 1},
+  {&__pyx_n_s_error, __pyx_k_error, sizeof(__pyx_k_error), 0, 0, 1, 1},
+  {&__pyx_n_s_flags, __pyx_k_flags, sizeof(__pyx_k_flags), 0, 0, 1, 1},
+  {&__pyx_n_s_format, __pyx_k_format, sizeof(__pyx_k_format), 0, 0, 1, 1},
+  {&__pyx_n_s_fortran, __pyx_k_fortran, sizeof(__pyx_k_fortran), 0, 0, 1, 1},
+  {&__pyx_n_u_fortran, __pyx_k_fortran, sizeof(__pyx_k_fortran), 0, 1, 0, 1},
+  {&__pyx_n_s_getstate, __pyx_k_getstate, sizeof(__pyx_k_getstate), 0, 0, 1, 1},
+  {&__pyx_kp_s_got_differing_extents_in_dimensi, __pyx_k_got_differing_extents_in_dimensi, sizeof(__pyx_k_got_differing_extents_in_dimensi), 0, 0, 1, 0},
+  {&__pyx_n_s_id, __pyx_k_id, sizeof(__pyx_k_id), 0, 0, 1, 1},
+  {&__pyx_n_s_import, __pyx_k_import, sizeof(__pyx_k_import), 0, 0, 1, 1},
+  {&__pyx_n_s_itemsize, __pyx_k_itemsize, sizeof(__pyx_k_itemsize), 0, 0, 1, 1},
+  {&__pyx_kp_s_itemsize_0_for_cython_array, __pyx_k_itemsize_0_for_cython_array, sizeof(__pyx_k_itemsize_0_for_cython_array), 0, 0, 1, 0},
+  {&__pyx_n_s_main, __pyx_k_main, sizeof(__pyx_k_main), 0, 0, 1, 1},
+  {&__pyx_n_s_max_neg_val, __pyx_k_max_neg_val, sizeof(__pyx_k_max_neg_val), 0, 0, 1, 1},
+  {&__pyx_n_s_memview, __pyx_k_memview, sizeof(__pyx_k_memview), 0, 0, 1, 1},
+  {&__pyx_n_s_mode, __pyx_k_mode, sizeof(__pyx_k_mode), 0, 0, 1, 1},
+  {&__pyx_n_s_name, __pyx_k_name, sizeof(__pyx_k_name), 0, 0, 1, 1},
+  {&__pyx_n_s_name_2, __pyx_k_name_2, sizeof(__pyx_k_name_2), 0, 0, 1, 1},
+  {&__pyx_n_s_ndim, __pyx_k_ndim, sizeof(__pyx_k_ndim), 0, 0, 1, 1},
+  {&__pyx_n_s_new, __pyx_k_new, sizeof(__pyx_k_new), 0, 0, 1, 1},
+  {&__pyx_kp_s_no_default___reduce___due_to_non, __pyx_k_no_default___reduce___due_to_non, sizeof(__pyx_k_no_default___reduce___due_to_non), 0, 0, 1, 0},
+  {&__pyx_n_s_np, __pyx_k_np, sizeof(__pyx_k_np), 0, 0, 1, 1},
+  {&__pyx_n_s_numpy, __pyx_k_numpy, sizeof(__pyx_k_numpy), 0, 0, 1, 1},
+  {&__pyx_kp_u_numpy_core_multiarray_failed_to, __pyx_k_numpy_core_multiarray_failed_to, sizeof(__pyx_k_numpy_core_multiarray_failed_to), 0, 1, 0, 0},
+  {&__pyx_kp_u_numpy_core_umath_failed_to_impor, __pyx_k_numpy_core_umath_failed_to_impor, sizeof(__pyx_k_numpy_core_umath_failed_to_impor), 0, 1, 0, 0},
+  {&__pyx_n_s_obj, __pyx_k_obj, sizeof(__pyx_k_obj), 0, 0, 1, 1},
+  {&__pyx_n_s_pack, __pyx_k_pack, sizeof(__pyx_k_pack), 0, 0, 1, 1},
+  {&__pyx_n_s_paths, __pyx_k_paths, sizeof(__pyx_k_paths), 0, 0, 1, 1},
+  {&__pyx_n_s_pickle, __pyx_k_pickle, sizeof(__pyx_k_pickle), 0, 0, 1, 1},
+  {&__pyx_n_s_pyx_PickleError, __pyx_k_pyx_PickleError, sizeof(__pyx_k_pyx_PickleError), 0, 0, 1, 1},
+  {&__pyx_n_s_pyx_checksum, __pyx_k_pyx_checksum, sizeof(__pyx_k_pyx_checksum), 0, 0, 1, 1},
+  {&__pyx_n_s_pyx_getbuffer, __pyx_k_pyx_getbuffer, sizeof(__pyx_k_pyx_getbuffer), 0, 0, 1, 1},
+  {&__pyx_n_s_pyx_result, __pyx_k_pyx_result, sizeof(__pyx_k_pyx_result), 0, 0, 1, 1},
+  {&__pyx_n_s_pyx_state, __pyx_k_pyx_state, sizeof(__pyx_k_pyx_state), 0, 0, 1, 1},
+  {&__pyx_n_s_pyx_type, __pyx_k_pyx_type, sizeof(__pyx_k_pyx_type), 0, 0, 1, 1},
+  {&__pyx_n_s_pyx_unpickle_Enum, __pyx_k_pyx_unpickle_Enum, sizeof(__pyx_k_pyx_unpickle_Enum), 0, 0, 1, 1},
+  {&__pyx_n_s_pyx_vtable, __pyx_k_pyx_vtable, sizeof(__pyx_k_pyx_vtable), 0, 0, 1, 1},
+  {&__pyx_n_s_range, __pyx_k_range, sizeof(__pyx_k_range), 0, 0, 1, 1},
+  {&__pyx_n_s_reduce, __pyx_k_reduce, sizeof(__pyx_k_reduce), 0, 0, 1, 1},
+  {&__pyx_n_s_reduce_cython, __pyx_k_reduce_cython, sizeof(__pyx_k_reduce_cython), 0, 0, 1, 1},
+  {&__pyx_n_s_reduce_ex, __pyx_k_reduce_ex, sizeof(__pyx_k_reduce_ex), 0, 0, 1, 1},
+  {&__pyx_n_s_setstate, __pyx_k_setstate, sizeof(__pyx_k_setstate), 0, 0, 1, 1},
+  {&__pyx_n_s_setstate_cython, __pyx_k_setstate_cython, sizeof(__pyx_k_setstate_cython), 0, 0, 1, 1},
+  {&__pyx_n_s_shape, __pyx_k_shape, sizeof(__pyx_k_shape), 0, 0, 1, 1},
+  {&__pyx_n_s_size, __pyx_k_size, sizeof(__pyx_k_size), 0, 0, 1, 1},
+  {&__pyx_n_s_start, __pyx_k_start, sizeof(__pyx_k_start), 0, 0, 1, 1},
+  {&__pyx_n_s_step, __pyx_k_step, sizeof(__pyx_k_step), 0, 0, 1, 1},
+  {&__pyx_n_s_stop, __pyx_k_stop, sizeof(__pyx_k_stop), 0, 0, 1, 1},
+  {&__pyx_kp_s_strided_and_direct, __pyx_k_strided_and_direct, sizeof(__pyx_k_strided_and_direct), 0, 0, 1, 0},
+  {&__pyx_kp_s_strided_and_direct_or_indirect, __pyx_k_strided_and_direct_or_indirect, sizeof(__pyx_k_strided_and_direct_or_indirect), 0, 0, 1, 0},
+  {&__pyx_kp_s_strided_and_indirect, __pyx_k_strided_and_indirect, sizeof(__pyx_k_strided_and_indirect), 0, 0, 1, 0},
+  {&__pyx_kp_s_stringsource, __pyx_k_stringsource, sizeof(__pyx_k_stringsource), 0, 0, 1, 0},
+  {&__pyx_n_s_struct, __pyx_k_struct, sizeof(__pyx_k_struct), 0, 0, 1, 1},
+  {&__pyx_n_s_t_xs, __pyx_k_t_xs, sizeof(__pyx_k_t_xs), 0, 0, 1, 1},
+  {&__pyx_n_s_t_ys, __pyx_k_t_ys, sizeof(__pyx_k_t_ys), 0, 0, 1, 1},
+  {&__pyx_n_s_test, __pyx_k_test, sizeof(__pyx_k_test), 0, 0, 1, 1},
+  {&__pyx_kp_s_unable_to_allocate_array_data, __pyx_k_unable_to_allocate_array_data, sizeof(__pyx_k_unable_to_allocate_array_data), 0, 0, 1, 0},
+  {&__pyx_kp_s_unable_to_allocate_shape_and_str, __pyx_k_unable_to_allocate_shape_and_str, sizeof(__pyx_k_unable_to_allocate_shape_and_str), 0, 0, 1, 0},
+  {&__pyx_n_s_unpack, __pyx_k_unpack, sizeof(__pyx_k_unpack), 0, 0, 1, 1},
+  {&__pyx_n_s_update, __pyx_k_update, sizeof(__pyx_k_update), 0, 0, 1, 1},
+  {&__pyx_n_s_values, __pyx_k_values, sizeof(__pyx_k_values), 0, 0, 1, 1},
+  {0, 0, 0, 0, 0, 0, 0} /* all-zero sentinel terminates the table */
+};
+static CYTHON_SMALL_CODE int __Pyx_InitCachedBuiltins(void) { /* looks up each builtin the module uses once and caches it in a __pyx_builtin_* global; returns 0 on success, -1 on the first failed lookup */
+  __pyx_builtin_range = __Pyx_GetBuiltinName(__pyx_n_s_range); if (!__pyx_builtin_range) __PYX_ERR(0, 19, __pyx_L1_error) /* __PYX_ERR args look like (source-file index, source line, jump label) - confirm against the macro */
+  __pyx_builtin_ImportError = __Pyx_GetBuiltinName(__pyx_n_s_ImportError); if (!__pyx_builtin_ImportError) __PYX_ERR(1, 945, __pyx_L1_error)
+  __pyx_builtin_ValueError = __Pyx_GetBuiltinName(__pyx_n_s_ValueError); if (!__pyx_builtin_ValueError) __PYX_ERR(2, 133, __pyx_L1_error)
+  __pyx_builtin_MemoryError = __Pyx_GetBuiltinName(__pyx_n_s_MemoryError); if (!__pyx_builtin_MemoryError) __PYX_ERR(2, 148, __pyx_L1_error)
+  __pyx_builtin_enumerate = __Pyx_GetBuiltinName(__pyx_n_s_enumerate); if (!__pyx_builtin_enumerate) __PYX_ERR(2, 151, __pyx_L1_error)
+  __pyx_builtin_TypeError = __Pyx_GetBuiltinName(__pyx_n_s_TypeError); if (!__pyx_builtin_TypeError) __PYX_ERR(2, 2, __pyx_L1_error)
+  __pyx_builtin_Ellipsis = __Pyx_GetBuiltinName(__pyx_n_s_Ellipsis); if (!__pyx_builtin_Ellipsis) __PYX_ERR(2, 404, __pyx_L1_error)
+  __pyx_builtin_id = __Pyx_GetBuiltinName(__pyx_n_s_id); if (!__pyx_builtin_id) __PYX_ERR(2, 613, __pyx_L1_error)
+  __pyx_builtin_IndexError = __Pyx_GetBuiltinName(__pyx_n_s_IndexError); if (!__pyx_builtin_IndexError) __PYX_ERR(2, 832, __pyx_L1_error)
+  return 0;
+  __pyx_L1_error:; /* shared failure exit used by every __PYX_ERR above */
+  return -1;
+}
+
+static CYTHON_SMALL_CODE int __Pyx_InitCachedConstants(void) { /* pre-builds the constant tuples, one slice and one code object used by the module; returns 0 on success, -1 on the first failed allocation */
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("__Pyx_InitCachedConstants", 0);
+
+  /* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":945
+ *         __pyx_import_array()
+ *     except Exception:
+ *         raise ImportError("numpy.core.multiarray failed to import")             # <<<<<<<<<<<<<<
+ * 
+ * cdef inline int import_umath() except -1:
+ */
+  __pyx_tuple__2 = PyTuple_Pack(1, __pyx_kp_u_numpy_core_multiarray_failed_to); if (unlikely(!__pyx_tuple__2)) __PYX_ERR(1, 945, __pyx_L1_error) /* 1-tuple holding the message for the raise echoed above; the same pattern repeats for most constants below */
+  __Pyx_GOTREF(__pyx_tuple__2);
+  __Pyx_GIVEREF(__pyx_tuple__2);
+
+  /* "../../../../../private/var/folders/pt/dmf7ppyx1315fyhk9__tbjmm0000gn/T/pip-build-env-rdsl2tnb/overlay/lib/python3.9/site-packages/numpy/__init__.pxd":951
+ *         _import_umath()
+ *     except Exception:
+ *         raise ImportError("numpy.core.umath failed to import")             # <<<<<<<<<<<<<<
+ * 
+ * cdef inline int import_ufunc() except -1:
+ */
+  __pyx_tuple__3 = PyTuple_Pack(1, __pyx_kp_u_numpy_core_umath_failed_to_impor); if (unlikely(!__pyx_tuple__3)) __PYX_ERR(1, 951, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_tuple__3);
+  __Pyx_GIVEREF(__pyx_tuple__3);
+
+  /* "View.MemoryView":133
+ * 
+ *         if not self.ndim:
+ *             raise ValueError("Empty shape tuple for cython.array")             # <<<<<<<<<<<<<<
+ * 
+ *         if itemsize <= 0:
+ */
+  __pyx_tuple__4 = PyTuple_Pack(1, __pyx_kp_s_Empty_shape_tuple_for_cython_arr); if (unlikely(!__pyx_tuple__4)) __PYX_ERR(2, 133, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_tuple__4);
+  __Pyx_GIVEREF(__pyx_tuple__4);
+
+  /* "View.MemoryView":136
+ * 
+ *         if itemsize <= 0:
+ *             raise ValueError("itemsize <= 0 for cython.array")             # <<<<<<<<<<<<<<
+ * 
+ *         if not isinstance(format, bytes):
+ */
+  __pyx_tuple__5 = PyTuple_Pack(1, __pyx_kp_s_itemsize_0_for_cython_array); if (unlikely(!__pyx_tuple__5)) __PYX_ERR(2, 136, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_tuple__5);
+  __Pyx_GIVEREF(__pyx_tuple__5);
+
+  /* "View.MemoryView":148
+ * 
+ *         if not self._shape:
+ *             raise MemoryError("unable to allocate shape and strides.")             # <<<<<<<<<<<<<<
+ * 
+ * 
+ */
+  __pyx_tuple__6 = PyTuple_Pack(1, __pyx_kp_s_unable_to_allocate_shape_and_str); if (unlikely(!__pyx_tuple__6)) __PYX_ERR(2, 148, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_tuple__6);
+  __Pyx_GIVEREF(__pyx_tuple__6);
+
+  /* "View.MemoryView":176
+ *             self.data = <char *>malloc(self.len)
+ *             if not self.data:
+ *                 raise MemoryError("unable to allocate array data.")             # <<<<<<<<<<<<<<
+ * 
+ *             if self.dtype_is_object:
+ */
+  __pyx_tuple__7 = PyTuple_Pack(1, __pyx_kp_s_unable_to_allocate_array_data); if (unlikely(!__pyx_tuple__7)) __PYX_ERR(2, 176, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_tuple__7);
+  __Pyx_GIVEREF(__pyx_tuple__7);
+
+  /* "View.MemoryView":192
+ *             bufmode = PyBUF_F_CONTIGUOUS | PyBUF_ANY_CONTIGUOUS
+ *         if not (flags & bufmode):
+ *             raise ValueError("Can only create a buffer that is contiguous in memory.")             # <<<<<<<<<<<<<<
+ *         info.buf = self.data
+ *         info.len = self.len
+ */
+  __pyx_tuple__8 = PyTuple_Pack(1, __pyx_kp_s_Can_only_create_a_buffer_that_is); if (unlikely(!__pyx_tuple__8)) __PYX_ERR(2, 192, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_tuple__8);
+  __Pyx_GIVEREF(__pyx_tuple__8);
+
+  /* "(tree fragment)":2
+ * def __reduce_cython__(self):
+ *     raise TypeError("no default __reduce__ due to non-trivial __cinit__")             # <<<<<<<<<<<<<<
+ * def __setstate_cython__(self, __pyx_state):
+ *     raise TypeError("no default __reduce__ due to non-trivial __cinit__")
+ */
+  __pyx_tuple__9 = PyTuple_Pack(1, __pyx_kp_s_no_default___reduce___due_to_non); if (unlikely(!__pyx_tuple__9)) __PYX_ERR(2, 2, __pyx_L1_error) /* this message is packed into a separate tuple for every raise site that uses it */
+  __Pyx_GOTREF(__pyx_tuple__9);
+  __Pyx_GIVEREF(__pyx_tuple__9);
+
+  /* "(tree fragment)":4
+ *     raise TypeError("no default __reduce__ due to non-trivial __cinit__")
+ * def __setstate_cython__(self, __pyx_state):
+ *     raise TypeError("no default __reduce__ due to non-trivial __cinit__")             # <<<<<<<<<<<<<<
+ */
+  __pyx_tuple__10 = PyTuple_Pack(1, __pyx_kp_s_no_default___reduce___due_to_non); if (unlikely(!__pyx_tuple__10)) __PYX_ERR(2, 4, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_tuple__10);
+  __Pyx_GIVEREF(__pyx_tuple__10);
+
+  /* "View.MemoryView":418
+ *     def __setitem__(memoryview self, object index, object value):
+ *         if self.view.readonly:
+ *             raise TypeError("Cannot assign to read-only memoryview")             # <<<<<<<<<<<<<<
+ * 
+ *         have_slices, index = _unellipsify(index, self.view.ndim)
+ */
+  __pyx_tuple__11 = PyTuple_Pack(1, __pyx_kp_s_Cannot_assign_to_read_only_memor); if (unlikely(!__pyx_tuple__11)) __PYX_ERR(2, 418, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_tuple__11);
+  __Pyx_GIVEREF(__pyx_tuple__11);
+
+  /* "View.MemoryView":495
+ *             result = struct.unpack(self.view.format, bytesitem)
+ *         except struct.error:
+ *             raise ValueError("Unable to convert item to object")             # <<<<<<<<<<<<<<
+ *         else:
+ *             if len(self.view.format) == 1:
+ */
+  __pyx_tuple__12 = PyTuple_Pack(1, __pyx_kp_s_Unable_to_convert_item_to_object); if (unlikely(!__pyx_tuple__12)) __PYX_ERR(2, 495, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_tuple__12);
+  __Pyx_GIVEREF(__pyx_tuple__12);
+
+  /* "View.MemoryView":520
+ *     def __getbuffer__(self, Py_buffer *info, int flags):
+ *         if flags & PyBUF_WRITABLE and self.view.readonly:
+ *             raise ValueError("Cannot create writable memory view from read-only memoryview")             # <<<<<<<<<<<<<<
+ * 
+ *         if flags & PyBUF_ND:
+ */
+  __pyx_tuple__13 = PyTuple_Pack(1, __pyx_kp_s_Cannot_create_writable_memory_vi); if (unlikely(!__pyx_tuple__13)) __PYX_ERR(2, 520, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_tuple__13);
+  __Pyx_GIVEREF(__pyx_tuple__13);
+
+  /* "View.MemoryView":570
+ *         if self.view.strides == NULL:
+ * 
+ *             raise ValueError("Buffer view does not expose strides")             # <<<<<<<<<<<<<<
+ * 
+ *         return tuple([stride for stride in self.view.strides[:self.view.ndim]])
+ */
+  __pyx_tuple__14 = PyTuple_Pack(1, __pyx_kp_s_Buffer_view_does_not_expose_stri); if (unlikely(!__pyx_tuple__14)) __PYX_ERR(2, 570, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_tuple__14);
+  __Pyx_GIVEREF(__pyx_tuple__14);
+
+  /* "View.MemoryView":577
+ *     def suboffsets(self):
+ *         if self.view.suboffsets == NULL:
+ *             return (-1,) * self.view.ndim             # <<<<<<<<<<<<<<
+ * 
+ *         return tuple([suboffset for suboffset in self.view.suboffsets[:self.view.ndim]])
+ */
+  __pyx_tuple__15 = PyTuple_New(1); if (unlikely(!__pyx_tuple__15)) __PYX_ERR(2, 577, __pyx_L1_error) /* the (-1,) tuple is assembled by hand rather than with PyTuple_Pack */
+  __Pyx_GOTREF(__pyx_tuple__15);
+  __Pyx_INCREF(__pyx_int_neg_1); /* extra reference for the tuple slot filled two lines below */
+  __Pyx_GIVEREF(__pyx_int_neg_1);
+  PyTuple_SET_ITEM(__pyx_tuple__15, 0, __pyx_int_neg_1);
+  __Pyx_GIVEREF(__pyx_tuple__15);
+
+  /* "(tree fragment)":2
+ * def __reduce_cython__(self):
+ *     raise TypeError("no default __reduce__ due to non-trivial __cinit__")             # <<<<<<<<<<<<<<
+ * def __setstate_cython__(self, __pyx_state):
+ *     raise TypeError("no default __reduce__ due to non-trivial __cinit__")
+ */
+  __pyx_tuple__16 = PyTuple_Pack(1, __pyx_kp_s_no_default___reduce___due_to_non); if (unlikely(!__pyx_tuple__16)) __PYX_ERR(2, 2, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_tuple__16);
+  __Pyx_GIVEREF(__pyx_tuple__16);
+
+  /* "(tree fragment)":4
+ *     raise TypeError("no default __reduce__ due to non-trivial __cinit__")
+ * def __setstate_cython__(self, __pyx_state):
+ *     raise TypeError("no default __reduce__ due to non-trivial __cinit__")             # <<<<<<<<<<<<<<
+ */
+  __pyx_tuple__17 = PyTuple_Pack(1, __pyx_kp_s_no_default___reduce___due_to_non); if (unlikely(!__pyx_tuple__17)) __PYX_ERR(2, 4, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_tuple__17);
+  __Pyx_GIVEREF(__pyx_tuple__17);
+
+  /* "View.MemoryView":682
+ *         if item is Ellipsis:
+ *             if not seen_ellipsis:
+ *                 result.extend([slice(None)] * (ndim - len(tup) + 1))             # <<<<<<<<<<<<<<
+ *                 seen_ellipsis = True
+ *             else:
+ */
+  __pyx_slice__18 = PySlice_New(Py_None, Py_None, Py_None); if (unlikely(!__pyx_slice__18)) __PYX_ERR(2, 682, __pyx_L1_error) /* cached slice(None) object; the only non-tuple constant besides the code object at the end */
+  __Pyx_GOTREF(__pyx_slice__18);
+  __Pyx_GIVEREF(__pyx_slice__18);
+
+  /* "View.MemoryView":703
+ *     for suboffset in suboffsets[:ndim]:
+ *         if suboffset >= 0:
+ *             raise ValueError("Indirect dimensions not supported")             # <<<<<<<<<<<<<<
+ * 
+ * 
+ */
+  __pyx_tuple__19 = PyTuple_Pack(1, __pyx_kp_s_Indirect_dimensions_not_supporte); if (unlikely(!__pyx_tuple__19)) __PYX_ERR(2, 703, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_tuple__19);
+  __Pyx_GIVEREF(__pyx_tuple__19);
+
+  /* "(tree fragment)":2
+ * def __reduce_cython__(self):
+ *     raise TypeError("no default __reduce__ due to non-trivial __cinit__")             # <<<<<<<<<<<<<<
+ * def __setstate_cython__(self, __pyx_state):
+ *     raise TypeError("no default __reduce__ due to non-trivial __cinit__")
+ */
+  __pyx_tuple__20 = PyTuple_Pack(1, __pyx_kp_s_no_default___reduce___due_to_non); if (unlikely(!__pyx_tuple__20)) __PYX_ERR(2, 2, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_tuple__20);
+  __Pyx_GIVEREF(__pyx_tuple__20);
+
+  /* "(tree fragment)":4
+ *     raise TypeError("no default __reduce__ due to non-trivial __cinit__")
+ * def __setstate_cython__(self, __pyx_state):
+ *     raise TypeError("no default __reduce__ due to non-trivial __cinit__")             # <<<<<<<<<<<<<<
+ */
+  __pyx_tuple__21 = PyTuple_Pack(1, __pyx_kp_s_no_default___reduce___due_to_non); if (unlikely(!__pyx_tuple__21)) __PYX_ERR(2, 4, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_tuple__21);
+  __Pyx_GIVEREF(__pyx_tuple__21);
+
+  /* "View.MemoryView":286
+ *         return self.name
+ * 
+ * cdef generic = Enum("<strided and direct or indirect>")             # <<<<<<<<<<<<<<
+ * cdef strided = Enum("<strided and direct>") # default
+ * cdef indirect = Enum("<strided and indirect>")
+ */
+  __pyx_tuple__22 = PyTuple_Pack(1, __pyx_kp_s_strided_and_direct_or_indirect); if (unlikely(!__pyx_tuple__22)) __PYX_ERR(2, 286, __pyx_L1_error) /* from here the 1-tuples hold Enum(...) constructor arguments rather than exception messages */
+  __Pyx_GOTREF(__pyx_tuple__22);
+  __Pyx_GIVEREF(__pyx_tuple__22);
+
+  /* "View.MemoryView":287
+ * 
+ * cdef generic = Enum("<strided and direct or indirect>")
+ * cdef strided = Enum("<strided and direct>") # default             # <<<<<<<<<<<<<<
+ * cdef indirect = Enum("<strided and indirect>")
+ * 
+ */
+  __pyx_tuple__23 = PyTuple_Pack(1, __pyx_kp_s_strided_and_direct); if (unlikely(!__pyx_tuple__23)) __PYX_ERR(2, 287, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_tuple__23);
+  __Pyx_GIVEREF(__pyx_tuple__23);
+
+  /* "View.MemoryView":288
+ * cdef generic = Enum("<strided and direct or indirect>")
+ * cdef strided = Enum("<strided and direct>") # default
+ * cdef indirect = Enum("<strided and indirect>")             # <<<<<<<<<<<<<<
+ * 
+ * 
+ */
+  __pyx_tuple__24 = PyTuple_Pack(1, __pyx_kp_s_strided_and_indirect); if (unlikely(!__pyx_tuple__24)) __PYX_ERR(2, 288, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_tuple__24);
+  __Pyx_GIVEREF(__pyx_tuple__24);
+
+  /* "View.MemoryView":291
+ * 
+ * 
+ * cdef contiguous = Enum("<contiguous and direct>")             # <<<<<<<<<<<<<<
+ * cdef indirect_contiguous = Enum("<contiguous and indirect>")
+ * 
+ */
+  __pyx_tuple__25 = PyTuple_Pack(1, __pyx_kp_s_contiguous_and_direct); if (unlikely(!__pyx_tuple__25)) __PYX_ERR(2, 291, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_tuple__25);
+  __Pyx_GIVEREF(__pyx_tuple__25);
+
+  /* "View.MemoryView":292
+ * 
+ * cdef contiguous = Enum("<contiguous and direct>")
+ * cdef indirect_contiguous = Enum("<contiguous and indirect>")             # <<<<<<<<<<<<<<
+ * 
+ * 
+ */
+  __pyx_tuple__26 = PyTuple_Pack(1, __pyx_kp_s_contiguous_and_indirect); if (unlikely(!__pyx_tuple__26)) __PYX_ERR(2, 292, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_tuple__26);
+  __Pyx_GIVEREF(__pyx_tuple__26);
+
+  /* "(tree fragment)":1
+ * def __pyx_unpickle_Enum(__pyx_type, long __pyx_checksum, __pyx_state):             # <<<<<<<<<<<<<<
+ *     cdef object __pyx_PickleError
+ *     cdef object __pyx_result
+ */
+  __pyx_tuple__27 = PyTuple_Pack(5, __pyx_n_s_pyx_type, __pyx_n_s_pyx_checksum, __pyx_n_s_pyx_state, __pyx_n_s_pyx_PickleError, __pyx_n_s_pyx_result); if (unlikely(!__pyx_tuple__27)) __PYX_ERR(2, 1, __pyx_L1_error) /* five names: the three parameters plus the two cdef locals shown above */
+  __Pyx_GOTREF(__pyx_tuple__27);
+  __Pyx_GIVEREF(__pyx_tuple__27);
+  __pyx_codeobj__28 = (PyObject*)__Pyx_PyCode_New(3, 0, 5, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__27, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_stringsource, __pyx_n_s_pyx_unpickle_Enum, 1, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__28)) __PYX_ERR(2, 1, __pyx_L1_error) /* code object for __pyx_unpickle_Enum, built from the names tuple above */
+  __Pyx_RefNannyFinishContext();
+  return 0;
+  __pyx_L1_error:; /* shared failure exit used by every __PYX_ERR above */
+  __Pyx_RefNannyFinishContext();
+  return -1;
+}
+
+static CYTHON_SMALL_CODE int __Pyx_InitGlobals(void) { /* Initialises the module string table and the cached integer objects; returns 0 on success, -1 on failure. */
+  /* InitThreads.init: PyEval_InitThreads() is only called when WITH_THREAD is defined and PY_VERSION_HEX is below 0x030700F0. */
+  #if defined(WITH_THREAD) && PY_VERSION_HEX < 0x030700F0
+PyEval_InitThreads();
+#endif
+
+if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 1, __pyx_L1_error) /* stop early if a Python exception is already pending */
+
+  if (__Pyx_InitStrings(__pyx_string_tab) < 0) __PYX_ERR(0, 1, __pyx_L1_error); /* build the objects described by __pyx_string_tab */
+  __pyx_int_0 = PyInt_FromLong(0); if (unlikely(!__pyx_int_0)) __PYX_ERR(0, 1, __pyx_L1_error)
+  __pyx_int_1 = PyInt_FromLong(1); if (unlikely(!__pyx_int_1)) __PYX_ERR(0, 1, __pyx_L1_error)
+  __pyx_int_184977713 = PyInt_FromLong(184977713L); if (unlikely(!__pyx_int_184977713)) __PYX_ERR(0, 1, __pyx_L1_error) /* NOTE(review): presumably the checksum compared in __pyx_unpickle_Enum - confirm at its use site */
+  __pyx_int_neg_1 = PyInt_FromLong(-1); if (unlikely(!__pyx_int_neg_1)) __PYX_ERR(0, 1, __pyx_L1_error)
+  return 0;
+  __pyx_L1_error:; /* error exit named in every __PYX_ERR(...) check above */
+  return -1;
+}
+
+static CYTHON_SMALL_CODE int __Pyx_modinit_global_init_code(void); /*proto*/
+static CYTHON_SMALL_CODE int __Pyx_modinit_variable_export_code(void); /*proto*/
+static CYTHON_SMALL_CODE int __Pyx_modinit_function_export_code(void); /*proto*/
+static CYTHON_SMALL_CODE int __Pyx_modinit_type_init_code(void); /*proto*/
+static CYTHON_SMALL_CODE int __Pyx_modinit_type_import_code(void); /*proto*/
+static CYTHON_SMALL_CODE int __Pyx_modinit_variable_import_code(void); /*proto*/
+static CYTHON_SMALL_CODE int __Pyx_modinit_function_import_code(void); /*proto*/
+
+static int __Pyx_modinit_global_init_code(void) { /* Points the five memoryview layout globals at None so they hold a valid object before their real values are assigned; always returns 0. */
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("__Pyx_modinit_global_init_code", 0);
+  /*--- Global init code: each assignment is paired with Py_INCREF(Py_None) because the global keeps its own reference ---*/
+  generic = Py_None; Py_INCREF(Py_None);
+  strided = Py_None; Py_INCREF(Py_None);
+  indirect = Py_None; Py_INCREF(Py_None);
+  contiguous = Py_None; Py_INCREF(Py_None);
+  indirect_contiguous = Py_None; Py_INCREF(Py_None);
+  __Pyx_RefNannyFinishContext();
+  return 0;
+}
+
+static int __Pyx_modinit_variable_export_code(void) { /* Module-init stage for exporting C variables; no statements are emitted for it here, so it always returns 0. */
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("__Pyx_modinit_variable_export_code", 0);
+  /*--- Variable export code (empty in this module) ---*/
+  __Pyx_RefNannyFinishContext();
+  return 0;
+}
+
+static int __Pyx_modinit_function_export_code(void) { /* Module-init stage for exporting C functions; no statements are emitted for it here, so it always returns 0. */
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("__Pyx_modinit_function_export_code", 0);
+  /*--- Function export code (empty in this module) ---*/
+  __Pyx_RefNannyFinishContext();
+  return 0;
+}
+
+static int __Pyx_modinit_type_init_code(void) { /* Readies the four extension types defined in this file (array, MemviewEnum, memoryview, memoryviewslice) and publishes their type pointers; returns 0 on success, -1 on failure. */
+  __Pyx_RefNannyDeclarations
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("__Pyx_modinit_type_init_code", 0);
+  /*--- Type init code: per type, fill the vtable (if any), call PyType_Ready, attach the vtable to tp_dict, call __Pyx_setup_reduce, then publish the pointer ---*/
+  __pyx_vtabptr_array = &__pyx_vtable_array;
+  __pyx_vtable_array.get_memview = (PyObject *(*)(struct __pyx_array_obj *))__pyx_array_get_memview;
+  if (PyType_Ready(&__pyx_type___pyx_array) < 0) __PYX_ERR(2, 105, __pyx_L1_error)
+  #if PY_VERSION_HEX < 0x030800B1
+  __pyx_type___pyx_array.tp_print = 0;
+  #endif
+  if (__Pyx_SetVtable(__pyx_type___pyx_array.tp_dict, __pyx_vtabptr_array) < 0) __PYX_ERR(2, 105, __pyx_L1_error)
+  if (__Pyx_setup_reduce((PyObject*)&__pyx_type___pyx_array) < 0) __PYX_ERR(2, 105, __pyx_L1_error) /* NOTE(review): helper is defined elsewhere; presumably wires up __reduce__/pickling support - confirm */
+  __pyx_array_type = &__pyx_type___pyx_array;
+  if (PyType_Ready(&__pyx_type___pyx_MemviewEnum) < 0) __PYX_ERR(2, 279, __pyx_L1_error) /* MemviewEnum has no vtable, so no __Pyx_SetVtable call follows */
+  #if PY_VERSION_HEX < 0x030800B1
+  __pyx_type___pyx_MemviewEnum.tp_print = 0;
+  #endif
+  if ((CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP) && likely(!__pyx_type___pyx_MemviewEnum.tp_dictoffset && __pyx_type___pyx_MemviewEnum.tp_getattro == PyObject_GenericGetAttr)) {
+    __pyx_type___pyx_MemviewEnum.tp_getattro = __Pyx_PyObject_GenericGetAttr; /* swap in Cython's getattr only when the type has no instance dict and still uses the generic slot */
+  }
+  if (__Pyx_setup_reduce((PyObject*)&__pyx_type___pyx_MemviewEnum) < 0) __PYX_ERR(2, 279, __pyx_L1_error)
+  __pyx_MemviewEnum_type = &__pyx_type___pyx_MemviewEnum;
+  __pyx_vtabptr_memoryview = &__pyx_vtable_memoryview;
+  __pyx_vtable_memoryview.get_item_pointer = (char *(*)(struct __pyx_memoryview_obj *, PyObject *))__pyx_memoryview_get_item_pointer;
+  __pyx_vtable_memoryview.is_slice = (PyObject *(*)(struct __pyx_memoryview_obj *, PyObject *))__pyx_memoryview_is_slice;
+  __pyx_vtable_memoryview.setitem_slice_assignment = (PyObject *(*)(struct __pyx_memoryview_obj *, PyObject *, PyObject *))__pyx_memoryview_setitem_slice_assignment;
+  __pyx_vtable_memoryview.setitem_slice_assign_scalar = (PyObject *(*)(struct __pyx_memoryview_obj *, struct __pyx_memoryview_obj *, PyObject *))__pyx_memoryview_setitem_slice_assign_scalar;
+  __pyx_vtable_memoryview.setitem_indexed = (PyObject *(*)(struct __pyx_memoryview_obj *, PyObject *, PyObject *))__pyx_memoryview_setitem_indexed;
+  __pyx_vtable_memoryview.convert_item_to_object = (PyObject *(*)(struct __pyx_memoryview_obj *, char *))__pyx_memoryview_convert_item_to_object;
+  __pyx_vtable_memoryview.assign_item_from_object = (PyObject *(*)(struct __pyx_memoryview_obj *, char *, PyObject *))__pyx_memoryview_assign_item_from_object;
+  if (PyType_Ready(&__pyx_type___pyx_memoryview) < 0) __PYX_ERR(2, 330, __pyx_L1_error)
+  #if PY_VERSION_HEX < 0x030800B1
+  __pyx_type___pyx_memoryview.tp_print = 0;
+  #endif
+  if ((CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP) && likely(!__pyx_type___pyx_memoryview.tp_dictoffset && __pyx_type___pyx_memoryview.tp_getattro == PyObject_GenericGetAttr)) {
+    __pyx_type___pyx_memoryview.tp_getattro = __Pyx_PyObject_GenericGetAttr;
+  }
+  if (__Pyx_SetVtable(__pyx_type___pyx_memoryview.tp_dict, __pyx_vtabptr_memoryview) < 0) __PYX_ERR(2, 330, __pyx_L1_error)
+  if (__Pyx_setup_reduce((PyObject*)&__pyx_type___pyx_memoryview) < 0) __PYX_ERR(2, 330, __pyx_L1_error)
+  __pyx_memoryview_type = &__pyx_type___pyx_memoryview;
+  __pyx_vtabptr__memoryviewslice = &__pyx_vtable__memoryviewslice;
+  __pyx_vtable__memoryviewslice.__pyx_base = *__pyx_vtabptr_memoryview; /* start from a copy of the memoryview vtable, then override two slots below */
+  __pyx_vtable__memoryviewslice.__pyx_base.convert_item_to_object = (PyObject *(*)(struct __pyx_memoryview_obj *, char *))__pyx_memoryviewslice_convert_item_to_object;
+  __pyx_vtable__memoryviewslice.__pyx_base.assign_item_from_object = (PyObject *(*)(struct __pyx_memoryview_obj *, char *, PyObject *))__pyx_memoryviewslice_assign_item_from_object;
+  __pyx_type___pyx_memoryviewslice.tp_base = __pyx_memoryview_type; /* base type is set before PyType_Ready is called on the subtype */
+  if (PyType_Ready(&__pyx_type___pyx_memoryviewslice) < 0) __PYX_ERR(2, 965, __pyx_L1_error)
+  #if PY_VERSION_HEX < 0x030800B1
+  __pyx_type___pyx_memoryviewslice.tp_print = 0;
+  #endif
+  if ((CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP) && likely(!__pyx_type___pyx_memoryviewslice.tp_dictoffset && __pyx_type___pyx_memoryviewslice.tp_getattro == PyObject_GenericGetAttr)) {
+    __pyx_type___pyx_memoryviewslice.tp_getattro = __Pyx_PyObject_GenericGetAttr;
+  }
+  if (__Pyx_SetVtable(__pyx_type___pyx_memoryviewslice.tp_dict, __pyx_vtabptr__memoryviewslice) < 0) __PYX_ERR(2, 965, __pyx_L1_error)
+  if (__Pyx_setup_reduce((PyObject*)&__pyx_type___pyx_memoryviewslice) < 0) __PYX_ERR(2, 965, __pyx_L1_error)
+  __pyx_memoryviewslice_type = &__pyx_type___pyx_memoryviewslice;
+  __Pyx_RefNannyFinishContext();
+  return 0;
+  __pyx_L1_error:; /* error exit named in every __PYX_ERR(...) check above */
+  __Pyx_RefNannyFinishContext();
+  return -1;
+}
+
+static int __Pyx_modinit_type_import_code(void) { /* Imports the builtins module and numpy, and resolves the external type objects this module refers to; returns 0 on success, -1 on failure. */
+  __Pyx_RefNannyDeclarations
+  PyObject *__pyx_t_1 = NULL; /* holds the module currently being queried; released after each group of lookups */
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("__Pyx_modinit_type_import_code", 0);
+  /*--- Type import code: each __Pyx_ImportType call passes the expected C struct size and a size-check policy (Warn or Ignore) ---*/
+  __pyx_t_1 = PyImport_ImportModule(__Pyx_BUILTIN_MODULE_NAME); if (unlikely(!__pyx_t_1)) __PYX_ERR(3, 9, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_ptype_7cpython_4type_type = __Pyx_ImportType(__pyx_t_1, __Pyx_BUILTIN_MODULE_NAME, "type", 
+  #if defined(PYPY_VERSION_NUM) && PYPY_VERSION_NUM < 0x050B0000
+  sizeof(PyTypeObject),
+  #else
+  sizeof(PyHeapTypeObject),
+  #endif
+  __Pyx_ImportType_CheckSize_Warn);
+   if (!__pyx_ptype_7cpython_4type_type) __PYX_ERR(3, 9, __pyx_L1_error)
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __pyx_t_1 = PyImport_ImportModule("numpy"); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 200, __pyx_L1_error) /* every lookup up to the next __Pyx_DECREF reads from this numpy module object */
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_ptype_5numpy_dtype = __Pyx_ImportType(__pyx_t_1, "numpy", "dtype", sizeof(PyArray_Descr), __Pyx_ImportType_CheckSize_Ignore);
+   if (!__pyx_ptype_5numpy_dtype) __PYX_ERR(1, 200, __pyx_L1_error)
+  __pyx_ptype_5numpy_flatiter = __Pyx_ImportType(__pyx_t_1, "numpy", "flatiter", sizeof(PyArrayIterObject), __Pyx_ImportType_CheckSize_Ignore);
+   if (!__pyx_ptype_5numpy_flatiter) __PYX_ERR(1, 223, __pyx_L1_error)
+  __pyx_ptype_5numpy_broadcast = __Pyx_ImportType(__pyx_t_1, "numpy", "broadcast", sizeof(PyArrayMultiIterObject), __Pyx_ImportType_CheckSize_Ignore);
+   if (!__pyx_ptype_5numpy_broadcast) __PYX_ERR(1, 227, __pyx_L1_error)
+  __pyx_ptype_5numpy_ndarray = __Pyx_ImportType(__pyx_t_1, "numpy", "ndarray", sizeof(PyArrayObject), __Pyx_ImportType_CheckSize_Ignore);
+   if (!__pyx_ptype_5numpy_ndarray) __PYX_ERR(1, 239, __pyx_L1_error)
+  __pyx_ptype_5numpy_generic = __Pyx_ImportType(__pyx_t_1, "numpy", "generic", sizeof(PyObject), __Pyx_ImportType_CheckSize_Warn);
+   if (!__pyx_ptype_5numpy_generic) __PYX_ERR(1, 771, __pyx_L1_error)
+  __pyx_ptype_5numpy_number = __Pyx_ImportType(__pyx_t_1, "numpy", "number", sizeof(PyObject), __Pyx_ImportType_CheckSize_Warn);
+   if (!__pyx_ptype_5numpy_number) __PYX_ERR(1, 773, __pyx_L1_error)
+  __pyx_ptype_5numpy_integer = __Pyx_ImportType(__pyx_t_1, "numpy", "integer", sizeof(PyObject), __Pyx_ImportType_CheckSize_Warn);
+   if (!__pyx_ptype_5numpy_integer) __PYX_ERR(1, 775, __pyx_L1_error)
+  __pyx_ptype_5numpy_signedinteger = __Pyx_ImportType(__pyx_t_1, "numpy", "signedinteger", sizeof(PyObject), __Pyx_ImportType_CheckSize_Warn);
+   if (!__pyx_ptype_5numpy_signedinteger) __PYX_ERR(1, 777, __pyx_L1_error)
+  __pyx_ptype_5numpy_unsignedinteger = __Pyx_ImportType(__pyx_t_1, "numpy", "unsignedinteger", sizeof(PyObject), __Pyx_ImportType_CheckSize_Warn);
+   if (!__pyx_ptype_5numpy_unsignedinteger) __PYX_ERR(1, 779, __pyx_L1_error)
+  __pyx_ptype_5numpy_inexact = __Pyx_ImportType(__pyx_t_1, "numpy", "inexact", sizeof(PyObject), __Pyx_ImportType_CheckSize_Warn);
+   if (!__pyx_ptype_5numpy_inexact) __PYX_ERR(1, 781, __pyx_L1_error)
+  __pyx_ptype_5numpy_floating = __Pyx_ImportType(__pyx_t_1, "numpy", "floating", sizeof(PyObject), __Pyx_ImportType_CheckSize_Warn);
+   if (!__pyx_ptype_5numpy_floating) __PYX_ERR(1, 783, __pyx_L1_error)
+  __pyx_ptype_5numpy_complexfloating = __Pyx_ImportType(__pyx_t_1, "numpy", "complexfloating", sizeof(PyObject), __Pyx_ImportType_CheckSize_Warn);
+   if (!__pyx_ptype_5numpy_complexfloating) __PYX_ERR(1, 785, __pyx_L1_error)
+  __pyx_ptype_5numpy_flexible = __Pyx_ImportType(__pyx_t_1, "numpy", "flexible", sizeof(PyObject), __Pyx_ImportType_CheckSize_Warn);
+   if (!__pyx_ptype_5numpy_flexible) __PYX_ERR(1, 787, __pyx_L1_error)
+  __pyx_ptype_5numpy_character = __Pyx_ImportType(__pyx_t_1, "numpy", "character", sizeof(PyObject), __Pyx_ImportType_CheckSize_Warn);
+   if (!__pyx_ptype_5numpy_character) __PYX_ERR(1, 789, __pyx_L1_error)
+  __pyx_ptype_5numpy_ufunc = __Pyx_ImportType(__pyx_t_1, "numpy", "ufunc", sizeof(PyUFuncObject), __Pyx_ImportType_CheckSize_Ignore);
+   if (!__pyx_ptype_5numpy_ufunc) __PYX_ERR(1, 827, __pyx_L1_error)
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __Pyx_RefNannyFinishContext();
+  return 0;
+  __pyx_L1_error:; /* error exit: drop whichever module reference is still held (XDECREF tolerates NULL) */
+  __Pyx_XDECREF(__pyx_t_1);
+  __Pyx_RefNannyFinishContext();
+  return -1;
+}
+
+static int __Pyx_modinit_variable_import_code(void) { /* Module-init stage for importing C variables; no statements are emitted for it here, so it always returns 0. */
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("__Pyx_modinit_variable_import_code", 0);
+  /*--- Variable import code (empty in this module) ---*/
+  __Pyx_RefNannyFinishContext();
+  return 0;
+}
+
+static int __Pyx_modinit_function_import_code(void) { /* Module-init stage for importing C functions; no statements are emitted for it here, so it always returns 0. */
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("__Pyx_modinit_function_import_code", 0);
+  /*--- Function import code (empty in this module) ---*/
+  __Pyx_RefNannyFinishContext();
+  return 0;
+}
+
+
+#ifndef CYTHON_NO_PYINIT_EXPORT /* choose the declaration prefix for the module init function */
+#define __Pyx_PyMODINIT_FUNC PyMODINIT_FUNC /* default: use the interpreter's own PyMODINIT_FUNC */
+#elif PY_MAJOR_VERSION < 3 /* CYTHON_NO_PYINIT_EXPORT set, Python 2: init function returns void */
+#ifdef __cplusplus
+#define __Pyx_PyMODINIT_FUNC extern "C" void
+#else /* !__cplusplus */
+#define __Pyx_PyMODINIT_FUNC void
+#endif /* __cplusplus */
+#else /* CYTHON_NO_PYINIT_EXPORT set, Python 3: init function returns PyObject * */
+#ifdef __cplusplus
+#define __Pyx_PyMODINIT_FUNC extern "C" PyObject *
+#else /* !__cplusplus */
+#define __Pyx_PyMODINIT_FUNC PyObject *
+#endif /* __cplusplus */
+#endif /* CYTHON_NO_PYINIT_EXPORT */
+
+
+#if PY_MAJOR_VERSION < 3
+__Pyx_PyMODINIT_FUNC initcore(void) CYTHON_SMALL_CODE; /*proto: Python 2 entry point for module "core" */
+__Pyx_PyMODINIT_FUNC initcore(void)
+#else
+__Pyx_PyMODINIT_FUNC PyInit_core(void) CYTHON_SMALL_CODE; /*proto: Python 3 entry point for module "core" */
+__Pyx_PyMODINIT_FUNC PyInit_core(void)
+#if CYTHON_PEP489_MULTI_PHASE_INIT
+{ /* multi-phase init: the entry point only hands __pyx_moduledef to PyModuleDef_Init; without multi-phase init this header instead takes the brace body that follows the matching #endif lines */
+  return PyModuleDef_Init(&__pyx_moduledef);
+}
+static CYTHON_SMALL_CODE int __Pyx_check_single_interpreter(void) { /* Remembers the first interpreter that calls it and rejects any other; returns 0 when allowed, -1 otherwise. */
+    #if PY_VERSION_HEX >= 0x030700A1
+    static PY_INT64_T main_interpreter_id = -1; /* -1 means no interpreter has been recorded yet */
+    PY_INT64_T current_id = PyInterpreterState_GetID(PyThreadState_Get()->interp);
+    if (main_interpreter_id == -1) {
+        main_interpreter_id = current_id;
+        return (unlikely(current_id == -1)) ? -1 : 0; /* first call: fail only if the ID lookup itself returned -1 */
+    } else if (unlikely(main_interpreter_id != current_id))
+    #else
+    static PyInterpreterState *main_interpreter = NULL; /* older Pythons: compare interpreter-state pointers instead of IDs */
+    PyInterpreterState *current_interpreter = PyThreadState_Get()->interp;
+    if (!main_interpreter) {
+        main_interpreter = current_interpreter;
+    } else if (unlikely(main_interpreter != current_interpreter))
+    #endif
+    { /* shared body of the trailing "else if" from whichever preprocessor branch was compiled */
+        PyErr_SetString(
+            PyExc_ImportError,
+            "Interpreter change detected - this module can only be loaded into one interpreter per process.");
+        return -1;
+    }
+    return 0;
+}
+static CYTHON_SMALL_CODE int __Pyx_copy_spec_to_module(PyObject *spec, PyObject *moddict, const char* from_name, const char* to_name, int allow_none) { /* Copies attribute from_name of spec into moddict under key to_name; returns 0 on success or when skipped, negative on error. */
+    PyObject *value = PyObject_GetAttrString(spec, from_name);
+    int result = 0;
+    if (likely(value)) {
+        if (allow_none || value != Py_None) { /* a None value is only stored when allow_none is non-zero */
+            result = PyDict_SetItemString(moddict, to_name, value);
+        }
+        Py_DECREF(value); /* release the reference obtained from PyObject_GetAttrString */
+    } else if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
+        PyErr_Clear(); /* a missing attribute is not an error: clear it and leave result at 0 */
+    } else {
+        result = -1; /* any other exception stays set and is reported to the caller */
+    }
+    return result;
+}
+static CYTHON_SMALL_CODE PyObject* __pyx_pymod_create(PyObject *spec, CYTHON_UNUSED PyModuleDef *def) { /* Creates the module object from an import spec (or returns the existing one); returns a new reference, or NULL with an exception set. */
+    PyObject *module = NULL, *moddict, *modname;
+    if (__Pyx_check_single_interpreter())
+        return NULL;
+    if (__pyx_m)
+        return __Pyx_NewRef(__pyx_m); /* already created: hand back the existing module */
+    modname = PyObject_GetAttrString(spec, "name");
+    if (unlikely(!modname)) goto bad;
+    module = PyModule_NewObject(modname);
+    Py_DECREF(modname); /* the name object is not used again below */
+    if (unlikely(!module)) goto bad;
+    moddict = PyModule_GetDict(module);
+    if (unlikely(!moddict)) goto bad;
+    if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "loader", "__loader__", 1) < 0)) goto bad;
+    if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "origin", "__file__", 1) < 0)) goto bad;
+    if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "parent", "__package__", 1) < 0)) goto bad;
+    if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "submodule_search_locations", "__path__", 0) < 0)) goto bad; /* allow_none=0: __path__ is not set when the spec value is None */
+    return module;
+bad: /* shared failure exit; module may still be NULL here, hence Py_XDECREF */
+    Py_XDECREF(module);
+    return NULL;
+}
+
+
+static CYTHON_SMALL_CODE int __pyx_pymod_exec_core(PyObject *__pyx_pyinit_module) /* function header used when CYTHON_PEP489_MULTI_PHASE_INIT is enabled */
+#endif /* CYTHON_PEP489_MULTI_PHASE_INIT */
+#endif /* PY_MAJOR_VERSION < 3 */
+{ /* Module initialisation body: sets up globals, constants and types, then runs the module-level statements. The return value depends on the init mode; see the #if block at the end of the function. */
+  PyObject *__pyx_t_1 = NULL; /* scratch object reference, reused by each step below */
+  static PyThread_type_lock __pyx_t_2[8]; /* staging array for the eight preallocated memoryview locks */
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannyDeclarations
+  #if CYTHON_PEP489_MULTI_PHASE_INIT
+  if (__pyx_m) { /* module already initialised: succeed only if it is the very same object */
+    if (__pyx_m == __pyx_pyinit_module) return 0;
+    PyErr_SetString(PyExc_RuntimeError, "Module 'core' has already been imported. Re-initialisation is not supported.");
+    return -1;
+  }
+  #elif PY_MAJOR_VERSION >= 3
+  if (__pyx_m) return __Pyx_NewRef(__pyx_m);
+  #endif
+  #if CYTHON_REFNANNY
+__Pyx_RefNanny = __Pyx_RefNannyImportAPI("refnanny");
+if (!__Pyx_RefNanny) {
+  PyErr_Clear(); /* first lookup failed: clear the error and retry under the Cython.Runtime package */
+  __Pyx_RefNanny = __Pyx_RefNannyImportAPI("Cython.Runtime.refnanny");
+  if (!__Pyx_RefNanny)
+      Py_FatalError("failed to import 'refnanny' module");
+}
+#endif
+  __Pyx_RefNannySetupContext("__Pyx_PyMODINIT_FUNC PyInit_core(void)", 0);
+  if (__Pyx_check_binary_version() < 0) __PYX_ERR(0, 1, __pyx_L1_error)
+  #ifdef __Pxy_PyFrame_Initialize_Offsets
+  __Pxy_PyFrame_Initialize_Offsets();
+  #endif
+  __pyx_empty_tuple = PyTuple_New(0); if (unlikely(!__pyx_empty_tuple)) __PYX_ERR(0, 1, __pyx_L1_error) /* shared empty tuple/bytes/unicode singletons */
+  __pyx_empty_bytes = PyBytes_FromStringAndSize("", 0); if (unlikely(!__pyx_empty_bytes)) __PYX_ERR(0, 1, __pyx_L1_error)
+  __pyx_empty_unicode = PyUnicode_FromStringAndSize("", 0); if (unlikely(!__pyx_empty_unicode)) __PYX_ERR(0, 1, __pyx_L1_error)
+  #ifdef __Pyx_CyFunction_USED
+  if (__pyx_CyFunction_init() < 0) __PYX_ERR(0, 1, __pyx_L1_error)
+  #endif
+  #ifdef __Pyx_FusedFunction_USED
+  if (__pyx_FusedFunction_init() < 0) __PYX_ERR(0, 1, __pyx_L1_error)
+  #endif
+  #ifdef __Pyx_Coroutine_USED
+  if (__pyx_Coroutine_init() < 0) __PYX_ERR(0, 1, __pyx_L1_error)
+  #endif
+  #ifdef __Pyx_Generator_USED
+  if (__pyx_Generator_init() < 0) __PYX_ERR(0, 1, __pyx_L1_error)
+  #endif
+  #ifdef __Pyx_AsyncGen_USED
+  if (__pyx_AsyncGen_init() < 0) __PYX_ERR(0, 1, __pyx_L1_error)
+  #endif
+  #ifdef __Pyx_StopAsyncIteration_USED
+  if (__pyx_StopAsyncIteration_init() < 0) __PYX_ERR(0, 1, __pyx_L1_error)
+  #endif
+  /*--- Library function declarations ---*/
+  /*--- Threads initialization code ---*/
+  #if defined(WITH_THREAD) && PY_VERSION_HEX < 0x030700F0 && defined(__PYX_FORCE_INIT_THREADS) && __PYX_FORCE_INIT_THREADS
+  PyEval_InitThreads();
+  #endif
+  /*--- Module creation code: adopt the module passed in (multi-phase) or create it here (single-phase) ---*/
+  #if CYTHON_PEP489_MULTI_PHASE_INIT
+  __pyx_m = __pyx_pyinit_module;
+  Py_INCREF(__pyx_m);
+  #else
+  #if PY_MAJOR_VERSION < 3
+  __pyx_m = Py_InitModule4("core", __pyx_methods, 0, 0, PYTHON_API_VERSION); Py_XINCREF(__pyx_m);
+  #else
+  __pyx_m = PyModule_Create(&__pyx_moduledef);
+  #endif
+  if (unlikely(!__pyx_m)) __PYX_ERR(0, 1, __pyx_L1_error)
+  #endif
+  __pyx_d = PyModule_GetDict(__pyx_m); if (unlikely(!__pyx_d)) __PYX_ERR(0, 1, __pyx_L1_error) /* module dict, builtins module and cython_runtime module are each INCREF'd after lookup */
+  Py_INCREF(__pyx_d);
+  __pyx_b = PyImport_AddModule(__Pyx_BUILTIN_MODULE_NAME); if (unlikely(!__pyx_b)) __PYX_ERR(0, 1, __pyx_L1_error)
+  Py_INCREF(__pyx_b);
+  __pyx_cython_runtime = PyImport_AddModule((char *) "cython_runtime"); if (unlikely(!__pyx_cython_runtime)) __PYX_ERR(0, 1, __pyx_L1_error)
+  Py_INCREF(__pyx_cython_runtime);
+  if (PyObject_SetAttrString(__pyx_m, "__builtins__", __pyx_b) < 0) __PYX_ERR(0, 1, __pyx_L1_error);
+  /*--- Initialize various global constants etc. ---*/
+  if (__Pyx_InitGlobals() < 0) __PYX_ERR(0, 1, __pyx_L1_error)
+  #if PY_MAJOR_VERSION < 3 && (__PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT)
+  if (__Pyx_init_sys_getdefaultencoding_params() < 0) __PYX_ERR(0, 1, __pyx_L1_error)
+  #endif
+  if (__pyx_module_is_main_TTS__tts__utils__monotonic_align__core) {
+    if (PyObject_SetAttr(__pyx_m, __pyx_n_s_name_2, __pyx_n_s_main) < 0) __PYX_ERR(0, 1, __pyx_L1_error)
+  }
+  #if PY_MAJOR_VERSION >= 3
+  {
+    PyObject *modules = PyImport_GetModuleDict(); if (unlikely(!modules)) __PYX_ERR(0, 1, __pyx_L1_error)
+    if (!PyDict_GetItemString(modules, "TTS.tts.utils.monotonic_align.core")) { /* register under the fully qualified name only if no entry exists yet */
+      if (unlikely(PyDict_SetItemString(modules, "TTS.tts.utils.monotonic_align.core", __pyx_m) < 0)) __PYX_ERR(0, 1, __pyx_L1_error)
+    }
+  }
+  #endif
+  /*--- Builtin init code ---*/
+  if (__Pyx_InitCachedBuiltins() < 0) __PYX_ERR(0, 1, __pyx_L1_error)
+  /*--- Constants init code ---*/
+  if (__Pyx_InitCachedConstants() < 0) __PYX_ERR(0, 1, __pyx_L1_error)
+  /*--- Global type/function init code: stages whose result is cast to (void) always return 0 in this file ---*/
+  (void)__Pyx_modinit_global_init_code();
+  (void)__Pyx_modinit_variable_export_code();
+  (void)__Pyx_modinit_function_export_code();
+  if (unlikely(__Pyx_modinit_type_init_code() < 0)) __PYX_ERR(0, 1, __pyx_L1_error)
+  if (unlikely(__Pyx_modinit_type_import_code() < 0)) __PYX_ERR(0, 1, __pyx_L1_error)
+  (void)__Pyx_modinit_variable_import_code();
+  (void)__Pyx_modinit_function_import_code();
+  /*--- Execution code: module-level statements, each preceded by an echo of the source lines it implements ---*/
+  #if defined(__Pyx_Generator_USED) || defined(__Pyx_Coroutine_USED)
+  if (__Pyx_patch_abc() < 0) __PYX_ERR(0, 1, __pyx_L1_error)
+  #endif
+
+  /* "TTS/tts/utils/monotonic_align/core.pyx":1
+ * import numpy as np             # <<<<<<<<<<<<<<
+ * 
+ * cimport cython
+ */
+  __pyx_t_1 = __Pyx_Import(__pyx_n_s_numpy, 0, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 1, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_1);
+  if (PyDict_SetItem(__pyx_d, __pyx_n_s_np, __pyx_t_1) < 0) __PYX_ERR(0, 1, __pyx_L1_error)
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+
+  /* "TTS/tts/utils/monotonic_align/core.pyx":42
+ * @cython.boundscheck(False)
+ * @cython.wraparound(False)
+ * cpdef void maximum_path_c(int[:,:,::1] paths, float[:,:,::1] values, int[::1] t_xs, int[::1] t_ys, float max_neg_val=-1e9) nogil:             # <<<<<<<<<<<<<<
+ *   cdef int b = values.shape[0]
+ * 
+ */
+  __pyx_k_ = (-1e9); /* stores the max_neg_val default shown in the signature above */
+  __pyx_k_ = (-1e9); /* NOTE(review): identical repeat of the previous statement; redundant as written, presumably a generator artefact */
+
+  /* "TTS/tts/utils/monotonic_align/core.pyx":1
+ * import numpy as np             # <<<<<<<<<<<<<<
+ * 
+ * cimport cython
+ */
+  __pyx_t_1 = __Pyx_PyDict_NewPresized(0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 1, __pyx_L1_error) /* new dict stored in the module dict under the key held by __pyx_n_s_test */
+  __Pyx_GOTREF(__pyx_t_1);
+  if (PyDict_SetItem(__pyx_d, __pyx_n_s_test, __pyx_t_1) < 0) __PYX_ERR(0, 1, __pyx_L1_error)
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+
+  /* "View.MemoryView":209
+ *         info.obj = self
+ * 
+ *     __pyx_getbuffer = capsule(<void *> &__pyx_array_getbuffer, "getbuffer(obj, view, flags)")             # <<<<<<<<<<<<<<
+ * 
+ *     def __dealloc__(array self):
+ */
+  __pyx_t_1 = __pyx_capsule_create(((void *)(&__pyx_array_getbuffer)), ((char *)"getbuffer(obj, view, flags)")); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 209, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_1);
+  if (PyDict_SetItem((PyObject *)__pyx_array_type->tp_dict, __pyx_n_s_pyx_getbuffer, __pyx_t_1) < 0) __PYX_ERR(2, 209, __pyx_L1_error)
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  PyType_Modified(__pyx_array_type); /* tp_dict was edited directly, so notify the type machinery */
+
+  /* "View.MemoryView":286
+ *         return self.name
+ * 
+ * cdef generic = Enum("<strided and direct or indirect>")             # <<<<<<<<<<<<<<
+ * cdef strided = Enum("<strided and direct>") # default
+ * cdef indirect = Enum("<strided and indirect>")
+ */
+  __pyx_t_1 = __Pyx_PyObject_Call(((PyObject *)__pyx_MemviewEnum_type), __pyx_tuple__22, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 286, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_1);
+  __Pyx_XGOTREF(generic);
+  __Pyx_DECREF_SET(generic, __pyx_t_1); /* replaces the None placeholder installed by __Pyx_modinit_global_init_code */
+  __Pyx_GIVEREF(__pyx_t_1);
+  __pyx_t_1 = 0;
+
+  /* "View.MemoryView":287
+ * 
+ * cdef generic = Enum("<strided and direct or indirect>")
+ * cdef strided = Enum("<strided and direct>") # default             # <<<<<<<<<<<<<<
+ * cdef indirect = Enum("<strided and indirect>")
+ * 
+ */
+  __pyx_t_1 = __Pyx_PyObject_Call(((PyObject *)__pyx_MemviewEnum_type), __pyx_tuple__23, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 287, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_1);
+  __Pyx_XGOTREF(strided);
+  __Pyx_DECREF_SET(strided, __pyx_t_1);
+  __Pyx_GIVEREF(__pyx_t_1);
+  __pyx_t_1 = 0;
+
+  /* "View.MemoryView":288
+ * cdef generic = Enum("<strided and direct or indirect>")
+ * cdef strided = Enum("<strided and direct>") # default
+ * cdef indirect = Enum("<strided and indirect>")             # <<<<<<<<<<<<<<
+ * 
+ * 
+ */
+  __pyx_t_1 = __Pyx_PyObject_Call(((PyObject *)__pyx_MemviewEnum_type), __pyx_tuple__24, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 288, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_1);
+  __Pyx_XGOTREF(indirect);
+  __Pyx_DECREF_SET(indirect, __pyx_t_1);
+  __Pyx_GIVEREF(__pyx_t_1);
+  __pyx_t_1 = 0;
+
+  /* "View.MemoryView":291
+ * 
+ * 
+ * cdef contiguous = Enum("<contiguous and direct>")             # <<<<<<<<<<<<<<
+ * cdef indirect_contiguous = Enum("<contiguous and indirect>")
+ * 
+ */
+  __pyx_t_1 = __Pyx_PyObject_Call(((PyObject *)__pyx_MemviewEnum_type), __pyx_tuple__25, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 291, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_1);
+  __Pyx_XGOTREF(contiguous);
+  __Pyx_DECREF_SET(contiguous, __pyx_t_1);
+  __Pyx_GIVEREF(__pyx_t_1);
+  __pyx_t_1 = 0;
+
+  /* "View.MemoryView":292
+ * 
+ * cdef contiguous = Enum("<contiguous and direct>")
+ * cdef indirect_contiguous = Enum("<contiguous and indirect>")             # <<<<<<<<<<<<<<
+ * 
+ * 
+ */
+  __pyx_t_1 = __Pyx_PyObject_Call(((PyObject *)__pyx_MemviewEnum_type), __pyx_tuple__26, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 292, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_1);
+  __Pyx_XGOTREF(indirect_contiguous);
+  __Pyx_DECREF_SET(indirect_contiguous, __pyx_t_1);
+  __Pyx_GIVEREF(__pyx_t_1);
+  __pyx_t_1 = 0;
+
+  /* "View.MemoryView":316
+ * 
+ * DEF THREAD_LOCKS_PREALLOCATED = 8
+ * cdef int __pyx_memoryview_thread_locks_used = 0             # <<<<<<<<<<<<<<
+ * cdef PyThread_type_lock[THREAD_LOCKS_PREALLOCATED] __pyx_memoryview_thread_locks = [
+ *     PyThread_allocate_lock(),
+ */
+  __pyx_memoryview_thread_locks_used = 0;
+
+  /* "View.MemoryView":317
+ * DEF THREAD_LOCKS_PREALLOCATED = 8
+ * cdef int __pyx_memoryview_thread_locks_used = 0
+ * cdef PyThread_type_lock[THREAD_LOCKS_PREALLOCATED] __pyx_memoryview_thread_locks = [             # <<<<<<<<<<<<<<
+ *     PyThread_allocate_lock(),
+ *     PyThread_allocate_lock(),
+ */
+  __pyx_t_2[0] = PyThread_allocate_lock(); /* NOTE(review): none of the eight results is checked for NULL here - confirm the code that consumes these locks handles that case */
+  __pyx_t_2[1] = PyThread_allocate_lock();
+  __pyx_t_2[2] = PyThread_allocate_lock();
+  __pyx_t_2[3] = PyThread_allocate_lock();
+  __pyx_t_2[4] = PyThread_allocate_lock();
+  __pyx_t_2[5] = PyThread_allocate_lock();
+  __pyx_t_2[6] = PyThread_allocate_lock();
+  __pyx_t_2[7] = PyThread_allocate_lock();
+  memcpy(&(__pyx_memoryview_thread_locks[0]), __pyx_t_2, sizeof(__pyx_memoryview_thread_locks[0]) * (8)); /* copy all eight staged locks into the global array */
+
+  /* "View.MemoryView":549
+ *         info.obj = self
+ * 
+ *     __pyx_getbuffer = capsule(<void *> &__pyx_memoryview_getbuffer, "getbuffer(obj, view, flags)")             # <<<<<<<<<<<<<<
+ * 
+ * 
+ */
+  __pyx_t_1 = __pyx_capsule_create(((void *)(&__pyx_memoryview_getbuffer)), ((char *)"getbuffer(obj, view, flags)")); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 549, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_1);
+  if (PyDict_SetItem((PyObject *)__pyx_memoryview_type->tp_dict, __pyx_n_s_pyx_getbuffer, __pyx_t_1) < 0) __PYX_ERR(2, 549, __pyx_L1_error)
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  PyType_Modified(__pyx_memoryview_type);
+
+  /* "View.MemoryView":995
+ *         return self.from_object
+ * 
+ *     __pyx_getbuffer = capsule(<void *> &__pyx_memoryview_getbuffer, "getbuffer(obj, view, flags)")             # <<<<<<<<<<<<<<
+ * 
+ * 
+ */
+  __pyx_t_1 = __pyx_capsule_create(((void *)(&__pyx_memoryview_getbuffer)), ((char *)"getbuffer(obj, view, flags)")); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 995, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_1);
+  if (PyDict_SetItem((PyObject *)__pyx_memoryviewslice_type->tp_dict, __pyx_n_s_pyx_getbuffer, __pyx_t_1) < 0) __PYX_ERR(2, 995, __pyx_L1_error)
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  PyType_Modified(__pyx_memoryviewslice_type);
+
+  /* "(tree fragment)":1
+ * def __pyx_unpickle_Enum(__pyx_type, long __pyx_checksum, __pyx_state):             # <<<<<<<<<<<<<<
+ *     cdef object __pyx_PickleError
+ *     cdef object __pyx_result
+ */
+  __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_15View_dot_MemoryView_1__pyx_unpickle_Enum, NULL, __pyx_n_s_View_MemoryView); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 1, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_1);
+  if (PyDict_SetItem(__pyx_d, __pyx_n_s_pyx_unpickle_Enum, __pyx_t_1) < 0) __PYX_ERR(2, 1, __pyx_L1_error)
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+
+  /* "(tree fragment)":11
+ *         __pyx_unpickle_Enum__set_state(<Enum> __pyx_result, __pyx_state)
+ *     return __pyx_result
+ * cdef __pyx_unpickle_Enum__set_state(Enum __pyx_result, tuple __pyx_state):             # <<<<<<<<<<<<<<
+ *     __pyx_result.name = __pyx_state[0]
+ *     if len(__pyx_state) > 1 and hasattr(__pyx_result, '__dict__'):
+ */
+
+  /*--- Wrapped vars code ---*/
+
+  goto __pyx_L0; /* success: skip the error clean-up below */
+  __pyx_L1_error:; /* error exit: drop the scratch reference, then either attach a traceback and clear the module, or make sure an ImportError is set */
+  __Pyx_XDECREF(__pyx_t_1);
+  if (__pyx_m) {
+    if (__pyx_d) {
+      __Pyx_AddTraceback("init TTS.tts.utils.monotonic_align.core", __pyx_clineno, __pyx_lineno, __pyx_filename);
+    }
+    Py_CLEAR(__pyx_m); /* leaves __pyx_m NULL, which the return statements below report as failure */
+  } else if (!PyErr_Occurred()) {
+    PyErr_SetString(PyExc_ImportError, "init TTS.tts.utils.monotonic_align.core");
+  }
+  __pyx_L0:;
+  __Pyx_RefNannyFinishContext();
+  #if CYTHON_PEP489_MULTI_PHASE_INIT
+  return (__pyx_m != NULL) ? 0 : -1; /* exec slot form: 0 on success, -1 on failure */
+  #elif PY_MAJOR_VERSION >= 3
+  return __pyx_m; /* single-phase Python 3: the module object, or NULL on failure */
+  #else
+  return; /* Python 2 init functions return nothing */
+  #endif
+}
+
+/* --- Runtime support code --- */
+/* Refnanny */
+#if CYTHON_REFNANNY
+static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname) { /* Imports modname and returns the pointer stored in its "RefNannyAPI" attribute; returns NULL if the import or attribute lookup fails. */
+    PyObject *m = NULL, *p = NULL;
+    void *r = NULL;
+    m = PyImport_ImportModule(modname);
+    if (!m) goto end;
+    p = PyObject_GetAttrString(m, "RefNannyAPI");
+    if (!p) goto end;
+    r = PyLong_AsVoidPtr(p); /* the attribute is read as an integer and converted back to a pointer */
+end: /* single exit: both references may be NULL here, hence Py_XDECREF */
+    Py_XDECREF(p);
+    Py_XDECREF(m);
+    return (__Pyx_RefNannyAPIStruct *)r;
+}
+#endif
+
+/* PyObjectGetAttrStr */
+#if CYTHON_USE_TYPE_SLOTS
+static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStr(PyObject* obj, PyObject* attr_name) { /* Attribute lookup that calls the type's own slot directly when one is set; returns whatever the slot or PyObject_GetAttr returns. */
+    PyTypeObject* tp = Py_TYPE(obj);
+    if (likely(tp->tp_getattro))
+        return tp->tp_getattro(obj, attr_name); /* preferred path: object-keyed getattr slot */
+#if PY_MAJOR_VERSION < 3
+    if (likely(tp->tp_getattr))
+        return tp->tp_getattr(obj, PyString_AS_STRING(attr_name)); /* Python 2 only: char*-keyed slot */
+#endif
+    return PyObject_GetAttr(obj, attr_name); /* fallback when no slot was usable */
+}
+#endif
+
+/* GetBuiltinName */
+static PyObject *__Pyx_GetBuiltinName(PyObject *name) { /* Looks up name on the builtins module object __pyx_b; returns the lookup result, or NULL with NameError set. */
+    PyObject* result = __Pyx_PyObject_GetAttrStr(__pyx_b, name);
+    if (unlikely(!result)) {
+        PyErr_Format(PyExc_NameError, /* any failed lookup is reported as NameError */
+#if PY_MAJOR_VERSION >= 3
+            "name '%U' is not defined", name);
+#else
+            "name '%.200s' is not defined", PyString_AS_STRING(name));
+#endif
+    }
+    return result;
+}
+
+/* MemviewSliceInit */
+static int /* Fills *memviewslice from memview->view for the first ndim dimensions; returns 0 on success, -1 if the slice was already initialised. */
+__Pyx_init_memviewslice(struct __pyx_memoryview_obj *memview,
+                        int ndim,
+                        __Pyx_memviewslice *memviewslice,
+                        int memview_is_new_reference)
+{
+    __Pyx_RefNannyDeclarations
+    int i, retval=-1;
+    Py_buffer *buf = &memview->view;
+    __Pyx_RefNannySetupContext("init_memviewslice", 0);
+    if (unlikely(memviewslice->memview || memviewslice->data)) { /* refuse to overwrite a slice that already has a memview or data pointer */
+        PyErr_SetString(PyExc_ValueError,
+            "memviewslice is already initialized!");
+        goto fail;
+    }
+    if (buf->strides) {
+        for (i = 0; i < ndim; i++) {
+            memviewslice->strides[i] = buf->strides[i];
+        }
+    } else { /* buffer supplies no strides: derive them from itemsize and shape, last dimension first */
+        Py_ssize_t stride = buf->itemsize;
+        for (i = ndim - 1; i >= 0; i--) {
+            memviewslice->strides[i] = stride;
+            stride *= buf->shape[i];
+        }
+    }
+    for (i = 0; i < ndim; i++) {
+        memviewslice->shape[i]   = buf->shape[i];
+        if (buf->suboffsets) {
+            memviewslice->suboffsets[i] = buf->suboffsets[i];
+        } else {
+            memviewslice->suboffsets[i] = -1; /* -1 is written when the buffer has no suboffsets array */
+        }
+    }
+    memviewslice->memview = memview;
+    memviewslice->data = (char *)buf->buf;
+    if (__pyx_add_acquisition_count(memview) == 0 && !memview_is_new_reference) { /* NOTE(review): presumably the macro yields the count before incrementing, making this the first acquisition - confirm against its definition */
+        Py_INCREF(memview);
+    }
+    retval = 0;
+    goto no_fail;
+fail: /* failure exit: leave the slice in a recognisably empty state */
+    memviewslice->memview = 0;
+    memviewslice->data = 0;
+    retval = -1;
+no_fail:
+    __Pyx_RefNannyFinishContext();
+    return retval;
+}
+#ifndef Py_NO_RETURN
+#define Py_NO_RETURN
+#endif
+/* Format a printf-style message into a fixed 200-byte buffer (longer output is
+ * truncated by vsnprintf) and pass it to Py_FatalError(). */
+static void __pyx_fatalerror(const char *fmt, ...) Py_NO_RETURN {
+    char message[200];
+    va_list ap;
+#ifdef HAVE_STDARG_PROTOTYPES
+    va_start(ap, fmt);
+#else
+    va_start(ap);
+#endif
+    vsnprintf(message, sizeof(message), fmt, ap);
+    va_end(ap);
+    Py_FatalError(message);
+}
+/* Increment *acquisition_count while holding `lock` (blocking acquire).
+ * Returns the value the counter had BEFORE the increment. */
+static CYTHON_INLINE int
+__pyx_add_acquisition_count_locked(__pyx_atomic_int *acquisition_count,
+                                   PyThread_type_lock lock)
+{
+    int count_before;
+
+    PyThread_acquire_lock(lock, 1);
+    count_before = (*acquisition_count)++;
+    PyThread_release_lock(lock);
+
+    return count_before;
+}
+/* Decrement *acquisition_count while holding `lock` (blocking acquire).
+ * Returns the value the counter had BEFORE the decrement. */
+static CYTHON_INLINE int
+__pyx_sub_acquisition_count_locked(__pyx_atomic_int *acquisition_count,
+                                   PyThread_type_lock lock)
+{
+    int count_before;
+
+    PyThread_acquire_lock(lock, 1);
+    count_before = (*acquisition_count)--;
+    PyThread_release_lock(lock);
+
+    return count_before;
+}
+/* Register one more acquisition of the memoryview behind `memslice`.
+ *
+ * A NULL or None memview is ignored. A negative acquisition count aborts the
+ * process via __pyx_fatalerror (reporting `lineno`). When the count was 0
+ * before this call, a Python reference to the memview is taken as well; the
+ * GIL is acquired around that INCREF when `have_gil` is zero. */
+static CYTHON_INLINE void
+__Pyx_INC_MEMVIEW(__Pyx_memviewslice *memslice, int have_gil, int lineno)
+{
+    struct __pyx_memoryview_obj *memview = memslice->memview;
+    if (unlikely(!memview || (PyObject *) memview == Py_None)) {
+        return;
+    }
+    if (unlikely(__pyx_get_slice_count(memview) < 0)) {
+        __pyx_fatalerror("Acquisition count is %d (line %d)",
+                         __pyx_get_slice_count(memview), lineno);
+    }
+    if (likely(__pyx_add_acquisition_count(memview) != 0)) {
+        /* Not the first acquisition: the Python reference is already held. */
+        return;
+    }
+    if (have_gil) {
+        Py_INCREF((PyObject *) memview);
+    } else {
+        PyGILState_STATE gil = PyGILState_Ensure();
+        Py_INCREF((PyObject *) memview);
+        PyGILState_Release(gil);
+    }
+}
+/* Drop one acquisition of the memoryview behind `memslice` and detach the slice.
+ *
+ * A NULL or None memview only resets memslice->memview to NULL. An acquisition
+ * count <= 0 aborts the process via __pyx_fatalerror (reporting `lineno`).
+ * Otherwise the count is decremented and memslice->data is cleared; when this
+ * was the last acquisition the Python reference is released with Py_CLEAR
+ * (taking the GIL first if `have_gil` is zero), else the pointer is just
+ * set to NULL. */
+static CYTHON_INLINE void __Pyx_XDEC_MEMVIEW(__Pyx_memviewslice *memslice,
+                                             int have_gil, int lineno) {
+    int was_last;
+    struct __pyx_memoryview_obj *memview = memslice->memview;
+    if (unlikely(!memview || (PyObject *) memview == Py_None)) {
+        memslice->memview = NULL;
+        return;
+    }
+    if (unlikely(__pyx_get_slice_count(memview) <= 0)) {
+        __pyx_fatalerror("Acquisition count is %d (line %d)",
+                         __pyx_get_slice_count(memview), lineno);
+    }
+    was_last = (__pyx_sub_acquisition_count(memview) == 1);
+    memslice->data = NULL;
+    if (likely(!was_last)) {
+        /* Other acquisitions remain: forget the pointer, keep the reference. */
+        memslice->memview = NULL;
+        return;
+    }
+    if (have_gil) {
+        Py_CLEAR(memslice->memview);
+    } else {
+        PyGILState_STATE gil = PyGILState_Ensure();
+        Py_CLEAR(memslice->memview);
+        PyGILState_Release(gil);
+    }
+}
+
+/* RaiseArgTupleInvalid */
+/* Raise TypeError describing a wrong number of positional arguments.
+ *
+ * func_name - function name used in the message.
+ * exact     - non-zero to say "exactly" instead of "at least" / "at most".
+ * num_min   - reported as the expected count when num_found < num_min.
+ * num_max   - reported as the expected count otherwise.
+ * num_found - number of positional arguments actually given.
+ */
+static void __Pyx_RaiseArgtupleInvalid(
+    const char* func_name,
+    int exact,
+    Py_ssize_t num_min,
+    Py_ssize_t num_max,
+    Py_ssize_t num_found)
+{
+    const int too_few = (num_found < num_min);
+    const Py_ssize_t num_expected = too_few ? num_min : num_max;
+    const char *more_or_less =
+        exact ? "exactly" : (too_few ? "at least" : "at most");
+    const char *plural_suffix = (num_expected == 1) ? "" : "s";
+
+    PyErr_Format(PyExc_TypeError,
+                 "%.200s() takes %.8s %" CYTHON_FORMAT_SSIZE_T "d positional argument%.1s (%" CYTHON_FORMAT_SSIZE_T "d given)",
+                 func_name, more_or_less, num_expected,
+                 plural_suffix, num_found);
+}
+
+/* RaiseDoubleKeywords */
+/* Raise TypeError reporting that keyword argument `kw_name` of `func_name`
+ * received more than one value. `kw_name` is formatted as a unicode object on
+ * Python 3 and converted with PyString_AsString() on Python 2. */
+static void __Pyx_RaiseDoubleKeywordsError(
+    const char* func_name,
+    PyObject* kw_name)
+{
+#if PY_MAJOR_VERSION >= 3
+    PyErr_Format(PyExc_TypeError,
+        "%s() got multiple values for keyword argument '%U'",
+        func_name, kw_name);
+#else
+    PyErr_Format(PyExc_TypeError,
+        "%s() got multiple values for keyword argument '%s'",
+        func_name, PyString_AsString(kw_name));
+#endif
+}
+
+/* ParseKeywords */
+/* Distribute the entries of the keyword dict `kwds` into `values`.
+ *
+ * kwds          - dict of keyword arguments to parse.
+ * argnames      - NULL-terminated array of pointers to the argument-name
+ *                 objects; entries from index `num_pos_args` onward are the
+ *                 names that may still be filled by keyword.
+ * kwds2         - optional dict that receives unrecognized keywords; when NULL
+ *                 an unrecognized keyword is an error.
+ * values        - output array indexed like `argnames`; matching values are
+ *                 stored without taking a new reference.
+ * num_pos_args  - number of leading `argnames` entries already supplied
+ *                 positionally.
+ * function_name - name used in error messages.
+ *
+ * Returns 0 on success, -1 with an exception set (TypeError for duplicate,
+ * non-string or unexpected keywords, or whatever a failing comparison /
+ * PyDict_SetItem raised).
+ */
+static int __Pyx_ParseOptionalKeywords(
+    PyObject *kwds,
+    PyObject **argnames[],
+    PyObject *kwds2,
+    PyObject *values[],
+    Py_ssize_t num_pos_args,
+    const char* function_name)
+{
+    PyObject *key = 0, *value = 0;
+    Py_ssize_t pos = 0;
+    PyObject*** name;
+    PyObject*** first_kw_arg = argnames + num_pos_args;
+    while (PyDict_Next(kwds, &pos, &key, &value)) {
+        /* Fast path: match the key by pointer identity against the names
+           that are still open for keyword passing. */
+        name = first_kw_arg;
+        while (*name && (**name != key)) name++;
+        if (*name) {
+            values[name-argnames] = value;
+            continue;
+        }
+        /* Slow path: compare by string content. */
+        name = first_kw_arg;
+        #if PY_MAJOR_VERSION < 3
+        if (likely(PyString_Check(key))) {
+            while (*name) {
+                if ((CYTHON_COMPILING_IN_PYPY || PyString_GET_SIZE(**name) == PyString_GET_SIZE(key))
+                        && _PyString_Eq(**name, key)) {
+                    values[name-argnames] = value;
+                    break;
+                }
+                name++;
+            }
+            if (*name) continue;
+            else {
+                /* No keyword slot matched: if the key names an argument that
+                   was already given positionally, it was passed twice. */
+                PyObject*** argname = argnames;
+                while (argname != first_kw_arg) {
+                    if ((**argname == key) || (
+                            (CYTHON_COMPILING_IN_PYPY || PyString_GET_SIZE(**argname) == PyString_GET_SIZE(key))
+                             && _PyString_Eq(**argname, key))) {
+                        goto arg_passed_twice;
+                    }
+                    argname++;
+                }
+            }
+        } else
+        #endif
+        if (likely(PyUnicode_Check(key))) {
+            while (*name) {
+                /* cmp == 0 means equal; a length mismatch short-circuits to
+                   "different" without calling PyUnicode_Compare. */
+                int cmp = (**name == key) ? 0 :
+                #if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3
+                    (__Pyx_PyUnicode_GET_LENGTH(**name) != __Pyx_PyUnicode_GET_LENGTH(key)) ? 1 :
+                #endif
+                    PyUnicode_Compare(**name, key);
+                /* A negative result is only an error if an exception is set. */
+                if (cmp < 0 && unlikely(PyErr_Occurred())) goto bad;
+                if (cmp == 0) {
+                    values[name-argnames] = value;
+                    break;
+                }
+                name++;
+            }
+            if (*name) continue;
+            else {
+                /* Same duplicate check as above, against the positionally
+                   supplied argument names. */
+                PyObject*** argname = argnames;
+                while (argname != first_kw_arg) {
+                    int cmp = (**argname == key) ? 0 :
+                    #if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3
+                        (__Pyx_PyUnicode_GET_LENGTH(**argname) != __Pyx_PyUnicode_GET_LENGTH(key)) ? 1 :
+                    #endif
+                        PyUnicode_Compare(**argname, key);
+                    if (cmp < 0 && unlikely(PyErr_Occurred())) goto bad;
+                    if (cmp == 0) goto arg_passed_twice;
+                    argname++;
+                }
+            }
+        } else
+            goto invalid_keyword_type;
+        /* The key matched no declared argument name. */
+        if (kwds2) {
+            if (unlikely(PyDict_SetItem(kwds2, key, value))) goto bad;
+        } else {
+            goto invalid_keyword;
+        }
+    }
+    return 0;
+arg_passed_twice:
+    __Pyx_RaiseDoubleKeywordsError(function_name, key);
+    goto bad;
+invalid_keyword_type:
+    PyErr_Format(PyExc_TypeError,
+        "%.200s() keywords must be strings", function_name);
+    goto bad;
+invalid_keyword:
+    PyErr_Format(PyExc_TypeError,
+    #if PY_MAJOR_VERSION < 3
+        "%.200s() got an unexpected keyword argument '%.200s'",
+        function_name, PyString_AsString(key));
+    #else
+        "%s() got an unexpected keyword argument '%U'",
+        function_name, key);
+    #endif
+bad:
+    return -1;
+}
+
+/* None */
+/* Raise UnboundLocalError for the local variable named `varname`. */
+static CYTHON_INLINE void __Pyx_RaiseUnboundLocalError(const char *varname)
+{
+    PyErr_Format(
+        PyExc_UnboundLocalError,
+        "local variable '%s' referenced before assignment",
+        varname);
+}
+
+/* GetTopmostException */
+#if CYTHON_USE_EXC_INFO_STACK
+/* Walk tstate->exc_info through its previous_item links and return the first
+ * entry whose exc_type is neither NULL nor None. If no such entry exists, the
+ * last entry of the chain (the one without a previous_item) is returned. */
+static _PyErr_StackItem *
+__Pyx_PyErr_GetTopmostException(PyThreadState *tstate)
+{
+    _PyErr_StackItem *item = tstate->exc_info;
+    for (;;) {
+        const int is_empty = (item->exc_type == NULL || item->exc_type == Py_None);
+        if (!is_empty || item->previous_item == NULL) {
+            break;
+        }
+        item = item->previous_item;
+    }
+    return item;
+}
+#endif
+
+/* SaveResetException */
+#if CYTHON_FAST_THREAD_STATE
+/* Store the thread's exc_type / exc_value / exc_traceback triple in *type,
+ * *value and *tb, taking a new reference to each non-NULL object. With
+ * CYTHON_USE_EXC_INFO_STACK the triple is read from the entry returned by
+ * __Pyx_PyErr_GetTopmostException(); otherwise from `tstate` directly. */
+static CYTHON_INLINE void __Pyx__ExceptionSave(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb) {
+    PyObject *handled_type, *handled_value, *handled_tb;
+    #if CYTHON_USE_EXC_INFO_STACK
+    {
+        _PyErr_StackItem *top = __Pyx_PyErr_GetTopmostException(tstate);
+        handled_type = top->exc_type;
+        handled_value = top->exc_value;
+        handled_tb = top->exc_traceback;
+    }
+    #else
+    handled_type = tstate->exc_type;
+    handled_value = tstate->exc_value;
+    handled_tb = tstate->exc_traceback;
+    #endif
+    Py_XINCREF(handled_type);
+    Py_XINCREF(handled_value);
+    Py_XINCREF(handled_tb);
+    *type = handled_type;
+    *value = handled_value;
+    *tb = handled_tb;
+}
+/* Install (type, value, tb) as the thread's exc_type / exc_value /
+ * exc_traceback triple and release the references previously stored there.
+ * The references passed in are stored as-is (no INCREF is performed). */
+static CYTHON_INLINE void __Pyx__ExceptionReset(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb) {
+    PyObject *old_type, *old_value, *old_tb;
+    #if CYTHON_USE_EXC_INFO_STACK
+    _PyErr_StackItem *slot = tstate->exc_info;
+    old_type = slot->exc_type;
+    old_value = slot->exc_value;
+    old_tb = slot->exc_traceback;
+    slot->exc_type = type;
+    slot->exc_value = value;
+    slot->exc_traceback = tb;
+    #else
+    old_type = tstate->exc_type;
+    old_value = tstate->exc_value;
+    old_tb = tstate->exc_traceback;
+    tstate->exc_type = type;
+    tstate->exc_value = value;
+    tstate->exc_traceback = tb;
+    #endif
+    /* Release the replaced objects only after the new triple is in place. */
+    Py_XDECREF(old_type);
+    Py_XDECREF(old_value);
+    Py_XDECREF(old_tb);
+}
+#endif
+
+/* PyErrExceptionMatches */
+#if CYTHON_FAST_THREAD_STATE
+/* Return 1 when `exc_type` matches any member of `tuple`, else 0.
+ * On Python 3 a pointer-identity pass runs first; the second pass defers to
+ * __Pyx_PyErr_GivenExceptionMatches() for every member. */
+static int __Pyx_PyErr_ExceptionMatchesTuple(PyObject *exc_type, PyObject *tuple) {
+    const Py_ssize_t count = PyTuple_GET_SIZE(tuple);
+    Py_ssize_t idx;
+#if PY_MAJOR_VERSION >= 3
+    for (idx = 0; idx < count; idx++) {
+        if (PyTuple_GET_ITEM(tuple, idx) == exc_type) {
+            return 1;
+        }
+    }
+#endif
+    idx = 0;
+    while (idx < count) {
+        if (__Pyx_PyErr_GivenExceptionMatches(exc_type, PyTuple_GET_ITEM(tuple, idx))) {
+            return 1;
+        }
+        idx++;
+    }
+    return 0;
+}
+/* Test whether the exception currently raised in `tstate` (curexc_type)
+ * matches `err`, which may be a single object or a tuple of them.
+ * Returns 1 on identity with `err`, 0 when no exception is raised, and
+ * otherwise the result of the tuple / single-object matching helper. */
+static CYTHON_INLINE int __Pyx_PyErr_ExceptionMatchesInState(PyThreadState* tstate, PyObject* err) {
+    PyObject *raised = tstate->curexc_type;
+    if (raised == err) {
+        return 1;
+    }
+    if (unlikely(!raised)) {
+        return 0;
+    }
+    if (likely(!PyTuple_Check(err))) {
+        return __Pyx_PyErr_GivenExceptionMatches(raised, err);
+    }
+    return __Pyx_PyErr_ExceptionMatchesTuple(raised, err);
+}
+#endif
+
+/* GetException */
+/* Take the currently raised exception out of the error indicator, normalize
+ * it, and install it as the exception being handled (exc_info / exc_* fields
+ * or PyErr_SetExcInfo), also returning it through *type, *value and *tb.
+ *
+ * On success returns 0; *type, *value and *tb each receive a reference to the
+ * normalized exception objects. On failure (normalization raised a new error,
+ * or setting the traceback failed) returns -1, zeroes the three outputs and
+ * releases the fetched objects.
+ */
+#if CYTHON_FAST_THREAD_STATE
+static int __Pyx__GetException(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb)
+#else
+static int __Pyx_GetException(PyObject **type, PyObject **value, PyObject **tb)
+#endif
+{
+    PyObject *local_type, *local_value, *local_tb;
+#if CYTHON_FAST_THREAD_STATE
+    PyObject *tmp_type, *tmp_value, *tmp_tb;
+    /* Steal the raised exception straight from the thread state and clear
+       the error indicator. */
+    local_type = tstate->curexc_type;
+    local_value = tstate->curexc_value;
+    local_tb = tstate->curexc_traceback;
+    tstate->curexc_type = 0;
+    tstate->curexc_value = 0;
+    tstate->curexc_traceback = 0;
+#else
+    PyErr_Fetch(&local_type, &local_value, &local_tb);
+#endif
+    PyErr_NormalizeException(&local_type, &local_value, &local_tb);
+    /* An error indicator set at this point was raised during normalization. */
+#if CYTHON_FAST_THREAD_STATE
+    if (unlikely(tstate->curexc_type))
+#else
+    if (unlikely(PyErr_Occurred()))
+#endif
+        goto bad;
+    #if PY_MAJOR_VERSION >= 3
+    if (local_tb) {
+        if (unlikely(PyException_SetTraceback(local_value, local_tb) < 0))
+            goto bad;
+    }
+    #endif
+    /* One extra reference each: the fetched reference and this one are split
+       between the caller's outputs and the handled-exception state below. */
+    Py_XINCREF(local_tb);
+    Py_XINCREF(local_type);
+    Py_XINCREF(local_value);
+    *type = local_type;
+    *value = local_value;
+    *tb = local_tb;
+#if CYTHON_FAST_THREAD_STATE
+    #if CYTHON_USE_EXC_INFO_STACK
+    {
+        _PyErr_StackItem *exc_info = tstate->exc_info;
+        tmp_type = exc_info->exc_type;
+        tmp_value = exc_info->exc_value;
+        tmp_tb = exc_info->exc_traceback;
+        exc_info->exc_type = local_type;
+        exc_info->exc_value = local_value;
+        exc_info->exc_traceback = local_tb;
+    }
+    #else
+    tmp_type = tstate->exc_type;
+    tmp_value = tstate->exc_value;
+    tmp_tb = tstate->exc_traceback;
+    tstate->exc_type = local_type;
+    tstate->exc_value = local_value;
+    tstate->exc_traceback = local_tb;
+    #endif
+    /* Drop the previously handled exception only after the swap is done. */
+    Py_XDECREF(tmp_type);
+    Py_XDECREF(tmp_value);
+    Py_XDECREF(tmp_tb);
+#else
+    PyErr_SetExcInfo(local_type, local_value, local_tb);
+#endif
+    return 0;
+bad:
+    *type = 0;
+    *value = 0;
+    *tb = 0;
+    Py_XDECREF(local_type);
+    Py_XDECREF(local_value);
+    Py_XDECREF(local_tb);
+    return -1;
+}
+
+/* PyObjectCall */
+#if CYTHON_COMPILING_IN_CPYTHON
+/* Call `func` with positional tuple `arg` and keyword dict `kw` through the
+ * type's tp_call slot, guarded by the interpreter's recursion check.
+ * Falls back to PyObject_Call() when the type has no tp_call slot.
+ * Returns the call result, or NULL with an exception set; a NULL result
+ * without an exception is turned into SystemError. */
+static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw) {
+    PyObject *outcome;
+    ternaryfunc slot = Py_TYPE(func)->tp_call;
+    if (unlikely(!slot)) {
+        return PyObject_Call(func, arg, kw);
+    }
+    if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object"))) {
+        return NULL;
+    }
+    outcome = (*slot)(func, arg, kw);
+    Py_LeaveRecursiveCall();
+    if (unlikely(!outcome)) {
+        if (unlikely(!PyErr_Occurred())) {
+            PyErr_SetString(
+                PyExc_SystemError,
+                "NULL result without error in PyObject_Call");
+        }
+    }
+    return outcome;
+}
+#endif
+
+/* PyErrFetchRestore */
+#if CYTHON_FAST_THREAD_STATE
+static CYTHON_INLINE void __Pyx_ErrRestoreInState(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb) {
+    PyObject *tmp_type, *tmp_value, *tmp_tb;
+    tmp_type = tstate->curexc_type;
+    tmp_value = tstate->curexc_value;
+    tmp_tb = tstate->curexc_traceback;
+    tstate->curexc_type = type;
+    tstate->curexc_value = value;
+    tstate->curexc_traceback = tb;
+    Py_XDECREF(tmp_type);
+    Py_XDECREF(tmp_value);
+    Py_XDECREF(tmp_tb);
+}
+static CYTHON_INLINE void __Pyx_ErrFetchInState(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb) {
+    *type = tstate->curexc_type;
+    *value = tstate->curexc_value;
+    *tb = tstate->curexc_traceback;
+    tstate->curexc_type = 0;
+    tstate->curexc_value = 0;
+    tstate->curexc_traceback = 0;
+}
+#endif
+
+/* RaiseException */
+#if PY_MAJOR_VERSION < 3
+/* Python 2 implementation of the `raise type, value, tb` statement.
+ *
+ * type  - exception class, or an exception instance (then `value` must be
+ *         absent or None).
+ * value - exception value; NULL and None are treated alike.
+ * tb    - traceback object; NULL and None are treated alike, anything else
+ *         that is not a traceback raises TypeError.
+ * cause - unused in this variant.
+ *
+ * On return an exception is always set: either the requested one or a
+ * TypeError describing the invalid arguments.
+ */
+static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb,
+                        CYTHON_UNUSED PyObject *cause) {
+    __Pyx_PyThreadState_declare
+    /* Own a reference to every argument that is kept; they are handed to
+       __Pyx_ErrRestore on success or released under raise_error. */
+    Py_XINCREF(type);
+    if (!value || value == Py_None)
+        value = NULL;
+    else
+        Py_INCREF(value);
+    if (!tb || tb == Py_None)
+        tb = NULL;
+    else {
+        Py_INCREF(tb);
+        if (!PyTraceBack_Check(tb)) {
+            PyErr_SetString(PyExc_TypeError,
+                "raise: arg 3 must be a traceback or None");
+            goto raise_error;
+        }
+    }
+    if (PyType_Check(type)) {
+#if CYTHON_COMPILING_IN_PYPY
+        if (!value) {
+            Py_INCREF(Py_None);
+            value = Py_None;
+        }
+#endif
+        PyErr_NormalizeException(&type, &value, &tb);
+    } else {
+        /* `type` is an instance: it becomes the value and its class the type. */
+        if (value) {
+            PyErr_SetString(PyExc_TypeError,
+                "instance exception may not have a separate value");
+            goto raise_error;
+        }
+        value = type;
+        type = (PyObject*) Py_TYPE(type);
+        Py_INCREF(type);
+        if (!PyType_IsSubtype((PyTypeObject *)type, (PyTypeObject *)PyExc_BaseException)) {
+            PyErr_SetString(PyExc_TypeError,
+                "raise: exception class must be a subclass of BaseException");
+            goto raise_error;
+        }
+    }
+    __Pyx_PyThreadState_assign
+    __Pyx_ErrRestore(type, value, tb);
+    return;
+raise_error:
+    Py_XDECREF(value);
+    Py_XDECREF(type);
+    Py_XDECREF(tb);
+    return;
+}
+#else
+/* Python 3 implementation of `raise type(value) from cause` with an optional
+ * explicit traceback.
+ *
+ * type  - exception instance, or exception class to instantiate.
+ * value - NULL/None, an instance of `type` (or a subclass), or the
+ *         argument(s) used to instantiate `type`.
+ * tb    - NULL/None or a traceback object to attach to the raised exception.
+ * cause - NULL for no `from` clause; None, an exception class (instantiated
+ *         with no arguments) or an exception instance otherwise.
+ *
+ * On return an exception is always set: either the requested one or the error
+ * (usually TypeError) that occurred while validating / building it.
+ */
+static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject *cause) {
+    /* Set only when this function instantiates the exception itself. */
+    PyObject* owned_instance = NULL;
+    if (tb == Py_None) {
+        tb = 0;
+    } else if (tb && !PyTraceBack_Check(tb)) {
+        PyErr_SetString(PyExc_TypeError,
+            "raise: arg 3 must be a traceback or None");
+        goto bad;
+    }
+    if (value == Py_None)
+        value = 0;
+    if (PyExceptionInstance_Check(type)) {
+        /* `raise instance`: the instance is the value, its class the type. */
+        if (value) {
+            PyErr_SetString(PyExc_TypeError,
+                "instance exception may not have a separate value");
+            goto bad;
+        }
+        value = type;
+        type = (PyObject*) Py_TYPE(value);
+    } else if (PyExceptionClass_Check(type)) {
+        PyObject *instance_class = NULL;
+        if (value && PyExceptionInstance_Check(value)) {
+            /* `value` is already an exception instance: use it directly if
+               its class is `type` or a subclass of it. */
+            instance_class = (PyObject*) Py_TYPE(value);
+            if (instance_class != type) {
+                int is_subclass = PyObject_IsSubclass(instance_class, type);
+                if (!is_subclass) {
+                    instance_class = NULL;
+                } else if (unlikely(is_subclass == -1)) {
+                    goto bad;
+                } else {
+                    type = instance_class;
+                }
+            }
+        }
+        if (!instance_class) {
+            /* Instantiate `type`, passing `value` as its argument(s): no
+               arguments, the tuple itself, or a 1-tuple wrapping it. */
+            PyObject *args;
+            if (!value)
+                args = PyTuple_New(0);
+            else if (PyTuple_Check(value)) {
+                Py_INCREF(value);
+                args = value;
+            } else
+                args = PyTuple_Pack(1, value);
+            if (!args)
+                goto bad;
+            owned_instance = PyObject_Call(type, args, NULL);
+            Py_DECREF(args);
+            if (!owned_instance)
+                goto bad;
+            value = owned_instance;
+            if (!PyExceptionInstance_Check(value)) {
+                PyErr_Format(PyExc_TypeError,
+                             "calling %R should have returned an instance of "
+                             "BaseException, not %R",
+                             type, Py_TYPE(value));
+                goto bad;
+            }
+        }
+    } else {
+        PyErr_SetString(PyExc_TypeError,
+            "raise: exception class must be a subclass of BaseException");
+        goto bad;
+    }
+    if (cause) {
+        PyObject *fixed_cause;
+        if (cause == Py_None) {
+            fixed_cause = NULL;
+        } else if (PyExceptionClass_Check(cause)) {
+            fixed_cause = PyObject_CallObject(cause, NULL);
+            if (fixed_cause == NULL)
+                goto bad;
+        } else if (PyExceptionInstance_Check(cause)) {
+            fixed_cause = cause;
+            Py_INCREF(fixed_cause);
+        } else {
+            PyErr_SetString(PyExc_TypeError,
+                            "exception causes must derive from "
+                            "BaseException");
+            goto bad;
+        }
+        /* PyException_SetCause takes over the reference to fixed_cause. */
+        PyException_SetCause(value, fixed_cause);
+    }
+    PyErr_SetObject(type, value);
+    if (tb) {
+        /* Replace the traceback of the exception that was just set. */
+#if CYTHON_COMPILING_IN_PYPY
+        PyObject *tmp_type, *tmp_value, *tmp_tb;
+        PyErr_Fetch(&tmp_type, &tmp_value, &tmp_tb);
+        Py_INCREF(tb);
+        PyErr_Restore(tmp_type, tmp_value, tb);
+        Py_XDECREF(tmp_tb);
+#else
+        PyThreadState *tstate = __Pyx_PyThreadState_Current;
+        PyObject* tmp_tb = tstate->curexc_traceback;
+        if (tb != tmp_tb) {
+            Py_INCREF(tb);
+            tstate->curexc_traceback = tb;
+            Py_XDECREF(tmp_tb);
+        }
+#endif
+    }
+bad:
+    /* Reached on success as well: releases the locally created instance. */
+    Py_XDECREF(owned_instance);
+    return;
+}
+#endif
+
+/* ArgTypeTest */
+/* Check that argument `obj` (called `name` in messages) has type `type`.
+ *
+ * Returns 1 when the check passes. Returns 0 with an exception set otherwise:
+ * SystemError when `type` is NULL, TypeError for a mismatch.
+ * With `exact` set, the only case accepted here is the Python 2 basestring
+ * check; every other exact request is reported as a mismatch (presumably the
+ * caller has already tried an exact type comparison -- confirm at call sites).
+ */
+static int __Pyx__ArgTypeTest(PyObject *obj, PyTypeObject *type, const char *name, int exact)
+{
+    if (unlikely(!type)) {
+        PyErr_SetString(PyExc_SystemError, "Missing type object");
+        return 0;
+    }
+    if (!exact) {
+        if (likely(__Pyx_TypeCheck(obj, type))) return 1;
+    }
+    #if PY_MAJOR_VERSION == 2
+    else if ((type == &PyBaseString_Type) && likely(__Pyx_PyBaseString_CheckExact(obj))) {
+        return 1;
+    }
+    #endif
+    PyErr_Format(PyExc_TypeError,
+        "Argument '%.200s' has incorrect type (expected %.200s, got %.200s)",
+        name, type->tp_name, Py_TYPE(obj)->tp_name);
+    return 0;
+}
+
+/* PyCFunctionFastCall */
+#if CYTHON_FAST_PYCCALL
+/* Call the builtin function object `func_obj` with the C array `args` of
+ * `nargs` positional arguments, using its METH_FASTCALL entry point.
+ *
+ * The asserts require that `func_obj` is a PyCFunction whose flags (ignoring
+ * class/static/coexist/keywords/stackless bits) are exactly METH_FASTCALL,
+ * that `nargs` is non-negative with a non-NULL `args` unless nargs == 0, and
+ * that no exception is pending. Returns the result of the underlying call.
+ */
+static CYTHON_INLINE PyObject * __Pyx_PyCFunction_FastCall(PyObject *func_obj, PyObject **args, Py_ssize_t nargs) {
+    PyCFunctionObject *func = (PyCFunctionObject*)func_obj;
+    PyCFunction meth = PyCFunction_GET_FUNCTION(func);
+    PyObject *self = PyCFunction_GET_SELF(func);
+    int flags = PyCFunction_GET_FLAGS(func);
+    assert(PyCFunction_Check(func));
+    assert(METH_FASTCALL == (flags & ~(METH_CLASS | METH_STATIC | METH_COEXIST | METH_KEYWORDS | METH_STACKLESS)));
+    assert(nargs >= 0);
+    assert(nargs == 0 || args != NULL);
+    /* _PyCFunction_FastCallDict() must not be called with an exception set,
+       because it may clear it (directly or indirectly) and so the
+       caller loses its exception */
+    assert(!PyErr_Occurred());
+    /* Before 3.7, or whenever METH_KEYWORDS is set, the entry point takes an
+       extra keyword-names argument, passed as NULL here. */
+    if ((PY_VERSION_HEX < 0x030700A0) || unlikely(flags & METH_KEYWORDS)) {
+        return (*((__Pyx_PyCFunctionFastWithKeywords)(void*)meth)) (self, args, nargs, NULL);
+    } else {
+        return (*((__Pyx_PyCFunctionFast)(void*)meth)) (self, args, nargs);
+    }
+}
+#endif
+
+/* PyFunctionFastCall */
+#if CYTHON_FAST_PYCALL
+/* Run code object `co` in a fresh frame with `na` positional arguments taken
+ * from `args`, without building an argument tuple or keyword dict.
+ *
+ * co      - code object to execute.
+ * args    - array of `na` borrowed argument references.
+ * na      - number of arguments copied into the frame's fast locals.
+ * globals - globals dict for the new frame; must not be NULL.
+ *
+ * Returns the frame's result, or NULL if the frame could not be created or
+ * evaluation failed.
+ */
+static PyObject* __Pyx_PyFunction_FastCallNoKw(PyCodeObject *co, PyObject **args, Py_ssize_t na,
+                                               PyObject *globals) {
+    PyFrameObject *f;
+    PyThreadState *tstate = __Pyx_PyThreadState_Current;
+    PyObject **fastlocals;
+    Py_ssize_t i;
+    PyObject *result;
+    assert(globals != NULL);
+    /* XXX Perhaps we should create a specialized
+       PyFrame_New() that doesn't take locals, but does
+       take builtins without sanity checking them.
+       */
+    assert(tstate != NULL);
+    f = PyFrame_New(tstate, co, globals, NULL);
+    if (f == NULL) {
+        return NULL;
+    }
+    /* Each argument becomes a fast local; the frame holds its own reference. */
+    fastlocals = __Pyx_PyFrame_GetLocalsplus(f);
+    for (i = 0; i < na; i++) {
+        Py_INCREF(*args);
+        fastlocals[i] = *args++;
+    }
+    result = PyEval_EvalFrameEx(f,0);
+    /* The frame is released with the recursion depth temporarily raised by
+       one. NOTE(review): presumably so frame deallocation is accounted at the
+       depth of the call itself -- confirm against CPython's fast-call code. */
+    ++tstate->recursion_depth;
+    Py_DECREF(f);
+    --tstate->recursion_depth;
+    return result;
+}
+#if 1 || PY_VERSION_HEX < 0x030600B1
+/* Call the Python function `func` with `nargs` positional arguments from the
+ * C array `args` and optional keyword dict `kwargs` (may be NULL).
+ *
+ * Two shortcuts go through __Pyx_PyFunction_FastCallNoKw when there are no
+ * keyword arguments, no keyword-only parameters (Python 3) and the code flags
+ * are exactly CO_OPTIMIZED | CO_NEWLOCALS | CO_NOFREE:
+ *   - no defaults and nargs equals the declared argument count;
+ *   - nargs == 0 and the defaults tuple covers every parameter.
+ * Everything else is forwarded to PyEval_EvalCodeEx().
+ *
+ * Returns the call result, or NULL with an exception set (including when the
+ * recursion limit check fails).
+ */
+static PyObject *__Pyx_PyFunction_FastCallDict(PyObject *func, PyObject **args, Py_ssize_t nargs, PyObject *kwargs) {
+    PyCodeObject *co = (PyCodeObject *)PyFunction_GET_CODE(func);
+    PyObject *globals = PyFunction_GET_GLOBALS(func);
+    PyObject *argdefs = PyFunction_GET_DEFAULTS(func);
+    PyObject *closure;
+#if PY_MAJOR_VERSION >= 3
+    PyObject *kwdefs;
+#endif
+    PyObject *kwtuple, **k;
+    PyObject **d;
+    Py_ssize_t nd;
+    Py_ssize_t nk;
+    PyObject *result;
+    assert(kwargs == NULL || PyDict_Check(kwargs));
+    nk = kwargs ? PyDict_Size(kwargs) : 0;
+    if (Py_EnterRecursiveCall((char*)" while calling a Python object")) {
+        return NULL;
+    }
+    if (
+#if PY_MAJOR_VERSION >= 3
+            co->co_kwonlyargcount == 0 &&
+#endif
+            likely(kwargs == NULL || nk == 0) &&
+            co->co_flags == (CO_OPTIMIZED | CO_NEWLOCALS | CO_NOFREE)) {
+        if (argdefs == NULL && co->co_argcount == nargs) {
+            result = __Pyx_PyFunction_FastCallNoKw(co, args, nargs, globals);
+            goto done;
+        }
+        else if (nargs == 0 && argdefs != NULL
+                 && co->co_argcount == Py_SIZE(argdefs)) {
+            /* function called with no arguments, but all parameters have
+               a default value: use default values as arguments .*/
+            args = &PyTuple_GET_ITEM(argdefs, 0);
+            result =__Pyx_PyFunction_FastCallNoKw(co, args, Py_SIZE(argdefs), globals);
+            goto done;
+        }
+    }
+    if (kwargs != NULL) {
+        /* Flatten the dict into a tuple laid out as key0, value0, key1,
+           value1, ... which is the layout passed to PyEval_EvalCodeEx. */
+        Py_ssize_t pos, i;
+        kwtuple = PyTuple_New(2 * nk);
+        if (kwtuple == NULL) {
+            result = NULL;
+            goto done;
+        }
+        k = &PyTuple_GET_ITEM(kwtuple, 0);
+        pos = i = 0;
+        /* PyDict_Next writes borrowed references directly into the tuple
+           slots; the INCREFs turn them into references owned by the tuple. */
+        while (PyDict_Next(kwargs, &pos, &k[i], &k[i+1])) {
+            Py_INCREF(k[i]);
+            Py_INCREF(k[i+1]);
+            i += 2;
+        }
+        nk = i / 2;
+    }
+    else {
+        kwtuple = NULL;
+        k = NULL;
+    }
+    closure = PyFunction_GET_CLOSURE(func);
+#if PY_MAJOR_VERSION >= 3
+    kwdefs = PyFunction_GET_KW_DEFAULTS(func);
+#endif
+    if (argdefs != NULL) {
+        d = &PyTuple_GET_ITEM(argdefs, 0);
+        nd = Py_SIZE(argdefs);
+    }
+    else {
+        d = NULL;
+        nd = 0;
+    }
+#if PY_MAJOR_VERSION >= 3
+    result = PyEval_EvalCodeEx((PyObject*)co, globals, (PyObject *)NULL,
+                               args, (int)nargs,
+                               k, (int)nk,
+                               d, (int)nd, kwdefs, closure);
+#else
+    result = PyEval_EvalCodeEx(co, globals, (PyObject *)NULL,
+                               args, (int)nargs,
+                               k, (int)nk,
+                               d, (int)nd, closure);
+#endif
+    Py_XDECREF(kwtuple);
+    /* The jumps to `done` above all happen while no kwtuple is held, so
+       skipping the XDECREF on those paths releases nothing that is owned. */
+done:
+    Py_LeaveRecursiveCall();
+    return result;
+}
+#endif
+#endif
+
+/* PyObjectCall2Args */
+/* Call `function` with exactly two positional arguments.
+ *
+ * Python functions and fast-call builtins are invoked through their
+ * array-based fast paths when those are compiled in; everything else goes
+ * through a temporary 2-tuple and __Pyx_PyObject_Call().
+ * Returns the call result, or NULL with an exception set. */
+static CYTHON_UNUSED PyObject* __Pyx_PyObject_Call2Args(PyObject* function, PyObject* arg1, PyObject* arg2) {
+    PyObject *packed, *outcome;
+    #if CYTHON_FAST_PYCALL
+    if (PyFunction_Check(function)) {
+        PyObject *argv[2];
+        argv[0] = arg1;
+        argv[1] = arg2;
+        return __Pyx_PyFunction_FastCall(function, argv, 2);
+    }
+    #endif
+    #if CYTHON_FAST_PYCCALL
+    if (__Pyx_PyFastCFunction_Check(function)) {
+        PyObject *argv[2];
+        argv[0] = arg1;
+        argv[1] = arg2;
+        return __Pyx_PyCFunction_FastCall(function, argv, 2);
+    }
+    #endif
+    packed = PyTuple_New(2);
+    if (unlikely(!packed)) {
+        return NULL;
+    }
+    /* PyTuple_SET_ITEM steals a reference, hence the INCREF before each. */
+    Py_INCREF(arg1);
+    PyTuple_SET_ITEM(packed, 0, arg1);
+    Py_INCREF(arg2);
+    PyTuple_SET_ITEM(packed, 1, arg2);
+    /* Hold `function` for the duration of the call. */
+    Py_INCREF(function);
+    outcome = __Pyx_PyObject_Call(function, packed, NULL);
+    Py_DECREF(packed);
+    Py_DECREF(function);
+    return outcome;
+}
+
+/* PyObjectCallMethO */
+#if CYTHON_COMPILING_IN_CPYTHON
+/* Invoke the C function wrapped by builtin function object `func` directly as
+ * cfunc(self, arg), guarded by the interpreter's recursion check.
+ * Returns the result, or NULL with an exception set; a NULL result without an
+ * exception is turned into SystemError. */
+static CYTHON_INLINE PyObject* __Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg) {
+    PyCFunction impl = PyCFunction_GET_FUNCTION(func);
+    PyObject *bound_self = PyCFunction_GET_SELF(func);
+    PyObject *outcome;
+    if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object"))) {
+        return NULL;
+    }
+    outcome = impl(bound_self, arg);
+    Py_LeaveRecursiveCall();
+    if (unlikely(!outcome)) {
+        if (unlikely(!PyErr_Occurred())) {
+            PyErr_SetString(
+                PyExc_SystemError,
+                "NULL result without error in PyObject_Call");
+        }
+    }
+    return outcome;
+}
+#endif
+
+/* PyObjectCallOneArg */
+#if CYTHON_COMPILING_IN_CPYTHON
+/* Generic one-argument call: wrap `arg` in a temporary 1-tuple and call `func`
+ * through __Pyx_PyObject_Call(). Returns the result, or NULL on failure
+ * (tuple allocation or the call itself). */
+static PyObject* __Pyx__PyObject_CallOneArg(PyObject *func, PyObject *arg) {
+    PyObject *outcome = NULL;
+    PyObject *packed = PyTuple_New(1);
+    if (likely(packed)) {
+        /* PyTuple_SET_ITEM steals a reference, hence the INCREF. */
+        Py_INCREF(arg);
+        PyTuple_SET_ITEM(packed, 0, arg);
+        outcome = __Pyx_PyObject_Call(func, packed, NULL);
+        Py_DECREF(packed);
+    }
+    return outcome;
+}
+/* Call `func` with the single positional argument `arg`, choosing the
+ * cheapest available calling convention:
+ *   - Python functions: array-based fast call (when CYTHON_FAST_PYCALL);
+ *   - builtin functions flagged METH_O: direct C call;
+ *   - fast-call builtins: array-based C call (when CYTHON_FAST_PYCCALL);
+ *   - anything else: generic call through a temporary 1-tuple.
+ * Returns the call result, or NULL with an exception set.
+ */
+static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg) {
+#if CYTHON_FAST_PYCALL
+    if (PyFunction_Check(func)) {
+        return __Pyx_PyFunction_FastCall(func, &arg, 1);
+    }
+#endif
+    if (likely(PyCFunction_Check(func))) {
+        if (likely(PyCFunction_GET_FLAGS(func) & METH_O)) {
+            return __Pyx_PyObject_CallMethO(func, arg);
+#if CYTHON_FAST_PYCCALL
+        } else if (__Pyx_PyFastCFunction_Check(func)) {
+            return __Pyx_PyCFunction_FastCall(func, &arg, 1);
+#endif
+        }
+    }
+    /* Builtins matching neither convention fall through to here as well. */
+    return __Pyx__PyObject_CallOneArg(func, arg);
+}
+#else
+/* Variant used when not compiling for CPython: pack `arg` into a 1-tuple and
+ * call `func` through __Pyx_PyObject_Call(). Returns the result, or NULL on
+ * failure (tuple allocation or the call itself). */
+static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg) {
+    PyObject *outcome = NULL;
+    PyObject *packed = PyTuple_Pack(1, arg);
+    if (likely(packed)) {
+        outcome = __Pyx_PyObject_Call(func, packed, NULL);
+        Py_DECREF(packed);
+    }
+    return outcome;
+}
+#endif
+
+/* BytesEquals */
+/* Equality / inequality test specialized for bytes objects.
+ *
+ * s1, s2 - objects to compare; the fast paths apply when both are exact
+ *          bytes objects, or when one is None and the other exact bytes.
+ * equals - Py_EQ or Py_NE, selecting which relation is evaluated.
+ *
+ * Returns 1 if the selected relation holds, 0 if it does not, and -1 with an
+ * exception set when the generic rich comparison fails. Under PyPy the whole
+ * test is delegated to PyObject_RichCompareBool().
+ */
+static CYTHON_INLINE int __Pyx_PyBytes_Equals(PyObject* s1, PyObject* s2, int equals) {
+#if CYTHON_COMPILING_IN_PYPY
+    return PyObject_RichCompareBool(s1, s2, equals);
+#else
+    if (s1 == s2) {
+        /* Same object: equal by identity. */
+        return (equals == Py_EQ);
+    } else if (PyBytes_CheckExact(s1) & PyBytes_CheckExact(s2)) {
+        /* Cheap rejections first: length, then first byte, then cached hash;
+           only then the full memcmp. */
+        const char *ps1, *ps2;
+        Py_ssize_t length = PyBytes_GET_SIZE(s1);
+        if (length != PyBytes_GET_SIZE(s2))
+            return (equals == Py_NE);
+        ps1 = PyBytes_AS_STRING(s1);
+        ps2 = PyBytes_AS_STRING(s2);
+        if (ps1[0] != ps2[0]) {
+            return (equals == Py_NE);
+        } else if (length == 1) {
+            return (equals == Py_EQ);
+        } else {
+            int result;
+#if CYTHON_USE_UNICODE_INTERNALS
+            /* Differing hashes prove inequality; -1 is excluded from the test
+               (presumably the marker for a hash not computed yet). */
+            Py_hash_t hash1, hash2;
+            hash1 = ((PyBytesObject*)s1)->ob_shash;
+            hash2 = ((PyBytesObject*)s2)->ob_shash;
+            if (hash1 != hash2 && hash1 != -1 && hash2 != -1) {
+                return (equals == Py_NE);
+            }
+#endif
+            result = memcmp(ps1, ps2, (size_t)length);
+            return (equals == Py_EQ) ? (result == 0) : (result != 0);
+        }
+    } else if ((s1 == Py_None) & PyBytes_CheckExact(s2)) {
+        /* None never equals a bytes object. */
+        return (equals == Py_NE);
+    } else if ((s2 == Py_None) & PyBytes_CheckExact(s1)) {
+        return (equals == Py_NE);
+    } else {
+        /* Any other combination: defer to the generic rich comparison. */
+        int result;
+        PyObject* py_result = PyObject_RichCompare(s1, s2, equals);
+        if (!py_result)
+            return -1;
+        result = __Pyx_PyObject_IsTrue(py_result);
+        Py_DECREF(py_result);
+        return result;
+    }
+#endif
+}
+
+/* UnicodeEquals
+ *
+ * Test s1 and s2 for equality (equals == Py_EQ) or inequality
+ * (equals == Py_NE), with a fast path for two exact unicode objects.
+ *
+ * The fast paths return (equals == Py_EQ) when the operands are found equal
+ * and (equals == Py_NE) when they are found different.  Anything that is not
+ * covered by a fast path is delegated to PyObject_RichCompare().
+ *
+ * Returns the truth value of the requested relation, or -1 when readying a
+ * string or the rich comparison fails.
+ */
+static CYTHON_INLINE int __Pyx_PyUnicode_Equals(PyObject* s1, PyObject* s2, int equals) {
+#if CYTHON_COMPILING_IN_PYPY
+    return PyObject_RichCompareBool(s1, s2, equals);
+#else
+#if PY_MAJOR_VERSION < 3
+    PyObject* owned_ref = NULL; /* unicode copy of a Py2 str operand; released on every exit path below */
+#endif
+    int s1_is_unicode, s2_is_unicode;
+    if (s1 == s2) { /* identical objects are treated as equal without inspecting them */
+        goto return_eq;
+    }
+    s1_is_unicode = PyUnicode_CheckExact(s1);
+    s2_is_unicode = PyUnicode_CheckExact(s2);
+#if PY_MAJOR_VERSION < 3
+    if ((s1_is_unicode & (!s2_is_unicode)) && PyString_CheckExact(s2)) { /* unicode vs. str: convert the str side */
+        owned_ref = PyUnicode_FromObject(s2);
+        if (unlikely(!owned_ref))
+            return -1;
+        s2 = owned_ref;
+        s2_is_unicode = 1;
+    } else if ((s2_is_unicode & (!s1_is_unicode)) && PyString_CheckExact(s1)) {
+        owned_ref = PyUnicode_FromObject(s1);
+        if (unlikely(!owned_ref))
+            return -1;
+        s1 = owned_ref;
+        s1_is_unicode = 1;
+    } else if (((!s2_is_unicode) & (!s1_is_unicode))) { /* neither side is exact unicode: use the bytes comparison */
+        return __Pyx_PyBytes_Equals(s1, s2, equals);
+    }
+#endif
+    if (s1_is_unicode & s2_is_unicode) {
+        Py_ssize_t length;
+        int kind;
+        void *data1, *data2;
+        if (unlikely(__Pyx_PyUnicode_READY(s1) < 0) || unlikely(__Pyx_PyUnicode_READY(s2) < 0))
+            return -1;
+        length = __Pyx_PyUnicode_GET_LENGTH(s1);
+        if (length != __Pyx_PyUnicode_GET_LENGTH(s2)) { /* different lengths can never be equal */
+            goto return_ne;
+        }
+#if CYTHON_USE_UNICODE_INTERNALS
+        {
+            Py_hash_t hash1, hash2;
+        #if CYTHON_PEP393_ENABLED
+            hash1 = ((PyASCIIObject*)s1)->hash;
+            hash2 = ((PyASCIIObject*)s2)->hash;
+        #else
+            hash1 = ((PyUnicodeObject*)s1)->hash;
+            hash2 = ((PyUnicodeObject*)s2)->hash;
+        #endif
+            if (hash1 != hash2 && hash1 != -1 && hash2 != -1) { /* -1 is skipped here; presumably it marks a hash not computed yet */
+                goto return_ne;
+            }
+        }
+#endif
+        kind = __Pyx_PyUnicode_KIND(s1);
+        if (kind != __Pyx_PyUnicode_KIND(s2)) { /* differing storage kinds are reported as not equal */
+            goto return_ne;
+        }
+        data1 = __Pyx_PyUnicode_DATA(s1);
+        data2 = __Pyx_PyUnicode_DATA(s2);
+        if (__Pyx_PyUnicode_READ(kind, data1, 0) != __Pyx_PyUnicode_READ(kind, data2, 0)) { /* cheap first-character check before memcmp */
+            goto return_ne;
+        } else if (length == 1) {
+            goto return_eq;
+        } else {
+            int result = memcmp(data1, data2, (size_t)(length * kind)); /* byte count is length * kind */
+            #if PY_MAJOR_VERSION < 3
+            Py_XDECREF(owned_ref);
+            #endif
+            return (equals == Py_EQ) ? (result == 0) : (result != 0);
+        }
+    } else if ((s1 == Py_None) & s2_is_unicode) { /* None against an exact unicode object: not equal */
+        goto return_ne;
+    } else if ((s2 == Py_None) & s1_is_unicode) {
+        goto return_ne;
+    } else {
+        int result;
+        PyObject* py_result = PyObject_RichCompare(s1, s2, equals); /* generic fallback for all other operand types */
+        #if PY_MAJOR_VERSION < 3
+        Py_XDECREF(owned_ref);
+        #endif
+        if (!py_result)
+            return -1;
+        result = __Pyx_PyObject_IsTrue(py_result);
+        Py_DECREF(py_result);
+        return result;
+    }
+return_eq:
+    #if PY_MAJOR_VERSION < 3
+    Py_XDECREF(owned_ref);
+    #endif
+    return (equals == Py_EQ);
+return_ne:
+    #if PY_MAJOR_VERSION < 3
+    Py_XDECREF(owned_ref);
+    #endif
+    return (equals == Py_NE);
+#endif
+}
+
+/* DivInt[Py_ssize_t] */
+/* Floor division of a by b for Py_ssize_t.
+ *
+ * Starts from the C quotient and lowers it by one when the remainder is
+ * non-zero and has a sign different from the divisor's, so the result is
+ * rounded toward negative infinity.  b must not be zero (not checked here).
+ */
+static CYTHON_INLINE Py_ssize_t __Pyx_div_Py_ssize_t(Py_ssize_t a, Py_ssize_t b) {
+    const Py_ssize_t quotient = a / b;
+    const Py_ssize_t remainder = a - quotient * b;
+    /* 1 when the remainder is non-zero and its sign bit differs from b's */
+    const int adjust = ((remainder != 0) & ((remainder ^ b) < 0));
+    return quotient - adjust;
+}
+
+/* GetAttr */
+/* Look up attribute n on o.
+ *
+ * With type slots enabled, a string name (unicode on Py3, str on Py2) goes
+ * through __Pyx_PyObject_GetAttrStr(); every other case uses
+ * PyObject_GetAttr().  Returns whatever the chosen lookup returns.
+ */
+static CYTHON_INLINE PyObject *__Pyx_GetAttr(PyObject *o, PyObject *n) {
+#if CYTHON_USE_TYPE_SLOTS
+  #if PY_MAJOR_VERSION >= 3
+    const int name_is_str = PyUnicode_Check(n);
+  #else
+    const int name_is_str = PyString_Check(n);
+  #endif
+    if (likely(name_is_str)) {
+        return __Pyx_PyObject_GetAttrStr(o, n);
+    }
+#endif
+    return PyObject_GetAttr(o, n);
+}
+
+/* GetItemInt */
+/* Generic o[j] lookup that consumes the reference to the index object j.
+ *
+ * A NULL j (e.g. a failed index-object creation by the caller) yields NULL
+ * without touching o.  Otherwise the result of PyObject_GetItem() is returned
+ * and j is released.
+ */
+static PyObject *__Pyx_GetItemInt_Generic(PyObject *o, PyObject* j) {
+    PyObject *item = NULL;
+    if (j) {
+        item = PyObject_GetItem(o, j);
+        Py_DECREF(j);
+    }
+    return item;
+}
+/* Fetch item i of list o, returning a new reference.
+ *
+ * wraparound enables negative indexing relative to the list size;
+ * boundscheck enables validation of the (wrapped) index.  An index that
+ * fails validation is retried through the generic item lookup with the
+ * original i.  Without safe macros / borrowed refs, PySequence_GetItem()
+ * is used instead.
+ */
+static CYTHON_INLINE PyObject *__Pyx_GetItemInt_List_Fast(PyObject *o, Py_ssize_t i,
+                                                              CYTHON_NCP_UNUSED int wraparound,
+                                                              CYTHON_NCP_UNUSED int boundscheck) {
+#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS
+    Py_ssize_t index = i;
+    if (wraparound & unlikely(i < 0)) {
+        index += PyList_GET_SIZE(o);
+    }
+    if (boundscheck && unlikely(!__Pyx_is_valid_index(index, PyList_GET_SIZE(o)))) {
+        /* out of range: defer to the generic path with the untouched index */
+        return __Pyx_GetItemInt_Generic(o, PyInt_FromSsize_t(i));
+    }
+    {
+        PyObject *item = PyList_GET_ITEM(o, index);
+        Py_INCREF(item);
+        return item;
+    }
+#else
+    return PySequence_GetItem(o, i);
+#endif
+}
+/* Fetch item i of tuple o, returning a new reference.
+ *
+ * wraparound enables negative indexing relative to the tuple size;
+ * boundscheck enables validation of the (wrapped) index.  An index that
+ * fails validation is retried through the generic item lookup with the
+ * original i.  Without safe macros / borrowed refs, PySequence_GetItem()
+ * is used instead.
+ */
+static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Tuple_Fast(PyObject *o, Py_ssize_t i,
+                                                              CYTHON_NCP_UNUSED int wraparound,
+                                                              CYTHON_NCP_UNUSED int boundscheck) {
+#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS
+    Py_ssize_t index = i;
+    if (wraparound & unlikely(i < 0)) {
+        index += PyTuple_GET_SIZE(o);
+    }
+    if (boundscheck && unlikely(!__Pyx_is_valid_index(index, PyTuple_GET_SIZE(o)))) {
+        /* out of range: defer to the generic path with the untouched index */
+        return __Pyx_GetItemInt_Generic(o, PyInt_FromSsize_t(i));
+    }
+    {
+        PyObject *item = PyTuple_GET_ITEM(o, index);
+        Py_INCREF(item);
+        return item;
+    }
+#else
+    return PySequence_GetItem(o, i);
+#endif
+}
+static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Fast(PyObject *o, Py_ssize_t i, int is_list,
+                                                     CYTHON_NCP_UNUSED int wraparound,
+                                                     CYTHON_NCP_UNUSED int boundscheck) { /*
+     * Fetch o[i] for a C integer index, picking the cheapest available route.
+     *
+     * is_list:     non-zero makes the list branch run without a type check.
+     * wraparound:  non-zero enables negative indices counted from the end.
+     * boundscheck: non-zero enables index validation on the list/tuple paths.
+     *
+     * Exact lists and tuples are read directly; other objects go through
+     * their sq_item slot.  Whatever is not handled by a fast path (including
+     * a failed bounds check) ends at the generic lookup on the last line.
+     */
+#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS && CYTHON_USE_TYPE_SLOTS
+    if (is_list || PyList_CheckExact(o)) {
+        Py_ssize_t n = ((!wraparound) | likely(i >= 0)) ? i : i + PyList_GET_SIZE(o);
+        if ((!boundscheck) || (likely(__Pyx_is_valid_index(n, PyList_GET_SIZE(o))))) {
+            PyObject *r = PyList_GET_ITEM(o, n);
+            Py_INCREF(r);
+            return r;
+        }
+    }
+    else if (PyTuple_CheckExact(o)) {
+        Py_ssize_t n = ((!wraparound) | likely(i >= 0)) ? i : i + PyTuple_GET_SIZE(o);
+        if ((!boundscheck) || likely(__Pyx_is_valid_index(n, PyTuple_GET_SIZE(o)))) {
+            PyObject *r = PyTuple_GET_ITEM(o, n);
+            Py_INCREF(r);
+            return r;
+        }
+    } else {
+        PySequenceMethods *m = Py_TYPE(o)->tp_as_sequence;
+        if (likely(m && m->sq_item)) {
+            if (wraparound && unlikely(i < 0) && likely(m->sq_length)) {
+                Py_ssize_t l = m->sq_length(o);
+                if (likely(l >= 0)) {
+                    i += l;
+                } else { /* sq_length failed: only an OverflowError is tolerated */
+                    if (!PyErr_ExceptionMatches(PyExc_OverflowError))
+                        return NULL;
+                    PyErr_Clear(); /* continue with the unadjusted negative index */
+                }
+            }
+            return m->sq_item(o, i);
+        }
+    }
+#else
+    if (is_list || PySequence_Check(o)) {
+        return PySequence_GetItem(o, i);
+    }
+#endif
+    return __Pyx_GetItemInt_Generic(o, PyInt_FromSsize_t(i)); /* generic fallback; the helper consumes the index object */
+}
+
+/* ObjectGetItem */
+#if CYTHON_USE_TYPE_SLOTS
+/* Subscript obj with an index object through the sequence protocol.
+ *
+ * Raises TypeError when obj's type has no sq_item slot.  The index is
+ * converted to Py_ssize_t; if that conversion fails with OverflowError the
+ * error is replaced by an IndexError, any other conversion error is passed
+ * on unchanged.  Returns the item or NULL on error.
+ */
+static PyObject *__Pyx_PyObject_GetIndex(PyObject *obj, PyObject* index) {
+    Py_ssize_t key_value;
+    PySequenceMethods *seq = Py_TYPE(obj)->tp_as_sequence;
+    if (unlikely(!seq || !seq->sq_item)) {
+        PyErr_Format(PyExc_TypeError, "'%.200s' object is not subscriptable", Py_TYPE(obj)->tp_name);
+        return NULL;
+    }
+    key_value = __Pyx_PyIndex_AsSsize_t(index);
+    if (unlikely(key_value == -1)) {
+        /* -1 is also a legitimate index, so an error is only assumed when one is pending */
+        PyObject *pending = PyErr_Occurred();
+        if (pending) {
+            if (PyErr_GivenExceptionMatches(pending, PyExc_OverflowError)) {
+                PyErr_Clear();
+                PyErr_Format(PyExc_IndexError, "cannot fit '%.200s' into an index-sized integer", Py_TYPE(index)->tp_name);
+            }
+            return NULL;
+        }
+    }
+    return __Pyx_GetItemInt_Fast(obj, key_value, 0, 1, 1);
+}
+/* Evaluate obj[key]: use the type's mp_subscript slot when it exists,
+ * otherwise fall back to integer indexing via __Pyx_PyObject_GetIndex(). */
+static PyObject *__Pyx_PyObject_GetItem(PyObject *obj, PyObject* key) {
+    PyMappingMethods *mapping = Py_TYPE(obj)->tp_as_mapping;
+    if (unlikely(!mapping || !mapping->mp_subscript)) {
+        return __Pyx_PyObject_GetIndex(obj, key);
+    }
+    return mapping->mp_subscript(obj, key);
+}
+#endif
+
+/* decode_c_string */
+/* Decode the slice cstring[start:stop] into a Python unicode object.
+ *
+ * Negative start/stop values are interpreted relative to strlen(cstring);
+ * a start that is still negative after adjustment is clamped to 0.  An empty
+ * or inverted slice returns a new reference to the shared empty unicode
+ * object.  decode_func is used when given, otherwise PyUnicode_Decode() with
+ * `encoding`.  Raises OverflowError if the string length does not fit
+ * Py_ssize_t.  Non-negative bounds are not checked against the string length.
+ */
+static CYTHON_INLINE PyObject* __Pyx_decode_c_string(
+         const char* cstring, Py_ssize_t start, Py_ssize_t stop,
+         const char* encoding, const char* errors,
+         PyObject* (*decode_func)(const char *s, Py_ssize_t size, const char *errors)) {
+    if (unlikely(start < 0 || stop < 0)) {
+        /* the length is only needed (and only computed) for negative bounds */
+        const size_t raw_len = strlen(cstring);
+        Py_ssize_t total;
+        if (unlikely(raw_len > (size_t) PY_SSIZE_T_MAX)) {
+            PyErr_SetString(PyExc_OverflowError,
+                            "c-string too long to convert to Python");
+            return NULL;
+        }
+        total = (Py_ssize_t) raw_len;
+        if (start < 0) {
+            start += total;
+            if (start < 0) {
+                start = 0;
+            }
+        }
+        if (stop < 0) {
+            stop += total;
+        }
+    }
+    if (unlikely(stop <= start)) {
+        return __Pyx_NewRef(__pyx_empty_unicode);
+    }
+    if (decode_func != NULL) {
+        return decode_func(cstring + start, stop - start, errors);
+    }
+    return PyUnicode_Decode(cstring + start, stop - start, encoding, errors);
+}
+
+/* GetAttr3 */
+static PyObject *__Pyx_GetAttr3Default(PyObject *d) {
+    __Pyx_PyThreadState_declare
+    __Pyx_PyThreadState_assign
+    if (unlikely(!__Pyx_PyErr_ExceptionMatches(PyExc_AttributeError)))
+        return NULL;
+    __Pyx_PyErr_Clear();
+    Py_INCREF(d);
+    return d;
+}
+/* getattr(o, n, d): return the attribute when the lookup succeeds, otherwise
+ * let __Pyx_GetAttr3Default() decide between the default d and an error. */
+static CYTHON_INLINE PyObject *__Pyx_GetAttr3(PyObject *o, PyObject *n, PyObject *d) {
+    PyObject *attr = __Pyx_GetAttr(o, n);
+    if (unlikely(!attr)) {
+        return __Pyx_GetAttr3Default(d);
+    }
+    return attr;
+}
+
+/* PyDictVersioning */
+#if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_TYPE_SLOTS
+/* Version tag of the tp_dict of obj's type, or 0 when the type has no dict. */
+static CYTHON_INLINE PY_UINT64_T __Pyx_get_tp_dict_version(PyObject *obj) {
+    PyObject *type_dict = Py_TYPE(obj)->tp_dict;
+    if (unlikely(!type_dict)) {
+        return 0;
+    }
+    return __PYX_GET_DICT_VERSION(type_dict);
+}
+/* Version tag of obj's instance dict, or 0 when the type has no dict offset
+ * or the instance dict has not been created.
+ *
+ * On CPython a positive tp_dictoffset is applied directly to the object
+ * pointer; every other case asks _PyObject_GetDictPtr().
+ */
+static CYTHON_INLINE PY_UINT64_T __Pyx_get_object_dict_version(PyObject *obj) {
+    PyObject **dict_slot = NULL;
+    const Py_ssize_t dict_offset = Py_TYPE(obj)->tp_dictoffset;
+    if (dict_offset != 0) {
+#if CYTHON_COMPILING_IN_CPYTHON
+        if (likely(dict_offset > 0)) {
+            dict_slot = (PyObject **) ((char *)obj + dict_offset);
+        } else {
+            dict_slot = _PyObject_GetDictPtr(obj);
+        }
+#else
+        dict_slot = _PyObject_GetDictPtr(obj);
+#endif
+    }
+    if (!dict_slot || !*dict_slot) {
+        return 0;
+    }
+    return __PYX_GET_DICT_VERSION(*dict_slot);
+}
+/* Return 1 when both recorded version tags are still current for obj: the
+ * type dict's tag equals tp_dict_version and the instance dict's tag equals
+ * obj_dict_version.  A missing type dict counts as a mismatch (returns 0). */
+static CYTHON_INLINE int __Pyx_object_dict_version_matches(PyObject* obj, PY_UINT64_T tp_dict_version, PY_UINT64_T obj_dict_version) {
+    PyObject *type_dict = Py_TYPE(obj)->tp_dict;
+    if (likely(type_dict) && likely(tp_dict_version == __PYX_GET_DICT_VERSION(type_dict))) {
+        return obj_dict_version == __Pyx_get_object_dict_version(obj);
+    }
+    return 0;
+}
+#endif
+
+/* GetModuleGlobalName */
+/* Look up `name` in the module globals (__pyx_d) and fall back to the
+ * builtins via __Pyx_GetBuiltinName() when it is not found there.
+ *
+ * With CYTHON_USE_DICT_VERSIONS the extra arguments receive the dict version
+ * and cached value through __PYX_UPDATE_DICT_CACHE.
+ *
+ * Returns a new reference, or NULL with an exception set.
+ */
+#if CYTHON_USE_DICT_VERSIONS
+static PyObject *__Pyx__GetModuleGlobalName(PyObject *name, PY_UINT64_T *dict_version, PyObject **dict_cached_value)
+#else
+static CYTHON_INLINE PyObject *__Pyx__GetModuleGlobalName(PyObject *name)
+#endif
+{
+    PyObject *result;
+#if !CYTHON_AVOID_BORROWED_REFS
+#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030500A1
+    /* Uses the hash stored in the name object to skip rehashing. */
+    result = _PyDict_GetItem_KnownHash(__pyx_d, name, ((PyASCIIObject *) name)->hash);
+    __PYX_UPDATE_DICT_CACHE(__pyx_d, result, *dict_cached_value, *dict_version)
+    if (likely(result)) {
+        /* borrowed reference from the dict: take our own */
+        return __Pyx_NewRef(result);
+    } else if (unlikely(PyErr_Occurred())) {
+        return NULL;
+    }
+#else
+    result = PyDict_GetItem(__pyx_d, name);
+    __PYX_UPDATE_DICT_CACHE(__pyx_d, result, *dict_cached_value, *dict_version)
+    if (likely(result)) {
+        /* borrowed reference from the dict: take our own */
+        return __Pyx_NewRef(result);
+    }
+#endif
+#else
+    result = PyObject_GetItem(__pyx_d, name);
+    __PYX_UPDATE_DICT_CACHE(__pyx_d, result, *dict_cached_value, *dict_version)
+    if (likely(result)) {
+        /* PyObject_GetItem() already returns a new reference; adding another
+         * one here would leak a reference on every successful lookup. */
+        return result;
+    }
+    PyErr_Clear();
+#endif
+    return __Pyx_GetBuiltinName(name);
+}
+
+/* RaiseTooManyValuesToUnpack
+ *
+ * Set a ValueError whose message reports that there were too many values to
+ * unpack; `expected` is the count formatted into the message.
+ */
+static CYTHON_INLINE void __Pyx_RaiseTooManyValuesError(Py_ssize_t expected) {
+    PyErr_Format(PyExc_ValueError,
+                 "too many values to unpack (expected %" CYTHON_FORMAT_SSIZE_T "d)", expected);
+}
+
+/* RaiseNeedMoreValuesToUnpack */
+/* Set a ValueError reporting that more than `index` values are needed to
+ * unpack.  The noun is pluralised unless index is exactly 1. */
+static CYTHON_INLINE void __Pyx_RaiseNeedMoreValuesError(Py_ssize_t index) {
+    const char *plural_suffix = (index == 1) ? "" : "s";
+    PyErr_Format(PyExc_ValueError,
+                 "need more than %" CYTHON_FORMAT_SSIZE_T "d value%.1s to unpack",
+                 index, plural_suffix);
+}
+
+/* RaiseNoneIterError
+ *
+ * Set a TypeError with the fixed message that a 'NoneType' object is not
+ * iterable.  Takes no arguments and returns nothing.
+ */
+static CYTHON_INLINE void __Pyx_RaiseNoneNotIterableError(void) {
+    PyErr_SetString(PyExc_TypeError, "'NoneType' object is not iterable");
+}
+
+/* ExtTypeTest */
+/* Check that obj is an instance of `type`.
+ *
+ * Returns 1 on success.  Returns 0 with SystemError set when `type` is NULL,
+ * or 0 with TypeError set when __Pyx_TypeCheck() rejects obj.
+ */
+static CYTHON_INLINE int __Pyx_TypeTest(PyObject *obj, PyTypeObject *type) {
+    if (unlikely(!type)) {
+        PyErr_SetString(PyExc_SystemError, "Missing type object");
+        return 0;
+    }
+    if (unlikely(!__Pyx_TypeCheck(obj, type))) {
+        PyErr_Format(PyExc_TypeError, "Cannot convert %.200s to %.200s",
+                     Py_TYPE(obj)->tp_name, type->tp_name);
+        return 0;
+    }
+    return 1;
+}
+
+/* SwapException */
+#if CYTHON_FAST_THREAD_STATE
+static CYTHON_INLINE void __Pyx__ExceptionSwap(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb) { /*
+     * Exchange the exception triple stored in the thread state with the one
+     * passed in through *type, *value and *tb.
+     *
+     * With CYTHON_USE_EXC_INFO_STACK the triple lives in tstate->exc_info,
+     * otherwise directly in tstate.  No reference counts are changed:
+     * ownership of each object simply moves to the other side.
+     */
+    PyObject *tmp_type, *tmp_value, *tmp_tb;
+    #if CYTHON_USE_EXC_INFO_STACK
+    _PyErr_StackItem *exc_info = tstate->exc_info;
+    tmp_type = exc_info->exc_type;
+    tmp_value = exc_info->exc_value;
+    tmp_tb = exc_info->exc_traceback;
+    exc_info->exc_type = *type;
+    exc_info->exc_value = *value;
+    exc_info->exc_traceback = *tb;
+    #else
+    tmp_type = tstate->exc_type;
+    tmp_value = tstate->exc_value;
+    tmp_tb = tstate->exc_traceback;
+    tstate->exc_type = *type;
+    tstate->exc_value = *value;
+    tstate->exc_traceback = *tb;
+    #endif
+    *type = tmp_type; /* hand the previous triple back to the caller */
+    *value = tmp_value;
+    *tb = tmp_tb;
+}
+#else
+/* Exchange the interpreter's exception info with the triple passed in
+ * through *type, *value and *tb, using the public API: the previous triple
+ * is read with PyErr_GetExcInfo(), the caller's triple is installed with
+ * PyErr_SetExcInfo(), and the previous one is written back to the caller. */
+static CYTHON_INLINE void __Pyx_ExceptionSwap(PyObject **type, PyObject **value, PyObject **tb) {
+    PyObject *prev_type, *prev_value, *prev_tb;
+    PyErr_GetExcInfo(&prev_type, &prev_value, &prev_tb);
+    PyErr_SetExcInfo(*type, *value, *tb);
+    *tb = prev_tb;
+    *value = prev_value;
+    *type = prev_type;
+}
+#endif
+
+/* Import
+ *
+ * Import the module `name` on behalf of this extension module.
+ *
+ * name:      module name object.
+ * from_list: list passed as the import's "fromlist"; an empty list is
+ *            created and used when this is NULL.
+ * level:     import level.  On Python 3 a level of -1 means: first try a
+ *            relative import (level 1) if this module's name contains a '.',
+ *            and if that raises ImportError retry as level 0.
+ *
+ * The module's own dict is passed as globals and a fresh empty dict as
+ * locals.  Returns the imported module, or NULL (0) on failure.
+ */
+static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level) {
+    PyObject *empty_list = 0;
+    PyObject *module = 0;
+    PyObject *global_dict = 0;
+    PyObject *empty_dict = 0;
+    PyObject *list;
+    #if PY_MAJOR_VERSION < 3
+    PyObject *py_import;
+    py_import = __Pyx_PyObject_GetAttrStr(__pyx_b, __pyx_n_s_import); /* import callable looked up on __pyx_b */
+    if (!py_import)
+        goto bad;
+    #endif
+    if (from_list)
+        list = from_list;
+    else {
+        empty_list = PyList_New(0);
+        if (!empty_list)
+            goto bad;
+        list = empty_list;
+    }
+    global_dict = PyModule_GetDict(__pyx_m); /* not released below, unlike the other temporaries */
+    if (!global_dict)
+        goto bad;
+    empty_dict = PyDict_New();
+    if (!empty_dict)
+        goto bad;
+    {
+        #if PY_MAJOR_VERSION >= 3
+        if (level == -1) {
+            if ((1) && (strchr(__Pyx_MODULE_NAME, '.'))) { /* a dotted module name means we live inside a package */
+                module = PyImport_ImportModuleLevelObject(
+                    name, global_dict, empty_dict, list, 1);
+                if (!module) {
+                    if (!PyErr_ExceptionMatches(PyExc_ImportError))
+                        goto bad;
+                    PyErr_Clear(); /* relative import failed: fall back to level 0 below */
+                }
+            }
+            level = 0;
+        }
+        #endif
+        if (!module) {
+            #if PY_MAJOR_VERSION < 3
+            PyObject *py_level = PyInt_FromLong(level);
+            if (!py_level)
+                goto bad;
+            module = PyObject_CallFunctionObjArgs(py_import,
+                name, global_dict, empty_dict, list, py_level, (PyObject *)NULL);
+            Py_DECREF(py_level);
+            #else
+            module = PyImport_ImportModuleLevelObject(
+                name, global_dict, empty_dict, list, level);
+            #endif
+        }
+    }
+bad: /* shared exit: reached on success as well as on failure */
+    #if PY_MAJOR_VERSION < 3
+    Py_XDECREF(py_import);
+    #endif
+    Py_XDECREF(empty_list);
+    Py_XDECREF(empty_dict);
+    return module;
+}
+
+/* FastTypeChecks */
+#if CYTHON_COMPILING_IN_CPYTHON
+/* Walk the tp_base chain starting above `a` and return 1 as soon as `b` is
+ * reached.  When the chain ends without a match, the result is 1 only if
+ * `b` is the base `object` type. */
+static int __Pyx_InBases(PyTypeObject *a, PyTypeObject *b) {
+    PyTypeObject *cursor = a;
+    while (cursor) {
+        cursor = cursor->tp_base;
+        if (cursor == b) {
+            return 1;
+        }
+    }
+    return b == &PyBaseObject_Type;
+}
+/* Return 1 if type `a` is `b` or has `b` in its MRO, else 0.
+ *
+ * The MRO tuple is scanned by identity when it exists; a type without an
+ * MRO is checked through its tp_base chain instead.
+ */
+static CYTHON_INLINE int __Pyx_IsSubtype(PyTypeObject *a, PyTypeObject *b) {
+    PyObject *mro;
+    if (a == b) return 1;
+    mro = a->tp_mro;
+    if (unlikely(!mro)) {
+        return __Pyx_InBases(a, b);
+    }
+    {
+        const Py_ssize_t count = PyTuple_GET_SIZE(mro);
+        Py_ssize_t pos = 0;
+        while (pos < count) {
+            if (PyTuple_GET_ITEM(mro, pos) == (PyObject *)b) {
+                return 1;
+            }
+            pos++;
+        }
+    }
+    return 0;
+}
+#if PY_MAJOR_VERSION == 2
+static int __Pyx_inner_PyErr_GivenExceptionMatches2(PyObject *err, PyObject* exc_type1, PyObject* exc_type2) { /*
+     * Python 2 variant: return non-zero if `err` is a subclass of exc_type1
+     * (when that is not NULL) or of exc_type2, using PyObject_IsSubclass().
+     *
+     * The currently pending error is saved before the checks and restored
+     * afterwards.  A failing subclass check is reported through
+     * PyErr_WriteUnraisable() and counted as "no match".
+     */
+    PyObject *exception, *value, *tb;
+    int res;
+    __Pyx_PyThreadState_declare
+    __Pyx_PyThreadState_assign
+    __Pyx_ErrFetch(&exception, &value, &tb);
+    res = exc_type1 ? PyObject_IsSubclass(err, exc_type1) : 0;
+    if (unlikely(res == -1)) {
+        PyErr_WriteUnraisable(err);
+        res = 0;
+    }
+    if (!res) { /* exc_type2 is only consulted when the first check did not match */
+        res = PyObject_IsSubclass(err, exc_type2);
+        if (unlikely(res == -1)) {
+            PyErr_WriteUnraisable(err);
+            res = 0;
+        }
+    }
+    __Pyx_ErrRestore(exception, value, tb);
+    return res;
+}
+#else
+/* Return non-zero if exception class `err` is a subtype of exc_type1 (when
+ * that is not NULL) or of exc_type2.  exc_type2 is only checked when the
+ * first test did not match. */
+static CYTHON_INLINE int __Pyx_inner_PyErr_GivenExceptionMatches2(PyObject *err, PyObject* exc_type1, PyObject *exc_type2) {
+    PyTypeObject *err_type = (PyTypeObject*)err;
+    if (exc_type1) {
+        int first = __Pyx_IsSubtype(err_type, (PyTypeObject*)exc_type1);
+        if (first) {
+            return first;
+        }
+    }
+    return __Pyx_IsSubtype(err_type, (PyTypeObject*)exc_type2);
+}
+#endif
+static int __Pyx_PyErr_GivenExceptionMatchesTuple(PyObject *exc_type, PyObject *tuple) { /*
+     * Return 1 if exception class exc_type matches any entry of `tuple`,
+     * else 0.  Entries that are not exception classes are skipped.
+     *
+     * On Python 3 a first pass looks for an identical object before any
+     * subclass test is made; on Python 2 the identity test is done inside
+     * the single loop.
+     */
+    Py_ssize_t i, n;
+    assert(PyExceptionClass_Check(exc_type));
+    n = PyTuple_GET_SIZE(tuple);
+#if PY_MAJOR_VERSION >= 3
+    for (i=0; i<n; i++) {
+        if (exc_type == PyTuple_GET_ITEM(tuple, i)) return 1;
+    }
+#endif
+    for (i=0; i<n; i++) {
+        PyObject *t = PyTuple_GET_ITEM(tuple, i);
+        #if PY_MAJOR_VERSION < 3
+        if (likely(exc_type == t)) return 1;
+        #endif
+        if (likely(PyExceptionClass_Check(t))) {
+            if (__Pyx_inner_PyErr_GivenExceptionMatches2(exc_type, NULL, t)) return 1;
+        } else { /* entry is not an exception class: nothing to compare */
+        }
+    }
+    return 0;
+}
+/* Fast replacement for PyErr_GivenExceptionMatches(err, exc_type).
+ *
+ * Identity is checked first.  When `err` is an exception class, a class
+ * `exc_type` is tested by subtype check and a tuple `exc_type` entry by
+ * entry.  All remaining combinations are left to the CPython API.
+ */
+static CYTHON_INLINE int __Pyx_PyErr_GivenExceptionMatches(PyObject *err, PyObject* exc_type) {
+    if (likely(err == exc_type)) return 1;
+    if (unlikely(!PyExceptionClass_Check(err))) {
+        return PyErr_GivenExceptionMatches(err, exc_type);
+    }
+    if (likely(PyExceptionClass_Check(exc_type))) {
+        return __Pyx_inner_PyErr_GivenExceptionMatches2(err, NULL, exc_type);
+    }
+    if (likely(PyTuple_Check(exc_type))) {
+        return __Pyx_PyErr_GivenExceptionMatchesTuple(err, exc_type);
+    }
+    return PyErr_GivenExceptionMatches(err, exc_type);
+}
+/* Return non-zero if `err` matches exc_type1 or exc_type2, both of which
+ * must be exception classes (asserted).  Identity is tried first, then a
+ * subtype check when `err` is an exception class; otherwise the CPython API
+ * is asked for each type in turn. */
+static CYTHON_INLINE int __Pyx_PyErr_GivenExceptionMatches2(PyObject *err, PyObject *exc_type1, PyObject *exc_type2) {
+    assert(PyExceptionClass_Check(exc_type1));
+    assert(PyExceptionClass_Check(exc_type2));
+    if (likely(err == exc_type1 || err == exc_type2)) return 1;
+    if (unlikely(!PyExceptionClass_Check(err))) {
+        return (PyErr_GivenExceptionMatches(err, exc_type1) || PyErr_GivenExceptionMatches(err, exc_type2));
+    }
+    return __Pyx_inner_PyErr_GivenExceptionMatches2(err, exc_type1, exc_type2);
+}
+#endif
+
+/* PyIntBinop
+ *
+ * Compute op1 + op2 where the right operand is also available as the C
+ * constant `intval` (presumably op2 is the Python object for intval --
+ * confirm at the call sites).
+ *
+ * Fast paths, tried in order:
+ *   - Python 2 exact int: C long addition with an overflow test.
+ *   - exact PyLong (with CYTHON_USE_PYLONG_INTERNALS): the value is rebuilt
+ *     from up to four digits into a long or long long when it fits.
+ *   - exact float: double addition.
+ * Everything else uses PyNumber_InPlaceAdd() or PyNumber_Add(), selected by
+ * `inplace`.  `zerodivision_check` is not used by this addition variant.
+ *
+ * Returns the result object, or NULL on error.
+ */
+#if !CYTHON_COMPILING_IN_PYPY
+static PyObject* __Pyx_PyInt_AddObjC(PyObject *op1, PyObject *op2, CYTHON_UNUSED long intval, int inplace, int zerodivision_check) {
+    (void)inplace;
+    (void)zerodivision_check;
+    #if PY_MAJOR_VERSION < 3
+    if (likely(PyInt_CheckExact(op1))) {
+        const long b = intval;
+        long x;
+        long a = PyInt_AS_LONG(op1);
+            x = (long)((unsigned long)a + b); /* add in unsigned arithmetic so wraparound is well defined */
+            if (likely((x^a) >= 0 || (x^b) >= 0)) /* sign of x agrees with a or b: no overflow */
+                return PyInt_FromLong(x);
+            return PyLong_Type.tp_as_number->nb_add(op1, op2);
+    }
+    #endif
+    #if CYTHON_USE_PYLONG_INTERNALS
+    if (likely(PyLong_CheckExact(op1))) {
+        const long b = intval;
+        long a, x;
+#ifdef HAVE_LONG_LONG
+        const PY_LONG_LONG llb = intval;
+        PY_LONG_LONG lla, llx;
+#endif
+        const digit* digits = ((PyLongObject*)op1)->ob_digit;
+        const Py_ssize_t size = Py_SIZE(op1); /* signed digit count: negative for negative values */
+        if (likely(__Pyx_sst_abs(size) <= 1)) {
+            a = likely(size) ? digits[0] : 0;
+            if (size == -1) a = -a;
+        } else {
+            switch (size) { /* each case falls through toward `default` when neither C type is wide enough */
+                case -2:
+                    if (8 * sizeof(long) - 1 > 2 * PyLong_SHIFT) {
+                        a = -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]));
+                        break;
+#ifdef HAVE_LONG_LONG
+                    } else if (8 * sizeof(PY_LONG_LONG) - 1 > 2 * PyLong_SHIFT) {
+                        lla = -(PY_LONG_LONG) (((((unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0]));
+                        goto long_long;
+#endif
+                    }
+                    CYTHON_FALLTHROUGH;
+                case 2:
+                    if (8 * sizeof(long) - 1 > 2 * PyLong_SHIFT) {
+                        a = (long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]));
+                        break;
+#ifdef HAVE_LONG_LONG
+                    } else if (8 * sizeof(PY_LONG_LONG) - 1 > 2 * PyLong_SHIFT) {
+                        lla = (PY_LONG_LONG) (((((unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0]));
+                        goto long_long;
+#endif
+                    }
+                    CYTHON_FALLTHROUGH;
+                case -3:
+                    if (8 * sizeof(long) - 1 > 3 * PyLong_SHIFT) {
+                        a = -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]));
+                        break;
+#ifdef HAVE_LONG_LONG
+                    } else if (8 * sizeof(PY_LONG_LONG) - 1 > 3 * PyLong_SHIFT) {
+                        lla = -(PY_LONG_LONG) (((((((unsigned PY_LONG_LONG)digits[2]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0]));
+                        goto long_long;
+#endif
+                    }
+                    CYTHON_FALLTHROUGH;
+                case 3:
+                    if (8 * sizeof(long) - 1 > 3 * PyLong_SHIFT) {
+                        a = (long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]));
+                        break;
+#ifdef HAVE_LONG_LONG
+                    } else if (8 * sizeof(PY_LONG_LONG) - 1 > 3 * PyLong_SHIFT) {
+                        lla = (PY_LONG_LONG) (((((((unsigned PY_LONG_LONG)digits[2]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0]));
+                        goto long_long;
+#endif
+                    }
+                    CYTHON_FALLTHROUGH;
+                case -4:
+                    if (8 * sizeof(long) - 1 > 4 * PyLong_SHIFT) {
+                        a = -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]));
+                        break;
+#ifdef HAVE_LONG_LONG
+                    } else if (8 * sizeof(PY_LONG_LONG) - 1 > 4 * PyLong_SHIFT) {
+                        lla = -(PY_LONG_LONG) (((((((((unsigned PY_LONG_LONG)digits[3]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[2]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0]));
+                        goto long_long;
+#endif
+                    }
+                    CYTHON_FALLTHROUGH;
+                case 4:
+                    if (8 * sizeof(long) - 1 > 4 * PyLong_SHIFT) {
+                        a = (long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]));
+                        break;
+#ifdef HAVE_LONG_LONG
+                    } else if (8 * sizeof(PY_LONG_LONG) - 1 > 4 * PyLong_SHIFT) {
+                        lla = (PY_LONG_LONG) (((((((((unsigned PY_LONG_LONG)digits[3]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[2]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0]));
+                        goto long_long;
+#endif
+                    }
+                    CYTHON_FALLTHROUGH;
+                default: return PyLong_Type.tp_as_number->nb_add(op1, op2); /* too large for the C fast path */
+            }
+        }
+                x = a + b;
+            return PyLong_FromLong(x);
+#ifdef HAVE_LONG_LONG
+        long_long: /* reached by goto from the cases that needed the wider type */
+                llx = lla + llb;
+            return PyLong_FromLongLong(llx);
+#endif
+        
+        
+    }
+    #endif
+    if (PyFloat_CheckExact(op1)) {
+        const long b = intval;
+        double a = PyFloat_AS_DOUBLE(op1);
+            double result;
+            PyFPE_START_PROTECT("add", return NULL)
+            result = ((double)a) + (double)b;
+            PyFPE_END_PROTECT(result)
+            return PyFloat_FromDouble(result);
+    }
+    return (inplace ? PyNumber_InPlaceAdd : PyNumber_Add)(op1, op2); /* generic protocol for all other operand types */
+}
+#endif
+
+/* DivInt[long] */
+/* Floor division of a by b for C long.
+ *
+ * Starts from the C quotient and lowers it by one when the remainder is
+ * non-zero and has a sign different from the divisor's, so the result is
+ * rounded toward negative infinity.  b must not be zero (not checked here).
+ */
+static CYTHON_INLINE long __Pyx_div_long(long a, long b) {
+    const long quotient = a / b;
+    const long remainder = a - quotient * b;
+    /* 1 when the remainder is non-zero and its sign bit differs from b's */
+    const int adjust = ((remainder != 0) & ((remainder ^ b) < 0));
+    return quotient - adjust;
+}
+
+/* ImportFrom */
+/* Implements `from module import name`: fetch attribute `name` from
+ * `module`.  If the lookup fails with AttributeError, the error is replaced
+ * by an ImportError naming the attribute; other errors are left untouched.
+ * Returns the attribute, or NULL on failure. */
+static PyObject* __Pyx_ImportFrom(PyObject* module, PyObject* name) {
+    PyObject* value = __Pyx_PyObject_GetAttrStr(module, name);
+    if (unlikely(!value)) {
+        if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
+#if PY_MAJOR_VERSION < 3
+            PyErr_Format(PyExc_ImportError,
+                "cannot import name %.230s", PyString_AS_STRING(name));
+#else
+            PyErr_Format(PyExc_ImportError,
+                "cannot import name %S", name);
+#endif
+        }
+    }
+    return value;
+}
+
+/* HasAttr */
+/* hasattr(o, n).
+ *
+ * Returns -1 with TypeError set when n is not a string, 1 when the attribute
+ * lookup succeeds, and 0 when it fails.  Any error raised by the lookup is
+ * cleared, whatever its type.
+ */
+static CYTHON_INLINE int __Pyx_HasAttr(PyObject *o, PyObject *n) {
+    PyObject *attr;
+    if (unlikely(!__Pyx_PyBaseString_Check(n))) {
+        PyErr_SetString(PyExc_TypeError,
+                        "hasattr(): attribute name must be string");
+        return -1;
+    }
+    attr = __Pyx_GetAttr(o, n);
+    if (likely(attr)) {
+        Py_DECREF(attr);
+        return 1;
+    }
+    PyErr_Clear();
+    return 0;
+}
+
+/* PyObject_GenericGetAttrNoDict */
+#if CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP && PY_VERSION_HEX < 0x03070000
+/* Set an AttributeError saying that objects of type `tp` have no attribute
+ * `attr_name`, and return NULL so callers can `return` the call directly. */
+static PyObject *__Pyx_RaiseGenericGetAttributeError(PyTypeObject *tp, PyObject *attr_name) {
+#if PY_MAJOR_VERSION >= 3
+    PyErr_Format(PyExc_AttributeError,
+                 "'%.50s' object has no attribute '%U'",
+                 tp->tp_name, attr_name);
+#else
+    PyErr_Format(PyExc_AttributeError,
+                 "'%.50s' object has no attribute '%.400s'",
+                 tp->tp_name, PyString_AS_STRING(attr_name));
+#endif
+    return NULL;
+}
+static CYTHON_INLINE PyObject* __Pyx_PyObject_GenericGetAttrNoDict(PyObject* obj, PyObject* attr_name) { /*
+     * Attribute lookup for objects whose type has no instance dict
+     * (tp_dictoffset == 0, asserted below).
+     *
+     * Non-string names are handed to PyObject_GenericGetAttr().  Otherwise
+     * the name is looked up on the type; a missing name raises
+     * AttributeError.  If the found object's type has a tp_descr_get slot
+     * it is called to bind the result, otherwise the object itself is
+     * returned.  Returns a new reference or NULL on error.
+     */
+    PyObject *descr;
+    PyTypeObject *tp = Py_TYPE(obj);
+    if (unlikely(!PyString_Check(attr_name))) {
+        return PyObject_GenericGetAttr(obj, attr_name);
+    }
+    assert(!tp->tp_dictoffset);
+    descr = _PyType_Lookup(tp, attr_name);
+    if (unlikely(!descr)) {
+        return __Pyx_RaiseGenericGetAttributeError(tp, attr_name);
+    }
+    Py_INCREF(descr); /* own the looked-up object while it is used or returned */
+    #if PY_MAJOR_VERSION < 3
+    if (likely(PyType_HasFeature(Py_TYPE(descr), Py_TPFLAGS_HAVE_CLASS)))
+    #endif
+    {
+        descrgetfunc f = Py_TYPE(descr)->tp_descr_get;
+        if (unlikely(f)) {
+            PyObject *res = f(descr, obj, (PyObject *)tp);
+            Py_DECREF(descr);
+            return res;
+        }
+    }
+    return descr;
+}
+#endif
+
+/* PyObject_GenericGetAttr */
+#if CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP && PY_VERSION_HEX < 0x03070000
+/* Generic attribute lookup: types without an instance dict offset take the
+ * dict-free fast path, all others use PyObject_GenericGetAttr(). */
+static PyObject* __Pyx_PyObject_GenericGetAttr(PyObject* obj, PyObject* attr_name) {
+    if (likely(!Py_TYPE(obj)->tp_dictoffset)) {
+        return __Pyx_PyObject_GenericGetAttrNoDict(obj, attr_name);
+    }
+    return PyObject_GenericGetAttr(obj, attr_name);
+}
+#endif
+
+/* SetVTable */
+/* Store the C vtable pointer in `dict` under the key __pyx_n_s_pyx_vtable,
+ * wrapped in a capsule (a CObject before Python 2.7).
+ * Returns 0 on success and -1 on failure. */
+static int __Pyx_SetVtable(PyObject *dict, void *vtable) {
+    PyObject *wrapper;
+    int status;
+#if PY_VERSION_HEX >= 0x02070000
+    wrapper = PyCapsule_New(vtable, 0, 0);
+#else
+    wrapper = PyCObject_FromVoidPtr(vtable, 0);
+#endif
+    if (!wrapper) {
+        return -1;
+    }
+    status = PyDict_SetItem(dict, __pyx_n_s_pyx_vtable, wrapper);
+    /* our reference is dropped in both the success and the failure case */
+    Py_DECREF(wrapper);
+    return (status < 0) ? -1 : 0;
+}
+
+/* PyObjectGetAttrStrNoError */
+static void __Pyx_PyObject_GetAttrStr_ClearAttributeError(void) {
+    __Pyx_PyThreadState_declare
+    __Pyx_PyThreadState_assign
+    if (likely(__Pyx_PyErr_ExceptionMatches(PyExc_AttributeError)))
+        __Pyx_PyErr_Clear();
+}
+/* Attribute lookup that reports a missing attribute as a plain NULL result.
+ *
+ * On CPython 3.7+ with the default tp_getattro, the lookup is delegated to
+ * _PyObject_GenericGetAttrWithDict() with its last argument set to 1.
+ * Otherwise a normal lookup is made and a resulting AttributeError is
+ * cleared; other errors stay set.
+ */
+static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStrNoError(PyObject* obj, PyObject* attr_name) {
+    PyObject *attr;
+#if CYTHON_COMPILING_IN_CPYTHON && CYTHON_USE_TYPE_SLOTS && PY_VERSION_HEX >= 0x030700B1
+    if (likely(Py_TYPE(obj)->tp_getattro == PyObject_GenericGetAttr)) {
+        return _PyObject_GenericGetAttrWithDict(obj, attr_name, NULL, 1);
+    }
+#endif
+    attr = __Pyx_PyObject_GetAttrStr(obj, attr_name);
+    if (likely(attr)) {
+        return attr;
+    }
+    __Pyx_PyObject_GetAttrStr_ClearAttributeError();
+    return NULL;
+}
+
+/* SetupReduce */
+/* Return 1 if the attribute __pyx_n_s_name_2 of `meth` compares equal to
+ * `name`, else 0.  Any error (missing attribute, failed comparison) is
+ * cleared and reported as 0. */
+static int __Pyx_setup_reduce_is_named(PyObject* meth, PyObject* name) {
+  int is_named;
+  PyObject *meth_name = __Pyx_PyObject_GetAttrStr(meth, __pyx_n_s_name_2);
+  if (unlikely(!meth_name)) {
+      PyErr_Clear();
+      return 0;
+  }
+  is_named = PyObject_RichCompareBool(meth_name, name, Py_EQ);
+  if (unlikely(is_named < 0)) {
+      PyErr_Clear();
+      is_named = 0;
+  }
+  Py_DECREF(meth_name);
+  return is_named;
+}
+static int __Pyx_setup_reduce(PyObject* type_obj) { /*
+     * Wire up the Cython-generated pickling helpers of an extension type.
+     *
+     * Nothing is done when the type already provides the attribute named by
+     * __pyx_n_s_getstate, or when its reduce_ex is not the one inherited
+     * from `object`.  Otherwise, if the type's reduce is still object's (or
+     * is itself the generated helper), the helper stored under
+     * __pyx_n_s_reduce_cython is installed under __pyx_n_s_reduce in the
+     * type dict and its original entry is removed; the same is done for the
+     * setstate pair.
+     *
+     * Returns 0 on success and -1 on failure; a RuntimeError is set on
+     * failure if no other error is pending.
+     */
+    int ret = 0;
+    PyObject *object_reduce = NULL;
+    PyObject *object_reduce_ex = NULL;
+    PyObject *reduce = NULL;
+    PyObject *reduce_ex = NULL;
+    PyObject *reduce_cython = NULL;
+    PyObject *setstate = NULL;
+    PyObject *setstate_cython = NULL;
+#if CYTHON_USE_PYTYPE_LOOKUP
+    if (_PyType_Lookup((PyTypeObject*)type_obj, __pyx_n_s_getstate)) goto __PYX_GOOD;
+#else
+    if (PyObject_HasAttr(type_obj, __pyx_n_s_getstate)) goto __PYX_GOOD;
+#endif
+#if CYTHON_USE_PYTYPE_LOOKUP
+    object_reduce_ex = _PyType_Lookup(&PyBaseObject_Type, __pyx_n_s_reduce_ex); if (!object_reduce_ex) goto __PYX_BAD;
+#else
+    object_reduce_ex = __Pyx_PyObject_GetAttrStr((PyObject*)&PyBaseObject_Type, __pyx_n_s_reduce_ex); if (!object_reduce_ex) goto __PYX_BAD;
+#endif
+    reduce_ex = __Pyx_PyObject_GetAttrStr(type_obj, __pyx_n_s_reduce_ex); if (unlikely(!reduce_ex)) goto __PYX_BAD;
+    if (reduce_ex == object_reduce_ex) { /* the type did not override reduce_ex */
+#if CYTHON_USE_PYTYPE_LOOKUP
+        object_reduce = _PyType_Lookup(&PyBaseObject_Type, __pyx_n_s_reduce); if (!object_reduce) goto __PYX_BAD;
+#else
+        object_reduce = __Pyx_PyObject_GetAttrStr((PyObject*)&PyBaseObject_Type, __pyx_n_s_reduce); if (!object_reduce) goto __PYX_BAD;
+#endif
+        reduce = __Pyx_PyObject_GetAttrStr(type_obj, __pyx_n_s_reduce); if (unlikely(!reduce)) goto __PYX_BAD;
+        if (reduce == object_reduce || __Pyx_setup_reduce_is_named(reduce, __pyx_n_s_reduce_cython)) {
+            reduce_cython = __Pyx_PyObject_GetAttrStrNoError(type_obj, __pyx_n_s_reduce_cython);
+            if (likely(reduce_cython)) {
+                ret = PyDict_SetItem(((PyTypeObject*)type_obj)->tp_dict, __pyx_n_s_reduce, reduce_cython); if (unlikely(ret < 0)) goto __PYX_BAD;
+                ret = PyDict_DelItem(((PyTypeObject*)type_obj)->tp_dict, __pyx_n_s_reduce_cython); if (unlikely(ret < 0)) goto __PYX_BAD;
+            } else if (reduce == object_reduce || PyErr_Occurred()) { /* helper missing although it is required, or a real error */
+                goto __PYX_BAD;
+            }
+            setstate = __Pyx_PyObject_GetAttrStr(type_obj, __pyx_n_s_setstate);
+            if (!setstate) PyErr_Clear(); /* a missing setstate is acceptable here */
+            if (!setstate || __Pyx_setup_reduce_is_named(setstate, __pyx_n_s_setstate_cython)) {
+                setstate_cython = __Pyx_PyObject_GetAttrStrNoError(type_obj, __pyx_n_s_setstate_cython);
+                if (likely(setstate_cython)) {
+                    ret = PyDict_SetItem(((PyTypeObject*)type_obj)->tp_dict, __pyx_n_s_setstate, setstate_cython); if (unlikely(ret < 0)) goto __PYX_BAD;
+                    ret = PyDict_DelItem(((PyTypeObject*)type_obj)->tp_dict, __pyx_n_s_setstate_cython); if (unlikely(ret < 0)) goto __PYX_BAD;
+                } else if (!setstate || PyErr_Occurred()) {
+                    goto __PYX_BAD;
+                }
+            }
+            PyType_Modified((PyTypeObject*)type_obj); /* the type dict was edited directly above */
+        }
+    }
+    goto __PYX_GOOD;
+__PYX_BAD:
+    if (!PyErr_Occurred())
+        PyErr_Format(PyExc_RuntimeError, "Unable to initialize pickling for %s", ((PyTypeObject*)type_obj)->tp_name);
+    ret = -1;
+__PYX_GOOD:
+#if !CYTHON_USE_PYTYPE_LOOKUP
+    Py_XDECREF(object_reduce); /* only released in the build where they came from GetAttrStr, not _PyType_Lookup */
+    Py_XDECREF(object_reduce_ex);
+#endif
+    Py_XDECREF(reduce);
+    Py_XDECREF(reduce_ex);
+    Py_XDECREF(reduce_cython);
+    Py_XDECREF(setstate);
+    Py_XDECREF(setstate_cython);
+    return ret;
+}
+
+/* TypeImport */
+#ifndef __PYX_HAVE_RT_ImportType
+#define __PYX_HAVE_RT_ImportType
+/* Fetch attribute `class_name` from `module` and check that it is a type
+ * object whose basicsize is compatible with `size`, the struct size that was
+ * compiled in from the C header.
+ *
+ *   - basicsize <  size                      -> ValueError
+ *   - basicsize != size, check_size == Error -> ValueError
+ *   - basicsize >  size, check_size == Warn  -> warning via PyErr_WarnEx
+ *
+ * `module_name` is only used to build the messages.
+ * Returns the attribute (the reference obtained from PyObject_GetAttrString)
+ * or NULL with an exception set.
+ *
+ * `size` is a size_t and is therefore formatted with %zu; `basicsize` is a
+ * Py_ssize_t and keeps %zd.
+ */
+static PyTypeObject *__Pyx_ImportType(PyObject *module, const char *module_name, const char *class_name,
+    size_t size, enum __Pyx_ImportType_CheckSize check_size)
+{
+    PyObject *result = 0;
+    char warning[200];
+    Py_ssize_t basicsize;
+#ifdef Py_LIMITED_API
+    PyObject *py_basicsize;
+#endif
+    result = PyObject_GetAttrString(module, class_name);
+    if (!result)
+        goto bad;
+    if (!PyType_Check(result)) {
+        PyErr_Format(PyExc_TypeError,
+            "%.200s.%.200s is not a type object",
+            module_name, class_name);
+        goto bad;
+    }
+#ifndef Py_LIMITED_API
+    basicsize = ((PyTypeObject *)result)->tp_basicsize;
+#else
+    /* The struct layout is not accessible here, so go through __basicsize__. */
+    py_basicsize = PyObject_GetAttrString(result, "__basicsize__");
+    if (!py_basicsize)
+        goto bad;
+    basicsize = PyLong_AsSsize_t(py_basicsize);
+    Py_DECREF(py_basicsize);
+    py_basicsize = 0;
+    if (basicsize == (Py_ssize_t)-1 && PyErr_Occurred())
+        goto bad;
+#endif
+    /* A runtime object smaller than the compiled-in struct is always an error. */
+    if ((size_t)basicsize < size) {
+        PyErr_Format(PyExc_ValueError,
+            "%.200s.%.200s size changed, may indicate binary incompatibility. "
+            "Expected %zu from C header, got %zd from PyObject",
+            module_name, class_name, size, basicsize);
+        goto bad;
+    }
+    if (check_size == __Pyx_ImportType_CheckSize_Error && (size_t)basicsize != size) {
+        PyErr_Format(PyExc_ValueError,
+            "%.200s.%.200s size changed, may indicate binary incompatibility. "
+            "Expected %zu from C header, got %zd from PyObject",
+            module_name, class_name, size, basicsize);
+        goto bad;
+    }
+    else if (check_size == __Pyx_ImportType_CheckSize_Warn && (size_t)basicsize > size) {
+        /* PyOS_snprintf bounds the output to sizeof(warning). */
+        PyOS_snprintf(warning, sizeof(warning),
+            "%s.%s size changed, may indicate binary incompatibility. "
+            "Expected %zu from C header, got %zd from PyObject",
+            module_name, class_name, size, basicsize);
+        if (PyErr_WarnEx(NULL, warning, 0) < 0) goto bad;
+    }
+    return (PyTypeObject *)result;
+bad:
+    Py_XDECREF(result);
+    return NULL;
+}
+#endif
+
+/* CLineInTraceback */
+#ifndef CYTHON_CLINE_IN_TRACEBACK
+static int __Pyx_CLineForTraceback(CYTHON_NCP_UNUSED PyThreadState *tstate, int c_line) { /*
+     * Decides whether a C line number should be reported in a traceback.
+     * Returns c_line unchanged when __pyx_cython_runtime is unset or when its
+     * __pyx_n_s_cline_in_traceback attribute is true, and 0 otherwise.
+     * The pending exception is fetched before the lookup and restored
+     * before returning.
+     */
+    PyObject *use_cline;
+    PyObject *ptype, *pvalue, *ptraceback;
+#if CYTHON_COMPILING_IN_CPYTHON
+    PyObject **cython_runtime_dict;
+#endif
+    if (unlikely(!__pyx_cython_runtime)) {
+        return c_line;
+    }
+    __Pyx_ErrFetchInState(tstate, &ptype, &pvalue, &ptraceback); /* park the current exception while attributes are looked up */
+#if CYTHON_COMPILING_IN_CPYTHON
+    cython_runtime_dict = _PyObject_GetDictPtr(__pyx_cython_runtime);
+    if (likely(cython_runtime_dict)) {
+        __PYX_PY_DICT_LOOKUP_IF_MODIFIED(
+            use_cline, *cython_runtime_dict,
+            __Pyx_PyDict_GetItemStr(*cython_runtime_dict, __pyx_n_s_cline_in_traceback))
+    } else
+#endif
+    { /* generic path: plain attribute lookup on the runtime module */
+      PyObject *use_cline_obj = __Pyx_PyObject_GetAttrStr(__pyx_cython_runtime, __pyx_n_s_cline_in_traceback);
+      if (use_cline_obj) {
+        use_cline = PyObject_Not(use_cline_obj) ? Py_False : Py_True; /* NOTE(review): a -1 (error) result of PyObject_Not also maps to Py_False here */
+        Py_DECREF(use_cline_obj);
+      } else {
+        PyErr_Clear();
+        use_cline = NULL;
+      }
+    }
+    if (!use_cline) { /* attribute absent: report no C line and store Py_False as the attribute value */
+        c_line = 0;
+        (void) PyObject_SetAttr(__pyx_cython_runtime, __pyx_n_s_cline_in_traceback, Py_False); /* result deliberately ignored */
+    }
+    else if (use_cline == Py_False || (use_cline != Py_True && PyObject_Not(use_cline) != 0)) { /* the two singletons are tested by identity before falling back to a truth test */
+        c_line = 0;
+    }
+    __Pyx_ErrRestoreInState(tstate, ptype, pvalue, ptraceback);
+    return c_line;
+}
+#endif
+
+/* CodeObjectCache */
+/* Binary search over `entries[0..count-1]`, which is kept sorted by code_line.
+ * Returns the index of the entry whose code_line equals `code_line`, or the
+ * index at which such an entry would have to be inserted (`count` when it
+ * sorts after every existing entry).
+ * Assumes count >= 1: entries[0] is read when the loop does not run - TODO
+ * confirm against callers.
+ */
+static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line) {
+    int lo = 0;
+    int hi = count - 1;
+    int probe = 0;
+    /* Fast exit: the key sorts after the last entry. */
+    if (hi >= 0 && entries[hi].code_line < code_line) {
+        return count;
+    }
+    while (lo < hi) {
+        int probe_line;
+        probe = lo + (hi - lo) / 2;
+        probe_line = entries[probe].code_line;
+        if (probe_line == code_line) {
+            return probe;
+        }
+        if (code_line < probe_line) {
+            hi = probe;
+        } else {
+            lo = probe + 1;
+        }
+    }
+    /* `probe` is the last position inspected; step past it if the key is larger. */
+    return (code_line <= entries[probe].code_line) ? probe : probe + 1;
+}
+/* Look up the code object cached for `code_line` in __pyx_code_cache.
+ * Returns the cached object with its reference count incremented, or NULL
+ * when `code_line` is 0, the cache has no storage yet, or no entry with
+ * exactly that line exists. No exception is set on a miss.
+ */
+static PyCodeObject *__pyx_find_code_object(int code_line) {
+    __Pyx_CodeObjectCacheEntry* hit;
+    int slot;
+    if (unlikely(!code_line) || unlikely(!__pyx_code_cache.entries)) {
+        return NULL;
+    }
+    slot = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line);
+    if (unlikely(slot >= __pyx_code_cache.count)) {
+        return NULL;
+    }
+    hit = &__pyx_code_cache.entries[slot];
+    if (unlikely(hit->code_line != code_line)) {
+        return NULL;
+    }
+    Py_INCREF(hit->code_object);
+    return hit->code_object;
+}
+static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object) { /*
+     * Stores `code_object` in __pyx_code_cache under the key `code_line`,
+     * keeping the entries ordered by code_line. The cache takes its own
+     * reference to the object. A code_line of 0 and allocation failures are
+     * ignored silently (the function just returns).
+     */
+    int pos, i;
+    __Pyx_CodeObjectCacheEntry* entries = __pyx_code_cache.entries;
+    if (unlikely(!code_line)) {
+        return;
+    }
+    if (unlikely(!entries)) { /* first insertion: allocate room for 64 entries */
+        entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Malloc(64*sizeof(__Pyx_CodeObjectCacheEntry));
+        if (likely(entries)) {
+            __pyx_code_cache.entries = entries;
+            __pyx_code_cache.max_count = 64;
+            __pyx_code_cache.count = 1;
+            entries[0].code_line = code_line;
+            entries[0].code_object = code_object;
+            Py_INCREF(code_object);
+        }
+        return;
+    }
+    pos = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line);
+    if ((pos < __pyx_code_cache.count) && unlikely(__pyx_code_cache.entries[pos].code_line == code_line)) { /* key already cached: replace the stored object */
+        PyCodeObject* tmp = entries[pos].code_object;
+        entries[pos].code_object = code_object;
+        Py_DECREF(tmp); /* released only after the slot points at the new object; NOTE(review): no Py_INCREF of code_object on this path, unlike the other two - confirm intended */
+        return;
+    }
+    if (__pyx_code_cache.count == __pyx_code_cache.max_count) { /* full: grow by another 64 entries */
+        int new_max = __pyx_code_cache.max_count + 64;
+        entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Realloc(
+            __pyx_code_cache.entries, ((size_t)new_max) * sizeof(__Pyx_CodeObjectCacheEntry));
+        if (unlikely(!entries)) {
+            return; /* the old array is still referenced by __pyx_code_cache.entries */
+        }
+        __pyx_code_cache.entries = entries;
+        __pyx_code_cache.max_count = new_max;
+    }
+    for (i=__pyx_code_cache.count; i>pos; i--) { /* shift the tail up by one slot to open a gap at pos */
+        entries[i] = entries[i-1];
+    }
+    entries[pos].code_line = code_line;
+    entries[pos].code_object = code_object;
+    __pyx_code_cache.count++;
+    Py_INCREF(code_object);
+}
+
+/* AddTraceback */
+#include "compile.h"
+#include "frameobject.h"
+#include "traceback.h"
+static PyCodeObject* __Pyx_CreateCodeObjectForTraceback(
+            const char *funcname, int c_line,
+            int py_line, const char *filename) { /*
+     * Builds an empty code object carrying `filename`, `py_line` and a
+     * function name for use in a synthetic traceback frame. When `c_line` is
+     * non-zero the name is formatted as "<funcname> (<__pyx_cfilenm>:<c_line>)".
+     * Returns the new code object, or NULL on failure.
+     */
+    PyCodeObject *py_code = NULL;
+    PyObject *py_funcname = NULL;
+    #if PY_MAJOR_VERSION < 3
+    PyObject *py_srcfile = NULL;
+    py_srcfile = PyString_FromString(filename);
+    if (!py_srcfile) goto bad;
+    #endif
+    if (c_line) {
+        #if PY_MAJOR_VERSION < 3
+        py_funcname = PyString_FromFormat( "%s (%s:%d)", funcname, __pyx_cfilenm, c_line);
+        if (!py_funcname) goto bad;
+        #else
+        py_funcname = PyUnicode_FromFormat( "%s (%s:%d)", funcname, __pyx_cfilenm, c_line);
+        if (!py_funcname) goto bad;
+        funcname = PyUnicode_AsUTF8(py_funcname); /* funcname now points into py_funcname, which therefore stays alive until after PyCode_NewEmpty */
+        if (!funcname) goto bad;
+        #endif
+    }
+    else {
+        #if PY_MAJOR_VERSION < 3
+        py_funcname = PyString_FromString(funcname);
+        if (!py_funcname) goto bad;
+        #endif
+    }
+    #if PY_MAJOR_VERSION < 3
+    py_code = __Pyx_PyCode_New(
+        0,
+        0,
+        0,
+        0,
+        0,
+        __pyx_empty_bytes, /*PyObject *code,*/
+        __pyx_empty_tuple, /*PyObject *consts,*/
+        __pyx_empty_tuple, /*PyObject *names,*/
+        __pyx_empty_tuple, /*PyObject *varnames,*/
+        __pyx_empty_tuple, /*PyObject *freevars,*/
+        __pyx_empty_tuple, /*PyObject *cellvars,*/
+        py_srcfile,   /*PyObject *filename,*/
+        py_funcname,  /*PyObject *name,*/
+        py_line,
+        __pyx_empty_bytes  /*PyObject *lnotab*/
+    );
+    Py_DECREF(py_srcfile);
+    #else
+    py_code = PyCode_NewEmpty(filename, funcname, py_line);
+    #endif
+    Py_XDECREF(py_funcname);  // XDECREF since it's only set on Py3 if cline
+    return py_code;
+bad: /* failure path: release whatever was created so far */
+    Py_XDECREF(py_funcname);
+    #if PY_MAJOR_VERSION < 3
+    Py_XDECREF(py_srcfile);
+    #endif
+    return NULL;
+}
+static void __Pyx_AddTraceback(const char *funcname, int c_line,
+                               int py_line, const char *filename) { /*
+     * Appends a synthetic frame for (funcname, filename, py_line) to the
+     * current traceback. Code objects are cached: the cache key is -c_line
+     * when a C line is reported and py_line otherwise. Failures are not
+     * reported to the caller; the function simply cleans up and returns.
+     */
+    PyCodeObject *py_code = 0;
+    PyFrameObject *py_frame = 0;
+    PyThreadState *tstate = __Pyx_PyThreadState_Current;
+    if (c_line) {
+        c_line = __Pyx_CLineForTraceback(tstate, c_line); /* may come back as 0 when C lines are disabled */
+    }
+    py_code = __pyx_find_code_object(c_line ? -c_line : py_line);
+    if (!py_code) { /* cache miss: build the code object and remember it */
+        py_code = __Pyx_CreateCodeObjectForTraceback(
+            funcname, c_line, py_line, filename);
+        if (!py_code) goto bad;
+        __pyx_insert_code_object(c_line ? -c_line : py_line, py_code);
+    }
+    py_frame = PyFrame_New(
+        tstate,            /*PyThreadState *tstate,*/
+        py_code,           /*PyCodeObject *code,*/
+        __pyx_d,    /*PyObject *globals,*/
+        0                  /*PyObject *locals*/
+    );
+    if (!py_frame) goto bad;
+    __Pyx_PyFrame_SetLineNumber(py_frame, py_line);
+    PyTraceBack_Here(py_frame);
+bad: /* reached on success as well: drops the local references */
+    Py_XDECREF(py_code);
+    Py_XDECREF(py_frame);
+}
+
+#if PY_MAJOR_VERSION < 3
+/* Fill `view` from `obj`. Objects that pass PyObject_CheckBuffer go through
+ * PyObject_GetBuffer; instances of __pyx_array_type and
+ * __pyx_memoryview_type are routed to their own getbuffer functions.
+ * Returns the result of the selected call, or -1 with TypeError set when
+ * none of the three applies.
+ */
+static int __Pyx_GetBuffer(PyObject *obj, Py_buffer *view, int flags) {
+    if (PyObject_CheckBuffer(obj)) {
+        return PyObject_GetBuffer(obj, view, flags);
+    }
+    if (__Pyx_TypeCheck(obj, __pyx_array_type)) {
+        return __pyx_array_getbuffer(obj, view, flags);
+    }
+    if (__Pyx_TypeCheck(obj, __pyx_memoryview_type)) {
+        return __pyx_memoryview_getbuffer(obj, view, flags);
+    }
+    PyErr_Format(PyExc_TypeError, "'%.200s' does not have the buffer interface", Py_TYPE(obj)->tp_name);
+    return -1;
+}
+/* Release `view`. A view without an owner is left alone. When the owner
+ * passes PyObject_CheckBuffer the release is delegated to PyBuffer_Release;
+ * otherwise the owner pointer is cleared and its reference dropped here.
+ */
+static void __Pyx_ReleaseBuffer(Py_buffer *view) {
+    PyObject *owner = view->obj;
+    if (owner == NULL) {
+        return;
+    }
+    if (PyObject_CheckBuffer(owner)) {
+        PyBuffer_Release(view);
+    } else {
+        view->obj = NULL;
+        Py_DECREF(owner);
+    }
+}
+#endif
+
+
+/* MemviewSliceIsContig */
+/* Return 1 when the first `ndim` dimensions of `mvs` are laid out without
+ * gaps in the requested order, 0 otherwise. With order == 'F' dimensions are
+ * walked from first to last, for any other value from last to first. Every
+ * dimension must have a negative suboffset and a stride equal to the item
+ * size multiplied by the shapes already visited.
+ */
+static int
+__pyx_memviewslice_is_contig(const __Pyx_memviewslice mvs, char order, int ndim)
+{
+    const int first_to_last = (order == 'F');
+    Py_ssize_t expected_stride = mvs.memview->view.itemsize;
+    int visited;
+    for (visited = 0; visited < ndim; visited++) {
+        const int dim = first_to_last ? visited : (ndim - 1 - visited);
+        if (mvs.suboffsets[dim] >= 0) {
+            return 0;
+        }
+        if (mvs.strides[dim] != expected_stride) {
+            return 0;
+        }
+        expected_stride *= mvs.shape[dim];
+    }
+    return 1;
+}
+
+/* OverlappingSlices */
+/* Compute the address range [*out_start, *out_end) touched by `slice` over
+ * its first `ndim` dimensions. Positive strides extend the upper bound,
+ * non-positive strides move the lower bound. As soon as a dimension with
+ * extent 0 is met, both outputs are set to the lower bound computed so far
+ * (an empty range).
+ */
+static void
+__pyx_get_array_memory_extents(__Pyx_memviewslice *slice,
+                               void **out_start, void **out_end,
+                               int ndim, size_t itemsize)
+{
+    char *lowest = slice->data;
+    char *highest = slice->data;
+    int dim = 0;
+    while (dim < ndim) {
+        const Py_ssize_t stride = slice->strides[dim];
+        const Py_ssize_t extent = slice->shape[dim];
+        if (extent == 0) {
+            *out_end = lowest;
+            *out_start = lowest;
+            return;
+        }
+        if (stride > 0) {
+            highest += stride * (extent - 1);
+        } else {
+            lowest += stride * (extent - 1);
+        }
+        dim++;
+    }
+    *out_start = lowest;
+    /* one item past the highest element address */
+    *out_end = highest + itemsize;
+}
+/* Return 1 when the memory extents of `slice1` and `slice2` (as computed by
+ * __pyx_get_array_memory_extents) intersect, 0 otherwise.
+ */
+static int
+__pyx_slices_overlap(__Pyx_memviewslice *slice1,
+                     __Pyx_memviewslice *slice2,
+                     int ndim, size_t itemsize)
+{
+    void *lo_a, *hi_a;
+    void *lo_b, *hi_b;
+    __pyx_get_array_memory_extents(slice1, &lo_a, &hi_a, ndim, itemsize);
+    __pyx_get_array_memory_extents(slice2, &lo_b, &hi_b, ndim, itemsize);
+    if (!(lo_a < hi_b)) {
+        return 0;
+    }
+    return lo_b < hi_a;
+}
+
+/* Capsule */
+/* Wrap pointer `p` in a Python object: a capsule named `sig` when
+ * PY_VERSION_HEX >= 0x02070000, a CObject (which ignores `sig`) otherwise.
+ * Returns whatever the underlying constructor returns.
+ */
+static CYTHON_INLINE PyObject *
+__pyx_capsule_create(void *p, CYTHON_UNUSED const char *sig)
+{
+#if PY_VERSION_HEX >= 0x02070000
+    return PyCapsule_New(p, sig, NULL);
+#else
+    return PyCObject_FromVoidPtr(p, NULL);
+#endif
+}
+
+/* IsLittleEndian */
+/* Runtime byte-order probe: stores 0x01020304 in a 32-bit word and reports
+ * whether the byte at the lowest address is the least significant one (4).
+ * Returns 1 in that case, 0 otherwise.
+ */
+static CYTHON_INLINE int __Pyx_Is_Little_Endian(void)
+{
+  union {
+    uint32_t word;
+    uint8_t bytes[4];
+  } probe;
+  probe.word = 0x01020304;
+  return probe.bytes[0] == 4;
+}
+
+/* BufferFormatCheck */
+/* Reset `ctx` for checking a format string against `type`.
+ * The root pseudo-field (named "buffer dtype") is placed at stack[0]; while
+ * the current type has typegroup 'S', one more stack element pointing at its
+ * first field is pushed, so ctx->head ends up on the first non-'S' field.
+ * The caller provides `stack`; its required capacity is not checked here.
+ */
+static void __Pyx_BufFmt_Init(__Pyx_BufFmt_Context* ctx,
+                              __Pyx_BufFmt_StackElem* stack,
+                              __Pyx_TypeInfo* type) {
+  __Pyx_BufFmt_StackElem* top = stack;
+  /* root pseudo-field */
+  ctx->root.type = type;
+  ctx->root.name = "buffer dtype";
+  ctx->root.offset = 0;
+  /* bottom of the field stack */
+  top->field = &ctx->root;
+  top->parent_offset = 0;
+  ctx->head = top;
+  /* parser state */
+  ctx->fmt_offset = 0;
+  ctx->new_packmode = '@';
+  ctx->enc_packmode = '@';
+  ctx->new_count = 1;
+  ctx->enc_count = 0;
+  ctx->enc_type = 0;
+  ctx->is_complex = 0;
+  ctx->is_valid_array = 0;
+  ctx->struct_alignment = 0;
+  /* descend through leading 'S' types */
+  for (; type->typegroup == 'S'; type = type->fields->type) {
+    top = ++ctx->head;
+    top->field = type->fields;
+    top->parent_offset = 0;
+  }
+}
+/* Parse an unsigned decimal number at *ts.
+ * On success the value is returned and *ts is advanced past the digits.
+ * If *ts does not start with a digit, -1 is returned and *ts is untouched.
+ * No overflow check is performed.
+ */
+static int __Pyx_BufFmt_ParseNumber(const char** ts) {
+    const char* cursor = *ts;
+    int value = 0;
+    if (!(*cursor >= '0' && *cursor <= '9')) {
+        return -1;
+    }
+    do {
+        value = value * 10 + (*cursor - '0');
+        cursor++;
+    } while (*cursor >= '0' && *cursor <= '9');
+    *ts = cursor;
+    return value;
+}
+/* Like __Pyx_BufFmt_ParseNumber, but sets ValueError (quoting the offending
+ * character) when no number is found. Returns the number, or -1 on error.
+ */
+static int __Pyx_BufFmt_ExpectNumber(const char **ts) {
+    const int parsed = __Pyx_BufFmt_ParseNumber(ts);
+    if (parsed != -1) {
+        return parsed;
+    }
+    PyErr_Format(PyExc_ValueError,
+                 "Does not understand character buffer dtype format string ('%c')", **ts);
+    return -1;
+}
+/* Set ValueError reporting `ch` as an unexpected format-string character. */
+static void __Pyx_BufFmt_RaiseUnexpectedChar(char ch) {
+  static const char message[] = "Unexpected format string character: '%c'";
+  PyErr_Format(PyExc_ValueError, message, ch);
+}
+static const char* __Pyx_BufFmt_DescribeTypeChar(char ch, int is_complex) {
+  switch (ch) {
+    case '?': return "'bool'";
+    case 'c': return "'char'";
+    case 'b': return "'signed char'";
+    case 'B': return "'unsigned char'";
+    case 'h': return "'short'";
+    case 'H': return "'unsigned short'";
+    case 'i': return "'int'";
+    case 'I': return "'unsigned int'";
+    case 'l': return "'long'";
+    case 'L': return "'unsigned long'";
+    case 'q': return "'long long'";
+    case 'Q': return "'unsigned long long'";
+    case 'f': return (is_complex ? "'complex float'" : "'float'");
+    case 'd': return (is_complex ? "'complex double'" : "'double'");
+    case 'g': return (is_complex ? "'complex long double'" : "'long double'");
+    case 'T': return "a struct";
+    case 'O': return "Python object";
+    case 'P': return "a pointer";
+    case 's': case 'p': return "a string";
+    case 0: return "end";
+    default: return "unparseable format string";
+  }
+}
+/* Size in bytes of format character `ch` in standard (non-native) packing.
+ * Complex 'f'/'d' count twice the real size. Returns 0 with ValueError set
+ * for 'g' (no standard size) and for unknown characters.
+ */
+static size_t __Pyx_BufFmt_TypeCharToStandardSize(char ch, int is_complex) {
+  size_t width = 0;
+  switch (ch) {
+    case '?': case 'c': case 'b': case 'B': case 's': case 'p':
+      width = 1;
+      break;
+    case 'h': case 'H':
+      width = 2;
+      break;
+    case 'i': case 'I': case 'l': case 'L':
+      width = 4;
+      break;
+    case 'q': case 'Q':
+      width = 8;
+      break;
+    case 'f':
+      width = is_complex ? 8 : 4;
+      break;
+    case 'd':
+      width = is_complex ? 16 : 8;
+      break;
+    case 'O': case 'P':
+      width = sizeof(void*);
+      break;
+    case 'g':
+      PyErr_SetString(PyExc_ValueError, "Python does not define a standard format string size for long double ('g')..");
+      break;
+    default:
+      __Pyx_BufFmt_RaiseUnexpectedChar(ch);
+      break;
+  }
+  return width;
+}
+/* Size in bytes of format character `ch` using the compiler's native type
+ * sizes. Complex floating-point codes count twice the real size. 'q'/'Q'
+ * are only recognised when HAVE_LONG_LONG is defined. Returns 0 with
+ * ValueError set for unknown characters.
+ */
+static size_t __Pyx_BufFmt_TypeCharToNativeSize(char ch, int is_complex) {
+  const size_t parts = (is_complex ? 2 : 1);
+  size_t width = 0;
+  switch (ch) {
+    case '?': case 'c': case 'b': case 'B': case 's': case 'p':
+      width = 1;
+      break;
+    case 'h': case 'H':
+      width = sizeof(short);
+      break;
+    case 'i': case 'I':
+      width = sizeof(int);
+      break;
+    case 'l': case 'L':
+      width = sizeof(long);
+      break;
+    #ifdef HAVE_LONG_LONG
+    case 'q': case 'Q':
+      width = sizeof(PY_LONG_LONG);
+      break;
+    #endif
+    case 'f':
+      width = sizeof(float) * parts;
+      break;
+    case 'd':
+      width = sizeof(double) * parts;
+      break;
+    case 'g':
+      width = sizeof(long double) * parts;
+      break;
+    case 'O': case 'P':
+      width = sizeof(void*);
+      break;
+    default:
+      __Pyx_BufFmt_RaiseUnexpectedChar(ch);
+      break;
+  }
+  return width;
+}
+typedef struct { char c; short x; } __Pyx_st_short; /* Probe structs: a leading char followed by the type of
+   interest; the function below subtracts sizeof(type) from sizeof(probe). */
+typedef struct { char c; int x; } __Pyx_st_int;
+typedef struct { char c; long x; } __Pyx_st_long;
+typedef struct { char c; float x; } __Pyx_st_float;
+typedef struct { char c; double x; } __Pyx_st_double;
+typedef struct { char c; long double x; } __Pyx_st_longdouble;
+typedef struct { char c; void *x; } __Pyx_st_void_p;
+#ifdef HAVE_LONG_LONG
+typedef struct { char c; PY_LONG_LONG x; } __Pyx_st_longlong;
+#endif
+static size_t __Pyx_BufFmt_TypeCharToAlignment(char ch, CYTHON_UNUSED int is_complex) { /*
+   * Alignment used for format character `ch` in native ('@') mode, taken as
+   * sizeof(probe struct) - sizeof(type). Char-like codes yield 1.
+   * Returns 0 after calling __Pyx_BufFmt_RaiseUnexpectedChar for any other
+   * character. `is_complex` is not used.
+   */
+  switch (ch) {
+    case '?': case 'c': case 'b': case 'B': case 's': case 'p': return 1;
+    case 'h': case 'H': return sizeof(__Pyx_st_short) - sizeof(short);
+    case 'i': case 'I': return sizeof(__Pyx_st_int) - sizeof(int);
+    case 'l': case 'L': return sizeof(__Pyx_st_long) - sizeof(long);
+#ifdef HAVE_LONG_LONG
+    case 'q': case 'Q': return sizeof(__Pyx_st_longlong) - sizeof(PY_LONG_LONG);
+#endif
+    case 'f': return sizeof(__Pyx_st_float) - sizeof(float);
+    case 'd': return sizeof(__Pyx_st_double) - sizeof(double);
+    case 'g': return sizeof(__Pyx_st_longdouble) - sizeof(long double);
+    case 'P': case 'O': return sizeof(__Pyx_st_void_p) - sizeof(void*);
+    default: /* also reached for 'q'/'Q' when HAVE_LONG_LONG is not defined */
+      __Pyx_BufFmt_RaiseUnexpectedChar(ch);
+      return 0;
+    }
+}
+/* These are for computing the padding at the end of the struct to align
+   on the first member of the struct. This will probably be the same as above,
+   but we don't have any guarantees.
+ */
+typedef struct { short x; char c; } __Pyx_pad_short; /* Probe structs with the type of interest first and a
+   trailing char; the function below subtracts sizeof(type) from sizeof(probe). */
+typedef struct { int x; char c; } __Pyx_pad_int;
+typedef struct { long x; char c; } __Pyx_pad_long;
+typedef struct { float x; char c; } __Pyx_pad_float;
+typedef struct { double x; char c; } __Pyx_pad_double;
+typedef struct { long double x; char c; } __Pyx_pad_longdouble;
+typedef struct { void *x; char c; } __Pyx_pad_void_p;
+#ifdef HAVE_LONG_LONG
+typedef struct { PY_LONG_LONG x; char c; } __Pyx_pad_longlong;
+#endif
+static size_t __Pyx_BufFmt_TypeCharToPadding(char ch, CYTHON_UNUSED int is_complex) { /*
+   * Value derived for format character `ch` as
+   * sizeof(probe struct) - sizeof(type), i.e. the trailing char plus any
+   * padding the compiler appends after it. Char-like codes yield 1.
+   * Returns 0 after calling __Pyx_BufFmt_RaiseUnexpectedChar for any other
+   * character. `is_complex` is not used.
+   */
+  switch (ch) {
+    case '?': case 'c': case 'b': case 'B': case 's': case 'p': return 1;
+    case 'h': case 'H': return sizeof(__Pyx_pad_short) - sizeof(short);
+    case 'i': case 'I': return sizeof(__Pyx_pad_int) - sizeof(int);
+    case 'l': case 'L': return sizeof(__Pyx_pad_long) - sizeof(long);
+#ifdef HAVE_LONG_LONG
+    case 'q': case 'Q': return sizeof(__Pyx_pad_longlong) - sizeof(PY_LONG_LONG);
+#endif
+    case 'f': return sizeof(__Pyx_pad_float) - sizeof(float);
+    case 'd': return sizeof(__Pyx_pad_double) - sizeof(double);
+    case 'g': return sizeof(__Pyx_pad_longdouble) - sizeof(long double);
+    case 'P': case 'O': return sizeof(__Pyx_pad_void_p) - sizeof(void*);
+    default: /* also reached for 'q'/'Q' when HAVE_LONG_LONG is not defined */
+      __Pyx_BufFmt_RaiseUnexpectedChar(ch);
+      return 0;
+    }
+}
+/* Classify format character `ch` into a one-letter type group:
+ *   'H' for 'c'; 'I' for b/h/i/l/q/s/p; 'U' for ?/B/H/I/L/Q;
+ *   'C' or 'R' for f/d/g depending on `is_complex`; 'O' and 'P' map to
+ *   themselves.
+ * Returns 0 after calling __Pyx_BufFmt_RaiseUnexpectedChar for anything else.
+ */
+static char __Pyx_BufFmt_TypeCharToGroup(char ch, int is_complex) {
+  char group = 0;
+  switch (ch) {
+    case 'c':
+      group = 'H';
+      break;
+    case 'b': case 'h': case 'i':
+    case 'l': case 'q': case 's': case 'p':
+      group = 'I';
+      break;
+    case '?': case 'B': case 'H': case 'I': case 'L': case 'Q':
+      group = 'U';
+      break;
+    case 'f': case 'd': case 'g':
+      group = (is_complex ? 'C' : 'R');
+      break;
+    case 'O':
+    case 'P':
+      group = ch;
+      break;
+    default:
+      __Pyx_BufFmt_RaiseUnexpectedChar(ch);
+      break;
+  }
+  return group;
+}
+/* Set a "Buffer dtype mismatch" ValueError describing what was expected at
+ * the current position versus the pending ctx->enc_type.
+ *   - ctx->head == NULL:        expected "end"
+ *   - head is the root field:   expected the root type's name
+ *   - otherwise:                expected the field's type, reported together
+ *                               with "<parent type>.<field name>"
+ */
+static void __Pyx_BufFmt_RaiseExpected(__Pyx_BufFmt_Context* ctx) {
+  const char* got = __Pyx_BufFmt_DescribeTypeChar(ctx->enc_type, ctx->is_complex);
+  if (ctx->head == NULL) {
+    PyErr_Format(PyExc_ValueError,
+                 "Buffer dtype mismatch, expected %s%s%s but got %s",
+                 "", "end", "", got);
+    return;
+  }
+  if (ctx->head->field == &ctx->root) {
+    PyErr_Format(PyExc_ValueError,
+                 "Buffer dtype mismatch, expected %s%s%s but got %s",
+                 "'", ctx->head->field->type->name, "'", got);
+    return;
+  }
+  {
+    /* nested field: the enclosing struct is one stack element below */
+    __Pyx_StructField* member = ctx->head->field;
+    __Pyx_StructField* owner = (ctx->head - 1)->field;
+    PyErr_Format(PyExc_ValueError,
+                 "Buffer dtype mismatch, expected '%s' but got %s in '%s.%s'",
+                 member->type->name, got,
+                 owner->type->name, member->name);
+  }
+}
+static int __Pyx_BufFmt_ProcessTypeChunk(__Pyx_BufFmt_Context* ctx) { /*
+   * Consumes the pending run described by ctx->enc_type / enc_count /
+   * enc_packmode and matches it, one element at a time, against the expected
+   * field(s) starting at ctx->head. On a match ctx->fmt_offset is advanced
+   * and ctx->head moves to the next expected field (NULL once the root has
+   * been consumed). Returns 0 on success, or immediately when no type is
+   * pending, and -1 on a mismatch, in which case the code sets ValueError.
+   * NOTE(review): ctx->head is dereferenced without a NULL check although it
+   * is set to NULL below - confirm callers never get here with a pending
+   * type after that.
+   */
+  char group;
+  size_t size, offset, arraysize = 1;
+  if (ctx->enc_type == 0) return 0;
+  if (ctx->head->field->type->arraysize[0]) { /* the expected field is an array type */
+    int i, ndim = 0;
+    if (ctx->enc_type == 's' || ctx->enc_type == 'p') { /* string codes count as a 1-dimensional array of enc_count items */
+        ctx->is_valid_array = ctx->head->field->type->ndim == 1;
+        ndim = 1;
+        if (ctx->enc_count != ctx->head->field->type->arraysize[0]) {
+            PyErr_Format(PyExc_ValueError,
+                         "Expected a dimension of size %zu, got %zu",
+                         ctx->head->field->type->arraysize[0], ctx->enc_count);
+            return -1;
+        }
+    }
+    if (!ctx->is_valid_array) {
+      PyErr_Format(PyExc_ValueError, "Expected %d dimensions, got %d",
+                   ctx->head->field->type->ndim, ndim);
+      return -1;
+    }
+    for (i = 0; i < ctx->head->field->type->ndim; i++) { /* total element count of the array */
+      arraysize *= ctx->head->field->type->arraysize[i];
+    }
+    ctx->is_valid_array = 0;
+    ctx->enc_count = 1;
+  }
+  group = __Pyx_BufFmt_TypeCharToGroup(ctx->enc_type, ctx->is_complex);
+  do { /* one iteration per element of the pending run (or per descent into a 'C' type) */
+    __Pyx_StructField* field = ctx->head->field;
+    __Pyx_TypeInfo* type = field->type;
+    if (ctx->enc_packmode == '@' || ctx->enc_packmode == '^') {
+      size = __Pyx_BufFmt_TypeCharToNativeSize(ctx->enc_type, ctx->is_complex);
+    } else {
+      size = __Pyx_BufFmt_TypeCharToStandardSize(ctx->enc_type, ctx->is_complex);
+    }
+    if (ctx->enc_packmode == '@') { /* only '@' mode applies alignment */
+      size_t align_at = __Pyx_BufFmt_TypeCharToAlignment(ctx->enc_type, ctx->is_complex);
+      size_t align_mod_offset;
+      if (align_at == 0) return -1; /* 0 is the error result of TypeCharToAlignment; also avoids a modulo by zero */
+      align_mod_offset = ctx->fmt_offset % align_at;
+      if (align_mod_offset > 0) ctx->fmt_offset += align_at - align_mod_offset; /* round fmt_offset up to the next multiple of align_at */
+      if (ctx->struct_alignment == 0) /* recorded once, from the first element seen */
+          ctx->struct_alignment = __Pyx_BufFmt_TypeCharToPadding(ctx->enc_type,
+                                                                 ctx->is_complex);
+    }
+    if (type->size != size || type->typegroup != group) {
+      if (type->typegroup == 'C' && type->fields != NULL) { /* expected type is group 'C' with fields: descend and retry against its first field */
+        size_t parent_offset = ctx->head->parent_offset + field->offset;
+        ++ctx->head;
+        ctx->head->field = type->fields;
+        ctx->head->parent_offset = parent_offset;
+        continue;
+      }
+      if ((type->typegroup == 'H' || group == 'H') && type->size == size) { /* group 'H' matches any group of equal size */
+      } else {
+          __Pyx_BufFmt_RaiseExpected(ctx);
+          return -1;
+      }
+    }
+    offset = ctx->head->parent_offset + field->offset;
+    if (ctx->fmt_offset != offset) {
+      PyErr_Format(PyExc_ValueError,
+                   "Buffer dtype mismatch; next field is at offset %" CYTHON_FORMAT_SSIZE_T "d but %" CYTHON_FORMAT_SSIZE_T "d expected",
+                   (Py_ssize_t)ctx->fmt_offset, (Py_ssize_t)offset);
+      return -1;
+    }
+    ctx->fmt_offset += size;
+    if (arraysize)
+      ctx->fmt_offset += (arraysize - 1) * size; /* account for the remaining array elements */
+    --ctx->enc_count;
+    while (1) { /* advance ctx->head to the next expected field */
+      if (field == &ctx->root) { /* the root itself was matched: nothing more is expected */
+        ctx->head = NULL;
+        if (ctx->enc_count != 0) {
+          __Pyx_BufFmt_RaiseExpected(ctx);
+          return -1;
+        }
+        break;
+      }
+      ctx->head->field = ++field;
+      if (field->type == NULL) { /* a NULL type marks the end of a field list: pop to the parent */
+        --ctx->head;
+        field = ctx->head->field;
+        continue;
+      } else if (field->type->typegroup == 'S') { /* next field is a struct: push and continue with its first field */
+        size_t parent_offset = ctx->head->parent_offset + field->offset;
+        if (field->type->fields->type == NULL) continue; /* struct without fields: skip it */
+        field = field->type->fields;
+        ++ctx->head;
+        ctx->head->field = field;
+        ctx->head->parent_offset = parent_offset;
+        break;
+      } else {
+        break;
+      }
+    }
+  } while (ctx->enc_count);
+  ctx->enc_type = 0;
+  ctx->is_complex = 0;
+  return 0;
+}
+/* Parse an array dimension list "(d0,d1,...)" starting at **tsp (which must
+ * point at the opening parenthesis) and check every dimension against the
+ * arraysize of the field currently expected by `ctx`.
+ *
+ * On success ctx->is_valid_array is set, *tsp is advanced past the closing
+ * ')' and Py_None is returned purely as a non-NULL success marker (no new
+ * reference is taken). On failure NULL is returned with ValueError set.
+ *
+ * Whitespace between dimensions is skipped. The pointer is advanced before
+ * `continue`, otherwise the loop would spin forever on the same character.
+ */
+static PyObject *
+__pyx_buffmt_parse_array(__Pyx_BufFmt_Context* ctx, const char** tsp)
+{
+    const char *ts = *tsp;
+    int i = 0, number, ndim;
+    ++ts;  /* step over '(' */
+    if (ctx->new_count != 1) {
+        PyErr_SetString(PyExc_ValueError,
+                        "Cannot handle repeated arrays in format string");
+        return NULL;
+    }
+    /* Flush whatever type run is still pending before the array starts. */
+    if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL;
+    ndim = ctx->head->field->type->ndim;
+    while (*ts && *ts != ')') {
+        switch (*ts) {
+            case ' ': case '\f': case '\r': case '\n': case '\t': case '\v':
+                ++ts;      /* consume the whitespace character */
+                continue;  /* continues the enclosing while loop */
+            default:  break;
+        }
+        number = __Pyx_BufFmt_ExpectNumber(&ts);
+        if (number == -1) return NULL;
+        if (i < ndim && (size_t) number != ctx->head->field->type->arraysize[i])
+            return PyErr_Format(PyExc_ValueError,
+                        "Expected a dimension of size %zu, got %d",
+                        ctx->head->field->type->arraysize[i], number);
+        if (*ts != ',' && *ts != ')')
+            return PyErr_Format(PyExc_ValueError,
+                                "Expected a comma in format string, got '%c'", *ts);
+        if (*ts == ',') ts++;
+        i++;
+    }
+    if (i != ndim)
+        return PyErr_Format(PyExc_ValueError, "Expected %d dimension(s), got %d",
+                            ctx->head->field->type->ndim, i);
+    if (!*ts) {
+        PyErr_SetString(PyExc_ValueError,
+                        "Unexpected end of format string, expected ')'");
+        return NULL;
+    }
+    ctx->is_valid_array = 1;
+    ctx->new_count = 1;
+    *tsp = ++ts;  /* step over ')' */
+    return Py_None;
+}
+/* Walk the buffer format string `ts` and check it against the type
+ * description held in `ctx`. Consecutive identical type characters are
+ * accumulated into a run (ctx->enc_type / enc_count) that is flushed through
+ * __Pyx_BufFmt_ProcessTypeChunk whenever the type, pack mode or structure
+ * changes.
+ *
+ * Returns a pointer to the terminating NUL when the whole string matched, or
+ * to the character after a closing '}' when called recursively for a "T{"
+ * sub-struct. Returns NULL with an exception set on any mismatch or syntax
+ * error.
+ *
+ * A ":name:" field label that is missing its closing ':' is reported as an
+ * error instead of scanning past the end of the string.
+ */
+static const char* __Pyx_BufFmt_CheckString(__Pyx_BufFmt_Context* ctx, const char* ts) {
+  int got_Z = 0;
+  while (1) {
+    switch(*ts) {
+      case 0:
+        /* End of string: everything expected must have been consumed. */
+        if (ctx->enc_type != 0 && ctx->head == NULL) {
+          __Pyx_BufFmt_RaiseExpected(ctx);
+          return NULL;
+        }
+        if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL;
+        if (ctx->head != NULL) {
+          __Pyx_BufFmt_RaiseExpected(ctx);
+          return NULL;
+        }
+        return ts;
+      case ' ':
+      case '\r':
+      case '\n':
+        ++ts;
+        break;
+      case '<':
+        if (!__Pyx_Is_Little_Endian()) {
+          PyErr_SetString(PyExc_ValueError, "Little-endian buffer not supported on big-endian compiler");
+          return NULL;
+        }
+        ctx->new_packmode = '=';
+        ++ts;
+        break;
+      case '>':
+      case '!':
+        if (__Pyx_Is_Little_Endian()) {
+          PyErr_SetString(PyExc_ValueError, "Big-endian buffer not supported on little-endian compiler");
+          return NULL;
+        }
+        ctx->new_packmode = '=';
+        ++ts;
+        break;
+      case '=':
+      case '@':
+      case '^':
+        ctx->new_packmode = *ts++;
+        break;
+      case 'T':
+        {
+          /* "T{...}" sub-struct, possibly with a repeat count in front. */
+          const char* ts_after_sub;
+          size_t i, struct_count = ctx->new_count;
+          size_t struct_alignment = ctx->struct_alignment;
+          ctx->new_count = 1;
+          ++ts;
+          if (*ts != '{') {
+            PyErr_SetString(PyExc_ValueError, "Buffer acquisition: Expected '{' after 'T'");
+            return NULL;
+          }
+          if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL;
+          ctx->enc_type = 0;
+          ctx->enc_count = 0;
+          ctx->struct_alignment = 0;
+          ++ts;
+          ts_after_sub = ts;
+          /* Re-parse the same sub-format once per repetition. */
+          for (i = 0; i != struct_count; ++i) {
+            ts_after_sub = __Pyx_BufFmt_CheckString(ctx, ts);
+            if (!ts_after_sub) return NULL;
+          }
+          ts = ts_after_sub;
+          if (struct_alignment) ctx->struct_alignment = struct_alignment;
+        }
+        break;
+      case '}':
+        {
+          /* End of a sub-struct: flush and pad up to the struct alignment. */
+          size_t alignment = ctx->struct_alignment;
+          ++ts;
+          if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL;
+          ctx->enc_type = 0;
+          if (alignment && ctx->fmt_offset % alignment) {
+            ctx->fmt_offset += alignment - (ctx->fmt_offset % alignment);
+          }
+        }
+        return ts;
+      case 'x':
+        /* Pad bytes: advance the offset without matching a field. */
+        if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL;
+        ctx->fmt_offset += ctx->new_count;
+        ctx->new_count = 1;
+        ctx->enc_count = 0;
+        ctx->enc_type = 0;
+        ctx->enc_packmode = ctx->new_packmode;
+        ++ts;
+        break;
+      case 'Z':
+        /* Complex prefix: must be followed by a floating-point code. */
+        got_Z = 1;
+        ++ts;
+        if (*ts != 'f' && *ts != 'd' && *ts != 'g') {
+          __Pyx_BufFmt_RaiseUnexpectedChar('Z');
+          return NULL;
+        }
+        CYTHON_FALLTHROUGH;
+      case '?': case 'c': case 'b': case 'B': case 'h': case 'H': case 'i': case 'I':
+      case 'l': case 'L': case 'q': case 'Q':
+      case 'f': case 'd': case 'g':
+      case 'O': case 'p':
+        /* Same type as the pending run: just extend the run. */
+        if ((ctx->enc_type == *ts) && (got_Z == ctx->is_complex) &&
+            (ctx->enc_packmode == ctx->new_packmode) && (!ctx->is_valid_array)) {
+          ctx->enc_count += ctx->new_count;
+          ctx->new_count = 1;
+          got_Z = 0;
+          ++ts;
+          break;
+        }
+        CYTHON_FALLTHROUGH;
+      case 's':
+        /* New type (or 's', which never joins a run): flush, then start a new run. */
+        if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL;
+        ctx->enc_count = ctx->new_count;
+        ctx->enc_packmode = ctx->new_packmode;
+        ctx->enc_type = *ts;
+        ctx->is_complex = got_Z;
+        ++ts;
+        ctx->new_count = 1;
+        got_Z = 0;
+        break;
+      case ':':
+        /* ":name:" field label: skipped, but it must be terminated. */
+        ++ts;
+        while (*ts && *ts != ':') ++ts;
+        if (!*ts) {
+          PyErr_SetString(PyExc_ValueError,
+                          "Unexpected end of format string, expected ':'");
+          return NULL;
+        }
+        ++ts;
+        break;
+      case '(':
+        if (!__pyx_buffmt_parse_array(ctx, &ts)) return NULL;
+        break;
+      default:
+        {
+          /* Anything else must be a repeat count for the next item. */
+          int number = __Pyx_BufFmt_ExpectNumber(&ts);
+          if (number == -1) return NULL;
+          ctx->new_count = (size_t)number;
+        }
+    }
+  }
+}
+
+/* TypeInfoCompare: decides whether two __Pyx_TypeInfo descriptors describe compatible types; returns 1 on a match and 0 otherwise. */
+  static int
+__pyx_typeinfo_cmp(__Pyx_TypeInfo *a, __Pyx_TypeInfo *b)
+{
+    int i;
+    if (!a || !b) /* a missing descriptor never matches anything */
+        return 0;
+    if (a == b) /* identical descriptor object: trivially equal */
+        return 1;
+    if (a->size != b->size || a->typegroup != b->typegroup ||
+            a->is_unsigned != b->is_unsigned || a->ndim != b->ndim) {
+        if (a->typegroup == 'H' || b->typegroup == 'H') { /* when either side is group 'H' only the sizes have to agree; presumably 'H' is the char group - TODO confirm against the __Pyx_TypeInfo declaration */
+            return a->size == b->size;
+        } else {
+            return 0;
+        }
+    }
+    if (a->ndim) { /* past the check above ndim is equal on both sides, so a->ndim bounds both arraysize arrays */
+        for (i = 0; i < a->ndim; i++)
+            if (a->arraysize[i] != b->arraysize[i])
+                return 0;
+    }
+    if (a->typegroup == 'S') { /* group 'S' additionally compares flags and the fields list */
+        if (a->flags != b->flags)
+            return 0;
+        if (a->fields || b->fields) {
+            if (!(a->fields && b->fields)) /* only one side has a fields list */
+                return 0;
+            for (i = 0; a->fields[i].type && b->fields[i].type; i++) { /* walk both lists until either reaches an entry whose type is NULL */
+                __Pyx_StructField *field_a = a->fields + i;
+                __Pyx_StructField *field_b = b->fields + i;
+                if (field_a->offset != field_b->offset ||
+                    !__pyx_typeinfo_cmp(field_a->type, field_b->type)) /* field types are compared recursively */
+                    return 0;
+            }
+            return !a->fields[i].type && !b->fields[i].type; /* equal only if both lists ended at the same index */
+        }
+    }
+    return 1;
+}
+
+/* MemviewSliceValidateAndInit */
+  static int /* __pyx_check_strides: checks the stride of dimension `dim` against the axis spec bits in `spec`; returns 1 if acceptable, 0 with a ValueError set otherwise. */
+__pyx_check_strides(Py_buffer *buf, int dim, int ndim, int spec)
+{
+    if (buf->shape[dim] <= 1) /* an extent of 0 or 1 is accepted without looking at the stride */
+        return 1;
+    if (buf->strides) {
+        if (spec & __Pyx_MEMVIEW_CONTIG) {
+            if (spec & (__Pyx_MEMVIEW_PTR|__Pyx_MEMVIEW_FULL)) {
+                if (unlikely(buf->strides[dim] != sizeof(void *))) { /* for PTR/FULL axes the contiguous stride is one pointer, not one item */
+                    PyErr_Format(PyExc_ValueError,
+                                 "Buffer is not indirectly contiguous "
+                                 "in dimension %d.", dim);
+                    goto fail;
+                }
+            } else if (unlikely(buf->strides[dim] != buf->itemsize)) { /* otherwise the contiguous stride must equal the item size */
+                PyErr_SetString(PyExc_ValueError,
+                                "Buffer and memoryview are not contiguous "
+                                "in the same dimension.");
+                goto fail;
+            }
+        }
+        if (spec & __Pyx_MEMVIEW_FOLLOW) {
+            Py_ssize_t stride = buf->strides[dim];
+            if (stride < 0) /* compare the magnitude so negative strides are handled too */
+                stride = -stride;
+            if (unlikely(stride < buf->itemsize)) { /* a FOLLOW axis must step by at least one item */
+                PyErr_SetString(PyExc_ValueError,
+                                "Buffer and memoryview are not contiguous "
+                                "in the same dimension.");
+                goto fail;
+            }
+        }
+    } else { /* no strides array: the error messages below treat such a buffer as C-contiguous */
+        if (unlikely(spec & __Pyx_MEMVIEW_CONTIG && dim != ndim - 1)) { /* CONTIG is only accepted on the last dimension here */
+            PyErr_Format(PyExc_ValueError,
+                         "C-contiguous buffer is not contiguous in "
+                         "dimension %d", dim);
+            goto fail;
+        } else if (unlikely(spec & (__Pyx_MEMVIEW_PTR))) { /* a PTR axis is rejected when there are no strides */
+            PyErr_Format(PyExc_ValueError,
+                         "C-contiguous buffer is not indirect in "
+                         "dimension %d", dim);
+            goto fail;
+        } else if (unlikely(buf->suboffsets)) { /* suboffsets without strides are rejected as inconsistent */
+            PyErr_SetString(PyExc_ValueError,
+                            "Buffer exposes suboffsets but no strides");
+            goto fail;
+        }
+    }
+    return 1;
+fail: /* every failure path has already set a Python exception */
+    return 0;
+}
+static int /* __pyx_check_suboffsets: checks that dimension `dim` is direct or indirect as requested by `spec`; returns 1 if acceptable, 0 with a ValueError set otherwise. */
+__pyx_check_suboffsets(Py_buffer *buf, int dim, CYTHON_UNUSED int ndim, int spec)
+{
+    if (spec & __Pyx_MEMVIEW_DIRECT) {
+        if (unlikely(buf->suboffsets && buf->suboffsets[dim] >= 0)) { /* a non-negative suboffset on this axis is incompatible with DIRECT */
+            PyErr_Format(PyExc_ValueError,
+                         "Buffer not compatible with direct access "
+                         "in dimension %d.", dim);
+            goto fail;
+        }
+    }
+    if (spec & __Pyx_MEMVIEW_PTR) {
+        if (unlikely(!buf->suboffsets || (buf->suboffsets[dim] < 0))) { /* PTR needs a suboffsets array with a non-negative entry for this axis */
+            PyErr_Format(PyExc_ValueError,
+                         "Buffer is not indirectly accessible "
+                         "in dimension %d.", dim);
+            goto fail;
+        }
+    }
+    return 1;
+fail: /* exception already set by the failing check */
+    return 0;
+}
+static int /* __pyx_verify_contig: checks the strides of all `ndim` dimensions against a Fortran- or C-contiguous layout selected by `c_or_f_flag`; returns 1 if acceptable (or if neither flag is set), 0 with a ValueError set otherwise. */
+__pyx_verify_contig(Py_buffer *buf, int ndim, int c_or_f_flag)
+{
+    int i; /* note: buf->strides is dereferenced without a NULL check in both loops below */
+    if (c_or_f_flag & __Pyx_IS_F_CONTIG) {
+        Py_ssize_t stride = 1; /* expected stride in items, accumulated from the first dimension upwards */
+        for (i = 0; i < ndim; i++) {
+            if (unlikely(stride * buf->itemsize != buf->strides[i]  &&  buf->shape[i] > 1)) { /* dimensions with extent <= 1 are exempt */
+                PyErr_SetString(PyExc_ValueError,
+                    "Buffer not fortran contiguous.");
+                goto fail;
+            }
+            stride = stride * buf->shape[i];
+        }
+    } else if (c_or_f_flag & __Pyx_IS_C_CONTIG) {
+        Py_ssize_t stride = 1; /* expected stride in items, accumulated from the last dimension downwards */
+        for (i = ndim - 1; i >- 1; i--) {
+            if (unlikely(stride * buf->itemsize != buf->strides[i]  &&  buf->shape[i] > 1)) { /* dimensions with extent <= 1 are exempt */
+                PyErr_SetString(PyExc_ValueError,
+                    "Buffer not C contiguous.");
+                goto fail;
+            }
+            stride = stride * buf->shape[i];
+        }
+    }
+    return 1;
+fail: /* exception already set by the failing check */
+    return 0;
+}
+static int __Pyx_ValidateAndInit_memviewslice( /* validates original_obj's buffer against the requested layout and dtype and fills *memviewslice; returns 0 on success, -1 with an exception set on failure */
+                int *axes_specs, /* one axis spec bitmask per dimension; indexed 0..ndim-1 below */
+                int c_or_f_flag, /* forwarded to __pyx_verify_contig */
+                int buf_flags, /* forwarded to __pyx_memoryview_new when a new memoryview is created */
+                int ndim, /* required number of dimensions */
+                __Pyx_TypeInfo *dtype, /* required element type */
+                __Pyx_BufFmt_StackElem stack[], /* scratch storage handed to __Pyx_BufFmt_Init */
+                __Pyx_memviewslice *memviewslice, /* output slice */
+                PyObject *original_obj) /* object to obtain the buffer from */
+{
+    struct __pyx_memoryview_obj *memview, *new_memview;
+    __Pyx_RefNannyDeclarations
+    Py_buffer *buf;
+    int i, spec = 0, retval = -1;
+    __Pyx_BufFmt_Context ctx;
+    int from_memoryview = __pyx_memoryview_check(original_obj);
+    __Pyx_RefNannySetupContext("ValidateAndInit_memviewslice", 0);
+    if (from_memoryview && __pyx_typeinfo_cmp(dtype, ((struct __pyx_memoryview_obj *)
+                                                            original_obj)->typeinfo)) { /* reuse path: already a memoryview whose typeinfo matches dtype */
+        memview = (struct __pyx_memoryview_obj *) original_obj;
+        new_memview = NULL; /* NULL marks that nothing was created here, so nothing is released on failure */
+    } else {
+        memview = (struct __pyx_memoryview_obj *) __pyx_memoryview_new(
+                                            original_obj, buf_flags, 0, dtype);
+        new_memview = memview;
+        if (unlikely(!memview))
+            goto fail;
+    }
+    buf = &memview->view;
+    if (unlikely(buf->ndim != ndim)) {
+        PyErr_Format(PyExc_ValueError,
+                "Buffer has wrong number of dimensions (expected %d, got %d)",
+                ndim, buf->ndim);
+        goto fail;
+    }
+    if (new_memview) { /* the format string is only parsed for a freshly created memoryview */
+        __Pyx_BufFmt_Init(&ctx, stack, dtype);
+        if (unlikely(!__Pyx_BufFmt_CheckString(&ctx, buf->format))) goto fail;
+    }
+    if (unlikely((unsigned) buf->itemsize != dtype->size)) { /* item size must match the dtype size on both paths */
+        PyErr_Format(PyExc_ValueError,
+                     "Item size of buffer (%" CYTHON_FORMAT_SSIZE_T "u byte%s) "
+                     "does not match size of '%s' (%" CYTHON_FORMAT_SSIZE_T "u byte%s)",
+                     buf->itemsize,
+                     (buf->itemsize > 1) ? "s" : "",
+                     dtype->name,
+                     dtype->size,
+                     (dtype->size > 1) ? "s" : "");
+        goto fail;
+    }
+    if (buf->len > 0) { /* buffers with len == 0 skip all stride, suboffset and contiguity checks */
+        for (i = 0; i < ndim; i++) {
+            spec = axes_specs[i];
+            if (unlikely(!__pyx_check_strides(buf, i, ndim, spec)))
+                goto fail;
+            if (unlikely(!__pyx_check_suboffsets(buf, i, ndim, spec)))
+                goto fail;
+        }
+        if (unlikely(buf->strides && !__pyx_verify_contig(buf, ndim, c_or_f_flag))) /* strides guard: __pyx_verify_contig dereferences buf->strides unconditionally */
+            goto fail;
+    }
+    if (unlikely(__Pyx_init_memviewslice(memview, ndim, memviewslice,
+                                         new_memview != NULL) == -1)) { /* last argument tells the initializer whether the memoryview was created here */
+        goto fail;
+    }
+    retval = 0;
+    goto no_fail;
+fail:
+    Py_XDECREF(new_memview); /* no-op on the reuse path and when creation itself failed (new_memview is NULL in both cases) */
+    retval = -1;
+no_fail:
+    __Pyx_RefNannyFinishContext();
+    return retval;
+}
+
+/* ObjectToMemviewSlice: converts obj into a 3-dimensional C-contiguous memoryview slice of int; on failure the returned slice has memview and data set to NULL. */
+  static CYTHON_INLINE __Pyx_memviewslice __Pyx_PyObject_to_MemoryviewSlice_d_d_dc_int(PyObject *obj, int writable_flag) {
+    __Pyx_memviewslice result = { 0, 0, { 0 }, { 0 }, { 0 } };
+    __Pyx_BufFmt_StackElem stack[1];
+    int axes_specs[] = { (__Pyx_MEMVIEW_DIRECT | __Pyx_MEMVIEW_FOLLOW), (__Pyx_MEMVIEW_DIRECT | __Pyx_MEMVIEW_FOLLOW), (__Pyx_MEMVIEW_DIRECT | __Pyx_MEMVIEW_CONTIG) }; /* first two axes direct+follow, last axis direct+contiguous */
+    int retcode;
+    if (obj == Py_None) { /* None is passed through unvalidated: memview holds the Py_None pointer, all other fields stay zero */
+        result.memview = (struct __pyx_memoryview_obj *) Py_None;
+        return result;
+    }
+    retcode = __Pyx_ValidateAndInit_memviewslice(axes_specs, __Pyx_IS_C_CONTIG,
+                                                 (PyBUF_C_CONTIGUOUS | PyBUF_FORMAT) | writable_flag, 3, /* writable_flag is OR-ed into the buffer request flags */
+                                                 &__Pyx_TypeInfo_int, stack,
+                                                 &result, obj);
+    if (unlikely(retcode == -1))
+        goto __pyx_fail;
+    return result;
+__pyx_fail: /* validation failed: hand back a slice whose memview and data are NULL */
+    result.memview = NULL;
+    result.data = NULL;
+    return result;
+}
+
+/* ObjectToMemviewSlice: converts obj into a 3-dimensional C-contiguous memoryview slice of float; on failure the returned slice has memview and data set to NULL. */
+  static CYTHON_INLINE __Pyx_memviewslice __Pyx_PyObject_to_MemoryviewSlice_d_d_dc_float(PyObject *obj, int writable_flag) {
+    __Pyx_memviewslice result = { 0, 0, { 0 }, { 0 }, { 0 } };
+    __Pyx_BufFmt_StackElem stack[1];
+    int axes_specs[] = { (__Pyx_MEMVIEW_DIRECT | __Pyx_MEMVIEW_FOLLOW), (__Pyx_MEMVIEW_DIRECT | __Pyx_MEMVIEW_FOLLOW), (__Pyx_MEMVIEW_DIRECT | __Pyx_MEMVIEW_CONTIG) }; /* first two axes direct+follow, last axis direct+contiguous */
+    int retcode;
+    if (obj == Py_None) { /* None is passed through unvalidated: memview holds the Py_None pointer, all other fields stay zero */
+        result.memview = (struct __pyx_memoryview_obj *) Py_None;
+        return result;
+    }
+    retcode = __Pyx_ValidateAndInit_memviewslice(axes_specs, __Pyx_IS_C_CONTIG,
+                                                 (PyBUF_C_CONTIGUOUS | PyBUF_FORMAT) | writable_flag, 3, /* writable_flag is OR-ed into the buffer request flags */
+                                                 &__Pyx_TypeInfo_float, stack,
+                                                 &result, obj);
+    if (unlikely(retcode == -1))
+        goto __pyx_fail;
+    return result;
+__pyx_fail: /* validation failed: hand back a slice whose memview and data are NULL */
+    result.memview = NULL;
+    result.data = NULL;
+    return result;
+}
+
+/* ObjectToMemviewSlice: converts obj into a 1-dimensional C-contiguous memoryview slice of int; on failure the returned slice has memview and data set to NULL. */
+  static CYTHON_INLINE __Pyx_memviewslice __Pyx_PyObject_to_MemoryviewSlice_dc_int(PyObject *obj, int writable_flag) {
+    __Pyx_memviewslice result = { 0, 0, { 0 }, { 0 }, { 0 } };
+    __Pyx_BufFmt_StackElem stack[1];
+    int axes_specs[] = { (__Pyx_MEMVIEW_DIRECT | __Pyx_MEMVIEW_CONTIG) }; /* single axis: direct and contiguous */
+    int retcode;
+    if (obj == Py_None) { /* None is passed through unvalidated: memview holds the Py_None pointer, all other fields stay zero */
+        result.memview = (struct __pyx_memoryview_obj *) Py_None;
+        return result;
+    }
+    retcode = __Pyx_ValidateAndInit_memviewslice(axes_specs, __Pyx_IS_C_CONTIG,
+                                                 (PyBUF_C_CONTIGUOUS | PyBUF_FORMAT) | writable_flag, 1, /* writable_flag is OR-ed into the buffer request flags */
+                                                 &__Pyx_TypeInfo_int, stack,
+                                                 &result, obj);
+    if (unlikely(retcode == -1))
+        goto __pyx_fail;
+    return result;
+__pyx_fail: /* validation failed: hand back a slice whose memview and data are NULL */
+    result.memview = NULL;
+    result.data = NULL;
+    return result;
+}
+
+/* CIntFromPyVerify: macros expanding to a block that evaluates func_value as func_type and RETURNS it from the enclosing function cast to target_type. When target_type is narrower than func_type and the value does not survive the round-trip cast, control jumps to the enclosing function's raise_neg_overflow / raise_overflow labels; the enclosing function must also provide an `is_unsigned` variable. The _EXC form first returns (target_type)-1 if the value is (func_type)-1 and a Python error is pending. */
+  #define __PYX_VERIFY_RETURN_INT(target_type, func_type, func_value)\
+    __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, 0)
+#define __PYX_VERIFY_RETURN_INT_EXC(target_type, func_type, func_value)\
+    __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, 1)
+#define __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, exc)\
+    {\
+        func_type value = func_value;\
+        if (sizeof(target_type) < sizeof(func_type)) {\
+            if (unlikely(value != (func_type) (target_type) value)) {\
+                func_type zero = 0;\
+                if (exc && unlikely(value == (func_type)-1 && PyErr_Occurred()))\
+                    return (target_type) -1;\
+                if (is_unsigned && unlikely(value < zero))\
+                    goto raise_neg_overflow;\
+                else\
+                    goto raise_overflow;\
+            }\
+        }\
+        return (target_type) value;\
+    }
+
+/* Declarations: __pyx_t_float_complex_from_parts builds a float complex value from a real part x and an imaginary part y; one of three variants is compiled depending on CYTHON_CCOMPLEX and __cplusplus. */
+  #if CYTHON_CCOMPLEX
+  #ifdef __cplusplus
+    static CYTHON_INLINE __pyx_t_float_complex __pyx_t_float_complex_from_parts(float x, float y) { /* C++ variant: std::complex<float> */
+      return ::std::complex< float >(x, y);
+    }
+  #else
+    static CYTHON_INLINE __pyx_t_float_complex __pyx_t_float_complex_from_parts(float x, float y) { /* C variant: native complex arithmetic via _Complex_I */
+      return x + y*(__pyx_t_float_complex)_Complex_I;
+    }
+  #endif
+#else
+    static CYTHON_INLINE __pyx_t_float_complex __pyx_t_float_complex_from_parts(float x, float y) { /* fallback variant: the type is used as a struct with .real and .imag members */
+      __pyx_t_float_complex z;
+      z.real = x;
+      z.imag = y;
+      return z;
+    }
+#endif
+
+/* Arithmetic: hand-written float complex operations on the .real/.imag struct representation; compiled only when CYTHON_CCOMPLEX is 0 (the #if branch below is intentionally empty). */
+  #if CYTHON_CCOMPLEX
+#else
+    static CYTHON_INLINE int __Pyx_c_eq_float(__pyx_t_float_complex a, __pyx_t_float_complex b) { /* component-wise equality */
+       return (a.real == b.real) && (a.imag == b.imag);
+    }
+    static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_sum_float(__pyx_t_float_complex a, __pyx_t_float_complex b) { /* a + b */
+        __pyx_t_float_complex z;
+        z.real = a.real + b.real;
+        z.imag = a.imag + b.imag;
+        return z;
+    }
+    static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_diff_float(__pyx_t_float_complex a, __pyx_t_float_complex b) { /* a - b */
+        __pyx_t_float_complex z;
+        z.real = a.real - b.real;
+        z.imag = a.imag - b.imag;
+        return z;
+    }
+    static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_prod_float(__pyx_t_float_complex a, __pyx_t_float_complex b) { /* a * b */
+        __pyx_t_float_complex z;
+        z.real = a.real * b.real - a.imag * b.imag;
+        z.imag = a.real * b.imag + a.imag * b.real;
+        return z;
+    }
+    #if 1
+    static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_quot_float(__pyx_t_float_complex a, __pyx_t_float_complex b) { /* a / b, dividing through by whichever component of b has the larger magnitude */
+        if (b.imag == 0) { /* purely real divisor: divide both components by b.real */
+            return __pyx_t_float_complex_from_parts(a.real / b.real, a.imag / b.real);
+        } else if (fabsf(b.real) >= fabsf(b.imag)) {
+            if (b.real == 0 && b.imag == 0) { /* cannot be true here: b.imag == 0 was already handled by the first branch */
+                return __pyx_t_float_complex_from_parts(a.real / b.real, a.imag / b.imag);
+            } else {
+                float r = b.imag / b.real; /* ratio of the smaller-magnitude component to the larger one */
+                float s = (float)(1.0) / (b.real + b.imag * r);
+                return __pyx_t_float_complex_from_parts(
+                    (a.real + a.imag * r) * s, (a.imag - a.real * r) * s);
+            }
+        } else {
+            float r = b.real / b.imag; /* same scheme with the roles of b.real and b.imag swapped */
+            float s = (float)(1.0) / (b.imag + b.real * r);
+            return __pyx_t_float_complex_from_parts(
+                (a.real * r + a.imag) * s, (a.imag * r - a.real) * s);
+        }
+    }
+    #else /* never compiled because of the `#if 1` above: direct formula dividing by b.real^2 + b.imag^2 */
+    static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_quot_float(__pyx_t_float_complex a, __pyx_t_float_complex b) {
+        if (b.imag == 0) {
+            return __pyx_t_float_complex_from_parts(a.real / b.real, a.imag / b.real);
+        } else {
+            float denom = b.real * b.real + b.imag * b.imag;
+            return __pyx_t_float_complex_from_parts(
+                (a.real * b.real + a.imag * b.imag) / denom,
+                (a.imag * b.real - a.real * b.imag) / denom);
+        }
+    }
+    #endif
+    static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_neg_float(__pyx_t_float_complex a) { /* -a */
+        __pyx_t_float_complex z;
+        z.real = -a.real;
+        z.imag = -a.imag;
+        return z;
+    }
+    static CYTHON_INLINE int __Pyx_c_is_zero_float(__pyx_t_float_complex a) { /* true when both components compare equal to 0 */
+       return (a.real == 0) && (a.imag == 0);
+    }
+    static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_conj_float(__pyx_t_float_complex a) { /* complex conjugate: imaginary part negated */
+        __pyx_t_float_complex z;
+        z.real =  a.real;
+        z.imag = -a.imag;
+        return z;
+    }
+    #if 1
+        static CYTHON_INLINE float __Pyx_c_abs_float(__pyx_t_float_complex z) { /* magnitude of z */
+          #if !defined(HAVE_HYPOT) || defined(_MSC_VER)
+            return sqrtf(z.real*z.real + z.imag*z.imag); /* used when HAVE_HYPOT is undefined or on MSVC */
+          #else
+            return hypotf(z.real, z.imag);
+          #endif
+        }
+        static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_pow_float(__pyx_t_float_complex a, __pyx_t_float_complex b) { /* a raised to the power b */
+            __pyx_t_float_complex z;
+            float r, lnr, theta, z_r, z_theta;
+            if (b.imag == 0 && b.real == (int)b.real) { /* exponent is a real integer: small magnitudes are handled by repeated multiplication */
+                if (b.real < 0) { /* negative exponent: replace a by its reciprocal and negate the exponent */
+                    float denom = a.real * a.real + a.imag * a.imag;
+                    a.real = a.real / denom;
+                    a.imag = -a.imag / denom;
+                    b.real = -b.real;
+                }
+                switch ((int)b.real) { /* no default case: exponents above 4 fall through to the general code below */
+                    case 0:
+                        z.real = 1;
+                        z.imag = 0;
+                        return z;
+                    case 1:
+                        return a;
+                    case 2:
+                        return __Pyx_c_prod_float(a, a);
+                    case 3:
+                        z = __Pyx_c_prod_float(a, a);
+                        return __Pyx_c_prod_float(z, a);
+                    case 4:
+                        z = __Pyx_c_prod_float(a, a);
+                        return __Pyx_c_prod_float(z, z);
+                }
+            }
+            if (a.imag == 0) { /* real base */
+                if (a.real == 0) { /* zero base: returned unchanged for every exponent that reaches this point */
+                    return a;
+                } else if (b.imag == 0) { /* real base and real exponent: delegate to powf */
+                    z.real = powf(a.real, b.real);
+                    z.imag = 0;
+                    return z;
+                } else if (a.real > 0) {
+                    r = a.real;
+                    theta = 0;
+                } else {
+                    r = -a.real;
+                    theta = atan2f(0.0, -1.0); /* angle of the negative real axis, i.e. pi */
+                }
+            } else {
+                r = __Pyx_c_abs_float(a);
+                theta = atan2f(a.imag, a.real);
+            }
+            lnr = logf(r); /* general case: with a = r*exp(i*theta), compute exp(b * (ln r + i*theta)) */
+            z_r = expf(lnr * b.real - theta * b.imag);
+            z_theta = theta * b.real + lnr * b.imag;
+            z.real = z_r * cosf(z_theta);
+            z.imag = z_r * sinf(z_theta);
+            return z;
+        }
+    #endif
+#endif
+
+/* Declarations: __pyx_t_double_complex_from_parts builds a double complex value from a real part x and an imaginary part y; one of three variants is compiled depending on CYTHON_CCOMPLEX and __cplusplus. */
+  #if CYTHON_CCOMPLEX
+  #ifdef __cplusplus
+    static CYTHON_INLINE __pyx_t_double_complex __pyx_t_double_complex_from_parts(double x, double y) { /* C++ variant: std::complex<double> */
+      return ::std::complex< double >(x, y);
+    }
+  #else
+    static CYTHON_INLINE __pyx_t_double_complex __pyx_t_double_complex_from_parts(double x, double y) { /* C variant: native complex arithmetic via _Complex_I */
+      return x + y*(__pyx_t_double_complex)_Complex_I;
+    }
+  #endif
+#else
+    static CYTHON_INLINE __pyx_t_double_complex __pyx_t_double_complex_from_parts(double x, double y) { /* fallback variant: the type is used as a struct with .real and .imag members */
+      __pyx_t_double_complex z;
+      z.real = x;
+      z.imag = y;
+      return z;
+    }
+#endif
+
+/* Arithmetic: hand-written double complex operations on the .real/.imag struct representation; compiled only when CYTHON_CCOMPLEX is 0 (the #if branch below is intentionally empty). */
+  #if CYTHON_CCOMPLEX
+#else
+    static CYTHON_INLINE int __Pyx_c_eq_double(__pyx_t_double_complex a, __pyx_t_double_complex b) { /* component-wise equality */
+       return (a.real == b.real) && (a.imag == b.imag);
+    }
+    static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_sum_double(__pyx_t_double_complex a, __pyx_t_double_complex b) { /* a + b */
+        __pyx_t_double_complex z;
+        z.real = a.real + b.real;
+        z.imag = a.imag + b.imag;
+        return z;
+    }
+    static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_diff_double(__pyx_t_double_complex a, __pyx_t_double_complex b) { /* a - b */
+        __pyx_t_double_complex z;
+        z.real = a.real - b.real;
+        z.imag = a.imag - b.imag;
+        return z;
+    }
+    static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_prod_double(__pyx_t_double_complex a, __pyx_t_double_complex b) { /* a * b */
+        __pyx_t_double_complex z;
+        z.real = a.real * b.real - a.imag * b.imag;
+        z.imag = a.real * b.imag + a.imag * b.real;
+        return z;
+    }
+    #if 1
+    static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_quot_double(__pyx_t_double_complex a, __pyx_t_double_complex b) { /* a / b, dividing through by whichever component of b has the larger magnitude */
+        if (b.imag == 0) { /* purely real divisor: divide both components by b.real */
+            return __pyx_t_double_complex_from_parts(a.real / b.real, a.imag / b.real);
+        } else if (fabs(b.real) >= fabs(b.imag)) {
+            if (b.real == 0 && b.imag == 0) { /* cannot be true here: b.imag == 0 was already handled by the first branch */
+                return __pyx_t_double_complex_from_parts(a.real / b.real, a.imag / b.imag);
+            } else {
+                double r = b.imag / b.real; /* ratio of the smaller-magnitude component to the larger one */
+                double s = (double)(1.0) / (b.real + b.imag * r);
+                return __pyx_t_double_complex_from_parts(
+                    (a.real + a.imag * r) * s, (a.imag - a.real * r) * s);
+            }
+        } else {
+            double r = b.real / b.imag; /* same scheme with the roles of b.real and b.imag swapped */
+            double s = (double)(1.0) / (b.imag + b.real * r);
+            return __pyx_t_double_complex_from_parts(
+                (a.real * r + a.imag) * s, (a.imag * r - a.real) * s);
+        }
+    }
+    #else /* never compiled because of the `#if 1` above: direct formula dividing by b.real^2 + b.imag^2 */
+    static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_quot_double(__pyx_t_double_complex a, __pyx_t_double_complex b) {
+        if (b.imag == 0) {
+            return __pyx_t_double_complex_from_parts(a.real / b.real, a.imag / b.real);
+        } else {
+            double denom = b.real * b.real + b.imag * b.imag;
+            return __pyx_t_double_complex_from_parts(
+                (a.real * b.real + a.imag * b.imag) / denom,
+                (a.imag * b.real - a.real * b.imag) / denom);
+        }
+    }
+    #endif
+    static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_neg_double(__pyx_t_double_complex a) { /* -a */
+        __pyx_t_double_complex z;
+        z.real = -a.real;
+        z.imag = -a.imag;
+        return z;
+    }
+    static CYTHON_INLINE int __Pyx_c_is_zero_double(__pyx_t_double_complex a) { /* true when both components compare equal to 0 */
+       return (a.real == 0) && (a.imag == 0);
+    }
+    static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_conj_double(__pyx_t_double_complex a) { /* complex conjugate: imaginary part negated */
+        __pyx_t_double_complex z;
+        z.real =  a.real;
+        z.imag = -a.imag;
+        return z;
+    }
+    #if 1
+        static CYTHON_INLINE double __Pyx_c_abs_double(__pyx_t_double_complex z) { /* magnitude of z */
+          #if !defined(HAVE_HYPOT) || defined(_MSC_VER)
+            return sqrt(z.real*z.real + z.imag*z.imag); /* used when HAVE_HYPOT is undefined or on MSVC */
+          #else
+            return hypot(z.real, z.imag);
+          #endif
+        }
+        static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_pow_double(__pyx_t_double_complex a, __pyx_t_double_complex b) { /* a raised to the power b */
+            __pyx_t_double_complex z;
+            double r, lnr, theta, z_r, z_theta;
+            if (b.imag == 0 && b.real == (int)b.real) { /* exponent is a real integer: small magnitudes are handled by repeated multiplication */
+                if (b.real < 0) { /* negative exponent: replace a by its reciprocal and negate the exponent */
+                    double denom = a.real * a.real + a.imag * a.imag;
+                    a.real = a.real / denom;
+                    a.imag = -a.imag / denom;
+                    b.real = -b.real;
+                }
+                switch ((int)b.real) { /* no default case: exponents above 4 fall through to the general code below */
+                    case 0:
+                        z.real = 1;
+                        z.imag = 0;
+                        return z;
+                    case 1:
+                        return a;
+                    case 2:
+                        return __Pyx_c_prod_double(a, a);
+                    case 3:
+                        z = __Pyx_c_prod_double(a, a);
+                        return __Pyx_c_prod_double(z, a);
+                    case 4:
+                        z = __Pyx_c_prod_double(a, a);
+                        return __Pyx_c_prod_double(z, z);
+                }
+            }
+            if (a.imag == 0) { /* real base */
+                if (a.real == 0) { /* zero base: returned unchanged for every exponent that reaches this point */
+                    return a;
+                } else if (b.imag == 0) { /* real base and real exponent: delegate to pow */
+                    z.real = pow(a.real, b.real);
+                    z.imag = 0;
+                    return z;
+                } else if (a.real > 0) {
+                    r = a.real;
+                    theta = 0;
+                } else {
+                    r = -a.real;
+                    theta = atan2(0.0, -1.0); /* angle of the negative real axis, i.e. pi */
+                }
+            } else {
+                r = __Pyx_c_abs_double(a);
+                theta = atan2(a.imag, a.real);
+            }
+            lnr = log(r); /* general case: with a = r*exp(i*theta), compute exp(b * (ln r + i*theta)) */
+            z_r = exp(lnr * b.real - theta * b.imag);
+            z_theta = theta * b.real + lnr * b.imag;
+            z.real = z_r * cos(z_theta);
+            z.imag = z_r * sin(z_theta);
+            return z;
+        }
+    #endif
+#endif
+
+/* MemviewSliceCopyTemplate: copies the contents of *from_mvs into a newly allocated array and returns a slice over it; on failure the returned slice has memview and data set to NULL and a Python exception is set. */
+  static __Pyx_memviewslice
+__pyx_memoryview_copy_new_contig(const __Pyx_memviewslice *from_mvs,
+                                 const char *mode, int ndim, /* mode is forwarded to __pyx_array_new; ndim bounds every per-axis loop below */
+                                 size_t sizeof_dtype, int contig_flag, /* item size for the new array; flags for the new memoryview */
+                                 int dtype_is_object) /* forwarded to __pyx_memoryview_new and __pyx_memoryview_copy_contents */
+{
+    __Pyx_RefNannyDeclarations
+    int i;
+    __Pyx_memviewslice new_mvs = { 0, 0, { 0 }, { 0 }, { 0 } };
+    struct __pyx_memoryview_obj *from_memview = from_mvs->memview;
+    Py_buffer *buf = &from_memview->view;
+    PyObject *shape_tuple = NULL;
+    PyObject *temp_int = NULL;
+    struct __pyx_array_obj *array_obj = NULL;
+    struct __pyx_memoryview_obj *memview_obj = NULL;
+    __Pyx_RefNannySetupContext("__pyx_memoryview_copy_new_contig", 0);
+    for (i = 0; i < ndim; i++) { /* refuse sources with any indirect axis (suboffset >= 0) */
+        if (unlikely(from_mvs->suboffsets[i] >= 0)) {
+            PyErr_Format(PyExc_ValueError, "Cannot copy memoryview slice with "
+                                           "indirect dimensions (axis %d)", i);
+            goto fail;
+        }
+    }
+    shape_tuple = PyTuple_New(ndim); /* shape of the source slice as a Python tuple for the array constructor */
+    if (unlikely(!shape_tuple)) {
+        goto fail;
+    }
+    __Pyx_GOTREF(shape_tuple);
+    for(i = 0; i < ndim; i++) {
+        temp_int = PyInt_FromSsize_t(from_mvs->shape[i]);
+        if(unlikely(!temp_int)) {
+            goto fail;
+        } else {
+            PyTuple_SET_ITEM(shape_tuple, i, temp_int);
+            temp_int = NULL; /* cleared so the cleanup code below does not release the item just stored in the tuple */
+        }
+    }
+    array_obj = __pyx_array_new(shape_tuple, sizeof_dtype, buf->format, (char *) mode, NULL); /* the new array reuses the source buffer's format string */
+    if (unlikely(!array_obj)) {
+        goto fail;
+    }
+    __Pyx_GOTREF(array_obj);
+    memview_obj = (struct __pyx_memoryview_obj *) __pyx_memoryview_new(
+                                    (PyObject *) array_obj, contig_flag,
+                                    dtype_is_object,
+                                    from_mvs->memview->typeinfo);
+    if (unlikely(!memview_obj))
+        goto fail;
+    if (unlikely(__Pyx_init_memviewslice(memview_obj, ndim, &new_mvs, 1) < 0)) /* NOTE(review): memview_obj is never released in this function; presumably this call takes over the reference - confirm */
+        goto fail;
+    if (unlikely(__pyx_memoryview_copy_contents(*from_mvs, new_mvs, ndim, ndim,
+                                                dtype_is_object) < 0))
+        goto fail;
+    goto no_fail;
+fail:
+    __Pyx_XDECREF(new_mvs.memview); /* no-op while new_mvs.memview is still NULL */
+    new_mvs.memview = NULL;
+    new_mvs.data = NULL;
+no_fail: /* shared cleanup: runs on both the success and the failure path */
+    __Pyx_XDECREF(shape_tuple);
+    __Pyx_XDECREF(temp_int);
+    __Pyx_XDECREF(array_obj);
+    __Pyx_RefNannyFinishContext();
+    return new_mvs;
+}
+
+/* CIntToPy: converts a C int to a Python integer object, choosing the conversion call from the size and signedness of the C type. */
+  static CYTHON_INLINE PyObject* __Pyx_PyInt_From_int(int value) {
+#ifdef __Pyx_HAS_GCC_DIAGNOSTIC
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wconversion"
+#endif
+    const int neg_one = (int) -1, const_zero = (int) 0;
+#ifdef __Pyx_HAS_GCC_DIAGNOSTIC
+#pragma GCC diagnostic pop
+#endif
+    const int is_unsigned = neg_one > const_zero; /* signedness probe: true only if -1 cast to the type compares greater than 0 */
+    if (is_unsigned) {
+        if (sizeof(int) < sizeof(long)) { /* strictly narrower than long: every value fits in a signed long */
+            return PyInt_FromLong((long) value);
+        } else if (sizeof(int) <= sizeof(unsigned long)) {
+            return PyLong_FromUnsignedLong((unsigned long) value);
+#ifdef HAVE_LONG_LONG
+        } else if (sizeof(int) <= sizeof(unsigned PY_LONG_LONG)) {
+            return PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG) value);
+#endif
+        }
+    } else {
+        if (sizeof(int) <= sizeof(long)) {
+            return PyInt_FromLong((long) value);
+#ifdef HAVE_LONG_LONG
+        } else if (sizeof(int) <= sizeof(PY_LONG_LONG)) {
+            return PyLong_FromLongLong((PY_LONG_LONG) value);
+#endif
+        }
+    }
+    { /* fallback when no branch above returned: convert from the raw bytes of value */
+        int one = 1; int little = (int)*(unsigned char *)&one; /* first byte of the integer 1 is non-zero on a little-endian host */
+        unsigned char *bytes = (unsigned char *)&value;
+        return _PyLong_FromByteArray(bytes, sizeof(int),
+                                     little, !is_unsigned);
+    }
+}
+
+/* CIntFromPy */
+  static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *x) {
+#ifdef __Pyx_HAS_GCC_DIAGNOSTIC
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wconversion"
+#endif
+    const int neg_one = (int) -1, const_zero = (int) 0;
+#ifdef __Pyx_HAS_GCC_DIAGNOSTIC
+#pragma GCC diagnostic pop
+#endif
+    const int is_unsigned = neg_one > const_zero;
+#if PY_MAJOR_VERSION < 3
+    if (likely(PyInt_Check(x))) {
+        if (sizeof(int) < sizeof(long)) {
+            __PYX_VERIFY_RETURN_INT(int, long, PyInt_AS_LONG(x))
+        } else {
+            long val = PyInt_AS_LONG(x);
+            if (is_unsigned && unlikely(val < 0)) {
+                goto raise_neg_overflow;
+            }
+            return (int) val;
+        }
+    } else
+#endif
+    if (likely(PyLong_Check(x))) {
+        if (is_unsigned) {
+#if CYTHON_USE_PYLONG_INTERNALS
+            const digit* digits = ((PyLongObject*)x)->ob_digit;
+            switch (Py_SIZE(x)) {
+                case  0: return (int) 0;
+                case  1: __PYX_VERIFY_RETURN_INT(int, digit, digits[0])
+                case 2:
+                    if (8 * sizeof(int) > 1 * PyLong_SHIFT) {
+                        if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) {
+                            __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
+                        } else if (8 * sizeof(int) >= 2 * PyLong_SHIFT) {
+                            return (int) (((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0]));
+                        }
+                    }
+                    break;
+                case 3:
+                    if (8 * sizeof(int) > 2 * PyLong_SHIFT) {
+                        if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) {
+                            __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
+                        } else if (8 * sizeof(int) >= 3 * PyLong_SHIFT) {
+                            return (int) (((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]));
+                        }
+                    }
+                    break;
+                case 4:
+                    if (8 * sizeof(int) > 3 * PyLong_SHIFT) {
+                        if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) {
+                            __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
+                        } else if (8 * sizeof(int) >= 4 * PyLong_SHIFT) {
+                            return (int) (((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]));
+                        }
+                    }
+                    break;
+            }
+#endif
+#if CYTHON_COMPILING_IN_CPYTHON
+            if (unlikely(Py_SIZE(x) < 0)) {
+                goto raise_neg_overflow;
+            }
+#else
+            {
+                int result = PyObject_RichCompareBool(x, Py_False, Py_LT);
+                if (unlikely(result < 0))
+                    return (int) -1;
+                if (unlikely(result == 1))
+                    goto raise_neg_overflow;
+            }
+#endif
+            if (sizeof(int) <= sizeof(unsigned long)) {
+                __PYX_VERIFY_RETURN_INT_EXC(int, unsigned long, PyLong_AsUnsignedLong(x))
+#ifdef HAVE_LONG_LONG
+            } else if (sizeof(int) <= sizeof(unsigned PY_LONG_LONG)) {
+                __PYX_VERIFY_RETURN_INT_EXC(int, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x))
+#endif
+            }
+        } else {
+#if CYTHON_USE_PYLONG_INTERNALS
+            const digit* digits = ((PyLongObject*)x)->ob_digit;
+            switch (Py_SIZE(x)) {
+                case  0: return (int) 0;
+                case -1: __PYX_VERIFY_RETURN_INT(int, sdigit, (sdigit) (-(sdigit)digits[0]))
+                case  1: __PYX_VERIFY_RETURN_INT(int,  digit, +digits[0])
+                case -2:
+                    if (8 * sizeof(int) - 1 > 1 * PyLong_SHIFT) {
+                        if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) {
+                            __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
+                        } else if (8 * sizeof(int) - 1 > 2 * PyLong_SHIFT) {
+                            return (int) (((int)-1)*(((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0])));
+                        }
+                    }
+                    break;
+                case 2:
+                    if (8 * sizeof(int) > 1 * PyLong_SHIFT) {
+                        if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) {
+                            __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
+                        } else if (8 * sizeof(int) - 1 > 2 * PyLong_SHIFT) {
+                            return (int) ((((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0])));
+                        }
+                    }
+                    break;
+                case -3:
+                    if (8 * sizeof(int) - 1 > 2 * PyLong_SHIFT) {
+                        if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) {
+                            __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
+                        } else if (8 * sizeof(int) - 1 > 3 * PyLong_SHIFT) {
+                            return (int) (((int)-1)*(((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0])));
+                        }
+                    }
+                    break;
+                case 3:
+                    if (8 * sizeof(int) > 2 * PyLong_SHIFT) {
+                        if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) {
+                            __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
+                        } else if (8 * sizeof(int) - 1 > 3 * PyLong_SHIFT) {
+                            return (int) ((((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0])));
+                        }
+                    }
+                    break;
+                case -4:
+                    if (8 * sizeof(int) - 1 > 3 * PyLong_SHIFT) {
+                        if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) {
+                            __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
+                        } else if (8 * sizeof(int) - 1 > 4 * PyLong_SHIFT) {
+                            return (int) (((int)-1)*(((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0])));
+                        }
+                    }
+                    break;
+                case 4:
+                    if (8 * sizeof(int) > 3 * PyLong_SHIFT) {
+                        if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) {
+                            __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
+                        } else if (8 * sizeof(int) - 1 > 4 * PyLong_SHIFT) {
+                            return (int) ((((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0])));
+                        }
+                    }
+                    break;
+            }
+#endif
+            if (sizeof(int) <= sizeof(long)) {
+                __PYX_VERIFY_RETURN_INT_EXC(int, long, PyLong_AsLong(x))
+#ifdef HAVE_LONG_LONG
+            } else if (sizeof(int) <= sizeof(PY_LONG_LONG)) {
+                __PYX_VERIFY_RETURN_INT_EXC(int, PY_LONG_LONG, PyLong_AsLongLong(x))
+#endif
+            }
+        }
+        {
+#if CYTHON_COMPILING_IN_PYPY && !defined(_PyLong_AsByteArray)
+            PyErr_SetString(PyExc_RuntimeError,
+                            "_PyLong_AsByteArray() not available in PyPy, cannot convert large numbers");
+#else
+            int val;
+            PyObject *v = __Pyx_PyNumber_IntOrLong(x);
+ #if PY_MAJOR_VERSION < 3
+            if (likely(v) && !PyLong_Check(v)) {
+                PyObject *tmp = v;
+                v = PyNumber_Long(tmp);
+                Py_DECREF(tmp);
+            }
+ #endif
+            if (likely(v)) {
+                int one = 1; int is_little = (int)*(unsigned char *)&one;
+                unsigned char *bytes = (unsigned char *)&val;
+                int ret = _PyLong_AsByteArray((PyLongObject *)v,
+                                              bytes, sizeof(val),
+                                              is_little, !is_unsigned);
+                Py_DECREF(v);
+                if (likely(!ret))
+                    return val;
+            }
+#endif
+            return (int) -1;
+        }
+    } else {
+        int val;
+        PyObject *tmp = __Pyx_PyNumber_IntOrLong(x);
+        if (!tmp) return (int) -1;
+        val = __Pyx_PyInt_As_int(tmp);
+        Py_DECREF(tmp);
+        return val;
+    }
+raise_overflow:
+    PyErr_SetString(PyExc_OverflowError,
+        "value too large to convert to int");
+    return (int) -1;
+raise_neg_overflow:
+    PyErr_SetString(PyExc_OverflowError,
+        "can't convert negative value to int");
+    return (int) -1;
+}
+
+/* CIntToPy */
+  /* Wrap a C `long` in a new Python integer object.
+   *
+   * The branch conditions below are built only from sizeof expressions and
+   * the signedness probe, and each branch picks the Python constructor whose
+   * C argument type can hold a `long`. If none of them applies, the raw bytes
+   * of `value` are handed to _PyLong_FromByteArray together with the detected
+   * byte order and signedness.
+   */
+  static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value) {
+#ifdef __Pyx_HAS_GCC_DIAGNOSTIC
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wconversion"
+#endif
+    const long minus_one = (long) -1, zero = (long) 0;
+#ifdef __Pyx_HAS_GCC_DIAGNOSTIC
+#pragma GCC diagnostic pop
+#endif
+    /* Signedness probe: (T)-1 compares greater than 0 only for unsigned T. */
+    const int is_unsigned = minus_one > zero;
+    if (is_unsigned) {
+        if (sizeof(long) < sizeof(long))
+            return PyInt_FromLong((long) value);
+        if (sizeof(long) <= sizeof(unsigned long))
+            return PyLong_FromUnsignedLong((unsigned long) value);
+#ifdef HAVE_LONG_LONG
+        if (sizeof(long) <= sizeof(unsigned PY_LONG_LONG))
+            return PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG) value);
+#endif
+    } else {
+        if (sizeof(long) <= sizeof(long))
+            return PyInt_FromLong((long) value);
+#ifdef HAVE_LONG_LONG
+        if (sizeof(long) <= sizeof(PY_LONG_LONG))
+            return PyLong_FromLongLong((PY_LONG_LONG) value);
+#endif
+    }
+    {
+        /* Generic fallback: read the first byte of the int 1 to learn the
+         * byte order, then convert the object representation of `value`. */
+        int probe = 1;
+        int little_endian = (int)*(unsigned char *)&probe;
+        unsigned char *raw = (unsigned char *)&value;
+        return _PyLong_FromByteArray(raw, sizeof(long),
+                                     little_endian, !is_unsigned);
+    }
+}
+
+/* CIntFromPy
+ *
+ * __Pyx_PyInt_As_long: convert a Python object to a C `long`.
+ *
+ * Dispatch, as visible in the body:
+ *   - Python 2 only: a PyInt is read directly with PyInt_AS_LONG.
+ *   - A PyLong goes through (a) an optional fast path that reads up to four
+ *     internal digits when CYTHON_USE_PYLONG_INTERNALS is set, then
+ *     (b) PyLong_AsLong / PyLong_AsLongLong or their unsigned counterparts,
+ *     then (c) a _PyLong_AsByteArray fallback.
+ *   - Any other object is coerced with __Pyx_PyNumber_IntOrLong and the
+ *     function calls itself on the result.
+ *
+ * Returns the converted value. The failure paths visible here return
+ * (long) -1; the two labels at the end set OverflowError before doing so.
+ * NOTE(review): -1 is also a legitimate result, so callers presumably have
+ * to consult PyErr_Occurred() to tell the cases apart -- confirm at call sites.
+ */
+  static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *x) {
+#ifdef __Pyx_HAS_GCC_DIAGNOSTIC
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wconversion"
+#endif
+    const long neg_one = (long) -1, const_zero = (long) 0; /* operands of the signedness probe below */
+#ifdef __Pyx_HAS_GCC_DIAGNOSTIC
+#pragma GCC diagnostic pop
+#endif
+    const int is_unsigned = neg_one > const_zero; /* (T)-1 > 0 holds only for unsigned T; `long` is signed, so this is 0 here */
+#if PY_MAJOR_VERSION < 3
+    if (likely(PyInt_Check(x))) {
+        if (sizeof(long) < sizeof(long)) { /* never true for this instantiation (same type on both sides) */
+            __PYX_VERIFY_RETURN_INT(long, long, PyInt_AS_LONG(x))
+        } else {
+            long val = PyInt_AS_LONG(x);
+            if (is_unsigned && unlikely(val < 0)) {
+                goto raise_neg_overflow;
+            }
+            return (long) val;
+        }
+    } else
+#endif
+    if (likely(PyLong_Check(x))) {
+        if (is_unsigned) { /* unsigned-target branch */
+#if CYTHON_USE_PYLONG_INTERNALS
+            const digit* digits = ((PyLongObject*)x)->ob_digit;
+            switch (Py_SIZE(x)) { /* fast path keyed on the digit count (0..4) */
+                case  0: return (long) 0;
+                case  1: __PYX_VERIFY_RETURN_INT(long, digit, digits[0])
+                case 2:
+                    if (8 * sizeof(long) > 1 * PyLong_SHIFT) {
+                        if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) {
+                            __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
+                        } else if (8 * sizeof(long) >= 2 * PyLong_SHIFT) {
+                            return (long) (((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0]));
+                        }
+                    }
+                    break;
+                case 3:
+                    if (8 * sizeof(long) > 2 * PyLong_SHIFT) {
+                        if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) {
+                            __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
+                        } else if (8 * sizeof(long) >= 3 * PyLong_SHIFT) {
+                            return (long) (((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]));
+                        }
+                    }
+                    break;
+                case 4:
+                    if (8 * sizeof(long) > 3 * PyLong_SHIFT) {
+                        if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) {
+                            __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
+                        } else if (8 * sizeof(long) >= 4 * PyLong_SHIFT) {
+                            return (long) (((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]));
+                        }
+                    }
+                    break;
+            }
+#endif
+#if CYTHON_COMPILING_IN_CPYTHON
+            if (unlikely(Py_SIZE(x) < 0)) { /* negative input is rejected for an unsigned target */
+                goto raise_neg_overflow;
+            }
+#else
+            {
+                int result = PyObject_RichCompareBool(x, Py_False, Py_LT); /* x < False: sign test through the generic comparison API */
+                if (unlikely(result < 0))
+                    return (long) -1;
+                if (unlikely(result == 1))
+                    goto raise_neg_overflow;
+            }
+#endif
+            if (sizeof(long) <= sizeof(unsigned long)) {
+                __PYX_VERIFY_RETURN_INT_EXC(long, unsigned long, PyLong_AsUnsignedLong(x))
+#ifdef HAVE_LONG_LONG
+            } else if (sizeof(long) <= sizeof(unsigned PY_LONG_LONG)) {
+                __PYX_VERIFY_RETURN_INT_EXC(long, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x))
+#endif
+            }
+        } else { /* signed-target branch */
+#if CYTHON_USE_PYLONG_INTERNALS
+            const digit* digits = ((PyLongObject*)x)->ob_digit;
+            switch (Py_SIZE(x)) { /* negative case labels negate the assembled magnitude */
+                case  0: return (long) 0;
+                case -1: __PYX_VERIFY_RETURN_INT(long, sdigit, (sdigit) (-(sdigit)digits[0]))
+                case  1: __PYX_VERIFY_RETURN_INT(long,  digit, +digits[0])
+                case -2:
+                    if (8 * sizeof(long) - 1 > 1 * PyLong_SHIFT) {
+                        if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) {
+                            __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
+                        } else if (8 * sizeof(long) - 1 > 2 * PyLong_SHIFT) {
+                            return (long) (((long)-1)*(((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0])));
+                        }
+                    }
+                    break;
+                case 2:
+                    if (8 * sizeof(long) > 1 * PyLong_SHIFT) {
+                        if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) {
+                            __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
+                        } else if (8 * sizeof(long) - 1 > 2 * PyLong_SHIFT) {
+                            return (long) ((((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0])));
+                        }
+                    }
+                    break;
+                case -3:
+                    if (8 * sizeof(long) - 1 > 2 * PyLong_SHIFT) {
+                        if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) {
+                            __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
+                        } else if (8 * sizeof(long) - 1 > 3 * PyLong_SHIFT) {
+                            return (long) (((long)-1)*(((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0])));
+                        }
+                    }
+                    break;
+                case 3:
+                    if (8 * sizeof(long) > 2 * PyLong_SHIFT) {
+                        if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) {
+                            __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
+                        } else if (8 * sizeof(long) - 1 > 3 * PyLong_SHIFT) {
+                            return (long) ((((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0])));
+                        }
+                    }
+                    break;
+                case -4:
+                    if (8 * sizeof(long) - 1 > 3 * PyLong_SHIFT) {
+                        if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) {
+                            __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
+                        } else if (8 * sizeof(long) - 1 > 4 * PyLong_SHIFT) {
+                            return (long) (((long)-1)*(((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0])));
+                        }
+                    }
+                    break;
+                case 4:
+                    if (8 * sizeof(long) > 3 * PyLong_SHIFT) {
+                        if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) {
+                            __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
+                        } else if (8 * sizeof(long) - 1 > 4 * PyLong_SHIFT) {
+                            return (long) ((((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0])));
+                        }
+                    }
+                    break;
+            }
+#endif
+            if (sizeof(long) <= sizeof(long)) { /* always true for this instantiation */
+                __PYX_VERIFY_RETURN_INT_EXC(long, long, PyLong_AsLong(x))
+#ifdef HAVE_LONG_LONG
+            } else if (sizeof(long) <= sizeof(PY_LONG_LONG)) {
+                __PYX_VERIFY_RETURN_INT_EXC(long, PY_LONG_LONG, PyLong_AsLongLong(x))
+#endif
+            }
+        }
+        { /* last-resort conversion through the raw byte representation */
+#if CYTHON_COMPILING_IN_PYPY && !defined(_PyLong_AsByteArray)
+            PyErr_SetString(PyExc_RuntimeError,
+                            "_PyLong_AsByteArray() not available in PyPy, cannot convert large numbers");
+#else
+            long val;
+            PyObject *v = __Pyx_PyNumber_IntOrLong(x);
+ #if PY_MAJOR_VERSION < 3
+            if (likely(v) && !PyLong_Check(v)) {
+                PyObject *tmp = v;
+                v = PyNumber_Long(tmp);
+                Py_DECREF(tmp);
+            }
+ #endif
+            if (likely(v)) {
+                int one = 1; int is_little = (int)*(unsigned char *)&one; /* first byte of the int 1 is 1 only on little-endian hosts */
+                unsigned char *bytes = (unsigned char *)&val;
+                int ret = _PyLong_AsByteArray((PyLongObject *)v,
+                                              bytes, sizeof(val),
+                                              is_little, !is_unsigned);
+                Py_DECREF(v);
+                if (likely(!ret))
+                    return val;
+            }
+#endif
+            return (long) -1;
+        }
+    } else { /* neither PyInt nor PyLong: coerce first, then convert the coerced object */
+        long val;
+        PyObject *tmp = __Pyx_PyNumber_IntOrLong(x);
+        if (!tmp) return (long) -1;
+        val = __Pyx_PyInt_As_long(tmp);
+        Py_DECREF(tmp);
+        return val;
+    }
+raise_overflow: /* NOTE(review): no goto to this label appears in the function text; presumably the __PYX_VERIFY_RETURN_INT* macros jump here -- confirm */
+    PyErr_SetString(PyExc_OverflowError,
+        "value too large to convert to long");
+    return (long) -1;
+raise_neg_overflow: /* reached from the explicit sign checks above */
+    PyErr_SetString(PyExc_OverflowError,
+        "can't convert negative value to long");
+    return (long) -1;
+}
+
+/* CIntFromPy
+ *
+ * __Pyx_PyInt_As_char: convert a Python object to a C `char`.
+ *
+ * Same structure as the other CIntFromPy converters in this file:
+ *   - Python 2 only: a PyInt is read with PyInt_AS_LONG and range-checked
+ *     by __PYX_VERIFY_RETURN_INT.
+ *   - A PyLong goes through an optional digit-level fast path
+ *     (CYTHON_USE_PYLONG_INTERNALS), then PyLong_As[Unsigned]Long[Long],
+ *     then a _PyLong_AsByteArray fallback.
+ *   - Any other object is coerced with __Pyx_PyNumber_IntOrLong and the
+ *     function calls itself on the result.
+ *
+ * Whether plain `char` is signed is implementation-defined in C, so the
+ * `is_unsigned` probe below selects the signed or unsigned branch per platform.
+ *
+ * The failure paths visible here return (char) -1; the two labels at the end
+ * set OverflowError before doing so.
+ * NOTE(review): -1 is also a legitimate result on signed-char platforms, so
+ * callers presumably consult PyErr_Occurred() -- confirm at call sites.
+ */
+  static CYTHON_INLINE char __Pyx_PyInt_As_char(PyObject *x) {
+#ifdef __Pyx_HAS_GCC_DIAGNOSTIC
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wconversion"
+#endif
+    const char neg_one = (char) -1, const_zero = (char) 0; /* operands of the signedness probe below */
+#ifdef __Pyx_HAS_GCC_DIAGNOSTIC
+#pragma GCC diagnostic pop
+#endif
+    const int is_unsigned = neg_one > const_zero; /* (T)-1 > 0 holds only when plain char is unsigned */
+#if PY_MAJOR_VERSION < 3
+    if (likely(PyInt_Check(x))) {
+        if (sizeof(char) < sizeof(long)) { /* sizeof(char) is 1, so this holds whenever long is wider than one byte */
+            __PYX_VERIFY_RETURN_INT(char, long, PyInt_AS_LONG(x))
+        } else {
+            long val = PyInt_AS_LONG(x);
+            if (is_unsigned && unlikely(val < 0)) {
+                goto raise_neg_overflow;
+            }
+            return (char) val;
+        }
+    } else
+#endif
+    if (likely(PyLong_Check(x))) {
+        if (is_unsigned) { /* unsigned-char branch */
+#if CYTHON_USE_PYLONG_INTERNALS
+            const digit* digits = ((PyLongObject*)x)->ob_digit;
+            switch (Py_SIZE(x)) { /* NOTE(review): with the usual PyLong_SHIFT of 15 or 30 the multi-digit cases look unreachable for an 8-bit char -- confirm */
+                case  0: return (char) 0;
+                case  1: __PYX_VERIFY_RETURN_INT(char, digit, digits[0])
+                case 2:
+                    if (8 * sizeof(char) > 1 * PyLong_SHIFT) {
+                        if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) {
+                            __PYX_VERIFY_RETURN_INT(char, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
+                        } else if (8 * sizeof(char) >= 2 * PyLong_SHIFT) {
+                            return (char) (((((char)digits[1]) << PyLong_SHIFT) | (char)digits[0]));
+                        }
+                    }
+                    break;
+                case 3:
+                    if (8 * sizeof(char) > 2 * PyLong_SHIFT) {
+                        if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) {
+                            __PYX_VERIFY_RETURN_INT(char, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
+                        } else if (8 * sizeof(char) >= 3 * PyLong_SHIFT) {
+                            return (char) (((((((char)digits[2]) << PyLong_SHIFT) | (char)digits[1]) << PyLong_SHIFT) | (char)digits[0]));
+                        }
+                    }
+                    break;
+                case 4:
+                    if (8 * sizeof(char) > 3 * PyLong_SHIFT) {
+                        if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) {
+                            __PYX_VERIFY_RETURN_INT(char, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
+                        } else if (8 * sizeof(char) >= 4 * PyLong_SHIFT) {
+                            return (char) (((((((((char)digits[3]) << PyLong_SHIFT) | (char)digits[2]) << PyLong_SHIFT) | (char)digits[1]) << PyLong_SHIFT) | (char)digits[0]));
+                        }
+                    }
+                    break;
+            }
+#endif
+#if CYTHON_COMPILING_IN_CPYTHON
+            if (unlikely(Py_SIZE(x) < 0)) { /* negative input is rejected for an unsigned target */
+                goto raise_neg_overflow;
+            }
+#else
+            {
+                int result = PyObject_RichCompareBool(x, Py_False, Py_LT); /* x < False: sign test through the generic comparison API */
+                if (unlikely(result < 0))
+                    return (char) -1;
+                if (unlikely(result == 1))
+                    goto raise_neg_overflow;
+            }
+#endif
+            if (sizeof(char) <= sizeof(unsigned long)) {
+                __PYX_VERIFY_RETURN_INT_EXC(char, unsigned long, PyLong_AsUnsignedLong(x))
+#ifdef HAVE_LONG_LONG
+            } else if (sizeof(char) <= sizeof(unsigned PY_LONG_LONG)) {
+                __PYX_VERIFY_RETURN_INT_EXC(char, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x))
+#endif
+            }
+        } else { /* signed-char branch */
+#if CYTHON_USE_PYLONG_INTERNALS
+            const digit* digits = ((PyLongObject*)x)->ob_digit;
+            switch (Py_SIZE(x)) { /* negative case labels negate the assembled magnitude */
+                case  0: return (char) 0;
+                case -1: __PYX_VERIFY_RETURN_INT(char, sdigit, (sdigit) (-(sdigit)digits[0]))
+                case  1: __PYX_VERIFY_RETURN_INT(char,  digit, +digits[0])
+                case -2:
+                    if (8 * sizeof(char) - 1 > 1 * PyLong_SHIFT) {
+                        if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) {
+                            __PYX_VERIFY_RETURN_INT(char, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
+                        } else if (8 * sizeof(char) - 1 > 2 * PyLong_SHIFT) {
+                            return (char) (((char)-1)*(((((char)digits[1]) << PyLong_SHIFT) | (char)digits[0])));
+                        }
+                    }
+                    break;
+                case 2:
+                    if (8 * sizeof(char) > 1 * PyLong_SHIFT) {
+                        if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) {
+                            __PYX_VERIFY_RETURN_INT(char, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
+                        } else if (8 * sizeof(char) - 1 > 2 * PyLong_SHIFT) {
+                            return (char) ((((((char)digits[1]) << PyLong_SHIFT) | (char)digits[0])));
+                        }
+                    }
+                    break;
+                case -3:
+                    if (8 * sizeof(char) - 1 > 2 * PyLong_SHIFT) {
+                        if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) {
+                            __PYX_VERIFY_RETURN_INT(char, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
+                        } else if (8 * sizeof(char) - 1 > 3 * PyLong_SHIFT) {
+                            return (char) (((char)-1)*(((((((char)digits[2]) << PyLong_SHIFT) | (char)digits[1]) << PyLong_SHIFT) | (char)digits[0])));
+                        }
+                    }
+                    break;
+                case 3:
+                    if (8 * sizeof(char) > 2 * PyLong_SHIFT) {
+                        if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) {
+                            __PYX_VERIFY_RETURN_INT(char, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
+                        } else if (8 * sizeof(char) - 1 > 3 * PyLong_SHIFT) {
+                            return (char) ((((((((char)digits[2]) << PyLong_SHIFT) | (char)digits[1]) << PyLong_SHIFT) | (char)digits[0])));
+                        }
+                    }
+                    break;
+                case -4:
+                    if (8 * sizeof(char) - 1 > 3 * PyLong_SHIFT) {
+                        if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) {
+                            __PYX_VERIFY_RETURN_INT(char, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
+                        } else if (8 * sizeof(char) - 1 > 4 * PyLong_SHIFT) {
+                            return (char) (((char)-1)*(((((((((char)digits[3]) << PyLong_SHIFT) | (char)digits[2]) << PyLong_SHIFT) | (char)digits[1]) << PyLong_SHIFT) | (char)digits[0])));
+                        }
+                    }
+                    break;
+                case 4:
+                    if (8 * sizeof(char) > 3 * PyLong_SHIFT) {
+                        if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) {
+                            __PYX_VERIFY_RETURN_INT(char, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
+                        } else if (8 * sizeof(char) - 1 > 4 * PyLong_SHIFT) {
+                            return (char) ((((((((((char)digits[3]) << PyLong_SHIFT) | (char)digits[2]) << PyLong_SHIFT) | (char)digits[1]) << PyLong_SHIFT) | (char)digits[0])));
+                        }
+                    }
+                    break;
+            }
+#endif
+            if (sizeof(char) <= sizeof(long)) { /* always true: sizeof(char) is 1 */
+                __PYX_VERIFY_RETURN_INT_EXC(char, long, PyLong_AsLong(x))
+#ifdef HAVE_LONG_LONG
+            } else if (sizeof(char) <= sizeof(PY_LONG_LONG)) {
+                __PYX_VERIFY_RETURN_INT_EXC(char, PY_LONG_LONG, PyLong_AsLongLong(x))
+#endif
+            }
+        }
+        { /* last-resort conversion through the raw byte representation */
+#if CYTHON_COMPILING_IN_PYPY && !defined(_PyLong_AsByteArray)
+            PyErr_SetString(PyExc_RuntimeError,
+                            "_PyLong_AsByteArray() not available in PyPy, cannot convert large numbers");
+#else
+            char val;
+            PyObject *v = __Pyx_PyNumber_IntOrLong(x);
+ #if PY_MAJOR_VERSION < 3
+            if (likely(v) && !PyLong_Check(v)) {
+                PyObject *tmp = v;
+                v = PyNumber_Long(tmp);
+                Py_DECREF(tmp);
+            }
+ #endif
+            if (likely(v)) {
+                int one = 1; int is_little = (int)*(unsigned char *)&one; /* first byte of the int 1 is 1 only on little-endian hosts */
+                unsigned char *bytes = (unsigned char *)&val;
+                int ret = _PyLong_AsByteArray((PyLongObject *)v,
+                                              bytes, sizeof(val),
+                                              is_little, !is_unsigned);
+                Py_DECREF(v);
+                if (likely(!ret))
+                    return val;
+            }
+#endif
+            return (char) -1;
+        }
+    } else { /* neither PyInt nor PyLong: coerce first, then convert the coerced object */
+        char val;
+        PyObject *tmp = __Pyx_PyNumber_IntOrLong(x);
+        if (!tmp) return (char) -1;
+        val = __Pyx_PyInt_As_char(tmp);
+        Py_DECREF(tmp);
+        return val;
+    }
+raise_overflow: /* NOTE(review): no goto to this label appears in the function text; presumably the __PYX_VERIFY_RETURN_INT* macros jump here -- confirm */
+    PyErr_SetString(PyExc_OverflowError,
+        "value too large to convert to char");
+    return (char) -1;
+raise_neg_overflow: /* reached from the explicit sign checks above */
+    PyErr_SetString(PyExc_OverflowError,
+        "can't convert negative value to char");
+    return (char) -1;
+}
+
+/* CheckBinaryVersion */
+  /* Warn when this module was compiled against a different Python
+   * "major.minor" than the interpreter that is loading it.
+   *
+   * The compile-time version comes from PY_MAJOR_VERSION / PY_MINOR_VERSION;
+   * the runtime version is the leading "X.Y" prefix of Py_GetVersion().
+   *
+   * The previous implementation used 4-byte buffers and compared only the
+   * characters at index 0 and 2. That truncated "3.10" to "3.1" and made
+   * every minor version from 10 to 19 look the same as minor version 1, so
+   * a 3.10-built module loaded into 3.11 was never reported. The buffers are
+   * now large enough for multi-digit components and the full "major.minor"
+   * strings are compared.
+   *
+   * Returns 0 when the versions match; otherwise returns the result of
+   * PyErr_WarnEx() for the mismatch warning.
+   */
+  static int __Pyx_check_binary_version(void) {
+    char ctversion[16], rtversion[16];
+    const char *rt = Py_GetVersion();
+    size_t i;
+    int seen_dot = 0;
+    PyOS_snprintf(ctversion, sizeof(ctversion), "%d.%d", PY_MAJOR_VERSION, PY_MINOR_VERSION);
+    /* Copy digits up to (not including) the second '.', or up to the first
+     * character that is neither a digit nor the first '.'. */
+    for (i = 0; i < sizeof(rtversion) - 1; ++i) {
+        const char c = rt[i];
+        if (c == '.') {
+            if (seen_dot) break;
+            seen_dot = 1;
+        } else if (c < '0' || c > '9') {
+            break;
+        }
+        rtversion[i] = c;
+    }
+    rtversion[i] = '\0';
+    if (strcmp(ctversion, rtversion) != 0) {
+        char message[200];
+        PyOS_snprintf(message, sizeof(message),
+                      "compiletime version %s of module '%.100s' "
+                      "does not match runtime version %s",
+                      ctversion, __Pyx_MODULE_NAME, rtversion);
+        return PyErr_WarnEx(NULL, message, 1);
+    }
+    return 0;
+}
+
+/* InitStrings */
+  /* Materialise the Python object for every entry of a string table.
+   *
+   * Walks `t` until it reaches an entry whose `p` slot is NULL. For each
+   * entry the object is built from `t->s` (length `t->n - 1`) according to
+   * the entry's flags, stored through `t->p`, and hashed once.
+   *
+   * Returns 0 on success, or -1 as soon as an object cannot be created or
+   * hashed.
+   */
+  static int __Pyx_InitStrings(__Pyx_StringTabEntry *t) {
+    for (; t->p; ++t) {
+        PyObject *created;
+        #if PY_MAJOR_VERSION < 3
+        if (t->is_unicode) {
+            created = PyUnicode_DecodeUTF8(t->s, t->n - 1, NULL);
+        } else if (t->intern) {
+            created = PyString_InternFromString(t->s);
+        } else {
+            created = PyString_FromStringAndSize(t->s, t->n - 1);
+        }
+        #else
+        if (!(t->is_unicode | t->is_str)) {
+            created = PyBytes_FromStringAndSize(t->s, t->n - 1);
+        } else if (t->intern) {
+            created = PyUnicode_InternFromString(t->s);
+        } else if (t->encoding) {
+            created = PyUnicode_Decode(t->s, t->n - 1, t->encoding, NULL);
+        } else {
+            created = PyUnicode_FromStringAndSize(t->s, t->n - 1);
+        }
+        #endif
+        /* The slot is written even on failure, exactly like a direct assignment. */
+        *t->p = created;
+        if (!created)
+            return -1;
+        if (PyObject_Hash(created) == -1)
+            return -1;
+    }
+    return 0;
+}
+
+/* Build a Python unicode object from a NUL-terminated C string; the length
+ * is measured with strlen() and passed to the sized constructor. */
+static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char* c_str) {
+    const Py_ssize_t n_bytes = (Py_ssize_t)strlen(c_str);
+    return __Pyx_PyUnicode_FromStringAndSize(c_str, n_bytes);
+}
+/* Convenience wrapper around __Pyx_PyObject_AsStringAndSize for callers that
+ * only need the character pointer; the reported length is discarded. */
+static CYTHON_INLINE const char* __Pyx_PyObject_AsString(PyObject* o) {
+    Py_ssize_t unused_length;
+    const char* chars = __Pyx_PyObject_AsStringAndSize(o, &unused_length);
+    return chars;
+}
+#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT
+#if !CYTHON_PEP393_ENABLED
+/* Non-PEP-393 variant: expose the default-encoded bytes of a unicode object.
+ *
+ * Stores the byte length in *length and returns a pointer into the encoded
+ * bytes object, or NULL on failure. When the default encoding is required to
+ * be ASCII, any byte >= 128 makes the function call PyUnicode_AsASCIIString
+ * (result unused, presumably to raise the encoding error) and return NULL.
+ */
+static const char* __Pyx_PyUnicode_AsStringAndSize(PyObject* o, Py_ssize_t *length) {
+    char* buf;
+    PyObject* encoded = _PyUnicode_AsDefaultEncodedString(o, NULL);
+    if (!encoded) return NULL;
+    buf = PyBytes_AS_STRING(encoded);
+#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII
+    {
+        Py_ssize_t i;
+        const Py_ssize_t n = PyBytes_GET_SIZE(encoded);
+        for (i = 0; i < n; i++) {
+            if ((unsigned char) buf[i] > 127) {
+                PyUnicode_AsASCIIString(o);
+                return NULL;
+            }
+        }
+    }
+#endif
+    *length = PyBytes_GET_SIZE(encoded);
+    return buf;
+}
+#else
+/* PEP-393 variant: return the UTF-8 buffer of a unicode object.
+ *
+ * Returns NULL if the object cannot be made ready. When the default encoding
+ * is required to be ASCII, a non-ASCII string makes the function call
+ * PyUnicode_AsASCIIString (result unused, presumably to raise the encoding
+ * error) and return NULL; otherwise *length receives the string length.
+ */
+static CYTHON_INLINE const char* __Pyx_PyUnicode_AsStringAndSize(PyObject* o, Py_ssize_t *length) {
+    if (unlikely(__Pyx_PyUnicode_READY(o) == -1)) return NULL;
+#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII
+    if (unlikely(!PyUnicode_IS_ASCII(o))) {
+        PyUnicode_AsASCIIString(o);
+        return NULL;
+    }
+    *length = PyUnicode_GET_LENGTH(o);
+    return PyUnicode_AsUTF8(o);
+#else
+    return PyUnicode_AsUTF8AndSize(o, length);
+#endif
+}
+#endif
+#endif
+/* Return a character pointer and byte length for a string-like object.
+ *
+ * Checked in order: unicode (only when the default-encoding build options
+ * enable it), bytearray (when its macros are available), and finally
+ * anything PyBytes_AsStringAndSize accepts. Returns NULL on failure.
+ */
+static CYTHON_INLINE const char* __Pyx_PyObject_AsStringAndSize(PyObject* o, Py_ssize_t *length) {
+    char* bytes_ptr;
+#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT
+    if (
+#if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII
+            __Pyx_sys_getdefaultencoding_not_ascii &&
+#endif
+            PyUnicode_Check(o)) {
+        return __Pyx_PyUnicode_AsStringAndSize(o, length);
+    }
+#endif
+#if (!CYTHON_COMPILING_IN_PYPY) || (defined(PyByteArray_AS_STRING) && defined(PyByteArray_GET_SIZE))
+    if (PyByteArray_Check(o)) {
+        *length = PyByteArray_GET_SIZE(o);
+        return PyByteArray_AS_STRING(o);
+    }
+#endif
+    if (unlikely(PyBytes_AsStringAndSize(o, &bytes_ptr, length) < 0)) {
+        return NULL;
+    }
+    return bytes_ptr;
+}
+static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject* x) { /* Truth test with a pointer-comparison fast path for Py_True, Py_False and Py_None. */
+   int is_true = x == Py_True;
+   if (is_true | (x == Py_False) | (x == Py_None)) return is_true; /* for these three singletons the answer is simply (x == Py_True) */
+   else return PyObject_IsTrue(x); /* every other object goes through the generic protocol */
+}
+static CYTHON_INLINE int __Pyx_PyObject_IsTrueAndDecref(PyObject* x) { /* Truth test that also releases the caller's reference to x. */
+    int retval;
+    if (unlikely(!x)) return -1; /* a NULL input is reported as -1 without touching it */
+    retval = __Pyx_PyObject_IsTrue(x);
+    Py_DECREF(x); /* the reference is dropped whatever the outcome of the test */
+    return retval;
+}
+static PyObject* __Pyx_PyNumber_IntOrLongWrongResultType(PyObject* result, const char* type_name) { /* Deal with a conversion result of the wrong type: warn or raise TypeError. */
+#if PY_MAJOR_VERSION >= 3
+    if (PyLong_Check(result)) { /* Python 3: result passes PyLong_Check, so it is only warned about */
+        if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
+                "__int__ returned non-int (type %.200s).  "
+                "The ability to return an instance of a strict subclass of int "
+                "is deprecated, and may be removed in a future version of Python.",
+                Py_TYPE(result)->tp_name)) {
+            Py_DECREF(result); /* non-zero return from PyErr_WarnFormat: give up and release result */
+            return NULL;
+        }
+        return result; /* warning issued, result is still handed back */
+    }
+#endif
+    PyErr_Format(PyExc_TypeError,
+                 "__%.4s__ returned non-%.4s (type %.200s)", /* %.4s prints at most four characters of type_name */
+                 type_name, type_name, Py_TYPE(result)->tp_name);
+    Py_DECREF(result); /* the rejected result is released before returning NULL */
+    return NULL;
+}
+static CYTHON_INLINE PyObject* __Pyx_PyNumber_IntOrLong(PyObject* x) { /* Convert x to a Python integer object; NULL on failure. */
+#if CYTHON_USE_TYPE_SLOTS
+  PyNumberMethods *m;
+#endif
+  const char *name = NULL; /* slot name used in the error message; stays NULL when type slots are not used */
+  PyObject *res = NULL;
+#if PY_MAJOR_VERSION < 3
+  if (likely(PyInt_Check(x) || PyLong_Check(x)))
+#else
+  if (likely(PyLong_Check(x)))
+#endif
+    return __Pyx_NewRef(x); /* already an integer: hand it back through __Pyx_NewRef */
+#if CYTHON_USE_TYPE_SLOTS
+  m = Py_TYPE(x)->tp_as_number; /* call the number slots of the type directly */
+  #if PY_MAJOR_VERSION < 3
+  if (m && m->nb_int) {
+    name = "int";
+    res = m->nb_int(x);
+  }
+  else if (m && m->nb_long) { /* Python 2 only: nb_long is tried when nb_int is missing */
+    name = "long";
+    res = m->nb_long(x);
+  }
+  #else
+  if (likely(m && m->nb_int)) {
+    name = "int";
+    res = m->nb_int(x);
+  }
+  #endif
+#else
+  if (!PyBytes_CheckExact(x) && !PyUnicode_CheckExact(x)) { /* NOTE(review): exact bytes/str are skipped, presumably so they are not parsed as numbers - confirm */
+    res = PyNumber_Int(x);
+  }
+#endif
+  if (likely(res)) {
+#if PY_MAJOR_VERSION < 3
+    if (unlikely(!PyInt_Check(res) && !PyLong_Check(res))) {
+#else
+    if (unlikely(!PyLong_CheckExact(res))) {
+#endif
+        return __Pyx_PyNumber_IntOrLongWrongResultType(res, name); /* the helper warns or raises for the unexpected type */
+    }
+  }
+  else if (!PyErr_Occurred()) { /* no result and no pending exception: raise a generic TypeError */
+    PyErr_SetString(PyExc_TypeError,
+                    "an integer is required");
+  }
+  return res;
+}
+static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject* b) { /* Convert an index-like object to Py_ssize_t; returns -1 when PyNumber_Index() fails. */
+  Py_ssize_t ival;
+  PyObject *x;
+#if PY_MAJOR_VERSION < 3
+  if (likely(PyInt_CheckExact(b))) {
+    if (sizeof(Py_ssize_t) >= sizeof(long)) /* a long always fits: read the value directly */
+        return PyInt_AS_LONG(b);
+    else
+        return PyInt_AsSsize_t(b);
+  }
+#endif
+  if (likely(PyLong_CheckExact(b))) {
+    #if CYTHON_USE_PYLONG_INTERNALS
+    const digit* digits = ((PyLongObject*)b)->ob_digit; /* fast path: read the PyLong digits directly */
+    const Py_ssize_t size = Py_SIZE(b); /* signed digit count, as the code below treats negative sizes as negative values */
+    if (likely(__Pyx_sst_abs(size) <= 1)) { /* zero or a single digit */
+        ival = likely(size) ? digits[0] : 0;
+        if (size == -1) ival = -ival;
+        return ival;
+    } else {
+      switch (size) { /* 2 to 4 digits, each case guarded by a width check on Py_ssize_t */
+         case 2:
+           if (8 * sizeof(Py_ssize_t) > 2 * PyLong_SHIFT) {
+             return (Py_ssize_t) (((((size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0]));
+           }
+           break;
+         case -2:
+           if (8 * sizeof(Py_ssize_t) > 2 * PyLong_SHIFT) {
+             return -(Py_ssize_t) (((((size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0]));
+           }
+           break;
+         case 3:
+           if (8 * sizeof(Py_ssize_t) > 3 * PyLong_SHIFT) {
+             return (Py_ssize_t) (((((((size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0]));
+           }
+           break;
+         case -3:
+           if (8 * sizeof(Py_ssize_t) > 3 * PyLong_SHIFT) {
+             return -(Py_ssize_t) (((((((size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0]));
+           }
+           break;
+         case 4:
+           if (8 * sizeof(Py_ssize_t) > 4 * PyLong_SHIFT) {
+             return (Py_ssize_t) (((((((((size_t)digits[3]) << PyLong_SHIFT) | (size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0]));
+           }
+           break;
+         case -4:
+           if (8 * sizeof(Py_ssize_t) > 4 * PyLong_SHIFT) {
+             return -(Py_ssize_t) (((((((((size_t)digits[3]) << PyLong_SHIFT) | (size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0]));
+           }
+           break;
+      }
+    }
+    #endif
+    return PyLong_AsSsize_t(b); /* generic path, also reached when a case above breaks out */
+  }
+  x = PyNumber_Index(b); /* not an exact int: convert through PyNumber_Index() first */
+  if (!x) return -1;
+  ival = PyInt_AsSsize_t(x);
+  Py_DECREF(x); /* release the temporary integer */
+  return ival;
+}
+static CYTHON_INLINE Py_hash_t __Pyx_PyIndex_AsHash_t(PyObject* o) { /* Convert an index-like object to Py_hash_t; returns -1 when PyNumber_Index() fails. */
+  if (sizeof(Py_hash_t) == sizeof(Py_ssize_t)) { /* same width: reuse the Py_ssize_t conversion */
+    return (Py_hash_t) __Pyx_PyIndex_AsSsize_t(o);
+#if PY_MAJOR_VERSION < 3
+  } else if (likely(PyInt_CheckExact(o))) {
+    return PyInt_AS_LONG(o);
+#endif
+  } else { /* different widths: convert through PyNumber_Index() and PyInt_AsLong() */
+    Py_ssize_t ival;
+    PyObject *x;
+    x = PyNumber_Index(o);
+    if (!x) return -1;
+    ival = PyInt_AsLong(x);
+    Py_DECREF(x); /* release the temporary integer */
+    return ival;
+  }
+}
+static CYTHON_INLINE PyObject * __Pyx_PyBool_FromLong(long b) { /* Map a C truth value to Py_True or Py_False. */
+  return b ? __Pyx_NewRef(Py_True) : __Pyx_NewRef(Py_False); /* either singleton is returned through __Pyx_NewRef */
+}
+static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t ival) { /* Thin wrapper around PyInt_FromSize_t(). */
+    return PyInt_FromSize_t(ival);
+}
+
+
+#endif /* Py_PYTHON_H */
diff --git a/TTS/tts/utils/monotonic_align/core.cpython-37m-darwin.so b/TTS/tts/utils/monotonic_align/core.cpython-37m-darwin.so
new file mode 100644
index 0000000000000000000000000000000000000000..4a3f68e59df23e40c8a361649c25892b57efce50
Binary files /dev/null and b/TTS/tts/utils/monotonic_align/core.cpython-37m-darwin.so differ
diff --git a/TTS/tts/utils/monotonic_align/core.cpython-39-darwin.so b/TTS/tts/utils/monotonic_align/core.cpython-39-darwin.so
new file mode 100644
index 0000000000000000000000000000000000000000..f7d4b7a1e4c16766940fe576e6da2fe3e4674afb
Binary files /dev/null and b/TTS/tts/utils/monotonic_align/core.cpython-39-darwin.so differ
diff --git a/TTS/tts/utils/monotonic_align/core.pyx b/TTS/tts/utils/monotonic_align/core.pyx
new file mode 100644
index 0000000000000000000000000000000000000000..091fcc3a50a51f3d3fee47a70825260757e6d885
--- /dev/null
+++ b/TTS/tts/utils/monotonic_align/core.pyx
@@ -0,0 +1,47 @@
+import numpy as np
+
+cimport cython
+cimport numpy as np
+
+from cython.parallel import prange
+
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+cdef void maximum_path_each(int[:,::1] path, float[:,::1] value, int t_x, int t_y, float max_neg_val) nogil:  # Mark one row per column in `path`; `value` is overwritten with cumulative scores.
+  cdef int x
+  cdef int y
+  cdef float v_prev
+  cdef float v_cur
+  cdef float tmp
+  cdef int index = t_x - 1  # backtracking starts on the last row
+
+  for y in range(t_y):  # forward pass: accumulate the best score reaching each visited (x, y)
+    for x in range(max(0, t_x + y - t_y), min(t_x, y + 1)):  # only this band of rows is visited for column y
+      if x == y:
+        v_cur = max_neg_val  # x == y: the same-row predecessor (x, y-1) is ruled out
+      else:
+        v_cur = value[x, y-1]
+      if x == 0:
+        if y == 0:
+          v_prev = 0.  # the start cell (0, 0) has no predecessor cost
+        else:
+          v_prev = max_neg_val  # row 0 has no row above it to come from
+      else:
+        v_prev = value[x-1, y-1]
+      value[x, y] = max(v_cur, v_prev) + value[x, y]  # best predecessor plus this cell's own score
+
+  for y in range(t_y - 1, -1, -1):  # backward pass: walk the columns right to left
+    path[index, y] = 1
+    if index != 0 and (index == y or value[index, y-1] < value[index-1, y-1]):  # NOTE(review): at y == 0 with index != 0 this reads value[index, -1] under wraparound(False) - confirm index is always 0 by then
+      index = index - 1  # step up one row when forced (index == y) or when the row above scored higher
+
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+cpdef void maximum_path_c(int[:,:,::1] paths, float[:,:,::1] values, int[::1] t_xs, int[::1] t_ys, float max_neg_val=-1e9) nogil:  # Run maximum_path_each on every batch entry; `paths` and `values` are modified in place.
+  cdef int b = values.shape[0]  # batch size
+
+  cdef int i
+  for i in prange(b, nogil=True):  # batch entries are processed in a prange (parallel) loop
+    maximum_path_each(paths[i], values[i], t_xs[i], t_ys[i], max_neg_val)  # entry i uses its own t_x / t_y
diff --git a/TTS/tts/utils/monotonic_align/setup.py b/TTS/tts/utils/monotonic_align/setup.py
new file mode 100644
index 0000000000000000000000000000000000000000..f22bc6a35a5a04c9e6d7b82040973722c9b770c9
--- /dev/null
+++ b/TTS/tts/utils/monotonic_align/setup.py
@@ -0,0 +1,7 @@
+# from distutils.core import setup
+# from Cython.Build import cythonize
+# import numpy
+
+# setup(name='monotonic_align',
+#       ext_modules=cythonize("core.pyx"),
+#       include_dirs=[numpy.get_include()])
diff --git a/TTS/tts/utils/speakers.py b/TTS/tts/utils/speakers.py
new file mode 100644
index 0000000000000000000000000000000000000000..21fefa0b32cd64be21a0fa5c1844edf928063846
--- /dev/null
+++ b/TTS/tts/utils/speakers.py
@@ -0,0 +1,226 @@
+import json
+import os
+from typing import Any, Dict, List, Union
+
+import fsspec
+import numpy as np
+import torch
+from coqpit import Coqpit
+
+from TTS.config import get_from_config_or_model_args_with_default
+from TTS.tts.utils.managers import EmbeddingManager
+
+
+class SpeakerManager(EmbeddingManager):
+    """Manage the speakers for multi-speaker 🐸TTS models. Load a datafile and parse the information
+    in a way that can be queried by speaker or clip.
+
+    There are 3 different scenarios considered:
+
+    1. Models using speaker embedding layers. The datafile only maps speaker names to ids used by the embedding layer.
+    2. Models using d-vectors. The datafile includes a dictionary in the following format.
+
+    ::
+
+        {
+            'clip_name.wav':{
+                'name': 'speakerA',
+                'embedding'[<d_vector_values>]
+            },
+            ...
+        }
+
+
+    3. Computing the d-vectors by the speaker encoder. It loads the speaker encoder model and
+    computes the d-vectors for a given clip or speaker.
+
+    Args:
+        d_vectors_file_path (str, optional): Path to the metafile including x vectors. Defaults to "".
+        speaker_id_file_path (str, optional): Path to the metafile that maps speaker names to ids used by
+        TTS models. Defaults to "".
+        encoder_model_path (str, optional): Path to the speaker encoder model file. Defaults to "".
+        encoder_config_path (str, optional): Path to the spealer encoder config file. Defaults to "".
+
+    Examples:
+        >>> # load audio processor and speaker encoder
+        >>> ap = AudioProcessor(**config.audio)
+        >>> manager = SpeakerManager(encoder_model_path=encoder_model_path, encoder_config_path=encoder_config_path)
+        >>> # load a sample audio and compute embedding
+        >>> waveform = ap.load_wav(sample_wav_path)
+        >>> mel = ap.melspectrogram(waveform)
+        >>> d_vector = manager.compute_embeddings(mel.T)
+    """
+
+    def __init__(
+        self,
+        data_items: List[List[Any]] = None,
+        d_vectors_file_path: str = "",
+        speaker_id_file_path: str = "",
+        encoder_model_path: str = "",
+        encoder_config_path: str = "",
+        use_cuda: bool = False,
+    ):
+        super().__init__(
+            embedding_file_path=d_vectors_file_path,
+            id_file_path=speaker_id_file_path,
+            encoder_model_path=encoder_model_path,
+            encoder_config_path=encoder_config_path,
+            use_cuda=use_cuda,
+        )
+
+        if data_items:
+            self.set_ids_from_data(data_items, parse_key="speaker_name")
+
+    @property
+    def num_speakers(self):
+        return len(self.name_to_id)
+
+    @property
+    def speaker_names(self):
+        return list(self.name_to_id.keys())
+
+    def get_speakers(self) -> List:
+        return self.name_to_id
+
+    @staticmethod
+    def init_from_config(config: "Coqpit", samples: Union[List[List], List[Dict]] = None) -> "SpeakerManager":
+        """Initialize a speaker manager from config
+
+        Args:
+            config (Coqpit): Config object.
+            samples (Union[List[List], List[Dict]], optional): List of data samples to parse out the speaker names.
+                Defaults to None.
+
+        Returns:
+            SpeakerEncoder: Speaker encoder object.
+        """
+        speaker_manager = None
+        if get_from_config_or_model_args_with_default(config, "use_speaker_embedding", False):
+            if samples:
+                speaker_manager = SpeakerManager(data_items=samples)
+            if get_from_config_or_model_args_with_default(config, "speaker_file", None):
+                speaker_manager = SpeakerManager(
+                    speaker_id_file_path=get_from_config_or_model_args_with_default(config, "speaker_file", None)
+                )
+            if get_from_config_or_model_args_with_default(config, "speakers_file", None):
+                speaker_manager = SpeakerManager(
+                    speaker_id_file_path=get_from_config_or_model_args_with_default(config, "speakers_file", None)
+                )
+
+        if get_from_config_or_model_args_with_default(config, "use_d_vector_file", False):
+            speaker_manager = SpeakerManager()
+            if get_from_config_or_model_args_with_default(config, "speakers_file", None):
+                speaker_manager = SpeakerManager(
+                    d_vectors_file_path=get_from_config_or_model_args_with_default(config, "speaker_file", None)
+                )
+            if get_from_config_or_model_args_with_default(config, "d_vector_file", None):
+                speaker_manager = SpeakerManager(
+                    d_vectors_file_path=get_from_config_or_model_args_with_default(config, "d_vector_file", None)
+                )
+        return speaker_manager
+
+
+def _set_file_path(path):
+    """Find the speakers.json under the given path or the above it.
+    Intended to band aid the different paths returned in restored and continued training."""
+    path_restore = os.path.join(os.path.dirname(path), "speakers.json")
+    path_continue = os.path.join(path, "speakers.json")
+    fs = fsspec.get_mapper(path).fs
+    if fs.exists(path_restore):
+        return path_restore
+    if fs.exists(path_continue):
+        return path_continue
+    raise FileNotFoundError(f" [!] `speakers.json` not found in {path}")
+
+
+def load_speaker_mapping(out_path):
+    """Loads speaker mapping if already present."""
+    if os.path.splitext(out_path)[1] == ".json":
+        json_file = out_path
+    else:
+        json_file = _set_file_path(out_path)
+    with fsspec.open(json_file, "r") as f:
+        return json.load(f)
+
+
+def save_speaker_mapping(out_path, speaker_mapping):
+    """Saves speaker mapping if not yet present."""
+    if out_path is not None:
+        speakers_json_path = _set_file_path(out_path)
+        with fsspec.open(speakers_json_path, "w") as f:
+            json.dump(speaker_mapping, f, indent=4)
+
+
+def get_speaker_manager(c: Coqpit, data: List = None, restore_path: str = None, out_path: str = None) -> SpeakerManager:
+    """Initiate a `SpeakerManager` instance by the provided config.
+
+    Args:
+        c (Coqpit): Model configuration.
+        restore_path (str): Path to a previous training folder.
+        data (List): Data samples used in training to infer speakers from. It must be provided if speaker embedding
+            layers is used. Defaults to None.
+        out_path (str, optional): Save the generated speaker IDs to a output path. Defaults to None.
+
+    Returns:
+        SpeakerManager: initialized and ready to use instance.
+    """
+    speaker_manager = SpeakerManager()
+    if c.use_speaker_embedding:
+        if data is not None:
+            speaker_manager.set_ids_from_data(data, parse_key="speaker_name")
+        if restore_path:
+            speakers_file = _set_file_path(restore_path)
+            # restoring speaker manager from a previous run.
+            if c.use_d_vector_file:
+                # restore speaker manager with the embedding file
+                if not os.path.exists(speakers_file):
+                    print("WARNING: speakers.json was not found in restore_path, trying to use CONFIG.d_vector_file")
+                    if not os.path.exists(c.d_vector_file):
+                        raise RuntimeError(
+                            "You must copy the file speakers.json to restore_path, or set a valid file in CONFIG.d_vector_file"
+                        )
+                    speaker_manager.load_embeddings_from_file(c.d_vector_file)
+                speaker_manager.load_embeddings_from_file(speakers_file)
+            elif not c.use_d_vector_file:  # restor speaker manager with speaker ID file.
+                speaker_ids_from_data = speaker_manager.name_to_id
+                speaker_manager.load_ids_from_file(speakers_file)
+                assert all(
+                    speaker in speaker_manager.name_to_id for speaker in speaker_ids_from_data
+                ), " [!] You cannot introduce new speakers to a pre-trained model."
+        elif c.use_d_vector_file and c.d_vector_file:
+            # new speaker manager with external speaker embeddings.
+            speaker_manager.load_embeddings_from_file(c.d_vector_file)
+        elif c.use_d_vector_file and not c.d_vector_file:
+            raise "use_d_vector_file is True, so you need pass a external speaker embedding file."
+        elif c.use_speaker_embedding and "speakers_file" in c and c.speakers_file:
+            # new speaker manager with speaker IDs file.
+            speaker_manager.load_ids_from_file(c.speakers_file)
+
+        if speaker_manager.num_speakers > 0:
+            print(
+                " > Speaker manager is loaded with {} speakers: {}".format(
+                    speaker_manager.num_speakers, ", ".join(speaker_manager.name_to_id)
+                )
+            )
+
+        # save file if path is defined
+        if out_path:
+            out_file_path = os.path.join(out_path, "speakers.json")
+            print(f" > Saving `speakers.json` to {out_file_path}.")
+            if c.use_d_vector_file and c.d_vector_file:
+                speaker_manager.save_embeddings_to_file(out_file_path)
+            else:
+                speaker_manager.save_ids_to_file(out_file_path)
+    return speaker_manager
+
+
+def get_speaker_balancer_weights(items: list):
+    speaker_names = np.array([item["speaker_name"] for item in items])
+    unique_speaker_names = np.unique(speaker_names).tolist()
+    speaker_ids = [unique_speaker_names.index(l) for l in speaker_names]
+    speaker_count = np.array([len(np.where(speaker_names == l)[0]) for l in unique_speaker_names])
+    weight_speaker = 1.0 / speaker_count
+    dataset_samples_weight = np.array([weight_speaker[l] for l in speaker_ids])
+    # normalize
+    dataset_samples_weight = dataset_samples_weight / np.linalg.norm(dataset_samples_weight)
+    return torch.from_numpy(dataset_samples_weight).float()
diff --git a/TTS/tts/utils/ssim.py b/TTS/tts/utils/ssim.py
new file mode 100644
index 0000000000000000000000000000000000000000..887c859a398a866e084921ea7be1fdb61d9f65fa
--- /dev/null
+++ b/TTS/tts/utils/ssim.py
@@ -0,0 +1,389 @@
+# Adopted from https://github.com/photosynthesis-team/piq
+
+from typing import List, Optional, Tuple, Union
+import math
+import torch
+import torch.nn.functional as F
+from torch.nn.modules.loss import _Loss
+
+
+def _reduce(x: torch.Tensor, reduction: str = "mean") -> torch.Tensor:
+    r"""Reduce input in batch dimension if needed.
+    Args:
+        x: Tensor with shape (N, *).
+        reduction: Specifies the reduction type:
+            ``'none'`` | ``'mean'`` | ``'sum'``. Default: ``'mean'``
+    """
+    if reduction == "none":
+        return x
+    if reduction == "mean":
+        return x.mean(dim=0)
+    if reduction == "sum":
+        return x.sum(dim=0)
+    raise ValueError("Unknown reduction. Expected one of {'none', 'mean', 'sum'}")
+
+
+def _validate_input(
+    tensors: List[torch.Tensor],
+    dim_range: Tuple[int, int] = (0, -1),
+    data_range: Tuple[float, float] = (0.0, -1.0),
+    # size_dim_range: Tuple[float, float] = (0., -1.),
+    size_range: Optional[Tuple[int, int]] = None,
+) -> None:
+    r"""Check that input(-s)  satisfies the requirements
+    Args:
+        tensors: Tensors to check
+        dim_range: Allowed number of dimensions. (min, max)
+        data_range: Allowed range of values in tensors. (min, max)
+        size_range: Dimensions to include in size comparison. (start_dim, end_dim + 1)
+    """
+
+    if not __debug__:
+        return
+
+    x = tensors[0]
+    for t in tensors:
+        
+        assert torch.is_tensor(t), f"Expected torch.Tensor, got {type(t)}"
+        assert t.device == x.device, f"Expected tensors to be on {x.device}, got {t.device}"
+        torch.nan_to_num(t)
+        if size_range is None:
+            assert t.size() == x.size(), f"Expected tensors with same size, got {t.size()} and {x.size()}"
+        else:
+            assert (
+                t.size()[size_range[0] : size_range[1]] == x.size()[size_range[0] : size_range[1]]
+            ), f"Expected tensors with same size at given dimensions, got {t.size()} and {x.size()}"
+
+        if dim_range[0] == dim_range[1]:
+            assert t.dim() == dim_range[0], f"Expected number of dimensions to be {dim_range[0]}, got {t.dim()}"
+        elif dim_range[0] < dim_range[1]:
+            assert (
+                dim_range[0] <= t.dim() <= dim_range[1]
+            ), f"Expected number of dimensions to be between {dim_range[0]} and {dim_range[1]}, got {t.dim()}"
+
+        if data_range[0] < data_range[1]:
+            # if(math.isnan(t.min())) :
+            #     assert 0 < 1
+            # else :
+            assert data_range[0] <= t.min(), f"Expected values to be greater or equal to {data_range[0]}, got {t}"
+            # if(math.isnan(t.max())) :   
+            #     assert 0 < 1
+            # else :
+            assert t.max() <= data_range[1], f"Expected values to be lower or equal to {data_range[1]}, got {t}"
+
+
+def gaussian_filter(kernel_size: int, sigma: float) -> torch.Tensor:
+    r"""Returns 2D Gaussian kernel N(0,`sigma`^2)
+    Args:
+        size: Size of the kernel
+        sigma: Std of the distribution
+    Returns:
+        gaussian_kernel: Tensor with shape (1, kernel_size, kernel_size)
+    """
+    coords = torch.arange(kernel_size, dtype=torch.float32)
+    coords -= (kernel_size - 1) / 2.0
+
+    g = coords**2
+    g = (-(g.unsqueeze(0) + g.unsqueeze(1)) / (2 * sigma**2)).exp()
+
+    g /= g.sum()
+    return g.unsqueeze(0)
+
+
+def ssim(
+    x: torch.Tensor,
+    y: torch.Tensor,
+    kernel_size: int = 11,
+    kernel_sigma: float = 1.5,
+    data_range: Union[int, float] = 1.0,
+    reduction: str = "mean",
+    full: bool = False,
+    downsample: bool = True,
+    k1: float = 0.01,
+    k2: float = 0.03,
+) -> List[torch.Tensor]:
+    r"""Interface of Structural Similarity (SSIM) index.
+    Inputs supposed to be in range ``[0, data_range]``.
+    To match performance with skimage and tensorflow set ``'downsample' = True``.
+
+    Args:
+        x: An input tensor. Shape :math:`(N, C, H, W)` or :math:`(N, C, H, W, 2)`.
+        y: A target tensor. Shape :math:`(N, C, H, W)` or :math:`(N, C, H, W, 2)`.
+        kernel_size: The side-length of the sliding window used in comparison. Must be an odd value.
+        kernel_sigma: Sigma of normal distribution.
+        data_range: Maximum value range of images (usually 1.0 or 255).
+        reduction: Specifies the reduction type:
+            ``'none'`` | ``'mean'`` | ``'sum'``. Default:``'mean'``
+        full: Return cs map or not.
+        downsample: Perform average pool before SSIM computation. Default: True
+        k1: Algorithm parameter, K1 (small constant).
+        k2: Algorithm parameter, K2 (small constant).
+            Try a larger K2 constant (e.g. 0.4) if you get a negative or NaN results.
+
+    Returns:
+        Value of Structural Similarity (SSIM) index, or the list ``[ssim, cs]`` when ``full`` is True. In case of
+        5D input tensors, complex value is returned as a tensor of size 2.
+
+    References:
+        Wang, Z., Bovik, A. C., Sheikh, H. R., & Simoncelli, E. P. (2004).
+        Image quality assessment: From error visibility to structural similarity.
+        IEEE Transactions on Image Processing, 13, 600-612.
+        https://ece.uwaterloo.ca/~z70wang/publications/ssim.pdf,
+        DOI: `10.1109/TIP.2003.819861`
+    """
+    assert kernel_size % 2 == 1, f"Kernel size must be odd, got [{kernel_size}]"
+    _validate_input([x, y], dim_range=(4, 5), data_range=(0, data_range))  # 4D or 5D, values inside [0, data_range]
+
+    x = x / float(data_range)  # rescale both inputs by data_range
+    y = y / float(data_range)
+
+    # Averagepool image if the size is large enough
+    f = max(1, round(min(x.size()[-2:]) / 256))  # NOTE(review): for 5D input the last two dims are (W, 2), so f is 1 - confirm
+    if (f > 1) and downsample:
+        x = F.avg_pool2d(x, kernel_size=f)
+        y = F.avg_pool2d(y, kernel_size=f)
+
+    kernel = gaussian_filter(kernel_size, kernel_sigma).repeat(x.size(1), 1, 1, 1).to(y)  # one kernel copy per channel
+    _compute_ssim_per_channel = _ssim_per_channel_complex if x.dim() == 5 else _ssim_per_channel  # 5D means complex
+    ssim_map, cs_map = _compute_ssim_per_channel(x=x, y=y, kernel=kernel, k1=k1, k2=k2)
+    ssim_val = ssim_map.mean(1)  # average over the channel dimension
+    cs = cs_map.mean(1)
+
+    ssim_val = _reduce(ssim_val, reduction)  # then reduce over the batch dimension
+    cs = _reduce(cs, reduction)
+
+    if full:
+        return [ssim_val, cs]
+
+    return ssim_val
+
+
+class SSIMLoss(_Loss):
+    r"""Creates a criterion that measures the structural similarity index error between
+    each element in the input :math:`x` and target :math:`y`.
+
+    To match performance with skimage and tensorflow set ``'downsample' = True``.
+
+    The unreduced (i.e. with :attr:`reduction` set to ``'none'``) loss can be described as:
+
+    .. math::
+        SSIM = \{ssim_1,\dots,ssim_{N \times C}\}\\
+        ssim_{l}(x, y) = \frac{(2 \mu_x \mu_y + c_1) (2 \sigma_{xy} + c_2)}
+        {(\mu_x^2 +\mu_y^2 + c_1)(\sigma_x^2 +\sigma_y^2 + c_2)},
+
+    where :math:`N` is the batch size, `C` is the channel size. If :attr:`reduction` is not ``'none'``
+    (default ``'mean'``), then:
+
+    .. math::
+        SSIMLoss(x, y) =
+        \begin{cases}
+            \operatorname{mean}(1 - SSIM), &  \text{if reduction} = \text{'mean';}\\
+            \operatorname{sum}(1 - SSIM),  &  \text{if reduction} = \text{'sum'.}
+        \end{cases}
+
+    :math:`x` and :math:`y` are 4D :math:`(N, C, H, W)` or 5D :math:`(N, C, H, W, 2)` tensors
+    of matching size.
+
+    The per-sample scores are first averaged over channels; ``'mean'`` then averages and
+    ``'sum'`` then sums them over the batch dimension.
+    In case of 5D input tensors, complex value is returned as a tensor of size 2.
+
+    Args:
+        kernel_size: By default, the mean and covariance of a pixel is obtained
+            by convolution with given filter_size.
+        kernel_sigma: Standard deviation for Gaussian kernel.
+        k1: Coefficient related to c1 in the above equation.
+        k2: Coefficient related to c2 in the above equation.
+        downsample: Perform average pool before SSIM computation. Default: True
+        reduction: Specifies the reduction type:
+            ``'none'`` | ``'mean'`` | ``'sum'``. Default:``'mean'``
+        data_range: Maximum value range of images (usually 1.0 or 255).
+
+    Examples:
+        >>> loss = SSIMLoss()
+        >>> x = torch.rand(3, 3, 256, 256, requires_grad=True)
+        >>> y = torch.rand(3, 3, 256, 256)
+        >>> output = loss(x, y)
+        >>> output.backward()
+
+    References:
+        Wang, Z., Bovik, A. C., Sheikh, H. R., & Simoncelli, E. P. (2004).
+        Image quality assessment: From error visibility to structural similarity.
+        IEEE Transactions on Image Processing, 13, 600-612.
+        https://ece.uwaterloo.ca/~z70wang/publications/ssim.pdf,
+        DOI:`10.1109/TIP.2003.819861`
+    """
+    __constants__ = ["kernel_size", "k1", "k2", "sigma", "kernel", "reduction"]  # NOTE(review): "sigma" and "kernel" are never set in __init__ - confirm
+
+    def __init__(
+        self,
+        kernel_size: int = 11,
+        kernel_sigma: float = 1.5,
+        k1: float = 0.01,
+        k2: float = 0.03,
+        downsample: bool = True,
+        reduction: str = "mean",
+        data_range: Union[int, float] = 1.0,
+    ) -> None:
+        super().__init__()
+
+        # Generic loss parameters.
+        self.reduction = reduction
+
+        # Loss-specific parameters.
+        self.kernel_size = kernel_size
+
+        # This check might look redundant because kernel size is checked within the ssim function anyway.
+        # However, this check allows to fail fast when the loss is being initialised and training has not been started.
+        assert kernel_size % 2 == 1, f"Kernel size must be odd, got [{kernel_size}]"
+        self.kernel_sigma = kernel_sigma  # the remaining settings are stored as-is and forwarded to ssim() in forward()
+        self.k1 = k1
+        self.k2 = k2
+        self.downsample = downsample
+        self.data_range = data_range
+
+    def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
+        r"""Computation of Structural Similarity (SSIM) index as a loss function.
+
+        Args:
+            x: An input tensor. Shape :math:`(N, C, H, W)` or :math:`(N, C, H, W, 2)`.
+            y: A target tensor. Shape :math:`(N, C, H, W)` or :math:`(N, C, H, W, 2)`.
+
+        Returns:
+            Value of SSIM loss to be minimized, i.e ``1 - ssim`` in [0, 1] range. In case of 5D input tensors,
+            complex value is returned as a tensor of size 2.
+        """
+
+        score = ssim(
+            x=x,
+            y=y,
+            kernel_size=self.kernel_size,
+            kernel_sigma=self.kernel_sigma,
+            downsample=self.downsample,
+            data_range=self.data_range,
+            reduction=self.reduction,
+            full=False,  # only the SSIM value is requested, not the cs term
+            k1=self.k1,
+            k2=self.k2,
+        )
+        return torch.ones_like(score) - score  # 1 - ssim, computed element-wise
+
+
+def _ssim_per_channel(
+    x: torch.Tensor,
+    y: torch.Tensor,
+    kernel: torch.Tensor,
+    k1: float = 0.01,
+    k2: float = 0.03,
+) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
+    r"""Calculate Structural Similarity (SSIM) index for X and Y per channel.
+
+    Args:
+        x: An input tensor. Shape :math:`(N, C, H, W)`.
+        y: A target tensor. Shape :math:`(N, C, H, W)`.
+        kernel: 2D Gaussian kernel.
+        k1: Algorithm parameter, K1 (small constant, see [1]).
+        k2: Algorithm parameter, K2 (small constant, see [1]).
+            Try a larger K2 constant (e.g. 0.4) if you get a negative or NaN results.
+
+    Returns:
+        Full Value of Structural Similarity (SSIM) index.
+    """
+    if x.size(-1) < kernel.size(-1) or x.size(-2) < kernel.size(-2):
+        raise ValueError(
+            f"Kernel size can't be greater than actual input size. Input size: {x.size()}. "
+            f"Kernel size: {kernel.size()}"
+        )
+
+    c1 = k1**2
+    c2 = k2**2
+    n_channels = x.size(1)
+    mu_x = F.conv2d(x, weight=kernel, stride=1, padding=0, groups=n_channels)
+    mu_y = F.conv2d(y, weight=kernel, stride=1, padding=0, groups=n_channels)
+
+    mu_xx = mu_x**2
+    mu_yy = mu_y**2
+    mu_xy = mu_x * mu_y
+
+    sigma_xx = F.conv2d(x**2, weight=kernel, stride=1, padding=0, groups=n_channels) - mu_xx
+    sigma_yy = F.conv2d(y**2, weight=kernel, stride=1, padding=0, groups=n_channels) - mu_yy
+    sigma_xy = F.conv2d(x * y, weight=kernel, stride=1, padding=0, groups=n_channels) - mu_xy
+
+    # Contrast sensitivity (CS) with alpha = beta = gamma = 1.
+    cs = (2.0 * sigma_xy + c2) / (sigma_xx + sigma_yy + c2)
+
+    # Structural similarity (SSIM)
+    ss = (2.0 * mu_xy + c1) / (mu_xx + mu_yy + c1) * cs
+
+    ssim_val = ss.mean(dim=(-1, -2))
+    cs = cs.mean(dim=(-1, -2))
+    return ssim_val, cs
+
+
+def _ssim_per_channel_complex(
+    x: torch.Tensor,
+    y: torch.Tensor,
+    kernel: torch.Tensor,
+    k1: float = 0.01,
+    k2: float = 0.03,
+) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
+    r"""Calculate Structural Similarity (SSIM) index for Complex X and Y per channel.
+
+    Args:
+        x: An input tensor. Shape :math:`(N, C, H, W, 2)`.
+        y: A target tensor. Shape :math:`(N, C, H, W, 2)`.
+        kernel: 2-D gauss kernel.
+        k1: Algorithm parameter, K1 (small constant, see [1]).
+        k2: Algorithm parameter, K2 (small constant, see [1]).
+            Try a larger K2 constant (e.g. 0.4) if you get a negative or NaN results.
+
+    Returns:
+        Tuple ``(ssim, cs)``: both maps averaged over the two spatial dimensions (dims -2 and -3).
+    """
+    n_channels = x.size(1)
+    if x.size(-2) < kernel.size(-1) or x.size(-3) < kernel.size(-2):  # spatial dims sit at -3 / -2; the last dim is skipped
+        raise ValueError(
+            f"Kernel size can't be greater than actual input size. Input size: {x.size()}. "
+            f"Kernel size: {kernel.size()}"
+        )
+
+    c1 = k1**2
+    c2 = k2**2
+
+    x_real = x[..., 0]  # index 0 of the last dim is treated as the real part, index 1 as the imaginary part
+    x_imag = x[..., 1]
+    y_real = y[..., 0]
+    y_imag = y[..., 1]
+
+    mu1_real = F.conv2d(x_real, weight=kernel, stride=1, padding=0, groups=n_channels)  # kernel-weighted local means
+    mu1_imag = F.conv2d(x_imag, weight=kernel, stride=1, padding=0, groups=n_channels)
+    mu2_real = F.conv2d(y_real, weight=kernel, stride=1, padding=0, groups=n_channels)
+    mu2_imag = F.conv2d(y_imag, weight=kernel, stride=1, padding=0, groups=n_channels)
+
+    mu1_sq = mu1_real.pow(2) + mu1_imag.pow(2)  # squared magnitudes of the local means
+    mu2_sq = mu2_real.pow(2) + mu2_imag.pow(2)
+    mu1_mu2_real = mu1_real * mu2_real - mu1_imag * mu2_imag  # real / imaginary parts of the product mu1 * mu2
+    mu1_mu2_imag = mu1_real * mu2_imag + mu1_imag * mu2_real
+
+    compensation = 1.0  # scales c1 and c2 below; a factor of 1.0 leaves them unchanged
+
+    x_sq = x_real.pow(2) + x_imag.pow(2)
+    y_sq = y_real.pow(2) + y_imag.pow(2)
+    x_y_real = x_real * y_real - x_imag * y_imag  # real / imaginary parts of the product x * y
+    x_y_imag = x_real * y_imag + x_imag * y_real
+
+    sigma1_sq = F.conv2d(x_sq, weight=kernel, stride=1, padding=0, groups=n_channels) - mu1_sq
+    sigma2_sq = F.conv2d(y_sq, weight=kernel, stride=1, padding=0, groups=n_channels) - mu2_sq
+    sigma12_real = F.conv2d(x_y_real, weight=kernel, stride=1, padding=0, groups=n_channels) - mu1_mu2_real
+    sigma12_imag = F.conv2d(x_y_imag, weight=kernel, stride=1, padding=0, groups=n_channels) - mu1_mu2_imag
+    sigma12 = torch.stack((sigma12_imag, sigma12_real), dim=-1)  # NOTE(review): (imag, real) order, unlike mu1_mu2 below - confirm
+    mu1_mu2 = torch.stack((mu1_mu2_real, mu1_mu2_imag), dim=-1)
+    # Set alpha = beta = gamma = 1.
+    cs_map = (sigma12 * 2 + c2 * compensation) / (sigma1_sq.unsqueeze(-1) + sigma2_sq.unsqueeze(-1) + c2 * compensation)
+    ssim_map = (mu1_mu2 * 2 + c1 * compensation) / (mu1_sq.unsqueeze(-1) + mu2_sq.unsqueeze(-1) + c1 * compensation)
+    ssim_map = ssim_map * cs_map
+
+    ssim_val = ssim_map.mean(dim=(-2, -3))  # average over the two spatial dims, keeping the trailing size-2 dim
+    cs = cs_map.mean(dim=(-2, -3))
+
+    return ssim_val, cs
diff --git a/TTS/tts/utils/synthesis.py b/TTS/tts/utils/synthesis.py
new file mode 100644
index 0000000000000000000000000000000000000000..a7ef719b251be298dc2698ce2313a34b82c9d8ff
--- /dev/null
+++ b/TTS/tts/utils/synthesis.py
@@ -0,0 +1,327 @@
+from typing import Dict
+
+import numpy as np
+import torch
+from torch import nn
+
+
+def numpy_to_torch(np_array, dtype, cuda=False):
+    """Wrap ``np_array`` in a tensor of ``dtype`` (moved to CUDA if ``cuda``); ``None`` is returned unchanged."""
+    if np_array is not None:
+        converted = torch.as_tensor(np_array, dtype=dtype)
+        # Only touch the GPU when the caller explicitly asked for it.
+        return converted.cuda() if cuda else converted
+    return None
+
+
+def compute_style_mel(style_wav, ap, cuda=False):
+    # Load the reference audio at the processor's sample rate and add a leading axis to its mel output.
+    waveform = ap.load_wav(style_wav, sr=ap.sample_rate)
+    mel = torch.FloatTensor(ap.melspectrogram(waveform)).unsqueeze(0)
+    return mel.cuda() if cuda else mel
+
+
+def run_model_torch(
+    model: nn.Module,
+    inputs: torch.Tensor,
+    speaker_id: int = None,
+    style_mel: torch.Tensor = None,
+    style_text: str = None,
+    d_vector: torch.Tensor = None,
+    language_id: torch.Tensor = None,
+) -> Dict:
+    """Run a single (non-batched) inference pass through a torch model.
+
+    Args:
+        model (nn.Module): Model exposing ``inference``; if it has a ``module`` attribute, that object is used.
+        inputs (torch.Tensor): Input tensor with character ids; its second dimension is taken as the length.
+        speaker_id (int, optional): Forwarded as ``aux_input["speaker_ids"]``. Defaults to None.
+        style_mel (torch.Tensor, optional): Forwarded as ``aux_input["style_mel"]``. Defaults to None.
+        style_text (str, optional): Forwarded as ``aux_input["style_text"]``. Defaults to None.
+        d_vector (torch.Tensor, optional): Forwarded as ``aux_input["d_vectors"]``. Defaults to None.
+        language_id (torch.Tensor, optional): Forwarded as ``aux_input["language_ids"]``. Defaults to None.
+
+    Returns:
+        Dict: whatever the model's ``inference`` method returns.
+    """
+    # Unwrap containers that keep the real model under ``.module``.
+    target = getattr(model, "module", model)
+    # One-element tensor holding the sequence length, placed on the same device as ``inputs``.
+    seq_len = torch.tensor(inputs.shape[1:2]).to(inputs.device)
+    # All optional conditioning values are passed through untouched.
+    aux_input = {
+        "x_lengths": seq_len,
+        "speaker_ids": speaker_id,
+        "d_vectors": d_vector,
+        "style_mel": style_mel,
+        "style_text": style_text,
+        "language_ids": language_id,
+    }
+    outputs = target.inference(inputs, aux_input=aux_input)
+    return outputs
+
+
+def trim_silence(wav, ap):  # keep only the samples before the index reported by ``ap.find_endpoint``
+    return wav[slice(None, ap.find_endpoint(wav))]
+
+
+def inv_spectrogram(postnet_output, ap, CONFIG):
+    """Invert model output to audio: ``ap.inv_spectrogram`` for "tacotron", ``ap.inv_melspectrogram`` otherwise."""
+    transposed = postnet_output.T  # both inverters receive the transposed output
+    if CONFIG.model.lower() == "tacotron":
+        return ap.inv_spectrogram(transposed)
+    return ap.inv_melspectrogram(transposed)
+
+
+def id_to_torch(aux_id, cuda=False):
+    """Convert an ID (scalar or array-like) to a tensor, optionally on CUDA; ``None`` passes through."""
+    if aux_id is None:
+        # Guard first: with ``cuda=True`` the previous code ended up calling ``None.cuda()``.
+        return None
+    tensor = torch.from_numpy(np.asarray(aux_id))
+    return tensor.cuda() if cuda else tensor
+
+
+def embedding_to_torch(d_vector, cuda=False):
+    """Convert an embedding to a float tensor with one leading axis, optionally on CUDA; ``None`` passes through."""
+    if d_vector is None:
+        # Guard first: with ``cuda=True`` the previous code ended up calling ``None.cuda()``.
+        return None
+    tensor = torch.from_numpy(np.asarray(d_vector)).type(torch.FloatTensor)
+    tensor = tensor.squeeze().unsqueeze(0)  # drop singleton dims, then add a single leading axis
+    return tensor.cuda() if cuda else tensor
+
+
+# TODO: perform GL with pytorch for batching
+def apply_griffin_lim(inputs, input_lens, CONFIG, ap):
+    """Run Griffin-Lim on every sample, iterating through the first dimension of ``inputs``.
+    Args:
+        inputs (Tensor or np.Array): Features to be converted by GL. First dimension is the batch size.
+        input_lens (Tensor or np.Array): 1D array of sample lengths.
+        CONFIG (Dict): TTS config.
+        ap (AudioProcessor): TTS audio processor.
+    Returns: list with one waveform per sample, each cut to ``(length - 1) * ap.hop_length`` samples.
+    """
+    results = []
+    for i, features in enumerate(inputs):
+        # subtract one hop from the expected length: inverse librosa padding
+        n_samples = (input_lens[i] * ap.hop_length) - ap.hop_length
+        results.append(inv_spectrogram(features, ap, CONFIG)[:n_samples])
+    return results
+
+
+def synthesis(
+    model,
+    text,
+    CONFIG,
+    use_cuda,
+    speaker_id=None,
+    style_wav=None,
+    style_text=None,
+    use_griffin_lim=False,
+    do_trim_silence=False,
+    d_vector=None,
+    language_id=None,
+):
+    """Synthesize voice for the given text using Griffin-Lim vocoder or just compute output features to be passed to
+    the vocoder model.
+
+    Args:
+        model (TTS.tts.models):
+            The TTS model to synthesize audio with.
+
+        text (str):
+            The input text to convert to speech.
+
+        CONFIG (Coqpit):
+            Model configuration.
+
+        use_cuda (bool):
+            Enable/disable CUDA.
+
+        speaker_id (int):
+            Speaker ID passed to the speaker embedding layer in multi-speaker model. Defaults to None.
+
+        style_wav (str | Dict[str, float]):
+            Path to a waveform used for computing the style embedding based on GST or Capacitron, or (GST only)
+            a dict that is forwarded to the model as-is. Defaults to None, meaning that Capacitron models will
+            sample from the prior distribution to generate random but realistic prosody.
+
+        style_text (str):
+            Transcription of style_wav for Capacitron models. Defaults to None.
+
+        use_griffin_lim (bool):
+            Invert 2D model outputs to a waveform with ``inv_spectrogram``. Defaults to False.
+
+        do_trim_silence (bool):
+            trim silence after synthesis. Only applied together with ``use_griffin_lim``. Defaults to False.
+
+        d_vector (torch.Tensor):
+            d-vector for multi-speaker models in shape :math:`[1, D]`. Defaults to None.
+
+        language_id (int):
+            Language ID passed to the language embedding layer in multi-lingual model. Defaults to None.
+
+    Returns:
+        Dict: ``wav`` (None for 2D model outputs when ``use_griffin_lim`` is False), ``alignments``,
+        ``text_inputs`` (the batched token-id tensor) and ``outputs`` (the raw model output dict).
+    """
+    # GST or Capacitron processing
+    # TODO: need to handle the case of setting both gst and capacitron to true somewhere
+    style_mel = None
+    if CONFIG.has("gst") and CONFIG.gst and style_wav is not None:
+        if isinstance(style_wav, dict):
+            style_mel = style_wav
+        else:
+            style_mel = compute_style_mel(style_wav, model.ap, cuda=use_cuda)
+
+    if CONFIG.has("capacitron_vae") and CONFIG.use_capacitron_vae and style_wav is not None:
+        style_mel = compute_style_mel(style_wav, model.ap, cuda=use_cuda)
+        style_mel = style_mel.transpose(1, 2)  # [1, time, depth]
+
+    # Keep the caller's language id for the tokenizer: ``language_id`` is turned into a tensor below, and the
+    # style text used to be tokenized with that tensor instead of the value used for ``text``.
+    tokenizer_language = language_id
+    # convert text to sequence of token IDs
+    text_inputs = np.asarray(
+        model.tokenizer.text_to_ids(text, language=tokenizer_language),
+        dtype=np.int32,
+    )
+    # pass tensors to backend
+    if speaker_id is not None:
+        speaker_id = id_to_torch(speaker_id, cuda=use_cuda)
+
+    if d_vector is not None:
+        d_vector = embedding_to_torch(d_vector, cuda=use_cuda)
+
+    if language_id is not None:
+        language_id = id_to_torch(language_id, cuda=use_cuda)
+
+    if not isinstance(style_mel, dict):
+        # GST or Capacitron style mel
+        style_mel = numpy_to_torch(style_mel, torch.float, cuda=use_cuda)
+        if style_text is not None:
+            style_text = np.asarray(
+                model.tokenizer.text_to_ids(style_text, language=tokenizer_language),
+                dtype=np.int32,
+            )
+            style_text = numpy_to_torch(style_text, torch.long, cuda=use_cuda)
+            style_text = style_text.unsqueeze(0)
+
+    text_inputs = numpy_to_torch(text_inputs, torch.long, cuda=use_cuda)
+    text_inputs = text_inputs.unsqueeze(0)
+    # synthesize voice
+    outputs = run_model_torch(
+        model,
+        text_inputs,
+        speaker_id,
+        style_mel,
+        style_text,
+        d_vector=d_vector,
+        language_id=language_id,
+    )
+    model_outputs = outputs["model_outputs"][0].data.cpu().numpy()
+    alignments = outputs["alignments"]
+
+    wav = None
+    model_outputs = model_outputs.squeeze()
+    if model_outputs.ndim == 2:  # [T, C_spec]
+        if use_griffin_lim:
+            wav = inv_spectrogram(model_outputs, model.ap, CONFIG)
+            # trim silence
+            if do_trim_silence:
+                wav = trim_silence(wav, model.ap)
+    else:  # [T,]
+        wav = model_outputs
+    return {
+        "wav": wav,
+        "alignments": alignments,
+        "text_inputs": text_inputs,
+        "outputs": outputs,
+    }
+
+
+def transfer_voice(
+    model,
+    CONFIG,
+    use_cuda,
+    reference_wav,
+    speaker_id=None,
+    d_vector=None,
+    reference_speaker_id=None,
+    reference_d_vector=None,
+    do_trim_silence=False,
+    use_griffin_lim=False,
+):
+    """Convert the voice of a reference recording with the model's ``inference_voice_conversion`` method,
+    optionally turning 2D outputs into a waveform with the Griffin-Lim vocoder.
+
+    Args:
+        model (TTS.tts.models):
+            The TTS model used for the conversion.
+
+        CONFIG (Coqpit):
+            Model configuration.
+
+        use_cuda (bool):
+            Enable/disable CUDA.
+
+        reference_wav (str):
+            Path of the reference wav to be used for voice conversion.
+
+        speaker_id (int):
+            Speaker ID passed to the speaker embedding layer in multi-speaker model. Defaults to None.
+
+        d_vector (torch.Tensor):
+            d-vector for multi-speaker models in shape :math:`[1, D]`. Defaults to None.
+
+        reference_speaker_id (int):
+            Reference Speaker ID passed to the speaker embedding layer in multi-speaker model. Defaults to None.
+
+        reference_d_vector (torch.Tensor):
+            Reference d-vector for multi-speaker models in shape :math:`[1, D]`. Defaults to None.
+
+        do_trim_silence (bool):
+            trim silence after synthesis; only applied together with ``use_griffin_lim``. Defaults to False.
+
+        use_griffin_lim (bool):
+            invert 2D model outputs with ``inv_spectrogram``. Defaults to False.
+    """
+    # pass tensors to backend
+    if speaker_id is not None:
+        speaker_id = id_to_torch(speaker_id, cuda=use_cuda)
+
+    if d_vector is not None:
+        d_vector = embedding_to_torch(d_vector, cuda=use_cuda)
+
+    if reference_d_vector is not None:
+        reference_d_vector = embedding_to_torch(reference_d_vector, cuda=use_cuda)
+
+    # Load the reference audio, preferring the encoder sample rate when the model defines one and
+    # falling back to the audio processor's sample rate otherwise.
+    encoder_sr = model.args.encoder_sample_rate
+    load_sr = encoder_sr if encoder_sr else model.ap.sample_rate
+    reference_audio = model.ap.load_wav(reference_wav, sr=load_sr)
+    reference_wav = embedding_to_torch(reference_audio, cuda=use_cuda)
+
+    # Models that keep the real network under ``.module`` are unwrapped first.
+    target = getattr(model, "module", model)
+    model_outputs = target.inference_voice_conversion(
+        reference_wav, speaker_id, d_vector, reference_speaker_id, reference_d_vector
+    )
+    model_outputs = model_outputs.squeeze()
+
+    if model_outputs.ndim != 2:
+        # [T,]: the squeezed output is returned as the waveform
+        return model_outputs
+
+    # [T, C_spec]: without Griffin-Lim nothing is vocoded here
+    if not use_griffin_lim:
+        return None
+
+    wav = inv_spectrogram(model_outputs, model.ap, CONFIG)
+    # trim silence
+    if do_trim_silence:
+        wav = trim_silence(wav, model.ap)
+
+    return wav
diff --git a/TTS/tts/utils/text/.DS_Store b/TTS/tts/utils/text/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..4d251a5c17bc65369b0787b2b61e9833ad5aafab
Binary files /dev/null and b/TTS/tts/utils/text/.DS_Store differ
diff --git a/TTS/tts/utils/text/.ipynb_checkpoints/cleaners-checkpoint.py b/TTS/tts/utils/text/.ipynb_checkpoints/cleaners-checkpoint.py
new file mode 100644
index 0000000000000000000000000000000000000000..f02f8fb48e23cce5ca604c0c86d3e13abeb42654
--- /dev/null
+++ b/TTS/tts/utils/text/.ipynb_checkpoints/cleaners-checkpoint.py
@@ -0,0 +1,145 @@
+"""Set of default text cleaners"""
+# TODO: pick the cleaner for languages dynamically
+
+import re
+
+from anyascii import anyascii
+
+from TTS.tts.utils.text.chinese_mandarin.numbers import replace_numbers_to_characters_in_text
+
+from .english.abbreviations import abbreviations_en
+from .english.number_norm import normalize_numbers as en_normalize_numbers
+from .english.time_norm import expand_time_english
+from .french.abbreviations import abbreviations_fr
+
+# Regular expression matching a run of one or more whitespace characters:
+_whitespace_re = re.compile(r"\s+")
+
+
+def expand_abbreviations(text, lang="en"):
+    """Expand the abbreviations known for ``lang`` ("en"/"fr"); any other language leaves ``text`` unchanged."""
+    if lang not in ("en", "fr"):
+        return text  # no table to apply; this used to fall through to an UnboundLocalError
+    _abbreviations = abbreviations_en if lang == "en" else abbreviations_fr
+    for regex, replacement in _abbreviations:
+        text = re.sub(regex, replacement, text)
+    return text
+
+
+def lowercase(text):
+    return text.lower()  # relies on the argument's own ``lower`` method
+
+
+def collapse_whitespace(text):  # squeeze every whitespace run to a single space and trim both ends
+    return _whitespace_re.sub(" ", text).strip()
+
+
+def convert_to_ascii(text):
+    return anyascii(text)  # thin wrapper around ``anyascii`` (imported from the ``anyascii`` package)
+
+
+def remove_aux_symbols(text):
+    """Strip every ``<``, ``>``, ``(``, ``)``, ``[``, ``]`` and double-quote character from ``text``."""
+    return re.sub(r"[\<\>\(\)\[\]\"]+", "", text)
+
+
+def replace_symbols(text, lang="en"):
+    """Replace ``;`` and ``:`` with commas, ``-`` with a space and ``&`` with the word for "and" in ``lang``.
+
+    ``&`` is only rewritten for "en", "fr" and "pt"; any other ``lang`` (including None) leaves it in place.
+    """
+    for symbol, substitute in ((";", ","), ("-", " "), (":", ",")):
+        text = text.replace(symbol, substitute)
+    for code, word in (("en", " and "), ("fr", " et "), ("pt", " e ")):
+        if lang == code:
+            text = text.replace("&", word)
+    return text
+
+
+def basic_cleaners(text):
+    """Minimal pipeline: lowercase the text, then collapse its whitespace; no transliteration is applied."""
+    lowered = lowercase(text)
+    cleaned = collapse_whitespace(lowered)
+    return cleaned
+
+
+def transliteration_cleaners(text):
+    """Pipeline for non-English text: lowercases and collapses whitespace.
+
+    NOTE: despite the name, the ASCII transliteration step (``convert_to_ascii``) is currently disabled.
+    """
+    return collapse_whitespace(lowercase(text))
+
+
+def basic_german_cleaners(text):
+    """Pipeline for German text: lowercase, then collapse whitespace."""
+    # Two steps only; no symbol replacement or number expansion happens here.
+    normalized = collapse_whitespace(lowercase(text))
+    return normalized
+
+
+# TODO: elaborate it
+def basic_turkish_cleaners(text):
+    """Pipeline for Turkish text: map capital "I" to dotless "ı", lowercase, collapse whitespace."""
+    # The replacement must precede lowercasing, which would otherwise turn "I" into a dotted "i".
+    # NOTE(review): dotted capital "İ" is left to ``str.lower``; confirm its output is in the vocabulary.
+    dotless = text.replace("I", "ı")
+    return collapse_whitespace(lowercase(dotless))
+
+
+def english_cleaners(text):
+    """Pipeline for English text, including number and abbreviation expansion."""
+    # NOTE: ``convert_to_ascii`` is not applied in this pipeline.
+    # 1) normalise case, then spell out times and numbers
+    spoken = en_normalize_numbers(expand_time_english(lowercase(text)))
+    # 2) expand abbreviations (default language: English)
+    expanded = expand_abbreviations(spoken)
+    # 3) rewrite/remove symbols and squeeze whitespace
+    cleaned = remove_aux_symbols(replace_symbols(expanded))
+    cleaned = collapse_whitespace(cleaned)
+    return cleaned
+
+
+def phoneme_cleaners(text):
+    """Pipeline for phonemes mode: expand numbers and abbreviations, then clean symbols and whitespace.
+
+    The text is not lowercased and times are not expanded in this pipeline.
+    """
+    for step in (en_normalize_numbers, expand_abbreviations, replace_symbols, remove_aux_symbols):
+        text = step(text)
+    return collapse_whitespace(text)
+
+
+def french_cleaners(text):
+    """Pipeline for French text. Numbers are left as-is (per the original note, the phonemizer expands them)."""
+    # Abbreviations are expanded on the original casing, before lowercasing.
+    expanded = expand_abbreviations(text, lang="fr")
+    lowered = lowercase(expanded)
+    without_symbols = remove_aux_symbols(replace_symbols(lowered, lang="fr"))
+    # Finally squeeze whitespace runs and trim the ends.
+    return collapse_whitespace(without_symbols)
+
+
+def portuguese_cleaners(text):
+    """Basic pipeline for Portuguese text: lowercase, replace/remove symbols, collapse whitespace.
+
+    Abbreviations and numbers are not expanded here (per the original note, the phonemizer handles them).
+    """
+    lowered = lowercase(text)
+    stripped = remove_aux_symbols(replace_symbols(lowered, lang="pt"))
+    return collapse_whitespace(stripped)
+
+
+def chinese_mandarin_cleaners(text: str) -> str:
+    """Basic pipeline for Mandarin Chinese: delegates to ``replace_numbers_to_characters_in_text``."""
+    # No lowercasing, symbol or whitespace handling is done in this pipeline.
+    return replace_numbers_to_characters_in_text(text)
+
+
+def multilingual_cleaners(text):
+    """Pipeline for multilingual text: lowercase, replace/remove symbols, collapse whitespace."""
+    lowered = lowercase(text)
+    # ``lang=None`` matches no language branch in ``replace_symbols``, so "&" is kept as-is.
+    replaced = replace_symbols(lowered, lang=None)
+    stripped = remove_aux_symbols(replaced)
+    return collapse_whitespace(stripped)
diff --git a/TTS/tts/utils/text/__init__.py b/TTS/tts/utils/text/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..593372dc7cb2fba240eb5f08e8e2cfae5a4b4e45
--- /dev/null
+++ b/TTS/tts/utils/text/__init__.py
@@ -0,0 +1 @@
+from TTS.tts.utils.text.tokenizer import TTSTokenizer
diff --git a/TTS/tts/utils/text/__pycache__/__init__.cpython-37.pyc b/TTS/tts/utils/text/__pycache__/__init__.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..59d924305b4a88db74c8c13573af1d8fb400a515
Binary files /dev/null and b/TTS/tts/utils/text/__pycache__/__init__.cpython-37.pyc differ
diff --git a/TTS/tts/utils/text/__pycache__/__init__.cpython-38.pyc b/TTS/tts/utils/text/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..97e764a57114c681d7c3cb2a76c02c0b425260a8
Binary files /dev/null and b/TTS/tts/utils/text/__pycache__/__init__.cpython-38.pyc differ
diff --git a/TTS/tts/utils/text/__pycache__/__init__.cpython-39.pyc b/TTS/tts/utils/text/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..afac3ba2cac9754f22eed88aeeaa5d5cae8a3724
Binary files /dev/null and b/TTS/tts/utils/text/__pycache__/__init__.cpython-39.pyc differ
diff --git a/TTS/tts/utils/text/__pycache__/characters.cpython-37.pyc b/TTS/tts/utils/text/__pycache__/characters.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..83f120a9362b23b90991d7c7a83baa287f17e7a0
Binary files /dev/null and b/TTS/tts/utils/text/__pycache__/characters.cpython-37.pyc differ
diff --git a/TTS/tts/utils/text/__pycache__/characters.cpython-38.pyc b/TTS/tts/utils/text/__pycache__/characters.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..471992a33dbbf617c61dd7f47bbd15dfaa7650af
Binary files /dev/null and b/TTS/tts/utils/text/__pycache__/characters.cpython-38.pyc differ
diff --git a/TTS/tts/utils/text/__pycache__/characters.cpython-39.pyc b/TTS/tts/utils/text/__pycache__/characters.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c039ca235f8ba06ea866a78938204cf61c47e873
Binary files /dev/null and b/TTS/tts/utils/text/__pycache__/characters.cpython-39.pyc differ
diff --git a/TTS/tts/utils/text/__pycache__/cleaners.cpython-37.pyc b/TTS/tts/utils/text/__pycache__/cleaners.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e7a098becb609c195cbf24f0503afc905e6a0bd3
Binary files /dev/null and b/TTS/tts/utils/text/__pycache__/cleaners.cpython-37.pyc differ
diff --git a/TTS/tts/utils/text/__pycache__/cleaners.cpython-38.pyc b/TTS/tts/utils/text/__pycache__/cleaners.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f76032575eea37e5f418ab27e42cc093de671531
Binary files /dev/null and b/TTS/tts/utils/text/__pycache__/cleaners.cpython-38.pyc differ
diff --git a/TTS/tts/utils/text/__pycache__/cleaners.cpython-39.pyc b/TTS/tts/utils/text/__pycache__/cleaners.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..469d7458a0a82c87a8c8776a70d345a69575df7d
Binary files /dev/null and b/TTS/tts/utils/text/__pycache__/cleaners.cpython-39.pyc differ
diff --git a/TTS/tts/utils/text/__pycache__/punctuation.cpython-37.pyc b/TTS/tts/utils/text/__pycache__/punctuation.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..10f16bd03560dca3d0a15d9be77e2158b916ca84
Binary files /dev/null and b/TTS/tts/utils/text/__pycache__/punctuation.cpython-37.pyc differ
diff --git a/TTS/tts/utils/text/__pycache__/punctuation.cpython-38.pyc b/TTS/tts/utils/text/__pycache__/punctuation.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..68874ba20f7f80e87bd2fb46a5f7ddc37ce9ff21
Binary files /dev/null and b/TTS/tts/utils/text/__pycache__/punctuation.cpython-38.pyc differ
diff --git a/TTS/tts/utils/text/__pycache__/punctuation.cpython-39.pyc b/TTS/tts/utils/text/__pycache__/punctuation.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c469209375c074c693fc835169d4000fa4ca5bf6
Binary files /dev/null and b/TTS/tts/utils/text/__pycache__/punctuation.cpython-39.pyc differ
diff --git a/TTS/tts/utils/text/__pycache__/tokenizer.cpython-37.pyc b/TTS/tts/utils/text/__pycache__/tokenizer.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..aee346b271dddb6b9dc4e3d53706f4d6fa91f38d
Binary files /dev/null and b/TTS/tts/utils/text/__pycache__/tokenizer.cpython-37.pyc differ
diff --git a/TTS/tts/utils/text/__pycache__/tokenizer.cpython-38.pyc b/TTS/tts/utils/text/__pycache__/tokenizer.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e25e0988f53606bc1de39d15ba7b25ac706a97f9
Binary files /dev/null and b/TTS/tts/utils/text/__pycache__/tokenizer.cpython-38.pyc differ
diff --git a/TTS/tts/utils/text/__pycache__/tokenizer.cpython-39.pyc b/TTS/tts/utils/text/__pycache__/tokenizer.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..eed064a3dc24571fe97ae05c21950b0b7fcbe21d
Binary files /dev/null and b/TTS/tts/utils/text/__pycache__/tokenizer.cpython-39.pyc differ
diff --git a/TTS/tts/utils/text/characters.py b/TTS/tts/utils/text/characters.py
new file mode 100644
index 0000000000000000000000000000000000000000..69835f3065331bad89a0dc1c598dc4cee07b4c29
--- /dev/null
+++ b/TTS/tts/utils/text/characters.py
@@ -0,0 +1,468 @@
+from dataclasses import replace
+from typing import Dict
+
+from TTS.tts.configs.shared_configs import CharactersConfig
+
+
+def parse_symbols():
+    """Collect the module-level default symbol sets into a dict keyed by role."""
+    symbols = dict(pad=_pad, eos=_eos, bos=_bos)
+    symbols.update(
+        characters=_characters,
+        punctuations=_punctuations,
+        phonemes=_phonemes,
+    )
+    return symbols
+
+
+# DEFAULT SET OF GRAPHEMES
+_pad = "<PAD>"
+_eos = "<EOS>"
+_bos = "<BOS>"
+_blank = "<BLNK>"  # TODO: check if we need this alongside with PAD
+_characters = "0123456789aáảàãạâấẩầẫậăắẳằẵặbcdđeéẻèẽẹêếểềễệfghiíỉìĩịjklmnoóỏòõọôốổồỗộơớởờỡợpqrstuúủùũụưứửừữựvwxyýỷỳỹỵz"
+_punctuations = "!'(),-.:;? "
+
+
+# DEFAULT SET OF IPA PHONEMES
+# Phoneme groups (IPA symbols, plus the digits 0-9 in `_other_symbols`), concatenated into `_phonemes` below
+_vowels = "iyɨʉɯuɪʏʊeøɘəɵɤoɛœɜɞʌɔæɐaɶɑɒᵻ"
+_non_pulmonic_consonants = "ʘɓǀɗǃʄǂɠǁʛ"
+_pulmonic_consonants = "pbtdʈɖcɟkɡqɢʔɴŋɲɳnɱmʙrʀⱱɾɽɸβfvθðszʃʒʂʐçʝxɣχʁħʕhɦɬɮʋɹɻjɰlɭʎʟ"
+_suprasegmentals = "ˈˌːˑ"
+_other_symbols = "ʍwɥʜʢʡɕʑɺɧʲ0123456789"
+_diacrilics = "ɚ˞ɫ ̪"
+_phonemes = _vowels + _non_pulmonic_consonants + _pulmonic_consonants + _suprasegmentals + _other_symbols + _diacrilics
+
+
+class BaseVocabulary:
+    """Vocabulary built directly from a ready-made ``vocab`` mapping.
+
+    Token IDs are the positions obtained by enumerating ``vocab`` (for a dict: its keys, in order).
+
+    Args:
+        vocab (Dict): A dictionary of characters and their corresponding indices.
+    """
+
+    def __init__(self, vocab: Dict, pad: str = None, blank: str = None, bos: str = None, eos: str = None):
+        # Assigning ``vocab`` runs the property setter, which also builds both lookup tables.
+        self.vocab = vocab
+        # Special tokens; each may be None.
+        self.pad, self.blank = pad, blank
+        self.bos, self.eos = bos, eos
+
+    @property
+    def pad_id(self) -> int:
+        """Index of the padding character, or ``len(vocab)`` when no (non-empty) padding character is set."""
+        # A falsy ``pad`` (None or "") selects the one-past-the-end fallback.
+        return len(self.vocab) if not self.pad else self.char_to_id(self.pad)
+
+    @property
+    def blank_id(self) -> int:
+        """Index of the blank character, or ``len(vocab)`` when no (non-empty) blank character is set."""
+        # A falsy ``blank`` (None or "") selects the one-past-the-end fallback.
+        return len(self.vocab) if not self.blank else self.char_to_id(self.blank)
+
+    @property
+    def vocab(self):
+        """The vocabulary object exactly as it was assigned."""
+        return self._vocab
+
+    @vocab.setter
+    def vocab(self, vocab):
+        """Store ``vocab`` and rebuild the char->id and id->char lookup tables from its enumeration."""
+        self._vocab = vocab
+        # Forward table: entry -> position in the enumeration.
+        self._char_to_id = {token: position for position, token in enumerate(self._vocab)}
+        # Inverse table: position -> entry.
+        self._id_to_char = dict(enumerate(self._vocab))
+
+    @staticmethod
+    def init_from_config(config, **kwargs):
+        """Build a vocabulary from the config, falling back to ``kwargs``.
+
+        ``config.characters.vocab_dict`` is used when it is present and non-empty; otherwise the
+        vocabulary is constructed from ``kwargs``. Returns a ``(vocabulary, config)`` tuple in
+        which ``config`` is passed back unchanged.
+        """
+        chars = config.characters
+        if chars is None or "vocab_dict" not in chars or not chars.vocab_dict:
+            # No usable vocab in the config: the caller must supply the constructor arguments.
+            return BaseVocabulary(**kwargs), config
+        # Special tokens are taken from the same config section as the vocab itself.
+        vocabulary = BaseVocabulary(chars.vocab_dict, chars.pad, chars.blank, chars.bos, chars.eos)
+        return vocabulary, config
+
+    @property
+    def num_chars(self):
+        """Number of entries in the vocabulary."""
+        return len(self._vocab)
+
+    def char_to_id(self, char: str) -> int:
+        """Map a character to its token ID; unknown characters raise ``KeyError`` with a readable message."""
+        try:
+            return self._char_to_id[char]
+        except KeyError as missing:
+            raise KeyError(f" [!] {repr(char)} is not in the vocabulary.") from missing
+
+    def id_to_char(self, idx: int) -> str:
+        """Map a token ID back to its character (plain dict lookup; unknown IDs raise ``KeyError``)."""
+        return self._id_to_char[idx]
+
+
+class BaseCharacters:
+    """🐸BaseCharacters class
+
+    Every new character class should inherit from this.
+
+    Characters are ordered as follows ```[PAD, EOS, BOS, BLANK, CHARACTERS, PUNCTUATIONS]```.
+
+    If you need a custom order, inherit from this class and override the ```_create_vocab``` method.
+
+    Args:
+        characters (str):
+            Main set of characters to be used in the vocabulary. ``None`` is treated as empty.
+
+        punctuations (str):
+            Characters to be treated as punctuation. ``None`` is treated as empty.
+
+        pad (str):
+            Special padding character that would be ignored by the model.
+
+        eos (str):
+            End of the sentence character.
+
+        bos (str):
+            Beginning of the sentence character.
+
+        blank (str):
+            Optional character used between characters by some models for better prosody.
+
+        is_unique (bool):
+            Remove duplicates from the provided characters. Defaults to False.
+
+        is_sorted (bool):
+            Sort the characters in alphabetical order. Only applies to `self.characters`. Defaults to True.
+    """
+
+    def __init__(
+        self,
+        characters: str = None,
+        punctuations: str = None,
+        pad: str = None,
+        eos: str = None,
+        bos: str = None,
+        blank: str = None,
+        is_unique: bool = False,
+        is_sorted: bool = True,
+    ) -> None:
+        self._characters = characters
+        self._punctuations = punctuations
+        self._pad = pad
+        self._eos = eos
+        self._bos = bos
+        self._blank = blank
+        self.is_unique = is_unique
+        self.is_sorted = is_sorted
+        self._create_vocab()
+
+    @property
+    def pad_id(self) -> int:
+        """Index of the pad token, or ``len(vocab)`` when no (non-empty) pad token is set."""
+        return self.char_to_id(self.pad) if self.pad else len(self.vocab)
+
+    @property
+    def blank_id(self) -> int:
+        """Index of the blank token, or ``len(vocab)`` when no (non-empty) blank token is set."""
+        return self.char_to_id(self.blank) if self.blank else len(self.vocab)
+
+    @property
+    def characters(self):
+        return self._characters
+
+    @characters.setter
+    def characters(self, characters):
+        self._characters = characters
+        self._create_vocab()
+
+    @property
+    def punctuations(self):
+        return self._punctuations
+
+    @punctuations.setter
+    def punctuations(self, punctuations):
+        self._punctuations = punctuations
+        self._create_vocab()
+
+    @property
+    def pad(self):
+        return self._pad
+
+    @pad.setter
+    def pad(self, pad):
+        self._pad = pad
+        self._create_vocab()
+
+    @property
+    def eos(self):
+        return self._eos
+
+    @eos.setter
+    def eos(self, eos):
+        self._eos = eos
+        self._create_vocab()
+
+    @property
+    def bos(self):
+        return self._bos
+
+    @bos.setter
+    def bos(self, bos):
+        self._bos = bos
+        self._create_vocab()
+
+    @property
+    def blank(self):
+        return self._blank
+
+    @blank.setter
+    def blank(self, blank):
+        self._blank = blank
+        self._create_vocab()
+
+    @property
+    def vocab(self):
+        return self._vocab
+
+    @vocab.setter
+    def vocab(self, vocab):
+        """Store ``vocab`` and rebuild the char->id and id->char lookup tables from its order."""
+        self._vocab = vocab
+        self._char_to_id = {char: idx for idx, char in enumerate(self.vocab)}
+        # With duplicate entries the char->id table keeps the last index of each character.
+        self._id_to_char = dict(enumerate(self.vocab))
+
+    @property
+    def num_chars(self):
+        return len(self._vocab)
+
+    def _create_vocab(self):
+        """Rebuild ``self.vocab`` as [PAD, EOS, BOS, BLANK] + characters + punctuations."""
+        # ``None`` means "no characters"/"no punctuations"; the all-default constructor used to fail here.
+        _vocab = self._characters if self._characters is not None else ""
+        if self.is_unique:
+            _vocab = list(set(_vocab))
+        if self.is_sorted:
+            _vocab = sorted(_vocab)
+        # Prepend the special tokens that are set and non-empty, keeping the documented order.
+        specials = [tok for tok in (self._pad, self._eos, self._bos, self._blank) if tok]
+        punctuations = self._punctuations if self._punctuations is not None else ""
+        self.vocab = specials + list(_vocab) + list(punctuations)
+        if self.is_unique:
+            duplicates = {x for x in self.vocab if self.vocab.count(x) > 1}
+            assert (
+                len(self.vocab) == len(self._char_to_id) == len(self._id_to_char)
+            ), f" [!] There are duplicate characters in the character set. {duplicates}"
+
+    def char_to_id(self, char: str) -> int:
+        try:
+            return self._char_to_id[char]
+        except KeyError as e:
+            raise KeyError(f" [!] {repr(char)} is not in the vocabulary.") from e
+
+    def id_to_char(self, idx: int) -> str:
+        return self._id_to_char[idx]
+
+    def print_log(self, level: int = 0):
+        """Prints the vocabulary in a nice format, indented by ``level`` tabs."""
+        indent = "\t" * level
+        print(f"{indent}| > Characters: {self._characters}")
+        print(f"{indent}| > Punctuations: {self._punctuations}")
+        print(f"{indent}| > Pad: {self._pad}")
+        print(f"{indent}| > EOS: {self._eos}")
+        print(f"{indent}| > BOS: {self._bos}")
+        print(f"{indent}| > Blank: {self._blank}")
+        print(f"{indent}| > Vocab: {self.vocab}")
+        print(f"{indent}| > Num chars: {self.num_chars}")
+
+    @staticmethod
+    def init_from_config(config: "Coqpit"):  # pylint: disable=unused-argument
+        """Init your character class from a config.
+
+        Implement this method for your subclass.
+        """
+        # use character set from config
+        if config.characters is not None:
+            return BaseCharacters(**config.characters), config
+        # return default character set
+        characters = BaseCharacters()
+        new_config = replace(config, characters=characters.to_config())
+        return characters, new_config
+
+    def to_config(self) -> "CharactersConfig":
+        return CharactersConfig(
+            characters=self._characters,
+            punctuations=self._punctuations,
+            pad=self._pad,
+            eos=self._eos,
+            bos=self._bos,
+            blank=self._blank,
+            is_unique=self.is_unique,
+            is_sorted=self.is_sorted,
+        )
+
+
+class IPAPhonemes(BaseCharacters):
+    """🐸IPAPhonemes class to manage `TTS.tts` model vocabulary
+
+    Intended to be used with models using IPAPhonemes as input.
+    It uses system defaults for the undefined class arguments.
+
+    Args:
+        characters (str):
+            Main set of case-sensitive characters to be used in the vocabulary. Defaults to `_phonemes`.
+
+        punctuations (str):
+            Characters to be treated as punctuation. Defaults to `_punctuations`.
+
+        pad (str):
+            Special padding character that would be ignored by the model. Defaults to `_pad`.
+
+        eos (str):
+            End of the sentence character. Defaults to `_eos`.
+
+        bos (str):
+            Beginning of the sentence character. Defaults to `_bos`.
+
+        blank (str):
+            Optional character used between characters by some models for better prosody. Defaults to `_blank`.
+
+        is_unique (bool):
+            Remove duplicates from the provided characters. Defaults to False.
+
+        is_sorted (bool):
+            Sort the characters in alphabetical order. Defaults to True.
+    """
+
+    def __init__(
+        self,
+        characters: str = _phonemes,
+        punctuations: str = _punctuations,
+        pad: str = _pad,
+        eos: str = _eos,
+        bos: str = _bos,
+        blank: str = _blank,
+        is_unique: bool = False,
+        is_sorted: bool = True,
+    ) -> None:
+        super().__init__(characters, punctuations, pad, eos, bos, blank, is_unique, is_sorted)
+
+    @staticmethod
+    def init_from_config(config: "Coqpit"):
+        """Init a IPAPhonemes object from a model config
+
+        If characters are not defined in the config, it will be set to the default characters and the config
+        will be updated.
+
+        Args:
+            config (Coqpit): Model config whose `characters` field is read.
+
+        Returns:
+            A `(characters, config)` tuple. When `config.characters` is set, the same config object is
+            returned (its `characters["characters"]` entry is overwritten in place when a non-empty
+            `phonemes` entry exists). Otherwise the config produced by `replace` with the default
+            character set is returned.
+        """
+        if "characters" in config and config.characters is not None:
+            # band-aid for compatibility with old models that keep their symbols under `phonemes`;
+            # subscript access is used like for every other field, so mapping-style configs work too
+            if "phonemes" in config.characters and config.characters["phonemes"] is not None:
+                config.characters["characters"] = config.characters["phonemes"]
+            # fields are picked one by one so that the legacy `phonemes` key never reaches the constructor
+            return (
+                IPAPhonemes(
+                    characters=config.characters["characters"],
+                    punctuations=config.characters["punctuations"],
+                    pad=config.characters["pad"],
+                    eos=config.characters["eos"],
+                    bos=config.characters["bos"],
+                    blank=config.characters["blank"],
+                    is_unique=config.characters["is_unique"],
+                    is_sorted=config.characters["is_sorted"],
+                ),
+                config,
+            )
+        # return default character set
+        characters = IPAPhonemes()
+        new_config = replace(config, characters=characters.to_config())
+        return characters, new_config
+
+
+class Graphemes(BaseCharacters):
+    """🐸Graphemes class to manage `TTS.tts` model vocabulary
+
+    Meant for models that take graphemes as input. Every argument that is left out falls back to the
+    module level default.
+
+    Args:
+        characters (str):
+            Main set of case-sensitive characters of the vocabulary. Defaults to `_characters`.
+
+        punctuations (str):
+            Characters handled as punctuation. Defaults to `_punctuations`.
+
+        pad (str):
+            Special padding character. Defaults to `_pad`.
+
+        eos (str):
+            End of the sentence character. Defaults to `_eos`.
+
+        bos (str):
+            Beginning of the sentence character. Defaults to `_bos`.
+
+        blank (str):
+            Blank character. Defaults to `_blank`.
+
+        is_unique (bool):
+            Forwarded to `BaseCharacters`. Defaults to False.
+
+        is_sorted (bool):
+            Forwarded to `BaseCharacters`. Defaults to True.
+    """
+
+    def __init__(
+        self,
+        characters: str = _characters,
+        punctuations: str = _punctuations,
+        pad: str = _pad,
+        eos: str = _eos,
+        bos: str = _bos,
+        blank: str = _blank,
+        is_unique: bool = False,
+        is_sorted: bool = True,
+    ) -> None:
+        # all arguments are handed to the base class positionally, in declaration order
+        super().__init__(
+            characters,
+            punctuations,
+            pad,
+            eos,
+            bos,
+            blank,
+            is_unique,
+            is_sorted,
+        )
+
+    @staticmethod
+    def init_from_config(config: "Coqpit"):
+        """Create a Graphemes object from a model config.
+
+        Returns a `(characters, config)` tuple. When the config defines no characters, the default
+        character set is used and the config produced by `replace` is returned in place of the input.
+        """
+        if config.characters is None:
+            characters = Graphemes()
+            return characters, replace(config, characters=characters.to_config())
+        if "phonemes" in config.characters:
+            # band-aid for compatibility with old models: read the known fields one by one so that
+            # the legacy `phonemes` entry is not passed to the constructor
+            field_names = ("characters", "punctuations", "pad", "eos", "bos", "blank", "is_unique", "is_sorted")
+            selected = {name: config.characters[name] for name in field_names}
+            return Graphemes(**selected), config
+        return Graphemes(**config.characters), config
+
+
+if __name__ == "__main__":
+    # manual smoke check: build both default vocabularies first, then print each of them
+    default_sets = [Graphemes(), IPAPhonemes()]
+    for charset in default_sets:
+        charset.print_log()
diff --git a/TTS/tts/utils/text/chinese_mandarin/__init__.py b/TTS/tts/utils/text/chinese_mandarin/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/TTS/tts/utils/text/chinese_mandarin/__pycache__/__init__.cpython-37.pyc b/TTS/tts/utils/text/chinese_mandarin/__pycache__/__init__.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..48ecd2d42f11349f6dec471d43ec57cd99e6a1d0
Binary files /dev/null and b/TTS/tts/utils/text/chinese_mandarin/__pycache__/__init__.cpython-37.pyc differ
diff --git a/TTS/tts/utils/text/chinese_mandarin/__pycache__/__init__.cpython-38.pyc b/TTS/tts/utils/text/chinese_mandarin/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0578ce592379bd75b92b073fc44179169b1096e0
Binary files /dev/null and b/TTS/tts/utils/text/chinese_mandarin/__pycache__/__init__.cpython-38.pyc differ
diff --git a/TTS/tts/utils/text/chinese_mandarin/__pycache__/__init__.cpython-39.pyc b/TTS/tts/utils/text/chinese_mandarin/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ed452f24a68dc5d3c9e9af5127ecc63e724848ed
Binary files /dev/null and b/TTS/tts/utils/text/chinese_mandarin/__pycache__/__init__.cpython-39.pyc differ
diff --git a/TTS/tts/utils/text/chinese_mandarin/__pycache__/numbers.cpython-37.pyc b/TTS/tts/utils/text/chinese_mandarin/__pycache__/numbers.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d18eaf43d3c8f0e893564df9bc250555451e8656
Binary files /dev/null and b/TTS/tts/utils/text/chinese_mandarin/__pycache__/numbers.cpython-37.pyc differ
diff --git a/TTS/tts/utils/text/chinese_mandarin/__pycache__/numbers.cpython-38.pyc b/TTS/tts/utils/text/chinese_mandarin/__pycache__/numbers.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d96427aeefc144234bba83140df6531ac313ae60
Binary files /dev/null and b/TTS/tts/utils/text/chinese_mandarin/__pycache__/numbers.cpython-38.pyc differ
diff --git a/TTS/tts/utils/text/chinese_mandarin/__pycache__/numbers.cpython-39.pyc b/TTS/tts/utils/text/chinese_mandarin/__pycache__/numbers.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..36d8c2d3398c118d1e1b322a0b2125774ea28553
Binary files /dev/null and b/TTS/tts/utils/text/chinese_mandarin/__pycache__/numbers.cpython-39.pyc differ
diff --git a/TTS/tts/utils/text/chinese_mandarin/__pycache__/phonemizer.cpython-37.pyc b/TTS/tts/utils/text/chinese_mandarin/__pycache__/phonemizer.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..2d3a9593d2508ba014b4f967afda8c7a12f7e3e1
Binary files /dev/null and b/TTS/tts/utils/text/chinese_mandarin/__pycache__/phonemizer.cpython-37.pyc differ
diff --git a/TTS/tts/utils/text/chinese_mandarin/__pycache__/phonemizer.cpython-38.pyc b/TTS/tts/utils/text/chinese_mandarin/__pycache__/phonemizer.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c6d60a5044dd48034d6757cdb4ddf85d8797d050
Binary files /dev/null and b/TTS/tts/utils/text/chinese_mandarin/__pycache__/phonemizer.cpython-38.pyc differ
diff --git a/TTS/tts/utils/text/chinese_mandarin/__pycache__/phonemizer.cpython-39.pyc b/TTS/tts/utils/text/chinese_mandarin/__pycache__/phonemizer.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..35159f75d7a680e32739bc5c7441a79774d01486
Binary files /dev/null and b/TTS/tts/utils/text/chinese_mandarin/__pycache__/phonemizer.cpython-39.pyc differ
diff --git a/TTS/tts/utils/text/chinese_mandarin/__pycache__/pinyinToPhonemes.cpython-37.pyc b/TTS/tts/utils/text/chinese_mandarin/__pycache__/pinyinToPhonemes.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..82b267c8df6551855c4793c851ef05b2aa599376
Binary files /dev/null and b/TTS/tts/utils/text/chinese_mandarin/__pycache__/pinyinToPhonemes.cpython-37.pyc differ
diff --git a/TTS/tts/utils/text/chinese_mandarin/__pycache__/pinyinToPhonemes.cpython-38.pyc b/TTS/tts/utils/text/chinese_mandarin/__pycache__/pinyinToPhonemes.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..46094166ec413fbab677e9972f326205864db9eb
Binary files /dev/null and b/TTS/tts/utils/text/chinese_mandarin/__pycache__/pinyinToPhonemes.cpython-38.pyc differ
diff --git a/TTS/tts/utils/text/chinese_mandarin/__pycache__/pinyinToPhonemes.cpython-39.pyc b/TTS/tts/utils/text/chinese_mandarin/__pycache__/pinyinToPhonemes.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5dbbf97bbae68d52a95c3632d5977359a1060ad3
Binary files /dev/null and b/TTS/tts/utils/text/chinese_mandarin/__pycache__/pinyinToPhonemes.cpython-39.pyc differ
diff --git a/TTS/tts/utils/text/chinese_mandarin/numbers.py b/TTS/tts/utils/text/chinese_mandarin/numbers.py
new file mode 100644
index 0000000000000000000000000000000000000000..4787ea61007656819eb57d52d5865b38c7afa915
--- /dev/null
+++ b/TTS/tts/utils/text/chinese_mandarin/numbers.py
@@ -0,0 +1,127 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+# Licensed under WTFPL or the Unlicense or CC0.
+# This uses Python 3, but it's easy to port to Python 2 by changing
+# strings to u'xx'.
+
+import itertools
+import re
+
+
+def _num2chinese(num: str, big=False, simp=True, o=False, twoalt=False) -> str:
+    """Convert numerical arabic numbers (0->9) to chinese hanzi numbers (〇 -> 九)
+
+    Args:
+        num (str): arabic number to convert; an optional sign and a decimal part are supported
+        big (bool, optional): use financial characters. Defaults to False.
+        simp (bool, optional): use simplified characters instead of traditional characters. Defaults to True.
+        o (bool, optional): use 〇 for 'zero'. Defaults to False.
+        twoalt (bool, optional): use 两/兩 for 'two' when appropriate. Defaults to False.
+
+    Raises:
+        ValueError: if the absolute value of the number is 1e48 or more
+        ValueError: if the number is written with an exponent ('e' or 'E')
+
+    Returns:
+        str: converted number as hanzi characters
+    """
+
+    def revuniq(items) -> str:
+        # join the items in reverse order, collapsing runs of equal neighbours (e.g. repeated zeros)
+        return "".join(k for k, _ in itertools.groupby(reversed(items)))
+
+    # check num first
+    nd = str(num)
+    if abs(float(nd)) >= 1e48:
+        raise ValueError("number out of range")
+    # float() accepts both exponent spellings, so both have to be rejected here
+    if "e" in nd.lower():
+        raise ValueError("scientific notation is not supported")
+    c_symbol = "正负点" if simp else "正負點"
+    if o:  # formal
+        twoalt = False
+    if big:
+        c_basic = "零壹贰叁肆伍陆柒捌玖" if simp else "零壹貳參肆伍陸柒捌玖"
+        c_unit1 = "拾佰仟"
+        c_twoalt = "贰" if simp else "貳"
+    else:
+        c_basic = "〇一二三四五六七八九" if o else "零一二三四五六七八九"
+        c_unit1 = "十百千"
+        if twoalt:
+            c_twoalt = "两" if simp else "兩"
+        else:
+            c_twoalt = "二"
+    c_unit2 = "万亿兆京垓秭穰沟涧正载" if simp else "萬億兆京垓秭穰溝澗正載"
+    result = []
+    if nd[0] == "+":
+        result.append(c_symbol[0])
+    elif nd[0] == "-":
+        result.append(c_symbol[1])
+    if "." in nd:
+        integer, remainder = nd.lstrip("+-").split(".")
+    else:
+        integer, remainder = nd.lstrip("+-"), None
+    # an empty integer part (".5") is accepted by float() above, read it as zero
+    if int(integer or "0"):
+        # cut the integer part into groups of four digits, least significant group first
+        splitted = [integer[max(i - 4, 0) : i] for i in range(len(integer), 0, -4)]
+        intresult = []
+        for nu, unit in enumerate(splitted):
+            # special cases
+            if int(unit) == 0:  # 0000
+                intresult.append(c_basic[0])
+                continue
+            if nu > 0 and int(unit) == 2:  # 0002
+                intresult.append(c_twoalt + c_unit2[nu - 1])
+                continue
+            ulist = []
+            unit = unit.zfill(4)
+            for nc, ch in enumerate(reversed(unit)):
+                if ch == "0":
+                    if ulist:  # ???0
+                        ulist.append(c_basic[0])
+                elif nc == 0:
+                    ulist.append(c_basic[int(ch)])
+                elif nc == 1 and ch == "1" and unit[1] == "0":
+                    # special case for tens
+                    # edit the 'elif' if you don't like
+                    # 十四, 三千零十四, 三千三百一十四
+                    ulist.append(c_unit1[0])
+                elif nc > 1 and ch == "2":
+                    ulist.append(c_twoalt + c_unit1[nc - 1])
+                else:
+                    ulist.append(c_basic[int(ch)] + c_unit1[nc - 1])
+            ustr = revuniq(ulist)
+            if nu == 0:
+                intresult.append(ustr)
+            else:
+                intresult.append(ustr + c_unit2[nu - 1])
+        # leading and trailing zero characters are padding left over from the grouping
+        result.append(revuniq(intresult).strip(c_basic[0]))
+    else:
+        result.append(c_basic[0])
+    if remainder:
+        # decimals are read digit by digit after the point character
+        result.append(c_symbol[2])
+        result.append("".join(c_basic[int(ch)] for ch in remainder))
+    return "".join(result)
+
+
+def _number_replace(match) -> str:
+    """Substitution callback: convert the text of a regex match with `_num2chinese`.
+
+    Args:
+        match (re.Match): match whose whole matched text is the number to convert
+
+    Returns:
+        str: the matched number written with chinese characters
+    """
+    return _num2chinese(match.group())
+
+
+def replace_numbers_to_characters_in_text(text: str) -> str:
+    """Rewrite every run of arabic digits in a text with chinese characters (simplified)
+
+    Each run matched by `[0-9]+` is converted on its own through `_number_replace`.
+
+    Args:
+        text (str): input text to transform
+
+    Returns:
+        str: output text
+    """
+    return re.sub(r"[0-9]+", _number_replace, text)
diff --git a/TTS/tts/utils/text/chinese_mandarin/phonemizer.py b/TTS/tts/utils/text/chinese_mandarin/phonemizer.py
new file mode 100644
index 0000000000000000000000000000000000000000..727c881e1062badc57df7418aa07e7434d57335c
--- /dev/null
+++ b/TTS/tts/utils/text/chinese_mandarin/phonemizer.py
@@ -0,0 +1,37 @@
+from typing import List
+
+import jieba
+import pypinyin
+
+from .pinyinToPhonemes import PINYIN_DICT
+
+
+def _chinese_character_to_pinyin(text: str) -> List[str]:
+    """Run `pypinyin.pinyin` (TONE3 style, neutral tone written as 5) and flatten its nested result."""
+    flat: List[str] = []
+    for group in pypinyin.pinyin(text, style=pypinyin.Style.TONE3, heteronym=False, neutral_tone_with_five=True):
+        flat.extend(group)
+    return flat
+
+
+def _chinese_pinyin_to_phoneme(pinyin: str) -> str:
+    """Translate one pinyin token into its phoneme string.
+
+    Everything but the last character is looked up in `PINYIN_DICT` and the last character is appended
+    unchanged. A token that is missing from the table contributes an empty string for that part.
+    """
+    syllable, tone = pinyin[:-1], pinyin[-1]
+    candidates = PINYIN_DICT.get(syllable, [""])
+    # only the first entry of the table value is used
+    return candidates[0] + tone
+
+
+def chinese_text_to_phonemes(text: str, seperator: str = "|") -> str:
+    """Convert chinese text to a string of phoneme symbols.
+
+    The text is segmented with `jieba.cut` (HMM disabled), the segments are joined with spaces and
+    converted to pinyin tokens. Tokens ending in one of the characters 1-5 go through
+    `_chinese_pinyin_to_phoneme`, all other tokens are kept. Every resulting string is split into
+    single characters which are finally joined with `seperator`.
+    """
+    spaced_text = " ".join(jieba.cut(text, HMM=False))
+    symbols: List[str] = []
+    for token in _chinese_character_to_pinyin(spaced_text):
+        # TODO transform to is_pinyin()
+        ends_with_tone = token[-1] in "12345"
+        # extending with a string adds it character by character; punctuation or other text stays as is
+        symbols.extend(_chinese_pinyin_to_phoneme(token) if ends_with_tone else token)
+    return seperator.join(symbols)
diff --git a/TTS/tts/utils/text/chinese_mandarin/pinyinToPhonemes.py b/TTS/tts/utils/text/chinese_mandarin/pinyinToPhonemes.py
new file mode 100644
index 0000000000000000000000000000000000000000..4e25c3a4c91cddd0bf0e5d6e273262e3dbd3a2dd
--- /dev/null
+++ b/TTS/tts/utils/text/chinese_mandarin/pinyinToPhonemes.py
@@ -0,0 +1,419 @@
+PINYIN_DICT = {
+    "a": ["a"],
+    "ai": ["ai"],
+    "an": ["an"],
+    "ang": ["ɑŋ"],
+    "ao": ["aʌ"],
+    "ba": ["ba"],
+    "bai": ["bai"],
+    "ban": ["ban"],
+    "bang": ["bɑŋ"],
+    "bao": ["baʌ"],
+    # "be": ["be"], doesnt exist
+    "bei": ["bɛi"],
+    "ben": ["bœn"],
+    "beng": ["bɵŋ"],
+    "bi": ["bi"],
+    "bian": ["biɛn"],
+    "biao": ["biaʌ"],
+    "bie": ["bie"],
+    "bin": ["bin"],
+    "bing": ["bɨŋ"],
+    "bo": ["bo"],
+    "bu": ["bu"],
+    "ca": ["tsa"],
+    "cai": ["tsai"],
+    "can": ["tsan"],
+    "cang": ["tsɑŋ"],
+    "cao": ["tsaʌ"],
+    "ce": ["tsø"],
+    "cen": ["tsœn"],
+    "ceng": ["tsɵŋ"],
+    "cha": ["ʈʂa"],
+    "chai": ["ʈʂai"],
+    "chan": ["ʈʂan"],
+    "chang": ["ʈʂɑŋ"],
+    "chao": ["ʈʂaʌ"],
+    "che": ["ʈʂø"],
+    "chen": ["ʈʂœn"],
+    "cheng": ["ʈʂɵŋ"],
+    "chi": ["ʈʂʏ"],
+    "chong": ["ʈʂoŋ"],
+    "chou": ["ʈʂou"],
+    "chu": ["ʈʂu"],
+    "chua": ["ʈʂua"],
+    "chuai": ["ʈʂuai"],
+    "chuan": ["ʈʂuan"],
+    "chuang": ["ʈʂuɑŋ"],
+    "chui": ["ʈʂuei"],
+    "chun": ["ʈʂun"],
+    "chuo": ["ʈʂuo"],
+    "ci": ["tsɪ"],
+    "cong": ["tsoŋ"],
+    "cou": ["tsou"],
+    "cu": ["tsu"],
+    "cuan": ["tsuan"],
+    "cui": ["tsuei"],
+    "cun": ["tsun"],
+    "cuo": ["tsuo"],
+    "da": ["da"],
+    "dai": ["dai"],
+    "dan": ["dan"],
+    "dang": ["dɑŋ"],
+    "dao": ["daʌ"],
+    "de": ["dø"],
+    "dei": ["dei"],
+    # "den": ["dœn"],
+    "deng": ["dɵŋ"],
+    "di": ["di"],
+    "dia": ["dia"],
+    "dian": ["diɛn"],
+    "diao": ["diaʌ"],
+    "die": ["die"],
+    "ding": ["dɨŋ"],
+    "diu": ["dio"],
+    "dong": ["doŋ"],
+    "dou": ["dou"],
+    "du": ["du"],
+    "duan": ["duan"],
+    "dui": ["duei"],
+    "dun": ["dun"],
+    "duo": ["duo"],
+    "e": ["ø"],
+    "ei": ["ei"],
+    "en": ["œn"],
+    # "ng": ["œn"],
+    # "eng": ["ɵŋ"],
+    "er": ["er"],
+    "fa": ["fa"],
+    "fan": ["fan"],
+    "fang": ["fɑŋ"],
+    "fei": ["fei"],
+    "fen": ["fœn"],
+    "feng": ["fɵŋ"],
+    "fo": ["fo"],
+    "fou": ["fou"],
+    "fu": ["fu"],
+    "ga": ["ga"],
+    "gai": ["gai"],
+    "gan": ["gan"],
+    "gang": ["gɑŋ"],
+    "gao": ["gaʌ"],
+    "ge": ["gø"],
+    "gei": ["gei"],
+    "gen": ["gœn"],
+    "geng": ["gɵŋ"],
+    "gong": ["goŋ"],
+    "gou": ["gou"],
+    "gu": ["gu"],
+    "gua": ["gua"],
+    "guai": ["guai"],
+    "guan": ["guan"],
+    "guang": ["guɑŋ"],
+    "gui": ["guei"],
+    "gun": ["gun"],
+    "guo": ["guo"],
+    "ha": ["xa"],
+    "hai": ["xai"],
+    "han": ["xan"],
+    "hang": ["xɑŋ"],
+    "hao": ["xaʌ"],
+    "he": ["xø"],
+    "hei": ["xei"],
+    "hen": ["xœn"],
+    "heng": ["xɵŋ"],
+    "hong": ["xoŋ"],
+    "hou": ["xou"],
+    "hu": ["xu"],
+    "hua": ["xua"],
+    "huai": ["xuai"],
+    "huan": ["xuan"],
+    "huang": ["xuɑŋ"],
+    "hui": ["xuei"],
+    "hun": ["xun"],
+    "huo": ["xuo"],
+    "ji": ["dʑi"],
+    "jia": ["dʑia"],
+    "jian": ["dʑiɛn"],
+    "jiang": ["dʑiɑŋ"],
+    "jiao": ["dʑiaʌ"],
+    "jie": ["dʑie"],
+    "jin": ["dʑin"],
+    "jing": ["dʑɨŋ"],
+    "jiong": ["dʑioŋ"],
+    "jiu": ["dʑio"],
+    "ju": ["dʑy"],
+    "juan": ["dʑyɛn"],
+    "jue": ["dʑye"],
+    "jun": ["dʑyn"],
+    "ka": ["ka"],
+    "kai": ["kai"],
+    "kan": ["kan"],
+    "kang": ["kɑŋ"],
+    "kao": ["kaʌ"],
+    "ke": ["kø"],
+    "kei": ["kei"],
+    "ken": ["kœn"],
+    "keng": ["kɵŋ"],
+    "kong": ["koŋ"],
+    "kou": ["kou"],
+    "ku": ["ku"],
+    "kua": ["kua"],
+    "kuai": ["kuai"],
+    "kuan": ["kuan"],
+    "kuang": ["kuɑŋ"],
+    "kui": ["kuei"],
+    "kun": ["kun"],
+    "kuo": ["kuo"],
+    "la": ["la"],
+    "lai": ["lai"],
+    "lan": ["lan"],
+    "lang": ["lɑŋ"],
+    "lao": ["laʌ"],
+    "le": ["lø"],
+    "lei": ["lei"],
+    "leng": ["lɵŋ"],
+    "li": ["li"],
+    "lia": ["lia"],
+    "lian": ["liɛn"],
+    "liang": ["liɑŋ"],
+    "liao": ["liaʌ"],
+    "lie": ["lie"],
+    "lin": ["lin"],
+    "ling": ["lɨŋ"],
+    "liu": ["lio"],
+    "lo": ["lo"],
+    "long": ["loŋ"],
+    "lou": ["lou"],
+    "lu": ["lu"],
+    "lv": ["ly"],
+    "luan": ["luan"],
+    "lve": ["lye"],
+    "lue": ["lue"],
+    "lun": ["lun"],
+    "luo": ["luo"],
+    "ma": ["ma"],
+    "mai": ["mai"],
+    "man": ["man"],
+    "mang": ["mɑŋ"],
+    "mao": ["maʌ"],
+    "me": ["mø"],
+    "mei": ["mei"],
+    "men": ["mœn"],
+    "meng": ["mɵŋ"],
+    "mi": ["mi"],
+    "mian": ["miɛn"],
+    "miao": ["miaʌ"],
+    "mie": ["mie"],
+    "min": ["min"],
+    "ming": ["mɨŋ"],
+    "miu": ["mio"],
+    "mo": ["mo"],
+    "mou": ["mou"],
+    "mu": ["mu"],
+    "na": ["na"],
+    "nai": ["nai"],
+    "nan": ["nan"],
+    "nang": ["nɑŋ"],
+    "nao": ["naʌ"],
+    "ne": ["nø"],
+    "nei": ["nei"],
+    "nen": ["nœn"],
+    "neng": ["nɵŋ"],
+    "ni": ["ni"],
+    "nia": ["nia"],
+    "nian": ["niɛn"],
+    "niang": ["niɑŋ"],
+    "niao": ["niaʌ"],
+    "nie": ["nie"],
+    "nin": ["nin"],
+    "ning": ["nɨŋ"],
+    "niu": ["nio"],
+    "nong": ["noŋ"],
+    "nou": ["nou"],
+    "nu": ["nu"],
+    "nv": ["ny"],
+    "nuan": ["nuan"],
+    "nve": ["nye"],
+    "nue": ["nye"],
+    "nuo": ["nuo"],
+    "o": ["o"],
+    "ou": ["ou"],
+    "pa": ["pa"],
+    "pai": ["pai"],
+    "pan": ["pan"],
+    "pang": ["pɑŋ"],
+    "pao": ["paʌ"],
+    "pe": ["pø"],
+    "pei": ["pei"],
+    "pen": ["pœn"],
+    "peng": ["pɵŋ"],
+    "pi": ["pi"],
+    "pian": ["piɛn"],
+    "piao": ["piaʌ"],
+    "pie": ["pie"],
+    "pin": ["pin"],
+    "ping": ["pɨŋ"],
+    "po": ["po"],
+    "pou": ["pou"],
+    "pu": ["pu"],
+    "qi": ["tɕi"],
+    "qia": ["tɕia"],
+    "qian": ["tɕiɛn"],
+    "qiang": ["tɕiɑŋ"],
+    "qiao": ["tɕiaʌ"],
+    "qie": ["tɕie"],
+    "qin": ["tɕin"],
+    "qing": ["tɕɨŋ"],
+    "qiong": ["tɕioŋ"],
+    "qiu": ["tɕio"],
+    "qu": ["tɕy"],
+    "quan": ["tɕyɛn"],
+    "que": ["tɕye"],
+    "qun": ["tɕyn"],
+    "ran": ["ʐan"],
+    "rang": ["ʐɑŋ"],
+    "rao": ["ʐaʌ"],
+    "re": ["ʐø"],
+    "ren": ["ʐœn"],
+    "reng": ["ʐɵŋ"],
+    "ri": ["ʐʏ"],
+    "rong": ["ʐoŋ"],
+    "rou": ["ʐou"],
+    "ru": ["ʐu"],
+    "rua": ["ʐua"],
+    "ruan": ["ʐuan"],
+    "rui": ["ʐuei"],
+    "run": ["ʐun"],
+    "ruo": ["ʐuo"],
+    "sa": ["sa"],
+    "sai": ["sai"],
+    "san": ["san"],
+    "sang": ["sɑŋ"],
+    "sao": ["saʌ"],
+    "se": ["sø"],
+    "sen": ["sœn"],
+    "seng": ["sɵŋ"],
+    "sha": ["ʂa"],
+    "shai": ["ʂai"],
+    "shan": ["ʂan"],
+    "shang": ["ʂɑŋ"],
+    "shao": ["ʂaʌ"],
+    "she": ["ʂø"],
+    "shei": ["ʂei"],
+    "shen": ["ʂœn"],
+    "sheng": ["ʂɵŋ"],
+    "shi": ["ʂʏ"],
+    "shou": ["ʂou"],
+    "shu": ["ʂu"],
+    "shua": ["ʂua"],
+    "shuai": ["ʂuai"],
+    "shuan": ["ʂuan"],
+    "shuang": ["ʂuɑŋ"],
+    "shui": ["ʂuei"],
+    "shun": ["ʂun"],
+    "shuo": ["ʂuo"],
+    "si": ["sɪ"],
+    "song": ["soŋ"],
+    "sou": ["sou"],
+    "su": ["su"],
+    "suan": ["suan"],
+    "sui": ["suei"],
+    "sun": ["sun"],
+    "suo": ["suo"],
+    "ta": ["ta"],
+    "tai": ["tai"],
+    "tan": ["tan"],
+    "tang": ["tɑŋ"],
+    "tao": ["taʌ"],
+    "te": ["tø"],
+    "tei": ["tei"],
+    "teng": ["tɵŋ"],
+    "ti": ["ti"],
+    "tian": ["tiɛn"],
+    "tiao": ["tiaʌ"],
+    "tie": ["tie"],
+    "ting": ["tɨŋ"],
+    "tong": ["toŋ"],
+    "tou": ["tou"],
+    "tu": ["tu"],
+    "tuan": ["tuan"],
+    "tui": ["tuei"],
+    "tun": ["tun"],
+    "tuo": ["tuo"],
+    "wa": ["wa"],
+    "wai": ["wai"],
+    "wan": ["wan"],
+    "wang": ["wɑŋ"],
+    "wei": ["wei"],
+    "wen": ["wœn"],
+    "weng": ["wɵŋ"],
+    "wo": ["wo"],
+    "wu": ["wu"],
+    "xi": ["ɕi"],
+    "xia": ["ɕia"],
+    "xian": ["ɕiɛn"],
+    "xiang": ["ɕiɑŋ"],
+    "xiao": ["ɕiaʌ"],
+    "xie": ["ɕie"],
+    "xin": ["ɕin"],
+    "xing": ["ɕɨŋ"],
+    "xiong": ["ɕioŋ"],
+    "xiu": ["ɕio"],
+    "xu": ["ɕy"],
+    "xuan": ["ɕyɛn"],
+    "xue": ["ɕye"],
+    "xun": ["ɕyn"],
+    "ya": ["ia"],
+    "yan": ["iɛn"],
+    "yang": ["iɑŋ"],
+    "yao": ["iaʌ"],
+    "ye": ["ie"],
+    "yi": ["i"],
+    "yin": ["in"],
+    "ying": ["ɨŋ"],
+    "yo": ["io"],
+    "yong": ["ioŋ"],
+    "you": ["io"],
+    "yu": ["y"],
+    "yuan": ["yɛn"],
+    "yue": ["ye"],
+    "yun": ["yn"],
+    "za": ["dza"],
+    "zai": ["dzai"],
+    "zan": ["dzan"],
+    "zang": ["dzɑŋ"],
+    "zao": ["dzaʌ"],
+    "ze": ["dzø"],
+    "zei": ["dzei"],
+    "zen": ["dzœn"],
+    "zeng": ["dzɵŋ"],
+    "zha": ["dʒa"],
+    "zhai": ["dʒai"],
+    "zhan": ["dʒan"],
+    "zhang": ["dʒɑŋ"],
+    "zhao": ["dʒaʌ"],
+    "zhe": ["dʒø"],
+    # "zhei": ["dʒei"], it doesn't exist
+    "zhen": ["dʒœn"],
+    "zheng": ["dʒɵŋ"],
+    "zhi": ["dʒʏ"],
+    "zhong": ["dʒoŋ"],
+    "zhou": ["dʒou"],
+    "zhu": ["dʒu"],
+    "zhua": ["dʒua"],
+    "zhuai": ["dʒuai"],
+    "zhuan": ["dʒuan"],
+    "zhuang": ["dʒuɑŋ"],
+    "zhui": ["dʒuei"],
+    "zhun": ["dʒun"],
+    "zhuo": ["dʒuo"],
+    "zi": ["dzɪ"],
+    "zong": ["dzoŋ"],
+    "zou": ["dzou"],
+    "zu": ["dzu"],
+    "zuan": ["dzuan"],
+    "zui": ["dzuei"],
+    "zun": ["dzun"],
+    "zuo": ["dzuo"],
+}
diff --git a/TTS/tts/utils/text/cleaners.py b/TTS/tts/utils/text/cleaners.py
new file mode 100644
index 0000000000000000000000000000000000000000..6443a902f133e1c17fc3be72d6cb5675bc121d30
--- /dev/null
+++ b/TTS/tts/utils/text/cleaners.py
@@ -0,0 +1,147 @@
+"""Set of default text cleaners"""
+# TODO: pick the cleaner for languages dynamically
+
+import re
+
+from anyascii import anyascii
+
+from TTS.tts.utils.text.chinese_mandarin.numbers import replace_numbers_to_characters_in_text
+
+from .english.abbreviations import abbreviations_en
+from .english.number_norm import normalize_numbers as en_normalize_numbers
+from .english.time_norm import expand_time_english
+from .french.abbreviations import abbreviations_fr
+
+# Regular expression matching whitespace:
+_whitespace_re = re.compile(r"\s+")
+
+
+def expand_abbreviations(text, lang="en"):
+    """Apply the abbreviation substitutions of the given language to the text.
+
+    Args:
+        text (str): input text.
+        lang (str): language code. Abbreviation tables exist for "en" and "fr". Defaults to "en".
+
+    Returns:
+        str: text after all `(regex, replacement)` pairs of the table were applied in order. For any
+        other language the text is returned unchanged.
+    """
+    if lang == "en":
+        _abbreviations = abbreviations_en
+    elif lang == "fr":
+        _abbreviations = abbreviations_fr
+    else:
+        # no table for this language: nothing to expand (this used to fail on an unbound local)
+        return text
+    for regex, replacement in _abbreviations:
+        text = re.sub(regex, replacement, text)
+    return text
+
+
+def lowercase(text):
+    """Return the text converted to lower case with `str.lower`."""
+    lowered = text.lower()
+    return lowered
+
+
+def collapse_whitespace(text):
+    """Replace every match of `_whitespace_re` with one space and strip both ends of the result."""
+    single_spaced = _whitespace_re.sub(" ", text)
+    return single_spaced.strip()
+
+
+def convert_to_ascii(text):
+    """Return the result of running `anyascii` on the text."""
+    ascii_text = anyascii(text)
+    return ascii_text
+
+
+def remove_aux_symbols(text):
+    """Delete angle brackets, parentheses, square brackets and double quotes from the text."""
+    return re.sub(r"[\<\>\(\)\[\]\"]+", "", text)
+
+
+def replace_symbols(text, lang="en"):
+    """Normalize a few symbols: ";" and ":" become ",", "-" becomes a space and "&" is spelled out.
+
+    Args:
+        text (str): input text.
+        lang (str): selects the word written for "&" ("en", "fr", "pt" or "vi"). With any other
+            value "&" is left untouched. Defaults to "en".
+
+    Returns:
+        str: text with the symbols replaced.
+    """
+    text = text.replace(";", ",").replace("-", " ").replace(":", ",")
+    ampersand_words = (("en", " and "), ("fr", " et "), ("pt", " e "), ("vi", " và "))
+    for code, word in ampersand_words:
+        if lang == code:
+            text = text.replace("&", word)
+            break
+    return text
+
+
+def basic_cleaners(text):
+    """Minimal pipeline: lowercase the text, then collapse its whitespace. No transliteration."""
+    return collapse_whitespace(lowercase(text))
+
+
+def transliteration_cleaners(text):
+    """Pipeline for non-English text.
+
+    The ASCII transliteration step (`convert_to_ascii`) is disabled, so the text is only lowercased
+    and its whitespace collapsed.
+    """
+    # text = convert_to_ascii(text)
+    lowered = lowercase(text)
+    return collapse_whitespace(lowered)
+
+
+def basic_german_cleaners(text):
+    """Pipeline for German text: lowercase, then collapse whitespace."""
+    for step in (lowercase, collapse_whitespace):
+        text = step(text)
+    return text
+
+
+# TODO: elaborate it
+def basic_turkish_cleaners(text):
+    """Pipeline for Turkish text: Turkish-aware lowercasing followed by whitespace collapsing."""
+    # Map both capital i letters by hand before lowercasing: `str.lower` turns "I" into "i" and
+    # "İ" into "i" followed by U+0307 (combining dot above), neither of which is the Turkish result.
+    text = text.replace("I", "ı")
+    text = text.replace("İ", "i")
+    text = lowercase(text)
+    text = collapse_whitespace(text)
+    return text
+
+
+def english_cleaners(text):
+    """Pipeline for English text, including time, number and abbreviation expansion.
+
+    The ASCII transliteration step (`convert_to_ascii`) is disabled.
+    """
+    # text = convert_to_ascii(text)
+    steps = (
+        lowercase,
+        expand_time_english,
+        en_normalize_numbers,
+        expand_abbreviations,
+        replace_symbols,
+        remove_aux_symbols,
+        collapse_whitespace,
+    )
+    # every step runs with its default arguments, in the listed order
+    for step in steps:
+        text = step(text)
+    return text
+
+
+def phoneme_cleaners(text):
+    """Pipeline for phonemes mode, including number and abbreviation expansion. Case is kept as is."""
+    expanded = expand_abbreviations(en_normalize_numbers(text))
+    without_symbols = remove_aux_symbols(replace_symbols(expanded))
+    return collapse_whitespace(without_symbols)
+
+
+def french_cleaners(text):
+    """Pipeline for French text. There is no need to expand numbers, phonemizer already does that"""
+    # abbreviations are expanded before lowercasing
+    lowered = lowercase(expand_abbreviations(text, lang="fr"))
+    without_symbols = remove_aux_symbols(replace_symbols(lowered, lang="fr"))
+    return collapse_whitespace(without_symbols)
+
+
+def portuguese_cleaners(text):
+    """Basic pipeline for Portuguese text. There is no need to expand abbreviation and
+    numbers, phonemizer already does that"""
+    with_words = replace_symbols(lowercase(text), lang="pt")
+    return collapse_whitespace(remove_aux_symbols(with_words))
+
+
+def chinese_mandarin_cleaners(text: str) -> str:
+    """Basic pipeline for chinese: only `replace_numbers_to_characters_in_text` is applied."""
+    return replace_numbers_to_characters_in_text(text)
+
+
+def multilingual_cleaners(text):
+    """Pipeline for multilingual text"""
+    # lang=None: `replace_symbols` still rewrites ";", "-" and ":" but leaves "&" alone
+    with_symbols_replaced = replace_symbols(lowercase(text), lang=None)
+    return collapse_whitespace(remove_aux_symbols(with_symbols_replaced))
diff --git a/TTS/tts/utils/text/cmudict.py b/TTS/tts/utils/text/cmudict.py
new file mode 100644
index 0000000000000000000000000000000000000000..f206fb043be1d478fa6ace36fefdefa30b0acb02
--- /dev/null
+++ b/TTS/tts/utils/text/cmudict.py
@@ -0,0 +1,151 @@
+# -*- coding: utf-8 -*-
+
+import re
+
+VALID_SYMBOLS = [
+    "AA",
+    "AA0",
+    "AA1",
+    "AA2",
+    "AE",
+    "AE0",
+    "AE1",
+    "AE2",
+    "AH",
+    "AH0",
+    "AH1",
+    "AH2",
+    "AO",
+    "AO0",
+    "AO1",
+    "AO2",
+    "AW",
+    "AW0",
+    "AW1",
+    "AW2",
+    "AY",
+    "AY0",
+    "AY1",
+    "AY2",
+    "B",
+    "CH",
+    "D",
+    "DH",
+    "EH",
+    "EH0",
+    "EH1",
+    "EH2",
+    "ER",
+    "ER0",
+    "ER1",
+    "ER2",
+    "EY",
+    "EY0",
+    "EY1",
+    "EY2",
+    "F",
+    "G",
+    "HH",
+    "IH",
+    "IH0",
+    "IH1",
+    "IH2",
+    "IY",
+    "IY0",
+    "IY1",
+    "IY2",
+    "JH",
+    "K",
+    "L",
+    "M",
+    "N",
+    "NG",
+    "OW",
+    "OW0",
+    "OW1",
+    "OW2",
+    "OY",
+    "OY0",
+    "OY1",
+    "OY2",
+    "P",
+    "R",
+    "S",
+    "SH",
+    "T",
+    "TH",
+    "UH",
+    "UH0",
+    "UH1",
+    "UH2",
+    "UW",
+    "UW0",
+    "UW1",
+    "UW2",
+    "V",
+    "W",
+    "Y",
+    "Z",
+    "ZH",
+]
+
+
+class CMUDict:
+    """Thin wrapper around CMUDict data. http://www.speech.cs.cmu.edu/cgi-bin/cmudict"""
+
+    def __init__(self, file_or_path, keep_ambiguous=True):
+        """Load the dictionary entries.
+
+        Args:
+            file_or_path: a path given as `str` (opened with latin-1 encoding) or any other object,
+                which is handed to `_parse_cmudict` directly.
+            keep_ambiguous (bool): when False, only words with exactly one pronunciation are kept.
+        """
+        if not isinstance(file_or_path, str):
+            parsed = _parse_cmudict(file_or_path)
+        else:
+            with open(file_or_path, encoding="latin-1") as f:
+                parsed = _parse_cmudict(f)
+        if keep_ambiguous:
+            self._entries = parsed
+        else:
+            self._entries = {word: prons for word, prons in parsed.items() if len(prons) == 1}
+
+    def __len__(self):
+        """Number of words in the dictionary."""
+        return len(self._entries)
+
+    def lookup(self, word):
+        """Returns list of ARPAbet pronunciations of the given word, or None for an unknown word.
+
+        The word is upper-cased before the lookup.
+        """
+        key = word.upper()
+        return self._entries.get(key)
+
+    @staticmethod
+    def get_arpabet(word, cmudict, punctuation_symbols):
+        """Replace a word by its first pronunciation wrapped in curly braces.
+
+        One leading and one trailing character found in `punctuation_symbols` are split off before
+        the lookup and put back around the result. A word unknown to `cmudict` is returned as is.
+        """
+        prefix = suffix = ""
+        if word and word[0] in punctuation_symbols:
+            prefix, word = word[0], word[1:]
+        if word and word[-1] in punctuation_symbols:
+            suffix, word = word[-1], word[:-1]
+        pronunciations = cmudict.lookup(word)
+        core = word if pronunciations is None else "{%s}" % pronunciations[0]
+        return prefix + core + suffix
+
+
+# Matches a parenthesised number such as "(1)"; it is stripped from the word so that
+# every variant of a word is stored under the same key.
+_alt_re = re.compile(r"\([0-9]+\)")
+
+
+def _parse_cmudict(file):
+    """Build a ``{word: [pronunciation, ...]}`` dict from an iterable of CMUDict lines.
+
+    Only lines starting with an uppercase ASCII letter or an apostrophe are read. The
+    word and its pronunciation are separated by two spaces; lines without that
+    separator, and pronunciations rejected by ``_get_pronunciation``, are skipped.
+    """
+    cmudict = {}
+    for line in file:
+        if not line or not ("A" <= line[0] <= "Z" or line[0] == "'"):
+            continue
+        parts = line.split("  ")
+        if len(parts) < 2:
+            # Malformed entry without the double-space separator: skip it rather than
+            # abort the whole load with an IndexError.
+            continue
+        word = _alt_re.sub("", parts[0])
+        pronunciation = _get_pronunciation(parts[1])
+        if pronunciation:
+            cmudict.setdefault(word, []).append(pronunciation)
+    return cmudict
+
+
+def _get_pronunciation(s):
+    """Return `s` without surrounding whitespace, or None if any of its
+    space-separated symbols is not listed in VALID_SYMBOLS."""
+    symbols = s.strip().split(" ")
+    if all(symbol in VALID_SYMBOLS for symbol in symbols):
+        return " ".join(symbols)
+    return None
diff --git a/TTS/tts/utils/text/english/__init__.py b/TTS/tts/utils/text/english/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/TTS/tts/utils/text/english/__pycache__/__init__.cpython-37.pyc b/TTS/tts/utils/text/english/__pycache__/__init__.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7449da21f6aa8f11bf0923b1a051d486dabb9b3a
Binary files /dev/null and b/TTS/tts/utils/text/english/__pycache__/__init__.cpython-37.pyc differ
diff --git a/TTS/tts/utils/text/english/__pycache__/__init__.cpython-38.pyc b/TTS/tts/utils/text/english/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..9c5c1fca6699e4b0caa77b3e58ac0656aa1ae1d5
Binary files /dev/null and b/TTS/tts/utils/text/english/__pycache__/__init__.cpython-38.pyc differ
diff --git a/TTS/tts/utils/text/english/__pycache__/__init__.cpython-39.pyc b/TTS/tts/utils/text/english/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..42c139245856e808774c35df9f3737f4a511a447
Binary files /dev/null and b/TTS/tts/utils/text/english/__pycache__/__init__.cpython-39.pyc differ
diff --git a/TTS/tts/utils/text/english/__pycache__/abbreviations.cpython-37.pyc b/TTS/tts/utils/text/english/__pycache__/abbreviations.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..52ebbcb3d25cfbb48ea349fd868cfc30e6047bb3
Binary files /dev/null and b/TTS/tts/utils/text/english/__pycache__/abbreviations.cpython-37.pyc differ
diff --git a/TTS/tts/utils/text/english/__pycache__/abbreviations.cpython-38.pyc b/TTS/tts/utils/text/english/__pycache__/abbreviations.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b374046b85319035347d0723ebfac2458b86e0fc
Binary files /dev/null and b/TTS/tts/utils/text/english/__pycache__/abbreviations.cpython-38.pyc differ
diff --git a/TTS/tts/utils/text/english/__pycache__/abbreviations.cpython-39.pyc b/TTS/tts/utils/text/english/__pycache__/abbreviations.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4bc98eeb58ee0a95f436707f7d90e942e9f90c78
Binary files /dev/null and b/TTS/tts/utils/text/english/__pycache__/abbreviations.cpython-39.pyc differ
diff --git a/TTS/tts/utils/text/english/__pycache__/number_norm.cpython-37.pyc b/TTS/tts/utils/text/english/__pycache__/number_norm.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7b5c383caf0bc390c11ce80fa50edf0b706976b3
Binary files /dev/null and b/TTS/tts/utils/text/english/__pycache__/number_norm.cpython-37.pyc differ
diff --git a/TTS/tts/utils/text/english/__pycache__/number_norm.cpython-38.pyc b/TTS/tts/utils/text/english/__pycache__/number_norm.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..72be41e8d61f844d40471ca6ea45c6be1494f6fe
Binary files /dev/null and b/TTS/tts/utils/text/english/__pycache__/number_norm.cpython-38.pyc differ
diff --git a/TTS/tts/utils/text/english/__pycache__/number_norm.cpython-39.pyc b/TTS/tts/utils/text/english/__pycache__/number_norm.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..58d7aae02bf313397a6d6462f794eea31da4579b
Binary files /dev/null and b/TTS/tts/utils/text/english/__pycache__/number_norm.cpython-39.pyc differ
diff --git a/TTS/tts/utils/text/english/__pycache__/time_norm.cpython-37.pyc b/TTS/tts/utils/text/english/__pycache__/time_norm.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d2f9498350a28052377239044d964b7b185ca33b
Binary files /dev/null and b/TTS/tts/utils/text/english/__pycache__/time_norm.cpython-37.pyc differ
diff --git a/TTS/tts/utils/text/english/__pycache__/time_norm.cpython-38.pyc b/TTS/tts/utils/text/english/__pycache__/time_norm.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..71fc07decdb4c3a9716abc1ab578f3f68bdbd674
Binary files /dev/null and b/TTS/tts/utils/text/english/__pycache__/time_norm.cpython-38.pyc differ
diff --git a/TTS/tts/utils/text/english/__pycache__/time_norm.cpython-39.pyc b/TTS/tts/utils/text/english/__pycache__/time_norm.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..1b7d6fe6255b8b15d36ca77d5afa1b7b9d77288c
Binary files /dev/null and b/TTS/tts/utils/text/english/__pycache__/time_norm.cpython-39.pyc differ
diff --git a/TTS/tts/utils/text/english/abbreviations.py b/TTS/tts/utils/text/english/abbreviations.py
new file mode 100644
index 0000000000000000000000000000000000000000..cd93c13c8ecfbc0df2d0c6d2fa348388940c213a
--- /dev/null
+++ b/TTS/tts/utils/text/english/abbreviations.py
@@ -0,0 +1,26 @@
+import re
+
+# (compiled pattern, expansion) pairs for English abbreviations. Each pattern matches
+# the abbreviation at a word boundary followed by a literal period, ignoring case.
+# NOTE(review): "misess" is a nonstandard spelling of "missus"; confirm whether any
+# consumer depends on it before changing it.
+abbreviations_en = [
+    (re.compile(r"\b%s\." % abbreviation, re.IGNORECASE), expansion)
+    for abbreviation, expansion in (
+        ("mrs", "misess"),
+        ("mr", "mister"),
+        ("dr", "doctor"),
+        ("st", "saint"),
+        ("co", "company"),
+        ("jr", "junior"),
+        ("maj", "major"),
+        ("gen", "general"),
+        ("drs", "doctors"),
+        ("rev", "reverend"),
+        ("lt", "lieutenant"),
+        ("hon", "honorable"),
+        ("sgt", "sergeant"),
+        ("capt", "captain"),
+        ("esq", "esquire"),
+        ("ltd", "limited"),
+        ("col", "colonel"),
+        ("ft", "fort"),
+    )
+]
diff --git a/TTS/tts/utils/text/english/number_norm.py b/TTS/tts/utils/text/english/number_norm.py
new file mode 100644
index 0000000000000000000000000000000000000000..e8377ede87ebc9d1bb9cffbbb290aa7787caea4f
--- /dev/null
+++ b/TTS/tts/utils/text/english/number_norm.py
@@ -0,0 +1,97 @@
+""" from https://github.com/keithito/tacotron """
+
+import re
+from typing import Dict
+
+import inflect
+
+# Shared inflect engine; the expanders below call its number_to_words().
+_inflect = inflect.engine()
+# A run of digits and commas that starts and ends with a digit, e.g. "1,000".
+_comma_number_re = re.compile(r"([0-9][0-9\,]+[0-9])")
+_decimal_number_re = re.compile(r"([0-9]+\.[0-9]+)")
+# Currency symbol (group 1) followed by an amount (group 2). "€" is matched as well,
+# since _expand_currency carries an inflection table for it.
+_currency_re = re.compile(r"(£|\$|¥|€)([0-9\,\.]*[0-9]+)")
+_ordinal_re = re.compile(r"[0-9]+(st|nd|rd|th)")
+_number_re = re.compile(r"-?[0-9]+")
+
+
+def _remove_commas(m):
+    """Return group 1 of match `m` with every comma deleted."""
+    digits = m.group(1)
+    return "".join(digits.split(","))
+
+
+def _expand_decimal_point(m):
+    """Return group 1 of match `m` with each "." spelled as " point "."""
+    number = m.group(1)
+    return " point ".join(number.split("."))
+
+
+def __expand_currency(value: str, inflection: Dict[float, str]) -> str:
+    """Turn a numeric currency amount into "<integer> <unit> <fraction> <sub-unit>" text.
+
+    Args:
+        value: Amount as written, e.g. "1,000.50"; commas are ignored.
+        inflection: Unit names. Keys 1 and 0.01 hold the singular main and fractional
+            unit (both optional); keys 2 and 0.02 hold the plural forms used otherwise.
+
+    Returns:
+        The digits followed by their unit names, "zero <plural unit>" when the amount
+        is zero, or `value` followed by the plural unit when it contains more than one
+        decimal point.
+    """
+    parts = value.replace(",", "").split(".")
+    if len(parts) > 2:
+        return f"{value} {inflection[2]}"  # Unexpected format
+    text = []
+    integer = int(parts[0]) if parts[0] else 0
+    if integer > 0:
+        integer_unit = inflection.get(integer, inflection[2])
+        text.append(f"{integer} {integer_unit}")
+    fraction_digits = parts[1] if len(parts) > 1 else ""
+    # A single fractional digit counts tenths ("1.5" is fifty cents, not five), so it is
+    # right-padded to two digits; longer fractions are left untouched.
+    fraction = int(fraction_digits.ljust(2, "0")) if fraction_digits else 0
+    if fraction > 0:
+        fraction_unit = inflection.get(fraction / 100, inflection[0.02])
+        text.append(f"{fraction} {fraction_unit}")
+    if len(text) == 0:
+        return f"zero {inflection[2]}"
+    return " ".join(text)
+
+
+def _expand_currency(m: "re.Match") -> str:
+    """Spell out a currency match: group 1 is the symbol, group 2 the numeric amount."""
+    symbol, amount = m.group(1), m.group(2)
+    # Unit names per symbol, in the layout __expand_currency expects: 1 / 0.01 are the
+    # singular main and fractional units, 2 / 0.02 the plural fallbacks.
+    inflections = {
+        "$": {0.01: "cent", 0.02: "cents", 1: "dollar", 2: "dollars"},
+        "€": {0.01: "cent", 0.02: "cents", 1: "euro", 2: "euros"},
+        "£": {0.01: "penny", 0.02: "pence", 1: "pound sterling", 2: "pounds sterling"},
+        # TODO rin
+        "¥": {0.02: "sen", 2: "yen"},
+    }
+    return __expand_currency(amount, inflections[symbol])
+
+
+def _expand_ordinal(m):
+    """Return the whole matched text converted by the inflect engine's number_to_words."""
+    matched = m.group(0)
+    return _inflect.number_to_words(matched)
+
+
+def _expand_number(m):
+    """Spell out the matched integer; values strictly between 1000 and 3000 get
+    their own phrasing, everything else goes straight to number_to_words."""
+    num = int(m.group(0))
+    if not 1000 < num < 3000:
+        return _inflect.number_to_words(num, andword="")
+    if num == 2000:
+        return "two thousand"
+    if 2000 < num < 2010:
+        return "two thousand " + _inflect.number_to_words(num % 100)
+    hundreds, remainder = divmod(num, 100)
+    if remainder == 0:
+        return _inflect.number_to_words(hundreds) + " hundred"
+    # Remaining values are converted with group=2 and zero="oh"; the ", " separators
+    # in the result are replaced by plain spaces.
+    words = _inflect.number_to_words(num, andword="", zero="oh", group=2)
+    return words.replace(", ", " ")
+
+
+def normalize_numbers(text):
+    """Run the number rewrite rules over `text`, one after another, and return the result."""
+    # The sequence is significant: commas are removed first, and the currency, decimal
+    # and ordinal rules run before the generic integer rule.
+    rules = (
+        (_comma_number_re, _remove_commas),
+        (_currency_re, _expand_currency),
+        (_decimal_number_re, _expand_decimal_point),
+        (_ordinal_re, _expand_ordinal),
+        (_number_re, _expand_number),
+    )
+    for pattern, expand in rules:
+        text = pattern.sub(expand, text)
+    return text
diff --git a/TTS/tts/utils/text/english/time_norm.py b/TTS/tts/utils/text/english/time_norm.py
new file mode 100644
index 0000000000000000000000000000000000000000..c8ac09e79db4a239a7f72f101503dbf0d6feb3ae
--- /dev/null
+++ b/TTS/tts/utils/text/english/time_norm.py
@@ -0,0 +1,47 @@
+import re
+
+import inflect
+
+# Shared inflect engine; _expand_num calls its number_to_words().
+_inflect = inflect.engine()
+
+# Matches "H:MM" / "HH:MM" (hours 0-23) with an optional am/pm marker.
+# Groups: 1 = hours, 6 = minutes, 7 = am/pm marker (None when absent).
+# The pattern is a raw string, so a literal dot is written "\."; a doubled backslash
+# would instead match a backslash followed by any character.
+_time_re = re.compile(
+    r"""\b
+                          ((0?[0-9])|(1[0-1])|(1[2-9])|(2[0-3]))  # hours
+                          :
+                          ([0-5][0-9])                            # minutes
+                          \s*(a\.m\.|am|pm|p\.m\.|a\.m|p\.m)?       # am/pm
+                          \b""",
+    re.IGNORECASE | re.X,
+)
+
+
+def _expand_num(n: int) -> str:
+    """Return `n` converted by the shared inflect engine's number_to_words."""
+    words = _inflect.number_to_words(n)
+    return words
+
+
+def _expand_time_english(match: "re.Match") -> str:
+    """Turn a `_time_re` match into words, e.g. "16:05" -> "four oh five p m".
+
+    Hours are read on a 12-hour clock. A zero minute is omitted and minutes 1-9 are
+    preceded by "oh". When the text carries no am/pm marker, one is derived from the
+    hour; an explicit marker is spelled letter by letter without its dots.
+    """
+    hour = int(match.group(1))
+    past_noon = hour >= 12
+    if hour > 12:
+        hour -= 12
+    elif hour == 0:
+        # 00:xx is 12:xx a.m., so past_noon must stay False here.
+        hour = 12
+    time = [_expand_num(hour)]
+
+    minute = int(match.group(6))
+    if minute > 0:
+        if minute < 10:
+            time.append("oh")
+        time.append(_expand_num(minute))
+    am_pm = match.group(7)
+    if am_pm is None:
+        time.append("p m" if past_noon else "a m")
+    else:
+        time.extend(list(am_pm.replace(".", "")))
+    return " ".join(time)
+
+
+def expand_time_english(text: str) -> str:
+    """Replace every `_time_re` match in `text` with the output of `_expand_time_english`."""
+    return _time_re.sub(_expand_time_english, text)
diff --git a/TTS/tts/utils/text/french/__init__.py b/TTS/tts/utils/text/french/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/TTS/tts/utils/text/french/__pycache__/__init__.cpython-37.pyc b/TTS/tts/utils/text/french/__pycache__/__init__.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..bfc0f72de54edfb50f0dc0bda8c87ca5b6ffcb8b
Binary files /dev/null and b/TTS/tts/utils/text/french/__pycache__/__init__.cpython-37.pyc differ
diff --git a/TTS/tts/utils/text/french/__pycache__/__init__.cpython-38.pyc b/TTS/tts/utils/text/french/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..97fd077f03df65421709132c48c2ea66e151210d
Binary files /dev/null and b/TTS/tts/utils/text/french/__pycache__/__init__.cpython-38.pyc differ
diff --git a/TTS/tts/utils/text/french/__pycache__/__init__.cpython-39.pyc b/TTS/tts/utils/text/french/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..165e43783f5fd83c9026c987ad935b697b74fdda
Binary files /dev/null and b/TTS/tts/utils/text/french/__pycache__/__init__.cpython-39.pyc differ
diff --git a/TTS/tts/utils/text/french/__pycache__/abbreviations.cpython-37.pyc b/TTS/tts/utils/text/french/__pycache__/abbreviations.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5ab6210eeebd9d3e96cfde096d93d7af9f42597c
Binary files /dev/null and b/TTS/tts/utils/text/french/__pycache__/abbreviations.cpython-37.pyc differ
diff --git a/TTS/tts/utils/text/french/__pycache__/abbreviations.cpython-38.pyc b/TTS/tts/utils/text/french/__pycache__/abbreviations.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ebae55570fcd2e7f0769326de4f9f2218fbfee48
Binary files /dev/null and b/TTS/tts/utils/text/french/__pycache__/abbreviations.cpython-38.pyc differ
diff --git a/TTS/tts/utils/text/french/__pycache__/abbreviations.cpython-39.pyc b/TTS/tts/utils/text/french/__pycache__/abbreviations.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..396399da8eb88482487871738f75e31453b96ae0
Binary files /dev/null and b/TTS/tts/utils/text/french/__pycache__/abbreviations.cpython-39.pyc differ
diff --git a/TTS/tts/utils/text/french/abbreviations.py b/TTS/tts/utils/text/french/abbreviations.py
new file mode 100644
index 0000000000000000000000000000000000000000..f580dfed7b4576a9f87b0a4145cb729e70050d50
--- /dev/null
+++ b/TTS/tts/utils/text/french/abbreviations.py
@@ -0,0 +1,48 @@
+import re
+
+# List of (regular expression, replacement) pairs for abbreviations in french.
+# The first group matches the abbreviation followed by a period, ignoring case; the
+# abbreviation goes through re.escape so its inner dots only match literal dots.
+# The second group matches a few titles written without a period (case-sensitive).
+# The pairs are presumably applied in list order, so an entry that starts with another
+# entry comes first ("av. J.-C" before "av", "Mlles" before "Mlle").
+abbreviations_fr = [
+    (re.compile("\\b%s\\." % re.escape(x[0]), re.IGNORECASE), x[1])
+    for x in [
+        ("M", "monsieur"),
+        ("Mlle", "mademoiselle"),
+        ("Mlles", "mesdemoiselles"),
+        ("Mme", "Madame"),
+        ("Mmes", "Mesdames"),
+        ("N.B", "nota bene"),
+        ("p.c.q", "parce que"),
+        ("Pr", "professeur"),
+        ("qqch", "quelque chose"),
+        ("rdv", "rendez-vous"),
+        ("max", "maximum"),
+        ("min", "minimum"),
+        ("no", "numéro"),
+        ("adr", "adresse"),
+        ("dr", "docteur"),
+        ("st", "saint"),
+        ("co", "compagnie"),
+        ("jr", "junior"),
+        ("sgt", "sergent"),
+        ("capt", "capitaine"),
+        ("col", "colonel"),
+        ("av. J.-C", "avant Jésus-Christ"),
+        ("apr. J.-C", "après Jésus-Christ"),
+        ("av", "avenue"),
+        ("art", "article"),
+        ("boul", "boulevard"),
+        ("c.-à-d", "c’est-à-dire"),
+        ("etc", "et cetera"),
+        ("ex", "exemple"),
+        ("excl", "exclusivement"),
+    ]
+] + [
+    (re.compile("\\b%s" % x[0]), x[1])
+    for x in [
+        # Plurals first: "Mlle" would otherwise rewrite the start of "Mlles".
+        ("Mlles", "mesdemoiselles"),
+        ("Mlle", "mademoiselle"),
+        ("Mmes", "Mesdames"),
+        ("Mme", "Madame"),
+    ]
+]
diff --git a/TTS/tts/utils/text/japanese/__init__.py b/TTS/tts/utils/text/japanese/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/TTS/tts/utils/text/japanese/__pycache__/__init__.cpython-37.pyc b/TTS/tts/utils/text/japanese/__pycache__/__init__.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ea4997d6f3843aa62a601a7280581e7d892f577c
Binary files /dev/null and b/TTS/tts/utils/text/japanese/__pycache__/__init__.cpython-37.pyc differ
diff --git a/TTS/tts/utils/text/japanese/__pycache__/__init__.cpython-38.pyc b/TTS/tts/utils/text/japanese/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..24d95d1dfab05c2333e90b7378cf0297c0051df7
Binary files /dev/null and b/TTS/tts/utils/text/japanese/__pycache__/__init__.cpython-38.pyc differ
diff --git a/TTS/tts/utils/text/japanese/__pycache__/__init__.cpython-39.pyc b/TTS/tts/utils/text/japanese/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..99721877762871a3afe14f895acc16a3431b0eb0
Binary files /dev/null and b/TTS/tts/utils/text/japanese/__pycache__/__init__.cpython-39.pyc differ
diff --git a/TTS/tts/utils/text/japanese/__pycache__/phonemizer.cpython-37.pyc b/TTS/tts/utils/text/japanese/__pycache__/phonemizer.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..40fe8ac9ad96aa8814a1aa50a76233a0f9bfe71d
Binary files /dev/null and b/TTS/tts/utils/text/japanese/__pycache__/phonemizer.cpython-37.pyc differ
diff --git a/TTS/tts/utils/text/japanese/__pycache__/phonemizer.cpython-38.pyc b/TTS/tts/utils/text/japanese/__pycache__/phonemizer.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b3d6f31d20d32aeff6a00cade8a1532deaa481b6
Binary files /dev/null and b/TTS/tts/utils/text/japanese/__pycache__/phonemizer.cpython-38.pyc differ
diff --git a/TTS/tts/utils/text/japanese/__pycache__/phonemizer.cpython-39.pyc b/TTS/tts/utils/text/japanese/__pycache__/phonemizer.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..fc91ddc08971cd2cbcef35054aa587968b60a8f4
Binary files /dev/null and b/TTS/tts/utils/text/japanese/__pycache__/phonemizer.cpython-39.pyc differ
diff --git a/TTS/tts/utils/text/japanese/phonemizer.py b/TTS/tts/utils/text/japanese/phonemizer.py
new file mode 100644
index 0000000000000000000000000000000000000000..69c609c3c630fbc210e541be39084af023f91a85
--- /dev/null
+++ b/TTS/tts/utils/text/japanese/phonemizer.py
@@ -0,0 +1,467 @@
+# Convert Japanese text to phonemes which is
+# compatible with Julius https://github.com/julius-speech/segmentation-kit
+
+import re
+import unicodedata
+
+# import MeCab
+from num2words import num2words
+
+# Katakana-to-phoneme rules, each written as "<katakana>/<phonemes>".
+# _makerulemap() loads them into dicts keyed by the katakana part, so when a key is
+# listed more than once the last entry wins.
+_CONVRULES = [
+    # Conversion of 2 letters
+    "アァ/ a a",
+    "イィ/ i i",
+    "イェ/ i e",
+    "イャ/ y a",
+    "ウゥ/ u:",
+    "エェ/ e e",
+    "オォ/ o:",
+    "カァ/ k a:",
+    "キィ/ k i:",
+    "クゥ/ k u:",
+    "クャ/ ky a",
+    "クュ/ ky u",
+    "クョ/ ky o",
+    "ケェ/ k e:",
+    "コォ/ k o:",
+    "ガァ/ g a:",
+    "ギィ/ g i:",
+    "グゥ/ g u:",
+    "グャ/ gy a",
+    "グュ/ gy u",
+    "グョ/ gy o",
+    "ゲェ/ g e:",
+    "ゴォ/ g o:",
+    "サァ/ s a:",
+    "シィ/ sh i:",
+    "スゥ/ s u:",
+    "スャ/ sh a",
+    "スュ/ sh u",
+    "スョ/ sh o",
+    "セェ/ s e:",
+    "ソォ/ s o:",
+    "ザァ/ z a:",
+    "ジィ/ j i:",
+    "ズゥ/ z u:",
+    "ズャ/ zy a",
+    "ズュ/ zy u",
+    "ズョ/ zy o",
+    "ゼェ/ z e:",
+    "ゾォ/ z o:",
+    "タァ/ t a:",
+    "チィ/ ch i:",
+    "ツァ/ ts a",
+    "ツィ/ ts i",
+    "ツゥ/ ts u:",
+    "ツャ/ ch a",
+    "ツュ/ ch u",
+    "ツョ/ ch o",
+    "ツェ/ ts e",
+    "ツォ/ ts o",
+    "テェ/ t e:",
+    "トォ/ t o:",
+    "ダァ/ d a:",
+    "ヂィ/ j i:",
+    "ヅゥ/ d u:",
+    "ヅャ/ zy a",
+    "ヅュ/ zy u",
+    "ヅョ/ zy o",
+    "デェ/ d e:",
+    "ドォ/ d o:",
+    "ナァ/ n a:",
+    "ニィ/ n i:",
+    "ヌゥ/ n u:",
+    "ヌャ/ ny a",
+    "ヌュ/ ny u",
+    "ヌョ/ ny o",
+    "ネェ/ n e:",
+    "ノォ/ n o:",
+    "ハァ/ h a:",
+    "ヒィ/ h i:",
+    "フゥ/ f u:",
+    "フャ/ hy a",
+    "フュ/ hy u",
+    "フョ/ hy o",
+    "ヘェ/ h e:",
+    "ホォ/ h o:",
+    "バァ/ b a:",
+    "ビィ/ b i:",
+    "ブゥ/ b u:",
+    # B row: these two entries previously repeated the "フャ"/"フョ" rules, which left
+    # "ブャ" and "ブョ" without a rule.
+    "ブャ/ by a",
+    "ブュ/ by u",
+    "ブョ/ by o",
+    "ベェ/ b e:",
+    "ボォ/ b o:",
+    "パァ/ p a:",
+    "ピィ/ p i:",
+    "プゥ/ p u:",
+    "プャ/ py a",
+    "プュ/ py u",
+    "プョ/ py o",
+    "ペェ/ p e:",
+    "ポォ/ p o:",
+    "マァ/ m a:",
+    "ミィ/ m i:",
+    "ムゥ/ m u:",
+    "ムャ/ my a",
+    "ムュ/ my u",
+    "ムョ/ my o",
+    "メェ/ m e:",
+    "モォ/ m o:",
+    "ヤァ/ y a:",
+    "ユゥ/ y u:",
+    "ユャ/ y a:",
+    "ユュ/ y u:",
+    "ユョ/ y o:",
+    "ヨォ/ y o:",
+    "ラァ/ r a:",
+    "リィ/ r i:",
+    "ルゥ/ r u:",
+    "ルャ/ ry a",
+    "ルュ/ ry u",
+    "ルョ/ ry o",
+    "レェ/ r e:",
+    "ロォ/ r o:",
+    "ワァ/ w a:",
+    "ヲォ/ o:",
+    "ディ/ d i",
+    "デェ/ d e:",
+    "デャ/ dy a",
+    "デュ/ dy u",
+    "デョ/ dy o",
+    "ティ/ t i",
+    "テェ/ t e:",
+    "テャ/ ty a",
+    "テュ/ ty u",
+    "テョ/ ty o",
+    "スィ/ s i",
+    "ズァ/ z u a",
+    "ズィ/ z i",
+    "ズゥ/ z u",
+    "ズャ/ zy a",
+    "ズュ/ zy u",
+    "ズョ/ zy o",
+    "ズェ/ z e",
+    "ズォ/ z o",
+    "キャ/ ky a",
+    "キュ/ ky u",
+    "キョ/ ky o",
+    "シャ/ sh a",
+    "シュ/ sh u",
+    "シェ/ sh e",
+    "ショ/ sh o",
+    "チャ/ ch a",
+    "チュ/ ch u",
+    "チェ/ ch e",
+    "チョ/ ch o",
+    "トゥ/ t u",
+    "トャ/ ty a",
+    "トュ/ ty u",
+    "トョ/ ty o",
+    "ドァ/ d o a",
+    "ドゥ/ d u",
+    "ドャ/ dy a",
+    "ドュ/ dy u",
+    "ドョ/ dy o",
+    "ドォ/ d o:",
+    "ニャ/ ny a",
+    "ニュ/ ny u",
+    "ニョ/ ny o",
+    "ヒャ/ hy a",
+    "ヒュ/ hy u",
+    "ヒョ/ hy o",
+    "ミャ/ my a",
+    "ミュ/ my u",
+    "ミョ/ my o",
+    "リャ/ ry a",
+    "リュ/ ry u",
+    "リョ/ ry o",
+    "ギャ/ gy a",
+    "ギュ/ gy u",
+    "ギョ/ gy o",
+    "ヂェ/ j e",
+    "ヂャ/ j a",
+    "ヂュ/ j u",
+    "ヂョ/ j o",
+    "ジェ/ j e",
+    "ジャ/ j a",
+    "ジュ/ j u",
+    "ジョ/ j o",
+    "ビャ/ by a",
+    "ビュ/ by u",
+    "ビョ/ by o",
+    "ピャ/ py a",
+    "ピュ/ py u",
+    "ピョ/ py o",
+    "ウァ/ u a",
+    "ウィ/ w i",
+    "ウェ/ w e",
+    "ウォ/ w o",
+    "ファ/ f a",
+    "フィ/ f i",
+    "フゥ/ f u",
+    "フャ/ hy a",
+    "フュ/ hy u",
+    "フョ/ hy o",
+    "フェ/ f e",
+    "フォ/ f o",
+    "ヴァ/ b a",
+    "ヴィ/ b i",
+    "ヴェ/ b e",
+    "ヴォ/ b o",
+    "ヴュ/ by u",
+    # Conversion of 1 letter
+    "ア/ a",
+    "イ/ i",
+    "ウ/ u",
+    "エ/ e",
+    "オ/ o",
+    "カ/ k a",
+    "キ/ k i",
+    "ク/ k u",
+    "ケ/ k e",
+    "コ/ k o",
+    "サ/ s a",
+    "シ/ sh i",
+    "ス/ s u",
+    "セ/ s e",
+    "ソ/ s o",
+    "タ/ t a",
+    "チ/ ch i",
+    "ツ/ ts u",
+    "テ/ t e",
+    "ト/ t o",
+    "ナ/ n a",
+    "ニ/ n i",
+    "ヌ/ n u",
+    "ネ/ n e",
+    "ノ/ n o",
+    "ハ/ h a",
+    "ヒ/ h i",
+    "フ/ f u",
+    "ヘ/ h e",
+    "ホ/ h o",
+    "マ/ m a",
+    "ミ/ m i",
+    "ム/ m u",
+    "メ/ m e",
+    "モ/ m o",
+    "ラ/ r a",
+    "リ/ r i",
+    "ル/ r u",
+    "レ/ r e",
+    "ロ/ r o",
+    "ガ/ g a",
+    "ギ/ g i",
+    "グ/ g u",
+    "ゲ/ g e",
+    "ゴ/ g o",
+    "ザ/ z a",
+    "ジ/ j i",
+    "ズ/ z u",
+    "ゼ/ z e",
+    "ゾ/ z o",
+    "ダ/ d a",
+    "ヂ/ j i",
+    "ヅ/ z u",
+    "デ/ d e",
+    "ド/ d o",
+    "バ/ b a",
+    "ビ/ b i",
+    "ブ/ b u",
+    "ベ/ b e",
+    "ボ/ b o",
+    "パ/ p a",
+    "ピ/ p i",
+    "プ/ p u",
+    "ペ/ p e",
+    "ポ/ p o",
+    "ヤ/ y a",
+    "ユ/ y u",
+    "ヨ/ y o",
+    "ワ/ w a",
+    "ヰ/ i",
+    "ヱ/ e",
+    "ヲ/ o",
+    "ン/ N",
+    "ッ/ q",
+    "ヴ/ b u",
+    "ー/:",
+    # Try converting broken text
+    "ァ/ a",
+    "ィ/ i",
+    "ゥ/ u",
+    "ェ/ e",
+    "ォ/ o",
+    "ヮ/ w a",
+    "ォ/ o",
+    # Symbols
+    "、/ ,",
+    "。/ .",
+    "！/ !",
+    "？/ ?",
+    "・/ ,",
+]
+
+# Matches a run of one or more ":"; kata2phoneme uses it to collapse such runs to one.
+_COLON_RX = re.compile(":+")
+# Matches any character other than a space, an ASCII letter or one of ":,.?".
+# NOTE(review): not referenced by the code visible in this module - confirm it is still needed.
+_REJECT_RX = re.compile("[^ a-zA-Z:,.?]")
+
+
+def _makerulemap():
+    """Split _CONVRULES into two lookup dicts and return them as a pair.
+
+    The first dict holds the rules whose key is one character long, the second those
+    with a two-character key. A key listed several times keeps its last value.
+    """
+    single, double = {}, {}
+    for rule in _CONVRULES:
+        kana, phonemes = rule.split("/")
+        if len(kana) == 1:
+            single[kana] = phonemes
+        elif len(kana) == 2:
+            double[kana] = phonemes
+    return single, double
+
+
+_RULEMAP1, _RULEMAP2 = _makerulemap()
+
+
+def kata2phoneme(text: str) -> str:
+    """Convert katakana text to phonemes."""
+    text = text.strip()
+    pieces = []
+    pos, end = 0, len(text)
+    while pos < end:
+        # A two-character rule takes precedence whenever two characters remain.
+        if end - pos >= 2:
+            phonemes = _RULEMAP2.get(text[pos : pos + 2])
+            if phonemes is not None:
+                pieces.append(phonemes)
+                pos += 2
+                continue
+        phonemes = _RULEMAP1.get(text[pos])
+        if phonemes is None:
+            # No rule for this character: keep it as-is, preceded by a space.
+            phonemes = " " + text[pos]
+        pieces.append(phonemes)
+        pos += 1
+    res = _COLON_RX.sub(":", "".join(pieces))
+    # The first character is dropped; most rule outputs start with a space.
+    return res[1:]
+
+
+# The code point ranges ァ..ン and ぁ..ん have the same length, so they pair up
+# position by position into a hiragana -> katakana translation table.
+_KATAKANA = "".join(chr(ch) for ch in range(ord("ァ"), ord("ン") + 1))
+_HIRAGANA = "".join(chr(ch) for ch in range(ord("ぁ"), ord("ん") + 1))
+_HIRA2KATATRANS = str.maketrans(_HIRAGANA, _KATAKANA)
+
+
+def hira2kata(text: str) -> str:
+    """Convert the hiragana in `text` to katakana; other characters are kept.
+
+    A "ウ" followed by a voiced sound mark is merged into the single character "ヴ".
+    """
+    text = text.translate(_HIRA2KATATRANS)
+    # The translation above has already turned "う" into "ウ", so the voiced pair has
+    # to be searched in its katakana form; the hiragana form could never match here.
+    return text.replace("ウ゛", "ヴ")
+
+
+_SYMBOL_TOKENS = set(list("・、。？！"))
+_NO_YOMI_TOKENS = set(list("「」『』―（）［］[]　…"))
+# _TAGGER = MeCab.Tagger()
+
+
+def text2kata(text: str) -> str:
+    """Convert Japanese text to katakana without a morphological analyser.
+
+    The MeCab tagger is disabled in this module (see the commented-out ``_TAGGER``),
+    so no readings are available. The previous body parsed an always-empty tagger
+    output and raised IndexError on every call. Until a tagger is wired back in, each
+    character is treated the way a token without a reading used to be: characters in
+    ``_NO_YOMI_TOKENS`` are dropped and everything else is kept, then hiragana is
+    converted to katakana. Kanji therefore pass through unchanged.
+    """
+    kept = [ch for ch in text if ch not in _NO_YOMI_TOKENS]
+    return hira2kata("".join(kept))
+
+
+# Replacement text for single characters: ASCII symbols, lowercase Latin letters and
+# lowercase Greek letters. japanese_convert_alpha_symbols_to_words lowercases its input
+# before looking characters up here, so only lowercase keys are needed.
+_ALPHASYMBOL_YOMI = {
+    "#": "シャープ",
+    "%": "パーセント",
+    "&": "アンド",
+    "+": "プラス",
+    "-": "マイナス",
+    ":": "コロン",
+    ";": "セミコロン",
+    "<": "小なり",
+    "=": "イコール",
+    ">": "大なり",
+    "@": "アット",
+    "a": "エー",
+    "b": "ビー",
+    "c": "シー",
+    "d": "ディー",
+    "e": "イー",
+    "f": "エフ",
+    "g": "ジー",
+    "h": "エイチ",
+    "i": "アイ",
+    "j": "ジェー",
+    "k": "ケー",
+    "l": "エル",
+    "m": "エム",
+    "n": "エヌ",
+    "o": "オー",
+    "p": "ピー",
+    "q": "キュー",
+    "r": "アール",
+    "s": "エス",
+    "t": "ティー",
+    "u": "ユー",
+    "v": "ブイ",
+    "w": "ダブリュー",
+    "x": "エックス",
+    "y": "ワイ",
+    "z": "ゼット",
+    "α": "アルファ",
+    "β": "ベータ",
+    "γ": "ガンマ",
+    "δ": "デルタ",
+    "ε": "イプシロン",
+    "ζ": "ゼータ",
+    "η": "イータ",
+    "θ": "シータ",
+    "ι": "イオタ",
+    "κ": "カッパ",
+    "λ": "ラムダ",
+    "μ": "ミュー",
+    "ν": "ニュー",
+    "ξ": "クサイ",
+    "ο": "オミクロン",
+    "π": "パイ",
+    "ρ": "ロー",
+    "σ": "シグマ",
+    "τ": "タウ",
+    "υ": "ウプシロン",
+    "φ": "ファイ",
+    "χ": "カイ",
+    "ψ": "プサイ",
+    "ω": "オメガ",
+}
+
+
+# One to three digits followed by one or more ",ddd" groups, e.g. "1,234,567".
+_NUMBER_WITH_SEPARATOR_RX = re.compile("[0-9]{1,3}(,[0-9]{3})+")
+_CURRENCY_MAP = {"$": "ドル", "¥": "円", "£": "ポンド", "€": "ユーロ"}
+# Currency symbol (group 1) directly followed by an amount (group 2).
+_CURRENCY_RX = re.compile(r"([$¥£€])([0-9.]*[0-9])")
+_NUMBER_RX = re.compile(r"[0-9]+(\.[0-9]+)?")
+
+
+def japanese_convert_numbers_to_words(text: str) -> str:
+    """Rewrite the numbers in `text` as Japanese words.
+
+    Thousands separators are removed first, then a currency symbol in front of an
+    amount is replaced by the matching word placed after the amount, and finally each
+    remaining number is handed to ``num2words`` with ``lang="ja"``.
+    """
+
+    def strip_separators(m):
+        return m[0].replace(",", "")
+
+    def currency_to_suffix(m):
+        symbol, amount = m[1], m[2]
+        return amount + _CURRENCY_MAP.get(symbol, symbol)
+
+    def spell_out(m):
+        return num2words(m[0], lang="ja")
+
+    res = _NUMBER_WITH_SEPARATOR_RX.sub(strip_separators, text)
+    res = _CURRENCY_RX.sub(currency_to_suffix, res)
+    return _NUMBER_RX.sub(spell_out, res)
+
+
+def japanese_convert_alpha_symbols_to_words(text: str) -> str:
+    """Lowercase `text` and replace each character that has an entry in
+    _ALPHASYMBOL_YOMI with that entry; other characters are kept."""
+    lowered = text.lower()
+    return "".join(_ALPHASYMBOL_YOMI.get(ch, ch) for ch in lowered)
+
+
+def japanese_text_to_phonemes(text: str) -> str:
+    """Convert Japanese text to phonemes."""
+    # NFKC-normalise, then run the conversion stages in order; each stage receives
+    # the previous stage's output.
+    res = unicodedata.normalize("NFKC", text)
+    stages = (
+        japanese_convert_numbers_to_words,
+        japanese_convert_alpha_symbols_to_words,
+        text2kata,
+        kata2phoneme,
+    )
+    for stage in stages:
+        res = stage(res)
+    # The result is returned with every space removed.
+    return res.replace(" ", "")
diff --git a/TTS/tts/utils/text/korean/__init__.py b/TTS/tts/utils/text/korean/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/TTS/tts/utils/text/korean/__pycache__/__init__.cpython-37.pyc b/TTS/tts/utils/text/korean/__pycache__/__init__.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..42b63eb2a8714b5634e51352d3f5c54f7eb7089e
Binary files /dev/null and b/TTS/tts/utils/text/korean/__pycache__/__init__.cpython-37.pyc differ
diff --git a/TTS/tts/utils/text/korean/__pycache__/__init__.cpython-38.pyc b/TTS/tts/utils/text/korean/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ee52ceca82e365a244f8532edc62ba8009384ba0
Binary files /dev/null and b/TTS/tts/utils/text/korean/__pycache__/__init__.cpython-38.pyc differ
diff --git a/TTS/tts/utils/text/korean/__pycache__/__init__.cpython-39.pyc b/TTS/tts/utils/text/korean/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ff9bcdfea72429a133c4bc93226364705abafe58
Binary files /dev/null and b/TTS/tts/utils/text/korean/__pycache__/__init__.cpython-39.pyc differ
diff --git a/TTS/tts/utils/text/korean/__pycache__/ko_dictionary.cpython-37.pyc b/TTS/tts/utils/text/korean/__pycache__/ko_dictionary.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ad52caa51f9ce5a5196a95a668001144dea16f73
Binary files /dev/null and b/TTS/tts/utils/text/korean/__pycache__/ko_dictionary.cpython-37.pyc differ
diff --git a/TTS/tts/utils/text/korean/__pycache__/ko_dictionary.cpython-38.pyc b/TTS/tts/utils/text/korean/__pycache__/ko_dictionary.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d2565ed5745156ca78ff000b840c842311caa90b
Binary files /dev/null and b/TTS/tts/utils/text/korean/__pycache__/ko_dictionary.cpython-38.pyc differ
diff --git a/TTS/tts/utils/text/korean/__pycache__/ko_dictionary.cpython-39.pyc b/TTS/tts/utils/text/korean/__pycache__/ko_dictionary.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..98fe14f7de2ef98d58269b0f8f5dc072aec1b0ed
Binary files /dev/null and b/TTS/tts/utils/text/korean/__pycache__/ko_dictionary.cpython-39.pyc differ
diff --git a/TTS/tts/utils/text/korean/__pycache__/korean.cpython-37.pyc b/TTS/tts/utils/text/korean/__pycache__/korean.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..eda86ce5a3a06b5fad6c4385fe15d48d48f9a241
Binary files /dev/null and b/TTS/tts/utils/text/korean/__pycache__/korean.cpython-37.pyc differ
diff --git a/TTS/tts/utils/text/korean/__pycache__/korean.cpython-38.pyc b/TTS/tts/utils/text/korean/__pycache__/korean.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..44019d1efdfe5fb6859316f0b99b60573f459559
Binary files /dev/null and b/TTS/tts/utils/text/korean/__pycache__/korean.cpython-38.pyc differ
diff --git a/TTS/tts/utils/text/korean/__pycache__/korean.cpython-39.pyc b/TTS/tts/utils/text/korean/__pycache__/korean.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8f32bb72e778f6da0315fd79c7669fbfddf5bb22
Binary files /dev/null and b/TTS/tts/utils/text/korean/__pycache__/korean.cpython-39.pyc differ
diff --git a/TTS/tts/utils/text/korean/__pycache__/phonemizer.cpython-37.pyc b/TTS/tts/utils/text/korean/__pycache__/phonemizer.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..536a18600daeae2a5605886f41ffa85ec0777547
Binary files /dev/null and b/TTS/tts/utils/text/korean/__pycache__/phonemizer.cpython-37.pyc differ
diff --git a/TTS/tts/utils/text/korean/__pycache__/phonemizer.cpython-38.pyc b/TTS/tts/utils/text/korean/__pycache__/phonemizer.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..038b4aa2152bbcc157589bf8dc8aa36585764d15
Binary files /dev/null and b/TTS/tts/utils/text/korean/__pycache__/phonemizer.cpython-38.pyc differ
diff --git a/TTS/tts/utils/text/korean/__pycache__/phonemizer.cpython-39.pyc b/TTS/tts/utils/text/korean/__pycache__/phonemizer.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7f5e9c85b442193129b9a0f15e8d2c0dcd70851f
Binary files /dev/null and b/TTS/tts/utils/text/korean/__pycache__/phonemizer.cpython-39.pyc differ
diff --git a/TTS/tts/utils/text/korean/ko_dictionary.py b/TTS/tts/utils/text/korean/ko_dictionary.py
new file mode 100644
index 0000000000000000000000000000000000000000..5d2a148234be297cca12417964b191f1f521280d
--- /dev/null
+++ b/TTS/tts/utils/text/korean/ko_dictionary.py
@@ -0,0 +1,44 @@
+# coding: utf-8
+# Add the word you want to the dictionary.
+# Symbol expressions mapped to the Hangul spelling that replaces them.
+etc_dictionary = {
+    "1+1": "원플러스원",
+    "2+1": "투플러스원",
+}
+
+
+# Whole upper-case English words and acronyms with a Hangul spelling.
+_ENGLISH_WORDS = {
+    "KOREA": "코리아",
+    "IDOL": "아이돌",
+    "IT": "아이티",
+    "IQ": "아이큐",
+    "UP": "업",
+    "DOWN": "다운",
+    "PC": "피씨",
+    "CCTV": "씨씨티비",
+    "SNS": "에스엔에스",
+    "AI": "에이아이",
+    "CEO": "씨이오",
+}
+
+# Hangul spelling of each individual Latin capital letter.
+_ENGLISH_LETTERS = {
+    "A": "에이",
+    "B": "비",
+    "C": "씨",
+    "D": "디",
+    "E": "이",
+    "F": "에프",
+    "G": "지",
+    "H": "에이치",
+    "I": "아이",
+    "J": "제이",
+    "K": "케이",
+    "L": "엘",
+    "M": "엠",
+    "N": "엔",
+    "O": "오",
+    "P": "피",
+    "Q": "큐",
+    "R": "알",
+    "S": "에스",
+    "T": "티",
+    "U": "유",
+    "V": "브이",
+    "W": "더블유",
+    "X": "엑스",
+    "Y": "와이",
+    "Z": "제트",
+}
+
+# Upper-case English token -> Hangul spelling (words first, then single letters).
+english_dictionary = {**_ENGLISH_WORDS, **_ENGLISH_LETTERS}
diff --git a/TTS/tts/utils/text/korean/korean.py b/TTS/tts/utils/text/korean/korean.py
new file mode 100644
index 0000000000000000000000000000000000000000..1f39a9e4da52be75f4fbf6838da1824d7d488a47
--- /dev/null
+++ b/TTS/tts/utils/text/korean/korean.py
@@ -0,0 +1,32 @@
+﻿# coding: utf-8
+# Code based on https://github.com/carpedm20/multi-speaker-tacotron-tensorflow/blob/master/text/korean.py
+import re
+
+from TTS.tts.utils.text.korean.ko_dictionary import english_dictionary, etc_dictionary
+
+
+def normalize(text):
+    """Normalize raw text before Korean grapheme-to-phoneme conversion.
+
+    Steps, in order: trim surrounding whitespace, delete every character matched
+    by the CJK character class below, apply the `etc_dictionary` replacements,
+    replace Latin-letter runs found in `english_dictionary`, then lower-case.
+
+    Args:
+        text (str): Input text.
+
+    Returns:
+        str: The normalized text.
+    """
+    cleaned = re.sub("[⺀-⺙⺛-⻳⼀-⿕々〇〡-〩〸-〺〻㐀-䶵一-鿃豈-鶴侮-頻並-龎]", "", text.strip())
+    replaced = normalize_english(normalize_with_dictionary(cleaned, etc_dictionary))
+    # Lower-casing runs last so that upper-case dictionary keys can still match above.
+    return replaced.lower()
+
+
+def normalize_with_dictionary(text, dic):
+    """Replace every occurrence of a key of `dic` found in `text` with its value.
+
+    Keys are matched literally (regex metacharacters are escaped). Longer keys are
+    tried first so that a key which is a prefix of another key cannot shadow it.
+
+    Args:
+        text (str): Text to rewrite.
+        dic (Dict[str, str]): Mapping from literal substring to its replacement.
+
+    Returns:
+        str: `text` with all matches replaced; `text` unchanged when no key occurs in it.
+    """
+    # Cheap substring scan: skip building a regex when nothing can match.
+    if not any(key in text for key in dic):
+        return text
+    # Regex alternation takes the first alternative that matches, so order the keys
+    # longest-first; the sort is stable, so equal-length keys keep their dict order.
+    ordered_keys = sorted(dic, key=len, reverse=True)
+    pattern = re.compile("|".join(re.escape(key) for key in ordered_keys))
+    return pattern.sub(lambda match: dic[match.group()], text)
+
+
+def normalize_english(text):
+    """Replace Latin-letter runs that are keys of `english_dictionary`.
+
+    Each maximal run of ASCII letters is looked up verbatim (case-sensitively);
+    runs that are not in the dictionary are left untouched.
+
+    Args:
+        text (str): Input text.
+
+    Returns:
+        str: Text with the known English tokens replaced by their dictionary value.
+    """
+    return re.sub(
+        "([A-Za-z]+)",
+        lambda match: english_dictionary.get(match.group(), match.group()),
+        text,
+    )
diff --git a/TTS/tts/utils/text/korean/phonemizer.py b/TTS/tts/utils/text/korean/phonemizer.py
new file mode 100644
index 0000000000000000000000000000000000000000..ed70fc35f6950b98ec715577a3303c5a271fbb0e
--- /dev/null
+++ b/TTS/tts/utils/text/korean/phonemizer.py
@@ -0,0 +1,36 @@
+from jamo import hangul_to_jamo
+
+from TTS.tts.utils.text.korean.korean import normalize
+
+# Lazily created `g2pkk.G2p` instance, shared across calls.
+g2p = None
+
+
+def korean_text_to_phonemes(text, character: str = "hangeul") -> str:
+    """Normalize Korean text, run it through g2pkk and return the phoneme string.
+
+    With the default `character`, the g2p output is decomposed into jamo, so the
+    input and output can look the same while differing in Unicode:
+
+        input  = '하늘' (U+D558 U+B298), (하 + 늘)
+        output = '하늘' (U+1112 U+1161 U+1102 U+1173 U+11AF), (ᄒ + ᅡ + ᄂ + ᅳ + ᆯ)
+
+    Args:
+        text (str): Text to convert.
+        character (str): `"english"` passes the g2p output through `anyascii`;
+            any other value returns the jamo decomposition. Defaults to `"hangeul"`.
+
+    Returns:
+        str: The phonemized text.
+    """
+    global g2p  # pylint: disable=global-statement
+    if g2p is None:
+        # Imported here so g2pkk is only loaded on first use.
+        from g2pkk import G2p
+
+        g2p = G2p()
+
+    romanize = character == "english"
+    if romanize:
+        from anyascii import anyascii
+
+    pronounced = g2p(normalize(text))
+    if romanize:
+        return anyascii(pronounced)
+    # '하늘' --> 'ᄒ', 'ᅡ', 'ᄂ', 'ᅳ', 'ᆯ' joined back into one string
+    return "".join(hangul_to_jamo(pronounced))
diff --git a/TTS/tts/utils/text/phonemizers/__init__.py b/TTS/tts/utils/text/phonemizers/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..d448b06fd1a562674db21fad15f194ac25adfc1f
--- /dev/null
+++ b/TTS/tts/utils/text/phonemizers/__init__.py
@@ -0,0 +1,57 @@
+from TTS.tts.utils.text.phonemizers.base import BasePhonemizer
+from TTS.tts.utils.text.phonemizers.espeak_wrapper import ESpeak
+from TTS.tts.utils.text.phonemizers.gruut_wrapper import Gruut
+from TTS.tts.utils.text.phonemizers.ja_jp_phonemizer import JA_JP_Phonemizer
+from TTS.tts.utils.text.phonemizers.ko_kr_phonemizer import KO_KR_Phonemizer
+from TTS.tts.utils.text.phonemizers.zh_cn_phonemizer import ZH_CN_Phonemizer
+
+# Registry of phonemizer classes keyed by `name()`. It lists every class that
+# `get_phonemizer_by_name` can build, including the zh-cn and ko-kr phonemizers
+# that are registered as language defaults below.
+PHONEMIZERS = {
+    b.name(): b for b in (ESpeak, Gruut, JA_JP_Phonemizer, ZH_CN_Phonemizer, KO_KR_Phonemizer)
+}
+
+
+ESPEAK_LANGS = list(ESpeak.supported_languages())
+GRUUT_LANGS = list(Gruut.supported_languages())
+
+
+# Dict setting default phonemizers for each language
+# Add Gruut languages
+DEF_LANG_TO_PHONEMIZER = dict.fromkeys(GRUUT_LANGS, Gruut.name())
+
+
+# Add ESpeak languages and override any existing ones
+DEF_LANG_TO_PHONEMIZER.update(dict.fromkeys(ESPEAK_LANGS, ESpeak.name()))
+
+# Force default for some languages
+DEF_LANG_TO_PHONEMIZER["en"] = DEF_LANG_TO_PHONEMIZER["en-us"]
+DEF_LANG_TO_PHONEMIZER["ja-jp"] = JA_JP_Phonemizer.name()
+DEF_LANG_TO_PHONEMIZER["zh-cn"] = ZH_CN_Phonemizer.name()
+DEF_LANG_TO_PHONEMIZER["ko-kr"] = KO_KR_Phonemizer.name()
+
+
+def get_phonemizer_by_name(name: str, **kwargs) -> BasePhonemizer:
+    """Build the phonemizer registered under `name`.
+
+    Args:
+        name (str):
+            Name of the phonemizer that should match `phonemizer.name()`.
+
+        kwargs (dict):
+            Extra keyword arguments forwarded to the phonemizer constructor.
+
+    Returns:
+        BasePhonemizer: A new instance of the requested phonemizer.
+
+    Raises:
+        ValueError: If no phonemizer is known under `name`.
+    """
+    known = (
+        ("espeak", ESpeak),
+        ("gruut", Gruut),
+        ("zh_cn_phonemizer", ZH_CN_Phonemizer),
+        ("ja_jp_phonemizer", JA_JP_Phonemizer),
+        ("ko_kr_phonemizer", KO_KR_Phonemizer),
+    )
+    for key, phonemizer_cls in known:
+        if name == key:
+            return phonemizer_cls(**kwargs)
+    raise ValueError(f"Phonemizer {name} not found")
+
+
+# Manual check: running this module directly prints the language -> default phonemizer mapping.
+if __name__ == "__main__":
+    print(DEF_LANG_TO_PHONEMIZER)
diff --git a/TTS/tts/utils/text/phonemizers/__pycache__/__init__.cpython-37.pyc b/TTS/tts/utils/text/phonemizers/__pycache__/__init__.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..aa0a9042fad0ceaf51ca25e450b71a4d49ce8d2d
Binary files /dev/null and b/TTS/tts/utils/text/phonemizers/__pycache__/__init__.cpython-37.pyc differ
diff --git a/TTS/tts/utils/text/phonemizers/__pycache__/__init__.cpython-38.pyc b/TTS/tts/utils/text/phonemizers/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5e03ece49fa14228687ffc54ef112e8d2a6b991f
Binary files /dev/null and b/TTS/tts/utils/text/phonemizers/__pycache__/__init__.cpython-38.pyc differ
diff --git a/TTS/tts/utils/text/phonemizers/__pycache__/__init__.cpython-39.pyc b/TTS/tts/utils/text/phonemizers/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8a946bc24bc03e52980031d4fb25bc552a26cbc4
Binary files /dev/null and b/TTS/tts/utils/text/phonemizers/__pycache__/__init__.cpython-39.pyc differ
diff --git a/TTS/tts/utils/text/phonemizers/__pycache__/base.cpython-37.pyc b/TTS/tts/utils/text/phonemizers/__pycache__/base.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..788807eba45a551751ffe8adaa7e3eb8fe3f023c
Binary files /dev/null and b/TTS/tts/utils/text/phonemizers/__pycache__/base.cpython-37.pyc differ
diff --git a/TTS/tts/utils/text/phonemizers/__pycache__/base.cpython-38.pyc b/TTS/tts/utils/text/phonemizers/__pycache__/base.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..79ed0c172685ff16f9c2df586ea34ef985d658b9
Binary files /dev/null and b/TTS/tts/utils/text/phonemizers/__pycache__/base.cpython-38.pyc differ
diff --git a/TTS/tts/utils/text/phonemizers/__pycache__/base.cpython-39.pyc b/TTS/tts/utils/text/phonemizers/__pycache__/base.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8e871eff705f88cd805fd1101cffb8132ecbbd7d
Binary files /dev/null and b/TTS/tts/utils/text/phonemizers/__pycache__/base.cpython-39.pyc differ
diff --git a/TTS/tts/utils/text/phonemizers/__pycache__/espeak_wrapper.cpython-37.pyc b/TTS/tts/utils/text/phonemizers/__pycache__/espeak_wrapper.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..9126247f5db17a8bc985c907c939c71dc5976d0b
Binary files /dev/null and b/TTS/tts/utils/text/phonemizers/__pycache__/espeak_wrapper.cpython-37.pyc differ
diff --git a/TTS/tts/utils/text/phonemizers/__pycache__/espeak_wrapper.cpython-38.pyc b/TTS/tts/utils/text/phonemizers/__pycache__/espeak_wrapper.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..1e47b85db842a6bc37cebe5b574d9b227d96b619
Binary files /dev/null and b/TTS/tts/utils/text/phonemizers/__pycache__/espeak_wrapper.cpython-38.pyc differ
diff --git a/TTS/tts/utils/text/phonemizers/__pycache__/espeak_wrapper.cpython-39.pyc b/TTS/tts/utils/text/phonemizers/__pycache__/espeak_wrapper.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f2bf088a2f5ca9fe17fa828e261034fbc6bdcadf
Binary files /dev/null and b/TTS/tts/utils/text/phonemizers/__pycache__/espeak_wrapper.cpython-39.pyc differ
diff --git a/TTS/tts/utils/text/phonemizers/__pycache__/gruut_wrapper.cpython-37.pyc b/TTS/tts/utils/text/phonemizers/__pycache__/gruut_wrapper.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..83d62e1e00ee5ee6233aa810456dde226d99c065
Binary files /dev/null and b/TTS/tts/utils/text/phonemizers/__pycache__/gruut_wrapper.cpython-37.pyc differ
diff --git a/TTS/tts/utils/text/phonemizers/__pycache__/gruut_wrapper.cpython-38.pyc b/TTS/tts/utils/text/phonemizers/__pycache__/gruut_wrapper.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..9db55b6457655baedfa7a37460cc3cae90d88af6
Binary files /dev/null and b/TTS/tts/utils/text/phonemizers/__pycache__/gruut_wrapper.cpython-38.pyc differ
diff --git a/TTS/tts/utils/text/phonemizers/__pycache__/gruut_wrapper.cpython-39.pyc b/TTS/tts/utils/text/phonemizers/__pycache__/gruut_wrapper.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..947e1a487b7bdef68538f89b376f4532643c58b0
Binary files /dev/null and b/TTS/tts/utils/text/phonemizers/__pycache__/gruut_wrapper.cpython-39.pyc differ
diff --git a/TTS/tts/utils/text/phonemizers/__pycache__/ja_jp_phonemizer.cpython-37.pyc b/TTS/tts/utils/text/phonemizers/__pycache__/ja_jp_phonemizer.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..997f068e68cc3c50f439e9175cda11b5a830903a
Binary files /dev/null and b/TTS/tts/utils/text/phonemizers/__pycache__/ja_jp_phonemizer.cpython-37.pyc differ
diff --git a/TTS/tts/utils/text/phonemizers/__pycache__/ja_jp_phonemizer.cpython-38.pyc b/TTS/tts/utils/text/phonemizers/__pycache__/ja_jp_phonemizer.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..38f97988254cd34ca330da7e31dfe6c157498b42
Binary files /dev/null and b/TTS/tts/utils/text/phonemizers/__pycache__/ja_jp_phonemizer.cpython-38.pyc differ
diff --git a/TTS/tts/utils/text/phonemizers/__pycache__/ja_jp_phonemizer.cpython-39.pyc b/TTS/tts/utils/text/phonemizers/__pycache__/ja_jp_phonemizer.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..2e31d8a89277e090093eb2e2190c1615e76c0c85
Binary files /dev/null and b/TTS/tts/utils/text/phonemizers/__pycache__/ja_jp_phonemizer.cpython-39.pyc differ
diff --git a/TTS/tts/utils/text/phonemizers/__pycache__/ko_kr_phonemizer.cpython-37.pyc b/TTS/tts/utils/text/phonemizers/__pycache__/ko_kr_phonemizer.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..71d2ceee013f507e5e558d3c02f0c098c4daa8d4
Binary files /dev/null and b/TTS/tts/utils/text/phonemizers/__pycache__/ko_kr_phonemizer.cpython-37.pyc differ
diff --git a/TTS/tts/utils/text/phonemizers/__pycache__/ko_kr_phonemizer.cpython-38.pyc b/TTS/tts/utils/text/phonemizers/__pycache__/ko_kr_phonemizer.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..cd836af1ab3842037f2a98853ce03a4e39a8ae39
Binary files /dev/null and b/TTS/tts/utils/text/phonemizers/__pycache__/ko_kr_phonemizer.cpython-38.pyc differ
diff --git a/TTS/tts/utils/text/phonemizers/__pycache__/ko_kr_phonemizer.cpython-39.pyc b/TTS/tts/utils/text/phonemizers/__pycache__/ko_kr_phonemizer.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6cc69ef0b36cfbc7bb072ae4f515d889f470eb67
Binary files /dev/null and b/TTS/tts/utils/text/phonemizers/__pycache__/ko_kr_phonemizer.cpython-39.pyc differ
diff --git a/TTS/tts/utils/text/phonemizers/__pycache__/zh_cn_phonemizer.cpython-37.pyc b/TTS/tts/utils/text/phonemizers/__pycache__/zh_cn_phonemizer.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..29cc2fa39c0696c1835fbddfd1c7ecebeec60c16
Binary files /dev/null and b/TTS/tts/utils/text/phonemizers/__pycache__/zh_cn_phonemizer.cpython-37.pyc differ
diff --git a/TTS/tts/utils/text/phonemizers/__pycache__/zh_cn_phonemizer.cpython-38.pyc b/TTS/tts/utils/text/phonemizers/__pycache__/zh_cn_phonemizer.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..97c879451fab02a34a115bd4cc0d38a1a4af381b
Binary files /dev/null and b/TTS/tts/utils/text/phonemizers/__pycache__/zh_cn_phonemizer.cpython-38.pyc differ
diff --git a/TTS/tts/utils/text/phonemizers/__pycache__/zh_cn_phonemizer.cpython-39.pyc b/TTS/tts/utils/text/phonemizers/__pycache__/zh_cn_phonemizer.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7ff815af3df4f32dbe045cd8cd2ce964d904f52f
Binary files /dev/null and b/TTS/tts/utils/text/phonemizers/__pycache__/zh_cn_phonemizer.cpython-39.pyc differ
diff --git a/TTS/tts/utils/text/phonemizers/base.py b/TTS/tts/utils/text/phonemizers/base.py
new file mode 100644
index 0000000000000000000000000000000000000000..08fa8e130a1324f9052a53dfb03f5918a24d3ec6
--- /dev/null
+++ b/TTS/tts/utils/text/phonemizers/base.py
@@ -0,0 +1,141 @@
+import abc
+from typing import List, Tuple
+
+from TTS.tts.utils.text.punctuation import Punctuation
+
+
+class BasePhonemizer(abc.ABC):
+    """Abstract base class shared by all phonemizer backends.
+
+    `phonemize` runs three stages:
+        1. Preprocessing:
+            - strip surrounding whitespace
+            - take the punctuation out of the text through the `Punctuation` helper
+            - keep track of the punctuation marks when they must be restored
+
+        2. Phonemization:
+            - convert every text piece to phonemes with the backend's `_phonemize`
+
+        3. Postprocessing:
+            - restore the punctuation marks (only when `keep_puncs` is set)
+            - return a single phoneme string
+
+    Args:
+        language (str):
+            Language used by the phonemizer.
+
+        punctuations (List[str]):
+            List of punctuation marks to be preserved.
+
+        keep_puncs (bool):
+            Whether to preserve punctuation marks or not.
+    """
+
+    def __init__(self, language, punctuations=Punctuation.default_puncs(), keep_puncs=False):
+        # Fail early when the backend is missing from the system.
+        backend_present = self.is_available()
+        if not backend_present:
+            raise RuntimeError("{} not installed on your system".format(self.name()))  # pragma: nocover
+
+        # Validate the requested language through the overridable hook.
+        self._language = self._init_language(language)
+
+        # Punctuation handling used by the pre- and post-processing steps.
+        self._keep_puncs = keep_puncs
+        self._punctuator = Punctuation(punctuations)
+
+    def _init_language(self, language):
+        """Validate `language` and return the value to store.
+
+        Child classes may override this hook.
+
+        Raises:
+            RuntimeError: If the backend does not support `language`.
+        """
+        if self.is_supported_language(language):
+            return language
+        raise RuntimeError(f'language "{language}" is not supported by the ' f"{self.name()} backend")
+
+    @property
+    def language(self):
+        """Language code this phonemizer was configured with."""
+        return self._language
+
+    @staticmethod
+    @abc.abstractmethod
+    def name():
+        """Name of the backend."""
+
+    @classmethod
+    @abc.abstractmethod
+    def is_available(cls):
+        """Return True if the backend is installed, False otherwise."""
+
+    @classmethod
+    @abc.abstractmethod
+    def version(cls):
+        """Return the backend version as a tuple (major, minor, patch)."""
+
+    @staticmethod
+    @abc.abstractmethod
+    def supported_languages():
+        """Return a dict of language codes -> name supported by the backend."""
+
+    def is_supported_language(self, language):
+        """Return True if `language` is among `supported_languages()`."""
+        supported = self.supported_languages()
+        return language in supported
+
+    @abc.abstractmethod
+    def _phonemize(self, text, separator):
+        """Backend-specific conversion of one text piece to phonemes."""
+
+    def _phonemize_preprocess(self, text) -> Tuple[List[str], List]:
+        """Prepare the text for phonemization.
+
+        Strips surrounding whitespace, then removes the punctuation. Override this
+        if you need a different behaviour.
+
+        Returns:
+            Tuple[List[str], List]: The text pieces to phonemize and the punctuation
+            marks to restore afterwards (an empty list when punctuation is dropped).
+        """
+        stripped = text.strip()
+        if not self._keep_puncs:
+            return [self._punctuator.strip(stripped)], []
+        # a tuple (text, punctuation marks)
+        return self._punctuator.strip_to_restore(stripped)
+
+    def _phonemize_postprocess(self, phonemized, punctuations) -> str:
+        """Turn the raw per-piece phonemes into the final string.
+
+        Override this if you need a different behaviour.
+        """
+        if not self._keep_puncs:
+            return phonemized[0]
+        restored = self._punctuator.restore(phonemized, punctuations)
+        return restored[0]
+
+    def phonemize(self, text: str, separator="|") -> str:
+        """Return `text` phonemized for the configured language.
+
+        Args:
+            text (str):
+                Text to be phonemized.
+
+            separator (str):
+                String separator used between phonemes. Defaults to '|'.
+
+        Returns:
+            (str): Phonemized text
+        """
+        pieces, punctuations = self._phonemize_preprocess(text)
+        phonemized = [self._phonemize(piece, separator) for piece in pieces]
+        return self._phonemize_postprocess(phonemized, punctuations)
+
+    def print_logs(self, level: int = 0):
+        """Print the phoneme language and backend, indented by `level` tabs."""
+        prefix = "\t" * level
+        print(f"{prefix}| > phoneme language: {self.language}")
+        print(f"{prefix}| > phoneme backend: {self.name()}")
diff --git a/TTS/tts/utils/text/phonemizers/espeak_wrapper.py b/TTS/tts/utils/text/phonemizers/espeak_wrapper.py
new file mode 100644
index 0000000000000000000000000000000000000000..d1f90e7af9cbd52c20c8a3ae20d584ac3d9f6014
--- /dev/null
+++ b/TTS/tts/utils/text/phonemizers/espeak_wrapper.py
@@ -0,0 +1,242 @@
+import logging
+import re
+import subprocess
+from typing import Dict, List
+import sys
+
+sys.path.append('.')
+
+from TTS.tts.utils.text.phonemizers.base import BasePhonemizer
+from TTS.tts.utils.text.punctuation import Punctuation
+
+
+def is_tool(name):
+    """Return True when an executable called `name` is found by `shutil.which`."""
+    import shutil
+
+    located = shutil.which(name)
+    return located is not None
+
+
+# priority: espeakng > espeak
+# First candidate found on the system, or None when neither is installed.
+_DEF_ESPEAK_LIB = next((_lib for _lib in ("espeak-ng", "espeak") if is_tool(_lib)), None)
+
+
+def _espeak_exe(espeak_lib: str, args: List, sync=False) -> List[bytes]:
+    """Run espeak with the given arguments and collect its output lines.
+
+    The executable is always started with `-q -b 1` (quiet, UTF-8 text encoding)
+    followed by `args`. stderr is merged into stdout, so diagnostics printed by
+    the tool show up among the returned lines.
+
+    Args:
+        espeak_lib (str): Executable to run (`espeak` or `espeak-ng`).
+        args (List): Extra command-line arguments.
+        sync (bool): If True return a list of lines; otherwise return an iterator
+            over the same lines. Defaults to False.
+
+    Returns:
+        The raw output lines as `bytes`, each still carrying its trailing newline.
+    """
+    cmd = [
+        espeak_lib,
+        "-q",
+        "-b",
+        "1",  # UTF8 text encoding
+    ]
+    cmd.extend(args)
+    logging.debug("espeakng: executing %s", repr(cmd))
+
+    with subprocess.Popen(
+        cmd,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.STDOUT,
+    ) as p:
+        # Drain the pipe while it is still open. Handing out a lazy iterator over
+        # `p.stdout` cannot work here: the pipe is closed when the `with` block
+        # exits, so the first `next()` on it would raise ValueError.
+        lines = list(iter(p.stdout.readline, b""))
+        p.wait()
+    return lines if sync else iter(lines)
+
+
+class ESpeak(BasePhonemizer):
+    """ESpeak wrapper calling `espeak` or `espeak-ng` from the command line to perform G2P
+
+    Args:
+        language (str):
+            Valid language code for the used backend. For backwards compatibility `"en"` is
+            mapped to `"en-us"` and `"zh-cn"` to `"cmn"`.
+
+        backend (str):
+            Name of the backend library to use. `espeak` or `espeak-ng`. If None, set automatically
+            preferring `espeak-ng` over `espeak`. Defaults to None.
+
+        punctuations (str):
+            Characters to be treated as punctuation. Defaults to Punctuation.default_puncs().
+
+        keep_puncs (bool):
+            If True, keep the punctuations after phonemization. Defaults to True.
+
+    Example:
+
+        >>> from TTS.tts.utils.text.phonemizers import ESpeak
+        >>> phonemizer = ESpeak("tr")
+        >>> phonemizer.phonemize("Bu Türkçe, bir örnektir.", separator="|")
+        'b|ʊ t|ˈø|r|k|tʃ|ɛ, b|ɪ|r œ|r|n|ˈɛ|c|t|ɪ|r.'
+
+    """
+
+    # Backend detected at import time; instances may override it through `backend`.
+    _ESPEAK_LIB = _DEF_ESPEAK_LIB
+
+    def __init__(self, language: str, backend=None, punctuations=Punctuation.default_puncs(), keep_puncs=True):
+        if self._ESPEAK_LIB is None:
+            raise Exception(" [!] No espeak backend found. Install espeak-ng or espeak to your system.")
+        self.backend = self._ESPEAK_LIB
+
+        # band-aid for backwards compatibility
+        if language == "en":
+            language = "en-us"
+        if language == "zh-cn":
+            language = "cmn"
+
+        super().__init__(language, punctuations=punctuations, keep_puncs=keep_puncs)
+        if backend is not None:
+            self.backend = backend
+
+    @property
+    def backend(self):
+        """Name of the executable used for phonemization (`espeak` or `espeak-ng`)."""
+        return self._ESPEAK_LIB
+
+    @backend.setter
+    def backend(self, backend):
+        if backend not in ["espeak", "espeak-ng"]:
+            raise Exception("Unknown backend: %s" % backend)
+        self._ESPEAK_LIB = backend
+
+    def auto_set_espeak_lib(self) -> None:
+        """Select the backend automatically, preferring `espeak-ng` over `espeak`.
+
+        Raises:
+            Exception: If neither executable is found.
+        """
+        if is_tool("espeak-ng"):
+            self._ESPEAK_LIB = "espeak-ng"
+        elif is_tool("espeak"):
+            self._ESPEAK_LIB = "espeak"
+        else:
+            raise Exception("Cannot set backend automatically. espeak-ng or espeak not found")
+
+    @staticmethod
+    def name():
+        return "espeak"
+
+    def phonemize_espeak(self, text: str, separator: str = "|", tie=False) -> str:
+        """Convert input text to phonemes.
+
+        Args:
+            text (str):
+                Text to be converted to phonemes.
+
+            separator (str):
+                String that replaces the '_' characters of the espeak output. Defaults to '|'.
+
+            tie (bool, optional) : When True use a '͡' character between
+                consecutive characters of a single phoneme. Else separate phoneme
+                with '_'. This option requires espeak>=1.49. Default to False.
+
+        Returns:
+            str: The phonemes of all output lines concatenated.
+        """
+        # set arguments
+        args = ["-v", f"{self._language}"]
+        # espeak and espeak-ng parse `ipa` differently
+        if tie:
+            # use '͡' between phonemes
+            if self.backend == "espeak":
+                args.append("--ipa=1")
+            else:
+                args.append("--ipa=3")
+        else:
+            # split with '_'
+            if self.backend == "espeak":
+                args.append("--ipa=3")
+            else:
+                args.append("--ipa=1")
+        if tie:
+            args.append("--tie=%s" % tie)
+
+        # NOTE(review): the command runs without a shell, so these quote characters
+        # reach espeak as part of the text -- confirm whether that is intended.
+        args.append('"' + text + '"')
+        # compute phonemes
+        phonemes = ""
+        for line in _espeak_exe(self._ESPEAK_LIB, args, sync=True):
+            logging.debug("line: %s", repr(line))
+            ph_decoded = line.decode("utf8").strip()
+            # espeak needs to skip the first two characters of the returned text:
+            #   version 1.48.03: "_ p_ɹ_ˈaɪ_ɚ t_ə n_oʊ_v_ˈɛ_m_b_ɚ t_w_ˈɛ_n_t_i t_ˈuː\n"
+            #   version 1.48.15: " p_ɹ_ˈaɪ_ɚ t_ə n_oʊ_v_ˈɛ_m_b_ɚ t_w_ˈɛ_n_t_i t_ˈuː\n"
+            # espeak-ng needs to skip the first character of the returned text:
+            #   "_p_ɹ_ˈaɪ_ɚ t_ə n_oʊ_v_ˈɛ_m_b_ɚ t_w_ˈɛ_n_t_i t_ˈuː\n"
+
+            # dealing with the conditions described above
+            ph_decoded = ph_decoded[:1].replace("_", "") + ph_decoded[1:]
+
+            # espeak-ng backend can add language flags that need to be removed:
+            #   "sɛʁtˈɛ̃ mˈo kɔm (en)fˈʊtbɔːl(fr) ʒenˈɛʁ de- flˈaɡ də- lˈɑ̃ɡ."
+            # phonemize needs to remove the language flags of the returned text:
+            #   "sɛʁtˈɛ̃ mˈo kɔm fˈʊtbɔːl ʒenˈɛʁ de- flˈaɡ də- lˈɑ̃ɡ."
+            ph_decoded = re.sub(r"\(.+?\)", "", ph_decoded)
+
+            phonemes += ph_decoded.strip()
+        return phonemes.replace("_", separator)
+
+    def _phonemize(self, text, separator=None):
+        return self.phonemize_espeak(text, separator, tie=False)
+
+    @staticmethod
+    def supported_languages() -> Dict:
+        """Get a dictionary of supported languages.
+
+        Parses the `--voices` listing of the backend detected at import time
+        (`_DEF_ESPEAK_LIB`), not of a per-instance backend.
+
+        Returns:
+            Dict: Second column of each voice row mapped to its fourth column;
+            empty when no backend is installed.
+        """
+        if _DEF_ESPEAK_LIB is None:
+            return {}
+        args = ["--voices"]
+        langs = {}
+        for count, line in enumerate(_espeak_exe(_DEF_ESPEAK_LIB, args, sync=True)):
+            line = line.decode("utf8").strip()
+            logging.debug("line: %s", repr(line))
+            if count == 0:
+                # the first line is the table header
+                continue
+            cols = line.split()
+            if len(cols) < 4:
+                # Blank or short line (stderr is merged into stdout). This method
+                # runs at package import time, so skip such lines instead of raising.
+                continue
+            langs[cols[1]] = cols[3]
+        return langs
+
+    def version(self) -> str:
+        """Get the version of the used backend.
+
+        Returns:
+            str: Third whitespace-separated token of the first `--version` output line,
+            or None when the backend prints nothing.
+        """
+        args = ["--version"]
+        for line in _espeak_exe(self.backend, args, sync=True):
+            version = line.decode("utf8").strip().split()[2]
+            logging.debug("line: %s", repr(line))
+            return version
+
+    @classmethod
+    def is_available(cls):
+        """Return true if ESpeak is available else false"""
+        return is_tool("espeak") or is_tool("espeak-ng")
+
+
+# Manual smoke test; it needs an espeak or espeak-ng executable on the system.
+if __name__ == "__main__":
+    # Print the backend details for a Vietnamese phonemizer.
+    e = ESpeak(language="vi")
+    print(e.supported_languages())
+    print(e.version())
+    print(e.language)
+    print(e.name())
+    print(e.is_available())
+
+    # e = ESpeak(language="en-us", keep_puncs=False)
+    # print("`" + e.phonemize("hello how are you today?") + "`")
+
+    # Phonemize one Vietnamese and one English sample, keeping punctuation.
+    e = ESpeak(language="vi", keep_puncs=True)
+    print("`" + e.phonemize("Thành phố muốn thí điểm thu thuế bất động sản thứ hai, tự quyết nhiều quyết định đầu tư để thu hút nguồn vốn tư nhân") + "`")
+    e = ESpeak(language="en-us", keep_puncs=True)
+    print("`" + e.phonemize("Volodymyr Zelensky") + "`")
+    
diff --git a/TTS/tts/utils/text/phonemizers/gruut_wrapper.py b/TTS/tts/utils/text/phonemizers/gruut_wrapper.py
new file mode 100644
index 0000000000000000000000000000000000000000..f3e9c9abd4c41935ed07ec10ed883d75b42a6bc8
--- /dev/null
+++ b/TTS/tts/utils/text/phonemizers/gruut_wrapper.py
@@ -0,0 +1,151 @@
+import importlib
+from typing import List
+
+import gruut
+from gruut_ipa import IPA
+
+from TTS.tts.utils.text.phonemizers.base import BasePhonemizer
+from TTS.tts.utils.text.punctuation import Punctuation
+
+# Table for str.translate to fix gruut/TTS phoneme mismatch:
+# it replaces the ASCII letter "g" with the IPA character "ɡ".
+GRUUT_TRANS_TABLE = str.maketrans("g", "ɡ")
+
+
+class Gruut(BasePhonemizer):
+    """Gruut wrapper for G2P
+
+    Args:
+        language (str):
+            Valid language code for the used backend.
+
+        punctuations (str):
+            Characters to be treated as punctuation. Defaults to `Punctuation.default_puncs()`.
+
+        keep_puncs (bool):
+            If true, keep the punctuations after phonemization. Defaults to True.
+
+        use_espeak_phonemes (bool):
+            If true, use espeak lexicons instead of default Gruut lexicons. Defaults to False.
+
+        keep_stress (bool):
+            If true, keep the stress characters after phonemization. Defaults to False.
+
+    Example:
+
+        >>> from TTS.tts.utils.text.phonemizers.gruut_wrapper import Gruut
+        >>> phonemizer = Gruut('en-us')
+        >>> phonemizer.phonemize("Be a voice, not an! echo?", separator="|")
+        'b|i| ə| v|ɔ|ɪ|s, n|ɑ|t| ə|n! ɛ|k|o|ʊ?'
+    """
+
+    def __init__(
+        self,
+        language: str,
+        punctuations=Punctuation.default_puncs(),
+        keep_puncs=True,
+        use_espeak_phonemes=False,
+        keep_stress=False,
+    ):
+        super().__init__(language, punctuations=punctuations, keep_puncs=keep_puncs)
+        self.use_espeak_phonemes = use_espeak_phonemes
+        self.keep_stress = keep_stress
+
+    @staticmethod
+    def name():
+        return "gruut"
+
+    def phonemize_gruut(self, text: str, separator: str = "|", tie=False) -> str:  # pylint: disable=unused-argument
+        """Convert input text to phonemes.
+
+        Gruut phonemizes the given `str` by separating each phoneme character with `separator`, even for
+        characters that constitute a single sound.
+
+        It doesn't affect 🐸TTS since it individually converts each character to token IDs.
+
+        Examples::
+            "hello how are you today?" -> `h|ɛ|l|o|ʊ| h|a|ʊ| ɑ|ɹ| j|u| t|ə|d|e|ɪ`
+
+        Args:
+            text (str):
+                Text to be converted to phonemes.
+
+            separator (str):
+                String placed between the phoneme characters of a word; words are joined
+                with `separator` followed by a space. Defaults to '|'.
+
+            tie (bool, optional):
+                Unused by this backend.
+
+        Returns:
+            str: The phonemized text.
+        """
+        ph_list = []
+        for sentence in gruut.sentences(text, lang=self.language, espeak=self.use_espeak_phonemes):
+            for word in sentence:
+                if word.is_break:
+                    # Use actual character for break phoneme (e.g., comma)
+                    if ph_list:
+                        # Join with previous word
+                        ph_list[-1].append(word.text)
+                    else:
+                        # First word is punctuation
+                        ph_list.append([word.text])
+                elif word.phonemes:
+                    # Add phonemes for word
+                    word_phonemes = []
+
+                    for word_phoneme in word.phonemes:
+                        if not self.keep_stress:
+                            # Remove primary/secondary stress
+                            word_phoneme = IPA.without_stress(word_phoneme)
+
+                        word_phoneme = word_phoneme.translate(GRUUT_TRANS_TABLE)
+
+                        if word_phoneme:
+                            # Flatten phonemes
+                            word_phonemes.extend(word_phoneme)
+
+                    if word_phonemes:
+                        ph_list.append(word_phonemes)
+
+        ph_words = [separator.join(word_phonemes) for word_phonemes in ph_list]
+        ph = f"{separator} ".join(ph_words)
+        return ph
+
+    def _phonemize(self, text, separator):
+        return self.phonemize_gruut(text, separator, tie=False)
+
+    def is_supported_language(self, language):
+        """Returns True if `language` is supported by the backend"""
+        return gruut.is_language_supported(language)
+
+    @staticmethod
+    def supported_languages() -> List:
+        """Get the list of supported languages.
+
+        Returns:
+            List: List of language codes.
+        """
+        return list(gruut.get_supported_languages())
+
+    def version(self):
+        """Get the version of the used backend.
+
+        Returns:
+            str: Version of the used backend.
+        """
+        return gruut.__version__
+
+    @classmethod
+    def is_available(cls):
+        """Return true if Gruut is available else false"""
+        # `import importlib` alone does not guarantee that the `util` submodule is
+        # loaded, so import it explicitly before using it.
+        import importlib.util
+
+        return importlib.util.find_spec("gruut") is not None
+
+
+# Manual smoke test for the Gruut backend.
+if __name__ == "__main__":
+    # Print the backend details for an en-us phonemizer.
+    e = Gruut(language="en-us")
+    print(e.supported_languages())
+    print(e.version())
+    print(e.language)
+    print(e.name())
+    print(e.is_available())
+
+    # Phonemize a sample with punctuation dropped ...
+    e = Gruut(language="en-us", keep_puncs=False)
+    print("`" + e.phonemize("hello how are you today?") + "`")
+
+    # ... and one with punctuation kept.
+    e = Gruut(language="en-us", keep_puncs=True)
+    print("`" + e.phonemize("hello how, are you today?") + "`")
diff --git a/TTS/tts/utils/text/phonemizers/ja_jp_phonemizer.py b/TTS/tts/utils/text/phonemizers/ja_jp_phonemizer.py
new file mode 100644
index 0000000000000000000000000000000000000000..60b965f9d8f16327a5b6da41729601a96debfdc6
--- /dev/null
+++ b/TTS/tts/utils/text/phonemizers/ja_jp_phonemizer.py
@@ -0,0 +1,72 @@
+from typing import Dict
+
+from TTS.tts.utils.text.japanese.phonemizer import japanese_text_to_phonemes
+from TTS.tts.utils.text.phonemizers.base import BasePhonemizer
+
+_DEF_JA_PUNCS = "、.,[]()?!〽~『』「」【】"
+
+_TRANS_TABLE = {"、": ","}
+
+
+def trans(text):
+    for old, new in _TRANS_TABLE.items():  # substitutions are applied one after another, in table order
+        text = text.replace(old, new)
+    return text
+
+
+class JA_JP_Phonemizer(BasePhonemizer):
+    """🐸TTS Ja-Jp phonemizer using functions in `TTS.tts.utils.text.japanese.phonemizer`
+
+    TODO: someone with JA knowledge should check this implementation
+
+    Example:
+
+        >>> from TTS.tts.utils.text.phonemizers import JA_JP_Phonemizer
+        >>> phonemizer = JA_JP_Phonemizer()
+        >>> phonemizer.phonemize("どちらに行きますか？", separator="|")
+        'd|o|c|h|i|r|a|n|i|i|k|i|m|a|s|u|k|a|?'
+
+    """
+
+    language = "ja-jp"
+
+    def __init__(self, punctuations=_DEF_JA_PUNCS, keep_puncs=True, **kwargs):  # pylint: disable=unused-argument
+        super().__init__(self.language, punctuations=punctuations, keep_puncs=keep_puncs)
+
+    @staticmethod
+    def name():
+        return "ja_jp_phonemizer"
+
+    def _phonemize(self, text: str, separator: str = "|") -> str:
+        ph = japanese_text_to_phonemes(text)
+        if separator is None:  # the old `is not None or != ""` test was always true, so None crashed in join()
+            return ph
+        return separator.join(ph)
+
+    def phonemize(self, text: str, separator="|") -> str:
+        """Custom phonemize for JA_JP
+
+        Skip pre-post processing steps used by the other phonemizers.
+        """
+        return self._phonemize(text, separator)
+
+    @staticmethod
+    def supported_languages() -> Dict:
+        return {"ja-jp": "Japanese (Japan)"}
+
+    def version(self) -> str:
+        return "0.0.1"
+
+    def is_available(self) -> bool:
+        return True
+
+
+# if __name__ == "__main__":
+#     text = "これは、電話をかけるための私の日本語の例のテキストです。"
+#     e = JA_JP_Phonemizer()
+#     print(e.supported_languages())
+#     print(e.version())
+#     print(e.language)
+#     print(e.name())
+#     print(e.is_available())
+#     print("`" + e.phonemize(text) + "`")
diff --git a/TTS/tts/utils/text/phonemizers/ko_kr_phonemizer.py b/TTS/tts/utils/text/phonemizers/ko_kr_phonemizer.py
new file mode 100644
index 0000000000000000000000000000000000000000..f3f8d43b382c38292a258046f950c816010c895e
--- /dev/null
+++ b/TTS/tts/utils/text/phonemizers/ko_kr_phonemizer.py
@@ -0,0 +1,65 @@
+from typing import Dict
+
+from TTS.tts.utils.text.korean.phonemizer import korean_text_to_phonemes
+from TTS.tts.utils.text.phonemizers.base import BasePhonemizer
+
+_DEF_KO_PUNCS = "、.,[]()?!〽~『』「」【】"
+
+
+class KO_KR_Phonemizer(BasePhonemizer):
+    """🐸TTS ko_kr_phonemizer using functions in `TTS.tts.utils.text.korean.phonemizer`
+
+    TODO: Add Korean to character (ᄀᄁᄂᄃᄄᄅᄆᄇᄈᄉᄊᄋᄌᄍᄎᄏᄐᄑ하ᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵᆨᆩᆪᆫᆬᆭᆮᆯᆰᆱᆲᆳᆴᆵᆶᆷᆸᆹᆺᆻᆼᆽᆾᆿᇀᇁᇂ)
+
+    Example:
+
+        >>> from TTS.tts.utils.text.phonemizers import KO_KR_Phonemizer
+        >>> phonemizer = KO_KR_Phonemizer()
+        >>> phonemizer.phonemize("이 문장은 음성합성 테스트를 위한 문장입니다.", separator="|")
+        'ᄋ|ᅵ| |ᄆ|ᅮ|ᆫ|ᄌ|ᅡ|ᆼ|ᄋ|ᅳ| |ᄂ|ᅳ|ᆷ|ᄉ|ᅥ|ᆼ|ᄒ|ᅡ|ᆸ|ᄊ|ᅥ|ᆼ| |ᄐ|ᅦ|ᄉ|ᅳ|ᄐ|ᅳ|ᄅ|ᅳ| |ᄅ|ᅱ|ᄒ|ᅡ|ᆫ| |ᄆ|ᅮ|ᆫ|ᄌ|ᅡ|ᆼ|ᄋ|ᅵ|ᆷ|ᄂ|ᅵ|ᄃ|ᅡ|.'
+
+        >>> from TTS.tts.utils.text.phonemizers import KO_KR_Phonemizer
+        >>> phonemizer = KO_KR_Phonemizer()
+        >>> phonemizer.phonemize("이 문장은 음성합성 테스트를 위한 문장입니다.", separator="|", character='english')
+        'I| |M|u|n|J|a|n|g|E|u| |N|e|u|m|S|e|o|n|g|H|a|b|S|s|e|o|n|g| |T|e|S|e|u|T|e|u|L|e|u| |L|w|i|H|a|n| |M|u|n|J|a|n|g|I|m|N|i|D|a|.'
+
+    """
+
+    language = "ko-kr"
+
+    def __init__(self, punctuations=_DEF_KO_PUNCS, keep_puncs=True, **kwargs):  # pylint: disable=unused-argument
+        super().__init__(self.language, punctuations=punctuations, keep_puncs=keep_puncs)
+
+    @staticmethod
+    def name():
+        return "ko_kr_phonemizer"
+
+    def _phonemize(self, text: str, separator: str = "", character: str = "hangeul") -> str:
+        ph = korean_text_to_phonemes(text, character=character)
+        if separator is None:  # the old `is not None or != ""` test was always true, so None crashed in join()
+            return ph
+        return separator.join(ph)
+
+    def phonemize(self, text: str, separator: str = "", character: str = "hangeul") -> str:
+        return self._phonemize(text, separator, character)
+
+    @staticmethod
+    def supported_languages() -> Dict:
+        return {"ko-kr": "hangeul(korean)"}
+
+    def version(self) -> str:
+        return "0.0.2"
+
+    def is_available(self) -> bool:
+        return True
+
+
+if __name__ == "__main__":
+    sample = "이 문장은 음성합성 테스트를 위한 문장입니다."
+    phonemizer = KO_KR_Phonemizer()
+    print(phonemizer.supported_languages())
+    print(phonemizer.version())
+    print(phonemizer.language)
+    print(phonemizer.name())
+    print(phonemizer.is_available())
+    print(phonemizer.phonemize(sample))
diff --git a/TTS/tts/utils/text/phonemizers/multi_phonemizer.py b/TTS/tts/utils/text/phonemizers/multi_phonemizer.py
new file mode 100644
index 0000000000000000000000000000000000000000..e36b0a2a1f98aae72be017a3b0a956d6300afb61
--- /dev/null
+++ b/TTS/tts/utils/text/phonemizers/multi_phonemizer.py
@@ -0,0 +1,55 @@
+from typing import Dict, List
+
+from TTS.tts.utils.text.phonemizers import DEF_LANG_TO_PHONEMIZER, get_phonemizer_by_name
+
+
+class MultiPhonemizer:
+    """🐸TTS multi-phonemizer that operates phonemizers for multiple languages
+
+    Args:
+        custom_lang_to_phonemizer (Dict):
+            Custom phonemizer mapping `{"lang_code": "phonemizer_name"}` merged over a per-instance copy of
+            `DEF_LANG_TO_PHONEMIZER` (the shared defaults are never modified). None or `{}` keeps the defaults.
+
+    TODO: find a way to pass custom kwargs to the phonemizers
+    """
+
+    lang_to_phonemizer_name = DEF_LANG_TO_PHONEMIZER
+    language = "multi-lingual"
+
+    def __init__(self, custom_lang_to_phonemizer: Dict = {}) -> None:  # pylint: disable=dangerous-default-value
+        self.lang_to_phonemizer_name = {**self.lang_to_phonemizer_name, **(custom_lang_to_phonemizer or {})}
+        self.lang_to_phonemizer = self.init_phonemizers(self.lang_to_phonemizer_name)
+
+    @staticmethod
+    def init_phonemizers(lang_to_phonemizer_name: Dict) -> Dict:
+        lang_to_phonemizer = {}
+        for k, v in lang_to_phonemizer_name.items():
+            phonemizer = get_phonemizer_by_name(v, language=k)
+            lang_to_phonemizer[k] = phonemizer
+        return lang_to_phonemizer
+
+    @staticmethod
+    def name():
+        return "multi-phonemizer"
+
+    def phonemize(self, text, language, separator="|"):
+        return self.lang_to_phonemizer[language].phonemize(text, separator)
+
+    def supported_languages(self) -> List:
+        return list(self.lang_to_phonemizer_name.keys())
+
+
+# if __name__ == "__main__":
+#     texts = {
+#         "tr": "Merhaba, bu Türkçe bit örnek!",
+#         "en-us": "Hello, this is English example!",
+#         "de": "Hallo, das ist ein Deutches Beipiel!",
+#         "zh-cn": "这是中国的例子",
+#     }
+#     phonemes = {}
+#     ph = MultiPhonemizer()
+#     for lang, text in texts.items():
+#         phoneme = ph.phonemize(text, lang)
+#         phonemes[lang] = phoneme
+#     print(phonemes)
diff --git a/TTS/tts/utils/text/phonemizers/zh_cn_phonemizer.py b/TTS/tts/utils/text/phonemizers/zh_cn_phonemizer.py
new file mode 100644
index 0000000000000000000000000000000000000000..41480c417356fd941e71e3eff0099eb38ac7296a
--- /dev/null
+++ b/TTS/tts/utils/text/phonemizers/zh_cn_phonemizer.py
@@ -0,0 +1,62 @@
+from typing import Dict
+
+from TTS.tts.utils.text.chinese_mandarin.phonemizer import chinese_text_to_phonemes
+from TTS.tts.utils.text.phonemizers.base import BasePhonemizer
+
+_DEF_ZH_PUNCS = "、.,[]()?!〽~『』「」【】"
+
+
+class ZH_CN_Phonemizer(BasePhonemizer):
+    """🐸TTS Zh-Cn phonemizer that wraps `TTS.tts.utils.text.chinese_mandarin.phonemizer`
+
+    Args:
+        punctuations (str):
+            Characters handled as punctuation marks. Defaults to `_DEF_ZH_PUNCS`.
+
+        keep_puncs (bool):
+            Whether punctuation marks are kept after phonemization. Defaults to False.
+
+    Example ::
+
+        "这是，样本中文。" -> `d|ʒ|ø|4| |ʂ|ʏ|4| |，| |i|ɑ|ŋ|4|b|œ|n|3| |d|ʒ|o|ŋ|1|w|œ|n|2| |。`
+
+    TODO: someone with Mandarin knowledge should check this implementation
+    """
+
+    language = "zh-cn"
+
+    def __init__(self, punctuations=_DEF_ZH_PUNCS, keep_puncs=False, **kwargs):  # pylint: disable=unused-argument
+        super().__init__(self.language, punctuations=punctuations, keep_puncs=keep_puncs)
+
+    @staticmethod
+    def name():
+        return "zh_cn_phonemizer"
+
+    @staticmethod
+    def phonemize_zh_cn(text: str, separator: str = "|") -> str:
+        # plain pass-through to the Mandarin helper imported at the top of the module
+        return chinese_text_to_phonemes(text, separator)
+
+    def _phonemize(self, text, separator):
+        return self.phonemize_zh_cn(text, separator)
+
+    @staticmethod
+    def supported_languages() -> Dict:
+        return {"zh-cn": "Chinese (China)"}
+
+    def version(self) -> str:
+        return "0.0.1"
+
+    def is_available(self) -> bool:
+        return True
+
+
+# if __name__ == "__main__":
+#     text = "这是，样本中文。"
+#     e = ZH_CN_Phonemizer()
+#     print(e.supported_languages())
+#     print(e.version())
+#     print(e.language)
+#     print(e.name())
+#     print(e.is_available())
+#     print("`" + e.phonemize(text) + "`")
diff --git a/TTS/tts/utils/text/punctuation.py b/TTS/tts/utils/text/punctuation.py
new file mode 100644
index 0000000000000000000000000000000000000000..8d199cc545b36f6089056d904fd0a03670012c4d
--- /dev/null
+++ b/TTS/tts/utils/text/punctuation.py
@@ -0,0 +1,172 @@
+import collections
+import re
+from enum import Enum
+
+import six
+
+_DEF_PUNCS = ';:,.!?¡¿—…"«»“”'
+
+_PUNC_IDX = collections.namedtuple("_punc_index", ["punc", "position"])
+
+
+class PuncPosition(Enum):
+    """Where a punctuation match sits relative to the text it was stripped from"""
+
+    BEGIN = 0  # first match, located at the very start of the text
+    END = 1  # last match, located at the very end of the text
+    MIDDLE = 2  # every other match
+    ALONE = 3  # the whole text is one single punctuation match
+
+
+class Punctuation:
+    """Handle punctuations in text.
+
+    Just strip punctuations from text or strip and restore them later.
+
+    Args:
+        puncs (str): The punctuations to be processed. Defaults to `_DEF_PUNCS`.
+
+    Example:
+        >>> punc = Punctuation()
+        >>> punc.strip("This is. example !")
+        'This is example'
+
+        >>> text_striped, punc_map = punc.strip_to_restore("This is. example !")
+        >>> ' '.join(text_striped)
+        'This is example'
+
+        >>> text_restored = punc.restore(text_striped, punc_map)
+        >>> text_restored[0]
+        'This is. example !'
+    """
+
+    def __init__(self, puncs: str = _DEF_PUNCS):
+        self.puncs = puncs
+
+    @staticmethod
+    def default_puncs():
+        """Return default set of punctuations."""
+        return _DEF_PUNCS
+
+    @property
+    def puncs(self):
+        return self._puncs
+
+    @puncs.setter
+    def puncs(self, value):
+        if not isinstance(value, six.string_types):
+            raise ValueError("[!] Punctuations must be of type str.")
+        self._puncs = "".join(list(dict.fromkeys(list(value))))  # remove duplicates without changing the order
+        self.puncs_regular_exp = re.compile(rf"(\s*[{re.escape(self._puncs)}]+\s*)+")
+
+    def strip(self, text):
+        """Remove all the punctuations by replacing with `space`, then trim the whitespace at both ends.
+
+        Args:
+            text (str): The text to be processed.
+
+        Example::
+
+            "This is. example !" -> "This is example"
+        """
+        return self.puncs_regular_exp.sub(" ", text).strip()
+
+    def strip_to_restore(self, text):
+        """Remove punctuations from text to restore them later.
+
+        Args:
+            text (str): The text to be processed.
+
+        Examples (each punctuation entry keeps its surrounding whitespace and its `PuncPosition`) ::
+
+            "This is. example !" -> (["This is", "example"], [(". ", MIDDLE), (" !", END)])
+
+        """
+        text, puncs = self._strip_to_restore(text)
+        return text, puncs
+
+    def _strip_to_restore(self, text):
+        """Auxiliary method for Punctuation.strip_to_restore()"""
+        matches = list(re.finditer(self.puncs_regular_exp, text))
+        if not matches:
+            return [text], []
+        # the text is only punctuations
+        if len(matches) == 1 and matches[0].group() == text:
+            return [], [_PUNC_IDX(text, PuncPosition.ALONE)]
+        # build a punctuation map to be used later to restore punctuations
+        puncs = []
+        for match in matches:
+            position = PuncPosition.MIDDLE
+            if match == matches[0] and text.startswith(match.group()):
+                position = PuncPosition.BEGIN
+            elif match == matches[-1] and text.endswith(match.group()):
+                position = PuncPosition.END
+            puncs.append(_PUNC_IDX(match.group(), position))
+        # convert str text to a List[str], each item is separated by a punctuation
+        splitted_text = []
+        for idx, punc in enumerate(puncs):
+            split = text.split(punc.punc)
+            prefix, suffix = split[0], punc.punc.join(split[1:])
+            splitted_text.append(prefix)
+            # if the text does not end with a punctuation, add it to the last item
+            if idx == len(puncs) - 1 and len(suffix) > 0:
+                splitted_text.append(suffix)
+            text = suffix
+        return splitted_text, puncs
+
+    @classmethod
+    def restore(cls, text, puncs):
+        """Restore punctuation in a text.
+
+        Args:
+            text (List[str]): The list of text chunks, e.g. as returned by `strip_to_restore`.
+            puncs (List[_PUNC_IDX]): The punctuation map, e.g. as returned by `strip_to_restore`.
+
+        Examples (the restored text comes back as a list) ::
+
+            ['This is', 'example'], [('. ', MIDDLE), (' !', END)] -> ['This is. example !']
+
+        """
+        return cls._restore(text, puncs, 0)
+
+    @classmethod
+    def _restore(cls, text, puncs, num):  # pylint: disable=too-many-return-statements
+        """Auxiliary method for Punctuation.restore()"""
+        if not puncs:
+            return text
+
+        # nothing have been phonemized, returns the puncs alone
+        if not text:
+            return ["".join(m.punc for m in puncs)]
+
+        current = puncs[0]
+
+        if current.position == PuncPosition.BEGIN:
+            return cls._restore([current.punc + text[0]] + text[1:], puncs[1:], num)
+
+        if current.position == PuncPosition.END:
+            return [text[0] + current.punc] + cls._restore(text[1:], puncs[1:], num + 1)
+
+        if current.position == PuncPosition.ALONE:
+            return [current.punc] + cls._restore(text, puncs[1:], num + 1)
+
+        # POSITION == MIDDLE
+        if len(text) == 1:  # pragma: nocover
+            # a corner case where the final part of an intermediate
+            # mark (I) has not been phonemized
+            return cls._restore([text[0] + current.punc], puncs[1:], num)
+
+        return cls._restore([text[0] + current.punc + text[1]] + text[2:], puncs[1:], num)
+
+
+# if __name__ == "__main__":
+#     punc = Punctuation()
+#     text = "This is. This is, example!"
+
+#     print(punc.strip(text))
+
+#     split_text, puncs = punc.strip_to_restore(text)
+#     print(split_text, " ---- ", puncs)
+
+#     restored_text = punc.restore(split_text, puncs)
+#     print(restored_text)
diff --git a/TTS/tts/utils/text/tokenizer.py b/TTS/tts/utils/text/tokenizer.py
new file mode 100644
index 0000000000000000000000000000000000000000..c29776390dde29d56273f2dbf8cfa7bed8cd8148
--- /dev/null
+++ b/TTS/tts/utils/text/tokenizer.py
@@ -0,0 +1,211 @@
+from typing import Callable, Dict, List, Union
+
+from TTS.tts.utils.text import cleaners
+from TTS.tts.utils.text.characters import Graphemes, IPAPhonemes
+from TTS.tts.utils.text.phonemizers import DEF_LANG_TO_PHONEMIZER, get_phonemizer_by_name
+from TTS.utils.generic_utils import get_import_path, import_class
+
+
+class TTSTokenizer:
+    """🐸TTS tokenizer to convert input characters to token IDs and back.
+
+    Token IDs for OOV chars are discarded but those are stored in `self.not_found_characters` for later.
+
+    Args:
+        use_phonemes (bool):
+            Whether to use phonemes instead of characters. Defaults to False.
+
+        characters (Characters):
+            A Characters object to use for character-to-ID and ID-to-character mappings.
+
+        text_cleaner (callable):
+            A function to pre-process the text before tokenization and phonemization. Defaults to None.
+
+        phonemizer (Phonemizer):
+            A phonemizer object or a dict that maps language codes to phonemizer objects. Defaults to None.
+
+    Example:
+
+        >>> from TTS.tts.utils.text.tokenizer import TTSTokenizer
+        >>> tokenizer = TTSTokenizer(use_phonemes=False, characters=Graphemes())
+        >>> text = "Hello world!"
+        >>> ids = tokenizer.text_to_ids(text)
+        >>> text_hat = tokenizer.ids_to_text(ids)
+        >>> assert text == text_hat
+    """
+
+    def __init__(
+        self,
+        use_phonemes=False,
+        text_cleaner: Callable = None,
+        characters: "BaseCharacters" = None,
+        phonemizer: Union["Phonemizer", Dict] = None,
+        add_blank: bool = False,
+        use_eos_bos=False,
+    ):
+        self.text_cleaner = text_cleaner
+        self.use_phonemes = use_phonemes
+        self.add_blank = add_blank
+        self.use_eos_bos = use_eos_bos
+        self.characters = characters
+        self.not_found_characters = []
+        self.phonemizer = phonemizer
+
+    @property
+    def characters(self):
+        return self._characters
+
+    @characters.setter
+    def characters(self, new_characters):
+        self._characters = new_characters
+        self.pad_id = self.characters.char_to_id(self.characters.pad) if self.characters.pad else None
+        self.blank_id = self.characters.char_to_id(self.characters.blank) if self.characters.blank else None
+
+    def encode(self, text: str) -> List[int]:
+        """Encodes a string of text as a sequence of IDs, dropping characters missing from the vocabulary."""
+        token_ids = []
+        for char in text:
+            # a KeyError from `char_to_id` marks an out-of-vocabulary character
+            try:
+                idx = self.characters.char_to_id(char)
+                token_ids.append(idx)
+            except KeyError:
+                # discard but store not found characters
+                if char not in self.not_found_characters:
+                    self.not_found_characters.append(char)
+                    print(text)
+                    print(f" [!] Character {repr(char)} not found in the vocabulary. Discarding it.")
+        # each distinct unknown character is reported once; the IDs are returned without being printed
+        return token_ids
+
+    def decode(self, token_ids: List[int]) -> str:
+        """Decodes a sequence of IDs to a string of text."""
+        text = ""
+        for token_id in token_ids:
+            text += self.characters.id_to_char(token_id)
+        return text
+
+    def text_to_ids(self, text: str, language: str = None) -> List[int]:  # pylint: disable=unused-argument
+        """Converts a string of text to a sequence of token IDs.
+
+        Args:
+            text(str):
+                The text to convert to token IDs.
+
+            language(str):
+                The language code of the text. Defaults to None.
+
+        TODO:
+            - Add support for language-specific processing.
+
+        1. Text normalization
+        2. Phonemization (if use_phonemes is True)
+        3. Add blank char between characters
+        4. Add BOS and EOS characters
+        5. Text to token IDs
+        """
+        # TODO: text cleaner should pick the right routine based on the language
+        if self.text_cleaner is not None:
+            text = self.text_cleaner(text)
+        # the phonemizer output is encoded symbol by symbol, hence the empty separator
+        if self.use_phonemes:
+            # NOTE: `language` is not forwarded to the phonemizer yet (see the TODO in the docstring)
+            text = self.phonemizer.phonemize(text, separator="")
+        if self.add_blank:
+            text = self.intersperse_blank_char(text, True)
+        if self.use_eos_bos:
+            text = self.pad_with_bos_eos(text)
+        # `text` is a list of symbols here if blanks or BOS/EOS were added, otherwise it is still a str
+        return self.encode(text)
+
+    def ids_to_text(self, id_sequence: List[int]) -> str:
+        """Converts a sequence of token IDs to a string of text."""
+        return self.decode(id_sequence)
+
+    def pad_with_bos_eos(self, char_sequence: List[str]):
+        """Pads a sequence with the special BOS and EOS characters."""
+        return [self.characters.bos] + list(char_sequence) + [self.characters.eos]
+
+    def intersperse_blank_char(self, char_sequence: List[str], use_blank_char: bool = False):
+        """Intersperses the blank character between characters in a sequence.
+
+        Use the ```blank``` character if defined else use the ```pad``` character.
+        """
+        char_to_use = self.characters.blank if use_blank_char else self.characters.pad
+        result = [char_to_use] * (len(char_sequence) * 2 + 1)
+        result[1::2] = char_sequence
+        return result
+
+    def print_logs(self, level: int = 0):
+        indent = "\t" * level
+        print(f"{indent}| > add_blank: {self.add_blank}")
+        print(f"{indent}| > use_eos_bos: {self.use_eos_bos}")
+        print(f"{indent}| > use_phonemes: {self.use_phonemes}")
+        if self.use_phonemes:
+            print(f"{indent}| > phonemizer:")
+            self.phonemizer.print_logs(level + 1)
+        if len(self.not_found_characters) > 0:
+            print(f"{indent}| > {len(self.not_found_characters)} not found characters:")
+            for char in self.not_found_characters:
+                print(f"{indent}| > {char}")
+
+    @staticmethod
+    def init_from_config(config: "Coqpit", characters: "BaseCharacters" = None):
+        """Init Tokenizer object from config
+
+        Args:
+            config (Coqpit): Coqpit model config.
+            characters (BaseCharacters): Defines the model character set. If not set, use the default options based on
+                the config values. Defaults to None.
+        """
+        # init cleaners
+        text_cleaner = None
+        if isinstance(config.text_cleaner, (str, list)):
+            text_cleaner = getattr(cleaners, config.text_cleaner)
+
+        # init characters
+        if characters is None:
+            # set characters based on defined characters class
+            if config.characters and config.characters.characters_class:
+                CharactersClass = import_class(config.characters.characters_class)
+                characters, new_config = CharactersClass.init_from_config(config)
+            # set characters based on config
+            else:
+                if config.use_phonemes:
+                    # init phoneme set
+                    characters, new_config = IPAPhonemes().init_from_config(config)
+                else:
+                    # init character set
+                    characters, new_config = Graphemes().init_from_config(config)
+
+        else:
+            characters, new_config = characters.init_from_config(config)
+
+        # set characters class
+        new_config.characters.characters_class = get_import_path(characters)
+
+        # init phonemizer
+        phonemizer = None
+        if config.use_phonemes:
+            phonemizer_kwargs = {"language": config.phoneme_language}
+
+            if "phonemizer" in config and config.phonemizer:
+                phonemizer = get_phonemizer_by_name(config.phonemizer, **phonemizer_kwargs)
+            else:
+                try:
+                    phonemizer = get_phonemizer_by_name(
+                        DEF_LANG_TO_PHONEMIZER[config.phoneme_language], **phonemizer_kwargs
+                    )
+                    new_config.phonemizer = phonemizer.name()
+                except KeyError as e:
+                    raise ValueError(
+                        f"""No phonemizer found for language {config.phoneme_language}.
+                        You may need to install a third party library for this language."""
+                    ) from e
+
+        return (
+            TTSTokenizer(
+                config.use_phonemes, text_cleaner, characters, phonemizer, config.add_blank, config.enable_eos_bos_chars
+            ),
+            new_config,
+        )
diff --git a/TTS/tts/utils/visual.py b/TTS/tts/utils/visual.py
new file mode 100644
index 0000000000000000000000000000000000000000..78c12981098ed1870ad799a72e7f7b80e4aafc17
--- /dev/null
+++ b/TTS/tts/utils/visual.py
@@ -0,0 +1,202 @@
+import librosa
+import matplotlib
+import matplotlib.pyplot as plt
+import numpy as np
+import torch
+
+matplotlib.use("Agg")
+
+
+def plot_alignment(alignment, info=None, fig_size=(16, 10), title=None, output_fig=False):
+    """Draw the transposed `alignment` as a heat map with a colorbar and return the figure.
+
+    Torch tensors are detached, moved to CPU and squeezed first; float16 data is cast to float32.
+    The figure is closed before returning unless `output_fig` is set.
+    """
+    attn = alignment.detach().cpu().numpy().squeeze() if isinstance(alignment, torch.Tensor) else alignment
+    if attn.dtype == np.float16:
+        attn = attn.astype(np.float32)
+    fig, ax = plt.subplots(figsize=fig_size)
+    heat_map = ax.imshow(attn.T, aspect="auto", origin="lower", interpolation="none")
+    fig.colorbar(heat_map, ax=ax)
+    # the optional `info` text goes below the x label, separated by an empty line
+    plt.xlabel("Decoder timestep" if info is None else "Decoder timestep" + "\n\n" + info)
+    plt.ylabel("Encoder timestep")
+    plt.tight_layout()
+    if title is not None:
+        plt.title(title)
+    if not output_fig:
+        plt.close()
+    return fig
+
+
+def plot_spectrogram(spectrogram, ap=None, fig_size=(16, 10), output_fig=False):
+    """Plot the transposed `spectrogram` (passed through `ap.denormalize` if `ap` is given) with a colorbar."""
+    is_tensor = isinstance(spectrogram, torch.Tensor)
+    spec = (spectrogram.detach().cpu().numpy().squeeze() if is_tensor else spectrogram).T
+    if spec.dtype == np.float16:
+        spec = spec.astype(np.float32)
+    if ap is not None:
+        spec = ap.denormalize(spec)
+    fig = plt.figure(figsize=fig_size)
+    plt.imshow(spec, aspect="auto", origin="lower")
+    plt.colorbar()
+    plt.tight_layout()
+    if not output_fig:
+        plt.close()
+    return fig
+
+
+def plot_pitch(pitch, spectrogram, ap=None, fig_size=(30, 10), output_fig=False):
+    """Plot pitch curves on top of the spectrogram.
+
+    Args:
+        pitch (np.array): Pitch values.
+        spectrogram (np.array): Spectrogram values.
+        ap: Optional object whose `denormalize` method is applied to the spectrogram before plotting.
+        fig_size (tuple): Figure size handed to `plt.subplots`; None keeps the Matplotlib default.
+        output_fig (bool): When False the figure is closed before it is returned.
+
+    Shapes:
+        pitch: :math:`(T,)`
+        spec: :math:`(C, T)`
+    """
+
+    if isinstance(spectrogram, torch.Tensor):
+        spectrogram_ = spectrogram.detach().cpu().numpy().squeeze().T
+    else:
+        spectrogram_ = spectrogram.T
+    spectrogram_ = spectrogram_.astype(np.float32) if spectrogram_.dtype == np.float16 else spectrogram_
+    if ap is not None:
+        spectrogram_ = ap.denormalize(spectrogram_)  # pylint: disable=protected-access
+
+    # size only this figure instead of temporarily overwriting the global rcParams,
+    # which stayed modified whenever plotting raised before the restore
+    fig, ax = plt.subplots(figsize=fig_size)
+
+    ax.imshow(spectrogram_, aspect="auto", origin="lower")
+    ax.set_xlabel("time")
+    ax.set_ylabel("spec_freq")
+
+    ax2 = ax.twinx()
+    ax2.plot(pitch, linewidth=5.0, color="red")
+    ax2.set_ylabel("F0")
+
+    if not output_fig:
+        plt.close()
+    return fig
+
+
+def plot_avg_pitch(pitch, chars, fig_size=(30, 10), output_fig=False):
+    """Plot pitch curves on top of the input characters.
+
+    Args:
+        pitch (np.array): Pitch values.
+        chars (str): Characters to place to the x-axis.
+        fig_size (tuple): Figure size handed to `plt.subplots`; None keeps the Matplotlib default.
+        output_fig (bool): When False the figure is closed before it is returned.
+
+    Shapes:
+        pitch: :math:`(T,)`
+    """
+    # size only this figure instead of temporarily overwriting the global rcParams,
+    # which stayed modified whenever plotting raised before the restore
+    fig, ax = plt.subplots(figsize=fig_size)
+
+    # one x tick per input character
+    x = np.array(range(len(chars)))
+    my_xticks = chars
+    plt.xticks(x, my_xticks)
+
+    ax.set_xlabel("characters")
+    ax.set_ylabel("freq")
+
+    ax2 = ax.twinx()
+    ax2.plot(pitch, linewidth=5.0, color="red")
+    ax2.set_ylabel("F0")
+
+    if not output_fig:
+        plt.close()
+    return fig
+
+
+def visualize(
+    alignment,
+    postnet_output,
+    text,
+    hop_length,
+    CONFIG,
+    tokenizer,
+    stop_tokens=None,
+    decoder_output=None,
+    output_path=None,
+    figsize=(8, 24),
+    output_fig=False,
+):
+    """Plot alignment, stop tokens and spectrograms in one figure. Intended to be used in Notebooks."""
+
+    if decoder_output is not None:
+        num_plot = 4
+    else:
+        num_plot = 3
+
+    label_fontsize = 16
+    fig = plt.figure(figsize=figsize)
+
+    plt.subplot(num_plot, 1, 1)
+    plt.imshow(alignment.T, aspect="auto", origin="lower", interpolation=None)
+    plt.xlabel("Decoder timestamp", fontsize=label_fontsize)
+    plt.ylabel("Encoder timestamp", fontsize=label_fontsize)
+    # compute phoneme representation and back
+    if CONFIG.use_phonemes:
+        seq = tokenizer.text_to_ids(text)
+        text = tokenizer.ids_to_text(seq)
+        print(text)
+    plt.yticks(range(len(text)), list(text))
+    plt.colorbar()
+
+    if stop_tokens is not None:
+        # plot stopnet predictions
+        plt.subplot(num_plot, 1, 2)
+        plt.plot(range(len(stop_tokens)), list(stop_tokens))
+
+    # plot postnet spectrogram
+    plt.subplot(num_plot, 1, 3)
+    librosa.display.specshow(
+        postnet_output.T,
+        sr=CONFIG.audio["sample_rate"],
+        hop_length=hop_length,
+        x_axis="time",
+        y_axis="linear",
+        fmin=CONFIG.audio["mel_fmin"],
+        fmax=CONFIG.audio["mel_fmax"],
+    )
+
+    plt.xlabel("Time", fontsize=label_fontsize)
+    plt.ylabel("Hz", fontsize=label_fontsize)
+    plt.tight_layout()
+    plt.colorbar()
+
+    if decoder_output is not None:
+        plt.subplot(num_plot, 1, 4)
+        librosa.display.specshow(
+            decoder_output.T,
+            sr=CONFIG.audio["sample_rate"],
+            hop_length=hop_length,
+            x_axis="time",
+            y_axis="linear",
+            fmin=CONFIG.audio["mel_fmin"],
+            fmax=CONFIG.audio["mel_fmax"],
+        )
+        plt.xlabel("Time", fontsize=label_fontsize)
+        plt.ylabel("Hz", fontsize=label_fontsize)
+        plt.tight_layout()
+        plt.colorbar()
+
+    if output_path:
+        print(output_path)
+        fig.savefig(output_path)
+
+    # close this figure exactly once; two bare `plt.close()` calls could also close an unrelated open figure
+    if output_path or not output_fig:
+        plt.close(fig)
diff --git a/TTS/utils/__init__.py b/TTS/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/TTS/utils/__pycache__/__init__.cpython-310.pyc b/TTS/utils/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4d3e2c1b90fe70546c9cbc4d821fb21130274600
Binary files /dev/null and b/TTS/utils/__pycache__/__init__.cpython-310.pyc differ
diff --git a/TTS/utils/__pycache__/__init__.cpython-37.pyc b/TTS/utils/__pycache__/__init__.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..dd68fe3e4ff2ddeb8a94fe33c33b9386c029a77a
Binary files /dev/null and b/TTS/utils/__pycache__/__init__.cpython-37.pyc differ
diff --git a/TTS/utils/__pycache__/__init__.cpython-38.pyc b/TTS/utils/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..fdfa64000af38b6a0fe313a2d9cf3ede85e9dc19
Binary files /dev/null and b/TTS/utils/__pycache__/__init__.cpython-38.pyc differ
diff --git a/TTS/utils/__pycache__/__init__.cpython-39.pyc b/TTS/utils/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a6dc3220d5eaba09ebf1bdfefc29d70a9d989a83
Binary files /dev/null and b/TTS/utils/__pycache__/__init__.cpython-39.pyc differ
diff --git a/TTS/utils/__pycache__/capacitron_optimizer.cpython-37.pyc b/TTS/utils/__pycache__/capacitron_optimizer.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..46e9d87aacdbdc29c9c5a1571a94f0de3a1a0252
Binary files /dev/null and b/TTS/utils/__pycache__/capacitron_optimizer.cpython-37.pyc differ
diff --git a/TTS/utils/__pycache__/capacitron_optimizer.cpython-38.pyc b/TTS/utils/__pycache__/capacitron_optimizer.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..3a683b56e603034230c610c233a7b0df92ce080a
Binary files /dev/null and b/TTS/utils/__pycache__/capacitron_optimizer.cpython-38.pyc differ
diff --git a/TTS/utils/__pycache__/capacitron_optimizer.cpython-39.pyc b/TTS/utils/__pycache__/capacitron_optimizer.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..3732164b25632fa28e12e1df704a7dfd24e5c21d
Binary files /dev/null and b/TTS/utils/__pycache__/capacitron_optimizer.cpython-39.pyc differ
diff --git a/TTS/utils/__pycache__/generic_utils.cpython-310.pyc b/TTS/utils/__pycache__/generic_utils.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..29af9a5d5c6966fde909e6a457216f73d8393a97
Binary files /dev/null and b/TTS/utils/__pycache__/generic_utils.cpython-310.pyc differ
diff --git a/TTS/utils/__pycache__/generic_utils.cpython-37.pyc b/TTS/utils/__pycache__/generic_utils.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d31502baa350db70e04a87c012d6a1fff94b36ae
Binary files /dev/null and b/TTS/utils/__pycache__/generic_utils.cpython-37.pyc differ
diff --git a/TTS/utils/__pycache__/generic_utils.cpython-38.pyc b/TTS/utils/__pycache__/generic_utils.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a13a34bcc7d7ea6b37cec00083200292851eb6b9
Binary files /dev/null and b/TTS/utils/__pycache__/generic_utils.cpython-38.pyc differ
diff --git a/TTS/utils/__pycache__/generic_utils.cpython-39.pyc b/TTS/utils/__pycache__/generic_utils.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..bb3c1c2d94d290878d74acde17225c8156b8d588
Binary files /dev/null and b/TTS/utils/__pycache__/generic_utils.cpython-39.pyc differ
diff --git a/TTS/utils/__pycache__/io.cpython-310.pyc b/TTS/utils/__pycache__/io.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..cfb253f6733349cb4805a2cef74f893ab507376e
Binary files /dev/null and b/TTS/utils/__pycache__/io.cpython-310.pyc differ
diff --git a/TTS/utils/__pycache__/io.cpython-37.pyc b/TTS/utils/__pycache__/io.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a1fd380aeea757130a7337b0fbb862dee84e7193
Binary files /dev/null and b/TTS/utils/__pycache__/io.cpython-37.pyc differ
diff --git a/TTS/utils/__pycache__/io.cpython-38.pyc b/TTS/utils/__pycache__/io.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..055421c0ad17224b032be132c53bb7eb03ecef2d
Binary files /dev/null and b/TTS/utils/__pycache__/io.cpython-38.pyc differ
diff --git a/TTS/utils/__pycache__/io.cpython-39.pyc b/TTS/utils/__pycache__/io.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d507cb522ec0107a6700833490dbea4066358f90
Binary files /dev/null and b/TTS/utils/__pycache__/io.cpython-39.pyc differ
diff --git a/TTS/utils/__pycache__/radam.cpython-37.pyc b/TTS/utils/__pycache__/radam.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..edd8db709a5b238a96d801184b4cdaf5931733d2
Binary files /dev/null and b/TTS/utils/__pycache__/radam.cpython-37.pyc differ
diff --git a/TTS/utils/__pycache__/radam.cpython-38.pyc b/TTS/utils/__pycache__/radam.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0e33f8a7e194be3b0dd12966bb5150c65ca70ab1
Binary files /dev/null and b/TTS/utils/__pycache__/radam.cpython-38.pyc differ
diff --git a/TTS/utils/__pycache__/radam.cpython-39.pyc b/TTS/utils/__pycache__/radam.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f3ac7d966759e54778a5506db1afec51202e78aa
Binary files /dev/null and b/TTS/utils/__pycache__/radam.cpython-39.pyc differ
diff --git a/TTS/utils/__pycache__/synthesizer.cpython-38.pyc b/TTS/utils/__pycache__/synthesizer.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..bf705dcf48445a5d6bbd8a746f08289f3a5a711a
Binary files /dev/null and b/TTS/utils/__pycache__/synthesizer.cpython-38.pyc differ
diff --git a/TTS/utils/__pycache__/synthesizer.cpython-39.pyc b/TTS/utils/__pycache__/synthesizer.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..26447bd86a3889673aa2a9ed5ff70ca37f6c23d2
Binary files /dev/null and b/TTS/utils/__pycache__/synthesizer.cpython-39.pyc differ
diff --git a/TTS/utils/__pycache__/training.cpython-37.pyc b/TTS/utils/__pycache__/training.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a479660d39be3f6692c904be872e7f74a8e24c4e
Binary files /dev/null and b/TTS/utils/__pycache__/training.cpython-37.pyc differ
diff --git a/TTS/utils/__pycache__/training.cpython-38.pyc b/TTS/utils/__pycache__/training.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..1d7e126a3c0f8ea917628d7f98214c81252a05ba
Binary files /dev/null and b/TTS/utils/__pycache__/training.cpython-38.pyc differ
diff --git a/TTS/utils/__pycache__/training.cpython-39.pyc b/TTS/utils/__pycache__/training.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d0d3fe0442aa66818a62b1cbeefa63a07652a2b4
Binary files /dev/null and b/TTS/utils/__pycache__/training.cpython-39.pyc differ
diff --git a/TTS/utils/audio/.ipynb_checkpoints/processor-checkpoint.py b/TTS/utils/audio/.ipynb_checkpoints/processor-checkpoint.py
new file mode 100644
index 0000000000000000000000000000000000000000..9d16474a0035ff3aea02d6507e78b6d3d7480b77
--- /dev/null
+++ b/TTS/utils/audio/.ipynb_checkpoints/processor-checkpoint.py
@@ -0,0 +1,767 @@
+from typing import Dict, Tuple
+
+import librosa
+import numpy as np
+import scipy.io.wavfile
+import scipy.signal
+import soundfile as sf
+
+from TTS.tts.utils.helpers import StandardScaler
+from TTS.utils.audio.numpy_transforms import compute_f0
+
+# pylint: disable=too-many-public-methods
+
+
+class AudioProcessor(object):
+    """Audio Processor for TTS.
+
+    Note:
+        All the class arguments are set to default values to enable a flexible initialization
+        of the class with the model config. They are not meaningful for all the arguments.
+
+    Args:
+        sample_rate (int, optional):
+            target audio sampling rate. Defaults to None.
+
+        resample (bool, optional):
+            enable/disable resampling of the audio clips when the target sampling rate does not match the original sampling rate. Defaults to False.
+
+        num_mels (int, optional):
+            number of melspectrogram dimensions. Defaults to None.
+
+        log_func (str, optional):
+            name of the log function used for converting spectrogram amplitude to DB, "np.log10" or "np.log". Defaults to "np.log10".
+
+        min_level_db (int, optional):
+            minimum db threshold for the computed melspectrograms. Defaults to None.
+
+        frame_shift_ms (int, optional):
+            milliseconds of frames between STFT columns. Defaults to None.
+
+        frame_length_ms (int, optional):
+            milliseconds of STFT window length. Defaults to None.
+
+        hop_length (int, optional):
+            number of frames between STFT columns. Used if ```frame_shift_ms``` is None. Defaults to None.
+
+        win_length (int, optional):
+            STFT window length. Used if ```frame_length_ms``` is None. Defaults to None.
+
+        ref_level_db (int, optional):
+            reference DB level to avoid background noise. In general <20DB corresponds to the air noise. Defaults to None.
+
+        fft_size (int, optional):
+            FFT window size for STFT. Defaults to 1024.
+
+        power (int, optional):
+            Exponent value applied to the spectrogram before GriffinLim. Defaults to None.
+
+        preemphasis (float, optional):
+            Preemphasis coefficient. Preemphasis is disabled if == 0.0. Defaults to 0.0.
+
+        signal_norm (bool, optional):
+            enable/disable signal normalization. Defaults to None.
+
+        symmetric_norm (bool, optional):
+            enable/disable symmetric normalization. If set True normalization is performed in the range [-k, k] else [0, k], Defaults to None.
+
+        max_norm (float, optional):
+            ```k``` defining the normalization range. Defaults to None.
+
+        mel_fmin (int, optional):
+            minimum filter frequency for computing melspectrograms. Defaults to None.
+
+        mel_fmax (int, optional):
+            maximum filter frequency for computing melspectrograms. Defaults to None.
+
+        pitch_fmin (int, optional):
+            minimum filter frequency for computing pitch. Defaults to None.
+
+        pitch_fmax (int, optional):
+            maximum filter frequency for computing pitch. Defaults to None.
+
+        spec_gain (int, optional):
+            gain applied when converting amplitude to DB. Defaults to 20.
+
+        stft_pad_mode (str, optional):
+            Padding mode for STFT. Defaults to 'reflect'.
+
+        clip_norm (bool, optional):
+            enable/disable clipping the out of range values in the normalized audio signal. Defaults to True.
+
+        griffin_lim_iters (int, optional):
+            Number of GriffinLim iterations. Defaults to None.
+
+        do_trim_silence (bool, optional):
+            enable/disable silence trimming when loading the audio signal. Defaults to False.
+
+        trim_db (int, optional):
+            DB threshold used for silence trimming. Defaults to 60.
+
+        do_sound_norm (bool, optional):
+            enable/disable peak volume normalization of the waveform when loading an audio file. Defaults to False.
+
+        do_amp_to_db_linear (bool, optional):
+            enable/disable amplitude to dB conversion of linear spectrograms. Defaults to True.
+
+        do_amp_to_db_mel (bool, optional):
+            enable/disable amplitude to dB conversion of mel spectrograms. Defaults to True.
+
+        do_rms_norm (bool, optional):
+            enable/disable RMS volume normalization when loading an audio file. Defaults to False.
+
+        db_level (int, optional):
+            dB level used for rms normalization. The range is -99 to 0. Defaults to None.
+
+        stats_path (str, optional):
+            Path to the computed stats file. Defaults to None.
+
+        verbose (bool, optional):
+            enable/disable logging. Defaults to True.
+
+    """
+
+    def __init__(
+        self,
+        sample_rate=None,
+        resample=False,
+        num_mels=None,
+        log_func="np.log10",
+        min_level_db=None,
+        frame_shift_ms=None,
+        frame_length_ms=None,
+        hop_length=None,
+        win_length=None,
+        ref_level_db=None,
+        fft_size=1024,
+        power=None,
+        preemphasis=0.0,
+        signal_norm=None,
+        symmetric_norm=None,
+        max_norm=None,
+        mel_fmin=None,
+        mel_fmax=None,
+        pitch_fmax=None,
+        pitch_fmin=None,
+        spec_gain=20,
+        stft_pad_mode="reflect",
+        clip_norm=True,
+        griffin_lim_iters=None,
+        do_trim_silence=False,
+        trim_db=60,
+        do_sound_norm=False,
+        do_amp_to_db_linear=True,
+        do_amp_to_db_mel=True,
+        do_rms_norm=False,
+        db_level=None,
+        stats_path=None,
+        verbose=True,
+        **_,
+    ):
+
+        # setup class attributes
+        self.sample_rate = sample_rate
+        self.resample = resample
+        self.num_mels = num_mels
+        self.log_func = log_func
+        self.min_level_db = min_level_db or 0
+        self.frame_shift_ms = frame_shift_ms
+        self.frame_length_ms = frame_length_ms
+        self.ref_level_db = ref_level_db
+        self.fft_size = fft_size
+        self.power = power
+        self.preemphasis = preemphasis
+        self.griffin_lim_iters = griffin_lim_iters
+        self.signal_norm = signal_norm
+        self.symmetric_norm = symmetric_norm
+        self.mel_fmin = mel_fmin or 0
+        self.mel_fmax = mel_fmax
+        self.pitch_fmin = pitch_fmin
+        self.pitch_fmax = pitch_fmax
+        self.spec_gain = float(spec_gain)
+        self.stft_pad_mode = stft_pad_mode
+        self.max_norm = 1.0 if max_norm is None else float(max_norm)
+        self.clip_norm = clip_norm
+        self.do_trim_silence = do_trim_silence
+        self.trim_db = trim_db
+        self.do_sound_norm = do_sound_norm
+        self.do_amp_to_db_linear = do_amp_to_db_linear
+        self.do_amp_to_db_mel = do_amp_to_db_mel
+        self.do_rms_norm = do_rms_norm
+        self.db_level = db_level
+        self.stats_path = stats_path
+        # setup exp_func for db to amp conversion
+        if log_func == "np.log":
+            self.base = np.e
+        elif log_func == "np.log10":
+            self.base = 10
+        else:
+            raise ValueError(" [!] unknown `log_func` value.")
+        # setup stft parameters
+        if hop_length is None:
+            # compute stft parameters from given time values
+            self.hop_length, self.win_length = self._stft_parameters()
+        else:
+            # use stft parameters from config file
+            self.hop_length = hop_length
+            self.win_length = win_length
+        assert min_level_db != 0.0, " [!] min_level_db is 0"
+        assert (
+            self.win_length <= self.fft_size
+        ), f" [!] win_length cannot be larger than fft_size - {self.win_length} vs {self.fft_size}"
+        members = vars(self)
+        if verbose:
+            print(" > Setting up Audio Processor...")
+            for key, value in members.items():
+                print(" | > {}:{}".format(key, value))
+        # create spectrogram utils
+        self.mel_basis = self._build_mel_basis()
+        self.inv_mel_basis = np.linalg.pinv(self._build_mel_basis())
+        # setup scaler
+        if stats_path and signal_norm:
+            mel_mean, mel_std, linear_mean, linear_std, _ = self.load_stats(stats_path)
+            self.setup_scaler(mel_mean, mel_std, linear_mean, linear_std)
+            self.signal_norm = True
+            self.max_norm = None
+            self.clip_norm = None
+            self.symmetric_norm = None
+
+    @staticmethod
+    def init_from_config(config: "Coqpit", verbose=True):
+        if "audio" in config:
+            return AudioProcessor(verbose=verbose, **config.audio)
+        return AudioProcessor(verbose=verbose, **config)
+
+    ### setting up the parameters ###
+    def _build_mel_basis(self) -> np.ndarray:
+        """Build the mel filterbank that projects linear spectrograms onto the mel scale.
+
+        Returns:
+            np.ndarray: melspectrogram basis.
+        """
+        if self.mel_fmax is not None:
+            assert self.mel_fmax <= self.sample_rate // 2
+        # `sr` and `n_fft` are passed by keyword: librosa >= 0.10 rejects them as
+        # positional arguments, while older releases accept both forms.
+        return librosa.filters.mel(
+            sr=self.sample_rate, n_fft=self.fft_size, n_mels=self.num_mels, fmin=self.mel_fmin, fmax=self.mel_fmax
+        )
+
+    def _stft_parameters(
+        self,
+    ) -> Tuple[int, int]:
+        """Compute the real STFT parameters from the time values.
+
+        Returns:
+            Tuple[int, int]: hop length and window length for STFT.
+        """
+        factor = self.frame_length_ms / self.frame_shift_ms
+        assert (factor).is_integer(), " [!] frame_shift_ms should divide frame_length_ms"
+        hop_length = int(self.frame_shift_ms / 1000.0 * self.sample_rate)
+        win_length = int(hop_length * factor)
+        return hop_length, win_length
+
+    ### normalization ###
+    def normalize(self, S: np.ndarray) -> np.ndarray:
+        """Normalize values into `[0, self.max_norm]` or `[-self.max_norm, self.max_norm]`
+
+        Args:
+            S (np.ndarray): Spectrogram to normalize.
+
+        Raises:
+            RuntimeError: Mean and variance is computed from incompatible parameters.
+
+        Returns:
+            np.ndarray: Normalized spectrogram.
+        """
+        # pylint: disable=no-else-return
+        S = S.copy()
+        if self.signal_norm:
+            # mean-var scaling
+            if hasattr(self, "mel_scaler"):
+                if S.shape[0] == self.num_mels:
+                    return self.mel_scaler.transform(S.T).T
+                elif S.shape[0] == self.fft_size / 2:
+                    return self.linear_scaler.transform(S.T).T
+                else:
+                    raise RuntimeError(" [!] Mean-Var stats does not match the given feature dimensions.")
+            # range normalization
+            S -= self.ref_level_db  # discard certain range of DB assuming it is air noise
+            S_norm = (S - self.min_level_db) / (-self.min_level_db)
+            if self.symmetric_norm:
+                S_norm = ((2 * self.max_norm) * S_norm) - self.max_norm
+                if self.clip_norm:
+                    S_norm = np.clip(
+                        S_norm, -self.max_norm, self.max_norm  # pylint: disable=invalid-unary-operand-type
+                    )
+                return S_norm
+            else:
+                S_norm = self.max_norm * S_norm
+                if self.clip_norm:
+                    S_norm = np.clip(S_norm, 0, self.max_norm)
+                return S_norm
+        else:
+            return S
+
+    def denormalize(self, S: np.ndarray) -> np.ndarray:
+        """Denormalize spectrogram values.
+
+        Args:
+            S (np.ndarray): Spectrogram to denormalize.
+
+        Raises:
+            RuntimeError: Mean and variance are incompatible.
+
+        Returns:
+            np.ndarray: Denormalized spectrogram.
+        """
+        # pylint: disable=no-else-return
+        S_denorm = S.copy()
+        if self.signal_norm:
+            # mean-var scaling
+            if hasattr(self, "mel_scaler"):
+                if S_denorm.shape[0] == self.num_mels:
+                    return self.mel_scaler.inverse_transform(S_denorm.T).T
+                elif S_denorm.shape[0] == self.fft_size / 2:
+                    return self.linear_scaler.inverse_transform(S_denorm.T).T
+                else:
+                    raise RuntimeError(" [!] Mean-Var stats does not match the given feature dimensions.")
+            if self.symmetric_norm:
+                if self.clip_norm:
+                    S_denorm = np.clip(
+                        S_denorm, -self.max_norm, self.max_norm  # pylint: disable=invalid-unary-operand-type
+                    )
+                S_denorm = ((S_denorm + self.max_norm) * -self.min_level_db / (2 * self.max_norm)) + self.min_level_db
+                return S_denorm + self.ref_level_db
+            else:
+                if self.clip_norm:
+                    S_denorm = np.clip(S_denorm, 0, self.max_norm)
+                S_denorm = (S_denorm * -self.min_level_db / self.max_norm) + self.min_level_db
+                return S_denorm + self.ref_level_db
+        else:
+            return S_denorm
+
+    ### Mean-STD scaling ###
+    def load_stats(self, stats_path: str) -> Tuple[np.array, np.array, np.array, np.array, Dict]:
+        """Loading mean and variance statistics from a `npy` file.
+
+        Args:
+            stats_path (str): Path to the `npy` file containing
+
+        Returns:
+            Tuple[np.array, np.array, np.array, np.array, Dict]: loaded statistics and the config used to
+                compute them.
+        """
+        stats = np.load(stats_path, allow_pickle=True).item()  # pylint: disable=unexpected-keyword-arg
+        mel_mean = stats["mel_mean"]
+        mel_std = stats["mel_std"]
+        linear_mean = stats["linear_mean"]
+        linear_std = stats["linear_std"]
+        stats_config = stats["audio_config"]
+        # check all audio parameters used for computing stats
+        skip_parameters = ["griffin_lim_iters", "stats_path", "do_trim_silence", "ref_level_db", "power"]
+        for key in stats_config.keys():
+            if key in skip_parameters:
+                continue
+            if key not in ["sample_rate", "trim_db"]:
+                assert (
+                    stats_config[key] == self.__dict__[key]
+                ), f" [!] Audio param {key} does not match the value used for computing mean-var stats. {stats_config[key]} vs {self.__dict__[key]}"
+        return mel_mean, mel_std, linear_mean, linear_std, stats_config
+
+    # pylint: disable=attribute-defined-outside-init
+    def setup_scaler(
+        self, mel_mean: np.ndarray, mel_std: np.ndarray, linear_mean: np.ndarray, linear_std: np.ndarray
+    ) -> None:
+        """Initialize scaler objects used in mean-std normalization.
+
+        Args:
+            mel_mean (np.ndarray): Mean for melspectrograms.
+            mel_std (np.ndarray): STD for melspectrograms.
+            linear_mean (np.ndarray): Mean for full scale spectrograms.
+            linear_std (np.ndarray): STD for full scale spectrograms.
+        """
+        self.mel_scaler = StandardScaler()
+        self.mel_scaler.set_stats(mel_mean, mel_std)
+        self.linear_scaler = StandardScaler()
+        self.linear_scaler.set_stats(linear_mean, linear_std)
+
+    ### DB and AMP conversion ###
+    # pylint: disable=no-self-use
+    def _amp_to_db(self, x: np.ndarray) -> np.ndarray:
+        """Convert amplitude values to decibels.
+
+        Args:
+            x (np.ndarray): Amplitude spectrogram.
+
+        Returns:
+            np.ndarray: Decibels spectrogram.
+        """
+        return self.spec_gain * _log(np.maximum(1e-5, x), self.base)
+
+    # pylint: disable=no-self-use
+    def _db_to_amp(self, x: np.ndarray) -> np.ndarray:
+        """Convert decibels spectrogram to amplitude spectrogram.
+
+        Args:
+            x (np.ndarray): Decibels spectrogram.
+
+        Returns:
+            np.ndarray: Amplitude spectrogram.
+        """
+        return _exp(x / self.spec_gain, self.base)
+
+    ### Preemphasis ###
+    def apply_preemphasis(self, x: np.ndarray) -> np.ndarray:
+        """Apply pre-emphasis to the audio signal. Useful to reduce the correlation between neighbouring signal values.
+
+        Args:
+            x (np.ndarray): Audio signal.
+
+        Raises:
+            RuntimeError: Preemphasis coeff is set to 0.
+
+        Returns:
+            np.ndarray: Decorrelated audio signal.
+        """
+        if self.preemphasis == 0:
+            raise RuntimeError(" [!] Preemphasis is set 0.0.")
+        return scipy.signal.lfilter([1, -self.preemphasis], [1], x)
+
+    def apply_inv_preemphasis(self, x: np.ndarray) -> np.ndarray:
+        """Reverse pre-emphasis."""
+        if self.preemphasis == 0:
+            raise RuntimeError(" [!] Preemphasis is set 0.0.")
+        return scipy.signal.lfilter([1], [1, -self.preemphasis], x)
+
+    ### SPECTROGRAMs ###
+    def _linear_to_mel(self, spectrogram: np.ndarray) -> np.ndarray:
+        """Project a full scale spectrogram to a melspectrogram.
+
+        Args:
+            spectrogram (np.ndarray): Full scale spectrogram.
+
+        Returns:
+            np.ndarray: Melspectrogram
+        """
+        return np.dot(self.mel_basis, spectrogram)
+
+    def _mel_to_linear(self, mel_spec: np.ndarray) -> np.ndarray:
+        """Convert a melspectrogram to full scale spectrogram."""
+        return np.maximum(1e-10, np.dot(self.inv_mel_basis, mel_spec))
+
+    def spectrogram(self, y: np.ndarray) -> np.ndarray:
+        """Compute a spectrogram from a waveform.
+
+        Args:
+            y (np.ndarray): Waveform.
+
+        Returns:
+            np.ndarray: Spectrogram.
+        """
+        if self.preemphasis != 0:
+            D = self._stft(self.apply_preemphasis(y))
+        else:
+            D = self._stft(y)
+        if self.do_amp_to_db_linear:
+            S = self._amp_to_db(np.abs(D))
+        else:
+            S = np.abs(D)
+        return self.normalize(S).astype(np.float32)
+
+    def melspectrogram(self, y: np.ndarray) -> np.ndarray:
+        """Compute a melspectrogram from a waveform."""
+        if self.preemphasis != 0:
+            D = self._stft(self.apply_preemphasis(y))
+        else:
+            D = self._stft(y)
+        if self.do_amp_to_db_mel:
+            S = self._amp_to_db(self._linear_to_mel(np.abs(D)))
+        else:
+            S = self._linear_to_mel(np.abs(D))
+        return self.normalize(S).astype(np.float32)
+
+    def inv_spectrogram(self, spectrogram: np.ndarray) -> np.ndarray:
+        """Convert a spectrogram to a waveform using Griffin-Lim vocoder."""
+        S = self.denormalize(spectrogram)
+        S = self._db_to_amp(S)
+        # Reconstruct phase
+        if self.preemphasis != 0:
+            return self.apply_inv_preemphasis(self._griffin_lim(S**self.power))
+        return self._griffin_lim(S**self.power)
+
+    def inv_melspectrogram(self, mel_spectrogram: np.ndarray) -> np.ndarray:
+        """Convert a melspectrogram to a waveform using Griffin-Lim vocoder."""
+        D = self.denormalize(mel_spectrogram)
+        S = self._db_to_amp(D)
+        S = self._mel_to_linear(S)  # Convert back to linear
+        if self.preemphasis != 0:
+            return self.apply_inv_preemphasis(self._griffin_lim(S**self.power))
+        return self._griffin_lim(S**self.power)
+
+    def out_linear_to_mel(self, linear_spec: np.ndarray) -> np.ndarray:
+        """Convert a full scale linear spectrogram output of a network to a melspectrogram.
+
+        Args:
+            linear_spec (np.ndarray): Normalized full scale linear spectrogram.
+
+        Returns:
+            np.ndarray: Normalized melspectrogram.
+        """
+        S = self.denormalize(linear_spec)
+        S = self._db_to_amp(S)
+        S = self._linear_to_mel(np.abs(S))
+        S = self._amp_to_db(S)
+        mel = self.normalize(S)
+        return mel
+
+    ### STFT and ISTFT ###
+    def _stft(self, y: np.ndarray) -> np.ndarray:
+        """Librosa STFT wrapper.
+
+        Args:
+            y (np.ndarray): Audio signal.
+
+        Returns:
+            np.ndarray: Complex number array.
+        """
+        return librosa.stft(
+            y=y,
+            n_fft=self.fft_size,
+            hop_length=self.hop_length,
+            win_length=self.win_length,
+            pad_mode=self.stft_pad_mode,
+            window="hann",
+            center=True,
+        )
+
+    def _istft(self, y: np.ndarray) -> np.ndarray:
+        """Librosa iSTFT wrapper."""
+        return librosa.istft(y, hop_length=self.hop_length, win_length=self.win_length)
+
+    def _griffin_lim(self, S):
+        """Estimate a waveform from magnitudes `S` by iterating iSTFT/STFT from a random initial phase."""
+        angles = np.exp(2j * np.pi * np.random.rand(*S.shape))
+        S_complex = np.abs(S).astype(np.complex128)  # `np.complex` alias was removed in NumPy 1.24
+        y = self._istft(S_complex * angles)
+        if not np.isfinite(y).all():
+            print(" [!] Waveform is not finite everywhere. Skipping the GL.")
+            return np.array([0.0])
+        for _ in range(self.griffin_lim_iters):
+            y = self._istft(S_complex * np.exp(1j * np.angle(self._stft(y))))
+        return y
+
+    def compute_stft_paddings(self, x, pad_sides=1):
+        """Compute paddings used by Librosa's STFT. Compute right padding (final frame) or both sides padding
+        (first and final frames)"""
+        assert pad_sides in (1, 2)
+        pad = (x.shape[0] // self.hop_length + 1) * self.hop_length - x.shape[0]
+        if pad_sides == 1:
+            return 0, pad
+        return pad // 2, pad // 2 + pad % 2
+
+    def compute_f0(self, x: np.ndarray) -> np.ndarray:
+        """Compute pitch (f0) of a waveform using the same parameters used for computing melspectrogram.
+
+        Args:
+            x (np.ndarray): Waveform.
+
+        Returns:
+            np.ndarray: Pitch.
+
+        Examples:
+            >>> WAV_FILE = filename = librosa.util.example_audio_file()
+            >>> from TTS.config import BaseAudioConfig
+            >>> from TTS.utils.audio import AudioProcessor
+            >>> conf = BaseAudioConfig(pitch_fmax=640, pitch_fmin=1)
+            >>> ap = AudioProcessor(**conf)
+            >>> wav = ap.load_wav(WAV_FILE, sr=ap.sample_rate)[:5 * ap.sample_rate]
+            >>> pitch = ap.compute_f0(wav)
+        """
+        assert self.pitch_fmax is not None, " [!] Set `pitch_fmax` before caling `compute_f0`."
+        assert self.pitch_fmin is not None, " [!] Set `pitch_fmin` before caling `compute_f0`."
+        # align F0 length to the spectrogram length
+        if len(x) % self.hop_length == 0:
+            x = np.pad(x, (0, self.hop_length // 2), mode=self.stft_pad_mode)
+
+        f0 = compute_f0(
+            x=x,
+            pitch_fmax=self.pitch_fmax,
+            pitch_fmin=self.pitch_fmin,
+            hop_length=self.hop_length,
+            win_length=self.win_length,
+            sample_rate=self.sample_rate,
+            stft_pad_mode=self.stft_pad_mode,
+            center=True,
+        )
+
+        return f0
+
+    ### Audio Processing ###
+    def find_endpoint(self, wav: np.ndarray, min_silence_sec=0.8) -> int:
+        """Find the last point without silence at the end of an audio signal.
+
+        Args:
+            wav (np.ndarray): Audio signal.
+            min_silence_sec (float, optional): Ignore silences that are shorter than this in secs. Defaults to 0.8.
+
+        Returns:
+            int: Last point without silence, or `len(wav)` when no silent window is found.
+        """
+        window_length = int(self.sample_rate * min_silence_sec)
+        hop_length = int(window_length / 4)
+        # the silence threshold is not an argument: it is derived from `self.trim_db`
+        threshold = self._db_to_amp(-self.trim_db)
+        for x in range(hop_length, len(wav) - window_length, hop_length):
+            if np.max(wav[x : x + window_length]) < threshold:
+                return x + hop_length
+        return len(wav)
+
+    def trim_silence(self, wav):
+        """Trim silent parts with a threshold and 0.01 sec margin"""
+        margin = int(self.sample_rate * 0.01)
+        wav = wav[margin:-margin]
+        return librosa.effects.trim(wav, top_db=self.trim_db, frame_length=self.win_length, hop_length=self.hop_length)[
+            0
+        ]
+
+    @staticmethod
+    def sound_norm(x: np.ndarray) -> np.ndarray:
+        """Normalize the volume of an audio signal.
+
+        Args:
+            x (np.ndarray): Raw waveform.
+
+        Returns:
+            np.ndarray: Volume normalized waveform.
+        """
+        return x / abs(x).max() * 0.95
+
+    @staticmethod
+    def _rms_norm(wav, db_level=-27):
+        r = 10 ** (db_level / 20)
+        a = np.sqrt((len(wav) * (r**2)) / np.sum(wav**2))
+        return wav * a
+
+    def rms_volume_norm(self, x: np.ndarray, db_level: float = None) -> np.ndarray:
+        """Normalize the volume based on RMS of the signal.
+
+        Args:
+            x (np.ndarray): Raw waveform.
+
+        Returns:
+            np.ndarray: RMS normalized waveform.
+        """
+        if db_level is None:
+            db_level = self.db_level
+        assert -99 <= db_level <= 0, " [!] db_level should be between -99 and 0"
+        wav = self._rms_norm(x, db_level)
+        return wav
+
+    ### save and load ###
+    def load_wav(self, filename: str, sr: int = None) -> np.ndarray:
+        """Read a wav file using Librosa and optionally resample, silence trim, volume normalize.
+
+        Resampling slows down loading the file significantly. Therefore it is recommended to resample the file before.
+
+        Args:
+            filename (str): Path to the wav file.
+            sr (int, optional): Sampling rate for resampling. Defaults to None.
+
+        Returns:
+            np.ndarray: Loaded waveform.
+        """
+        if self.resample:
+            # loading with resampling. It is significantly slower.
+            x, sr = librosa.load(filename, sr=self.sample_rate)
+        elif sr is None:
+            # SF is faster than librosa for loading files
+            x, sr = sf.read(filename)
+            assert self.sample_rate == sr, "%s vs %s" % (self.sample_rate, sr)
+        else:
+            x, sr = librosa.load(filename, sr=sr)
+        if self.do_trim_silence:
+            try:
+                x = self.trim_silence(x)
+            except ValueError:
+                print(f" [!] File cannot be trimmed for silence - {filename}")
+        if self.do_sound_norm:
+            x = self.sound_norm(x)
+        if self.do_rms_norm:
+            x = self.rms_volume_norm(x, self.db_level)
+        return x
+
+    def save_wav(self, wav: np.ndarray, path: str, sr: int = None) -> None:
+        """Save a waveform to a file using Scipy.
+
+        Args:
+            wav (np.ndarray): Waveform to save.
+            path (str): Path to a output file.
+            sr (int, optional): Sampling rate used for saving to the file. Defaults to None.
+        """
+        if self.do_rms_norm:
+            wav_norm = self.rms_volume_norm(wav, self.db_level) * 32767
+        else:
+            wav_norm = wav * (32767 / max(0.01, np.max(np.abs(wav))))
+
+        scipy.io.wavfile.write(path, sr if sr else self.sample_rate, wav_norm.astype(np.int16))
+
+    def get_duration(self, filename: str) -> float:
+        """Get the duration of a wav file using Librosa.
+
+        Args:
+            filename (str): Path to the wav file, forwarded by keyword (the first positional slot is the signal `y`).
+        """
+        return librosa.get_duration(filename=filename)
+
+    @staticmethod
+    def mulaw_encode(wav: np.ndarray, qc: int) -> np.ndarray:
+        mu = 2**qc - 1
+        # wav_abs = np.minimum(np.abs(wav), 1.0)
+        signal = np.sign(wav) * np.log(1 + mu * np.abs(wav)) / np.log(1.0 + mu)
+        # Quantize signal to the specified number of levels.
+        signal = (signal + 1) / 2 * mu + 0.5
+        return np.floor(
+            signal,
+        )
+
+    @staticmethod
+    def mulaw_decode(wav, qc):
+        """Recovers waveform from quantized values."""
+        mu = 2**qc - 1
+        x = np.sign(wav) / mu * ((1 + mu) ** np.abs(wav) - 1)
+        return x
+
+    @staticmethod
+    def encode_16bits(x):
+        return np.clip(x * 2**15, -(2**15), 2**15 - 1).astype(np.int16)
+
+    @staticmethod
+    def quantize(x: np.ndarray, bits: int) -> np.ndarray:
+        """Quantize a waveform to a given number of bits.
+
+        Args:
+            x (np.ndarray): Waveform to quantize. Must be normalized into the range `[-1, 1]`.
+            bits (int): Number of quantization bits.
+
+        Returns:
+            np.ndarray: Quantized waveform.
+        """
+        return (x + 1.0) * (2**bits - 1) / 2
+
+    @staticmethod
+    def dequantize(x, bits):
+        """Dequantize a waveform from the given number of bits."""
+        return 2 * x / (2**bits - 1) - 1
+
+
+def _log(x, base):
+    """Base-10 logarithm of `x` when `base` equals 10, natural logarithm for any other `base`."""
+    log_fn = np.log10 if base == 10 else np.log
+    return log_fn(x)
+
+
+def _exp(x, base):
+    """Return `10 ** x` when `base` equals 10, and `e ** x` for any other `base`."""
+    power_of_ten = base == 10
+    return np.power(10, x) if power_of_ten else np.exp(x)
diff --git a/TTS/utils/audio/__init__.py b/TTS/utils/audio/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..f18f22199908ee0dd5445e34527f5fddb65cfed8
--- /dev/null
+++ b/TTS/utils/audio/__init__.py
@@ -0,0 +1 @@
+from TTS.utils.audio.processor import AudioProcessor
diff --git a/TTS/utils/audio/__pycache__/__init__.cpython-310.pyc b/TTS/utils/audio/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5dc3d67e50dc14b74d91a7637d4d46692131bc90
Binary files /dev/null and b/TTS/utils/audio/__pycache__/__init__.cpython-310.pyc differ
diff --git a/TTS/utils/audio/__pycache__/__init__.cpython-37.pyc b/TTS/utils/audio/__pycache__/__init__.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6174398b2feb1ae1ae8b3ce941441ef6e36c40e5
Binary files /dev/null and b/TTS/utils/audio/__pycache__/__init__.cpython-37.pyc differ
diff --git a/TTS/utils/audio/__pycache__/__init__.cpython-38.pyc b/TTS/utils/audio/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..69b417982369b2a6d28850613f6b79913231bdb3
Binary files /dev/null and b/TTS/utils/audio/__pycache__/__init__.cpython-38.pyc differ
diff --git a/TTS/utils/audio/__pycache__/__init__.cpython-39.pyc b/TTS/utils/audio/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b4e9834d92f376d5240a8987acddb21b502fefae
Binary files /dev/null and b/TTS/utils/audio/__pycache__/__init__.cpython-39.pyc differ
diff --git a/TTS/utils/audio/__pycache__/numpy_transforms.cpython-310.pyc b/TTS/utils/audio/__pycache__/numpy_transforms.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6b2c64839bb6ce0e3b0284681def08c0d11d7327
Binary files /dev/null and b/TTS/utils/audio/__pycache__/numpy_transforms.cpython-310.pyc differ
diff --git a/TTS/utils/audio/__pycache__/numpy_transforms.cpython-37.pyc b/TTS/utils/audio/__pycache__/numpy_transforms.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..3d792737ca6ae4bfc4f85aed6ea1226a1ade99c6
Binary files /dev/null and b/TTS/utils/audio/__pycache__/numpy_transforms.cpython-37.pyc differ
diff --git a/TTS/utils/audio/__pycache__/numpy_transforms.cpython-38.pyc b/TTS/utils/audio/__pycache__/numpy_transforms.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e27a5e20f06f35e80192698f49d173f166e9e9e7
Binary files /dev/null and b/TTS/utils/audio/__pycache__/numpy_transforms.cpython-38.pyc differ
diff --git a/TTS/utils/audio/__pycache__/numpy_transforms.cpython-39.pyc b/TTS/utils/audio/__pycache__/numpy_transforms.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e2c6d8f100069fae95dfcfb08007ce979fc993a7
Binary files /dev/null and b/TTS/utils/audio/__pycache__/numpy_transforms.cpython-39.pyc differ
diff --git a/TTS/utils/audio/__pycache__/processor.cpython-310.pyc b/TTS/utils/audio/__pycache__/processor.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e68fc847fb1e53d7908db8e5d487f979f719227d
Binary files /dev/null and b/TTS/utils/audio/__pycache__/processor.cpython-310.pyc differ
diff --git a/TTS/utils/audio/__pycache__/processor.cpython-37.pyc b/TTS/utils/audio/__pycache__/processor.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0447d8a3064ce4e3ea58aebce7b35b9b39e5c9b0
Binary files /dev/null and b/TTS/utils/audio/__pycache__/processor.cpython-37.pyc differ
diff --git a/TTS/utils/audio/__pycache__/processor.cpython-38.pyc b/TTS/utils/audio/__pycache__/processor.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f507cb02c9c11d7dc3385a743754dd85f2c10ed5
Binary files /dev/null and b/TTS/utils/audio/__pycache__/processor.cpython-38.pyc differ
diff --git a/TTS/utils/audio/__pycache__/processor.cpython-39.pyc b/TTS/utils/audio/__pycache__/processor.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..45073889e3202d5fcd088051c0e2e33834fd44de
Binary files /dev/null and b/TTS/utils/audio/__pycache__/processor.cpython-39.pyc differ
diff --git a/TTS/utils/audio/__pycache__/torch_transforms.cpython-310.pyc b/TTS/utils/audio/__pycache__/torch_transforms.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e5ce76f8aef72776114870875f007394a0b1e527
Binary files /dev/null and b/TTS/utils/audio/__pycache__/torch_transforms.cpython-310.pyc differ
diff --git a/TTS/utils/audio/__pycache__/torch_transforms.cpython-37.pyc b/TTS/utils/audio/__pycache__/torch_transforms.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..be75268bb2b5211c24890e82bad9e3b16c1bf94c
Binary files /dev/null and b/TTS/utils/audio/__pycache__/torch_transforms.cpython-37.pyc differ
diff --git a/TTS/utils/audio/__pycache__/torch_transforms.cpython-38.pyc b/TTS/utils/audio/__pycache__/torch_transforms.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..714b6a340525f5193860e54d28508af21f3de0a0
Binary files /dev/null and b/TTS/utils/audio/__pycache__/torch_transforms.cpython-38.pyc differ
diff --git a/TTS/utils/audio/__pycache__/torch_transforms.cpython-39.pyc b/TTS/utils/audio/__pycache__/torch_transforms.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..19069eadbd2e9b9cfbf0a718be7e373cf7debc26
Binary files /dev/null and b/TTS/utils/audio/__pycache__/torch_transforms.cpython-39.pyc differ
diff --git a/TTS/utils/audio/numpy_transforms.py b/TTS/utils/audio/numpy_transforms.py
new file mode 100644
index 0000000000000000000000000000000000000000..952b2243e5f6c24889d55c9e5135ee70410bdb71
--- /dev/null
+++ b/TTS/utils/audio/numpy_transforms.py
@@ -0,0 +1,456 @@
+from typing import Tuple
+
+import librosa
+import numpy as np
+import scipy
+import soundfile as sf
+from librosa import pyin
+
+# For using kwargs
+# pylint: disable=unused-argument
+
+
+def build_mel_basis(
+    *,
+    sample_rate: int = None,
+    fft_size: int = None,
+    num_mels: int = None,
+    mel_fmax: int = None,
+    mel_fmin: int = None,
+    **kwargs,
+) -> np.ndarray:
+    """Build melspectrogram basis. A `mel_fmin` of None is treated as 0 Hz, as in `AudioProcessor.__init__`.
+
+    Returns:
+        np.ndarray: melspectrogram basis.
+    """
+    if mel_fmax is not None:
+        assert mel_fmax <= sample_rate // 2
+        assert mel_fmax - (mel_fmin or 0) > 0  # `mel_fmin` defaults to None; `int - None` would raise TypeError
+    return librosa.filters.mel(sr=sample_rate, n_fft=fft_size, n_mels=num_mels, fmin=mel_fmin or 0, fmax=mel_fmax)
+
+
+def millisec_to_length(
+    *, frame_length_ms: int = None, frame_shift_ms: int = None, sample_rate: int = None, **kwargs
+) -> Tuple[int, int]:
+    """Convert the STFT window and hop sizes from milliseconds to samples.
+
+    Returns:
+        Tuple[int, int]: window length and hop length for STFT, in that order.
+    """
+    overlap_factor = frame_length_ms / frame_shift_ms
+    assert overlap_factor.is_integer(), " [!] frame_shift_ms should divide frame_length_ms"
+    window_samples = int(frame_length_ms / 1000.0 * sample_rate)
+    # The hop is derived from the truncated window length, not from `frame_shift_ms` directly.
+    return window_samples, int(window_samples / float(overlap_factor))
+
+
+def _log(x, base):
+    """Base-10 logarithm of `x` when `base == 10`; the natural logarithm for any other `base`."""
+    log_fn = np.log10 if base == 10 else np.log
+    return log_fn(x)
+
+
+def _exp(x, base):
+    """Inverse of `_log`: `10 ** x` when `base == 10`; `e ** x` for any other `base`."""
+    # Any base other than 10 (including np.e and None) falls through to the natural exponential.
+    return np.power(10, x) if base == 10 else np.exp(x)
+
+
+def amp_to_db(*, x: np.ndarray = None, gain: float = 1, base: int = 10, **kwargs) -> np.ndarray:
+    """Convert amplitude values to decibels as `gain * log(max(x, 1e-8))`.
+
+    Args:
+        x (np.ndarray): Amplitude spectrogram. Must not contain negative values.
+        gain (float): Gain factor. Defaults to 1.
+        base (int): Logarithm base. Defaults to 10.
+
+    Returns:
+        np.ndarray: Decibels spectrogram.
+    """
+    assert not (x < 0).any(), " [!] Input values must be non-negative."
+    return gain * _log(np.maximum(1e-8, x), base)
+
+
+# pylint: disable=no-self-use
+def db_to_amp(*, x: np.ndarray = None, gain: float = 1, base: int = 10, **kwargs) -> np.ndarray:
+    """Convert decibels spectrogram to amplitude spectrogram by exponentiating `x / gain`.
+
+    Args:
+        x (np.ndarray): Decibels spectrogram.
+        gain (float): Gain factor that `x` is divided by before exponentiation. Defaults to 1.
+        base (int): Logarithm base passed on to `_exp`. Defaults to 10.
+
+    Returns:
+        np.ndarray: Amplitude spectrogram.
+    """
+    return _exp(x / gain, base)
+
+
+def preemphasis(*, x: np.ndarray, coef: float = 0.97, **kwargs) -> np.ndarray:
+    """Apply pre-emphasis to the audio signal. Useful to reduce the correlation between neighbouring signal values.
+
+    Args:
+        x (np.ndarray): Audio signal.
+        coef (float): Pre-emphasis coefficient. A value of 0 raises RuntimeError. Defaults to 0.97.
+
+    Returns:
+        np.ndarray: Decorrelated audio signal.
+    """
+    if coef == 0:
+        raise RuntimeError(" [!] Preemphasis is set 0.0.")
+    fir_taps = [1, -coef]  # y[n] = x[n] - coef * x[n - 1]
+    return scipy.signal.lfilter(fir_taps, [1], x)
+
+
+def deemphasis(*, x: np.ndarray = None, coef: float = 0.97, **kwargs) -> np.ndarray:
+    """Reverse pre-emphasis with the recursive filter `y[n] = x[n] + coef * y[n - 1]`; `coef == 0` raises RuntimeError."""
+    if coef == 0:
+        raise RuntimeError(" [!] Preemphasis is set 0.0.")
+    return scipy.signal.lfilter(b=[1], a=[1, -coef], x=x)
+
+
+def spec_to_mel(*, spec: np.ndarray, mel_basis: np.ndarray = None, **kwargs) -> np.ndarray:
+    """Convert a full scale linear spectrogram output of a network to a melspectrogram.
+
+    The result is `np.dot(mel_basis, spec)`; `mel_basis` defaults to None but has to be provided by the caller.
+
+    Args:
+        spec (np.ndarray): Normalized full scale linear spectrogram. Shape :math:`[C, T]`.
+        mel_basis (np.ndarray): Mel basis used as the left operand of the dot product.
+
+    Returns:
+        np.ndarray: Normalized melspectrogram.
+    """
+    return np.dot(mel_basis, spec)
+
+
+def mel_to_spec(*, mel: np.ndarray = None, mel_basis: np.ndarray = None, **kwargs) -> np.ndarray:
+    """Convert a melspectrogram to full scale spectrogram via the pseudo-inverse of `mel_basis`, floored at 1e-10."""
+    assert not (mel < 0).any(), " [!] Input values must be non-negative."
+    linear = np.dot(np.linalg.pinv(mel_basis), mel)
+    return np.maximum(1e-10, linear)
+
+
+def wav_to_spec(*, wav: np.ndarray = None, **kwargs) -> np.ndarray:
+    """Compute a magnitude spectrogram from a waveform.
+
+    Args:
+        wav (np.ndarray): Waveform. Shape :math:`[T_wav,]`
+        **kwargs: STFT settings forwarded to `stft` (e.g. `fft_size`, `hop_length`, `win_length`).
+
+    Returns:
+        np.ndarray: float32 spectrogram. Shape :math:`[C, T_spec]`. :math:`T_spec == T_wav / hop_length`
+    """
+    # Drop the phase; only the STFT magnitude is kept.
+    return np.abs(stft(y=wav, **kwargs)).astype(np.float32)
+
+
+def wav_to_mel(*, wav: np.ndarray = None, mel_basis=None, **kwargs) -> np.ndarray:
+    """Compute a float32 melspectrogram from a waveform: STFT magnitude projected with `mel_basis`."""
+    magnitude = np.abs(stft(y=wav, **kwargs))
+    # `kwargs` is passed on to `spec_to_mel` as well, which only uses `spec` and `mel_basis`.
+    return spec_to_mel(spec=magnitude, mel_basis=mel_basis, **kwargs).astype(np.float32)
+
+
+def spec_to_wav(*, spec: np.ndarray, power: float = 1.5, **kwargs) -> np.ndarray:
+    """Convert a spectrogram to a waveform using the Griffin-Lim vocoder."""
+    # The magnitudes are raised to `power` before phase reconstruction; `kwargs` goes to `griffin_lim`.
+    return griffin_lim(spec=spec.copy() ** power, **kwargs)
+
+
+def mel_to_wav(*, mel: np.ndarray = None, power: float = 1.5, **kwargs) -> np.ndarray:
+    """Convert a melspectrogram to a waveform using the Griffin-Lim vocoder. Requires `mel_basis` in `kwargs`."""
+    # Convert back to linear; `mel_basis` stays in `kwargs` and is also handed to `griffin_lim`.
+    linear = mel_to_spec(mel=mel.copy(), mel_basis=kwargs["mel_basis"])
+    return griffin_lim(spec=linear**power, **kwargs)
+
+
+### STFT and ISTFT ###
+def stft(
+    *,
+    y: np.ndarray = None,
+    fft_size: int = None,
+    hop_length: int = None,
+    win_length: int = None,
+    pad_mode: str = "reflect",
+    window: str = "hann",
+    center: bool = True,
+    **kwargs,
+) -> np.ndarray:
+    """Librosa STFT wrapper that passes `fft_size` as Librosa's `n_fft`; extra `kwargs` are ignored.
+
+    Check http://librosa.org/doc/main/generated/librosa.stft.html for argument details.
+
+    Returns:
+        np.ndarray: Complex number array.
+    """
+    return librosa.stft(
+        y=y,
+        n_fft=fft_size,
+        hop_length=hop_length,
+        win_length=win_length,
+        pad_mode=pad_mode,
+        window=window,
+        center=center,
+    )
+
+
+def istft(
+    *,
+    y: np.ndarray = None,
+    fft_size: int = None,
+    hop_length: int = None,
+    win_length: int = None,
+    window: str = "hann",
+    center: bool = True,
+    **kwargs,
+) -> np.ndarray:
+    """Librosa iSTFT wrapper. `y` is the complex STFT matrix; `fft_size` is accepted but not forwarded to Librosa.
+
+    Check http://librosa.org/doc/main/generated/librosa.istft.html for argument details.
+
+    Returns:
+        np.ndarray: Time-domain signal reconstructed by `librosa.istft`.
+    """
+    return librosa.istft(y, hop_length=hop_length, win_length=win_length, center=center, window=window)
+
+
+def griffin_lim(*, spec: np.ndarray = None, num_iter=60, **kwargs) -> np.ndarray:
+    """Estimate a waveform from the magnitudes `spec`: random initial phase, then `num_iter` STFT/iSTFT rounds."""
+    magnitude = np.abs(spec).astype(complex)  # builtin `complex`: the `np.complex` alias was removed in NumPy 1.24
+    y = istft(y=magnitude * np.exp(2j * np.pi * np.random.rand(*spec.shape)), **kwargs)
+    if not np.isfinite(y).all():
+        print(" [!] Waveform is not finite everywhere. Skipping the GL.")
+        return np.array([0.0])
+    for _ in range(num_iter):
+        angles = np.exp(1j * np.angle(stft(y=y, **kwargs)))  # keep only the phase of the re-analysed signal
+        y = istft(y=magnitude * angles, **kwargs)
+    return y
+
+
+def compute_stft_paddings(
+    *, x: np.ndarray = None, hop_length: int = None, pad_two_sides: bool = False, **kwargs
+) -> Tuple[int, int]:
+    """Return the `(left, right)` padding that extends `x` to the next strictly larger multiple of `hop_length`.
+    With `pad_two_sides` the amount is split across both sides and the right side takes the odd sample."""
+    n_samples = x.shape[0]
+    total = (n_samples // hop_length + 1) * hop_length - n_samples
+    left = total // 2 if pad_two_sides else 0
+    return left, total - left
+
+
+def compute_f0(
+    *,
+    x: np.ndarray = None,
+    pitch_fmax: float = None,
+    pitch_fmin: float = None,
+    hop_length: int = None,
+    win_length: int = None,
+    sample_rate: int = None,
+    stft_pad_mode: str = "reflect",
+    center: bool = True,
+    **kwargs,
+) -> np.ndarray:
+    """Compute pitch (f0) of a waveform with Librosa's pYIN, using the melspectrogram framing parameters.
+
+    Args:
+        x (np.ndarray): Waveform. Shape :math:`[T_wav,]`
+        pitch_fmax (float): Pitch max value. Required.
+        pitch_fmin (float): Pitch min value. Required.
+        hop_length (int): Hop length passed to pYIN.
+        win_length (int): STFT window length. Passed to pYIN as `frame_length`; half of it as pYIN's `win_length`.
+        sample_rate (int): Audio sampling rate.
+        stft_pad_mode (str): Padding mode for STFT.
+        center (bool): Centered padding.
+
+    Returns:
+        np.ndarray: Pitch, 0 on unvoiced frames. Shape :math:`[T_pitch,]`. :math:`T_pitch == T_wav / hop_length`
+
+    Examples:
+        >>> WAV_FILE = filename = librosa.util.example_audio_file()
+        >>> from TTS.config import BaseAudioConfig
+        >>> from TTS.utils.audio import AudioProcessor
+        >>> conf = BaseAudioConfig(pitch_fmax=640, pitch_fmin=1)
+        >>> ap = AudioProcessor(**conf)
+        >>> wav = ap.load_wav(WAV_FILE, sr=ap.sample_rate)[:5 * ap.sample_rate]
+        >>> pitch = ap.compute_f0(wav)
+    """
+    assert pitch_fmax is not None, " [!] Set `pitch_fmax` before caling `compute_f0`."
+    assert pitch_fmin is not None, " [!] Set `pitch_fmin` before caling `compute_f0`."
+
+    pitch, is_voiced, _ = pyin(
+        y=x.astype(np.double),
+        fmin=pitch_fmin,
+        fmax=pitch_fmax,
+        sr=sample_rate,
+        frame_length=win_length,
+        win_length=win_length // 2,
+        hop_length=hop_length,
+        pad_mode=stft_pad_mode,
+        center=center,
+        n_thresholds=100,
+        beta_parameters=(2, 18),
+        boltzmann_parameter=2,
+        resolution=0.1,
+        max_transition_rate=35.92,
+        switch_prob=0.01,
+        no_trough_prob=0.01,
+    )
+
+    # Report unvoiced frames as 0 instead of whatever value pYIN filled them with.
+    return np.where(is_voiced, pitch, 0.0)
+
+
+### Audio Processing ###
+def find_endpoint(
+    *,
+    wav: np.ndarray = None,
+    trim_db: float = -40,
+    sample_rate: int = None,
+    min_silence_sec=0.8,
+    gain: float = None,
+    base: int = None,
+    **kwargs,
+) -> int:
+    """Find the last point without silence at the end of an audio signal.
+
+    Args:
+        wav (np.ndarray): Audio signal.
+        trim_db (float, optional): Silence level in decibels; the threshold is `db_to_amp(-trim_db)`. Defaults to -40.
+        sample_rate (int, optional): Sampling rate used to size the search window in samples. Defaults to None.
+        min_silence_sec (float, optional): Ignore silences that are shorter than this in secs. Defaults to 0.8.
+        gain (float, optional): Gain to be used to convert trim_db to trim_amp. Defaults to None.
+        base (int, optional): Base of the logarithm used to convert trim_db to trim_amp. Defaults to None.
+
+    Returns:
+        int: Last point without silence, or `len(wav)` when no silent window is found.
+    """
+    win = int(sample_rate * min_silence_sec)
+    hop = int(win / 4)
+    threshold = db_to_amp(x=-trim_db, gain=gain, base=base)
+    # Lazily scan windows a quarter-window apart; the first one whose maximum is below the threshold wins.
+    hits = (start + hop for start in range(hop, len(wav) - win, hop) if np.max(wav[start : start + win]) < threshold)
+    return next(hits, len(wav))
+
+
+def trim_silence(
+    *,
+    wav: np.ndarray = None,
+    sample_rate: int = None,
+    trim_db: float = None,
+    win_length: int = None,
+    hop_length: int = None,
+    **kwargs,
+) -> np.ndarray:
+    """Drop 0.01 sec from both ends of `wav`, then trim silence with `librosa.effects.trim` at `top_db=trim_db`."""
+    edge = int(sample_rate * 0.01)
+    trimmed, _ = librosa.effects.trim(wav[edge:-edge], top_db=trim_db, frame_length=win_length, hop_length=hop_length)
+    return trimmed
+
+
+def volume_norm(*, x: np.ndarray = None, coef: float = 0.95, **kwargs) -> np.ndarray:
+    """Normalize the volume of an audio signal so that its peak absolute value equals `coef`.
+
+    Args:
+        x (np.ndarray): Raw waveform.
+        coef (float): Coefficient to rescale the maximum value. Defaults to 0.95.
+
+    Returns:
+        np.ndarray: Volume normalized waveform. An all-zero or empty waveform is returned as an unscaled copy.
+    """
+    return x / abs(x).max() * coef if x.any() else x.copy()  # no non-zero sample: avoid 0 / 0 -> NaN
+
+
+def rms_norm(*, wav: np.ndarray = None, db_level: float = -27.0, **kwargs) -> np.ndarray:
+    """Rescale `wav` so that its RMS amplitude becomes `10 ** (db_level / 20)`."""
+    target_rms = 10 ** (db_level / 20)
+    return wav * np.sqrt((len(wav) * (target_rms**2)) / np.sum(wav**2))
+
+
+def rms_volume_norm(*, x: np.ndarray, db_level: float = -27.0, **kwargs) -> np.ndarray:
+    """Normalize the volume based on RMS of the signal.
+
+    Args:
+        x (np.ndarray): Raw waveform.
+        db_level (float): Target dB level in RMS. Must lie in `[-99, 0]`, otherwise the assertion fails.
+            Defaults to -27.0.
+
+    Returns:
+        np.ndarray: RMS normalized waveform.
+    """
+    assert -99 <= db_level <= 0, " [!] db_level should be between -99 and 0"
+    return rms_norm(wav=x, db_level=db_level)
+
+
+def load_wav(*, filename: str, sample_rate: int = None, resample: bool = False, **kwargs) -> np.ndarray:
+    """Read a wav file, with SoundFile by default or with Librosa when resampling is requested.
+
+    Resampling slows down loading the file significantly. Therefore it is recommended to resample the file before.
+
+    Args:
+        filename (str): Path to the wav file.
+        sample_rate (int, optional): Sampling rate for resampling. Only used when `resample` is True.
+            Defaults to None.
+        resample (bool, optional): Resample the audio file when loading. Slows down the I/O time. Defaults to False.
+
+    Returns:
+        np.ndarray: Loaded waveform.
+    """
+    if not resample:
+        # SF is faster than librosa for loading files
+        return sf.read(filename)[0]
+    # loading with resampling. It is significantly slower.
+    waveform, _ = librosa.load(filename, sr=sample_rate)
+    return waveform
+
+
+def save_wav(*, wav: np.ndarray, path: str, sample_rate: int = None, **kwargs) -> None:
+    """Peak-normalize a float waveform to the int16 range and save it to a file using Scipy.
+
+    Args:
+        wav (np.ndarray): Waveform with float values in range [-1, 1] to save.
+        path (str): Path to a output file.
+        sample_rate (int, optional): Sampling rate used for saving to the file. Defaults to None.
+    """
+    gain = 32767 / max(0.01, np.max(np.abs(wav)))  # the 0.01 floor caps the gain for near-silent input
+    scipy.io.wavfile.write(path, sample_rate, (wav * gain).astype(np.int16))
+
+
+def mulaw_encode(*, wav: np.ndarray, mulaw_qc: int, **kwargs) -> np.ndarray:
+    """Mu-law compand `wav` (expected in `[-1, 1]`, not clipped here) and quantize it to levels `0 .. 2**mulaw_qc - 1`."""
+    mu = 2**mulaw_qc - 1
+    companded = np.sign(wav) * np.log(1 + mu * np.abs(wav)) / np.log(1.0 + mu)
+    # Shift from [-1, 1] to [0, mu]; adding 0.5 before the floor rounds to the nearest level.
+    levels = (companded + 1) / 2 * mu + 0.5
+    return np.floor(levels)
+
+
+def mulaw_decode(*, wav, mulaw_qc: int, **kwargs) -> np.ndarray:
+    """Apply the mu-law expansion `sign(w) / mu * ((1 + mu) ** |w| - 1)` to `wav`, with `mu = 2**mulaw_qc - 1`."""
+    # NOTE(review): the formula expects companded values in [-1, 1], not `mulaw_encode`'s 0..mu levels - confirm.
+    mu = 2**mulaw_qc - 1
+    return np.sign(wav) / mu * ((1 + mu) ** np.abs(wav) - 1)
+
+
+def encode_16bits(*, x: np.ndarray, **kwargs) -> np.ndarray:  # scale by 2**15 and saturate to the int16 range
+    return np.clip(x * 32768, -32768, 32767).astype(np.int16)
+
+
+def quantize(*, x: np.ndarray, quantize_bits: int, **kwargs) -> np.ndarray:
+    """Linearly map a waveform from `[-1, 1]` onto the level range `[0, 2**quantize_bits - 1]`.
+
+    Args:
+        x (np.ndarray): Waveform to quantize. Must be normalized into the range `[-1, 1]`.
+        quantize_bits (int): Number of quantization bits.
+
+    Returns:
+        np.ndarray: Rescaled waveform. Values are only scaled and shifted; no rounding is applied.
+    """
+    return (x + 1.0) * (2**quantize_bits - 1) / 2
+
+
+def dequantize(*, x, quantize_bits, **kwargs) -> np.ndarray:
+    """Map levels in `[0, 2**quantize_bits - 1]` back to a waveform in `[-1, 1]` (algebraic inverse of `quantize`)."""
+    return 2 * x / (2**quantize_bits - 1) - 1
diff --git a/TTS/utils/audio/processor.py b/TTS/utils/audio/processor.py
new file mode 100644
index 0000000000000000000000000000000000000000..9d16474a0035ff3aea02d6507e78b6d3d7480b77
--- /dev/null
+++ b/TTS/utils/audio/processor.py
@@ -0,0 +1,767 @@
+from typing import Dict, Tuple
+
+import librosa
+import numpy as np
+import scipy.io.wavfile
+import scipy.signal
+import soundfile as sf
+
+from TTS.tts.utils.helpers import StandardScaler
+from TTS.utils.audio.numpy_transforms import compute_f0
+
+# pylint: disable=too-many-public-methods
+
+
+class AudioProcessor(object):
+    """Audio Processor for TTS.
+
+    Note:
+        All the class arguments are set to default values to enable a flexible initialization
+        of the class with the model config. They are not meaningful for all the arguments.
+
+    Args:
+        sample_rate (int, optional):
+            target audio sampling rate. Defaults to None.
+
+        resample (bool, optional):
+            enable/disable resampling of the audio clips when the target sampling rate does not match the original sampling rate. Defaults to False.
+
+        num_mels (int, optional):
+            number of melspectrogram dimensions. Defaults to None.
+
+        log_func (str, optional):
+            name of the log function used for converting spectrogram amplitude to DB, either "np.log" or "np.log10". Defaults to "np.log10".
+
+        min_level_db (int, optional):
+            minimum db threshold for the computed melspectrograms. Defaults to None.
+
+        frame_shift_ms (int, optional):
+            milliseconds of frames between STFT columns. Defaults to None.
+
+        frame_length_ms (int, optional):
+            milliseconds of STFT window length. Defaults to None.
+
+        hop_length (int, optional):
+            number of frames between STFT columns. Used if ```frame_shift_ms``` is None. Defaults to None.
+
+        win_length (int, optional):
+            STFT window length. Used if ```frame_length_ms``` is None. Defaults to None.
+
+        ref_level_db (int, optional):
+            reference DB level to avoid background noise. In general <20DB corresponds to the air noise. Defaults to None.
+
+        fft_size (int, optional):
+            FFT window size for STFT. Defaults to 1024.
+
+        power (int, optional):
+            Exponent value applied to the spectrogram before GriffinLim. Defaults to None.
+
+        preemphasis (float, optional):
+            Preemphasis coefficient. Preemphasis is disabled if == 0.0. Defaults to 0.0.
+
+        signal_norm (bool, optional):
+            enable/disable signal normalization. Defaults to None.
+
+        symmetric_norm (bool, optional):
+            enable/disable symmetric normalization. If set True normalization is performed in the range [-k, k] else [0, k], Defaults to None.
+
+        max_norm (float, optional):
+            ```k``` defining the normalization range. Defaults to None.
+
+        mel_fmin (int, optional):
+            minimum filter frequency for computing melspectrograms. Defaults to None.
+
+        mel_fmax (int, optional):
+            maximum filter frequency for computing melspectrograms. Defaults to None.
+
+        pitch_fmin (int, optional):
+            minimum filter frequency for computing pitch. Defaults to None.
+
+        pitch_fmax (int, optional):
+            maximum filter frequency for computing pitch. Defaults to None.
+
+        spec_gain (int, optional):
+            gain applied when converting amplitude to DB. Defaults to 20.
+
+        stft_pad_mode (str, optional):
+            Padding mode for STFT. Defaults to 'reflect'.
+
+        clip_norm (bool, optional):
+            enable/disable clipping the out of range values in the normalized audio signal. Defaults to True.
+
+        griffin_lim_iters (int, optional):
+            Number of GriffinLim iterations. Defaults to None.
+
+        do_trim_silence (bool, optional):
+            enable/disable silence trimming when loading the audio signal. Defaults to False.
+
+        trim_db (int, optional):
+            DB threshold used for silence trimming. Defaults to 60.
+
+        do_sound_norm (bool, optional):
+            enable/disable signal normalization. Defaults to False.
+
+        do_amp_to_db_linear (bool, optional):
+            enable/disable amplitude to dB conversion of linear spectrograms. Defaults to True.
+
+        do_amp_to_db_mel (bool, optional):
+            enable/disable amplitude to dB conversion of mel spectrograms. Defaults to True.
+
+        do_rms_norm (bool, optional):
+            enable/disable RMS volume normalization when loading an audio file. Defaults to False.
+
+        db_level (int, optional):
+            dB level used for rms normalization. The range is -99 to 0. Defaults to None.
+
+        stats_path (str, optional):
+            Path to the computed stats file. Defaults to None.
+
+        verbose (bool, optional):
+            enable/disable logging. Defaults to True.
+
+    """
+
+    def __init__(
+        self,
+        sample_rate=None,
+        resample=False,
+        num_mels=None,
+        log_func="np.log10",
+        min_level_db=None,
+        frame_shift_ms=None,
+        frame_length_ms=None,
+        hop_length=None,
+        win_length=None,
+        ref_level_db=None,
+        fft_size=1024,
+        power=None,
+        preemphasis=0.0,
+        signal_norm=None,
+        symmetric_norm=None,
+        max_norm=None,
+        mel_fmin=None,
+        mel_fmax=None,
+        pitch_fmax=None,
+        pitch_fmin=None,
+        spec_gain=20,
+        stft_pad_mode="reflect",
+        clip_norm=True,
+        griffin_lim_iters=None,
+        do_trim_silence=False,
+        trim_db=60,
+        do_sound_norm=False,
+        do_amp_to_db_linear=True,
+        do_amp_to_db_mel=True,
+        do_rms_norm=False,
+        db_level=None,
+        stats_path=None,
+        verbose=True,
+        **_,
+    ):
+        """Store the audio settings, derive the STFT sizes, build the mel bases and optionally load scaler stats."""
+        # set up class attributes
+        self.sample_rate = sample_rate
+        self.resample = resample
+        self.num_mels = num_mels
+        self.log_func = log_func
+        self.min_level_db = min_level_db or 0
+        self.frame_shift_ms = frame_shift_ms
+        self.frame_length_ms = frame_length_ms
+        self.ref_level_db = ref_level_db
+        self.fft_size = fft_size
+        self.power = power
+        self.preemphasis = preemphasis
+        self.griffin_lim_iters = griffin_lim_iters
+        self.signal_norm = signal_norm
+        self.symmetric_norm = symmetric_norm
+        self.mel_fmin = mel_fmin or 0
+        self.mel_fmax = mel_fmax
+        self.pitch_fmin = pitch_fmin
+        self.pitch_fmax = pitch_fmax
+        self.spec_gain = float(spec_gain)
+        self.stft_pad_mode = stft_pad_mode
+        self.max_norm = 1.0 if max_norm is None else float(max_norm)
+        self.clip_norm = clip_norm
+        self.do_trim_silence = do_trim_silence
+        self.trim_db = trim_db
+        self.do_sound_norm = do_sound_norm
+        self.do_amp_to_db_linear = do_amp_to_db_linear
+        self.do_amp_to_db_mel = do_amp_to_db_mel
+        self.do_rms_norm = do_rms_norm
+        self.db_level = db_level
+        self.stats_path = stats_path
+        # setup exp_func for db to amp conversion
+        if log_func == "np.log":
+            self.base = np.e
+        elif log_func == "np.log10":
+            self.base = 10
+        else:
+            raise ValueError(" [!] unknown `log_func` value.")
+        # setup stft parameters
+        if hop_length is None:
+            # compute stft parameters from given time values
+            self.hop_length, self.win_length = self._stft_parameters()
+        else:
+            # use stft parameters from config file
+            self.hop_length = hop_length
+            self.win_length = win_length
+        assert min_level_db != 0.0, " [!] min_level_db is 0"
+        assert (
+            self.win_length <= self.fft_size
+        ), f" [!] win_length cannot be larger than fft_size - {self.win_length} vs {self.fft_size}"
+        members = vars(self)
+        if verbose:
+            print(" > Setting up Audio Processor...")
+            for key, value in members.items():
+                print(" | > {}:{}".format(key, value))
+        # create spectrogram utils; the pseudo-inverse reuses the basis instead of building it a second time
+        self.mel_basis = self._build_mel_basis()
+        self.inv_mel_basis = np.linalg.pinv(self.mel_basis)
+        # setup scaler: mean-var stats replace the range normalization settings
+        if stats_path and signal_norm:
+            mel_mean, mel_std, linear_mean, linear_std, _ = self.load_stats(stats_path)
+            self.setup_scaler(mel_mean, mel_std, linear_mean, linear_std)
+            self.signal_norm = True
+            self.max_norm = None
+            self.clip_norm = None
+            self.symmetric_norm = None
+
+    @staticmethod
+    def init_from_config(config: "Coqpit", verbose=True):
+        # Use the nested `audio` section when the config has one; otherwise the config itself holds the settings.
+        audio_args = config.audio if "audio" in config else config
+        return AudioProcessor(verbose=verbose, **audio_args)
+
+    ### setting up the parameters ###
+    def _build_mel_basis(
+        self,
+    ) -> np.ndarray:
+        """Build melspectrogram basis from `sample_rate`, `fft_size`, `num_mels`, `mel_fmin` and `mel_fmax`.
+
+        Returns:
+            np.ndarray: melspectrogram basis.
+        """
+        if self.mel_fmax is not None:
+            assert self.mel_fmax <= self.sample_rate // 2
+        return librosa.filters.mel(  # `sr` / `n_fft` by keyword: newer Librosa releases reject them positionally
+            sr=self.sample_rate, n_fft=self.fft_size, n_mels=self.num_mels, fmin=self.mel_fmin, fmax=self.mel_fmax
+        )
+
+    def _stft_parameters(
+        self,
+    ) -> Tuple[int, int]:
+        """Derive the STFT hop and window sizes in samples from `frame_shift_ms` and `frame_length_ms`.
+
+        Returns:
+            Tuple[int, int]: hop length and window length for STFT.
+        """
+        frames_per_window = self.frame_length_ms / self.frame_shift_ms
+        assert frames_per_window.is_integer(), " [!] frame_shift_ms should divide frame_length_ms"
+        hop_samples = int(self.frame_shift_ms / 1000.0 * self.sample_rate)
+        # The window is a whole multiple of the truncated hop.
+        return hop_samples, int(hop_samples * frames_per_window)
+
+    ### normalization ###
+    def normalize(self, S: np.ndarray) -> np.ndarray:
+        """Normalize values into `[0, self.max_norm]` or `[-self.max_norm, self.max_norm]`
+
+        Args:
+            S (np.ndarray): Spectrogram to normalize. The first dimension selects the mel or the linear scaler.
+
+        Raises:
+            RuntimeError: Mean-var stats are set but the first dimension of `S` matches neither scaler.
+
+        Returns:
+            np.ndarray: Normalized spectrogram. `S` is returned as a plain copy when `signal_norm` is off.
+        """
+        # pylint: disable=no-else-return
+        S = S.copy()
+        if self.signal_norm:
+            # mean-var scaling; an STFT with `fft_size` points has `fft_size // 2 + 1` bins (old check kept too)
+            if hasattr(self, "mel_scaler"):
+                if S.shape[0] == self.num_mels:
+                    return self.mel_scaler.transform(S.T).T
+                elif S.shape[0] in (self.fft_size // 2, self.fft_size // 2 + 1):
+                    return self.linear_scaler.transform(S.T).T
+                else:
+                    raise RuntimeError(" [!] Mean-Var stats does not match the given feature dimensions.")
+            # range normalization
+            S -= self.ref_level_db  # discard certain range of DB assuming it is air noise
+            S_norm = (S - self.min_level_db) / (-self.min_level_db)
+            if self.symmetric_norm:
+                S_norm = ((2 * self.max_norm) * S_norm) - self.max_norm
+                if self.clip_norm:
+                    S_norm = np.clip(
+                        S_norm, -self.max_norm, self.max_norm  # pylint: disable=invalid-unary-operand-type
+                    )
+                return S_norm
+            else:
+                S_norm = self.max_norm * S_norm
+                if self.clip_norm:
+                    S_norm = np.clip(S_norm, 0, self.max_norm)
+                return S_norm
+        else:
+            return S
+
+    def denormalize(self, S: np.ndarray) -> np.ndarray:
+        """Denormalize spectrogram values.
+
+        Args:
+            S (np.ndarray): Spectrogram to denormalize.
+
+        Raises:
+            RuntimeError: Mean and variance are incompatible.
+
+        Returns:
+            np.ndarray: Denormalized spectrogram.
+        """
+        # pylint: disable=no-else-return
+        S_denorm = S.copy()
+        if self.signal_norm:
+            # mean-var scaling
+            if hasattr(self, "mel_scaler"):
+                if S_denorm.shape[0] == self.num_mels:
+                    return self.mel_scaler.inverse_transform(S_denorm.T).T
+                elif S_denorm.shape[0] == self.fft_size / 2:
+                    return self.linear_scaler.inverse_transform(S_denorm.T).T
+                else:
+                    raise RuntimeError(" [!] Mean-Var stats does not match the given feature dimensions.")
+            if self.symmetric_norm:
+                if self.clip_norm:
+                    S_denorm = np.clip(
+                        S_denorm, -self.max_norm, self.max_norm  # pylint: disable=invalid-unary-operand-type
+                    )
+                S_denorm = ((S_denorm + self.max_norm) * -self.min_level_db / (2 * self.max_norm)) + self.min_level_db
+                return S_denorm + self.ref_level_db
+            else:
+                if self.clip_norm:
+                    S_denorm = np.clip(S_denorm, 0, self.max_norm)
+                S_denorm = (S_denorm * -self.min_level_db / self.max_norm) + self.min_level_db
+                return S_denorm + self.ref_level_db
+        else:
+            return S_denorm
+
+    ### Mean-STD scaling ###
+    def load_stats(self, stats_path: str) -> Tuple[np.array, np.array, np.array, np.array, Dict]:
+        """Loading mean and variance statistics from a `npy` file.
+
+        Args:
+            stats_path (str): Path to the `npy` file containing
+
+        Returns:
+            Tuple[np.array, np.array, np.array, np.array, Dict]: loaded statistics and the config used to
+                compute them.
+        """
+        stats = np.load(stats_path, allow_pickle=True).item()  # pylint: disable=unexpected-keyword-arg
+        mel_mean = stats["mel_mean"]
+        mel_std = stats["mel_std"]
+        linear_mean = stats["linear_mean"]
+        linear_std = stats["linear_std"]
+        stats_config = stats["audio_config"]
+        # check all audio parameters used for computing stats
+        skip_parameters = ["griffin_lim_iters", "stats_path", "do_trim_silence", "ref_level_db", "power"]
+        for key in stats_config.keys():
+            if key in skip_parameters:
+                continue
+            if key not in ["sample_rate", "trim_db"]:
+                assert (
+                    stats_config[key] == self.__dict__[key]
+                ), f" [!] Audio param {key} does not match the value used for computing mean-var stats. {stats_config[key]} vs {self.__dict__[key]}"
+        return mel_mean, mel_std, linear_mean, linear_std, stats_config
+
+    # pylint: disable=attribute-defined-outside-init
+    def setup_scaler(
+        self, mel_mean: np.ndarray, mel_std: np.ndarray, linear_mean: np.ndarray, linear_std: np.ndarray
+    ) -> None:
+        """Initialize scaler objects used in mean-std normalization.
+
+        Args:
+            mel_mean (np.ndarray): Mean for melspectrograms.
+            mel_std (np.ndarray): STD for melspectrograms.
+            linear_mean (np.ndarray): Mean for full scale spectrograms.
+            linear_std (np.ndarray): STD for full scale spectrograms.
+        """
+        self.mel_scaler = StandardScaler()
+        self.mel_scaler.set_stats(mel_mean, mel_std)
+        self.linear_scaler = StandardScaler()
+        self.linear_scaler.set_stats(linear_mean, linear_std)
+
+    ### DB and AMP conversion ###
+    # pylint: disable=no-self-use
+    def _amp_to_db(self, x: np.ndarray) -> np.ndarray:
+        """Convert amplitude values to decibels.
+
+        Args:
+            x (np.ndarray): Amplitude spectrogram.
+
+        Returns:
+            np.ndarray: Decibels spectrogram.
+        """
+        return self.spec_gain * _log(np.maximum(1e-5, x), self.base)
+
+    # pylint: disable=no-self-use
+    def _db_to_amp(self, x: np.ndarray) -> np.ndarray:
+        """Convert decibels spectrogram to amplitude spectrogram.
+
+        Args:
+            x (np.ndarray): Decibels spectrogram.
+
+        Returns:
+            np.ndarray: Amplitude spectrogram.
+        """
+        return _exp(x / self.spec_gain, self.base)
+
+    ### Preemphasis ###
+    def apply_preemphasis(self, x: np.ndarray) -> np.ndarray:
+        """Apply pre-emphasis to the audio signal. Useful to reduce the correlation between neighbouring signal values.
+
+        Args:
+            x (np.ndarray): Audio signal.
+
+        Raises:
+            RuntimeError: Preemphasis coeff is set to 0.
+
+        Returns:
+            np.ndarray: Decorrelated audio signal.
+        """
+        if self.preemphasis == 0:
+            raise RuntimeError(" [!] Preemphasis is set 0.0.")
+        return scipy.signal.lfilter([1, -self.preemphasis], [1], x)
+
+    def apply_inv_preemphasis(self, x: np.ndarray) -> np.ndarray:
+        """Reverse pre-emphasis."""
+        if self.preemphasis == 0:
+            raise RuntimeError(" [!] Preemphasis is set 0.0.")
+        return scipy.signal.lfilter([1], [1, -self.preemphasis], x)
+
+    ### SPECTROGRAMs ###
+    def _linear_to_mel(self, spectrogram: np.ndarray) -> np.ndarray:
+        """Project a full scale spectrogram to a melspectrogram.
+
+        Args:
+            spectrogram (np.ndarray): Full scale spectrogram.
+
+        Returns:
+            np.ndarray: Melspectrogram
+        """
+        return np.dot(self.mel_basis, spectrogram)
+
+    def _mel_to_linear(self, mel_spec: np.ndarray) -> np.ndarray:
+        """Convert a melspectrogram to full scale spectrogram."""
+        return np.maximum(1e-10, np.dot(self.inv_mel_basis, mel_spec))
+
+    def spectrogram(self, y: np.ndarray) -> np.ndarray:
+        """Compute a spectrogram from a waveform.
+
+        Args:
+            y (np.ndarray): Waveform.
+
+        Returns:
+            np.ndarray: Spectrogram.
+        """
+        if self.preemphasis != 0:
+            D = self._stft(self.apply_preemphasis(y))
+        else:
+            D = self._stft(y)
+        if self.do_amp_to_db_linear:
+            S = self._amp_to_db(np.abs(D))
+        else:
+            S = np.abs(D)
+        return self.normalize(S).astype(np.float32)
+
+    def melspectrogram(self, y: np.ndarray) -> np.ndarray:
+        """Compute a melspectrogram from a waveform."""
+        if self.preemphasis != 0:
+            D = self._stft(self.apply_preemphasis(y))
+        else:
+            D = self._stft(y)
+        if self.do_amp_to_db_mel:
+            S = self._amp_to_db(self._linear_to_mel(np.abs(D)))
+        else:
+            S = self._linear_to_mel(np.abs(D))
+        return self.normalize(S).astype(np.float32)
+
+    def inv_spectrogram(self, spectrogram: np.ndarray) -> np.ndarray:
+        """Convert a spectrogram to a waveform using Griffi-Lim vocoder."""
+        S = self.denormalize(spectrogram)
+        S = self._db_to_amp(S)
+        # Reconstruct phase
+        if self.preemphasis != 0:
+            return self.apply_inv_preemphasis(self._griffin_lim(S**self.power))
+        return self._griffin_lim(S**self.power)
+
+    def inv_melspectrogram(self, mel_spectrogram: np.ndarray) -> np.ndarray:
+        """Convert a melspectrogram to a waveform using Griffi-Lim vocoder."""
+        D = self.denormalize(mel_spectrogram)
+        S = self._db_to_amp(D)
+        S = self._mel_to_linear(S)  # Convert back to linear
+        if self.preemphasis != 0:
+            return self.apply_inv_preemphasis(self._griffin_lim(S**self.power))
+        return self._griffin_lim(S**self.power)
+
+    def out_linear_to_mel(self, linear_spec: np.ndarray) -> np.ndarray:
+        """Convert a full scale linear spectrogram output of a network to a melspectrogram.
+
+        Args:
+            linear_spec (np.ndarray): Normalized full scale linear spectrogram.
+
+        Returns:
+            np.ndarray: Normalized melspectrogram.
+        """
+        S = self.denormalize(linear_spec)
+        S = self._db_to_amp(S)
+        S = self._linear_to_mel(np.abs(S))
+        S = self._amp_to_db(S)
+        mel = self.normalize(S)
+        return mel
+
+    ### STFT and ISTFT ###
+    def _stft(self, y: np.ndarray) -> np.ndarray:
+        """Librosa STFT wrapper.
+
+        Args:
+            y (np.ndarray): Audio signal.
+
+        Returns:
+            np.ndarray: Complex number array.
+        """
+        return librosa.stft(
+            y=y,
+            n_fft=self.fft_size,
+            hop_length=self.hop_length,
+            win_length=self.win_length,
+            pad_mode=self.stft_pad_mode,
+            window="hann",
+            center=True,
+        )
+
+    def _istft(self, y: np.ndarray) -> np.ndarray:
+        """Librosa iSTFT wrapper."""
+        return librosa.istft(y, hop_length=self.hop_length, win_length=self.win_length)
+
+    def _griffin_lim(self, S):
+        angles = np.exp(2j * np.pi * np.random.rand(*S.shape))
+        S_complex = np.abs(S).astype(np.complex)
+        y = self._istft(S_complex * angles)
+        if not np.isfinite(y).all():
+            print(" [!] Waveform is not finite everywhere. Skipping the GL.")
+            return np.array([0.0])
+        for _ in range(self.griffin_lim_iters):
+            angles = np.exp(1j * np.angle(self._stft(y)))
+            y = self._istft(S_complex * angles)
+        return y
+
+    def compute_stft_paddings(self, x, pad_sides=1):
+        """Compute paddings used by Librosa's STFT. Compute right padding (final frame) or both sides padding
+        (first and final frames)"""
+        assert pad_sides in (1, 2)
+        pad = (x.shape[0] // self.hop_length + 1) * self.hop_length - x.shape[0]
+        if pad_sides == 1:
+            return 0, pad
+        return pad // 2, pad // 2 + pad % 2
+
+    def compute_f0(self, x: np.ndarray) -> np.ndarray:
+        """Compute pitch (f0) of a waveform using the same parameters used for computing melspectrogram.
+
+        Args:
+            x (np.ndarray): Waveform.
+
+        Returns:
+            np.ndarray: Pitch.
+
+        Examples:
+            >>> WAV_FILE = filename = librosa.util.example_audio_file()
+            >>> from TTS.config import BaseAudioConfig
+            >>> from TTS.utils.audio import AudioProcessor
+            >>> conf = BaseAudioConfig(pitch_fmax=640, pitch_fmin=1)
+            >>> ap = AudioProcessor(**conf)
+            >>> wav = ap.load_wav(WAV_FILE, sr=ap.sample_rate)[:5 * ap.sample_rate]
+            >>> pitch = ap.compute_f0(wav)
+        """
+        assert self.pitch_fmax is not None, " [!] Set `pitch_fmax` before caling `compute_f0`."
+        assert self.pitch_fmin is not None, " [!] Set `pitch_fmin` before caling `compute_f0`."
+        # align F0 length to the spectrogram length
+        if len(x) % self.hop_length == 0:
+            x = np.pad(x, (0, self.hop_length // 2), mode=self.stft_pad_mode)
+
+        f0 = compute_f0(
+            x=x,
+            pitch_fmax=self.pitch_fmax,
+            pitch_fmin=self.pitch_fmin,
+            hop_length=self.hop_length,
+            win_length=self.win_length,
+            sample_rate=self.sample_rate,
+            stft_pad_mode=self.stft_pad_mode,
+            center=True,
+        )
+
+        return f0
+
+    ### Audio Processing ###
+    def find_endpoint(self, wav: np.ndarray, min_silence_sec=0.8) -> int:
+        """Find the last point without silence at the end of a audio signal.
+
+        Args:
+            wav (np.ndarray): Audio signal.
+            threshold_db (int, optional): Silence threshold in decibels. Defaults to -40.
+            min_silence_sec (float, optional): Ignore silences that are shorter then this in secs. Defaults to 0.8.
+
+        Returns:
+            int: Last point without silence.
+        """
+        window_length = int(self.sample_rate * min_silence_sec)
+        hop_length = int(window_length / 4)
+        threshold = self._db_to_amp(-self.trim_db)
+        for x in range(hop_length, len(wav) - window_length, hop_length):
+            if np.max(wav[x : x + window_length]) < threshold:
+                return x + hop_length
+        return len(wav)
+
+    def trim_silence(self, wav):
+        """Trim silent parts with a threshold and 0.01 sec margin"""
+        margin = int(self.sample_rate * 0.01)
+        wav = wav[margin:-margin]
+        return librosa.effects.trim(wav, top_db=self.trim_db, frame_length=self.win_length, hop_length=self.hop_length)[
+            0
+        ]
+
+    @staticmethod
+    def sound_norm(x: np.ndarray) -> np.ndarray:
+        """Normalize the volume of an audio signal.
+
+        Args:
+            x (np.ndarray): Raw waveform.
+
+        Returns:
+            np.ndarray: Volume normalized waveform.
+        """
+        return x / abs(x).max() * 0.95
+
+    @staticmethod
+    def _rms_norm(wav, db_level=-27):
+        r = 10 ** (db_level / 20)
+        a = np.sqrt((len(wav) * (r**2)) / np.sum(wav**2))
+        return wav * a
+
+    def rms_volume_norm(self, x: np.ndarray, db_level: float = None) -> np.ndarray:
+        """Normalize the volume based on RMS of the signal.
+
+        Args:
+            x (np.ndarray): Raw waveform.
+
+        Returns:
+            np.ndarray: RMS normalized waveform.
+        """
+        if db_level is None:
+            db_level = self.db_level
+        assert -99 <= db_level <= 0, " [!] db_level should be between -99 and 0"
+        wav = self._rms_norm(x, db_level)
+        return wav
+
+    ### save and load ###
+    def load_wav(self, filename: str, sr: int = None) -> np.ndarray:
+        """Read a wav file using Librosa and optionally resample, silence trim, volume normalize.
+
+        Resampling slows down loading the file significantly. Therefore it is recommended to resample the file before.
+
+        Args:
+            filename (str): Path to the wav file.
+            sr (int, optional): Sampling rate for resampling. Defaults to None.
+
+        Returns:
+            np.ndarray: Loaded waveform.
+        """
+        if self.resample:
+            # loading with resampling. It is significantly slower.
+            x, sr = librosa.load(filename, sr=self.sample_rate)
+        elif sr is None:
+            # SF is faster than librosa for loading files
+            x, sr = sf.read(filename)
+            assert self.sample_rate == sr, "%s vs %s" % (self.sample_rate, sr)
+        else:
+            x, sr = librosa.load(filename, sr=sr)
+        if self.do_trim_silence:
+            try:
+                x = self.trim_silence(x)
+            except ValueError:
+                print(f" [!] File cannot be trimmed for silence - {filename}")
+        if self.do_sound_norm:
+            x = self.sound_norm(x)
+        if self.do_rms_norm:
+            x = self.rms_volume_norm(x, self.db_level)
+        return x
+
+    def save_wav(self, wav: np.ndarray, path: str, sr: int = None) -> None:
+        """Save a waveform to a file using Scipy.
+
+        Args:
+            wav (np.ndarray): Waveform to save.
+            path (str): Path to a output file.
+            sr (int, optional): Sampling rate used for saving to the file. Defaults to None.
+        """
+        if self.do_rms_norm:
+            wav_norm = self.rms_volume_norm(wav, self.db_level) * 32767
+        else:
+            wav_norm = wav * (32767 / max(0.01, np.max(np.abs(wav))))
+
+        scipy.io.wavfile.write(path, sr if sr else self.sample_rate, wav_norm.astype(np.int16))
+
+    def get_duration(self, filename: str) -> float:
+        """Get the duration of a wav file using Librosa.
+
+        Args:
+            filename (str): Path to the wav file.
+        """
+        return librosa.get_duration(filename)
+
+    @staticmethod
+    def mulaw_encode(wav: np.ndarray, qc: int) -> np.ndarray:
+        mu = 2**qc - 1
+        # wav_abs = np.minimum(np.abs(wav), 1.0)
+        signal = np.sign(wav) * np.log(1 + mu * np.abs(wav)) / np.log(1.0 + mu)
+        # Quantize signal to the specified number of levels.
+        signal = (signal + 1) / 2 * mu + 0.5
+        return np.floor(
+            signal,
+        )
+
+    @staticmethod
+    def mulaw_decode(wav, qc):
+        """Recovers waveform from quantized values."""
+        mu = 2**qc - 1
+        x = np.sign(wav) / mu * ((1 + mu) ** np.abs(wav) - 1)
+        return x
+
+    @staticmethod
+    def encode_16bits(x):
+        return np.clip(x * 2**15, -(2**15), 2**15 - 1).astype(np.int16)
+
+    @staticmethod
+    def quantize(x: np.ndarray, bits: int) -> np.ndarray:
+        """Quantize a waveform to a given number of bits.
+
+        Args:
+            x (np.ndarray): Waveform to quantize. Must be normalized into the range `[-1, 1]`.
+            bits (int): Number of quantization bits.
+
+        Returns:
+            np.ndarray: Quantized waveform.
+        """
+        return (x + 1.0) * (2**bits - 1) / 2
+
+    @staticmethod
+    def dequantize(x, bits):
+        """Dequantize a waveform from the given number of bits."""
+        return 2 * x / (2**bits - 1) - 1
+
+
+def _log(x, base):
+    if base == 10:
+        return np.log10(x)
+    return np.log(x)
+
+
+def _exp(x, base):
+    if base == 10:
+        return np.power(10, x)
+    return np.exp(x)
diff --git a/TTS/utils/audio/torch_transforms.py b/TTS/utils/audio/torch_transforms.py
new file mode 100644
index 0000000000000000000000000000000000000000..d4523ad08eaab214204eaa2a0e4c381b93b19ad1
--- /dev/null
+++ b/TTS/utils/audio/torch_transforms.py
@@ -0,0 +1,163 @@
+import librosa
+import torch
+from torch import nn
+
+
+class TorchSTFT(nn.Module):  # pylint: disable=abstract-method
+    """Some of the audio processing funtions using Torch for faster batch processing.
+
+    Args:
+
+        n_fft (int):
+            FFT window size for STFT.
+
+        hop_length (int):
+            number of frames between STFT columns.
+
+        win_length (int, optional):
+            STFT window length.
+
+        pad_wav (bool, optional):
+            If True pad the audio with (n_fft - hop_length) / 2). Defaults to False.
+
+        window (str, optional):
+            The name of a function to create a window tensor that is applied/multiplied to each frame/window. Defaults to "hann_window"
+
+        sample_rate (int, optional):
+            target audio sampling rate. Defaults to None.
+
+        mel_fmin (int, optional):
+            minimum filter frequency for computing melspectrograms. Defaults to None.
+
+        mel_fmax (int, optional):
+            maximum filter frequency for computing melspectrograms. Defaults to None.
+
+        n_mels (int, optional):
+            number of melspectrogram dimensions. Defaults to None.
+
+        use_mel (bool, optional):
+            If True compute the melspectrograms otherwise. Defaults to False.
+
+        do_amp_to_db_linear (bool, optional):
+            enable/disable amplitude to dB conversion of linear spectrograms. Defaults to False.
+
+        spec_gain (float, optional):
+            gain applied when converting amplitude to DB. Defaults to 1.0.
+
+        power (float, optional):
+            Exponent for the magnitude spectrogram, e.g., 1 for energy, 2 for power, etc.  Defaults to None.
+
+        use_htk (bool, optional):
+            Use HTK formula in mel filter instead of Slaney.
+
+        mel_norm (None, 'slaney', or number, optional):
+            If 'slaney', divide the triangular mel weights by the width of the mel band
+            (area normalization).
+
+            If numeric, use `librosa.util.normalize` to normalize each filter by to unit l_p norm.
+            See `librosa.util.normalize` for a full description of supported norm values
+            (including `+-np.inf`).
+
+            Otherwise, leave all the triangles aiming for a peak value of 1.0. Defaults to "slaney".
+    """
+
+    def __init__(
+        self,
+        n_fft,
+        hop_length,
+        win_length,
+        pad_wav=False,
+        window="hann_window",
+        sample_rate=None,
+        mel_fmin=0,
+        mel_fmax=None,
+        n_mels=80,
+        use_mel=False,
+        do_amp_to_db=False,
+        spec_gain=1.0,
+        power=None,
+        use_htk=False,
+        mel_norm="slaney",
+    ):
+        super().__init__()
+        self.n_fft = n_fft
+        self.hop_length = hop_length
+        self.win_length = win_length
+        self.pad_wav = pad_wav
+        self.sample_rate = sample_rate
+        self.mel_fmin = mel_fmin
+        self.mel_fmax = mel_fmax
+        self.n_mels = n_mels
+        self.use_mel = use_mel
+        self.do_amp_to_db = do_amp_to_db
+        self.spec_gain = spec_gain
+        self.power = power
+        self.use_htk = use_htk
+        self.mel_norm = mel_norm
+        self.window = nn.Parameter(getattr(torch, window)(win_length), requires_grad=False)
+        self.mel_basis = None
+        if use_mel:
+            self._build_mel_basis()
+
+    def __call__(self, x):
+        """Compute spectrogram frames by torch based stft.
+
+        Args:
+            x (Tensor): input waveform
+
+        Returns:
+            Tensor: spectrogram frames.
+
+        Shapes:
+            x: [B x T] or [:math:`[B, 1, T]`]
+        """
+        if x.ndim == 2:
+            x = x.unsqueeze(1)
+        if self.pad_wav:
+            padding = int((self.n_fft - self.hop_length) / 2)
+            x = torch.nn.functional.pad(x, (padding, padding), mode="reflect")
+        # B x D x T x 2
+        o = torch.stft(
+            x.squeeze(1),
+            self.n_fft,
+            self.hop_length,
+            self.win_length,
+            self.window,
+            center=True,
+            pad_mode="reflect",  # compatible with audio.py
+            normalized=False,
+            onesided=True,
+            return_complex=False,
+        )
+        M = o[:, :, :, 0]
+        P = o[:, :, :, 1]
+        S = torch.sqrt(torch.clamp(M**2 + P**2, min=1e-8))
+
+        if self.power is not None:
+            S = S**self.power
+
+        if self.use_mel:
+            S = torch.matmul(self.mel_basis.to(x), S)
+        if self.do_amp_to_db:
+            S = self._amp_to_db(S, spec_gain=self.spec_gain)
+        return S
+
+    def _build_mel_basis(self):
+        mel_basis = librosa.filters.mel(
+            self.sample_rate,
+            self.n_fft,
+            n_mels=self.n_mels,
+            fmin=self.mel_fmin,
+            fmax=self.mel_fmax,
+            htk=self.use_htk,
+            norm=self.mel_norm,
+        )
+        self.mel_basis = torch.from_numpy(mel_basis).float()
+
+    @staticmethod
+    def _amp_to_db(x, spec_gain=1.0):
+        return torch.log(torch.clamp(x, min=1e-5) * spec_gain)
+
+    @staticmethod
+    def _db_to_amp(x, spec_gain=1.0):
+        return torch.exp(x) / spec_gain
diff --git a/TTS/utils/callbacks.py b/TTS/utils/callbacks.py
new file mode 100644
index 0000000000000000000000000000000000000000..511d215c656f1ce3ed31484963db64fae4dc77d4
--- /dev/null
+++ b/TTS/utils/callbacks.py
@@ -0,0 +1,105 @@
+class TrainerCallback:
+    @staticmethod
+    def on_init_start(trainer) -> None:
+        if hasattr(trainer.model, "module"):
+            if hasattr(trainer.model.module, "on_init_start"):
+                trainer.model.module.on_init_start(trainer)
+        else:
+            if hasattr(trainer.model, "on_init_start"):
+                trainer.model.on_init_start(trainer)
+
+        if hasattr(trainer.criterion, "on_init_start"):
+            trainer.criterion.on_init_start(trainer)
+
+        if hasattr(trainer.optimizer, "on_init_start"):
+            trainer.optimizer.on_init_start(trainer)
+
+    @staticmethod
+    def on_init_end(trainer) -> None:
+        if hasattr(trainer.model, "module"):
+            if hasattr(trainer.model.module, "on_init_end"):
+                trainer.model.module.on_init_end(trainer)
+        else:
+            if hasattr(trainer.model, "on_init_end"):
+                trainer.model.on_init_end(trainer)
+
+        if hasattr(trainer.criterion, "on_init_end"):
+            trainer.criterion.on_init_end(trainer)
+
+        if hasattr(trainer.optimizer, "on_init_end"):
+            trainer.optimizer.on_init_end(trainer)
+
+    @staticmethod
+    def on_epoch_start(trainer) -> None:
+        if hasattr(trainer.model, "module"):
+            if hasattr(trainer.model.module, "on_epoch_start"):
+                trainer.model.module.on_epoch_start(trainer)
+        else:
+            if hasattr(trainer.model, "on_epoch_start"):
+                trainer.model.on_epoch_start(trainer)
+
+        if hasattr(trainer.criterion, "on_epoch_start"):
+            trainer.criterion.on_epoch_start(trainer)
+
+        if hasattr(trainer.optimizer, "on_epoch_start"):
+            trainer.optimizer.on_epoch_start(trainer)
+
+    @staticmethod
+    def on_epoch_end(trainer) -> None:
+        if hasattr(trainer.model, "module"):
+            if hasattr(trainer.model.module, "on_epoch_end"):
+                trainer.model.module.on_epoch_end(trainer)
+        else:
+            if hasattr(trainer.model, "on_epoch_end"):
+                trainer.model.on_epoch_end(trainer)
+
+        if hasattr(trainer.criterion, "on_epoch_end"):
+            trainer.criterion.on_epoch_end(trainer)
+
+        if hasattr(trainer.optimizer, "on_epoch_end"):
+            trainer.optimizer.on_epoch_end(trainer)
+
+    @staticmethod
+    def on_train_step_start(trainer) -> None:
+        if hasattr(trainer.model, "module"):
+            if hasattr(trainer.model.module, "on_train_step_start"):
+                trainer.model.module.on_train_step_start(trainer)
+        else:
+            if hasattr(trainer.model, "on_train_step_start"):
+                trainer.model.on_train_step_start(trainer)
+
+        if hasattr(trainer.criterion, "on_train_step_start"):
+            trainer.criterion.on_train_step_start(trainer)
+
+        if hasattr(trainer.optimizer, "on_train_step_start"):
+            trainer.optimizer.on_train_step_start(trainer)
+
+    @staticmethod
+    def on_train_step_end(trainer) -> None:
+        if hasattr(trainer.model, "module"):
+            if hasattr(trainer.model.module, "on_train_step_end"):
+                trainer.model.module.on_train_step_end(trainer)
+        else:
+            if hasattr(trainer.model, "on_train_step_end"):
+                trainer.model.on_train_step_end(trainer)
+
+        if hasattr(trainer.criterion, "on_train_step_end"):
+            trainer.criterion.on_train_step_end(trainer)
+
+        if hasattr(trainer.optimizer, "on_train_step_end"):
+            trainer.optimizer.on_train_step_end(trainer)
+
+    @staticmethod
+    def on_keyboard_interrupt(trainer) -> None:
+        if hasattr(trainer.model, "module"):
+            if hasattr(trainer.model.module, "on_keyboard_interrupt"):
+                trainer.model.module.on_keyboard_interrupt(trainer)
+        else:
+            if hasattr(trainer.model, "on_keyboard_interrupt"):
+                trainer.model.on_keyboard_interrupt(trainer)
+
+        if hasattr(trainer.criterion, "on_keyboard_interrupt"):
+            trainer.criterion.on_keyboard_interrupt(trainer)
+
+        if hasattr(trainer.optimizer, "on_keyboard_interrupt"):
+            trainer.optimizer.on_keyboard_interrupt(trainer)
diff --git a/TTS/utils/capacitron_optimizer.py b/TTS/utils/capacitron_optimizer.py
new file mode 100644
index 0000000000000000000000000000000000000000..7206ffd508896cab96a22288f33a93e999c5f009
--- /dev/null
+++ b/TTS/utils/capacitron_optimizer.py
@@ -0,0 +1,67 @@
+from typing import Generator
+
+from trainer.trainer_utils import get_optimizer
+
+
+class CapacitronOptimizer:
+    """Double optimizer class for the Capacitron model."""
+
+    def __init__(self, config: dict, model_params: Generator) -> None:
+        self.primary_params, self.secondary_params = self.split_model_parameters(model_params)
+
+        optimizer_names = list(config.optimizer_params.keys())
+        optimizer_parameters = list(config.optimizer_params.values())
+
+        self.primary_optimizer = get_optimizer(
+            optimizer_names[0],
+            optimizer_parameters[0],
+            config.lr,
+            parameters=self.primary_params,
+        )
+
+        self.secondary_optimizer = get_optimizer(
+            optimizer_names[1],
+            self.extract_optimizer_parameters(optimizer_parameters[1]),
+            optimizer_parameters[1]["lr"],
+            parameters=self.secondary_params,
+        )
+
+        self.param_groups = self.primary_optimizer.param_groups
+
+    def first_step(self):
+        self.secondary_optimizer.step()
+        self.secondary_optimizer.zero_grad()
+        self.primary_optimizer.zero_grad()
+
+    def step(self):
+        # Update param groups to display the correct learning rate
+        self.param_groups = self.primary_optimizer.param_groups
+        self.primary_optimizer.step()
+
+    def zero_grad(self, set_to_none=False):
+        self.primary_optimizer.zero_grad(set_to_none)
+        self.secondary_optimizer.zero_grad(set_to_none)
+
+    def load_state_dict(self, state_dict):
+        self.primary_optimizer.load_state_dict(state_dict[0])
+        self.secondary_optimizer.load_state_dict(state_dict[1])
+
+    def state_dict(self):
+        return [self.primary_optimizer.state_dict(), self.secondary_optimizer.state_dict()]
+
+    @staticmethod
+    def split_model_parameters(model_params: Generator) -> list:
+        primary_params = []
+        secondary_params = []
+        for name, param in model_params:
+            if param.requires_grad:
+                if name == "capacitron_vae_layer.beta":
+                    secondary_params.append(param)
+                else:
+                    primary_params.append(param)
+        return [iter(primary_params), iter(secondary_params)]
+
+    @staticmethod
+    def extract_optimizer_parameters(params: dict) -> dict:
+        """Extract parameters that are not the learning rate"""
+        return {k: v for k, v in params.items() if k != "lr"}
diff --git a/TTS/utils/distribute.py b/TTS/utils/distribute.py
new file mode 100644
index 0000000000000000000000000000000000000000..a51ef7661ece97c87c165ad1aba4c9d9700379dc
--- /dev/null
+++ b/TTS/utils/distribute.py
@@ -0,0 +1,20 @@
+# edited from https://github.com/fastai/imagenet-fast/blob/master/imagenet_nv/distributed.py
+import torch
+import torch.distributed as dist
+
+
+def reduce_tensor(tensor, num_gpus):
+    rt = tensor.clone()
+    dist.all_reduce(rt, op=dist.reduce_op.SUM)
+    rt /= num_gpus
+    return rt
+
+
+def init_distributed(rank, num_gpus, group_name, dist_backend, dist_url):
+    assert torch.cuda.is_available(), "Distributed mode requires CUDA."
+
+    # Set cuda device so everything is done on the right GPU.
+    torch.cuda.set_device(rank % torch.cuda.device_count())
+
+    # Initialize distributed communication
+    dist.init_process_group(dist_backend, init_method=dist_url, world_size=num_gpus, rank=rank, group_name=group_name)
diff --git a/TTS/utils/download.py b/TTS/utils/download.py
new file mode 100644
index 0000000000000000000000000000000000000000..de9b31a7a87071a964cd171b2075b03a7a433a76
--- /dev/null
+++ b/TTS/utils/download.py
@@ -0,0 +1,207 @@
+# Adapted from https://github.com/pytorch/audio/
+
+import hashlib
+import logging
+import os
+import tarfile
+import urllib
+import urllib.request
+import zipfile
+from os.path import expanduser
+from typing import Any, Iterable, List, Optional
+
+from torch.utils.model_zoo import tqdm
+
+
+def stream_url(
+    url: str, start_byte: Optional[int] = None, block_size: int = 32 * 1024, progress_bar: bool = True
+) -> Iterable:
+    """Stream url by chunk
+
+    Args:
+        url (str): Url.
+        start_byte (int or None, optional): Start streaming at that point (Default: ``None``).
+        block_size (int, optional): Size of chunks to stream (Default: ``32 * 1024``).
+        progress_bar (bool, optional): Display a progress bar (Default: ``True``).
+
+    Yields:
+        bytes: Consecutive chunks of at most ``block_size`` bytes. Nothing is yielded when
+            ``start_byte`` already equals the size reported by the server.
+    """
+
+    # If we already have the whole file, there is no need to download it again
+    head_request = urllib.request.Request(url, method="HEAD")
+    with urllib.request.urlopen(head_request) as response:
+        url_size = int(response.info().get("Content-Length", -1))
+    if url_size == start_byte:
+        return
+
+    req = urllib.request.Request(url)
+    if start_byte:
+        # Ask the server for the remaining bytes only.
+        req.headers["Range"] = "bytes={}-".format(start_byte)
+
+    with urllib.request.urlopen(req) as upointer, tqdm(
+        unit="B",
+        unit_scale=True,
+        unit_divisor=1024,
+        # -1 means the server did not report a size; an unknown total must be ``None``.
+        total=url_size if url_size >= 0 else None,
+        disable=not progress_bar,
+    ) as pbar:
+        if start_byte:
+            # ``total`` is the full file size, so account for the bytes already on disk.
+            pbar.update(start_byte)
+
+        while True:
+            chunk = upointer.read(block_size)
+            if not chunk:
+                break
+            yield chunk
+            pbar.update(len(chunk))
+
+
+def download_url(
+    url: str,
+    download_folder: str,
+    filename: Optional[str] = None,
+    hash_value: Optional[str] = None,
+    hash_type: str = "sha256",
+    progress_bar: bool = True,
+    resume: bool = False,
+) -> None:
+    """Download file to disk.
+
+    Args:
+        url (str): Url.
+        download_folder (str): Folder to download file.
+        filename (str or None, optional): Name of downloaded file. If None, it is inferred from the url
+            (Default: ``None``).
+        hash_value (str or None, optional): Hash for url (Default: ``None``).
+        hash_type (str, optional): Hash type, among "sha256" and "md5" (Default: ``"sha256"``).
+        progress_bar (bool, optional): Display a progress bar (Default: ``True``).
+        resume (bool, optional): Enable resuming download (Default: ``False``).
+
+    Raises:
+        RuntimeError: If the target file already exists and ``resume`` is False, or if the
+            downloaded file does not match ``hash_value``.
+    """
+
+    req = urllib.request.Request(url, method="HEAD")
+    # Only the headers are needed; close the response instead of leaking the connection.
+    with urllib.request.urlopen(req) as response:
+        req_info = response.info()
+
+    # Detect filename. A server-supplied name is reduced to its last path component so a
+    # crafted Content-Disposition header cannot point outside ``download_folder``.
+    header_filename = req_info.get_filename()
+    if header_filename:
+        header_filename = os.path.basename(header_filename)
+    filename = filename or header_filename or os.path.basename(url)
+    filepath = os.path.join(download_folder, filename)
+    if resume and os.path.exists(filepath):
+        # Append to the partial file and continue from its current size.
+        mode = "ab"
+        local_size: Optional[int] = os.path.getsize(filepath)
+
+    elif not resume and os.path.exists(filepath):
+        raise RuntimeError("{} already exists. Delete the file manually and retry.".format(filepath))
+    else:
+        mode = "wb"
+        local_size = None
+
+    # A local file with the advertised size is complete: just verify its hash.
+    if hash_value and local_size == int(req_info.get("Content-Length", -1)):
+        with open(filepath, "rb") as file_obj:
+            if validate_file(file_obj, hash_value, hash_type):
+                return
+        raise RuntimeError("The hash of {} does not match. Delete the file manually and retry.".format(filepath))
+
+    with open(filepath, mode) as fpointer:
+        for chunk in stream_url(url, start_byte=local_size, progress_bar=progress_bar):
+            fpointer.write(chunk)
+
+    if hash_value:
+        with open(filepath, "rb") as file_obj:
+            if not validate_file(file_obj, hash_value, hash_type):
+                raise RuntimeError(
+                    "The hash of {} does not match. Delete the file manually and retry.".format(filepath)
+                )
+
+
+def validate_file(file_obj: Any, hash_value: str, hash_type: str = "sha256") -> bool:
+    """Validate a given file object with its hash.
+
+    Args:
+        file_obj: File object to read from.
+        hash_value (str): Expected hex digest. Compared case-insensitively.
+        hash_type (str, optional): Hash type, among "sha256" and "md5" (Default: ``"sha256"``).
+
+    Returns:
+        bool: return True if its a valid file, else False.
+
+    Raises:
+        ValueError: If ``hash_type`` is neither "sha256" nor "md5".
+    """
+
+    if hash_type == "sha256":
+        hash_func = hashlib.sha256()
+    elif hash_type == "md5":
+        hash_func = hashlib.md5()
+    else:
+        raise ValueError("Unsupported hash_type {!r}; expected 'sha256' or 'md5'.".format(hash_type))
+
+    while True:
+        # Read by chunk to avoid filling memory
+        chunk = file_obj.read(1024**2)
+        if not chunk:
+            break
+        hash_func.update(chunk)
+
+    # ``hexdigest`` is lowercase; normalise the expected value so published uppercase
+    # digests (or ones with stray whitespace) still validate.
+    return hash_func.hexdigest() == hash_value.strip().lower()
+
+
+def extract_archive(from_path: str, to_path: Optional[str] = None, overwrite: bool = False) -> List[str]:
+    """Extract archive.
+    Args:
+        from_path (str): the path of the archive.
+        to_path (str or None, optional): the root path of the extracted files (directory of from_path)
+            (Default: ``None``)
+        overwrite (bool, optional): overwrite existing files (Default: ``False``)
+
+    Returns:
+        list: List of paths to extracted files even if not overwritten.
+
+    Raises:
+        RuntimeError: If a tar member would be written outside ``to_path``.
+        NotImplementedError: If ``from_path`` is neither a readable tar nor a zip archive.
+    """
+
+    if to_path is None:
+        to_path = os.path.dirname(from_path)
+
+    try:
+        with tarfile.open(from_path, "r") as tar:
+            logging.info("Opened tar file %s.", from_path)
+            files = []
+            extraction_root = os.path.realpath(to_path)
+            for file_ in tar:  # type: Any
+                file_path = os.path.join(to_path, file_.name)
+                # Member names come from the archive and may contain ``..`` or absolute
+                # paths; refuse anything that resolves outside the extraction root.
+                resolved_path = os.path.realpath(file_path)
+                if os.path.commonpath([extraction_root, resolved_path]) != extraction_root:
+                    raise RuntimeError(
+                        "Refusing to extract {} outside of {}.".format(file_.name, extraction_root)
+                    )
+                if file_.isfile():
+                    files.append(file_path)
+                    if os.path.exists(file_path):
+                        logging.info("%s already extracted.", file_path)
+                        if not overwrite:
+                            continue
+                tar.extract(file_, to_path)
+            return files
+    except tarfile.ReadError:
+        # Not a tar archive; fall through and try zip.
+        pass
+
+    try:
+        with zipfile.ZipFile(from_path, "r") as zfile:
+            logging.info("Opened zip file %s.", from_path)
+            files = zfile.namelist()
+            for file_ in files:
+                file_path = os.path.join(to_path, file_)
+                if os.path.exists(file_path):
+                    logging.info("%s already extracted.", file_path)
+                    if not overwrite:
+                        continue
+                zfile.extract(file_, to_path)
+        return files
+    except zipfile.BadZipFile:
+        # Not a zip archive either; report the unsupported format below.
+        pass
+
+    raise NotImplementedError(" > [!] only supports tar.gz, tgz, and zip achives.")
+
+
+def download_kaggle_dataset(dataset_path: str, dataset_name: str, output_path: str):
+    """Download a dataset from kaggle and unzip it under ``output_path/dataset_name``.
+
+    Args:
+        dataset_path (str): Kaggle identifier of the dataset, e.g. VCTK is
+            'mfekadu/english-multispeaker-corpus-for-voice-cloning'.
+        dataset_name (str): Name of the folder the dataset will be saved in.
+        output_path (str): Path of the location you want the dataset folder to be saved to.
+    """
+    data_path = os.path.join(output_path, dataset_name)
+    try:
+        # Imported lazily and inside the ``try`` so an ``OSError`` raised while importing or
+        # authenticating is reported as a missing API token below.
+        import kaggle  # pylint: disable=import-outside-toplevel
+
+        kaggle.api.authenticate()
+        print(f"\nDownloading {dataset_name}...")
+        kaggle.api.dataset_download_files(dataset_path, path=data_path, unzip=True)
+    except OSError:
+        token_path = os.path.join(expanduser("~"), ".kaggle/kaggle.json")
+        print(
+            f"[!] in order to download kaggle datasets, you need to have a kaggle api token stored in your {token_path}"
+        )
diff --git a/TTS/utils/downloaders.py b/TTS/utils/downloaders.py
new file mode 100644
index 0000000000000000000000000000000000000000..104dc7b94e17b1d7f828103d2396d6c5115b628a
--- /dev/null
+++ b/TTS/utils/downloaders.py
@@ -0,0 +1,126 @@
+import os
+from typing import Optional
+
+from TTS.utils.download import download_kaggle_dataset, download_url, extract_archive
+
+
+def download_ljspeech(path: str):
+    """Fetch the LJSpeech-1.1 archive into ``path`` and unpack it there.
+
+    Args:
+        path (str): path to the directory where the dataset will be stored.
+    """
+    os.makedirs(path, exist_ok=True)
+    source_url = "https://data.keithito.com/data/speech/LJSpeech-1.1.tar.bz2"
+    download_url(source_url, path)
+    # The archive is expected under the same name as the last URL component.
+    archive_path = os.path.join(path, os.path.basename(source_url))
+    print(" > Extracting archive file...")
+    extract_archive(archive_path)
+
+
+def download_vctk(path: str, use_kaggle: Optional[bool] = False):
+    """Fetch the VCTK dataset, either from kaggle or from the Edinburgh DataShare archive.
+
+    Args:
+        path (str): path to the directory where the dataset will be stored.
+
+        use_kaggle (bool, optional): Downloads vctk dataset from kaggle. Is generally faster. Defaults to False.
+    """
+    if use_kaggle:
+        download_kaggle_dataset("mfekadu/english-multispeaker-corpus-for-voice-cloning", "VCTK", path)
+        return
+
+    os.makedirs(path, exist_ok=True)
+    source_url = "https://datashare.ed.ac.uk/bitstream/handle/10283/3443/VCTK-Corpus-0.92.zip"
+    download_url(source_url, path)
+    archive_path = os.path.join(path, os.path.basename(source_url))
+    print(" > Extracting archive file...")
+    extract_archive(archive_path)
+
+
+def download_tweb(path: str):
+    """Fetch the TWEB (World English Bible speech) dataset from kaggle.
+
+    Args:
+        path (str): Path to the directory where the dataset will be stored.
+    """
+    kaggle_dataset = "bryanpark/the-world-english-bible-speech-dataset"
+    download_kaggle_dataset(kaggle_dataset, "TWEB", path)
+
+
+def download_libri_tts(path: str, subset: Optional[str] = "all"):
+    """Fetch and unpack one LibriTTS subset, or every subset when ``subset`` is ``"all"``.
+
+    Args:
+        path (str): Path to the directory where the dataset will be stored.
+
+        subset (str, optional): Name of the subset to download. If you only want to download a certain
+        portion specify it here. Defaults to 'all'.
+
+    Raises:
+        KeyError: If ``subset`` is neither ``"all"`` nor one of the known subset names.
+    """
+
+    subset_dict = {
+        "libri-tts-clean-100": "http://www.openslr.org/resources/60/train-clean-100.tar.gz",
+        "libri-tts-clean-360": "http://www.openslr.org/resources/60/train-clean-360.tar.gz",
+        "libri-tts-other-500": "http://www.openslr.org/resources/60/train-other-500.tar.gz",
+        "libri-tts-dev-clean": "http://www.openslr.org/resources/60/dev-clean.tar.gz",
+        "libri-tts-dev-other": "http://www.openslr.org/resources/60/dev-other.tar.gz",
+        "libri-tts-test-clean": "http://www.openslr.org/resources/60/test-clean.tar.gz",
+        "libri-tts-test-other": "http://www.openslr.org/resources/60/test-other.tar.gz",
+    }
+
+    os.makedirs(path, exist_ok=True)
+    fetch_all = subset == "all"
+    selected = subset_dict if fetch_all else {subset: subset_dict[subset]}
+    for subset_name, subset_url in selected.items():
+        if fetch_all:
+            print(f" > Downloading {subset_name}...")
+        download_url(subset_url, path)
+        archive_path = os.path.join(path, os.path.basename(subset_url))
+        print(" > Extracting archive file...")
+        extract_archive(archive_path)
+    if fetch_all:
+        print(" > All subsets downloaded")
+
+
+def download_thorsten_de(path: str):
+    """Fetch the Thorsten German male voice archive into ``path`` and unpack it there.
+
+    Args:
+        path (str): Path to the directory where the dataset will be stored.
+    """
+    os.makedirs(path, exist_ok=True)
+    source_url = "https://www.openslr.org/resources/95/thorsten-de_v02.tgz"
+    download_url(source_url, path)
+    archive_path = os.path.join(path, os.path.basename(source_url))
+    print(" > Extracting archive file...")
+    extract_archive(archive_path)
+
+
+def download_mailabs(path: str, language: str = "english"):
+    """Fetch one language subset of the M-AILABS dataset into ``path`` and unpack it there.
+
+    Args:
+        path (str): Path to the directory where the dataset will be stored.
+
+        language (str): Language subset to download. Defaults to english.
+
+    Raises:
+        KeyError: If ``language`` is not one of the supported language names.
+    """
+    language_dict = {
+        "english": "https://data.solak.de/data/Training/stt_tts/en_US.tgz",
+        "german": "https://data.solak.de/data/Training/stt_tts/de_DE.tgz",
+        "french": "https://data.solak.de/data/Training/stt_tts/fr_FR.tgz",
+        "italian": "https://data.solak.de/data/Training/stt_tts/it_IT.tgz",
+        "spanish": "https://data.solak.de/data/Training/stt_tts/es_ES.tgz",
+    }
+    os.makedirs(path, exist_ok=True)
+    source_url = language_dict[language]
+    download_url(source_url, path)
+    archive_path = os.path.join(path, os.path.basename(source_url))
+    print(" > Extracting archive file...")
+    extract_archive(archive_path)
diff --git a/TTS/utils/generic_utils.py b/TTS/utils/generic_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..b685210c1179b8adfc1ed57c9a5089aff07f52ae
--- /dev/null
+++ b/TTS/utils/generic_utils.py
@@ -0,0 +1,211 @@
+# -*- coding: utf-8 -*-
+import datetime
+import importlib
+import os
+import re
+import subprocess
+import sys
+from pathlib import Path
+from typing import Dict
+
+import fsspec
+import torch
+
+
+def to_cuda(x: torch.Tensor) -> torch.Tensor:
+    """Make a tensor contiguous and move it to the GPU when CUDA is available.
+
+    ``None`` and non-tensor inputs are returned unchanged.
+    """
+    if x is None:
+        return None
+    if not torch.is_tensor(x):
+        return x
+    contiguous = x.contiguous()
+    return contiguous.cuda(non_blocking=True) if torch.cuda.is_available() else contiguous
+
+
+def get_cuda():
+    """Return ``(use_cuda, device)``: CUDA availability and ``cuda:0`` or ``cpu`` accordingly."""
+    cuda_available = torch.cuda.is_available()
+    device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
+    return cuda_available, torch.device(device_name)
+
+
+def get_git_branch():
+    """Return the name of the currently checked-out git branch.
+
+    Returns:
+        str: The starred entry of ``git branch`` without its ``"* "`` marker,
+            ``"inside_docker"`` when the git command fails, or ``"unknown"`` when git is
+            not installed or no branch is marked as current.
+    """
+    try:
+        out = subprocess.check_output(["git", "branch"]).decode("utf8")
+        # ``git branch`` prefixes the active branch with "* "; a repository without commits
+        # prints nothing, so fall back instead of letting ``next`` raise StopIteration.
+        current = next((line for line in out.split("\n") if line.startswith("*")), None)
+        if current is None:
+            current = "unknown"
+        else:
+            # str.replace returns a new string; the result has to be kept.
+            current = current.replace("* ", "")
+    except subprocess.CalledProcessError:
+        current = "inside_docker"
+    except FileNotFoundError:
+        current = "unknown"
+    return current
+
+
+def get_commit_hash():
+    """Return the short hash of the current git commit, or ``"0000000"`` if unavailable.
+
+    https://stackoverflow.com/questions/14989858/get-the-current-git-hash-in-a-python-script
+    """
+    # NOTE: a check that the working tree is clean (``git diff-index --quiet HEAD``) used to
+    # run here and is intentionally disabled.
+    rev_parse_command = ["git", "rev-parse", "--short", "HEAD"]
+    try:
+        raw_output = subprocess.check_output(rev_parse_command)
+    # Not copying .git folder into docker container
+    except (subprocess.CalledProcessError, FileNotFoundError):
+        return "0000000"
+    return raw_output.decode().strip()
+
+
+def get_experiment_folder_path(root_path, model_name):
+    """Build ``root_path/<model_name>-<date and time>-<commit hash>`` for a new experiment."""
+    timestamp = datetime.datetime.now().strftime("%B-%d-%Y_%I+%M%p")
+    folder_name = "-".join([model_name, timestamp, get_commit_hash()])
+    return os.path.join(root_path, folder_name)
+
+
+def remove_experiment_folder(experiment_path):
+    """Delete the experiment folder unless it directly contains a ``*.pth`` checkpoint."""
+    fs = fsspec.get_mapper(experiment_path).fs
+    if fs.glob(experiment_path + "/*.pth"):
+        print(" ! Run is kept in {}".format(experiment_path))
+        return
+    # No checkpoint was written: drop the whole folder if it is still there.
+    if fs.exists(experiment_path):
+        fs.rm(experiment_path, recursive=True)
+        print(" ! Run is removed from {}".format(experiment_path))
+
+
+def count_parameters(model):
+    r"""Return the total number of elements in the model's trainable parameters."""
+    trainable_elements = 0
+    for parameter in model.parameters():
+        if parameter.requires_grad:
+            trainable_elements += parameter.numel()
+    return trainable_elements
+
+
+def to_camel(text):
+    """Convert a snake_case name to CamelCase, spelling the ``Tts`` fragment as ``TTS``.
+
+    ``str.capitalize`` lowercases everything after the first character, so any upper-case
+    letters already present in ``text`` are not preserved.
+    """
+
+    def _upper_after_underscore(match):
+        return match.group(1).upper()
+
+    capitalized = text.capitalize()
+    # The lookahead keeps a leading underscore (at position 0) untouched.
+    camel = re.sub(r"(?!^)_([a-zA-Z])", _upper_after_underscore, capitalized)
+    return camel.replace("Tts", "TTS")
+
+
+def find_module(module_path: str, module_name: str) -> object:
+    """Import ``module_path.<module_name lowercased>`` and return its CamelCase-named attribute.
+
+    Args:
+        module_path (str): Dotted path of the package containing the module.
+        module_name (str): Module name; it is lowercased before use.
+
+    Returns:
+        object: The attribute of the imported module named ``to_camel(module_name.lower())``.
+    """
+    lowered_name = module_name.lower()
+    imported = importlib.import_module(module_path + "." + lowered_name)
+    return getattr(imported, to_camel(lowered_name))
+
+
+def import_class(module_path: str) -> object:
+    """Resolve a dotted path such as ``package.module.ClassName`` to the object it names.
+
+    Args:
+        module_path (str): The module path of the class.
+
+    Returns:
+        object: The imported class.
+    """
+    # Everything before the last dot is the module; the remainder is the attribute name.
+    parent_module, _, class_name = module_path.rpartition(".")
+    return getattr(importlib.import_module(parent_module), class_name)
+
+
+def get_import_path(obj: object) -> str:
+    """Return ``<module>.<class name>`` for the type of the given object.
+
+    Args:
+        obj (object): An instance; the path is derived from ``type(obj)``.
+
+    Returns:
+        str: The import path of the class.
+    """
+    obj_type = type(obj)
+    return ".".join((obj_type.__module__, obj_type.__name__))
+
+
+def get_user_data_dir(appname):
+    """Return the per-user data directory for ``appname`` on the current platform.
+
+    Args:
+        appname (str): Name of the sub-folder appended to the platform data directory.
+
+    Returns:
+        pathlib.Path: ``<Local AppData>/appname`` on Windows,
+            ``~/Library/Application Support/appname`` on macOS and
+            ``~/.local/share/appname`` elsewhere. The directory is not created.
+    """
+    if sys.platform == "win32":
+        import winreg  # pylint: disable=import-outside-toplevel
+
+        # Registry handles support the context manager protocol; close the key once read.
+        with winreg.OpenKey(
+            winreg.HKEY_CURRENT_USER, r"Software\Microsoft\Windows\CurrentVersion\Explorer\Shell Folders"
+        ) as key:
+            dir_, _ = winreg.QueryValueEx(key, "Local AppData")
+        ans = Path(dir_).resolve(strict=False)
+    elif sys.platform == "darwin":
+        ans = Path("~/Library/Application Support/").expanduser()
+    else:
+        ans = Path.home().joinpath(".local/share")
+    return ans.joinpath(appname)
+
+
+def set_init_dict(model_dict, checkpoint_state, c):
+    """Partially initialise ``model_dict`` from ``checkpoint_state``.
+
+    A checkpoint entry is copied only if its key exists in ``model_dict``, its element count
+    (``numel``) matches the model entry and its key contains none of ``c.reinit_layers``.
+
+    Args:
+        model_dict (dict): Model state dict; updated in place and returned.
+        checkpoint_state (dict): State dict read from a checkpoint.
+        c: Config object exposing ``has("reinit_layers")`` and ``reinit_layers``.
+
+    Returns:
+        dict: ``model_dict`` with the restorable entries overwritten.
+    """
+    # Partial initialization: if there is a mismatch with new and old layer, it is skipped.
+    restorable = {}
+    for key, value in checkpoint_state.items():
+        if key not in model_dict:
+            print(" | > Layer missing in the model definition: {}".format(key))
+            continue
+        # Only the number of elements is compared, not the shape.
+        if value.numel() == model_dict[key].numel():
+            restorable[key] = value
+    # Drop layers that were explicitly requested to be re-initialised.
+    if c.has("reinit_layers") and c.reinit_layers is not None:
+        skipped_names = list(c.reinit_layers)
+        restorable = {
+            key: value
+            for key, value in restorable.items()
+            if not any(skipped in key for skipped in skipped_names)
+        }
+    # Overwrite entries in the existing state dict.
+    model_dict.update(restorable)
+    print(" | > {} / {} layers are restored.".format(len(restorable), len(model_dict)))
+    return model_dict
+
+
+def format_aux_input(def_args: Dict, kwargs: Dict) -> Dict:
+    """Format kwargs to handle auxiliary inputs to models.
+
+    Every argument named in ``def_args`` that is missing from ``kwargs`` is added with its
+    default value. ``kwargs`` is modified in place and also returned.
+
+    Args:
+        def_args (Dict): A dictionary of argument names and their default values if not defined in `kwargs`.
+        kwargs (Dict): A `dict` or `kwargs` that includes auxiliary inputs to the model.
+
+    Returns:
+        Dict: arguments with formatted auxiliary inputs.
+    """
+    for name, default in def_args.items():
+        if name not in kwargs:
+            # Fill in the missing argument under its own name with its default value
+            # (previously the default value itself was used as the key).
+            kwargs[name] = default
+    return kwargs
+
+
+class KeepAverage:
+    """Track running averages of named scalar values."""
+
+    def __init__(self):
+        # name -> current average
+        self.avg_values = {}
+        # name -> number of samples folded into the average so far
+        self.iters = {}
+
+    def __getitem__(self, key):
+        """Return the current average stored under ``key``."""
+        return self.avg_values[key]
+
+    def items(self):
+        """Return the ``(name, average)`` pairs."""
+        return self.avg_values.items()
+
+    def add_value(self, name, init_val=0, init_iter=0):
+        """Register ``name`` (or reset it) with an initial average and sample count."""
+        self.avg_values[name] = init_val
+        self.iters[name] = init_iter
+
+    def update_value(self, name, value, weighted_avg=False):
+        """Fold ``value`` into the average of ``name``, registering the name if needed.
+
+        Args:
+            name: Key of the tracked value.
+            value: New sample.
+            weighted_avg (bool): If True, use an exponential moving average
+                (``0.99 * old + 0.01 * value``) instead of the arithmetic mean.
+        """
+        if name not in self.avg_values:
+            # First observation: it counts as one sample. With a count of 0 it would be
+            # multiplied away by the next arithmetic-mean update.
+            self.add_value(name, init_val=value, init_iter=1)
+        else:
+            # else update existing value
+            if weighted_avg:
+                self.avg_values[name] = 0.99 * self.avg_values[name] + 0.01 * value
+                self.iters[name] += 1
+            else:
+                # Rebuild the running sum, add the sample, then divide by the new count.
+                self.avg_values[name] = self.avg_values[name] * self.iters[name] + value
+                self.iters[name] += 1
+                self.avg_values[name] /= self.iters[name]
+
+    def add_values(self, name_dict):
+        """Register every ``name -> initial value`` pair of ``name_dict``."""
+        for key, value in name_dict.items():
+            self.add_value(key, init_val=value)
+
+    def update_values(self, value_dict):
+        """Apply ``update_value`` (arithmetic mean) to every pair of ``value_dict``."""
+        for key, value in value_dict.items():
+            self.update_value(key, value)
diff --git a/TTS/utils/io.py b/TTS/utils/io.py
new file mode 100644
index 0000000000000000000000000000000000000000..e9bdf3e68620052c15f7f5b2d0c00c8ad426f85f
--- /dev/null
+++ b/TTS/utils/io.py
@@ -0,0 +1,216 @@
+import datetime
+import json
+import os
+import pickle as pickle_tts
+import shutil
+from typing import Any, Callable, Dict, Union
+
+import fsspec
+import torch
+from coqpit import Coqpit
+
+from TTS.utils.generic_utils import get_user_data_dir
+
+
+class RenamingUnpickler(pickle_tts.Unpickler):
+    """Unpickler that maps references to the old ``mozilla_voice_tts`` package onto ``TTS``."""
+
+    def find_class(self, module, name):
+        """Resolve ``name`` after replacing ``mozilla_voice_tts`` with ``TTS`` in ``module``."""
+        renamed_module = module.replace("mozilla_voice_tts", "TTS")
+        return super().find_class(renamed_module, name)
+
+
+class AttrDict(dict):
+    """A custom dict which converts dict keys
+    to class attributes.
+
+    The instance is used as its own ``__dict__``, so ``d["key"]`` and ``d.key`` read and
+    write the same storage.
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        # Alias the attribute namespace to the mapping itself; must run after the dict
+        # has been populated by ``dict.__init__``.
+        self.__dict__ = self
+
+
+def copy_model_files(config: Coqpit, out_path, new_fields=None):
+    """Write ``config.json`` to the training folder and copy the stats file next to it.
+
+    Args:
+        config (Coqpit): Coqpit config defining the training run.
+        out_path (str): output path to copy the file.
+        new_fields (dict): new fields to be added or edited
+            in the config file.
+    """
+    config_target = os.path.join(out_path, "config.json")
+    # add extra information fields
+    if new_fields:
+        config.update(new_fields, allow_new=True)
+    # TODO: Revert to config.save_json() once Coqpit supports arbitrary paths.
+    with fsspec.open(config_target, "w", encoding="utf8") as config_file:
+        json.dump(config.to_dict(), config_file, indent=4)
+
+    # copy model stats file if available
+    if config.audio.stats_path is None:
+        return
+    stats_target = os.path.join(out_path, "scale_stats.npy")
+    target_fs = fsspec.get_mapper(stats_target).fs
+    if target_fs.exists(stats_target):
+        # An existing copy is left untouched.
+        return
+    with fsspec.open(config.audio.stats_path, "rb") as source_file:
+        with fsspec.open(stats_target, "wb") as target_file:
+            shutil.copyfileobj(source_file, target_file)
+
+
+def load_fsspec(
+    path: str,
+    map_location: Union[str, Callable, torch.device, Dict[Union[str, torch.device], Union[str, torch.device]]] = None,
+    cache: bool = True,
+    **kwargs,
+) -> Any:
+    """Like torch.load but can load from other locations (e.g. s3:// , gs://).
+
+    Args:
+        path: Any path or url supported by fsspec.
+        map_location: torch.device or str.
+        cache: If True, cache a remote file locally for subsequent calls. It is cached under `get_user_data_dir()/tts_cache`. Defaults to True.
+        **kwargs: Keyword arguments forwarded to torch.load.
+
+    Returns:
+        Object stored in path.
+    """
+    is_local = os.path.isdir(path) or os.path.isfile(path)
+    if cache and not is_local:
+        # Remote file: go through fsspec's file cache so later calls reuse the local copy.
+        opened = fsspec.open(
+            f"filecache::{path}",
+            filecache={"cache_storage": str(get_user_data_dir("tts_cache"))},
+            mode="rb",
+        )
+    else:
+        opened = fsspec.open(path, "rb")
+    # NOTE(review): torch.load unpickles the file; only load checkpoints from trusted sources.
+    with opened as f:
+        return torch.load(f, map_location=map_location, **kwargs)
+
+
+def load_checkpoint(
+    model, checkpoint_path, use_cuda=False, eval=False, cache=False
+):  # pylint: disable=redefined-builtin
+    """Load a checkpoint onto the CPU and restore its ``"model"`` weights into ``model``.
+
+    Args:
+        model: Object exposing ``load_state_dict``, ``cuda`` and ``eval``.
+        checkpoint_path: Path or url handed to ``load_fsspec``.
+        use_cuda (bool): Call ``model.cuda()`` after loading.
+        eval (bool): Call ``model.eval()`` after loading.
+        cache (bool): Forwarded to ``load_fsspec``.
+
+    Returns:
+        tuple: ``(model, state)`` where ``state`` is the loaded checkpoint object.
+    """
+    cpu_device = torch.device("cpu")
+    try:
+        state = load_fsspec(checkpoint_path, map_location=cpu_device, cache=cache)
+    except ModuleNotFoundError:
+        # Retry with an unpickler that rewrites the old package name.
+        # NOTE(review): ``pickle_tts`` is the stdlib ``pickle`` module, so this assignment
+        # patches ``pickle.Unpickler`` process-wide and is never undone.
+        pickle_tts.Unpickler = RenamingUnpickler
+        state = load_fsspec(checkpoint_path, map_location=cpu_device, pickle_module=pickle_tts, cache=cache)
+    model.load_state_dict(state["model"])
+    if use_cuda:
+        model.cuda()
+    if eval:
+        model.eval()
+    return model, state
+
+
+def save_fsspec(state: Any, path: str, **kwargs):
+    """Like torch.save but can save to other locations (e.g. s3:// , gs://).
+
+    Args:
+        state: State object to save
+        path: Any path or url supported by fsspec.
+        **kwargs: Keyword arguments forwarded to torch.save.
+    """
+    destination = fsspec.open(path, "wb")
+    with destination as out_file:
+        torch.save(state, out_file, **kwargs)
+
+
+def save_model(config, model, optimizer, scaler, current_step, epoch, output_path, **kwargs):
+    """Bundle model, optimizer and scaler states with run metadata and save them.
+
+    Args:
+        config: Training config; a ``Coqpit`` instance is converted with ``to_dict()``.
+        model: Model to save; if it has a ``module`` attribute, that inner module is saved.
+        optimizer: Optimizer, list of optimizers, a ``CapacitronOptimizer`` or None.
+        scaler: Scaler, list of scalers or None.
+        current_step: Stored under ``"step"``.
+        epoch: Stored under ``"epoch"``.
+        output_path: Destination handed to ``save_fsspec``.
+        **kwargs: Extra entries merged into the saved state (they may override the above).
+    """
+    # Use the inner module when the model is wrapped (i.e. exposes ``.module``).
+    unwrapped_model = model.module if hasattr(model, "module") else model
+    model_state = unwrapped_model.state_dict()
+
+    if isinstance(optimizer, list):
+        optimizer_state = [optim.state_dict() for optim in optimizer]
+    elif optimizer is None:
+        optimizer_state = None
+    elif optimizer.__class__.__name__ == "CapacitronOptimizer":
+        optimizer_state = [optimizer.primary_optimizer.state_dict(), optimizer.secondary_optimizer.state_dict()]
+    else:
+        optimizer_state = optimizer.state_dict()
+
+    if isinstance(scaler, list):
+        scaler_state = [s.state_dict() for s in scaler]
+    elif scaler is None:
+        scaler_state = None
+    else:
+        scaler_state = scaler.state_dict()
+
+    if isinstance(config, Coqpit):
+        config = config.to_dict()
+
+    state = dict(
+        config=config,
+        model=model_state,
+        optimizer=optimizer_state,
+        scaler=scaler_state,
+        step=current_step,
+        epoch=epoch,
+        date=datetime.date.today().strftime("%B %d, %Y"),
+    )
+    state.update(kwargs)
+    save_fsspec(state, output_path)
+
+
+def save_checkpoint(
+    config,
+    model,
+    optimizer,
+    scaler,
+    current_step,
+    epoch,
+    output_folder,
+    **kwargs,
+):
+    """Save the training state to ``output_folder/checkpoint_<current_step>.pth``.
+
+    All arguments other than ``output_folder`` are forwarded to ``save_model``.
+    """
+    checkpoint_path = os.path.join(output_folder, "checkpoint_{}.pth".format(current_step))
+    print("\n > CHECKPOINT : {}".format(checkpoint_path))
+    save_model(config, model, optimizer, scaler, current_step, epoch, checkpoint_path, **kwargs)
+
+
+def save_best_model(
+    current_loss,
+    best_loss,
+    config,
+    model,
+    optimizer,
+    scaler,
+    current_step,
+    epoch,
+    out_path,
+    keep_all_best=False,
+    keep_after=10000,
+    **kwargs,
+):
+    """Save the model as the new best one when ``current_loss`` beats ``best_loss``.
+
+    The checkpoint is written to ``out_path/best_model_<current_step>.pth`` and copied to
+    ``out_path/best_model.pth``. Older ``best_model*.pth`` files are removed unless
+    ``keep_all_best`` is set and ``current_step`` has reached ``keep_after``.
+
+    Returns:
+        The best loss after this call: ``current_loss`` if it improved, else ``best_loss``.
+    """
+    if not current_loss < best_loss:
+        return best_loss
+
+    best_model_name = f"best_model_{current_step}.pth"
+    checkpoint_path = os.path.join(out_path, best_model_name)
+    print(" > BEST MODEL : {}".format(checkpoint_path))
+    save_model(
+        config,
+        model,
+        optimizer,
+        scaler,
+        current_step,
+        epoch,
+        checkpoint_path,
+        model_loss=current_loss,
+        **kwargs,
+    )
+    fs = fsspec.get_mapper(out_path).fs
+    # only delete previous if current is saved successfully
+    prune_previous = not keep_all_best or (current_step < keep_after)
+    if prune_previous:
+        for stale_path in fs.glob(os.path.join(out_path, "best_model*.pth")):
+            if os.path.basename(stale_path) != best_model_name:
+                fs.rm(stale_path)
+    # create a shortcut which always points to the currently best model
+    shortcut_path = os.path.join(out_path, "best_model.pth")
+    fs.copy(checkpoint_path, shortcut_path)
+    return current_loss
diff --git a/TTS/utils/manage.py b/TTS/utils/manage.py
new file mode 100644
index 0000000000000000000000000000000000000000..645099e01ab70bd79c84c530bad155cfd5d71efb
--- /dev/null
+++ b/TTS/utils/manage.py
@@ -0,0 +1,377 @@
+import json
+import os
+import zipfile
+from pathlib import Path
+from shutil import copyfile, rmtree
+from typing import Dict, Tuple
+
+import requests
+from tqdm import tqdm
+
+from TTS.config import load_config
+from TTS.utils.generic_utils import get_user_data_dir
+
+# Maps a lower-case license name to a web page describing that license.
+# `ModelManager.print_model_license` lower-cases the `license` field of a model entry
+# before looking it up here, so every key must be written in lower case.
+LICENSE_URLS = {
+    "cc by-nc-nd 4.0": "https://creativecommons.org/licenses/by-nc-nd/4.0/",
+    "mpl": "https://www.mozilla.org/en-US/MPL/2.0/",
+    "mpl2": "https://www.mozilla.org/en-US/MPL/2.0/",
+    "mpl 2.0": "https://www.mozilla.org/en-US/MPL/2.0/",
+    "mit": "https://choosealicense.com/licenses/mit/",
+    "apache 2.0": "https://choosealicense.com/licenses/apache-2.0/",
+    "apache2": "https://choosealicense.com/licenses/apache-2.0/",
+    "cc-by-sa 4.0": "https://creativecommons.org/licenses/by-sa/4.0/",
+}
+
+
+class ModelManager(object):
+    """Manage TTS models defined in .models.json.
+
+    Provides an interface to list, describe and download the models
+    defined in '.models.json'.
+
+    Downloaded models are stored under `get_user_data_dir("tts")` by default, or under
+    `<output_prefix>/tts` when `output_prefix` is given.
+
+    Args:
+        models_file (str): path to .models.json file. Defaults to None, in which case
+            `../.models.json` relative to this module is read.
+        output_prefix (str): parent folder of the `tts` download folder. Defaults to None.
+        progress_bar (bool): print a progress bar when downloading a file. Defaults to False.
+    """
+
+    def __init__(self, models_file=None, output_prefix=None, progress_bar=False):
+        super().__init__()
+        self.progress_bar = progress_bar
+        if output_prefix is None:
+            self.output_prefix = get_user_data_dir("tts")
+        else:
+            self.output_prefix = os.path.join(output_prefix, "tts")
+        self.models_dict = None
+        if models_file is not None:
+            self.read_models_file(models_file)
+        else:
+            # try the default location
+            path = Path(__file__).parent / "../.models.json"
+            self.read_models_file(path)
+
+    def read_models_file(self, file_path):
+        """Read .models.json as a dict
+
+        Args:
+            file_path (str): path to .models.json.
+        """
+        with open(file_path, "r", encoding="utf-8") as json_file:
+            self.models_dict = json.load(json_file)
+
+    def _list_models(self, model_type, model_count=0):
+        """Print and collect the full names of all models of `model_type`.
+
+        Args:
+            model_type (str): top-level key of the models dict, e.g. `tts_models`.
+            model_count (int): number printed next to the first model. Defaults to 0.
+
+        Returns:
+            List[str]: names formatted as `type/language/dataset/model`.
+        """
+        model_list = []
+        for lang, datasets in self.models_dict[model_type].items():
+            for dataset, models in datasets.items():
+                for model in models:
+                    model_full_name = f"{model_type}--{lang}--{dataset}--{model}"
+                    output_path = os.path.join(self.output_prefix, model_full_name)
+                    if os.path.exists(output_path):
+                        print(f" {model_count}: {model_type}/{lang}/{dataset}/{model} [already downloaded]")
+                    else:
+                        print(f" {model_count}: {model_type}/{lang}/{dataset}/{model}")
+                    model_list.append(f"{model_type}/{lang}/{dataset}/{model}")
+                    model_count += 1
+        return model_list
+
+    def _list_for_model_type(self, model_type):
+        """Print the models of one type and return their names without the type prefix.
+
+        Args:
+            model_type (str): top-level key of the models dict, e.g. `vocoder_models`.
+
+        Returns:
+            List[str]: names formatted as `language/dataset/model`; empty if the type is unknown.
+        """
+        print(" Name format: language/dataset/model")
+        # NOTE: `model_type` used to be overwritten with "tts_models" here, which made
+        # `list_vocoder_models()` list the TTS models instead of the vocoders.
+        if model_type not in self.models_dict:
+            print(f"> model_type {model_type} does not exist in the list.")
+            return []
+        prefix = model_type + "/"
+        # every name returned by `_list_models` starts with the prefix; strip only that occurrence
+        return [name[len(prefix) :] for name in self._list_models(model_type, 1)]
+
+    def list_models(self):
+        """Print the models of every type and return their names as `type/language/dataset/model`."""
+        print(" Name format: type/language/dataset/model")
+        models_name_list = []
+        # numbering restarts at 1 for every type, matching the per-type index
+        # expected by `model_info_by_idx`
+        model_count = 1
+        for model_type in self.models_dict:
+            model_list = self._list_models(model_type, model_count)
+            models_name_list.extend(model_list)
+        return models_name_list
+
+    def _print_model_info(self, model_type, lang, dataset, model):
+        """Print the name parts, description and default vocoder of one existing model entry."""
+        model_item = self.models_dict[model_type][lang][dataset][model]
+        print(f"> model type : {model_type}")
+        print(f"> language supported : {lang}")
+        print(f"> dataset used : {dataset}")
+        print(f"> model name : {model}")
+        if "description" in model_item:
+            print(f"> description : {model_item['description']}")
+        else:
+            print("> description : coming soon")
+        if "default_vocoder" in model_item:
+            print(f"> default_vocoder : {model_item['default_vocoder']}")
+
+    def model_info_by_idx(self, model_query):
+        """Print the description of the model from .models.json file using model_idx
+
+        Args:
+            model_query (str): <model_type>/<model_idx>, where the index is 1-based
+                and counted within the given model type.
+        """
+        query_parts = model_query.split("/")
+        if len(query_parts) != 2:
+            print("> model_query should be in the format <model_type>/<model_idx>!")
+            return
+        model_type, model_query_idx = query_parts
+        try:
+            model_query_idx = int(model_query_idx)
+        except ValueError:
+            print("> model_query_idx should be an integer!")
+            return
+        if model_query_idx <= 0:
+            print("> model_query_idx should be a positive integer!")
+            return
+        if model_type not in self.models_dict:
+            print(f"> model_type {model_type} does not exist in the list.")
+            return
+        # keep the name parts as tuples so they never have to be re-split
+        entries = [
+            (lang, dataset, model)
+            for lang, datasets in self.models_dict[model_type].items()
+            for dataset, models in datasets.items()
+            for model in models
+        ]
+        model_count = len(entries)
+        if model_query_idx > model_count:
+            print(f"model query idx exceeds the number of available models [{model_count}] ")
+            return
+        lang, dataset, model = entries[model_query_idx - 1]
+        self._print_model_info(model_type, lang, dataset, model)
+
+    def model_info_by_full_name(self, model_query_name):
+        """Print the description of the model from .models.json file using model_full_name
+
+        Args:
+            model_query_name (str): Format is <model_type>/<language>/<dataset>/<model_name>
+        """
+        name_parts = model_query_name.split("/")
+        if len(name_parts) != 4:
+            print("> model name should be in the format <model_type>/<language>/<dataset>/<model_name>!")
+            return
+        model_type, lang, dataset, model = name_parts
+        if model_type not in self.models_dict:
+            print(f"> model_type {model_type} does not exist in the list.")
+            return
+        if lang not in self.models_dict[model_type]:
+            print(f"> lang {lang} does not exist for {model_type}.")
+            return
+        if dataset not in self.models_dict[model_type][lang]:
+            print(f"> dataset {dataset} does not exist for {model_type}/{lang}.")
+            return
+        if model not in self.models_dict[model_type][lang][dataset]:
+            print(f"> model {model} does not exist for {model_type}/{lang}/{dataset}.")
+            return
+        self._print_model_info(model_type, lang, dataset, model)
+
+    def list_tts_models(self):
+        """Print all `TTS` models and return a list of model names
+
+        Format is `language/dataset/model`
+        """
+        return self._list_for_model_type("tts_models")
+
+    def list_vocoder_models(self):
+        """Print all the `vocoder` models and return a list of model names
+
+        Format is `language/dataset/model`
+        """
+        return self._list_for_model_type("vocoder_models")
+
+    def list_langs(self):
+        """Print all the available languages"""
+        print(" Name format: type/language")
+        for model_type in self.models_dict:
+            for lang in self.models_dict[model_type]:
+                print(f" >: {model_type}/{lang} ")
+
+    def list_datasets(self):
+        """Print all the datasets"""
+        print(" Name format: type/language/dataset")
+        for model_type in self.models_dict:
+            for lang in self.models_dict[model_type]:
+                for dataset in self.models_dict[model_type][lang]:
+                    print(f" >: {model_type}/{lang}/{dataset}")
+
+    @staticmethod
+    def print_model_license(model_item: Dict):
+        """Print the license of a model
+
+        Args:
+            model_item (dict): model item in the models.json
+        """
+        license_name = model_item.get("license")
+        # a missing, null or blank license is reported as "not available" instead of crashing
+        if isinstance(license_name, str) and license_name.strip() != "":
+            print(f" > Model's license - {license_name}")
+            license_url = LICENSE_URLS.get(license_name.strip().lower())
+            if license_url is not None:
+                print(f" > Check {license_url} for more info.")
+            else:
+                print(" > Check https://opensource.org/licenses for more info.")
+        else:
+            print(" > Model's license - No license information available")
+
+    def download_model(self, model_name):
+        """Download model files given the full model name.
+        Model name is in the format
+            'type/language/dataset/model'
+            e.g. 'tts_models/en/ljspeech/tacotron2-DDC'
+
+        Every model must have the following files:
+            - *.pth : pytorch model checkpoint file.
+            - config.json : model config file.
+            - scale_stats.npy (if exist): scale values for preprocessing.
+
+        Args:
+            model_name (str): model name as explained above.
+
+        Returns:
+            Tuple[str, str, dict]: path to the model file, path to the config file and
+                the model's entry in the models dict.
+        """
+        # fetch model info from the dict
+        model_type, lang, dataset, model = model_name.split("/")
+        model_full_name = f"{model_type}--{lang}--{dataset}--{model}"
+        model_item = self.models_dict[model_type][lang][dataset][model]
+        # set the model specific output path
+        output_path = os.path.join(self.output_prefix, model_full_name)
+        if os.path.exists(output_path):
+            print(f" > {model_name} is already downloaded.")
+        else:
+            os.makedirs(output_path, exist_ok=True)
+            print(f" > Downloading model to {output_path}")
+            try:
+                # download from github release
+                self._download_zip_file(model_item["github_rls_url"], output_path, self.progress_bar)
+            except BaseException:
+                # The folder's existence is what marks a model as downloaded, so a partial
+                # download (including an interrupted one) must not leave it behind.
+                rmtree(output_path, ignore_errors=True)
+                raise
+            self.print_model_license(model_item=model_item)
+        # find downloaded files
+        output_model_path, output_config_path = self._find_files(output_path)
+        # update paths in the config.json
+        self._update_paths(output_path, output_config_path)
+        return output_model_path, output_config_path, model_item
+
+    @staticmethod
+    def _find_files(output_path: str) -> Tuple[str, str]:
+        """Find the model and config files in the output path
+
+        Args:
+            output_path (str): path to the model files
+
+        Returns:
+            Tuple[str, str]: path to the model file and config file
+
+        Raises:
+            ValueError: if the model file or `config.json` is missing.
+        """
+        model_file = None
+        config_file = None
+        for file_name in os.listdir(output_path):
+            if file_name in ["model_file.pth", "model_file.pth.tar", "model.pth"]:
+                model_file = os.path.join(output_path, file_name)
+            elif file_name == "config.json":
+                config_file = os.path.join(output_path, file_name)
+        if model_file is None:
+            raise ValueError(" [!] Model file not found in the output path")
+        if config_file is None:
+            raise ValueError(" [!] Config file not found in the output path")
+        return model_file, config_file
+
+    @staticmethod
+    def _find_speaker_encoder(output_path: str) -> str:
+        """Find the speaker encoder file in the output path
+
+        Args:
+            output_path (str): path to the model files
+
+        Returns:
+            str: path to the speaker encoder file, or None if there is none
+        """
+        speaker_encoder_file = None
+        for file_name in os.listdir(output_path):
+            if file_name in ["model_se.pth", "model_se.pth.tar"]:
+                speaker_encoder_file = os.path.join(output_path, file_name)
+        return speaker_encoder_file
+
+    def _update_paths(self, output_path: str, config_path: str) -> None:
+        """Update paths for certain files in config.json after download.
+
+        Args:
+            output_path (str): local path the model is downloaded to.
+            config_path (str): local config.json path.
+        """
+        output_stats_path = os.path.join(output_path, "scale_stats.npy")
+        output_d_vector_file_path = os.path.join(output_path, "speakers.json")
+        output_speaker_ids_file_path = os.path.join(output_path, "speaker_ids.json")
+        speaker_encoder_config_path = os.path.join(output_path, "config_se.json")
+        speaker_encoder_model_path = self._find_speaker_encoder(output_path)
+
+        # (config field, local file) pairs; `_update_path` skips files that do not exist
+        field_updates = [
+            # scale_stats.npy
+            ("audio.stats_path", output_stats_path),
+            # speakers.json
+            ("d_vector_file", output_d_vector_file_path),
+            ("model_args.d_vector_file", output_d_vector_file_path),
+            # speaker_ids.json
+            ("speakers_file", output_speaker_ids_file_path),
+            ("model_args.speakers_file", output_speaker_ids_file_path),
+            # speaker encoder checkpoint and config
+            ("speaker_encoder_model_path", speaker_encoder_model_path),
+            ("model_args.speaker_encoder_model_path", speaker_encoder_model_path),
+            ("speaker_encoder_config_path", speaker_encoder_config_path),
+            ("model_args.speaker_encoder_config_path", speaker_encoder_config_path),
+        ]
+        for field_name, new_path in field_updates:
+            self._update_path(field_name, new_path, config_path)
+
+    @staticmethod
+    def _update_path(field_name, new_path, config_path):
+        """Update the path in the model config.json for the current environment after download
+
+        Args:
+            field_name (str): config field to set; a dot separates nested levels.
+            new_path (str): new value; nothing is changed if it is empty or does not exist.
+            config_path (str): config.json file that is loaded, modified and saved back.
+        """
+        if new_path and os.path.exists(new_path):
+            config = load_config(config_path)
+            field_names = field_name.split(".")
+            if len(field_names) > 1:
+                # field name points to a sub-level field
+                sub_conf = config
+                for fd in field_names[:-1]:
+                    if fd in sub_conf:
+                        sub_conf = sub_conf[fd]
+                    else:
+                        # the parent level is missing in this config, leave the file untouched
+                        return
+                sub_conf[field_names[-1]] = new_path
+            else:
+                # field name points to a top-level field
+                config[field_name] = new_path
+            config.save_json(config_path)
+
+    @staticmethod
+    def _download_zip_file(file_url, output_folder, progress_bar):
+        """Download a zip archive, extract it and move its files directly into `output_folder`.
+
+        Args:
+            file_url (str): URL of the zip archive (a github release).
+            output_folder (str): existing folder the files end up in.
+            progress_bar (bool): show a progress bar while downloading.
+
+        Raises:
+            zipfile.BadZipFile: if the downloaded file is not a valid zip archive.
+        """
+        temp_zip_name = os.path.join(output_folder, file_url.split("/")[-1])
+        bar = None
+        try:
+            # download the file
+            r = requests.get(file_url, stream=True)
+            total_size_in_bytes = int(r.headers.get("content-length", 0))
+            block_size = 1024  # 1 Kibibyte
+            if progress_bar:
+                bar = tqdm(total=total_size_in_bytes, unit="iB", unit_scale=True)
+            with open(temp_zip_name, "wb") as file:
+                for data in r.iter_content(block_size):
+                    if bar is not None:
+                        bar.update(len(data))
+                    file.write(data)
+            # extract the file
+            with zipfile.ZipFile(temp_zip_name) as z:
+                z.extractall(output_folder)
+                extracted_names = z.namelist()
+        except zipfile.BadZipFile:
+            print(f" > Error: Bad zip file - {file_url}")
+            # re-raise the original error so its message and traceback are kept
+            raise
+        finally:
+            if bar is not None:
+                bar.close()
+            # delete the zip whether or not the extraction succeeded
+            if os.path.exists(temp_zip_name):
+                os.remove(temp_zip_name)
+        # move the files to the outer path; names inside a zip always use "/"
+        top_level_folders = set()
+        for file_path in extracted_names:
+            if "/" in file_path:
+                top_level_folders.add(file_path.split("/")[0])
+            if file_path.endswith("/"):
+                # directory entry, nothing to copy
+                continue
+            src_path = os.path.join(output_folder, file_path)
+            dst_path = os.path.join(output_folder, os.path.basename(file_path))
+            if src_path != dst_path:
+                copyfile(src_path, dst_path)
+        # remove the extracted folder(s)
+        for folder_name in top_level_folders:
+            rmtree(os.path.join(output_folder, folder_name))
+
+    @staticmethod
+    def _check_dict_key(my_dict, key):
+        """Return True if `key` is in `my_dict` with a usable value.
+
+        A value is not usable when it is None or, for string keys, when it is an empty
+        sized value such as `""` or `[]`. Values without a length (numbers, booleans)
+        count as usable.
+        """
+        if key not in my_dict or my_dict[key] is None:
+            return False
+        value = my_dict[key]
+        # only sized values can be empty; calling len() on e.g. an int would raise TypeError
+        if isinstance(key, str) and hasattr(value, "__len__"):
+            return len(value) > 0
+        return True
diff --git a/TTS/utils/radam.py b/TTS/utils/radam.py
new file mode 100644
index 0000000000000000000000000000000000000000..73426e6433bc03dfa4d0a2e2eca43d5ed4e919e7
--- /dev/null
+++ b/TTS/utils/radam.py
@@ -0,0 +1,107 @@
+# modified from https://github.com/LiyuanLucasLiu/RAdam
+
+import math
+
+import torch
+from torch.optim.optimizer import Optimizer
+
+
+class RAdam(Optimizer):
+    """RAdam optimizer.
+
+    Args:
+        params: parameters or parameter groups, forwarded to `Optimizer`.
+        lr (float): learning rate. Must not be negative.
+        betas (tuple): decay factors of the running averages of the gradient and of the
+            squared gradient. Each must be in [0, 1).
+        eps (float): value added to the denominator of the update. Must not be negative.
+        weight_decay (float): weight decay factor; 0 disables it.
+        degenerated_to_sgd (bool): while `N_sma` is below 5, if True the parameters are
+            updated from the gradient running average alone, if False they are not updated.
+
+    Raises:
+        ValueError: if `lr`, `eps` or `betas` is outside its valid range.
+    """
+
+    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0, degenerated_to_sgd=True):
+        if lr < 0.0:
+            raise ValueError("Invalid learning rate: {}".format(lr))
+        if eps < 0.0:
+            raise ValueError("Invalid epsilon value: {}".format(eps))
+        if not 0.0 <= betas[0] < 1.0:
+            raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0]))
+        if not 0.0 <= betas[1] < 1.0:
+            raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1]))
+
+        self.degenerated_to_sgd = degenerated_to_sgd
+        # a parameter group whose betas differ from the defaults gets its own buffer,
+        # because the buffered values below are computed from the betas
+        if isinstance(params, (list, tuple)) and len(params) > 0 and isinstance(params[0], dict):
+            for param in params:
+                if "betas" in param and (param["betas"][0] != betas[0] or param["betas"][1] != betas[1]):
+                    param["buffer"] = [[None, None, None] for _ in range(10)]
+        # `buffer` holds 10 slots of [step, N_sma, step_size], selected by `step % 10` in `step()`
+        defaults = dict(
+            lr=lr, betas=betas, eps=eps, weight_decay=weight_decay, buffer=[[None, None, None] for _ in range(10)]
+        )
+        super().__init__(params, defaults)
+
+    def __setstate__(self, state):  # pylint: disable=useless-super-delegation
+        super().__setstate__(state)
+
+    def step(self, closure=None):
+        """Update every parameter that has a gradient.
+
+        Args:
+            closure (callable, optional): called once before the update; its return value
+                is returned by this method.
+
+        Returns:
+            The value returned by `closure`, or None if no closure is given.
+
+        Raises:
+            RuntimeError: if a gradient is sparse.
+        """
+
+        loss = None
+        if closure is not None:
+            loss = closure()
+
+        for group in self.param_groups:
+
+            for p in group["params"]:
+                if p.grad is None:
+                    continue
+                grad = p.grad.data.float()
+                if grad.is_sparse:
+                    raise RuntimeError("RAdam does not support sparse gradients")
+
+                # the update is computed on a float view/copy and written back with `copy_` below
+                p_data_fp32 = p.data.float()
+
+                state = self.state[p]
+
+                if len(state) == 0:
+                    # first update of this parameter
+                    state["step"] = 0
+                    state["exp_avg"] = torch.zeros_like(p_data_fp32)
+                    state["exp_avg_sq"] = torch.zeros_like(p_data_fp32)
+                else:
+                    state["exp_avg"] = state["exp_avg"].type_as(p_data_fp32)
+                    state["exp_avg_sq"] = state["exp_avg_sq"].type_as(p_data_fp32)
+
+                exp_avg, exp_avg_sq = state["exp_avg"], state["exp_avg_sq"]
+                beta1, beta2 = group["betas"]
+
+                # exp_avg_sq = beta2 * exp_avg_sq + (1 - beta2) * grad * grad
+                exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)
+                # exp_avg = beta1 * exp_avg + (1 - beta1) * grad
+                exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
+
+                state["step"] += 1
+                # N_sma and step_size depend only on the step count and the group's betas,
+                # so they are reused when the slot already holds the values for this step
+                buffered = group["buffer"][int(state["step"] % 10)]
+                if state["step"] == buffered[0]:
+                    N_sma, step_size = buffered[1], buffered[2]
+                else:
+                    buffered[0] = state["step"]
+                    beta2_t = beta2 ** state["step"]
+                    N_sma_max = 2 / (1 - beta2) - 1
+                    N_sma = N_sma_max - 2 * state["step"] * beta2_t / (1 - beta2_t)
+                    buffered[1] = N_sma
+
+                    # more conservative since it's an approximated value
+                    if N_sma >= 5:
+                        step_size = math.sqrt(
+                            (1 - beta2_t)
+                            * (N_sma - 4)
+                            / (N_sma_max - 4)
+                            * (N_sma - 2)
+                            / N_sma
+                            * N_sma_max
+                            / (N_sma_max - 2)
+                        ) / (1 - beta1 ** state["step"])
+                    elif self.degenerated_to_sgd:
+                        step_size = 1.0 / (1 - beta1 ** state["step"])
+                    else:
+                        # negative value: neither update branch below runs for this step
+                        step_size = -1
+                    buffered[2] = step_size
+
+                # more conservative since it's an approximated value
+                if N_sma >= 5:
+                    # update scaled by the square root of the squared-gradient average
+                    if group["weight_decay"] != 0:
+                        # p = p - weight_decay * lr * p
+                        p_data_fp32.add_(p_data_fp32, alpha=-group["weight_decay"] * group["lr"])
+                    denom = exp_avg_sq.sqrt().add_(group["eps"])
+                    p_data_fp32.addcdiv_(exp_avg, denom, value=-step_size * group["lr"])
+                    p.data.copy_(p_data_fp32)
+                elif step_size > 0:
+                    # update from the gradient average alone (only reached with `degenerated_to_sgd`)
+                    if group["weight_decay"] != 0:
+                        p_data_fp32.add_(p_data_fp32, alpha=-group["weight_decay"] * group["lr"])
+                    p_data_fp32.add_(exp_avg, alpha=-step_size * group["lr"])
+                    p.data.copy_(p_data_fp32)
+
+        return loss
diff --git a/TTS/utils/samplers.py b/TTS/utils/samplers.py
new file mode 100644
index 0000000000000000000000000000000000000000..df5d4185eb0d32986a51614e5f6a74555044efb9
--- /dev/null
+++ b/TTS/utils/samplers.py
@@ -0,0 +1,202 @@
+import math
+import random
+from typing import Callable, List, Union
+
+from torch.utils.data.sampler import BatchSampler, Sampler, SubsetRandomSampler
+
+
+class SubsetSampler(Sampler):
+    """
+    Sampler that yields the given indices one by one, in the order they were passed.
+
+    Args:
+        indices (list): a sequence of indices
+    """
+
+    def __init__(self, indices):
+        super().__init__(indices)
+        self.indices = indices
+
+    def __len__(self):
+        return len(self.indices)
+
+    def __iter__(self):
+        positions = range(len(self.indices))
+        return (self.indices[pos] for pos in positions)
+
+
+class PerfectBatchSampler(Sampler):
+    """
+    Samples mini-batches of indices in which every used class has the same number of items.
+
+    A batch is filled in rounds; each round takes one index from every class used for
+    that batch. Iteration ends as soon as one of those classes has no items left.
+
+    Args:
+        dataset_items(list): dataset items to sample from. Each item is indexed with `label_key`.
+        classes (list): list of classes of dataset_items to sample from.
+        batch_size (int): total number of samples to be sampled in a mini-batch.
+        num_classes_in_batch (int): number of classes in a mini-batch. If it differs from
+            `len(classes)`, the classes are drawn at random for every batch.
+        num_gpus (int): number of GPU in the data parallel mode.
+        shuffle (bool): if True, samples randomly, otherwise samples sequentially.
+        drop_last (bool): if True, drops last incomplete batch.
+        label_key (str): key of the class label in a dataset item. Defaults to "class_name".
+    """
+
+    def __init__(
+        self,
+        dataset_items,
+        classes,
+        batch_size,
+        num_classes_in_batch,
+        num_gpus=1,
+        shuffle=True,
+        drop_last=False,
+        label_key="class_name",
+    ):
+        super().__init__(dataset_items)
+        assert (
+            batch_size % (num_classes_in_batch * num_gpus) == 0
+        ), "Batch size must be divisible by number of classes times the number of data parallel devices (if enabled)."
+
+        # class label -> positions of the items carrying that label
+        label_indices = {}
+        for idx, item in enumerate(dataset_items):
+            label_indices.setdefault(item[label_key], []).append(idx)
+
+        # one sampler per requested class, in the order of `classes`
+        self._samplers = []
+        for key in classes:
+            class_indices = label_indices[key]
+            self._samplers.append(SubsetRandomSampler(class_indices) if shuffle else SubsetSampler(class_indices))
+
+        self._batch_size = batch_size
+        self._drop_last = drop_last
+        self._dp_devices = num_gpus
+        self._num_classes_in_batch = num_classes_in_batch
+
+    def __iter__(self):
+        num_samplers = len(self._samplers)
+        # None means "use every class"; otherwise the positions of the classes of the current batch
+        active = None
+        if self._num_classes_in_batch != num_samplers:
+            active = random.sample(range(num_samplers), self._num_classes_in_batch)
+
+        iterators = [iter(sampler) for sampler in self._samplers]
+        batch = []
+
+        while True:
+            round_idxs = []
+            exhausted = False
+            for pos, iterator in enumerate(iterators):
+                if active is not None and pos not in active:
+                    continue
+                idx = next(iterator, None)
+                if idx is None:
+                    exhausted = True
+                    break
+                round_idxs.append(idx)
+            if exhausted:
+                # the indices of the unfinished round are discarded
+                break
+            batch.extend(round_idxs)
+            if len(batch) == self._batch_size:
+                yield batch
+                batch = []
+                if active is not None:
+                    active = random.sample(range(num_samplers), self._num_classes_in_batch)
+
+        if self._drop_last or len(batch) == 0:
+            return
+        # trim the last batch so that its rounds split evenly over the data parallel devices
+        groups = len(batch) // self._num_classes_in_batch
+        if groups % self._dp_devices != 0:
+            usable_groups = (groups // self._dp_devices) * self._dp_devices
+            batch = batch[: usable_groups * self._num_classes_in_batch]
+        if len(batch) > 0:
+            yield batch
+
+    def __len__(self):
+        class_batch_size = self._batch_size // self._num_classes_in_batch
+        batches_per_class = [(len(s) + class_batch_size - 1) // class_batch_size for s in self._samplers]
+        return min(batches_per_class)
+
+
+def identity(x):
+    """Return `x` unchanged. Default `sort_key` of `SortedSampler` and `BucketBatchSampler`."""
+    return x
+
+
+class SortedSampler(Sampler):
+    """Yields the positions of the elements of `data` ordered by `sort_key`.
+
+    The order is computed once at construction, so every iteration yields the same
+    sequence. Elements with equal keys keep their original relative order.
+
+    Taken from https://github.com/PetrochukM/PyTorch-NLP
+
+    Args:
+        data (iterable): Iterable data.
+        sort_key (callable): Specifies a function of one argument that is used to extract a
+            numerical comparison key from each list element.
+
+    Example:
+        >>> list(SortedSampler(range(10), sort_key=lambda i: -i))
+        [9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+
+    """
+
+    def __init__(self, data, sort_key: Callable = identity):
+        super().__init__(data)
+        self.data = data
+        self.sort_key = sort_key
+        # one key per element, then order the positions by their key
+        keys = [self.sort_key(row) for row in self.data]
+        self.sorted_indexes = sorted(range(len(keys)), key=keys.__getitem__)
+
+    def __len__(self):
+        return len(self.data)
+
+    def __iter__(self):
+        return iter(self.sorted_indexes)
+
+
+class BucketBatchSampler(BatchSampler):
+    """Bucket batch sampler
+
+    Indices drawn from `sampler` are grouped into buckets. The samples of a bucket are
+    sorted with `sort_key` and cut into batches, and the batches of the bucket are then
+    yielded in random order.
+
+    Adapted from https://github.com/PetrochukM/PyTorch-NLP
+
+    Args:
+        sampler (torch.utils.data.sampler.Sampler): sampler providing the sample indices.
+        data (list): List of data samples, indexed with the values from `sampler`.
+        batch_size (int): Size of mini-batch.
+        drop_last (bool): If `True` the sampler will drop the last batch if its size would be less
+            than `batch_size`.
+        sort_key (callable, optional): Callable to specify a comparison key for sorting.
+        bucket_size_multiplier (int, optional): Buckets are of size
+            `batch_size * bucket_size_multiplier`, capped at `len(sampler)` if it has a length.
+
+    Example:
+        >>> sampler = WeightedRandomSampler(weights, len(weights))
+        >>> sampler = BucketBatchSampler(sampler, data=data_items, batch_size=32, drop_last=True)
+    """
+
+    def __init__(
+        self,
+        sampler,
+        data,
+        batch_size,
+        drop_last,
+        sort_key: Union[Callable, List] = identity,
+        bucket_size_multiplier=100,
+    ):
+        super().__init__(sampler, batch_size, drop_last)
+        self.data = data
+        self.sort_key = sort_key
+        bucket_size = batch_size * bucket_size_multiplier
+        if hasattr(sampler, "__len__"):
+            bucket_size = min(bucket_size, len(sampler))
+        # buckets are never dropped, only the batches cut from them may be
+        self.bucket_sampler = BatchSampler(sampler, bucket_size, False)
+
+    def __iter__(self):
+        for bucket_idxs in self.bucket_sampler:
+            bucket_items = [self.data[idx] for idx in bucket_idxs]
+            in_bucket_order = SortedSampler(bucket_items, self.sort_key)
+            bucket_batches = list(BatchSampler(in_bucket_order, self.batch_size, self.drop_last))
+            for local_batch in SubsetRandomSampler(bucket_batches):
+                # positions inside the bucket -> indices of the wrapped sampler
+                yield [bucket_idxs[i] for i in local_batch]
+
+    def __len__(self):
+        num_samples = len(self.sampler)
+        if self.drop_last:
+            return num_samples // self.batch_size
+        return math.ceil(num_samples / self.batch_size)
diff --git a/TTS/utils/synthesizer.py b/TTS/utils/synthesizer.py
new file mode 100644
index 0000000000000000000000000000000000000000..3bbfc52cad4e8e1e9bdb9d1bf7fade86c7d73c9b
--- /dev/null
+++ b/TTS/utils/synthesizer.py
@@ -0,0 +1,383 @@
+import time
+from typing import List
+
+import numpy as np
+import pysbd
+import torch
+
+from TTS.config import load_config
+from TTS.tts.models import setup_model as setup_tts_model
+
+# pylint: disable=unused-wildcard-import
+# pylint: disable=wildcard-import
+from TTS.tts.utils.synthesis import synthesis, transfer_voice, trim_silence
+from TTS.utils.audio import AudioProcessor
+from TTS.vocoder.models import setup_model as setup_vocoder_model
+from TTS.vocoder.utils.generic_utils import interpolate_vocoder_input
+
+
+class Synthesizer(object):
+    def __init__(
+        self,
+        tts_checkpoint: str,
+        tts_config_path: str,
+        tts_speakers_file: str = "",
+        tts_languages_file: str = "",
+        vocoder_checkpoint: str = "",
+        vocoder_config: str = "",
+        encoder_checkpoint: str = "",
+        encoder_config: str = "",
+        use_cuda: bool = False,
+    ) -> None:
+        """Inference front-end for 🐸 TTS: loads a tts model and, optionally, a vocoder.
+
+        Input text is cut into sentences with `pysbd` (English segmenter) and each
+        sentence is synthesized on its own.
+
+        Special characters are not handled here; clean the text before passing it
+        to the Synthesizer.
+
+        TODO: set the segmenter based on the source language
+
+        Args:
+            tts_checkpoint (str): path to the tts model file.
+            tts_config_path (str): path to the tts config file.
+            tts_speakers_file (str, optional): path to the speakers file. Only stored here; `tts()`
+                treats a non-empty value as a sign of a multi-speaker model. Defaults to `""`.
+            tts_languages_file (str, optional): path to the languages file. Only stored here; `tts()`
+                treats a non-empty value as a sign of a multi-lingual model. Defaults to `""`.
+            vocoder_checkpoint (str, optional): path to the vocoder model file. When empty no vocoder
+                is loaded. Defaults to `""`.
+            vocoder_config (str, optional): path to the vocoder config file. Defaults to `""`.
+            encoder_checkpoint (str, optional): path to the speaker encoder model file. Defaults to `""`.
+            encoder_config (str, optional): path to the speaker encoder config file. Defaults to `""`.
+            use_cuda (bool, optional): enable/disable cuda. Defaults to False.
+        """
+        # caller-provided paths and flags, kept on the instance as given
+        self.use_cuda = use_cuda
+        self.tts_checkpoint = tts_checkpoint
+        self.tts_config_path = tts_config_path
+        self.tts_speakers_file = tts_speakers_file
+        self.tts_languages_file = tts_languages_file
+        self.vocoder_checkpoint = vocoder_checkpoint
+        # replaced by the loaded config object once `_load_vocoder` runs
+        self.vocoder_config = vocoder_config
+        self.encoder_checkpoint = encoder_checkpoint
+        self.encoder_config = encoder_config
+
+        # state filled in by the loaders below (or left at these defaults)
+        self.tts_model = None
+        self.vocoder_model = None
+        self.speaker_manager = None
+        self.language_manager = None
+        self.tts_speakers = {}
+        self.tts_languages = {}
+        self.num_speakers = 0
+        self.num_languages = 0
+        self.d_vector_dim = 0
+        self.seg = self._get_segmenter("en")
+
+        if self.use_cuda:
+            assert torch.cuda.is_available(), "CUDA is not availabe on this machine."
+
+        self._load_tts(tts_checkpoint, tts_config_path, use_cuda)
+        # the tts sample rate is the output rate unless a vocoder takes over
+        self.output_sample_rate = self.tts_config.audio["sample_rate"]
+        if vocoder_checkpoint:
+            self._load_vocoder(vocoder_checkpoint, vocoder_config, use_cuda)
+            self.output_sample_rate = self.vocoder_config.audio["sample_rate"]
+
+    @staticmethod
+    def _get_segmenter(lang: str):
+        """Build the `pysbd` sentence segmenter for the given language.
+
+        Args:
+            lang (str): target language code.
+
+        Returns:
+            pysbd.Segmenter: segmenter created with `clean=True`.
+        """
+        segmenter = pysbd.Segmenter(language=lang, clean=True)
+        return segmenter
+
+    def _load_tts(self, tts_checkpoint: str, tts_config_path: str, use_cuda: bool) -> None:
+        """Load the TTS model and keep it on `self.tts_model`.
+
+        Steps, in order:
+
+        1. Load the model config and check that a phonemizer is set when phonemes are used.
+        2. Init the model from the config.
+        3. If no encoder checkpoint was given, try to take the speaker encoder paths from the config.
+        4. Load the model weights in eval mode.
+        5. Move the model to the GPU if CUDA is enabled.
+        6. Init the speaker encoder on the model's speaker manager when an encoder checkpoint is known.
+
+        Args:
+            tts_checkpoint (str): path to the model checkpoint.
+            tts_config_path (str): path to the model config file.
+            use_cuda (bool): enable/disable CUDA use.
+
+        Raises:
+            ValueError: if the config enables phonemes but defines no phonemizer.
+        """
+        config = load_config(tts_config_path)
+        self.tts_config = config
+        phonemizer_missing = config["use_phonemes"] and config["phonemizer"] is None
+        if phonemizer_missing:
+            raise ValueError("Phonemizer is not defined in the TTS config.")
+
+        self.tts_model = setup_tts_model(config=config)
+
+        # fall back to the encoder paths stored in the tts config
+        if not self.encoder_checkpoint:
+            self._set_speaker_encoder_paths_from_tts_config()
+
+        self.tts_model.load_checkpoint(config, tts_checkpoint, eval=True)
+        if use_cuda:
+            self.tts_model.cuda()
+
+        wants_encoder = self.encoder_checkpoint and hasattr(self.tts_model, "speaker_manager")
+        if wants_encoder:
+            self.tts_model.speaker_manager.init_encoder(self.encoder_checkpoint, self.encoder_config, use_cuda)
+
+    def _set_speaker_encoder_paths_from_tts_config(self):
+        """Copy the speaker encoder checkpoint/config paths out of `tts_config.model_args`.
+
+        Nothing is changed when the config has no `model_args` or when `model_args`
+        has no `speaker_encoder_config_path`.
+        """
+        model_args = getattr(self.tts_config, "model_args", None)
+        if not hasattr(model_args, "speaker_encoder_config_path"):
+            return
+        self.encoder_checkpoint = model_args.speaker_encoder_model_path
+        self.encoder_config = model_args.speaker_encoder_config_path
+
+    def _load_vocoder(self, model_file: str, model_config: str, use_cuda: bool) -> None:
+        """Load the vocoder model and everything needed to feed it.
+
+        Steps, in order:
+
+        1. Load the vocoder config (replaces the path kept in `self.vocoder_config`).
+        2. Init the AudioProcessor for the vocoder from the config's `audio` section.
+        3. Init the vocoder model from the config and load its weights in eval mode.
+        4. Move the model to the GPU if CUDA is enabled.
+
+        Args:
+            model_file (str): path to the model checkpoint.
+            model_config (str): path to the model config file.
+            use_cuda (bool): enable/disable CUDA use.
+        """
+        config = load_config(model_config)
+        self.vocoder_config = config
+        self.vocoder_ap = AudioProcessor(verbose=False, **config.audio)
+        self.vocoder_model = setup_vocoder_model(config)
+        self.vocoder_model.load_checkpoint(config, model_file, eval=True)
+        if use_cuda:
+            self.vocoder_model.cuda()
+
+    def split_into_sentences(self, text) -> List[str]:
+        """Run the segmenter held in `self.seg` over the given text.
+
+        Args:
+            text (str): input text in string format.
+
+        Returns:
+            List[str]: list of sentences.
+        """
+        sentences = self.seg.segment(text)
+        return sentences
+
+    def save_wav(self, wav: List[int], path: str) -> None:
+        """Write the waveform to disk through the tts model's audio processor.
+
+        The samples are converted to a numpy array and saved with
+        `self.output_sample_rate`.
+
+        Args:
+            wav (List[int]): waveform as a list of values.
+            path (str): output path to save the waveform.
+        """
+        samples = np.array(wav)
+        audio_processor = self.tts_model.ap
+        audio_processor.save_wav(samples, path, self.output_sample_rate)
+
+    def tts(
+        self,
+        text: str = "",
+        speaker_name: str = "",
+        language_name: str = "",
+        speaker_wav=None,
+        style_wav=None,
+        style_text=None,
+        reference_wav=None,
+        reference_speaker_name=None,
+    ) -> List[int]:
+        """🐸 TTS magic. Run all the models and generate speech.
+
+        Without `reference_wav` the text is split into sentences and each sentence is
+        synthesized separately. With `reference_wav` the method performs voice
+        conversion on that clip instead and `text` is not synthesized.
+
+        Args:
+            text (str): input text.
+            speaker_name (str, optional): speaker id for multi-speaker models. Defaults to "".
+            language_name (str, optional): language id for multi-language models. Defaults to "".
+            speaker_wav (Union[str, List[str]], optional): path to the speaker wav. When given, a new
+                speaker embedding is computed from it and replaces the one selected by `speaker_name`.
+                Defaults to None.
+            style_wav ([type], optional): style waveform for GST. Defaults to None.
+            style_text ([type], optional): transcription of style_wav for Capacitron. Defaults to None.
+            reference_wav ([type], optional): reference waveform for voice conversion. Defaults to None.
+            reference_speaker_name ([type], optional): speaker id of reference waveform. Defaults to None.
+
+        Returns:
+            List[int]: the generated waveform. In text mode this is a Python list in which 10000 zero
+                samples follow every sentence; in voice-conversion mode it is the squeezed array
+                produced by the model or the vocoder.
+
+        Raises:
+            ValueError: if neither `text` nor `reference_wav` is given, if a multi-speaker model gets
+                neither `speaker_name` nor `speaker_wav`, if `speaker_name` is given for a model without
+                speakers, or if a multi-lingual model gets no usable `language_name`.
+        """
+        start_time = time.time()
+        wavs = []
+
+        if not text and not reference_wav:
+            raise ValueError(
+                "You need to define either `text` (for sythesis) or a `reference_wav` (for voice conversion) to use the Coqui TTS API."
+            )
+
+        if text:
+            sens = self.split_into_sentences(text)
+            print(" > Text splitted to sentences.")
+            print(sens)
+
+        # handle multi-speaker
+        speaker_embedding = None
+        speaker_id = None
+        if self.tts_speakers_file or hasattr(self.tts_model.speaker_manager, "name_to_id"):
+            if speaker_name and isinstance(speaker_name, str):
+                if self.tts_config.use_d_vector_file:
+                    # get the average speaker embedding from the saved d_vectors.
+                    speaker_embedding = self.tts_model.speaker_manager.get_mean_embedding(
+                        speaker_name, num_samples=None, randomize=False
+                    )
+                    speaker_embedding = np.array(speaker_embedding)[None, :]  # [1 x embedding_dim]
+                else:
+                    # get speaker idx from the speaker name
+                    speaker_id = self.tts_model.speaker_manager.name_to_id[speaker_name]
+
+            elif not speaker_name and not speaker_wav:
+                raise ValueError(
+                    " [!] Look like you use a multi-speaker model. "
+                    "You need to define either a `speaker_name` or a `speaker_wav` to use a multi-speaker model."
+                )
+            else:
+                speaker_embedding = None
+        else:
+            if speaker_name:
+                raise ValueError(
+                    f" [!] Missing speakers.json file path for selecting speaker {speaker_name}."
+                    "Define path for speaker.json if it is a multi-speaker model or remove defined speaker idx. "
+                )
+
+        # handle multi-lingual
+        language_id = None
+        if self.tts_languages_file or (
+            hasattr(self.tts_model, "language_manager") and self.tts_model.language_manager is not None
+        ):
+            if language_name and isinstance(language_name, str):
+                language_id = self.tts_model.language_manager.name_to_id[language_name]
+
+            elif not language_name:
+                raise ValueError(
+                    " [!] Look like you use a multi-lingual model. "
+                    "You need to define either a `language_name` or a `style_wav` to use a multi-lingual model."
+                )
+
+            else:
+                raise ValueError(
+                    f" [!] Missing language_ids.json file path for selecting language {language_name}."
+                    "Define path for language_ids.json if it is a multi-lingual model or remove defined language idx. "
+                )
+
+        # compute a new d_vector from the given clip.
+        if speaker_wav is not None:
+            speaker_embedding = self.tts_model.speaker_manager.compute_embedding_from_clip(speaker_wav)
+
+        # without a vocoder the tts model output is turned into audio with Griffin-Lim
+        use_gl = self.vocoder_model is None
+
+        if not reference_wav:
+            # `sens` is always defined here: `text` must be set when there is no reference wav
+            for sen in sens:
+                # synthesize voice
+                outputs = synthesis(
+                    model=self.tts_model,
+                    text=sen,
+                    CONFIG=self.tts_config,
+                    use_cuda=self.use_cuda,
+                    speaker_id=speaker_id,
+                    style_wav=style_wav,
+                    style_text=style_text,
+                    use_griffin_lim=use_gl,
+                    d_vector=speaker_embedding,
+                    language_id=language_id,
+                )
+                waveform = outputs["wav"]
+                if not use_gl:
+                    # the spectrogram is only needed when a vocoder produces the audio
+                    mel_postnet_spec = outputs["outputs"]["model_outputs"][0].detach().cpu().numpy()
+                    waveform = self._vocode_spectrogram(mel_postnet_spec)
+                    if self.use_cuda:
+                        waveform = waveform.cpu()
+                    waveform = waveform.numpy()
+                waveform = waveform.squeeze()
+
+                # trim silence
+                if "do_trim_silence" in self.tts_config.audio and self.tts_config.audio["do_trim_silence"]:
+                    waveform = trim_silence(waveform, self.tts_model.ap)
+
+                wavs += list(waveform)
+                # 10000 zero samples separate consecutive sentences
+                wavs += [0] * 10000
+        else:
+            # get the speaker embedding or speaker id for the reference wav file
+            reference_speaker_embedding = None
+            reference_speaker_id = None
+            if self.tts_speakers_file or hasattr(self.tts_model.speaker_manager, "name_to_id"):
+                if reference_speaker_name and isinstance(reference_speaker_name, str):
+                    if self.tts_config.use_d_vector_file:
+                        # get the speaker embedding from the saved d_vectors.
+                        reference_speaker_embedding = self.tts_model.speaker_manager.get_embeddings_by_name(
+                            reference_speaker_name
+                        )[0]
+                        reference_speaker_embedding = np.array(reference_speaker_embedding)[
+                            None, :
+                        ]  # [1 x embedding_dim]
+                    else:
+                        # get speaker idx from the speaker name
+                        reference_speaker_id = self.tts_model.speaker_manager.name_to_id[reference_speaker_name]
+                else:
+                    reference_speaker_embedding = self.tts_model.speaker_manager.compute_embedding_from_clip(
+                        reference_wav
+                    )
+            outputs = transfer_voice(
+                model=self.tts_model,
+                CONFIG=self.tts_config,
+                use_cuda=self.use_cuda,
+                reference_wav=reference_wav,
+                speaker_id=speaker_id,
+                d_vector=speaker_embedding,
+                use_griffin_lim=use_gl,
+                reference_speaker_id=reference_speaker_id,
+                reference_d_vector=reference_speaker_embedding,
+            )
+            waveform = outputs
+            if not use_gl:
+                mel_postnet_spec = outputs[0].detach().cpu().numpy()
+                waveform = self._vocode_spectrogram(mel_postnet_spec)
+            if self.use_cuda:
+                waveform = waveform.cpu()
+            if not use_gl:
+                waveform = waveform.numpy()
+            wavs = waveform.squeeze()
+
+        # compute stats
+        process_time = time.time() - start_time
+        # the audio is at the vocoder rate when a vocoder is used, so use the output rate
+        audio_time = len(wavs) / self.output_sample_rate
+        print(f" > Processing time: {process_time}")
+        if audio_time > 0:
+            # skipped for empty audio, which would otherwise divide by zero
+            print(f" > Real-time factor: {process_time / audio_time}")
+        return wavs
+
+    def _vocode_spectrogram(self, mel_postnet_spec):
+        """Run the loaded vocoder on a spectrogram produced by the tts model.
+
+        Args:
+            mel_postnet_spec (np.ndarray): tts model output, still normalized with the tts audio config.
+
+        Returns:
+            The output of `self.vocoder_model.inference`, left on the inference device.
+        """
+        # denormalize tts output based on tts audio config
+        mel_postnet_spec = self.tts_model.ap.denormalize(mel_postnet_spec.T).T
+        device_type = "cuda" if self.use_cuda else "cpu"
+        # renormalize spectrogram based on vocoder config
+        vocoder_input = self.vocoder_ap.normalize(mel_postnet_spec.T)
+        # compute scale factor for possible sample rate mismatch
+        scale_factor = [
+            1,
+            self.vocoder_config["audio"]["sample_rate"] / self.tts_model.ap.sample_rate,
+        ]
+        if scale_factor[1] != 1:
+            print(" > interpolating tts model output.")
+            vocoder_input = interpolate_vocoder_input(scale_factor, vocoder_input)
+        else:
+            vocoder_input = torch.tensor(vocoder_input).unsqueeze(0)  # pylint: disable=not-callable
+        # run vocoder model
+        # [1, T, C]
+        return self.vocoder_model.inference(vocoder_input.to(device_type))
diff --git a/TTS/utils/training.py b/TTS/utils/training.py
new file mode 100644
index 0000000000000000000000000000000000000000..b51f55e92b56bece69ae61f99f68b48c88938261
--- /dev/null
+++ b/TTS/utils/training.py
@@ -0,0 +1,44 @@
+import numpy as np
+import torch
+
+
+def check_update(model, grad_clip, ignore_stopnet=False, amp_opt_params=None):
+    r"""Clip the gradient norm and report whether it came out infinite.
+
+    Args:
+        model: model whose parameters are clipped when `amp_opt_params` is empty.
+        grad_clip: maximum norm handed to `torch.nn.utils.clip_grad_norm_`.
+        ignore_stopnet (bool): leave parameters with "stopnet" in their name out of the clipping.
+            Has no effect when `amp_opt_params` is given.
+        amp_opt_params: parameters to clip instead of the model's own, when not empty.
+
+    Returns:
+        tuple: `(grad_norm, skip_flag)` where `skip_flag` is True if the norm is infinite.
+    """
+    if amp_opt_params:
+        params = amp_opt_params
+    elif ignore_stopnet:
+        params = [param for name, param in model.named_parameters() if "stopnet" not in name]
+    else:
+        params = model.parameters()
+    grad_norm = torch.nn.utils.clip_grad_norm_(params, grad_clip)
+
+    # compatibility with different torch versions: the norm may be a float or a tensor
+    is_inf = np.isinf(grad_norm) if isinstance(grad_norm, float) else torch.isinf(grad_norm)
+    skip_flag = False
+    if is_inf:
+        print(" | > Gradient is INF !!")
+        skip_flag = True
+    return grad_norm, skip_flag
+
+
+def gradual_training_scheduler(global_step, config):
+    """Pick the active gradual-training entry for the current step.
+
+    The step is scaled by the number of visible GPUs (at least 1) and compared with
+    the first element of every entry in `config.gradual_training`. The last entry
+    whose threshold has been reached wins.
+
+    Args:
+        global_step (int): current training step.
+        config: object with a `gradual_training` iterable whose entries have at least three
+            elements, the first being the start threshold.
+
+    Returns:
+        tuple: the second and third element of the selected entry
+            (presumably reduction factor and batch size - confirm against the config docs).
+
+    Raises:
+        ValueError: if no entry's threshold has been reached yet, e.g. the schedule is
+            empty or does not start at step 0.
+    """
+    num_gpus = max(torch.cuda.device_count(), 1)
+    # we set the scheduling wrt num_gpus
+    effective_step = global_step * num_gpus
+    new_values = None
+    for values in config.gradual_training:
+        if effective_step >= values[0]:
+            new_values = values
+    if new_values is None:
+        # previously this fell through to `None[1]` and failed with an opaque TypeError
+        raise ValueError(
+            f" [!] No `gradual_training` entry applies at step {global_step}. "
+            "The schedule must contain an entry whose start step has been reached."
+        )
+    return new_values[1], new_values[2]
diff --git a/TTS/utils/vad.py b/TTS/utils/vad.py
new file mode 100644
index 0000000000000000000000000000000000000000..c978c8377ce1f5beb3d45895147a741006d5edfd
--- /dev/null
+++ b/TTS/utils/vad.py
@@ -0,0 +1,84 @@
+import soundfile as sf
+import torch
+import torchaudio
+
+
+def read_audio(path):
+    """Load an audio file with torchaudio and collapse it along its first dimension.
+
+    When the loaded tensor has more than one entry along dim 0 (presumably the
+    channels - confirm against `torchaudio.load`), those entries are averaged.
+
+    Returns:
+        tuple: the waveform with dim 0 squeezed out, and the sample rate.
+    """
+    waveform, sample_rate = torchaudio.load(path)
+
+    has_several_rows = waveform.size(0) > 1
+    if has_several_rows:
+        waveform = waveform.mean(dim=0, keepdim=True)
+
+    return waveform.squeeze(0), sample_rate
+
+
+def resample_wav(wav, sr, new_sr):
+    """Resample `wav` from `sr` to `new_sr` with `torchaudio.transforms.Resample`.
+
+    A leading dimension is added for the transform and removed again afterwards.
+    """
+    resampler = torchaudio.transforms.Resample(orig_freq=sr, new_freq=new_sr)
+    resampled = resampler(wav.unsqueeze(0))
+    return resampled.squeeze(0)
+
+
+def map_timestamps_to_new_sr(vad_sr, new_sr, timestamps, just_begging_end=False):
+    """Rescale `start`/`end` sample positions from `vad_sr` to `new_sr`.
+
+    Args:
+        vad_sr: sample rate the timestamps refer to.
+        new_sr: sample rate to map the timestamps to.
+        timestamps (list): dicts with `"start"` and `"end"` entries.
+        just_begging_end (bool): when True and `timestamps` is not empty, return a single
+            entry spanning from the first start to the last end.
+
+    Returns:
+        list: dicts with integer (truncated) `"start"` and `"end"` values.
+    """
+    factor = new_sr / vad_sr
+
+    def _scaled(start, end):
+        # map to the new SR
+        return {"start": int(start * factor), "end": int(end * factor)}
+
+    if just_begging_end and timestamps:
+        # get just the start and end timestamps
+        return [_scaled(timestamps[0]["start"], timestamps[-1]["end"])]
+    return [_scaled(ts["start"], ts["end"]) for ts in timestamps]
+
+
+def get_vad_model_and_utils(use_cuda=False):
+    """Fetch the silero VAD model and its helper functions through `torch.hub`.
+
+    NOTE(review): `force_reload=True` is passed on every call; confirm that a
+    forced reload each time is intended.
+
+    Args:
+        use_cuda (bool): move the model to the GPU.
+
+    Returns:
+        tuple: `(model, get_speech_timestamps, save_audio, collect_chunks)`.
+    """
+    vad_model, helpers = torch.hub.load(
+        repo_or_dir="snakers4/silero-vad", model="silero_vad", force_reload=True, onnx=False
+    )
+    if use_cuda:
+        vad_model = vad_model.cuda()
+
+    # the third and fourth helpers are not used by this module
+    get_speech_timestamps, save_audio, _, _, collect_chunks = helpers
+    return vad_model, get_speech_timestamps, save_audio, collect_chunks
+
+
+def remove_silence(
+    model_and_utils, audio_path, out_path, vad_sample_rate=8000, trim_just_beginning_and_end=True, use_cuda=False
+):
+    """Cut non-speech parts out of an audio file and write the result as 16-bit PCM.
+
+    Args:
+        model_and_utils (tuple): `(model, get_speech_timestamps, save_audio, collect_chunks)`,
+            as returned by `get_vad_model_and_utils`.
+        audio_path: input audio file.
+        out_path: where the processed audio is written.
+        vad_sample_rate (int): sample rate the VAD model is run at.
+        trim_just_beginning_and_end (bool): keep everything between the first and the last
+            speech timestamp instead of every speech chunk separately.
+        use_cuda (bool): move the VAD input to the GPU.
+
+    Returns:
+        tuple: `(out_path, is_speech)`; when no speech is found the audio is written unchanged
+            and `is_speech` is False.
+    """
+    # get the VAD model and utils functions
+    model, get_speech_timestamps, _, collect_chunks = model_and_utils
+
+    # read ground truth wav
+    wav, gt_sample_rate = read_audio(audio_path)
+
+    # the VAD gets a resampled copy only when the sample rates differ
+    wav_vad = wav if gt_sample_rate == vad_sample_rate else resample_wav(wav, gt_sample_rate, vad_sample_rate)
+    if use_cuda:
+        wav_vad = wav_vad.cuda()
+
+    # get speech timestamps from full audio file
+    speech_timestamps = get_speech_timestamps(wav_vad, model, sampling_rate=vad_sample_rate, window_size_samples=768)
+
+    # map the current speech_timestamps to the sample rate of the ground truth audio
+    new_speech_timestamps = map_timestamps_to_new_sr(
+        vad_sample_rate, gt_sample_rate, speech_timestamps, trim_just_beginning_and_end
+    )
+
+    is_speech = bool(new_speech_timestamps)
+    if is_speech:
+        wav = collect_chunks(new_speech_timestamps, wav)
+    else:
+        print(f"> The file {audio_path} probably does not have speech please check it !!")
+
+    # save audio
+    sf.write(out_path, wav, gt_sample_rate, subtype="PCM_16")
+    return out_path, is_speech
diff --git a/TTS/vocoder/README.md b/TTS/vocoder/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..b9fb17c8f09fa6e8c217087e31fb8c52d96da536
--- /dev/null
+++ b/TTS/vocoder/README.md
@@ -0,0 +1,39 @@
+# Mozilla TTS Vocoders (Experimental)
+
+This directory contains vocoder model implementations that can be combined with the other TTS models.
+
+Currently, the following models are implemented:
+
+- Melgan
+- MultiBand-Melgan
+- ParallelWaveGAN
+- GAN-TTS (Discriminator Only)
+
+It is also very easy to adapt different vocoder models as we provide a flexible and modular (but not too modular) framework.
+
+## Training a model
+
+An example Colab Notebook that trains MelGAN with the LJSpeech dataset is coming soon.
+
+In order to train a new model, you need to gather all wav files into a folder and give this folder to `data_path` in `config.json`.
+
+You need to define the other relevant parameters in your `config.json` and then start training with the following command.
+
+```CUDA_VISIBLE_DEVICES='0' python tts/bin/train_vocoder.py --config_path path/to/config.json```
+
+Example config files can be found under `tts/vocoder/configs/` folder.
+
+You can continue a previous training run by the following command.
+
+```CUDA_VISIBLE_DEVICES='0' python tts/bin/train_vocoder.py --continue_path path/to/your/model/folder```
+
+You can fine-tune a pre-trained model by the following command.
+
+```CUDA_VISIBLE_DEVICES='0' python tts/bin/train_vocoder.py --restore_path path/to/your/model.pth```
+
+Restoring a model starts a new training in a different folder. It only restores model weights with the given checkpoint file. However, continuing a training starts from the same directory where the previous training run left off.
+
+You can also follow your training runs on Tensorboard as you do with our TTS models.
+
+## Acknowledgement
+Thanks to @kan-bayashi for his [repository](https://github.com/kan-bayashi/ParallelWaveGAN) being the start point of our work.
diff --git a/TTS/vocoder/__init__.py b/TTS/vocoder/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/TTS/vocoder/__pycache__/__init__.cpython-310.pyc b/TTS/vocoder/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c0281fac23a1334d750a06e3dcc166f72a79cfe6
Binary files /dev/null and b/TTS/vocoder/__pycache__/__init__.cpython-310.pyc differ
diff --git a/TTS/vocoder/__pycache__/__init__.cpython-38.pyc b/TTS/vocoder/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8f3db748b497a8508b5b27f4051116f7fdf502ae
Binary files /dev/null and b/TTS/vocoder/__pycache__/__init__.cpython-38.pyc differ
diff --git a/TTS/vocoder/__pycache__/__init__.cpython-39.pyc b/TTS/vocoder/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..081fdc7f2fc967fb16b53ecbd8ea1c48a80ea4f7
Binary files /dev/null and b/TTS/vocoder/__pycache__/__init__.cpython-39.pyc differ
diff --git a/TTS/vocoder/configs/__init__.py b/TTS/vocoder/configs/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..b5e11b990c6d7294e7cb00c3e024bbb5f94a8105
--- /dev/null
+++ b/TTS/vocoder/configs/__init__.py
@@ -0,0 +1,17 @@
+import importlib
+import os
+from inspect import isclass
+
+# Import every config module/package found next to this file and re-export its classes.
+configs_dir = os.path.dirname(__file__)
+for file in os.listdir(configs_dir):
+    path = os.path.join(configs_dir, file)
+    # private ("_...") and hidden (".…") entries are never imported
+    if file.startswith("_") or file.startswith("."):
+        continue
+    # only python files and sub-directories are treated as config modules
+    if not (file.endswith(".py") or os.path.isdir(path)):
+        continue
+    # module name: everything before the first ".py" for files, the plain name for directories
+    config_name = file.partition(".py")[0] if file.endswith(".py") else file
+    module = importlib.import_module("TTS.vocoder.configs." + config_name)
+    for attribute_name in dir(module):
+        attribute = getattr(module, attribute_name)
+        if not isclass(attribute):
+            continue
+        # Add the class to this package's variables
+        globals()[attribute_name] = attribute
diff --git a/TTS/vocoder/configs/__pycache__/__init__.cpython-310.pyc b/TTS/vocoder/configs/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a3b557b9799ffda56c581f273652d3a898566841
Binary files /dev/null and b/TTS/vocoder/configs/__pycache__/__init__.cpython-310.pyc differ
diff --git a/TTS/vocoder/configs/__pycache__/__init__.cpython-38.pyc b/TTS/vocoder/configs/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..9d3f0338ed77f1a02c9358e8230cc19711d91e2f
Binary files /dev/null and b/TTS/vocoder/configs/__pycache__/__init__.cpython-38.pyc differ
diff --git a/TTS/vocoder/configs/__pycache__/__init__.cpython-39.pyc b/TTS/vocoder/configs/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..59d28df7814785d70c56355f22def7cc94487893
Binary files /dev/null and b/TTS/vocoder/configs/__pycache__/__init__.cpython-39.pyc differ
diff --git a/TTS/vocoder/configs/__pycache__/fullband_melgan_config.cpython-310.pyc b/TTS/vocoder/configs/__pycache__/fullband_melgan_config.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e30240d66d301d30e403f95582203033f3e9a048
Binary files /dev/null and b/TTS/vocoder/configs/__pycache__/fullband_melgan_config.cpython-310.pyc differ
diff --git a/TTS/vocoder/configs/__pycache__/fullband_melgan_config.cpython-38.pyc b/TTS/vocoder/configs/__pycache__/fullband_melgan_config.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ba4744bcb7f6ce8c87e6c8668d5e7f1f9fb1afa7
Binary files /dev/null and b/TTS/vocoder/configs/__pycache__/fullband_melgan_config.cpython-38.pyc differ
diff --git a/TTS/vocoder/configs/__pycache__/fullband_melgan_config.cpython-39.pyc b/TTS/vocoder/configs/__pycache__/fullband_melgan_config.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a0aecc14f3ff70f0b69c3f9dc14f953727272e8a
Binary files /dev/null and b/TTS/vocoder/configs/__pycache__/fullband_melgan_config.cpython-39.pyc differ
diff --git a/TTS/vocoder/configs/__pycache__/hifigan_config.cpython-310.pyc b/TTS/vocoder/configs/__pycache__/hifigan_config.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ca210a8eb64c501e362a42835d1cf3480426ddae
Binary files /dev/null and b/TTS/vocoder/configs/__pycache__/hifigan_config.cpython-310.pyc differ
diff --git a/TTS/vocoder/configs/__pycache__/hifigan_config.cpython-38.pyc b/TTS/vocoder/configs/__pycache__/hifigan_config.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0138908369bf9fefe80146b032003731f9b9273e
Binary files /dev/null and b/TTS/vocoder/configs/__pycache__/hifigan_config.cpython-38.pyc differ
diff --git a/TTS/vocoder/configs/__pycache__/hifigan_config.cpython-39.pyc b/TTS/vocoder/configs/__pycache__/hifigan_config.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6237b28bbcc019f9988202841bd07d572d4409b3
Binary files /dev/null and b/TTS/vocoder/configs/__pycache__/hifigan_config.cpython-39.pyc differ
diff --git a/TTS/vocoder/configs/__pycache__/melgan_config.cpython-310.pyc b/TTS/vocoder/configs/__pycache__/melgan_config.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c696b9b0c210a9f525c8c04bd3282701187c6901
Binary files /dev/null and b/TTS/vocoder/configs/__pycache__/melgan_config.cpython-310.pyc differ
diff --git a/TTS/vocoder/configs/__pycache__/melgan_config.cpython-38.pyc b/TTS/vocoder/configs/__pycache__/melgan_config.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7e0ebfddf43fb7ae8ffed1214a79983dee03203b
Binary files /dev/null and b/TTS/vocoder/configs/__pycache__/melgan_config.cpython-38.pyc differ
diff --git a/TTS/vocoder/configs/__pycache__/melgan_config.cpython-39.pyc b/TTS/vocoder/configs/__pycache__/melgan_config.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8e4188133c8e91daac50343e9fb63c4631536a96
Binary files /dev/null and b/TTS/vocoder/configs/__pycache__/melgan_config.cpython-39.pyc differ
diff --git a/TTS/vocoder/configs/__pycache__/multiband_melgan_config.cpython-310.pyc b/TTS/vocoder/configs/__pycache__/multiband_melgan_config.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..31037f30ed191570759f0dfc606c0d00bae01278
Binary files /dev/null and b/TTS/vocoder/configs/__pycache__/multiband_melgan_config.cpython-310.pyc differ
diff --git a/TTS/vocoder/configs/__pycache__/multiband_melgan_config.cpython-38.pyc b/TTS/vocoder/configs/__pycache__/multiband_melgan_config.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0f882936a75cdd8439204fcb5416ad6171c4034f
Binary files /dev/null and b/TTS/vocoder/configs/__pycache__/multiband_melgan_config.cpython-38.pyc differ
diff --git a/TTS/vocoder/configs/__pycache__/multiband_melgan_config.cpython-39.pyc b/TTS/vocoder/configs/__pycache__/multiband_melgan_config.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..92a0a53c2a1de6b66ac1296be729c17c44c0e1be
Binary files /dev/null and b/TTS/vocoder/configs/__pycache__/multiband_melgan_config.cpython-39.pyc differ
diff --git a/TTS/vocoder/configs/__pycache__/parallel_wavegan_config.cpython-310.pyc b/TTS/vocoder/configs/__pycache__/parallel_wavegan_config.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..32f86217833abe5b07f9024dcd2758ed83ee09be
Binary files /dev/null and b/TTS/vocoder/configs/__pycache__/parallel_wavegan_config.cpython-310.pyc differ
diff --git a/TTS/vocoder/configs/__pycache__/parallel_wavegan_config.cpython-38.pyc b/TTS/vocoder/configs/__pycache__/parallel_wavegan_config.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..1be0a39fb529996713daefd3e69d6bb2cf17e91f
Binary files /dev/null and b/TTS/vocoder/configs/__pycache__/parallel_wavegan_config.cpython-38.pyc differ
diff --git a/TTS/vocoder/configs/__pycache__/parallel_wavegan_config.cpython-39.pyc b/TTS/vocoder/configs/__pycache__/parallel_wavegan_config.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8bcb49b74eb7e52dec9b530b5d608eebe6d1fd44
Binary files /dev/null and b/TTS/vocoder/configs/__pycache__/parallel_wavegan_config.cpython-39.pyc differ
diff --git a/TTS/vocoder/configs/__pycache__/shared_configs.cpython-310.pyc b/TTS/vocoder/configs/__pycache__/shared_configs.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6b2d7e014e83051b748f68b23e4426b2dcee49a8
Binary files /dev/null and b/TTS/vocoder/configs/__pycache__/shared_configs.cpython-310.pyc differ
diff --git a/TTS/vocoder/configs/__pycache__/shared_configs.cpython-38.pyc b/TTS/vocoder/configs/__pycache__/shared_configs.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5f4ac5809915f16f251401bb6c1d204f065d51fb
Binary files /dev/null and b/TTS/vocoder/configs/__pycache__/shared_configs.cpython-38.pyc differ
diff --git a/TTS/vocoder/configs/__pycache__/shared_configs.cpython-39.pyc b/TTS/vocoder/configs/__pycache__/shared_configs.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ebf5bb6973cb33ef46c7259485f6278f4e386d31
Binary files /dev/null and b/TTS/vocoder/configs/__pycache__/shared_configs.cpython-39.pyc differ
diff --git a/TTS/vocoder/configs/__pycache__/univnet_config.cpython-310.pyc b/TTS/vocoder/configs/__pycache__/univnet_config.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..109f6f96687e6fe214b31725404ccbae983b57f1
Binary files /dev/null and b/TTS/vocoder/configs/__pycache__/univnet_config.cpython-310.pyc differ
diff --git a/TTS/vocoder/configs/__pycache__/univnet_config.cpython-38.pyc b/TTS/vocoder/configs/__pycache__/univnet_config.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d4286ec5aac5115fe0e3c37662ce732401d8dcf7
Binary files /dev/null and b/TTS/vocoder/configs/__pycache__/univnet_config.cpython-38.pyc differ
diff --git a/TTS/vocoder/configs/__pycache__/univnet_config.cpython-39.pyc b/TTS/vocoder/configs/__pycache__/univnet_config.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..2e43e568eb2abdf6e524acd3bfd16cb2348b2330
Binary files /dev/null and b/TTS/vocoder/configs/__pycache__/univnet_config.cpython-39.pyc differ
diff --git a/TTS/vocoder/configs/__pycache__/wavegrad_config.cpython-310.pyc b/TTS/vocoder/configs/__pycache__/wavegrad_config.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..65a2d9b913226f3956682edfd660342041291b9c
Binary files /dev/null and b/TTS/vocoder/configs/__pycache__/wavegrad_config.cpython-310.pyc differ
diff --git a/TTS/vocoder/configs/__pycache__/wavegrad_config.cpython-38.pyc b/TTS/vocoder/configs/__pycache__/wavegrad_config.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e813df954c643902781a25c405db7f87c4a16b03
Binary files /dev/null and b/TTS/vocoder/configs/__pycache__/wavegrad_config.cpython-38.pyc differ
diff --git a/TTS/vocoder/configs/__pycache__/wavegrad_config.cpython-39.pyc b/TTS/vocoder/configs/__pycache__/wavegrad_config.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..16427f88b068f9409a127e5d779c72263e59631a
Binary files /dev/null and b/TTS/vocoder/configs/__pycache__/wavegrad_config.cpython-39.pyc differ
diff --git a/TTS/vocoder/configs/__pycache__/wavernn_config.cpython-310.pyc b/TTS/vocoder/configs/__pycache__/wavernn_config.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..17637f40372339640eda2d943b4abc57b953061f
Binary files /dev/null and b/TTS/vocoder/configs/__pycache__/wavernn_config.cpython-310.pyc differ
diff --git a/TTS/vocoder/configs/__pycache__/wavernn_config.cpython-38.pyc b/TTS/vocoder/configs/__pycache__/wavernn_config.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b083daad4ab7ed5367b09bb45f396363bf89c1fe
Binary files /dev/null and b/TTS/vocoder/configs/__pycache__/wavernn_config.cpython-38.pyc differ
diff --git a/TTS/vocoder/configs/__pycache__/wavernn_config.cpython-39.pyc b/TTS/vocoder/configs/__pycache__/wavernn_config.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..98b493f4d39959510ae1a890f67e7b2bc50052cb
Binary files /dev/null and b/TTS/vocoder/configs/__pycache__/wavernn_config.cpython-39.pyc differ
diff --git a/TTS/vocoder/configs/fullband_melgan_config.py b/TTS/vocoder/configs/fullband_melgan_config.py
new file mode 100644
index 0000000000000000000000000000000000000000..2ab83aace678e328a8f99a5f0dc63e54ed99d4c4
--- /dev/null
+++ b/TTS/vocoder/configs/fullband_melgan_config.py
@@ -0,0 +1,106 @@
+from dataclasses import dataclass, field
+
+from .shared_configs import BaseGANVocoderConfig
+
+
+@dataclass
+class FullbandMelganConfig(BaseGANVocoderConfig):
+    """Defines parameters for FullBand MelGAN vocoder.
+
+    Example:
+
+        >>> from TTS.vocoder.configs import FullbandMelganConfig
+        >>> config = FullbandMelganConfig()
+
+    Args:
+        model (str):
+            Model name used for selecting the right model at initialization. Defaults to `fullband_melgan`.
+        discriminator_model (str): One of the discriminators from `TTS.vocoder.models.*_discriminator`. Defaults to
+            `melgan_multiscale_discriminator`.
+        discriminator_model_params (dict): The discriminator model parameters. Defaults to
+            `{"base_channels": 16, "max_channels": 512, "downsample_factors": [4, 4, 4]}`
+        generator_model (str): One of the generators from `TTS.vocoder.models.*`. Every other non-GAN vocoder model is
+            considered as a generator too. Defaults to `melgan_generator`.
+        batch_size (int):
+            Batch size used at training. Larger values use more memory. Defaults to 16.
+        seq_len (int):
+            Audio segment length used at training. Larger values use more memory. Defaults to 8192.
+        pad_short (int):
+            Additional padding applied to the audio samples shorter than `seq_len`. Defaults to 2000.
+        use_noise_augment (bool):
+            enable / disable random noise added to the input waveform. The noise is added after computing the
+            features. Defaults to True.
+        use_cache (bool):
+            enable / disable in memory caching of the computed features. It can cause OOM error if the system RAM is
+            not large enough. Defaults to True.
+        use_stft_loss (bool):
+            enable / disable use of STFT loss originally used by ParallelWaveGAN model. Defaults to True.
+        use_subband_stft_loss (bool):
+            enable / disable use of subband loss computation originally used by MultiBandMelgan model. Defaults to False.
+        use_mse_gan_loss (bool):
+            enable / disable using Mean Square Error GAN loss. Defaults to True.
+        use_hinge_gan_loss (bool):
+            enable / disable using Hinge GAN loss. You should choose either Hinge or MSE loss for training GAN models.
+            Defaults to False.
+        use_feat_match_loss (bool):
+            enable / disable using Feature Matching loss originally used by MelGAN model. Defaults to True.
+        use_l1_spec_loss (bool):
+            enable / disable using L1 spectrogram loss originally used by HifiGAN model. Defaults to False.
+        stft_loss_params (dict): STFT loss parameters. Defaults to
+            `{"n_ffts": [1024, 2048, 512], "hop_lengths": [120, 240, 50], "win_lengths": [600, 1200, 240]}`
+        stft_loss_weight (float): STFT loss weight that multiplies the computed loss before summing up the total
+            model loss. Defaults to 0.5.
+        subband_stft_loss_weight (float):
+            Subband STFT loss weight that multiplies the computed loss before summing up the total loss. Defaults to 0.
+        mse_G_loss_weight (float):
+            MSE generator loss weight that multiplies the computed loss before summing up the total loss. Defaults to 2.5.
+        hinge_G_loss_weight (float):
+            Hinge generator loss weight that multiplies the computed loss before summing up the total loss. Defaults to 0.
+        feat_match_loss_weight (float):
+            Feature matching loss weight that multiplies the computed loss before summing up the total loss. Defaults to 108.
+        l1_spec_loss_weight (float):
+            L1 spectrogram loss weight that multiplies the computed loss before summing up the total loss. Defaults to 0.
+    """
+
+    model: str = "fullband_melgan"
+
+    # Model specific params
+    discriminator_model: str = "melgan_multiscale_discriminator"
+    discriminator_model_params: dict = field(
+        default_factory=lambda: {"base_channels": 16, "max_channels": 512, "downsample_factors": [4, 4, 4]}
+    )
+    generator_model: str = "melgan_generator"
+    generator_model_params: dict = field(
+        default_factory=lambda: {"upsample_factors": [8, 8, 2, 2], "num_res_blocks": 4}
+    )
+
+    # Training - overrides
+    batch_size: int = 16
+    seq_len: int = 8192
+    pad_short: int = 2000
+    use_noise_augment: bool = True
+    use_cache: bool = True
+
+    # LOSS PARAMETERS - overrides
+    use_stft_loss: bool = True
+    use_subband_stft_loss: bool = False
+    use_mse_gan_loss: bool = True
+    use_hinge_gan_loss: bool = False
+    use_feat_match_loss: bool = True  # requires MelGAN Discriminators (MelGAN and HifiGAN)
+    use_l1_spec_loss: bool = False
+
+    stft_loss_params: dict = field(
+        default_factory=lambda: {
+            "n_ffts": [1024, 2048, 512],
+            "hop_lengths": [120, 240, 50],
+            "win_lengths": [600, 1200, 240],
+        }
+    )
+
+    # loss weights - overrides
+    stft_loss_weight: float = 0.5
+    subband_stft_loss_weight: float = 0
+    mse_G_loss_weight: float = 2.5
+    hinge_G_loss_weight: float = 0
+    feat_match_loss_weight: float = 108
+    l1_spec_loss_weight: float = 0.0
diff --git a/TTS/vocoder/configs/hifigan_config.py b/TTS/vocoder/configs/hifigan_config.py
new file mode 100644
index 0000000000000000000000000000000000000000..9a102f0c89588b1a7fe270225e4b0fefa2e4bc71
--- /dev/null
+++ b/TTS/vocoder/configs/hifigan_config.py
@@ -0,0 +1,136 @@
+from dataclasses import dataclass, field
+
+from TTS.vocoder.configs.shared_configs import BaseGANVocoderConfig
+
+
+@dataclass
+class HifiganConfig(BaseGANVocoderConfig):
+    """Defines parameters for HifiGAN vocoder.
+
+    Example:
+
+        >>> from TTS.vocoder.configs import HifiganConfig
+        >>> config = HifiganConfig()
+
+    Args:
+        model (str):
+            Model name used for selecting the right model at initialization. Defaults to `hifigan`.
+        discriminator_model (str): One of the discriminators from `TTS.vocoder.models.*_discriminator`. Defaults to
+            `hifigan_discriminator`.
+        generator_model (str): One of the generators from `TTS.vocoder.models.*`. Every other non-GAN vocoder model is
+            considered as a generator too. Defaults to `hifigan_generator`.
+        generator_model_params (dict): Parameters of the generator model. Defaults to
+            `
+            {
+                "upsample_factors": [8, 8, 2, 2],
+                "upsample_kernel_sizes": [16, 16, 4, 4],
+                "upsample_initial_channel": 512,
+                "resblock_kernel_sizes": [3, 7, 11],
+                "resblock_dilation_sizes": [[1, 3, 5], [1, 3, 5], [1, 3, 5]],
+                "resblock_type": "1",
+            }
+            `
+        batch_size (int):
+            Batch size used at training. Larger values use more memory. Defaults to 16.
+        seq_len (int):
+            Audio segment length used at training. Larger values use more memory. Defaults to 8192.
+        pad_short (int):
+            Additional padding applied to the audio samples shorter than `seq_len`. Defaults to 0.
+        use_noise_augment (bool):
+            enable / disable random noise added to the input waveform. The noise is added after computing the
+            features. Defaults to True.
+        use_cache (bool):
+            enable / disable in memory caching of the computed features. It can cause OOM error if the system RAM is
+            not large enough. Defaults to True.
+        use_stft_loss (bool):
+            enable / disable use of STFT loss originally used by ParallelWaveGAN model. Defaults to False.
+        use_subband_stft_loss (bool):
+            enable / disable use of subband loss computation originally used by MultiBandMelgan model. Defaults to False.
+        use_mse_gan_loss (bool):
+            enable / disable using Mean Square Error GAN loss. Defaults to True.
+        use_hinge_gan_loss (bool):
+            enable / disable using Hinge GAN loss. You should choose either Hinge or MSE loss for training GAN models.
+            Defaults to False.
+        use_feat_match_loss (bool):
+            enable / disable using Feature Matching loss originally used by MelGAN model. Defaults to True.
+        use_l1_spec_loss (bool):
+            enable / disable using L1 spectrogram loss originally used by HifiGAN model. Defaults to True.
+        stft_loss_params (dict):
+            STFT loss parameters. Defaults to
+            `{
+                "n_ffts": [1024, 2048, 512],
+                "hop_lengths": [120, 240, 50],
+                "win_lengths": [600, 1200, 240]
+            }`
+        l1_spec_loss_params (dict):
+            L1 spectrogram loss parameters. Defaults to
+            `{
+                "use_mel": True,
+                "sample_rate": 22050,
+                "n_fft": 1024,
+                "hop_length": 256,
+                "win_length": 1024,
+                "n_mels": 80,
+                "mel_fmin": 0.0,
+                "mel_fmax": None,
+            }`
+        stft_loss_weight (float): STFT loss weight that multiplies the computed loss before summing up the total
+            model loss. Defaults to 0.
+        subband_stft_loss_weight (float):
+            Subband STFT loss weight that multiplies the computed loss before summing up the total loss. Defaults to 0.
+        mse_G_loss_weight (float):
+            MSE generator loss weight that multiplies the computed loss before summing up the total loss. Defaults to 1.
+        hinge_G_loss_weight (float):
+            Hinge generator loss weight that multiplies the computed loss before summing up the total loss. Defaults to 0.
+        feat_match_loss_weight (float):
+            Feature matching loss weight that multiplies the computed loss before summing up the total loss. Defaults to 108.
+        l1_spec_loss_weight (float):
+            L1 spectrogram loss weight that multiplies the computed loss before summing up the total loss. Defaults to 45.
+    """
+
+    model: str = "hifigan"
+    # model specific params
+    discriminator_model: str = "hifigan_discriminator"
+    generator_model: str = "hifigan_generator"
+    generator_model_params: dict = field(
+        default_factory=lambda: {
+            "upsample_factors": [8, 8, 2, 2],
+            "upsample_kernel_sizes": [16, 16, 4, 4],
+            "upsample_initial_channel": 512,
+            "resblock_kernel_sizes": [3, 7, 11],
+            "resblock_dilation_sizes": [[1, 3, 5], [1, 3, 5], [1, 3, 5]],
+            "resblock_type": "1",
+        }
+    )
+
+    # LOSS PARAMETERS - overrides
+    use_stft_loss: bool = False
+    use_subband_stft_loss: bool = False
+    use_mse_gan_loss: bool = True
+    use_hinge_gan_loss: bool = False
+    use_feat_match_loss: bool = True  # requires MelGAN Discriminators (MelGAN and HifiGAN)
+    use_l1_spec_loss: bool = True
+
+    # loss weights - overrides
+    stft_loss_weight: float = 0
+    subband_stft_loss_weight: float = 0
+    mse_G_loss_weight: float = 1
+    hinge_G_loss_weight: float = 0
+    feat_match_loss_weight: float = 108
+    l1_spec_loss_weight: float = 45
+    l1_spec_loss_params: dict = field(
+        default_factory=lambda: {
+            "use_mel": True,
+            "sample_rate": 22050,
+            "n_fft": 1024,
+            "hop_length": 256,
+            "win_length": 1024,
+            "n_mels": 80,
+            "mel_fmin": 0.0,
+            "mel_fmax": None,
+        }
+    )
+
+    # optimizer parameters
+    lr: float = 1e-4
+    wd: float = 1e-6
diff --git a/TTS/vocoder/configs/melgan_config.py b/TTS/vocoder/configs/melgan_config.py
new file mode 100644
index 0000000000000000000000000000000000000000..dc35b6f8b70891d4904baefad802d9c62fe67925
--- /dev/null
+++ b/TTS/vocoder/configs/melgan_config.py
@@ -0,0 +1,106 @@
+from dataclasses import dataclass, field
+
+from TTS.vocoder.configs.shared_configs import BaseGANVocoderConfig
+
+
+@dataclass
+class MelganConfig(BaseGANVocoderConfig):
+    """Defines parameters for MelGAN vocoder.
+
+    Example:
+
+        >>> from TTS.vocoder.configs import MelganConfig
+        >>> config = MelganConfig()
+
+    Args:
+        model (str):
+            Model name used for selecting the right model at initialization. Defaults to `melgan`.
+        discriminator_model (str): One of the discriminators from `TTS.vocoder.models.*_discriminator`. Defaults to
+            `melgan_multiscale_discriminator`.
+        discriminator_model_params (dict): The discriminator model parameters. Defaults to
+            `{"base_channels": 16, "max_channels": 1024, "downsample_factors": [4, 4, 4, 4]}`
+        generator_model (str): One of the generators from `TTS.vocoder.models.*`. Every other non-GAN vocoder model is
+            considered as a generator too. Defaults to `melgan_generator`.
+        batch_size (int):
+            Batch size used at training. Larger values use more memory. Defaults to 16.
+        seq_len (int):
+            Audio segment length used at training. Larger values use more memory. Defaults to 8192.
+        pad_short (int):
+            Additional padding applied to the audio samples shorter than `seq_len`. Defaults to 2000.
+        use_noise_augment (bool):
+            enable / disable random noise added to the input waveform. The noise is added after computing the
+            features. Defaults to True.
+        use_cache (bool):
+            enable / disable in memory caching of the computed features. It can cause OOM error if the system RAM is
+            not large enough. Defaults to True.
+        use_stft_loss (bool):
+            enable / disable use of STFT loss originally used by ParallelWaveGAN model. Defaults to True.
+        use_subband_stft_loss (bool):
+            enable / disable use of subband loss computation originally used by MultiBandMelgan model. Defaults to False.
+        use_mse_gan_loss (bool):
+            enable / disable using Mean Square Error GAN loss. Defaults to True.
+        use_hinge_gan_loss (bool):
+            enable / disable using Hinge GAN loss. You should choose either Hinge or MSE loss for training GAN models.
+            Defaults to False.
+        use_feat_match_loss (bool):
+            enable / disable using Feature Matching loss originally used by MelGAN model. Defaults to True.
+        use_l1_spec_loss (bool):
+            enable / disable using L1 spectrogram loss originally used by HifiGAN model. Defaults to False.
+        stft_loss_params (dict): STFT loss parameters. Defaults to
+            `{"n_ffts": [1024, 2048, 512], "hop_lengths": [120, 240, 50], "win_lengths": [600, 1200, 240]}`
+        stft_loss_weight (float): STFT loss weight that multiplies the computed loss before summing up the total
+            model loss. Defaults to 0.5.
+        subband_stft_loss_weight (float):
+            Subband STFT loss weight that multiplies the computed loss before summing up the total loss. Defaults to 0.
+        mse_G_loss_weight (float):
+            MSE generator loss weight that multiplies the computed loss before summing up the total loss. Defaults to 2.5.
+        hinge_G_loss_weight (float):
+            Hinge generator loss weight that multiplies the computed loss before summing up the total loss. Defaults to 0.
+        feat_match_loss_weight (float):
+            Feature matching loss weight that multiplies the computed loss before summing up the total loss. Defaults to 108.
+        l1_spec_loss_weight (float):
+            L1 spectrogram loss weight that multiplies the computed loss before summing up the total loss. Defaults to 0.
+    """
+
+    model: str = "melgan"
+
+    # Model specific params
+    discriminator_model: str = "melgan_multiscale_discriminator"
+    discriminator_model_params: dict = field(
+        default_factory=lambda: {"base_channels": 16, "max_channels": 1024, "downsample_factors": [4, 4, 4, 4]}
+    )
+    generator_model: str = "melgan_generator"
+    generator_model_params: dict = field(
+        default_factory=lambda: {"upsample_factors": [8, 8, 2, 2], "num_res_blocks": 3}
+    )
+
+    # Training - overrides
+    batch_size: int = 16
+    seq_len: int = 8192
+    pad_short: int = 2000
+    use_noise_augment: bool = True
+    use_cache: bool = True
+
+    # LOSS PARAMETERS - overrides
+    use_stft_loss: bool = True
+    use_subband_stft_loss: bool = False
+    use_mse_gan_loss: bool = True
+    use_hinge_gan_loss: bool = False
+    use_feat_match_loss: bool = True  # requires MelGAN Discriminators (MelGAN and HifiGAN)
+    use_l1_spec_loss: bool = False
+
+    stft_loss_params: dict = field(
+        default_factory=lambda: {
+            "n_ffts": [1024, 2048, 512],
+            "hop_lengths": [120, 240, 50],
+            "win_lengths": [600, 1200, 240],
+        }
+    )
+
+    # loss weights - overrides
+    stft_loss_weight: float = 0.5
+    subband_stft_loss_weight: float = 0
+    mse_G_loss_weight: float = 2.5
+    hinge_G_loss_weight: float = 0
+    feat_match_loss_weight: float = 108
+    l1_spec_loss_weight: float = 0
diff --git a/TTS/vocoder/configs/multiband_melgan_config.py b/TTS/vocoder/configs/multiband_melgan_config.py
new file mode 100644
index 0000000000000000000000000000000000000000..763113537f36a8615b2b77369bf5bde01527fe53
--- /dev/null
+++ b/TTS/vocoder/configs/multiband_melgan_config.py
@@ -0,0 +1,144 @@
+from dataclasses import dataclass, field
+
+from TTS.vocoder.configs.shared_configs import BaseGANVocoderConfig
+
+
+@dataclass
+class MultibandMelganConfig(BaseGANVocoderConfig):
+    """Defines parameters for MultiBandMelGAN vocoder.
+
+    Example:
+
+        >>> from TTS.vocoder.configs import MultibandMelganConfig
+        >>> config = MultibandMelganConfig()
+
+    Args:
+        model (str):
+            Model name used for selecting the right model at initialization. Defaults to `multiband_melgan`.
+        discriminator_model (str): One of the discriminators from `TTS.vocoder.models.*_discriminator`. Defaults to
+            'melgan_multiscale_discriminator`.
+        discriminator_model_params (dict): The discriminator model parameters. Defaults to
+            '{
+                "base_channels": 16,
+                "max_channels": 512,
+                "downsample_factors": [4, 4, 4]
+            }`
+        generator_model (str): One of the generators from TTS.vocoder.models.*`. Every other non-GAN vocoder model is
+            considered as a generator too. Defaults to `melgan_generator`.
+        generator_model_param (dict):
+            The generator model parameters. Defaults to `{"upsample_factors": [8, 4, 2], "num_res_blocks": 4}`.
+        use_pqmf (bool):
+            enable / disable PQMF modulation for multi-band training. Defaults to True.
+        lr_gen (float):
+            Initial learning rate for the generator model. Defaults to 0.0001.
+        lr_disc (float):
+            Initial learning rate for the discriminator model. Defaults to 0.0001.
+        optimizer (torch.optim.Optimizer):
+            Optimizer used for the training. Defaults to `AdamW`.
+        optimizer_params (dict):
+            Optimizer kwargs. Defaults to `{"betas": [0.8, 0.99], "weight_decay": 0.0}`
+        lr_scheduler_gen (torch.optim.Scheduler):
+            Learning rate scheduler for the generator. Defaults to `MultiStepLR`.
+        lr_scheduler_gen_params (dict):
+            Parameters for the generator learning rate scheduler. Defaults to
+            `{"gamma": 0.5, "milestones": [100000, 200000, 300000, 400000, 500000, 600000]}`.
+        lr_scheduler_disc (torch.optim.Scheduler):
+            Learning rate scheduler for the discriminator. Defaults to `MultiStepLR`.
+        lr_scheduler_dict_params (dict):
+            Parameters for the discriminator learning rate scheduler. Defaults to
+            `{"gamma": 0.5, "milestones": [100000, 200000, 300000, 400000, 500000, 600000]}`.
+        batch_size (int):
+            Batch size used at training. Larger values use more memory. Defaults to 16.
+        seq_len (int):
+            Audio segment length used at training. Larger values use more memory. Defaults to 8192.
+        pad_short (int):
+            Additional padding applied to the audio samples shorter than `seq_len`. Defaults to 0.
+        use_noise_augment (bool):
+            enable / disable random noise added to the input waveform. The noise is added after computing the
+            features. Defaults to True.
+        use_cache (bool):
+            enable / disable in memory caching of the computed features. It can cause OOM error if the system RAM is
+            not large enough. Defaults to True.
+        steps_to_start_discriminator (int):
+            Number of steps required to start training the discriminator. Defaults to 0.
+        use_stft_loss (bool):`
+            enable / disable use of STFT loss originally used by ParallelWaveGAN model. Defaults to True.
+        use_subband_stft (bool):
+            enable / disable use of subband loss computation originally used by MultiBandMelgan model. Defaults to True.
+        use_mse_gan_loss (bool):
+            enable / disable using Mean Squeare Error GAN loss. Defaults to True.
+        use_hinge_gan_loss (bool):
+            enable / disable using Hinge GAN loss. You should choose either Hinge or MSE loss for training GAN models.
+            Defaults to False.
+        use_feat_match_loss (bool):
+            enable / disable using Feature Matching loss originally used by MelGAN model. Defaults to True.
+        use_l1_spec_loss (bool):
+            enable / disable using L1 spectrogram loss originally used by HifiGAN model. Defaults to False.
+        stft_loss_params (dict): STFT loss parameters. Default to
+            `{"n_ffts": [1024, 2048, 512], "hop_lengths": [120, 240, 50], "win_lengths": [600, 1200, 240]}`
+        stft_loss_weight (float): STFT loss weight that multiplies the computed loss before summing up the total
+            model loss. Defaults to 0.5.
+        subband_stft_loss_weight (float):
+            Subband STFT loss weight that multiplies the computed loss before summing up the total loss. Defaults to 0.
+        mse_G_loss_weight (float):
+            MSE generator loss weight that multiplies the computed loss before summing up the total loss. faults to 2.5.
+        hinge_G_loss_weight (float):
+            Hinge generator loss weight that multiplies the computed loss before summing up the total loss. Defaults to 0.
+        feat_match_loss_weight (float):
+            Feature matching loss weight that multiplies the computed loss before summing up the total loss. faults to 108.
+        l1_spec_loss_weight (float):
+            L1 spectrogram loss weight that multiplies the computed loss before summing up the total loss. Defaults to 0.
+    """
+
+    model: str = "multiband_melgan"
+
+    # Model specific params
+    discriminator_model: str = "melgan_multiscale_discriminator"
+    discriminator_model_params: dict = field(
+        default_factory=lambda: {"base_channels": 16, "max_channels": 512, "downsample_factors": [4, 4, 4]}
+    )
+    generator_model: str = "multiband_melgan_generator"
+    generator_model_params: dict = field(default_factory=lambda: {"upsample_factors": [8, 4, 2], "num_res_blocks": 4})
+    use_pqmf: bool = True
+
+    # optimizer - overrides
+    lr_gen: float = 0.0001  # Initial learning rate.
+    lr_disc: float = 0.0001  # Initial learning rate.
+    optimizer: str = "AdamW"
+    optimizer_params: dict = field(default_factory=lambda: {"betas": [0.8, 0.99], "weight_decay": 0.0})
+    lr_scheduler_gen: str = "MultiStepLR"  # one of the schedulers from https:#pytorch.org/docs/stable/optim.html
+    lr_scheduler_gen_params: dict = field(
+        default_factory=lambda: {"gamma": 0.5, "milestones": [100000, 200000, 300000, 400000, 500000, 600000]}
+    )
+    lr_scheduler_disc: str = "MultiStepLR"  # one of the schedulers from https:#pytorch.org/docs/stable/optim.html
+    lr_scheduler_disc_params: dict = field(
+        default_factory=lambda: {"gamma": 0.5, "milestones": [100000, 200000, 300000, 400000, 500000, 600000]}
+    )
+
+    # Training - overrides
+    batch_size: int = 64
+    seq_len: int = 16384
+    pad_short: int = 2000
+    use_noise_augment: bool = False
+    use_cache: bool = True
+    steps_to_start_discriminator: bool = 200000
+
+    # LOSS PARAMETERS - overrides
+    use_stft_loss: bool = True
+    use_subband_stft_loss: bool = True
+    use_mse_gan_loss: bool = True
+    use_hinge_gan_loss: bool = False
+    use_feat_match_loss: bool = False  # requires MelGAN Discriminators (MelGAN and HifiGAN)
+    use_l1_spec_loss: bool = False
+
+    subband_stft_loss_params: dict = field(
+        default_factory=lambda: {"n_ffts": [384, 683, 171], "hop_lengths": [30, 60, 10], "win_lengths": [150, 300, 60]}
+    )
+
+    # loss weights - overrides
+    stft_loss_weight: float = 0.5
+    subband_stft_loss_weight: float = 0
+    mse_G_loss_weight: float = 2.5
+    hinge_G_loss_weight: float = 0
+    feat_match_loss_weight: float = 108
+    l1_spec_loss_weight: float = 0
diff --git a/TTS/vocoder/configs/parallel_wavegan_config.py b/TTS/vocoder/configs/parallel_wavegan_config.py
new file mode 100644
index 0000000000000000000000000000000000000000..7845dd6bf835ebab4cc5d8b65962b7347b7711cf
--- /dev/null
+++ b/TTS/vocoder/configs/parallel_wavegan_config.py
@@ -0,0 +1,133 @@
+from dataclasses import dataclass, field
+
+from .shared_configs import BaseGANVocoderConfig
+
+
+@dataclass
+class ParallelWaveganConfig(BaseGANVocoderConfig):
+    """Defines parameters for ParallelWavegan vocoder.
+
+    Args:
+        model (str):
+            Model name used for selecting the right configuration at initialization. Defaults to `parallel_wavegan`.
+        discriminator_model (str): One of the discriminators from `TTS.vocoder.models.*_discriminator`. Defaults to
+            `parallel_wavegan_discriminator`.
+        discriminator_model_params (dict): The discriminator model kwargs. Defaults to
+            `{"num_layers": 10}`
+        generator_model (str): One of the generators from `TTS.vocoder.models.*`. Every other non-GAN vocoder model is
+            considered as a generator too. Defaults to `parallel_wavegan_generator`.
+        generator_model_params (dict):
+            The generator model kwargs. Defaults to `{"upsample_factors": [4, 4, 4, 4], "stacks": 3, "num_res_blocks": 30}`.
+        batch_size (int):
+            Batch size used at training. Larger values use more memory. Defaults to 6.
+        seq_len (int):
+            Audio segment length used at training. Larger values use more memory. Defaults to 25600.
+        pad_short (int):
+            Additional padding applied to the audio samples shorter than `seq_len`. Defaults to 2000.
+        use_noise_augment (bool):
+            enable / disable random noise added to the input waveform. The noise is added after computing the
+            features. Defaults to False.
+        use_cache (bool):
+            enable / disable in memory caching of the computed features. It can cause OOM error if the system RAM is
+            not large enough. Defaults to True.
+        steps_to_start_discriminator (int):
+            Number of steps required to start training the discriminator. Defaults to 200000.
+        use_stft_loss (bool):
+            enable / disable use of STFT loss originally used by ParallelWaveGAN model. Defaults to True.
+        use_subband_stft_loss (bool):
+            enable / disable use of subband loss computation originally used by MultiBandMelgan model. Defaults to False.
+        use_mse_gan_loss (bool):
+            enable / disable using Mean Squared Error GAN loss. Defaults to True.
+        use_hinge_gan_loss (bool):
+            enable / disable using Hinge GAN loss. You should choose either Hinge or MSE loss for training GAN models.
+            Defaults to False.
+        use_feat_match_loss (bool):
+            enable / disable using Feature Matching loss originally used by MelGAN model. Defaults to False.
+        use_l1_spec_loss (bool):
+            enable / disable using L1 spectrogram loss originally used by HifiGAN model. Defaults to False.
+        stft_loss_params (dict): STFT loss parameters. Default to
+            `{"n_ffts": [1024, 2048, 512], "hop_lengths": [120, 240, 50], "win_lengths": [600, 1200, 240]}`
+        stft_loss_weight (float): STFT loss weight that multiplies the computed loss before summing up the total
+            model loss. Defaults to 0.5.
+        subband_stft_loss_weight (float):
+            Subband STFT loss weight that multiplies the computed loss before summing up the total loss. Defaults to 0.
+        mse_G_loss_weight (float):
+            MSE generator loss weight that multiplies the computed loss before summing up the total loss. Defaults to 2.5.
+        hinge_G_loss_weight (float):
+            Hinge generator loss weight that multiplies the computed loss before summing up the total loss. Defaults to 0.
+        feat_match_loss_weight (float):
+            Feature matching loss weight that multiplies the computed loss before summing up the total loss. Defaults to 0.
+        l1_spec_loss_weight (float):
+            L1 spectrogram loss weight that multiplies the computed loss before summing up the total loss. Defaults to 0.
+        lr_gen (float):
+            Generator model initial learning rate. Defaults to 0.0002.
+        lr_disc (float):
+            Discriminator model initial learning rate. Defaults to 0.0002.
+        optimizer (str):
+            Name of the optimizer used for the training. Defaults to `AdamW`.
+        optimizer_params (dict):
+            Optimizer kwargs. Defaults to `{"betas": [0.8, 0.99], "weight_decay": 0.0}`
+        lr_scheduler_gen (str):
+            Name of the learning rate scheduler for the generator. Defaults to `StepLR`.
+        lr_scheduler_gen_params (dict):
+            Parameters for the generator learning rate scheduler. Defaults to `{"gamma": 0.5, "step_size": 200000, "last_epoch": -1}`.
+        lr_scheduler_disc (str):
+            Name of the learning rate scheduler for the discriminator. Defaults to `StepLR`.
+        lr_scheduler_disc_params (dict):
+            Parameters for the discriminator learning rate scheduler. Defaults to `{"gamma": 0.5, "step_size": 200000, "last_epoch": -1}`.
+    """
+
+    model: str = "parallel_wavegan"
+
+    # Model specific params
+    discriminator_model: str = "parallel_wavegan_discriminator"
+    discriminator_model_params: dict = field(default_factory=lambda: {"num_layers": 10})
+    generator_model: str = "parallel_wavegan_generator"
+    generator_model_params: dict = field(
+        default_factory=lambda: {"upsample_factors": [4, 4, 4, 4], "stacks": 3, "num_res_blocks": 30}
+    )
+
+    # Training - overrides
+    batch_size: int = 6
+    seq_len: int = 25600
+    pad_short: int = 2000
+    use_noise_augment: bool = False
+    use_cache: bool = True
+    steps_to_start_discriminator: int = 200000
+
+    # LOSS PARAMETERS - overrides
+    use_stft_loss: bool = True
+    use_subband_stft_loss: bool = False
+    use_mse_gan_loss: bool = True
+    use_hinge_gan_loss: bool = False
+    use_feat_match_loss: bool = False  # requires MelGAN Discriminators (MelGAN and HifiGAN)
+    use_l1_spec_loss: bool = False
+
+    stft_loss_params: dict = field(
+        default_factory=lambda: {
+            "n_ffts": [1024, 2048, 512],
+            "hop_lengths": [120, 240, 50],
+            "win_lengths": [600, 1200, 240],
+        }
+    )
+
+    # loss weights - overrides
+    stft_loss_weight: float = 0.5
+    subband_stft_loss_weight: float = 0
+    mse_G_loss_weight: float = 2.5
+    hinge_G_loss_weight: float = 0
+    feat_match_loss_weight: float = 0
+    l1_spec_loss_weight: float = 0
+
+    # optimizer overrides
+    lr_gen: float = 0.0002  # Initial learning rate.
+    lr_disc: float = 0.0002  # Initial learning rate.
+    optimizer: str = "AdamW"
+    optimizer_params: dict = field(default_factory=lambda: {"betas": [0.8, 0.99], "weight_decay": 0.0})
+    lr_scheduler_gen: str = "StepLR"  # one of the schedulers from https://pytorch.org/docs/stable/optim.html
+    lr_scheduler_gen_params: dict = field(default_factory=lambda: {"gamma": 0.5, "step_size": 200000, "last_epoch": -1})
+    lr_scheduler_disc: str = "StepLR"  # one of the schedulers from https://pytorch.org/docs/stable/optim.html
+    lr_scheduler_disc_params: dict = field(
+        default_factory=lambda: {"gamma": 0.5, "step_size": 200000, "last_epoch": -1}
+    )
+    scheduler_after_epoch: bool = False
diff --git a/TTS/vocoder/configs/shared_configs.py b/TTS/vocoder/configs/shared_configs.py
new file mode 100644
index 0000000000000000000000000000000000000000..a558cfcabbc2abc26be60065d3ac75cebd829f28
--- /dev/null
+++ b/TTS/vocoder/configs/shared_configs.py
@@ -0,0 +1,182 @@
+from dataclasses import dataclass, field
+
+from TTS.config import BaseAudioConfig, BaseTrainingConfig
+
+
+@dataclass
+class BaseVocoderConfig(BaseTrainingConfig):
+    """Shared parameters among all the vocoder models.
+    Args:
+        audio (BaseAudioConfig):
+            Audio processor config instance. Defaults to `BaseAudioConfig()`.
+        use_noise_augment (bool):
+            Augment the input audio with random noise. Defaults to False.
+        eval_split_size (int):
+            Number of instances used for evaluation. Defaults to 10.
+        data_path (str):
+            Root path of the training data. All the audio files found recursively from this root path are used for
+            training. Defaults to `""`.
+        feature_path (str):
+            Root path to the precomputed feature files. Defaults to None.
+        seq_len (int):
+            Length of the waveform segments used for training. Defaults to 1000.
+        pad_short (int):
+            Extra padding for the waveforms shorter than `seq_len`. Defaults to 0.
+        conv_pad (int):
+            Extra padding for the feature frames against convolution of the edge frames.
+            Defaults to 0.
+        use_cache (bool):
+            enable / disable in memory caching of the computed features. If the RAM is not enough, it may cause OOM.
+            Defaults to False.
+        epochs (int):
+            Number of training epochs. Defaults to 10000.
+        wd (float):
+            Weight decay. Defaults to 0.0.
+        optimizer (str):
+            Name of the optimizer used for the training. Defaults to `AdamW`.
+        optimizer_params (dict):
+            Optimizer kwargs. Defaults to `{"betas": [0.8, 0.99], "weight_decay": 0.0}`
+    """
+
+    audio: BaseAudioConfig = field(default_factory=BaseAudioConfig)
+    # dataloading
+    use_noise_augment: bool = False  # enable/disable random noise augmentation in spectrograms.
+    eval_split_size: int = 10  # number of samples used for evaluation.
+    # dataset
+    data_path: str = ""  # root data path. It finds all wav files recursively from there.
+    feature_path: str = None  # if you use precomputed features
+    seq_len: int = 1000  # signal length used in training.
+    pad_short: int = 0  # additional padding for short wavs
+    conv_pad: int = 0  # additional padding against convolutions applied to spectrograms
+    use_cache: bool = False  # use in memory cache to keep the computed features. This might cause OOM.
+    # OPTIMIZER
+    epochs: int = 10000  # total number of epochs to train.
+    wd: float = 0.0  # Weight decay weight.
+    optimizer: str = "AdamW"
+    optimizer_params: dict = field(default_factory=lambda: {"betas": [0.8, 0.99], "weight_decay": 0.0})
+
+
+@dataclass
+class BaseGANVocoderConfig(BaseVocoderConfig):
+    """Base config class used among all the GAN based vocoders.
+    Args:
+        use_stft_loss (bool):
+            enable / disable the use of STFT loss. Defaults to True.
+        use_subband_stft_loss (bool):
+            enable / disable the use of Subband STFT loss. Defaults to True.
+        use_mse_gan_loss (bool):
+            enable / disable the use of Mean Squared Error based GAN loss. Defaults to True.
+        use_hinge_gan_loss (bool):
+            enable / disable the use of Hinge GAN loss. Defaults to True.
+        use_feat_match_loss (bool):
+            enable / disable feature matching loss. Defaults to True.
+        use_l1_spec_loss (bool):
+            enable / disable L1 spectrogram loss. Defaults to True.
+        stft_loss_weight (float):
+            Loss weight that multiplies the computed loss value. Defaults to 0.
+        subband_stft_loss_weight (float):
+            Loss weight that multiplies the computed loss value. Defaults to 0.
+        mse_G_loss_weight (float):
+            Loss weight that multiplies the computed loss value. Defaults to 1.
+        hinge_G_loss_weight (float):
+            Loss weight that multiplies the computed loss value. Defaults to 0.
+        feat_match_loss_weight (float):
+            Loss weight that multiplies the computed loss value. Defaults to 100.
+        l1_spec_loss_weight (float):
+            Loss weight that multiplies the computed loss value. Defaults to 45.
+        stft_loss_params (dict):
+            Parameters for the STFT loss. Defaults to `{"n_ffts": [1024, 2048, 512], "hop_lengths": [120, 240, 50], "win_lengths": [600, 1200, 240]}`.
+        l1_spec_loss_params (dict):
+            Parameters for the L1 spectrogram loss. Defaults to
+            `{
+                "use_mel": True,
+                "sample_rate": 22050,
+                "n_fft": 1024,
+                "hop_length": 256,
+                "win_length": 1024,
+                "n_mels": 80,
+                "mel_fmin": 0.0,
+                "mel_fmax": None,
+            }`
+        target_loss (str):
+            Target loss name that defines the quality of the model. Defaults to `loss_0`.
+        grad_clip (list):
+            A list of gradient clipping thresholds for each optimizer. Any value less than 0 disables clipping.
+            Defaults to [5, 5].
+        lr_gen (float):
+            Generator model initial learning rate. Defaults to 0.0002.
+        lr_disc (float):
+            Discriminator model initial learning rate. Defaults to 0.0002.
+        lr_scheduler_gen (str):
+            Name of the learning rate scheduler for the generator. Defaults to `ExponentialLR`.
+        lr_scheduler_gen_params (dict):
+            Parameters for the generator learning rate scheduler. Defaults to `{"gamma": 0.999, "last_epoch": -1}`.
+        lr_scheduler_disc (str):
+            Name of the learning rate scheduler for the discriminator. Defaults to `ExponentialLR`.
+        lr_scheduler_disc_params (dict):
+            Parameters for the discriminator learning rate scheduler. Defaults to `{"gamma": 0.999, "last_epoch": -1}`.
+        scheduler_after_epoch (bool):
+            Whether to update the learning rate schedulers after each epoch. Defaults to True.
+        use_pqmf (bool):
+            enable / disable PQMF for subband approximation at training. Defaults to False.
+        steps_to_start_discriminator (int):
+            Number of steps required to start training the discriminator. Defaults to 0.
+        diff_samples_for_G_and_D (bool):
+            enable / disable use of different training samples for the generator and the discriminator iterations.
+            Enabling it results in slower iterations but faster convergence in some cases. Defaults to False.
+    """
+
+    model: str = "gan"
+
+    # LOSS PARAMETERS
+    use_stft_loss: bool = True
+    use_subband_stft_loss: bool = True
+    use_mse_gan_loss: bool = True
+    use_hinge_gan_loss: bool = True
+    use_feat_match_loss: bool = True  # requires MelGAN Discriminators (MelGAN and HifiGAN)
+    use_l1_spec_loss: bool = True
+
+    # loss weights
+    stft_loss_weight: float = 0
+    subband_stft_loss_weight: float = 0
+    mse_G_loss_weight: float = 1
+    hinge_G_loss_weight: float = 0
+    feat_match_loss_weight: float = 100
+    l1_spec_loss_weight: float = 45
+
+    stft_loss_params: dict = field(
+        default_factory=lambda: {
+            "n_ffts": [1024, 2048, 512],
+            "hop_lengths": [120, 240, 50],
+            "win_lengths": [600, 1200, 240],
+        }
+    )
+
+    l1_spec_loss_params: dict = field(
+        default_factory=lambda: {
+            "use_mel": True,
+            "sample_rate": 22050,
+            "n_fft": 1024,
+            "hop_length": 256,
+            "win_length": 1024,
+            "n_mels": 80,
+            "mel_fmin": 0.0,
+            "mel_fmax": None,
+        }
+    )
+
+    target_loss: str = "loss_0"  # loss value to pick the best model to save after each epoch
+
+    # optimizer
+    grad_clip: float = field(default_factory=lambda: [5, 5])  # NOTE(review): annotated float, default is a list - confirm
+    lr_gen: float = 0.0002  # Initial learning rate.
+    lr_disc: float = 0.0002  # Initial learning rate.
+    lr_scheduler_gen: str = "ExponentialLR"  # one of the schedulers from https://pytorch.org/docs/stable/optim.html
+    lr_scheduler_gen_params: dict = field(default_factory=lambda: {"gamma": 0.999, "last_epoch": -1})
+    lr_scheduler_disc: str = "ExponentialLR"  # one of the schedulers from https://pytorch.org/docs/stable/optim.html
+    lr_scheduler_disc_params: dict = field(default_factory=lambda: {"gamma": 0.999, "last_epoch": -1})
+    scheduler_after_epoch: bool = True
+
+    use_pqmf: bool = False  # enable/disable using pqmf for multi-band training. (Multi-band MelGAN)
+    # annotated so that @dataclass registers it as a real field (an un-annotated name is only a class attribute)
+    steps_to_start_discriminator: int = 0  # start training the discriminator after this number of steps.
+    diff_samples_for_G_and_D: bool = False  # use different samples for G and D training steps.
diff --git a/TTS/vocoder/configs/univnet_config.py b/TTS/vocoder/configs/univnet_config.py
new file mode 100644
index 0000000000000000000000000000000000000000..67f324cfce5f701f0d7453beab81590bef6be114
--- /dev/null
+++ b/TTS/vocoder/configs/univnet_config.py
@@ -0,0 +1,161 @@
+from dataclasses import dataclass, field
+from typing import Dict
+
+from TTS.vocoder.configs.shared_configs import BaseGANVocoderConfig
+
+
+@dataclass
+class UnivnetConfig(BaseGANVocoderConfig):
+    """Defines parameters for UnivNet vocoder.
+
+    Example:
+
+        >>> from TTS.vocoder.configs import UnivnetConfig
+        >>> config = UnivnetConfig()
+
+    Args:
+        model (str):
+            Model name used for selecting the right model at initialization. Defaults to `univnet`.
+        discriminator_model (str): One of the discriminators from `TTS.vocoder.models.*_discriminator`. Defaults to
+            `univnet_discriminator`.
+        generator_model (str): One of the generators from `TTS.vocoder.models.*`. Every other non-GAN vocoder model is
+            considered as a generator too. Defaults to `univnet_generator`.
+        generator_model_params (dict): Generator kwargs (`cond_channels` is reset to `audio.num_mels`). Defaults to
+            `{
+                "in_channels": 64,
+                "out_channels": 1,
+                "hidden_channels": 32,
+                "cond_channels": 80,
+                "upsample_factors": [8, 8, 4],
+                "lvc_layers_each_block": 4,
+                "lvc_kernel_size": 3,
+                "kpnet_hidden_channels": 64,
+                "kpnet_conv_size": 3,
+                "dropout": 0.0,
+            }`
+        batch_size (int):
+            Batch size used at training. Larger values use more memory. Defaults to 32.
+        seq_len (int):
+            Audio segment length used at training. Larger values use more memory. Not overridden here.
+        pad_short (int):
+            Additional padding applied to the audio samples shorter than `seq_len`. Not overridden here.
+        use_noise_augment (bool):
+            enable / disable random noise added to the input waveform. The noise is added after computing the
+            features. Not overridden here.
+        use_cache (bool):
+            enable / disable in memory caching of the computed features. It can cause OOM error if the system RAM is
+            not large enough. Not overridden here.
+        use_stft_loss (bool):
+            enable / disable use of STFT loss originally used by ParallelWaveGAN model. Defaults to True.
+        use_subband_stft_loss (bool):
+            enable / disable use of subband loss computation originally used by MultiBandMelgan model. Defaults to False.
+        use_mse_gan_loss (bool):
+            enable / disable using Mean Squared Error GAN loss. Defaults to True.
+        use_hinge_gan_loss (bool):
+            enable / disable using Hinge GAN loss. You should choose either Hinge or MSE loss for training GAN models.
+            Defaults to False.
+        use_feat_match_loss (bool):
+            enable / disable using Feature Matching loss originally used by MelGAN model. Defaults to False.
+        use_l1_spec_loss (bool):
+            enable / disable using L1 spectrogram loss originally used by univnet model. Defaults to False.
+        stft_loss_params (dict):
+            STFT loss parameters. Default to
+            `{
+                "n_ffts": [1024, 2048, 512],
+                "hop_lengths": [120, 240, 50],
+                "win_lengths": [600, 1200, 240]
+            }`
+        l1_spec_loss_params (dict):
+            L1 spectrogram loss parameters. Default to
+            `{
+                "use_mel": True,
+                "sample_rate": 22050,
+                "n_fft": 1024,
+                "hop_length": 256,
+                "win_length": 1024,
+                "n_mels": 80,
+                "mel_fmin": 0.0,
+                "mel_fmax": None,
+            }`
+        stft_loss_weight (float): STFT loss weight that multiplies the computed loss before summing up the total
+            model loss. Defaults to 2.5.
+        subband_stft_loss_weight (float):
+            Subband STFT loss weight that multiplies the computed loss before summing up the total loss. Defaults to 0.
+        mse_G_loss_weight (float):
+            MSE generator loss weight that multiplies the computed loss before summing up the total loss. Defaults to 1.
+        hinge_G_loss_weight (float):
+            Hinge generator loss weight that multiplies the computed loss before summing up the total loss. Defaults to 0.
+        feat_match_loss_weight (float):
+            Feature matching loss weight that multiplies the computed loss before summing up the total loss. Defaults to 0.
+        l1_spec_loss_weight (float):
+            L1 spectrogram loss weight that multiplies the computed loss before summing up the total loss. Defaults to 0.
+    """
+
+    model: str = "univnet"
+    batch_size: int = 32
+    # model specific params
+    discriminator_model: str = "univnet_discriminator"
+    generator_model: str = "univnet_generator"
+    generator_model_params: Dict = field(
+        default_factory=lambda: {
+            "in_channels": 64,
+            "out_channels": 1,
+            "hidden_channels": 32,
+            "cond_channels": 80,
+            "upsample_factors": [8, 8, 4],
+            "lvc_layers_each_block": 4,
+            "lvc_kernel_size": 3,
+            "kpnet_hidden_channels": 64,
+            "kpnet_conv_size": 3,
+            "dropout": 0.0,
+        }
+    )
+
+    # LOSS PARAMETERS - overrides
+    use_stft_loss: bool = True
+    use_subband_stft_loss: bool = False
+    use_mse_gan_loss: bool = True
+    use_hinge_gan_loss: bool = False
+    use_feat_match_loss: bool = False  # requires MelGAN Discriminators (MelGAN and univnet)
+    use_l1_spec_loss: bool = False
+
+    # loss weights - overrides
+    stft_loss_weight: float = 2.5
+    stft_loss_params: Dict = field(
+        default_factory=lambda: {
+            "n_ffts": [1024, 2048, 512],
+            "hop_lengths": [120, 240, 50],
+            "win_lengths": [600, 1200, 240],
+        }
+    )
+    subband_stft_loss_weight: float = 0
+    mse_G_loss_weight: float = 1
+    hinge_G_loss_weight: float = 0
+    feat_match_loss_weight: float = 0
+    l1_spec_loss_weight: float = 0
+    l1_spec_loss_params: Dict = field(
+        default_factory=lambda: {
+            "use_mel": True,
+            "sample_rate": 22050,
+            "n_fft": 1024,
+            "hop_length": 256,
+            "win_length": 1024,
+            "n_mels": 80,
+            "mel_fmin": 0.0,
+            "mel_fmax": None,
+        }
+    )
+
+    # optimizer parameters
+    lr_gen: float = 1e-4  # Initial learning rate.
+    lr_disc: float = 1e-4  # Initial learning rate.
+    lr_scheduler_gen: str = None  # one of the schedulers from https://pytorch.org/docs/stable/optim.html
+    # lr_scheduler_gen_params: dict = field(default_factory=lambda: {"gamma": 0.999, "last_epoch": -1})
+    lr_scheduler_disc: str = None  # one of the schedulers from https://pytorch.org/docs/stable/optim.html
+    # lr_scheduler_disc_params: dict = field(default_factory=lambda: {"gamma": 0.999, "last_epoch": -1})
+    optimizer_params: Dict = field(default_factory=lambda: {"betas": [0.5, 0.9], "weight_decay": 0.0})
+    steps_to_start_discriminator: int = 200000
+
+    def __post_init__(self):
+        super().__post_init__()
+        self.generator_model_params["cond_channels"] = self.audio.num_mels  # overrides the 80 in the default dict
diff --git a/TTS/vocoder/configs/wavegrad_config.py b/TTS/vocoder/configs/wavegrad_config.py
new file mode 100644
index 0000000000000000000000000000000000000000..c39813ae68c3d8c77614c9a5188ac5f2a59d991d
--- /dev/null
+++ b/TTS/vocoder/configs/wavegrad_config.py
@@ -0,0 +1,90 @@
+from dataclasses import dataclass, field
+
+from TTS.vocoder.configs.shared_configs import BaseVocoderConfig
+from TTS.vocoder.models.wavegrad import WavegradArgs
+
+
+@dataclass
+class WavegradConfig(BaseVocoderConfig):
+    """Defines parameters for WaveGrad vocoder.
+    Example:
+
+        >>> from TTS.vocoder.configs import WavegradConfig
+        >>> config = WavegradConfig()
+
+    Args:
+        model (str):
+            Model name used for selecting the right model at initialization. Defaults to `wavegrad`.
+        generator_model (str): One of the generators from `TTS.vocoder.models.*`. Every other non-GAN vocoder model is
+            considered as a generator too. Defaults to `wavegrad`.
+        model_params (WavegradArgs): Model parameters. Check `WavegradArgs` for default values.
+        target_loss (str):
+            Target loss name that defines the quality of the model. Defaults to `loss`.
+        epochs (int):
+            Number of epochs to train the model. Defaults to 10000.
+        batch_size (int):
+            Batch size used at training. Larger values use more memory. Defaults to 96.
+        seq_len (int):
+            Audio segment length used at training. Larger values use more memory. Defaults to 6144.
+        use_cache (bool):
+            enable / disable in memory caching of the computed features. It can cause OOM error if the system RAM is
+            not large enough. Defaults to True.
+        mixed_precision (bool):
+            enable / disable mixed precision training. Default is True.
+        eval_split_size (int):
+            Number of samples used for evaluation. Defaults to 50.
+        train_noise_schedule (dict):
+            Training noise schedule. Defaults to
+            `{"min_val": 1e-6, "max_val": 1e-2, "num_steps": 1000}`
+        test_noise_schedule (dict):
+            Inference noise schedule. For a better performance, you may need to use `bin/tune_wavegrad.py` to find a
+            better schedule. Defaults to
+            `
+            {
+                "min_val": 1e-6,
+                "max_val": 1e-2,
+                "num_steps": 50,
+            }
+            `
+        grad_clip (float):
+            Gradient clipping threshold. If <= 0.0, no clipping is applied. Defaults to 1.0
+        lr (float):
+            Initial learning rate. Defaults to 1e-4.
+        lr_scheduler (str):
+            One of the learning rate schedulers from `torch.optim.lr_scheduler.*`. Defaults to `MultiStepLR`.
+        lr_scheduler_params (dict):
+            kwargs for the scheduler. Defaults to `{"gamma": 0.5, "milestones": [100000, 200000, 300000, 400000, 500000, 600000]}`
+    """
+
+    model: str = "wavegrad"
+    # Model specific params
+    generator_model: str = "wavegrad"
+    model_params: WavegradArgs = field(default_factory=WavegradArgs)
+    target_loss: str = "loss"  # loss value to pick the best model to save after each epoch
+
+    # Training - overrides
+    epochs: int = 10000
+    batch_size: int = 96
+    seq_len: int = 6144
+    use_cache: bool = True
+    mixed_precision: bool = True
+    eval_split_size: int = 50
+
+    # NOISE SCHEDULE PARAMS
+    train_noise_schedule: dict = field(default_factory=lambda: {"min_val": 1e-6, "max_val": 1e-2, "num_steps": 1000})
+
+    test_noise_schedule: dict = field(
+        default_factory=lambda: {  # inference noise schedule. Try TTS/bin/tune_wavegrad.py to find the optimal values.
+            "min_val": 1e-6,
+            "max_val": 1e-2,
+            "num_steps": 50,
+        }
+    )
+
+    # optimizer overrides
+    grad_clip: float = 1.0
+    lr: float = 1e-4  # Initial learning rate.
+    lr_scheduler: str = "MultiStepLR"  # one of the schedulers from https://pytorch.org/docs/stable/optim.html
+    lr_scheduler_params: dict = field(
+        default_factory=lambda: {"gamma": 0.5, "milestones": [100000, 200000, 300000, 400000, 500000, 600000]}
+    )
diff --git a/TTS/vocoder/configs/wavernn_config.py b/TTS/vocoder/configs/wavernn_config.py
new file mode 100644
index 0000000000000000000000000000000000000000..f39400e5e50b56d4ff79c8c148fd518b3ec3b390
--- /dev/null
+++ b/TTS/vocoder/configs/wavernn_config.py
@@ -0,0 +1,102 @@
+from dataclasses import dataclass, field
+
+from TTS.vocoder.configs.shared_configs import BaseVocoderConfig
+from TTS.vocoder.models.wavernn import WavernnArgs
+
+
+@dataclass
+class WavernnConfig(BaseVocoderConfig):
+    """Defines parameters for Wavernn vocoder.
+    Example:
+
+        >>> from TTS.vocoder.configs import WavernnConfig
+        >>> config = WavernnConfig()
+
+    Args:
+        model (str):
+            Model name used for selecting the right model at initialization. Defaults to `wavernn`.
+        mode (str):
+            Output mode of the WaveRNN vocoder. `mold` for Mixture of Logistic Distribution, `gauss` for a single
+            Gaussian Distribution and `bits` for quantized bits as the model's output.
+        mulaw (bool):
+            enable / disable the use of Mulaw quantization for training. Only applicable if `mode == 'bits'`. Defaults
+            to `True`.
+        generator_model (str):
+            One of the generators from `TTS.vocoder.models.*`. Every other non-GAN vocoder model is
+            considered as a generator too. Defaults to `WaveRNN`.
+        model_args (WavernnArgs):
+            Model arguments. Check `WavernnArgs` for the actual defaults; the values below are unverified (TODO confirm)
+            `{
+                "rnn_dims": 512,
+                "fc_dims": 512,
+                "compute_dims": 128,
+                "res_out_dims": 128,
+                "num_res_blocks": 10,
+                "use_aux_net": True,
+                "use_upsample_net": True,
+                "upsample_factors": [4, 8, 8]
+            }`
+        batched (bool):
+            enable / disable the batched inference. It speeds up the inference by splitting the input into segments and
+            processing the segments in a batch. Then it merges the outputs with a certain overlap and smoothing. If
+            you set it False, without CUDA, it is too slow to be practical. Defaults to True.
+        target_samples (int):
+            Size of the segments in batched mode. Defaults to 11000.
+        overlap_samples (int):
+            Size of the overlap between consecutive segments. Defaults to 550.
+        batch_size (int):
+            Batch size used at training. Larger values use more memory. Defaults to 256.
+        seq_len (int):
+            Audio segment length used at training. Larger values use more memory. Defaults to 1280.
+
+        use_noise_augment (bool):
+            enable / disable random noise added to the input waveform. The noise is added after computing the
+            features. Defaults to False.
+        use_cache (bool):
+            enable / disable in memory caching of the computed features. It can cause OOM error if the system RAM is
+            not large enough. Defaults to True.
+        mixed_precision (bool):
+            enable / disable mixed precision training. Default is True.
+        eval_split_size (int):
+            Number of samples used for evaluation. Defaults to 50.
+        num_epochs_before_test (int):
+            Number of epochs waited to run the next evaluation. Since inference takes some time, it is better to
+            wait some number of epochs not to waste training time. Defaults to 10.
+        grad_clip (float):
+            Gradient clipping threshold. If <= 0.0, no clipping is applied. Defaults to 4.0
+        lr (float):
+            Initial learning rate. Defaults to 1e-4.
+        lr_scheduler (str):
+            One of the learning rate schedulers from `torch.optim.lr_scheduler.*`. Defaults to `MultiStepLR`.
+        lr_scheduler_params (dict):
+            kwargs for the scheduler. Defaults to `{"gamma": 0.5, "milestones": [200000, 400000, 600000]}`
+    """
+
+    model: str = "wavernn"
+
+    # Model specific params
+    model_args: WavernnArgs = field(default_factory=WavernnArgs)
+    target_loss: str = "loss"
+
+    # Inference
+    batched: bool = True
+    target_samples: int = 11000
+    overlap_samples: int = 550
+
+    # Training - overrides
+    epochs: int = 10000
+    batch_size: int = 256
+    seq_len: int = 1280
+    use_noise_augment: bool = False
+    use_cache: bool = True
+    mixed_precision: bool = True
+    eval_split_size: int = 50
+    num_epochs_before_test: int = (
+        10  # number of epochs to wait until the next test run (synthesizing a full audio clip).
+    )
+
+    # optimizer overrides
+    grad_clip: float = 4.0
+    lr: float = 1e-4  # Initial learning rate.
+    lr_scheduler: str = "MultiStepLR"  # one of the schedulers from https://pytorch.org/docs/stable/optim.html
+    lr_scheduler_params: dict = field(default_factory=lambda: {"gamma": 0.5, "milestones": [200000, 400000, 600000]})
diff --git a/TTS/vocoder/datasets/__init__.py b/TTS/vocoder/datasets/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..871eb0d20276ffc691fd6da796bf65df6c23ea0d
--- /dev/null
+++ b/TTS/vocoder/datasets/__init__.py
@@ -0,0 +1,58 @@
+from typing import List
+
+from coqpit import Coqpit
+from torch.utils.data import Dataset
+
+from TTS.utils.audio import AudioProcessor
+from TTS.vocoder.datasets.gan_dataset import GANDataset
+from TTS.vocoder.datasets.preprocess import load_wav_data, load_wav_feat_data
+from TTS.vocoder.datasets.wavegrad_dataset import WaveGradDataset
+from TTS.vocoder.datasets.wavernn_dataset import WaveRNNDataset
+
+
+def setup_dataset(config: Coqpit, ap: AudioProcessor, is_eval: bool, data_items: List, verbose: bool) -> Dataset:
+    if config.model.lower() in "gan":
+        dataset = GANDataset(
+            ap=ap,
+            items=data_items,
+            seq_len=config.seq_len,
+            hop_len=ap.hop_length,
+            pad_short=config.pad_short,
+            conv_pad=config.conv_pad,
+            return_pairs=config.diff_samples_for_G_and_D if "diff_samples_for_G_and_D" in config else False,
+            is_training=not is_eval,
+            return_segments=not is_eval,
+            use_noise_augment=config.use_noise_augment,
+            use_cache=config.use_cache,
+            verbose=verbose,
+        )
+        dataset.shuffle_mapping()
+    elif config.model.lower() == "wavegrad":
+        dataset = WaveGradDataset(
+            ap=ap,
+            items=data_items,
+            seq_len=config.seq_len,
+            hop_len=ap.hop_length,
+            pad_short=config.pad_short,
+            conv_pad=config.conv_pad,
+            is_training=not is_eval,
+            return_segments=True,
+            use_noise_augment=False,
+            use_cache=config.use_cache,
+            verbose=verbose,
+        )
+    elif config.model.lower() == "wavernn":
+        dataset = WaveRNNDataset(
+            ap=ap,
+            items=data_items,
+            seq_len=config.seq_len,
+            hop_len=ap.hop_length,
+            pad=config.model_params.pad,
+            mode=config.model_params.mode,
+            mulaw=config.model_params.mulaw,
+            is_training=not is_eval,
+            verbose=verbose,
+        )
+    else:
+        raise ValueError(f" [!] Dataset for model {config.model.lower()} cannot be found.")
+    return dataset
diff --git a/TTS/vocoder/datasets/__pycache__/__init__.cpython-310.pyc b/TTS/vocoder/datasets/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d7688f0ea3c2edd93a8e823068c7db217fb9224f
Binary files /dev/null and b/TTS/vocoder/datasets/__pycache__/__init__.cpython-310.pyc differ
diff --git a/TTS/vocoder/datasets/__pycache__/__init__.cpython-38.pyc b/TTS/vocoder/datasets/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..9242629a71208789603b9a8e5a136e13c7f1d10d
Binary files /dev/null and b/TTS/vocoder/datasets/__pycache__/__init__.cpython-38.pyc differ
diff --git a/TTS/vocoder/datasets/__pycache__/__init__.cpython-39.pyc b/TTS/vocoder/datasets/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6d471a6cf45c875f3b2d4e762dc90b9c41014012
Binary files /dev/null and b/TTS/vocoder/datasets/__pycache__/__init__.cpython-39.pyc differ
diff --git a/TTS/vocoder/datasets/__pycache__/gan_dataset.cpython-310.pyc b/TTS/vocoder/datasets/__pycache__/gan_dataset.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c474916baa41d666a5b9267157080f308bb9bada
Binary files /dev/null and b/TTS/vocoder/datasets/__pycache__/gan_dataset.cpython-310.pyc differ
diff --git a/TTS/vocoder/datasets/__pycache__/gan_dataset.cpython-38.pyc b/TTS/vocoder/datasets/__pycache__/gan_dataset.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f22277449063eaaae06a3850b52b80f42aa290a1
Binary files /dev/null and b/TTS/vocoder/datasets/__pycache__/gan_dataset.cpython-38.pyc differ
diff --git a/TTS/vocoder/datasets/__pycache__/gan_dataset.cpython-39.pyc b/TTS/vocoder/datasets/__pycache__/gan_dataset.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..713205bfb2781e610d141e7bc26e24ed1ab171ad
Binary files /dev/null and b/TTS/vocoder/datasets/__pycache__/gan_dataset.cpython-39.pyc differ
diff --git a/TTS/vocoder/datasets/__pycache__/preprocess.cpython-310.pyc b/TTS/vocoder/datasets/__pycache__/preprocess.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..436009527dad9f3d73718ed34583dc7fb8f511ca
Binary files /dev/null and b/TTS/vocoder/datasets/__pycache__/preprocess.cpython-310.pyc differ
diff --git a/TTS/vocoder/datasets/__pycache__/preprocess.cpython-38.pyc b/TTS/vocoder/datasets/__pycache__/preprocess.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d3affdb5a1a3164ed6d58535e9cc303552c983c7
Binary files /dev/null and b/TTS/vocoder/datasets/__pycache__/preprocess.cpython-38.pyc differ
diff --git a/TTS/vocoder/datasets/__pycache__/preprocess.cpython-39.pyc b/TTS/vocoder/datasets/__pycache__/preprocess.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a0e5af75e397a338eee9df69dd523b29a8d52563
Binary files /dev/null and b/TTS/vocoder/datasets/__pycache__/preprocess.cpython-39.pyc differ
diff --git a/TTS/vocoder/datasets/__pycache__/wavegrad_dataset.cpython-310.pyc b/TTS/vocoder/datasets/__pycache__/wavegrad_dataset.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a1fe5857a7a219e4765da96e4b290093cc82e778
Binary files /dev/null and b/TTS/vocoder/datasets/__pycache__/wavegrad_dataset.cpython-310.pyc differ
diff --git a/TTS/vocoder/datasets/__pycache__/wavegrad_dataset.cpython-38.pyc b/TTS/vocoder/datasets/__pycache__/wavegrad_dataset.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..946802ee23d68777f3ea3977022716043768ee78
Binary files /dev/null and b/TTS/vocoder/datasets/__pycache__/wavegrad_dataset.cpython-38.pyc differ
diff --git a/TTS/vocoder/datasets/__pycache__/wavegrad_dataset.cpython-39.pyc b/TTS/vocoder/datasets/__pycache__/wavegrad_dataset.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..9242ac44ce165fcfa09f5d267b7b6f693ecf2c6c
Binary files /dev/null and b/TTS/vocoder/datasets/__pycache__/wavegrad_dataset.cpython-39.pyc differ
diff --git a/TTS/vocoder/datasets/__pycache__/wavernn_dataset.cpython-310.pyc b/TTS/vocoder/datasets/__pycache__/wavernn_dataset.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7f8038ad4a3e158d1a529a5f152590637ca3e44a
Binary files /dev/null and b/TTS/vocoder/datasets/__pycache__/wavernn_dataset.cpython-310.pyc differ
diff --git a/TTS/vocoder/datasets/__pycache__/wavernn_dataset.cpython-38.pyc b/TTS/vocoder/datasets/__pycache__/wavernn_dataset.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7a7ef18f0a754993d67f06fb6d4bb75698fb9ba7
Binary files /dev/null and b/TTS/vocoder/datasets/__pycache__/wavernn_dataset.cpython-38.pyc differ
diff --git a/TTS/vocoder/datasets/__pycache__/wavernn_dataset.cpython-39.pyc b/TTS/vocoder/datasets/__pycache__/wavernn_dataset.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4d365760428cd850be9f02c1998f05699f30a7e4
Binary files /dev/null and b/TTS/vocoder/datasets/__pycache__/wavernn_dataset.cpython-39.pyc differ
diff --git a/TTS/vocoder/datasets/gan_dataset.py b/TTS/vocoder/datasets/gan_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..a782067e1badef3522ac5b7d1b6407e3f291502a
--- /dev/null
+++ b/TTS/vocoder/datasets/gan_dataset.py
@@ -0,0 +1,153 @@
+import glob
+import os
+import random
+from multiprocessing import Manager
+
+import numpy as np
+import torch
+from torch.utils.data import Dataset
+
+
+class GANDataset(Dataset):
+    """
+    GAN Dataset searchs for all the wav files under root path
+    and converts them to acoustic features on the fly and returns
+    random segments of (audio, feature) couples.
+    """
+
+    def __init__(
+        self,
+        ap,
+        items,
+        seq_len,
+        hop_len,
+        pad_short,
+        conv_pad=2,
+        return_pairs=False,
+        is_training=True,
+        return_segments=True,
+        use_noise_augment=False,
+        use_cache=False,
+        verbose=False,
+    ):
+        super().__init__()
+        self.ap = ap
+        self.item_list = items
+        self.compute_feat = not isinstance(items[0], (tuple, list))
+        self.seq_len = seq_len
+        self.hop_len = hop_len
+        self.pad_short = pad_short
+        self.conv_pad = conv_pad
+        self.return_pairs = return_pairs
+        self.is_training = is_training
+        self.return_segments = return_segments
+        self.use_cache = use_cache
+        self.use_noise_augment = use_noise_augment
+        self.verbose = verbose
+
+        assert seq_len % hop_len == 0, " [!] seq_len has to be a multiple of hop_len."
+        self.feat_frame_len = seq_len // hop_len + (2 * conv_pad)
+
+        # map G and D instances
+        self.G_to_D_mappings = list(range(len(self.item_list)))
+        self.shuffle_mapping()
+
+        # cache acoustic features
+        if use_cache:
+            self.create_feature_cache()
+
+    def create_feature_cache(self):
+        self.manager = Manager()
+        self.cache = self.manager.list()
+        self.cache += [None for _ in range(len(self.item_list))]
+
+    @staticmethod
+    def find_wav_files(path):
+        return glob.glob(os.path.join(path, "**", "*.wav"), recursive=True)
+
+    def __len__(self):
+        return len(self.item_list)
+
+    def __getitem__(self, idx):
+        """Return different items for Generator and Discriminator and
+        cache acoustic features"""
+
+        # set the seed differently for each worker
+        if torch.utils.data.get_worker_info():
+            random.seed(torch.utils.data.get_worker_info().seed)
+
+        if self.return_segments:
+            item1 = self.load_item(idx)
+            if self.return_pairs:
+                idx2 = self.G_to_D_mappings[idx]
+                item2 = self.load_item(idx2)
+                return item1, item2
+            return item1
+        item1 = self.load_item(idx)
+        return item1
+
+    def _pad_short_samples(self, audio, mel=None):
+        """Pad samples shorter than the output sequence length"""
+        if len(audio) < self.seq_len:
+            audio = np.pad(audio, (0, self.seq_len - len(audio)), mode="constant", constant_values=0.0)
+
+        if mel is not None and mel.shape[1] < self.feat_frame_len:
+            pad_value = self.ap.melspectrogram(np.zeros([self.ap.win_length]))[:, 0]
+            mel = np.pad(
+                mel,
+                ([0, 0], [0, self.feat_frame_len - mel.shape[1]]),
+                mode="constant",
+                constant_values=pad_value.mean(),
+            )
+        return audio, mel
+
+    def shuffle_mapping(self):
+        random.shuffle(self.G_to_D_mappings)
+
+    def load_item(self, idx):
+        """load (audio, feat) couple"""
+        if self.compute_feat:
+            # compute features from wav
+            wavpath = self.item_list[idx]
+            # print(wavpath)
+
+            if self.use_cache and self.cache[idx] is not None:
+                audio, mel = self.cache[idx]
+            else:
+                audio = self.ap.load_wav(wavpath)
+                mel = self.ap.melspectrogram(audio)
+                audio, mel = self._pad_short_samples(audio, mel)
+        else:
+
+            # load precomputed features
+            wavpath, feat_path = self.item_list[idx]
+
+            if self.use_cache and self.cache[idx] is not None:
+                audio, mel = self.cache[idx]
+            else:
+                audio = self.ap.load_wav(wavpath)
+                mel = np.load(feat_path)
+                audio, mel = self._pad_short_samples(audio, mel)
+
+        # correct the audio length wrt padding applied in stft
+        audio = np.pad(audio, (0, self.hop_len), mode="edge")
+        audio = audio[: mel.shape[-1] * self.hop_len]
+        assert (
+            mel.shape[-1] * self.hop_len == audio.shape[-1]
+        ), f" [!] {mel.shape[-1] * self.hop_len} vs {audio.shape[-1]}"
+
+        audio = torch.from_numpy(audio).float().unsqueeze(0)
+        mel = torch.from_numpy(mel).float().squeeze(0)
+
+        if self.return_segments:
+            max_mel_start = mel.shape[1] - self.feat_frame_len
+            mel_start = random.randint(0, max_mel_start)
+            mel_end = mel_start + self.feat_frame_len
+            mel = mel[:, mel_start:mel_end]
+
+            audio_start = mel_start * self.hop_len
+            audio = audio[:, audio_start : audio_start + self.seq_len]
+
+        if self.use_noise_augment and self.is_training and self.return_segments:
+            audio = audio + (1 / 32768) * torch.randn_like(audio)
+        return (mel, audio)
diff --git a/TTS/vocoder/datasets/preprocess.py b/TTS/vocoder/datasets/preprocess.py
new file mode 100644
index 0000000000000000000000000000000000000000..0f69b812fa58949eadc78b450114f03b19e5c80c
--- /dev/null
+++ b/TTS/vocoder/datasets/preprocess.py
@@ -0,0 +1,70 @@
+import glob
+import os
+from pathlib import Path
+
+import numpy as np
+from coqpit import Coqpit
+from tqdm import tqdm
+
+from TTS.utils.audio import AudioProcessor
+
+
+def preprocess_wav_files(out_path: str, config: Coqpit, ap: AudioProcessor):
+    """Process wav and compute mel and quantized wave signal.
+    It is mainly used by WaveRNN dataloader.
+
+    Args:
+        out_path (str): Parent folder path to save the files.
+        config (Coqpit): Model config.
+        ap (AudioProcessor): Audio processor.
+    """
+    os.makedirs(os.path.join(out_path, "quant"), exist_ok=True)
+    os.makedirs(os.path.join(out_path, "mel"), exist_ok=True)
+    wav_files = find_wav_files(config.data_path)
+    for path in tqdm(wav_files):
+        wav_name = Path(path).stem
+        quant_path = os.path.join(out_path, "quant", wav_name + ".npy")
+        mel_path = os.path.join(out_path, "mel", wav_name + ".npy")
+        y = ap.load_wav(path)
+        mel = ap.melspectrogram(y)
+        np.save(mel_path, mel)
+        if isinstance(config.mode, int):
+            quant = ap.mulaw_encode(y, qc=config.mode) if config.model_args.mulaw else ap.quantize(y, bits=config.mode)
+            np.save(quant_path, quant)
+
+
+def find_wav_files(data_path, file_ext="wav"):
+    wav_paths = glob.glob(os.path.join(data_path, "**", f"*.{file_ext}"), recursive=True)
+    return wav_paths
+
+
+def find_feat_files(data_path):
+    feat_paths = glob.glob(os.path.join(data_path, "**", "*.npy"), recursive=True)
+    return feat_paths
+
+
+def load_wav_data(data_path, eval_split_size, file_ext="wav"):
+    wav_paths = find_wav_files(data_path, file_ext=file_ext)
+    assert len(wav_paths) > 0, f" [!] {data_path} is empty."
+    np.random.seed(0)
+    np.random.shuffle(wav_paths)
+    return wav_paths[:eval_split_size], wav_paths[eval_split_size:]
+
+
+def load_wav_feat_data(data_path, feat_path, eval_split_size):
+    wav_paths = find_wav_files(data_path)
+    feat_paths = find_feat_files(feat_path)
+
+    wav_paths.sort(key=lambda x: Path(x).stem)
+    feat_paths.sort(key=lambda x: Path(x).stem)
+
+    assert len(wav_paths) == len(feat_paths), f" [!] {len(wav_paths)} vs {feat_paths}"
+    for wav, feat in zip(wav_paths, feat_paths):
+        wav_name = Path(wav).stem
+        feat_name = Path(feat).stem
+        assert wav_name == feat_name
+
+    items = list(zip(wav_paths, feat_paths))
+    np.random.seed(0)
+    np.random.shuffle(items)
+    return items[:eval_split_size], items[eval_split_size:]
diff --git a/TTS/vocoder/datasets/wavegrad_dataset.py b/TTS/vocoder/datasets/wavegrad_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..d941eab33e2bb24c2c53dc2d9ced7befce7e052b
--- /dev/null
+++ b/TTS/vocoder/datasets/wavegrad_dataset.py
@@ -0,0 +1,152 @@
+import glob
+import os
+import random
+from multiprocessing import Manager
+from typing import List, Tuple
+
+import numpy as np
+import torch
+from torch.utils.data import Dataset
+
+
+class WaveGradDataset(Dataset):
+    """
+    WaveGrad Dataset searchs for all the wav files under root path
+    and converts them to acoustic features on the fly and returns
+    random segments of (audio, feature) couples.
+    """
+
+    def __init__(
+        self,
+        ap,
+        items,
+        seq_len,
+        hop_len,
+        pad_short,
+        conv_pad=2,
+        is_training=True,
+        return_segments=True,
+        use_noise_augment=False,
+        use_cache=False,
+        verbose=False,
+    ):
+
+        super().__init__()
+        self.ap = ap
+        self.item_list = items
+        self.seq_len = seq_len if return_segments else None
+        self.hop_len = hop_len
+        self.pad_short = pad_short
+        self.conv_pad = conv_pad
+        self.is_training = is_training
+        self.return_segments = return_segments
+        self.use_cache = use_cache
+        self.use_noise_augment = use_noise_augment
+        self.verbose = verbose
+
+        if return_segments:
+            assert seq_len % hop_len == 0, " [!] seq_len has to be a multiple of hop_len."
+        self.feat_frame_len = seq_len // hop_len + (2 * conv_pad)
+
+        # cache acoustic features
+        if use_cache:
+            self.create_feature_cache()
+
+    def create_feature_cache(self):
+        self.manager = Manager()
+        self.cache = self.manager.list()
+        self.cache += [None for _ in range(len(self.item_list))]
+
+    @staticmethod
+    def find_wav_files(path):
+        return glob.glob(os.path.join(path, "**", "*.wav"), recursive=True)
+
+    def __len__(self):
+        return len(self.item_list)
+
+    def __getitem__(self, idx):
+        item = self.load_item(idx)
+        return item
+
+    def load_test_samples(self, num_samples: int) -> List[Tuple]:
+        """Return test samples.
+
+        Args:
+            num_samples (int): Number of samples to return.
+
+        Returns:
+            List[Tuple]: melspectorgram and audio.
+
+        Shapes:
+            - melspectrogram (Tensor): :math:`[C, T]`
+            - audio (Tensor): :math:`[T_audio]`
+        """
+        samples = []
+        return_segments = self.return_segments
+        self.return_segments = False
+        for idx in range(num_samples):
+            mel, audio = self.load_item(idx)
+            samples.append([mel, audio])
+        self.return_segments = return_segments
+        return samples
+
+    def load_item(self, idx):
+        """load (audio, feat) couple"""
+        # compute features from wav
+        wavpath = self.item_list[idx]
+
+        if self.use_cache and self.cache[idx] is not None:
+            audio = self.cache[idx]
+        else:
+            audio = self.ap.load_wav(wavpath)
+
+            if self.return_segments:
+                # correct audio length wrt segment length
+                if audio.shape[-1] < self.seq_len + self.pad_short:
+                    audio = np.pad(
+                        audio, (0, self.seq_len + self.pad_short - len(audio)), mode="constant", constant_values=0.0
+                    )
+                assert (
+                    audio.shape[-1] >= self.seq_len + self.pad_short
+                ), f"{audio.shape[-1]} vs {self.seq_len + self.pad_short}"
+
+            # correct the audio length wrt hop length
+            p = (audio.shape[-1] // self.hop_len + 1) * self.hop_len - audio.shape[-1]
+            audio = np.pad(audio, (0, p), mode="constant", constant_values=0.0)
+
+            if self.use_cache:
+                self.cache[idx] = audio
+
+        if self.return_segments:
+            max_start = len(audio) - self.seq_len
+            start = random.randint(0, max_start)
+            end = start + self.seq_len
+            audio = audio[start:end]
+
+        if self.use_noise_augment and self.is_training and self.return_segments:
+            audio = audio + (1 / 32768) * torch.randn_like(audio)
+
+        mel = self.ap.melspectrogram(audio)
+        mel = mel[..., :-1]  # ignore the padding
+
+        audio = torch.from_numpy(audio).float()
+        mel = torch.from_numpy(mel).float().squeeze(0)
+        return (mel, audio)
+
+    @staticmethod
+    def collate_full_clips(batch):
+        """This is used in tune_wavegrad.py.
+        It pads sequences to the max length."""
+        max_mel_length = max([b[0].shape[1] for b in batch]) if len(batch) > 1 else batch[0][0].shape[1]
+        max_audio_length = max([b[1].shape[0] for b in batch]) if len(batch) > 1 else batch[0][1].shape[0]
+
+        mels = torch.zeros([len(batch), batch[0][0].shape[0], max_mel_length])
+        audios = torch.zeros([len(batch), max_audio_length])
+
+        for idx, b in enumerate(batch):
+            mel = b[0]
+            audio = b[1]
+            mels[idx, :, : mel.shape[1]] = mel
+            audios[idx, : audio.shape[0]] = audio
+
+        return mels, audios
diff --git a/TTS/vocoder/datasets/wavernn_dataset.py b/TTS/vocoder/datasets/wavernn_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..2c771cf0ed5bb228eb8f4aaa6c850665c4997170
--- /dev/null
+++ b/TTS/vocoder/datasets/wavernn_dataset.py
@@ -0,0 +1,117 @@
+import numpy as np
+import torch
+from torch.utils.data import Dataset
+
+
+class WaveRNNDataset(Dataset):
+    """
+    WaveRNN Dataset searchs for all the wav files under root path
+    and converts them to acoustic features on the fly.
+    """
+
+    def __init__(
+        self, ap, items, seq_len, hop_len, pad, mode, mulaw, is_training=True, verbose=False, return_segments=True
+    ):
+
+        super().__init__()
+        self.ap = ap
+        self.compute_feat = not isinstance(items[0], (tuple, list))
+        self.item_list = items
+        self.seq_len = seq_len
+        self.hop_len = hop_len
+        self.mel_len = seq_len // hop_len
+        self.pad = pad
+        self.mode = mode
+        self.mulaw = mulaw
+        self.is_training = is_training
+        self.verbose = verbose
+        self.return_segments = return_segments
+
+        assert self.seq_len % self.hop_len == 0
+
+    def __len__(self):
+        return len(self.item_list)
+
+    def __getitem__(self, index):
+        item = self.load_item(index)
+        return item
+
+    def load_test_samples(self, num_samples):
+        samples = []
+        return_segments = self.return_segments
+        self.return_segments = False
+        for idx in range(num_samples):
+            mel, audio, _ = self.load_item(idx)
+            samples.append([mel, audio])
+        self.return_segments = return_segments
+        return samples
+
+    def load_item(self, index):
+        """
+        load (audio, feat) couple if feature_path is set
+        else compute it on the fly
+        """
+        if self.compute_feat:
+
+            wavpath = self.item_list[index]
+            audio = self.ap.load_wav(wavpath)
+            if self.return_segments:
+                min_audio_len = 2 * self.seq_len + (2 * self.pad * self.hop_len)
+            else:
+                min_audio_len = audio.shape[0] + (2 * self.pad * self.hop_len)
+            if audio.shape[0] < min_audio_len:
+                print(" [!] Instance is too short! : {}".format(wavpath))
+                audio = np.pad(audio, [0, min_audio_len - audio.shape[0] + self.hop_len])
+            mel = self.ap.melspectrogram(audio)
+
+            if self.mode in ["gauss", "mold"]:
+                x_input = audio
+            elif isinstance(self.mode, int):
+                x_input = (
+                    self.ap.mulaw_encode(audio, qc=self.mode) if self.mulaw else self.ap.quantize(audio, bits=self.mode)
+                )
+            else:
+                raise RuntimeError("Unknown dataset mode - ", self.mode)
+
+        else:
+
+            wavpath, feat_path = self.item_list[index]
+            mel = np.load(feat_path.replace("/quant/", "/mel/"))
+
+            if mel.shape[-1] < self.mel_len + 2 * self.pad:
+                print(" [!] Instance is too short! : {}".format(wavpath))
+                self.item_list[index] = self.item_list[index + 1]
+                feat_path = self.item_list[index]
+                mel = np.load(feat_path.replace("/quant/", "/mel/"))
+            if self.mode in ["gauss", "mold"]:
+                x_input = self.ap.load_wav(wavpath)
+            elif isinstance(self.mode, int):
+                x_input = np.load(feat_path.replace("/mel/", "/quant/"))
+            else:
+                raise RuntimeError("Unknown dataset mode - ", self.mode)
+
+        return mel, x_input, wavpath
+
+    def collate(self, batch):
+        mel_win = self.seq_len // self.hop_len + 2 * self.pad
+        max_offsets = [x[0].shape[-1] - (mel_win + 2 * self.pad) for x in batch]
+
+        mel_offsets = [np.random.randint(0, offset) for offset in max_offsets]
+        sig_offsets = [(offset + self.pad) * self.hop_len for offset in mel_offsets]
+
+        mels = [x[0][:, mel_offsets[i] : mel_offsets[i] + mel_win] for i, x in enumerate(batch)]
+
+        coarse = [x[1][sig_offsets[i] : sig_offsets[i] + self.seq_len + 1] for i, x in enumerate(batch)]
+
+        mels = np.stack(mels).astype(np.float32)
+        if self.mode in ["gauss", "mold"]:
+            coarse = np.stack(coarse).astype(np.float32)
+            coarse = torch.FloatTensor(coarse)
+            x_input = coarse[:, : self.seq_len]
+        elif isinstance(self.mode, int):
+            coarse = np.stack(coarse).astype(np.int64)
+            coarse = torch.LongTensor(coarse)
+            x_input = 2 * coarse[:, : self.seq_len].float() / (2**self.mode - 1.0) - 1.0
+        y_coarse = coarse[:, 1:]
+        mels = torch.FloatTensor(mels)
+        return x_input, mels, y_coarse
diff --git a/TTS/vocoder/layers/__init__.py b/TTS/vocoder/layers/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/TTS/vocoder/layers/__pycache__/__init__.cpython-310.pyc b/TTS/vocoder/layers/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..832dc309400c1ce6980f3dd592e66af38282e46c
Binary files /dev/null and b/TTS/vocoder/layers/__pycache__/__init__.cpython-310.pyc differ
diff --git a/TTS/vocoder/layers/__pycache__/__init__.cpython-38.pyc b/TTS/vocoder/layers/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6de7aca78dbb1a560e7adf177d1b292723feb43f
Binary files /dev/null and b/TTS/vocoder/layers/__pycache__/__init__.cpython-38.pyc differ
diff --git a/TTS/vocoder/layers/__pycache__/__init__.cpython-39.pyc b/TTS/vocoder/layers/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..37b447db5e7aa57e433e878d02ea76594b49ff65
Binary files /dev/null and b/TTS/vocoder/layers/__pycache__/__init__.cpython-39.pyc differ
diff --git a/TTS/vocoder/layers/__pycache__/losses.cpython-310.pyc b/TTS/vocoder/layers/__pycache__/losses.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..31f463daceb0edcb56a07b86c02bd16cd20d0000
Binary files /dev/null and b/TTS/vocoder/layers/__pycache__/losses.cpython-310.pyc differ
diff --git a/TTS/vocoder/layers/__pycache__/losses.cpython-38.pyc b/TTS/vocoder/layers/__pycache__/losses.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d9f3deae4dc193f580dbc5c70fd21d526d74ae56
Binary files /dev/null and b/TTS/vocoder/layers/__pycache__/losses.cpython-38.pyc differ
diff --git a/TTS/vocoder/layers/__pycache__/losses.cpython-39.pyc b/TTS/vocoder/layers/__pycache__/losses.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..9dd5e99cfe1e271b68004e71acfe0aa241a95bae
Binary files /dev/null and b/TTS/vocoder/layers/__pycache__/losses.cpython-39.pyc differ
diff --git a/TTS/vocoder/layers/__pycache__/lvc_block.cpython-310.pyc b/TTS/vocoder/layers/__pycache__/lvc_block.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6b0f6d07194972cdd9847e3b03a84ae7e681fce1
Binary files /dev/null and b/TTS/vocoder/layers/__pycache__/lvc_block.cpython-310.pyc differ
diff --git a/TTS/vocoder/layers/__pycache__/lvc_block.cpython-38.pyc b/TTS/vocoder/layers/__pycache__/lvc_block.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8cdb576cf862653f1b1988bc0584be461baeba43
Binary files /dev/null and b/TTS/vocoder/layers/__pycache__/lvc_block.cpython-38.pyc differ
diff --git a/TTS/vocoder/layers/__pycache__/lvc_block.cpython-39.pyc b/TTS/vocoder/layers/__pycache__/lvc_block.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f62f0b8147c831e30e38919c5973567d1c3cbe5c
Binary files /dev/null and b/TTS/vocoder/layers/__pycache__/lvc_block.cpython-39.pyc differ
diff --git a/TTS/vocoder/layers/__pycache__/wavegrad.cpython-310.pyc b/TTS/vocoder/layers/__pycache__/wavegrad.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..1d1190c016629cfe573fc2411b9fafe4ac3f4c24
Binary files /dev/null and b/TTS/vocoder/layers/__pycache__/wavegrad.cpython-310.pyc differ
diff --git a/TTS/vocoder/layers/__pycache__/wavegrad.cpython-38.pyc b/TTS/vocoder/layers/__pycache__/wavegrad.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..3df2cdac1a83e64df2e0f4454080b739b9e32568
Binary files /dev/null and b/TTS/vocoder/layers/__pycache__/wavegrad.cpython-38.pyc differ
diff --git a/TTS/vocoder/layers/__pycache__/wavegrad.cpython-39.pyc b/TTS/vocoder/layers/__pycache__/wavegrad.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..724daafd93e0509ee81c97075a1a0f6cd4ce628a
Binary files /dev/null and b/TTS/vocoder/layers/__pycache__/wavegrad.cpython-39.pyc differ
diff --git a/TTS/vocoder/layers/hifigan.py b/TTS/vocoder/layers/hifigan.py
new file mode 100644
index 0000000000000000000000000000000000000000..f51200724887b04746a125b7d7c368e0315ce7da
--- /dev/null
+++ b/TTS/vocoder/layers/hifigan.py
@@ -0,0 +1,53 @@
+from torch import nn
+
+
+# pylint: disable=dangerous-default-value
+class ResStack(nn.Module):
+    def __init__(self, kernel, channel, padding, dilations=[1, 3, 5]):
+        super().__init__()
+        resstack = []
+        for dilation in dilations:
+            resstack += [
+                nn.LeakyReLU(0.2),
+                nn.ReflectionPad1d(dilation),
+                nn.utils.weight_norm(nn.Conv1d(channel, channel, kernel_size=kernel, dilation=dilation)),
+                nn.LeakyReLU(0.2),
+                nn.ReflectionPad1d(padding),
+                nn.utils.weight_norm(nn.Conv1d(channel, channel, kernel_size=1)),
+            ]
+        self.resstack = nn.Sequential(*resstack)
+
+        self.shortcut = nn.utils.weight_norm(nn.Conv1d(channel, channel, kernel_size=1))
+
+    def forward(self, x):
+        x1 = self.shortcut(x)
+        x2 = self.resstack(x)
+        return x1 + x2
+
+    def remove_weight_norm(self):
+        nn.utils.remove_weight_norm(self.shortcut)
+        nn.utils.remove_weight_norm(self.resstack[2])
+        nn.utils.remove_weight_norm(self.resstack[5])
+        nn.utils.remove_weight_norm(self.resstack[8])
+        nn.utils.remove_weight_norm(self.resstack[11])
+        nn.utils.remove_weight_norm(self.resstack[14])
+        nn.utils.remove_weight_norm(self.resstack[17])
+
+
+class MRF(nn.Module):
+    def __init__(self, kernels, channel, dilations=[1, 3, 5]):  # # pylint: disable=dangerous-default-value
+        super().__init__()
+        self.resblock1 = ResStack(kernels[0], channel, 0, dilations)
+        self.resblock2 = ResStack(kernels[1], channel, 6, dilations)
+        self.resblock3 = ResStack(kernels[2], channel, 12, dilations)
+
+    def forward(self, x):
+        x1 = self.resblock1(x)
+        x2 = self.resblock2(x)
+        x3 = self.resblock3(x)
+        return x1 + x2 + x3
+
+    def remove_weight_norm(self):
+        self.resblock1.remove_weight_norm()
+        self.resblock2.remove_weight_norm()
+        self.resblock3.remove_weight_norm()
diff --git a/TTS/vocoder/layers/losses.py b/TTS/vocoder/layers/losses.py
new file mode 100644
index 0000000000000000000000000000000000000000..befc43cca6ff3a3477cdcfbfcb0593d28f60d2c6
--- /dev/null
+++ b/TTS/vocoder/layers/losses.py
@@ -0,0 +1,368 @@
+from typing import Dict, Union
+
+import torch
+from torch import nn
+from torch.nn import functional as F
+
+from TTS.utils.audio.torch_transforms import TorchSTFT
+from TTS.vocoder.utils.distribution import discretized_mix_logistic_loss, gaussian_loss
+
+#################################
+# GENERATOR LOSSES
+#################################
+
+
+class STFTLoss(nn.Module):
+    """Single-resolution STFT loss.
+
+    The generated and the real waveform are passed through the same ``TorchSTFT``
+    transform and compared with an L1 loss on the log outputs plus a spectral
+    convergence term. It is from ParallelWaveGAN paper https://arxiv.org/pdf/1910.11480.pdf
+    """
+
+    def __init__(self, n_fft, hop_length, win_length):
+        super().__init__()
+        self.n_fft, self.hop_length, self.win_length = n_fft, hop_length, win_length
+        self.stft = TorchSTFT(n_fft, hop_length, win_length)
+
+    def forward(self, y_hat, y):
+        """Return ``(loss_mag, loss_sc)`` for the generated ``y_hat`` against the reference ``y``."""
+        spec_fake = self.stft(y_hat)
+        spec_real = self.stft(y)
+        # magnitude loss: L1 distance between the log outputs
+        log_real = torch.log(spec_real)
+        log_fake = torch.log(spec_fake)
+        loss_mag = F.l1_loss(log_real, log_fake)
+        # spectral convergence loss: Frobenius norm of the error relative to the reference
+        error_norm = torch.norm(spec_real - spec_fake, p="fro")
+        loss_sc = error_norm / torch.norm(spec_real, p="fro")
+        return loss_mag, loss_sc
+
+
+class MultiScaleSTFTLoss(torch.nn.Module):
+    """Multi-scale STFT loss.
+
+    Holds one ``STFTLoss`` per ``(n_fft, hop_length, win_length)`` triple and averages
+    the magnitude and spectral convergence terms over all of them.
+    It is from ParallelWaveGAN paper https://arxiv.org/pdf/1910.11480.pdf
+    """
+
+    def __init__(self, n_ffts=(1024, 2048, 512), hop_lengths=(120, 240, 50), win_lengths=(600, 1200, 240)):
+        super().__init__()
+        resolutions = zip(n_ffts, hop_lengths, win_lengths)
+        self.loss_funcs = torch.nn.ModuleList(
+            [STFTLoss(n_fft, hop_length, win_length) for n_fft, hop_length, win_length in resolutions]
+        )
+
+    def forward(self, y_hat, y):
+        """Return ``(loss_mag, loss_sc)``, each averaged over the configured resolutions."""
+        per_scale = [loss_func(y_hat, y) for loss_func in self.loss_funcs]
+        num_scales = len(self.loss_funcs)
+        loss_mag = sum(mag for mag, _ in per_scale) / num_scales
+        loss_sc = sum(sc for _, sc in per_scale) / num_scales
+        return loss_mag, loss_sc
+
+
+class L1SpecLoss(nn.Module):
+    """L1 loss between log spectrograms as described in HiFiGAN paper https://arxiv.org/pdf/2010.05646.pdf
+
+    All constructor arguments are forwarded to ``TorchSTFT``; ``use_mel`` is also kept
+    on the module.
+    """
+
+    def __init__(
+        self, sample_rate, n_fft, hop_length, win_length, mel_fmin=None, mel_fmax=None, n_mels=None, use_mel=True
+    ):
+        super().__init__()
+        self.use_mel = use_mel
+        stft_options = {
+            "sample_rate": sample_rate,
+            "mel_fmin": mel_fmin,
+            "mel_fmax": mel_fmax,
+            "n_mels": n_mels,
+            "use_mel": use_mel,
+        }
+        self.stft = TorchSTFT(n_fft, hop_length, win_length, **stft_options)
+
+    def forward(self, y_hat, y):
+        """Return the L1 distance between the log transforms of ``y`` and ``y_hat``."""
+        spec_fake = self.stft(y_hat)
+        spec_real = self.stft(y)
+        # magnitude loss
+        return F.l1_loss(torch.log(spec_real), torch.log(spec_fake))
+
+
+class MultiScaleSubbandSTFTLoss(MultiScaleSTFTLoss):
+    """Multiscale STFT loss for multi band model outputs.
+
+    Every (batch, band) pair is turned into its own row before the parent loss is
+    applied. From MultiBand-MelGAN paper https://arxiv.org/abs/2005.05106
+    """
+
+    # pylint: disable=no-self-use
+    def forward(self, y_hat, y):
+        """Fold the first two dimensions together and delegate to ``MultiScaleSTFTLoss.forward``."""
+        flat_fake = y_hat.view(-1, 1, y_hat.shape[2]).squeeze(1)
+        flat_real = y.view(-1, 1, y.shape[2]).squeeze(1)
+        return super().forward(flat_fake, flat_real)
+
+
+class MSEGLoss(nn.Module):
+    """Mean Squared Generator Loss: MSE between the given scores and an all-ones target."""
+
+    # pylint: disable=no-self-use
+    def forward(self, score_real):
+        """Return the MSE of ``score_real`` against a tensor of ones of the same shape."""
+        ones_target = score_real.new_ones(score_real.shape)
+        return F.mse_loss(score_real, ones_target)
+
+
+class HingeGLoss(nn.Module):
+    """Hinge Generator Loss: mean of ``relu(1 - score)`` over the given scores."""
+
+    # pylint: disable=no-self-use
+    def forward(self, score_real):
+        # NOTE(review): despite the parameter name, GeneratorLoss in this file passes the
+        # discriminator scores of generated audio here (via _apply_G_adv_loss).
+        # NOTE(review): the hinge generator objective is often written as -mean(score);
+        # confirm that the relu form below is intended before relying on it.
+        # TODO: this might be wrong
+        loss_fake = torch.mean(F.relu(1.0 - score_real))
+        return loss_fake
+
+
+##################################
+# DISCRIMINATOR LOSSES
+##################################
+
+
+class MSEDLoss(nn.Module):
+    """Mean Squared Discriminator Loss: real scores are pulled towards 1, fake scores towards 0."""
+
+    def __init__(self):
+        super().__init__()
+        self.loss_func = nn.MSELoss()
+
+    # pylint: disable=no-self-use
+    def forward(self, score_fake, score_real):
+        """Return ``(loss_d, loss_real, loss_fake)`` where ``loss_d`` is the sum of the other two."""
+        ones_target = score_real.new_ones(score_real.shape)
+        zeros_target = score_fake.new_zeros(score_fake.shape)
+        loss_real = self.loss_func(score_real, ones_target)
+        loss_fake = self.loss_func(score_fake, zeros_target)
+        return loss_real + loss_fake, loss_real, loss_fake
+
+
+class HingeDLoss(nn.Module):
+    """Hinge Discriminator Loss: ``relu(1 - real)`` plus ``relu(1 + fake)``, each averaged."""
+
+    # pylint: disable=no-self-use
+    def forward(self, score_fake, score_real):
+        """Return ``(loss_d, loss_real, loss_fake)`` where ``loss_d`` is the sum of the other two."""
+        real_margin = F.relu(1.0 - score_real)
+        fake_margin = F.relu(1.0 + score_fake)
+        loss_real = torch.mean(real_margin)
+        loss_fake = torch.mean(fake_margin)
+        return loss_real + loss_fake, loss_real, loss_fake
+
+
+class MelganFeatureLoss(nn.Module):
+    """Average L1 distance between paired fake and real feature maps."""
+
+    def __init__(self):
+        super().__init__()
+        self.loss_func = nn.L1Loss()
+
+    # pylint: disable=no-self-use
+    def forward(self, fake_feats, real_feats):
+        """Return the mean L1 loss over all feature pairs.
+
+        ``fake_feats[i]`` and ``real_feats[i]`` are zipped together, so both arguments
+        are indexed collections of feature-map sequences.
+        """
+        pair_losses = [
+            self.loss_func(fake_feat, real_feat)
+            for idx, _ in enumerate(fake_feats)
+            for fake_feat, real_feat in zip(fake_feats[idx], real_feats[idx])
+        ]
+        return sum(pair_losses) / len(pair_losses)
+
+
+#####################################
+# LOSS WRAPPERS
+#####################################
+
+
+def _apply_G_adv_loss(scores_fake, loss_func):
+    """Apply ``loss_func`` to the generator scores.
+
+    A list of scores yields the mean of the per-entry losses; anything else is passed
+    to ``loss_func`` directly and its result returned unchanged.
+    """
+    if not isinstance(scores_fake, list):
+        return loss_func(scores_fake)
+    summed = sum(loss_func(score_fake) for score_fake in scores_fake)
+    return summed / len(scores_fake)
+
+
+def _apply_D_loss(scores_fake, scores_real, loss_func):
+    """Compute the discriminator loss and normalize it by the number of scales.
+
+    Args:
+        scores_fake: discriminator score(s) for generated samples; a list means one
+            entry per scale (discriminator).
+        scores_real: discriminator score(s) for real samples, paired with ``scores_fake``.
+        loss_func: callable taking ``score_fake`` and ``score_real`` and returning
+            ``(total_loss, real_loss, fake_loss)``.
+
+    Returns:
+        tuple: ``(loss, real_loss, fake_loss)``; for list inputs each value is the
+        average over the scales.
+    """
+    if not isinstance(scores_fake, list):
+        # single scale loss
+        loss, real_loss, fake_loss = loss_func(scores_fake, scores_real)
+        return loss, real_loss, fake_loss
+
+    # multi-scale loss
+    loss = 0
+    real_loss = 0
+    fake_loss = 0
+    for score_fake, score_real in zip(scores_fake, scores_real):
+        # Per-scale results get their own names so they cannot overwrite the running sums.
+        scale_loss, scale_real_loss, scale_fake_loss = loss_func(score_fake=score_fake, score_real=score_real)
+        loss = loss + scale_loss
+        real_loss = real_loss + scale_real_loss
+        fake_loss = fake_loss + scale_fake_loss
+    # normalize loss values with number of scales (discriminators)
+    loss = loss / len(scores_fake)
+    real_loss = real_loss / len(scores_real)
+    fake_loss = fake_loss / len(scores_fake)
+    return loss, real_loss, fake_loss
+
+
+##################################
+# MODEL LOSSES
+##################################
+
+
+class GeneratorLoss(nn.Module):
+    """Generator Loss Wrapper. Based on model configuration it sets a right set of loss functions and computes
+    losses. It allows to experiment with different combinations of loss functions with different models by just
+    changing configurations.
+
+    Every ``use_*`` switch defaults to ``False`` and every ``*_weight`` to ``0.0`` when the
+    key is absent from the configuration.
+
+    Args:
+        C (AttrDict): model configuration.
+    """
+
+    def __init__(self, C):
+        super().__init__()
+
+        def _option(key, default):
+            # Optional config entry: fall back to ``default`` when the key is absent.
+            return getattr(C, key) if key in C else default
+
+        self.use_stft_loss = _option("use_stft_loss", False)
+        self.use_subband_stft_loss = _option("use_subband_stft_loss", False)
+        self.use_mse_gan_loss = _option("use_mse_gan_loss", False)
+        self.use_hinge_gan_loss = _option("use_hinge_gan_loss", False)
+        self.use_feat_match_loss = _option("use_feat_match_loss", False)
+        self.use_l1_spec_loss = _option("use_l1_spec_loss", False)
+
+        assert not (
+            self.use_mse_gan_loss and self.use_hinge_gan_loss
+        ), " [!] Cannot use HingeGANLoss and MSEGANLoss together."
+
+        self.stft_loss_weight = _option("stft_loss_weight", 0.0)
+        self.subband_stft_loss_weight = _option("subband_stft_loss_weight", 0.0)
+        self.mse_gan_loss_weight = _option("mse_G_loss_weight", 0.0)
+        # The key was previously misspelled as "hinde_G_loss_weight", which forced this weight to 0.0.
+        self.hinge_gan_loss_weight = _option("hinge_G_loss_weight", 0.0)
+        self.feat_match_loss_weight = _option("feat_match_loss_weight", 0.0)
+        self.l1_spec_loss_weight = _option("l1_spec_loss_weight", 0.0)
+
+        # Build only the loss modules that are switched on, using the defaulted flags so a
+        # missing ``use_*`` key behaves like ``False`` instead of failing on attribute access.
+        if self.use_stft_loss:
+            self.stft_loss = MultiScaleSTFTLoss(**C.stft_loss_params)
+        if self.use_subband_stft_loss:
+            self.subband_stft_loss = MultiScaleSubbandSTFTLoss(**C.subband_stft_loss_params)
+        if self.use_mse_gan_loss:
+            self.mse_loss = MSEGLoss()
+        if self.use_hinge_gan_loss:
+            self.hinge_loss = HingeGLoss()
+        if self.use_feat_match_loss:
+            self.feat_match_loss = MelganFeatureLoss()
+        if self.use_l1_spec_loss:
+            assert C.audio["sample_rate"] == C.l1_spec_loss_params["sample_rate"]
+            self.l1_spec_loss = L1SpecLoss(**C.l1_spec_loss_params)
+
+    def forward(
+        self, y_hat=None, y=None, scores_fake=None, feats_fake=None, feats_real=None, y_hat_sub=None, y_sub=None
+    ):
+        """Compute all enabled generator losses.
+
+        Args:
+            y_hat: generated waveform; indexed as a 3-D tensor and squeezed on dim 1.
+            y: reference waveform with the same layout as ``y_hat``.
+            scores_fake: discriminator score(s) for ``y_hat``; adversarial terms are
+                skipped when this is ``None``.
+            feats_fake: discriminator features for ``y_hat``; feature matching is
+                skipped when this is ``None``.
+            feats_real: discriminator features for ``y``.
+            y_hat_sub: generated sub-band signal for the sub-band STFT loss.
+            y_sub: reference sub-band signal for the sub-band STFT loss.
+
+        Returns:
+            dict: the individual ``G_*`` terms plus ``"G_gen_loss"``, ``"G_adv_loss"``
+            and their sum under ``"loss"``.
+        """
+        gen_loss = 0
+        adv_loss = 0
+        return_dict = {}
+
+        # STFT Loss
+        if self.use_stft_loss:
+            # y_hat is cropped to the length of y before the comparison
+            stft_loss_mg, stft_loss_sc = self.stft_loss(y_hat[:, :, : y.size(2)].squeeze(1), y.squeeze(1))
+            return_dict["G_stft_loss_mg"] = stft_loss_mg
+            return_dict["G_stft_loss_sc"] = stft_loss_sc
+            gen_loss = gen_loss + self.stft_loss_weight * (stft_loss_mg + stft_loss_sc)
+
+        # L1 Spec loss
+        if self.use_l1_spec_loss:
+            l1_spec_loss = self.l1_spec_loss(y_hat, y)
+            return_dict["G_l1_spec_loss"] = l1_spec_loss
+            gen_loss = gen_loss + self.l1_spec_loss_weight * l1_spec_loss
+
+        # subband STFT Loss
+        if self.use_subband_stft_loss:
+            subband_stft_loss_mg, subband_stft_loss_sc = self.subband_stft_loss(y_hat_sub, y_sub)
+            return_dict["G_subband_stft_loss_mg"] = subband_stft_loss_mg
+            return_dict["G_subband_stft_loss_sc"] = subband_stft_loss_sc
+            gen_loss = gen_loss + self.subband_stft_loss_weight * (subband_stft_loss_mg + subband_stft_loss_sc)
+
+        # multiscale MSE adversarial loss
+        if self.use_mse_gan_loss and scores_fake is not None:
+            mse_fake_loss = _apply_G_adv_loss(scores_fake, self.mse_loss)
+            return_dict["G_mse_fake_loss"] = mse_fake_loss
+            adv_loss = adv_loss + self.mse_gan_loss_weight * mse_fake_loss
+
+        # multiscale Hinge adversarial loss
+        # (the guard used to be inverted, so this branch only ran when no scores were given)
+        if self.use_hinge_gan_loss and scores_fake is not None:
+            hinge_fake_loss = _apply_G_adv_loss(scores_fake, self.hinge_loss)
+            return_dict["G_hinge_fake_loss"] = hinge_fake_loss
+            adv_loss = adv_loss + self.hinge_gan_loss_weight * hinge_fake_loss
+
+        # Feature Matching Loss
+        if self.use_feat_match_loss and feats_fake is not None:
+            feat_match_loss = self.feat_match_loss(feats_fake, feats_real)
+            return_dict["G_feat_match_loss"] = feat_match_loss
+            adv_loss = adv_loss + self.feat_match_loss_weight * feat_match_loss
+        return_dict["loss"] = gen_loss + adv_loss
+        return_dict["G_gen_loss"] = gen_loss
+        return_dict["G_adv_loss"] = adv_loss
+        return return_dict
+
+
+class DiscriminatorLoss(nn.Module):
+    """Discriminator counterpart of ```GeneratorLoss```.
+
+    Picks the MSE or the hinge discriminator loss from the configuration ``C`` and
+    reports the total, real and fake terms.
+    """
+
+    def __init__(self, C):
+        super().__init__()
+        both_enabled = C.use_mse_gan_loss and C.use_hinge_gan_loss
+        assert not (both_enabled), " [!] Cannot use HingeGANLoss and MSEGANLoss together."
+
+        self.use_mse_gan_loss = C.use_mse_gan_loss
+        self.use_hinge_gan_loss = C.use_hinge_gan_loss
+
+        if C.use_mse_gan_loss:
+            self.mse_loss = MSEDLoss()
+        if C.use_hinge_gan_loss:
+            self.hinge_loss = HingeDLoss()
+
+    def forward(self, scores_fake, scores_real):
+        """Return a dict with the enabled ``D_*`` terms and their sum under ``"loss"``."""
+        total = 0
+        return_dict = {}
+
+        if self.use_mse_gan_loss:
+            mse_terms = _apply_D_loss(scores_fake=scores_fake, scores_real=scores_real, loss_func=self.mse_loss)
+            (
+                return_dict["D_mse_gan_loss"],
+                return_dict["D_mse_gan_real_loss"],
+                return_dict["D_mse_gan_fake_loss"],
+            ) = mse_terms
+            total = total + return_dict["D_mse_gan_loss"]
+
+        if self.use_hinge_gan_loss:
+            hinge_terms = _apply_D_loss(scores_fake=scores_fake, scores_real=scores_real, loss_func=self.hinge_loss)
+            (
+                return_dict["D_hinge_gan_loss"],
+                return_dict["D_hinge_gan_real_loss"],
+                return_dict["D_hinge_gan_fake_loss"],
+            ) = hinge_terms
+            total = total + return_dict["D_hinge_gan_loss"]
+
+        return_dict["loss"] = total
+        return return_dict
+
+
+class WaveRNNLoss(nn.Module):
+    """Selects the WaveRNN training loss from the output mode.
+
+    ``"mold"`` uses ``discretized_mix_logistic_loss``, ``"gauss"`` uses ``gaussian_loss``
+    and any integer mode uses cross entropy. Other values raise ``ValueError``.
+    """
+
+    def __init__(self, wave_rnn_mode: Union[str, int]):
+        super().__init__()
+        if isinstance(wave_rnn_mode, int):
+            self.loss_func = torch.nn.CrossEntropyLoss()
+        elif wave_rnn_mode == "mold":
+            self.loss_func = discretized_mix_logistic_loss
+        elif wave_rnn_mode == "gauss":
+            self.loss_func = gaussian_loss
+        else:
+            raise ValueError(" [!] Unknown mode for Wavernn.")
+
+    def forward(self, y_hat, y) -> Dict:
+        """Return ``{"loss": value}`` computed by the selected loss function."""
+        return {"loss": self.loss_func(y_hat, y)}
diff --git a/TTS/vocoder/layers/lvc_block.py b/TTS/vocoder/layers/lvc_block.py
new file mode 100644
index 0000000000000000000000000000000000000000..8913a1132ec769fd304077412289c01c0d1cb17b
--- /dev/null
+++ b/TTS/vocoder/layers/lvc_block.py
@@ -0,0 +1,198 @@
+import torch
+import torch.nn.functional as F
+
+
+class KernelPredictor(torch.nn.Module):
+    """Kernel predictor for the location-variable convolutions"""
+
+    def __init__(  # pylint: disable=dangerous-default-value
+        self,
+        cond_channels,
+        conv_in_channels,
+        conv_out_channels,
+        conv_layers,
+        conv_kernel_size=3,
+        kpnet_hidden_channels=64,
+        kpnet_conv_size=3,
+        kpnet_dropout=0.0,
+        kpnet_nonlinear_activation="LeakyReLU",
+        kpnet_nonlinear_activation_params={"negative_slope": 0.1},
+    ):
+        """
+        Args:
+            cond_channels (int): number of channel for the conditioning sequence,
+            conv_in_channels (int): number of channel for the input sequence,
+            conv_out_channels (int): number of channel for the output sequence,
+            conv_layers (int): number of convolutions whose kernels and biases are predicted,
+            conv_kernel_size (int): kernel size of each predicted convolution,
+            kpnet_hidden_channels (int): channel width inside the predictor network,
+            kpnet_conv_size (int): kernel size of the predictor's own convolutions,
+            kpnet_dropout (float): dropout probability used in the residual stack,
+            kpnet_nonlinear_activation (str): name of the ``torch.nn`` activation class,
+            kpnet_nonlinear_activation_params (dict): keyword arguments for that activation.
+        """
+        super().__init__()
+
+        self.conv_in_channels = conv_in_channels
+        self.conv_out_channels = conv_out_channels
+        self.conv_kernel_size = conv_kernel_size
+        self.conv_layers = conv_layers
+
+        # Number of predicted kernel weights and biases per conditioning frame.
+        num_kernel_values = conv_in_channels * conv_out_channels * conv_kernel_size * conv_layers
+        num_bias_values = conv_out_channels * conv_layers
+
+        same_padding = (kpnet_conv_size - 1) // 2
+
+        def _activation():
+            return getattr(torch.nn, kpnet_nonlinear_activation)(**kpnet_nonlinear_activation_params)
+
+        def _hidden_conv():
+            return torch.nn.Conv1d(
+                kpnet_hidden_channels, kpnet_hidden_channels, kpnet_conv_size, padding=same_padding, bias=True
+            )
+
+        self.input_conv = torch.nn.Sequential(
+            torch.nn.Conv1d(cond_channels, kpnet_hidden_channels, 5, padding=(5 - 1) // 2, bias=True),
+            _activation(),
+        )
+
+        # Three groups of: dropout, then two (conv, activation) pairs.
+        residual_layers = []
+        for _ in range(3):
+            residual_layers.append(torch.nn.Dropout(kpnet_dropout))
+            for _ in range(2):
+                residual_layers.append(_hidden_conv())
+                residual_layers.append(_activation())
+        self.residual_conv = torch.nn.Sequential(*residual_layers)
+
+        self.kernel_conv = torch.nn.Conv1d(
+            kpnet_hidden_channels, num_kernel_values, kpnet_conv_size, padding=same_padding, bias=True
+        )
+        self.bias_conv = torch.nn.Conv1d(
+            kpnet_hidden_channels, num_bias_values, kpnet_conv_size, padding=same_padding, bias=True
+        )
+
+    def forward(self, c):
+        """
+        Args:
+            c (Tensor): the conditioning sequence (batch, cond_channels, cond_length)
+        Returns:
+            tuple: kernels viewed as (batch, conv_layers, conv_in_channels, conv_out_channels,
+            conv_kernel_size, cond_length) and bias viewed as
+            (batch, conv_layers, conv_out_channels, cond_length).
+        """
+        batch, _, cond_length = c.shape
+
+        hidden = self.input_conv(c)
+        hidden = hidden + self.residual_conv(hidden)
+        flat_kernels = self.kernel_conv(hidden)
+        flat_bias = self.bias_conv(hidden)
+
+        kernel_shape = (
+            batch,
+            self.conv_layers,
+            self.conv_in_channels,
+            self.conv_out_channels,
+            self.conv_kernel_size,
+            cond_length,
+        )
+        bias_shape = (batch, self.conv_layers, self.conv_out_channels, cond_length)
+        kernels = flat_kernels.contiguous().view(*kernel_shape)
+        bias = flat_bias.contiguous().view(*bias_shape)
+        return kernels, bias
+
+
+class LVCBlock(torch.nn.Module):
+    """the location-variable convolutions
+
+    The block upsamples its input with a transposed convolution, predicts per-frame
+    kernels and biases from the conditioning sequence with ``KernelPredictor`` and then
+    applies ``conv_layers`` rounds of dilated convolution, location-variable convolution
+    and a gated (sigmoid * tanh) residual update.
+    """
+
+    def __init__(
+        self,
+        in_channels,
+        cond_channels,
+        upsample_ratio,
+        conv_layers=4,
+        conv_kernel_size=3,
+        cond_hop_length=256,
+        kpnet_hidden_channels=64,
+        kpnet_conv_size=3,
+        kpnet_dropout=0.0,
+    ):
+        super().__init__()
+
+        self.cond_hop_length = cond_hop_length
+        self.conv_layers = conv_layers
+        self.conv_kernel_size = conv_kernel_size
+        self.convs = torch.nn.ModuleList()
+
+        # Transposed convolution with stride ``upsample_ratio`` and a kernel twice that size.
+        self.upsample = torch.nn.ConvTranspose1d(
+            in_channels,
+            in_channels,
+            kernel_size=upsample_ratio * 2,
+            stride=upsample_ratio,
+            padding=upsample_ratio // 2 + upsample_ratio % 2,
+            output_padding=upsample_ratio % 2,
+        )
+
+        # The predicted convolutions output 2 * in_channels so that forward() can split
+        # the result into the sigmoid half and the tanh half.
+        self.kernel_predictor = KernelPredictor(
+            cond_channels=cond_channels,
+            conv_in_channels=in_channels,
+            conv_out_channels=2 * in_channels,
+            conv_layers=conv_layers,
+            conv_kernel_size=conv_kernel_size,
+            kpnet_hidden_channels=kpnet_hidden_channels,
+            kpnet_conv_size=kpnet_conv_size,
+            kpnet_dropout=kpnet_dropout,
+        )
+
+        # One dilated convolution per layer; dilation and padding both grow as 3**i.
+        for i in range(conv_layers):
+            padding = (3**i) * int((conv_kernel_size - 1) / 2)
+            conv = torch.nn.Conv1d(
+                in_channels, in_channels, kernel_size=conv_kernel_size, padding=padding, dilation=3**i
+            )
+
+            self.convs.append(conv)
+
+    def forward(self, x, c):
+        """forward propagation of the location-variable convolutions.
+        Args:
+            x (Tensor): the input sequence (batch, in_channels, in_length)
+            c (Tensor): the conditioning sequence (batch, cond_channels, cond_length)
+
+        Returns:
+            Tensor: the output sequence (batch, in_channels, in_length)
+        """
+        # NOTE(review): ``x`` goes through ``self.upsample`` below, so the returned length
+        # presumably differs from the input ``in_length`` - confirm the docstring above.
+        in_channels = x.shape[1]
+        kernels, bias = self.kernel_predictor(c)
+
+        x = F.leaky_relu(x, 0.2)
+        x = self.upsample(x)
+
+        for i in range(self.conv_layers):
+            y = F.leaky_relu(x, 0.2)
+            y = self.convs[i](y)
+            y = F.leaky_relu(y, 0.2)
+
+            # kernels / bias predicted for layer i
+            k = kernels[:, i, :, :, :, :]
+            b = bias[:, i, :, :]
+            y = self.location_variable_convolution(y, k, b, 1, self.cond_hop_length)
+            # gated residual update: first half of the channels gates the second half
+            x = x + torch.sigmoid(y[:, :in_channels, :]) * torch.tanh(y[:, in_channels:, :])
+        return x
+
+    @staticmethod
+    def location_variable_convolution(x, kernel, bias, dilation, hop_size):
+        """perform location-variable convolution operation on the input sequence (x) using the local convolution kernel.
+        Time: 414 μs ± 309 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each), test on NVIDIA V100.
+        Args:
+            x (Tensor): the input sequence (batch, in_channels, in_length).
+            kernel (Tensor): the local convolution kernel (batch, in_channel, out_channels, kernel_size, kernel_length)
+            bias (Tensor): the bias for the local convolution (batch, out_channels, kernel_length)
+            dilation (int): the dilation of convolution.
+            hop_size (int): the hop_size of the conditioning sequence.
+        Returns:
+            (Tensor): the output sequence after performing local convolution. (batch, out_channels, in_length).
+        """
+        batch, _, in_length = x.shape
+        batch, _, out_channels, kernel_size, kernel_length = kernel.shape
+
+        # every conditioning frame must cover exactly hop_size input samples
+        assert in_length == (
+            kernel_length * hop_size
+        ), f"length of (x, kernel) is not matched, {in_length} vs {kernel_length * hop_size}"
+
+        padding = dilation * int((kernel_size - 1) / 2)
+        x = F.pad(x, (padding, padding), "constant", 0)  # (batch, in_channels, in_length + 2*padding)
+        # cut the padded signal into one window per conditioning frame
+        x = x.unfold(2, hop_size + 2 * padding, hop_size)  # (batch, in_channels, kernel_length, hop_size + 2*padding)
+
+        if hop_size < dilation:
+            x = F.pad(x, (0, dilation), "constant", 0)
+        x = x.unfold(
+            3, dilation, dilation
+        )  # (batch, in_channels, kernel_length, (hop_size + 2*padding)/dilation, dilation)
+        x = x[:, :, :, :, :hop_size]
+        x = x.transpose(3, 4)  # (batch, in_channels, kernel_length, dilation, (hop_size + 2*padding)/dilation)
+        x = x.unfold(4, kernel_size, 1)  # (batch, in_channels, kernel_length, dilation, _, kernel_size)
+
+        # contract over input channels (i) and kernel taps (k) with the frame-specific kernel
+        o = torch.einsum("bildsk,biokl->bolsd", x, kernel)
+        # broadcast the per-frame bias over the two trailing dimensions
+        o = o + bias.unsqueeze(-1).unsqueeze(-1)
+        # merge everything after the channel dimension back into one time axis
+        o = o.contiguous().view(batch, out_channels, -1)
+        return o
diff --git a/TTS/vocoder/layers/melgan.py b/TTS/vocoder/layers/melgan.py
new file mode 100644
index 0000000000000000000000000000000000000000..4bb328e98354dc0683b3c5b4f4160dd54d92fabd
--- /dev/null
+++ b/TTS/vocoder/layers/melgan.py
@@ -0,0 +1,42 @@
+from torch import nn
+from torch.nn.utils import weight_norm
+
+
+class ResidualStack(nn.Module):
+    """Chain of weight-normalized residual blocks with exponentially growing dilation.
+
+    Block ``idx`` uses dilation ``kernel_size**idx`` and is paired with a 1x1
+    weight-normalized shortcut convolution.
+    """
+
+    def __init__(self, channels, num_res_blocks, kernel_size):
+        super().__init__()
+
+        assert (kernel_size - 1) % 2 == 0, " [!] kernel_size has to be odd."
+        base_padding = (kernel_size - 1) // 2
+
+        def _make_block(dilation):
+            # activation -> reflection pad -> dilated conv -> activation -> 1x1 conv
+            return nn.Sequential(
+                nn.LeakyReLU(0.2),
+                nn.ReflectionPad1d(base_padding * dilation),
+                weight_norm(nn.Conv1d(channels, channels, kernel_size=kernel_size, dilation=dilation, bias=True)),
+                nn.LeakyReLU(0.2),
+                weight_norm(nn.Conv1d(channels, channels, kernel_size=1, bias=True)),
+            )
+
+        self.blocks = nn.ModuleList([_make_block(kernel_size**idx) for idx in range(num_res_blocks)])
+
+        self.shortcuts = nn.ModuleList()
+        for _ in range(num_res_blocks):
+            self.shortcuts.append(weight_norm(nn.Conv1d(channels, channels, kernel_size=1, bias=True)))
+
+    def forward(self, x):
+        """Apply every (shortcut, block) pair in order, summing their outputs at each step."""
+        hidden = x
+        for shortcut, block in zip(self.shortcuts, self.blocks):
+            hidden = shortcut(hidden) + block(hidden)
+        return hidden
+
+    def remove_weight_norm(self):
+        """Remove weight normalization from both convs of every block and from every shortcut."""
+        for shortcut, block in zip(self.shortcuts, self.blocks):
+            for conv_index in (2, 4):
+                nn.utils.remove_weight_norm(block[conv_index])
+            nn.utils.remove_weight_norm(shortcut)
diff --git a/TTS/vocoder/layers/parallel_wavegan.py b/TTS/vocoder/layers/parallel_wavegan.py
new file mode 100644
index 0000000000000000000000000000000000000000..51142e5eceb20564585635a9040a24bc8eb3b6e3
--- /dev/null
+++ b/TTS/vocoder/layers/parallel_wavegan.py
@@ -0,0 +1,77 @@
+import torch
+from torch.nn import functional as F
+
+
+class ResidualBlock(torch.nn.Module):
+    """Residual block module in WaveNet.
+
+    A dilated convolution feeds a gated ``tanh * sigmoid`` activation, optionally
+    conditioned on auxiliary features, and produces a residual and a skip output.
+    """
+
+    def __init__(
+        self,
+        kernel_size=3,
+        res_channels=64,
+        gate_channels=128,
+        skip_channels=64,
+        aux_channels=80,
+        dropout=0.0,
+        dilation=1,
+        bias=True,
+        use_causal_conv=False,
+    ):
+        super().__init__()
+        self.dropout = dropout
+        self.use_causal_conv = use_causal_conv
+
+        if not use_causal_conv:
+            assert (kernel_size - 1) % 2 == 0, "Not support even number kernel size."
+            padding = (kernel_size - 1) // 2 * dilation
+        else:
+            # no future time stamps available
+            padding = (kernel_size - 1) * dilation
+
+        # dilation conv
+        self.conv = torch.nn.Conv1d(
+            res_channels, gate_channels, kernel_size, padding=padding, dilation=dilation, bias=bias
+        )
+
+        # local conditioning
+        self.conv1x1_aux = torch.nn.Conv1d(aux_channels, gate_channels, 1, bias=False) if aux_channels > 0 else None
+
+        # conv output is split into two groups
+        half_gate_channels = gate_channels // 2
+        self.conv1x1_out = torch.nn.Conv1d(half_gate_channels, res_channels, 1, bias=bias)
+        self.conv1x1_skip = torch.nn.Conv1d(half_gate_channels, skip_channels, 1, bias=bias)
+
+    def forward(self, x, c):
+        """
+        x: B x D_res x T
+        c: B x D_aux x T
+
+        Returns the residual output and the skip output, in that order.
+        """
+        residual = x
+        hidden = self.conv(F.dropout(x, p=self.dropout, training=self.training))
+
+        # remove future time steps if use_causal_conv conv
+        if self.use_causal_conv:
+            hidden = hidden[:, :, : residual.size(-1)]
+
+        # split into two part for gated activation
+        channel_dim = 1
+        tanh_in, sigmoid_in = hidden.split(hidden.size(channel_dim) // 2, dim=channel_dim)
+
+        # local conditioning
+        if c is not None:
+            assert self.conv1x1_aux is not None
+            cond = self.conv1x1_aux(c)
+            cond_tanh, cond_sigmoid = cond.split(cond.size(channel_dim) // 2, dim=channel_dim)
+            tanh_in = tanh_in + cond_tanh
+            sigmoid_in = sigmoid_in + cond_sigmoid
+
+        gated = torch.tanh(tanh_in) * torch.sigmoid(sigmoid_in)
+
+        # for skip connection
+        skip = self.conv1x1_skip(gated)
+
+        # for residual connection
+        # NOTE(review): the sum is scaled by 0.5**2 (= 0.25); confirm this constant is intended.
+        out = (self.conv1x1_out(gated) + residual) * (0.5**2)
+
+        return out, skip
diff --git a/TTS/vocoder/layers/pqmf.py b/TTS/vocoder/layers/pqmf.py
new file mode 100644
index 0000000000000000000000000000000000000000..6253efbbefc32222464a97bee99707d46bcdcf8b
--- /dev/null
+++ b/TTS/vocoder/layers/pqmf.py
@@ -0,0 +1,53 @@
+import numpy as np
+import torch
+import torch.nn.functional as F
+from scipy import signal as sig
+
+
+# adapted from
+# https://github.com/kan-bayashi/ParallelWaveGAN/tree/master/parallel_wavegan
+class PQMF(torch.nn.Module):
+    """Filter bank with ``N`` bands built from a Kaiser-windowed FIR prototype.
+
+    ``analysis`` splits a signal into ``N`` sub-bands with stride ``N``; ``synthesis``
+    upsamples the sub-bands by ``N`` and filters them back into a single channel.
+    """
+
+    def __init__(self, N=4, taps=62, cutoff=0.15, beta=9.0):
+        super().__init__()
+
+        self.N = N
+        self.taps = taps
+        self.cutoff = cutoff
+        self.beta = beta
+
+        prototype = sig.firwin(taps + 1, cutoff, window=("kaiser", beta))
+        tap_offsets = np.arange(taps + 1) - ((taps - 1) / 2)  # TODO: (taps - 1) -> taps
+        analysis_bank = np.zeros((N, len(prototype)))
+        synthesis_bank = np.zeros((N, len(prototype)))
+        for band in range(N):
+            constant_factor = (2 * band + 1) * (np.pi / (2 * N)) * tap_offsets
+            phase = (-1) ** band * np.pi / 4
+            # analysis and synthesis filters differ only in the sign of the phase
+            analysis_bank[band] = 2 * prototype * np.cos(constant_factor + phase)
+            synthesis_bank[band] = 2 * prototype * np.cos(constant_factor - phase)
+
+        # H: (N, 1, taps + 1) for analysis, G: (1, N, taps + 1) for synthesis
+        self.register_buffer("H", torch.from_numpy(analysis_bank[:, None, :]).float())
+        self.register_buffer("G", torch.from_numpy(synthesis_bank[None, :, :]).float())
+
+        # one-hot kernel placing each band's sample at the first of N upsampled positions
+        updown_filter = torch.zeros((N, N, N)).float()
+        band_index = torch.arange(N)
+        updown_filter[band_index, band_index, 0] = 1.0
+        self.register_buffer("updown_filter", updown_filter)
+
+        self.pad_fn = torch.nn.ConstantPad1d(taps // 2, 0.0)
+
+    def forward(self, x):
+        """Alias for ``analysis``."""
+        return self.analysis(x)
+
+    def analysis(self, x):
+        """Filter ``x`` with ``H`` using stride ``N`` and padding ``taps // 2``."""
+        half_taps = self.taps // 2
+        return F.conv1d(x, self.H, padding=half_taps, stride=self.N)
+
+    def synthesis(self, x):
+        """Upsample the sub-bands by ``N`` and filter them with ``G``."""
+        upsampled = F.conv_transpose1d(x, self.updown_filter * self.N, stride=self.N)
+        return F.conv1d(upsampled, self.G, padding=self.taps // 2)
diff --git a/TTS/vocoder/layers/qmf.dat b/TTS/vocoder/layers/qmf.dat
new file mode 100644
index 0000000000000000000000000000000000000000..4793abbfb3c67b88fed43f262c28f6fc2f26e5ca
--- /dev/null
+++ b/TTS/vocoder/layers/qmf.dat
@@ -0,0 +1,640 @@
+  0.0000000e+000
+ -5.5252865e-004
+ -5.6176926e-004
+ -4.9475181e-004
+ -4.8752280e-004
+ -4.8937912e-004
+ -5.0407143e-004
+ -5.2265643e-004
+ -5.4665656e-004
+ -5.6778026e-004
+ -5.8709305e-004
+ -6.1327474e-004
+ -6.3124935e-004
+ -6.5403334e-004
+ -6.7776908e-004
+ -6.9416146e-004
+ -7.1577365e-004
+ -7.2550431e-004
+ -7.4409419e-004
+ -7.4905981e-004
+ -7.6813719e-004
+ -7.7248486e-004
+ -7.8343323e-004
+ -7.7798695e-004
+ -7.8036647e-004
+ -7.8014496e-004
+ -7.7579773e-004
+ -7.6307936e-004
+ -7.5300014e-004
+ -7.3193572e-004
+ -7.2153920e-004
+ -6.9179375e-004
+ -6.6504151e-004
+ -6.3415949e-004
+ -5.9461189e-004
+ -5.5645764e-004
+ -5.1455722e-004
+ -4.6063255e-004
+ -4.0951215e-004
+ -3.5011759e-004
+ -2.8969812e-004
+ -2.0983373e-004
+ -1.4463809e-004
+ -6.1733441e-005
+  1.3494974e-005
+  1.0943831e-004
+  2.0430171e-004
+  2.9495311e-004
+  4.0265402e-004
+  5.1073885e-004
+  6.2393761e-004
+  7.4580259e-004
+  8.6084433e-004
+  9.8859883e-004
+  1.1250155e-003
+  1.2577885e-003
+  1.3902495e-003
+  1.5443220e-003
+  1.6868083e-003
+  1.8348265e-003
+  1.9841141e-003
+  2.1461584e-003
+  2.3017255e-003
+  2.4625617e-003
+  2.6201759e-003
+  2.7870464e-003
+  2.9469448e-003
+  3.1125421e-003
+  3.2739613e-003
+  3.4418874e-003
+  3.6008268e-003
+  3.7603923e-003
+  3.9207432e-003
+  4.0819753e-003
+  4.2264269e-003
+  4.3730720e-003
+  4.5209853e-003
+  4.6606461e-003
+  4.7932561e-003
+  4.9137604e-003
+  5.0393023e-003
+  5.1407354e-003
+  5.2461166e-003
+  5.3471681e-003
+  5.4196776e-003
+  5.4876040e-003
+  5.5475715e-003
+  5.5938023e-003
+  5.6220643e-003
+  5.6455197e-003
+  5.6389200e-003
+  5.6266114e-003
+  5.5917129e-003
+  5.5404364e-003
+  5.4753783e-003
+  5.3838976e-003
+  5.2715759e-003
+  5.1382275e-003
+  4.9839688e-003
+  4.8109469e-003
+  4.6039530e-003
+  4.3801862e-003
+  4.1251642e-003
+  3.8456408e-003
+  3.5401247e-003
+  3.2091886e-003
+  2.8446758e-003
+  2.4508540e-003
+  2.0274176e-003
+  1.5784683e-003
+  1.0902329e-003
+  5.8322642e-004
+  2.7604519e-005
+ -5.4642809e-004
+ -1.1568136e-003
+ -1.8039473e-003
+ -2.4826724e-003
+ -3.1933778e-003
+ -3.9401124e-003
+ -4.7222596e-003
+ -5.5337211e-003
+ -6.3792293e-003
+ -7.2615817e-003
+ -8.1798233e-003
+ -9.1325330e-003
+ -1.0115022e-002
+ -1.1131555e-002
+ -1.2185000e-002
+ -1.3271822e-002
+ -1.4390467e-002
+ -1.5540555e-002
+ -1.6732471e-002
+ -1.7943338e-002
+ -1.9187243e-002
+ -2.0453179e-002
+ -2.1746755e-002
+ -2.3068017e-002
+ -2.4416099e-002
+ -2.5787585e-002
+ -2.7185943e-002
+ -2.8607217e-002
+ -3.0050266e-002
+ -3.1501761e-002
+ -3.2975408e-002
+ -3.4462095e-002
+ -3.5969756e-002
+ -3.7481285e-002
+ -3.9005368e-002
+ -4.0534917e-002
+ -4.2064909e-002
+ -4.3609754e-002
+ -4.5148841e-002
+ -4.6684303e-002
+ -4.8216572e-002
+ -4.9738576e-002
+ -5.1255616e-002
+ -5.2763075e-002
+ -5.4245277e-002
+ -5.5717365e-002
+ -5.7161645e-002
+ -5.8591568e-002
+ -5.9983748e-002
+ -6.1345517e-002
+ -6.2685781e-002
+ -6.3971590e-002
+ -6.5224711e-002
+ -6.6436751e-002
+ -6.7607599e-002
+ -6.8704383e-002
+ -6.9763024e-002
+ -7.0762871e-002
+ -7.1700267e-002
+ -7.2568258e-002
+ -7.3362026e-002
+ -7.4100364e-002
+ -7.4745256e-002
+ -7.5313734e-002
+ -7.5800836e-002
+ -7.6199248e-002
+ -7.6499217e-002
+ -7.6709349e-002
+ -7.6817398e-002
+ -7.6823001e-002
+ -7.6720492e-002
+ -7.6505072e-002
+ -7.6174832e-002
+ -7.5730576e-002
+ -7.5157626e-002
+ -7.4466439e-002
+ -7.3640601e-002
+ -7.2677464e-002
+ -7.1582636e-002
+ -7.0353307e-002
+ -6.8966401e-002
+ -6.7452502e-002
+ -6.5769067e-002
+ -6.3944481e-002
+ -6.1960278e-002
+ -5.9816657e-002
+ -5.7515269e-002
+ -5.5046003e-002
+ -5.2409382e-002
+ -4.9597868e-002
+ -4.6630331e-002
+ -4.3476878e-002
+ -4.0145828e-002
+ -3.6641812e-002
+ -3.2958393e-002
+ -2.9082401e-002
+ -2.5030756e-002
+ -2.0799707e-002
+ -1.6370126e-002
+ -1.1762383e-002
+ -6.9636862e-003
+ -1.9765601e-003
+  3.2086897e-003
+  8.5711749e-003
+  1.4128883e-002
+  1.9883413e-002
+  2.5822729e-002
+  3.1953127e-002
+  3.8277657e-002
+  4.4780682e-002
+  5.1480418e-002
+  5.8370533e-002
+  6.5440985e-002
+  7.2694330e-002
+  8.0137293e-002
+  8.7754754e-002
+  9.5553335e-002
+  1.0353295e-001
+  1.1168269e-001
+  1.2000780e-001
+  1.2850029e-001
+  1.3715518e-001
+  1.4597665e-001
+  1.5496071e-001
+  1.6409589e-001
+  1.7338082e-001
+  1.8281725e-001
+  1.9239667e-001
+  2.0212502e-001
+  2.1197359e-001
+  2.2196527e-001
+  2.3206909e-001
+  2.4230169e-001
+  2.5264803e-001
+  2.6310533e-001
+  2.7366340e-001
+  2.8432142e-001
+  2.9507167e-001
+  3.0590986e-001
+  3.1682789e-001
+  3.2781137e-001
+  3.3887227e-001
+  3.4999141e-001
+  3.6115899e-001
+  3.7237955e-001
+  3.8363500e-001
+  3.9492118e-001
+  4.0623177e-001
+  4.1756969e-001
+  4.2891199e-001
+  4.4025538e-001
+  4.5159965e-001
+  4.6293081e-001
+  4.7424532e-001
+  4.8552531e-001
+  4.9677083e-001
+  5.0798175e-001
+  5.1912350e-001
+  5.3022409e-001
+  5.4125534e-001
+  5.5220513e-001
+  5.6307891e-001
+  5.7385241e-001
+  5.8454032e-001
+  5.9511231e-001
+  6.0557835e-001
+  6.1591099e-001
+  6.2612427e-001
+  6.3619801e-001
+  6.4612697e-001
+  6.5590163e-001
+  6.6551399e-001
+  6.7496632e-001
+  6.8423533e-001
+  6.9332824e-001
+  7.0223887e-001
+  7.1094104e-001
+  7.1944626e-001
+  7.2774489e-001
+  7.3582118e-001
+  7.4368279e-001
+  7.5131375e-001
+  7.5870808e-001
+  7.6586749e-001
+  7.7277809e-001
+  7.7942875e-001
+  7.8583531e-001
+  7.9197358e-001
+  7.9784664e-001
+  8.0344858e-001
+  8.0876950e-001
+  8.1381913e-001
+  8.1857760e-001
+  8.2304199e-001
+  8.2722753e-001
+  8.3110385e-001
+  8.3469374e-001
+  8.3797173e-001
+  8.4095414e-001
+  8.4362383e-001
+  8.4598185e-001
+  8.4803158e-001
+  8.4978052e-001
+  8.5119715e-001
+  8.5230470e-001
+  8.5310209e-001
+  8.5357206e-001
+  8.5373856e-001
+  8.5357206e-001
+  8.5310209e-001
+  8.5230470e-001
+  8.5119715e-001
+  8.4978052e-001
+  8.4803158e-001
+  8.4598185e-001
+  8.4362383e-001
+  8.4095414e-001
+  8.3797173e-001
+  8.3469374e-001
+  8.3110385e-001
+  8.2722753e-001
+  8.2304199e-001
+  8.1857760e-001
+  8.1381913e-001
+  8.0876950e-001
+  8.0344858e-001
+  7.9784664e-001
+  7.9197358e-001
+  7.8583531e-001
+  7.7942875e-001
+  7.7277809e-001
+  7.6586749e-001
+  7.5870808e-001
+  7.5131375e-001
+  7.4368279e-001
+  7.3582118e-001
+  7.2774489e-001
+  7.1944626e-001
+  7.1094104e-001
+  7.0223887e-001
+  6.9332824e-001
+  6.8423533e-001
+  6.7496632e-001
+  6.6551399e-001
+  6.5590163e-001
+  6.4612697e-001
+  6.3619801e-001
+  6.2612427e-001
+  6.1591099e-001
+  6.0557835e-001
+  5.9511231e-001
+  5.8454032e-001
+  5.7385241e-001
+  5.6307891e-001
+  5.5220513e-001
+  5.4125534e-001
+  5.3022409e-001
+  5.1912350e-001
+  5.0798175e-001
+  4.9677083e-001
+  4.8552531e-001
+  4.7424532e-001
+  4.6293081e-001
+  4.5159965e-001
+  4.4025538e-001
+  4.2891199e-001
+  4.1756969e-001
+  4.0623177e-001
+  3.9492118e-001
+  3.8363500e-001
+  3.7237955e-001
+  3.6115899e-001
+  3.4999141e-001
+  3.3887227e-001
+  3.2781137e-001
+  3.1682789e-001
+  3.0590986e-001
+  2.9507167e-001
+  2.8432142e-001
+  2.7366340e-001
+  2.6310533e-001
+  2.5264803e-001
+  2.4230169e-001
+  2.3206909e-001
+  2.2196527e-001
+  2.1197359e-001
+  2.0212502e-001
+  1.9239667e-001
+  1.8281725e-001
+  1.7338082e-001
+  1.6409589e-001
+  1.5496071e-001
+  1.4597665e-001
+  1.3715518e-001
+  1.2850029e-001
+  1.2000780e-001
+  1.1168269e-001
+  1.0353295e-001
+  9.5553335e-002
+  8.7754754e-002
+  8.0137293e-002
+  7.2694330e-002
+  6.5440985e-002
+  5.8370533e-002
+  5.1480418e-002
+  4.4780682e-002
+  3.8277657e-002
+  3.1953127e-002
+  2.5822729e-002
+  1.9883413e-002
+  1.4128883e-002
+  8.5711749e-003
+  3.2086897e-003
+ -1.9765601e-003
+ -6.9636862e-003
+ -1.1762383e-002
+ -1.6370126e-002
+ -2.0799707e-002
+ -2.5030756e-002
+ -2.9082401e-002
+ -3.2958393e-002
+ -3.6641812e-002
+ -4.0145828e-002
+ -4.3476878e-002
+ -4.6630331e-002
+ -4.9597868e-002
+ -5.2409382e-002
+ -5.5046003e-002
+ -5.7515269e-002
+ -5.9816657e-002
+ -6.1960278e-002
+ -6.3944481e-002
+ -6.5769067e-002
+ -6.7452502e-002
+ -6.8966401e-002
+ -7.0353307e-002
+ -7.1582636e-002
+ -7.2677464e-002
+ -7.3640601e-002
+ -7.4466439e-002
+ -7.5157626e-002
+ -7.5730576e-002
+ -7.6174832e-002
+ -7.6505072e-002
+ -7.6720492e-002
+ -7.6823001e-002
+ -7.6817398e-002
+ -7.6709349e-002
+ -7.6499217e-002
+ -7.6199248e-002
+ -7.5800836e-002
+ -7.5313734e-002
+ -7.4745256e-002
+ -7.4100364e-002
+ -7.3362026e-002
+ -7.2568258e-002
+ -7.1700267e-002
+ -7.0762871e-002
+ -6.9763024e-002
+ -6.8704383e-002
+ -6.7607599e-002
+ -6.6436751e-002
+ -6.5224711e-002
+ -6.3971590e-002
+ -6.2685781e-002
+ -6.1345517e-002
+ -5.9983748e-002
+ -5.8591568e-002
+ -5.7161645e-002
+ -5.5717365e-002
+ -5.4245277e-002
+ -5.2763075e-002
+ -5.1255616e-002
+ -4.9738576e-002
+ -4.8216572e-002
+ -4.6684303e-002
+ -4.5148841e-002
+ -4.3609754e-002
+ -4.2064909e-002
+ -4.0534917e-002
+ -3.9005368e-002
+ -3.7481285e-002
+ -3.5969756e-002
+ -3.4462095e-002
+ -3.2975408e-002
+ -3.1501761e-002
+ -3.0050266e-002
+ -2.8607217e-002
+ -2.7185943e-002
+ -2.5787585e-002
+ -2.4416099e-002
+ -2.3068017e-002
+ -2.1746755e-002
+ -2.0453179e-002
+ -1.9187243e-002
+ -1.7943338e-002
+ -1.6732471e-002
+ -1.5540555e-002
+ -1.4390467e-002
+ -1.3271822e-002
+ -1.2185000e-002
+ -1.1131555e-002
+ -1.0115022e-002
+ -9.1325330e-003
+ -8.1798233e-003
+ -7.2615817e-003
+ -6.3792293e-003
+ -5.5337211e-003
+ -4.7222596e-003
+ -3.9401124e-003
+ -3.1933778e-003
+ -2.4826724e-003
+ -1.8039473e-003
+ -1.1568136e-003
+ -5.4642809e-004
+  2.7604519e-005
+  5.8322642e-004
+  1.0902329e-003
+  1.5784683e-003
+  2.0274176e-003
+  2.4508540e-003
+  2.8446758e-003
+  3.2091886e-003
+  3.5401247e-003
+  3.8456408e-003
+  4.1251642e-003
+  4.3801862e-003
+  4.6039530e-003
+  4.8109469e-003
+  4.9839688e-003
+  5.1382275e-003
+  5.2715759e-003
+  5.3838976e-003
+  5.4753783e-003
+  5.5404364e-003
+  5.5917129e-003
+  5.6266114e-003
+  5.6389200e-003
+  5.6455197e-003
+  5.6220643e-003
+  5.5938023e-003
+  5.5475715e-003
+  5.4876040e-003
+  5.4196776e-003
+  5.3471681e-003
+  5.2461166e-003
+  5.1407354e-003
+  5.0393023e-003
+  4.9137604e-003
+  4.7932561e-003
+  4.6606461e-003
+  4.5209853e-003
+  4.3730720e-003
+  4.2264269e-003
+  4.0819753e-003
+  3.9207432e-003
+  3.7603923e-003
+  3.6008268e-003
+  3.4418874e-003
+  3.2739613e-003
+  3.1125421e-003
+  2.9469448e-003
+  2.7870464e-003
+  2.6201759e-003
+  2.4625617e-003
+  2.3017255e-003
+  2.1461584e-003
+  1.9841141e-003
+  1.8348265e-003
+  1.6868083e-003
+  1.5443220e-003
+  1.3902495e-003
+  1.2577885e-003
+  1.1250155e-003
+  9.8859883e-004
+  8.6084433e-004
+  7.4580259e-004
+  6.2393761e-004
+  5.1073885e-004
+  4.0265402e-004
+  2.9495311e-004
+  2.0430171e-004
+  1.0943831e-004
+  1.3494974e-005
+ -6.1733441e-005
+ -1.4463809e-004
+ -2.0983373e-004
+ -2.8969812e-004
+ -3.5011759e-004
+ -4.0951215e-004
+ -4.6063255e-004
+ -5.1455722e-004
+ -5.5645764e-004
+ -5.9461189e-004
+ -6.3415949e-004
+ -6.6504151e-004
+ -6.9179375e-004
+ -7.2153920e-004
+ -7.3193572e-004
+ -7.5300014e-004
+ -7.6307936e-004
+ -7.7579773e-004
+ -7.8014496e-004
+ -7.8036647e-004
+ -7.7798695e-004
+ -7.8343323e-004
+ -7.7248486e-004
+ -7.6813719e-004
+ -7.4905981e-004
+ -7.4409419e-004
+ -7.2550431e-004
+ -7.1577365e-004
+ -6.9416146e-004
+ -6.7776908e-004
+ -6.5403334e-004
+ -6.3124935e-004
+ -6.1327474e-004
+ -5.8709305e-004
+ -5.6778026e-004
+ -5.4665656e-004
+ -5.2265643e-004
+ -5.0407143e-004
+ -4.8937912e-004
+ -4.8752280e-004
+ -4.9475181e-004
+ -5.6176926e-004
+ -5.5252865e-004
diff --git a/TTS/vocoder/layers/upsample.py b/TTS/vocoder/layers/upsample.py
new file mode 100644
index 0000000000000000000000000000000000000000..e169db00b2749493e1cec07ee51c93178dada118
--- /dev/null
+++ b/TTS/vocoder/layers/upsample.py
@@ -0,0 +1,102 @@
+import torch
+from torch.nn import functional as F
+
+
+class Stretch2d(torch.nn.Module):
+    """Resize the last two axes of a 4D tensor by fixed scale factors.
+
+    Args:
+        x_scale: scale factor applied to the last axis.
+        y_scale: scale factor applied to the second-to-last axis.
+        mode (str): interpolation mode forwarded to ``F.interpolate``.
+    """
+
+    def __init__(self, x_scale, y_scale, mode="nearest"):
+        super().__init__()
+        self.x_scale, self.y_scale = x_scale, y_scale
+        self.mode = mode
+
+    def forward(self, x):
+        """
+        x (Tensor): Input tensor (B, C, F, T).
+        Tensor: Interpolated tensor (B, C, F * y_scale, T * x_scale),
+        """
+        factors = (self.y_scale, self.x_scale)
+        return F.interpolate(x, scale_factor=factors, mode=self.mode)
+
+
+class UpsampleNetwork(torch.nn.Module):
+    """Stack of (stretch -> Conv2d -> optional activation) stages that upsamples along time.
+
+    Args:
+        upsample_factors (Iterable[int]): time-axis scale of each stage, applied in order.
+        nonlinear_activation (str | None): name of a ``torch.nn`` module class inserted
+            after every convolution, or ``None`` for no activation.
+        nonlinear_activation_params (dict): keyword arguments for that activation class.
+        interpolate_mode (str): interpolation mode used by the stretch layers.
+        freq_axis_kernel_size (int): odd kernel size along the feature axis.
+        use_causal_conv (bool): if True, each convolution only looks at the current and
+            past frames of its (stretched) input.
+    """
+
+    # pylint: disable=dangerous-default-value
+    def __init__(
+        self,
+        upsample_factors,
+        nonlinear_activation=None,
+        nonlinear_activation_params={},
+        interpolate_mode="nearest",
+        freq_axis_kernel_size=1,
+        use_causal_conv=False,
+    ):
+        super().__init__()
+        self.use_causal_conv = use_causal_conv
+        self.up_layers = torch.nn.ModuleList()
+        for scale in upsample_factors:
+            # interpolation layer
+            stretch = Stretch2d(scale, 1, interpolate_mode)
+            self.up_layers += [stretch]
+
+            # conv layer
+            assert (freq_axis_kernel_size - 1) % 2 == 0, "Not support even number freq axis kernel size."
+            freq_axis_padding = (freq_axis_kernel_size - 1) // 2
+            kernel_size = (freq_axis_kernel_size, scale * 2 + 1)
+            if use_causal_conv:
+                # Pad a full receptive field on both sides; forward() drops the tail so
+                # only the left (past) padding contributes to the kept samples.
+                padding = (freq_axis_padding, scale * 2)
+            else:
+                padding = (freq_axis_padding, scale)
+            conv = torch.nn.Conv2d(1, 1, kernel_size=kernel_size, padding=padding, bias=False)
+            self.up_layers += [conv]
+
+            # nonlinear
+            if nonlinear_activation is not None:
+                nonlinear = getattr(torch.nn, nonlinear_activation)(**nonlinear_activation_params)
+                self.up_layers += [nonlinear]
+
+    def forward(self, c):
+        """
+        c :  (B, C, T_in).
+        Tensor: (B, C, T_upsample)
+        """
+        c = c.unsqueeze(1)  # (B, 1, C, T)
+        for f in self.up_layers:
+            if self.use_causal_conv and isinstance(f, torch.nn.Conv2d):
+                # The causal conv is padded by 2 * scale on both sides, which makes its
+                # output 2 * scale samples too long; keep only the first T samples so the
+                # length matches the input and no output depends on future frames.
+                c = f(c)[..., : c.size(-1)]
+            else:
+                c = f(c)
+        return c.squeeze(1)  # (B, C, T')
+
+
+class ConvUpsample(torch.nn.Module):
+    """Context-window ``Conv1d`` over the conditioning features followed by an ``UpsampleNetwork``.
+
+    Args:
+        upsample_factors, nonlinear_activation, nonlinear_activation_params,
+        interpolate_mode, freq_axis_kernel_size: forwarded unchanged to ``UpsampleNetwork``.
+        aux_channels (int): number of channels of the conditioning features.
+        aux_context_window (int): number of context frames covered by the input convolution.
+        use_causal_conv (bool): selects the one-sided kernel size and is forwarded to
+            ``UpsampleNetwork``.
+    """
+
+    # pylint: disable=dangerous-default-value
+    def __init__(
+        self,
+        upsample_factors,
+        nonlinear_activation=None,
+        nonlinear_activation_params={},
+        interpolate_mode="nearest",
+        freq_axis_kernel_size=1,
+        aux_channels=80,
+        aux_context_window=0,
+        use_causal_conv=False,
+    ):
+        super().__init__()
+        self.aux_context_window = aux_context_window
+        # Trimming in forward() only applies when there is a context window to drop.
+        self.use_causal_conv = use_causal_conv and aux_context_window > 0
+
+        # To capture wide-context information in conditional features
+        if use_causal_conv:
+            context_kernel = aux_context_window + 1
+        else:
+            context_kernel = 2 * aux_context_window + 1
+
+        # NOTE(kan-bayashi): Here do not use padding because the input is already padded
+        self.conv_in = torch.nn.Conv1d(aux_channels, aux_channels, kernel_size=context_kernel, bias=False)
+
+        upsample_kwargs = dict(
+            upsample_factors=upsample_factors,
+            nonlinear_activation=nonlinear_activation,
+            nonlinear_activation_params=nonlinear_activation_params,
+            interpolate_mode=interpolate_mode,
+            freq_axis_kernel_size=freq_axis_kernel_size,
+            use_causal_conv=use_causal_conv,
+        )
+        self.upsample = UpsampleNetwork(**upsample_kwargs)
+
+    def forward(self, c):
+        """
+        c : (B, C, T_in).
+        Tensor: (B, C, T_upsampled),
+        """
+        features = self.conv_in(c)
+        if self.use_causal_conv:
+            # Drop the trailing ``aux_context_window`` frames of the convolved features.
+            features = features[:, :, : -self.aux_context_window]
+        return self.upsample(features)
diff --git a/TTS/vocoder/layers/wavegrad.py b/TTS/vocoder/layers/wavegrad.py
new file mode 100644
index 0000000000000000000000000000000000000000..24b905f994b69075fc5e46249ce0c7719fe4b174
--- /dev/null
+++ b/TTS/vocoder/layers/wavegrad.py
@@ -0,0 +1,165 @@
+import torch
+import torch.nn.functional as F
+from torch import nn
+from torch.nn.utils import weight_norm
+
+
+class Conv1d(nn.Conv1d):
+    """``nn.Conv1d`` whose weight is orthogonally initialised and whose bias starts at zero.
+
+    Accepts exactly the same arguments as ``torch.nn.Conv1d``.
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        nn.init.orthogonal_(self.weight)
+        # ``bias`` is None when the layer is built with ``bias=False``.
+        if self.bias is not None:
+            nn.init.zeros_(self.bias)
+
+
+class PositionalEncoding(nn.Module):
+    """Positional encoding with noise level conditioning
+
+    The sinusoidal table is built lazily from the first input and cached in ``self.pe``
+    as a plain attribute (not a buffer), so it is absent from ``state_dict`` and is not
+    moved by ``Module.to``; ``forward`` therefore rebuilds it whenever the cache no
+    longer fits the input.
+    """
+
+    def __init__(self, n_channels, max_len=10000):
+        super().__init__()
+        self.n_channels = n_channels
+        self.max_len = max_len
+        self.C = 5000  # divisor applied to the table before it is added to the input
+        self.pe = torch.zeros(0, 0)  # cached table, shape (channels, length)
+
+    def forward(self, x, noise_level):
+        """Return ``x + noise_level + pe / C``.
+
+        Args:
+            x (Tensor): features of shape (B, C, T).
+            noise_level (Tensor): one value per batch item, broadcast over (C, T).
+        """
+        cache_is_stale = (
+            x.shape[2] > self.pe.shape[1]  # input longer than the cached table
+            or x.shape[1] != self.pe.shape[0]  # channel count changed
+            or x.device != self.pe.device  # module/input moved to another device
+        )
+        if cache_is_stale:
+            # Never shrink the cache when it is rebuilt for a reason other than length.
+            self.init_pe_matrix(x.shape[1], max(x.shape[2], self.pe.shape[1]), x)
+        return x + noise_level[..., None, None] + self.pe[:, : x.size(2)].repeat(x.shape[0], 1, 1) / self.C
+
+    def init_pe_matrix(self, n_channels, max_len, x):
+        """Build the (n_channels, max_len) table and store it with the dtype/device of ``x``."""
+        pe = torch.zeros(max_len, n_channels)
+        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
+        div_term = torch.pow(10000, torch.arange(0, n_channels, 2).float() / n_channels)
+
+        pe[:, 0::2] = torch.sin(position / div_term)
+        # With an odd channel count there is one fewer cosine column than sine columns.
+        pe[:, 1::2] = torch.cos(position / div_term[: n_channels // 2])
+        self.pe = pe.transpose(0, 1).to(x)
+
+
+class FiLM(nn.Module):
+    """Produce ``shift`` and ``scale`` tensors from features and a noise level.
+
+    Args:
+        input_size (int): channels of the input features.
+        output_size (int): channels of each returned tensor.
+    """
+
+    def __init__(self, input_size, output_size):
+        super().__init__()
+        self.encoding = PositionalEncoding(input_size)
+        self.input_conv = nn.Conv1d(input_size, input_size, 3, padding=1)
+        # Twice the output channels: one half becomes ``shift``, the other ``scale``.
+        self.output_conv = nn.Conv1d(input_size, output_size * 2, 3, padding=1)
+
+        for conv in (self.input_conv, self.output_conv):
+            nn.init.xavier_uniform_(conv.weight)
+        for conv in (self.input_conv, self.output_conv):
+            nn.init.zeros_(conv.bias)
+
+    def forward(self, x, noise_scale):
+        """Return ``(shift, scale)``, the two channel-wise halves of the output convolution."""
+        hidden = F.leaky_relu(self.input_conv(x), 0.2)
+        hidden = self.encoding(hidden, noise_scale)
+        modulation = self.output_conv(hidden)
+        shift, scale = torch.chunk(modulation, 2, dim=1)
+        return shift, scale
+
+    def remove_weight_norm(self):
+        """Strip the weight-norm reparametrisation from both convolutions."""
+        for conv in (self.input_conv, self.output_conv):
+            nn.utils.remove_weight_norm(conv)
+
+    def apply_weight_norm(self):
+        """Wrap both convolutions with weight normalisation."""
+        self.input_conv = weight_norm(self.input_conv)
+        self.output_conv = weight_norm(self.output_conv)
+
+
+@torch.jit.script
+def shif_and_scale(x, scale, shift):
+    """Elementwise affine transform: ``shift + scale * x``."""
+    return shift + scale * x
+
+
+class UBlock(nn.Module):
+    """Upsampling block whose convolutions are modulated by ``shift``/``scale`` tensors.
+
+    Args:
+        input_size (int): channels of the input.
+        hidden_size (int): channels of the output.
+        factor (int): upsampling factor along the last axis.
+        dilation (list | tuple): exactly four dilation rates, one per 3-tap convolution.
+    """
+
+    def __init__(self, input_size, hidden_size, factor, dilation):
+        super().__init__()
+        assert isinstance(dilation, (list, tuple))
+        assert len(dilation) == 4
+
+        self.factor = factor
+        # 1x1 projection used for the residual path.
+        self.res_block = Conv1d(input_size, hidden_size, 1)
+        # padding == dilation keeps the length unchanged for kernel size 3.
+        self.main_block = nn.ModuleList(
+            [
+                Conv1d(input_size, hidden_size, 3, dilation=dilation[0], padding=dilation[0]),
+                Conv1d(hidden_size, hidden_size, 3, dilation=dilation[1], padding=dilation[1]),
+            ]
+        )
+        self.out_block = nn.ModuleList(
+            [
+                Conv1d(hidden_size, hidden_size, 3, dilation=dilation[2], padding=dilation[2]),
+                Conv1d(hidden_size, hidden_size, 3, dilation=dilation[3], padding=dilation[3]),
+            ]
+        )
+
+    def forward(self, x, shift, scale):
+        """Upsample ``x`` by ``factor`` and apply the modulated residual stacks.
+
+        ``shift`` and ``scale`` must broadcast against the ``hidden_size``-channel
+        activations at the upsampled length.
+        """
+        x_inter = F.interpolate(x, size=x.shape[-1] * self.factor)
+        res = self.res_block(x_inter)
+        # ``x_inter`` already has the target length, so the activation is applied to it
+        # directly; resizing it again to the same length would be a no-op.
+        o = F.leaky_relu(x_inter, 0.2)
+        o = self.main_block[0](o)
+        o = shif_and_scale(o, scale, shift)
+        o = F.leaky_relu(o, 0.2)
+        o = self.main_block[1](o)
+        # First residual connection: projected input + main stack.
+        res2 = res + o
+        o = shif_and_scale(res2, scale, shift)
+        o = F.leaky_relu(o, 0.2)
+        o = self.out_block[0](o)
+        o = shif_and_scale(o, scale, shift)
+        o = F.leaky_relu(o, 0.2)
+        o = self.out_block[1](o)
+        # Second residual connection around the output stack.
+        o = o + res2
+        return o
+
+    def remove_weight_norm(self):
+        """Strip the weight-norm reparametrisation from every convolution of the block."""
+        nn.utils.remove_weight_norm(self.res_block)
+        for layer in self.main_block:
+            # Skip layers without any state (nothing to un-normalise).
+            if len(layer.state_dict()) != 0:
+                nn.utils.remove_weight_norm(layer)
+        for layer in self.out_block:
+            if len(layer.state_dict()) != 0:
+                nn.utils.remove_weight_norm(layer)
+
+    def apply_weight_norm(self):
+        """Wrap every convolution of the block with weight normalisation."""
+        self.res_block = weight_norm(self.res_block)
+        for idx, layer in enumerate(self.main_block):
+            if len(layer.state_dict()) != 0:
+                self.main_block[idx] = weight_norm(layer)
+        for idx, layer in enumerate(self.out_block):
+            if len(layer.state_dict()) != 0:
+                self.out_block[idx] = weight_norm(layer)
+
+
+class DBlock(nn.Module):
+    """Downsampling residual block: three dilated 3-tap convolutions plus a 1x1 skip path.
+
+    Args:
+        input_size (int): channels of the input.
+        hidden_size (int): channels of the output.
+        factor (int): the last axis is shortened to ``length // factor``.
+    """
+
+    def __init__(self, input_size, hidden_size, factor):
+        super().__init__()
+        self.factor = factor
+        self.res_block = Conv1d(input_size, hidden_size, 1)
+        # (in_channels, dilation) per layer; padding == dilation keeps the length unchanged.
+        layer_specs = ((input_size, 1), (hidden_size, 2), (hidden_size, 4))
+        self.main_block = nn.ModuleList(
+            [Conv1d(in_ch, hidden_size, 3, dilation=rate, padding=rate) for in_ch, rate in layer_specs]
+        )
+
+    def forward(self, x):
+        """Resize ``x`` to ``T // factor`` samples and return main path + projected skip path."""
+        target_len = x.shape[-1] // self.factor
+        # Skip path: project at full length, then shrink.
+        skip = F.interpolate(self.res_block(x), size=target_len)
+        # Main path: shrink first, then run the pre-activated convolutions.
+        out = F.interpolate(x, size=target_len)
+        for conv in self.main_block:
+            out = conv(F.leaky_relu(out, 0.2))
+        return out + skip
+
+    def remove_weight_norm(self):
+        """Strip the weight-norm reparametrisation from every convolution of the block."""
+        nn.utils.remove_weight_norm(self.res_block)
+        for conv in self.main_block:
+            # Layers without any state are left alone.
+            if conv.state_dict():
+                nn.utils.remove_weight_norm(conv)
+
+    def apply_weight_norm(self):
+        """Wrap every convolution of the block with weight normalisation."""
+        self.res_block = weight_norm(self.res_block)
+        for position in range(len(self.main_block)):
+            conv = self.main_block[position]
+            if conv.state_dict():
+                self.main_block[position] = weight_norm(conv)
diff --git a/TTS/vocoder/models/__init__.py b/TTS/vocoder/models/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..65901617b69d3ae708e09226c5e4ad903f89a929
--- /dev/null
+++ b/TTS/vocoder/models/__init__.py
@@ -0,0 +1,154 @@
+import importlib
+import re
+
+from coqpit import Coqpit
+
+
+def to_camel(text):
+    """Convert ``snake_case`` to ``CamelCase``, e.g. ``hifigan_generator`` -> ``HifiganGenerator``.
+
+    The text is first passed through ``str.capitalize`` (which lower-cases everything
+    after the first character); then every ``_<letter>`` that is not at the very start
+    is replaced by the upper-cased letter.
+    """
+
+    def _drop_underscore(match):
+        return match.group(1).upper()
+
+    capitalized = text.capitalize()
+    return re.sub(r"(?!^)_([a-zA-Z])", _drop_underscore, capitalized)
+
+
+def setup_model(config: Coqpit):
+    """Load models directly from configuration.
+
+    A config that defines both ``discriminator_model`` and ``generator_model`` is built
+    as ``TTS.vocoder.models.gan.GAN``. Otherwise ``config.model`` (lower-cased) names the
+    module under ``TTS.vocoder.models`` and the class is looked up inside it.
+
+    Returns:
+        The object returned by the model class's ``init_from_config(config)``.
+
+    Raises:
+        ValueError: if no module or class exists for ``config.model``.
+    """
+    if "discriminator_model" in config and "generator_model" in config:
+        module = importlib.import_module("TTS.vocoder.models.gan")
+        MyModel = getattr(module, "GAN")
+    else:
+        model_name = config.model.lower()
+        # Class names that are spelled explicitly instead of being derived by ``to_camel``.
+        explicit_class_names = {"wavernn": "Wavernn", "gan": "GAN", "wavegrad": "Wavegrad"}
+        class_name = explicit_class_names.get(model_name) or to_camel(config.model)
+        module_path = "TTS.vocoder.models." + model_name
+        try:
+            module = importlib.import_module(module_path)
+        except ModuleNotFoundError as e:
+            # Only translate "the model module itself is missing"; a missing dependency
+            # imported by an existing model module must surface unchanged.
+            if e.name != module_path:
+                raise
+            raise ValueError(f"Model {config.model} not exist!") from e
+        try:
+            MyModel = getattr(module, class_name)
+        except AttributeError as e:
+            raise ValueError(f"Model {config.model} not exist!") from e
+    print(" > Vocoder Model: {}".format(config.model))
+    return MyModel.init_from_config(config)
+
+
+def setup_generator(c):
+    """Build the generator network named by ``c.generator_model``.
+
+    TODO: use config object as arguments
+
+    Args:
+        c: config exposing ``generator_model``, ``generator_model_params`` and ``audio``.
+
+    Returns:
+        The instantiated generator model.
+
+    Raises:
+        ValueError: for the retired name ``melgan_fb_generator``.
+        NotImplementedError: if the module exists but no constructor recipe is known.
+    """
+    print(" > Generator Model: {}".format(c.generator_model))
+    model_name = c.generator_model.lower()
+
+    # Checked before the import: the module was renamed, so importing the old name
+    # would fail first and hide this hint.
+    if model_name == "melgan_fb_generator":
+        raise ValueError("melgan_fb_generator is now fullband_melgan_generator")
+
+    MyModel = importlib.import_module("TTS.vocoder.models." + model_name)
+    MyModel = getattr(MyModel, to_camel(c.generator_model))
+
+    # MelGAN variants share one constructor recipe and differ only in
+    # (out_channels, base_channels).
+    melgan_variants = {
+        "melgan_generator": (1, 512),
+        "multiband_melgan_generator": (4, 384),
+        "fullband_melgan_generator": (1, 512),
+    }
+
+    if model_name == "hifigan_generator":
+        model = MyModel(in_channels=c.audio["num_mels"], out_channels=1, **c.generator_model_params)
+    elif model_name in melgan_variants:
+        out_channels, base_channels = melgan_variants[model_name]
+        model = MyModel(
+            in_channels=c.audio["num_mels"],
+            out_channels=out_channels,
+            proj_kernel=7,
+            base_channels=base_channels,
+            upsample_factors=c.generator_model_params["upsample_factors"],
+            res_kernel=3,
+            num_res_blocks=c.generator_model_params["num_res_blocks"],
+        )
+    elif model_name == "parallel_wavegan_generator":
+        model = MyModel(
+            in_channels=1,
+            out_channels=1,
+            kernel_size=3,
+            num_res_blocks=c.generator_model_params["num_res_blocks"],
+            stacks=c.generator_model_params["stacks"],
+            res_channels=64,
+            gate_channels=128,
+            skip_channels=64,
+            aux_channels=c.audio["num_mels"],
+            dropout=0.0,
+            bias=True,
+            use_weight_norm=True,
+            upsample_factors=c.generator_model_params["upsample_factors"],
+        )
+    elif model_name == "univnet_generator":
+        model = MyModel(**c.generator_model_params)
+    else:
+        raise NotImplementedError(f"Model {c.generator_model} not implemented!")
+    return model
+
+
+def setup_discriminator(c):
+    """Build the discriminator network named by ``c.discriminator_model``.
+
+    TODO: use config object as arguments
+
+    Args:
+        c: config exposing ``discriminator_model``, ``discriminator_model_params`` and ``audio``.
+
+    Returns:
+        The instantiated discriminator model.
+
+    Raises:
+        NotImplementedError: if the module exists but no constructor recipe is known.
+    """
+    print(" > Discriminator Model: {}".format(c.discriminator_model))
+    # One normalised name drives both the import and the dispatch below.
+    model_name = c.discriminator_model.lower()
+    if "parallel_wavegan" in model_name:
+        # Both parallel-wavegan discriminator classes live in the same module.
+        MyModel = importlib.import_module("TTS.vocoder.models.parallel_wavegan_discriminator")
+    else:
+        MyModel = importlib.import_module("TTS.vocoder.models." + model_name)
+    MyModel = getattr(MyModel, to_camel(model_name))
+
+    if model_name == "hifigan_discriminator":
+        model = MyModel()
+    elif model_name == "random_window_discriminator":
+        model = MyModel(
+            cond_channels=c.audio["num_mels"],
+            hop_length=c.audio["hop_length"],
+            uncond_disc_donwsample_factors=c.discriminator_model_params["uncond_disc_donwsample_factors"],
+            cond_disc_downsample_factors=c.discriminator_model_params["cond_disc_downsample_factors"],
+            cond_disc_out_channels=c.discriminator_model_params["cond_disc_out_channels"],
+            window_sizes=c.discriminator_model_params["window_sizes"],
+        )
+    elif model_name == "melgan_multiscale_discriminator":
+        model = MyModel(
+            in_channels=1,
+            out_channels=1,
+            kernel_sizes=(5, 3),
+            base_channels=c.discriminator_model_params["base_channels"],
+            max_channels=c.discriminator_model_params["max_channels"],
+            downsample_factors=c.discriminator_model_params["downsample_factors"],
+        )
+    elif model_name == "residual_parallel_wavegan_discriminator":
+        model = MyModel(
+            in_channels=1,
+            out_channels=1,
+            kernel_size=3,
+            num_layers=c.discriminator_model_params["num_layers"],
+            stacks=c.discriminator_model_params["stacks"],
+            res_channels=64,
+            gate_channels=128,
+            skip_channels=64,
+            dropout=0.0,
+            bias=True,
+            nonlinear_activation="LeakyReLU",
+            nonlinear_activation_params={"negative_slope": 0.2},
+        )
+    elif model_name == "parallel_wavegan_discriminator":
+        model = MyModel(
+            in_channels=1,
+            out_channels=1,
+            kernel_size=3,
+            num_layers=c.discriminator_model_params["num_layers"],
+            conv_channels=64,
+            dilation_factor=1,
+            nonlinear_activation="LeakyReLU",
+            nonlinear_activation_params={"negative_slope": 0.2},
+            bias=True,
+        )
+    elif model_name == "univnet_discriminator":
+        model = MyModel()
+    else:
+        # Previously fell through to ``return model`` with ``model`` unbound.
+        raise NotImplementedError(f"Model {c.discriminator_model} not implemented!")
+    return model
diff --git a/TTS/vocoder/models/__pycache__/__init__.cpython-310.pyc b/TTS/vocoder/models/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b1da8615aee1241c0952cb4827012e127bcccf10
Binary files /dev/null and b/TTS/vocoder/models/__pycache__/__init__.cpython-310.pyc differ
diff --git a/TTS/vocoder/models/__pycache__/__init__.cpython-38.pyc b/TTS/vocoder/models/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..1137c2f92941a81d6cf82efa96e8bfdb6e0c68b8
Binary files /dev/null and b/TTS/vocoder/models/__pycache__/__init__.cpython-38.pyc differ
diff --git a/TTS/vocoder/models/__pycache__/__init__.cpython-39.pyc b/TTS/vocoder/models/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..2f59eedc3410357172d4d72ff280d421af1b6dcf
Binary files /dev/null and b/TTS/vocoder/models/__pycache__/__init__.cpython-39.pyc differ
diff --git a/TTS/vocoder/models/__pycache__/base_vocoder.cpython-310.pyc b/TTS/vocoder/models/__pycache__/base_vocoder.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8732854b6496eb86312aa2ee777bb9133fd3e319
Binary files /dev/null and b/TTS/vocoder/models/__pycache__/base_vocoder.cpython-310.pyc differ
diff --git a/TTS/vocoder/models/__pycache__/base_vocoder.cpython-38.pyc b/TTS/vocoder/models/__pycache__/base_vocoder.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6de282895436b8b1924563ce5103811f727d12bb
Binary files /dev/null and b/TTS/vocoder/models/__pycache__/base_vocoder.cpython-38.pyc differ
diff --git a/TTS/vocoder/models/__pycache__/base_vocoder.cpython-39.pyc b/TTS/vocoder/models/__pycache__/base_vocoder.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..367e241e36b57b454df04abb4d0343180c2e0d18
Binary files /dev/null and b/TTS/vocoder/models/__pycache__/base_vocoder.cpython-39.pyc differ
diff --git a/TTS/vocoder/models/__pycache__/gan.cpython-310.pyc b/TTS/vocoder/models/__pycache__/gan.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6517237135ad6d53714b424d8b7fceda990db253
Binary files /dev/null and b/TTS/vocoder/models/__pycache__/gan.cpython-310.pyc differ
diff --git a/TTS/vocoder/models/__pycache__/gan.cpython-38.pyc b/TTS/vocoder/models/__pycache__/gan.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..3adcb65c5b3e76b6c85fe5af8f5b8673d0488a81
Binary files /dev/null and b/TTS/vocoder/models/__pycache__/gan.cpython-38.pyc differ
diff --git a/TTS/vocoder/models/__pycache__/gan.cpython-39.pyc b/TTS/vocoder/models/__pycache__/gan.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..9141fa1a5abc79755da3768ac5a3827995b7a327
Binary files /dev/null and b/TTS/vocoder/models/__pycache__/gan.cpython-39.pyc differ
diff --git a/TTS/vocoder/models/__pycache__/hifigan_discriminator.cpython-310.pyc b/TTS/vocoder/models/__pycache__/hifigan_discriminator.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..29497ccc3c1635ccb8c2ed00643fba927438d490
Binary files /dev/null and b/TTS/vocoder/models/__pycache__/hifigan_discriminator.cpython-310.pyc differ
diff --git a/TTS/vocoder/models/__pycache__/hifigan_discriminator.cpython-38.pyc b/TTS/vocoder/models/__pycache__/hifigan_discriminator.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..48d602fd5a7979a4efd3bc2c159c482981d5280d
Binary files /dev/null and b/TTS/vocoder/models/__pycache__/hifigan_discriminator.cpython-38.pyc differ
diff --git a/TTS/vocoder/models/__pycache__/hifigan_discriminator.cpython-39.pyc b/TTS/vocoder/models/__pycache__/hifigan_discriminator.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..490dabf1a69ab0be26d8df55d5673f518e511a4f
Binary files /dev/null and b/TTS/vocoder/models/__pycache__/hifigan_discriminator.cpython-39.pyc differ
diff --git a/TTS/vocoder/models/__pycache__/univnet_discriminator.cpython-310.pyc b/TTS/vocoder/models/__pycache__/univnet_discriminator.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d6953bd9cdfd17d43a7b6e3742c0277139b577e9
Binary files /dev/null and b/TTS/vocoder/models/__pycache__/univnet_discriminator.cpython-310.pyc differ
diff --git a/TTS/vocoder/models/__pycache__/univnet_discriminator.cpython-38.pyc b/TTS/vocoder/models/__pycache__/univnet_discriminator.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..cbfc508e3a2e47c7ba1f232eedb723b8c2fad0e0
Binary files /dev/null and b/TTS/vocoder/models/__pycache__/univnet_discriminator.cpython-38.pyc differ
diff --git a/TTS/vocoder/models/__pycache__/univnet_discriminator.cpython-39.pyc b/TTS/vocoder/models/__pycache__/univnet_discriminator.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6cf102622741ed579fc25b0af23b155706d823ff
Binary files /dev/null and b/TTS/vocoder/models/__pycache__/univnet_discriminator.cpython-39.pyc differ
diff --git a/TTS/vocoder/models/__pycache__/univnet_generator.cpython-310.pyc b/TTS/vocoder/models/__pycache__/univnet_generator.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c58325d4edb3ba6ef8d02cd5753aa934328f4bf0
Binary files /dev/null and b/TTS/vocoder/models/__pycache__/univnet_generator.cpython-310.pyc differ
diff --git a/TTS/vocoder/models/__pycache__/univnet_generator.cpython-38.pyc b/TTS/vocoder/models/__pycache__/univnet_generator.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..726fda64f385e0cd395324da8c2778229474fadb
Binary files /dev/null and b/TTS/vocoder/models/__pycache__/univnet_generator.cpython-38.pyc differ
diff --git a/TTS/vocoder/models/__pycache__/univnet_generator.cpython-39.pyc b/TTS/vocoder/models/__pycache__/univnet_generator.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..99b2044822e4ed53675b4ce6f72982f3664541d1
Binary files /dev/null and b/TTS/vocoder/models/__pycache__/univnet_generator.cpython-39.pyc differ
diff --git a/TTS/vocoder/models/__pycache__/wavegrad.cpython-310.pyc b/TTS/vocoder/models/__pycache__/wavegrad.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..eafe23fee4c799d1d425a2f6546dc047d5977db8
Binary files /dev/null and b/TTS/vocoder/models/__pycache__/wavegrad.cpython-310.pyc differ
diff --git a/TTS/vocoder/models/__pycache__/wavegrad.cpython-38.pyc b/TTS/vocoder/models/__pycache__/wavegrad.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ae2819eae3ccbf2355569164abec0dd04ef14e3e
Binary files /dev/null and b/TTS/vocoder/models/__pycache__/wavegrad.cpython-38.pyc differ
diff --git a/TTS/vocoder/models/__pycache__/wavegrad.cpython-39.pyc b/TTS/vocoder/models/__pycache__/wavegrad.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..68bd09df2ad817475879184fbeab498406442cd2
Binary files /dev/null and b/TTS/vocoder/models/__pycache__/wavegrad.cpython-39.pyc differ
diff --git a/TTS/vocoder/models/__pycache__/wavernn.cpython-310.pyc b/TTS/vocoder/models/__pycache__/wavernn.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..178a2da82fc8b5438b80a28bbe7e3eb49369a7d7
Binary files /dev/null and b/TTS/vocoder/models/__pycache__/wavernn.cpython-310.pyc differ
diff --git a/TTS/vocoder/models/__pycache__/wavernn.cpython-38.pyc b/TTS/vocoder/models/__pycache__/wavernn.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..92090cd8611fd2f72d6812d9cbfa7bf915f8a288
Binary files /dev/null and b/TTS/vocoder/models/__pycache__/wavernn.cpython-38.pyc differ
diff --git a/TTS/vocoder/models/__pycache__/wavernn.cpython-39.pyc b/TTS/vocoder/models/__pycache__/wavernn.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0bb21d52c2225db3220271a40ce09d5c38374fe1
Binary files /dev/null and b/TTS/vocoder/models/__pycache__/wavernn.cpython-39.pyc differ
diff --git a/TTS/vocoder/models/base_vocoder.py b/TTS/vocoder/models/base_vocoder.py
new file mode 100644
index 0000000000000000000000000000000000000000..01a7ff68771c72f89f9d0fb6708706f6f92ba96a
--- /dev/null
+++ b/TTS/vocoder/models/base_vocoder.py
@@ -0,0 +1,53 @@
+from coqpit import Coqpit
+
+from TTS.model import BaseTrainerModel
+
+# pylint: skip-file
+
+
+class BaseVocoder(BaseTrainerModel):
+    """Base `vocoder` class. Every new `vocoder` model must inherit this.
+
+    It defines `vocoder` specific functions on top of `Model`.
+
+    Notes on input/output tensor shapes:
+        Any input or output tensor of the model must be shaped as
+
+        - 3D tensors `batch x time x channels`
+        - 2D tensors `batch x channels`
+        - 1D tensors `batch x 1`
+    """
+
+    def __init__(self, config):
+        super().__init__()
+        self._set_model_args(config)
+
+    def _set_model_args(self, config: Coqpit):
+        """Setup model args based on the config type.
+
+        If the config is for training with a name like "*Config", then the model args are embeded in the
+        config.model_args
+
+        If the config is for the model with a name like "*Args", then we assign the directly.
+        """
+        # don't use isintance not to import recursively
+        if "Config" in config.__class__.__name__:
+            if "characters" in config:
+                _, self.config, num_chars = self.get_characters(config)
+                self.config.num_chars = num_chars
+                if hasattr(self.config, "model_args"):
+                    config.model_args.num_chars = num_chars
+                    if "model_args" in config:
+                        self.args = self.config.model_args
+                    # This is for backward compatibility
+                    if "model_params" in config:
+                        self.args = self.config.model_params
+            else:
+                self.config = config
+                if "model_args" in config:
+                    self.args = self.config.model_args
+                # This is for backward compatibility
+                if "model_params" in config:
+                    self.args = self.config.model_params
+        else:
+            raise ValueError("config must be either a *Config or *Args")
diff --git a/TTS/vocoder/models/fullband_melgan_generator.py b/TTS/vocoder/models/fullband_melgan_generator.py
new file mode 100644
index 0000000000000000000000000000000000000000..ee25559af0d468aac535841bdfdd33b366250f43
--- /dev/null
+++ b/TTS/vocoder/models/fullband_melgan_generator.py
@@ -0,0 +1,33 @@
+import torch
+
+from TTS.vocoder.models.melgan_generator import MelganGenerator
+
+
+class FullbandMelganGenerator(MelganGenerator):
+    def __init__(
+        self,
+        in_channels=80,
+        out_channels=1,
+        proj_kernel=7,
+        base_channels=512,
+        upsample_factors=(2, 8, 2, 2),
+        res_kernel=3,
+        num_res_blocks=4,
+    ):
+        super().__init__(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            proj_kernel=proj_kernel,
+            base_channels=base_channels,
+            upsample_factors=upsample_factors,
+            res_kernel=res_kernel,
+            num_res_blocks=num_res_blocks,
+        )
+
+    @torch.no_grad()
+    def inference(self, cond_features):
+        cond_features = cond_features.to(self.layers[1].weight.device)
+        cond_features = torch.nn.functional.pad(
+            cond_features, (self.inference_padding, self.inference_padding), "replicate"
+        )
+        return self.layers(cond_features)
diff --git a/TTS/vocoder/models/gan.py b/TTS/vocoder/models/gan.py
new file mode 100644
index 0000000000000000000000000000000000000000..19c30e983e5bb2066d3ccd22dc5cb21c091cb60a
--- /dev/null
+++ b/TTS/vocoder/models/gan.py
@@ -0,0 +1,374 @@
+from inspect import signature
+from typing import Dict, List, Tuple
+
+import numpy as np
+import torch
+from coqpit import Coqpit
+from torch import nn
+from torch.utils.data import DataLoader
+from torch.utils.data.distributed import DistributedSampler
+from trainer.trainer_utils import get_optimizer, get_scheduler
+
+from TTS.utils.audio import AudioProcessor
+from TTS.utils.io import load_fsspec
+from TTS.vocoder.datasets.gan_dataset import GANDataset
+from TTS.vocoder.layers.losses import DiscriminatorLoss, GeneratorLoss
+from TTS.vocoder.models import setup_discriminator, setup_generator
+from TTS.vocoder.models.base_vocoder import BaseVocoder
+from TTS.vocoder.utils.generic_utils import plot_results
+
+
+class GAN(BaseVocoder):
+    def __init__(self, config: Coqpit, ap: AudioProcessor = None):
+        """Wrap a generator and a discriminator network. It provides a compatible interface for the trainer.
+        It also helps mixing and matching different generator and disciminator networks easily.
+
+        To implement a new GAN models, you just need to define the generator and the discriminator networks, the rest
+        is handled by the `GAN` class.
+
+        Args:
+            config (Coqpit): Model configuration.
+            ap (AudioProcessor): 🐸TTS AudioProcessor instance. Defaults to None.
+
+        Examples:
+            Initializing the GAN model with HifiGAN generator and discriminator.
+            >>> from TTS.vocoder.configs import HifiganConfig
+            >>> config = HifiganConfig()
+            >>> model = GAN(config)
+        """
+        super().__init__(config)
+        self.config = config
+        self.model_g = setup_generator(config)
+        self.model_d = setup_discriminator(config)
+        self.train_disc = False  # if False, train only the generator.
+        self.y_hat_g = None  # the last generator prediction to be passed onto the discriminator
+        self.ap = ap
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """Run the generator's forward pass.
+
+        Args:
+            x (torch.Tensor): Input tensor.
+
+        Returns:
+            torch.Tensor: output of the GAN generator network.
+        """
+        return self.model_g.forward(x)
+
+    def inference(self, x: torch.Tensor) -> torch.Tensor:
+        """Run the generator's inference pass.
+
+        Args:
+            x (torch.Tensor): Input tensor.
+        Returns:
+            torch.Tensor: output of the GAN generator network.
+        """
+        return self.model_g.inference(x)
+
+    def train_step(self, batch: Dict, criterion: Dict, optimizer_idx: int) -> Tuple[Dict, Dict]:
+        """Compute model outputs and the loss values. `optimizer_idx` selects the generator or the discriminator for
+        network on the current pass.
+
+        Args:
+            batch (Dict): Batch of samples returned by the dataloader.
+            criterion (Dict): Criterion used to compute the losses.
+            optimizer_idx (int): ID of the optimizer in use on the current pass.
+
+        Raises:
+            ValueError: `optimizer_idx` is an unexpected value.
+
+        Returns:
+            Tuple[Dict, Dict]: model outputs and the computed loss values.
+        """
+        outputs = {}
+        loss_dict = {}
+
+        x = batch["input"]
+        y = batch["waveform"]
+
+        if optimizer_idx not in [0, 1]:
+            raise ValueError(" [!] Unexpected `optimizer_idx`.")
+
+        if optimizer_idx == 0:
+            # DISCRIMINATOR optimization
+
+            # generator pass
+            y_hat = self.model_g(x)[:, :, : y.size(2)]
+
+            # cache for generator loss
+            # pylint: disable=W0201
+            self.y_hat_g = y_hat
+            self.y_hat_sub = None
+            self.y_sub_g = None
+
+            # PQMF formatting
+            if y_hat.shape[1] > 1:
+                self.y_hat_sub = y_hat
+                y_hat = self.model_g.pqmf_synthesis(y_hat)
+                self.y_hat_g = y_hat  # save for generator loss
+                self.y_sub_g = self.model_g.pqmf_analysis(y)
+
+            scores_fake, feats_fake, feats_real = None, None, None
+
+            if self.train_disc:
+                # use different samples for G and D trainings
+                if self.config.diff_samples_for_G_and_D:
+                    x_d = batch["input_disc"]
+                    y_d = batch["waveform_disc"]
+                    # use a different sample than generator
+                    with torch.no_grad():
+                        y_hat = self.model_g(x_d)
+
+                    # PQMF formatting
+                    if y_hat.shape[1] > 1:
+                        y_hat = self.model_g.pqmf_synthesis(y_hat)
+                else:
+                    # use the same samples as generator
+                    x_d = x.clone()
+                    y_d = y.clone()
+                    y_hat = self.y_hat_g
+
+                # run D with or without cond. features
+                if len(signature(self.model_d.forward).parameters) == 2:
+                    D_out_fake = self.model_d(y_hat.detach().clone(), x_d)
+                    D_out_real = self.model_d(y_d, x_d)
+                else:
+                    D_out_fake = self.model_d(y_hat.detach())
+                    D_out_real = self.model_d(y_d)
+
+                # format D outputs
+                if isinstance(D_out_fake, tuple):
+                    # self.model_d returns scores and features
+                    scores_fake, feats_fake = D_out_fake
+                    if D_out_real is None:
+                        scores_real, feats_real = None, None
+                    else:
+                        scores_real, feats_real = D_out_real
+                else:
+                    # model D returns only scores
+                    scores_fake = D_out_fake
+                    scores_real = D_out_real
+
+                # compute losses
+                loss_dict = criterion[optimizer_idx](scores_fake, scores_real)
+                outputs = {"model_outputs": y_hat}
+
+        if optimizer_idx == 1:
+            # GENERATOR loss
+            scores_fake, feats_fake, feats_real = None, None, None
+            if self.train_disc:
+                if len(signature(self.model_d.forward).parameters) == 2:
+                    D_out_fake = self.model_d(self.y_hat_g, x)
+                else:
+                    D_out_fake = self.model_d(self.y_hat_g)
+                D_out_real = None
+
+                if self.config.use_feat_match_loss:
+                    with torch.no_grad():
+                        D_out_real = self.model_d(y)
+
+                # format D outputs
+                if isinstance(D_out_fake, tuple):
+                    scores_fake, feats_fake = D_out_fake
+                    if D_out_real is None:
+                        feats_real = None
+                    else:
+                        _, feats_real = D_out_real
+                else:
+                    scores_fake = D_out_fake
+                    feats_fake, feats_real = None, None
+
+            # compute losses
+            loss_dict = criterion[optimizer_idx](
+                self.y_hat_g, y, scores_fake, feats_fake, feats_real, self.y_hat_sub, self.y_sub_g
+            )
+            outputs = {"model_outputs": self.y_hat_g}
+        return outputs, loss_dict
+
+    def _log(self, name: str, ap: AudioProcessor, batch: Dict, outputs: Dict) -> Tuple[Dict, Dict]:
+        """Logging shared by the training and evaluation.
+
+        Args:
+            name (str): Name of the run. `train` or `eval`,
+            ap (AudioProcessor): Audio processor used in training.
+            batch (Dict): Batch used in the last train/eval step.
+            outputs (Dict): Model outputs from the last train/eval step.
+
+        Returns:
+            Tuple[Dict, Dict]: log figures and audio samples.
+        """
+        y_hat = outputs[0]["model_outputs"] if self.train_disc else outputs[1]["model_outputs"]
+        y = batch["waveform"]
+        figures = plot_results(y_hat, y, ap, name)
+        sample_voice = y_hat[0].squeeze(0).detach().cpu().numpy()
+        audios = {f"{name}/audio": sample_voice}
+        return figures, audios
+
+    def train_log(
+        self, batch: Dict, outputs: Dict, logger: "Logger", assets: Dict, steps: int  # pylint: disable=unused-argument
+    ) -> Tuple[Dict, np.ndarray]:
+        """Call `_log()` for training."""
+        figures, audios = self._log("eval", self.ap, batch, outputs)
+        logger.eval_figures(steps, figures)
+        logger.eval_audios(steps, audios, self.ap.sample_rate)
+
+    @torch.no_grad()
+    def eval_step(self, batch: Dict, criterion: nn.Module, optimizer_idx: int) -> Tuple[Dict, Dict]:
+        """Call `train_step()` with `no_grad()`"""
+        self.train_disc = True  # Avoid a bug in the Training with the missing discriminator loss
+        return self.train_step(batch, criterion, optimizer_idx)
+
+    def eval_log(
+        self, batch: Dict, outputs: Dict, logger: "Logger", assets: Dict, steps: int  # pylint: disable=unused-argument
+    ) -> Tuple[Dict, np.ndarray]:
+        """Call `_log()` for evaluation."""
+        figures, audios = self._log("eval", self.ap, batch, outputs)
+        logger.eval_figures(steps, figures)
+        logger.eval_audios(steps, audios, self.ap.sample_rate)
+
+    def load_checkpoint(
+        self,
+        config: Coqpit,
+        checkpoint_path: str,
+        eval: bool = False,  # pylint: disable=unused-argument, redefined-builtin
+        cache: bool = False,
+    ) -> None:
+        """Load a GAN checkpoint and initialize model parameters.
+
+        Args:
+            config (Coqpit): Model config.
+            checkpoint_path (str): Checkpoint file path.
+            eval (bool, optional): If true, load the model for inference. If falseDefaults to False.
+        """
+        state = load_fsspec(checkpoint_path, map_location=torch.device("cpu"), cache=cache)
+        # band-aid for older than v0.0.15 GAN models
+        if "model_disc" in state:
+            self.model_g.load_checkpoint(config, checkpoint_path, eval)
+        else:
+            self.load_state_dict(state["model"])
+            if eval:
+                self.model_d = None
+                if hasattr(self.model_g, "remove_weight_norm"):
+                    self.model_g.remove_weight_norm()
+
+    def on_train_step_start(self, trainer) -> None:
+        """Enable the discriminator training based on `steps_to_start_discriminator`
+
+        Args:
+            trainer (Trainer): Trainer object.
+        """
+        self.train_disc = trainer.total_steps_done >= self.config.steps_to_start_discriminator
+
+    def get_optimizer(self) -> List:
+        """Initiate and return the GAN optimizers based on the config parameters.
+
+        It returnes 2 optimizers in a list. First one is for the generator and the second one is for the discriminator.
+
+        Returns:
+            List: optimizers.
+        """
+        optimizer1 = get_optimizer(
+            self.config.optimizer, self.config.optimizer_params, self.config.lr_gen, self.model_g
+        )
+        optimizer2 = get_optimizer(
+            self.config.optimizer, self.config.optimizer_params, self.config.lr_disc, self.model_d
+        )
+        return [optimizer2, optimizer1]
+
+    def get_lr(self) -> List:
+        """Set the initial learning rates for each optimizer.
+
+        Returns:
+            List: learning rates for each optimizer.
+        """
+        return [self.config.lr_disc, self.config.lr_gen]
+
+    def get_scheduler(self, optimizer) -> List:
+        """Set the schedulers for each optimizer.
+
+        Args:
+            optimizer (List[`torch.optim.Optimizer`]): List of optimizers.
+
+        Returns:
+            List: Schedulers, one for each optimizer.
+        """
+        scheduler1 = get_scheduler(self.config.lr_scheduler_gen, self.config.lr_scheduler_gen_params, optimizer[0])
+        scheduler2 = get_scheduler(self.config.lr_scheduler_disc, self.config.lr_scheduler_disc_params, optimizer[1])
+        return [scheduler2, scheduler1]
+
+    @staticmethod
+    def format_batch(batch: List) -> Dict:
+        """Format the batch for training.
+
+        Args:
+            batch (List): Batch out of the dataloader.
+
+        Returns:
+            Dict: formatted model inputs.
+        """
+        if isinstance(batch[0], list):
+            x_G, y_G = batch[0]
+            x_D, y_D = batch[1]
+            return {"input": x_G, "waveform": y_G, "input_disc": x_D, "waveform_disc": y_D}
+        x, y = batch
+        return {"input": x, "waveform": y}
+
+    def get_data_loader(  # pylint: disable=no-self-use, unused-argument
+        self,
+        config: Coqpit,
+        assets: Dict,
+        is_eval: True,
+        samples: List,
+        verbose: bool,
+        num_gpus: int,
+        rank: int = None,  # pylint: disable=unused-argument
+    ):
+        """Initiate and return the GAN dataloader.
+
+        Args:
+            config (Coqpit): Model config.
+            ap (AudioProcessor): Audio processor.
+            is_eval (True): Set the dataloader for evaluation if true.
+            samples (List): Data samples.
+            verbose (bool): Log information if true.
+            num_gpus (int): Number of GPUs in use.
+            rank (int): Rank of the current GPU. Defaults to None.
+
+        Returns:
+            DataLoader: Torch dataloader.
+        """
+        dataset = GANDataset(
+            ap=self.ap,
+            items=samples,
+            seq_len=config.seq_len,
+            hop_len=self.ap.hop_length,
+            pad_short=config.pad_short,
+            conv_pad=config.conv_pad,
+            return_pairs=config.diff_samples_for_G_and_D if "diff_samples_for_G_and_D" in config else False,
+            is_training=not is_eval,
+            return_segments=not is_eval,
+            use_noise_augment=config.use_noise_augment,
+            use_cache=config.use_cache,
+            verbose=verbose,
+        )
+        dataset.shuffle_mapping()
+        sampler = DistributedSampler(dataset, shuffle=True) if num_gpus > 1 else None
+        loader = DataLoader(
+            dataset,
+            batch_size=1 if is_eval else config.batch_size,
+            shuffle=num_gpus == 0,
+            drop_last=False,
+            sampler=sampler,
+            num_workers=config.num_eval_loader_workers if is_eval else config.num_loader_workers,
+            pin_memory=False,
+        )
+        return loader
+
+    def get_criterion(self):
+        """Return criterions for the optimizers"""
+        return [DiscriminatorLoss(self.config), GeneratorLoss(self.config)]
+
+    @staticmethod
+    def init_from_config(config: Coqpit, verbose=True) -> "GAN":
+        ap = AudioProcessor.init_from_config(config, verbose=verbose)
+        return GAN(config, ap=ap)
diff --git a/TTS/vocoder/models/hifigan_discriminator.py b/TTS/vocoder/models/hifigan_discriminator.py
new file mode 100644
index 0000000000000000000000000000000000000000..ca5eaf408c95372ea26f4e83db6f470b4dd92dfb
--- /dev/null
+++ b/TTS/vocoder/models/hifigan_discriminator.py
@@ -0,0 +1,217 @@
+# adopted from https://github.com/jik876/hifi-gan/blob/master/models.py
+import torch
+from torch import nn
+from torch.nn import functional as F
+
+LRELU_SLOPE = 0.1
+
+
+class DiscriminatorP(torch.nn.Module):
+    """HiFiGAN Periodic Discriminator
+
+    Takes every Pth value from the input waveform and applied a stack of convoluations.
+
+    Note:
+        if `period` is 2
+        `waveform = [1, 2, 3, 4, 5, 6 ...] --> [1, 3, 5 ... ] --> convs -> score, feat`
+
+    Args:
+        x (Tensor): input waveform.
+
+    Returns:
+        [Tensor]: discriminator scores per sample in the batch.
+        [List[Tensor]]: list of features from each convolutional layer.
+
+    Shapes:
+        x: [B, 1, T]
+    """
+
+    def __init__(self, period, kernel_size=5, stride=3, use_spectral_norm=False):
+        super().__init__()
+        self.period = period
+        get_padding = lambda k, d: int((k * d - d) / 2)
+        norm_f = nn.utils.spectral_norm if use_spectral_norm else nn.utils.weight_norm
+        self.convs = nn.ModuleList(
+            [
+                norm_f(nn.Conv2d(1, 32, (kernel_size, 1), (stride, 1), padding=(get_padding(kernel_size, 1), 0))),
+                norm_f(nn.Conv2d(32, 128, (kernel_size, 1), (stride, 1), padding=(get_padding(kernel_size, 1), 0))),
+                norm_f(nn.Conv2d(128, 512, (kernel_size, 1), (stride, 1), padding=(get_padding(kernel_size, 1), 0))),
+                norm_f(nn.Conv2d(512, 1024, (kernel_size, 1), (stride, 1), padding=(get_padding(kernel_size, 1), 0))),
+                norm_f(nn.Conv2d(1024, 1024, (kernel_size, 1), 1, padding=(2, 0))),
+            ]
+        )
+        self.conv_post = norm_f(nn.Conv2d(1024, 1, (3, 1), 1, padding=(1, 0)))
+
+    def forward(self, x):
+        """
+        Args:
+            x (Tensor): input waveform.
+
+        Returns:
+            [Tensor]: discriminator scores per sample in the batch.
+            [List[Tensor]]: list of features from each convolutional layer.
+
+        Shapes:
+            x: [B, 1, T]
+        """
+        feat = []
+
+        # 1d to 2d
+        b, c, t = x.shape
+        if t % self.period != 0:  # pad first
+            n_pad = self.period - (t % self.period)
+            x = F.pad(x, (0, n_pad), "reflect")
+            t = t + n_pad
+        x = x.view(b, c, t // self.period, self.period)
+
+        for l in self.convs:
+            x = l(x)
+            x = F.leaky_relu(x, LRELU_SLOPE)
+            feat.append(x)
+        x = self.conv_post(x)
+        feat.append(x)
+        x = torch.flatten(x, 1, -1)
+
+        return x, feat
+
+
+class MultiPeriodDiscriminator(torch.nn.Module):
+    """HiFiGAN Multi-Period Discriminator (MPD)
+    Wrapper for the `PeriodDiscriminator` to apply it in different periods.
+    Periods are suggested to be prime numbers to reduce the overlap between each discriminator.
+    """
+
+    def __init__(self, use_spectral_norm=False):
+        super().__init__()
+        self.discriminators = nn.ModuleList(
+            [
+                DiscriminatorP(2, use_spectral_norm=use_spectral_norm),
+                DiscriminatorP(3, use_spectral_norm=use_spectral_norm),
+                DiscriminatorP(5, use_spectral_norm=use_spectral_norm),
+                DiscriminatorP(7, use_spectral_norm=use_spectral_norm),
+                DiscriminatorP(11, use_spectral_norm=use_spectral_norm),
+            ]
+        )
+
+    def forward(self, x):
+        """
+        Args:
+            x (Tensor): input waveform.
+
+        Returns:
+        [List[Tensor]]: list of scores from each discriminator.
+            [List[List[Tensor]]]: list of list of features from each discriminator's each convolutional layer.
+
+        Shapes:
+            x: [B, 1, T]
+        """
+        scores = []
+        feats = []
+        for _, d in enumerate(self.discriminators):
+            score, feat = d(x)
+            scores.append(score)
+            feats.append(feat)
+        return scores, feats
+
+
+class DiscriminatorS(torch.nn.Module):
+    """HiFiGAN Scale Discriminator.
+    It is similar to `MelganDiscriminator` but with a specific architecture explained in the paper.
+
+    Args:
+        use_spectral_norm (bool): if `True` swith to spectral norm instead of weight norm.
+
+    """
+
+    def __init__(self, use_spectral_norm=False):
+        super().__init__()
+        norm_f = nn.utils.spectral_norm if use_spectral_norm else nn.utils.weight_norm
+        self.convs = nn.ModuleList(
+            [
+                norm_f(nn.Conv1d(1, 128, 15, 1, padding=7)),
+                norm_f(nn.Conv1d(128, 128, 41, 2, groups=4, padding=20)),
+                norm_f(nn.Conv1d(128, 256, 41, 2, groups=16, padding=20)),
+                norm_f(nn.Conv1d(256, 512, 41, 4, groups=16, padding=20)),
+                norm_f(nn.Conv1d(512, 1024, 41, 4, groups=16, padding=20)),
+                norm_f(nn.Conv1d(1024, 1024, 41, 1, groups=16, padding=20)),
+                norm_f(nn.Conv1d(1024, 1024, 5, 1, padding=2)),
+            ]
+        )
+        self.conv_post = norm_f(nn.Conv1d(1024, 1, 3, 1, padding=1))
+
+    def forward(self, x):
+        """
+        Args:
+            x (Tensor): input waveform.
+
+        Returns:
+            Tensor: discriminator scores.
+            List[Tensor]: list of features from the convolutiona layers.
+        """
+        feat = []
+        for l in self.convs:
+            x = l(x)
+            x = F.leaky_relu(x, LRELU_SLOPE)
+            feat.append(x)
+        x = self.conv_post(x)
+        feat.append(x)
+        x = torch.flatten(x, 1, -1)
+        return x, feat
+
+
+class MultiScaleDiscriminator(torch.nn.Module):
+    """HiFiGAN Multi-Scale Discriminator.
+    It is similar to `MultiScaleMelganDiscriminator` but specially tailored for HiFiGAN as in the paper.
+    """
+
+    def __init__(self):
+        super().__init__()
+        self.discriminators = nn.ModuleList(
+            [
+                DiscriminatorS(use_spectral_norm=True),
+                DiscriminatorS(),
+                DiscriminatorS(),
+            ]
+        )
+        self.meanpools = nn.ModuleList([nn.AvgPool1d(4, 2, padding=2), nn.AvgPool1d(4, 2, padding=2)])
+
+    def forward(self, x):
+        """
+        Args:
+            x (Tensor): input waveform.
+
+        Returns:
+            List[Tensor]: discriminator scores.
+            List[List[Tensor]]: list of list of features from each layers of each discriminator.
+        """
+        scores = []
+        feats = []
+        for i, d in enumerate(self.discriminators):
+            if i != 0:
+                x = self.meanpools[i - 1](x)
+            score, feat = d(x)
+            scores.append(score)
+            feats.append(feat)
+        return scores, feats
+
+
+class HifiganDiscriminator(nn.Module):
+    """HiFiGAN discriminator wrapping MPD and MSD."""
+
+    def __init__(self):
+        super().__init__()
+        self.mpd = MultiPeriodDiscriminator()
+        self.msd = MultiScaleDiscriminator()
+
+    def forward(self, x):
+        """
+        Args:
+            x (Tensor): input waveform.
+
+        Returns:
+            List[Tensor]: discriminator scores.
+            List[List[Tensor]]: list of list of features from each layers of each discriminator.
+        """
+        scores, feats = self.mpd(x)
+        scores_, feats_ = self.msd(x)
+        return scores + scores_, feats + feats_
diff --git a/TTS/vocoder/models/hifigan_generator.py b/TTS/vocoder/models/hifigan_generator.py
new file mode 100644
index 0000000000000000000000000000000000000000..7c6ad9b64a8402868944b4f004beadd0006159c6
--- /dev/null
+++ b/TTS/vocoder/models/hifigan_generator.py
@@ -0,0 +1,300 @@
+# adopted from https://github.com/jik876/hifi-gan/blob/master/models.py
+import torch
+from torch import nn
+from torch.nn import Conv1d, ConvTranspose1d
+from torch.nn import functional as F
+from torch.nn.utils import remove_weight_norm, weight_norm
+
+from TTS.utils.io import load_fsspec
+
+LRELU_SLOPE = 0.1
+
+
+def get_padding(k, d):
+    """Return half of the dilated kernel span `d * (k - 1)`, truncated to an int."""
+    dilated_span = d * (k - 1)
+    return int(dilated_span / 2)
+
+
+class ResBlock1(torch.nn.Module):
+    """Residual block type 1: three residual stages of two weight-normalized convolutions each.
+
+    Network::
+
+        x -> lrelu -> convs1[i] -> lrelu -> convs2[i] -> + -> x      (repeated for i = 0, 1, 2)
+        |------------------------------------------------|
+
+    `convs1[i]` is dilated by `dilation[i]`; `convs2[i]` always has dilation 1.
+
+    Args:
+        channels (int): number of input and output channels of every convolution.
+        kernel_size (int): size of the convolution filter in each layer.
+        dilation (tuple): dilation of the first convolution of each of the three stages.
+    """
+
+    def __init__(self, channels, kernel_size=3, dilation=(1, 3, 5)):
+        super().__init__()
+
+        def _conv(rate):
+            # Stride-1 convolution that keeps the channel count; padding comes from `get_padding`.
+            return weight_norm(
+                Conv1d(channels, channels, kernel_size, 1, dilation=rate, padding=get_padding(kernel_size, rate))
+            )
+
+        # All of `convs1` is created before `convs2`, so initial weights are drawn in that order.
+        self.convs1 = nn.ModuleList([_conv(dilation[stage]) for stage in range(3)])
+        self.convs2 = nn.ModuleList([_conv(1) for _ in range(3)])
+
+    def forward(self, x):
+        """
+        Args:
+            x (Tensor): input tensor.
+        Returns:
+            Tensor: output tensor.
+        Shapes:
+            x: [B, C, T]
+        """
+        for dilated, plain in zip(self.convs1, self.convs2):
+            residual = x
+            x = dilated(F.leaky_relu(x, LRELU_SLOPE))
+            x = plain(F.leaky_relu(x, LRELU_SLOPE))
+            x = x + residual
+        return x
+
+    def remove_weight_norm(self):
+        """Strip weight normalization from every convolution, `convs1` first."""
+        for layer in list(self.convs1) + list(self.convs2):
+            remove_weight_norm(layer)
+
+
+class ResBlock2(torch.nn.Module):
+    """Residual block type 2: two residual stages with a single weight-normalized convolution each.
+
+    Network::
+
+        x -> lrelu -> convs[i] -> + -> x      (repeated for i = 0, 1)
+        |-------------------------|
+
+    Args:
+        channels (int): number of input and output channels of every convolution.
+        kernel_size (int): size of the convolution filter in each layer.
+        dilation (tuple): dilation of the convolution in each of the two stages.
+    """
+
+    def __init__(self, channels, kernel_size=3, dilation=(1, 3)):
+        super().__init__()
+        stages = []
+        for stage in range(2):
+            rate = dilation[stage]
+            conv = Conv1d(channels, channels, kernel_size, 1, dilation=rate, padding=get_padding(kernel_size, rate))
+            stages.append(weight_norm(conv))
+        self.convs = nn.ModuleList(stages)
+
+    def forward(self, x):
+        """Apply both residual stages to `x` and return the result."""
+        for conv in self.convs:
+            x = conv(F.leaky_relu(x, LRELU_SLOPE)) + x
+        return x
+
+    def remove_weight_norm(self):
+        """Strip weight normalization from every convolution in the block."""
+        for layer in self.convs:
+            remove_weight_norm(layer)
+
+
+class HifiganGenerator(torch.nn.Module):
+    def __init__(
+        self,
+        in_channels,
+        out_channels,
+        resblock_type,
+        resblock_dilation_sizes,
+        resblock_kernel_sizes,
+        upsample_kernel_sizes,
+        upsample_initial_channel,
+        upsample_factors,
+        inference_padding=5,
+        cond_channels=0,
+        conv_pre_weight_norm=True,
+        conv_post_weight_norm=True,
+        conv_post_bias=True,
+    ):
+        r"""HiFiGAN Generator with Multi-Receptive Field Fusion (MRF)
+
+        Network:
+            x -> lrelu -> upsampling_layer -> resblock1_k1x1 -> z1 -> + -> z_sum / #resblocks -> lrelu -> conv_post_7x1 -> tanh -> o
+                                                 ..          -> zI ---|
+                                              resblockN_kNx1 -> zN ---'
+
+        Args:
+            in_channels (int): number of input tensor channels.
+            out_channels (int): number of output tensor channels.
+            resblock_type (str): type of the `ResBlock`. '1' selects `ResBlock1`, any other value `ResBlock2`.
+            resblock_dilation_sizes (List[List[int]]): list of dilation values in each layer of a `ResBlock`.
+            resblock_kernel_sizes (List[int]): list of kernel sizes for each `ResBlock`.
+            upsample_kernel_sizes (List[int]): list of kernel sizes for each transposed convolution.
+            upsample_initial_channel (int): number of channels for the first upsampling layer. This is divided by 2
+                for each consecutive upsampling layer.
+            upsample_factors (List[int]): upsampling factors (stride) for each upsampling layer.
+            inference_padding (int): number of steps replicated at both ends of the last input dimension at
+                inference time. Defaults to 5.
+            cond_channels (int): number of channels of the conditioning tensor `g`. When greater than 0, a 1x1
+                convolution projects `g` and the result is added to the output of `conv_pre`. Defaults to 0.
+            conv_pre_weight_norm (bool): keep weight normalization on `conv_pre`. Defaults to True.
+            conv_post_weight_norm (bool): keep weight normalization on `conv_post`. Defaults to True.
+            conv_post_bias (bool): give `conv_post` a bias term. Defaults to True.
+        """
+        super().__init__()
+        self.inference_padding = inference_padding
+        self.num_kernels = len(resblock_kernel_sizes)
+        self.num_upsamples = len(upsample_factors)
+        # initial upsampling layers
+        self.conv_pre = weight_norm(Conv1d(in_channels, upsample_initial_channel, 7, 1, padding=3))
+        resblock = ResBlock1 if resblock_type == "1" else ResBlock2
+        # upsampling layers
+        self.ups = nn.ModuleList()
+        for i, (u, k) in enumerate(zip(upsample_factors, upsample_kernel_sizes)):
+            self.ups.append(
+                weight_norm(
+                    ConvTranspose1d(
+                        upsample_initial_channel // (2**i),
+                        upsample_initial_channel // (2 ** (i + 1)),
+                        k,
+                        u,
+                        padding=(k - u) // 2,
+                    )
+                )
+            )
+        # MRF blocks: `num_kernels` residual blocks per upsampling layer, stored flat
+        self.resblocks = nn.ModuleList()
+        for i in range(len(self.ups)):
+            ch = upsample_initial_channel // (2 ** (i + 1))
+            for k, d in zip(resblock_kernel_sizes, resblock_dilation_sizes):
+                self.resblocks.append(resblock(ch, k, d))
+        # post convolution layer
+        self.conv_post = weight_norm(Conv1d(ch, out_channels, 7, 1, padding=3, bias=conv_post_bias))
+        if cond_channels > 0:
+            self.cond_layer = nn.Conv1d(cond_channels, upsample_initial_channel, 1)
+
+        if not conv_pre_weight_norm:
+            remove_weight_norm(self.conv_pre)
+
+        if not conv_post_weight_norm:
+            remove_weight_norm(self.conv_post)
+
+    def forward(self, x, g=None):
+        """
+        Args:
+            x (Tensor): feature input tensor.
+            g (Tensor): global conditioning input tensor. Used only when the model was built with
+                `cond_channels > 0`.
+
+        Returns:
+            Tensor: output waveform.
+
+        Shapes:
+            x: [B, C, T]
+            Tensor: [B, 1, T]
+        """
+        o = self.conv_pre(x)
+        if hasattr(self, "cond_layer"):
+            o = o + self.cond_layer(g)
+        for i in range(self.num_upsamples):
+            o = F.leaky_relu(o, LRELU_SLOPE)
+            o = self.ups[i](o)
+            # average the outputs of the residual blocks that belong to this upsampling layer
+            z_sum = None
+            for j in range(self.num_kernels):
+                if z_sum is None:
+                    z_sum = self.resblocks[i * self.num_kernels + j](o)
+                else:
+                    z_sum += self.resblocks[i * self.num_kernels + j](o)
+            o = z_sum / self.num_kernels
+        # this activation uses the functional default slope rather than LRELU_SLOPE
+        o = F.leaky_relu(o)
+        o = self.conv_post(o)
+        o = torch.tanh(o)
+        return o
+
+    @torch.no_grad()
+    def inference(self, c):
+        """
+        Args:
+            c (Tensor): conditioning input tensor. It is moved to the model device and padded by
+                `inference_padding` on both sides before the forward pass.
+
+        Returns:
+            Tensor: output waveform.
+
+        Shapes:
+            c: [B, C, T]
+            Tensor: [B, 1, T]
+        """
+        c = c.to(self.conv_pre.weight.device)
+        c = torch.nn.functional.pad(c, (self.inference_padding, self.inference_padding), "replicate")
+        return self.forward(c)
+
+    def remove_weight_norm(self):
+        """Remove weight normalization from all layers of the generator."""
+        print("Removing weight norm...")
+        for l in self.ups:
+            remove_weight_norm(l)
+        for l in self.resblocks:
+            l.remove_weight_norm()
+        for l in (self.conv_pre, self.conv_post):
+            try:
+                remove_weight_norm(l)
+            except ValueError:
+                # `__init__` already stripped the norm from this layer
+                # (`conv_pre_weight_norm=False` / `conv_post_weight_norm=False`).
+                pass
+
+    def load_checkpoint(
+        self, config, checkpoint_path, eval=False, cache=False
+    ):  # pylint: disable=unused-argument, redefined-builtin
+        """Load the `"model"` entry of a checkpoint; with `eval=True` also switch to eval mode and drop weight norm."""
+        state = load_fsspec(checkpoint_path, map_location=torch.device("cpu"), cache=cache)
+        self.load_state_dict(state["model"])
+        if eval:
+            self.eval()
+            assert not self.training
+            self.remove_weight_norm()
diff --git a/TTS/vocoder/models/melgan_discriminator.py b/TTS/vocoder/models/melgan_discriminator.py
new file mode 100644
index 0000000000000000000000000000000000000000..14f00c5927cb28449c4fb0dc0727cde014370c2b
--- /dev/null
+++ b/TTS/vocoder/models/melgan_discriminator.py
@@ -0,0 +1,84 @@
+import numpy as np
+from torch import nn
+from torch.nn.utils import weight_norm
+
+
+class MelganDiscriminator(nn.Module):
+    """Single-scale MelGAN discriminator built from weight-normalized 1D convolutions.
+
+    Layers, in order: one reflection-padded convolution, one strided grouped convolution per entry of
+    `downsample_factors`, and two final stride-1 convolutions. Every layer but the last is followed by
+    `LeakyReLU(0.2)`.
+
+    Args:
+        in_channels (int): number of input channels.
+        out_channels (int): number of channels of the final score.
+        kernel_sizes (tuple): kernel sizes of the two final convolutions; their product is the kernel size
+            of the first convolution.
+        base_channels (int): number of channels produced by the first convolution.
+        max_channels (int): upper bound on the channel count of the downsampling layers.
+        downsample_factors (tuple): stride of each downsampling layer. May be empty.
+        groups_denominator (int): each downsampling layer uses its input channel count divided by this
+            value as the number of groups.
+    """
+
+    def __init__(
+        self,
+        in_channels=1,
+        out_channels=1,
+        kernel_sizes=(5, 3),
+        base_channels=16,
+        max_channels=1024,
+        downsample_factors=(4, 4, 4, 4),
+        groups_denominator=4,
+    ):
+        super().__init__()
+        self.layers = nn.ModuleList()
+
+        layer_kernel_size = np.prod(kernel_sizes)
+        layer_padding = (layer_kernel_size - 1) // 2
+
+        # initial layer
+        self.layers += [
+            nn.Sequential(
+                nn.ReflectionPad1d(layer_padding),
+                weight_norm(nn.Conv1d(in_channels, base_channels, layer_kernel_size, stride=1)),
+                nn.LeakyReLU(0.2, inplace=True),
+            )
+        ]
+
+        # downsampling layers
+        layer_in_channels = base_channels
+        for downsample_factor in downsample_factors:
+            layer_out_channels = min(layer_in_channels * downsample_factor, max_channels)
+            layer_kernel_size = downsample_factor * 10 + 1
+            layer_padding = (layer_kernel_size - 1) // 2
+            layer_groups = layer_in_channels // groups_denominator
+            self.layers += [
+                nn.Sequential(
+                    weight_norm(
+                        nn.Conv1d(
+                            layer_in_channels,
+                            layer_out_channels,
+                            kernel_size=layer_kernel_size,
+                            stride=downsample_factor,
+                            padding=layer_padding,
+                            groups=layer_groups,
+                        )
+                    ),
+                    nn.LeakyReLU(0.2, inplace=True),
+                )
+            ]
+            layer_in_channels = layer_out_channels
+
+        # last 2 layers
+        # `layer_in_channels` is the width of the most recent layer; unlike the loop-local
+        # `layer_out_channels` it is also defined when `downsample_factors` is empty.
+        layer_padding1 = (kernel_sizes[0] - 1) // 2
+        layer_padding2 = (kernel_sizes[1] - 1) // 2
+        self.layers += [
+            nn.Sequential(
+                weight_norm(
+                    nn.Conv1d(
+                        layer_in_channels,
+                        layer_in_channels,
+                        kernel_size=kernel_sizes[0],
+                        stride=1,
+                        padding=layer_padding1,
+                    )
+                ),
+                nn.LeakyReLU(0.2, inplace=True),
+            ),
+            weight_norm(
+                nn.Conv1d(
+                    layer_in_channels, out_channels, kernel_size=kernel_sizes[1], stride=1, padding=layer_padding2
+                )
+            ),
+        ]
+
+    def forward(self, x):
+        """Run `x` through all layers.
+
+        Returns:
+            Tensor: output of the last layer.
+            List[Tensor]: output of every layer, the last one included.
+        """
+        feats = []
+        for layer in self.layers:
+            x = layer(x)
+            feats.append(x)
+        return x, feats
diff --git a/TTS/vocoder/models/melgan_generator.py b/TTS/vocoder/models/melgan_generator.py
new file mode 100644
index 0000000000000000000000000000000000000000..989797f0b8537c96ff4cfd1c2c6af856cecc79fc
--- /dev/null
+++ b/TTS/vocoder/models/melgan_generator.py
@@ -0,0 +1,95 @@
+import torch
+from torch import nn
+from torch.nn.utils import weight_norm
+
+from TTS.utils.io import load_fsspec
+from TTS.vocoder.layers.melgan import ResidualStack
+
+
+class MelganGenerator(nn.Module):
+    """MelGAN generator: an input projection, a chain of upsampling stages and an output projection with tanh.
+
+    Each upsampling stage is LeakyReLU -> weight-normalized transposed convolution -> `ResidualStack`.
+
+    Args:
+        in_channels (int): number of channels of the conditioning features.
+        out_channels (int): number of output channels.
+        proj_kernel (int): kernel size of the input and output projections. Must be odd.
+        base_channels (int): channels after the input projection; halved by every upsampling stage.
+        upsample_factors (tuple): stride of each transposed convolution. May be empty.
+        res_kernel (int): `kernel_size` passed to every `ResidualStack`.
+        num_res_blocks (int): `num_res_blocks` passed to every `ResidualStack`.
+    """
+
+    def __init__(
+        self,
+        in_channels=80,
+        out_channels=1,
+        proj_kernel=7,
+        base_channels=512,
+        upsample_factors=(8, 8, 2, 2),
+        res_kernel=3,
+        num_res_blocks=3,
+    ):
+        super().__init__()
+
+        # assert model parameters
+        assert (proj_kernel - 1) % 2 == 0, " [!] proj_kernel should be an odd number."
+
+        # setup additional model parameters
+        base_padding = (proj_kernel - 1) // 2
+        act_slope = 0.2
+        self.inference_padding = 2
+
+        # initial layer
+        layers = []
+        layers += [
+            nn.ReflectionPad1d(base_padding),
+            weight_norm(nn.Conv1d(in_channels, base_channels, kernel_size=proj_kernel, stride=1, bias=True)),
+        ]
+
+        # upsampling layers and residual stacks
+        # Width fed to the final projection; stays at `base_channels` when there is no upsampling stage.
+        layer_out_channels = base_channels
+        for idx, upsample_factor in enumerate(upsample_factors):
+            layer_in_channels = base_channels // (2**idx)
+            layer_out_channels = base_channels // (2 ** (idx + 1))
+            layer_filter_size = upsample_factor * 2
+            layer_stride = upsample_factor
+            layer_output_padding = upsample_factor % 2
+            layer_padding = upsample_factor // 2 + layer_output_padding
+            layers += [
+                nn.LeakyReLU(act_slope),
+                weight_norm(
+                    nn.ConvTranspose1d(
+                        layer_in_channels,
+                        layer_out_channels,
+                        layer_filter_size,
+                        stride=layer_stride,
+                        padding=layer_padding,
+                        output_padding=layer_output_padding,
+                        bias=True,
+                    )
+                ),
+                ResidualStack(channels=layer_out_channels, num_res_blocks=num_res_blocks, kernel_size=res_kernel),
+            ]
+
+        layers += [nn.LeakyReLU(act_slope)]
+
+        # final layer
+        layers += [
+            nn.ReflectionPad1d(base_padding),
+            weight_norm(nn.Conv1d(layer_out_channels, out_channels, proj_kernel, stride=1, bias=True)),
+            nn.Tanh(),
+        ]
+        self.layers = nn.Sequential(*layers)
+
+    def forward(self, c):
+        """Map conditioning features `c` to the generator output."""
+        return self.layers(c)
+
+    @torch.no_grad()
+    def inference(self, c):
+        """Generate without tracking gradients.
+
+        `c` is moved to the model device and padded by `inference_padding` replicated steps on both sides.
+        """
+        c = c.to(self.layers[1].weight.device)
+        c = torch.nn.functional.pad(c, (self.inference_padding, self.inference_padding), "replicate")
+        return self.layers(c)
+
+    def remove_weight_norm(self):
+        """Remove weight normalization from every layer that holds state."""
+        for layer in self.layers:
+            if len(layer.state_dict()) != 0:
+                try:
+                    nn.utils.remove_weight_norm(layer)
+                except ValueError:
+                    # not directly weight-normalized: delegate to the layer's own method
+                    layer.remove_weight_norm()
+
+    def load_checkpoint(
+        self, config, checkpoint_path, eval=False, cache=False
+    ):  # pylint: disable=unused-argument, redefined-builtin
+        """Load the `"model"` entry of a checkpoint; with `eval=True` also switch to eval mode and drop weight norm."""
+        state = load_fsspec(checkpoint_path, map_location=torch.device("cpu"), cache=cache)
+        self.load_state_dict(state["model"])
+        if eval:
+            self.eval()
+            assert not self.training
+            self.remove_weight_norm()
diff --git a/TTS/vocoder/models/melgan_multiscale_discriminator.py b/TTS/vocoder/models/melgan_multiscale_discriminator.py
new file mode 100644
index 0000000000000000000000000000000000000000..b4909f37c0c91c6fee8bb0baab98a8662039dea1
--- /dev/null
+++ b/TTS/vocoder/models/melgan_multiscale_discriminator.py
@@ -0,0 +1,50 @@
+from torch import nn
+
+from TTS.vocoder.models.melgan_discriminator import MelganDiscriminator
+
+
+class MelganMultiscaleDiscriminator(nn.Module):
+    """`num_scales` identically configured `MelganDiscriminator`s, each fed a further average-pooled input.
+
+    The `pooling_*` arguments configure the shared `AvgPool1d`; all remaining arguments are forwarded
+    unchanged to every `MelganDiscriminator`.
+    """
+
+    def __init__(
+        self,
+        in_channels=1,
+        out_channels=1,
+        num_scales=3,
+        kernel_sizes=(5, 3),
+        base_channels=16,
+        max_channels=1024,
+        downsample_factors=(4, 4, 4),
+        pooling_kernel_size=4,
+        pooling_stride=2,
+        pooling_padding=2,
+        groups_denominator=4,
+    ):
+        super().__init__()
+
+        disc_kwargs = {
+            "in_channels": in_channels,
+            "out_channels": out_channels,
+            "kernel_sizes": kernel_sizes,
+            "base_channels": base_channels,
+            "max_channels": max_channels,
+            "downsample_factors": downsample_factors,
+            "groups_denominator": groups_denominator,
+        }
+        self.discriminators = nn.ModuleList([MelganDiscriminator(**disc_kwargs) for _ in range(num_scales)])
+
+        self.pooling = nn.AvgPool1d(
+            kernel_size=pooling_kernel_size, stride=pooling_stride, padding=pooling_padding, count_include_pad=False
+        )
+
+    def forward(self, x):
+        """Return the per-scale scores and the per-scale feature lists, finest scale first."""
+        per_scale = []
+        for disc in self.discriminators:
+            per_scale.append(disc(x))
+            # the input is pooled after every discriminator, the last one included
+            x = self.pooling(x)
+        scores = [score for score, _ in per_scale]
+        feats = [feat for _, feat in per_scale]
+        return scores, feats
diff --git a/TTS/vocoder/models/multiband_melgan_generator.py b/TTS/vocoder/models/multiband_melgan_generator.py
new file mode 100644
index 0000000000000000000000000000000000000000..25d6590659cf5863176eb6609c7609b0e1b28d12
--- /dev/null
+++ b/TTS/vocoder/models/multiband_melgan_generator.py
@@ -0,0 +1,41 @@
+import torch
+
+from TTS.vocoder.layers.pqmf import PQMF
+from TTS.vocoder.models.melgan_generator import MelganGenerator
+
+
+class MultibandMelganGenerator(MelganGenerator):
+    """`MelganGenerator` paired with a `PQMF` layer whose synthesis is applied to the output at inference."""
+
+    def __init__(
+        self,
+        in_channels=80,
+        out_channels=4,
+        proj_kernel=7,
+        base_channels=384,
+        upsample_factors=(2, 8, 2, 2),
+        res_kernel=3,
+        num_res_blocks=3,
+    ):
+        generator_kwargs = {
+            "in_channels": in_channels,
+            "out_channels": out_channels,
+            "proj_kernel": proj_kernel,
+            "base_channels": base_channels,
+            "upsample_factors": upsample_factors,
+            "res_kernel": res_kernel,
+            "num_res_blocks": num_res_blocks,
+        }
+        super().__init__(**generator_kwargs)
+        self.pqmf_layer = PQMF(N=4, taps=62, cutoff=0.15, beta=9.0)
+
+    def pqmf_analysis(self, x):
+        """Forward `x` to the PQMF layer's `analysis`."""
+        return self.pqmf_layer.analysis(x)
+
+    def pqmf_synthesis(self, x):
+        """Forward `x` to the PQMF layer's `synthesis`."""
+        return self.pqmf_layer.synthesis(x)
+
+    @torch.no_grad()
+    def inference(self, cond_features):
+        """Pad the features, run the generator layers and pass the result through PQMF synthesis."""
+        device = self.layers[1].weight.device
+        pad = (self.inference_padding, self.inference_padding)
+        padded = torch.nn.functional.pad(cond_features.to(device), pad, "replicate")
+        return self.pqmf_synthesis(self.layers(padded))
diff --git a/TTS/vocoder/models/parallel_wavegan_discriminator.py b/TTS/vocoder/models/parallel_wavegan_discriminator.py
new file mode 100644
index 0000000000000000000000000000000000000000..adf1bdaea040e99dd66829b9b8ed184146e155cb
--- /dev/null
+++ b/TTS/vocoder/models/parallel_wavegan_discriminator.py
@@ -0,0 +1,186 @@
+import math
+
+import torch
+from torch import nn
+
+from TTS.vocoder.layers.parallel_wavegan import ResidualBlock
+
+
+class ParallelWaveganDiscriminator(nn.Module):
+    """PWGAN discriminator as in https://arxiv.org/abs/1910.11480.
+
+    It classifies each audio window real/fake and returns a sequence of predictions.
+    It is a stack of dilated, weight-normalized convolutions.
+
+    Args:
+        in_channels (int): channels of the input waveform.
+        out_channels (int): channels of the prediction sequence.
+        kernel_size (int): kernel size of every convolution. Must be odd.
+        num_layers (int): total number of convolutions, including the output convolution.
+        conv_channels (int): channels of the hidden convolutions.
+        dilation_factor (int): with 1, hidden layer `i > 0` uses dilation `i`; otherwise it uses
+            `dilation_factor**i`. The first layer always has dilation 1.
+        nonlinear_activation (str): name of the `torch.nn` activation placed after each hidden convolution.
+        nonlinear_activation_params (dict): keyword arguments for that activation.
+        bias (bool): whether the convolutions have a bias term.
+    """
+
+    # pylint: disable=dangerous-default-value
+    def __init__(
+        self,
+        in_channels=1,
+        out_channels=1,
+        kernel_size=3,
+        num_layers=10,
+        conv_channels=64,
+        dilation_factor=1,
+        nonlinear_activation="LeakyReLU",
+        nonlinear_activation_params={"negative_slope": 0.2},
+        bias=True,
+    ):
+        super().__init__()
+        assert (kernel_size - 1) % 2 == 0, " [!] does not support even number kernel size."
+        assert dilation_factor > 0, " [!] dilation factor must be > 0."
+        self.conv_layers = nn.ModuleList()
+        conv_in_channels = in_channels
+        for i in range(num_layers - 1):
+            if i == 0:
+                dilation = 1
+            else:
+                dilation = i if dilation_factor == 1 else dilation_factor**i
+            padding = (kernel_size - 1) // 2 * dilation
+            conv_layer = [
+                nn.Conv1d(
+                    conv_in_channels,
+                    conv_channels,
+                    kernel_size=kernel_size,
+                    padding=padding,
+                    dilation=dilation,
+                    bias=bias,
+                ),
+                getattr(nn, nonlinear_activation)(inplace=True, **nonlinear_activation_params),
+            ]
+            self.conv_layers += conv_layer
+            # Whatever follows a hidden convolution, the output layer included, consumes `conv_channels`.
+            # Updating here (rather than at the top of the next iteration) keeps `num_layers=2` consistent.
+            conv_in_channels = conv_channels
+        padding = (kernel_size - 1) // 2
+        last_conv_layer = nn.Conv1d(conv_in_channels, out_channels, kernel_size=kernel_size, padding=padding, bias=bias)
+        self.conv_layers += [last_conv_layer]
+        self.apply_weight_norm()
+
+    def forward(self, x):
+        """
+            x : (B, 1, T).
+        Returns:
+            Tensor: (B, 1, T)
+        """
+        for f in self.conv_layers:
+            x = f(x)
+        return x
+
+    def apply_weight_norm(self):
+        """Wrap every `Conv1d`/`Conv2d` submodule in weight normalization."""
+
+        def _apply_weight_norm(m):
+            if isinstance(m, (torch.nn.Conv1d, torch.nn.Conv2d)):
+                torch.nn.utils.weight_norm(m)
+
+        self.apply(_apply_weight_norm)
+
+    def remove_weight_norm(self):
+        """Remove weight normalization from every submodule that has it."""
+
+        def _remove_weight_norm(m):
+            try:
+                nn.utils.remove_weight_norm(m)
+            except ValueError:  # this module didn't have weight norm
+                return
+
+        self.apply(_remove_weight_norm)
+
+
+class ResidualParallelWaveganDiscriminator(nn.Module):
+    """Discriminator made of a 1x1 input convolution, a stack of `ResidualBlock`s and two 1x1 output convolutions.
+
+    The skip outputs of all residual blocks are summed, scaled by `sqrt(1 / num_layers)` and passed to the
+    output layers.
+
+    Args:
+        in_channels (int): channels of the input.
+        out_channels (int): channels of the output.
+        kernel_size (int): kernel size passed to every `ResidualBlock`. Must be odd.
+        num_layers (int): number of residual blocks. Must be a multiple of `stacks`.
+        stacks (int): number of stacks; block `i` uses dilation `2 ** (i % (num_layers // stacks))`.
+        res_channels (int): output channels of the input convolution; passed to every `ResidualBlock`.
+        gate_channels (int): `gate_channels` passed to every `ResidualBlock`.
+        skip_channels (int): `skip_channels` passed to every `ResidualBlock`; input width of the output layers.
+        dropout (float): `dropout` passed to every `ResidualBlock`.
+        bias (bool): `bias` passed to every `ResidualBlock`.
+        nonlinear_activation (str): name of the `torch.nn` activation used around the 1x1 convolutions.
+        nonlinear_activation_params (dict): keyword arguments for that activation.
+    """
+
+    # pylint: disable=dangerous-default-value
+    def __init__(
+        self,
+        in_channels=1,
+        out_channels=1,
+        kernel_size=3,
+        num_layers=30,
+        stacks=3,
+        res_channels=64,
+        gate_channels=128,
+        skip_channels=64,
+        dropout=0.0,
+        bias=True,
+        nonlinear_activation="LeakyReLU",
+        nonlinear_activation_params={"negative_slope": 0.2},
+    ):
+        super().__init__()
+        assert (kernel_size - 1) % 2 == 0, "Not support even number kernel size."
+
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+        self.num_layers = num_layers
+        self.stacks = stacks
+        self.kernel_size = kernel_size
+        self.res_factor = math.sqrt(1.0 / num_layers)
+
+        # check the number of num_layers and stacks
+        assert num_layers % stacks == 0
+        layers_per_stack = num_layers // stacks
+
+        # define first convolution
+        self.first_conv = nn.Sequential(
+            nn.Conv1d(in_channels, res_channels, kernel_size=1, padding=0, dilation=1, bias=True),
+            getattr(nn, nonlinear_activation)(inplace=True, **nonlinear_activation_params),
+        )
+
+        # define residual blocks
+        self.conv_layers = nn.ModuleList()
+        for layer in range(num_layers):
+            dilation = 2 ** (layer % layers_per_stack)
+            conv = ResidualBlock(
+                kernel_size=kernel_size,
+                res_channels=res_channels,
+                gate_channels=gate_channels,
+                skip_channels=skip_channels,
+                aux_channels=-1,
+                dilation=dilation,
+                dropout=dropout,
+                bias=bias,
+                use_causal_conv=False,
+            )
+            self.conv_layers += [conv]
+
+        # define output layers
+        self.last_conv_layers = nn.ModuleList(
+            [
+                getattr(nn, nonlinear_activation)(inplace=True, **nonlinear_activation_params),
+                nn.Conv1d(skip_channels, skip_channels, kernel_size=1, padding=0, dilation=1, bias=True),
+                getattr(nn, nonlinear_activation)(inplace=True, **nonlinear_activation_params),
+                nn.Conv1d(skip_channels, out_channels, kernel_size=1, padding=0, dilation=1, bias=True),
+            ]
+        )
+
+        # apply weight norm
+        self.apply_weight_norm()
+
+    def forward(self, x):
+        """
+        x: (B, 1, T).
+        """
+        x = self.first_conv(x)
+
+        # each block returns the next hidden state and a second tensor that is accumulated here
+        skips = 0
+        for f in self.conv_layers:
+            x, h = f(x, None)
+            skips += h
+        skips *= self.res_factor
+
+        # apply final layers
+        x = skips
+        for f in self.last_conv_layers:
+            x = f(x)
+        return x
+
+    def apply_weight_norm(self):
+        """Wrap every `Conv1d`/`Conv2d` submodule in weight normalization."""
+
+        def _apply_weight_norm(m):
+            if isinstance(m, (torch.nn.Conv1d, torch.nn.Conv2d)):
+                torch.nn.utils.weight_norm(m)
+
+        self.apply(_apply_weight_norm)
+
+    def remove_weight_norm(self):
+        """Remove weight normalization from every submodule that has it, logging each removal."""
+
+        def _remove_weight_norm(m):
+            try:
+                nn.utils.remove_weight_norm(m)
+            except ValueError:  # this module didn't have weight norm
+                return
+            # Log only after the removal succeeded, so modules without weight norm are not reported.
+            print(f"Weight norm is removed from {m}.")
+
+        self.apply(_remove_weight_norm)
diff --git a/TTS/vocoder/models/parallel_wavegan_generator.py b/TTS/vocoder/models/parallel_wavegan_generator.py
new file mode 100644
index 0000000000000000000000000000000000000000..c741774ae32360c74a315fca74863cb8b1346de2
--- /dev/null
+++ b/TTS/vocoder/models/parallel_wavegan_generator.py
@@ -0,0 +1,164 @@
+import math
+
+import numpy as np
+import torch
+
+from TTS.utils.io import load_fsspec
+from TTS.vocoder.layers.parallel_wavegan import ResidualBlock
+from TTS.vocoder.layers.upsample import ConvUpsample
+
+
+class ParallelWaveganGenerator(torch.nn.Module):
+    """PWGAN generator as in https://arxiv.org/pdf/1910.11480.pdf.
+
+    It is similar to WaveNet with no causal convolution.
+    It is conditioned on an aux feature (spectrogram) to generate
+    an output waveform from an input noise.
+
+    Args:
+        in_channels (int): channels of the input noise.
+        out_channels (int): channels of the generated output.
+        kernel_size (int): kernel size passed to every `ResidualBlock`.
+        num_res_blocks (int): number of residual blocks. Must be a multiple of `stacks`.
+        stacks (int): number of stacks; block `i` uses dilation `2 ** (i % (num_res_blocks // stacks))`.
+        res_channels (int): output channels of the first convolution; passed to every `ResidualBlock`.
+        gate_channels (int): `gate_channels` passed to every `ResidualBlock`.
+        skip_channels (int): `skip_channels` passed to every `ResidualBlock`; input width of the output layers.
+        aux_channels (int): `aux_channels` passed to every `ResidualBlock`.
+        dropout (float): `dropout` passed to every `ResidualBlock`.
+        bias (bool): `bias` passed to every `ResidualBlock`.
+        use_weight_norm (bool): apply weight normalization to all `Conv1d`/`Conv2d` submodules.
+        upsample_factors (list): factors passed to `ConvUpsample`; their product is the expected ratio
+            between output and conditioning length.
+        inference_padding (int): number of steps replicated at both ends of the conditioning at inference.
+    """
+
+    # pylint: disable=dangerous-default-value
+    def __init__(
+        self,
+        in_channels=1,
+        out_channels=1,
+        kernel_size=3,
+        num_res_blocks=30,
+        stacks=3,
+        res_channels=64,
+        gate_channels=128,
+        skip_channels=64,
+        aux_channels=80,
+        dropout=0.0,
+        bias=True,
+        use_weight_norm=True,
+        upsample_factors=[4, 4, 4, 4],
+        inference_padding=2,
+    ):
+
+        super().__init__()
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+        self.aux_channels = aux_channels
+        self.num_res_blocks = num_res_blocks
+        self.stacks = stacks
+        self.kernel_size = kernel_size
+        self.upsample_factors = upsample_factors
+        self.upsample_scale = np.prod(upsample_factors)
+        self.inference_padding = inference_padding
+        self.use_weight_norm = use_weight_norm
+
+        # check the number of layers and stacks
+        assert num_res_blocks % stacks == 0
+        layers_per_stack = num_res_blocks // stacks
+
+        # define first convolution
+        self.first_conv = torch.nn.Conv1d(in_channels, res_channels, kernel_size=1, bias=True)
+
+        # define conv + upsampling network
+        self.upsample_net = ConvUpsample(upsample_factors=upsample_factors)
+
+        # define residual blocks
+        self.conv_layers = torch.nn.ModuleList()
+        for layer in range(num_res_blocks):
+            dilation = 2 ** (layer % layers_per_stack)
+            conv = ResidualBlock(
+                kernel_size=kernel_size,
+                res_channels=res_channels,
+                gate_channels=gate_channels,
+                skip_channels=skip_channels,
+                aux_channels=aux_channels,
+                dilation=dilation,
+                dropout=dropout,
+                bias=bias,
+            )
+            self.conv_layers += [conv]
+
+        # define output layers
+        self.last_conv_layers = torch.nn.ModuleList(
+            [
+                torch.nn.ReLU(inplace=True),
+                torch.nn.Conv1d(skip_channels, skip_channels, kernel_size=1, bias=True),
+                torch.nn.ReLU(inplace=True),
+                torch.nn.Conv1d(skip_channels, out_channels, kernel_size=1, bias=True),
+            ]
+        )
+
+        # apply weight norm
+        if use_weight_norm:
+            self.apply_weight_norm()
+
+    def forward(self, c):
+        """
+        c: (B, C ,T').
+        o: Output tensor (B, out_channels, T)
+        """
+        # random noise; it must carry `in_channels` channels because it is fed to `first_conv`
+        x = torch.randn([c.shape[0], self.in_channels, c.shape[2] * self.upsample_scale])
+        x = x.to(self.first_conv.bias.device)
+
+        # perform upsampling
+        if c is not None and self.upsample_net is not None:
+            c = self.upsample_net(c)
+            assert (
+                c.shape[-1] == x.shape[-1]
+            ), f" [!] Upsampling scale does not match the expected output. {c.shape} vs {x.shape}"
+
+        # encode to hidden representation
+        x = self.first_conv(x)
+        skips = 0
+        for f in self.conv_layers:
+            x, h = f(x, c)
+            skips += h
+        skips *= math.sqrt(1.0 / len(self.conv_layers))
+
+        # apply final layers
+        x = skips
+        for f in self.last_conv_layers:
+            x = f(x)
+
+        return x
+
+    @torch.no_grad()
+    def inference(self, c):
+        """Generate without tracking gradients from conditioning `c`, padded by `inference_padding` on both sides."""
+        c = c.to(self.first_conv.weight.device)
+        c = torch.nn.functional.pad(c, (self.inference_padding, self.inference_padding), "replicate")
+        return self.forward(c)
+
+    def remove_weight_norm(self):
+        """Remove weight normalization from every submodule that has it."""
+
+        def _remove_weight_norm(m):
+            try:
+                torch.nn.utils.remove_weight_norm(m)
+            except ValueError:  # this module didn't have weight norm
+                return
+
+        self.apply(_remove_weight_norm)
+
+    def apply_weight_norm(self):
+        """Wrap every `Conv1d`/`Conv2d` submodule in weight normalization."""
+
+        def _apply_weight_norm(m):
+            if isinstance(m, (torch.nn.Conv1d, torch.nn.Conv2d)):
+                torch.nn.utils.weight_norm(m)
+
+        self.apply(_apply_weight_norm)
+
+    @staticmethod
+    def _get_receptive_field_size(layers, stacks, kernel_size, dilation=lambda x: 2**x):
+        """Return `(kernel_size - 1) * sum(dilations) + 1` for `layers` blocks split evenly into `stacks`."""
+        assert layers % stacks == 0
+        layers_per_cycle = layers // stacks
+        dilations = [dilation(i % layers_per_cycle) for i in range(layers)]
+        return (kernel_size - 1) * sum(dilations) + 1
+
+    @property
+    def receptive_field_size(self):
+        """Receptive field of the residual stack, computed from the stored constructor arguments."""
+        # the block count is stored as `num_res_blocks`; there is no `layers` attribute on this class
+        return self._get_receptive_field_size(self.num_res_blocks, self.stacks, self.kernel_size)
+
+    def load_checkpoint(
+        self, config, checkpoint_path, eval=False, cache=False
+    ):  # pylint: disable=unused-argument, redefined-builtin
+        """Load the `"model"` entry of a checkpoint; with `eval=True` also switch to eval mode and drop weight norm."""
+        state = load_fsspec(checkpoint_path, map_location=torch.device("cpu"), cache=cache)
+        self.load_state_dict(state["model"])
+        if eval:
+            self.eval()
+            assert not self.training
+            if self.use_weight_norm:
+                self.remove_weight_norm()
diff --git a/TTS/vocoder/models/random_window_discriminator.py b/TTS/vocoder/models/random_window_discriminator.py
new file mode 100644
index 0000000000000000000000000000000000000000..ea95668a5fb6408488f0243c2e4e7f95ee4c6a6f
--- /dev/null
+++ b/TTS/vocoder/models/random_window_discriminator.py
@@ -0,0 +1,204 @@
+import numpy as np
+from torch import nn
+
+
+class GBlock(nn.Module):
+    """Conditioned residual block that pools in time and doubles the channel count."""
+
+    def __init__(self, in_channels, cond_channels, downsample_factor):
+        super().__init__()
+
+        self.in_channels = in_channels
+        self.cond_channels = cond_channels
+        self.downsample_factor = downsample_factor
+
+        hidden_channels = in_channels * 2
+
+        # main path, first half: pool -> ReLU -> conv
+        self.start = nn.Sequential(
+            nn.AvgPool1d(downsample_factor, stride=downsample_factor),
+            nn.ReLU(),
+            nn.Conv1d(in_channels, hidden_channels, kernel_size=3, padding=1),
+        )
+        # 1x1 projection of the conditioning features onto the hidden width
+        self.lc_conv1d = nn.Conv1d(cond_channels, hidden_channels, kernel_size=1)
+        # main path, second half: ReLU -> dilated conv
+        self.end = nn.Sequential(
+            nn.ReLU(),
+            nn.Conv1d(hidden_channels, hidden_channels, kernel_size=3, dilation=2, padding=2),
+        )
+        # shortcut: 1x1 conv followed by the same pooling as the main path
+        self.residual = nn.Sequential(
+            nn.Conv1d(in_channels, hidden_channels, kernel_size=1),
+            nn.AvgPool1d(downsample_factor, stride=downsample_factor),
+        )
+
+    def forward(self, inputs, conditions):
+        """Add the projected `conditions` to the pooled `inputs`, then apply the shortcut sum."""
+        hidden = self.start(inputs) + self.lc_conv1d(conditions)
+        hidden = self.end(hidden)
+        shortcut = self.residual(inputs)
+        return hidden + shortcut
+
+
+class DBlock(nn.Module):
+    """Residual block: optional average-pool downsampling plus two convolutions."""
+
+    def __init__(self, in_channels, out_channels, downsample_factor):
+        super().__init__()
+
+        self.in_channels = in_channels
+        self.downsample_factor = downsample_factor
+        self.out_channels = out_channels
+
+        # Attribute name (spelling included) is part of the class surface; keep it.
+        self.donwsample_layer = nn.AvgPool1d(downsample_factor, stride=downsample_factor)
+        self.layers = nn.Sequential(
+            nn.ReLU(),
+            nn.Conv1d(in_channels, out_channels, kernel_size=3, padding=1),
+            nn.ReLU(),
+            nn.Conv1d(out_channels, out_channels, kernel_size=3, dilation=2, padding=2),
+        )
+        self.residual = nn.Sequential(
+            nn.Conv1d(in_channels, out_channels, kernel_size=1),
+        )
+
+    def forward(self, inputs):
+        """Return main path + shortcut; pooling is applied only when the factor exceeds 1."""
+        if not self.downsample_factor > 1:
+            return self.layers(inputs) + self.residual(inputs)
+        # main path pools before its convs; the shortcut pools after its 1x1 conv
+        main = self.layers(self.donwsample_layer(inputs))
+        shortcut = self.donwsample_layer(self.residual(inputs))
+        return main + shortcut
+
+
+class ConditionalDiscriminator(nn.Module):
+    """Discriminator that scores a waveform window together with conditioning features."""
+
+    def __init__(self, in_channels, cond_channels, downsample_factors=(2, 2, 2), out_channels=(128, 256)):
+        super().__init__()
+
+        # the last downsample factor is consumed by the conditioning block
+        assert len(downsample_factors) == len(out_channels) + 1
+
+        self.in_channels = in_channels
+        self.cond_channels = cond_channels
+        self.downsample_factors = downsample_factors
+        self.out_channels = out_channels
+
+        self.pre_cond_layers = nn.ModuleList()
+        self.post_cond_layers = nn.ModuleList()
+
+        # layers before condition features
+        width = 64
+        self.pre_cond_layers.extend([DBlock(in_channels, width, 1)])
+        for idx, channel in enumerate(out_channels):
+            self.pre_cond_layers.append(DBlock(width, channel, downsample_factors[idx]))
+            width = channel
+
+        # condition block
+        self.cond_block = GBlock(width, cond_channels, downsample_factors[-1])
+
+        # layers after condition block (the condition block doubles the width)
+        doubled = width * 2
+        self.post_cond_layers.extend(
+            [
+                DBlock(doubled, doubled, 1),
+                DBlock(doubled, doubled, 1),
+                nn.AdaptiveAvgPool1d(1),
+                nn.Conv1d(doubled, 1, kernel_size=1),
+            ]
+        )
+
+    def forward(self, inputs, conditions):
+        """Score `inputs` given `conditions`; the input is first reshaped to `in_channels` channels."""
+        hidden = inputs.view(inputs.size()[0], self.in_channels, -1)
+        for block in self.pre_cond_layers:
+            hidden = block(hidden)
+        hidden = self.cond_block(hidden, conditions)
+        for block in self.post_cond_layers:
+            hidden = block(hidden)
+
+        return hidden
+
+
+class UnconditionalDiscriminator(nn.Module):
+    """Discriminator that scores a waveform window without conditioning features.
+
+    Args:
+        in_channels (int): Number of channels the input is reshaped to in `forward`.
+        base_channels (int): Output channels of the first (non-downsampling) block.
+        downsample_factors (tuple): Downsampling factor of each following block.
+        out_channels (tuple): Output channels of each downsampling block; must have
+            at least as many entries as `downsample_factors`.
+    """
+
+    def __init__(self, in_channels, base_channels=64, downsample_factors=(8, 4), out_channels=(128, 256)):
+        super().__init__()
+
+        self.in_channels = in_channels
+        self.downsample_factors = downsample_factors
+        self.out_channels = out_channels
+
+        self.layers = nn.ModuleList()
+        self.layers += [DBlock(self.in_channels, base_channels, 1)]
+        in_channels = base_channels
+        for (i, factor) in enumerate(downsample_factors):
+            self.layers.append(DBlock(in_channels, out_channels[i], factor))
+            # Track the real width of the block just added. The former
+            # `in_channels *= 2` only matched when every `out_channels` entry was
+            # exactly twice the previous width (true for the defaults 64/128/256).
+            in_channels = out_channels[i]
+        self.layers += [
+            DBlock(in_channels, in_channels, 1),
+            DBlock(in_channels, in_channels, 1),
+            nn.AdaptiveAvgPool1d(1),
+            nn.Conv1d(in_channels, 1, kernel_size=1),
+        ]
+
+    def forward(self, inputs):
+        """Score `inputs` after reshaping them to `(batch, in_channels, -1)`."""
+        batch_size = inputs.size()[0]
+        outputs = inputs.view(batch_size, self.in_channels, -1)
+        for layer in self.layers:
+            outputs = layer(outputs)
+        return outputs
+
+
+class RandomWindowDiscriminator(nn.Module):
+    """Random Window Discriminator as described in
+    http://arxiv.org/abs/1909.11646
+
+    One unconditional and one conditional discriminator is built per entry of
+    `window_sizes`; `forward` feeds each of them a randomly positioned window.
+
+    Args:
+        cond_channels (int): Channels of the conditioning features.
+        hop_length (int): Waveform samples per conditioning frame.
+        uncond_disc_donwsample_factors (tuple): Downsample factors shared by all
+            unconditional discriminators.
+        cond_disc_downsample_factors (tuple): Per-window downsample factors of the
+            conditional discriminators; each product must equal `hop_length // k`
+            with ``k = window_size // (2 * hop_length)``.
+        cond_disc_out_channels (tuple): Per-window output channels of the
+            conditional discriminators.
+        window_sizes (tuple): Window lengths in samples; each must be a multiple
+            of `hop_length`.
+    """
+
+    def __init__(
+        self,
+        cond_channels,
+        hop_length,
+        uncond_disc_donwsample_factors=(8, 4),
+        cond_disc_downsample_factors=((8, 4, 2, 2, 2), (8, 4, 2, 2), (8, 4, 2), (8, 4), (4, 2, 2)),
+        cond_disc_out_channels=((128, 128, 256, 256), (128, 256, 256), (128, 256), (256,), (128, 256)),
+        window_sizes=(512, 1024, 2048, 4096, 8192),
+    ):
+
+        super().__init__()
+        self.cond_channels = cond_channels
+        self.window_sizes = window_sizes
+        self.hop_length = hop_length
+        self.base_window_size = self.hop_length * 2
+        # number of channels each window is folded into by the discriminators
+        self.ks = [ws // self.base_window_size for ws in window_sizes]
+
+        # check arguments
+        assert len(cond_disc_downsample_factors) == len(cond_disc_out_channels) == len(window_sizes)
+        for ws in window_sizes:
+            assert ws % hop_length == 0
+
+        for idx, cf in enumerate(cond_disc_downsample_factors):
+            assert np.prod(cf) == hop_length // self.ks[idx]
+
+        # define layers
+        self.unconditional_discriminators = nn.ModuleList([])
+        for k in self.ks:
+            layer = UnconditionalDiscriminator(
+                in_channels=k, base_channels=64, downsample_factors=uncond_disc_donwsample_factors
+            )
+            self.unconditional_discriminators.append(layer)
+
+        self.conditional_discriminators = nn.ModuleList([])
+        for idx, k in enumerate(self.ks):
+            layer = ConditionalDiscriminator(
+                in_channels=k,
+                cond_channels=cond_channels,
+                downsample_factors=cond_disc_downsample_factors[idx],
+                out_channels=cond_disc_out_channels[idx],
+            )
+            self.conditional_discriminators.append(layer)
+
+    def forward(self, x, c):
+        """Score random windows of `x` (waveform) and matching windows of `c` (conditioning).
+
+        Returns:
+            Tuple[list, list]: scores of the unconditional discriminators followed by
+            those of the conditional ones, and a list of features that is always empty.
+        """
+        scores = []
+        feats = []
+        # unconditional pass
+        for (window_size, layer) in zip(self.window_sizes, self.unconditional_discriminators):
+            # `randint` excludes its upper bound: add 1 so the last valid start is
+            # reachable and an input exactly one window long is accepted.
+            index = np.random.randint(x.shape[-1] - window_size + 1)
+
+            score = layer(x[:, :, index : index + window_size])
+            scores.append(score)
+
+        # conditional pass
+        for (window_size, layer) in zip(self.window_sizes, self.conditional_discriminators):
+            frame_size = window_size // self.hop_length
+            # Only frames fully covered by both `c` and `x` may start a window, so
+            # the waveform slice below is never cut short.
+            usable_frames = min(c.shape[-1], x.shape[-1] // self.hop_length)
+            lc_index = np.random.randint(usable_frames - frame_size + 1)
+            sample_index = lc_index * self.hop_length
+            x_sub = x[:, :, sample_index : (lc_index + frame_size) * self.hop_length]
+            c_sub = c[:, :, lc_index : lc_index + frame_size]
+
+            score = layer(x_sub, c_sub)
+            scores.append(score)
+        return scores, feats
diff --git a/TTS/vocoder/models/univnet_discriminator.py b/TTS/vocoder/models/univnet_discriminator.py
new file mode 100644
index 0000000000000000000000000000000000000000..34e2d1c276582f2eca0e7520c6b49338b6f7b8ba
--- /dev/null
+++ b/TTS/vocoder/models/univnet_discriminator.py
@@ -0,0 +1,96 @@
+import torch
+import torch.nn.functional as F
+from torch import nn
+from torch.nn.utils import spectral_norm, weight_norm
+
+from TTS.utils.audio.torch_transforms import TorchSTFT
+from TTS.vocoder.models.hifigan_discriminator import MultiPeriodDiscriminator
+
+LRELU_SLOPE = 0.1
+
+
+class SpecDiscriminator(nn.Module):
+    """Stack of 2D convolutions that scores the STFT output of a waveform."""
+
+    def __init__(self, fft_size=1024, hop_length=120, win_length=600, use_spectral_norm=False):
+        super().__init__()
+        # Identity check on purpose: only the literal `False` selects weight norm,
+        # every other value (including any string) selects spectral norm.
+        norm_f = spectral_norm if use_spectral_norm is not False else weight_norm
+        self.fft_size = fft_size
+        self.hop_length = hop_length
+        self.win_length = win_length
+        self.stft = TorchSTFT(fft_size, hop_length, win_length)
+        self.discriminators = nn.ModuleList(
+            [
+                norm_f(nn.Conv2d(1, 32, kernel_size=(3, 9), padding=(1, 4))),
+                norm_f(nn.Conv2d(32, 32, kernel_size=(3, 9), stride=(1, 2), padding=(1, 4))),
+                norm_f(nn.Conv2d(32, 32, kernel_size=(3, 9), stride=(1, 2), padding=(1, 4))),
+                norm_f(nn.Conv2d(32, 32, kernel_size=(3, 9), stride=(1, 2), padding=(1, 4))),
+                norm_f(nn.Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))),
+            ]
+        )
+
+        self.out = norm_f(nn.Conv2d(32, 1, 3, 1, 1))
+
+    def forward(self, y):
+        """Return the flattened score and the list of intermediate feature maps."""
+        fmap = []
+        # NOTE(review): the STFT runs under `no_grad`, so no gradient reaches `y`
+        # through this discriminator -- confirm this is intended.
+        with torch.no_grad():
+            spec = self.stft(y.squeeze(1))
+        hidden = spec.unsqueeze(1)
+        for conv in self.discriminators:
+            hidden = F.leaky_relu(conv(hidden), LRELU_SLOPE)
+            fmap.append(hidden)
+
+        hidden = self.out(hidden)
+        fmap.append(hidden)
+
+        return torch.flatten(hidden, 1, -1), fmap
+
+
+class MultiResSpecDiscriminator(torch.nn.Module):
+    """Three `SpecDiscriminator`s, one per STFT resolution.
+
+    Args:
+        fft_sizes: FFT size of each resolution (first three entries are used).
+        hop_sizes: Hop length of each resolution (first three entries are used).
+        win_lengths: Window length of each resolution (first three entries are used).
+        window: Forwarded into the `use_spectral_norm` slot of `SpecDiscriminator`,
+            see the note in `__init__`.
+    """
+
+    def __init__(
+        self, fft_sizes=(1024, 2048, 512), hop_sizes=(120, 240, 50), win_lengths=(600, 1200, 240), window="hann_window"
+    ):
+
+        super().__init__()
+        # NOTE(review): `window` has always been passed as the 4th positional
+        # argument of `SpecDiscriminator`, which is `use_spectral_norm`. Because
+        # that class tests `use_spectral_norm is False`, the default string selects
+        # spectral norm. The forwarding is kept (now spelled out by keyword) since
+        # switching the normalization would change the registered parameter names.
+        self.discriminators = nn.ModuleList(
+            [
+                SpecDiscriminator(fft_sizes[idx], hop_sizes[idx], win_lengths[idx], use_spectral_norm=window)
+                for idx in range(3)
+            ]
+        )
+
+    def forward(self, x):
+        """Run every resolution on `x`; returns per-discriminator scores and feature lists."""
+        scores = []
+        feats = []
+        for d in self.discriminators:
+            score, feat = d(x)
+            scores.append(score)
+            feats.append(feat)
+
+        return scores, feats
+
+
+class UnivnetDiscriminator(nn.Module):
+    """Univnet discriminator combining the multi-period and multi-resolution spectrogram discriminators."""
+
+    def __init__(self):
+        super().__init__()
+        self.mpd = MultiPeriodDiscriminator()
+        self.msd = MultiResSpecDiscriminator()
+
+    def forward(self, x):
+        """
+        Args:
+            x (Tensor): input waveform.
+
+        Returns:
+            List[Tensor]: scores of `mpd` followed by scores of `msd`.
+            List[List[Tensor]]: features of `mpd` followed by features of `msd`.
+        """
+        mpd_scores, mpd_feats = self.mpd(x)
+        msd_scores, msd_feats = self.msd(x)
+        all_scores = mpd_scores + msd_scores
+        all_feats = mpd_feats + msd_feats
+        return all_scores, all_feats
diff --git a/TTS/vocoder/models/univnet_generator.py b/TTS/vocoder/models/univnet_generator.py
new file mode 100644
index 0000000000000000000000000000000000000000..2ee28c7b85852c6b15df28907b6fd1195f3218cd
--- /dev/null
+++ b/TTS/vocoder/models/univnet_generator.py
@@ -0,0 +1,156 @@
+from typing import List
+
+import numpy as np
+import torch
+import torch.nn.functional as F
+
+from TTS.vocoder.layers.lvc_block import LVCBlock
+
+LRELU_SLOPE = 0.1
+
+
+class UnivnetGenerator(torch.nn.Module):
+    def __init__(
+        self,
+        in_channels: int,
+        out_channels: int,
+        hidden_channels: int,
+        cond_channels: int,
+        upsample_factors: List[int],
+        lvc_layers_each_block: int,
+        lvc_kernel_size: int,
+        kpnet_hidden_channels: int,
+        kpnet_conv_size: int,
+        dropout: float,
+        use_weight_norm=True,
+    ):
+        """Univnet Generator network.
+
+        Paper: https://arxiv.org/pdf/2106.07889.pdf
+
+        Args:
+            in_channels (int): Number of channels of the input noise tensor.
+            out_channels (int): Number of channels of the output tensor.
+            hidden_channels (int): Number of hidden network channels.
+            cond_channels (int): Number of channels of the conditioning tensors.
+            upsample_factors (List[int]): List of upsample factors, one LVC block per entry.
+            lvc_layers_each_block (int): Number of LVC layers in each block.
+            lvc_kernel_size (int): Kernel size of the LVC layers.
+            kpnet_hidden_channels (int): Passed to each `LVCBlock` as `kpnet_hidden_channels`.
+            kpnet_conv_size (int): Passed to each `LVCBlock` as `kpnet_conv_size`.
+            dropout (float): Dropout rate, passed to each `LVCBlock` as `kpnet_dropout`.
+            use_weight_norm (bool, optional): Enable/disable weight norm. Defaults to True.
+        """
+
+        super().__init__()
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+        self.cond_channels = cond_channels
+        self.upsample_scale = np.prod(upsample_factors)
+        self.lvc_block_nums = len(upsample_factors)
+
+        # define first convolution
+        self.first_conv = torch.nn.Conv1d(
+            in_channels, hidden_channels, kernel_size=7, padding=(7 - 1) // 2, dilation=1, bias=True
+        )
+
+        # define residual blocks
+        self.lvc_blocks = torch.nn.ModuleList()
+        # running product of the upsample factors seen so far
+        cond_hop_length = 1
+        for n in range(self.lvc_block_nums):
+            cond_hop_length = cond_hop_length * upsample_factors[n]
+            lvcb = LVCBlock(
+                in_channels=hidden_channels,
+                cond_channels=cond_channels,
+                upsample_ratio=upsample_factors[n],
+                conv_layers=lvc_layers_each_block,
+                conv_kernel_size=lvc_kernel_size,
+                cond_hop_length=cond_hop_length,
+                kpnet_hidden_channels=kpnet_hidden_channels,
+                kpnet_conv_size=kpnet_conv_size,
+                kpnet_dropout=dropout,
+            )
+            self.lvc_blocks += [lvcb]
+
+        # define output layers
+        self.last_conv_layers = torch.nn.ModuleList(
+            [
+                torch.nn.Conv1d(
+                    hidden_channels, out_channels, kernel_size=7, padding=(7 - 1) // 2, dilation=1, bias=True
+                ),
+            ]
+        )
+
+        # apply weight norm
+        if use_weight_norm:
+            self.apply_weight_norm()
+
+    def forward(self, c):
+        """Calculate forward propagation.
+        Args:
+            c (Tensor): Local conditioning auxiliary features (B, C ,T').
+        Returns:
+            Tensor: Output tensor (B, out_channels, T)
+        """
+        # random noise with one step per conditioning frame, moved to the model device
+        x = torch.randn([c.shape[0], self.in_channels, c.shape[2]])
+        x = x.to(self.first_conv.bias.device)
+        x = self.first_conv(x)
+
+        for n in range(self.lvc_block_nums):
+            x = self.lvc_blocks[n](x, c)
+
+        # apply final layers
+        for f in self.last_conv_layers:
+            x = F.leaky_relu(x, LRELU_SLOPE)
+            x = f(x)
+        x = torch.tanh(x)
+        return x
+
+    def remove_weight_norm(self):
+        """Remove weight normalization module from all of the layers."""
+
+        def _remove_weight_norm(m):
+            try:
+                # print(f"Weight norm is removed from {m}.")
+                torch.nn.utils.remove_weight_norm(m)
+            except ValueError:  # this module didn't have weight norm
+                return
+
+        self.apply(_remove_weight_norm)
+
+    def apply_weight_norm(self):
+        """Apply weight normalization module to all Conv1d/Conv2d layers."""
+
+        def _apply_weight_norm(m):
+            if isinstance(m, (torch.nn.Conv1d, torch.nn.Conv2d)):
+                torch.nn.utils.weight_norm(m)
+                # print(f"Weight norm is applied to {m}.")
+
+        self.apply(_apply_weight_norm)
+
+    @staticmethod
+    def _get_receptive_field_size(layers, stacks, kernel_size, dilation=lambda x: 2**x):
+        """Receptive field of `layers` dilated convs whose dilation cycle restarts every stack."""
+        assert layers % stacks == 0
+        layers_per_cycle = layers // stacks
+        dilations = [dilation(i % layers_per_cycle) for i in range(layers)]
+        return (kernel_size - 1) * sum(dilations) + 1
+
+    @property
+    def receptive_field_size(self):
+        """Return receptive field size.
+
+        NOTE(review): `layers`, `stacks` and `kernel_size` are never assigned in
+        `__init__`, so reading this property raises AttributeError unless they are
+        set from outside -- confirm whether the property is still needed.
+        """
+        return self._get_receptive_field_size(self.layers, self.stacks, self.kernel_size)
+
+    @torch.no_grad()
+    def inference(self, c):
+        """Perform inference.
+        Args:
+            c (Tensor): Local conditioning auxiliary features :math:`(B, C, T)`.
+        Returns:
+            Tensor: Output of `forward`, i.e. (B, out_channels, T).
+        """
+        # `forward` samples its own input noise. The noise tensor that used to be
+        # drawn here was never used and only consumed an extra RNG draw, so a
+        # seeded `inference` call now matches a seeded `forward` call.
+        c = c.to(next(self.parameters()))
+        return self.forward(c)
diff --git a/TTS/vocoder/models/wavegrad.py b/TTS/vocoder/models/wavegrad.py
new file mode 100644
index 0000000000000000000000000000000000000000..a0f9221a8f64fb9953527a6b859d114aabb702d2
--- /dev/null
+++ b/TTS/vocoder/models/wavegrad.py
@@ -0,0 +1,344 @@
+from dataclasses import dataclass, field
+from typing import Dict, List, Tuple
+
+import numpy as np
+import torch
+from coqpit import Coqpit
+from torch import nn
+from torch.nn.utils import weight_norm
+from torch.utils.data import DataLoader
+from torch.utils.data.distributed import DistributedSampler
+from trainer.trainer_utils import get_optimizer, get_scheduler
+
+from TTS.utils.io import load_fsspec
+from TTS.vocoder.datasets import WaveGradDataset
+from TTS.vocoder.layers.wavegrad import Conv1d, DBlock, FiLM, UBlock
+from TTS.vocoder.models.base_vocoder import BaseVocoder
+from TTS.vocoder.utils.generic_utils import plot_results
+
+
+@dataclass
+class WavegradArgs(Coqpit):
+    # Model hyper-parameters read by `Wavegrad.__init__` through `config.model_params`.
+    # input channels of the conv applied to the spectrogram
+    in_channels: int = 80
+    # output channels of the final convolution
+    out_channels: int = 1
+    # apply weight normalization at construction time
+    use_weight_norm: bool = False
+    # output channels of the first conv applied to the (noisy) waveform
+    y_conv_channels: int = 32
+    # output channels of the conv applied to the spectrogram
+    x_conv_channels: int = 768
+    # output channels of each downsampling block
+    dblock_out_channels: List[int] = field(default_factory=lambda: [128, 128, 256, 512])
+    # output channels of each upsampling block
+    ublock_out_channels: List[int] = field(default_factory=lambda: [512, 512, 256, 128, 128])
+    # per-ublock upsample factors; their product is the model's hop length
+    upsample_factors: List[int] = field(default_factory=lambda: [4, 4, 4, 2, 2])
+    # one list of dilations per upsampling block
+    upsample_dilations: List[List[int]] = field(
+        default_factory=lambda: [[1, 2, 1, 2], [1, 2, 1, 2], [1, 2, 4, 8], [1, 2, 4, 8], [1, 2, 4, 8]]
+    )
+
+
+class Wavegrad(BaseVocoder):
+    """🐸 🌊 WaveGrad 🌊 model.
+    Paper - https://arxiv.org/abs/2009.00713
+
+    Examples:
+        Initializing the model.
+
+        >>> from TTS.vocoder.configs import WavegradConfig
+        >>> config = WavegradConfig()
+        >>> model = Wavegrad(config)
+
+    Paper Abstract:
+        This paper introduces WaveGrad, a conditional model for waveform generation which estimates gradients of the
+        data density. The model is built on prior work on score matching and diffusion probabilistic models. It starts
+        from a Gaussian white noise signal and iteratively refines the signal via a gradient-based sampler conditioned
+        on the mel-spectrogram. WaveGrad offers a natural way to trade inference speed for sample quality by adjusting
+        the number of refinement steps, and bridges the gap between non-autoregressive and autoregressive models in
+        terms of audio quality. We find that it can generate high fidelity audio samples using as few as six iterations.
+        Experiments reveal WaveGrad to generate high fidelity audio, outperforming adversarial non-autoregressive
+        baselines and matching a strong likelihood-based autoregressive baseline using fewer sequential operations.
+        Audio samples are available at this https URL.
+    """
+
+    # pylint: disable=dangerous-default-value
+    def __init__(self, config: Coqpit):
+        super().__init__(config)
+        self.config = config
+        self.use_weight_norm = config.model_params.use_weight_norm
+        self.hop_len = np.prod(config.model_params.upsample_factors)
+        # noise schedule state; filled in by `compute_noise_level`
+        self.noise_level = None
+        self.num_steps = None
+        self.beta = None
+        self.alpha = None
+        self.alpha_hat = None
+        self.c1 = None
+        self.c2 = None
+        self.sigma = None
+
+        # dblocks
+        self.y_conv = Conv1d(1, config.model_params.y_conv_channels, 5, padding=2)
+        self.dblocks = nn.ModuleList([])
+        ic = config.model_params.y_conv_channels
+        for oc, df in zip(config.model_params.dblock_out_channels, reversed(config.model_params.upsample_factors)):
+            self.dblocks.append(DBlock(ic, oc, df))
+            ic = oc
+
+        # film
+        self.film = nn.ModuleList([])
+        ic = config.model_params.y_conv_channels
+        for oc in reversed(config.model_params.ublock_out_channels):
+            self.film.append(FiLM(ic, oc))
+            ic = oc
+
+        # ublocks
+        self.ublocks = nn.ModuleList([])
+        ic = config.model_params.x_conv_channels
+        for oc, uf, ud in zip(
+            config.model_params.ublock_out_channels,
+            config.model_params.upsample_factors,
+            config.model_params.upsample_dilations,
+        ):
+            self.ublocks.append(UBlock(ic, oc, uf, ud))
+            ic = oc
+
+        self.x_conv = Conv1d(config.model_params.in_channels, config.model_params.x_conv_channels, 3, padding=1)
+        # `oc` is the output width of the last ublock built above
+        self.out_conv = Conv1d(oc, config.model_params.out_channels, 3, padding=1)
+
+        if config.model_params.use_weight_norm:
+            self.apply_weight_norm()
+
+    def forward(self, x, spectrogram, noise_scale):
+        """Predict the noise in `x` given `spectrogram` and `noise_scale`.
+
+        `x` runs through `y_conv` and the dblocks, producing one FiLM shift/scale
+        pair per stage; `spectrogram` then runs through `x_conv` and the ublocks,
+        which consume those pairs in reverse order.
+        """
+        shift_and_scale = []
+
+        x = self.y_conv(x)
+        shift_and_scale.append(self.film[0](x, noise_scale))
+
+        for film, layer in zip(self.film[1:], self.dblocks):
+            x = layer(x)
+            shift_and_scale.append(film(x, noise_scale))
+
+        x = self.x_conv(spectrogram)
+        for layer, (film_shift, film_scale) in zip(self.ublocks, reversed(shift_and_scale)):
+            x = layer(x, film_shift, film_scale)
+        x = self.out_conv(x)
+        return x
+
+    def load_noise_schedule(self, path):
+        """Load the ``"beta"`` entry of a saved schedule file and derive the schedule from it."""
+        # NOTE: `allow_pickle=True` unpickles arbitrary objects; only load schedule
+        # files from trusted sources.
+        beta = np.load(path, allow_pickle=True).item()["beta"]  # pylint: disable=unexpected-keyword-arg
+        self.compute_noise_level(beta)
+
+    @torch.no_grad()
+    def inference(self, x, y_n=None):
+        """
+        Shapes:
+            x: :math:`[B, C , T]`
+            y_n: :math:`[B, 1, T]`
+        """
+        if y_n is None:
+            y_n = torch.randn(x.shape[0], 1, self.hop_len * x.shape[-1])
+        else:
+            y_n = torch.FloatTensor(y_n).unsqueeze(0).unsqueeze(0)
+        y_n = y_n.type_as(x)
+        sqrt_alpha_hat = self.noise_level.to(x)
+        # walk the schedule backwards, refining `y_n` at every step
+        for n in range(len(self.alpha) - 1, -1, -1):
+            y_n = self.c1[n] * (y_n - self.c2[n] * self.forward(y_n, x, sqrt_alpha_hat[n].repeat(x.shape[0])))
+            if n > 0:
+                # fresh noise is injected at every step except the last one
+                z = torch.randn_like(y_n)
+                y_n += self.sigma[n - 1] * z
+            y_n.clamp_(-1.0, 1.0)
+        return y_n
+
+    def compute_y_n(self, y_0):
+        """Compute noisy audio based on noise schedule
+
+        Returns:
+            Tuple: the sampled noise, the noisy audio (both with a channel axis
+            re-inserted at dim 1) and the per-item noise scale.
+        """
+        self.noise_level = self.noise_level.to(y_0)
+        if len(y_0.shape) == 3:
+            y_0 = y_0.squeeze(1)
+        # pick a random schedule interval per item and a random point inside it
+        s = torch.randint(0, self.num_steps - 1, [y_0.shape[0]])
+        l_a, l_b = self.noise_level[s], self.noise_level[s + 1]
+        noise_scale = l_a + torch.rand(y_0.shape[0]).to(y_0) * (l_b - l_a)
+        noise_scale = noise_scale.unsqueeze(1)
+        noise = torch.randn_like(y_0)
+        noisy_audio = noise_scale * y_0 + (1.0 - noise_scale**2) ** 0.5 * noise
+        return noise.unsqueeze(1), noisy_audio.unsqueeze(1), noise_scale[:, 0]
+
+    def compute_noise_level(self, beta):
+        """Compute noise schedule parameters"""
+        self.num_steps = len(beta)
+        alpha = 1 - beta
+        alpha_hat = np.cumprod(alpha)
+        # A `[1.0] + sqrt(alpha_hat)` variant used to be computed here and was
+        # immediately overwritten; only the effective assignment is kept.
+        noise_level = alpha_hat**0.5
+
+        # pylint: disable=not-callable
+        self.beta = torch.tensor(beta.astype(np.float32))
+        self.alpha = torch.tensor(alpha.astype(np.float32))
+        self.alpha_hat = torch.tensor(alpha_hat.astype(np.float32))
+        self.noise_level = torch.tensor(noise_level.astype(np.float32))
+
+        self.c1 = 1 / self.alpha**0.5
+        self.c2 = (1 - self.alpha) / (1 - self.alpha_hat) ** 0.5
+        self.sigma = ((1.0 - self.alpha_hat[:-1]) / (1.0 - self.alpha_hat[1:]) * self.beta[1:]) ** 0.5
+
+    def remove_weight_norm(self):
+        """Remove weight norm from the dblocks, FiLM layers, ublocks and the three plain convs."""
+        for group in (self.dblocks, self.film, self.ublocks):
+            for layer in group:
+                if len(layer.state_dict()) != 0:
+                    try:
+                        nn.utils.remove_weight_norm(layer)
+                    except ValueError:
+                        # the layer itself carries no hook; let it handle its children
+                        layer.remove_weight_norm()
+
+        nn.utils.remove_weight_norm(self.x_conv)
+        nn.utils.remove_weight_norm(self.out_conv)
+        nn.utils.remove_weight_norm(self.y_conv)
+
+    def apply_weight_norm(self):
+        """Apply weight norm to the dblocks, FiLM layers, ublocks and the three plain convs."""
+        for group in (self.dblocks, self.film, self.ublocks):
+            for layer in group:
+                if len(layer.state_dict()) != 0:
+                    layer.apply_weight_norm()
+
+        self.x_conv = weight_norm(self.x_conv)
+        self.out_conv = weight_norm(self.out_conv)
+        self.y_conv = weight_norm(self.y_conv)
+
+    def load_checkpoint(
+        self, config, checkpoint_path, eval=False, cache=False
+    ):  # pylint: disable=unused-argument, redefined-builtin
+        """Load weights and set up the test (eval=True) or train noise schedule from `config`."""
+        state = load_fsspec(checkpoint_path, map_location=torch.device("cpu"), cache=cache)
+        self.load_state_dict(state["model"])
+        if eval:
+            self.eval()
+            assert not self.training
+            if self.config.model_params.use_weight_norm:
+                self.remove_weight_norm()
+        schedule = config["test_noise_schedule" if eval else "train_noise_schedule"]
+        betas = np.linspace(schedule["min_val"], schedule["max_val"], schedule["num_steps"])
+        self.compute_noise_level(betas)
+
+    def train_step(self, batch: Dict, criterion: Dict) -> Tuple[Dict, Dict]:
+        """Noise the target waveform, predict that noise and return the model output and loss."""
+        # format data
+        x = batch["input"]
+        y = batch["waveform"]
+
+        # set noise scale
+        noise, x_noisy, noise_scale = self.compute_y_n(y)
+
+        # forward pass
+        noise_hat = self.forward(x_noisy, x, noise_scale)
+
+        # compute losses
+        loss = criterion(noise, noise_hat)
+        return {"model_output": noise_hat}, {"loss": loss}
+
+    def train_log(  # pylint: disable=no-self-use
+        self, batch: Dict, outputs: Dict, logger: "Logger", assets: Dict, steps: int  # pylint: disable=unused-argument
+    ) -> Tuple[Dict, np.ndarray]:
+        pass
+
+    @torch.no_grad()
+    def eval_step(self, batch: Dict, criterion: nn.Module) -> Tuple[Dict, Dict]:
+        return self.train_step(batch, criterion)
+
+    def eval_log(  # pylint: disable=no-self-use
+        self, batch: Dict, outputs: Dict, logger: "Logger", assets: Dict, steps: int  # pylint: disable=unused-argument
+    ) -> None:
+        pass
+
+    def test(self, assets: Dict, test_loader: "DataLoader", outputs=None):  # pylint: disable=unused-argument
+        """Synthesize the loader's test sample(s) with the test noise schedule.
+
+        Only the figures and audio of the last processed sample are returned.
+        """
+        # setup noise schedule and inference
+        ap = assets["audio_processor"]
+        noise_schedule = self.config["test_noise_schedule"]
+        betas = np.linspace(noise_schedule["min_val"], noise_schedule["max_val"], noise_schedule["num_steps"])
+        self.compute_noise_level(betas)
+        samples = test_loader.dataset.load_test_samples(1)
+        for sample in samples:
+            x = sample[0]
+            x = x[None, :, :].to(next(self.parameters()).device)
+            y = sample[1]
+            y = y[None, :]
+            # compute voice
+            y_pred = self.inference(x)
+            # compute spectrograms
+            figures = plot_results(y_pred, y, ap, "test")
+            # Sample audio
+            sample_voice = y_pred[0].squeeze(0).detach().cpu().numpy()
+        return figures, {"test/audio": sample_voice}
+
+    def get_optimizer(self):
+        return get_optimizer(self.config.optimizer, self.config.optimizer_params, self.config.lr, self)
+
+    def get_scheduler(self, optimizer):
+        return get_scheduler(self.config.lr_scheduler, self.config.lr_scheduler_params, optimizer)
+
+    @staticmethod
+    def get_criterion():
+        return torch.nn.L1Loss()
+
+    @staticmethod
+    def format_batch(batch: Dict) -> Dict:
+        # return a whole audio segment
+        m, y = batch[0], batch[1]
+        y = y.unsqueeze(1)
+        return {"input": m, "waveform": y}
+
+    def get_data_loader(self, config: Coqpit, assets: Dict, is_eval: bool, samples: List, verbose: bool, num_gpus: int):
+        """Build the `WaveGradDataset` loader; a `DistributedSampler` is used when `num_gpus > 1`."""
+        ap = assets["audio_processor"]
+        dataset = WaveGradDataset(
+            ap=ap,
+            items=samples,
+            seq_len=self.config.seq_len,
+            hop_len=ap.hop_length,
+            pad_short=self.config.pad_short,
+            conv_pad=self.config.conv_pad,
+            is_training=not is_eval,
+            return_segments=True,
+            use_noise_augment=False,
+            use_cache=config.use_cache,
+            verbose=verbose,
+        )
+        sampler = DistributedSampler(dataset) if num_gpus > 1 else None
+        loader = DataLoader(
+            dataset,
+            batch_size=self.config.batch_size,
+            # shuffling is left to the sampler when one is used
+            shuffle=num_gpus <= 1,
+            drop_last=False,
+            sampler=sampler,
+            num_workers=self.config.num_eval_loader_workers if is_eval else self.config.num_loader_workers,
+            pin_memory=False,
+        )
+        return loader
+
+    def on_epoch_start(self, trainer):  # pylint: disable=unused-argument
+        """Recompute the training noise schedule from the config at the start of every epoch."""
+        noise_schedule = self.config["train_noise_schedule"]
+        betas = np.linspace(noise_schedule["min_val"], noise_schedule["max_val"], noise_schedule["num_steps"])
+        self.compute_noise_level(betas)
+
+    @staticmethod
+    def init_from_config(config: "WavegradConfig"):
+        return Wavegrad(config)
diff --git a/TTS/vocoder/models/wavernn.py b/TTS/vocoder/models/wavernn.py
new file mode 100644
index 0000000000000000000000000000000000000000..0ea6b6e086ffea70e959494a084853f3e9aede32
--- /dev/null
+++ b/TTS/vocoder/models/wavernn.py
@@ -0,0 +1,647 @@
+import sys
+import time
+from dataclasses import dataclass, field
+from typing import Dict, List, Tuple
+
+import numpy as np
+import torch
+import torch.nn.functional as F
+from coqpit import Coqpit
+from torch import nn
+from torch.utils.data import DataLoader
+from torch.utils.data.distributed import DistributedSampler
+
+from TTS.tts.utils.visual import plot_spectrogram
+from TTS.utils.audio import AudioProcessor
+from TTS.utils.io import load_fsspec
+from TTS.vocoder.datasets.wavernn_dataset import WaveRNNDataset
+from TTS.vocoder.layers.losses import WaveRNNLoss
+from TTS.vocoder.models.base_vocoder import BaseVocoder
+from TTS.vocoder.utils.distribution import sample_from_discretized_mix_logistic, sample_from_gaussian
+
+
+def stream(string, variables):
+    sys.stdout.write(f"\r{string}" % variables)
+
+
+# pylint: disable=abstract-method
+# relates https://github.com/pytorch/pytorch/issues/42305
+class ResBlock(nn.Module):
+    def __init__(self, dims):
+        super().__init__()
+        self.conv1 = nn.Conv1d(dims, dims, kernel_size=1, bias=False)
+        self.conv2 = nn.Conv1d(dims, dims, kernel_size=1, bias=False)
+        self.batch_norm1 = nn.BatchNorm1d(dims)
+        self.batch_norm2 = nn.BatchNorm1d(dims)
+
+    def forward(self, x):
+        residual = x
+        x = self.conv1(x)
+        x = self.batch_norm1(x)
+        x = F.relu(x)
+        x = self.conv2(x)
+        x = self.batch_norm2(x)
+        return x + residual
+
+
+class MelResNet(nn.Module):
+    def __init__(self, num_res_blocks, in_dims, compute_dims, res_out_dims, pad):
+        super().__init__()
+        k_size = pad * 2 + 1
+        self.conv_in = nn.Conv1d(in_dims, compute_dims, kernel_size=k_size, bias=False)
+        self.batch_norm = nn.BatchNorm1d(compute_dims)
+        self.layers = nn.ModuleList()
+        for _ in range(num_res_blocks):
+            self.layers.append(ResBlock(compute_dims))
+        self.conv_out = nn.Conv1d(compute_dims, res_out_dims, kernel_size=1)
+
+    def forward(self, x):
+        x = self.conv_in(x)
+        x = self.batch_norm(x)
+        x = F.relu(x)
+        for f in self.layers:
+            x = f(x)
+        x = self.conv_out(x)
+        return x
+
+
+class Stretch2d(nn.Module):
+    def __init__(self, x_scale, y_scale):
+        super().__init__()
+        self.x_scale = x_scale
+        self.y_scale = y_scale
+
+    def forward(self, x):
+        b, c, h, w = x.size()
+        x = x.unsqueeze(-1).unsqueeze(3)
+        x = x.repeat(1, 1, 1, self.y_scale, 1, self.x_scale)
+        return x.view(b, c, h * self.y_scale, w * self.x_scale)
+
+
+class UpsampleNetwork(nn.Module):
+    def __init__(
+        self,
+        feat_dims,
+        upsample_scales,
+        compute_dims,
+        num_res_blocks,
+        res_out_dims,
+        pad,
+        use_aux_net,
+    ):
+        super().__init__()
+        self.total_scale = np.cumproduct(upsample_scales)[-1]
+        self.indent = pad * self.total_scale
+        self.use_aux_net = use_aux_net
+        if use_aux_net:
+            self.resnet = MelResNet(num_res_blocks, feat_dims, compute_dims, res_out_dims, pad)
+            self.resnet_stretch = Stretch2d(self.total_scale, 1)
+        self.up_layers = nn.ModuleList()
+        for scale in upsample_scales:
+            k_size = (1, scale * 2 + 1)
+            padding = (0, scale)
+            stretch = Stretch2d(scale, 1)
+            conv = nn.Conv2d(1, 1, kernel_size=k_size, padding=padding, bias=False)
+            conv.weight.data.fill_(1.0 / k_size[1])
+            self.up_layers.append(stretch)
+            self.up_layers.append(conv)
+
+    def forward(self, m):
+        if self.use_aux_net:
+            aux = self.resnet(m).unsqueeze(1)
+            aux = self.resnet_stretch(aux)
+            aux = aux.squeeze(1)
+            aux = aux.transpose(1, 2)
+        else:
+            aux = None
+        m = m.unsqueeze(1)
+        for f in self.up_layers:
+            m = f(m)
+        m = m.squeeze(1)[:, :, self.indent : -self.indent]
+        return m.transpose(1, 2), aux
+
+
+class Upsample(nn.Module):
+    def __init__(self, scale, pad, num_res_blocks, feat_dims, compute_dims, res_out_dims, use_aux_net):
+        super().__init__()
+        self.scale = scale
+        self.pad = pad
+        self.indent = pad * scale
+        self.use_aux_net = use_aux_net
+        self.resnet = MelResNet(num_res_blocks, feat_dims, compute_dims, res_out_dims, pad)
+
+    def forward(self, m):
+        if self.use_aux_net:
+            aux = self.resnet(m)
+            aux = torch.nn.functional.interpolate(aux, scale_factor=self.scale, mode="linear", align_corners=True)
+            aux = aux.transpose(1, 2)
+        else:
+            aux = None
+        m = torch.nn.functional.interpolate(m, scale_factor=self.scale, mode="linear", align_corners=True)
+        m = m[:, :, self.indent : -self.indent]
+        m = m * 0.045  # empirically found
+
+        return m.transpose(1, 2), aux
+
+
+@dataclass
+class WavernnArgs(Coqpit):
+    """🐸 WaveRNN model arguments.
+
+    rnn_dims (int):
+        Number of hidden channels in RNN layers. Defaults to 512.
+    fc_dims (int):
+        Number of hidden channels in fully-connected layers. Defaults to 512.
+    compute_dims (int):
+        Number of hidden channels in the feature ResNet. Defaults to 128.
+    res_out_dims (int):
+        Number of channels in the feature ResNet output. Defaults to 128.
+    num_res_blocks (int):
+        Number of residual blocks in the ResNet. Defaults to 10.
+    use_aux_net (bool):
+        enable / disable the feature ResNet. Defaults to True.
+    use_upsample_net (bool):
+        enable / disable the upsampling network. If False, basic upsampling is used. Defaults to True.
+    upsample_factors (list):
+        Upsampling factors. The product of the values must match the `hop_length`. Defaults to ```[4, 8, 8]```.
+    mode (str or int):
+        Output mode of the WaveRNN vocoder. `mold` for Mixture of Logistic Distribution, `gauss` for a single
+        Gaussian Distribution, or an integer number of bits for quantized output. Defaults to `mold`.
+    mulaw (bool):
+        enable / disable the use of Mulaw quantization for training. Only applicable if `mode` is an integer
+        number of bits. Defaults to `True`.
+    pad (int):
+        Padding applied to the input feature frames against the convolution layers of the feature network.
+        Defaults to 2.
+    feat_dims (int):
+        Number of channels in the input feature frames. Defaults to 80.
+    """
+
+    rnn_dims: int = 512
+    fc_dims: int = 512
+    compute_dims: int = 128
+    res_out_dims: int = 128
+    num_res_blocks: int = 10
+    use_aux_net: bool = True
+    use_upsample_net: bool = True
+    upsample_factors: List[int] = field(default_factory=lambda: [4, 8, 8])
+    mode: str = "mold"  # mold [string], gauss [string], bits [int]
+    mulaw: bool = True  # apply mulaw if mode is bits
+    pad: int = 2
+    feat_dims: int = 80
+
+
+class Wavernn(BaseVocoder):
+    def __init__(self, config: Coqpit):
+        """🐸 WaveRNN model.
+        Original paper - https://arxiv.org/abs/1802.08435
+        Official implementation - https://github.com/fatchord/WaveRNN
+
+        Args:
+            config (Coqpit): [description]
+
+        Raises:
+            RuntimeError: [description]
+
+        Examples:
+            >>> from TTS.vocoder.configs import WavernnConfig
+            >>> config = WavernnConfig()
+            >>> model = Wavernn(config)
+
+        Paper Abstract:
+            Sequential models achieve state-of-the-art results in audio, visual and textual domains with respect to
+            both estimating the data distribution and generating high-quality samples. Efficient sampling for this
+            class of models has however remained an elusive problem. With a focus on text-to-speech synthesis, we
+            describe a set of general techniques for reducing sampling time while maintaining high output quality.
+            We first describe a single-layer recurrent neural network, the WaveRNN, with a dual softmax layer that
+            matches the quality of the state-of-the-art WaveNet model. The compact form of the network makes it
+            possible to generate 24kHz 16-bit audio 4x faster than real time on a GPU. Second, we apply a weight
+            pruning technique to reduce the number of weights in the WaveRNN. We find that, for a constant number of
+            parameters, large sparse networks perform better than small dense networks and this relationship holds for
+            sparsity levels beyond 96%. The small number of weights in a Sparse WaveRNN makes it possible to sample
+            high-fidelity audio on a mobile CPU in real time. Finally, we propose a new generation scheme based on
+            subscaling that folds a long sequence into a batch of shorter sequences and allows one to generate multiple
+            samples at once. The Subscale WaveRNN produces 16 samples per step without loss of quality and offers an
+            orthogonal method for increasing sampling efficiency.
+        """
+        super().__init__(config)
+
+        if isinstance(self.args.mode, int):
+            self.n_classes = 2**self.args.mode
+        elif self.args.mode == "mold":
+            self.n_classes = 3 * 10
+        elif self.args.mode == "gauss":
+            self.n_classes = 2
+        else:
+            raise RuntimeError("Unknown model mode value - ", self.args.mode)
+
+        self.ap = AudioProcessor(**config.audio.to_dict())
+        self.aux_dims = self.args.res_out_dims // 4
+
+        if self.args.use_upsample_net:
+            assert (
+                np.cumproduct(self.args.upsample_factors)[-1] == config.audio.hop_length
+            ), " [!] upsample scales needs to be equal to hop_length"
+            self.upsample = UpsampleNetwork(
+                self.args.feat_dims,
+                self.args.upsample_factors,
+                self.args.compute_dims,
+                self.args.num_res_blocks,
+                self.args.res_out_dims,
+                self.args.pad,
+                self.args.use_aux_net,
+            )
+        else:
+            self.upsample = Upsample(
+                config.audio.hop_length,
+                self.args.pad,
+                self.args.num_res_blocks,
+                self.args.feat_dims,
+                self.args.compute_dims,
+                self.args.res_out_dims,
+                self.args.use_aux_net,
+            )
+        if self.args.use_aux_net:
+            self.I = nn.Linear(self.args.feat_dims + self.aux_dims + 1, self.args.rnn_dims)
+            self.rnn1 = nn.GRU(self.args.rnn_dims, self.args.rnn_dims, batch_first=True)
+            self.rnn2 = nn.GRU(self.args.rnn_dims + self.aux_dims, self.args.rnn_dims, batch_first=True)
+            self.fc1 = nn.Linear(self.args.rnn_dims + self.aux_dims, self.args.fc_dims)
+            self.fc2 = nn.Linear(self.args.fc_dims + self.aux_dims, self.args.fc_dims)
+            self.fc3 = nn.Linear(self.args.fc_dims, self.n_classes)
+        else:
+            self.I = nn.Linear(self.args.feat_dims + 1, self.args.rnn_dims)
+            self.rnn1 = nn.GRU(self.args.rnn_dims, self.args.rnn_dims, batch_first=True)
+            self.rnn2 = nn.GRU(self.args.rnn_dims, self.args.rnn_dims, batch_first=True)
+            self.fc1 = nn.Linear(self.args.rnn_dims, self.args.fc_dims)
+            self.fc2 = nn.Linear(self.args.fc_dims, self.args.fc_dims)
+            self.fc3 = nn.Linear(self.args.fc_dims, self.n_classes)
+
+    def forward(self, x, mels):
+        bsize = x.size(0)
+        h1 = torch.zeros(1, bsize, self.args.rnn_dims).to(x.device)
+        h2 = torch.zeros(1, bsize, self.args.rnn_dims).to(x.device)
+        mels, aux = self.upsample(mels)
+
+        if self.args.use_aux_net:
+            aux_idx = [self.aux_dims * i for i in range(5)]
+            a1 = aux[:, :, aux_idx[0] : aux_idx[1]]
+            a2 = aux[:, :, aux_idx[1] : aux_idx[2]]
+            a3 = aux[:, :, aux_idx[2] : aux_idx[3]]
+            a4 = aux[:, :, aux_idx[3] : aux_idx[4]]
+
+        x = (
+            torch.cat([x.unsqueeze(-1), mels, a1], dim=2)
+            if self.args.use_aux_net
+            else torch.cat([x.unsqueeze(-1), mels], dim=2)
+        )
+        x = self.I(x)
+        res = x
+        self.rnn1.flatten_parameters()
+        x, _ = self.rnn1(x, h1)
+
+        x = x + res
+        res = x
+        x = torch.cat([x, a2], dim=2) if self.args.use_aux_net else x
+        self.rnn2.flatten_parameters()
+        x, _ = self.rnn2(x, h2)
+
+        x = x + res
+        x = torch.cat([x, a3], dim=2) if self.args.use_aux_net else x
+        x = F.relu(self.fc1(x))
+
+        x = torch.cat([x, a4], dim=2) if self.args.use_aux_net else x
+        x = F.relu(self.fc2(x))
+        return self.fc3(x)
+
+    def inference(self, mels, batched=None, target=None, overlap=None):
+
+        self.eval()
+        output = []
+        start = time.time()
+        rnn1 = self.get_gru_cell(self.rnn1)
+        rnn2 = self.get_gru_cell(self.rnn2)
+
+        with torch.no_grad():
+            if isinstance(mels, np.ndarray):
+                mels = torch.FloatTensor(mels).to(str(next(self.parameters()).device))
+
+            if mels.ndim == 2:
+                mels = mels.unsqueeze(0)
+            wave_len = (mels.size(-1) - 1) * self.config.audio.hop_length
+
+            mels = self.pad_tensor(mels.transpose(1, 2), pad=self.args.pad, side="both")
+            mels, aux = self.upsample(mels.transpose(1, 2))
+
+            if batched:
+                mels = self.fold_with_overlap(mels, target, overlap)
+                if aux is not None:
+                    aux = self.fold_with_overlap(aux, target, overlap)
+
+            b_size, seq_len, _ = mels.size()
+
+            h1 = torch.zeros(b_size, self.args.rnn_dims).type_as(mels)
+            h2 = torch.zeros(b_size, self.args.rnn_dims).type_as(mels)
+            x = torch.zeros(b_size, 1).type_as(mels)
+
+            if self.args.use_aux_net:
+                d = self.aux_dims
+                aux_split = [aux[:, :, d * i : d * (i + 1)] for i in range(4)]
+
+            for i in range(seq_len):
+
+                m_t = mels[:, i, :]
+
+                if self.args.use_aux_net:
+                    a1_t, a2_t, a3_t, a4_t = (a[:, i, :] for a in aux_split)
+
+                x = torch.cat([x, m_t, a1_t], dim=1) if self.args.use_aux_net else torch.cat([x, m_t], dim=1)
+                x = self.I(x)
+                h1 = rnn1(x, h1)
+
+                x = x + h1
+                inp = torch.cat([x, a2_t], dim=1) if self.args.use_aux_net else x
+                h2 = rnn2(inp, h2)
+
+                x = x + h2
+                x = torch.cat([x, a3_t], dim=1) if self.args.use_aux_net else x
+                x = F.relu(self.fc1(x))
+
+                x = torch.cat([x, a4_t], dim=1) if self.args.use_aux_net else x
+                x = F.relu(self.fc2(x))
+
+                logits = self.fc3(x)
+
+                if self.args.mode == "mold":
+                    sample = sample_from_discretized_mix_logistic(logits.unsqueeze(0).transpose(1, 2))
+                    output.append(sample.view(-1))
+                    x = sample.transpose(0, 1).type_as(mels)
+                elif self.args.mode == "gauss":
+                    sample = sample_from_gaussian(logits.unsqueeze(0).transpose(1, 2))
+                    output.append(sample.view(-1))
+                    x = sample.transpose(0, 1).type_as(mels)
+                elif isinstance(self.args.mode, int):
+                    posterior = F.softmax(logits, dim=1)
+                    distrib = torch.distributions.Categorical(posterior)
+
+                    sample = 2 * distrib.sample().float() / (self.n_classes - 1.0) - 1.0
+                    output.append(sample)
+                    x = sample.unsqueeze(-1)
+                else:
+                    raise RuntimeError("Unknown model mode value - ", self.args.mode)
+
+                if i % 100 == 0:
+                    self.gen_display(i, seq_len, b_size, start)
+
+        output = torch.stack(output).transpose(0, 1)
+        output = output.cpu()
+        if batched:
+            output = output.numpy()
+            output = output.astype(np.float64)
+
+            output = self.xfade_and_unfold(output, target, overlap)
+        else:
+            output = output[0]
+
+        if self.args.mulaw and isinstance(self.args.mode, int):
+            output = AudioProcessor.mulaw_decode(output, self.args.mode)
+
+        # Fade-out at the end to avoid signal cutting out suddenly
+        fade_out = np.linspace(1, 0, 20 * self.config.audio.hop_length)
+        output = output[:wave_len]
+
+        if wave_len > len(fade_out):
+            output[-20 * self.config.audio.hop_length :] *= fade_out
+
+        self.train()
+        return output
+
+    def gen_display(self, i, seq_len, b_size, start):
+        gen_rate = (i + 1) / (time.time() - start) * b_size / 1000
+        realtime_ratio = gen_rate * 1000 / self.config.audio.sample_rate
+        stream(
+            "%i/%i -- batch_size: %i -- gen_rate: %.1f kHz -- x_realtime: %.1f  ",
+            (i * b_size, seq_len * b_size, b_size, gen_rate, realtime_ratio),
+        )
+
+    def fold_with_overlap(self, x, target, overlap):
+        """Fold the tensor with overlap for quick batched inference.
+            Overlap will be used for crossfading in xfade_and_unfold()
+        Args:
+            x (tensor)    : Upsampled conditioning features.
+                            shape=(1, timesteps, features)
+            target (int)  : Target timesteps for each index of batch
+            overlap (int) : Timesteps for both xfade and rnn warmup
+        Return:
+            (tensor) : shape=(num_folds, target + 2 * overlap, features)
+        Details:
+            x = [[h1, h2, ... hn]]
+            Where each h is a vector of conditioning features
+            Eg: target=2, overlap=1 with x.size(1)=10
+            folded = [[h1, h2, h3, h4],
+                      [h4, h5, h6, h7],
+                      [h7, h8, h9, h10]]
+        """
+
+        _, total_len, features = x.size()
+
+        # Calculate variables needed
+        num_folds = (total_len - overlap) // (target + overlap)
+        extended_len = num_folds * (overlap + target) + overlap
+        remaining = total_len - extended_len
+
+        # Pad if some time steps poking out
+        if remaining != 0:
+            num_folds += 1
+            padding = target + 2 * overlap - remaining
+            x = self.pad_tensor(x, padding, side="after")
+
+        folded = torch.zeros(num_folds, target + 2 * overlap, features).to(x.device)
+
+        # Get the values for the folded tensor
+        for i in range(num_folds):
+            start = i * (target + overlap)
+            end = start + target + 2 * overlap
+            folded[i] = x[:, start:end, :]
+
+        return folded
+
+    @staticmethod
+    def get_gru_cell(gru):
+        gru_cell = nn.GRUCell(gru.input_size, gru.hidden_size)
+        gru_cell.weight_hh.data = gru.weight_hh_l0.data
+        gru_cell.weight_ih.data = gru.weight_ih_l0.data
+        gru_cell.bias_hh.data = gru.bias_hh_l0.data
+        gru_cell.bias_ih.data = gru.bias_ih_l0.data
+        return gru_cell
+
+    @staticmethod
+    def pad_tensor(x, pad, side="both"):
+        # NB - this is just a quick method i need right now
+        # i.e., it won't generalise to other shapes/dims
+        b, t, c = x.size()
+        total = t + 2 * pad if side == "both" else t + pad
+        padded = torch.zeros(b, total, c).to(x.device)
+        if side in ("before", "both"):
+            padded[:, pad : pad + t, :] = x
+        elif side == "after":
+            padded[:, :t, :] = x
+        return padded
+
+    @staticmethod
+    def xfade_and_unfold(y, target, overlap):
+        """Applies a crossfade and unfolds into a 1d array.
+        Args:
+            y (ndarry)    : Batched sequences of audio samples
+                            shape=(num_folds, target + 2 * overlap)
+                            dtype=np.float64
+            overlap (int) : Timesteps for both xfade and rnn warmup
+        Return:
+            (ndarry) : audio samples in a 1d array
+                       shape=(total_len)
+                       dtype=np.float64
+        Details:
+            y = [[seq1],
+                 [seq2],
+                 [seq3]]
+            Apply a gain envelope at both ends of the sequences
+            y = [[seq1_in, seq1_target, seq1_out],
+                 [seq2_in, seq2_target, seq2_out],
+                 [seq3_in, seq3_target, seq3_out]]
+            Stagger and add up the groups of samples:
+            [seq1_in, seq1_target, (seq1_out + seq2_in), seq2_target, ...]
+        """
+
+        num_folds, length = y.shape
+        target = length - 2 * overlap
+        total_len = num_folds * (target + overlap) + overlap
+
+        # Need some silence for the rnn warmup
+        silence_len = overlap // 2
+        fade_len = overlap - silence_len
+        silence = np.zeros((silence_len), dtype=np.float64)
+
+        # Equal power crossfade
+        t = np.linspace(-1, 1, fade_len, dtype=np.float64)
+        fade_in = np.sqrt(0.5 * (1 + t))
+        fade_out = np.sqrt(0.5 * (1 - t))
+
+        # Concat the silence to the fades
+        fade_in = np.concatenate([silence, fade_in])
+        fade_out = np.concatenate([fade_out, silence])
+
+        # Apply the gain to the overlap samples
+        y[:, :overlap] *= fade_in
+        y[:, -overlap:] *= fade_out
+
+        unfolded = np.zeros((total_len), dtype=np.float64)
+
+        # Loop to add up all the samples
+        for i in range(num_folds):
+            start = i * (target + overlap)
+            end = start + target + 2 * overlap
+            unfolded[start:end] += y[i]
+
+        return unfolded
+
+    def load_checkpoint(
+        self, config, checkpoint_path, eval=False, cache=False
+    ):  # pylint: disable=unused-argument, redefined-builtin
+        state = load_fsspec(checkpoint_path, map_location=torch.device("cpu"), cache=cache)
+        self.load_state_dict(state["model"])
+        if eval:
+            self.eval()
+            assert not self.training
+
+    def train_step(self, batch: Dict, criterion: Dict) -> Tuple[Dict, Dict]:
+        mels = batch["input"]
+        waveform = batch["waveform"]
+        waveform_coarse = batch["waveform_coarse"]
+
+        y_hat = self.forward(waveform, mels)
+        if isinstance(self.args.mode, int):
+            y_hat = y_hat.transpose(1, 2).unsqueeze(-1)
+        else:
+            waveform_coarse = waveform_coarse.float()
+        waveform_coarse = waveform_coarse.unsqueeze(-1)
+        # compute losses
+        loss_dict = criterion(y_hat, waveform_coarse)
+        return {"model_output": y_hat}, loss_dict
+
+    def eval_step(self, batch: Dict, criterion: Dict) -> Tuple[Dict, Dict]:
+        return self.train_step(batch, criterion)
+
+    @torch.no_grad()
+    def test(
+        self, assets: Dict, test_loader: "DataLoader", output: Dict  # pylint: disable=unused-argument
+    ) -> Tuple[Dict, Dict]:
+        ap = self.ap
+        figures = {}
+        audios = {}
+        samples = test_loader.dataset.load_test_samples(1)
+        for idx, sample in enumerate(samples):
+            x = torch.FloatTensor(sample[0])
+            x = x.to(next(self.parameters()).device)
+            y_hat = self.inference(x, self.config.batched, self.config.target_samples, self.config.overlap_samples)
+            x_hat = ap.melspectrogram(y_hat)
+            figures.update(
+                {
+                    f"test_{idx}/ground_truth": plot_spectrogram(x.T),
+                    f"test_{idx}/prediction": plot_spectrogram(x_hat.T),
+                }
+            )
+            audios.update({f"test_{idx}/audio": y_hat})
+            # audios.update({f"real_{idx}/audio": y_hat})
+        return figures, audios
+
+    def test_log(
+        self, outputs: Dict, logger: "Logger", assets: Dict, steps: int  # pylint: disable=unused-argument
+    ) -> Tuple[Dict, np.ndarray]:
+        figures, audios = outputs
+        logger.eval_figures(steps, figures)
+        logger.eval_audios(steps, audios, self.ap.sample_rate)
+
+    @staticmethod
+    def format_batch(batch: Dict) -> Dict:
+        waveform = batch[0]
+        mels = batch[1]
+        waveform_coarse = batch[2]
+        return {"input": mels, "waveform": waveform, "waveform_coarse": waveform_coarse}
+
+    def get_data_loader(  # pylint: disable=no-self-use
+        self,
+        config: Coqpit,
+        assets: Dict,
+        is_eval: True,
+        samples: List,
+        verbose: bool,
+        num_gpus: int,
+    ):
+        ap = self.ap
+        dataset = WaveRNNDataset(
+            ap=ap,
+            items=samples,
+            seq_len=config.seq_len,
+            hop_len=ap.hop_length,
+            pad=config.model_args.pad,
+            mode=config.model_args.mode,
+            mulaw=config.model_args.mulaw,
+            is_training=not is_eval,
+            verbose=verbose,
+        )
+        sampler = DistributedSampler(dataset, shuffle=True) if num_gpus > 1 else None
+        loader = DataLoader(
+            dataset,
+            batch_size=1 if is_eval else config.batch_size,
+            shuffle=num_gpus == 0,
+            collate_fn=dataset.collate,
+            sampler=sampler,
+            num_workers=config.num_eval_loader_workers if is_eval else config.num_loader_workers,
+            pin_memory=True,
+        )
+        return loader
+
+    def get_criterion(self):
+        # define train functions
+        return WaveRNNLoss(self.args.mode)
+
+    @staticmethod
+    def init_from_config(config: "WavernnConfig"):
+        return Wavernn(config)
diff --git a/TTS/vocoder/pqmf_output.wav b/TTS/vocoder/pqmf_output.wav
new file mode 100644
index 0000000000000000000000000000000000000000..8a77747b00198a4adfd6c398998517df5b4bdb8d
Binary files /dev/null and b/TTS/vocoder/pqmf_output.wav differ
diff --git a/TTS/vocoder/utils/__init__.py b/TTS/vocoder/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/TTS/vocoder/utils/__pycache__/__init__.cpython-310.pyc b/TTS/vocoder/utils/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a0f7cbdacf2d4c6b5f6eb6321da7b69fb0c8dd24
Binary files /dev/null and b/TTS/vocoder/utils/__pycache__/__init__.cpython-310.pyc differ
diff --git a/TTS/vocoder/utils/__pycache__/__init__.cpython-38.pyc b/TTS/vocoder/utils/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0a984c345fb842a6986032c3cd71864e8877299c
Binary files /dev/null and b/TTS/vocoder/utils/__pycache__/__init__.cpython-38.pyc differ
diff --git a/TTS/vocoder/utils/__pycache__/__init__.cpython-39.pyc b/TTS/vocoder/utils/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..71b5e9a48be6d2b6a193d1454665daa2e221d537
Binary files /dev/null and b/TTS/vocoder/utils/__pycache__/__init__.cpython-39.pyc differ
diff --git a/TTS/vocoder/utils/__pycache__/distribution.cpython-310.pyc b/TTS/vocoder/utils/__pycache__/distribution.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..de729bae3dd335ccde23f4f34eaaf13d757b4755
Binary files /dev/null and b/TTS/vocoder/utils/__pycache__/distribution.cpython-310.pyc differ
diff --git a/TTS/vocoder/utils/__pycache__/distribution.cpython-38.pyc b/TTS/vocoder/utils/__pycache__/distribution.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d374ff0708c5d948cfa94196963d515fb41d1004
Binary files /dev/null and b/TTS/vocoder/utils/__pycache__/distribution.cpython-38.pyc differ
diff --git a/TTS/vocoder/utils/__pycache__/distribution.cpython-39.pyc b/TTS/vocoder/utils/__pycache__/distribution.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5b2a85532545a5aabfb5f5f7f60a7d08e00da730
Binary files /dev/null and b/TTS/vocoder/utils/__pycache__/distribution.cpython-39.pyc differ
diff --git a/TTS/vocoder/utils/__pycache__/generic_utils.cpython-310.pyc b/TTS/vocoder/utils/__pycache__/generic_utils.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ce3fbd151dd8f3b7e0da8d05efe53edd2e3372f9
Binary files /dev/null and b/TTS/vocoder/utils/__pycache__/generic_utils.cpython-310.pyc differ
diff --git a/TTS/vocoder/utils/__pycache__/generic_utils.cpython-38.pyc b/TTS/vocoder/utils/__pycache__/generic_utils.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..22b2131faa47c3230a6ca9299a8f2ace26cb60ce
Binary files /dev/null and b/TTS/vocoder/utils/__pycache__/generic_utils.cpython-38.pyc differ
diff --git a/TTS/vocoder/utils/__pycache__/generic_utils.cpython-39.pyc b/TTS/vocoder/utils/__pycache__/generic_utils.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a9ac3e3c8184a1f27e14a0da124cf33a3b1b0039
Binary files /dev/null and b/TTS/vocoder/utils/__pycache__/generic_utils.cpython-39.pyc differ
diff --git a/TTS/vocoder/utils/distribution.py b/TTS/vocoder/utils/distribution.py
new file mode 100644
index 0000000000000000000000000000000000000000..fe706ba9ffbc3f8aad75285bca34a910246666b3
--- /dev/null
+++ b/TTS/vocoder/utils/distribution.py
@@ -0,0 +1,154 @@
+import math
+
+import numpy as np
+import torch
+import torch.nn.functional as F
+from torch.distributions.normal import Normal
+
+
+def gaussian_loss(y_hat, y, log_std_min=-7.0):
+    """Mean Gaussian negative log-likelihood of `y`; `y_hat[..., :1]` is the mean, `y_hat[..., 1:]` the log-std."""
+    assert y_hat.dim() == 3 and y_hat.size(2) == 2
+    mean, raw_log_std = y_hat[:, :, :1], y_hat[:, :, 1:]
+    log_std = torch.clamp(raw_log_std, min=log_std_min)  # floor keeps exp(-2 * log_std) bounded
+    inv_var = torch.exp(-2.0 * log_std)
+    nll = -0.5 * (-math.log(2.0 * math.pi) - 2.0 * log_std - torch.pow(y - mean, 2) * inv_var)  # TODO: replace with pytorch dist
+    return nll.squeeze().mean()
+
+
+def sample_from_gaussian(y_hat, log_std_min=-7.0, scale_factor=1.0):
+    """Draw one sample per position from the Gaussian parameterised by `y_hat`.
+
+    The last axis of `y_hat` must have size 2: index 0 is the mean and index 1
+    the log standard deviation, which is floored at `log_std_min`. The drawn
+    values are clipped to `[-scale_factor, scale_factor]`.
+    """
+    assert y_hat.size(2) == 2
+    loc = y_hat[:, :, :1]
+    std = torch.exp(torch.clamp(y_hat[:, :, 1:], min=log_std_min))
+    drawn = Normal(loc, std).sample()
+    return torch.clamp(drawn, min=-scale_factor, max=scale_factor)
+
+
+def log_sum_exp(x):
+    """Log-sum-exp over the last axis, shifted by the per-row max so `exp` cannot overflow."""
+    last = x.dim() - 1  # reduce over the trailing axis (TF ordering)
+    peak = torch.max(x, dim=last, keepdim=True).values
+    shifted_sum = torch.sum(torch.exp(x - peak), dim=last)
+    # add the max back after the log to undo the shift
+    return peak.squeeze(last) + torch.log(shifted_sum)
+
+
+# It is adapted from https://github.com/r9y9/wavenet_vocoder/blob/master/wavenet_vocoder/mixture.py
+def discretized_mix_logistic_loss(y_hat, y, num_classes=65536, log_scale_min=None, reduce=True):
+    """Negative log-likelihood of `y` under the discretized logistic mixture in `y_hat` (mean if `reduce`)."""
+    if log_scale_min is None:
+        log_scale_min = float(np.log(1e-14))
+    y_hat = y_hat.permute(0, 2, 1)  # swapped only for the channel checks below; the transpose further down undoes it
+    assert y_hat.dim() == 3
+    assert y_hat.size(1) % 3 == 0
+    nr_mix = y_hat.size(1) // 3
+
+    # (B x T x C)
+    y_hat = y_hat.transpose(1, 2)
+
+    # unpack parameters. (B, T, num_mixtures) x 3
+    logit_probs = y_hat[:, :, :nr_mix]
+    means = y_hat[:, :, nr_mix : 2 * nr_mix]
+    log_scales = torch.clamp(y_hat[:, :, 2 * nr_mix : 3 * nr_mix], min=log_scale_min)
+
+    # B x T x 1 -> B x T x num_mixtures
+    y = y.expand_as(means)
+
+    centered_y = y - means
+    inv_stdv = torch.exp(-log_scales)
+    # logistic CDF evaluated at the two edges of the bin around y (half-width 1 / (num_classes - 1))
+    plus_in = inv_stdv * (centered_y + 1.0 / (num_classes - 1))
+    cdf_plus = torch.sigmoid(plus_in)
+    min_in = inv_stdv * (centered_y - 1.0 / (num_classes - 1))
+    cdf_min = torch.sigmoid(min_in)
+
+    # log probability for the lowest bin, selected below when y < -0.999 (before scaling)
+    # equivalent: torch.log(F.sigmoid(plus_in))
+    log_cdf_plus = plus_in - F.softplus(plus_in)
+
+    # log probability for the highest bin, selected below when y > 0.999 (before scaling)
+    # equivalent: (1 - F.sigmoid(min_in)).log()
+    log_one_minus_cdf_min = -F.softplus(min_in)
+
+    # probability for all other cases
+    cdf_delta = cdf_plus - cdf_min
+
+    mid_in = inv_stdv * centered_y
+    # log density at the bin centre; used below as the fallback when cdf_delta <= 1e-5
+    log_pdf_mid = mid_in - log_scales - 2.0 * F.softplus(mid_in)
+
+    # tf equivalent
+
+    # log_probs = tf.where(x < -0.999, log_cdf_plus,
+    #                      tf.where(x > 0.999, log_one_minus_cdf_min,
+    #                               tf.where(cdf_delta > 1e-5,
+    #                                        tf.log(tf.maximum(cdf_delta, 1e-12)),
+    #                                        log_pdf_mid - np.log(127.5))))
+
+    # TODO: cdf_delta <= 1e-5 actually can happen. How can we choose the value
+    # for num_classes=65536 case? 1e-7? not sure..
+    inner_inner_cond = (cdf_delta > 1e-5).float()
+
+    # the nested tf.where above is emulated by blending both branches with 0/1 float masks
+    inner_inner_out = inner_inner_cond * torch.log(torch.clamp(cdf_delta, min=1e-12)) + (1.0 - inner_inner_cond) * (
+        log_pdf_mid - np.log((num_classes - 1) / 2)
+    )
+    inner_cond = (y > 0.999).float()
+    inner_out = inner_cond * log_one_minus_cdf_min + (1.0 - inner_cond) * inner_inner_out
+    cond = (y < -0.999).float()
+    log_probs = cond * log_cdf_plus + (1.0 - cond) * inner_out
+
+    # add the log mixture weights, then marginalise over components with log_sum_exp
+    log_probs = log_probs + F.log_softmax(logit_probs, -1)
+
+    if reduce:
+        return -torch.mean(log_sum_exp(log_probs))
+    return -log_sum_exp(log_probs).unsqueeze(-1)
+
+
+def sample_from_discretized_mix_logistic(y, log_scale_min=None):
+    """
+    Draw samples from a discretized mixture-of-logistics output.
+    Args:
+        y (Tensor): mixture parameters laid out as :math:`[B, C, T]`, with `C` a multiple of 3
+        log_scale_min (float): lower bound applied to the log scales; `log(1e-14)` when None
+    Returns:
+        Tensor: samples clipped to [-1, 1].
+    """
+    if log_scale_min is None:
+        log_scale_min = float(np.log(1e-14))
+    assert y.size(1) % 3 == 0
+    nr_mix = y.size(1) // 3
+
+    # B x T x C
+    y = y.transpose(1, 2)
+    logit_probs = y[:, :, :nr_mix]
+
+    # pick one mixture component per step: perturb the logits with Gumbel noise, take the argmax
+    noise = logit_probs.new_empty(logit_probs.size()).uniform_(1e-5, 1.0 - 1e-5)
+    perturbed = logit_probs.detach() - torch.log(-torch.log(noise))
+    _, component = perturbed.max(dim=-1)
+
+    # (B, T) -> (B, T, nr_mix)
+    selector = to_one_hot(component, nr_mix)
+    # keep only the parameters of the chosen component
+    means = torch.sum(y[:, :, nr_mix : 2 * nr_mix] * selector, dim=-1)
+    raw_log_scales = torch.sum(y[:, :, 2 * nr_mix : 3 * nr_mix] * selector, dim=-1)
+    log_scales = torch.clamp(raw_log_scales, min=log_scale_min)
+
+    # inverse-CDF sampling from the logistic; the value is not rounded
+    # to a quantisation level, only clipped to [-1, 1]
+    u = means.new_empty(means.size()).uniform_(1e-5, 1.0 - 1e-5)
+    x = means + torch.exp(log_scales) * (torch.log(u) - torch.log(1.0 - u))
+
+    return torch.clamp(x, min=-1.0, max=1.0)
+
+
+def to_one_hot(tensor, n, fill_with=1.0):
+    """One-hot encode `tensor` along a new trailing axis of size `n`; the result is cast with `type_as(tensor)`."""
+    encoded = torch.zeros(tensor.size() + (n,)).type_as(tensor)
+    encoded.scatter_(tensor.dim(), tensor.unsqueeze(-1), fill_with)
+    return encoded
diff --git a/TTS/vocoder/utils/generic_utils.py b/TTS/vocoder/utils/generic_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..63a0af4445b5684e928b83d2f4fdfaf7e8f5b9a2
--- /dev/null
+++ b/TTS/vocoder/utils/generic_utils.py
@@ -0,0 +1,72 @@
+from typing import Dict
+
+import numpy as np
+import torch
+from matplotlib import pyplot as plt
+
+from TTS.tts.utils.visual import plot_spectrogram
+from TTS.utils.audio import AudioProcessor
+
+
+def interpolate_vocoder_input(scale_factor, spec):
+    """Rescale a spectrogram with bilinear interpolation.
+
+    Typically needed when the TTS model and the vocoder use different sampling rates.
+
+    Args:
+        scale_factor (float): scale factor handed to `torch.nn.functional.interpolate`
+        spec (np.array): spectrogram to be rescaled
+
+    Returns:
+        torch.tensor: the rescaled spectrogram, with one leading singleton axis kept.
+    """
+    print(" > before interpolation :", spec.shape)
+    batched = torch.tensor(spec)[None, None]  # pylint: disable=not-callable
+    resized = torch.nn.functional.interpolate(
+        batched, scale_factor=scale_factor, recompute_scale_factor=True, mode="bilinear", align_corners=False
+    ).squeeze(0)
+    print(" > after interpolation :", resized.shape)
+    return resized
+
+
+def plot_results(y_hat: torch.tensor, y: torch.tensor, ap: AudioProcessor, name_prefix: str = None) -> Dict:
+    """Build comparison figures for a generated waveform and its ground truth.
+
+    Only the first item of each batch is plotted.
+
+    Args:
+        y_hat (torch.tensor): Predicted waveform.
+        y (torch.tensor): Real waveform.
+        ap (AudioProcessor): Audio processor used to compute the mel spectrograms.
+        name_prefix (str, optional): Prefix prepended to every figure key. Defaults to None.
+
+    Returns:
+        Dict: output figures keyed by the name of the figures.
+    """
+    prefix = "" if name_prefix is None else name_prefix
+
+    # first batch item only, squeezed and converted to numpy
+    wav_fake = y_hat[0].squeeze().detach().cpu().numpy()
+    wav_real = y[0].squeeze().detach().cpu().numpy()
+
+    mel_fake = ap.melspectrogram(wav_fake).T
+    mel_real = ap.melspectrogram(wav_real).T
+    mel_diff = np.abs(mel_fake - mel_real)
+
+    # stacked waveform plot: ground truth on top, generated below
+    fig_wave = plt.figure()
+    for row, (wav, caption) in enumerate(((wav_real, "groundtruth speech"), (wav_fake, "generated speech")), start=1):
+        plt.subplot(2, 1, row)
+        plt.plot(wav)
+        plt.title(caption)
+    plt.tight_layout()
+    plt.close()
+
+    # spectrogram figures are created only after the waveform figure is closed
+    figures = {
+        prefix + "spectrogram/fake": plot_spectrogram(mel_fake),
+        prefix + "spectrogram/real": plot_spectrogram(mel_real),
+        prefix + "spectrogram/diff": plot_spectrogram(mel_diff),
+        prefix + "speech_comparison": fig_wave,
+    }
+    return figures
diff --git a/__pycache__/synthesize.cpython-39.pyc b/__pycache__/synthesize.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0da6b26c4a67e60015102ee621f04c769d92ad8e
Binary files /dev/null and b/__pycache__/synthesize.cpython-39.pyc differ
diff --git a/app.py b/app.py
index d726ba560d57d97f0d7ceb549b3442cc0ecf1bcd..b75f2f5ae498c476847bcb2bfc3691ac7b35b145 100644
--- a/app.py
+++ b/app.py
@@ -1,3 +1,6 @@
+import gradio as gr
+import argparse
+import yaml
 from vietTTS.hifigan.mel2wave import mel2wave
 from vietTTS.nat.text2mel import text2mel
 from vietTTS.synthesizer import nat_normalize_text
@@ -6,15 +9,76 @@ import gradio as gr
 import re
 from vietnam_number import n2w
 from vietnam_number import n2w_single
+from synthesize import synthesizer
 import noisereduce as nr
 import os
-
-
+import scipy.io.wavfile as wavf
+from scipy.io import wavfile
+
+
+TITLE = "Saltlux Text to Speech"
+DESCRIPTION = "SLT Vietnamese Text to speech demo."
+    
+    
+class GradioApplication:
+    """Gradio front end that exposes the VietTTS and Tacotron2 synthesis paths."""
+
+    def __init__(self):
+        inputs = prepare_input()
+        outputs = prepare_output()
+
+        self.iface = gr.Interface(fn=self.infer,
+                                  title=TITLE,
+                                  description=DESCRIPTION,
+                                  inputs=inputs,
+                                  outputs=outputs,
+                                  allow_flagging='never')
+
+    def infer(self, text, lang, duration_rate):
+        """Synthesize `text` with the selected model and return the two audio paths.
+
+        `duration_rate` is forwarded only on the VietTTS path; any other
+        `lang` value, including no selection, falls through to Tacotron2.
+        """
+        if lang == "VietTTS":
+            return using_viettts(text, duration_rate)
+        return using_tacotron(text)
+
+    def run(self):
+        """Launch the interface on port 7086; a KeyboardInterrupt closes all Gradio servers."""
+        try:
+            self.iface.launch(height=900, share=False, server_port=7086, enable_queue=True)
+        except KeyboardInterrupt:
+            gr.close_all()
+
+
+def prepare_input():
+    """Create the three input widgets in the order `GradioApplication.infer` receives them."""
+    widgets = [
+        gr.Textbox(lines=2,
+                   placeholder="Lựa chọn model test - VietTTS và Tacotron 2 + Univnet",
+                   value="Thành phố muốn thí điểm thu thuế bất động sản thứ 2, tự quyết nhiều quyết định đầu tư để thu hút nguồn vốn tư nhân",
+                   label="Text"),
+        # value=None: no model is preselected in the radio group
+        gr.Radio(['VietTTS', 'Tacotron2'], type='value', value=None, label="Model select"),
+        gr.Slider(minimum=0.2,
+                  maximum=1,
+                  step=0.1,
+                  value=1.0,
+                  label="Duration (The bigger the value, the slower the speech) - only for vietTTS"),
+    ]
+    return widgets
+
+
+def prepare_output():
+    """Two audio players: the raw synthesis result first, then its noise-reduced version."""
+    return [gr.Audio(label="Output before denoise"), gr.Audio(label="Output after denoise")]
 def text_to_speech(text,stop_duration):
     print("starting")
     # prevent too long text
     if len(text) > 500:
         text = text[:500]
+    # text_to_speech_tacotron(text)
     # stop_duration_float = float(stop_duration_text)
     text = clean_text(text)
     text = nat_normalize_text(text)
@@ -28,12 +92,42 @@ def text_to_speech(text,stop_duration):
     wave = mel2wave(mel, "config.json", "hk_hifi.pickle")
     return (wave * (2**15)).astype(np.int16)
 
+def text_to_speech_tacotron(text):
+    """Synthesize `text` with the shared `synthesizer` and save it to `./out.wav`.
+
+    Input longer than 500 characters is truncated. Returns the path of the
+    written file.
+    """
+    print("starting")
+    clipped = text[:500]  # prevent too long text
+    wav_path = './out.wav'
+    synthesizer.save_wav(synthesizer.tts(clipped), wav_path)
+    return wav_path
 
-def speak(text,stop_duration):
-    y = text_to_speech(text,stop_duration)
-    return 16_000, y
 
 
+def using_viettts(text,stop_duration):
+    """Synthesize `text` with VietTTS and return `(raw_path, denoised_path)`.
+
+    The waveform is written to `./out.wav` at 16000 Hz, read back, passed
+    through `nr.reduce_noise`, and the result goes to `./output_denoise.wav`.
+    """
+    raw_path, denoised_path = './out.wav', './output_denoise.wav'
+    wavf.write(raw_path, 16000, text_to_speech(text, stop_duration))
+    rate, data = wavfile.read(raw_path)
+    # perform noise reduction
+    wavfile.write(denoised_path, rate, nr.reduce_noise(y=data, sr=rate))
+    return (raw_path, denoised_path)
+
+def using_tacotron(text):
+    """Synthesize `text` via `text_to_speech_tacotron` and return `(raw_path, denoised_path)`."""
+    raw_path = text_to_speech_tacotron(text)
+    denoised_path = "./output_denoise.wav"
+    rate, data = wavfile.read(raw_path)
+    # perform noise reduction on the samples just read back from disk
+    wavfile.write(denoised_path, rate, nr.reduce_noise(y=data, sr=rate))
+    return (raw_path, denoised_path)
+
 
 def clean_text(test_string):
   list_word = test_string.split()
@@ -122,23 +216,9 @@ def clean_text(test_string):
   return test_string
 
 
-title = "SLT TTS"
-description = "SLT Vietnamese Text to speech demo."
-
-gr.Interface(
-    fn=speak, 
-    inputs=["text","number"],
-    outputs="audio",
-    title = title,
-    examples = [
-    ["Thành phố muốn thí điểm thu thuế bất động sản thứ 2, tự quyết nhiều quyết định đầu tư để thu hút nguồn vốn tư nhân",0.2],
-    ["Thứ năm, 22/12/2022, 10:59 những mét hàng rào đầu tiên quanh công viên lớn nhất thủ đô được tháo dỡ, chuyển công viên sang hình thức mở, không thu vé.",0.2],
-    ["Cô dì chú bác nhà em không học hành vì vẫn làm bất động sản. Có phải làm ngành này thì chỉ cần biết sơ sơ, không cần học hành? Đó là vấn đề được học sinh trường Trung học phổ thông  Gia Định, Thành phố Hồ Chí Minh đặt ra tại buổi khai mạc chương trình tư vấn tuyển sinh do Tạp chí Giáo dục Thành phố Hồ Chí Minh tổ chức sáng mồng 9 tháng 1.",0.2],
-    ["Trong chuyến công du nước ngoài thứ hai từ khi Nga phát động chiến dịch quân sự tại Ukraine gần 12 tháng trước, Tổng thống Volodymyr Zelensky dự kiến phát biểu tại Nghị viện châu Âu ngày 9/2",0.2],
-    ["Tuy nhiên, kế hoạch này đang đứng trước nguy cơ đổ vỡ sau khi các quan chức Nghị viện châu âu làm rò rỉ kế hoạch bí mật tiếp đón Tổng thống Ukraine. Hồi đầu tuần, thông tin về chuyến thăm xuất hiện trên nhiều tờ báo châu âu, khởi nguồn từ báo La Stampa của Italy.",0.2]
-    ],
-    description=description,
-    theme="default",
-    allow_screenshot=False,
-    allow_flagging="never",
-).launch(debug=False)
\ No newline at end of file
+
+if __name__ == '__main__':
+    # args = parse_args()
+    # Script entry point: build the Gradio app, then start serving it.
+    app = GradioApplication()
+    app.run()
\ No newline at end of file
diff --git a/out.wav b/out.wav
new file mode 100644
index 0000000000000000000000000000000000000000..bff21cf116b1bd492f6f74c54cb4c13dd87b0c5f
Binary files /dev/null and b/out.wav differ
diff --git a/output_denoise.wav b/output_denoise.wav
new file mode 100644
index 0000000000000000000000000000000000000000..b46c00b805502a105190c5ae97b13cb0fbc692cf
Binary files /dev/null and b/output_denoise.wav differ
diff --git a/scale_stats.npy b/scale_stats.npy
new file mode 100644
index 0000000000000000000000000000000000000000..e43d1cc97ee15797ae0bdec6f31ae79edc73d4f6
Binary files /dev/null and b/scale_stats.npy differ
diff --git a/synthesize.py b/synthesize.py
new file mode 100644
index 0000000000000000000000000000000000000000..5cb3deab0abc2b4f7aacc2f93a89abd980d9724a
--- /dev/null
+++ b/synthesize.py
@@ -0,0 +1,43 @@
+from TTS.utils.synthesizer import Synthesizer 
+
+#provide model params
+# model_path = "/content/drive/MyDrive/TTS/recipes/ljspeech/tacotron2-DDC/tobi-ddc-December-07-2022_02+50AM-3191c5f/best_model.pth"
+# config_path = "/content/drive/MyDrive/TTS/recipes/ljspeech/tacotron2-DDC/tobi-ddc-December-07-2022_02+50AM-3191c5f/config.json"
+#Huu
+# model_path = "/content/drive/MyDrive/SLT-TTS/best_model.pth"
+# config_path = "/content/drive/MyDrive/SLT-TTS/config.json"
+#Huu2
+# model_path = "/content/drive/MyDrive/SLT-TTS/model-1701/best_model.pth"
+# config_path = "/content/drive/MyDrive/SLT-TTS/config.json"
+
+# New phoneme model: active checkpoint/config paths, relative to the working directory
+model_path = "./tacotron/best_model.pth"
+config_path = "./tacotron/config.json"
+# vocoder_path = "/content/drive/MyDrive/Nancy/nancy_univnet/best_model.pth.tar"
+# vocoder_config_path = "/content/drive/MyDrive/Nancy/nancy_univnet/config.json"
+# vocoder_path = "/content/drive/MyDrive/TTS/recipes/ljspeech/tacotron2-DDC/univnet/tobi-univnet-December-29-2022_09+54AM-3191c5f/best_model.pth"
+vocoder_config_path = "./vocoder/config.json"
+vocoder_path = "./vocoder/best_model.pth"
+synthesizer = Synthesizer(  # module-level instance built at import time; all arguments are positional
+        model_path,
+        config_path,
+        "",  # NOTE(review): presumably the speakers file (unused) — confirm against Synthesizer.__init__
+        "",  # NOTE(review): presumably the languages file (unused) — confirm
+        vocoder_path,  # note: the vocoder checkpoint is passed before its config
+        vocoder_config_path,
+        "",  # NOTE(review): presumably the encoder checkpoint (unused) — confirm
+        "",  # NOTE(review): presumably the encoder config (unused) — confirm
+        False  # NOTE(review): presumably use_cuda, i.e. CPU inference — confirm
+    )
+
+# synthesizer = Synthesizer(
+#         model_path,
+#         config_path,
+#         "",
+#         "",
+#         "",
+#         "",
+#         "",
+#         "",
+#         True
+#     )   
\ No newline at end of file
diff --git a/tacotron/config.json b/tacotron/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..ce62a4baf1ae32195df2b99cd232aff2c72b6ee7
--- /dev/null
+++ b/tacotron/config.json
@@ -0,0 +1,200 @@
+{
+    "output_path": "/content/drive/MyDrive/TTS/recipes/ljspeech/tacotron2-DCA/",
+    "logger_uri": null,
+    "run_name": "tobi-ddc",
+    "project_name": null,
+    "run_description": "tacotron2 with DCA",
+    "print_step": 100,
+    "plot_step": 100,
+    "model_param_stats": false,
+    "wandb_entity": null,
+    "dashboard_logger": "tensorboard",
+    "log_model_step": 10000,
+    "save_step": 1000,
+    "save_n_checkpoints": 5,
+    "save_checkpoints": true,
+    "save_all_best": false,
+    "save_best_after": 10000,
+    "target_loss": null,
+    "print_eval": false,
+    "test_delay_epochs": 10,
+    "run_eval": true,
+    "run_eval_steps": null,
+    "distributed_backend": "nccl",
+    "distributed_url": "tcp://localhost:54321",
+    "mixed_precision": false,
+    "epochs": 1000,
+    "batch_size": 32,
+    "eval_batch_size": 16,
+    "grad_clip": 0.05,
+    "scheduler_after_epoch": true,
+    "lr": 0.001,
+    "optimizer": "RAdam",
+    "optimizer_params": {
+        "betas": [
+            0.9,
+            0.998
+        ],
+        "weight_decay": 1e-06
+    },
+    "lr_scheduler": "NoamLR",
+    "lr_scheduler_params": {
+        "warmup_steps": 4000
+    },
+    "use_grad_scaler": false,
+    "cudnn_enable": true,
+    "cudnn_deterministic": false,
+    "cudnn_benchmark": false,
+    "training_seed": 54321,
+    "model": "Tacotron2",
+    "num_loader_workers": 4,
+    "num_eval_loader_workers": 0,
+    "use_noise_augment": false,
+    "audio": {
+        "fft_size": 1024,
+        "win_length": 1024,
+        "hop_length": 256,
+        "frame_shift_ms": null,
+        "frame_length_ms": null,
+        "stft_pad_mode": "reflect",
+        "sample_rate": 22050,
+        "resample": false,
+        "preemphasis": 0.0,
+        "ref_level_db": 20,
+        "do_sound_norm": false,
+        "log_func": "np.log10",
+        "do_trim_silence": true,
+        "trim_db": 30,
+        "do_rms_norm": false,
+        "db_level": null,
+        "power": 1.5,
+        "griffin_lim_iters": 60,
+        "num_mels": 80,
+        "mel_fmin": 70.0,
+        "mel_fmax": 8000.0,
+        "spec_gain": 20,
+        "do_amp_to_db_linear": true,
+        "do_amp_to_db_mel": true,
+        "pitch_fmax": 640.0,
+        "pitch_fmin": 1.0,
+        "signal_norm": true,
+        "min_level_db": -100,
+        "symmetric_norm": true,
+        "max_norm": 4.0,
+        "clip_norm": true,
+        "stats_path": "./scale_stats.npy"
+    },
+    "use_phonemes": false,
+    "phonemizer": null,
+    "phoneme_language": "vi",
+    "compute_input_seq_cache": false,
+    "text_cleaner": "basic_cleaners",
+    "enable_eos_bos_chars": false,
+    "test_sentences_file": null,
+    "phoneme_cache_path": "DEFINE THIS",
+    "characters": {
+        "characters_class": "TTS.tts.utils.text.characters.Graphemes",
+        "vocab_dict": null,
+        "pad": "<pad>",
+        "eos": "<eos>",
+        "bos": "<bos>",
+        "blank": "<blnk>",
+        "characters": "0123456789a\u00e1\u1ea3\u00e0\u00e3\u1ea1\u00e2\u1ea5\u1ea9\u1ea7\u1eab\u1ead\u0103\u1eaf\u1eb3\u1eb1\u1eb5\u1eb7bcd\u0111e\u00e9\u1ebb\u00e8\u1ebd\u1eb9\u00ea\u1ebf\u1ec3\u1ec1\u1ec5\u1ec7fghi\u00ed\u1ec9\u00ec\u0129\u1ecbjklmno\u00f3\u1ecf\u00f2\u00f5\u1ecd\u00f4\u1ed1\u1ed5\u1ed3\u1ed7\u1ed9\u01a1\u1edb\u1edf\u1edd\u1ee1\u1ee3pqrstu\u00fa\u1ee7\u00f9\u0169\u1ee5\u01b0\u1ee9\u1eed\u1eeb\u1eef\u1ef1vwxy\u00fd\u1ef7\u1ef3\u1ef9\u1ef5z",
+        "punctuations": "!'(),-.:;? ",
+        "phonemes": null,
+        "is_unique": true,
+        "is_sorted": true
+    },
+    "add_blank": false,
+    "batch_group_size": 4,
+    "loss_masking": true,
+    "min_audio_len": 1,
+    "max_audio_len": Infinity,
+    "min_text_len": 1,
+    "max_text_len": Infinity,
+    "compute_f0": false,
+    "compute_linear_spec": false,
+    "precompute_num_workers": 0,
+    "start_by_longest": false,
+    "datasets": [
+        {
+            "formatter": "infore22",
+            "dataset_name": "",
+            "path": "/content/drive/MyDrive/infore/",
+            "meta_file_train": "scripts.csv",
+            "ignored_speakers": null,
+            "language": "",
+            "meta_file_val": null,
+            "meta_file_attn_mask": ""
+        }
+    ],
+    "test_sentences": [
+        "T\u00f4i y\u00eau Vi\u1ec7t Nam, \u0111\u1ea5t n\u01b0\u1edbc v\u00f4 c\u00f9ng xinh \u0111\u1eb9p.",
+        "\u0110\u1ec3 \u0111\u00e1p \u1ee9ng nhu c\u1ea7u s\u1ea3n xu\u1ea5t, T\u1eadp \u0111o\u00e0n may th\u00eau Thu\u1eadn Ph\u01b0\u01a1ng \u1edf qu\u1eadn.",
+        "\u0110\u00e3 h\u01a1n hai l\u1ea7n nh\u00e2n vi\u00ean tuy\u1ec3n d\u1ee5ng \u0111\u1ebfn t\u1eadn nh\u00e0 m\u00e1y, c\u00e1c khu tr\u1ecd c\u00f3 c\u00f4ng nh\u00e2n T\u1ef7 H\u00f9ng \u1edf \u0111\u1ec3 g\u1eb7p g\u1ee1."
+    ],
+    "eval_split_max_size": null,
+    "eval_split_size": 0.05,
+    "use_speaker_weighted_sampler": false,
+    "speaker_weighted_sampler_alpha": 1.0,
+    "use_language_weighted_sampler": false,
+    "language_weighted_sampler_alpha": 1.0,
+    "use_length_weighted_sampler": false,
+    "length_weighted_sampler_alpha": 1.0,
+    "use_gst": false,
+    "gst": {
+        "gst_style_input_wav": null,
+        "gst_style_input_weights": null,
+        "gst_embedding_dim": 512,
+        "gst_use_speaker_embedding": false,
+        "gst_num_heads": 4,
+        "gst_num_style_tokens": 10
+    },
+    "gst_style_input": null,
+    "use_capacitron_vae": false,
+    "capacitron_vae": null,
+    "num_speakers": 1,
+    "num_chars": 118,
+    "r": 2,
+    "gradual_training": null,
+    "memory_size": -1,
+    "prenet_type": "original",
+    "prenet_dropout": true,
+    "prenet_dropout_at_inference": false,
+    "stopnet": true,
+    "separate_stopnet": true,
+    "stopnet_pos_weight": 15.0,
+    "max_decoder_steps": 1000,
+    "encoder_in_features": 512,
+    "decoder_in_features": 512,
+    "decoder_output_dim": 80,
+    "out_channels": 80,
+    "attention_type": "dynamic_convolution",
+    "attention_heads": null,
+    "attention_norm": "sigmoid",
+    "attention_win": false,
+    "windowing": false,
+    "use_forward_attn": false,
+    "forward_attn_mask": false,
+    "transition_agent": false,
+    "location_attn": true,
+    "bidirectional_decoder": false,
+    "double_decoder_consistency": true,
+    "ddc_r": 6,
+    "speakers_file": null,
+    "use_speaker_embedding": false,
+    "speaker_embedding_dim": 512,
+    "use_d_vector_file": false,
+    "d_vector_file": false,
+    "d_vector_dim": null,
+    "seq_len_norm": false,
+    "decoder_loss_alpha": 0.25,
+    "postnet_loss_alpha": 0.25,
+    "postnet_diff_spec_alpha": 0.25,
+    "decoder_diff_spec_alpha": 0.25,
+    "decoder_ssim_alpha": 0.25,
+    "postnet_ssim_alpha": 0.25,
+    "ga_alpha": 5.0,
+    "restore_path": "/content/drive/MyDrive/TTS/recipes/ljspeech/tacotron2-DCA/tobi-ddc-January-04-2023_08+44AM-3191c5f/checkpoint_95000.pth",
+    "github_branch": "inside_docker"
+}
\ No newline at end of file
diff --git a/vietTTS/__pycache__/__init__.cpython-39.pyc b/vietTTS/__pycache__/__init__.cpython-39.pyc
index 10cbe4d4e84ba8fc36250ad07d26039b0fb6452e..7c7400ea4fb2a5060ce3465a4eb23a126345c19f 100644
Binary files a/vietTTS/__pycache__/__init__.cpython-39.pyc and b/vietTTS/__pycache__/__init__.cpython-39.pyc differ
diff --git a/vietTTS/__pycache__/synthesizer.cpython-39.pyc b/vietTTS/__pycache__/synthesizer.cpython-39.pyc
index c9b3e504015ac3d674a9bd8b3e16c8c09c4a0d68..90c8ed05a5981e54e31a33eb7b43cf3b84b9731b 100644
Binary files a/vietTTS/__pycache__/synthesizer.cpython-39.pyc and b/vietTTS/__pycache__/synthesizer.cpython-39.pyc differ
diff --git a/vietTTS/hifigan/__pycache__/config.cpython-39.pyc b/vietTTS/hifigan/__pycache__/config.cpython-39.pyc
index 6d1990fef3eede2ef0e4e622e1ed1819d9480049..05e82dbb8372f18013fd7fcbde1037fbd58a4315 100644
Binary files a/vietTTS/hifigan/__pycache__/config.cpython-39.pyc and b/vietTTS/hifigan/__pycache__/config.cpython-39.pyc differ
diff --git a/vietTTS/hifigan/__pycache__/mel2wave.cpython-39.pyc b/vietTTS/hifigan/__pycache__/mel2wave.cpython-39.pyc
index ad61cda592cac78b5f5a8f4b40a7778b82f5e18d..88db4b823c933e717de4dc5e2ee13154c54cca2b 100644
Binary files a/vietTTS/hifigan/__pycache__/mel2wave.cpython-39.pyc and b/vietTTS/hifigan/__pycache__/mel2wave.cpython-39.pyc differ
diff --git a/vietTTS/hifigan/__pycache__/model.cpython-39.pyc b/vietTTS/hifigan/__pycache__/model.cpython-39.pyc
index 4487902c0ba4170690ac515055bd268345e589bc..66c482dabe0e0847b1a057ba819c374f589ea5f3 100644
Binary files a/vietTTS/hifigan/__pycache__/model.cpython-39.pyc and b/vietTTS/hifigan/__pycache__/model.cpython-39.pyc differ
diff --git a/vietTTS/nat/__pycache__/__init__.cpython-39.pyc b/vietTTS/nat/__pycache__/__init__.cpython-39.pyc
index 57ea7a667010852f040c06107aff89568246b917..e272b434b04ff531a8a406f4ba4dc5780fb752ed 100644
Binary files a/vietTTS/nat/__pycache__/__init__.cpython-39.pyc and b/vietTTS/nat/__pycache__/__init__.cpython-39.pyc differ
diff --git a/vietTTS/nat/__pycache__/config.cpython-39.pyc b/vietTTS/nat/__pycache__/config.cpython-39.pyc
index 9ffd84ef6e64b8424578d0ad299d8eb18ce7b9a7..3a0cb7589a71a059d27619660b21a1d4ce199156 100644
Binary files a/vietTTS/nat/__pycache__/config.cpython-39.pyc and b/vietTTS/nat/__pycache__/config.cpython-39.pyc differ
diff --git a/vietTTS/nat/__pycache__/data_loader.cpython-39.pyc b/vietTTS/nat/__pycache__/data_loader.cpython-39.pyc
index e6446f87ac7581ea256f038481f284eb5a91e0bc..d2946018196ac5a10778672ffa1cc5e961ad6f57 100644
Binary files a/vietTTS/nat/__pycache__/data_loader.cpython-39.pyc and b/vietTTS/nat/__pycache__/data_loader.cpython-39.pyc differ
diff --git a/vietTTS/nat/__pycache__/model.cpython-39.pyc b/vietTTS/nat/__pycache__/model.cpython-39.pyc
index 06df1102823ad348809b2fe2c870177415e120e0..9bf29d79566c73908c587df21eb1478661f943da 100644
Binary files a/vietTTS/nat/__pycache__/model.cpython-39.pyc and b/vietTTS/nat/__pycache__/model.cpython-39.pyc differ
diff --git a/vietTTS/nat/__pycache__/text2mel.cpython-39.pyc b/vietTTS/nat/__pycache__/text2mel.cpython-39.pyc
index 2230c2c970bd24b968fe7c1217f96e7313a4e0fe..3fbdd280a44c2715584bf1b126c86de1fb88e3e7 100644
Binary files a/vietTTS/nat/__pycache__/text2mel.cpython-39.pyc and b/vietTTS/nat/__pycache__/text2mel.cpython-39.pyc differ
diff --git a/vocoder/config.json b/vocoder/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..3697315db5feb3f8281c795c84075d2d40c55098
--- /dev/null
+++ b/vocoder/config.json
@@ -0,0 +1,167 @@
+{
+    "output_path": "/content/drive/MyDrive/TTS/recipes/ljspeech/tacotron2-DDC/univnet",
+    "logger_uri": null,
+    "run_name": "tobi-univnet",
+    "project_name": null,
+    "run_description": "",
+    "print_step": 100,
+    "plot_step": 100,
+    "model_param_stats": false,
+    "wandb_entity": null,
+    "dashboard_logger": "tensorboard",
+    "log_model_step": null,
+    "save_step": 5000,
+    "save_n_checkpoints": 5,
+    "save_checkpoints": true,
+    "save_all_best": false,
+    "save_best_after": 10000,
+    "target_loss": "loss_0",
+    "print_eval": false,
+    "test_delay_epochs": -1,
+    "run_eval": true,
+    "run_eval_steps": null,
+    "distributed_backend": "nccl",
+    "distributed_url": "tcp://localhost:54321",
+    "mixed_precision": false,
+    "epochs": 1000,
+    "batch_size": 10,
+    "eval_batch_size": 16,
+    "grad_clip": null,
+    "scheduler_after_epoch": false,
+    "lr": 0.001,
+    "optimizer": "AdamW",
+    "optimizer_params": {
+        "betas": [
+            0.5,
+            0.9
+        ],
+        "weight_decay": 0.0
+    },
+    "lr_scheduler": null,
+    "lr_scheduler_params": {},
+    "use_grad_scaler": false,
+    "cudnn_enable": true,
+    "cudnn_deterministic": false,
+    "cudnn_benchmark": false,
+    "training_seed": 54321,
+    "model": "univnet",
+    "num_loader_workers": 2,
+    "num_eval_loader_workers": 2,
+    "use_noise_augment": true,
+    "audio": {
+        "fft_size": 1024,
+        "win_length": 1024,
+        "hop_length": 256,
+        "frame_shift_ms": null,
+        "frame_length_ms": null,
+        "stft_pad_mode": "reflect",
+        "sample_rate": 22050,
+        "resample": false,
+        "preemphasis": 0.0,
+        "ref_level_db": 20,
+        "do_sound_norm": false,
+        "log_func": "np.log10",
+        "do_trim_silence": true,
+        "trim_db": 30,
+        "do_rms_norm": false,
+        "db_level": null,
+        "power": 1.5,
+        "griffin_lim_iters": 60,
+        "num_mels": 80,
+        "mel_fmin": 70.0,
+        "mel_fmax": 8000.0,
+        "spec_gain": 20,
+        "do_amp_to_db_linear": true,
+        "do_amp_to_db_mel": true,
+        "pitch_fmax": 640.0,
+        "pitch_fmin": 1.0,
+        "signal_norm": true,
+        "min_level_db": -100,
+        "symmetric_norm": true,
+        "max_norm": 4.0,
+        "clip_norm": true,
+        "stats_path": "./scale_stats.npy"
+    },
+    "eval_split_size": 10,
+    "data_path": "/content/drive/MyDrive/infore/wav22",
+    "feature_path": null,
+    "seq_len": 8192,
+    "pad_short": 2000,
+    "conv_pad": 0,
+    "use_cache": false,
+    "wd": 0.0,
+    "use_stft_loss": true,
+    "use_subband_stft_loss": false,
+    "use_mse_gan_loss": true,
+    "use_hinge_gan_loss": false,
+    "use_feat_match_loss": false,
+    "use_l1_spec_loss": false,
+    "stft_loss_weight": 2.5,
+    "subband_stft_loss_weight": 0.0,
+    "mse_G_loss_weight": 1.0,
+    "hinge_G_loss_weight": 0.0,
+    "feat_match_loss_weight": 0.0,
+    "l1_spec_loss_weight": 0.0,
+    "stft_loss_params": {
+        "n_ffts": [
+            1024,
+            2048,
+            512
+        ],
+        "hop_lengths": [
+            120,
+            240,
+            50
+        ],
+        "win_lengths": [
+            600,
+            1200,
+            240
+        ]
+    },
+    "l1_spec_loss_params": {
+        "use_mel": true,
+        "sample_rate": 22050,
+        "n_fft": 1024,
+        "hop_length": 256,
+        "win_length": 1024,
+        "n_mels": 80,
+        "mel_fmin": 0.0,
+        "mel_fmax": null
+    },
+    "lr_gen": 0.0001,
+    "lr_disc": 0.0001,
+    "lr_scheduler_gen": null,
+    "lr_scheduler_gen_params": {
+        "gamma": 0.999,
+        "last_epoch": -1
+    },
+    "lr_scheduler_disc": null,
+    "lr_scheduler_disc_params": {
+        "gamma": 0.999,
+        "last_epoch": -1
+    },
+    "use_pqmf": false,
+    "diff_samples_for_G_and_D": false,
+    "discriminator_model": "univnet_discriminator",
+    "generator_model": "univnet_generator",
+    "generator_model_params": {
+        "in_channels": 64,
+        "out_channels": 1,
+        "hidden_channels": 32,
+        "cond_channels": 80,
+        "upsample_factors": [
+            8,
+            8,
+            4
+        ],
+        "lvc_layers_each_block": 4,
+        "lvc_kernel_size": 3,
+        "kpnet_hidden_channels": 64,
+        "kpnet_conv_size": 3,
+        "dropout": 0.0
+    },
+    "steps_to_start_discriminator": 200000,
+    "restore_path": "/content/drive/MyDrive/TTS/recipes/ljspeech/tacotron2-DDC/univnet/tobi-univnet-January-04-2023_04+20AM-3191c5f/checkpoint_635000.pth",
+    "github_branch": "inside_docker"
+}
\ No newline at end of file