diff --git a/.cardboardlint.yml b/.cardboardlint.yml new file mode 100644 index 0000000000000000000000000000000000000000..4a115a37cddb065c76afebc905476e650f53d085 --- /dev/null +++ b/.cardboardlint.yml @@ -0,0 +1,5 @@ +linters: +- pylint: + # pylintrc: pylintrc + filefilter: ['- test_*.py', '+ *.py', '- *.npy'] + # exclude: \ No newline at end of file diff --git a/.circleci/config.yml b/.circleci/config.yml new file mode 100644 index 0000000000000000000000000000000000000000..8aba5c755f3ac63fb803d1c7f1a5f6dc4954c61b --- /dev/null +++ b/.circleci/config.yml @@ -0,0 +1,53 @@ +version: 2 + +workflows: + version: 2 + test: + jobs: + - test-3.6 + - test-3.7 + - test-3.8 + +executor: ubuntu-latest + +on: + push: + pull_request: + types: [opened, synchronize, reopened] + +jobs: + test-3.6: &test-template + docker: + - image: circleci/python:3.6 + resource_class: large + working_directory: ~/repo + steps: + - checkout + - run: | + sudo apt update + sudo apt install espeak git + - run: sudo pip install --upgrade pip + - run: sudo pip install -e . + - run: | + sudo pip install --quiet --upgrade cardboardlint pylint + cardboardlinter --refspec ${CIRCLE_BRANCH} -n auto + - run: nosetests tests --nocapture + - run: | + sudo ./tests/test_server_package.sh + sudo ./tests/test_glow-tts_train.sh + sudo ./tests/test_server_package.sh + sudo ./tests/test_tacotron_train.sh + sudo ./tests/test_vocoder_gan_train.sh + sudo ./tests/test_vocoder_wavegrad_train.sh + sudo ./tests/test_vocoder_wavernn_train.sh + sudo ./tests/test_speedy_speech_train.sh + + test-3.7: + <<: *test-template + docker: + - image: circleci/python:3.7 + + test-3.8: + <<: *test-template + docker: + - image: circleci/python:3.8 diff --git a/.compute b/.compute new file mode 100644 index 0000000000000000000000000000000000000000..cda787d2adf77105d311067b5a60d76be118c203 --- /dev/null +++ b/.compute @@ -0,0 +1,17 @@ +#!/bin/bash +yes | apt-get install sox +yes | apt-get install ffmpeg +yes | apt-get install espeak +yes | apt-get install tmux +yes | apt-get install zsh +sh -c "$(curl -fsSL https://raw.githubusercontent.com/robbyrussell/oh-my-zsh/master/tools/install.sh)" +pip3 install https://download.pytorch.org/whl/cu100/torch-1.3.0%2Bcu100-cp36-cp36m-linux_x86_64.whl +sudo sh install.sh +# pip install pytorch==1.7.0+cu100 +# python3 setup.py develop +# python3 distribute.py --config_path config.json --data_path /data/ro/shared/data/keithito/LJSpeech-1.1/ +# cp -R ${USER_DIR}/Mozilla_22050 ../tmp/ +# python3 distribute.py --config_path config_tacotron_gst.json --data_path ../tmp/Mozilla_22050/ +# python3 distribute.py --config_path config.json --data_path /data/rw/home/LibriTTS/train-clean-360 +# python3 distribute.py --config_path config.json +while true; do sleep 1000000; done diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000000000000000000000000000000000000..4032ec6b7c844f4835cb3bacb31387e55301d1f5 --- /dev/null +++ b/.dockerignore @@ -0,0 +1 @@ +.git/ \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md new file mode 100644 index 0000000000000000000000000000000000000000..a7ceffb3d0ae8048edc354028e10ce40b193b77e --- /dev/null +++ b/.github/ISSUE_TEMPLATE.md @@ -0,0 +1,19 @@ +--- +name: 'TTS Discourse ' +about: Pls consider to use TTS Discourse page. +title: '' +labels: '' +assignees: '' + +--- +Questions will not be answered here!! + +Help is much more valuable if it's shared publicly, so that more people can benefit from it. 
+ +Please consider posting on [TTS Discourse](https://discourse.mozilla.org/c/tts) page or matrix [chat room](https://matrix.to/#/!KTePhNahjgiVumkqca:matrix.org?via=matrix.org) if your issue is not directly related to TTS development (Bugs, code updates etc.). + +You can also check https://github.com/mozilla/TTS/wiki/FAQ for common questions and answers. + +Happy posting! + +https://discourse.mozilla.org/c/tts diff --git a/.github/PR_TEMPLATE.md b/.github/PR_TEMPLATE.md new file mode 100644 index 0000000000000000000000000000000000000000..7bfb8c60a7a5f3a25039973fda81d980ca54ac7b --- /dev/null +++ b/.github/PR_TEMPLATE.md @@ -0,0 +1,18 @@ +--- +name: 'Contribution Guideline ' +about: Refer to Contirbution Guideline +title: '' +labels: '' +assignees: '' + +--- +### Contribution Guideline + +Please send your PRs to `dev` branch if it is not directly related to a specific branch. +Before making a Pull Request, check your changes for basic mistakes and style problems by using a linter. +We have cardboardlinter setup in this repository, so for example, if you've made some changes and would like to run the linter on just the changed code, you can use the follow command: + +```bash +pip install pylint cardboardlint +cardboardlinter --refspec master +``` \ No newline at end of file diff --git a/.github/stale.yml b/.github/stale.yml new file mode 100644 index 0000000000000000000000000000000000000000..5bac63d342fdfaa07fa92c7d1596d6a2f4325cb0 --- /dev/null +++ b/.github/stale.yml @@ -0,0 +1,19 @@ +# Number of days of inactivity before an issue becomes stale +daysUntilStale: 60 +# Number of days of inactivity before a stale issue is closed +daysUntilClose: 7 +# Issues with these labels will never be considered stale +exemptLabels: + - pinned + - security +# Label to use when marking an issue as stale +staleLabel: wontfix +# Comment to post when marking an issue as stale. Set to `false` to disable +markComment: > + This issue has been automatically marked as stale because it has not had + recent activity. It will be closed if no further activity occurs. Thank you + for your contributions. You might also look our discourse page for further help. + https://discourse.mozilla.org/c/tts +# Comment to post when closing a stale issue. Set to `false` to disable +closeComment: false + diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..579bfbea10a5ecefe966a540dedfe88eaea5dcec --- /dev/null +++ b/.gitignore @@ -0,0 +1,132 @@ +WadaSNR/ +.idea/ +*.pyc +.DS_Store +./__init__.py +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +.static_storage/ +.media/ +local_settings.py + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ + +# vim +*.swp +*.swm +*.swn +*.swo + +# pytorch models +*.pth.tar +result/ + +# setup.py +version.py + +# jupyter dummy files +core + +tests/outputs/* +TODO.txt +.vscode/* +data/* +notebooks/data/* +TTS/tts/layers/glow_tts/monotonic_align/core.c diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 0000000000000000000000000000000000000000..a78b521ecef7774ce96525cc5891b26c8ad371cb --- /dev/null +++ b/.pylintrc @@ -0,0 +1,586 @@ +[MASTER] + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code. +extension-pkg-whitelist= + +# Add files or directories to the blacklist. They should be base names, not +# paths. +ignore=CVS + +# Add files or directories matching the regex patterns to the blacklist. The +# regex matches against base names, not paths. +ignore-patterns= + +# Python code to execute, usually for sys.path manipulation such as +# pygtk.require(). +#init-hook= + +# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the +# number of processors available to use. +jobs=1 + +# Control the amount of potential inferred values when inferring a single +# object. This can help the performance when dealing with large functions or +# complex, nested conditions. +limit-inference-results=100 + +# List of plugins (as comma separated values of python modules names) to load, +# usually to register additional checkers. +load-plugins= + +# Pickle collected data for later comparisons. +persistent=yes + +# Specify a configuration file. +#rcfile= + +# When enabled, pylint would attempt to guess common misconfiguration and emit +# user-friendly hints instead of false-positive error messages. +suggestion-mode=yes + +# Allow loading of arbitrary C extensions. Extensions are imported into the +# active Python interpreter and may run arbitrary code. +unsafe-load-any-extension=no + + +[MESSAGES CONTROL] + +# Only show warnings with the listed confidence levels. Leave empty to show +# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED. +confidence= + +# Disable the message, report, category or checker with the given id(s). You +# can either give multiple identifiers separated by comma (,) or put this +# option multiple times (only on the command line, not in the configuration +# file where it should appear only once). You can also use "--disable=all" to +# disable everything first and then reenable specific checks. For example, if +# you want to run only the similarities checker, you can use "--disable=all +# --enable=similarities". 
If you want to run only the classes checker, but have +# no Warning level messages displayed, use "--disable=all --enable=classes +# --disable=W". +disable=missing-docstring, + line-too-long, + fixme, + wrong-import-order, + ungrouped-imports, + wrong-import-position, + import-error, + invalid-name, + too-many-instance-attributes, + arguments-differ, + no-name-in-module, + no-member, + unsubscriptable-object, + print-statement, + parameter-unpacking, + unpacking-in-except, + old-raise-syntax, + backtick, + long-suffix, + old-ne-operator, + old-octal-literal, + import-star-module-level, + non-ascii-bytes-literal, + raw-checker-failed, + bad-inline-option, + locally-disabled, + file-ignored, + suppressed-message, + useless-suppression, + deprecated-pragma, + use-symbolic-message-instead, + useless-object-inheritance, + too-few-public-methods, + too-many-branches, + too-many-arguments, + too-many-locals, + too-many-statements, + apply-builtin, + basestring-builtin, + buffer-builtin, + cmp-builtin, + coerce-builtin, + execfile-builtin, + file-builtin, + long-builtin, + raw_input-builtin, + reduce-builtin, + standarderror-builtin, + unicode-builtin, + xrange-builtin, + coerce-method, + delslice-method, + getslice-method, + setslice-method, + no-absolute-import, + old-division, + dict-iter-method, + dict-view-method, + next-method-called, + metaclass-assignment, + indexing-exception, + raising-string, + reload-builtin, + oct-method, + hex-method, + nonzero-method, + cmp-method, + input-builtin, + round-builtin, + intern-builtin, + unichr-builtin, + map-builtin-not-iterating, + zip-builtin-not-iterating, + range-builtin-not-iterating, + filter-builtin-not-iterating, + using-cmp-argument, + eq-without-hash, + div-method, + idiv-method, + rdiv-method, + exception-message-attribute, + invalid-str-codec, + sys-max-int, + bad-python3-import, + deprecated-string-function, + deprecated-str-translate-call, + deprecated-itertools-function, + deprecated-types-field, + next-method-defined, + dict-items-not-iterating, + dict-keys-not-iterating, + dict-values-not-iterating, + deprecated-operator-function, + deprecated-urllib-function, + xreadlines-attribute, + deprecated-sys-function, + exception-escape, + comprehension-escape, + duplicate-code + +# Enable the message, report, category or checker with the given id(s). You can +# either give multiple identifier separated by comma (,) or put this option +# multiple time (only on the command line, not in the configuration file where +# it should appear only once). See also the "--disable" option for examples. +enable=c-extension-no-member + + +[REPORTS] + +# Python expression which should return a note less than 10 (10 is the highest +# note). You have access to the variables errors warning, statement which +# respectively contain the number of errors / warnings messages and the total +# number of statements analyzed. This is used by the global evaluation report +# (RP0004). +evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) + +# Template used to display messages. This is a python new-style format string +# used to format the message information. See doc for all details. +#msg-template= + +# Set the output format. Available formats are text, parseable, colorized, json +# and msvs (visual studio). You can also give a reporter class, e.g. +# mypackage.mymodule.MyReporterClass. +output-format=text + +# Tells whether to display a full report or only the messages. +reports=no + +# Activate the evaluation score. 
+score=yes + + +[REFACTORING] + +# Maximum number of nested blocks for function / method body +max-nested-blocks=5 + +# Complete name of functions that never returns. When checking for +# inconsistent-return-statements if a never returning function is called then +# it will be considered as an explicit return statement and no message will be +# printed. +never-returning-functions=sys.exit + + +[LOGGING] + +# Format style used to check logging format string. `old` means using % +# formatting, while `new` is for `{}` formatting. +logging-format-style=old + +# Logging modules to check that the string format arguments are in logging +# function parameter format. +logging-modules=logging + + +[SPELLING] + +# Limits count of emitted suggestions for spelling mistakes. +max-spelling-suggestions=4 + +# Spelling dictionary name. Available dictionaries: none. To make it working +# install python-enchant package.. +spelling-dict= + +# List of comma separated words that should not be checked. +spelling-ignore-words= + +# A path to a file that contains private dictionary; one word per line. +spelling-private-dict-file= + +# Tells whether to store unknown words to indicated private dictionary in +# --spelling-private-dict-file option instead of raising a message. +spelling-store-unknown-words=no + + +[MISCELLANEOUS] + +# List of note tags to take in consideration, separated by a comma. +notes=FIXME, + XXX, + TODO + + +[TYPECHECK] + +# List of decorators that produce context managers, such as +# contextlib.contextmanager. Add to this list to register other decorators that +# produce valid context managers. +contextmanager-decorators=contextlib.contextmanager + +# List of members which are set dynamically and missed by pylint inference +# system, and so shouldn't trigger E1101 when accessed. Python regular +# expressions are accepted. +generated-members= + +# Tells whether missing members accessed in mixin class should be ignored. A +# mixin class is detected if its name ends with "mixin" (case insensitive). +ignore-mixin-members=yes + +# Tells whether to warn about missing members when the owner of the attribute +# is inferred to be None. +ignore-none=yes + +# This flag controls whether pylint should warn about no-member and similar +# checks whenever an opaque object is returned when inferring. The inference +# can return multiple potential results while evaluating a Python object, but +# some branches might not be evaluated, which results in partial inference. In +# that case, it might be useful to still emit no-member and other checks for +# the rest of the inferred objects. +ignore-on-opaque-inference=yes + +# List of class names for which member attributes should not be checked (useful +# for classes with dynamically set attributes). This supports the use of +# qualified names. +ignored-classes=optparse.Values,thread._local,_thread._local + +# List of module names for which member attributes should not be checked +# (useful for modules/projects where namespaces are manipulated during runtime +# and thus existing member attributes cannot be deduced by static analysis. It +# supports qualified module names, as well as Unix pattern matching. +ignored-modules= + +# Show a hint with possible names when a member name was not found. The aspect +# of finding the hint is based on edit distance. +missing-member-hint=yes + +# The minimum edit distance a name should have in order to be considered a +# similar match for a missing member name. 
+missing-member-hint-distance=1 + +# The total number of similar names that should be taken in consideration when +# showing a hint for a missing member. +missing-member-max-choices=1 + + +[VARIABLES] + +# List of additional names supposed to be defined in builtins. Remember that +# you should avoid defining new builtins when possible. +additional-builtins= + +# Tells whether unused global variables should be treated as a violation. +allow-global-unused-variables=yes + +# List of strings which can identify a callback function by name. A callback +# name must start or end with one of those strings. +callbacks=cb_, + _cb + +# A regular expression matching the name of dummy variables (i.e. expected to +# not be used). +dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_ + +# Argument names that match this expression will be ignored. Default to name +# with leading underscore. +ignored-argument-names=_.*|^ignored_|^unused_ + +# Tells whether we should check for unused import in __init__ files. +init-import=no + +# List of qualified module names which can have objects that can redefine +# builtins. +redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io + + +[FORMAT] + +# Expected format of line ending, e.g. empty (any line ending), LF or CRLF. +expected-line-ending-format= + +# Regexp for a line that is allowed to be longer than the limit. +ignore-long-lines=^\s*(# )??$ + +# Number of spaces of indent required inside a hanging or continued line. +indent-after-paren=4 + +# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 +# tab). +indent-string=' ' + +# Maximum number of characters on a single line. +max-line-length=100 + +# Maximum number of lines in a module. +max-module-lines=1000 + +# List of optional constructs for which whitespace checking is disabled. `dict- +# separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}. +# `trailing-comma` allows a space between comma and closing bracket: (a, ). +# `empty-line` allows space-only lines. +no-space-check=trailing-comma, + dict-separator + +# Allow the body of a class to be on the same line as the declaration if body +# contains single statement. +single-line-class-stmt=no + +# Allow the body of an if to be on the same line as the test if there is no +# else. +single-line-if-stmt=no + + +[SIMILARITIES] + +# Ignore comments when computing similarities. +ignore-comments=yes + +# Ignore docstrings when computing similarities. +ignore-docstrings=yes + +# Ignore imports when computing similarities. +ignore-imports=no + +# Minimum lines number of a similarity. +min-similarity-lines=4 + + +[BASIC] + +# Naming style matching correct argument names. +argument-naming-style=snake_case + +# Regular expression matching correct argument names. Overrides argument- +# naming-style. +argument-rgx=[a-z_][a-z0-9_]{0,30}$ + +# Naming style matching correct attribute names. +attr-naming-style=snake_case + +# Regular expression matching correct attribute names. Overrides attr-naming- +# style. +#attr-rgx= + +# Bad variable names which should always be refused, separated by a comma. +bad-names= + +# Naming style matching correct class attribute names. +class-attribute-naming-style=any + +# Regular expression matching correct class attribute names. Overrides class- +# attribute-naming-style. +#class-attribute-rgx= + +# Naming style matching correct class names. +class-naming-style=PascalCase + +# Regular expression matching correct class names. 
Overrides class-naming- +# style. +#class-rgx= + +# Naming style matching correct constant names. +const-naming-style=UPPER_CASE + +# Regular expression matching correct constant names. Overrides const-naming- +# style. +#const-rgx= + +# Minimum line length for functions/classes that require docstrings, shorter +# ones are exempt. +docstring-min-length=-1 + +# Naming style matching correct function names. +function-naming-style=snake_case + +# Regular expression matching correct function names. Overrides function- +# naming-style. +#function-rgx= + +# Good variable names which should always be accepted, separated by a comma. +good-names=i, + j, + k, + x, + ex, + Run, + _ + +# Include a hint for the correct naming format with invalid-name. +include-naming-hint=no + +# Naming style matching correct inline iteration names. +inlinevar-naming-style=any + +# Regular expression matching correct inline iteration names. Overrides +# inlinevar-naming-style. +#inlinevar-rgx= + +# Naming style matching correct method names. +method-naming-style=snake_case + +# Regular expression matching correct method names. Overrides method-naming- +# style. +#method-rgx= + +# Naming style matching correct module names. +module-naming-style=snake_case + +# Regular expression matching correct module names. Overrides module-naming- +# style. +#module-rgx= + +# Colon-delimited sets of names that determine each other's naming style when +# the name regexes allow several styles. +name-group= + +# Regular expression which should only match function or class names that do +# not require a docstring. +no-docstring-rgx=^_ + +# List of decorators that produce properties, such as abc.abstractproperty. Add +# to this list to register other decorators that produce valid properties. +# These decorators are taken in consideration only for invalid-name. +property-classes=abc.abstractproperty + +# Naming style matching correct variable names. +variable-naming-style=snake_case + +# Regular expression matching correct variable names. Overrides variable- +# naming-style. +variable-rgx=[a-z_][a-z0-9_]{0,30}$ + + +[STRING] + +# This flag controls whether the implicit-str-concat-in-sequence should +# generate a warning on implicit string concatenation in sequences defined over +# several lines. +check-str-concat-over-line-jumps=no + + +[IMPORTS] + +# Allow wildcard imports from modules that define __all__. +allow-wildcard-with-all=no + +# Analyse import fallback blocks. This can be used to support both Python 2 and +# 3 compatible code, which means that the block might have code that exists +# only in one or another interpreter, leading to false positives when analysed. +analyse-fallback-blocks=no + +# Deprecated modules which should not be used, separated by a comma. +deprecated-modules=optparse,tkinter.tix + +# Create a graph of external dependencies in the given file (report RP0402 must +# not be disabled). +ext-import-graph= + +# Create a graph of every (i.e. internal and external) dependencies in the +# given file (report RP0402 must not be disabled). +import-graph= + +# Create a graph of internal dependencies in the given file (report RP0402 must +# not be disabled). +int-import-graph= + +# Force import order to recognize a module as part of the standard +# compatibility libraries. +known-standard-library= + +# Force import order to recognize a module as part of a third party library. +known-third-party=enchant + + +[CLASSES] + +# List of method names used to declare (i.e. assign) instance attributes. 
+defining-attr-methods=__init__, + __new__, + setUp + +# List of member names, which should be excluded from the protected access +# warning. +exclude-protected=_asdict, + _fields, + _replace, + _source, + _make + +# List of valid names for the first argument in a class method. +valid-classmethod-first-arg=cls + +# List of valid names for the first argument in a metaclass class method. +valid-metaclass-classmethod-first-arg=cls + + +[DESIGN] + +# Maximum number of arguments for function / method. +max-args=5 + +# Maximum number of attributes for a class (see R0902). +max-attributes=7 + +# Maximum number of boolean expressions in an if statement. +max-bool-expr=5 + +# Maximum number of branch for function / method body. +max-branches=12 + +# Maximum number of locals for function / method body. +max-locals=15 + +# Maximum number of parents for a class (see R0901). +max-parents=7 + +# Maximum number of public methods for a class (see R0904). +max-public-methods=20 + +# Maximum number of return / yield for function / method body. +max-returns=6 + +# Maximum number of statements in function / method body. +max-statements=50 + +# Minimum number of public methods for a class (see R0903). +min-public-methods=2 + + +[EXCEPTIONS] + +# Exceptions that will emit a warning when being caught. Defaults to +# "BaseException, Exception". +overgeneral-exceptions=BaseException, + Exception diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000000000000000000000000000000000000..3b6d813c20bc3fcf379a45be89544acc71506727 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,19 @@ +# Ethical Notice + +Please consider possible consequences and be mindful of any adversarial use cases of this project. In this regard, please contact us if you have any concerns. + +# Community Participation Guidelines + +This repository is governed by Mozilla's code of conduct and etiquette guidelines. +For more details, please read the +[Mozilla Community Participation Guidelines](https://www.mozilla.org/about/governance/policies/participation/). + +## How to Report +For more information on how to report violations of the Community Participation Guidelines, please read our '[How to Report](https://www.mozilla.org/about/governance/policies/participation/reporting/)' page. + + diff --git a/CODE_OWNERS.rst b/CODE_OWNERS.rst new file mode 100644 index 0000000000000000000000000000000000000000..768b573911eae8aeb229de6f56039deb9a64ce27 --- /dev/null +++ b/CODE_OWNERS.rst @@ -0,0 +1,75 @@ +TTS code owners / governance system +========================================== + +TTS is run under a governance system inspired (and partially copied from) by the `Mozilla module ownership system `_. The project is roughly divided into modules, and each module has its owners, which are responsible for reviewing pull requests and deciding on technical direction for their modules. Module ownership authority is given to people who have worked extensively on areas of the project. + +Module owners also have the authority of naming other module owners or appointing module peers, which are people with authority to review pull requests in that module. They can also sub-divide their module into sub-modules with their owners. + +Module owners are not tyrants. They are chartered to make decisions with input from the community and in the best interest of the community. Module owners are not required to make code changes or additions solely because the community wants them to do so. 
(Like anyone else, the module owners may write code because they want to, because their employers want them to, because the community wants them to, or for some other reason.) Module owners do need to pay attention to patches submitted to that module. However “pay attention” does not mean agreeing to every patch. Some patches may not make sense for the WebThings project; some may be poorly implemented. Module owners have the authority to decline a patch; this is a necessary part of the role. We ask the module owners to describe in the relevant issue their reasons for wanting changes to a patch, for declining it altogether, or for postponing review for some period. We don’t ask or expect them to rewrite patches to make them acceptable. Similarly, module owners may need to delay review of a promising patch due to an upcoming deadline. For example, a patch may be of interest, but not for the next milestone. In such a case it may make sense for the module owner to postpone review of a patch until after matters needed for a milestone have been finalized. Again, we expect this to be described in the relevant issue. And of course, it shouldn’t go on very often or for very long or escalation and review is likely. + +The work of the various module owners and peers is overseen by the global owners, which are responsible for making final decisions in case there's conflict between owners as well as set the direction for the project as a whole. + +This file describes module owners who are active on the project and which parts of the code they have expertise on (and interest in). If you're making changes to the code and are wondering who's an appropriate person to talk to, this list will tell you who to ping. + +There's overlap in the areas of expertise of each owner, and in particular when looking at which files are covered by each area, there is a lot of overlap. Don't worry about getting it exactly right when requesting review, any code owner will be happy to redirect the request to a more appropriate person. + +Global owners +---------------- + +These are people who have worked on the project extensively and are familiar with all or most parts of it. Their expertise and review guidance is trusted by other code owners to cover their own areas of expertise. In case of conflicting opinions from other owners, global owners will make a final decision. + +- Eren Gölge (@erogol) +- Reuben Morais (@reuben) + +Training, feeding +----------------- + +- Eren Gölge (@erogol) + +Model exporting +--------------- + +- Eren Gölge (@erogol) + +Multi-Speaker TTS +----------------- + +- Eren Gölge (@erogol) +- Edresson Casanova (@edresson) + +TTS +--- + +- Eren Gölge (@erogol) + +Vocoders +-------- + +- Eren Gölge (@erogol) + +Speaker Encoder +--------------- + +- Eren Gölge (@erogol) + +Testing & CI +------------ + +- Eren Gölge (@erogol) +- Reuben Morais (@reuben) + +Python bindings +--------------- + +- Eren Gölge (@erogol) +- Reuben Morais (@reuben) + +Documentation +------------- + +- Eren Gölge (@erogol) + +Third party bindings +-------------------- + +Owned by the author. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000000000000000000000000000000000000..c9d08f37c213143267caafa5120d85d7678310c3 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,51 @@ +# Contribution guidelines + +This repository is governed by Mozilla's code of conduct and etiquette guidelines. 
For more details, please read the [Mozilla Community Participation Guidelines](https://www.mozilla.org/about/governance/policies/participation/). + +Before making a Pull Request, check your changes for basic mistakes and style problems by using a linter. We have cardboardlinter setup in this repository, so for example, if you've made some changes and would like to run the linter on just the differences between your work and master, you can use the follow command: + +```bash +pip install pylint cardboardlint +cardboardlinter --refspec master +``` + +This will compare the code against master and run the linter on all the changes. To run it automatically as a git pre-commit hook, you can do do the following: + +```bash +cat <<\EOF > .git/hooks/pre-commit +#!/bin/bash +if [ ! -x "$(command -v cardboardlinter)" ]; then + exit 0 +fi + +# First, stash index and work dir, keeping only the +# to-be-committed changes in the working directory. +echo "Stashing working tree changes..." 1>&2 +old_stash=$(git rev-parse -q --verify refs/stash) +git stash save -q --keep-index +new_stash=$(git rev-parse -q --verify refs/stash) + +# If there were no changes (e.g., `--amend` or `--allow-empty`) +# then nothing was stashed, and we should skip everything, +# including the tests themselves. (Presumably the tests passed +# on the previous commit, so there is no need to re-run them.) +if [ "$old_stash" = "$new_stash" ]; then + echo "No changes, skipping lint." 1>&2 + exit 0 +fi + +# Run tests +cardboardlinter --refspec HEAD -n auto +status=$? + +# Restore changes +echo "Restoring working tree changes..." 1>&2 +git reset --hard -q && git stash apply --index -q && git stash drop -q + +# Exit with status from test-run: nonzero prevents commit +exit $status +EOF +chmod +x .git/hooks/pre-commit +``` + +This will run the linters on just the changes made in your commit. \ No newline at end of file diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 0000000000000000000000000000000000000000..14e2f777f6c395e7e04ab4aa306bbcc4b0c1120e --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,373 @@ +Mozilla Public License Version 2.0 +================================== + +1. Definitions +-------------- + +1.1. "Contributor" + means each individual or legal entity that creates, contributes to + the creation of, or owns Covered Software. + +1.2. "Contributor Version" + means the combination of the Contributions of others (if any) used + by a Contributor and that particular Contributor's Contribution. + +1.3. "Contribution" + means Covered Software of a particular Contributor. + +1.4. "Covered Software" + means Source Code Form to which the initial Contributor has attached + the notice in Exhibit A, the Executable Form of such Source Code + Form, and Modifications of such Source Code Form, in each case + including portions thereof. + +1.5. "Incompatible With Secondary Licenses" + means + + (a) that the initial Contributor has attached the notice described + in Exhibit B to the Covered Software; or + + (b) that the Covered Software was made available under the terms of + version 1.1 or earlier of the License, but not also under the + terms of a Secondary License. + +1.6. "Executable Form" + means any form of the work other than Source Code Form. + +1.7. "Larger Work" + means a work that combines Covered Software with other material, in + a separate file or files, that is not Covered Software. + +1.8. "License" + means this document. + +1.9. 
"Licensable" + means having the right to grant, to the maximum extent possible, + whether at the time of the initial grant or subsequently, any and + all of the rights conveyed by this License. + +1.10. "Modifications" + means any of the following: + + (a) any file in Source Code Form that results from an addition to, + deletion from, or modification of the contents of Covered + Software; or + + (b) any new file in Source Code Form that contains any Covered + Software. + +1.11. "Patent Claims" of a Contributor + means any patent claim(s), including without limitation, method, + process, and apparatus claims, in any patent Licensable by such + Contributor that would be infringed, but for the grant of the + License, by the making, using, selling, offering for sale, having + made, import, or transfer of either its Contributions or its + Contributor Version. + +1.12. "Secondary License" + means either the GNU General Public License, Version 2.0, the GNU + Lesser General Public License, Version 2.1, the GNU Affero General + Public License, Version 3.0, or any later versions of those + licenses. + +1.13. "Source Code Form" + means the form of the work preferred for making modifications. + +1.14. "You" (or "Your") + means an individual or a legal entity exercising rights under this + License. For legal entities, "You" includes any entity that + controls, is controlled by, or is under common control with You. For + purposes of this definition, "control" means (a) the power, direct + or indirect, to cause the direction or management of such entity, + whether by contract or otherwise, or (b) ownership of more than + fifty percent (50%) of the outstanding shares or beneficial + ownership of such entity. + +2. License Grants and Conditions +-------------------------------- + +2.1. Grants + +Each Contributor hereby grants You a world-wide, royalty-free, +non-exclusive license: + +(a) under intellectual property rights (other than patent or trademark) + Licensable by such Contributor to use, reproduce, make available, + modify, display, perform, distribute, and otherwise exploit its + Contributions, either on an unmodified basis, with Modifications, or + as part of a Larger Work; and + +(b) under Patent Claims of such Contributor to make, use, sell, offer + for sale, have made, import, and otherwise transfer either its + Contributions or its Contributor Version. + +2.2. Effective Date + +The licenses granted in Section 2.1 with respect to any Contribution +become effective for each Contribution on the date the Contributor first +distributes such Contribution. + +2.3. Limitations on Grant Scope + +The licenses granted in this Section 2 are the only rights granted under +this License. No additional rights or licenses will be implied from the +distribution or licensing of Covered Software under this License. +Notwithstanding Section 2.1(b) above, no patent license is granted by a +Contributor: + +(a) for any code that a Contributor has removed from Covered Software; + or + +(b) for infringements caused by: (i) Your and any other third party's + modifications of Covered Software, or (ii) the combination of its + Contributions with other software (except as part of its Contributor + Version); or + +(c) under Patent Claims infringed by Covered Software in the absence of + its Contributions. + +This License does not grant any rights in the trademarks, service marks, +or logos of any Contributor (except as may be necessary to comply with +the notice requirements in Section 3.4). + +2.4. 
Subsequent Licenses + +No Contributor makes additional grants as a result of Your choice to +distribute the Covered Software under a subsequent version of this +License (see Section 10.2) or under the terms of a Secondary License (if +permitted under the terms of Section 3.3). + +2.5. Representation + +Each Contributor represents that the Contributor believes its +Contributions are its original creation(s) or it has sufficient rights +to grant the rights to its Contributions conveyed by this License. + +2.6. Fair Use + +This License is not intended to limit any rights You have under +applicable copyright doctrines of fair use, fair dealing, or other +equivalents. + +2.7. Conditions + +Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted +in Section 2.1. + +3. Responsibilities +------------------- + +3.1. Distribution of Source Form + +All distribution of Covered Software in Source Code Form, including any +Modifications that You create or to which You contribute, must be under +the terms of this License. You must inform recipients that the Source +Code Form of the Covered Software is governed by the terms of this +License, and how they can obtain a copy of this License. You may not +attempt to alter or restrict the recipients' rights in the Source Code +Form. + +3.2. Distribution of Executable Form + +If You distribute Covered Software in Executable Form then: + +(a) such Covered Software must also be made available in Source Code + Form, as described in Section 3.1, and You must inform recipients of + the Executable Form how they can obtain a copy of such Source Code + Form by reasonable means in a timely manner, at a charge no more + than the cost of distribution to the recipient; and + +(b) You may distribute such Executable Form under the terms of this + License, or sublicense it under different terms, provided that the + license for the Executable Form does not attempt to limit or alter + the recipients' rights in the Source Code Form under this License. + +3.3. Distribution of a Larger Work + +You may create and distribute a Larger Work under terms of Your choice, +provided that You also comply with the requirements of this License for +the Covered Software. If the Larger Work is a combination of Covered +Software with a work governed by one or more Secondary Licenses, and the +Covered Software is not Incompatible With Secondary Licenses, this +License permits You to additionally distribute such Covered Software +under the terms of such Secondary License(s), so that the recipient of +the Larger Work may, at their option, further distribute the Covered +Software under the terms of either this License or such Secondary +License(s). + +3.4. Notices + +You may not remove or alter the substance of any license notices +(including copyright notices, patent notices, disclaimers of warranty, +or limitations of liability) contained within the Source Code Form of +the Covered Software, except that You may alter any license notices to +the extent required to remedy known factual inaccuracies. + +3.5. Application of Additional Terms + +You may choose to offer, and to charge a fee for, warranty, support, +indemnity or liability obligations to one or more recipients of Covered +Software. However, You may do so only on Your own behalf, and not on +behalf of any Contributor. 
You must make it absolutely clear that any +such warranty, support, indemnity, or liability obligation is offered by +You alone, and You hereby agree to indemnify every Contributor for any +liability incurred by such Contributor as a result of warranty, support, +indemnity or liability terms You offer. You may include additional +disclaimers of warranty and limitations of liability specific to any +jurisdiction. + +4. Inability to Comply Due to Statute or Regulation +--------------------------------------------------- + +If it is impossible for You to comply with any of the terms of this +License with respect to some or all of the Covered Software due to +statute, judicial order, or regulation then You must: (a) comply with +the terms of this License to the maximum extent possible; and (b) +describe the limitations and the code they affect. Such description must +be placed in a text file included with all distributions of the Covered +Software under this License. Except to the extent prohibited by statute +or regulation, such description must be sufficiently detailed for a +recipient of ordinary skill to be able to understand it. + +5. Termination +-------------- + +5.1. The rights granted under this License will terminate automatically +if You fail to comply with any of its terms. However, if You become +compliant, then the rights granted under this License from a particular +Contributor are reinstated (a) provisionally, unless and until such +Contributor explicitly and finally terminates Your grants, and (b) on an +ongoing basis, if such Contributor fails to notify You of the +non-compliance by some reasonable means prior to 60 days after You have +come back into compliance. Moreover, Your grants from a particular +Contributor are reinstated on an ongoing basis if such Contributor +notifies You of the non-compliance by some reasonable means, this is the +first time You have received notice of non-compliance with this License +from such Contributor, and You become compliant prior to 30 days after +Your receipt of the notice. + +5.2. If You initiate litigation against any entity by asserting a patent +infringement claim (excluding declaratory judgment actions, +counter-claims, and cross-claims) alleging that a Contributor Version +directly or indirectly infringes any patent, then the rights granted to +You by any and all Contributors for the Covered Software under Section +2.1 of this License shall terminate. + +5.3. In the event of termination under Sections 5.1 or 5.2 above, all +end user license agreements (excluding distributors and resellers) which +have been validly granted by You or Your distributors under this License +prior to termination shall survive termination. + +************************************************************************ +* * +* 6. Disclaimer of Warranty * +* ------------------------- * +* * +* Covered Software is provided under this License on an "as is" * +* basis, without warranty of any kind, either expressed, implied, or * +* statutory, including, without limitation, warranties that the * +* Covered Software is free of defects, merchantable, fit for a * +* particular purpose or non-infringing. The entire risk as to the * +* quality and performance of the Covered Software is with You. * +* Should any Covered Software prove defective in any respect, You * +* (not any Contributor) assume the cost of any necessary servicing, * +* repair, or correction. This disclaimer of warranty constitutes an * +* essential part of this License. 
No use of any Covered Software is * +* authorized under this License except under this disclaimer. * +* * +************************************************************************ + +************************************************************************ +* * +* 7. Limitation of Liability * +* -------------------------- * +* * +* Under no circumstances and under no legal theory, whether tort * +* (including negligence), contract, or otherwise, shall any * +* Contributor, or anyone who distributes Covered Software as * +* permitted above, be liable to You for any direct, indirect, * +* special, incidental, or consequential damages of any character * +* including, without limitation, damages for lost profits, loss of * +* goodwill, work stoppage, computer failure or malfunction, or any * +* and all other commercial damages or losses, even if such party * +* shall have been informed of the possibility of such damages. This * +* limitation of liability shall not apply to liability for death or * +* personal injury resulting from such party's negligence to the * +* extent applicable law prohibits such limitation. Some * +* jurisdictions do not allow the exclusion or limitation of * +* incidental or consequential damages, so this exclusion and * +* limitation may not apply to You. * +* * +************************************************************************ + +8. Litigation +------------- + +Any litigation relating to this License may be brought only in the +courts of a jurisdiction where the defendant maintains its principal +place of business and such litigation shall be governed by laws of that +jurisdiction, without reference to its conflict-of-law provisions. +Nothing in this Section shall prevent a party's ability to bring +cross-claims or counter-claims. + +9. Miscellaneous +---------------- + +This License represents the complete agreement concerning the subject +matter hereof. If any provision of this License is held to be +unenforceable, such provision shall be reformed only to the extent +necessary to make it enforceable. Any law or regulation which provides +that the language of a contract shall be construed against the drafter +shall not be used to construe this License against a Contributor. + +10. Versions of the License +--------------------------- + +10.1. New Versions + +Mozilla Foundation is the license steward. Except as provided in Section +10.3, no one other than the license steward has the right to modify or +publish new versions of this License. Each version will be given a +distinguishing version number. + +10.2. Effect of New Versions + +You may distribute the Covered Software under the terms of the version +of the License under which You originally received the Covered Software, +or under the terms of any subsequent version published by the license +steward. + +10.3. Modified Versions + +If you create software not governed by this License, and you want to +create a new license for such software, you may create and use a +modified version of this License if you rename the license and remove +any references to the name of the license steward (except to note that +such modified license differs from this License). + +10.4. Distributing Source Code Form that is Incompatible With Secondary +Licenses + +If You choose to distribute Source Code Form that is Incompatible With +Secondary Licenses under the terms of this version of the License, the +notice described in Exhibit B of this License must be attached. 
+ +Exhibit A - Source Code Form License Notice +------------------------------------------- + + This Source Code Form is subject to the terms of the Mozilla Public + License, v. 2.0. If a copy of the MPL was not distributed with this + file, You can obtain one at http://mozilla.org/MPL/2.0/. + +If it is not possible or desirable to put the notice in a particular +file, then You may include the notice in a location (such as a LICENSE +file in a relevant directory) where a recipient would be likely to look +for such a notice. + +You may add additional accurate notices of copyright ownership. + +Exhibit B - "Incompatible With Secondary Licenses" Notice +--------------------------------------------------------- + + This Source Code Form is "Incompatible With Secondary Licenses", as + defined by the Mozilla Public License, v. 2.0. diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000000000000000000000000000000000000..3b6f33ce5cd264a0b7fd187216048b719f273244 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,11 @@ +include README.md +include LICENSE.txt +include requirements.txt +recursive-include TTS *.json +recursive-include TTS *.html +recursive-include TTS *.png +recursive-include TTS *.md +recursive-include TTS *.py +recursive-include TTS *.pyx +recursive-include images *.png + diff --git a/README.md b/README.md index 7b95401dc46245ac339fc25059d4a56d90b4cde5..9dac0542d5f5f1e104ed18a996909e7f942776ec 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,281 @@ ---- -license: apache-2.0 ---- + + +# TTS: Text-to-Speech for all. + +TTS is a library for advanced Text-to-Speech generation. It's built on the latest research, was designed to achieve the best trade-off among ease-of-training, speed and quality. +TTS comes with [pretrained models](https://github.com/mozilla/TTS/wiki/Released-Models), tools for measuring dataset quality and already used in **20+ languages** for products and research projects. + +[![CircleCI]()]() +[![License]()](https://opensource.org/licenses/MPL-2.0) +[![PyPI version](https://badge.fury.io/py/TTS.svg)](https://badge.fury.io/py/TTS) + +:loudspeaker: [English Voice Samples](https://erogol.github.io/ddc-samples/) and [SoundCloud playlist](https://soundcloud.com/user-565970875/pocket-article-wavernn-and-tacotron2) + +:man_cook: [TTS training recipes](https://github.com/erogol/TTS_recipes) + +:page_facing_up: [Text-to-Speech paper collection](https://github.com/erogol/TTS-papers) + +## 💬 Where to ask questions +Please use our dedicated channels for questions and discussion. Help is much more valuable if it's shared publicly, so that more people can benefit from it. 
+ +| Type | Platforms | +| ------------------------------- | --------------------------------------- | +| 🚨 **Bug Reports** | [GitHub Issue Tracker] | +| ❔ **FAQ** | [TTS/Wiki](https://github.com/mozilla/TTS/wiki/FAQ) | +| 🎁 **Feature Requests & Ideas** | [GitHub Issue Tracker] | +| 👩‍💻 **Usage Questions** | [Discourse Forum] | +| 🗯 **General Discussion** | [Discourse Forum] and [Matrix Channel] | + +[github issue tracker]: https://github.com/mozilla/tts/issues +[discourse forum]: https://discourse.mozilla.org/c/tts/ +[matrix channel]: https://matrix.to/#/!KTePhNahjgiVumkqca:matrix.org?via=matrix.org +[Tutorials and Examples]: https://github.com/mozilla/TTS/wiki/TTS-Notebooks-and-Tutorials + + +## 🔗 Links and Resources +| Type | Links | +| ------------------------------- | --------------------------------------- | +| 💾 **Installation** | [TTS/README.md](https://github.com/mozilla/TTS/tree/dev#install-tts)| +| 👩🏾‍🏫 **Tutorials and Examples** | [TTS/Wiki](https://github.com/mozilla/TTS/wiki/TTS-Notebooks-and-Tutorials) | +| 🚀 **Released Models** | [TTS/Wiki](https://github.com/mozilla/TTS/wiki/Released-Models)| +| 💻 **Docker Image** | [Repository by @synesthesiam](https://github.com/synesthesiam/docker-mozillatts)| +| 🖥️ **Demo Server** | [TTS/server](https://github.com/mozilla/TTS/tree/master/TTS/server)| +| 🤖 **Running TTS on Terminal** | [TTS/README.md](https://github.com/mozilla/TTS#example-synthesizing-speech-on-terminal-using-the-released-models)| +| ✨ **How to contribute** |[TTS/README.md](#contribution-guidelines)| + +## 🥇 TTS Performance +
+*(Figure: Mean Opinion Score comparison of TTS models; see the details link below.)*
+ +"Mozilla*" and "Judy*" are our models. +[Details...](https://github.com/mozilla/TTS/wiki/Mean-Opinion-Score-Results) + +## Features +- High performance Deep Learning models for Text2Speech tasks. + - Text2Spec models (Tacotron, Tacotron2, Glow-TTS, SpeedySpeech). + - Speaker Encoder to compute speaker embeddings efficiently. + - Vocoder models (MelGAN, Multiband-MelGAN, GAN-TTS, ParallelWaveGAN, WaveGrad, WaveRNN) +- Fast and efficient model training. +- Detailed training logs on console and Tensorboard. +- Support for multi-speaker TTS. +- Efficient Multi-GPUs training. +- Ability to convert PyTorch models to Tensorflow 2.0 and TFLite for inference. +- Released models in PyTorch, Tensorflow and TFLite. +- Tools to curate Text2Speech datasets under```dataset_analysis```. +- Demo server for model testing. +- Notebooks for extensive model benchmarking. +- Modular (but not too much) code base enabling easy testing for new ideas. + +## Implemented Models +### Text-to-Spectrogram +- Tacotron: [paper](https://arxiv.org/abs/1703.10135) +- Tacotron2: [paper](https://arxiv.org/abs/1712.05884) +- Glow-TTS: [paper](https://arxiv.org/abs/2005.11129) +- Speedy-Speech: [paper](https://arxiv.org/abs/2008.03802) + +### Attention Methods +- Guided Attention: [paper](https://arxiv.org/abs/1710.08969) +- Forward Backward Decoding: [paper](https://arxiv.org/abs/1907.09006) +- Graves Attention: [paper](https://arxiv.org/abs/1907.09006) +- Double Decoder Consistency: [blog](https://erogol.com/solving-attention-problems-of-tts-models-with-double-decoder-consistency/) + +### Speaker Encoder +- GE2E: [paper](https://arxiv.org/abs/1710.10467) +- Angular Loss: [paper](https://arxiv.org/pdf/2003.11982.pdf) + +### Vocoders +- MelGAN: [paper](https://arxiv.org/abs/1910.06711) +- MultiBandMelGAN: [paper](https://arxiv.org/abs/2005.05106) +- ParallelWaveGAN: [paper](https://arxiv.org/abs/1910.11480) +- GAN-TTS discriminators: [paper](https://arxiv.org/abs/1909.11646) +- WaveRNN: [origin](https://github.com/fatchord/WaveRNN/) +- WaveGrad: [paper](https://arxiv.org/abs/2009.00713) + +You can also help us implement more models. Some TTS related work can be found [here](https://github.com/erogol/TTS-papers). + +## Install TTS +TTS supports **python >= 3.6, <3.9**. + +If you are only interested in [synthesizing speech](https://github.com/mozilla/TTS/tree/dev#example-synthesizing-speech-on-terminal-using-the-released-models) with the released TTS models, installing from PyPI is the easiest option. + +```bash +pip install TTS +``` + +If you plan to code or train models, clone TTS and install it locally. + +```bash +git clone https://github.com/mozilla/TTS +pip install -e . +``` + +## Directory Structure +``` +|- notebooks/ (Jupyter Notebooks for model evaluation, parameter selection and data analysis.) +|- utils/ (common utilities.) +|- TTS + |- bin/ (folder for all the executables.) + |- train*.py (train your target model.) + |- distribute.py (train your TTS model using Multiple GPUs.) + |- compute_statistics.py (compute dataset statistics for normalization.) + |- convert*.py (convert target torch model to TF.) + |- tts/ (text to speech models) + |- layers/ (model layer definitions) + |- models/ (model definitions) + |- tf/ (Tensorflow 2 utilities and model implementations) + |- utils/ (model specific utilities.) + |- speaker_encoder/ (Speaker Encoder models.) + |- (same) + |- vocoder/ (Vocoder models.) 
+        |- (same)
+```
+
+## Sample Model Output
+Below you can see the output of a Tacotron model after 16K iterations, trained with batch size 32 on the LJSpeech dataset.
+
+> "Recent research at Harvard has shown meditating for as little as 8 weeks can actually increase the grey matter in the parts of the brain responsible for emotional regulation and learning."
+
+Audio examples: [soundcloud](https://soundcloud.com/user-565970875/pocket-article-wavernn-and-tacotron2)
+
+*(Image: example model output.)*
+
+## Datasets and Data-Loading
+TTS provides a generic data loader that is easy to use with your custom dataset.
+You just need to write a simple formatter function for your dataset; check ```datasets/preprocess.py``` to see some examples (a sketch is also given further below).
+After that, you need to set the ```dataset``` fields in ```config.json```.
+
+Some of the public datasets to which we have successfully applied TTS:
+
+- [LJ Speech](https://keithito.com/LJ-Speech-Dataset/)
+- [Nancy](http://www.cstr.ed.ac.uk/projects/blizzard/2011/lessac_blizzard2011/)
+- [TWEB](https://www.kaggle.com/bryanpark/the-world-english-bible-speech-dataset)
+- [M-AI-Labs](http://www.caito.de/2019/01/the-m-ailabs-speech-dataset/)
+- [LibriTTS](https://openslr.org/60/)
+- [Spanish](https://drive.google.com/file/d/1Sm_zyBo67XHkiFhcRSQ4YaHPYM0slO_e/view?usp=sharing) - thx! @carlfm01
+
+## Example: Synthesizing Speech on Terminal Using the Released Models.
+
+After the installation, TTS provides a CLI interface for synthesizing speech using pre-trained models. You can use either your own model or the released models under the TTS project.
+
+List the released TTS models:
+```bash
+tts --list_models
+```
+
+Run a TTS model and a vocoder model from the released model list. (Simply copy and paste the full model names from the list as arguments for the command below.)
+```bash
+tts --text "Text for TTS" \
+    --model_name "<model_type>/<language>/<dataset>/<model_name>" \
+    --vocoder_name "<model_type>/<language>/<dataset>/<model_name>" \
+    --out_path folder/to/save/output/
+```
+
+Run your own TTS model (using the Griffin-Lim vocoder):
+```bash
+tts --text "Text for TTS" \
+    --model_path path/to/model.pth.tar \
+    --config_path path/to/config.json \
+    --out_path output/path/speech.wav
+```
+
+Run your own TTS and vocoder models:
+```bash
+tts --text "Text for TTS" \
+    --model_path path/to/model.pth.tar \
+    --config_path path/to/config.json \
+    --out_path output/path/speech.wav \
+    --vocoder_path path/to/vocoder.pth.tar \
+    --vocoder_config_path path/to/vocoder_config.json
+```
+
+**Note:** You can use ```./TTS/bin/synthesize.py``` if you prefer running ```tts``` from the TTS project folder.
+
+## Example: Training and Fine-tuning LJ-Speech Dataset
+Here you can find a [CoLab](https://gist.github.com/erogol/97516ad65b44dbddb8cd694953187c5b) notebook for a hands-on example of training LJSpeech. Alternatively, you can manually follow the guideline below.
+
+To start with, split ```metadata.csv``` into train and validation subsets, respectively ```metadata_train.csv``` and ```metadata_val.csv```. Note that for text-to-speech, validation performance might be misleading, since the loss value does not directly measure the voice quality to the human ear and it also does not measure the attention module performance. Therefore, running the model on new sentences and listening to the results is the best way to go.
+
+```
+shuf metadata.csv > metadata_shuf.csv
+head -n 12000 metadata_shuf.csv > metadata_train.csv
+tail -n 1100 metadata_shuf.csv > metadata_val.csv
+```
+
+To train a new model, you need to define your own ```config.json``` to specify the model details, training configuration and more (check the examples), and then call the corresponding training script.
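+
+As mentioned in the Datasets and Data-Loading section above, hooking a custom dataset into the generic data loader only requires a small formatter function. Below is a minimal, illustrative sketch of such a formatter; the function name, the metadata layout and the ```[text, wav_path, speaker_name]``` return convention are assumptions modelled on the built-in formatters in ```TTS/tts/datasets/preprocess.py``` and should be checked against your checkout.
+
+```python
+import os
+
+
+def my_dataset(root_path, meta_file):
+    """Hypothetical formatter: parse a metadata file and return training samples.
+
+    Assumes pipe-separated rows of the form wav_id|transcript, mirroring the
+    LJSpeech layout handled by the formatters in TTS/tts/datasets/preprocess.py.
+    """
+    items = []
+    speaker_name = "my_speaker"
+    with open(os.path.join(root_path, meta_file), "r", encoding="utf-8") as f:
+        for line in f:
+            cols = line.strip().split("|")
+            wav_file = os.path.join(root_path, "wavs", cols[0] + ".wav")
+            text = cols[1]
+            # Each item follows the [text, wav_path, speaker_name] convention
+            # used by the built-in formatters.
+            items.append([text, wav_file, speaker_name])
+    return items
+```
+
+A matching sketch of the dataset-related ```config.json``` fields appears further below, after the Major TODOs list.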
+
+For instance, in order to train a Tacotron or Tacotron2 model on the LJSpeech dataset, follow these steps.
+
+```bash
+python TTS/bin/train_tacotron.py --config_path TTS/tts/configs/config.json
+```
+
+To fine-tune a model, use ```--restore_path```.
+
+```bash
+python TTS/bin/train_tacotron.py --config_path TTS/tts/configs/config.json --restore_path /path/to/your/model.pth.tar
+```
+
+To continue an old training run, use ```--continue_path```.
+
+```bash
+python TTS/bin/train_tacotron.py --continue_path /path/to/your/run_folder/
+```
+
+For multi-GPU training, call ```distribute.py```. It runs the given training script in a multi-GPU setting.
+
+```bash
+CUDA_VISIBLE_DEVICES="0,1,4" python TTS/bin/distribute.py --script train_tacotron.py --config_path TTS/tts/configs/config.json
+```
+
+Each run creates a new output folder containing the used ```config.json```, model checkpoints and Tensorboard logs.
+
+If training errors out or is interrupted before any checkpoint is saved under the output folder, the whole folder is removed.
+
+You can also monitor training with Tensorboard by pointing its ```--logdir``` argument to the experiment folder.
+
+## Contribution Guidelines
+This repository is governed by Mozilla's code of conduct and etiquette guidelines. For more details, please read the [Mozilla Community Participation Guidelines.](https://www.mozilla.org/about/governance/policies/participation/)
+
+1. Create a new branch.
+2. Implement your changes.
+3. (if applicable) Add [Google Style](https://google.github.io/styleguide/pyguide.html#381-docstrings) docstrings.
+4. (if applicable) Implement a test case under the ```tests``` folder.
+5. (Optional but preferred) Run the tests.
+```bash
+./run_tests.sh
+```
+6. Run the linter.
+```bash
+pip install pylint cardboardlint
+cardboardlinter --refspec master
+```
+7. Send a PR to the ```dev``` branch and explain what the change is about.
+8. Let us discuss until we make it perfect :).
+9. We merge it to the ```dev``` branch once things look good.
+
+Feel free to ping us through our communication channels at any step where you need help.
+
+## Collaborative Experimentation Guide
+If you would like to use TTS to try a new idea and share your experiments with the community, please follow the guideline below for better collaboration.
+(If you have an idea for improving it, let us know.)
+- Create a new branch.
+- Open an issue pointing to your branch.
+- Explain your idea and experiment.
+- Share your results regularly. (Tensorboard log files, audio results, visuals etc.)
+
+## Major TODOs
+- [x] Implement the model.
+- [x] Generate human-like speech on the LJSpeech dataset.
+- [x] Generate human-like speech on different datasets (Nancy, TWEB).
+- [x] Train TTS with r=1 successfully.
+- [x] Enable process-based distributed training. Similar to https://github.com/fastai/imagenet-fast/.
+- [x] Adapt neural vocoders. TTS works with WaveRNN and ParallelWaveGAN (https://github.com/erogol/WaveRNN and https://github.com/erogol/ParallelWaveGAN).
+- [x] Multi-speaker embedding.
+- [x] Model optimization (model export, model pruning etc.)
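+
+For reference (see "Datasets and Data-Loading" above), a custom dataset formatter can be as small as the sketch below. The function name, metadata layout and ```my_speaker``` label are placeholders; check ```TTS/tts/datasets/preprocess.py``` for the formatters that actually ship with TTS and the exact item format the loader expects.
+
+```python
+import os
+
+def my_dataset(root_path, meta_file):
+    """Map each '<file_id>|<transcript>' row of the metadata file to a TTS data item."""
+    items = []
+    speaker_name = "my_speaker"  # single-speaker placeholder label
+    with open(os.path.join(root_path, meta_file), "r", encoding="utf-8") as f:
+        for line in f:
+            file_id, text = line.strip().split("|", 1)
+            wav_path = os.path.join(root_path, "wavs", file_id + ".wav")
+            items.append([text, wav_path, speaker_name])  # [text, wav_path, speaker_name]
+    return items
+```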
+ +### Acknowledgement +- https://github.com/keithito/tacotron (Dataset pre-processing) +- https://github.com/r9y9/tacotron_pytorch (Initial Tacotron architecture) +- https://github.com/kan-bayashi/ParallelWaveGAN (vocoder library) +- https://github.com/jaywalnut310/glow-tts (Original Glow-TTS implementation) +- https://github.com/fatchord/WaveRNN/ (Original WaveRNN implementation) diff --git a/TTS/.models.json b/TTS/.models.json new file mode 100644 index 0000000000000000000000000000000000000000..075861dbe78eaaa9def93e6b0c25fd6104647d6b --- /dev/null +++ b/TTS/.models.json @@ -0,0 +1,77 @@ +{ + "tts_models":{ + "en":{ + "ljspeech":{ + "glow-tts":{ + "description": "", + "model_file": "1NFsfhH8W8AgcfJ-BsL8CYAwQfZ5k4T-n", + "config_file": "1IAROF3yy9qTK43vG_-R67y3Py9yYbD6t", + "stats_file": null, + "commit": "" + }, + "tacotron2-DCA": { + "description": "", + "model_file": "1CFoPDQBnhfBFu2Gc0TBSJn8o-TuNKQn7", + "config_file": "1lWSscNfKet1zZSJCNirOn7v9bigUZ8C1", + "stats_file": "1qevpGRVHPmzfiRBNuugLMX62x1k7B5vK", + "commit": "" + }, + "speedy-speech-wn":{ + "description": "Speedy Speech model with wavenet decoder.", + "model_file": "1VXAwiq6N-Viq3rsSXlf43bdoi0jSvMAJ", + "config_file": "1KvZilhsNP3EumVggDcD46yd834eO5hR3", + "stats_file": "1Ju7apZ5JlgsVECcETL-GEx3DRoNzWfkR", + "commit": "77b6145" + } + } + }, + "es":{ + "mai":{ + "tacotron2-DDC":{ + "model_file": "1jZ4HvYcAXI5ZClke2iGA7qFQQJBXIovw", + "config_file": "1s7g4n-B73ChCB48AQ88_DV_8oyLth8r0", + "stats_file": "13st0CZ743v6Br5R5Qw_lH1OPQOr3M-Jv", + "commit": "" + } + } + }, + "fr":{ + "mai":{ + "tacotron2-DDC":{ + "model_file": "1qyxrrCyoXUvBG2lqVd0KqAlHj-2nZCgS", + "config_file": "1yECKeP2LI7tNv4E8yVNx1yLmCfTCpkqG", + "stats_file": "13st0CZ743v6Br5R5Qw_lH1OPQOr3M-Jv", + "commit": "" + } + } + } + }, + "vocoder_models":{ + "universal":{ + "libri-tts":{ + "wavegrad":{ + "model_file": "1r2g90JaZsfCj9dJkI9ioIU6JCFMPRqi6", + "config_file": "1POrrLf5YEpZyjvWyMccj1nGCVc94mR6s", + "stats_file": "1Vwbv4t-N1i3jXqI0bgKAhShAEO097sK0", + "commit": "ea976b0" + }, + "fullband-melgan":{ + "model_file": "1Ty5DZdOc0F7OTGj9oJThYbL5iVu_2G0K", + "config_file": "1Rd0R_nRCrbjEdpOwq6XwZAktvugiBvmu", + "stats_file": "11oY3Tv0kQtxK_JPgxrfesa99maVXHNxU", + "commit": "4132240" + } + } + }, + "en": { + "ljspeech":{ + "mulitband-melgan":{ + "model_file": "1Ty5DZdOc0F7OTGj9oJThYbL5iVu_2G0K", + "config_file": "1Rd0R_nRCrbjEdpOwq6XwZAktvugiBvmu", + "stats_file": "11oY3Tv0kQtxK_JPgxrfesa99maVXHNxU", + "commit": "ea976b0" + } + } + } + } +} \ No newline at end of file diff --git a/TTS/__init__.py b/TTS/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/TTS/bin/__init__.py b/TTS/bin/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/TTS/bin/compute_attention_masks.py b/TTS/bin/compute_attention_masks.py new file mode 100644 index 0000000000000000000000000000000000000000..fc02144aa78228ee9ca7fd33756f61e9d691fab0 --- /dev/null +++ b/TTS/bin/compute_attention_masks.py @@ -0,0 +1,166 @@ +import argparse +import importlib +import os + +import numpy as np +import torch +from torch.utils.data import DataLoader +from tqdm import tqdm +from argparse import RawTextHelpFormatter +from TTS.tts.datasets.TTSDataset import MyDataset +from TTS.tts.utils.generic_utils import setup_model +from TTS.tts.utils.io import load_checkpoint +from TTS.tts.utils.text.symbols import make_symbols, phonemes, symbols +from 
TTS.utils.audio import AudioProcessor +from TTS.utils.io import load_config + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description='''Extract attention masks from trained Tacotron/Tacotron2 models. +These masks can be used for different purposes including training a TTS model with a Duration Predictor.\n\n''' + +'''Each attention mask is written to the same path as the input wav file with ".npy" file extension. +(e.g. path/bla.wav (wav file) --> path/bla.npy (attention mask))\n''' + +''' +Example run: + CUDA_VISIBLE_DEVICE="0" python TTS/bin/compute_attention_masks.py + --model_path /data/rw/home/Models/ljspeech-dcattn-December-14-2020_11+10AM-9d0e8c7/checkpoint_200000.pth.tar + --config_path /data/rw/home/Models/ljspeech-dcattn-December-14-2020_11+10AM-9d0e8c7/config.json + --dataset_metafile /root/LJSpeech-1.1/metadata.csv + --data_path /root/LJSpeech-1.1/ + --batch_size 32 + --dataset ljspeech + --use_cuda True +''', + formatter_class=RawTextHelpFormatter + ) + parser.add_argument('--model_path', + type=str, + required=True, + help='Path to Tacotron/Tacotron2 model file ') + parser.add_argument( + '--config_path', + type=str, + required=True, + help='Path to Tacotron/Tacotron2 config file.', + ) + parser.add_argument('--dataset', + type=str, + default='', + required=True, + help='Target dataset processor name from TTS.tts.dataset.preprocess.') + + parser.add_argument( + '--dataset_metafile', + type=str, + default='', + required=True, + help='Dataset metafile inclusing file paths with transcripts.') + parser.add_argument( + '--data_path', + type=str, + default='', + help='Defines the data path. It overwrites config.json.') + parser.add_argument('--use_cuda', + type=bool, + default=False, + help="enable/disable cuda.") + + parser.add_argument( + '--batch_size', + default=16, + type=int, + help='Batch size for the model. 
Use batch_size=1 if you have no CUDA.') + args = parser.parse_args() + + C = load_config(args.config_path) + ap = AudioProcessor(**C.audio) + + # if the vocabulary was passed, replace the default + if 'characters' in C.keys(): + symbols, phonemes = make_symbols(**C.characters) + + # load the model + num_chars = len(phonemes) if C.use_phonemes else len(symbols) + # TODO: handle multi-speaker + model = setup_model(num_chars, num_speakers=0, c=C) + model, _ = load_checkpoint(model, args.model_path, None, args.use_cuda) + model.eval() + + # data loader + preprocessor = importlib.import_module('TTS.tts.datasets.preprocess') + preprocessor = getattr(preprocessor, args.dataset) + meta_data = preprocessor(args.data_path, args.dataset_metafile) + dataset = MyDataset(model.decoder.r, + C.text_cleaner, + compute_linear_spec=False, + ap=ap, + meta_data=meta_data, + tp=C.characters if 'characters' in C.keys() else None, + add_blank=C['add_blank'] if 'add_blank' in C.keys() else False, + use_phonemes=C.use_phonemes, + phoneme_cache_path=C.phoneme_cache_path, + phoneme_language=C.phoneme_language, + enable_eos_bos=C.enable_eos_bos_chars) + + dataset.sort_items() + loader = DataLoader(dataset, + batch_size=args.batch_size, + num_workers=4, + collate_fn=dataset.collate_fn, + shuffle=False, + drop_last=False) + + # compute attentions + file_paths = [] + with torch.no_grad(): + for data in tqdm(loader): + # setup input data + text_input = data[0] + text_lengths = data[1] + linear_input = data[3] + mel_input = data[4] + mel_lengths = data[5] + stop_targets = data[6] + item_idxs = data[7] + + # dispatch data to GPU + if args.use_cuda: + text_input = text_input.cuda() + text_lengths = text_lengths.cuda() + mel_input = mel_input.cuda() + mel_lengths = mel_lengths.cuda() + + mel_outputs, postnet_outputs, alignments, stop_tokens = model.forward( + text_input, text_lengths, mel_input) + + alignments = alignments.detach() + for idx, alignment in enumerate(alignments): + item_idx = item_idxs[idx] + # interpolate if r > 1 + alignment = torch.nn.functional.interpolate( + alignment.transpose(0, 1).unsqueeze(0), + size=None, + scale_factor=model.decoder.r, + mode='nearest', + align_corners=None, + recompute_scale_factor=None).squeeze(0).transpose(0, 1) + # remove paddings + alignment = alignment[:mel_lengths[idx], :text_lengths[idx]].cpu().numpy() + # set file paths + wav_file_name = os.path.basename(item_idx) + align_file_name = os.path.splitext(wav_file_name)[0] + '.npy' + file_path = item_idx.replace(wav_file_name, align_file_name) + # save output + file_paths.append([item_idx, file_path]) + np.save(file_path, alignment) + + # ourput metafile + metafile = os.path.join(args.data_path, "metadata_attn_mask.txt") + + with open(metafile, "w") as f: + for p in file_paths: + f.write(f"{p[0]}|{p[1]}\n") + print(f" >> Metafile created: {metafile}") diff --git a/TTS/bin/compute_embeddings.py b/TTS/bin/compute_embeddings.py new file mode 100644 index 0000000000000000000000000000000000000000..64edd140c806c1dc64c6b5730f86f3f8855c05d5 --- /dev/null +++ b/TTS/bin/compute_embeddings.py @@ -0,0 +1,130 @@ +import argparse +import glob +import os + +import numpy as np +from tqdm import tqdm + +import torch +from TTS.speaker_encoder.model import SpeakerEncoder +from TTS.utils.audio import AudioProcessor +from TTS.utils.io import load_config +from TTS.tts.utils.speakers import save_speaker_mapping +from TTS.tts.datasets.preprocess import load_meta_data + +parser = argparse.ArgumentParser( + description='Compute embedding vectors for each 
wav file in a dataset. If "target_dataset" is defined, it generates "speakers.json" necessary for training a multi-speaker model.') +parser.add_argument( + 'model_path', + type=str, + help='Path to model outputs (checkpoint, tensorboard etc.).') +parser.add_argument( + 'config_path', + type=str, + help='Path to config file for training.', +) +parser.add_argument( + 'data_path', + type=str, + help='Data path for wav files - directory or CSV file') +parser.add_argument( + 'output_path', + type=str, + help='path for training outputs.') +parser.add_argument( + '--target_dataset', + type=str, + default='', + help='Target dataset to pick a processor from TTS.tts.dataset.preprocess. Necessary to create a speakers.json file.' +) +parser.add_argument( + '--use_cuda', type=bool, help='flag to set cuda.', default=False +) +parser.add_argument( + '--separator', type=str, help='Separator used in file if CSV is passed for data_path', default='|' +) +args = parser.parse_args() + + +c = load_config(args.config_path) +ap = AudioProcessor(**c['audio']) + +data_path = args.data_path +split_ext = os.path.splitext(data_path) +sep = args.separator + +if args.target_dataset != '': + # if target dataset is defined + dataset_config = [ + { + "name": args.target_dataset, + "path": args.data_path, + "meta_file_train": None, + "meta_file_val": None + }, + ] + wav_files, _ = load_meta_data(dataset_config, eval_split=False) + output_files = [wav_file[1].replace(data_path, args.output_path).replace( + '.wav', '.npy') for wav_file in wav_files] +else: + # if target dataset is not defined + if len(split_ext) > 0 and split_ext[1].lower() == '.csv': + # Parse CSV + print(f'CSV file: {data_path}') + with open(data_path) as f: + wav_path = os.path.join(os.path.dirname(data_path), 'wavs') + wav_files = [] + print(f'Separator is: {sep}') + for line in f: + components = line.split(sep) + if len(components) != 2: + print("Invalid line") + continue + wav_file = os.path.join(wav_path, components[0] + '.wav') + #print(f'wav_file: {wav_file}') + if os.path.exists(wav_file): + wav_files.append(wav_file) + print(f'Count of wavs imported: {len(wav_files)}') + else: + # Parse all wav files in data_path + wav_files = glob.glob(data_path + '/**/*.wav', recursive=True) + + output_files = [wav_file.replace(data_path, args.output_path).replace( + '.wav', '.npy') for wav_file in wav_files] + +for output_file in output_files: + os.makedirs(os.path.dirname(output_file), exist_ok=True) + +# define Encoder model +model = SpeakerEncoder(**c.model) +model.load_state_dict(torch.load(args.model_path)['model']) +model.eval() +if args.use_cuda: + model.cuda() + +# compute speaker embeddings +speaker_mapping = {} +for idx, wav_file in enumerate(tqdm(wav_files)): + if isinstance(wav_file, list): + speaker_name = wav_file[2] + wav_file = wav_file[1] + + mel_spec = ap.melspectrogram(ap.load_wav(wav_file, sr=ap.sample_rate)).T + mel_spec = torch.FloatTensor(mel_spec[None, :, :]) + if args.use_cuda: + mel_spec = mel_spec.cuda() + embedd = model.compute_embedding(mel_spec) + embedd = embedd.detach().cpu().numpy() + np.save(output_files[idx], embedd) + + if args.target_dataset != '': + # create speaker_mapping if target dataset is defined + wav_file_name = os.path.basename(wav_file) + speaker_mapping[wav_file_name] = {} + speaker_mapping[wav_file_name]['name'] = speaker_name + speaker_mapping[wav_file_name]['embedding'] = embedd.flatten().tolist() + +if args.target_dataset != '': + # save speaker_mapping if target dataset is defined + mapping_file_path = 
os.path.join(args.output_path, 'speakers.json') + save_speaker_mapping(args.output_path, speaker_mapping) diff --git a/TTS/bin/compute_statistics.py b/TTS/bin/compute_statistics.py new file mode 100644 index 0000000000000000000000000000000000000000..7642f86bf07394d99a5a494a946df22aefab80e0 --- /dev/null +++ b/TTS/bin/compute_statistics.py @@ -0,0 +1,90 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import os +import glob +import argparse + +import numpy as np +from tqdm import tqdm + +from TTS.tts.datasets.preprocess import load_meta_data +from TTS.utils.io import load_config +from TTS.utils.audio import AudioProcessor + + +def main(): + """Run preprocessing process.""" + parser = argparse.ArgumentParser( + description="Compute mean and variance of spectrogtram features.") + parser.add_argument("--config_path", type=str, required=True, + help="TTS config file path to define audio processin parameters.") + parser.add_argument("--out_path", default=None, type=str, + help="directory to save the output file.") + args = parser.parse_args() + + # load config + CONFIG = load_config(args.config_path) + CONFIG.audio['signal_norm'] = False # do not apply earlier normalization + CONFIG.audio['stats_path'] = None # discard pre-defined stats + + # load audio processor + ap = AudioProcessor(**CONFIG.audio) + + # load the meta data of target dataset + if 'data_path' in CONFIG.keys(): + dataset_items = glob.glob(os.path.join(CONFIG.data_path, '**', '*.wav'), recursive=True) + else: + dataset_items = load_meta_data(CONFIG.datasets)[0] # take only train data + print(f" > There are {len(dataset_items)} files.") + + mel_sum = 0 + mel_square_sum = 0 + linear_sum = 0 + linear_square_sum = 0 + N = 0 + for item in tqdm(dataset_items): + # compute features + wav = ap.load_wav(item if isinstance(item, str) else item[1]) + linear = ap.spectrogram(wav) + mel = ap.melspectrogram(wav) + + # compute stats + N += mel.shape[1] + mel_sum += mel.sum(1) + linear_sum += linear.sum(1) + mel_square_sum += (mel ** 2).sum(axis=1) + linear_square_sum += (linear ** 2).sum(axis=1) + + mel_mean = mel_sum / N + mel_scale = np.sqrt(mel_square_sum / N - mel_mean ** 2) + linear_mean = linear_sum / N + linear_scale = np.sqrt(linear_square_sum / N - linear_mean ** 2) + + output_file_path = args.out_path + stats = {} + stats['mel_mean'] = mel_mean + stats['mel_std'] = mel_scale + stats['linear_mean'] = linear_mean + stats['linear_std'] = linear_scale + + print(f' > Avg mel spec mean: {mel_mean.mean()}') + print(f' > Avg mel spec scale: {mel_scale.mean()}') + print(f' > Avg linear spec mean: {linear_mean.mean()}') + print(f' > Avg lienar spec scale: {linear_scale.mean()}') + + # set default config values for mean-var scaling + CONFIG.audio['stats_path'] = output_file_path + CONFIG.audio['signal_norm'] = True + # remove redundant values + del CONFIG.audio['max_norm'] + del CONFIG.audio['min_level_db'] + del CONFIG.audio['symmetric_norm'] + del CONFIG.audio['clip_norm'] + stats['audio_config'] = CONFIG.audio + np.save(output_file_path, stats, allow_pickle=True) + print(f' > stats saved to {output_file_path}') + + +if __name__ == "__main__": + main() diff --git a/TTS/bin/convert_melgan_tflite.py b/TTS/bin/convert_melgan_tflite.py new file mode 100644 index 0000000000000000000000000000000000000000..8df582da607b987c79fd210e88cb73464f642e66 --- /dev/null +++ b/TTS/bin/convert_melgan_tflite.py @@ -0,0 +1,32 @@ +# Convert Tensorflow Tacotron2 model to TF-Lite binary + +import argparse + +from TTS.utils.io import load_config +from 
TTS.vocoder.tf.utils.generic_utils import setup_generator +from TTS.vocoder.tf.utils.io import load_checkpoint +from TTS.vocoder.tf.utils.tflite import convert_melgan_to_tflite + + +parser = argparse.ArgumentParser() +parser.add_argument('--tf_model', + type=str, + help='Path to target torch model to be converted to TF.') +parser.add_argument('--config_path', + type=str, + help='Path to config file of torch model.') +parser.add_argument('--output_path', + type=str, + help='path to tflite output binary.') +args = parser.parse_args() + +# Set constants +CONFIG = load_config(args.config_path) + +# load the model +model = setup_generator(CONFIG) +model.build_inference() +model = load_checkpoint(model, args.tf_model) + +# create tflite model +tflite_model = convert_melgan_to_tflite(model, output_path=args.output_path) diff --git a/TTS/bin/convert_melgan_torch_to_tf.py b/TTS/bin/convert_melgan_torch_to_tf.py new file mode 100644 index 0000000000000000000000000000000000000000..2eec6157cfab29476e5373c20d94d13d59670e8a --- /dev/null +++ b/TTS/bin/convert_melgan_torch_to_tf.py @@ -0,0 +1,116 @@ +import argparse +from difflib import SequenceMatcher +import os + +import numpy as np +import tensorflow as tf +import torch + +from TTS.utils.io import load_config +from TTS.vocoder.tf.utils.convert_torch_to_tf_utils import ( + compare_torch_tf, convert_tf_name, transfer_weights_torch_to_tf) +from TTS.vocoder.tf.utils.generic_utils import \ + setup_generator as setup_tf_generator +from TTS.vocoder.tf.utils.io import save_checkpoint +from TTS.vocoder.utils.generic_utils import setup_generator + +# prevent GPU use +os.environ['CUDA_VISIBLE_DEVICES'] = '' + +# define args +parser = argparse.ArgumentParser() +parser.add_argument('--torch_model_path', + type=str, + help='Path to target torch model to be converted to TF.') +parser.add_argument('--config_path', + type=str, + help='Path to config file of torch model.') +parser.add_argument( + '--output_path', + type=str, + help='path to output file including file name to save TF model.') +args = parser.parse_args() + +# load model config +config_path = args.config_path +c = load_config(config_path) +num_speakers = 0 + +# init torch model +model = setup_generator(c) +checkpoint = torch.load(args.torch_model_path, + map_location=torch.device('cpu')) +state_dict = checkpoint['model'] +model.load_state_dict(state_dict) +model.remove_weight_norm() +state_dict = model.state_dict() + +# init tf model +model_tf = setup_tf_generator(c) + +common_sufix = '/.ATTRIBUTES/VARIABLE_VALUE' +# get tf_model graph by passing an input +# B x D x T +dummy_input = tf.random.uniform((7, 80, 64), dtype=tf.float32) +mel_pred = model_tf(dummy_input, training=False) + +# get tf variables +tf_vars = model_tf.weights + +# match variable names with fuzzy logic +torch_var_names = list(state_dict.keys()) +tf_var_names = [we.name for we in model_tf.weights] +var_map = [] +for tf_name in tf_var_names: + # skip re-mapped layer names + if tf_name in [name[0] for name in var_map]: + continue + tf_name_edited = convert_tf_name(tf_name) + ratios = [ + SequenceMatcher(None, torch_name, tf_name_edited).ratio() + for torch_name in torch_var_names + ] + max_idx = np.argmax(ratios) + matching_name = torch_var_names[max_idx] + del torch_var_names[max_idx] + var_map.append((tf_name, matching_name)) + +# pass weights +tf_vars = transfer_weights_torch_to_tf(tf_vars, dict(var_map), state_dict) + +# Compare TF and TORCH models +# check embedding outputs +model.eval() +dummy_input_torch = torch.ones((1, 80, 10)) 
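+# NOTE: the torch generator consumes B x C x T tensors while the TF layers below
+# work on B x T x 1 x C, hence the transpose/expand_dims on the input and the
+# reverse transpose applied to each TF output before it is compared (tolerance ~1e-5).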
+dummy_input_tf = tf.convert_to_tensor(dummy_input_torch.numpy()) +dummy_input_tf = tf.transpose(dummy_input_tf, perm=[0, 2, 1]) +dummy_input_tf = tf.expand_dims(dummy_input_tf, 2) + +out_torch = model.layers[0](dummy_input_torch) +out_tf = model_tf.model_layers[0](dummy_input_tf) +out_tf_ = tf.transpose(out_tf, perm=[0, 3, 2, 1])[:, :, 0, :] + +assert compare_torch_tf(out_torch, out_tf_) < 1e-5 + +for i in range(1, len(model.layers)): + print(f"{i} -> {model.layers[i]} vs {model_tf.model_layers[i]}") + out_torch = model.layers[i](out_torch) + out_tf = model_tf.model_layers[i](out_tf) + out_tf_ = tf.transpose(out_tf, perm=[0, 3, 2, 1])[:, :, 0, :] + diff = compare_torch_tf(out_torch, out_tf_) + assert diff < 1e-5, diff + +torch.manual_seed(0) +dummy_input_torch = torch.rand((1, 80, 100)) +dummy_input_tf = tf.convert_to_tensor(dummy_input_torch.numpy()) +model.inference_padding = 0 +model_tf.inference_padding = 0 +output_torch = model.inference(dummy_input_torch) +output_tf = model_tf(dummy_input_tf, training=False) +assert compare_torch_tf(output_torch, output_tf) < 1e-5, compare_torch_tf( + output_torch, output_tf) + +# save tf model +save_checkpoint(model_tf, checkpoint['step'], checkpoint['epoch'], + args.output_path) +print(' > Model conversion is successfully completed :).') diff --git a/TTS/bin/convert_tacotron2_tflite.py b/TTS/bin/convert_tacotron2_tflite.py new file mode 100644 index 0000000000000000000000000000000000000000..2fddf4b01c647bb287e47b26ee444e8eafc3c85f --- /dev/null +++ b/TTS/bin/convert_tacotron2_tflite.py @@ -0,0 +1,37 @@ +# Convert Tensorflow Tacotron2 model to TF-Lite binary + +import argparse + +from TTS.utils.io import load_config +from TTS.tts.utils.text.symbols import symbols, phonemes +from TTS.tts.tf.utils.generic_utils import setup_model +from TTS.tts.tf.utils.io import load_checkpoint +from TTS.tts.tf.utils.tflite import convert_tacotron2_to_tflite + + +parser = argparse.ArgumentParser() +parser.add_argument('--tf_model', + type=str, + help='Path to target torch model to be converted to TF.') +parser.add_argument('--config_path', + type=str, + help='Path to config file of torch model.') +parser.add_argument('--output_path', + type=str, + help='path to tflite output binary.') +args = parser.parse_args() + +# Set constants +CONFIG = load_config(args.config_path) + +# load the model +c = CONFIG +num_speakers = 0 +num_chars = len(phonemes) if c.use_phonemes else len(symbols) +model = setup_model(num_chars, num_speakers, c, enable_tflite=True) +model.build_inference() +model = load_checkpoint(model, args.tf_model) +model.decoder.set_max_decoder_steps(1000) + +# create tflite model +tflite_model = convert_tacotron2_to_tflite(model, output_path=args.output_path) diff --git a/TTS/bin/convert_tacotron2_torch_to_tf.py b/TTS/bin/convert_tacotron2_torch_to_tf.py new file mode 100644 index 0000000000000000000000000000000000000000..485e56bebda9eec80219d5f4ac51f3ad404bee87 --- /dev/null +++ b/TTS/bin/convert_tacotron2_torch_to_tf.py @@ -0,0 +1,213 @@ +# %% +# %% +import argparse +from difflib import SequenceMatcher +import os +import sys +# %% +# print variable match +from pprint import pprint + +import numpy as np +import tensorflow as tf +import torch +from TTS.tts.tf.models.tacotron2 import Tacotron2 +from TTS.tts.tf.utils.convert_torch_to_tf_utils import ( + compare_torch_tf, convert_tf_name, transfer_weights_torch_to_tf) +from TTS.tts.tf.utils.generic_utils import save_checkpoint +from TTS.tts.utils.generic_utils import setup_model +from TTS.tts.utils.text.symbols 
import phonemes, symbols +from TTS.utils.io import load_config + +sys.path.append('/home/erogol/Projects') +os.environ['CUDA_VISIBLE_DEVICES'] = '' + + +parser = argparse.ArgumentParser() +parser.add_argument('--torch_model_path', + type=str, + help='Path to target torch model to be converted to TF.') +parser.add_argument('--config_path', + type=str, + help='Path to config file of torch model.') +parser.add_argument('--output_path', + type=str, + help='path to output file including file name to save TF model.') +args = parser.parse_args() + +# load model config +config_path = args.config_path +c = load_config(config_path) +num_speakers = 0 + +# init torch model +num_chars = len(phonemes) if c.use_phonemes else len(symbols) +model = setup_model(num_chars, num_speakers, c) +checkpoint = torch.load(args.torch_model_path, + map_location=torch.device('cpu')) +state_dict = checkpoint['model'] +model.load_state_dict(state_dict) + +# init tf model +model_tf = Tacotron2(num_chars=num_chars, + num_speakers=num_speakers, + r=model.decoder.r, + postnet_output_dim=c.audio['num_mels'], + decoder_output_dim=c.audio['num_mels'], + attn_type=c.attention_type, + attn_win=c.windowing, + attn_norm=c.attention_norm, + prenet_type=c.prenet_type, + prenet_dropout=c.prenet_dropout, + forward_attn=c.use_forward_attn, + trans_agent=c.transition_agent, + forward_attn_mask=c.forward_attn_mask, + location_attn=c.location_attn, + attn_K=c.attention_heads, + separate_stopnet=c.separate_stopnet, + bidirectional_decoder=c.bidirectional_decoder) + +# set initial layer mapping - these are not captured by the below heuristic approach +# TODO: set layer names so that we can remove these manual matching +common_sufix = '/.ATTRIBUTES/VARIABLE_VALUE' +var_map = [ + ('embedding/embeddings:0', 'embedding.weight'), + ('encoder/lstm/forward_lstm/lstm_cell_1/kernel:0', + 'encoder.lstm.weight_ih_l0'), + ('encoder/lstm/forward_lstm/lstm_cell_1/recurrent_kernel:0', + 'encoder.lstm.weight_hh_l0'), + ('encoder/lstm/backward_lstm/lstm_cell_2/kernel:0', + 'encoder.lstm.weight_ih_l0_reverse'), + ('encoder/lstm/backward_lstm/lstm_cell_2/recurrent_kernel:0', + 'encoder.lstm.weight_hh_l0_reverse'), + ('encoder/lstm/forward_lstm/lstm_cell_1/bias:0', + ('encoder.lstm.bias_ih_l0', 'encoder.lstm.bias_hh_l0')), + ('encoder/lstm/backward_lstm/lstm_cell_2/bias:0', + ('encoder.lstm.bias_ih_l0_reverse', 'encoder.lstm.bias_hh_l0_reverse')), + ('attention/v/kernel:0', 'decoder.attention.v.linear_layer.weight'), + ('decoder/linear_projection/kernel:0', + 'decoder.linear_projection.linear_layer.weight'), + ('decoder/stopnet/kernel:0', 'decoder.stopnet.1.linear_layer.weight') +] + +# %% +# get tf_model graph +model_tf.build_inference() + +# get tf variables +tf_vars = model_tf.weights + +# match variable names with fuzzy logic +torch_var_names = list(state_dict.keys()) +tf_var_names = [we.name for we in model_tf.weights] +for tf_name in tf_var_names: + # skip re-mapped layer names + if tf_name in [name[0] for name in var_map]: + continue + tf_name_edited = convert_tf_name(tf_name) + ratios = [ + SequenceMatcher(None, torch_name, tf_name_edited).ratio() + for torch_name in torch_var_names + ] + max_idx = np.argmax(ratios) + matching_name = torch_var_names[max_idx] + del torch_var_names[max_idx] + var_map.append((tf_name, matching_name)) + +pprint(var_map) +pprint(torch_var_names) + +# pass weights +tf_vars = transfer_weights_torch_to_tf(tf_vars, dict(var_map), state_dict) + +# Compare TF and TORCH models +# %% +# check embedding outputs +model.eval() 
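+# The checks below feed identical dummy inputs through each torch module and its
+# TF counterpart (embedding, encoder, attention_rnn, attention, decoder_rnn,
+# linear_projection, decoder and the full model) and assert that the outputs
+# match within 1e-4/1e-5 tolerances before the TF checkpoint is written.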
+input_ids = torch.randint(0, 24, (1, 128)).long() + +o_t = model.embedding(input_ids) +o_tf = model_tf.embedding(input_ids.detach().numpy()) +assert abs(o_t.detach().numpy() - + o_tf.numpy()).sum() < 1e-5, abs(o_t.detach().numpy() - + o_tf.numpy()).sum() + +# compare encoder outputs +oo_en = model.encoder.inference(o_t.transpose(1, 2)) +ooo_en = model_tf.encoder(o_t.detach().numpy(), training=False) +assert compare_torch_tf(oo_en, ooo_en) < 1e-5 + +#pylint: disable=redefined-builtin +# compare decoder.attention_rnn +inp = torch.rand([1, 768]) +inp_tf = inp.numpy() +model.decoder._init_states(oo_en, mask=None) #pylint: disable=protected-access +output, cell_state = model.decoder.attention_rnn(inp) +states = model_tf.decoder.build_decoder_initial_states(1, 512, 128) +output_tf, memory_state = model_tf.decoder.attention_rnn(inp_tf, + states[2], + training=False) +assert compare_torch_tf(output, output_tf).mean() < 1e-5 + +query = output +inputs = torch.rand([1, 128, 512]) +query_tf = query.detach().numpy() +inputs_tf = inputs.numpy() + +# compare decoder.attention +model.decoder.attention.init_states(inputs) +processes_inputs = model.decoder.attention.preprocess_inputs(inputs) +loc_attn, proc_query = model.decoder.attention.get_location_attention( + query, processes_inputs) +context = model.decoder.attention(query, inputs, processes_inputs, None) + +attention_states = model_tf.decoder.build_decoder_initial_states(1, 512, 128)[-1] +model_tf.decoder.attention.process_values(tf.convert_to_tensor(inputs_tf)) +loc_attn_tf, proc_query_tf = model_tf.decoder.attention.get_loc_attn(query_tf, attention_states) +context_tf, attention, attention_states = model_tf.decoder.attention(query_tf, attention_states, training=False) + +assert compare_torch_tf(loc_attn, loc_attn_tf).mean() < 1e-5 +assert compare_torch_tf(proc_query, proc_query_tf).mean() < 1e-5 +assert compare_torch_tf(context, context_tf) < 1e-5 + +# compare decoder.decoder_rnn +input = torch.rand([1, 1536]) +input_tf = input.numpy() +model.decoder._init_states(oo_en, mask=None) #pylint: disable=protected-access +output, cell_state = model.decoder.decoder_rnn( + input, [model.decoder.decoder_hidden, model.decoder.decoder_cell]) +states = model_tf.decoder.build_decoder_initial_states(1, 512, 128) +output_tf, memory_state = model_tf.decoder.decoder_rnn(input_tf, + states[3], + training=False) +assert abs(input - input_tf).mean() < 1e-5 +assert compare_torch_tf(output, output_tf).mean() < 1e-5 + +# compare decoder.linear_projection +input = torch.rand([1, 1536]) +input_tf = input.numpy() +output = model.decoder.linear_projection(input) +output_tf = model_tf.decoder.linear_projection(input_tf, training=False) +assert compare_torch_tf(output, output_tf) < 1e-5 + +# compare decoder outputs +model.decoder.max_decoder_steps = 100 +model_tf.decoder.set_max_decoder_steps(100) +output, align, stop = model.decoder.inference(oo_en) +states = model_tf.decoder.build_decoder_initial_states(1, 512, 128) +output_tf, align_tf, stop_tf = model_tf.decoder(ooo_en, states, training=False) +assert compare_torch_tf(output.transpose(1, 2), output_tf) < 1e-4 + +# compare the whole model output +outputs_torch = model.inference(input_ids) +outputs_tf = model_tf(tf.convert_to_tensor(input_ids.numpy())) +print(abs(outputs_torch[0].numpy()[:, 0] - outputs_tf[0].numpy()[:, 0]).mean()) +assert compare_torch_tf(outputs_torch[2][:, 50, :], + outputs_tf[2][:, 50, :]) < 1e-5 +assert compare_torch_tf(outputs_torch[0], outputs_tf[0]) < 1e-4 + +# %% +# save tf model 
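+# keep the step/epoch/r metadata from the torch checkpoint in the exported TF checkpoint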
+save_checkpoint(model_tf, None, checkpoint['step'], checkpoint['epoch'], + checkpoint['r'], args.output_path) +print(' > Model conversion is successfully completed :).') diff --git a/TTS/bin/distribute.py b/TTS/bin/distribute.py new file mode 100644 index 0000000000000000000000000000000000000000..390bd738def5375d8fed16f62f953d5cc7dc7f4d --- /dev/null +++ b/TTS/bin/distribute.py @@ -0,0 +1,69 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import os +import sys +import pathlib +import time +import subprocess +import argparse +import torch + + +def main(): + """ + Call train.py as a new process and pass command arguments + """ + parser = argparse.ArgumentParser() + parser.add_argument( + '--script', + type=str, + help='Target training script to distibute.') + parser.add_argument( + '--continue_path', + type=str, + help='Training output folder to continue training. Use to continue a training. If it is used, "config_path" is ignored.', + default='', + required='--config_path' not in sys.argv) + parser.add_argument( + '--restore_path', + type=str, + help='Model file to be restored. Use to finetune a model.', + default='') + parser.add_argument( + '--config_path', + type=str, + help='Path to config file for training.', + required='--continue_path' not in sys.argv + ) + args = parser.parse_args() + + num_gpus = torch.cuda.device_count() + group_id = time.strftime("%Y_%m_%d-%H%M%S") + + # set arguments for train.py + folder_path = pathlib.Path(__file__).parent.absolute() + command = [os.path.join(folder_path, args.script)] + command.append('--continue_path={}'.format(args.continue_path)) + command.append('--restore_path={}'.format(args.restore_path)) + command.append('--config_path={}'.format(args.config_path)) + command.append('--group_id=group_{}'.format(group_id)) + command.append('') + + # run processes + processes = [] + for i in range(num_gpus): + my_env = os.environ.copy() + my_env["PYTHON_EGG_CACHE"] = "/tmp/tmp{}".format(i) + command[-1] = '--rank={}'.format(i) + stdout = None if i == 0 else open(os.devnull, 'w') + p = subprocess.Popen(['python3'] + command, stdout=stdout, env=my_env) + processes.append(p) + print(command) + + for p in processes: + p.wait() + + +if __name__ == '__main__': + main() diff --git a/TTS/bin/synthesize.py b/TTS/bin/synthesize.py new file mode 100644 index 0000000000000000000000000000000000000000..b7ccf850e79cae9bcadb3396ce22bfebfbda40c4 --- /dev/null +++ b/TTS/bin/synthesize.py @@ -0,0 +1,218 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import argparse +import os +import sys +import string +from argparse import RawTextHelpFormatter +# pylint: disable=redefined-outer-name, unused-argument +from pathlib import Path + +from TTS.utils.manage import ModelManager +from TTS.utils.synthesizer import Synthesizer + + +def str2bool(v): + if isinstance(v, bool): + return v + if v.lower() in ('yes', 'true', 't', 'y', '1'): + return True + if v.lower() in ('no', 'false', 'f', 'n', '0'): + return False + raise argparse.ArgumentTypeError('Boolean value expected.') + + +def main(): + # pylint: disable=bad-continuation + parser = argparse.ArgumentParser(description='''Synthesize speech on command line.\n\n''' + + '''You can either use your trained model or choose a model from the provided list.\n'''\ + + ''' + Example runs: + + # list provided models + ./TTS/bin/synthesize.py --list_models + + # run a model from the list + ./TTS/bin/synthesize.py --text "Text for TTS" --model_name "//" --vocoder_name "//" --output_path + + # run your own TTS model (Using 
Griffin-Lim Vocoder) + ./TTS/bin/synthesize.py --text "Text for TTS" --model_path path/to/model.pth.tar --config_path path/to/config.json --out_path output/path/speech.wav + + # run your own TTS and Vocoder models + ./TTS/bin/synthesize.py --text "Text for TTS" --model_path path/to/config.json --config_path path/to/model.pth.tar --out_path output/path/speech.wav + --vocoder_path path/to/vocoder.pth.tar --vocoder_config_path path/to/vocoder_config.json + + ''', + formatter_class=RawTextHelpFormatter) + + parser.add_argument( + '--list_models', + type=str2bool, + nargs='?', + const=True, + default=False, + help='list available pre-trained tts and vocoder models.' + ) + parser.add_argument( + '--text', + type=str, + default=None, + help='Text to generate speech.' + ) + + # Args for running pre-trained TTS models. + parser.add_argument( + '--model_name', + type=str, + default=None, + help= + 'Name of one of the pre-trained tts models in format //' + ) + parser.add_argument( + '--vocoder_name', + type=str, + default=None, + help= + 'Name of one of the pre-trained vocoder models in format //' + ) + + # Args for running custom models + parser.add_argument( + '--config_path', + default=None, + type=str, + help='Path to model config file.' + ) + parser.add_argument( + '--model_path', + type=str, + default=None, + help='Path to model file.', + ) + parser.add_argument( + '--out_path', + type=str, + default=Path(__file__).resolve().parent, + help='Path to save final wav file. Wav file will be named as the given text.', + ) + parser.add_argument( + '--use_cuda', + type=bool, + help='Run model on CUDA.', + default=False + ) + parser.add_argument( + '--vocoder_path', + type=str, + help= + 'Path to vocoder model file. If it is not defined, model uses GL as vocoder. 
Please make sure that you installed vocoder library before (WaveRNN).', + default=None, + ) + parser.add_argument( + '--vocoder_config_path', + type=str, + help='Path to vocoder model config file.', + default=None) + + # args for multi-speaker synthesis + parser.add_argument( + '--speakers_json', + type=str, + help="JSON file for multi-speaker model.", + default=None) + parser.add_argument( + '--speaker_idx', + type=str, + help="if the tts model is trained with x-vectors, then speaker_idx is a file present in speakers.json else speaker_idx is the speaker id corresponding to a speaker in the speaker embedding layer.", + default=None) + parser.add_argument( + '--gst_style', + help="Wav path file for GST stylereference.", + default=None) + + # aux args + parser.add_argument( + '--save_spectogram', + type=bool, + help="If true save raw spectogram for further (vocoder) processing in out_path.", + default=False) + + args = parser.parse_args() + + # load model manager + path = Path(__file__).parent / "../.models.json" + manager = ModelManager(path) + + model_path = None + config_path = None + vocoder_path = None + vocoder_config_path = None + + # CASE1: list pre-trained TTS models + if args.list_models: + manager.list_models() + sys.exit() + + # CASE2: load pre-trained models + if args.model_name is not None: + model_path, config_path = manager.download_model(args.model_name) + + if args.vocoder_name is not None: + vocoder_path, vocoder_config_path = manager.download_model(args.vocoder_name) + + # CASE3: load custome models + if args.model_path is not None: + model_path = args.model_path + config_path = args.config_path + + if args.vocoder_path is not None: + vocoder_path = args.vocoder_path + vocoder_config_path = args.vocoder_config_path + + # RUN THE SYNTHESIS + # load models + synthesizer = Synthesizer(model_path, config_path, vocoder_path, vocoder_config_path, args.use_cuda) + + use_griffin_lim = vocoder_path is None + print(" > Text: {}".format(args.text)) + + # # handle multi-speaker setting + # if not model_config.use_external_speaker_embedding_file and args.speaker_idx is not None: + # if args.speaker_idx.isdigit(): + # args.speaker_idx = int(args.speaker_idx) + # else: + # args.speaker_idx = None + # else: + # args.speaker_idx = None + + # if args.gst_style is None: + # if 'gst' in model_config.keys() and model_config.gst['gst_style_input'] is not None: + # gst_style = model_config.gst['gst_style_input'] + # else: + # gst_style = None + # else: + # # check if gst_style string is a dict, if is dict convert else use string + # try: + # gst_style = json.loads(args.gst_style) + # if max(map(int, gst_style.keys())) >= model_config.gst['gst_style_tokens']: + # raise RuntimeError("The highest value of the gst_style dictionary key must be less than the number of GST Tokens, \n Highest dictionary key value: {} \n Number of GST tokens: {}".format(max(map(int, gst_style.keys())), model_config.gst['gst_style_tokens'])) + # except ValueError: + # gst_style = args.gst_style + + # kick it + wav = synthesizer.tts(args.text) + + # save the results + file_name = args.text.replace(" ", "_")[0:20] + file_name = file_name.translate( + str.maketrans('', '', string.punctuation.replace('_', ''))) + '.wav' + out_path = os.path.join(args.out_path, file_name) + print(" > Saving output to {}".format(out_path)) + synthesizer.save_wav(wav, out_path) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/TTS/bin/train_encoder.py b/TTS/bin/train_encoder.py new file mode 100644 index 
0000000000000000000000000000000000000000..5201f548b05e2c78b051490c5b8858739568685b --- /dev/null +++ b/TTS/bin/train_encoder.py @@ -0,0 +1,274 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import argparse +import os +import sys +import time +import traceback + +import torch +from torch.utils.data import DataLoader +from TTS.speaker_encoder.dataset import MyDataset +from TTS.speaker_encoder.losses import AngleProtoLoss, GE2ELoss +from TTS.speaker_encoder.model import SpeakerEncoder +from TTS.speaker_encoder.utils.generic_utils import \ + check_config_speaker_encoder, save_best_model +from TTS.speaker_encoder.utils.visual import plot_embeddings +from TTS.tts.datasets.preprocess import load_meta_data +from TTS.utils.audio import AudioProcessor +from TTS.utils.generic_utils import (count_parameters, + create_experiment_folder, get_git_branch, + remove_experiment_folder, set_init_dict) +from TTS.utils.io import copy_model_files, load_config +from TTS.utils.radam import RAdam +from TTS.utils.tensorboard_logger import TensorboardLogger +from TTS.utils.training import NoamLR, check_update + +torch.backends.cudnn.enabled = True +torch.backends.cudnn.benchmark = True +torch.manual_seed(54321) +use_cuda = torch.cuda.is_available() +num_gpus = torch.cuda.device_count() +print(" > Using CUDA: ", use_cuda) +print(" > Number of GPUs: ", num_gpus) + + +def setup_loader(ap: AudioProcessor, is_val: bool=False, verbose: bool=False): + if is_val: + loader = None + else: + dataset = MyDataset(ap, + meta_data_eval if is_val else meta_data_train, + voice_len=1.6, + num_utter_per_speaker=c.num_utters_per_speaker, + num_speakers_in_batch=c.num_speakers_in_batch, + skip_speakers=False, + storage_size=c.storage["storage_size"], + sample_from_storage_p=c.storage["sample_from_storage_p"], + additive_noise=c.storage["additive_noise"], + verbose=verbose) + # sampler = DistributedSampler(dataset) if num_gpus > 1 else None + loader = DataLoader(dataset, + batch_size=c.num_speakers_in_batch, + shuffle=False, + num_workers=c.num_loader_workers, + collate_fn=dataset.collate_fn) + return loader + + +def train(model, criterion, optimizer, scheduler, ap, global_step): + data_loader = setup_loader(ap, is_val=False, verbose=True) + model.train() + epoch_time = 0 + best_loss = float('inf') + avg_loss = 0 + avg_loader_time = 0 + end_time = time.time() + for _, data in enumerate(data_loader): + start_time = time.time() + + # setup input data + inputs = data[0] + loader_time = time.time() - end_time + global_step += 1 + + # setup lr + if c.lr_decay: + scheduler.step() + optimizer.zero_grad() + + # dispatch data to GPU + if use_cuda: + inputs = inputs.cuda(non_blocking=True) + # labels = labels.cuda(non_blocking=True) + + # forward pass model + outputs = model(inputs) + + # loss computation + loss = criterion( + outputs.view(c.num_speakers_in_batch, + outputs.shape[0] // c.num_speakers_in_batch, -1)) + loss.backward() + grad_norm, _ = check_update(model, c.grad_clip) + optimizer.step() + + step_time = time.time() - start_time + epoch_time += step_time + + # Averaged Loss and Averaged Loader Time + avg_loss = 0.01 * loss.item() \ + + 0.99 * avg_loss if avg_loss != 0 else loss.item() + avg_loader_time = 1/c.num_loader_workers * loader_time + \ + (c.num_loader_workers-1) / c.num_loader_workers * avg_loader_time if avg_loader_time != 0 else loader_time + current_lr = optimizer.param_groups[0]['lr'] + + if global_step % c.steps_plot_stats == 0: + # Plot Training Epoch Stats + train_stats = { + "loss": avg_loss, + "lr": current_lr, 
+ "grad_norm": grad_norm, + "step_time": step_time, + "avg_loader_time": avg_loader_time + } + tb_logger.tb_train_epoch_stats(global_step, train_stats) + figures = { + # FIXME: not constant + "UMAP Plot": plot_embeddings(outputs.detach().cpu().numpy(), + 10), + } + tb_logger.tb_train_figures(global_step, figures) + + if global_step % c.print_step == 0: + print( + " | > Step:{} Loss:{:.5f} AvgLoss:{:.5f} GradNorm:{:.5f} " + "StepTime:{:.2f} LoaderTime:{:.2f} AvGLoaderTime:{:.2f} LR:{:.6f}".format( + global_step, loss.item(), avg_loss, grad_norm, step_time, + loader_time, avg_loader_time, current_lr), + flush=True) + + # save best model + best_loss = save_best_model(model, optimizer, avg_loss, best_loss, + OUT_PATH, global_step) + + end_time = time.time() + return avg_loss, global_step + + +def main(args): # pylint: disable=redefined-outer-name + # pylint: disable=global-variable-undefined + global meta_data_train + global meta_data_eval + + ap = AudioProcessor(**c.audio) + model = SpeakerEncoder(input_dim=c.model['input_dim'], + proj_dim=c.model['proj_dim'], + lstm_dim=c.model['lstm_dim'], + num_lstm_layers=c.model['num_lstm_layers']) + optimizer = RAdam(model.parameters(), lr=c.lr) + + if c.loss == "ge2e": + criterion = GE2ELoss(loss_method='softmax') + elif c.loss == "angleproto": + criterion = AngleProtoLoss() + else: + raise Exception("The %s not is a loss supported" % c.loss) + + if args.restore_path: + checkpoint = torch.load(args.restore_path) + try: + # TODO: fix optimizer init, model.cuda() needs to be called before + # optimizer restore + # optimizer.load_state_dict(checkpoint['optimizer']) + if c.reinit_layers: + raise RuntimeError + model.load_state_dict(checkpoint['model']) + except KeyError: + print(" > Partial model initialization.") + model_dict = model.state_dict() + model_dict = set_init_dict(model_dict, checkpoint, c) + model.load_state_dict(model_dict) + del model_dict + for group in optimizer.param_groups: + group['lr'] = c.lr + print(" > Model restored from step %d" % checkpoint['step'], + flush=True) + args.restore_step = checkpoint['step'] + else: + args.restore_step = 0 + + if use_cuda: + model = model.cuda() + criterion.cuda() + + if c.lr_decay: + scheduler = NoamLR(optimizer, + warmup_steps=c.warmup_steps, + last_epoch=args.restore_step - 1) + else: + scheduler = None + + num_params = count_parameters(model) + print("\n > Model has {} parameters".format(num_params), flush=True) + + # pylint: disable=redefined-outer-name + meta_data_train, meta_data_eval = load_meta_data(c.datasets) + + global_step = args.restore_step + _, global_step = train(model, criterion, optimizer, scheduler, ap, + global_step) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument( + '--restore_path', + type=str, + help='Path to model outputs (checkpoint, tensorboard etc.).', + default=0) + parser.add_argument( + '--config_path', + type=str, + required=True, + help='Path to config file for training.', + ) + parser.add_argument('--debug', + type=bool, + default=True, + help='Do not verify commit integrity to run training.') + parser.add_argument( + '--data_path', + type=str, + default='', + help='Defines the data path. 
It overwrites config.json.') + parser.add_argument('--output_path', + type=str, + help='path for training outputs.', + default='') + parser.add_argument('--output_folder', + type=str, + default='', + help='folder name for training outputs.') + args = parser.parse_args() + + # setup output paths and read configs + c = load_config(args.config_path) + check_config_speaker_encoder(c) + _ = os.path.dirname(os.path.realpath(__file__)) + if args.data_path != '': + c.data_path = args.data_path + + if args.output_path == '': + OUT_PATH = os.path.join(_, c.output_path) + else: + OUT_PATH = args.output_path + + if args.output_folder == '': + OUT_PATH = create_experiment_folder(OUT_PATH, c.run_name, args.debug) + else: + OUT_PATH = os.path.join(OUT_PATH, args.output_folder) + + new_fields = {} + if args.restore_path: + new_fields["restore_path"] = args.restore_path + new_fields["github_branch"] = get_git_branch() + copy_model_files(c, args.config_path, OUT_PATH, + new_fields) + + LOG_DIR = OUT_PATH + tb_logger = TensorboardLogger(LOG_DIR, model_name='Speaker_Encoder') + + try: + main(args) + except KeyboardInterrupt: + remove_experiment_folder(OUT_PATH) + try: + sys.exit(0) + except SystemExit: + os._exit(0) # pylint: disable=protected-access + except Exception: # pylint: disable=broad-except + remove_experiment_folder(OUT_PATH) + traceback.print_exc() + sys.exit(1) diff --git a/TTS/bin/train_glow_tts.py b/TTS/bin/train_glow_tts.py new file mode 100644 index 0000000000000000000000000000000000000000..d03ab1eec4d6a60c8c505597f533cde3f37210ec --- /dev/null +++ b/TTS/bin/train_glow_tts.py @@ -0,0 +1,657 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import argparse +import glob +import os +import sys +import time +import traceback +from random import randrange + +import torch +# DISTRIBUTED +from torch.nn.parallel import DistributedDataParallel as DDP_th +from torch.utils.data import DataLoader +from torch.utils.data.distributed import DistributedSampler +from TTS.tts.datasets.preprocess import load_meta_data +from TTS.tts.datasets.TTSDataset import MyDataset +from TTS.tts.layers.losses import GlowTTSLoss +from TTS.tts.utils.generic_utils import check_config_tts, setup_model +from TTS.tts.utils.io import save_best_model, save_checkpoint +from TTS.tts.utils.measures import alignment_diagonal_score +from TTS.tts.utils.speakers import parse_speakers +from TTS.tts.utils.synthesis import synthesis +from TTS.tts.utils.text.symbols import make_symbols, phonemes, symbols +from TTS.tts.utils.visual import plot_alignment, plot_spectrogram +from TTS.utils.audio import AudioProcessor +from TTS.utils.console_logger import ConsoleLogger +from TTS.utils.distribute import init_distributed, reduce_tensor +from TTS.utils.generic_utils import (KeepAverage, count_parameters, + create_experiment_folder, get_git_branch, + remove_experiment_folder, set_init_dict) +from TTS.utils.io import copy_model_files, load_config +from TTS.utils.radam import RAdam +from TTS.utils.tensorboard_logger import TensorboardLogger +from TTS.utils.training import NoamLR, setup_torch_training_env + +use_cuda, num_gpus = setup_torch_training_env(True, False) + +def setup_loader(ap, r, is_val=False, verbose=False): + if is_val and not c.run_eval: + loader = None + else: + dataset = MyDataset( + r, + c.text_cleaner, + compute_linear_spec=False, + meta_data=meta_data_eval if is_val else meta_data_train, + ap=ap, + tp=c.characters if 'characters' in c.keys() else None, + add_blank=c['add_blank'] if 'add_blank' in c.keys() else False, + 
batch_group_size=0 if is_val else c.batch_group_size * + c.batch_size, + min_seq_len=c.min_seq_len, + max_seq_len=c.max_seq_len, + phoneme_cache_path=c.phoneme_cache_path, + use_phonemes=c.use_phonemes, + phoneme_language=c.phoneme_language, + enable_eos_bos=c.enable_eos_bos_chars, + use_noise_augment=c['use_noise_augment'] and not is_val, + verbose=verbose, + speaker_mapping=speaker_mapping if c.use_speaker_embedding and c.use_external_speaker_embedding_file else None) + + if c.use_phonemes and c.compute_input_seq_cache: + # precompute phonemes to have a better estimate of sequence lengths. + dataset.compute_input_seq(c.num_loader_workers) + dataset.sort_items() + + sampler = DistributedSampler(dataset) if num_gpus > 1 else None + loader = DataLoader( + dataset, + batch_size=c.eval_batch_size if is_val else c.batch_size, + shuffle=False, + collate_fn=dataset.collate_fn, + drop_last=False, + sampler=sampler, + num_workers=c.num_val_loader_workers + if is_val else c.num_loader_workers, + pin_memory=False) + return loader + + +def format_data(data): + # setup input data + text_input = data[0] + text_lengths = data[1] + speaker_names = data[2] + mel_input = data[4].permute(0, 2, 1) # B x D x T + mel_lengths = data[5] + item_idx = data[7] + attn_mask = data[9] + avg_text_length = torch.mean(text_lengths.float()) + avg_spec_length = torch.mean(mel_lengths.float()) + + if c.use_speaker_embedding: + if c.use_external_speaker_embedding_file: + # return precomputed embedding vector + speaker_c = data[8] + else: + # return speaker_id to be used by an embedding layer + speaker_c = [ + speaker_mapping[speaker_name] for speaker_name in speaker_names + ] + speaker_c = torch.LongTensor(speaker_c) + else: + speaker_c = None + + # dispatch data to GPU + if use_cuda: + text_input = text_input.cuda(non_blocking=True) + text_lengths = text_lengths.cuda(non_blocking=True) + mel_input = mel_input.cuda(non_blocking=True) + mel_lengths = mel_lengths.cuda(non_blocking=True) + if speaker_c is not None: + speaker_c = speaker_c.cuda(non_blocking=True) + if attn_mask is not None: + attn_mask = attn_mask.cuda(non_blocking=True) + return text_input, text_lengths, mel_input, mel_lengths, speaker_c,\ + avg_text_length, avg_spec_length, attn_mask, item_idx + + +def data_depended_init(data_loader, model, ap): + """Data depended initialization for activation normalization.""" + if hasattr(model, 'module'): + for f in model.module.decoder.flows: + if getattr(f, "set_ddi", False): + f.set_ddi(True) + else: + for f in model.decoder.flows: + if getattr(f, "set_ddi", False): + f.set_ddi(True) + + model.train() + print(" > Data depended initialization ... 
") + num_iter = 0 + with torch.no_grad(): + for _, data in enumerate(data_loader): + + # format data + text_input, text_lengths, mel_input, mel_lengths, spekaer_embed,\ + _, _, attn_mask, item_idx = format_data(data) + + # forward pass model + _ = model.forward( + text_input, text_lengths, mel_input, mel_lengths, attn_mask, g=spekaer_embed) + if num_iter == c.data_dep_init_iter: + break + num_iter += 1 + + if hasattr(model, 'module'): + for f in model.module.decoder.flows: + if getattr(f, "set_ddi", False): + f.set_ddi(False) + else: + for f in model.decoder.flows: + if getattr(f, "set_ddi", False): + f.set_ddi(False) + return model + + +def train(data_loader, model, criterion, optimizer, scheduler, + ap, global_step, epoch): + + model.train() + epoch_time = 0 + keep_avg = KeepAverage() + if use_cuda: + batch_n_iter = int( + len(data_loader.dataset) / (c.batch_size * num_gpus)) + else: + batch_n_iter = int(len(data_loader.dataset) / c.batch_size) + end_time = time.time() + c_logger.print_train_start() + scaler = torch.cuda.amp.GradScaler() if c.mixed_precision else None + for num_iter, data in enumerate(data_loader): + start_time = time.time() + + # format data + text_input, text_lengths, mel_input, mel_lengths, speaker_c,\ + avg_text_length, avg_spec_length, attn_mask, item_idx = format_data(data) + + loader_time = time.time() - end_time + + global_step += 1 + optimizer.zero_grad() + + # forward pass model + with torch.cuda.amp.autocast(enabled=c.mixed_precision): + z, logdet, y_mean, y_log_scale, alignments, o_dur_log, o_total_dur = model.forward( + text_input, text_lengths, mel_input, mel_lengths, attn_mask, g=speaker_c) + + # compute loss + loss_dict = criterion(z, y_mean, y_log_scale, logdet, mel_lengths, + o_dur_log, o_total_dur, text_lengths) + + # backward pass with loss scaling + if c.mixed_precision: + scaler.scale(loss_dict['loss']).backward() + scaler.unscale_(optimizer) + grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), + c.grad_clip) + scaler.step(optimizer) + scaler.update() + else: + loss_dict['loss'].backward() + grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), + c.grad_clip) + optimizer.step() + + # setup lr + if c.noam_schedule: + scheduler.step() + + # current_lr + current_lr = optimizer.param_groups[0]['lr'] + + # compute alignment error (the lower the better ) + align_error = 1 - alignment_diagonal_score(alignments, binary=True) + loss_dict['align_error'] = align_error + + step_time = time.time() - start_time + epoch_time += step_time + + # aggregate losses from processes + if num_gpus > 1: + loss_dict['log_mle'] = reduce_tensor(loss_dict['log_mle'].data, num_gpus) + loss_dict['loss_dur'] = reduce_tensor(loss_dict['loss_dur'].data, num_gpus) + loss_dict['loss'] = reduce_tensor(loss_dict['loss'] .data, num_gpus) + + # detach loss values + loss_dict_new = dict() + for key, value in loss_dict.items(): + if isinstance(value, (int, float)): + loss_dict_new[key] = value + else: + loss_dict_new[key] = value.item() + loss_dict = loss_dict_new + + # update avg stats + update_train_values = dict() + for key, value in loss_dict.items(): + update_train_values['avg_' + key] = value + update_train_values['avg_loader_time'] = loader_time + update_train_values['avg_step_time'] = step_time + keep_avg.update_values(update_train_values) + + # print training progress + if global_step % c.print_step == 0: + log_dict = { + "avg_spec_length": [avg_spec_length, 1], # value, precision + "avg_text_length": [avg_text_length, 1], + "step_time": [step_time, 4], + 
"loader_time": [loader_time, 2], + "current_lr": current_lr, + } + c_logger.print_train_step(batch_n_iter, num_iter, global_step, + log_dict, loss_dict, keep_avg.avg_values) + + if args.rank == 0: + # Plot Training Iter Stats + # reduce TB load + if global_step % c.tb_plot_step == 0: + iter_stats = { + "lr": current_lr, + "grad_norm": grad_norm, + "step_time": step_time + } + iter_stats.update(loss_dict) + tb_logger.tb_train_iter_stats(global_step, iter_stats) + + if global_step % c.save_step == 0: + if c.checkpoint: + # save model + save_checkpoint(model, optimizer, global_step, epoch, 1, OUT_PATH, + model_loss=loss_dict['loss']) + + # wait all kernels to be completed + torch.cuda.synchronize() + + # Diagnostic visualizations + # direct pass on model for spec predictions + target_speaker = None if speaker_c is None else speaker_c[:1] + + if hasattr(model, 'module'): + spec_pred, *_ = model.module.inference(text_input[:1], text_lengths[:1], g=target_speaker) + else: + spec_pred, *_ = model.inference(text_input[:1], text_lengths[:1], g=target_speaker) + + spec_pred = spec_pred.permute(0, 2, 1) + gt_spec = mel_input.permute(0, 2, 1) + const_spec = spec_pred[0].data.cpu().numpy() + gt_spec = gt_spec[0].data.cpu().numpy() + align_img = alignments[0].data.cpu().numpy() + + figures = { + "prediction": plot_spectrogram(const_spec, ap), + "ground_truth": plot_spectrogram(gt_spec, ap), + "alignment": plot_alignment(align_img), + } + + tb_logger.tb_train_figures(global_step, figures) + + # Sample audio + train_audio = ap.inv_melspectrogram(const_spec.T) + tb_logger.tb_train_audios(global_step, + {'TrainAudio': train_audio}, + c.audio["sample_rate"]) + end_time = time.time() + + # print epoch stats + c_logger.print_train_epoch_end(global_step, epoch, epoch_time, keep_avg) + + # Plot Epoch Stats + if args.rank == 0: + epoch_stats = {"epoch_time": epoch_time} + epoch_stats.update(keep_avg.avg_values) + tb_logger.tb_train_epoch_stats(global_step, epoch_stats) + if c.tb_model_param_stats: + tb_logger.tb_model_weights(model, global_step) + return keep_avg.avg_values, global_step + + +@torch.no_grad() +def evaluate(data_loader, model, criterion, ap, global_step, epoch): + model.eval() + epoch_time = 0 + keep_avg = KeepAverage() + c_logger.print_eval_start() + if data_loader is not None: + for num_iter, data in enumerate(data_loader): + start_time = time.time() + + # format data + text_input, text_lengths, mel_input, mel_lengths, speaker_c,\ + _, _, attn_mask, item_idx = format_data(data) + + # forward pass model + z, logdet, y_mean, y_log_scale, alignments, o_dur_log, o_total_dur = model.forward( + text_input, text_lengths, mel_input, mel_lengths, attn_mask, g=speaker_c) + + # compute loss + loss_dict = criterion(z, y_mean, y_log_scale, logdet, mel_lengths, + o_dur_log, o_total_dur, text_lengths) + + # step time + step_time = time.time() - start_time + epoch_time += step_time + + # compute alignment score + align_error = 1 - alignment_diagonal_score(alignments) + loss_dict['align_error'] = align_error + + # aggregate losses from processes + if num_gpus > 1: + loss_dict['log_mle'] = reduce_tensor(loss_dict['log_mle'].data, num_gpus) + loss_dict['loss_dur'] = reduce_tensor(loss_dict['loss_dur'].data, num_gpus) + loss_dict['loss'] = reduce_tensor(loss_dict['loss'] .data, num_gpus) + + # detach loss values + loss_dict_new = dict() + for key, value in loss_dict.items(): + if isinstance(value, (int, float)): + loss_dict_new[key] = value + else: + loss_dict_new[key] = value.item() + loss_dict = loss_dict_new + + 
# update avg stats + update_train_values = dict() + for key, value in loss_dict.items(): + update_train_values['avg_' + key] = value + keep_avg.update_values(update_train_values) + + if c.print_eval: + c_logger.print_eval_step(num_iter, loss_dict, keep_avg.avg_values) + + if args.rank == 0: + # Diagnostic visualizations + # direct pass on model for spec predictions + target_speaker = None if speaker_c is None else speaker_c[:1] + if hasattr(model, 'module'): + spec_pred, *_ = model.module.inference(text_input[:1], text_lengths[:1], g=target_speaker) + else: + spec_pred, *_ = model.inference(text_input[:1], text_lengths[:1], g=target_speaker) + spec_pred = spec_pred.permute(0, 2, 1) + gt_spec = mel_input.permute(0, 2, 1) + + const_spec = spec_pred[0].data.cpu().numpy() + gt_spec = gt_spec[0].data.cpu().numpy() + align_img = alignments[0].data.cpu().numpy() + + eval_figures = { + "prediction": plot_spectrogram(const_spec, ap), + "ground_truth": plot_spectrogram(gt_spec, ap), + "alignment": plot_alignment(align_img) + } + + # Sample audio + eval_audio = ap.inv_melspectrogram(const_spec.T) + tb_logger.tb_eval_audios(global_step, {"ValAudio": eval_audio}, + c.audio["sample_rate"]) + + # Plot Validation Stats + tb_logger.tb_eval_stats(global_step, keep_avg.avg_values) + tb_logger.tb_eval_figures(global_step, eval_figures) + + if args.rank == 0 and epoch >= c.test_delay_epochs: + if c.test_sentences_file is None: + test_sentences = [ + "It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.", + "Be a voice, not an echo.", + "I'm sorry Dave. I'm afraid I can't do that.", + "This cake is great. It's so delicious and moist.", + "Prior to November 22, 1963." + ] + else: + with open(c.test_sentences_file, "r") as f: + test_sentences = [s.strip() for s in f.readlines()] + + # test sentences + test_audios = {} + test_figures = {} + print(" | > Synthesizing test sentences") + if c.use_speaker_embedding: + if c.use_external_speaker_embedding_file: + speaker_embedding = speaker_mapping[list(speaker_mapping.keys())[randrange(len(speaker_mapping)-1)]]['embedding'] + speaker_id = None + else: + speaker_id = 0 + speaker_embedding = None + else: + speaker_id = None + speaker_embedding = None + + style_wav = c.get("style_wav_for_test") + for idx, test_sentence in enumerate(test_sentences): + try: + wav, alignment, _, postnet_output, _, _ = synthesis( + model, + test_sentence, + c, + use_cuda, + ap, + speaker_id=speaker_id, + speaker_embedding=speaker_embedding, + style_wav=style_wav, + truncated=False, + enable_eos_bos_chars=c.enable_eos_bos_chars, #pylint: disable=unused-argument + use_griffin_lim=True, + do_trim_silence=False) + + file_path = os.path.join(AUDIO_PATH, str(global_step)) + os.makedirs(file_path, exist_ok=True) + file_path = os.path.join(file_path, + "TestSentence_{}.wav".format(idx)) + ap.save_wav(wav, file_path) + test_audios['{}-audio'.format(idx)] = wav + test_figures['{}-prediction'.format(idx)] = plot_spectrogram( + postnet_output, ap) + test_figures['{}-alignment'.format(idx)] = plot_alignment( + alignment) + except: #pylint: disable=bare-except + print(" !! Error creating Test Sentence -", idx) + traceback.print_exc() + tb_logger.tb_test_audios(global_step, test_audios, + c.audio['sample_rate']) + tb_logger.tb_test_figures(global_step, test_figures) + return keep_avg.avg_values + + +# FIXME: move args definition/parsing inside of main? 
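
Both the train and eval loops above funnel per-step values through `keep_avg.update_values(...)` and read them back from `keep_avg.avg_values`. For reference, a minimal running-average accumulator along the lines of what `TTS.utils.generic_utils.KeepAverage` presumably provides (an illustrative sketch, not the repository's exact implementation):

```python
# Illustrative running-average accumulator (assumption: the real KeepAverage in
# TTS.utils.generic_utils may differ in detail, e.g. in how it tracks counts).
class KeepAverage:
    def __init__(self):
        self.avg_values = {}  # metric name -> running mean
        self.iters = {}       # metric name -> number of updates seen

    def update_value(self, name, value):
        if name not in self.avg_values:
            self.avg_values[name] = value
            self.iters[name] = 1
        else:
            self.iters[name] += 1
            # incremental mean: m_k = m_{k-1} + (x_k - m_{k-1}) / k
            self.avg_values[name] += (value - self.avg_values[name]) / self.iters[name]

    def update_values(self, value_dict):
        for name, value in value_dict.items():
            self.update_value(name, value)
```

Keeping an incremental mean avoids storing every per-step loss while still giving stable epoch-level numbers for the console and TensorBoard summaries.
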
+def main(args): # pylint: disable=redefined-outer-name + # pylint: disable=global-variable-undefined + global meta_data_train, meta_data_eval, symbols, phonemes, speaker_mapping + # Audio processor + ap = AudioProcessor(**c.audio) + if 'characters' in c.keys(): + symbols, phonemes = make_symbols(**c.characters) + + # DISTRUBUTED + if num_gpus > 1: + init_distributed(args.rank, num_gpus, args.group_id, + c.distributed["backend"], c.distributed["url"]) + num_chars = len(phonemes) if c.use_phonemes else len(symbols) + + # load data instances + meta_data_train, meta_data_eval = load_meta_data(c.datasets) + + # set the portion of the data used for training + if 'train_portion' in c.keys(): + meta_data_train = meta_data_train[:int(len(meta_data_train) * c.train_portion)] + if 'eval_portion' in c.keys(): + meta_data_eval = meta_data_eval[:int(len(meta_data_eval) * c.eval_portion)] + + # parse speakers + num_speakers, speaker_embedding_dim, speaker_mapping = parse_speakers(c, args, meta_data_train, OUT_PATH) + + # setup model + model = setup_model(num_chars, num_speakers, c, speaker_embedding_dim=speaker_embedding_dim) + optimizer = RAdam(model.parameters(), lr=c.lr, weight_decay=0, betas=(0.9, 0.98), eps=1e-9) + criterion = GlowTTSLoss() + + if args.restore_path: + checkpoint = torch.load(args.restore_path, map_location='cpu') + try: + # TODO: fix optimizer init, model.cuda() needs to be called before + # optimizer restore + optimizer.load_state_dict(checkpoint['optimizer']) + if c.reinit_layers: + raise RuntimeError + model.load_state_dict(checkpoint['model']) + except: #pylint: disable=bare-except + print(" > Partial model initialization.") + model_dict = model.state_dict() + model_dict = set_init_dict(model_dict, checkpoint['model'], c) + model.load_state_dict(model_dict) + del model_dict + + for group in optimizer.param_groups: + group['initial_lr'] = c.lr + print(" > Model restored from step %d" % checkpoint['step'], + flush=True) + args.restore_step = checkpoint['step'] + else: + args.restore_step = 0 + + if use_cuda: + model.cuda() + criterion.cuda() + + # DISTRUBUTED + if num_gpus > 1: + model = DDP_th(model, device_ids=[args.rank]) + + if c.noam_schedule: + scheduler = NoamLR(optimizer, + warmup_steps=c.warmup_steps, + last_epoch=args.restore_step - 1) + else: + scheduler = None + + num_params = count_parameters(model) + print("\n > Model has {} parameters".format(num_params), flush=True) + + if 'best_loss' not in locals(): + best_loss = float('inf') + + # define dataloaders + train_loader = setup_loader(ap, 1, is_val=False, verbose=True) + eval_loader = setup_loader(ap, 1, is_val=True, verbose=True) + + global_step = args.restore_step + model = data_depended_init(train_loader, model, ap) + for epoch in range(0, c.epochs): + c_logger.print_epoch_start(epoch, c.epochs) + train_avg_loss_dict, global_step = train(train_loader, model, criterion, optimizer, + scheduler, ap, global_step, + epoch) + eval_avg_loss_dict = evaluate(eval_loader , model, criterion, ap, global_step, epoch) + c_logger.print_epoch_end(epoch, eval_avg_loss_dict) + target_loss = train_avg_loss_dict['avg_loss'] + if c.run_eval: + target_loss = eval_avg_loss_dict['avg_loss'] + best_loss = save_best_model(target_loss, best_loss, model, optimizer, global_step, epoch, c.r, + OUT_PATH) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument( + '--continue_path', + type=str, + help='Training output folder to continue training. Use to continue a training. 
If it is used, "config_path" is ignored.', + default='', + required='--config_path' not in sys.argv) + parser.add_argument( + '--restore_path', + type=str, + help='Model file to be restored. Use to finetune a model.', + default='') + parser.add_argument( + '--config_path', + type=str, + help='Path to config file for training.', + required='--continue_path' not in sys.argv + ) + parser.add_argument('--debug', + type=bool, + default=False, + help='Do not verify commit integrity to run training.') + + # DISTRUBUTED + parser.add_argument( + '--rank', + type=int, + default=0, + help='DISTRIBUTED: process rank for distributed training.') + parser.add_argument('--group_id', + type=str, + default="", + help='DISTRIBUTED: process group id.') + args = parser.parse_args() + + if args.continue_path != '': + args.output_path = args.continue_path + args.config_path = os.path.join(args.continue_path, 'config.json') + list_of_files = glob.glob(args.continue_path + "/*.pth.tar") # * means all if need specific format then *.csv + latest_model_file = max(list_of_files, key=os.path.getctime) + args.restore_path = latest_model_file + print(f" > Training continues for {args.restore_path}") + + # setup output paths and read configs + c = load_config(args.config_path) + # check_config(c) + check_config_tts(c) + _ = os.path.dirname(os.path.realpath(__file__)) + + if c.mixed_precision: + print(" > Mixed precision enabled.") + + OUT_PATH = args.continue_path + if args.continue_path == '': + OUT_PATH = create_experiment_folder(c.output_path, c.run_name, args.debug) + + AUDIO_PATH = os.path.join(OUT_PATH, 'test_audios') + + c_logger = ConsoleLogger() + + if args.rank == 0: + os.makedirs(AUDIO_PATH, exist_ok=True) + new_fields = {} + if args.restore_path: + new_fields["restore_path"] = args.restore_path + new_fields["github_branch"] = get_git_branch() + copy_model_files(c, args.config_path, + OUT_PATH, new_fields) + os.chmod(AUDIO_PATH, 0o775) + os.chmod(OUT_PATH, 0o775) + + LOG_DIR = OUT_PATH + tb_logger = TensorboardLogger(LOG_DIR, model_name='TTS') + + # write model desc to tensorboard + tb_logger.tb_add_text('model-description', c['run_description'], 0) + + try: + main(args) + except KeyboardInterrupt: + remove_experiment_folder(OUT_PATH) + try: + sys.exit(0) + except SystemExit: + os._exit(0) # pylint: disable=protected-access + except Exception: # pylint: disable=broad-except + remove_experiment_folder(OUT_PATH) + traceback.print_exc() + sys.exit(1) diff --git a/TTS/bin/train_speedy_speech.py b/TTS/bin/train_speedy_speech.py new file mode 100644 index 0000000000000000000000000000000000000000..a24cf8bcd4927db206c2919b7afadcd91f170821 --- /dev/null +++ b/TTS/bin/train_speedy_speech.py @@ -0,0 +1,618 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import argparse +import glob +import os +import sys +import time +import traceback +import numpy as np +from random import randrange + +import torch +# DISTRIBUTED +from torch.nn.parallel import DistributedDataParallel as DDP_th +from torch.utils.data import DataLoader +from torch.utils.data.distributed import DistributedSampler +from TTS.tts.datasets.preprocess import load_meta_data +from TTS.tts.datasets.TTSDataset import MyDataset +from TTS.tts.layers.losses import SpeedySpeechLoss +from TTS.tts.utils.generic_utils import check_config_tts, setup_model +from TTS.tts.utils.io import save_best_model, save_checkpoint +from TTS.tts.utils.measures import alignment_diagonal_score +from TTS.tts.utils.speakers import parse_speakers +from TTS.tts.utils.synthesis import 
synthesis +from TTS.tts.utils.text.symbols import make_symbols, phonemes, symbols +from TTS.tts.utils.visual import plot_alignment, plot_spectrogram +from TTS.utils.audio import AudioProcessor +from TTS.utils.console_logger import ConsoleLogger +from TTS.utils.distribute import init_distributed, reduce_tensor +from TTS.utils.generic_utils import (KeepAverage, count_parameters, + create_experiment_folder, get_git_branch, + remove_experiment_folder, set_init_dict) +from TTS.utils.io import copy_model_files, load_config +from TTS.utils.radam import RAdam +from TTS.utils.tensorboard_logger import TensorboardLogger +from TTS.utils.training import NoamLR, setup_torch_training_env + +use_cuda, num_gpus = setup_torch_training_env(True, False) + + +def setup_loader(ap, r, is_val=False, verbose=False): + if is_val and not c.run_eval: + loader = None + else: + dataset = MyDataset( + r, + c.text_cleaner, + compute_linear_spec=False, + meta_data=meta_data_eval if is_val else meta_data_train, + ap=ap, + tp=c.characters if 'characters' in c.keys() else None, + add_blank=c['add_blank'] if 'add_blank' in c.keys() else False, + batch_group_size=0 if is_val else c.batch_group_size * + c.batch_size, + min_seq_len=c.min_seq_len, + max_seq_len=c.max_seq_len, + phoneme_cache_path=c.phoneme_cache_path, + use_phonemes=c.use_phonemes, + phoneme_language=c.phoneme_language, + enable_eos_bos=c.enable_eos_bos_chars, + use_noise_augment=not is_val, + verbose=verbose, + speaker_mapping=speaker_mapping if c.use_speaker_embedding and c.use_external_speaker_embedding_file else None) + + if c.use_phonemes and c.compute_input_seq_cache: + # precompute phonemes to have a better estimate of sequence lengths. + dataset.compute_input_seq(c.num_loader_workers) + dataset.sort_items() + + sampler = DistributedSampler(dataset) if num_gpus > 1 else None + loader = DataLoader( + dataset, + batch_size=c.eval_batch_size if is_val else c.batch_size, + shuffle=False, + collate_fn=dataset.collate_fn, + drop_last=False, + sampler=sampler, + num_workers=c.num_val_loader_workers + if is_val else c.num_loader_workers, + pin_memory=False) + return loader + + +def format_data(data): + # setup input data + text_input = data[0] + text_lengths = data[1] + speaker_names = data[2] + mel_input = data[4].permute(0, 2, 1) # B x D x T + mel_lengths = data[5] + item_idx = data[7] + attn_mask = data[9] + avg_text_length = torch.mean(text_lengths.float()) + avg_spec_length = torch.mean(mel_lengths.float()) + + if c.use_speaker_embedding: + if c.use_external_speaker_embedding_file: + # return precomputed embedding vector + speaker_c = data[8] + else: + # return speaker_id to be used by an embedding layer + speaker_c = [ + speaker_mapping[speaker_name] for speaker_name in speaker_names + ] + speaker_c = torch.LongTensor(speaker_c) + else: + speaker_c = None + # compute durations from attention mask + durations = torch.zeros(attn_mask.shape[0], attn_mask.shape[2]) + for idx, am in enumerate(attn_mask): + # compute raw durations + c_idxs = am[:, :text_lengths[idx], :mel_lengths[idx]].max(1)[1] + # c_idxs, counts = torch.unique_consecutive(c_idxs, return_counts=True) + c_idxs, counts = torch.unique(c_idxs, return_counts=True) + dur = torch.ones([text_lengths[idx]]).to(counts.dtype) + dur[c_idxs] = counts + # smooth the durations and set any 0 duration to 1 + # by cutting off from the largest duration indeces. 
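+            # Illustrative example (editor note, not part of the original diff): with a
+            # 4-token text and 7 spectrogram frames whose attention argmax assigns frames
+            # to tokens [0, 0, 1, 1, 1, 3, 3], the counts give durations [2, 3, 1, 2];
+            # token 2 is never selected, so it keeps the default duration of 1 from
+            # torch.ones(). The sum is then 8 for 7 frames, so one frame is trimmed from
+            # the largest duration below, yielding [2, 2, 1, 2] and satisfying the assert.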
+ extra_frames = dur.sum() - mel_lengths[idx] + largest_idxs = torch.argsort(-dur)[:extra_frames] + dur[largest_idxs] -= 1 + assert dur.sum() == mel_lengths[idx], f" [!] total duration {dur.sum()} vs spectrogram length {mel_lengths[idx]}" + durations[idx, :text_lengths[idx]] = dur + # dispatch data to GPU + if use_cuda: + text_input = text_input.cuda(non_blocking=True) + text_lengths = text_lengths.cuda(non_blocking=True) + mel_input = mel_input.cuda(non_blocking=True) + mel_lengths = mel_lengths.cuda(non_blocking=True) + if speaker_c is not None: + speaker_c = speaker_c.cuda(non_blocking=True) + attn_mask = attn_mask.cuda(non_blocking=True) + durations = durations.cuda(non_blocking=True) + return text_input, text_lengths, mel_input, mel_lengths, speaker_c,\ + avg_text_length, avg_spec_length, attn_mask, durations, item_idx + + +def train(data_loader, model, criterion, optimizer, scheduler, + ap, global_step, epoch): + + model.train() + epoch_time = 0 + keep_avg = KeepAverage() + if use_cuda: + batch_n_iter = int( + len(data_loader.dataset) / (c.batch_size * num_gpus)) + else: + batch_n_iter = int(len(data_loader.dataset) / c.batch_size) + end_time = time.time() + c_logger.print_train_start() + scaler = torch.cuda.amp.GradScaler() if c.mixed_precision else None + for num_iter, data in enumerate(data_loader): + start_time = time.time() + + # format data + text_input, text_lengths, mel_targets, mel_lengths, speaker_c,\ + avg_text_length, avg_spec_length, _, dur_target, _ = format_data(data) + + loader_time = time.time() - end_time + + global_step += 1 + optimizer.zero_grad() + + # forward pass model + with torch.cuda.amp.autocast(enabled=c.mixed_precision): + decoder_output, dur_output, alignments = model.forward( + text_input, text_lengths, mel_lengths, dur_target, g=speaker_c) + + # compute loss + loss_dict = criterion(decoder_output, mel_targets, mel_lengths, dur_output, torch.log(1 + dur_target), text_lengths) + + # backward pass with loss scaling + if c.mixed_precision: + scaler.scale(loss_dict['loss']).backward() + scaler.unscale_(optimizer) + grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), + c.grad_clip) + scaler.step(optimizer) + scaler.update() + else: + loss_dict['loss'].backward() + grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), + c.grad_clip) + optimizer.step() + + # setup lr + if c.noam_schedule: + scheduler.step() + + # current_lr + current_lr = optimizer.param_groups[0]['lr'] + + # compute alignment error (the lower the better ) + align_error = 1 - alignment_diagonal_score(alignments, binary=True) + loss_dict['align_error'] = align_error + + step_time = time.time() - start_time + epoch_time += step_time + + # aggregate losses from processes + if num_gpus > 1: + loss_dict['loss_l1'] = reduce_tensor(loss_dict['loss_l1'].data, num_gpus) + loss_dict['loss_ssim'] = reduce_tensor(loss_dict['loss_ssim'].data, num_gpus) + loss_dict['loss_dur'] = reduce_tensor(loss_dict['loss_dur'].data, num_gpus) + loss_dict['loss'] = reduce_tensor(loss_dict['loss'] .data, num_gpus) + + # detach loss values + loss_dict_new = dict() + for key, value in loss_dict.items(): + if isinstance(value, (int, float)): + loss_dict_new[key] = value + else: + loss_dict_new[key] = value.item() + loss_dict = loss_dict_new + + # update avg stats + update_train_values = dict() + for key, value in loss_dict.items(): + update_train_values['avg_' + key] = value + update_train_values['avg_loader_time'] = loader_time + update_train_values['avg_step_time'] = step_time + 
keep_avg.update_values(update_train_values) + + # print training progress + if global_step % c.print_step == 0: + log_dict = { + + "avg_spec_length": [avg_spec_length, 1], # value, precision + "avg_text_length": [avg_text_length, 1], + "step_time": [step_time, 4], + "loader_time": [loader_time, 2], + "current_lr": current_lr, + } + c_logger.print_train_step(batch_n_iter, num_iter, global_step, + log_dict, loss_dict, keep_avg.avg_values) + + if args.rank == 0: + # Plot Training Iter Stats + # reduce TB load + if global_step % c.tb_plot_step == 0: + iter_stats = { + "lr": current_lr, + "grad_norm": grad_norm, + "step_time": step_time + } + iter_stats.update(loss_dict) + tb_logger.tb_train_iter_stats(global_step, iter_stats) + + if global_step % c.save_step == 0: + if c.checkpoint: + # save model + save_checkpoint(model, optimizer, global_step, epoch, 1, OUT_PATH, + model_loss=loss_dict['loss']) + + # wait all kernels to be completed + torch.cuda.synchronize() + + # Diagnostic visualizations + idx = np.random.randint(mel_targets.shape[0]) + pred_spec = decoder_output[idx].detach().data.cpu().numpy().T + gt_spec = mel_targets[idx].data.cpu().numpy().T + align_img = alignments[idx].data.cpu() + + figures = { + "prediction": plot_spectrogram(pred_spec, ap), + "ground_truth": plot_spectrogram(gt_spec, ap), + "alignment": plot_alignment(align_img), + } + + tb_logger.tb_train_figures(global_step, figures) + + # Sample audio + train_audio = ap.inv_melspectrogram(pred_spec.T) + tb_logger.tb_train_audios(global_step, + {'TrainAudio': train_audio}, + c.audio["sample_rate"]) + end_time = time.time() + + # print epoch stats + c_logger.print_train_epoch_end(global_step, epoch, epoch_time, keep_avg) + + # Plot Epoch Stats + if args.rank == 0: + epoch_stats = {"epoch_time": epoch_time} + epoch_stats.update(keep_avg.avg_values) + tb_logger.tb_train_epoch_stats(global_step, epoch_stats) + if c.tb_model_param_stats: + tb_logger.tb_model_weights(model, global_step) + return keep_avg.avg_values, global_step + + +@torch.no_grad() +def evaluate(data_loader, model, criterion, ap, global_step, epoch): + model.eval() + epoch_time = 0 + keep_avg = KeepAverage() + c_logger.print_eval_start() + if data_loader is not None: + for num_iter, data in enumerate(data_loader): + start_time = time.time() + + # format data + text_input, text_lengths, mel_targets, mel_lengths, speaker_c,\ + _, _, _, dur_target, _ = format_data(data) + + # forward pass model + with torch.cuda.amp.autocast(enabled=c.mixed_precision): + decoder_output, dur_output, alignments = model.forward( + text_input, text_lengths, mel_lengths, dur_target, g=speaker_c) + + # compute loss + loss_dict = criterion(decoder_output, mel_targets, mel_lengths, dur_output, torch.log(1 + dur_target), text_lengths) + + # step time + step_time = time.time() - start_time + epoch_time += step_time + + # compute alignment score + align_error = 1 - alignment_diagonal_score(alignments, binary=True) + loss_dict['align_error'] = align_error + + # aggregate losses from processes + if num_gpus > 1: + loss_dict['loss_l1'] = reduce_tensor(loss_dict['loss_l1'].data, num_gpus) + loss_dict['loss_ssim'] = reduce_tensor(loss_dict['loss_ssim'].data, num_gpus) + loss_dict['loss_dur'] = reduce_tensor(loss_dict['loss_dur'].data, num_gpus) + loss_dict['loss'] = reduce_tensor(loss_dict['loss'] .data, num_gpus) + + # detach loss values + loss_dict_new = dict() + for key, value in loss_dict.items(): + if isinstance(value, (int, float)): + loss_dict_new[key] = value + else: + loss_dict_new[key] = 
value.item() + loss_dict = loss_dict_new + + # update avg stats + update_train_values = dict() + for key, value in loss_dict.items(): + update_train_values['avg_' + key] = value + keep_avg.update_values(update_train_values) + + if c.print_eval: + c_logger.print_eval_step(num_iter, loss_dict, keep_avg.avg_values) + + if args.rank == 0: + # Diagnostic visualizations + idx = np.random.randint(mel_targets.shape[0]) + pred_spec = decoder_output[idx].detach().data.cpu().numpy().T + gt_spec = mel_targets[idx].data.cpu().numpy().T + align_img = alignments[idx].data.cpu() + + eval_figures = { + "prediction": plot_spectrogram(pred_spec, ap, output_fig=False), + "ground_truth": plot_spectrogram(gt_spec, ap, output_fig=False), + "alignment": plot_alignment(align_img, output_fig=False) + } + + # Sample audio + eval_audio = ap.inv_melspectrogram(pred_spec.T) + tb_logger.tb_eval_audios(global_step, {"ValAudio": eval_audio}, + c.audio["sample_rate"]) + + # Plot Validation Stats + tb_logger.tb_eval_stats(global_step, keep_avg.avg_values) + tb_logger.tb_eval_figures(global_step, eval_figures) + + if args.rank == 0 and epoch >= c.test_delay_epochs: + if c.test_sentences_file is None: + test_sentences = [ + "It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.", + "Be a voice, not an echo.", + "I'm sorry Dave. I'm afraid I can't do that.", + "This cake is great. It's so delicious and moist.", + "Prior to November 22, 1963." + ] + else: + with open(c.test_sentences_file, "r") as f: + test_sentences = [s.strip() for s in f.readlines()] + + # test sentences + test_audios = {} + test_figures = {} + print(" | > Synthesizing test sentences") + if c.use_speaker_embedding: + if c.use_external_speaker_embedding_file: + speaker_embedding = speaker_mapping[list(speaker_mapping.keys())[randrange(len(speaker_mapping)-1)]]['embedding'] + speaker_id = None + else: + speaker_id = 0 + speaker_embedding = None + else: + speaker_id = None + speaker_embedding = None + + style_wav = c.get("style_wav_for_test") + for idx, test_sentence in enumerate(test_sentences): + try: + wav, alignment, _, postnet_output, _, _ = synthesis( + model, + test_sentence, + c, + use_cuda, + ap, + speaker_id=speaker_id, + speaker_embedding=speaker_embedding, + style_wav=style_wav, + truncated=False, + enable_eos_bos_chars=c.enable_eos_bos_chars, #pylint: disable=unused-argument + use_griffin_lim=True, + do_trim_silence=False) + + file_path = os.path.join(AUDIO_PATH, str(global_step)) + os.makedirs(file_path, exist_ok=True) + file_path = os.path.join(file_path, + "TestSentence_{}.wav".format(idx)) + ap.save_wav(wav, file_path) + test_audios['{}-audio'.format(idx)] = wav + test_figures['{}-prediction'.format(idx)] = plot_spectrogram( + postnet_output, ap) + test_figures['{}-alignment'.format(idx)] = plot_alignment( + alignment) + except: #pylint: disable=bare-except + print(" !! Error creating Test Sentence -", idx) + traceback.print_exc() + tb_logger.tb_test_audios(global_step, test_audios, + c.audio['sample_rate']) + tb_logger.tb_test_figures(global_step, test_figures) + return keep_avg.avg_values + + +# FIXME: move args definition/parsing inside of main? 
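
When `c.noam_schedule` is enabled, `main()` below builds `NoamLR(optimizer, warmup_steps=c.warmup_steps, last_epoch=args.restore_step - 1)` and the train loop steps it once per iteration. For reference, the standard Noam warmup-then-decay rule such a scheduler typically implements (a sketch under that assumption; the repository's `NoamLR` may differ in scaling details):

```python
# Standard Noam warmup/decay rule (assumption: TTS.utils.training.NoamLR
# follows this common form; only the schedule itself is sketched here).
def noam_lr(base_lr: float, step: int, warmup_steps: int) -> float:
    step = max(step, 1)  # guard against step 0
    return base_lr * warmup_steps**0.5 * min(step**-0.5, step * warmup_steps**-1.5)

# Example: base_lr=1e-3, warmup_steps=4000 -> the rate ramps up roughly linearly,
# peaks at 1e-3 at step 4000, then decays proportionally to 1/sqrt(step).
```
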
+def main(args): # pylint: disable=redefined-outer-name + # pylint: disable=global-variable-undefined + global meta_data_train, meta_data_eval, symbols, phonemes, speaker_mapping + # Audio processor + ap = AudioProcessor(**c.audio) + if 'characters' in c.keys(): + symbols, phonemes = make_symbols(**c.characters) + + # DISTRUBUTED + if num_gpus > 1: + init_distributed(args.rank, num_gpus, args.group_id, + c.distributed["backend"], c.distributed["url"]) + num_chars = len(phonemes) if c.use_phonemes else len(symbols) + + # load data instances + meta_data_train, meta_data_eval = load_meta_data(c.datasets, eval_split=True) + + # set the portion of the data used for training if set in config.json + if 'train_portion' in c.keys(): + meta_data_train = meta_data_train[:int(len(meta_data_train) * c.train_portion)] + if 'eval_portion' in c.keys(): + meta_data_eval = meta_data_eval[:int(len(meta_data_eval) * c.eval_portion)] + + # parse speakers + num_speakers, speaker_embedding_dim, speaker_mapping = parse_speakers(c, args, meta_data_train, OUT_PATH) + + # setup model + model = setup_model(num_chars, num_speakers, c, speaker_embedding_dim=speaker_embedding_dim) + optimizer = RAdam(model.parameters(), lr=c.lr, weight_decay=0, betas=(0.9, 0.98), eps=1e-9) + criterion = SpeedySpeechLoss(c) + + if args.restore_path: + checkpoint = torch.load(args.restore_path, map_location='cpu') + try: + # TODO: fix optimizer init, model.cuda() needs to be called before + # optimizer restore + optimizer.load_state_dict(checkpoint['optimizer']) + if c.reinit_layers: + raise RuntimeError + model.load_state_dict(checkpoint['model']) + except: #pylint: disable=bare-except + print(" > Partial model initialization.") + model_dict = model.state_dict() + model_dict = set_init_dict(model_dict, checkpoint['model'], c) + model.load_state_dict(model_dict) + del model_dict + + for group in optimizer.param_groups: + group['initial_lr'] = c.lr + print(" > Model restored from step %d" % checkpoint['step'], + flush=True) + args.restore_step = checkpoint['step'] + else: + args.restore_step = 0 + + if use_cuda: + model.cuda() + criterion.cuda() + + # DISTRUBUTED + if num_gpus > 1: + model = DDP_th(model, device_ids=[args.rank]) + + if c.noam_schedule: + scheduler = NoamLR(optimizer, + warmup_steps=c.warmup_steps, + last_epoch=args.restore_step - 1) + else: + scheduler = None + + num_params = count_parameters(model) + print("\n > Model has {} parameters".format(num_params), flush=True) + + if 'best_loss' not in locals(): + best_loss = float('inf') + + # define dataloaders + train_loader = setup_loader(ap, 1, is_val=False, verbose=True) + eval_loader = setup_loader(ap, 1, is_val=True, verbose=True) + + global_step = args.restore_step + for epoch in range(0, c.epochs): + c_logger.print_epoch_start(epoch, c.epochs) + train_avg_loss_dict, global_step = train(train_loader, model, criterion, optimizer, + scheduler, ap, global_step, + epoch) + eval_avg_loss_dict = evaluate(eval_loader , model, criterion, ap, global_step, epoch) + c_logger.print_epoch_end(epoch, eval_avg_loss_dict) + target_loss = train_avg_loss_dict['avg_loss'] + if c.run_eval: + target_loss = eval_avg_loss_dict['avg_loss'] + best_loss = save_best_model(target_loss, best_loss, model, optimizer, global_step, epoch, c.r, + OUT_PATH) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument( + '--continue_path', + type=str, + help='Training output folder to continue training. Use to continue a training. 
If it is used, "config_path" is ignored.', + default='', + required='--config_path' not in sys.argv) + parser.add_argument( + '--restore_path', + type=str, + help='Model file to be restored. Use to finetune a model.', + default='') + parser.add_argument( + '--config_path', + type=str, + help='Path to config file for training.', + required='--continue_path' not in sys.argv + ) + parser.add_argument('--debug', + type=bool, + default=False, + help='Do not verify commit integrity to run training.') + + # DISTRUBUTED + parser.add_argument( + '--rank', + type=int, + default=0, + help='DISTRIBUTED: process rank for distributed training.') + parser.add_argument('--group_id', + type=str, + default="", + help='DISTRIBUTED: process group id.') + args = parser.parse_args() + + if args.continue_path != '': + args.output_path = args.continue_path + args.config_path = os.path.join(args.continue_path, 'config.json') + list_of_files = glob.glob(args.continue_path + "/*.pth.tar") # * means all if need specific format then *.csv + latest_model_file = max(list_of_files, key=os.path.getctime) + args.restore_path = latest_model_file + print(f" > Training continues for {args.restore_path}") + + # setup output paths and read configs + c = load_config(args.config_path) + # check_config(c) + check_config_tts(c) + _ = os.path.dirname(os.path.realpath(__file__)) + + if c.mixed_precision: + print(" > Mixed precision enabled.") + + OUT_PATH = args.continue_path + if args.continue_path == '': + OUT_PATH = create_experiment_folder(c.output_path, c.run_name, args.debug) + + AUDIO_PATH = os.path.join(OUT_PATH, 'test_audios') + + c_logger = ConsoleLogger() + + if args.rank == 0: + os.makedirs(AUDIO_PATH, exist_ok=True) + new_fields = {} + if args.restore_path: + new_fields["restore_path"] = args.restore_path + new_fields["github_branch"] = get_git_branch() + copy_model_files(c, args.config_path, OUT_PATH, new_fields) + os.chmod(AUDIO_PATH, 0o775) + os.chmod(OUT_PATH, 0o775) + + LOG_DIR = OUT_PATH + tb_logger = TensorboardLogger(LOG_DIR, model_name='TTS') + + # write model desc to tensorboard + tb_logger.tb_add_text('model-description', c['run_description'], 0) + + try: + main(args) + except KeyboardInterrupt: + remove_experiment_folder(OUT_PATH) + try: + sys.exit(0) + except SystemExit: + os._exit(0) # pylint: disable=protected-access + except Exception: # pylint: disable=broad-except + remove_experiment_folder(OUT_PATH) + traceback.print_exc() + sys.exit(1) diff --git a/TTS/bin/train_tacotron.py b/TTS/bin/train_tacotron.py new file mode 100644 index 0000000000000000000000000000000000000000..ccb35a7c406ca2b2603e5d16f73cedca933c58b8 --- /dev/null +++ b/TTS/bin/train_tacotron.py @@ -0,0 +1,731 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import argparse +import glob +import os +import sys +import time +import traceback +from random import randrange + +import numpy as np +import torch +from torch.utils.data import DataLoader +from TTS.tts.datasets.preprocess import load_meta_data +from TTS.tts.datasets.TTSDataset import MyDataset +from TTS.tts.layers.losses import TacotronLoss +from TTS.tts.utils.generic_utils import check_config_tts, setup_model +from TTS.tts.utils.io import save_best_model, save_checkpoint +from TTS.tts.utils.measures import alignment_diagonal_score +from TTS.tts.utils.speakers import parse_speakers +from TTS.tts.utils.synthesis import synthesis +from TTS.tts.utils.text.symbols import make_symbols, phonemes, symbols +from TTS.tts.utils.visual import plot_alignment, plot_spectrogram +from 
TTS.utils.audio import AudioProcessor +from TTS.utils.console_logger import ConsoleLogger +from TTS.utils.distribute import (DistributedSampler, apply_gradient_allreduce, + init_distributed, reduce_tensor) +from TTS.utils.generic_utils import (KeepAverage, count_parameters, + create_experiment_folder, get_git_branch, + remove_experiment_folder, set_init_dict) +from TTS.utils.io import copy_model_files, load_config +from TTS.utils.radam import RAdam +from TTS.utils.tensorboard_logger import TensorboardLogger +from TTS.utils.training import (NoamLR, adam_weight_decay, check_update, + gradual_training_scheduler, set_weight_decay, + setup_torch_training_env) + +use_cuda, num_gpus = setup_torch_training_env(True, False) + + +def setup_loader(ap, r, is_val=False, verbose=False, dataset=None): + if is_val and not c.run_eval: + loader = None + else: + if dataset is None: + dataset = MyDataset( + r, + c.text_cleaner, + compute_linear_spec=c.model.lower() == 'tacotron', + meta_data=meta_data_eval if is_val else meta_data_train, + ap=ap, + tp=c.characters if 'characters' in c.keys() else None, + add_blank=c['add_blank'] if 'add_blank' in c.keys() else False, + batch_group_size=0 if is_val else c.batch_group_size * + c.batch_size, + min_seq_len=c.min_seq_len, + max_seq_len=c.max_seq_len, + phoneme_cache_path=c.phoneme_cache_path, + use_phonemes=c.use_phonemes, + phoneme_language=c.phoneme_language, + enable_eos_bos=c.enable_eos_bos_chars, + verbose=verbose, + speaker_mapping=speaker_mapping if c.use_speaker_embedding and c.use_external_speaker_embedding_file else None) + + if c.use_phonemes and c.compute_input_seq_cache: + # precompute phonemes to have a better estimate of sequence lengths. + dataset.compute_input_seq(c.num_loader_workers) + dataset.sort_items() + + sampler = DistributedSampler(dataset) if num_gpus > 1 else None + loader = DataLoader( + dataset, + batch_size=c.eval_batch_size if is_val else c.batch_size, + shuffle=False, + collate_fn=dataset.collate_fn, + drop_last=False, + sampler=sampler, + num_workers=c.num_val_loader_workers + if is_val else c.num_loader_workers, + pin_memory=False) + return loader + +def format_data(data): + # setup input data + text_input = data[0] + text_lengths = data[1] + speaker_names = data[2] + linear_input = data[3] if c.model in ["Tacotron"] else None + mel_input = data[4] + mel_lengths = data[5] + stop_targets = data[6] + max_text_length = torch.max(text_lengths.float()) + max_spec_length = torch.max(mel_lengths.float()) + + if c.use_speaker_embedding: + if c.use_external_speaker_embedding_file: + speaker_embeddings = data[8] + speaker_ids = None + else: + speaker_ids = [ + speaker_mapping[speaker_name] for speaker_name in speaker_names + ] + speaker_ids = torch.LongTensor(speaker_ids) + speaker_embeddings = None + else: + speaker_embeddings = None + speaker_ids = None + + + # set stop targets view, we predict a single stop token per iteration. 
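+    # Editor note (illustrative, not part of the original diff): with reduction
+    # factor r the decoder emits r frames per step, so the per-frame stop vector of
+    # length T is regrouped below into T // r chunks, and a chunk becomes a positive
+    # stop target if any of its r frames is flagged; e.g. r=5 turns a 20-frame stop
+    # vector into 4 grouped stop targets.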
+ stop_targets = stop_targets.view(text_input.shape[0], + stop_targets.size(1) // c.r, -1) + stop_targets = (stop_targets.sum(2) > + 0.0).unsqueeze(2).float().squeeze(2) + + # dispatch data to GPU + if use_cuda: + text_input = text_input.cuda(non_blocking=True) + text_lengths = text_lengths.cuda(non_blocking=True) + mel_input = mel_input.cuda(non_blocking=True) + mel_lengths = mel_lengths.cuda(non_blocking=True) + linear_input = linear_input.cuda(non_blocking=True) if c.model in ["Tacotron"] else None + stop_targets = stop_targets.cuda(non_blocking=True) + if speaker_ids is not None: + speaker_ids = speaker_ids.cuda(non_blocking=True) + if speaker_embeddings is not None: + speaker_embeddings = speaker_embeddings.cuda(non_blocking=True) + + return text_input, text_lengths, mel_input, mel_lengths, linear_input, stop_targets, speaker_ids, speaker_embeddings, max_text_length, max_spec_length + + +def train(data_loader, model, criterion, optimizer, optimizer_st, scheduler, + ap, global_step, epoch, scaler, scaler_st): + model.train() + epoch_time = 0 + keep_avg = KeepAverage() + if use_cuda: + batch_n_iter = int( + len(data_loader.dataset) / (c.batch_size * num_gpus)) + else: + batch_n_iter = int(len(data_loader.dataset) / c.batch_size) + end_time = time.time() + c_logger.print_train_start() + for num_iter, data in enumerate(data_loader): + start_time = time.time() + + # format data + text_input, text_lengths, mel_input, mel_lengths, linear_input, stop_targets, speaker_ids, speaker_embeddings, max_text_length, max_spec_length = format_data(data) + loader_time = time.time() - end_time + + global_step += 1 + + # setup lr + if c.noam_schedule: + scheduler.step() + + optimizer.zero_grad() + if optimizer_st: + optimizer_st.zero_grad() + + with torch.cuda.amp.autocast(enabled=c.mixed_precision): + # forward pass model + if c.bidirectional_decoder or c.double_decoder_consistency: + decoder_output, postnet_output, alignments, stop_tokens, decoder_backward_output, alignments_backward = model( + text_input, text_lengths, mel_input, mel_lengths, speaker_ids=speaker_ids, speaker_embeddings=speaker_embeddings) + else: + decoder_output, postnet_output, alignments, stop_tokens = model( + text_input, text_lengths, mel_input, mel_lengths, speaker_ids=speaker_ids, speaker_embeddings=speaker_embeddings) + decoder_backward_output = None + alignments_backward = None + + # set the [alignment] lengths wrt reduction factor for guided attention + if mel_lengths.max() % model.decoder.r != 0: + alignment_lengths = (mel_lengths + (model.decoder.r - (mel_lengths.max() % model.decoder.r))) // model.decoder.r + else: + alignment_lengths = mel_lengths // model.decoder.r + + # compute loss + loss_dict = criterion(postnet_output, decoder_output, mel_input, + linear_input, stop_tokens, stop_targets, + mel_lengths, decoder_backward_output, + alignments, alignment_lengths, alignments_backward, + text_lengths) + + # check nan loss + if torch.isnan(loss_dict['loss']).any(): + raise RuntimeError(f'Detected NaN loss at step {global_step}.') + + # optimizer step + if c.mixed_precision: + # model optimizer step in mixed precision mode + scaler.scale(loss_dict['loss']).backward() + scaler.unscale_(optimizer) + optimizer, current_lr = adam_weight_decay(optimizer) + grad_norm, _ = check_update(model, c.grad_clip, ignore_stopnet=True) + scaler.step(optimizer) + scaler.update() + + # stopnet optimizer step + if c.separate_stopnet: + scaler_st.scale( loss_dict['stopnet_loss']).backward() + scaler.unscale_(optimizer_st) + optimizer_st, _ = 
adam_weight_decay(optimizer_st) + grad_norm_st, _ = check_update(model.decoder.stopnet, 1.0) + scaler_st.step(optimizer) + scaler_st.update() + else: + grad_norm_st = 0 + else: + # main model optimizer step + loss_dict['loss'].backward() + optimizer, current_lr = adam_weight_decay(optimizer) + grad_norm, _ = check_update(model, c.grad_clip, ignore_stopnet=True) + optimizer.step() + + # stopnet optimizer step + if c.separate_stopnet: + loss_dict['stopnet_loss'].backward() + optimizer_st, _ = adam_weight_decay(optimizer_st) + grad_norm_st, _ = check_update(model.decoder.stopnet, 1.0) + optimizer_st.step() + else: + grad_norm_st = 0 + + # compute alignment error (the lower the better ) + align_error = 1 - alignment_diagonal_score(alignments) + loss_dict['align_error'] = align_error + + step_time = time.time() - start_time + epoch_time += step_time + + # aggregate losses from processes + if num_gpus > 1: + loss_dict['postnet_loss'] = reduce_tensor(loss_dict['postnet_loss'].data, num_gpus) + loss_dict['decoder_loss'] = reduce_tensor(loss_dict['decoder_loss'].data, num_gpus) + loss_dict['loss'] = reduce_tensor(loss_dict['loss'] .data, num_gpus) + loss_dict['stopnet_loss'] = reduce_tensor(loss_dict['stopnet_loss'].data, num_gpus) if c.stopnet else loss_dict['stopnet_loss'] + + # detach loss values + loss_dict_new = dict() + for key, value in loss_dict.items(): + if isinstance(value, (int, float)): + loss_dict_new[key] = value + else: + loss_dict_new[key] = value.item() + loss_dict = loss_dict_new + + # update avg stats + update_train_values = dict() + for key, value in loss_dict.items(): + update_train_values['avg_' + key] = value + update_train_values['avg_loader_time'] = loader_time + update_train_values['avg_step_time'] = step_time + keep_avg.update_values(update_train_values) + + # print training progress + if global_step % c.print_step == 0: + log_dict = { + "max_spec_length": [max_spec_length, 1], # value, precision + "max_text_length": [max_text_length, 1], + "step_time": [step_time, 4], + "loader_time": [loader_time, 2], + "current_lr": current_lr, + } + c_logger.print_train_step(batch_n_iter, num_iter, global_step, + log_dict, loss_dict, keep_avg.avg_values) + + if args.rank == 0: + # Plot Training Iter Stats + # reduce TB load + if global_step % c.tb_plot_step == 0: + iter_stats = { + "lr": current_lr, + "grad_norm": grad_norm, + "grad_norm_st": grad_norm_st, + "step_time": step_time + } + iter_stats.update(loss_dict) + tb_logger.tb_train_iter_stats(global_step, iter_stats) + + if global_step % c.save_step == 0: + if c.checkpoint: + # save model + save_checkpoint(model, optimizer, global_step, epoch, model.decoder.r, OUT_PATH, + optimizer_st=optimizer_st, + model_loss=loss_dict['postnet_loss'], + scaler=scaler.state_dict() if c.mixed_precision else None) + + # Diagnostic visualizations + const_spec = postnet_output[0].data.cpu().numpy() + gt_spec = linear_input[0].data.cpu().numpy() if c.model in [ + "Tacotron", "TacotronGST" + ] else mel_input[0].data.cpu().numpy() + align_img = alignments[0].data.cpu().numpy() + + figures = { + "prediction": plot_spectrogram(const_spec, ap, output_fig=False), + "ground_truth": plot_spectrogram(gt_spec, ap, output_fig=False), + "alignment": plot_alignment(align_img, output_fig=False), + } + + if c.bidirectional_decoder or c.double_decoder_consistency: + figures["alignment_backward"] = plot_alignment(alignments_backward[0].data.cpu().numpy(), output_fig=False) + + tb_logger.tb_train_figures(global_step, figures) + + # Sample audio + if c.model in 
["Tacotron", "TacotronGST"]: + train_audio = ap.inv_spectrogram(const_spec.T) + else: + train_audio = ap.inv_melspectrogram(const_spec.T) + tb_logger.tb_train_audios(global_step, + {'TrainAudio': train_audio}, + c.audio["sample_rate"]) + end_time = time.time() + + # print epoch stats + c_logger.print_train_epoch_end(global_step, epoch, epoch_time, keep_avg) + + # Plot Epoch Stats + if args.rank == 0: + epoch_stats = {"epoch_time": epoch_time} + epoch_stats.update(keep_avg.avg_values) + tb_logger.tb_train_epoch_stats(global_step, epoch_stats) + if c.tb_model_param_stats: + tb_logger.tb_model_weights(model, global_step) + return keep_avg.avg_values, global_step + + +@torch.no_grad() +def evaluate(data_loader, model, criterion, ap, global_step, epoch): + model.eval() + epoch_time = 0 + keep_avg = KeepAverage() + c_logger.print_eval_start() + if data_loader is not None: + for num_iter, data in enumerate(data_loader): + start_time = time.time() + + # format data + text_input, text_lengths, mel_input, mel_lengths, linear_input, stop_targets, speaker_ids, speaker_embeddings, _, _ = format_data(data) + assert mel_input.shape[1] % model.decoder.r == 0 + + # forward pass model + if c.bidirectional_decoder or c.double_decoder_consistency: + decoder_output, postnet_output, alignments, stop_tokens, decoder_backward_output, alignments_backward = model( + text_input, text_lengths, mel_input, speaker_ids=speaker_ids, speaker_embeddings=speaker_embeddings) + else: + decoder_output, postnet_output, alignments, stop_tokens = model( + text_input, text_lengths, mel_input, speaker_ids=speaker_ids, speaker_embeddings=speaker_embeddings) + decoder_backward_output = None + alignments_backward = None + + # set the alignment lengths wrt reduction factor for guided attention + if mel_lengths.max() % model.decoder.r != 0: + alignment_lengths = (mel_lengths + (model.decoder.r - (mel_lengths.max() % model.decoder.r))) // model.decoder.r + else: + alignment_lengths = mel_lengths // model.decoder.r + + # compute loss + loss_dict = criterion(postnet_output, decoder_output, mel_input, + linear_input, stop_tokens, stop_targets, + mel_lengths, decoder_backward_output, + alignments, alignment_lengths, alignments_backward, + text_lengths) + + # step time + step_time = time.time() - start_time + epoch_time += step_time + + # compute alignment score + align_error = 1 - alignment_diagonal_score(alignments) + loss_dict['align_error'] = align_error + + # aggregate losses from processes + if num_gpus > 1: + loss_dict['postnet_loss'] = reduce_tensor(loss_dict['postnet_loss'].data, num_gpus) + loss_dict['decoder_loss'] = reduce_tensor(loss_dict['decoder_loss'].data, num_gpus) + if c.stopnet: + loss_dict['stopnet_loss'] = reduce_tensor(loss_dict['stopnet_loss'].data, num_gpus) + + # detach loss values + loss_dict_new = dict() + for key, value in loss_dict.items(): + if isinstance(value, (int, float)): + loss_dict_new[key] = value + else: + loss_dict_new[key] = value.item() + loss_dict = loss_dict_new + + # update avg stats + update_train_values = dict() + for key, value in loss_dict.items(): + update_train_values['avg_' + key] = value + keep_avg.update_values(update_train_values) + + if c.print_eval: + c_logger.print_eval_step(num_iter, loss_dict, keep_avg.avg_values) + + if args.rank == 0: + # Diagnostic visualizations + idx = np.random.randint(mel_input.shape[0]) + const_spec = postnet_output[idx].data.cpu().numpy() + gt_spec = linear_input[idx].data.cpu().numpy() if c.model in [ + "Tacotron", "TacotronGST" + ] else 
mel_input[idx].data.cpu().numpy() + align_img = alignments[idx].data.cpu().numpy() + + eval_figures = { + "prediction": plot_spectrogram(const_spec, ap, output_fig=False), + "ground_truth": plot_spectrogram(gt_spec, ap, output_fig=False), + "alignment": plot_alignment(align_img, output_fig=False) + } + + # Sample audio + if c.model in ["Tacotron", "TacotronGST"]: + eval_audio = ap.inv_spectrogram(const_spec.T) + else: + eval_audio = ap.inv_melspectrogram(const_spec.T) + tb_logger.tb_eval_audios(global_step, {"ValAudio": eval_audio}, + c.audio["sample_rate"]) + + # Plot Validation Stats + + if c.bidirectional_decoder or c.double_decoder_consistency: + align_b_img = alignments_backward[idx].data.cpu().numpy() + eval_figures['alignment2'] = plot_alignment(align_b_img, output_fig=False) + tb_logger.tb_eval_stats(global_step, keep_avg.avg_values) + tb_logger.tb_eval_figures(global_step, eval_figures) + + if args.rank == 0 and epoch > c.test_delay_epochs: + if c.test_sentences_file is None: + test_sentences = [ + "It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.", + "Be a voice, not an echo.", + "I'm sorry Dave. I'm afraid I can't do that.", + "This cake is great. It's so delicious and moist.", + "Prior to November 22, 1963." + ] + else: + with open(c.test_sentences_file, "r") as f: + test_sentences = [s.strip() for s in f.readlines()] + + # test sentences + test_audios = {} + test_figures = {} + print(" | > Synthesizing test sentences") + speaker_id = 0 if c.use_speaker_embedding else None + speaker_embedding = speaker_mapping[list(speaker_mapping.keys())[randrange(len(speaker_mapping)-1)]]['embedding'] if c.use_external_speaker_embedding_file and c.use_speaker_embedding else None + style_wav = c.get("gst_style_input") + if style_wav is None and c.use_gst: + # inicialize GST with zero dict. + style_wav = {} + print("WARNING: You don't provided a gst style wav, for this reason we use a zero tensor!") + for i in range(c.gst['gst_style_tokens']): + style_wav[str(i)] = 0 + style_wav = c.get("gst_style_input") + for idx, test_sentence in enumerate(test_sentences): + try: + wav, alignment, decoder_output, postnet_output, stop_tokens, _ = synthesis( + model, + test_sentence, + c, + use_cuda, + ap, + speaker_id=speaker_id, + speaker_embedding=speaker_embedding, + style_wav=style_wav, + truncated=False, + enable_eos_bos_chars=c.enable_eos_bos_chars, #pylint: disable=unused-argument + use_griffin_lim=True, + do_trim_silence=False) + + file_path = os.path.join(AUDIO_PATH, str(global_step)) + os.makedirs(file_path, exist_ok=True) + file_path = os.path.join(file_path, + "TestSentence_{}.wav".format(idx)) + ap.save_wav(wav, file_path) + test_audios['{}-audio'.format(idx)] = wav + test_figures['{}-prediction'.format(idx)] = plot_spectrogram( + postnet_output, ap, output_fig=False) + test_figures['{}-alignment'.format(idx)] = plot_alignment( + alignment, output_fig=False) + except: #pylint: disable=bare-except + print(" !! Error creating Test Sentence -", idx) + traceback.print_exc() + tb_logger.tb_test_audios(global_step, test_audios, + c.audio['sample_rate']) + tb_logger.tb_test_figures(global_step, test_figures) + return keep_avg.avg_values + + +# FIXME: move args definition/parsing inside of main? 
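
`main()` below calls `gradual_training_scheduler(global_step, c)` at the start of every epoch to pick the current reduction factor `r` and batch size, and rebuilds the data loaders when they change. In Mozilla TTS configs, `gradual_training` is conventionally a list of `[start_step, r, batch_size]` entries; a sketch of a scheduler that returns the last entry whose start step has been reached (an assumption about the helper in `TTS.utils.training`, not a verbatim copy):

```python
# Sketch of reduction-factor scheduling (assumption about TTS.utils.training;
# c.gradual_training is expected to be a list sorted by start step, e.g.
# [[0, 7, 64], [10000, 5, 64], [50000, 3, 32], [130000, 2, 32], [290000, 1, 32]]).
def gradual_training_scheduler(global_step, c):
    new_values = None
    for values in c.gradual_training:  # values = [start_step, r, batch_size]
        if global_step >= values[0]:
            new_values = values
    return new_values[1], new_values[2]  # (reduction factor, batch size)
```

A larger `r` early in training makes the attention easier to learn; it is then reduced as the model converges, at the cost of more decoder steps per utterance.
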
+def main(args): # pylint: disable=redefined-outer-name + # pylint: disable=global-variable-undefined + global meta_data_train, meta_data_eval, symbols, phonemes, speaker_mapping + # Audio processor + ap = AudioProcessor(**c.audio) + if 'characters' in c.keys(): + symbols, phonemes = make_symbols(**c.characters) + + # DISTRUBUTED + if num_gpus > 1: + init_distributed(args.rank, num_gpus, args.group_id, + c.distributed["backend"], c.distributed["url"]) + num_chars = len(phonemes) if c.use_phonemes else len(symbols) + + # load data instances + meta_data_train, meta_data_eval = load_meta_data(c.datasets) + + # set the portion of the data used for training + if 'train_portion' in c.keys(): + meta_data_train = meta_data_train[:int(len(meta_data_train) * c.train_portion)] + if 'eval_portion' in c.keys(): + meta_data_eval = meta_data_eval[:int(len(meta_data_eval) * c.eval_portion)] + + # parse speakers + num_speakers, speaker_embedding_dim, speaker_mapping = parse_speakers(c, args, meta_data_train, OUT_PATH) + + model = setup_model(num_chars, num_speakers, c, speaker_embedding_dim) + + # scalers for mixed precision training + scaler = torch.cuda.amp.GradScaler() if c.mixed_precision else None + scaler_st = torch.cuda.amp.GradScaler() if c.mixed_precision and c.separate_stopnet else None + + params = set_weight_decay(model, c.wd) + optimizer = RAdam(params, lr=c.lr, weight_decay=0) + if c.stopnet and c.separate_stopnet: + optimizer_st = RAdam(model.decoder.stopnet.parameters(), + lr=c.lr, + weight_decay=0) + else: + optimizer_st = None + + # setup criterion + criterion = TacotronLoss(c, stopnet_pos_weight=10.0, ga_sigma=0.4) + + if args.restore_path: + checkpoint = torch.load(args.restore_path, map_location='cpu') + try: + print(" > Restoring Model.") + model.load_state_dict(checkpoint['model']) + # optimizer restore + print(" > Restoring Optimizer.") + optimizer.load_state_dict(checkpoint['optimizer']) + if "scaler" in checkpoint and c.mixed_precision: + print(" > Restoring AMP Scaler...") + scaler.load_state_dict(checkpoint["scaler"]) + if c.reinit_layers: + raise RuntimeError + except (KeyError, RuntimeError): + print(" > Partial model initialization.") + model_dict = model.state_dict() + model_dict = set_init_dict(model_dict, checkpoint['model'], c) + # torch.save(model_dict, os.path.join(OUT_PATH, 'state_dict.pt')) + # print("State Dict saved for debug in: ", os.path.join(OUT_PATH, 'state_dict.pt')) + model.load_state_dict(model_dict) + del model_dict + + for group in optimizer.param_groups: + group['lr'] = c.lr + print(" > Model restored from step %d" % checkpoint['step'], + flush=True) + args.restore_step = checkpoint['step'] + else: + args.restore_step = 0 + + if use_cuda: + model.cuda() + criterion.cuda() + + # DISTRUBUTED + if num_gpus > 1: + model = apply_gradient_allreduce(model) + + if c.noam_schedule: + scheduler = NoamLR(optimizer, + warmup_steps=c.warmup_steps, + last_epoch=args.restore_step - 1) + else: + scheduler = None + + num_params = count_parameters(model) + print("\n > Model has {} parameters".format(num_params), flush=True) + + if 'best_loss' not in locals(): + best_loss = float('inf') + + # define data loaders + train_loader = setup_loader(ap, + model.decoder.r, + is_val=False, + verbose=True) + eval_loader = setup_loader(ap, model.decoder.r, is_val=True) + + global_step = args.restore_step + for epoch in range(0, c.epochs): + c_logger.print_epoch_start(epoch, c.epochs) + # set gradual training + if c.gradual_training is not None: + r, c.batch_size = 
gradual_training_scheduler(global_step, c) + c.r = r + model.decoder.set_r(r) + if c.bidirectional_decoder: + model.decoder_backward.set_r(r) + train_loader.dataset.outputs_per_step = r + eval_loader.dataset.outputs_per_step = r + train_loader = setup_loader(ap, + model.decoder.r, + is_val=False, + dataset=train_loader.dataset) + eval_loader = setup_loader(ap, + model.decoder.r, + is_val=True, + dataset=eval_loader.dataset) + print("\n > Number of output frames:", model.decoder.r) + # train one epoch + train_avg_loss_dict, global_step = train(train_loader, model, + criterion, optimizer, + optimizer_st, scheduler, ap, + global_step, epoch, scaler, + scaler_st) + # eval one epoch + eval_avg_loss_dict = evaluate(eval_loader, model, criterion, ap, + global_step, epoch) + c_logger.print_epoch_end(epoch, eval_avg_loss_dict) + target_loss = train_avg_loss_dict['avg_postnet_loss'] + if c.run_eval: + target_loss = eval_avg_loss_dict['avg_postnet_loss'] + best_loss = save_best_model( + target_loss, + best_loss, + model, + optimizer, + global_step, + epoch, + c.r, + OUT_PATH, + scaler=scaler.state_dict() if c.mixed_precision else None) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument( + '--continue_path', + type=str, + help='Training output folder to continue training. Use to continue a training. If it is used, "config_path" is ignored.', + default='', + required='--config_path' not in sys.argv) + parser.add_argument( + '--restore_path', + type=str, + help='Model file to be restored. Use to finetune a model.', + default='') + parser.add_argument( + '--config_path', + type=str, + help='Path to config file for training.', + required='--continue_path' not in sys.argv + ) + parser.add_argument('--debug', + type=bool, + default=False, + help='Do not verify commit integrity to run training.') + + # DISTRUBUTED + parser.add_argument( + '--rank', + type=int, + default=0, + help='DISTRIBUTED: process rank for distributed training.') + parser.add_argument('--group_id', + type=str, + default="", + help='DISTRIBUTED: process group id.') + args = parser.parse_args() + + if args.continue_path != '': + print(f" > Training continues for {args.continue_path}") + args.output_path = args.continue_path + args.config_path = os.path.join(args.continue_path, 'config.json') + list_of_files = glob.glob(args.continue_path + "/*.pth.tar") # * means all if need specific format then *.csv + latest_model_file = max(list_of_files, key=os.path.getctime) + args.restore_path = latest_model_file + + # setup output paths and read configs + c = load_config(args.config_path) + check_config_tts(c) + _ = os.path.dirname(os.path.realpath(__file__)) + + if c.mixed_precision: + print(" > Mixed precision mode is ON") + + OUT_PATH = args.continue_path + if args.continue_path == '': + OUT_PATH = create_experiment_folder(c.output_path, c.run_name, args.debug) + + AUDIO_PATH = os.path.join(OUT_PATH, 'test_audios') + + c_logger = ConsoleLogger() + + if args.rank == 0: + os.makedirs(AUDIO_PATH, exist_ok=True) + new_fields = {} + if args.restore_path: + new_fields["restore_path"] = args.restore_path + new_fields["github_branch"] = get_git_branch() + copy_model_files(c, args.config_path, + OUT_PATH, new_fields) + os.chmod(AUDIO_PATH, 0o775) + os.chmod(OUT_PATH, 0o775) + + LOG_DIR = OUT_PATH + tb_logger = TensorboardLogger(LOG_DIR, model_name='TTS') + + # write model desc to tensorboard + tb_logger.tb_add_text('model-description', c['run_description'], 0) + + try: + main(args) + except KeyboardInterrupt: + 
remove_experiment_folder(OUT_PATH) + try: + sys.exit(0) + except SystemExit: + os._exit(0) # pylint: disable=protected-access + except Exception: # pylint: disable=broad-except + remove_experiment_folder(OUT_PATH) + traceback.print_exc() + sys.exit(1) diff --git a/TTS/bin/train_vocoder_gan.py b/TTS/bin/train_vocoder_gan.py new file mode 100644 index 0000000000000000000000000000000000000000..5f1e8c636ee47e78e55fc2e16a96ee195130a8a9 --- /dev/null +++ b/TTS/bin/train_vocoder_gan.py @@ -0,0 +1,664 @@ +import argparse +import glob +import os +import sys +import time +import traceback +from inspect import signature + +import torch +from torch.utils.data import DataLoader +from TTS.utils.audio import AudioProcessor +from TTS.utils.console_logger import ConsoleLogger +from TTS.utils.generic_utils import (KeepAverage, count_parameters, + create_experiment_folder, get_git_branch, + remove_experiment_folder, set_init_dict) +from TTS.utils.io import copy_model_files, load_config +from TTS.utils.radam import RAdam +from TTS.utils.tensorboard_logger import TensorboardLogger +from TTS.utils.training import setup_torch_training_env +from TTS.vocoder.datasets.gan_dataset import GANDataset +from TTS.vocoder.datasets.preprocess import load_wav_data, load_wav_feat_data +from TTS.vocoder.layers.losses import DiscriminatorLoss, GeneratorLoss +from TTS.vocoder.utils.generic_utils import (plot_results, setup_discriminator, + setup_generator) +from TTS.vocoder.utils.io import save_best_model, save_checkpoint + +# DISTRIBUTED +from torch.nn.parallel import DistributedDataParallel as DDP_th +from torch.utils.data.distributed import DistributedSampler +from TTS.utils.distribute import init_distributed + +use_cuda, num_gpus = setup_torch_training_env(True, True) + + +def setup_loader(ap, is_val=False, verbose=False): + if is_val and not c.run_eval: + loader = None + else: + dataset = GANDataset(ap=ap, + items=eval_data if is_val else train_data, + seq_len=c.seq_len, + hop_len=ap.hop_length, + pad_short=c.pad_short, + conv_pad=c.conv_pad, + is_training=not is_val, + return_segments=not is_val, + use_noise_augment=c.use_noise_augment, + use_cache=c.use_cache, + verbose=verbose) + dataset.shuffle_mapping() + sampler = DistributedSampler(dataset, shuffle=True) if num_gpus > 1 else None + loader = DataLoader(dataset, + batch_size=1 if is_val else c.batch_size, + shuffle=False if num_gpus > 1 else True, + drop_last=False, + sampler=sampler, + num_workers=c.num_val_loader_workers + if is_val else c.num_loader_workers, + pin_memory=False) + return loader + + +def format_data(data): + if isinstance(data[0], list): + # setup input data + c_G, x_G = data[0] + c_D, x_D = data[1] + + # dispatch data to GPU + if use_cuda: + c_G = c_G.cuda(non_blocking=True) + x_G = x_G.cuda(non_blocking=True) + c_D = c_D.cuda(non_blocking=True) + x_D = x_D.cuda(non_blocking=True) + + return c_G, x_G, c_D, x_D + + # return a whole audio segment + co, x = data + if use_cuda: + co = co.cuda(non_blocking=True) + x = x.cuda(non_blocking=True) + return co, x, None, None + + +def train(model_G, criterion_G, optimizer_G, model_D, criterion_D, optimizer_D, + scheduler_G, scheduler_D, ap, global_step, epoch): + data_loader = setup_loader(ap, is_val=False, verbose=(epoch == 0)) + model_G.train() + model_D.train() + epoch_time = 0 + keep_avg = KeepAverage() + if use_cuda: + batch_n_iter = int( + len(data_loader.dataset) / (c.batch_size * num_gpus)) + else: + batch_n_iter = int(len(data_loader.dataset) / c.batch_size) + end_time = time.time() + 
c_logger.print_train_start() + for num_iter, data in enumerate(data_loader): + start_time = time.time() + + # format data + c_G, y_G, c_D, y_D = format_data(data) + loader_time = time.time() - end_time + + global_step += 1 + + ############################## + # GENERATOR + ############################## + + # generator pass + y_hat = model_G(c_G) + y_hat_sub = None + y_G_sub = None + y_hat_vis = y_hat # for visualization + + # PQMF formatting + if y_hat.shape[1] > 1: + y_hat_sub = y_hat + y_hat = model_G.pqmf_synthesis(y_hat) + y_hat_vis = y_hat + y_G_sub = model_G.pqmf_analysis(y_G) + + scores_fake, feats_fake, feats_real = None, None, None + if global_step > c.steps_to_start_discriminator: + + # run D with or without cond. features + if len(signature(model_D.forward).parameters) == 2: + D_out_fake = model_D(y_hat, c_G) + else: + D_out_fake = model_D(y_hat) + D_out_real = None + + if c.use_feat_match_loss: + with torch.no_grad(): + D_out_real = model_D(y_G) + + # format D outputs + if isinstance(D_out_fake, tuple): + scores_fake, feats_fake = D_out_fake + if D_out_real is None: + feats_real = None + else: + _, feats_real = D_out_real + else: + scores_fake = D_out_fake + + # compute losses + loss_G_dict = criterion_G(y_hat, y_G, scores_fake, feats_fake, + feats_real, y_hat_sub, y_G_sub) + loss_G = loss_G_dict['G_loss'] + + # optimizer generator + optimizer_G.zero_grad() + loss_G.backward() + if c.gen_clip_grad > 0: + torch.nn.utils.clip_grad_norm_(model_G.parameters(), + c.gen_clip_grad) + optimizer_G.step() + if scheduler_G is not None: + scheduler_G.step() + + loss_dict = dict() + for key, value in loss_G_dict.items(): + if isinstance(value, int): + loss_dict[key] = value + else: + loss_dict[key] = value.item() + + ############################## + # DISCRIMINATOR + ############################## + if global_step >= c.steps_to_start_discriminator: + # discriminator pass + with torch.no_grad(): + y_hat = model_G(c_D) + + # PQMF formatting + if y_hat.shape[1] > 1: + y_hat = model_G.pqmf_synthesis(y_hat) + + # run D with or without cond. 
features + if len(signature(model_D.forward).parameters) == 2: + D_out_fake = model_D(y_hat.detach(), c_D) + D_out_real = model_D(y_D, c_D) + else: + D_out_fake = model_D(y_hat.detach()) + D_out_real = model_D(y_D) + + # format D outputs + if isinstance(D_out_fake, tuple): + scores_fake, feats_fake = D_out_fake + if D_out_real is None: + scores_real, feats_real = None, None + else: + scores_real, feats_real = D_out_real + else: + scores_fake = D_out_fake + scores_real = D_out_real + + # compute losses + loss_D_dict = criterion_D(scores_fake, scores_real) + loss_D = loss_D_dict['D_loss'] + + # optimizer discriminator + optimizer_D.zero_grad() + loss_D.backward() + if c.disc_clip_grad > 0: + torch.nn.utils.clip_grad_norm_(model_D.parameters(), + c.disc_clip_grad) + optimizer_D.step() + if scheduler_D is not None: + scheduler_D.step() + + for key, value in loss_D_dict.items(): + if isinstance(value, (int, float)): + loss_dict[key] = value + else: + loss_dict[key] = value.item() + + step_time = time.time() - start_time + epoch_time += step_time + + # get current learning rates + current_lr_G = list(optimizer_G.param_groups)[0]['lr'] + current_lr_D = list(optimizer_D.param_groups)[0]['lr'] + + # update avg stats + update_train_values = dict() + for key, value in loss_dict.items(): + update_train_values['avg_' + key] = value + update_train_values['avg_loader_time'] = loader_time + update_train_values['avg_step_time'] = step_time + keep_avg.update_values(update_train_values) + + # print training stats + if global_step % c.print_step == 0: + log_dict = { + 'step_time': [step_time, 2], + 'loader_time': [loader_time, 4], + "current_lr_G": current_lr_G, + "current_lr_D": current_lr_D + } + c_logger.print_train_step(batch_n_iter, num_iter, global_step, + log_dict, loss_dict, keep_avg.avg_values) + + if args.rank == 0: + # plot step stats + if global_step % 10 == 0: + iter_stats = { + "lr_G": current_lr_G, + "lr_D": current_lr_D, + "step_time": step_time + } + iter_stats.update(loss_dict) + tb_logger.tb_train_iter_stats(global_step, iter_stats) + + # save checkpoint + if global_step % c.save_step == 0: + if c.checkpoint: + # save model + save_checkpoint(model_G, + optimizer_G, + scheduler_G, + model_D, + optimizer_D, + scheduler_D, + global_step, + epoch, + OUT_PATH, + model_losses=loss_dict) + + # compute spectrograms + figures = plot_results(y_hat_vis, y_G, ap, global_step, + 'train') + tb_logger.tb_train_figures(global_step, figures) + + # Sample audio + sample_voice = y_hat_vis[0].squeeze(0).detach().cpu().numpy() + tb_logger.tb_train_audios(global_step, + {'train/audio': sample_voice}, + c.audio["sample_rate"]) + end_time = time.time() + + # print epoch stats + c_logger.print_train_epoch_end(global_step, epoch, epoch_time, keep_avg) + + # Plot Training Epoch Stats + epoch_stats = {"epoch_time": epoch_time} + epoch_stats.update(keep_avg.avg_values) + if args.rank == 0: + tb_logger.tb_train_epoch_stats(global_step, epoch_stats) + # TODO: plot model stats + # if c.tb_model_param_stats: + # tb_logger.tb_model_weights(model, global_step) + return keep_avg.avg_values, global_step + + +@torch.no_grad() +def evaluate(model_G, criterion_G, model_D, criterion_D, ap, global_step, epoch): + data_loader = setup_loader(ap, is_val=True, verbose=(epoch == 0)) + model_G.eval() + model_D.eval() + epoch_time = 0 + keep_avg = KeepAverage() + end_time = time.time() + c_logger.print_eval_start() + for num_iter, data in enumerate(data_loader): + start_time = time.time() + + # format data + c_G, y_G, _, _ = 
format_data(data) + loader_time = time.time() - end_time + + global_step += 1 + + ############################## + # GENERATOR + ############################## + + # generator pass + y_hat = model_G(c_G) + y_hat_sub = None + y_G_sub = None + + # PQMF formatting + if y_hat.shape[1] > 1: + y_hat_sub = y_hat + y_hat = model_G.pqmf_synthesis(y_hat) + y_G_sub = model_G.pqmf_analysis(y_G) + + scores_fake, feats_fake, feats_real = None, None, None + if global_step > c.steps_to_start_discriminator: + + if len(signature(model_D.forward).parameters) == 2: + D_out_fake = model_D(y_hat, c_G) + else: + D_out_fake = model_D(y_hat) + D_out_real = None + + if c.use_feat_match_loss: + with torch.no_grad(): + D_out_real = model_D(y_G) + + # format D outputs + if isinstance(D_out_fake, tuple): + scores_fake, feats_fake = D_out_fake + if D_out_real is None: + feats_real = None + else: + _, feats_real = D_out_real + else: + scores_fake = D_out_fake + feats_fake, feats_real = None, None + + # compute losses + loss_G_dict = criterion_G(y_hat, y_G, scores_fake, feats_fake, + feats_real, y_hat_sub, y_G_sub) + + loss_dict = dict() + for key, value in loss_G_dict.items(): + if isinstance(value, (int, float)): + loss_dict[key] = value + else: + loss_dict[key] = value.item() + + ############################## + # DISCRIMINATOR + ############################## + + if global_step >= c.steps_to_start_discriminator: + # discriminator pass + with torch.no_grad(): + y_hat = model_G(c_G) + + # PQMF formatting + if y_hat.shape[1] > 1: + y_hat = model_G.pqmf_synthesis(y_hat) + + # run D with or without cond. features + if len(signature(model_D.forward).parameters) == 2: + D_out_fake = model_D(y_hat.detach(), c_G) + D_out_real = model_D(y_G, c_G) + else: + D_out_fake = model_D(y_hat.detach()) + D_out_real = model_D(y_G) + + # format D outputs + if isinstance(D_out_fake, tuple): + scores_fake, feats_fake = D_out_fake + if D_out_real is None: + scores_real, feats_real = None, None + else: + scores_real, feats_real = D_out_real + else: + scores_fake = D_out_fake + scores_real = D_out_real + + # compute losses + loss_D_dict = criterion_D(scores_fake, scores_real) + + for key, value in loss_D_dict.items(): + if isinstance(value, (int, float)): + loss_dict[key] = value + else: + loss_dict[key] = value.item() + + step_time = time.time() - start_time + epoch_time += step_time + + # update avg stats + update_eval_values = dict() + for key, value in loss_dict.items(): + update_eval_values['avg_' + key] = value + update_eval_values['avg_loader_time'] = loader_time + update_eval_values['avg_step_time'] = step_time + keep_avg.update_values(update_eval_values) + + # print eval stats + if c.print_eval: + c_logger.print_eval_step(num_iter, loss_dict, keep_avg.avg_values) + + if args.rank == 0: + # compute spectrograms + figures = plot_results(y_hat, y_G, ap, global_step, 'eval') + tb_logger.tb_eval_figures(global_step, figures) + + # Sample audio + sample_voice = y_hat[0].squeeze(0).detach().cpu().numpy() + tb_logger.tb_eval_audios(global_step, {'eval/audio': sample_voice}, + c.audio["sample_rate"]) + + tb_logger.tb_eval_stats(global_step, keep_avg.avg_values) + + # synthesize a full voice + data_loader.return_segments = False + + return keep_avg.avg_values + + +# FIXME: move args definition/parsing inside of main? 
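+# main() below is the GAN vocoder training entry point: it loads the wav (and
+# optional feature) file lists, builds the generator/discriminator with their
+# RAdam optimizers and optional LR schedulers, restores a checkpoint when given
+# (falling back to partial initialization), then alternates train() and
+# evaluate() each epoch and keeps the best model according to `c.target_loss`.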
+def main(args): # pylint: disable=redefined-outer-name + # pylint: disable=global-variable-undefined + global train_data, eval_data + print(f" > Loading wavs from: {c.data_path}") + if c.feature_path is not None: + print(f" > Loading features from: {c.feature_path}") + eval_data, train_data = load_wav_feat_data( + c.data_path, c.feature_path, c.eval_split_size) + else: + eval_data, train_data = load_wav_data(c.data_path, c.eval_split_size) + + # setup audio processor + ap = AudioProcessor(**c.audio) + + # DISTRUBUTED + if num_gpus > 1: + init_distributed(args.rank, num_gpus, args.group_id, + c.distributed["backend"], c.distributed["url"]) + + # setup models + model_gen = setup_generator(c) + model_disc = setup_discriminator(c) + + # setup optimizers + optimizer_gen = RAdam(model_gen.parameters(), lr=c.lr_gen, weight_decay=0) + optimizer_disc = RAdam(model_disc.parameters(), + lr=c.lr_disc, + weight_decay=0) + + # schedulers + scheduler_gen = None + scheduler_disc = None + if 'lr_scheduler_gen' in c: + scheduler_gen = getattr(torch.optim.lr_scheduler, c.lr_scheduler_gen) + scheduler_gen = scheduler_gen( + optimizer_gen, **c.lr_scheduler_gen_params) + if 'lr_scheduler_disc' in c: + scheduler_disc = getattr(torch.optim.lr_scheduler, c.lr_scheduler_disc) + scheduler_disc = scheduler_disc( + optimizer_disc, **c.lr_scheduler_disc_params) + + # setup criterion + criterion_gen = GeneratorLoss(c) + criterion_disc = DiscriminatorLoss(c) + + if args.restore_path: + checkpoint = torch.load(args.restore_path, map_location='cpu') + try: + print(" > Restoring Generator Model...") + model_gen.load_state_dict(checkpoint['model']) + print(" > Restoring Generator Optimizer...") + optimizer_gen.load_state_dict(checkpoint['optimizer']) + print(" > Restoring Discriminator Model...") + model_disc.load_state_dict(checkpoint['model_disc']) + print(" > Restoring Discriminator Optimizer...") + optimizer_disc.load_state_dict(checkpoint['optimizer_disc']) + if 'scheduler' in checkpoint: + print(" > Restoring Generator LR Scheduler...") + scheduler_gen.load_state_dict(checkpoint['scheduler']) + # NOTE: Not sure if necessary + scheduler_gen.optimizer = optimizer_gen + if 'scheduler_disc' in checkpoint: + print(" > Restoring Discriminator LR Scheduler...") + scheduler_disc.load_state_dict(checkpoint['scheduler_disc']) + scheduler_disc.optimizer = optimizer_disc + except RuntimeError: + # retore only matching layers. + print(" > Partial model initialization...") + model_dict = model_gen.state_dict() + model_dict = set_init_dict(model_dict, checkpoint['model'], c) + model_gen.load_state_dict(model_dict) + + model_dict = model_disc.state_dict() + model_dict = set_init_dict(model_dict, checkpoint['model_disc'], c) + model_disc.load_state_dict(model_dict) + del model_dict + + # reset lr if not countinuining training. 
+ for group in optimizer_gen.param_groups: + group['lr'] = c.lr_gen + + for group in optimizer_disc.param_groups: + group['lr'] = c.lr_disc + + print(" > Model restored from step %d" % checkpoint['step'], + flush=True) + args.restore_step = checkpoint['step'] + else: + args.restore_step = 0 + + if use_cuda: + model_gen.cuda() + criterion_gen.cuda() + model_disc.cuda() + criterion_disc.cuda() + + # DISTRUBUTED + if num_gpus > 1: + model_gen = DDP_th(model_gen, device_ids=[args.rank]) + model_disc = DDP_th(model_disc, device_ids=[args.rank]) + + num_params = count_parameters(model_gen) + print(" > Generator has {} parameters".format(num_params), flush=True) + num_params = count_parameters(model_disc) + print(" > Discriminator has {} parameters".format(num_params), flush=True) + + if 'best_loss' not in locals(): + best_loss = float('inf') + + global_step = args.restore_step + for epoch in range(0, c.epochs): + c_logger.print_epoch_start(epoch, c.epochs) + _, global_step = train(model_gen, criterion_gen, optimizer_gen, + model_disc, criterion_disc, optimizer_disc, + scheduler_gen, scheduler_disc, ap, global_step, + epoch) + eval_avg_loss_dict = evaluate(model_gen, criterion_gen, model_disc, criterion_disc, ap, + global_step, epoch) + c_logger.print_epoch_end(epoch, eval_avg_loss_dict) + target_loss = eval_avg_loss_dict[c.target_loss] + best_loss = save_best_model(target_loss, + best_loss, + model_gen, + optimizer_gen, + scheduler_gen, + model_disc, + optimizer_disc, + scheduler_disc, + global_step, + epoch, + OUT_PATH, + model_losses=eval_avg_loss_dict) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument( + '--continue_path', + type=str, + help='Training output folder to continue training. Use to continue a training. If it is used, "config_path" is ignored.', + default='', + required='--config_path' not in sys.argv) + parser.add_argument( + '--restore_path', + type=str, + help='Model file to be restored. 
Use to finetune a model.', + default='') + parser.add_argument('--config_path', + type=str, + help='Path to config file for training.', + required='--continue_path' not in sys.argv) + parser.add_argument('--debug', + type=bool, + default=False, + help='Do not verify commit integrity to run training.') + + # DISTRUBUTED + parser.add_argument( + '--rank', + type=int, + default=0, + help='DISTRIBUTED: process rank for distributed training.') + parser.add_argument('--group_id', + type=str, + default="", + help='DISTRIBUTED: process group id.') + args = parser.parse_args() + + if args.continue_path != '': + args.output_path = args.continue_path + args.config_path = os.path.join(args.continue_path, 'config.json') + list_of_files = glob.glob( + args.continue_path + + "/*.pth.tar") # * means all if need specific format then *.csv + latest_model_file = max(list_of_files, key=os.path.getctime) + args.restore_path = latest_model_file + print(f" > Training continues for {args.restore_path}") + + # setup output paths and read configs + c = load_config(args.config_path) + # check_config(c) + _ = os.path.dirname(os.path.realpath(__file__)) + + OUT_PATH = args.continue_path + if args.continue_path == '': + OUT_PATH = create_experiment_folder(c.output_path, c.run_name, + args.debug) + + AUDIO_PATH = os.path.join(OUT_PATH, 'test_audios') + + c_logger = ConsoleLogger() + + if args.rank == 0: + os.makedirs(AUDIO_PATH, exist_ok=True) + new_fields = {} + if args.restore_path: + new_fields["restore_path"] = args.restore_path + new_fields["github_branch"] = get_git_branch() + copy_model_files(c, args.config_path, + OUT_PATH, new_fields) + os.chmod(AUDIO_PATH, 0o775) + os.chmod(OUT_PATH, 0o775) + + LOG_DIR = OUT_PATH + tb_logger = TensorboardLogger(LOG_DIR, model_name='VOCODER') + + # write model desc to tensorboard + tb_logger.tb_add_text('model-description', c['run_description'], 0) + + try: + main(args) + except KeyboardInterrupt: + remove_experiment_folder(OUT_PATH) + try: + sys.exit(0) + except SystemExit: + os._exit(0) # pylint: disable=protected-access + except Exception: # pylint: disable=broad-except + remove_experiment_folder(OUT_PATH) + traceback.print_exc() + sys.exit(1) diff --git a/TTS/bin/train_vocoder_wavegrad.py b/TTS/bin/train_vocoder_wavegrad.py new file mode 100644 index 0000000000000000000000000000000000000000..73802c63f1e0e42ba835976388e1eb808f6c3f01 --- /dev/null +++ b/TTS/bin/train_vocoder_wavegrad.py @@ -0,0 +1,511 @@ +import argparse +import glob +import os +import sys +import time +import traceback +import numpy as np + +import torch +# DISTRIBUTED +from torch.nn.parallel import DistributedDataParallel as DDP_th +from torch.optim import Adam +from torch.utils.data import DataLoader +from torch.utils.data.distributed import DistributedSampler +from TTS.utils.audio import AudioProcessor +from TTS.utils.console_logger import ConsoleLogger +from TTS.utils.distribute import init_distributed +from TTS.utils.generic_utils import (KeepAverage, count_parameters, + create_experiment_folder, get_git_branch, + remove_experiment_folder, set_init_dict) +from TTS.utils.io import copy_model_files, load_config +from TTS.utils.tensorboard_logger import TensorboardLogger +from TTS.utils.training import setup_torch_training_env +from TTS.vocoder.datasets.preprocess import load_wav_data, load_wav_feat_data +from TTS.vocoder.datasets.wavegrad_dataset import WaveGradDataset +from TTS.vocoder.utils.generic_utils import plot_results, setup_generator +from TTS.vocoder.utils.io import save_best_model, 
save_checkpoint + +use_cuda, num_gpus = setup_torch_training_env(True, True) + + +def setup_loader(ap, is_val=False, verbose=False): + if is_val and not c.run_eval: + loader = None + else: + dataset = WaveGradDataset(ap=ap, + items=eval_data if is_val else train_data, + seq_len=c.seq_len, + hop_len=ap.hop_length, + pad_short=c.pad_short, + conv_pad=c.conv_pad, + is_training=not is_val, + return_segments=True, + use_noise_augment=False, + use_cache=c.use_cache, + verbose=verbose) + sampler = DistributedSampler(dataset) if num_gpus > 1 else None + loader = DataLoader(dataset, + batch_size=c.batch_size, + shuffle=num_gpus <= 1, + drop_last=False, + sampler=sampler, + num_workers=c.num_val_loader_workers + if is_val else c.num_loader_workers, + pin_memory=False) + + + return loader + + +def format_data(data): + # return a whole audio segment + m, x = data + x = x.unsqueeze(1) + if use_cuda: + m = m.cuda(non_blocking=True) + x = x.cuda(non_blocking=True) + return m, x + + +def format_test_data(data): + # return a whole audio segment + m, x = data + m = m[None, ...] + x = x[None, None, ...] + if use_cuda: + m = m.cuda(non_blocking=True) + x = x.cuda(non_blocking=True) + return m, x + + +def train(model, criterion, optimizer, + scheduler, scaler, ap, global_step, epoch): + data_loader = setup_loader(ap, is_val=False, verbose=(epoch == 0)) + model.train() + epoch_time = 0 + keep_avg = KeepAverage() + if use_cuda: + batch_n_iter = int( + len(data_loader.dataset) / (c.batch_size * num_gpus)) + else: + batch_n_iter = int(len(data_loader.dataset) / c.batch_size) + end_time = time.time() + c_logger.print_train_start() + # setup noise schedule + noise_schedule = c['train_noise_schedule'] + betas = np.linspace(noise_schedule['min_val'], noise_schedule['max_val'], noise_schedule['num_steps']) + if hasattr(model, 'module'): + model.module.compute_noise_level(betas) + else: + model.compute_noise_level(betas) + for num_iter, data in enumerate(data_loader): + start_time = time.time() + + # format data + m, x = format_data(data) + loader_time = time.time() - end_time + + global_step += 1 + + with torch.cuda.amp.autocast(enabled=c.mixed_precision): + # compute noisy input + if hasattr(model, 'module'): + noise, x_noisy, noise_scale = model.module.compute_y_n(x) + else: + noise, x_noisy, noise_scale = model.compute_y_n(x) + + # forward pass + noise_hat = model(x_noisy, m, noise_scale) + + # compute losses + loss = criterion(noise, noise_hat) + loss_wavegrad_dict = {'wavegrad_loss':loss} + + # check nan loss + if torch.isnan(loss).any(): + raise RuntimeError(f'Detected NaN loss at step {global_step}.') + + optimizer.zero_grad() + + # backward pass with loss scaling + if c.mixed_precision: + scaler.scale(loss).backward() + scaler.unscale_(optimizer) + grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), + c.clip_grad) + scaler.step(optimizer) + scaler.update() + else: + loss.backward() + grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), + c.clip_grad) + optimizer.step() + + # schedule update + if scheduler is not None: + scheduler.step() + + # disconnect loss values + loss_dict = dict() + for key, value in loss_wavegrad_dict.items(): + if isinstance(value, int): + loss_dict[key] = value + else: + loss_dict[key] = value.item() + + # epoch/step timing + step_time = time.time() - start_time + epoch_time += step_time + + # get current learning rates + current_lr = list(optimizer.param_groups)[0]['lr'] + + # update avg stats + update_train_values = dict() + for key, value in loss_dict.items(): + 
update_train_values['avg_' + key] = value + update_train_values['avg_loader_time'] = loader_time + update_train_values['avg_step_time'] = step_time + keep_avg.update_values(update_train_values) + + # print training stats + if global_step % c.print_step == 0: + log_dict = { + 'step_time': [step_time, 2], + 'loader_time': [loader_time, 4], + "current_lr": current_lr, + "grad_norm": grad_norm.item() + } + c_logger.print_train_step(batch_n_iter, num_iter, global_step, + log_dict, loss_dict, keep_avg.avg_values) + + if args.rank == 0: + # plot step stats + if global_step % 10 == 0: + iter_stats = { + "lr": current_lr, + "grad_norm": grad_norm.item(), + "step_time": step_time + } + iter_stats.update(loss_dict) + tb_logger.tb_train_iter_stats(global_step, iter_stats) + + # save checkpoint + if global_step % c.save_step == 0: + if c.checkpoint: + # save model + save_checkpoint(model, + optimizer, + scheduler, + None, + None, + None, + global_step, + epoch, + OUT_PATH, + model_losses=loss_dict, + scaler=scaler.state_dict() if c.mixed_precision else None) + + end_time = time.time() + + # print epoch stats + c_logger.print_train_epoch_end(global_step, epoch, epoch_time, keep_avg) + + # Plot Training Epoch Stats + epoch_stats = {"epoch_time": epoch_time} + epoch_stats.update(keep_avg.avg_values) + if args.rank == 0: + tb_logger.tb_train_epoch_stats(global_step, epoch_stats) + # TODO: plot model stats + if c.tb_model_param_stats and args.rank == 0: + tb_logger.tb_model_weights(model, global_step) + return keep_avg.avg_values, global_step + + +@torch.no_grad() +def evaluate(model, criterion, ap, global_step, epoch): + data_loader = setup_loader(ap, is_val=True, verbose=(epoch == 0)) + model.eval() + epoch_time = 0 + keep_avg = KeepAverage() + end_time = time.time() + c_logger.print_eval_start() + for num_iter, data in enumerate(data_loader): + start_time = time.time() + + # format data + m, x = format_data(data) + loader_time = time.time() - end_time + + global_step += 1 + + # compute noisy input + if hasattr(model, 'module'): + noise, x_noisy, noise_scale = model.module.compute_y_n(x) + else: + noise, x_noisy, noise_scale = model.compute_y_n(x) + + + # forward pass + noise_hat = model(x_noisy, m, noise_scale) + + # compute losses + loss = criterion(noise, noise_hat) + loss_wavegrad_dict = {'wavegrad_loss':loss} + + + loss_dict = dict() + for key, value in loss_wavegrad_dict.items(): + if isinstance(value, (int, float)): + loss_dict[key] = value + else: + loss_dict[key] = value.item() + + step_time = time.time() - start_time + epoch_time += step_time + + # update avg stats + update_eval_values = dict() + for key, value in loss_dict.items(): + update_eval_values['avg_' + key] = value + update_eval_values['avg_loader_time'] = loader_time + update_eval_values['avg_step_time'] = step_time + keep_avg.update_values(update_eval_values) + + # print eval stats + if c.print_eval: + c_logger.print_eval_step(num_iter, loss_dict, keep_avg.avg_values) + + if args.rank == 0: + data_loader.dataset.return_segments = False + samples = data_loader.dataset.load_test_samples(1) + m, x = format_test_data(samples[0]) + + # setup noise schedule and inference + noise_schedule = c['test_noise_schedule'] + betas = np.linspace(noise_schedule['min_val'], noise_schedule['max_val'], noise_schedule['num_steps']) + if hasattr(model, 'module'): + model.module.compute_noise_level(betas) + # compute voice + x_pred = model.module.inference(m) + else: + model.compute_noise_level(betas) + # compute voice + x_pred = model.inference(m) + + # 
compute spectrograms + figures = plot_results(x_pred, x, ap, global_step, 'eval') + tb_logger.tb_eval_figures(global_step, figures) + + # Sample audio + sample_voice = x_pred[0].squeeze(0).detach().cpu().numpy() + tb_logger.tb_eval_audios(global_step, {'eval/audio': sample_voice}, + c.audio["sample_rate"]) + + tb_logger.tb_eval_stats(global_step, keep_avg.avg_values) + data_loader.dataset.return_segments = True + + return keep_avg.avg_values + + +def main(args): # pylint: disable=redefined-outer-name + # pylint: disable=global-variable-undefined + global train_data, eval_data + print(f" > Loading wavs from: {c.data_path}") + if c.feature_path is not None: + print(f" > Loading features from: {c.feature_path}") + eval_data, train_data = load_wav_feat_data(c.data_path, c.feature_path, c.eval_split_size) + else: + eval_data, train_data = load_wav_data(c.data_path, c.eval_split_size) + + # setup audio processor + ap = AudioProcessor(**c.audio) + + # DISTRUBUTED + if num_gpus > 1: + init_distributed(args.rank, num_gpus, args.group_id, + c.distributed["backend"], c.distributed["url"]) + + # setup models + model = setup_generator(c) + + # scaler for mixed_precision + scaler = torch.cuda.amp.GradScaler() if c.mixed_precision else None + + # setup optimizers + optimizer = Adam(model.parameters(), lr=c.lr, weight_decay=0) + + # schedulers + scheduler = None + if 'lr_scheduler' in c: + scheduler = getattr(torch.optim.lr_scheduler, c.lr_scheduler) + scheduler = scheduler(optimizer, **c.lr_scheduler_params) + + # setup criterion + criterion = torch.nn.L1Loss().cuda() + + if args.restore_path: + checkpoint = torch.load(args.restore_path, map_location='cpu') + try: + print(" > Restoring Model...") + model.load_state_dict(checkpoint['model']) + print(" > Restoring Optimizer...") + optimizer.load_state_dict(checkpoint['optimizer']) + if 'scheduler' in checkpoint: + print(" > Restoring LR Scheduler...") + scheduler.load_state_dict(checkpoint['scheduler']) + # NOTE: Not sure if necessary + scheduler.optimizer = optimizer + if "scaler" in checkpoint and c.mixed_precision: + print(" > Restoring AMP Scaler...") + scaler.load_state_dict(checkpoint["scaler"]) + except RuntimeError: + # retore only matching layers. + print(" > Partial model initialization...") + model_dict = model.state_dict() + model_dict = set_init_dict(model_dict, checkpoint['model'], c) + model.load_state_dict(model_dict) + del model_dict + + # reset lr if not countinuining training. 
+ for group in optimizer.param_groups: + group['lr'] = c.lr + + print(" > Model restored from step %d" % checkpoint['step'], + flush=True) + args.restore_step = checkpoint['step'] + else: + args.restore_step = 0 + + if use_cuda: + model.cuda() + criterion.cuda() + + # DISTRUBUTED + if num_gpus > 1: + model = DDP_th(model, device_ids=[args.rank]) + + num_params = count_parameters(model) + print(" > WaveGrad has {} parameters".format(num_params), flush=True) + + if 'best_loss' not in locals(): + best_loss = float('inf') + + global_step = args.restore_step + for epoch in range(0, c.epochs): + c_logger.print_epoch_start(epoch, c.epochs) + _, global_step = train(model, criterion, optimizer, + scheduler, scaler, ap, global_step, + epoch) + eval_avg_loss_dict = evaluate(model, criterion, ap, + global_step, epoch) + c_logger.print_epoch_end(epoch, eval_avg_loss_dict) + target_loss = eval_avg_loss_dict[c.target_loss] + best_loss = save_best_model(target_loss, + best_loss, + model, + optimizer, + scheduler, + None, + None, + None, + global_step, + epoch, + OUT_PATH, + model_losses=eval_avg_loss_dict, + scaler=scaler.state_dict() if c.mixed_precision else None) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument( + '--continue_path', + type=str, + help= + 'Training output folder to continue training. Use to continue a training. If it is used, "config_path" is ignored.', + default='', + required='--config_path' not in sys.argv) + parser.add_argument( + '--restore_path', + type=str, + help='Model file to be restored. Use to finetune a model.', + default='') + parser.add_argument('--config_path', + type=str, + help='Path to config file for training.', + required='--continue_path' not in sys.argv) + parser.add_argument('--debug', + type=bool, + default=False, + help='Do not verify commit integrity to run training.') + + # DISTRUBUTED + parser.add_argument( + '--rank', + type=int, + default=0, + help='DISTRIBUTED: process rank for distributed training.') + parser.add_argument('--group_id', + type=str, + default="", + help='DISTRIBUTED: process group id.') + args = parser.parse_args() + + if args.continue_path != '': + args.output_path = args.continue_path + args.config_path = os.path.join(args.continue_path, 'config.json') + list_of_files = glob.glob( + args.continue_path + + "/*.pth.tar") # * means all if need specific format then *.csv + latest_model_file = max(list_of_files, key=os.path.getctime) + args.restore_path = latest_model_file + print(f" > Training continues for {args.restore_path}") + + # setup output paths and read configs + c = load_config(args.config_path) + # check_config(c) + _ = os.path.dirname(os.path.realpath(__file__)) + + # DISTRIBUTED + if c.mixed_precision: + print(" > Mixed precision is enabled") + + OUT_PATH = args.continue_path + if args.continue_path == '': + OUT_PATH = create_experiment_folder(c.output_path, c.run_name, + args.debug) + + AUDIO_PATH = os.path.join(OUT_PATH, 'test_audios') + + c_logger = ConsoleLogger() + + if args.rank == 0: + os.makedirs(AUDIO_PATH, exist_ok=True) + new_fields = {} + if args.restore_path: + new_fields["restore_path"] = args.restore_path + new_fields["github_branch"] = get_git_branch() + copy_model_files(c, args.config_path, + OUT_PATH, new_fields) + os.chmod(AUDIO_PATH, 0o775) + os.chmod(OUT_PATH, 0o775) + + LOG_DIR = OUT_PATH + tb_logger = TensorboardLogger(LOG_DIR, model_name='VOCODER') + + # write model desc to tensorboard + tb_logger.tb_add_text('model-description', c['run_description'], 0) + + try: 
+ main(args) + except KeyboardInterrupt: + remove_experiment_folder(OUT_PATH) + try: + sys.exit(0) + except SystemExit: + os._exit(0) # pylint: disable=protected-access + except Exception: # pylint: disable=broad-except + remove_experiment_folder(OUT_PATH) + traceback.print_exc() + sys.exit(1) diff --git a/TTS/bin/train_vocoder_wavernn.py b/TTS/bin/train_vocoder_wavernn.py new file mode 100644 index 0000000000000000000000000000000000000000..cad357dcdaf767bd52740c2a29b76d59d37f73ee --- /dev/null +++ b/TTS/bin/train_vocoder_wavernn.py @@ -0,0 +1,539 @@ +import argparse +import os +import sys +import traceback +import time +import glob +import random + +import torch +from torch.utils.data import DataLoader + +# from torch.utils.data.distributed import DistributedSampler + +from TTS.tts.utils.visual import plot_spectrogram +from TTS.utils.audio import AudioProcessor +from TTS.utils.radam import RAdam +from TTS.utils.io import copy_model_files, load_config +from TTS.utils.training import setup_torch_training_env +from TTS.utils.console_logger import ConsoleLogger +from TTS.utils.tensorboard_logger import TensorboardLogger +from TTS.utils.generic_utils import ( + KeepAverage, + count_parameters, + create_experiment_folder, + get_git_branch, + remove_experiment_folder, + set_init_dict, +) +from TTS.vocoder.datasets.wavernn_dataset import WaveRNNDataset +from TTS.vocoder.datasets.preprocess import ( + load_wav_data, + load_wav_feat_data +) +from TTS.vocoder.utils.distribution import discretized_mix_logistic_loss, gaussian_loss +from TTS.vocoder.utils.generic_utils import setup_wavernn +from TTS.vocoder.utils.io import save_best_model, save_checkpoint + + +use_cuda, num_gpus = setup_torch_training_env(True, True) + + +def setup_loader(ap, is_val=False, verbose=False): + if is_val and not c.run_eval: + loader = None + else: + dataset = WaveRNNDataset(ap=ap, + items=eval_data if is_val else train_data, + seq_len=c.seq_len, + hop_len=ap.hop_length, + pad=c.padding, + mode=c.mode, + mulaw=c.mulaw, + is_training=not is_val, + verbose=verbose, + ) + # sampler = DistributedSampler(dataset) if num_gpus > 1 else None + loader = DataLoader(dataset, + shuffle=True, + collate_fn=dataset.collate, + batch_size=c.batch_size, + num_workers=c.num_val_loader_workers + if is_val + else c.num_loader_workers, + pin_memory=True, + ) + return loader + + +def format_data(data): + # setup input data + x_input = data[0] + mels = data[1] + y_coarse = data[2] + + # dispatch data to GPU + if use_cuda: + x_input = x_input.cuda(non_blocking=True) + mels = mels.cuda(non_blocking=True) + y_coarse = y_coarse.cuda(non_blocking=True) + + return x_input, mels, y_coarse + + +def train(model, optimizer, criterion, scheduler, scaler, ap, global_step, epoch): + # create train loader + data_loader = setup_loader(ap, is_val=False, verbose=(epoch == 0)) + model.train() + epoch_time = 0 + keep_avg = KeepAverage() + if use_cuda: + batch_n_iter = int(len(data_loader.dataset) / + (c.batch_size * num_gpus)) + else: + batch_n_iter = int(len(data_loader.dataset) / c.batch_size) + end_time = time.time() + c_logger.print_train_start() + # train loop + for num_iter, data in enumerate(data_loader): + start_time = time.time() + x_input, mels, y_coarse = format_data(data) + loader_time = time.time() - end_time + global_step += 1 + + optimizer.zero_grad() + + if c.mixed_precision: + # mixed precision training + with torch.cuda.amp.autocast(): + y_hat = model(x_input, mels) + if isinstance(model.mode, int): + y_hat = y_hat.transpose(1, 2).unsqueeze(-1) + 
else: + y_coarse = y_coarse.float() + y_coarse = y_coarse.unsqueeze(-1) + # compute losses + loss = criterion(y_hat, y_coarse) + scaler.scale(loss).backward() + scaler.unscale_(optimizer) + if c.grad_clip > 0: + torch.nn.utils.clip_grad_norm_( + model.parameters(), c.grad_clip) + scaler.step(optimizer) + scaler.update() + else: + # full precision training + y_hat = model(x_input, mels) + if isinstance(model.mode, int): + y_hat = y_hat.transpose(1, 2).unsqueeze(-1) + else: + y_coarse = y_coarse.float() + y_coarse = y_coarse.unsqueeze(-1) + # compute losses + loss = criterion(y_hat, y_coarse) + if loss.item() is None: + raise RuntimeError(" [!] None loss. Exiting ...") + loss.backward() + if c.grad_clip > 0: + torch.nn.utils.clip_grad_norm_( + model.parameters(), c.grad_clip) + optimizer.step() + + if scheduler is not None: + scheduler.step() + + # get the current learning rate + cur_lr = list(optimizer.param_groups)[0]["lr"] + + step_time = time.time() - start_time + epoch_time += step_time + + update_train_values = dict() + loss_dict = dict() + loss_dict["model_loss"] = loss.item() + for key, value in loss_dict.items(): + update_train_values["avg_" + key] = value + update_train_values["avg_loader_time"] = loader_time + update_train_values["avg_step_time"] = step_time + keep_avg.update_values(update_train_values) + + # print training stats + if global_step % c.print_step == 0: + log_dict = {"step_time": [step_time, 2], + "loader_time": [loader_time, 4], + "current_lr": cur_lr, + } + c_logger.print_train_step(batch_n_iter, + num_iter, + global_step, + log_dict, + loss_dict, + keep_avg.avg_values, + ) + + # plot step stats + if global_step % 10 == 0: + iter_stats = {"lr": cur_lr, "step_time": step_time} + iter_stats.update(loss_dict) + tb_logger.tb_train_iter_stats(global_step, iter_stats) + + # save checkpoint + if global_step % c.save_step == 0: + if c.checkpoint: + # save model + save_checkpoint(model, + optimizer, + scheduler, + None, + None, + None, + global_step, + epoch, + OUT_PATH, + model_losses=loss_dict, + scaler=scaler.state_dict() if c.mixed_precision else None + ) + + # synthesize a full voice + rand_idx = random.randrange(0, len(train_data)) + wav_path = train_data[rand_idx] if not isinstance( + train_data[rand_idx], (tuple, list)) else train_data[rand_idx][0] + wav = ap.load_wav(wav_path) + ground_mel = ap.melspectrogram(wav) + sample_wav = model.generate(ground_mel, + c.batched, + c.target_samples, + c.overlap_samples, + use_cuda + ) + predict_mel = ap.melspectrogram(sample_wav) + + # compute spectrograms + figures = {"train/ground_truth": plot_spectrogram(ground_mel.T), + "train/prediction": plot_spectrogram(predict_mel.T) + } + tb_logger.tb_train_figures(global_step, figures) + + # Sample audio + tb_logger.tb_train_audios( + global_step, { + "train/audio": sample_wav}, c.audio["sample_rate"] + ) + end_time = time.time() + + # print epoch stats + c_logger.print_train_epoch_end(global_step, epoch, epoch_time, keep_avg) + + # Plot Training Epoch Stats + epoch_stats = {"epoch_time": epoch_time} + epoch_stats.update(keep_avg.avg_values) + tb_logger.tb_train_epoch_stats(global_step, epoch_stats) + # TODO: plot model stats + # if c.tb_model_param_stats: + # tb_logger.tb_model_weights(model, global_step) + return keep_avg.avg_values, global_step + + +@torch.no_grad() +def evaluate(model, criterion, ap, global_step, epoch): + # create train loader + data_loader = setup_loader(ap, is_val=True, verbose=(epoch == 0)) + model.eval() + epoch_time = 0 + keep_avg = KeepAverage() + end_time 
= time.time() + c_logger.print_eval_start() + with torch.no_grad(): + for num_iter, data in enumerate(data_loader): + start_time = time.time() + # format data + x_input, mels, y_coarse = format_data(data) + loader_time = time.time() - end_time + global_step += 1 + + y_hat = model(x_input, mels) + if isinstance(model.mode, int): + y_hat = y_hat.transpose(1, 2).unsqueeze(-1) + else: + y_coarse = y_coarse.float() + y_coarse = y_coarse.unsqueeze(-1) + loss = criterion(y_hat, y_coarse) + # Compute avg loss + # if num_gpus > 1: + # loss = reduce_tensor(loss.data, num_gpus) + loss_dict = dict() + loss_dict["model_loss"] = loss.item() + + step_time = time.time() - start_time + epoch_time += step_time + + # update avg stats + update_eval_values = dict() + for key, value in loss_dict.items(): + update_eval_values["avg_" + key] = value + update_eval_values["avg_loader_time"] = loader_time + update_eval_values["avg_step_time"] = step_time + keep_avg.update_values(update_eval_values) + + # print eval stats + if c.print_eval: + c_logger.print_eval_step( + num_iter, loss_dict, keep_avg.avg_values) + + if epoch % c.test_every_epochs == 0 and epoch != 0: + # synthesize a full voice + rand_idx = random.randrange(0, len(eval_data)) + wav_path = eval_data[rand_idx] if not isinstance( + eval_data[rand_idx], (tuple, list)) else eval_data[rand_idx][0] + wav = ap.load_wav(wav_path) + ground_mel = ap.melspectrogram(wav) + sample_wav = model.generate(ground_mel, + c.batched, + c.target_samples, + c.overlap_samples, + use_cuda + ) + predict_mel = ap.melspectrogram(sample_wav) + + # Sample audio + tb_logger.tb_eval_audios( + global_step, { + "eval/audio": sample_wav}, c.audio["sample_rate"] + ) + + # compute spectrograms + figures = {"eval/ground_truth": plot_spectrogram(ground_mel.T), + "eval/prediction": plot_spectrogram(predict_mel.T) + } + tb_logger.tb_eval_figures(global_step, figures) + + tb_logger.tb_eval_stats(global_step, keep_avg.avg_values) + return keep_avg.avg_values + + +# FIXME: move args definition/parsing inside of main? +def main(args): # pylint: disable=redefined-outer-name + # pylint: disable=global-variable-undefined + global train_data, eval_data + + # setup audio processor + ap = AudioProcessor(**c.audio) + + # print(f" > Loading wavs from: {c.data_path}") + # if c.feature_path is not None: + # print(f" > Loading features from: {c.feature_path}") + # eval_data, train_data = load_wav_feat_data( + # c.data_path, c.feature_path, c.eval_split_size + # ) + # else: + # mel_feat_path = os.path.join(OUT_PATH, "mel") + # feat_data = find_feat_files(mel_feat_path) + # if feat_data: + # print(f" > Loading features from: {mel_feat_path}") + # eval_data, train_data = load_wav_feat_data( + # c.data_path, mel_feat_path, c.eval_split_size + # ) + # else: + # print(" > No feature data found. 
Preprocessing...") + # # preprocessing feature data from given wav files + # preprocess_wav_files(OUT_PATH, CONFIG, ap) + # eval_data, train_data = load_wav_feat_data( + # c.data_path, mel_feat_path, c.eval_split_size + # ) + + print(f" > Loading wavs from: {c.data_path}") + if c.feature_path is not None: + print(f" > Loading features from: {c.feature_path}") + eval_data, train_data = load_wav_feat_data( + c.data_path, c.feature_path, c.eval_split_size) + else: + eval_data, train_data = load_wav_data( + c.data_path, c.eval_split_size) + # setup model + model_wavernn = setup_wavernn(c) + + # setup amp scaler + scaler = torch.cuda.amp.GradScaler() if c.mixed_precision else None + + # define train functions + if c.mode == "mold": + criterion = discretized_mix_logistic_loss + elif c.mode == "gauss": + criterion = gaussian_loss + elif isinstance(c.mode, int): + criterion = torch.nn.CrossEntropyLoss() + + if use_cuda: + model_wavernn.cuda() + if isinstance(c.mode, int): + criterion.cuda() + + optimizer = RAdam(model_wavernn.parameters(), lr=c.lr, weight_decay=0) + + scheduler = None + if "lr_scheduler" in c: + scheduler = getattr(torch.optim.lr_scheduler, c.lr_scheduler) + scheduler = scheduler(optimizer, **c.lr_scheduler_params) + # slow start for the first 5 epochs + # lr_lambda = lambda epoch: min(epoch / c.warmup_steps, 1) + # scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda) + + # restore any checkpoint + if args.restore_path: + checkpoint = torch.load(args.restore_path, map_location="cpu") + try: + print(" > Restoring Model...") + model_wavernn.load_state_dict(checkpoint["model"]) + print(" > Restoring Optimizer...") + optimizer.load_state_dict(checkpoint["optimizer"]) + if "scheduler" in checkpoint: + print(" > Restoring Generator LR Scheduler...") + scheduler.load_state_dict(checkpoint["scheduler"]) + scheduler.optimizer = optimizer + if "scaler" in checkpoint and c.mixed_precision: + print(" > Restoring AMP Scaler...") + scaler.load_state_dict(checkpoint["scaler"]) + except RuntimeError: + # retore only matching layers. + print(" > Partial model initialization...") + model_dict = model_wavernn.state_dict() + model_dict = set_init_dict(model_dict, checkpoint["model"], c) + model_wavernn.load_state_dict(model_dict) + + print(" > Model restored from step %d" % + checkpoint["step"], flush=True) + args.restore_step = checkpoint["step"] + else: + args.restore_step = 0 + + # DISTRIBUTED + # if num_gpus > 1: + # model = apply_gradient_allreduce(model) + + num_parameters = count_parameters(model_wavernn) + print(" > Model has {} parameters".format(num_parameters), flush=True) + + if "best_loss" not in locals(): + best_loss = float("inf") + + global_step = args.restore_step + for epoch in range(0, c.epochs): + c_logger.print_epoch_start(epoch, c.epochs) + _, global_step = train(model_wavernn, optimizer, + criterion, scheduler, scaler, ap, global_step, epoch) + eval_avg_loss_dict = evaluate( + model_wavernn, criterion, ap, global_step, epoch) + c_logger.print_epoch_end(epoch, eval_avg_loss_dict) + target_loss = eval_avg_loss_dict["avg_model_loss"] + best_loss = save_best_model( + target_loss, + best_loss, + model_wavernn, + optimizer, + scheduler, + None, + None, + None, + global_step, + epoch, + OUT_PATH, + model_losses=eval_avg_loss_dict, + scaler=scaler.state_dict() if c.mixed_precision else None + ) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--continue_path", + type=str, + help='Training output folder to continue training. 
Use to continue a training. If it is used, "config_path" is ignored.', + default="", + required="--config_path" not in sys.argv, + ) + parser.add_argument( + "--restore_path", + type=str, + help="Model file to be restored. Use to finetune a model.", + default="", + ) + parser.add_argument( + "--config_path", + type=str, + help="Path to config file for training.", + required="--continue_path" not in sys.argv, + ) + parser.add_argument( + "--debug", + type=bool, + default=False, + help="Do not verify commit integrity to run training.", + ) + + # DISTRUBUTED + parser.add_argument( + "--rank", + type=int, + default=0, + help="DISTRIBUTED: process rank for distributed training.", + ) + parser.add_argument( + "--group_id", type=str, default="", help="DISTRIBUTED: process group id." + ) + args = parser.parse_args() + + if args.continue_path != "": + args.output_path = args.continue_path + args.config_path = os.path.join(args.continue_path, "config.json") + list_of_files = glob.glob( + args.continue_path + "/*.pth.tar" + ) # * means all if need specific format then *.csv + latest_model_file = max(list_of_files, key=os.path.getctime) + args.restore_path = latest_model_file + print(f" > Training continues for {args.restore_path}") + + # setup output paths and read configs + c = load_config(args.config_path) + # check_config(c) + _ = os.path.dirname(os.path.realpath(__file__)) + + OUT_PATH = args.continue_path + if args.continue_path == "": + OUT_PATH = create_experiment_folder( + c.output_path, c.run_name, args.debug + ) + + AUDIO_PATH = os.path.join(OUT_PATH, "test_audios") + + c_logger = ConsoleLogger() + + if args.rank == 0: + os.makedirs(AUDIO_PATH, exist_ok=True) + new_fields = {} + if args.restore_path: + new_fields["restore_path"] = args.restore_path + new_fields["github_branch"] = get_git_branch() + copy_model_files( + c, args.config_path, OUT_PATH, new_fields + ) + os.chmod(AUDIO_PATH, 0o775) + os.chmod(OUT_PATH, 0o775) + + LOG_DIR = OUT_PATH + tb_logger = TensorboardLogger(LOG_DIR, model_name="VOCODER") + + # write model desc to tensorboard + tb_logger.tb_add_text("model-description", c["run_description"], 0) + + try: + main(args) + except KeyboardInterrupt: + remove_experiment_folder(OUT_PATH) + try: + sys.exit(0) + except SystemExit: + os._exit(0) # pylint: disable=protected-access + except Exception: # pylint: disable=broad-except + remove_experiment_folder(OUT_PATH) + traceback.print_exc() + sys.exit(1) diff --git a/TTS/bin/tune_wavegrad.py b/TTS/bin/tune_wavegrad.py new file mode 100644 index 0000000000000000000000000000000000000000..7461282d268b6dfacdf740f8dfa1faddab240093 --- /dev/null +++ b/TTS/bin/tune_wavegrad.py @@ -0,0 +1,91 @@ +"""Search a good noise schedule for WaveGrad for a given number of inferece iterations""" +import argparse +from itertools import product as cartesian_product + +import numpy as np +import torch +from torch.utils.data import DataLoader +from tqdm import tqdm +from TTS.utils.audio import AudioProcessor +from TTS.utils.io import load_config +from TTS.vocoder.datasets.preprocess import load_wav_data +from TTS.vocoder.datasets.wavegrad_dataset import WaveGradDataset +from TTS.vocoder.utils.generic_utils import setup_generator + +parser = argparse.ArgumentParser() +parser.add_argument('--model_path', type=str, help='Path to model checkpoint.') +parser.add_argument('--config_path', type=str, help='Path to model config file.') +parser.add_argument('--data_path', type=str, help='Path to data directory.') +parser.add_argument('--output_path', type=str, 
help='path for output file including file name and extension.') +parser.add_argument('--num_iter', type=int, help='Number of model inference iterations that you like to optimize noise schedule for.') +parser.add_argument('--use_cuda', type=bool, help='enable/disable CUDA.') +parser.add_argument('--num_samples', type=int, default=1, help='Number of datasamples used for inference.') +parser.add_argument('--search_depth', type=int, default=3, help='Search granularity. Increasing this increases the run-time exponentially.') + +# load config +args = parser.parse_args() +config = load_config(args.config_path) + +# setup audio processor +ap = AudioProcessor(**config.audio) + +# load dataset +_, train_data = load_wav_data(args.data_path, 0) +train_data = train_data[:args.num_samples] +dataset = WaveGradDataset(ap=ap, + items=train_data, + seq_len=-1, + hop_len=ap.hop_length, + pad_short=config.pad_short, + conv_pad=config.conv_pad, + is_training=True, + return_segments=False, + use_noise_augment=False, + use_cache=False, + verbose=True) +loader = DataLoader( + dataset, + batch_size=1, + shuffle=False, + collate_fn=dataset.collate_full_clips, + drop_last=False, + num_workers=config.num_loader_workers, + pin_memory=False) + +# setup the model +model = setup_generator(config) +if args.use_cuda: + model.cuda() + +# setup optimization parameters +base_values = sorted(10 * np.random.uniform(size=args.search_depth)) +print(base_values) +exponents = 10 ** np.linspace(-6, -1, num=args.num_iter) +best_error = float('inf') +best_schedule = None +total_search_iter = len(base_values)**args.num_iter +for base in tqdm(cartesian_product(base_values, repeat=args.num_iter), total=total_search_iter): + beta = exponents * base + model.compute_noise_level(beta) + for data in loader: + mel, audio = data + y_hat = model.inference(mel.cuda() if args.use_cuda else mel) + + if args.use_cuda: + y_hat = y_hat.cpu() + y_hat = y_hat.numpy() + + mel_hat = [] + for i in range(y_hat.shape[0]): + m = ap.melspectrogram(y_hat[i, 0])[:, :-1] + mel_hat.append(torch.from_numpy(m)) + + mel_hat = torch.stack(mel_hat) + mse = torch.sum((mel - mel_hat) ** 2).mean() + if mse.item() < best_error: + best_error = mse.item() + best_schedule = {'beta': beta} + print(f" > Found a better schedule. - MSE: {mse.item()}") + np.save(args.output_path, best_schedule) + + diff --git a/TTS/server/README.md b/TTS/server/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a8d8635a3ae04e9ee4f7e8ba4cdc19458567c893 --- /dev/null +++ b/TTS/server/README.md @@ -0,0 +1,65 @@ +## TTS example web-server + +You'll need a model package (Zip file, includes TTS Python wheel, model files, server configuration, and optional nginx/uwsgi configs). Publicly available models are listed [here](https://github.com/mozilla/TTS/wiki/Released-Models#simple-packaging---self-contained-package-that-runs-an-http-api-for-a-pre-trained-tts-model). + +Instructions below are based on a Ubuntu 18.04 machine, but it should be simple to adapt the package names to other distros if needed. Python 3.6 is recommended, as some of the dependencies' versions predate Python 3.7 and will force building from source, which requires extra dependencies and is not guaranteed to work. + +#### Development server: + +##### Using server.py +If you have the environment set already for TTS, then you can directly call ```server.py```. + +**Note:** After installing TTS as a package you can use ```tts-server``` to call the commands below. 
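+
+For example, assuming the package is installed and the `tts-server` entry point is on your `PATH`, the first command below could equivalently be run as:
+```bash
+# equivalent of `python TTS/server/server.py --list_models` after a package install
+tts-server --list_models
+```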
+
+Example runs:
+
+List officially released models.
+```python TTS/server/server.py --list_models ```
+
+Run the server with the official models.
+```python TTS/server/server.py --model_name tts_models/en/ljspeech/tacotron2-DCA --vocoder_name vocoder_models/en/ljspeech/mulitband-melgan```
+
+Run the server with the official models on a GPU.
+```CUDA_VISIBLE_DEVICES="0" python TTS/server/server.py --model_name tts_models/en/ljspeech/tacotron2-DCA --vocoder_name vocoder_models/en/ljspeech/mulitband-melgan --use_cuda True```
+
+Run the server with custom models.
+```python TTS/server/server.py --tts_checkpoint /path/to/tts/model.pth.tar --tts_config /path/to/tts/config.json --vocoder_checkpoint /path/to/vocoder/model.pth.tar --vocoder_config /path/to/vocoder/config.json```
+
+##### Using .whl
+1. apt-get install -y espeak libsndfile1 python3-venv
+2. python3 -m venv /tmp/venv
+3. source /tmp/venv/bin/activate
+4. pip install -U pip setuptools wheel
+5. pip install -U https://example.com/url/to/python/package.whl
+6. python -m TTS.server.server
+
+You can now open http://localhost:5002 in a browser.
+
+#### Running with nginx/uwsgi:
+
+**Note:** This method uses an old TTS model, so quality might be low.
+
+1. apt-get install -y uwsgi uwsgi-plugin-python3 nginx espeak libsndfile1 python3-venv
+2. python3 -m venv /tmp/venv
+3. source /tmp/venv/bin/activate
+4. pip install -U pip setuptools wheel
+5. pip install -U https://example.com/url/to/python/package.whl
+6. curl -LO https://github.com/reuben/TTS/releases/download/t2-ljspeech-mold/t2-ljspeech-mold-nginx-uwsgi.zip
+7. unzip *-nginx-uwsgi.zip
+8. cp tts_site_nginx /etc/nginx/sites-enabled/default
+9. service nginx restart
+10. uwsgi --ini uwsgi.ini
+
+You can now open http://localhost:80 in a browser (edit the port in /etc/nginx/sites-enabled/tts_site_nginx).
+Configure the number of workers (the number of requests processed in parallel) by editing the `uwsgi.ini` file, specifically the `processes` setting.
+
+#### Creating a server package with an embedded model
+
+[setup.py](../setup.py) was extended with two new parameters when running the `bdist_wheel` command:
+
+- `--checkpoint` - path to the model checkpoint file you want to embed in the package
+- `--model_config` - path to the corresponding config.json file for the checkpoint
+
+To create a package, run `python setup.py bdist_wheel --checkpoint /path/to/checkpoint --model_config /path/to/config.json`.
+
+A Python `.whl` file will be created in the `dist/` folder with the checkpoint and config embedded in it.
diff --git a/TTS/server/__init__.py b/TTS/server/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/TTS/server/conf.json b/TTS/server/conf.json
new file mode 100644
index 0000000000000000000000000000000000000000..32e475cf5691064969753f3be595a11c2dfe9baf
--- /dev/null
+++ b/TTS/server/conf.json
@@ -0,0 +1,12 @@
+{
+    "tts_path":"/media/erogol/data_ssd/Models/libri_tts/5049/", // tts model root folder
+    "tts_file":"best_model.pth.tar", // tts checkpoint file
+    "tts_config":"config.json", // tts config.json file
+    "tts_speakers": null, // json file listing speaker ids. null if no speaker embedding.
+ "vocoder_config":null, + "vocoder_file": null, + "is_wavernn_batched":true, + "port": 5002, + "use_cuda": true, + "debug": true +} diff --git a/TTS/server/server.py b/TTS/server/server.py new file mode 100644 index 0000000000000000000000000000000000000000..1f7357af9ee77847c7ead5e47500846d48416d1c --- /dev/null +++ b/TTS/server/server.py @@ -0,0 +1,116 @@ +#!flask/bin/python +import argparse +import os +import sys +import io +from pathlib import Path + +from flask import Flask, render_template, request, send_file +from TTS.utils.synthesizer import Synthesizer +from TTS.utils.manage import ModelManager +from TTS.utils.io import load_config + + +def create_argparser(): + def convert_boolean(x): + return x.lower() in ['true', '1', 'yes'] + + parser = argparse.ArgumentParser() + parser.add_argument('--list_models', type=convert_boolean, nargs='?', const=True, default=False, help='list available pre-trained tts and vocoder models.') + parser.add_argument('--model_name', type=str, help='name of one of the released tts models.') + parser.add_argument('--vocoder_name', type=str, help='name of one of the released vocoder models.') + parser.add_argument('--tts_checkpoint', type=str, help='path to custom tts checkpoint file') + parser.add_argument('--tts_config', type=str, help='path to custom tts config.json file') + parser.add_argument('--tts_speakers', type=str, help='path to JSON file containing speaker ids, if speaker ids are used in the model') + parser.add_argument('--vocoder_config', type=str, default=None, help='path to vocoder config file.') + parser.add_argument('--vocoder_checkpoint', type=str, default=None, help='path to vocoder checkpoint file.') + parser.add_argument('--port', type=int, default=5002, help='port to listen on.') + parser.add_argument('--use_cuda', type=convert_boolean, default=False, help='true to use CUDA.') + parser.add_argument('--debug', type=convert_boolean, default=False, help='true to enable Flask debug mode.') + parser.add_argument('--show_details', type=convert_boolean, default=False, help='Generate model detail page.') + return parser + +synthesizer = None + +embedded_models_folder = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'model') + +embedded_tts_folder = os.path.join(embedded_models_folder, 'tts') +tts_checkpoint_file = os.path.join(embedded_tts_folder, 'checkpoint.pth.tar') +tts_config_file = os.path.join(embedded_tts_folder, 'config.json') + +embedded_vocoder_folder = os.path.join(embedded_models_folder, 'vocoder') +vocoder_checkpoint_file = os.path.join(embedded_vocoder_folder, 'checkpoint.pth.tar') +vocoder_config_file = os.path.join(embedded_vocoder_folder, 'config.json') + +# These models are soon to be deprecated +embedded_wavernn_folder = os.path.join(embedded_models_folder, 'wavernn') +wavernn_checkpoint_file = os.path.join(embedded_wavernn_folder, 'checkpoint.pth.tar') +wavernn_config_file = os.path.join(embedded_wavernn_folder, 'config.json') + +args = create_argparser().parse_args() + +path = Path(__file__).parent / "../.models.json" +manager = ModelManager(path) + +if args.list_models: + manager.list_models() + sys.exit() + +# set models by the released models +if args.model_name is not None: + tts_checkpoint_file, tts_config_file = manager.download_model(args.model_name) + +if args.vocoder_name is not None: + vocoder_checkpoint_file, vocoder_config_file = manager.download_model(args.vocoder_name) + +# If these were not specified in the CLI args, use default values with embedded model files +if not args.tts_checkpoint and 
os.path.isfile(tts_checkpoint_file): + args.tts_checkpoint = tts_checkpoint_file +if not args.tts_config and os.path.isfile(tts_config_file): + args.tts_config = tts_config_file + +if not args.vocoder_checkpoint and os.path.isfile(vocoder_checkpoint_file): + args.vocoder_checkpoint = vocoder_checkpoint_file +if not args.vocoder_config and os.path.isfile(vocoder_config_file): + args.vocoder_config = vocoder_config_file + +synthesizer = Synthesizer(args.tts_checkpoint, args.tts_config, args.vocoder_checkpoint, args.vocoder_config, args.use_cuda) + +app = Flask(__name__) + + +@app.route('/') +def index(): + return render_template('index.html', show_details=args.show_details) + +@app.route('/details') +def details(): + model_config = load_config(args.tts_config) + if args.vocoder_config is not None and os.path.isfile(args.vocoder_config): + vocoder_config = load_config(args.vocoder_config) + else: + vocoder_config = None + + return render_template('details.html', + show_details=args.show_details + , model_config=model_config + , vocoder_config=vocoder_config + , args=args.__dict__ + ) + +@app.route('/api/tts', methods=['GET']) +def tts(): + text = request.args.get('text') + print(" > Model input: {}".format(text)) + wavs = synthesizer.tts(text) + out = io.BytesIO() + synthesizer.save_wav(wavs, out) + return send_file(out, mimetype='audio/wav') + + +def main(): + app.run(debug=args.debug, host='0.0.0.0', port=args.port) + + +if __name__ == '__main__': + main() diff --git a/TTS/server/static/TTS_circle.png b/TTS/server/static/TTS_circle.png new file mode 100644 index 0000000000000000000000000000000000000000..34755811aa0459e7d4cf3ad5de26527d555a93dc Binary files /dev/null and b/TTS/server/static/TTS_circle.png differ diff --git a/TTS/server/templates/details.html b/TTS/server/templates/details.html new file mode 100644 index 0000000000000000000000000000000000000000..51c9ed85a83ac0aab045623ee1e6c430fbe51b9d --- /dev/null +++ b/TTS/server/templates/details.html @@ -0,0 +1,131 @@ + + + + + + + + + + + TTS engine + + + + + + + + + + Fork me on GitHub + + {% if show_details == true %} + +
+ Model details +
+ +
+
+ CLI arguments: + + + + + + + {% for key, value in args.items() %} + + + + + + + {% endfor %} +
CLI key Value
{{ key }}{{ value }}
+
+

+ +
+ + {% if model_config != None %} + +
+ Model config: + + + + + + + + + {% for key, value in model_config.items() %} + + + + + + + {% endfor %} + +
Key Value
{{ key }}{{ value }}
+
+ + {% endif %} + +

+ + + +
+ {% if vocoder_config != None %} +
+ Vocoder model config: + + + + + + + + + {% for key, value in vocoder_config.items() %} + + + + + + + {% endfor %} + + +
Key Value
{{ key }}{{ value }}
+
+ {% endif %} +

+ + {% else %} +
+ Please start server with --show_details=true to see details. +
+ + {% endif %} + + + + \ No newline at end of file diff --git a/TTS/server/templates/index.html b/TTS/server/templates/index.html new file mode 100644 index 0000000000000000000000000000000000000000..8c3c631de16276df67159cfb1c336ee4581f8bd3 --- /dev/null +++ b/TTS/server/templates/index.html @@ -0,0 +1,114 @@ + + + + + + + + + + + TTS engine + + + + + + + + + + Fork me on GitHub + + + + + +
+
+
+ + +
    +
+ +

+ {%if show_details%} +

+ {%endif%} + +

+
+
+
+ + + + + + +
diff --git a/TTS/speaker_encoder/README.md b/TTS/speaker_encoder/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..b6f541f884f6165a37540cc7fae4df7bf2fa2ac7
--- /dev/null
+++ b/TTS/speaker_encoder/README.md
@@ -0,0 +1,18 @@
+### Speaker Encoder
+
+This is an implementation of https://arxiv.org/abs/1710.10467. This model can be used for voice and speaker embedding.
+
+With the code here you can generate d-vectors for both multi-speaker and single-speaker TTS datasets, then visualise and explore them along with the associated audio files in an interactive chart.
+
+Below is an example showing embedding results of various speakers. You can generate the same plot with the provided notebook as demonstrated in [this video](https://youtu.be/KW3oO7JVa7Q).
+
+![](umap.png)
+
+Download a pretrained model from the [Released Models](https://github.com/mozilla/TTS/wiki/Released-Models) page.
+
+To run the code, you need to follow the same flow as in TTS.
+
+- Define 'config.json' for your needs. Note that the audio parameters should match your TTS model.
+- Example training call: ```python speaker_encoder/train.py --config_path speaker_encoder/config.json --data_path ~/Data/Libri-TTS/train-clean-360```
+- Generate embedding vectors: ```python speaker_encoder/compute_embeddings.py --use_cuda true /model/path/best_model.pth.tar model/config/path/config.json dataset/path/ output_path```. This code parses all .wav files at the given dataset path and generates the same folder structure under the output path with the generated embedding files.
+- Watch training on Tensorboard as in TTS.
diff --git a/TTS/speaker_encoder/__init__.py b/TTS/speaker_encoder/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/TTS/speaker_encoder/config.json b/TTS/speaker_encoder/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..4fbd84cc3bc45bc0055d99d36be376223aa9b8a0
--- /dev/null
+++ b/TTS/speaker_encoder/config.json
@@ -0,0 +1,103 @@
+
+{
+    "run_name": "mueller91",
+    "run_description": "train speaker encoder with voxceleb1, voxceleb2 and libriSpeech ",
+    "audio":{
+        // Audio processing parameters
+        "num_mels": 40,          // size of the mel spec frame.
+        "fft_size": 400,         // number of stft frequency levels. Size of the linear spectrogram frame.
+        "sample_rate": 16000,    // DATASET-RELATED: wav sample-rate. If different from the original data, it is resampled.
+        "win_length": 400,       // stft window length in samples.
+        "hop_length": 160,       // stft window hop-length in samples.
+        "frame_length_ms": null, // stft window length in ms. If null, 'win_length' is used.
+        "frame_shift_ms": null,  // stft window hop-length in ms. If null, 'hop_length' is used.
+        "preemphasis": 0.98,     // pre-emphasis to reduce spec noise and make it more structured. If 0.0, pre-emphasis is disabled.
+        "min_level_db": -100,    // normalization range
+        "ref_level_db": 20,      // reference level db, theoretically 20db is the sound of air.
+        "power": 1.5,            // value to sharpen wav signals after GL algorithm.
+        "griffin_lim_iters": 60, // #griffin-lim iterations. 30-60 is a good range. The larger the value, the slower the generation.
+        // Normalization parameters
+        "signal_norm": true,     // normalize the spec values in range [0, 1]
+        "symmetric_norm": true,  // move normalization to range [-1, 1]
+        "max_norm": 4.0,         // scale normalization to range [-max_norm, max_norm] or [0, max_norm]
+        "clip_norm": true,       // clip normalized values into the range.
+        "mel_fmin": 0.0,         // minimum freq level for mel-spec. ~50 for male and ~95 for female voices. Tune for dataset!!
+        "mel_fmax": 8000.0,      // maximum freq level for mel-spec. Tune for dataset!!
+        "do_trim_silence": true, // enable trimming of silence as you load the audio. LJspeech (false), TWEB (false), Nancy (true)
+        "trim_db": 60            // threshold for trimming silence. Set this according to your dataset.
+    },
+    "reinit_layers": [],
+    "loss": "angleproto",        // "ge2e" to use Generalized End-to-End loss and "angleproto" to use Angular Prototypical loss (new SOTA)
+    "grad_clip": 3.0,            // upper limit for gradients for clipping.
+    "epochs": 1000,              // total number of epochs to train.
+    "lr": 0.0001,                // Initial learning rate. If Noam decay is active, maximum learning rate.
+    "lr_decay": false,           // if true, Noam learning rate decaying is applied through training.
+    "warmup_steps": 4000,        // Noam decay steps to increase the learning rate from 0 to "lr"
+    "tb_model_param_stats": false, // if true, plots param stats per layer on tensorboard. Might be memory consuming, but good for debugging.
+    "steps_plot_stats": 10,      // number of steps to plot embeddings.
+    "num_speakers_in_batch": 64, // Number of speakers sampled per training batch.
+    "num_utters_per_speaker": 10, // Number of utterances sampled per speaker in a batch.
+    "num_loader_workers": 8,     // number of training data loader processes. Don't set it too big. 4-8 are good values.
+    "wd": 0.000001,              // Weight decay weight.
+    "checkpoint": true,          // If true, it saves checkpoints per "save_step"
+    "save_step": 1000,           // Number of training steps expected to save training stats and checkpoints.
+    "print_step": 20,            // Number of steps to log training on console.
+    "output_path": "../../MozillaTTSOutput/checkpoints/voxceleb_librispeech/speaker_encoder/", // DATASET-RELATED: output path for all training outputs.
+ "model": { + "input_dim": 40, + "proj_dim": 256, + "lstm_dim": 768, + "num_lstm_layers": 3, + "use_lstm_with_projection": true + }, + "storage": { + "sample_from_storage_p": 0.66, // the probability with which we'll sample from the DataSet in-memory storage + "storage_size": 15, // the size of the in-memory storage with respect to a single batch + "additive_noise": 1e-5 // add very small gaussian noise to the data in order to increase robustness + }, + "datasets": + [ + { + "name": "vctk_slim", + "path": "../../../audio-datasets/en/VCTK-Corpus/", + "meta_file_train": null, + "meta_file_val": null + }, + { + "name": "libri_tts", + "path": "../../../audio-datasets/en/LibriTTS/train-clean-100", + "meta_file_train": null, + "meta_file_val": null + }, + { + "name": "libri_tts", + "path": "../../../audio-datasets/en/LibriTTS/train-clean-360", + "meta_file_train": null, + "meta_file_val": null + }, + { + "name": "libri_tts", + "path": "../../../audio-datasets/en/LibriTTS/train-other-500", + "meta_file_train": null, + "meta_file_val": null + }, + { + "name": "voxceleb1", + "path": "../../../audio-datasets/en/voxceleb1/", + "meta_file_train": null, + "meta_file_val": null + }, + { + "name": "voxceleb2", + "path": "../../../audio-datasets/en/voxceleb2/", + "meta_file_train": null, + "meta_file_val": null + }, + { + "name": "common_voice", + "path": "../../../audio-datasets/en/MozillaCommonVoice", + "meta_file_train": "train.tsv", + "meta_file_val": "test.tsv" + } + ] +} \ No newline at end of file diff --git a/TTS/speaker_encoder/dataset.py b/TTS/speaker_encoder/dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..33cc4f363ac0fffefd332a510a9749af0985c758 --- /dev/null +++ b/TTS/speaker_encoder/dataset.py @@ -0,0 +1,169 @@ +import numpy +import numpy as np +import queue +import torch +import random +from torch.utils.data import Dataset +from tqdm import tqdm + + +class MyDataset(Dataset): + def __init__(self, ap, meta_data, voice_len=1.6, num_speakers_in_batch=64, + storage_size=1, sample_from_storage_p=0.5, additive_noise=0, + num_utter_per_speaker=10, skip_speakers=False, verbose=False): + """ + Args: + ap (TTS.tts.utils.AudioProcessor): audio processor object. + meta_data (list): list of dataset instances. + seq_len (int): voice segment length in seconds. + verbose (bool): print diagnostic information. 
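+            num_speakers_in_batch (int): number of speakers sampled per batch.
+            num_utter_per_speaker (int): number of utterances sampled for each speaker.
+            storage_size (int): size of the in-memory utterance storage, in multiples of a batch.
+            sample_from_storage_p (float): probability of re-using utterances from the in-memory storage instead of loading them from disk.
+            additive_noise (float): std of the gaussian noise added to utterances for robustness; 0 disables it.
+            skip_speakers (bool): if True, skip speakers with fewer than `num_utter_per_speaker` utterances.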
+ """ + self.items = meta_data + self.sample_rate = ap.sample_rate + self.voice_len = voice_len + self.seq_len = int(voice_len * self.sample_rate) + self.num_speakers_in_batch = num_speakers_in_batch + self.num_utter_per_speaker = num_utter_per_speaker + self.skip_speakers = skip_speakers + self.ap = ap + self.verbose = verbose + self.__parse_items() + self.storage = queue.Queue(maxsize=storage_size*num_speakers_in_batch) + self.sample_from_storage_p = float(sample_from_storage_p) + self.additive_noise = float(additive_noise) + if self.verbose: + print("\n > DataLoader initialization") + print(f" | > Speakers per Batch: {num_speakers_in_batch}") + print(f" | > Storage Size: {self.storage.maxsize} speakers, each with {num_utter_per_speaker} utters") + print(f" | > Sample_from_storage_p : {self.sample_from_storage_p}") + print(f" | > Noise added : {self.additive_noise}") + print(f" | > Number of instances : {len(self.items)}") + print(f" | > Sequence length: {self.seq_len}") + print(f" | > Num speakers: {len(self.speakers)}") + + def load_wav(self, filename): + audio = self.ap.load_wav(filename, sr=self.ap.sample_rate) + return audio + + def load_data(self, idx): + text, wav_file, speaker_name = self.items[idx] + wav = np.asarray(self.load_wav(wav_file), dtype=np.float32) + mel = self.ap.melspectrogram(wav).astype("float32") + # sample seq_len + + assert text.size > 0, self.items[idx][1] + assert wav.size > 0, self.items[idx][1] + + sample = { + "mel": mel, + "item_idx": self.items[idx][1], + "speaker_name": speaker_name, + } + return sample + + def __parse_items(self): + self.speaker_to_utters = {} + for i in self.items: + path_ = i[1] + speaker_ = i[2] + if speaker_ in self.speaker_to_utters.keys(): + self.speaker_to_utters[speaker_].append(path_) + else: + self.speaker_to_utters[speaker_] = [path_, ] + + if self.skip_speakers: + self.speaker_to_utters = {k: v for (k, v) in self.speaker_to_utters.items() if + len(v) >= self.num_utter_per_speaker} + + self.speakers = [k for (k, v) in self.speaker_to_utters.items()] + + # def __parse_items(self): + # """ + # Find unique speaker ids and create a dict mapping utterances from speaker id + # """ + # speakers = list({item[-1] for item in self.items}) + # self.speaker_to_utters = {} + # self.speakers = [] + # for speaker in speakers: + # speaker_utters = [item[1] for item in self.items if item[2] == speaker] + # if len(speaker_utters) < self.num_utter_per_speaker and self.skip_speakers: + # print( + # f" [!] Skipped speaker {speaker}. Not enough utterances {self.num_utter_per_speaker} vs {len(speaker_utters)}." + # ) + # else: + # self.speakers.append(speaker) + # self.speaker_to_utters[speaker] = speaker_utters + + def __len__(self): + return int(1e10) + + def __sample_speaker(self): + speaker = random.sample(self.speakers, 1)[0] + if self.num_utter_per_speaker > len(self.speaker_to_utters[speaker]): + utters = random.choices( + self.speaker_to_utters[speaker], k=self.num_utter_per_speaker + ) + else: + utters = random.sample( + self.speaker_to_utters[speaker], self.num_utter_per_speaker + ) + return speaker, utters + + def __sample_speaker_utterances(self, speaker): + """ + Sample all M utterances for the given speaker. 
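+        Utterances shorter than `seq_len` are removed from the speaker's pool and re-sampled; if a speaker's pool runs empty, a different speaker is drawn instead.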
+ """ + wavs = [] + labels = [] + for _ in range(self.num_utter_per_speaker): + # TODO:dummy but works + while True: + if len(self.speaker_to_utters[speaker]) > 0: + utter = random.sample(self.speaker_to_utters[speaker], 1)[0] + else: + self.speakers.remove(speaker) + speaker, _ = self.__sample_speaker() + continue + wav = self.load_wav(utter) + if wav.shape[0] - self.seq_len > 0: + break + self.speaker_to_utters[speaker].remove(utter) + + wavs.append(wav) + labels.append(speaker) + return wavs, labels + + def __getitem__(self, idx): + speaker, _ = self.__sample_speaker() + return speaker + + def collate_fn(self, batch): + labels = [] + feats = [] + for speaker in batch: + if random.random() < self.sample_from_storage_p and self.storage.full(): + # sample from storage (if full), ignoring the speaker + wavs_, labels_ = random.choice(self.storage.queue) + else: + # don't sample from storage, but from HDD + wavs_, labels_ = self.__sample_speaker_utterances(speaker) + # if storage is full, remove an item + if self.storage.full(): + _ = self.storage.get_nowait() + # put the newly loaded item into storage + self.storage.put_nowait((wavs_, labels_)) + + # add random gaussian noise + if self.additive_noise > 0: + noises_ = [numpy.random.normal(0, self.additive_noise, size=len(w)) for w in wavs_] + wavs_ = [wavs_[i] + noises_[i] for i in range(len(wavs_))] + + # get a random subset of each of the wavs and convert to MFCC. + offsets_ = [random.randint(0, wav.shape[0] - self.seq_len) for wav in wavs_] + mels_ = [self.ap.melspectrogram(wavs_[i][offsets_[i]: offsets_[i] + self.seq_len]) for i in range(len(wavs_))] + feats_ = [torch.FloatTensor(mel) for mel in mels_] + + labels.append(labels_) + feats.extend(feats_) + feats = torch.stack(feats) + return feats.transpose(1, 2), labels diff --git a/TTS/speaker_encoder/losses.py b/TTS/speaker_encoder/losses.py new file mode 100644 index 0000000000000000000000000000000000000000..35ff73fa746403b7c744d9aea002fa679530ba1c --- /dev/null +++ b/TTS/speaker_encoder/losses.py @@ -0,0 +1,160 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np + +# adapted from https://github.com/cvqluu/GE2E-Loss +class GE2ELoss(nn.Module): + def __init__(self, init_w=10.0, init_b=-5.0, loss_method="softmax"): + """ + Implementation of the Generalized End-to-End loss defined in https://arxiv.org/abs/1710.10467 [1] + Accepts an input of size (N, M, D) + where N is the number of speakers in the batch, + M is the number of utterances per speaker, + and D is the dimensionality of the embedding vector (e.g. 
d-vector) + Args: + - init_w (float): defines the initial value of w in Equation (5) of [1] + - init_b (float): definies the initial value of b in Equation (5) of [1] + """ + super(GE2ELoss, self).__init__() + # pylint: disable=E1102 + self.w = nn.Parameter(torch.tensor(init_w)) + # pylint: disable=E1102 + self.b = nn.Parameter(torch.tensor(init_b)) + self.loss_method = loss_method + + print(' > Initialised Generalized End-to-End loss') + + assert self.loss_method in ["softmax", "contrast"] + + if self.loss_method == "softmax": + self.embed_loss = self.embed_loss_softmax + if self.loss_method == "contrast": + self.embed_loss = self.embed_loss_contrast + + # pylint: disable=R0201 + def calc_new_centroids(self, dvecs, centroids, spkr, utt): + """ + Calculates the new centroids excluding the reference utterance + """ + excl = torch.cat((dvecs[spkr, :utt], dvecs[spkr, utt + 1 :])) + excl = torch.mean(excl, 0) + new_centroids = [] + for i, centroid in enumerate(centroids): + if i == spkr: + new_centroids.append(excl) + else: + new_centroids.append(centroid) + return torch.stack(new_centroids) + + def calc_cosine_sim(self, dvecs, centroids): + """ + Make the cosine similarity matrix with dims (N,M,N) + """ + cos_sim_matrix = [] + for spkr_idx, speaker in enumerate(dvecs): + cs_row = [] + for utt_idx, utterance in enumerate(speaker): + new_centroids = self.calc_new_centroids( + dvecs, centroids, spkr_idx, utt_idx + ) + # vector based cosine similarity for speed + cs_row.append( + torch.clamp( + torch.mm( + utterance.unsqueeze(1).transpose(0, 1), + new_centroids.transpose(0, 1), + ) + / (torch.norm(utterance) * torch.norm(new_centroids, dim=1)), + 1e-6, + ) + ) + cs_row = torch.cat(cs_row, dim=0) + cos_sim_matrix.append(cs_row) + return torch.stack(cos_sim_matrix) + + # pylint: disable=R0201 + def embed_loss_softmax(self, dvecs, cos_sim_matrix): + """ + Calculates the loss on each embedding $L(e_{ji})$ by taking softmax + """ + N, M, _ = dvecs.shape + L = [] + for j in range(N): + L_row = [] + for i in range(M): + L_row.append(-F.log_softmax(cos_sim_matrix[j, i], 0)[j]) + L_row = torch.stack(L_row) + L.append(L_row) + return torch.stack(L) + + # pylint: disable=R0201 + def embed_loss_contrast(self, dvecs, cos_sim_matrix): + """ + Calculates the loss on each embedding $L(e_{ji})$ by contrast loss with closest centroid + """ + N, M, _ = dvecs.shape + L = [] + for j in range(N): + L_row = [] + for i in range(M): + centroids_sigmoids = torch.sigmoid(cos_sim_matrix[j, i]) + excl_centroids_sigmoids = torch.cat( + (centroids_sigmoids[:j], centroids_sigmoids[j + 1 :]) + ) + L_row.append( + 1.0 + - torch.sigmoid(cos_sim_matrix[j, i, j]) + + torch.max(excl_centroids_sigmoids) + ) + L_row = torch.stack(L_row) + L.append(L_row) + return torch.stack(L) + + def forward(self, dvecs): + """ + Calculates the GE2E loss for an input of dimensions (num_speakers, num_utts_per_speaker, dvec_feats) + """ + centroids = torch.mean(dvecs, 1) + cos_sim_matrix = self.calc_cosine_sim(dvecs, centroids) + torch.clamp(self.w, 1e-6) + cos_sim_matrix = self.w * cos_sim_matrix + self.b + L = self.embed_loss(dvecs, cos_sim_matrix) + return L.mean() + +# adapted from https://github.com/clovaai/voxceleb_trainer/blob/master/loss/angleproto.py +class AngleProtoLoss(nn.Module): + """ + Implementation of the Angular Prototypical loss defined in https://arxiv.org/abs/2003.11982 + Accepts an input of size (N, M, D) + where N is the number of speakers in the batch, + M is the number of utterances per speaker, + and D is the dimensionality of 
the embedding vector + Args: + - init_w (float): defines the initial value of w + - init_b (float): definies the initial value of b + """ + def __init__(self, init_w=10.0, init_b=-5.0): + super(AngleProtoLoss, self).__init__() + # pylint: disable=E1102 + self.w = nn.Parameter(torch.tensor(init_w)) + # pylint: disable=E1102 + self.b = nn.Parameter(torch.tensor(init_b)) + self.criterion = torch.nn.CrossEntropyLoss() + + print(' > Initialised Angular Prototypical loss') + + def forward(self, x): + """ + Calculates the AngleProto loss for an input of dimensions (num_speakers, num_utts_per_speaker, dvec_feats) + """ + out_anchor = torch.mean(x[:, 1:, :], 1) + out_positive = x[:, 0, :] + num_speakers = out_anchor.size()[0] + + cos_sim_matrix = F.cosine_similarity(out_positive.unsqueeze(-1).expand(-1, -1, num_speakers), out_anchor.unsqueeze(-1).expand(-1, -1, num_speakers).transpose(0, 2)) + torch.clamp(self.w, 1e-6) + cos_sim_matrix = cos_sim_matrix * self.w + self.b + label = torch.from_numpy(np.asarray(range(0, num_speakers))).to(cos_sim_matrix.device) + L = self.criterion(cos_sim_matrix, label) + return L diff --git a/TTS/speaker_encoder/model.py b/TTS/speaker_encoder/model.py new file mode 100644 index 0000000000000000000000000000000000000000..322ee42f780d6ba29ba7853ec533655fc67e9fb3 --- /dev/null +++ b/TTS/speaker_encoder/model.py @@ -0,0 +1,112 @@ +import torch +from torch import nn + + +class LSTMWithProjection(nn.Module): + def __init__(self, input_size, hidden_size, proj_size): + super().__init__() + self.input_size = input_size + self.hidden_size = hidden_size + self.proj_size = proj_size + self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True) + self.linear = nn.Linear(hidden_size, proj_size, bias=False) + + def forward(self, x): + self.lstm.flatten_parameters() + o, (_, _) = self.lstm(x) + return self.linear(o) + +class LSTMWithoutProjection(nn.Module): + def __init__(self, input_dim, lstm_dim, proj_dim, num_lstm_layers): + super().__init__() + self.lstm = nn.LSTM(input_size=input_dim, + hidden_size=lstm_dim, + num_layers=num_lstm_layers, + batch_first=True) + self.linear = nn.Linear(lstm_dim, proj_dim, bias=True) + self.relu = nn.ReLU() + def forward(self, x): + _, (hidden, _) = self.lstm(x) + return self.relu(self.linear(hidden[-1])) + +class SpeakerEncoder(nn.Module): + def __init__(self, input_dim, proj_dim=256, lstm_dim=768, num_lstm_layers=3, use_lstm_with_projection=True): + super().__init__() + self.use_lstm_with_projection = use_lstm_with_projection + layers = [] + # choise LSTM layer + if use_lstm_with_projection: + layers.append(LSTMWithProjection(input_dim, lstm_dim, proj_dim)) + for _ in range(num_lstm_layers - 1): + layers.append(LSTMWithProjection(proj_dim, lstm_dim, proj_dim)) + self.layers = nn.Sequential(*layers) + else: + self.layers = LSTMWithoutProjection(input_dim, lstm_dim, proj_dim, num_lstm_layers) + + self._init_layers() + + def _init_layers(self): + for name, param in self.layers.named_parameters(): + if "bias" in name: + nn.init.constant_(param, 0.0) + elif "weight" in name: + nn.init.xavier_normal_(param) + + def forward(self, x): + # TODO: implement state passing for lstms + d = self.layers(x) + if self.use_lstm_with_projection: + d = torch.nn.functional.normalize(d[:, -1], p=2, dim=1) + else: + d = torch.nn.functional.normalize(d, p=2, dim=1) + return d + + @torch.no_grad() + def inference(self, x): + d = self.layers.forward(x) + if self.use_lstm_with_projection: + d = torch.nn.functional.normalize(d[:, -1], p=2, dim=1) + else: + d = 
torch.nn.functional.normalize(d, p=2, dim=1) + return d + + def compute_embedding(self, x, num_frames=160, overlap=0.5): + """ + Generate embeddings for a batch of utterances + x: 1xTxD + """ + num_overlap = int(num_frames * overlap) + max_len = x.shape[1] + embed = None + cur_iter = 0 + for offset in range(0, max_len, num_frames - num_overlap): + cur_iter += 1 + end_offset = min(x.shape[1], offset + num_frames) + frames = x[:, offset:end_offset] + if embed is None: + embed = self.inference(frames) + else: + embed += self.inference(frames) + return embed / cur_iter + + def batch_compute_embedding(self, x, seq_lens, num_frames=160, overlap=0.5): + """ + Generate embeddings for a batch of utterances + x: BxTxD + """ + num_overlap = num_frames * overlap + max_len = x.shape[1] + embed = None + num_iters = seq_lens / (num_frames - num_overlap) + cur_iter = 0 + for offset in range(0, max_len, num_frames - num_overlap): + cur_iter += 1 + end_offset = min(x.shape[1], offset + num_frames) + frames = x[:, offset:end_offset] + if embed is None: + embed = self.inference(frames) + else: + embed[cur_iter <= num_iters, :] += self.inference( + frames[cur_iter <= num_iters, :, :] + ) + return embed / num_iters diff --git a/TTS/speaker_encoder/requirements.txt b/TTS/speaker_encoder/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..a486cc45ddb44591bd03c9c0df294fbe98c13884 --- /dev/null +++ b/TTS/speaker_encoder/requirements.txt @@ -0,0 +1,2 @@ +umap-learn +numpy>=1.17.0 diff --git a/TTS/speaker_encoder/umap.png b/TTS/speaker_encoder/umap.png new file mode 100644 index 0000000000000000000000000000000000000000..ca8aefeac8cbe616983b35e968c9c9133eb41ede Binary files /dev/null and b/TTS/speaker_encoder/umap.png differ diff --git a/TTS/speaker_encoder/utils/__init__.py b/TTS/speaker_encoder/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/TTS/speaker_encoder/utils/generic_utils.py b/TTS/speaker_encoder/utils/generic_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..021c7f45a8f5ade0f132739d4729c3443d04f2b9 --- /dev/null +++ b/TTS/speaker_encoder/utils/generic_utils.py @@ -0,0 +1,117 @@ +import datetime +import os +import re + +import torch +from TTS.speaker_encoder.model import SpeakerEncoder +from TTS.utils.generic_utils import check_argument + + +def to_camel(text): + text = text.capitalize() + return re.sub(r'(?!^)_([a-zA-Z])', lambda m: m.group(1).upper(), text) + + +def setup_model(c): + model = SpeakerEncoder(c.model['input_dim'], c.model['proj_dim'], + c.model['lstm_dim'], c.model['num_lstm_layers']) + return model + + +def save_checkpoint(model, optimizer, model_loss, out_path, + current_step, epoch): + checkpoint_path = 'checkpoint_{}.pth.tar'.format(current_step) + checkpoint_path = os.path.join(out_path, checkpoint_path) + print(" | | > Checkpoint saving : {}".format(checkpoint_path)) + + new_state_dict = model.state_dict() + state = { + 'model': new_state_dict, + 'optimizer': optimizer.state_dict() if optimizer is not None else None, + 'step': current_step, + 'epoch': epoch, + 'loss': model_loss, + 'date': datetime.date.today().strftime("%B %d, %Y"), + } + torch.save(state, checkpoint_path) + + +def save_best_model(model, optimizer, model_loss, best_loss, out_path, + current_step): + if model_loss < best_loss: + new_state_dict = model.state_dict() + state = { + 'model': new_state_dict, + 'optimizer': optimizer.state_dict(), + 'step': current_step, + 
'loss': model_loss, + 'date': datetime.date.today().strftime("%B %d, %Y"), + } + best_loss = model_loss + bestmodel_path = 'best_model.pth.tar' + bestmodel_path = os.path.join(out_path, bestmodel_path) + print("\n > BEST MODEL ({0:.5f}) : {1:}".format( + model_loss, bestmodel_path)) + torch.save(state, bestmodel_path) + return best_loss + + +def check_config_speaker_encoder(c): + """Check the config.json file of the speaker encoder""" + check_argument('run_name', c, restricted=True, val_type=str) + check_argument('run_description', c, val_type=str) + + # audio processing parameters + check_argument('audio', c, restricted=True, val_type=dict) + check_argument('num_mels', c['audio'], restricted=True, val_type=int, min_val=10, max_val=2056) + check_argument('fft_size', c['audio'], restricted=True, val_type=int, min_val=128, max_val=4058) + check_argument('sample_rate', c['audio'], restricted=True, val_type=int, min_val=512, max_val=100000) + check_argument('frame_length_ms', c['audio'], restricted=True, val_type=float, min_val=10, max_val=1000, alternative='win_length') + check_argument('frame_shift_ms', c['audio'], restricted=True, val_type=float, min_val=1, max_val=1000, alternative='hop_length') + check_argument('preemphasis', c['audio'], restricted=True, val_type=float, min_val=0, max_val=1) + check_argument('min_level_db', c['audio'], restricted=True, val_type=int, min_val=-1000, max_val=10) + check_argument('ref_level_db', c['audio'], restricted=True, val_type=int, min_val=0, max_val=1000) + check_argument('power', c['audio'], restricted=True, val_type=float, min_val=1, max_val=5) + check_argument('griffin_lim_iters', c['audio'], restricted=True, val_type=int, min_val=10, max_val=1000) + + # training parameters + check_argument('loss', c, enum_list=['ge2e', 'angleproto'], restricted=True, val_type=str) + check_argument('grad_clip', c, restricted=True, val_type=float) + check_argument('epochs', c, restricted=True, val_type=int, min_val=1) + check_argument('lr', c, restricted=True, val_type=float, min_val=0) + check_argument('lr_decay', c, restricted=True, val_type=bool) + check_argument('warmup_steps', c, restricted=True, val_type=int, min_val=0) + check_argument('tb_model_param_stats', c, restricted=True, val_type=bool) + check_argument('num_speakers_in_batch', c, restricted=True, val_type=int) + check_argument('num_loader_workers', c, restricted=True, val_type=int) + check_argument('wd', c, restricted=True, val_type=float, min_val=0.0, max_val=1.0) + + # checkpoint and output parameters + check_argument('steps_plot_stats', c, restricted=True, val_type=int) + check_argument('checkpoint', c, restricted=True, val_type=bool) + check_argument('save_step', c, restricted=True, val_type=int) + check_argument('print_step', c, restricted=True, val_type=int) + check_argument('output_path', c, restricted=True, val_type=str) + + # model parameters + check_argument('model', c, restricted=True, val_type=dict) + check_argument('input_dim', c['model'], restricted=True, val_type=int) + check_argument('proj_dim', c['model'], restricted=True, val_type=int) + check_argument('lstm_dim', c['model'], restricted=True, val_type=int) + check_argument('num_lstm_layers', c['model'], restricted=True, val_type=int) + check_argument('use_lstm_with_projection', c['model'], restricted=True, val_type=bool) + + # in-memory storage parameters + check_argument('storage', c, restricted=True, val_type=dict) + check_argument('sample_from_storage_p', c['storage'], restricted=True, val_type=float, min_val=0.0, max_val=1.0) + 
check_argument('storage_size', c['storage'], restricted=True, val_type=int, min_val=1, max_val=100) + check_argument('additive_noise', c['storage'], restricted=True, val_type=float, min_val=0.0, max_val=1.0) + + # datasets - checking only the first entry + check_argument('datasets', c, restricted=True, val_type=list) + for dataset_entry in c['datasets']: + check_argument('name', dataset_entry, restricted=True, val_type=str) + check_argument('path', dataset_entry, restricted=True, val_type=str) + check_argument('meta_file_train', dataset_entry, restricted=True, val_type=[str, list]) + check_argument('meta_file_val', dataset_entry, restricted=True, val_type=str) + diff --git a/TTS/speaker_encoder/utils/io.py b/TTS/speaker_encoder/utils/io.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/TTS/speaker_encoder/utils/prepare_voxceleb.py b/TTS/speaker_encoder/utils/prepare_voxceleb.py new file mode 100644 index 0000000000000000000000000000000000000000..758e1cb3b3197989ef453bba43a1ad145db8f119 --- /dev/null +++ b/TTS/speaker_encoder/utils/prepare_voxceleb.py @@ -0,0 +1,233 @@ +# coding=utf-8 +# Copyright (C) 2020 ATHENA AUTHORS; Yiping Peng; Ne Luo +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +# Only support eager mode and TF>=2.0.0 +# pylint: disable=no-member, invalid-name, relative-beyond-top-level +# pylint: disable=too-many-locals, too-many-statements, too-many-arguments, too-many-instance-attributes +''' voxceleb 1 & 2 ''' + +import os +import sys +import zipfile +import subprocess +import hashlib +import pandas +from absl import logging +import tensorflow as tf +import soundfile as sf + +gfile = tf.compat.v1.gfile + +SUBSETS = { + "vox1_dev_wav": + ["http://www.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox1_dev_wav_partaa", + "http://www.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox1_dev_wav_partab", + "http://www.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox1_dev_wav_partac", + "http://www.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox1_dev_wav_partad"], + "vox1_test_wav": + ["http://www.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox1_test_wav.zip"], + "vox2_dev_aac": + ["http://www.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox2_dev_aac_partaa", + "http://www.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox2_dev_aac_partab", + "http://www.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox2_dev_aac_partac", + "http://www.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox2_dev_aac_partad", + "http://www.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox2_dev_aac_partae", + "http://www.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox2_dev_aac_partaf", + "http://www.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox2_dev_aac_partag", + "http://www.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox2_dev_aac_partah"], + "vox2_test_aac": + ["http://www.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox2_test_aac.zip"] +} + +MD5SUM = { + "vox1_dev_wav": "ae63e55b951748cc486645f532ba230b", + "vox2_dev_aac": "bbc063c46078a602ca71605645c2a402", + "vox1_test_wav": "185fdc63c3c739954633d50379a3d102", + "vox2_test_aac": "0d2b3ea430a821c33263b5ea37ede312" +} + +USER = { + "user": "", + "password": "" +} + +speaker_id_dict = {} + +def download_and_extract(directory, subset, urls): + """Download and extract the given split of dataset. + + Args: + directory: the directory where to put the downloaded data. + subset: subset name of the corpus. + urls: the list of urls to download the data file. 
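+    Multi-part archives (e.g. vox1_dev_wav_partaa ... partad) are concatenated into a single zip, verified against MD5SUM and extracted into `directory`. Parts that are already present on disk are skipped, and downloads use the credentials stored in USER.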
+ """ + if not gfile.Exists(directory): + gfile.MakeDirs(directory) + + try: + for url in urls: + zip_filepath = os.path.join(directory, url.split("/")[-1]) + if os.path.exists(zip_filepath): + continue + logging.info("Downloading %s to %s" % (url, zip_filepath)) + subprocess.call('wget %s --user %s --password %s -O %s' % + (url, USER["user"], USER["password"], zip_filepath), shell=True) + + statinfo = os.stat(zip_filepath) + logging.info( + "Successfully downloaded %s, size(bytes): %d" % (url, statinfo.st_size) + ) + + # concatenate all parts into zip files + if ".zip" not in zip_filepath: + zip_filepath = "_".join(zip_filepath.split("_")[:-1]) + subprocess.call('cat %s* > %s.zip' % + (zip_filepath, zip_filepath), shell=True) + zip_filepath += ".zip" + extract_path = zip_filepath.strip(".zip") + + # check zip file md5sum + md5 = hashlib.md5(open(zip_filepath, 'rb').read()).hexdigest() + if md5 != MD5SUM[subset]: + raise ValueError("md5sum of %s mismatch" % zip_filepath) + + with zipfile.ZipFile(zip_filepath, "r") as zfile: + zfile.extractall(directory) + extract_path_ori = os.path.join(directory, zfile.infolist()[0].filename) + subprocess.call('mv %s %s' % (extract_path_ori, extract_path), shell=True) + finally: + # gfile.Remove(zip_filepath) + pass + + +def exec_cmd(cmd): + """Run a command in a subprocess. + Args: + cmd: command line to be executed. + Return: + int, the return code. + """ + try: + retcode = subprocess.call(cmd, shell=True) + if retcode < 0: + logging.info(f"Child was terminated by signal {retcode}") + except OSError as e: + logging.info(f"Execution failed: {e}") + retcode = -999 + return retcode + + +def decode_aac_with_ffmpeg(aac_file, wav_file): + """Decode a given AAC file into WAV using ffmpeg. + Args: + aac_file: file path to input AAC file. + wav_file: file path to output WAV file. + Return: + bool, True if success. + """ + cmd = f"ffmpeg -i {aac_file} {wav_file}" + logging.info(f"Decoding aac file using command line: {cmd}") + ret = exec_cmd(cmd) + if ret != 0: + logging.error(f"Failed to decode aac file with retcode {ret}") + logging.error("Please check your ffmpeg installation.") + return False + return True + + +def convert_audio_and_make_label(input_dir, subset, + output_dir, output_file): + """Optionally convert AAC to WAV and make speaker labels. + Args: + input_dir: the directory which holds the input dataset. + subset: the name of the specified subset. e.g. vox1_dev_wav + output_dir: the directory to place the newly generated csv files. + output_file: the name of the newly generated csv file. e.g. vox1_dev_wav.csv + """ + + logging.info("Preprocessing audio and label for subset %s" % subset) + source_dir = os.path.join(input_dir, subset) + + files = [] + # Convert all AAC file into WAV format. At the same time, generate the csv + for root, _, filenames in gfile.Walk(source_dir): + for filename in filenames: + name, ext = os.path.splitext(filename) + if ext.lower() == ".wav": + _, ext2 = (os.path.splitext(name)) + if ext2: + continue + wav_file = os.path.join(root, filename) + elif ext.lower() == ".m4a": + # Convert AAC to WAV. 
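+        # VoxCeleb2 ships its audio as .m4a (AAC). Decode each file once with
+        # ffmpeg (decode_aac_with_ffmpeg above) and keep the resulting .wav next
+        # to the source file so later runs can skip the conversion.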
+ aac_file = os.path.join(root, filename) + wav_file = aac_file + ".wav" + if not gfile.Exists(wav_file): + if not decode_aac_with_ffmpeg(aac_file, wav_file): + raise RuntimeError("Audio decoding failed.") + else: + continue + speaker_name = root.split(os.path.sep)[-2] + if speaker_name not in speaker_id_dict: + num = len(speaker_id_dict) + speaker_id_dict[speaker_name] = num + # wav_filesize = os.path.getsize(wav_file) + wav_length = len(sf.read(wav_file)[0]) + files.append( + (os.path.abspath(wav_file), wav_length, speaker_id_dict[speaker_name], speaker_name) + ) + + # Write to CSV file which contains four columns: + # "wav_filename", "wav_length_ms", "speaker_id", "speaker_name". + csv_file_path = os.path.join(output_dir, output_file) + df = pandas.DataFrame( + data=files, columns=["wav_filename", "wav_length_ms", "speaker_id", "speaker_name"]) + df.to_csv(csv_file_path, index=False, sep="\t") + logging.info("Successfully generated csv file {}".format(csv_file_path)) + + +def processor(directory, subset, force_process): + """ download and process """ + urls = SUBSETS + if subset not in urls: + raise ValueError(subset, "is not in voxceleb") + + subset_csv = os.path.join(directory, subset + '.csv') + if not force_process and os.path.exists(subset_csv): + return subset_csv + + logging.info("Downloading and process the voxceleb in %s", directory) + logging.info("Preparing subset %s", subset) + download_and_extract(directory, subset, urls[subset]) + convert_audio_and_make_label( + directory, + subset, + directory, + subset + ".csv" + ) + logging.info("Finished downloading and processing") + return subset_csv + + +if __name__ == "__main__": + logging.set_verbosity(logging.INFO) + if len(sys.argv) != 4: + print("Usage: python prepare_data.py save_directory user password") + sys.exit() + + DIR, USER["user"], USER["password"] = sys.argv[1], sys.argv[2], sys.argv[3] + for SUBSET in SUBSETS: + processor(DIR, SUBSET, False) diff --git a/TTS/speaker_encoder/utils/visual.py b/TTS/speaker_encoder/utils/visual.py new file mode 100644 index 0000000000000000000000000000000000000000..68c48f1234bb64930c7f6629235756768edce95e --- /dev/null +++ b/TTS/speaker_encoder/utils/visual.py @@ -0,0 +1,46 @@ +import umap +import numpy as np +import matplotlib +import matplotlib.pyplot as plt + +matplotlib.use("Agg") + + +colormap = ( + np.array( + [ + [76, 255, 0], + [0, 127, 70], + [255, 0, 0], + [255, 217, 38], + [0, 135, 255], + [165, 0, 165], + [255, 167, 255], + [0, 255, 255], + [255, 96, 38], + [142, 76, 0], + [33, 0, 127], + [0, 0, 0], + [183, 183, 183], + ], + dtype=np.float, + ) + / 255 +) + + +def plot_embeddings(embeddings, num_utter_per_speaker): + embeddings = embeddings[: 10 * num_utter_per_speaker] + model = umap.UMAP() + projection = model.fit_transform(embeddings) + num_speakers = embeddings.shape[0] // num_utter_per_speaker + ground_truth = np.repeat(np.arange(num_speakers), num_utter_per_speaker) + colors = [colormap[i] for i in ground_truth] + + fig, ax = plt.subplots(figsize=(16, 10)) + _ = ax.scatter(projection[:, 0], projection[:, 1], c=colors) + plt.gca().set_aspect("equal", "datalim") + plt.title("UMAP projection") + plt.tight_layout() + plt.savefig("umap") + return fig diff --git a/TTS/tts/__init__.py b/TTS/tts/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/TTS/tts/configs/config.json b/TTS/tts/configs/config.json new file mode 100644 index 
0000000000000000000000000000000000000000..48f20e8f25fa9c80d2e771743af86d9bae004ba9
--- /dev/null
+++ b/TTS/tts/configs/config.json
@@ -0,0 +1,172 @@
+{
+    "model": "Tacotron2",
+    "run_name": "ljspeech-ddc",
+    "run_description": "tacotron2 with DDC and differential spectral loss.",
+
+    // AUDIO PARAMETERS
+    "audio":{
+        // stft parameters
+        "fft_size": 1024,        // number of stft frequency levels. Size of the linear spectrogram frame.
+        "win_length": 1024,      // stft window length in samples.
+        "hop_length": 256,       // stft window hop-length in samples.
+        "frame_length_ms": null, // stft window length in ms. If null, 'win_length' is used.
+        "frame_shift_ms": null,  // stft window hop-length in ms. If null, 'hop_length' is used.
+
+        // Audio processing parameters
+        "sample_rate": 22050,    // DATASET-RELATED: wav sample-rate.
+        "preemphasis": 0.0,      // pre-emphasis to reduce spec noise and make it more structured. If 0.0, pre-emphasis is disabled.
+        "ref_level_db": 20,      // reference level db, theoretically 20db is the sound of air.
+
+        // Silence trimming
+        "do_trim_silence": true, // enable trimming of silence as you load the audio. LJspeech (true), TWEB (false), Nancy (true)
+        "trim_db": 60,           // threshold for trimming silence. Set this according to your dataset.
+
+        // Griffin-Lim
+        "power": 1.5,            // value to sharpen wav signals after GL algorithm.
+        "griffin_lim_iters": 60, // #griffin-lim iterations. 30-60 is a good range. The larger the value, the slower the generation.
+
+        // MelSpectrogram parameters
+        "num_mels": 80,          // size of the mel spec frame.
+        "mel_fmin": 50.0,        // minimum freq level for mel-spec. ~50 for male and ~95 for female voices. Tune for dataset!!
+        "mel_fmax": 7600.0,      // maximum freq level for mel-spec. Tune for dataset!!
+        "spec_gain": 1,
+
+        // Normalization parameters
+        "signal_norm": true,     // normalize spec values. Mean-Var normalization if 'stats_path' is defined otherwise range normalization defined by the other params.
+        "min_level_db": -100,    // lower bound for normalization
+        "symmetric_norm": true,  // move normalization to range [-1, 1]
+        "max_norm": 4.0,         // scale normalization to range [-max_norm, max_norm] or [0, max_norm]
+        "clip_norm": true,       // clip normalized values into the range.
+        "stats_path": "/home/erogol/Data/LJSpeech-1.1/scale_stats.npy" // DO NOT USE WITH MULTI_SPEAKER MODEL. scaler stats file computed by 'compute_statistics.py'. If it is defined, mean-std based normalization is used and other normalization params are ignored
+    },
+
+    // VOCABULARY PARAMETERS
+    // if custom character set is not defined,
+    // default set in symbols.py is used
+    // "characters":{
+    //     "pad": "_",
+    //     "eos": "~",
+    //     "bos": "^",
+    //     "characters": "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!'(),-.:;? ",
+    //     "punctuations":"!'(),-.:;? ",
+    //     "phonemes":"iyɨʉɯuɪʏʊeøɘəɵɤoɛœɜɞʌɔæɐaɶɑɒᵻʘɓǀɗǃʄǂɠǁʛpbtdʈɖcɟkɡqɢʔɴŋɲɳnɱmʙrʀⱱɾɽɸβfvθðszʃʒʂʐçʝxɣχʁħʕhɦɬɮʋɹɻjɰlɭʎʟˈˌːˑʍwɥʜʢʡɕʑɺɧɚ˞ɫ"
+    // },
+
+    // DISTRIBUTED TRAINING
+    "distributed":{
+        "backend": "nccl",
+        "url": "tcp:\/\/localhost:54321"
+    },
+
+    "reinit_layers": [],    // give a list of layer names to restore from the given checkpoint. If not defined, it reloads all heuristically matching layers.
+
+    // TRAINING
+    "batch_size": 32,       // Batch size for training. Lower values than 32 might cause hard-to-learn attention. It is overwritten by 'gradual_training'.
+    "eval_batch_size":16,
+    "r": 7,                 // Number of decoder frames to predict per iteration. Set the initial values if gradual training is enabled.
+    "gradual_training": [[0, 7, 64], [1, 5, 64], [50000, 3, 32], [130000, 2, 32], [290000, 1, 32]], // set gradual training steps [first_step, r, batch_size]. If it is null, gradual training is disabled. For Tacotron, you might need to reduce the 'batch_size' as you proceed.
+    "mixed_precision": true,    // enable automatic mixed FP16/FP32 precision (AMP) via NVIDIA's apex. NOTE: currently only the "O1" optimization level is supported.
+
+    // LOSS SETTINGS
+    "loss_masking": true,       // enable / disable loss masking against the sequence padding.
+    "decoder_loss_alpha": 0.5,  // original decoder loss weight. If > 0, it is enabled
+    "postnet_loss_alpha": 0.25, // original postnet loss weight. If > 0, it is enabled
+    "postnet_diff_spec_alpha": 0.25,  // differential spectral loss weight. If > 0, it is enabled
+    "decoder_diff_spec_alpha": 0.25,  // differential spectral loss weight. If > 0, it is enabled
+    "decoder_ssim_alpha": 0.5,  // decoder ssim loss weight. If > 0, it is enabled
+    "postnet_ssim_alpha": 0.25, // postnet ssim loss weight. If > 0, it is enabled
+    "ga_alpha": 5.0,            // weight for guided attention loss. If > 0, guided attention is enabled.
+    "stopnet_pos_weight": 15.0, // pos class weight for stopnet loss since there are way more negative samples than positive samples.
+
+
+    // VALIDATION
+    "run_eval": true,
+    "test_delay_epochs": 10,     // Until attention is aligned, testing only wastes computation time.
+    "test_sentences_file": null, // set a file to load sentences to be used for testing. If it is null, default English sentences are used.
+
+    // OPTIMIZER
+    "noam_schedule": false,     // use noam warmup and lr schedule.
+    "grad_clip": 1.0,           // upper limit for gradients for clipping.
+    "epochs": 1000,             // total number of epochs to train.
+    "lr": 0.0001,               // Initial learning rate. If Noam decay is active, maximum learning rate.
+    "wd": 0.000001,             // Weight decay weight.
+    "warmup_steps": 4000,       // Noam decay steps to increase the learning rate from 0 to "lr"
+    "seq_len_norm": false,      // Normalize each sample loss with its length to alleviate imbalanced datasets. Use it if your dataset is small or has a skewed distribution of sequence lengths.
+
+    // TACOTRON PRENET
+    "memory_size": -1,          // ONLY TACOTRON - size of the memory queue used for storing last decoder predictions for auto-regression. If < 0, memory queue is disabled and decoder only uses the last prediction frame.
+    "prenet_type": "original",  // "original" or "bn".
+    "prenet_dropout": false,    // enable/disable dropout at prenet.
+
+    // TACOTRON ATTENTION
+    "attention_type": "original", // 'original', 'graves', 'dynamic_convolution'
+    "attention_heads": 4,       // number of attention heads (only for 'graves')
+    "attention_norm": "sigmoid", // softmax or sigmoid.
+    "windowing": false,         // Enables attention windowing. Used only in eval mode.
+    "use_forward_attn": false,  // if it uses forward attention. In general, it aligns faster.
+    "forward_attn_mask": false, // Additional masking forcing monotonicity only in eval mode.
+    "transition_agent": false,  // enable/disable transition agent of forward attention.
+    "location_attn": true,      // enable/disable location sensitive attention. It is enabled for TACOTRON by default.
+    "bidirectional_decoder": false,  // use https://arxiv.org/abs/1907.09006. Use it if attention does not work well with your dataset.
+    "double_decoder_consistency": true,  // use DDC explained here https://erogol.com/solving-attention-problems-of-tts-models-with-double-decoder-consistency-draft/
+    "ddc_r": 7,                 // reduction rate for coarse decoder.
+
+    // STOPNET
+    "stopnet": true,            // Train stopnet predicting the end of synthesis.
+    "separate_stopnet": true,   // Train stopnet separately if 'stopnet==true'. It prevents the stopnet loss from influencing the rest of the model. It causes a better model, but it trains SLOWER.
+
+    // TENSORBOARD and LOGGING
+    "print_step": 25,           // Number of steps to log training on console.
+    "tb_plot_step": 100,        // Number of steps to plot TB training figures.
+    "print_eval": false,        // If True, it prints intermediate loss values in evaluation.
+    "save_step": 10000,         // Number of training steps expected to save training stats and checkpoints.
+    "checkpoint": true,         // If true, it saves checkpoints per "save_step"
+    "tb_model_param_stats": false,  // if true, plots param stats per layer on tensorboard. Might be memory consuming, but good for debugging.
+
+    // DATA LOADING
+    "text_cleaner": "phoneme_cleaners",
+    "enable_eos_bos_chars": false, // enable/disable beginning of sentence and end of sentence chars.
+    "num_loader_workers": 4,    // number of training data loader processes. Don't set it too big. 4-8 are good values.
+    "num_val_loader_workers": 4, // number of evaluation data loader processes.
+    "batch_group_size": 4,      // Number of batches to shuffle after bucketing.
+    "min_seq_len": 6,           // DATASET-RELATED: minimum text length to use in training
+    "max_seq_len": 153,         // DATASET-RELATED: maximum text length
+    "compute_input_seq_cache": false,  // if true, text sequences are computed before starting training. If phonemes are enabled, they are also computed at this stage.
+    "use_noise_augment": true,
+
+    // PATHS
+    "output_path": "/home/erogol/Models/LJSpeech/",
+
+    // PHONEMES
+    "phoneme_cache_path": "/home/erogol/Models/phoneme_cache/",  // phoneme computation is slow, therefore, it caches results in the given folder.
+    "use_phonemes": true,       // use phonemes instead of raw characters. It is suggested for better pronunciation.
+    "phoneme_language": "en-us", // depending on your target language, pick one from https://github.com/bootphon/phonemizer#languages
+
+    // MULTI-SPEAKER and GST
+    "use_speaker_embedding": false,  // use speaker embedding to enable multi-speaker learning.
+    "use_gst": false,                // use global style tokens
+    "use_external_speaker_embedding_file": false, // if true, forces the model to use external embedding per sample instead of nn.embeddings, that is, it supports external embeddings such as those used at: https://arxiv.org/abs/1806.04558
+    "external_speaker_embedding_file": "../../speakers-vctk-en.json", // if not null and use_external_speaker_embedding_file is true, it is used to load a specific embedding file and thus uses these embeddings instead of nn.embeddings, that is, it supports external embeddings such as those used at: https://arxiv.org/abs/1806.04558
+    "gst": { // gst parameter if gst is enabled
+        "gst_style_input": null,  // Condition the style input either on a
+                                  // -> wave file [path to wave] or
+                                  // -> dictionary using the style tokens {'token1': 'value', 'token2': 'value'} example {"0": 0.15, "1": 0.15, "5": -0.15}
+                                  // with the dictionary being len(dict) <= len(gst_style_tokens).
+        "gst_embedding_dim": 512,
+        "gst_num_heads": 4,
+        "gst_style_tokens": 10,
+        "gst_use_speaker_embedding": false
+    },
+
+    // DATASETS
+    "datasets":  // List of datasets.
They all merged and they get different speaker_ids. + [ + { + "name": "ljspeech", + "path": "/home/erogol/Data/LJSpeech-1.1/", + "meta_file_train": "metadata.csv", // for vtck if list, ignore speakers id in list for train, its useful for test cloning with new speakers + "meta_file_val": null + } + ] +} + diff --git a/TTS/tts/configs/glow_tts_gated_conv.json b/TTS/tts/configs/glow_tts_gated_conv.json new file mode 100644 index 0000000000000000000000000000000000000000..d34fbaf0d1b03f7be3d9a46d030e05b8707d2a6e --- /dev/null +++ b/TTS/tts/configs/glow_tts_gated_conv.json @@ -0,0 +1,136 @@ +{ + "model": "glow_tts", + "run_name": "glow-tts-gatedconv", + "run_description": "glow-tts model training with gated conv.", + + // AUDIO PARAMETERS + "audio":{ + "fft_size": 1024, // number of stft frequency levels. Size of the linear spectogram frame. + "win_length": 1024, // stft window length in ms. + "hop_length": 256, // stft window hop-lengh in ms. + "frame_length_ms": null, // stft window length in ms.If null, 'win_length' is used. + "frame_shift_ms": null, // stft window hop-lengh in ms. If null, 'hop_length' is used. + + // Audio processing parameters + "sample_rate": 22050, // DATASET-RELATED: wav sample-rate. If different than the original data, it is resampled. + "preemphasis": 0.0, // pre-emphasis to reduce spec noise and make it more structured. If 0.0, no -pre-emphasis. + "ref_level_db": 0, // reference level db, theoretically 20db is the sound of air. + + // Griffin-Lim + "power": 1.1, // value to sharpen wav signals after GL algorithm. + "griffin_lim_iters": 60,// #griffin-lim iterations. 30-60 is a good range. Larger the value, slower the generation. + + // Silence trimming + "do_trim_silence": true,// enable trimming of slience of audio as you load it. LJspeech (false), TWEB (false), Nancy (true) + "trim_db": 60, // threshold for timming silence. Set this according to your dataset. + + // MelSpectrogram parameters + "num_mels": 80, // size of the mel spec frame. + "mel_fmin": 50.0, // minimum freq level for mel-spec. ~50 for male and ~95 for female voices. Tune for dataset!! + "mel_fmax": 7600.0, // maximum freq level for mel-spec. Tune for dataset!! + "spec_gain": 1.0, // scaler value appplied after log transform of spectrogram. + + // Normalization parameters + "signal_norm": true, // normalize spec values. Mean-Var normalization if 'stats_path' is defined otherwise range normalization defined by the other params. + "min_level_db": -100, // lower bound for normalization + "symmetric_norm": true, // move normalization to range [-1, 1] + "max_norm": 1.0, // scale normalization to range [-max_norm, max_norm] or [0, max_norm] + "clip_norm": true, // clip normalized values into the range. + "stats_path": "/home/erogol/Data/LJSpeech-1.1/scale_stats.npy" // DO NOT USE WITH MULTI_SPEAKER MODEL. scaler stats file computed by 'compute_statistics.py'. If it is defined, mean-std based notmalization is used and other normalization params are ignored + }, + + // VOCABULARY PARAMETERS + // if custom character set is not defined, + // default set in symbols.py is used + // "characters":{ + // "pad": "_", + // "eos": "~", + // "bos": "^", + // "characters": "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!'(),-.:;? ", + // "punctuations":"!'(),-.:;? ", + // "phonemes":"iyɨʉɯuɪʏʊeøɘəɵɤoɛœɜɞʌɔæɐaɶɑɒᵻʘɓǀɗǃʄǂɠǁʛpbtdʈɖcɟkɡqɢʔɴŋɲɳnɱmʙrʀⱱɾɽɸβfvθðszʃʒʂʐçʝxɣχʁħʕhɦɬɮʋɹɻjɰlɭʎʟˈˌːˑʍwɥʜʢʡɕʑɺɧɚ˞ɫ" + // }, + + "add_blank": false, // if true add a new token after each token of the sentence. 
This increases the size of the input sequence, but has considerably improved the prosody of the GlowTTS model. + + // DISTRIBUTED TRAINING + "apex_amp_level": null, // APEX amp optimization level. "O1" is currently supported. + "distributed":{ + "backend": "nccl", + "url": "tcp:\/\/localhost:54323" + }, + + "reinit_layers": [], // give a list of layer names to restore from the given checkpoint. If not defined, it reloads all heuristically matching layers. + + // MODEL PARAMETERS + "use_mas": false, // use Monotonic Alignment Search if true. Otherwise use pre-computed attention alignments. + + // TRAINING + "batch_size": 32, // Batch size for training. Lower values than 32 might cause hard to learn attention. It is overwritten by 'gradual_training'. + "eval_batch_size":16, + "r": 1, // Number of decoder frames to predict per iteration. Set the initial values if gradual training is enabled. + "loss_masking": true, // enable / disable loss masking against the sequence padding. + + // VALIDATION + "run_eval": true, + "test_delay_epochs": 0, //Until attention is aligned, testing only wastes computation time. + "test_sentences_file": null, // set a file to load sentences to be used for testing. If it is null then we use default english sentences. + + // OPTIMIZER + "noam_schedule": true, // use noam warmup and lr schedule. + "grad_clip": 5.0, // upper limit for gradients for clipping. + "epochs": 10000, // total number of epochs to train. + "lr": 1e-3, // Initial learning rate. If Noam decay is active, maximum learning rate. + "wd": 0.000001, // Weight decay weight. + "warmup_steps": 4000, // Noam decay steps to increase the learning rate from 0 to "lr" + "seq_len_norm": false, // Normalize eash sample loss with its length to alleviate imbalanced datasets. Use it if your dataset is small or has skewed distribution of sequence lengths. + + "encoder_type": "gatedconv", + + // TENSORBOARD and LOGGING + "print_step": 25, // Number of steps to log training on console. + "tb_plot_step": 100, // Number of steps to plot TB training figures. + "print_eval": false, // If True, it prints intermediate loss values in evalulation. + "save_step": 5000, // Number of training steps expected to save traninpg stats and checkpoints. + "checkpoint": true, // If true, it saves checkpoints per "save_step" + "tb_model_param_stats": false, // true, plots param stats per layer on tensorboard. Might be memory consuming, but good for debugging. + "apex_amp_level": null, + + // DATA LOADING + "text_cleaner": "phoneme_cleaners", + "enable_eos_bos_chars": false, // enable/disable beginning of sentence and end of sentence chars. + "num_loader_workers": 4, // number of training data loader processes. Don't set it too big. 4-8 are good values. + "num_val_loader_workers": 4, // number of evaluation data loader processes. + "batch_group_size": 0, //Number of batches to shuffle after bucketing. + "min_seq_len": 3, // DATASET-RELATED: minimum text length to use in training + "max_seq_len": 500, // DATASET-RELATED: maximum text length + "compute_f0": false, // compute f0 values in data-loader + "compute_input_seq_cache": false, // if true, text sequences are computed before starting training. If phonemes are enabled, they are also computed at this stage. + + // PATHS + "output_path": "/home/erogol/Models/LJSpeech/", + + // PHONEMES + "phoneme_cache_path": "/home/erogol/Models/phoneme_cache/", // phoneme computation is slow, therefore, it caches results in the given folder. 
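[Editor's illustration] The OPTIMIZER block above enables a Noam warmup schedule ("noam_schedule": true) with "warmup_steps": 4000 and a peak "lr" of 1e-3: the learning rate ramps up from 0 to "lr" over the warmup steps and then decays. The snippet below is a minimal sketch of the standard Noam formula only; the repository's own scheduler may differ in its exact implementation.

```python
def noam_lr(step, base_lr=1e-3, warmup_steps=4000):
    """Linearly ramp from 0 to base_lr over warmup_steps, then decay
    proportionally to 1/sqrt(step)."""
    step = max(step, 1)
    return base_lr * warmup_steps ** 0.5 * min(step * warmup_steps ** -1.5,
                                               step ** -0.5)

for s in (1, 1000, 4000, 16000, 64000):
    print(s, round(noam_lr(s), 7))  # peaks at base_lr when step == warmup_steps
```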
+ "use_phonemes": true, // use phonemes instead of raw characters. It is suggested for better pronounciation. + "phoneme_language": "en-us", // depending on your target language, pick one from https://github.com/bootphon/phonemizer#languages + + // MULTI-SPEAKER and GST + "use_speaker_embedding": false, // use speaker embedding to enable multi-speaker learning. + "style_wav_for_test": null, // path to style wav file to be used in TacotronGST inference. + "use_gst": false, // TACOTRON ONLY: use global style tokens + + // DATASETS + "datasets": // List of datasets. They all merged and they get different speaker_ids. + [ + { + "name": "ljspeech", + "path": "/home/erogol/Data/LJSpeech-1.1/", + "meta_file_train": "metadata.csv", + "meta_file_val": null + // "path_for_attn": "/home/erogol/Data/LJSpeech-1.1/alignments/" + } + ] +} + + diff --git a/TTS/tts/configs/glow_tts_ljspeech.json b/TTS/tts/configs/glow_tts_ljspeech.json new file mode 100644 index 0000000000000000000000000000000000000000..636d931382d1161e61c287c5123ef737a6f77358 --- /dev/null +++ b/TTS/tts/configs/glow_tts_ljspeech.json @@ -0,0 +1,149 @@ +{ + "model": "glow_tts", + "run_name": "glow-tts-residual_bn_conv", + "run_description": "glow-tts model training with residual BN conv.", + + // AUDIO PARAMETERS + "audio":{ + "fft_size": 1024, // number of stft frequency levels. Size of the linear spectogram frame. + "win_length": 1024, // stft window length in ms. + "hop_length": 256, // stft window hop-lengh in ms. + "frame_length_ms": null, // stft window length in ms.If null, 'win_length' is used. + "frame_shift_ms": null, // stft window hop-lengh in ms. If null, 'hop_length' is used. + + // Audio processing parameters + "sample_rate": 22050, // DATASET-RELATED: wav sample-rate. If different than the original data, it is resampled. + "preemphasis": 0.0, // pre-emphasis to reduce spec noise and make it more structured. If 0.0, no -pre-emphasis. + "ref_level_db": 0, // reference level db, theoretically 20db is the sound of air. + + // Griffin-Lim + "power": 1.1, // value to sharpen wav signals after GL algorithm. + "griffin_lim_iters": 60,// #griffin-lim iterations. 30-60 is a good range. Larger the value, slower the generation. + + // Silence trimming + "do_trim_silence": true,// enable trimming of slience of audio as you load it. LJspeech (false), TWEB (false), Nancy (true) + "trim_db": 60, // threshold for timming silence. Set this according to your dataset. + + // MelSpectrogram parameters + "num_mels": 80, // size of the mel spec frame. + "mel_fmin": 50.0, // minimum freq level for mel-spec. ~50 for male and ~95 for female voices. Tune for dataset!! + "mel_fmax": 7600.0, // maximum freq level for mel-spec. Tune for dataset!! + "spec_gain": 1.0, // scaler value appplied after log transform of spectrogram.00 + + // Normalization parameters + "signal_norm": false, // normalize spec values. Mean-Var normalization if 'stats_path' is defined otherwise range normalization defined by the other params. + "min_level_db": -100, // lower bound for normalization + "symmetric_norm": true, // move normalization to range [-1, 1] + "max_norm": 1.0, // scale normalization to range [-max_norm, max_norm] or [0, max_norm] + "clip_norm": true, // clip normalized values into the range. + "stats_path": null // DO NOT USE WITH MULTI_SPEAKER MODEL. scaler stats file computed by 'compute_statistics.py'. 
If it is defined, mean-std based notmalization is used and other normalization params are ignored + }, + + // VOCABULARY PARAMETERS + // if custom character set is not defined, + // default set in symbols.py is used + // "characters":{ + // "pad": "_", + // "eos": "~", + // "bos": "^", + // "characters": "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!'(),-.:;? ", + // "punctuations":"!'(),-.:;? ", + // "phonemes":"iyɨʉɯuɪʏʊeøɘəɵɤoɛœɜɞʌɔæɐaɶɑɒᵻʘɓǀɗǃʄǂɠǁʛpbtdʈɖcɟkɡqɢʔɴŋɲɳnɱmʙrʀⱱɾɽɸβfvθðszʃʒʂʐçʝxɣχʁħʕhɦɬɮʋɹɻjɰlɭʎʟˈˌːˑʍwɥʜʢʡɕʑɺɧɚ˞ɫ" + // }, + + "add_blank": false, // if true add a new token after each token of the sentence. This increases the size of the input sequence, but has considerably improved the prosody of the GlowTTS model. + + // DISTRIBUTED TRAINING + "distributed":{ + "backend": "nccl", + "url": "tcp:\/\/localhost:54321" + }, + + "reinit_layers": [], // give a list of layer names to restore from the given checkpoint. If not defined, it reloads all heuristically matching layers. + + // MODEL PARAMETERS + // "use_mas": false, // use Monotonic Alignment Search if true. Otherwise use pre-computed attention alignments. + "hidden_channels_encoder": 192, + "hidden_channels_decoder": 192, + "hidden_channels_duration_predictor": 256, + "use_encoder_prenet": true, + "encoder_type": "rel_pos_transformer", + "encoder_params": { + "kernel_size":3, + "dropout_p": 0.1, + "num_layers": 6, + "num_heads": 2, + "hidden_channels_ffn": 768, + "input_length": null + }, + + // TRAINING + "batch_size": 32, // Batch size for training. Lower values than 32 might cause hard to learn attention. It is overwritten by 'gradual_training'. + "eval_batch_size":16, + "r": 1, // Number of decoder frames to predict per iteration. Set the initial values if gradual training is enabled. + "loss_masking": true, // enable / disable loss masking against the sequence padding. + "mixed_precision": true, + "data_dep_init_iter": 10, + + // VALIDATION + "run_eval": true, + "test_delay_epochs": 0, //Until attention is aligned, testing only wastes computation time. + "test_sentences_file": null, // set a file to load sentences to be used for testing. If it is null then we use default english sentences. + + // OPTIMIZER + "noam_schedule": true, // use noam warmup and lr schedule. + "grad_clip": 5.0, // upper limit for gradients for clipping. + "epochs": 10000, // total number of epochs to train. + "lr": 1e-3, // Initial learning rate. If Noam decay is active, maximum learning rate. + "wd": 0.000001, // Weight decay weight. + "warmup_steps": 4000, // Noam decay steps to increase the learning rate from 0 to "lr" + "seq_len_norm": false, // Normalize eash sample loss with its length to alleviate imbalanced datasets. Use it if your dataset is small or has skewed distribution of sequence lengths. + + // TENSORBOARD and LOGGING + "print_step": 25, // Number of steps to log training on console. + "tb_plot_step": 100, // Number of steps to plot TB training figures. + "print_eval": false, // If True, it prints intermediate loss values in evalulation. + "save_step": 5000, // Number of training steps expected to save traninpg stats and checkpoints. + "checkpoint": true, // If true, it saves checkpoints per "save_step" + "tb_model_param_stats": false, // true, plots param stats per layer on tensorboard. Might be memory consuming, but good for debugging. + + // DATA LOADING + "text_cleaner": "phoneme_cleaners", + "enable_eos_bos_chars": false, // enable/disable beginning of sentence and end of sentence chars. 
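[Editor's illustration] The normalization options documented in the "audio" block above ("signal_norm", "min_level_db", "symmetric_norm", "max_norm", "clip_norm") describe a range normalization that maps a dB-scale spectrogram from [min_level_db, 0] into [0, max_norm], or into [-max_norm, max_norm] when symmetric normalization is on. The sketch below illustrates only that mapping under those assumptions; the repository's AudioProcessor additionally handles the mean-variance branch used when "stats_path" is set.

```python
import numpy as np

def range_normalize(spec_db, min_level_db=-100.0, max_norm=1.0,
                    symmetric=True, clip=True):
    """Map a dB spectrogram from [min_level_db, 0] to [0, max_norm],
    or to [-max_norm, max_norm] when symmetric normalization is enabled."""
    norm = (spec_db - min_level_db) / -min_level_db
    if symmetric:
        norm = 2.0 * max_norm * norm - max_norm
        return np.clip(norm, -max_norm, max_norm) if clip else norm
    norm = max_norm * norm
    return np.clip(norm, 0.0, max_norm) if clip else norm

# a silent frame (-100 dB) maps to -1.0, a 0 dB frame to 1.0
print(range_normalize(np.array([-100.0, -50.0, 0.0])))
```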
+ "num_loader_workers": 4, // number of training data loader processes. Don't set it too big. 4-8 are good values. + "num_val_loader_workers": 4, // number of evaluation data loader processes. + "batch_group_size": 0, //Number of batches to shuffle after bucketing. + "min_seq_len": 3, // DATASET-RELATED: minimum text length to use in training + "max_seq_len": 500, // DATASET-RELATED: maximum text length + "compute_f0": false, // compute f0 values in data-loader + "use_noise_augment": true, //add a random noise to audio signal for augmentation at training . + "compute_input_seq_cache": true, + + // PATHS + "output_path": "/home/erogol/Models/LJSpeech/", + + // PHONEMES + "phoneme_cache_path": "/home/erogol/Models/phoneme_cache/", // phoneme computation is slow, therefore, it caches results in the given folder. + "use_phonemes": true, // use phonemes instead of raw characters. It is suggested for better pronounciation. + "phoneme_language": "en-us", // depending on your target language, pick one from https://github.com/bootphon/phonemizer#languages + + // MULTI-SPEAKER and GST + "use_speaker_embedding": false, // use speaker embedding to enable multi-speaker learning. + "use_external_speaker_embedding_file": false, + "style_wav_for_test": null, // path to style wav file to be used in TacotronGST inference. + "use_gst": false, // TACOTRON ONLY: use global style tokens + + // DATASETS + "datasets": // List of datasets. They all merged and they get different speaker_ids. + [ + { + "name": "ljspeech", + "path": "/home/erogol/Data/LJSpeech-1.1/", + "meta_file_train": "metadata.csv", + "meta_file_val": null + // "path_for_attn": "/home/erogol/Data/LJSpeech-1.1/alignments/" + } + ] + } + + diff --git a/TTS/tts/configs/ljspeech_tacotron2_dynamic_conv_attn.json b/TTS/tts/configs/ljspeech_tacotron2_dynamic_conv_attn.json new file mode 100644 index 0000000000000000000000000000000000000000..cd5ad8ab3d3aacba3374411a8566764b47d23509 --- /dev/null +++ b/TTS/tts/configs/ljspeech_tacotron2_dynamic_conv_attn.json @@ -0,0 +1,171 @@ +{ + "model": "Tacotron2", + "run_name": "ljspeech-dcattn", + "run_description": "tacotron2 with dynamic convolution attention.", + + // AUDIO PARAMETERS + "audio":{ + // stft parameters + "fft_size": 1024, // number of stft frequency levels. Size of the linear spectogram frame. + "win_length": 1024, // stft window length in ms. + "hop_length": 256, // stft window hop-lengh in ms. + "frame_length_ms": null, // stft window length in ms.If null, 'win_length' is used. + "frame_shift_ms": null, // stft window hop-lengh in ms. If null, 'hop_length' is used. + + // Audio processing parameters + "sample_rate": 22050, // DATASET-RELATED: wav sample-rate. + "preemphasis": 0.0, // pre-emphasis to reduce spec noise and make it more structured. If 0.0, no -pre-emphasis. + "ref_level_db": 20, // reference level db, theoretically 20db is the sound of air. + + // Silence trimming + "do_trim_silence": true,// enable trimming of slience of audio as you load it. LJspeech (true), TWEB (false), Nancy (true) + "trim_db": 60, // threshold for timming silence. Set this according to your dataset. + + // Griffin-Lim + "power": 1.5, // value to sharpen wav signals after GL algorithm. + "griffin_lim_iters": 60,// #griffin-lim iterations. 30-60 is a good range. Larger the value, slower the generation. + + // MelSpectrogram parameters + "num_mels": 80, // size of the mel spec frame. + "mel_fmin": 50.0, // minimum freq level for mel-spec. ~50 for male and ~95 for female voices. Tune for dataset!! 
+ "mel_fmax": 7600.0, // maximum freq level for mel-spec. Tune for dataset!! + "spec_gain": 1, + + // Normalization parameters + "signal_norm": true, // normalize spec values. Mean-Var normalization if 'stats_path' is defined otherwise range normalization defined by the other params. + "min_level_db": -100, // lower bound for normalization + "symmetric_norm": true, // move normalization to range [-1, 1] + "max_norm": 4.0, // scale normalization to range [-max_norm, max_norm] or [0, max_norm] + "clip_norm": true, // clip normalized values into the range. + "stats_path": "/home/erogol/Data/LJSpeech-1.1/scale_stats.npy" // DO NOT USE WITH MULTI_SPEAKER MODEL. scaler stats file computed by 'compute_statistics.py'. If it is defined, mean-std based notmalization is used and other normalization params are ignored + }, + + // VOCABULARY PARAMETERS + // if custom character set is not defined, + // default set in symbols.py is used + // "characters":{ + // "pad": "_", + // "eos": "~", + // "bos": "^", + // "characters": "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!'(),-.:;? ", + // "punctuations":"!'(),-.:;? ", + // "phonemes":"iyɨʉɯuɪʏʊeøɘəɵɤoɛœɜɞʌɔæɐaɶɑɒᵻʘɓǀɗǃʄǂɠǁʛpbtdʈɖcɟkɡqɢʔɴŋɲɳnɱmʙrʀⱱɾɽɸβfvθðszʃʒʂʐçʝxɣχʁħʕhɦɬɮʋɹɻjɰlɭʎʟˈˌːˑʍwɥʜʢʡɕʑɺɧɚ˞ɫ" + // }, + + // DISTRIBUTED TRAINING + "distributed":{ + "backend": "nccl", + "url": "tcp:\/\/localhost:54321" + }, + + "reinit_layers": [], // give a list of layer names to restore from the given checkpoint. If not defined, it reloads all heuristically matching layers. + + // TRAINING + "batch_size": 32, // Batch size for training. Lower values than 32 might cause hard to learn attention. It is overwritten by 'gradual_training'. + "eval_batch_size":16, + "r": 7, // Number of decoder frames to predict per iteration. Set the initial values if gradual training is enabled. + "gradual_training": [[0, 7, 64], [1, 5, 64], [50000, 3, 32], [130000, 2, 32], [290000, 1, 32]], //set gradual training steps [first_step, r, batch_size]. If it is null, gradual training is disabled. For Tacotron, you might need to reduce the 'batch_size' as you proceeed. + "mixed_precision": true, // level of optimization with NVIDIA's apex feature for automatic mixed FP16/FP32 precision (AMP), NOTE: currently only O1 is supported, and use "O1" to activate. + + // LOSS SETTINGS + "loss_masking": true, // enable / disable loss masking against the sequence padding. + "decoder_loss_alpha": 0.5, // original decoder loss weight. If > 0, it is enabled + "postnet_loss_alpha": 0.25, // original postnet loss weight. If > 0, it is enabled + "postnet_diff_spec_alpha": 0.25, // differential spectral loss weight. If > 0, it is enabled + "decoder_diff_spec_alpha": 0.25, // differential spectral loss weight. If > 0, it is enabled + "decoder_ssim_alpha": 0.5, // decoder ssim loss weight. If > 0, it is enabled + "postnet_ssim_alpha": 0.25, // postnet ssim loss weight. If > 0, it is enabled + "ga_alpha": 0.0, // weight for guided attention loss. If > 0, guided attention is enabled. + "stopnet_pos_weight": 15.0, // pos class weight for stopnet loss since there are way more negative samples than positive samples. + + + // VALIDATION + "run_eval": true, + "test_delay_epochs": 10, //Until attention is aligned, testing only wastes computation time. + "test_sentences_file": null, // set a file to load sentences to be used for testing. If it is null then we use default english sentences. + + // OPTIMIZER + "noam_schedule": false, // use noam warmup and lr schedule. 
+ "grad_clip": 1.0, // upper limit for gradients for clipping. + "epochs": 1000, // total number of epochs to train. + "lr": 0.0001, // Initial learning rate. If Noam decay is active, maximum learning rate. + "wd": 0.000001, // Weight decay weight. + "warmup_steps": 4000, // Noam decay steps to increase the learning rate from 0 to "lr" + "seq_len_norm": false, // Normalize eash sample loss with its length to alleviate imbalanced datasets. Use it if your dataset is small or has skewed distribution of sequence lengths. + + // TACOTRON PRENET + "memory_size": -1, // ONLY TACOTRON - size of the memory queue used fro storing last decoder predictions for auto-regression. If < 0, memory queue is disabled and decoder only uses the last prediction frame. + "prenet_type": "original", // "original" or "bn". + "prenet_dropout": false, // enable/disable dropout at prenet. + + // TACOTRON ATTENTION + "attention_type": "dynamic_convolution", // 'original' , 'graves', 'dynamic_convolution' + "attention_heads": 4, // number of attention heads (only for 'graves') + "attention_norm": "softmax", // softmax or sigmoid. + "windowing": false, // Enables attention windowing. Used only in eval mode. + "use_forward_attn": false, // if it uses forward attention. In general, it aligns faster. + "forward_attn_mask": false, // Additional masking forcing monotonicity only in eval mode. + "transition_agent": false, // enable/disable transition agent of forward attention. + "location_attn": true, // enable_disable location sensitive attention. It is enabled for TACOTRON by default. + "bidirectional_decoder": false, // use https://arxiv.org/abs/1907.09006. Use it, if attention does not work well with your dataset. + "double_decoder_consistency": false, // use DDC explained here https://erogol.com/solving-attention-problems-of-tts-models-with-double-decoder-consistency-draft/ + "ddc_r": 7, // reduction rate for coarse decoder. + + // STOPNET + "stopnet": true, // Train stopnet predicting the end of synthesis. + "separate_stopnet": true, // Train stopnet seperately if 'stopnet==true'. It prevents stopnet loss to influence the rest of the model. It causes a better model, but it trains SLOWER. + + // TENSORBOARD and LOGGING + "print_step": 25, // Number of steps to log training on console. + "tb_plot_step": 100, // Number of steps to plot TB training figures. + "print_eval": false, // If True, it prints intermediate loss values in evalulation. + "save_step": 10000, // Number of training steps expected to save traninpg stats and checkpoints. + "checkpoint": true, // If true, it saves checkpoints per "save_step" + "tb_model_param_stats": false, // true, plots param stats per layer on tensorboard. Might be memory consuming, but good for debugging. + + // DATA LOADING + "text_cleaner": "phoneme_cleaners", + "enable_eos_bos_chars": false, // enable/disable beginning of sentence and end of sentence chars. + "num_loader_workers": 4, // number of training data loader processes. Don't set it too big. 4-8 are good values. + "num_val_loader_workers": 4, // number of evaluation data loader processes. + "batch_group_size": 4, //Number of batches to shuffle after bucketing. + "min_seq_len": 6, // DATASET-RELATED: minimum text length to use in training + "max_seq_len": 153, // DATASET-RELATED: maximum text length + "compute_input_seq_cache": false, // if true, text sequences are computed before starting training. If phonemes are enabled, they are also computed at this stage. 
+ + // PATHS + "output_path": "/home/erogol/Models/LJSpeech/", + + // PHONEMES + "phoneme_cache_path": "/home/erogol/Models/phoneme_cache/", // phoneme computation is slow, therefore, it caches results in the given folder. + "use_phonemes": true, // use phonemes instead of raw characters. It is suggested for better pronounciation. + "phoneme_language": "en-us", // depending on your target language, pick one from https://github.com/bootphon/phonemizer#languages + + // MULTI-SPEAKER and GST + "use_speaker_embedding": false, // use speaker embedding to enable multi-speaker learning. + "use_gst": false, // use global style tokens + "use_external_speaker_embedding_file": false, // if true, forces the model to use external embedding per sample instead of nn.embeddings, that is, it supports external embeddings such as those used at: https://arxiv.org/abs /1806.04558 + "external_speaker_embedding_file": "../../speakers-vctk-en.json", // if not null and use_external_speaker_embedding_file is true, it is used to load a specific embedding file and thus uses these embeddings instead of nn.embeddings, that is, it supports external embeddings such as those used at: https://arxiv.org/abs /1806.04558 + "gst": { // gst parameter if gst is enabled + "gst_style_input": null, // Condition the style input either on a + // -> wave file [path to wave] or + // -> dictionary using the style tokens {'token1': 'value', 'token2': 'value'} example {"0": 0.15, "1": 0.15, "5": -0.15} + // with the dictionary being len(dict) <= len(gst_style_tokens). + "gst_embedding_dim": 512, + "gst_num_heads": 4, + "gst_style_tokens": 10, + "gst_use_speaker_embedding": false + }, + + // DATASETS + "datasets": // List of datasets. They all merged and they get different speaker_ids. + [ + { + "name": "ljspeech", + "path": "/home/erogol/Data/LJSpeech-1.1/", + "meta_file_train": "metadata.csv", // for vtck if list, ignore speakers id in list for train, its useful for test cloning with new speakers + "meta_file_val": null + } + ] +} + diff --git a/TTS/tts/configs/speedy_speech_ljspeech.json b/TTS/tts/configs/speedy_speech_ljspeech.json new file mode 100644 index 0000000000000000000000000000000000000000..bd511470cb7c60853f740e090dca9a1cb908ea52 --- /dev/null +++ b/TTS/tts/configs/speedy_speech_ljspeech.json @@ -0,0 +1,151 @@ +{ + "model": "speedy_speech", + "run_name": "speedy-speech-ljspeech", + "run_description": "speedy-speech model for LJSpeech dataset.", + + // AUDIO PARAMETERS + "audio":{ + // stft parameters + "fft_size": 1024, // number of stft frequency levels. Size of the linear spectogram frame. + "win_length": 1024, // stft window length in ms. + "hop_length": 256, // stft window hop-lengh in ms. + "frame_length_ms": null, // stft window length in ms.If null, 'win_length' is used. + "frame_shift_ms": null, // stft window hop-lengh in ms. If null, 'hop_length' is used. + + // Audio processing parameters + "sample_rate": 22050, // DATASET-RELATED: wav sample-rate. + "preemphasis": 0.0, // pre-emphasis to reduce spec noise and make it more structured. If 0.0, no -pre-emphasis. + "ref_level_db": 20, // reference level db, theoretically 20db is the sound of air. + + // Silence trimming + "do_trim_silence": true,// enable trimming of slience of audio as you load it. LJspeech (true), TWEB (false), Nancy (true) + "trim_db": 60, // threshold for timming silence. Set this according to your dataset. + + // Griffin-Lim + "power": 1.5, // value to sharpen wav signals after GL algorithm. + "griffin_lim_iters": 60,// #griffin-lim iterations. 
30-60 is a good range. Larger the value, slower the generation. + + // MelSpectrogram parameters + "num_mels": 80, // size of the mel spec frame. + "mel_fmin": 50.0, // minimum freq level for mel-spec. ~50 for male and ~95 for female voices. Tune for dataset!! + "mel_fmax": 7600.0, // maximum freq level for mel-spec. Tune for dataset!! + "spec_gain": 1, + + // Normalization parameters + "signal_norm": true, // normalize spec values. Mean-Var normalization if 'stats_path' is defined otherwise range normalization defined by the other params. + "min_level_db": -100, // lower bound for normalization + "symmetric_norm": true, // move normalization to range [-1, 1] + "max_norm": 4.0, // scale normalization to range [-max_norm, max_norm] or [0, max_norm] + "clip_norm": true, // clip normalized values into the range. + "stats_path": "/home/erogol/Data/LJSpeech-1.1/scale_stats.npy" // DO NOT USE WITH MULTI_SPEAKER MODEL. scaler stats file computed by 'compute_statistics.py'. If it is defined, mean-std based notmalization is used and other normalization params are ignored + }, + + // VOCABULARY PARAMETERS + // if custom character set is not defined, + // default set in symbols.py is used + // "characters":{ + // "pad": "_", + // "eos": "&", + // "bos": "*", + // "characters": "ABCDEFGHIJKLMNOPQRSTUVWXYZÇÃÀÁÂÊÉÍÓÔÕÚÛabcdefghijklmnopqrstuvwxyzçãàáâêéíóôõúû!(),-.:;? ", + // "punctuations":"!'(),-.:;? ", + // "phonemes":"iyɨʉɯuɪʏʊeøɘəɵɤoɛœɜɞʌɔæɐaɶɑɒᵻʘɓǀɗǃʄǂɠǁʛpbtdʈɖcɟkɡqɢʔɴŋɲɳnɱmʙrʀⱱɾɽɸβfvθðszʃʒʂʐçʝxɣχʁħʕhɦɬɮʋɹɻjɰlɭʎʟˈˌːˑʍwɥʜʢʡɕʑɺɧɚ˞ɫ'̃' " + // }, + + "add_blank": false, // if true add a new token after each token of the sentence. This increases the size of the input sequence, but has considerably improved the prosody of the GlowTTS model. + + // DISTRIBUTED TRAINING + "distributed":{ + "backend": "nccl", + "url": "tcp:\/\/localhost:54321" + }, + + "reinit_layers": [], // give a list of layer names to restore from the given checkpoint. If not defined, it reloads all heuristically matching layers. + + // MODEL PARAMETERS + "positional_encoding": true, + "hidden_channels": 128, // defined globally all the hidden channels of the model - 128 default + "encoder_type": "residual_conv_bn", + "encoder_params":{ + "kernel_size": 4, + "dilations": [1, 2, 4, 1, 2, 4, 1, 2, 4, 1, 2, 4, 1], + "num_conv_blocks": 2, + "num_res_blocks": 13 + }, + "decoder_type": "residual_conv_bn", + "decoder_params":{ + "kernel_size": 4, + "dilations": [1, 2, 4, 8, 1, 2, 4, 8, 1, 2, 4, 8, 1, 2, 4, 8, 1], + "num_conv_blocks": 2, + "num_res_blocks": 17 + }, + + // TRAINING + "batch_size":64, // Batch size for training. Lower values than 32 might cause hard to learn attention. It is overwritten by 'gradual_training'. + "eval_batch_size":32, + "r": 1, // Number of decoder frames to predict per iteration. Set the initial values if gradual training is enabled. + "loss_masking": true, // enable / disable loss masking against the sequence padding. + + // LOSS PARAMETERS + "ssim_alpha": 1, + "l1_alpha": 1, + "huber_alpha": 1, + + // VALIDATION + "run_eval": true, + "test_delay_epochs": -1, //Until attention is aligned, testing only wastes computation time. + "test_sentences_file": null, // set a file to load sentences to be used for testing. If it is null then we use default english sentences. + + // OPTIMIZER + "noam_schedule": true, // use noam warmup and lr schedule. + "grad_clip": 1.0, // upper limit for gradients for clipping. + "epochs": 10000, // total number of epochs to train. + "lr": 0.002, // Initial learning rate. 
If Noam decay is active, maximum learning rate. + "warmup_steps": 4000, // Noam decay steps to increase the learning rate from 0 to "lr" + + // TENSORBOARD and LOGGING + "print_step": 25, // Number of steps to log training on console. + "tb_plot_step": 100, // Number of steps to plot TB training figures. + "print_eval": false, // If True, it prints intermediate loss values in evalulation. + "save_step": 5000, // Number of training steps expected to save traninpg stats and checkpoints. + "checkpoint": true, // If true, it saves checkpoints per "save_step" + "tb_model_param_stats": false, // true, plots param stats per layer on tensorboard. Might be memory consuming, but good for debugging.:set n + "mixed_precision": false, + + // DATA LOADING + "text_cleaner": "english_cleaners", + "enable_eos_bos_chars": false, // enable/disable beginning of sentence and end of sentence chars. + "num_loader_workers": 8, // number of training data loader processes. Don't set it too big. 4-8 are good values. + "num_val_loader_workers": 8, // number of evaluation data loader processes. + "batch_group_size": 4, //Number of batches to shuffle after bucketing. + "min_seq_len": 2, // DATASET-RELATED: minimum text length to use in training + "max_seq_len": 300, // DATASET-RELATED: maximum text length + "compute_f0": false, // compute f0 values in data-loader + "compute_input_seq_cache": false, // if true, text sequences are computed before starting training. If phonemes are enabled, they are also computed at this stage. + + // PATHS + "output_path": "/home/erogol/Models/ljspeech/", + + // PHONEMES + "phoneme_cache_path": "/home/erogol/Models/ljspeech_phonemes/", // phoneme computation is slow, therefore, it caches results in the given folder. + "use_phonemes": true, // use phonemes instead of raw characters. It is suggested for better pronoun[ciation. + "phoneme_language": "en-us", // depending on your target language, pick one from https://github.com/bootphon/phonemizer#languages + + // MULTI-SPEAKER and GST + "use_speaker_embedding": false, // use speaker embedding to enable multi-speaker learning. + "use_external_speaker_embedding_file": false, // if true, forces the model to use external embedding per sample instead of nn.embeddings, that is, it supports external embeddings such as those used at: https://arxiv.org/abs /1806.04558 + "external_speaker_embedding_file": "/home/erogol/Data/libritts/speakers.json", // if not null and use_external_speaker_embedding_file is true, it is used to load a specific embedding file and thus uses these embeddings instead of nn.embeddings, that is, it supports external embeddings such as those used at: https://arxiv.org/abs /1806.04558 + + + // DATASETS + "datasets": // List of datasets. 
They all merged and they get different s$ + [ + { + "name": "ljspeech", + "path": "/home/erogol/Data/LJSpeech-1.1/", + "meta_file_train": "metadata.csv", + "meta_file_val": null, + "meta_file_attn_mask": "/home/erogol/Data/LJSpeech-1.1/metadata_attn_mask.txt" // created by bin/compute_attention_masks.py + } + ] +} \ No newline at end of file diff --git a/TTS/tts/datasets/TTSDataset.py b/TTS/tts/datasets/TTSDataset.py new file mode 100644 index 0000000000000000000000000000000000000000..38dd2890d37d54c2734dc26c484ba907f61e348a --- /dev/null +++ b/TTS/tts/datasets/TTSDataset.py @@ -0,0 +1,349 @@ +import collections +import os +import random +from multiprocessing import Manager, Pool + +import numpy as np +import torch +import tqdm +from torch.utils.data import Dataset +from TTS.tts.utils.data import (prepare_data, prepare_stop_target, + prepare_tensor) +from TTS.tts.utils.text import (pad_with_eos_bos, phoneme_to_sequence, + text_to_sequence) + + +class MyDataset(Dataset): + def __init__(self, + outputs_per_step, + text_cleaner, + compute_linear_spec, + ap, + meta_data, + tp=None, + add_blank=False, + batch_group_size=0, + min_seq_len=0, + max_seq_len=float("inf"), + use_phonemes=True, + phoneme_cache_path=None, + phoneme_language="en-us", + enable_eos_bos=False, + speaker_mapping=None, + use_noise_augment=False, + verbose=False): + """ + Args: + outputs_per_step (int): number of time frames predicted per step. + text_cleaner (str): text cleaner used for the dataset. + compute_linear_spec (bool): compute linear spectrogram if True. + ap (TTS.tts.utils.AudioProcessor): audio processor object. + meta_data (list): list of dataset instances. + batch_group_size (int): (0) range of batch randomization after sorting + sequences by length. + min_seq_len (int): (0) minimum sequence length to be processed + by the loader. + max_seq_len (int): (float("inf")) maximum sequence length. + use_phonemes (bool): (true) if true, text converted to phonemes. + phoneme_cache_path (str): path to cache phoneme features. + phoneme_language (str): one the languages from + https://github.com/bootphon/phonemizer#languages + enable_eos_bos (bool): enable end of sentence and beginning of sentences characters. + use_noise_augment (bool): enable adding random noise to wav for augmentation. + verbose (bool): print diagnostic information. 
+ """ + self.batch_group_size = batch_group_size + self.items = meta_data + self.outputs_per_step = outputs_per_step + self.sample_rate = ap.sample_rate + self.cleaners = text_cleaner + self.compute_linear_spec = compute_linear_spec + self.min_seq_len = min_seq_len + self.max_seq_len = max_seq_len + self.ap = ap + self.tp = tp + self.add_blank = add_blank + self.use_phonemes = use_phonemes + self.phoneme_cache_path = phoneme_cache_path + self.phoneme_language = phoneme_language + self.enable_eos_bos = enable_eos_bos + self.speaker_mapping = speaker_mapping + self.use_noise_augment = use_noise_augment + self.verbose = verbose + self.input_seq_computed = False + if use_phonemes and not os.path.isdir(phoneme_cache_path): + os.makedirs(phoneme_cache_path, exist_ok=True) + if self.verbose: + print("\n > DataLoader initialization") + print(" | > Use phonemes: {}".format(self.use_phonemes)) + if use_phonemes: + print(" | > phoneme language: {}".format(phoneme_language)) + print(" | > Number of instances : {}".format(len(self.items))) + + def load_wav(self, filename): + audio = self.ap.load_wav(filename) + return audio + + @staticmethod + def load_np(filename): + data = np.load(filename).astype('float32') + return data + + @staticmethod + def _generate_and_cache_phoneme_sequence(text, cache_path, cleaners, language, tp, add_blank): + """generate a phoneme sequence from text. + since the usage is for subsequent caching, we never add bos and + eos chars here. Instead we add those dynamically later; based on the + config option.""" + phonemes = phoneme_to_sequence(text, [cleaners], + language=language, + enable_eos_bos=False, + tp=tp, add_blank=add_blank) + phonemes = np.asarray(phonemes, dtype=np.int32) + np.save(cache_path, phonemes) + return phonemes + + @staticmethod + def _load_or_generate_phoneme_sequence(wav_file, text, phoneme_cache_path, enable_eos_bos, cleaners, language, tp, add_blank): + file_name = os.path.splitext(os.path.basename(wav_file))[0] + + # different names for normal phonemes and with blank chars. + file_name_ext = '_blanked_phoneme.npy' if add_blank else '_phoneme.npy' + cache_path = os.path.join(phoneme_cache_path, + file_name + file_name_ext) + try: + phonemes = np.load(cache_path) + except FileNotFoundError: + phonemes = MyDataset._generate_and_cache_phoneme_sequence( + text, cache_path, cleaners, language, tp, add_blank) + except (ValueError, IOError): + print(" [!] failed loading phonemes for {}. 
" + "Recomputing.".format(wav_file)) + phonemes = MyDataset._generate_and_cache_phoneme_sequence( + text, cache_path, cleaners, language, tp, add_blank) + if enable_eos_bos: + phonemes = pad_with_eos_bos(phonemes, tp=tp) + phonemes = np.asarray(phonemes, dtype=np.int32) + return phonemes + + def load_data(self, idx): + item = self.items[idx] + + if len(item) == 4: + text, wav_file, speaker_name, attn_file = item + else: + text, wav_file, speaker_name = item + attn = None + + wav = np.asarray(self.load_wav(wav_file), dtype=np.float32) + + # apply noise for augmentation + if self.use_noise_augment: + wav = wav + (1.0 / 32768.0) * np.random.rand(*wav.shape) + + if not self.input_seq_computed: + if self.use_phonemes: + text = self._load_or_generate_phoneme_sequence(wav_file, text, self.phoneme_cache_path, self.enable_eos_bos, self.cleaners, self.phoneme_language, self.tp, self.add_blank) + + else: + text = np.asarray(text_to_sequence(text, [self.cleaners], + tp=self.tp, add_blank=self.add_blank), + dtype=np.int32) + + assert text.size > 0, self.items[idx][1] + assert wav.size > 0, self.items[idx][1] + + if "attn_file" in locals(): + attn = np.load(attn_file) + + if len(text) > self.max_seq_len: + # return a different sample if the phonemized + # text is longer than the threshold + # TODO: find a better fix + return self.load_data(100) + + sample = { + 'text': text, + 'wav': wav, + 'attn': attn, + 'item_idx': self.items[idx][1], + 'speaker_name': speaker_name, + 'wav_file_name': os.path.basename(wav_file) + } + return sample + + @staticmethod + def _phoneme_worker(args): + item = args[0] + func_args = args[1] + text, wav_file, *_ = item + phonemes = MyDataset._load_or_generate_phoneme_sequence(wav_file, text, *func_args) + return phonemes + + def compute_input_seq(self, num_workers=0): + """compute input sequences separately. 
Call it before + passing dataset to data loader.""" + if not self.use_phonemes: + if self.verbose: + print(" | > Computing input sequences ...") + for idx, item in enumerate(tqdm.tqdm(self.items)): + text, *_ = item + sequence = np.asarray(text_to_sequence(text, [self.cleaners], + tp=self.tp, add_blank=self.add_blank), + dtype=np.int32) + self.items[idx][0] = sequence + + else: + func_args = [self.phoneme_cache_path, self.enable_eos_bos, self.cleaners, self.phoneme_language, self.tp, self.add_blank] + if self.verbose: + print(" | > Computing phonemes ...") + if num_workers == 0: + for idx, item in enumerate(tqdm.tqdm(self.items)): + phonemes = self._phoneme_worker([item, func_args]) + self.items[idx][0] = phonemes + else: + with Pool(num_workers) as p: + phonemes = list(tqdm.tqdm(p.imap(MyDataset._phoneme_worker, [[item, func_args] for item in self.items]), total=len(self.items))) + for idx, p in enumerate(phonemes): + self.items[idx][0] = p + + def sort_items(self): + r"""Sort instances based on text length in ascending order""" + lengths = np.array([len(ins[0]) for ins in self.items]) + + idxs = np.argsort(lengths) + new_items = [] + ignored = [] + for i, idx in enumerate(idxs): + length = lengths[idx] + if length < self.min_seq_len or length > self.max_seq_len: + ignored.append(idx) + else: + new_items.append(self.items[idx]) + # shuffle batch groups + if self.batch_group_size > 0: + for i in range(len(new_items) // self.batch_group_size): + offset = i * self.batch_group_size + end_offset = offset + self.batch_group_size + temp_items = new_items[offset:end_offset] + random.shuffle(temp_items) + new_items[offset:end_offset] = temp_items + self.items = new_items + + if self.verbose: + print(" | > Max length sequence: {}".format(np.max(lengths))) + print(" | > Min length sequence: {}".format(np.min(lengths))) + print(" | > Avg length sequence: {}".format(np.mean(lengths))) + print( + " | > Num. instances discarded by max-min (max={}, min={}) seq limits: {}" + .format(self.max_seq_len, self.min_seq_len, len(ignored))) + print(" | > Batch group size: {}.".format(self.batch_group_size)) + + def __len__(self): + return len(self.items) + + def __getitem__(self, idx): + return self.load_data(idx) + + def collate_fn(self, batch): + r""" + Perform preprocessing and create a final data batch: + 1. Sort batch instances by text-length + 2. Convert Audio signal to Spectrograms. + 3. PAD sequences wrt r. + 4. Load to Torch. 
+ """ + + # Puts each data field into a tensor with outer dimension batch size + if isinstance(batch[0], collections.Mapping): + + text_lenghts = np.array([len(d["text"]) for d in batch]) + + # sort items with text input length for RNN efficiency + text_lenghts, ids_sorted_decreasing = torch.sort( + torch.LongTensor(text_lenghts), dim=0, descending=True) + + wav = [batch[idx]['wav'] for idx in ids_sorted_decreasing] + item_idxs = [ + batch[idx]['item_idx'] for idx in ids_sorted_decreasing + ] + text = [batch[idx]['text'] for idx in ids_sorted_decreasing] + + speaker_name = [ + batch[idx]['speaker_name'] for idx in ids_sorted_decreasing + ] + # get speaker embeddings + if self.speaker_mapping is not None: + wav_files_names = [ + batch[idx]['wav_file_name'] + for idx in ids_sorted_decreasing + ] + speaker_embedding = [ + self.speaker_mapping[w]['embedding'] + for w in wav_files_names + ] + else: + speaker_embedding = None + # compute features + mel = [self.ap.melspectrogram(w).astype('float32') for w in wav] + + mel_lengths = [m.shape[1] for m in mel] + + # compute 'stop token' targets + stop_targets = [ + np.array([0.] * (mel_len - 1) + [1.]) + for mel_len in mel_lengths + ] + + # PAD stop targets + stop_targets = prepare_stop_target(stop_targets, + self.outputs_per_step) + + # PAD sequences with longest instance in the batch + text = prepare_data(text).astype(np.int32) + + # PAD features with longest instance + mel = prepare_tensor(mel, self.outputs_per_step) + + # B x D x T --> B x T x D + mel = mel.transpose(0, 2, 1) + + # convert things to pytorch + text_lenghts = torch.LongTensor(text_lenghts) + text = torch.LongTensor(text) + mel = torch.FloatTensor(mel).contiguous() + mel_lengths = torch.LongTensor(mel_lengths) + stop_targets = torch.FloatTensor(stop_targets) + + if speaker_embedding is not None: + speaker_embedding = torch.FloatTensor(speaker_embedding) + + # compute linear spectrogram + if self.compute_linear_spec: + linear = [ + self.ap.spectrogram(w).astype('float32') for w in wav + ] + linear = prepare_tensor(linear, self.outputs_per_step) + linear = linear.transpose(0, 2, 1) + assert mel.shape[1] == linear.shape[1] + linear = torch.FloatTensor(linear).contiguous() + else: + linear = None + + # collate attention alignments + if batch[0]['attn'] is not None: + attns = [batch[idx]['attn'].T for idx in ids_sorted_decreasing] + for idx, attn in enumerate(attns): + pad2 = mel.shape[1] - attn.shape[1] + pad1 = text.shape[1] - attn.shape[0] + attn = np.pad(attn, [[0, pad1], [0, pad2]]) + attns[idx] = attn + attns = prepare_tensor(attns, self.outputs_per_step) + attns = torch.FloatTensor(attns).unsqueeze(1) + else: + attns = None + return text, text_lenghts, speaker_name, linear, mel, mel_lengths, \ + stop_targets, item_idxs, speaker_embedding, attns + + raise TypeError(("batch must contain tensors, numbers, dicts or lists;\ + found {}".format(type(batch[0])))) diff --git a/TTS/tts/datasets/__init__.py b/TTS/tts/datasets/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/TTS/tts/datasets/preprocess.py b/TTS/tts/datasets/preprocess.py new file mode 100644 index 0000000000000000000000000000000000000000..56fc75f5f92778612158a98d7025c583fc11daa4 --- /dev/null +++ b/TTS/tts/datasets/preprocess.py @@ -0,0 +1,340 @@ +import os +from glob import glob +import re +import sys +from pathlib import Path + +from tqdm import tqdm + +from TTS.tts.utils.generic_utils import split_dataset + +#################### +# 
UTILITIES +#################### + +def load_meta_data(datasets, eval_split=True): + meta_data_train_all = [] + meta_data_eval_all = [] if eval_split else None + for dataset in datasets: + name = dataset['name'] + root_path = dataset['path'] + meta_file_train = dataset['meta_file_train'] + meta_file_val = dataset['meta_file_val'] + # setup the right data processor + preprocessor = get_preprocessor_by_name(name) + # load train set + meta_data_train = preprocessor(root_path, meta_file_train) + print(f" | > Found {len(meta_data_train)} files in {Path(root_path).resolve()}") + # load evaluation split if set + if eval_split: + if meta_file_val is None: + meta_data_eval, meta_data_train = split_dataset(meta_data_train) + else: + meta_data_eval = preprocessor(root_path, meta_file_val) + meta_data_eval_all += meta_data_eval + meta_data_train_all += meta_data_train + # load attention masks for duration predictor training + if 'meta_file_attn_mask' in dataset: + meta_data = dict(load_attention_mask_meta_data(dataset['meta_file_attn_mask'])) + for idx, ins in enumerate(meta_data_train_all): + attn_file = meta_data[ins[1]].strip() + meta_data_train_all[idx].append(attn_file) + if meta_data_eval_all is not None: + for idx, ins in enumerate(meta_data_eval_all): + attn_file = meta_data[ins[1]].strip() + meta_data_eval_all[idx].append(attn_file) + return meta_data_train_all, meta_data_eval_all + + +def load_attention_mask_meta_data(metafile_path): + """Load meta data file created by compute_attention_masks.py""" + with open(metafile_path, 'r') as f: + lines = f.readlines() + + meta_data = [] + for line in lines: + wav_file, attn_file = line.split('|') + meta_data.append([wav_file, attn_file]) + return meta_data + + +def get_preprocessor_by_name(name): + """Returns the respective preprocessing function.""" + thismodule = sys.modules[__name__] + return getattr(thismodule, name.lower()) + + +######################## +# DATASETS +######################## + +def tweb(root_path, meta_file): + """Normalize TWEB dataset. 
+ https://www.kaggle.com/bryanpark/the-world-english-bible-speech-dataset + """ + txt_file = os.path.join(root_path, meta_file) + items = [] + speaker_name = "tweb" + with open(txt_file, 'r') as ttf: + for line in ttf: + cols = line.split('\t') + wav_file = os.path.join(root_path, cols[0] + '.wav') + text = cols[1] + items.append([text, wav_file, speaker_name]) + return items + + +def mozilla(root_path, meta_file): + """Normalizes Mozilla meta data files to TTS format""" + txt_file = os.path.join(root_path, meta_file) + items = [] + speaker_name = "mozilla" + with open(txt_file, 'r') as ttf: + for line in ttf: + cols = line.split('|') + wav_file = cols[1].strip() + text = cols[0].strip() + wav_file = os.path.join(root_path, "wavs", wav_file) + items.append([text, wav_file, speaker_name]) + return items + + +def mozilla_de(root_path, meta_file): + """Normalizes Mozilla meta data files to TTS format""" + txt_file = os.path.join(root_path, meta_file) + items = [] + speaker_name = "mozilla" + with open(txt_file, 'r', encoding="ISO 8859-1") as ttf: + for line in ttf: + cols = line.strip().split('|') + wav_file = cols[0].strip() + text = cols[1].strip() + folder_name = f"BATCH_{wav_file.split('_')[0]}_FINAL" + wav_file = os.path.join(root_path, folder_name, wav_file) + items.append([text, wav_file, speaker_name]) + return items + + +def mailabs(root_path, meta_files=None): + """Normalizes M-AI-Labs meta data files to TTS format""" + speaker_regex = re.compile( + "by_book/(male|female)/(?P[^/]+)/") + if meta_files is None: + csv_files = glob(root_path + "/**/metadata.csv", recursive=True) + else: + csv_files = meta_files + # meta_files = [f.strip() for f in meta_files.split(",")] + items = [] + for csv_file in csv_files: + txt_file = os.path.join(root_path, csv_file) + folder = os.path.dirname(txt_file) + # determine speaker based on folder structure... + speaker_name_match = speaker_regex.search(txt_file) + if speaker_name_match is None: + continue + speaker_name = speaker_name_match.group("speaker_name") + print(" | > {}".format(csv_file)) + with open(txt_file, 'r') as ttf: + for line in ttf: + cols = line.split('|') + if meta_files is None: + wav_file = os.path.join(folder, 'wavs', cols[0] + '.wav') + else: + wav_file = os.path.join(root_path, + folder.replace("metadata.csv", ""), + 'wavs', cols[0] + '.wav') + if os.path.isfile(wav_file): + text = cols[1].strip() + items.append([text, wav_file, speaker_name]) + else: + raise RuntimeError("> File %s does not exist!" 
% + (wav_file)) + return items + + +def ljspeech(root_path, meta_file): + """Normalizes the Nancy meta data file to TTS format""" + txt_file = os.path.join(root_path, meta_file) + items = [] + speaker_name = "ljspeech" + with open(txt_file, 'r') as ttf: + for line in ttf: + cols = line.split('|') + wav_file = os.path.join(root_path, 'wavs', cols[0] + '.wav') + text = cols[1] + items.append([text, wav_file, speaker_name]) + return items + + +def nancy(root_path, meta_file): + """Normalizes the Nancy meta data file to TTS format""" + txt_file = os.path.join(root_path, meta_file) + items = [] + speaker_name = "nancy" + with open(txt_file, 'r') as ttf: + for line in ttf: + utt_id = line.split()[1] + text = line[line.find('"') + 1:line.rfind('"') - 1] + wav_file = os.path.join(root_path, "wavn", utt_id + ".wav") + items.append([text, wav_file, speaker_name]) + return items + + +def common_voice(root_path, meta_file): + """Normalize the common voice meta data file to TTS format.""" + txt_file = os.path.join(root_path, meta_file) + items = [] + with open(txt_file, 'r') as ttf: + for line in ttf: + if line.startswith("client_id"): + continue + cols = line.split("\t") + text = cols[2] + speaker_name = cols[0] + wav_file = os.path.join(root_path, "clips", cols[1].replace(".mp3", ".wav")) + items.append([text, wav_file, 'MCV_' + speaker_name]) + return items + + +def libri_tts(root_path, meta_files=None): + """https://ai.google/tools/datasets/libri-tts/""" + items = [] + if meta_files is None: + meta_files = glob(f"{root_path}/**/*trans.tsv", recursive=True) + for meta_file in meta_files: + _meta_file = os.path.basename(meta_file).split('.')[0] + speaker_name = _meta_file.split('_')[0] + chapter_id = _meta_file.split('_')[1] + _root_path = os.path.join(root_path, f"{speaker_name}/{chapter_id}") + with open(meta_file, 'r') as ttf: + for line in ttf: + cols = line.split('\t') + wav_file = os.path.join(_root_path, cols[0] + '.wav') + text = cols[1] + items.append([text, wav_file, 'LTTS_' + speaker_name]) + for item in items: + assert os.path.exists( + item[1]), f" [!] wav files don't exist - {item[1]}" + return items + + +def custom_turkish(root_path, meta_file): + txt_file = os.path.join(root_path, meta_file) + items = [] + speaker_name = "turkish-female" + skipped_files = [] + with open(txt_file, 'r', encoding='utf-8') as ttf: + for line in ttf: + cols = line.split('|') + wav_file = os.path.join(root_path, 'wavs', + cols[0].strip() + '.wav') + if not os.path.exists(wav_file): + skipped_files.append(wav_file) + continue + text = cols[1].strip() + items.append([text, wav_file, speaker_name]) + print(f" [!] {len(skipped_files)} files skipped. 
They don't exist...") + return items + + +# ToDo: add the dataset link when the dataset is released publicly +def brspeech(root_path, meta_file): + '''BRSpeech 3.0 beta''' + txt_file = os.path.join(root_path, meta_file) + items = [] + with open(txt_file, 'r') as ttf: + for line in ttf: + if line.startswith("wav_filename"): + continue + cols = line.split('|') + wav_file = os.path.join(root_path, cols[0]) + text = cols[2] + speaker_name = cols[3] + items.append([text, wav_file, speaker_name]) + return items + + +def vctk(root_path, meta_files=None, wavs_path='wav48'): + """homepages.inf.ed.ac.uk/jyamagis/release/VCTK-Corpus.tar.gz""" + test_speakers = meta_files + items = [] + meta_files = glob(f"{os.path.join(root_path,'txt')}/**/*.txt", recursive=True) + for meta_file in meta_files: + _, speaker_id, txt_file = os.path.relpath(meta_file, + root_path).split(os.sep) + file_id = txt_file.split('.')[0] + if isinstance(test_speakers, + list): # if is list ignore this speakers ids + if speaker_id in test_speakers: + continue + with open(meta_file) as file_text: + text = file_text.readlines()[0] + wav_file = os.path.join(root_path, wavs_path, speaker_id, + file_id + '.wav') + items.append([text, wav_file, 'VCTK_' + speaker_id]) + + return items + + +def vctk_slim(root_path, meta_files=None, wavs_path='wav48'): + """homepages.inf.ed.ac.uk/jyamagis/release/VCTK-Corpus.tar.gz""" + items = [] + txt_files = glob(f"{os.path.join(root_path,'txt')}/**/*.txt", recursive=True) + for text_file in txt_files: + _, speaker_id, txt_file = os.path.relpath(text_file, + root_path).split(os.sep) + file_id = txt_file.split('.')[0] + if isinstance(meta_files, list): # if is list ignore this speakers ids + if speaker_id in meta_files: + continue + wav_file = os.path.join(root_path, wavs_path, speaker_id, + file_id + '.wav') + items.append([None, wav_file, 'VCTK_' + speaker_id]) + + return items + +# ======================================== VOX CELEB =========================================== +def voxceleb2(root_path, meta_file=None): + """ + :param meta_file Used only for consistency with load_meta_data api + """ + return _voxcel_x(root_path, meta_file, voxcel_idx="2") + + +def voxceleb1(root_path, meta_file=None): + """ + :param meta_file Used only for consistency with load_meta_data api + """ + return _voxcel_x(root_path, meta_file, voxcel_idx="1") + + +def _voxcel_x(root_path, meta_file, voxcel_idx): + assert voxcel_idx in ["1", "2"] + expected_count = 148_000 if voxcel_idx == "1" else 1_000_000 + voxceleb_path = Path(root_path) + cache_to = voxceleb_path / f"metafile_voxceleb{voxcel_idx}.csv" + cache_to.parent.mkdir(exist_ok=True) + + # if not exists meta file, crawl recursively for 'wav' files + if meta_file is not None: + with open(str(meta_file), 'r') as f: + return [x.strip().split('|') for x in f.readlines()] + + elif not cache_to.exists(): + cnt = 0 + meta_data = [] + wav_files = voxceleb_path.rglob("**/*.wav") + for path in tqdm(wav_files, desc=f"Building VoxCeleb {voxcel_idx} Meta file ... this needs to be done only once.", + total=expected_count): + speaker_id = str(Path(path).parent.parent.stem) + assert speaker_id.startswith('id') + text = None # VoxCel does not provide transciptions, and they are not needed for training the SE + meta_data.append(f"{text}|{path}|voxcel{voxcel_idx}_{speaker_id}\n") + cnt += 1 + with open(str(cache_to), 'w') as f: + f.write("".join(meta_data)) + if cnt < expected_count: + raise ValueError(f"Found too few instances for Voxceleb. 
Should be around {expected_count}, is: {cnt}") + + with open(str(cache_to), 'r') as f: + return [x.strip().split('|') for x in f.readlines()] diff --git a/TTS/tts/layers/__init__.py b/TTS/tts/layers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/TTS/tts/layers/attentions.py b/TTS/tts/layers/attentions.py new file mode 100644 index 0000000000000000000000000000000000000000..047e3b23981d5cb4201ef44d48f3a60087ec04f9 --- /dev/null +++ b/TTS/tts/layers/attentions.py @@ -0,0 +1,482 @@ +import torch +from torch import nn +from torch.nn import functional as F + +from TTS.tts.layers.common_layers import Linear +from scipy.stats import betabinom + + +class LocationLayer(nn.Module): + """Layers for Location Sensitive Attention + + Args: + attention_dim (int): number of channels in the input tensor. + attention_n_filters (int, optional): number of filters in convolution. Defaults to 32. + attention_kernel_size (int, optional): kernel size of convolution filter. Defaults to 31. + """ + def __init__(self, + attention_dim, + attention_n_filters=32, + attention_kernel_size=31): + super(LocationLayer, self).__init__() + self.location_conv1d = nn.Conv1d( + in_channels=2, + out_channels=attention_n_filters, + kernel_size=attention_kernel_size, + stride=1, + padding=(attention_kernel_size - 1) // 2, + bias=False) + self.location_dense = Linear( + attention_n_filters, attention_dim, bias=False, init_gain='tanh') + + def forward(self, attention_cat): + """ + Shapes: + attention_cat: [B, 2, C] + """ + processed_attention = self.location_conv1d(attention_cat) + processed_attention = self.location_dense( + processed_attention.transpose(1, 2)) + return processed_attention + + +class GravesAttention(nn.Module): + """Graves Attention as is ref1 with updates from ref2. + ref1: https://arxiv.org/abs/1910.10288 + ref2: https://arxiv.org/pdf/1906.01083.pdf + + Args: + query_dim (int): number of channels in query tensor. + K (int): number of Gaussian heads to be used for computing attention. + """ + COEF = 0.3989422917366028 # numpy.sqrt(1/(2*numpy.pi)) + + def __init__(self, query_dim, K): + + super(GravesAttention, self).__init__() + self._mask_value = 1e-8 + self.K = K + # self.attention_alignment = 0.05 + self.eps = 1e-5 + self.J = None + self.N_a = nn.Sequential( + nn.Linear(query_dim, query_dim, bias=True), + nn.ReLU(), + nn.Linear(query_dim, 3*K, bias=True)) + self.attention_weights = None + self.mu_prev = None + self.init_layers() + + def init_layers(self): + torch.nn.init.constant_(self.N_a[2].bias[(2*self.K):(3*self.K)], 1.) 
# bias mean + torch.nn.init.constant_(self.N_a[2].bias[self.K:(2*self.K)], 10) # bias std + + def init_states(self, inputs): + if self.J is None or inputs.shape[1]+1 > self.J.shape[-1]: + self.J = torch.arange(0, inputs.shape[1]+2.0).to(inputs.device) + 0.5 + self.attention_weights = torch.zeros(inputs.shape[0], inputs.shape[1]).to(inputs.device) + self.mu_prev = torch.zeros(inputs.shape[0], self.K).to(inputs.device) + + # pylint: disable=R0201 + # pylint: disable=unused-argument + def preprocess_inputs(self, inputs): + return None + + def forward(self, query, inputs, processed_inputs, mask): + """ + Shapes: + query: [B, C_attention_rnn] + inputs: [B, T_in, C_encoder] + processed_inputs: place_holder + mask: [B, T_in] + """ + gbk_t = self.N_a(query) + gbk_t = gbk_t.view(gbk_t.size(0), -1, self.K) + + # attention model parameters + # each B x K + g_t = gbk_t[:, 0, :] + b_t = gbk_t[:, 1, :] + k_t = gbk_t[:, 2, :] + + # dropout to decorrelate attention heads + g_t = torch.nn.functional.dropout(g_t, p=0.5, training=self.training) + + # attention GMM parameters + sig_t = torch.nn.functional.softplus(b_t) + self.eps + + mu_t = self.mu_prev + torch.nn.functional.softplus(k_t) + g_t = torch.softmax(g_t, dim=-1) + self.eps + + j = self.J[:inputs.size(1)+1] + + # attention weights + phi_t = g_t.unsqueeze(-1) * (1 / (1 + torch.sigmoid((mu_t.unsqueeze(-1) - j) / sig_t.unsqueeze(-1)))) + + # discritize attention weights + alpha_t = torch.sum(phi_t, 1) + alpha_t = alpha_t[:, 1:] - alpha_t[:, :-1] + alpha_t[alpha_t == 0] = 1e-8 + + # apply masking + if mask is not None: + alpha_t.data.masked_fill_(~mask, self._mask_value) + + context = torch.bmm(alpha_t.unsqueeze(1), inputs).squeeze(1) + self.attention_weights = alpha_t + self.mu_prev = mu_t + return context + + +class OriginalAttention(nn.Module): + """Bahdanau Attention with various optional modifications. Proposed below. + - Location sensitive attnetion: https://arxiv.org/abs/1712.05884 + - Forward Attention: https://arxiv.org/abs/1807.06736 + state masking at inference + - Using sigmoid instead of softmax normalization + - Attention windowing at inference time + + Note: + Location Sensitive Attention is an attention mechanism that extends the additive attention mechanism + to use cumulative attention weights from previous decoder time steps as an additional feature. + + Forward attention considers only the alignment paths that satisfy the monotonic condition at each + decoder timestep. The modified attention probabilities at each timestep are computed recursively + using a forward algorithm. + + Transition agent for forward attention is further proposed, which helps the attention mechanism + to make decisions whether to move forward or stay at each decoder timestep. + + Attention windowing applies a sliding windows to time steps of the input tensor centering at the last + time step with the largest attention weight. It is especially useful at inference to keep the attention + alignment diagonal. + + + Args: + query_dim (int): number of channels in the query tensor. + embedding_dim (int): number of channels in the vakue tensor. In general, the value tensor is the output of the encoder layer. + attention_dim (int): number of channels of the inner attention layers. + location_attention (bool): enable/disable location sensitive attention. + attention_location_n_filters (int): number of location attention filters. + attention_location_kernel_size (int): filter size of location attention convolution layer. 
+ windowing (int): window size for attention windowing. if it is 5, for computing the attention, it only considers the time steps [(t-5), ..., (t+5)] of the input. + norm (str): normalization method applied to the attention weights. 'softmax' or 'sigmoid' + forward_attn (bool): enable/disable forward attention. + trans_agent (bool): enable/disable transition agent in the forward attention. + forward_attn_mask (int): enable/disable an explicit masking in forward attention. It is useful to set at especially inference time. + """ + # Pylint gets confused by PyTorch conventions here + #pylint: disable=attribute-defined-outside-init + def __init__(self, query_dim, embedding_dim, attention_dim, + location_attention, attention_location_n_filters, + attention_location_kernel_size, windowing, norm, forward_attn, + trans_agent, forward_attn_mask): + super(OriginalAttention, self).__init__() + self.query_layer = Linear( + query_dim, attention_dim, bias=False, init_gain='tanh') + self.inputs_layer = Linear( + embedding_dim, attention_dim, bias=False, init_gain='tanh') + self.v = Linear(attention_dim, 1, bias=True) + if trans_agent: + self.ta = nn.Linear( + query_dim + embedding_dim, 1, bias=True) + if location_attention: + self.location_layer = LocationLayer( + attention_dim, + attention_location_n_filters, + attention_location_kernel_size, + ) + self._mask_value = -float("inf") + self.windowing = windowing + self.win_idx = None + self.norm = norm + self.forward_attn = forward_attn + self.trans_agent = trans_agent + self.forward_attn_mask = forward_attn_mask + self.location_attention = location_attention + + def init_win_idx(self): + self.win_idx = -1 + self.win_back = 2 + self.win_front = 6 + + def init_forward_attn(self, inputs): + B = inputs.shape[0] + T = inputs.shape[1] + self.alpha = torch.cat( + [torch.ones([B, 1]), + torch.zeros([B, T])[:, :-1] + 1e-7], dim=1).to(inputs.device) + self.u = (0.5 * torch.ones([B, 1])).to(inputs.device) + + def init_location_attention(self, inputs): + B = inputs.size(0) + T = inputs.size(1) + self.attention_weights_cum = torch.zeros([B, T], device=inputs.device) + + def init_states(self, inputs): + B = inputs.size(0) + T = inputs.size(1) + self.attention_weights = torch.zeros([B, T], device=inputs.device) + if self.location_attention: + self.init_location_attention(inputs) + if self.forward_attn: + self.init_forward_attn(inputs) + if self.windowing: + self.init_win_idx() + + def preprocess_inputs(self, inputs): + return self.inputs_layer(inputs) + + def update_location_attention(self, alignments): + self.attention_weights_cum += alignments + + def get_location_attention(self, query, processed_inputs): + attention_cat = torch.cat((self.attention_weights.unsqueeze(1), + self.attention_weights_cum.unsqueeze(1)), + dim=1) + processed_query = self.query_layer(query.unsqueeze(1)) + processed_attention_weights = self.location_layer(attention_cat) + energies = self.v( + torch.tanh(processed_query + processed_attention_weights + + processed_inputs)) + energies = energies.squeeze(-1) + return energies, processed_query + + def get_attention(self, query, processed_inputs): + processed_query = self.query_layer(query.unsqueeze(1)) + energies = self.v(torch.tanh(processed_query + processed_inputs)) + energies = energies.squeeze(-1) + return energies, processed_query + + def apply_windowing(self, attention, inputs): + back_win = self.win_idx - self.win_back + front_win = self.win_idx + self.win_front + if back_win > 0: + attention[:, :back_win] = -float("inf") + if front_win < 
inputs.shape[1]: + attention[:, front_win:] = -float("inf") + # this is a trick to solve a special problem. + # but it does not hurt. + if self.win_idx == -1: + attention[:, 0] = attention.max() + # Update the window + self.win_idx = torch.argmax(attention, 1).long()[0].item() + return attention + + def apply_forward_attention(self, alignment): + # forward attention + fwd_shifted_alpha = F.pad( + self.alpha[:, :-1].clone().to(alignment.device), (1, 0, 0, 0)) + # compute transition potentials + alpha = ((1 - self.u) * self.alpha + + self.u * fwd_shifted_alpha + + 1e-8) * alignment + # force incremental alignment + if not self.training and self.forward_attn_mask: + _, n = fwd_shifted_alpha.max(1) + val, _ = alpha.max(1) + for b in range(alignment.shape[0]): + alpha[b, n[b] + 3:] = 0 + alpha[b, :( + n[b] - 1 + )] = 0 # ignore all previous states to prevent repetition. + alpha[b, + (n[b] - 2 + )] = 0.01 * val[b] # smoothing factor for the prev step + # renormalize attention weights + alpha = alpha / alpha.sum(dim=1, keepdim=True) + return alpha + + def forward(self, query, inputs, processed_inputs, mask): + """ + shapes: + query: [B, C_attn_rnn] + inputs: [B, T_en, D_en] + processed_inputs: [B, T_en, D_attn] + mask: [B, T_en] + """ + if self.location_attention: + attention, _ = self.get_location_attention( + query, processed_inputs) + else: + attention, _ = self.get_attention( + query, processed_inputs) + # apply masking + if mask is not None: + attention.data.masked_fill_(~mask, self._mask_value) + # apply windowing - only in eval mode + if not self.training and self.windowing: + attention = self.apply_windowing(attention, inputs) + + # normalize attention values + if self.norm == "softmax": + alignment = torch.softmax(attention, dim=-1) + elif self.norm == "sigmoid": + alignment = torch.sigmoid(attention) / torch.sigmoid( + attention).sum( + dim=1, keepdim=True) + else: + raise ValueError("Unknown value for attention norm type") + + if self.location_attention: + self.update_location_attention(alignment) + + # apply forward attention if enabled + if self.forward_attn: + alignment = self.apply_forward_attention(alignment) + self.alpha = alignment + + context = torch.bmm(alignment.unsqueeze(1), inputs) + context = context.squeeze(1) + self.attention_weights = alignment + + # compute transition agent + if self.forward_attn and self.trans_agent: + ta_input = torch.cat([context, query.squeeze(1)], dim=-1) + self.u = torch.sigmoid(self.ta(ta_input)) + return context + + +class MonotonicDynamicConvolutionAttention(nn.Module): + """Dynamic convolution attention from + https://arxiv.org/pdf/1910.10288.pdf + + + query -> linear -> tanh -> linear ->| + | mask values + v | | + atten_w(t-1) -|-> conv1d_dynamic -> linear -|-> tanh -> + -> softmax -> * -> * -> context + |-> conv1d_static -> linear -| | + |-> conv1d_prior -> log ----------------| + + query: attention rnn output. + + Note: + Dynamic convolution attention is an alternation of the location senstive attention with + dynamically computed convolution filters from the previous attention scores and a set of + constraints to keep the attention alignment diagonal. + + Args: + query_dim (int): number of channels in the query tensor. + embedding_dim (int): number of channels in the value tensor. + static_filter_dim (int): number of channels in the convolution layer computing the static filters. + static_kernel_size (int): kernel size for the convolution layer computing the static filters. 
+ dynamic_filter_dim (int): number of channels in the convolution layer computing the dynamic filters. + dynamic_kernel_size (int): kernel size for the convolution layer computing the dynamic filters. + prior_filter_len (int, optional): [description]. Defaults to 11 from the paper. + alpha (float, optional): [description]. Defaults to 0.1 from the paper. + beta (float, optional): [description]. Defaults to 0.9 from the paper. + """ + def __init__( + self, + query_dim, + embedding_dim, # pylint: disable=unused-argument + attention_dim, + static_filter_dim, + static_kernel_size, + dynamic_filter_dim, + dynamic_kernel_size, + prior_filter_len=11, + alpha=0.1, + beta=0.9, + ): + super().__init__() + self._mask_value = 1e-8 + self.dynamic_filter_dim = dynamic_filter_dim + self.dynamic_kernel_size = dynamic_kernel_size + self.prior_filter_len = prior_filter_len + self.attention_weights = None + # setup key and query layers + self.query_layer = nn.Linear(query_dim, attention_dim) + self.key_layer = nn.Linear( + attention_dim, dynamic_filter_dim * dynamic_kernel_size, bias=False + ) + self.static_filter_conv = nn.Conv1d( + 1, + static_filter_dim, + static_kernel_size, + padding=(static_kernel_size - 1) // 2, + bias=False, + ) + self.static_filter_layer = nn.Linear(static_filter_dim, attention_dim, bias=False) + self.dynamic_filter_layer = nn.Linear(dynamic_filter_dim, attention_dim) + self.v = nn.Linear(attention_dim, 1, bias=False) + + prior = betabinom.pmf(range(prior_filter_len), prior_filter_len - 1, + alpha, beta) + self.register_buffer("prior", torch.FloatTensor(prior).flip(0)) + + # pylint: disable=unused-argument + def forward(self, query, inputs, processed_inputs, mask): + """ + query: [B, C_attn_rnn] + inputs: [B, T_en, D_en] + processed_inputs: place holder. + mask: [B, T_en] + """ + # compute prior filters + prior_filter = F.conv1d( + F.pad(self.attention_weights.unsqueeze(1), + (self.prior_filter_len - 1, 0)), self.prior.view(1, 1, -1)) + prior_filter = torch.log(prior_filter.clamp_min_(1e-6)).squeeze(1) + G = self.key_layer(torch.tanh(self.query_layer(query))) + # compute dynamic filters + dynamic_filter = F.conv1d( + self.attention_weights.unsqueeze(0), + G.view(-1, 1, self.dynamic_kernel_size), + padding=(self.dynamic_kernel_size - 1) // 2, + groups=query.size(0), + ) + dynamic_filter = dynamic_filter.view(query.size(0), self.dynamic_filter_dim, -1).transpose(1, 2) + # compute static filters + static_filter = self.static_filter_conv(self.attention_weights.unsqueeze(1)).transpose(1, 2) + alignment = self.v( + torch.tanh( + self.static_filter_layer(static_filter) + + self.dynamic_filter_layer(dynamic_filter))).squeeze(-1) + prior_filter + # compute attention weights + attention_weights = F.softmax(alignment, dim=-1) + # apply masking + if mask is not None: + attention_weights.data.masked_fill_(~mask, self._mask_value) + self.attention_weights = attention_weights + # compute context + context = torch.bmm(attention_weights.unsqueeze(1), inputs).squeeze(1) + return context + + def preprocess_inputs(self, inputs): # pylint: disable=no-self-use + return None + + def init_states(self, inputs): + B = inputs.size(0) + T = inputs.size(1) + self.attention_weights = torch.zeros([B, T], device=inputs.device) + self.attention_weights[:, 0] = 1. 
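+
+# Example usage of MonotonicDynamicConvolutionAttention. This is an
+# illustrative sketch only; the tensor sizes below are assumed and do not come
+# from any model config.
+#
+#     import torch
+#     from TTS.tts.layers.attentions import MonotonicDynamicConvolutionAttention
+#
+#     attn = MonotonicDynamicConvolutionAttention(
+#         query_dim=128, embedding_dim=256, attention_dim=128,
+#         static_filter_dim=8, static_kernel_size=21,
+#         dynamic_filter_dim=8, dynamic_kernel_size=21)
+#     inputs = torch.randn(2, 50, 256)            # [B, T_en, D_en] encoder outputs
+#     query = torch.randn(2, 128)                 # [B, C_attn_rnn] attention rnn output
+#     attn.init_states(inputs)                    # alignment starts at the first input step
+#     context = attn(query, inputs, None, None)   # -> [2, 256] context vector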
+
+
+def init_attn(attn_type, query_dim, embedding_dim, attention_dim,
+              location_attention, attention_location_n_filters,
+              attention_location_kernel_size, windowing, norm, forward_attn,
+              trans_agent, forward_attn_mask, attn_K):
+    if attn_type == "original":
+        return OriginalAttention(query_dim, embedding_dim, attention_dim,
+                                 location_attention,
+                                 attention_location_n_filters,
+                                 attention_location_kernel_size, windowing,
+                                 norm, forward_attn, trans_agent,
+                                 forward_attn_mask)
+    if attn_type == "graves":
+        return GravesAttention(query_dim, attn_K)
+    if attn_type == "dynamic_convolution":
+        return MonotonicDynamicConvolutionAttention(query_dim,
+                                                    embedding_dim,
+                                                    attention_dim,
+                                                    static_filter_dim=8,
+                                                    static_kernel_size=21,
+                                                    dynamic_filter_dim=8,
+                                                    dynamic_kernel_size=21,
+                                                    prior_filter_len=11,
+                                                    alpha=0.1,
+                                                    beta=0.9)
+
+    raise RuntimeError(
+        f" [!] Given attention type '{attn_type}' does not exist.")
diff --git a/TTS/tts/layers/common_layers.py b/TTS/tts/layers/common_layers.py
new file mode 100644
index 0000000000000000000000000000000000000000..5da9b49d37ea4014e851b33a1073d4029df18a68
--- /dev/null
+++ b/TTS/tts/layers/common_layers.py
@@ -0,0 +1,127 @@
+import torch
+from torch import nn
+from torch.nn import functional as F
+
+
+class Linear(nn.Module):
+    """Linear layer with a specific initialization.
+
+    Args:
+        in_features (int): number of channels in the input tensor.
+        out_features (int): number of channels in the output tensor.
+        bias (bool, optional): enable/disable bias in the layer. Defaults to True.
+        init_gain (str, optional): method to compute the gain in the weight initialization based on the nonlinear activation used afterwards. Defaults to 'linear'.
+    """
+    def __init__(self,
+                 in_features,
+                 out_features,
+                 bias=True,
+                 init_gain='linear'):
+        super(Linear, self).__init__()
+        self.linear_layer = torch.nn.Linear(
+            in_features, out_features, bias=bias)
+        self._init_w(init_gain)
+
+    def _init_w(self, init_gain):
+        torch.nn.init.xavier_uniform_(
+            self.linear_layer.weight,
+            gain=torch.nn.init.calculate_gain(init_gain))
+
+    def forward(self, x):
+        return self.linear_layer(x)
+
+
+class LinearBN(nn.Module):
+    """Linear layer with Batch Normalization.
+
+    x -> linear -> BN -> o
+
+    Args:
+        in_features (int): number of channels in the input tensor.
+        out_features (int): number of channels in the output tensor.
+        bias (bool, optional): enable/disable bias in the linear layer. Defaults to True.
+        init_gain (str, optional): method to set the gain for weight initialization. Defaults to 'linear'.
+    """
+    def __init__(self,
+                 in_features,
+                 out_features,
+                 bias=True,
+                 init_gain='linear'):
+        super(LinearBN, self).__init__()
+        self.linear_layer = torch.nn.Linear(
+            in_features, out_features, bias=bias)
+        self.batch_normalization = nn.BatchNorm1d(out_features, momentum=0.1, eps=1e-5)
+        self._init_w(init_gain)
+
+    def _init_w(self, init_gain):
+        torch.nn.init.xavier_uniform_(
+            self.linear_layer.weight,
+            gain=torch.nn.init.calculate_gain(init_gain))
+
+    def forward(self, x):
+        """
+        Shapes:
+            x: [T, B, C] or [B, C]
+        """
+        out = self.linear_layer(x)
+        if len(out.shape) == 3:
+            out = out.permute(1, 2, 0)
+        out = self.batch_normalization(out)
+        if len(out.shape) == 3:
+            out = out.permute(2, 0, 1)
+        return out
+
+
+class Prenet(nn.Module):
+    """Tacotron specific Prenet with an optional Batch Normalization.
+
+    Note:
+        Prenet with BN improves the model performance significantly especially
+        if it is enabled after learning a diagonal attention alignment with the original
+        prenet.
However, if the target dataset is high quality then it also works from + the start. It is also suggested to disable dropout if BN is in use. + + prenet_type == "original" + x -> [linear -> ReLU -> Dropout]xN -> o + + prenet_type == "bn" + x -> [linear -> BN -> ReLU -> Dropout]xN -> o + + Args: + in_features (int): number of channels in the input tensor and the inner layers. + prenet_type (str, optional): prenet type "original" or "bn". Defaults to "original". + prenet_dropout (bool, optional): dropout rate. Defaults to True. + out_features (list, optional): List of output channels for each prenet block. + It also defines number of the prenet blocks based on the length of argument list. + Defaults to [256, 256]. + bias (bool, optional): enable/disable bias in prenet linear layers. Defaults to True. + """ + # pylint: disable=dangerous-default-value + def __init__(self, + in_features, + prenet_type="original", + prenet_dropout=True, + out_features=[256, 256], + bias=True): + super(Prenet, self).__init__() + self.prenet_type = prenet_type + self.prenet_dropout = prenet_dropout + in_features = [in_features] + out_features[:-1] + if prenet_type == "bn": + self.linear_layers = nn.ModuleList([ + LinearBN(in_size, out_size, bias=bias) + for (in_size, out_size) in zip(in_features, out_features) + ]) + elif prenet_type == "original": + self.linear_layers = nn.ModuleList([ + Linear(in_size, out_size, bias=bias) + for (in_size, out_size) in zip(in_features, out_features) + ]) + + def forward(self, x): + for linear in self.linear_layers: + if self.prenet_dropout: + x = F.dropout(F.relu(linear(x)), p=0.5, training=self.training) + else: + x = F.relu(linear(x)) + return x \ No newline at end of file diff --git a/TTS/tts/layers/generic/__init__.py b/TTS/tts/layers/generic/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/TTS/tts/layers/generic/gated_conv.py b/TTS/tts/layers/generic/gated_conv.py new file mode 100644 index 0000000000000000000000000000000000000000..dbe0f0f0c32a2374b7f0c166c2fdcd83f089c2c2 --- /dev/null +++ b/TTS/tts/layers/generic/gated_conv.py @@ -0,0 +1,43 @@ +from torch import nn + +from .normalization import LayerNorm + + +class GatedConvBlock(nn.Module): + """Gated convolutional block as in https://arxiv.org/pdf/1612.08083.pdf + Args: + in_out_channels (int): number of input/output channels. + kernel_size (int): convolution kernel size. + dropout_p (float): dropout rate. 
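+        num_layers (int): number of gated convolutional layers.
+
+    Example:
+        A minimal usage sketch; channel, layer and time sizes are illustrative.
+
+            import torch
+            from TTS.tts.layers.generic.gated_conv import GatedConvBlock
+
+            block = GatedConvBlock(in_out_channels=64, kernel_size=5,
+                                   dropout_p=0.1, num_layers=3)
+            x = torch.randn(2, 64, 37)       # [B, C, T]
+            x_mask = torch.ones(2, 1, 37)    # [B, 1, T], 1 = valid frame
+            o = block(x, x_mask)             # -> [2, 64, 37], GLU gating + residual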
+ """ + def __init__(self, in_out_channels, kernel_size, dropout_p, num_layers): + super().__init__() + # class arguments + self.dropout_p = dropout_p + self.num_layers = num_layers + # define layers + self.conv_layers = nn.ModuleList() + self.norm_layers = nn.ModuleList() + self.layers = nn.ModuleList() + for _ in range(num_layers): + self.conv_layers += [ + nn.Conv1d(in_out_channels, + 2 * in_out_channels, + kernel_size, + padding=kernel_size // 2) + ] + self.norm_layers += [LayerNorm(2 * in_out_channels)] + + def forward(self, x, x_mask): + o = x + res = x + for idx in range(self.num_layers): + o = nn.functional.dropout(o, + p=self.dropout_p, + training=self.training) + o = self.conv_layers[idx](o * x_mask) + o = self.norm_layers[idx](o) + o = nn.functional.glu(o, dim=1) + o = res + o + res = o + return o \ No newline at end of file diff --git a/TTS/tts/layers/generic/normalization.py b/TTS/tts/layers/generic/normalization.py new file mode 100644 index 0000000000000000000000000000000000000000..5ccdeb473f6f9a9a9e50d64d62d218f979011706 --- /dev/null +++ b/TTS/tts/layers/generic/normalization.py @@ -0,0 +1,107 @@ +import torch +from torch import nn + + +class LayerNorm(nn.Module): + def __init__(self, channels, eps=1e-4): + """Layer norm for the 2nd dimension of the input. + Args: + channels (int): number of channels (2nd dimension) of the input. + eps (float): to prevent 0 division + + Shapes: + - input: (B, C, T) + - output: (B, C, T) + """ + super().__init__() + self.channels = channels + self.eps = eps + + self.gamma = nn.Parameter(torch.ones(1, channels, 1) * 0.1) + self.beta = nn.Parameter(torch.zeros(1, channels, 1)) + + def forward(self, x): + mean = torch.mean(x, 1, keepdim=True) + variance = torch.mean((x - mean)**2, 1, keepdim=True) + x = (x - mean) * torch.rsqrt(variance + self.eps) + x = x * self.gamma + self.beta + return x + + +class TemporalBatchNorm1d(nn.BatchNorm1d): + """Normalize each channel separately over time and batch. + """ + def __init__(self, + channels, + affine=True, + track_running_stats=True, + momentum=0.1): + super().__init__(channels, + affine=affine, + track_running_stats=track_running_stats, + momentum=momentum) + + def forward(self, x): + return super().forward(x.transpose(2, 1)).transpose(2, 1) + + +class ActNorm(nn.Module): + """Activation Normalization bijector as an alternative to Batch Norm. It computes + mean and std from a sample data in advance and it uses these values + for normalization at training. + + Args: + channels (int): input channels. + ddi (False): data depended initialization flag. 
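+
+    Example:
+        A minimal data-dependent initialization (ddi) sketch; channel and time
+        sizes are illustrative.
+
+            import torch
+            from TTS.tts.layers.generic.normalization import ActNorm
+
+            layer = ActNorm(channels=80, ddi=True)
+            x = torch.randn(4, 80, 120)            # [B, C, T]
+            z, logdet = layer(x)                   # first call sets bias/logs from batch statistics
+            x_back, _ = layer(z, reverse=True)     # inverse pass recovers x up to float error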
+ + Shapes: + - inputs: (B, C, T) + - outputs: (B, C, T) + """ + def __init__(self, channels, ddi=False, **kwargs): # pylint: disable=unused-argument + super().__init__() + self.channels = channels + self.initialized = not ddi + + self.logs = nn.Parameter(torch.zeros(1, channels, 1)) + self.bias = nn.Parameter(torch.zeros(1, channels, 1)) + + def forward(self, x, x_mask=None, reverse=False, **kwargs): # pylint: disable=unused-argument + if x_mask is None: + x_mask = torch.ones(x.size(0), 1, x.size(2)).to(device=x.device, + dtype=x.dtype) + x_len = torch.sum(x_mask, [1, 2]) + if not self.initialized: + self.initialize(x, x_mask) + self.initialized = True + + if reverse: + z = (x - self.bias) * torch.exp(-self.logs) * x_mask + logdet = None + else: + z = (self.bias + torch.exp(self.logs) * x) * x_mask + logdet = torch.sum(self.logs) * x_len # [b] + + return z, logdet + + def store_inverse(self): + pass + + def set_ddi(self, ddi): + self.initialized = not ddi + + def initialize(self, x, x_mask): + with torch.no_grad(): + denom = torch.sum(x_mask, [0, 2]) + m = torch.sum(x * x_mask, [0, 2]) / denom + m_sq = torch.sum(x * x * x_mask, [0, 2]) / denom + v = m_sq - (m**2) + logs = 0.5 * torch.log(torch.clamp_min(v, 1e-6)) + + bias_init = (-m * torch.exp(-logs)).view(*self.bias.shape).to( + dtype=self.bias.dtype) + logs_init = (-logs).view(*self.logs.shape).to( + dtype=self.logs.dtype) + + self.bias.data.copy_(bias_init) + self.logs.data.copy_(logs_init) \ No newline at end of file diff --git a/TTS/tts/layers/generic/res_conv_bn.py b/TTS/tts/layers/generic/res_conv_bn.py new file mode 100644 index 0000000000000000000000000000000000000000..322cab94a86d7f7800b37de7f92f8f47295e0d63 --- /dev/null +++ b/TTS/tts/layers/generic/res_conv_bn.py @@ -0,0 +1,118 @@ +from torch import nn + + +class ZeroTemporalPad(nn.Module): + """Pad sequences to equal lentgh in the temporal dimension""" + def __init__(self, kernel_size, dilation): + super().__init__() + total_pad = (dilation * (kernel_size - 1)) + begin = total_pad // 2 + end = total_pad - begin + self.pad_layer = nn.ZeroPad2d((0, 0, begin, end)) + + def forward(self, x): + return self.pad_layer(x) + + +class Conv1dBN(nn.Module): + """1d convolutional with batch norm. + conv1d -> relu -> BN blocks. + + Note: + Batch normalization is applied after ReLU regarding the original implementation. + + Args: + in_channels (int): number of input channels. + out_channels (int): number of output channels. + kernel_size (int): kernel size for convolutional filters. + dilation (int): dilation for convolution layers. + """ + def __init__(self, in_channels, out_channels, kernel_size, dilation): + super().__init__() + padding = (dilation * (kernel_size - 1)) + pad_s = padding // 2 + pad_e = padding - pad_s + self.conv1d = nn.Conv1d(in_channels, out_channels, kernel_size, dilation=dilation) + self.pad = nn.ZeroPad2d((pad_s, pad_e, 0, 0)) # uneven left and right padding + self.norm = nn.BatchNorm1d(out_channels) + + def forward(self, x): + o = self.conv1d(x) + o = self.pad(o) + o = nn.functional.relu(o) + o = self.norm(o) + return o + + +class Conv1dBNBlock(nn.Module): + """1d convolutional block with batch norm. It is a set of conv1d -> relu -> BN blocks. + + Args: + in_channels (int): number of input channels. + out_channels (int): number of output channels. + hidden_channels (int): number of inner convolution channels. + kernel_size (int): kernel size for convolutional filters. + dilation (int): dilation for convolution layers. 
+ num_conv_blocks (int, optional): number of convolutional blocks. Defaults to 2. + """ + def __init__(self, in_channels, out_channels, hidden_channels, kernel_size, dilation, num_conv_blocks=2): + super().__init__() + self.conv_bn_blocks = [] + for idx in range(num_conv_blocks): + layer = Conv1dBN(in_channels if idx == 0 else hidden_channels, + out_channels if idx == (num_conv_blocks - 1) else hidden_channels, + kernel_size, + dilation) + self.conv_bn_blocks.append(layer) + self.conv_bn_blocks = nn.Sequential(*self.conv_bn_blocks) + + def forward(self, x): + """ + Shapes: + x: (B, D, T) + """ + return self.conv_bn_blocks(x) + + +class ResidualConv1dBNBlock(nn.Module): + """Residual Convolutional Blocks with BN + Each block has 'num_conv_block' conv layers and 'num_res_blocks' such blocks are connected + with residual connections. + + conv_block = (conv1d -> relu -> bn) x 'num_conv_blocks' + residuak_conv_block = (x -> conv_block -> + ->) x 'num_res_blocks' + ' - - - - - - - - - ^ + Args: + in_channels (int): number of input channels. + out_channels (int): number of output channels. + hidden_channels (int): number of inner convolution channels. + kernel_size (int): kernel size for convolutional filters. + dilations (list): dilations for each convolution layer. + num_res_blocks (int, optional): number of residual blocks. Defaults to 13. + num_conv_blocks (int, optional): number of convolutional blocks in each residual block. Defaults to 2. + """ + def __init__(self, in_channels, out_channels, hidden_channels, kernel_size, dilations, num_res_blocks=13, num_conv_blocks=2): + + super().__init__() + assert len(dilations) == num_res_blocks + self.res_blocks = nn.ModuleList() + for idx, dilation in enumerate(dilations): + block = Conv1dBNBlock(in_channels if idx==0 else hidden_channels, + out_channels if (idx + 1) == len(dilations) else hidden_channels, + hidden_channels, + kernel_size, + dilation, + num_conv_blocks) + self.res_blocks.append(block) + + def forward(self, x, x_mask=None): + if x_mask is None: + x_mask = 1.0 + o = x * x_mask + for block in self.res_blocks: + res = o + o = block(o) + o = o + res + if x_mask is not None: + o = o * x_mask + return o diff --git a/TTS/tts/layers/generic/time_depth_sep_conv.py b/TTS/tts/layers/generic/time_depth_sep_conv.py new file mode 100644 index 0000000000000000000000000000000000000000..c9a117c8e549866cbbb800cbb6ee3c0c00803bbf --- /dev/null +++ b/TTS/tts/layers/generic/time_depth_sep_conv.py @@ -0,0 +1,92 @@ +import torch +from torch import nn + + +class TimeDepthSeparableConv(nn.Module): + """Time depth separable convolution as in https://arxiv.org/pdf/1904.02619.pdf + It shows competative results with less computation and memory footprint.""" + def __init__(self, + in_channels, + hid_channels, + out_channels, + kernel_size, + bias=True): + super().__init__() + + self.in_channels = in_channels + self.out_channels = out_channels + self.hid_channels = hid_channels + self.kernel_size = kernel_size + + self.time_conv = nn.Conv1d( + in_channels, + 2 * hid_channels, + kernel_size=1, + stride=1, + padding=0, + bias=bias, + ) + self.norm1 = nn.BatchNorm1d(2 * hid_channels) + self.depth_conv = nn.Conv1d( + hid_channels, + hid_channels, + kernel_size, + stride=1, + padding=(kernel_size - 1) // 2, + groups=hid_channels, + bias=bias, + ) + self.norm2 = nn.BatchNorm1d(hid_channels) + self.time_conv2 = nn.Conv1d( + hid_channels, + out_channels, + kernel_size=1, + stride=1, + padding=0, + bias=bias, + ) + self.norm3 = nn.BatchNorm1d(out_channels) + + def 
forward(self, x): + x_res = x + x = self.time_conv(x) + x = self.norm1(x) + x = nn.functional.glu(x, dim=1) + x = self.depth_conv(x) + x = self.norm2(x) + x = x * torch.sigmoid(x) + x = self.time_conv2(x) + x = self.norm3(x) + x = x_res + x + return x + + +class TimeDepthSeparableConvBlock(nn.Module): + def __init__(self, + in_channels, + hid_channels, + out_channels, + num_layers, + kernel_size, + bias=True): + super().__init__() + assert (kernel_size - 1) % 2 == 0 + assert num_layers > 1 + + self.layers = nn.ModuleList() + layer = TimeDepthSeparableConv( + in_channels, hid_channels, + out_channels if num_layers == 1 else hid_channels, kernel_size, + bias) + self.layers.append(layer) + for idx in range(num_layers - 1): + layer = TimeDepthSeparableConv( + hid_channels, hid_channels, out_channels if + (idx + 1) == (num_layers - 1) else hid_channels, kernel_size, + bias) + self.layers.append(layer) + + def forward(self, x, mask): + for layer in self.layers: + x = layer(x * mask) + return x diff --git a/TTS/tts/layers/generic/wavenet.py b/TTS/tts/layers/generic/wavenet.py new file mode 100644 index 0000000000000000000000000000000000000000..9906aa4a070a739c5a266f43a2a74d8ace24e0a9 --- /dev/null +++ b/TTS/tts/layers/generic/wavenet.py @@ -0,0 +1,170 @@ +import torch +from torch import nn + + +@torch.jit.script +def fused_add_tanh_sigmoid_multiply(input_a, input_b, n_channels): + n_channels_int = n_channels[0] + in_act = input_a + input_b + t_act = torch.tanh(in_act[:, :n_channels_int, :]) + s_act = torch.sigmoid(in_act[:, n_channels_int:, :]) + acts = t_act * s_act + return acts + + +class WN(torch.nn.Module): + """Wavenet layers with weight norm and no input conditioning. + + |-----------------------------------------------------------------------------| + | |-> tanh -| | + res -|- conv1d(dilation) -> dropout -> + -| * -> conv1d1x1 -> split -|- + -> res + g -------------------------------------| |-> sigmoid -| | + o --------------------------------------------------------------------------- + --------- o + + Args: + in_channels (int): number of input channels. + hidden_channes (int): number of hidden channels. + kernel_size (int): filter kernel size for the first conv layer. + dilation_rate (int): dilations rate to increase dilation per layer. + If it is 2, dilations are 1, 2, 4, 8 for the next 4 layers. + num_layers (int): number of wavenet layers. + c_in_channels (int): number of channels of conditioning input. + dropout_p (float): dropout rate. + weight_norm (bool): enable/disable weight norm for convolution layers. 
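+
+    Example:
+        A minimal usage sketch with a global conditioning vector; the sizes are
+        illustrative. Note that the input to `forward` must already have
+        `hidden_channels` channels (callers such as CouplingBlock project to it
+        first).
+
+            import torch
+            from TTS.tts.layers.generic.wavenet import WN
+
+            wn = WN(in_channels=96, hidden_channels=96, kernel_size=5,
+                    dilation_rate=2, num_layers=4, c_in_channels=16)
+            x = torch.randn(2, 96, 40)      # [B, C, T]
+            x_mask = torch.ones(2, 1, 40)
+            g = torch.randn(2, 16, 1)       # global conditioning, broadcast over time
+            o = wn(x, x_mask, g=g)          # -> [2, 96, 40]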
+ """ + def __init__(self, + in_channels, + hidden_channels, + kernel_size, + dilation_rate, + num_layers, + c_in_channels=0, + dropout_p=0, + weight_norm=True): + super().__init__() + assert kernel_size % 2 == 1 + assert hidden_channels % 2 == 0 + self.in_channels = in_channels + self.hidden_channels = hidden_channels + self.kernel_size = kernel_size + self.dilation_rate = dilation_rate + self.num_layers = num_layers + self.c_in_channels = c_in_channels + self.dropout_p = dropout_p + + self.in_layers = torch.nn.ModuleList() + self.res_skip_layers = torch.nn.ModuleList() + self.dropout = nn.Dropout(dropout_p) + + # init conditioning layer + if c_in_channels > 0: + cond_layer = torch.nn.Conv1d(c_in_channels, + 2 * hidden_channels * num_layers, 1) + self.cond_layer = torch.nn.utils.weight_norm(cond_layer, + name='weight') + # intermediate layers + for i in range(num_layers): + dilation = dilation_rate**i + padding = int((kernel_size * dilation - dilation) / 2) + in_layer = torch.nn.Conv1d(hidden_channels, + 2 * hidden_channels, + kernel_size, + dilation=dilation, + padding=padding) + in_layer = torch.nn.utils.weight_norm(in_layer, name='weight') + self.in_layers.append(in_layer) + + if i < num_layers - 1: + res_skip_channels = 2 * hidden_channels + else: + res_skip_channels = hidden_channels + + res_skip_layer = torch.nn.Conv1d(hidden_channels, + res_skip_channels, 1) + res_skip_layer = torch.nn.utils.weight_norm(res_skip_layer, + name='weight') + self.res_skip_layers.append(res_skip_layer) + # setup weight norm + if not weight_norm: + self.remove_weight_norm() + + def forward(self, x, x_mask=None, g=None, **kwargs): # pylint: disable=unused-argument + output = torch.zeros_like(x) + n_channels_tensor = torch.IntTensor([self.hidden_channels]) + if g is not None: + g = self.cond_layer(g) + for i in range(self.num_layers): + x_in = self.in_layers[i](x) + x_in = self.dropout(x_in) + if g is not None: + cond_offset = i * 2 * self.hidden_channels + g_l = g[:, cond_offset:cond_offset + 2 * self.hidden_channels, :] + else: + g_l = torch.zeros_like(x_in) + acts = fused_add_tanh_sigmoid_multiply(x_in, g_l, + n_channels_tensor) + res_skip_acts = self.res_skip_layers[i](acts) + if i < self.num_layers - 1: + x = (x + res_skip_acts[:, :self.hidden_channels, :]) * x_mask + output = output + res_skip_acts[:, self.hidden_channels:, :] + else: + output = output + res_skip_acts + return output * x_mask + + def remove_weight_norm(self): + if self.c_in_channels != 0: + torch.nn.utils.remove_weight_norm(self.cond_layer) + for l in self.in_layers: + torch.nn.utils.remove_weight_norm(l) + for l in self.res_skip_layers: + torch.nn.utils.remove_weight_norm(l) + + +class WNBlocks(nn.Module): + """Wavenet blocks. + + Note: After each block dilation resets to 1 and it increases in each block + along the dilation rate. + + Args: + in_channels (int): number of input channels. + hidden_channes (int): number of hidden channels. + kernel_size (int): filter kernel size for the first conv layer. + dilation_rate (int): dilations rate to increase dilation per layer. + If it is 2, dilations are 1, 2, 4, 8 for the next 4 layers. + num_blocks (int): number of wavenet blocks. + num_layers (int): number of wavenet layers. + c_in_channels (int): number of channels of conditioning input. + dropout_p (float): dropout rate. + weight_norm (bool): enable/disable weight norm for convolution layers. 
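+
+    Example:
+        A minimal usage sketch; sizes are illustrative, and the input is given
+        `hidden_channels` channels since that is what the WN layers expect.
+
+            import torch
+            from TTS.tts.layers.generic.wavenet import WNBlocks
+
+            blocks = WNBlocks(in_channels=64, hidden_channels=64, kernel_size=3,
+                              dilation_rate=2, num_blocks=2, num_layers=4)
+            x = torch.randn(1, 64, 32)
+            x_mask = torch.ones(1, 1, 32)
+            o = blocks(x, x_mask)           # -> [1, 64, 32]; dilation restarts in each block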
+ """ + + def __init__(self, + in_channels, + hidden_channels, + kernel_size, + dilation_rate, + num_blocks, + num_layers, + c_in_channels=0, + dropout_p=0, + weight_norm=True): + + super().__init__() + self.wn_blocks = nn.ModuleList() + for idx in range(num_blocks): + layer = WN(in_channels=in_channels if idx == 0 else hidden_channels, + hidden_channels=hidden_channels, + kernel_size=kernel_size, + dilation_rate=dilation_rate, + num_layers=num_layers, + c_in_channels=c_in_channels, + dropout_p=dropout_p, + weight_norm=weight_norm) + self.wn_blocks.append(layer) + + def forward(self, x, x_mask, g=None): + o = x + for layer in self.wn_blocks: + o = layer(o, x_mask, g) + return o \ No newline at end of file diff --git a/TTS/tts/layers/glow_tts/__init__.py b/TTS/tts/layers/glow_tts/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/TTS/tts/layers/glow_tts/decoder.py b/TTS/tts/layers/glow_tts/decoder.py new file mode 100644 index 0000000000000000000000000000000000000000..46533ed1873cec8983c3facb5bfb635a5e89221d --- /dev/null +++ b/TTS/tts/layers/glow_tts/decoder.py @@ -0,0 +1,132 @@ +import torch +from torch import nn + +from TTS.tts.layers.glow_tts.glow import InvConvNear, CouplingBlock +from TTS.tts.layers.generic.normalization import ActNorm + + +def squeeze(x, x_mask=None, num_sqz=2): + """GlowTTS squeeze operation + Increase number of channels and reduce number of time steps + by the same factor. + + Note: + each 's' is a n-dimensional vector. + [s1,s2,s3,s4,s5,s6] --> [[s1, s3, s5], [s2, s4, s6]]""" + b, c, t = x.size() + + t = (t // num_sqz) * num_sqz + x = x[:, :, :t] + x_sqz = x.view(b, c, t // num_sqz, num_sqz) + x_sqz = x_sqz.permute(0, 3, 1, + 2).contiguous().view(b, c * num_sqz, t // num_sqz) + + if x_mask is not None: + x_mask = x_mask[:, :, num_sqz - 1::num_sqz] + else: + x_mask = torch.ones(b, 1, t // num_sqz).to(device=x.device, + dtype=x.dtype) + return x_sqz * x_mask, x_mask + + +def unsqueeze(x, x_mask=None, num_sqz=2): + """GlowTTS unsqueeze operation + + Note: + each 's' is a n-dimensional vector. + [[s1, s3, s5], [s2, s4, s6]] --> [[s1, s3, s5], [s2, s4, s6]] """ + b, c, t = x.size() + + x_unsqz = x.view(b, num_sqz, c // num_sqz, t) + x_unsqz = x_unsqz.permute(0, 2, 3, + 1).contiguous().view(b, c // num_sqz, + t * num_sqz) + + if x_mask is not None: + x_mask = x_mask.unsqueeze(-1).repeat(1, 1, 1, + num_sqz).view(b, 1, t * num_sqz) + else: + x_mask = torch.ones(b, 1, t * num_sqz).to(device=x.device, + dtype=x.dtype) + return x_unsqz * x_mask, x_mask + + +class Decoder(nn.Module): + """Stack of Glow Decoder Modules. + Squeeze -> ActNorm -> InvertibleConv1x1 -> AffineCoupling -> Unsqueeze + + Args: + in_channels (int): channels of input tensor. + hidden_channels (int): hidden decoder channels. + kernel_size (int): Coupling block kernel size. (Wavenet filter kernel size.) + dilation_rate (int): rate to increase dilation by each layer in a decoder block. + num_flow_blocks (int): number of decoder blocks. + num_coupling_layers (int): number coupling layers. (number of wavenet layers.) + dropout_p (float): wavenet dropout rate. + sigmoid_scale (bool): enable/disable sigmoid scaling in coupling layer. 
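+
+    Example:
+        A minimal forward/inverse sketch; the sizes are illustrative and much
+        smaller than the Glow-TTS defaults.
+
+            import torch
+            from TTS.tts.layers.glow_tts.decoder import Decoder
+
+            dec = Decoder(in_channels=80, hidden_channels=192, kernel_size=5,
+                          dilation_rate=1, num_flow_blocks=2, num_coupling_layers=2)
+            x = torch.randn(1, 80, 100)              # [B, C_mel, T], T divisible by num_squeeze
+            x_mask = torch.ones(1, 1, 100)
+            z, logdet = dec(x, x_mask)               # latent [1, 80, 100] and total log-determinant
+            dec.store_inverse()
+            x_hat, _ = dec(z, x_mask, reverse=True)  # approximately recovers x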
+ """ + def __init__(self, + in_channels, + hidden_channels, + kernel_size, + dilation_rate, + num_flow_blocks, + num_coupling_layers, + dropout_p=0., + num_splits=4, + num_squeeze=2, + sigmoid_scale=False, + c_in_channels=0): + super().__init__() + + self.in_channels = in_channels + self.hidden_channels = hidden_channels + self.kernel_size = kernel_size + self.dilation_rate = dilation_rate + self.num_flow_blocks = num_flow_blocks + self.num_coupling_layers = num_coupling_layers + self.dropout_p = dropout_p + self.num_splits = num_splits + self.num_squeeze = num_squeeze + self.sigmoid_scale = sigmoid_scale + self.c_in_channels = c_in_channels + + self.flows = nn.ModuleList() + for _ in range(num_flow_blocks): + self.flows.append(ActNorm(channels=in_channels * num_squeeze)) + self.flows.append( + InvConvNear(channels=in_channels * num_squeeze, + num_splits=num_splits)) + self.flows.append( + CouplingBlock(in_channels * num_squeeze, + hidden_channels, + kernel_size=kernel_size, + dilation_rate=dilation_rate, + num_layers=num_coupling_layers, + c_in_channels=c_in_channels, + dropout_p=dropout_p, + sigmoid_scale=sigmoid_scale)) + + def forward(self, x, x_mask, g=None, reverse=False): + if not reverse: + flows = self.flows + logdet_tot = 0 + else: + flows = reversed(self.flows) + logdet_tot = None + + if self.num_squeeze > 1: + x, x_mask = squeeze(x, x_mask, self.num_squeeze) + for f in flows: + if not reverse: + x, logdet = f(x, x_mask, g=g, reverse=reverse) + logdet_tot += logdet + else: + x, logdet = f(x, x_mask, g=g, reverse=reverse) + if self.num_squeeze > 1: + x, x_mask = unsqueeze(x, x_mask, self.num_squeeze) + return x, logdet_tot + + def store_inverse(self): + for f in self.flows: + f.store_inverse() diff --git a/TTS/tts/layers/glow_tts/duration_predictor.py b/TTS/tts/layers/glow_tts/duration_predictor.py new file mode 100644 index 0000000000000000000000000000000000000000..a08f64a870a0beaae2ae61fbbef3f13bcec7a231 --- /dev/null +++ b/TTS/tts/layers/glow_tts/duration_predictor.py @@ -0,0 +1,57 @@ +import torch +from torch import nn + +from ..generic.normalization import LayerNorm + + +class DurationPredictor(nn.Module): + """Glow-TTS duration prediction model. 
+    [2 x (conv1d_kxk -> relu -> layer_norm -> dropout)] -> conv1d_1x1 -> durs
+
+    Args:
+        in_channels (int): number of channels in the input tensor.
+        hidden_channels (int): number of channels in the inner conv layers.
+        kernel_size (int): kernel size of the conv layers.
+        dropout_p (float): dropout rate.
+    """
+    def __init__(self, in_channels, hidden_channels, kernel_size, dropout_p):
+        super().__init__()
+        # class arguments
+        self.in_channels = in_channels
+        self.filter_channels = hidden_channels
+        self.kernel_size = kernel_size
+        self.dropout_p = dropout_p
+        # layers
+        self.drop = nn.Dropout(dropout_p)
+        self.conv_1 = nn.Conv1d(in_channels,
+                                hidden_channels,
+                                kernel_size,
+                                padding=kernel_size // 2)
+        self.norm_1 = LayerNorm(hidden_channels)
+        self.conv_2 = nn.Conv1d(hidden_channels,
+                                hidden_channels,
+                                kernel_size,
+                                padding=kernel_size // 2)
+        self.norm_2 = LayerNorm(hidden_channels)
+        # output layer
+        self.proj = nn.Conv1d(hidden_channels, 1, 1)
+
+    def forward(self, x, x_mask):
+        """
+        Shapes:
+            x: [B, C, T]
+            x_mask: [B, 1, T]
+
+        Returns:
+            Tensor: predicted durations in log scale, masked by `x_mask`. Shape [B, 1, T].
+        """
+        x = self.conv_1(x * x_mask)
+        x = torch.relu(x)
+        x = self.norm_1(x)
+        x = self.drop(x)
+        x = self.conv_2(x * x_mask)
+        x = torch.relu(x)
+        x = self.norm_2(x)
+        x = self.drop(x)
+        x = self.proj(x * x_mask)
+        return x * x_mask
diff --git a/TTS/tts/layers/glow_tts/encoder.py b/TTS/tts/layers/glow_tts/encoder.py
new file mode 100644
index 0000000000000000000000000000000000000000..9a1508ee9c777c0679dc048192b3874990983dc8
--- /dev/null
+++ b/TTS/tts/layers/glow_tts/encoder.py
@@ -0,0 +1,186 @@
+import math
+import torch
+from torch import nn
+
+from TTS.tts.layers.glow_tts.transformer import RelativePositionTransformer
+from TTS.tts.layers.generic.gated_conv import GatedConvBlock
+from TTS.tts.utils.generic_utils import sequence_mask
+from TTS.tts.layers.glow_tts.glow import ResidualConv1dLayerNormBlock
+from TTS.tts.layers.glow_tts.duration_predictor import DurationPredictor
+from TTS.tts.layers.generic.time_depth_sep_conv import TimeDepthSeparableConvBlock
+from TTS.tts.layers.generic.res_conv_bn import ResidualConv1dBNBlock
+
+
+class Encoder(nn.Module):
+    """Glow-TTS encoder module.
+
+    embedding -> <prenet> -> encoder_module -> <postnet> --> proj_mean
+                                                        |
+                                                        |-> proj_var
+                                                        |
+                                                        |-> concat -> duration_predictor
+                                                               ↑
+                                                         speaker_embed
+    Args:
+        num_chars (int): number of characters.
+        out_channels (int): number of output channels.
+        hidden_channels (int): encoder's embedding size.
+        hidden_channels_dp (int): hidden channels of the duration predictor.
+        encoder_type (str): encoder module type. One of 'rel_pos_transformer', 'gated_conv', 'residual_conv_bn' or 'time_depth_separable'.
+        encoder_params (dict): module-specific parameters of the chosen encoder, e.g. kernel_size, dropout_p or hidden_channels_ffn (the transformer's feed-forward channels). See the Notes below.
+        dropout_p_dp (float): dropout rate of the duration predictor. Defaults to 0.1.
+        mean_only (bool): if True, output only mean values and use constant std.
+        use_prenet (bool): if True, use pre-convolutional layers before the encoder module.
+        c_in_channels (int): number of channels in the conditional (e.g. speaker embedding) input.
+
+    Shapes:
+        - input: (B, T)
+
+    Notes:
+        suggested encoder params...
+ + for encoder_type == 'rel_pos_transformer' + encoder_params={ + 'kernel_size':3, + 'dropout_p': 0.1, + 'num_layers': 6, + 'num_heads': 2, + 'hidden_channels_ffn': 768, # 4 times the hidden_channels + 'input_length': None + } + + for encoder_type == 'gated_conv' + encoder_params={ + 'kernel_size':5, + 'dropout_p': 0.1, + 'num_layers': 9, + } + + for encoder_type == 'residual_conv_bn' + encoder_params={ + "kernel_size": 4, + "dilations": [1, 2, 4, 1, 2, 4, 1, 2, 4, 1, 2, 4, 1], + "num_conv_blocks": 2, + "num_res_blocks": 13 + } + + for encoder_type == 'time_depth_separable' + encoder_params={ + "kernel_size": 5, + 'num_layers': 9, + } + """ + def __init__(self, + num_chars, + out_channels, + hidden_channels, + hidden_channels_dp, + encoder_type, + encoder_params, + dropout_p_dp=0.1, + mean_only=False, + use_prenet=True, + c_in_channels=0): + super().__init__() + # class arguments + self.num_chars = num_chars + self.out_channels = out_channels + self.hidden_channels = hidden_channels + self.hidden_channels_dp = hidden_channels_dp + self.dropout_p_dp = dropout_p_dp + self.mean_only = mean_only + self.use_prenet = use_prenet + self.c_in_channels = c_in_channels + self.encoder_type = encoder_type + # embedding layer + self.emb = nn.Embedding(num_chars, hidden_channels) + nn.init.normal_(self.emb.weight, 0.0, hidden_channels**-0.5) + # init encoder module + if encoder_type.lower() == "rel_pos_transformer": + if use_prenet: + self.prenet = ResidualConv1dLayerNormBlock(hidden_channels, + hidden_channels, + hidden_channels, + kernel_size=5, + num_layers=3, + dropout_p=0.5) + self.encoder = RelativePositionTransformer(hidden_channels, + hidden_channels, + hidden_channels, + **encoder_params) + elif encoder_type.lower() == 'gated_conv': + self.encoder = GatedConvBlock(hidden_channels, **encoder_params) + elif encoder_type.lower() == 'residual_conv_bn': + if use_prenet: + self.prenet = nn.Sequential( + nn.Conv1d(hidden_channels, hidden_channels, 1), + nn.ReLU() + ) + self.encoder = ResidualConv1dBNBlock(hidden_channels, + hidden_channels, + hidden_channels, + **encoder_params) + self.postnet = nn.Sequential( + nn.Conv1d(self.hidden_channels, self.hidden_channels, 1), + nn.BatchNorm1d(self.hidden_channels)) + elif encoder_type.lower() == 'time_depth_separable': + if use_prenet: + self.prenet = ResidualConv1dLayerNormBlock(hidden_channels, + hidden_channels, + hidden_channels, + kernel_size=5, + num_layers=3, + dropout_p=0.5) + self.encoder = TimeDepthSeparableConvBlock(hidden_channels, + hidden_channels, + hidden_channels, + **encoder_params) + else: + raise ValueError(" [!] 
Unkown encoder type.") + + # final projection layers + self.proj_m = nn.Conv1d(hidden_channels, out_channels, 1) + if not mean_only: + self.proj_s = nn.Conv1d(hidden_channels, out_channels, 1) + # duration predictor + self.duration_predictor = DurationPredictor( + hidden_channels + c_in_channels, hidden_channels_dp, 3, + dropout_p_dp) + + def forward(self, x, x_lengths, g=None): + """ + Shapes: + x: [B, C, T] + x_lengths: [B] + g (optional): [B, 1, T] + """ + # embedding layer + # [B ,T, D] + x = self.emb(x) * math.sqrt(self.hidden_channels) + # [B, D, T] + x = torch.transpose(x, 1, -1) + # compute input sequence mask + x_mask = torch.unsqueeze(sequence_mask(x_lengths, x.size(2)), + 1).to(x.dtype) + # prenet + if hasattr(self, 'prenet') and self.use_prenet: + x = self.prenet(x, x_mask) + # encoder + x = self.encoder(x, x_mask) + # postnet + if hasattr(self, 'postnet'): + x = self.postnet(x) * x_mask + # set duration predictor input + if g is not None: + g_exp = g.expand(-1, -1, x.size(-1)) + x_dp = torch.cat([torch.detach(x), g_exp], 1) + else: + x_dp = torch.detach(x) + # final projection layer + x_m = self.proj_m(x) * x_mask + if not self.mean_only: + x_logs = self.proj_s(x) * x_mask + else: + x_logs = torch.zeros_like(x_m) + # duration predictor + logw = self.duration_predictor(x_dp, x_mask) + return x_m, x_logs, logw, x_mask diff --git a/TTS/tts/layers/glow_tts/glow.py b/TTS/tts/layers/glow_tts/glow.py new file mode 100644 index 0000000000000000000000000000000000000000..c8ad410d49b06f895e90bd6e79b903358ebc67aa --- /dev/null +++ b/TTS/tts/layers/glow_tts/glow.py @@ -0,0 +1,221 @@ +import torch +from torch import nn +from torch.nn import functional as F +from TTS.tts.layers.generic.wavenet import WN + +from ..generic.normalization import LayerNorm + + +class ResidualConv1dLayerNormBlock(nn.Module): + def __init__(self, in_channels, hidden_channels, out_channels, kernel_size, + num_layers, dropout_p): + """Conv1d with Layer Normalization and residual connection as in GlowTTS paper. + https://arxiv.org/pdf/1811.00002.pdf + + x |-> conv1d -> layer_norm -> relu -> dropout -> + -> o + |---------------> conv1d_1x1 -----------------------| + + Args: + in_channels (int): number of input tensor channels. + hidden_channels (int): number of inner layer channels. + out_channels (int): number of output tensor channels. + kernel_size (int): kernel size of conv1d filter. + num_layers (int): number of blocks. + dropout_p (float): dropout rate for each block. + """ + super().__init__() + self.in_channels = in_channels + self.hidden_channels = hidden_channels + self.out_channels = out_channels + self.kernel_size = kernel_size + self.num_layers = num_layers + self.dropout_p = dropout_p + assert num_layers > 1, " [!] number of layers should be > 0." + assert kernel_size % 2 == 1, " [!] kernel size should be odd number." 
+ + self.conv_layers = nn.ModuleList() + self.norm_layers = nn.ModuleList() + + for idx in range(num_layers): + self.conv_layers.append( + nn.Conv1d(in_channels if idx == 0 else hidden_channels, + hidden_channels, + kernel_size, + padding=kernel_size // 2)) + self.norm_layers.append(LayerNorm(hidden_channels)) + + self.proj = nn.Conv1d(hidden_channels, out_channels, 1) + self.proj.weight.data.zero_() + self.proj.bias.data.zero_() + + def forward(self, x, x_mask): + x_res = x + for i in range(self.num_layers): + x = self.conv_layers[i](x * x_mask) + x = self.norm_layers[i](x * x_mask) + x = F.dropout(F.relu(x), self.dropout_p, training=self.training) + x = x_res + self.proj(x) + return x * x_mask + + +class InvConvNear(nn.Module): + """Invertible Convolution with input splitting as in GlowTTS paper. + https://arxiv.org/pdf/1811.00002.pdf + + Args: + channels (int): input and output channels. + num_splits (int): number of splits, also H and W of conv layer. + no_jacobian (bool): enable/disable jacobian computations. + + Note: + Split the input into groups of size self.num_splits and + perform 1x1 convolution separately. Cast 1x1 conv operation + to 2d by reshaping the input for efficiency. + """ + def __init__(self, channels, num_splits=4, no_jacobian=False, **kwargs): # pylint: disable=unused-argument + super().__init__() + assert num_splits % 2 == 0 + self.channels = channels + self.num_splits = num_splits + self.no_jacobian = no_jacobian + self.weight_inv = None + + w_init = torch.qr( + torch.FloatTensor(self.num_splits, self.num_splits).normal_())[0] + if torch.det(w_init) < 0: + w_init[:, 0] = -1 * w_init[:, 0] + self.weight = nn.Parameter(w_init) + + def forward(self, x, x_mask=None, reverse=False, **kwargs): # pylint: disable=unused-argument + """ + Shapes: + x: B x C x T + x_mask: B x 1 x T + """ + + b, c, t = x.size() + assert c % self.num_splits == 0 + if x_mask is None: + x_mask = 1 + x_len = torch.ones((b, ), dtype=x.dtype, device=x.device) * t + else: + x_len = torch.sum(x_mask, [1, 2]) + + x = x.view(b, 2, c // self.num_splits, self.num_splits // 2, t) + x = x.permute(0, 1, 3, 2, 4).contiguous().view(b, self.num_splits, + c // self.num_splits, t) + + if reverse: + if self.weight_inv is not None: + weight = self.weight_inv + else: + weight = torch.inverse( + self.weight.float()).to(dtype=self.weight.dtype) + logdet = None + else: + weight = self.weight + if self.no_jacobian: + logdet = 0 + else: + logdet = torch.logdet( + self.weight) * (c / self.num_splits) * x_len # [b] + + weight = weight.view(self.num_splits, self.num_splits, 1, 1) + z = F.conv2d(x, weight) + + z = z.view(b, 2, self.num_splits // 2, c // self.num_splits, t) + z = z.permute(0, 1, 3, 2, 4).contiguous().view(b, c, t) * x_mask + return z, logdet + + def store_inverse(self): + weight_inv = torch.inverse( + self.weight.float()).to(dtype=self.weight.dtype) + self.weight_inv = nn.Parameter(weight_inv, requires_grad=False) + + +class CouplingBlock(nn.Module): + """Glow Affine Coupling block as in GlowTTS paper. + https://arxiv.org/pdf/1811.00002.pdf + + x --> x0 -> conv1d -> wavenet -> conv1d --> t, s -> concat(s*x1 + t, x0) -> o + '-> x1 - - - - - - - - - - - - - - - - - - - - - - - - - ^ + + Args: + in_channels (int): number of input tensor channels. + hidden_channels (int): number of hidden channels. + kernel_size (int): WaveNet filter kernel size. + dilation_rate (int): rate to increase dilation by each layer in a decoder block. + num_layers (int): number of WaveNet layers. 
+ c_in_channels (int): number of conditioning input channels. + dropout_p (int): wavenet dropout rate. + sigmoid_scale (bool): enable/disable sigmoid scaling for output scale. + + Note: + It does not use conditional inputs differently from WaveGlow. + """ + def __init__(self, + in_channels, + hidden_channels, + kernel_size, + dilation_rate, + num_layers, + c_in_channels=0, + dropout_p=0, + sigmoid_scale=False): + super().__init__() + self.in_channels = in_channels + self.hidden_channels = hidden_channels + self.kernel_size = kernel_size + self.dilation_rate = dilation_rate + self.num_layers = num_layers + self.c_in_channels = c_in_channels + self.dropout_p = dropout_p + self.sigmoid_scale = sigmoid_scale + # input layer + start = torch.nn.Conv1d(in_channels // 2, hidden_channels, 1) + start = torch.nn.utils.weight_norm(start) + self.start = start + # output layer + # Initializing last layer to 0 makes the affine coupling layers + # do nothing at first. This helps with training stability + end = torch.nn.Conv1d(hidden_channels, in_channels, 1) + end.weight.data.zero_() + end.bias.data.zero_() + self.end = end + # coupling layers + self.wn = WN(in_channels, hidden_channels, kernel_size, dilation_rate, + num_layers, c_in_channels, dropout_p) + + def forward(self, x, x_mask=None, reverse=False, g=None, **kwargs): # pylint: disable=unused-argument + """ + Shapes: + x: B x C x T + x_mask: B x 1 x T + g: B x C x 1 + """ + if x_mask is None: + x_mask = 1 + x_0, x_1 = x[:, :self.in_channels // 2], x[:, self.in_channels // 2:] + + x = self.start(x_0) * x_mask + x = self.wn(x, x_mask, g) + out = self.end(x) + + z_0 = x_0 + t = out[:, :self.in_channels // 2, :] + s = out[:, self.in_channels // 2:, :] + if self.sigmoid_scale: + s = torch.log(1e-6 + torch.sigmoid(s + 2)) + + if reverse: + z_1 = (x_1 - t) * torch.exp(-s) * x_mask + logdet = None + else: + z_1 = (t + torch.exp(s) * x_1) * x_mask + logdet = torch.sum(s * x_mask, [1, 2]) + + z = torch.cat([z_0, z_1], 1) + return z, logdet + + def store_inverse(self): + self.wn.remove_weight_norm() diff --git a/TTS/tts/layers/glow_tts/monotonic_align/__init__.py b/TTS/tts/layers/glow_tts/monotonic_align/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a2912a98e88bbd41f94d5d4423afbfd1a3b532ae --- /dev/null +++ b/TTS/tts/layers/glow_tts/monotonic_align/__init__.py @@ -0,0 +1,103 @@ +import numpy as np +import torch +from torch.nn import functional as F +from TTS.tts.utils.generic_utils import sequence_mask + +try: + # TODO: fix pypi cython installation problem. 
+ from TTS.tts.layers.glow_tts.monotonic_align.core import maximum_path_c + CYTHON = True +except ModuleNotFoundError: + CYTHON = False + + +def convert_pad_shape(pad_shape): + l = pad_shape[::-1] + pad_shape = [item for sublist in l for item in sublist] + return pad_shape + + +def generate_path(duration, mask): + """ + duration: [b, t_x] + mask: [b, t_x, t_y] + """ + device = duration.device + + b, t_x, t_y = mask.shape + cum_duration = torch.cumsum(duration, 1) + path = torch.zeros(b, t_x, t_y, dtype=mask.dtype).to(device=device) + + cum_duration_flat = cum_duration.view(b * t_x) + path = sequence_mask(cum_duration_flat, t_y).to(mask.dtype) + path = path.view(b, t_x, t_y) + path = path - F.pad(path, convert_pad_shape([[0, 0], [1, 0], [0, 0] + ]))[:, :-1] + path = path * mask + return path + + +def maximum_path(value, mask): + if CYTHON: + return maximum_path_cython(value, mask) + return maximum_path_numpy(value, mask) + + +def maximum_path_cython(value, mask): + """ Cython optimised version. + value: [b, t_x, t_y] + mask: [b, t_x, t_y] + """ + value = value * mask + device = value.device + dtype = value.dtype + value = value.data.cpu().numpy().astype(np.float32) + path = np.zeros_like(value).astype(np.int32) + mask = mask.data.cpu().numpy() + + t_x_max = mask.sum(1)[:, 0].astype(np.int32) + t_y_max = mask.sum(2)[:, 0].astype(np.int32) + maximum_path_c(path, value, t_x_max, t_y_max) + return torch.from_numpy(path).to(device=device, dtype=dtype) + + +def maximum_path_numpy(value, mask, max_neg_val=None): + """ + Monotonic alignment search algorithm + Numpy-friendly version. It's about 4 times faster than torch version. + value: [b, t_x, t_y] + mask: [b, t_x, t_y] + """ + if max_neg_val is None: + max_neg_val = -np.inf # Patch for Sphinx complaint + value = value * mask + + device = value.device + dtype = value.dtype + value = value.cpu().detach().numpy() + mask = mask.cpu().detach().numpy().astype(np.bool) + + b, t_x, t_y = value.shape + direction = np.zeros(value.shape, dtype=np.int64) + v = np.zeros((b, t_x), dtype=np.float32) + x_range = np.arange(t_x, dtype=np.float32).reshape(1, -1) + for j in range(t_y): + v0 = np.pad(v, [[0, 0], [1, 0]], mode="constant", constant_values=max_neg_val)[:, :-1] + v1 = v + max_mask = v1 >= v0 + v_max = np.where(max_mask, v1, v0) + direction[:, :, j] = max_mask + + index_mask = x_range <= j + v = np.where(index_mask, v_max + value[:, :, j], max_neg_val) + direction = np.where(mask, direction, 1) + + path = np.zeros(value.shape, dtype=np.float32) + index = mask[:, :, 0].sum(1).astype(np.int64) - 1 + index_range = np.arange(b) + for j in reversed(range(t_y)): + path[index_range, index, j] = 1 + index = index + direction[index_range, index, j] - 1 + path = path * mask.astype(np.float32) + path = torch.from_numpy(path).to(device=device, dtype=dtype) + return path diff --git a/TTS/tts/layers/glow_tts/monotonic_align/core.pyx b/TTS/tts/layers/glow_tts/monotonic_align/core.pyx new file mode 100644 index 0000000000000000000000000000000000000000..6aabccc4c408cb1b555e2abb4d73e0d1ce4d346e --- /dev/null +++ b/TTS/tts/layers/glow_tts/monotonic_align/core.pyx @@ -0,0 +1,45 @@ +import numpy as np +cimport numpy as np +cimport cython +from cython.parallel import prange + + +@cython.boundscheck(False) +@cython.wraparound(False) +cdef void maximum_path_each(int[:,::1] path, float[:,::1] value, int t_x, int t_y, float max_neg_val) nogil: + cdef int x + cdef int y + cdef float v_prev + cdef float v_cur + cdef float tmp + cdef int index = t_x - 1 + + for y in range(t_y): + 
for x in range(max(0, t_x + y - t_y), min(t_x, y + 1)): + if x == y: + v_cur = max_neg_val + else: + v_cur = value[x, y-1] + if x == 0: + if y == 0: + v_prev = 0. + else: + v_prev = max_neg_val + else: + v_prev = value[x-1, y-1] + value[x, y] = max(v_cur, v_prev) + value[x, y] + + for y in range(t_y - 1, -1, -1): + path[index, y] = 1 + if index != 0 and (index == y or value[index, y-1] < value[index-1, y-1]): + index = index - 1 + + +@cython.boundscheck(False) +@cython.wraparound(False) +cpdef void maximum_path_c(int[:,:,::1] paths, float[:,:,::1] values, int[::1] t_xs, int[::1] t_ys, float max_neg_val=-1e9) nogil: + cdef int b = values.shape[0] + + cdef int i + for i in prange(b, nogil=True): + maximum_path_each(paths[i], values[i], t_xs[i], t_ys[i], max_neg_val) diff --git a/TTS/tts/layers/glow_tts/monotonic_align/setup.py b/TTS/tts/layers/glow_tts/monotonic_align/setup.py new file mode 100644 index 0000000000000000000000000000000000000000..f22bc6a35a5a04c9e6d7b82040973722c9b770c9 --- /dev/null +++ b/TTS/tts/layers/glow_tts/monotonic_align/setup.py @@ -0,0 +1,7 @@ +# from distutils.core import setup +# from Cython.Build import cythonize +# import numpy + +# setup(name='monotonic_align', +# ext_modules=cythonize("core.pyx"), +# include_dirs=[numpy.get_include()]) diff --git a/TTS/tts/layers/glow_tts/transformer.py b/TTS/tts/layers/glow_tts/transformer.py new file mode 100644 index 0000000000000000000000000000000000000000..4feadc809ac02b96bd3add50e0b85644c02a2863 --- /dev/null +++ b/TTS/tts/layers/glow_tts/transformer.py @@ -0,0 +1,400 @@ +import math +import torch +from torch import nn +from torch.nn import functional as F + +from TTS.tts.layers.glow_tts.glow import LayerNorm + + +class RelativePositionMultiHeadAttention(nn.Module): + """Multi-head attention with Relative Positional embedding. + https://arxiv.org/pdf/1809.04281.pdf + + It learns positional embeddings for a window of neighbours. For keys and values, + it learns different set of embeddings. Key embeddings are agregated with the attention + scores and value embeddings are aggregated with the output. + + Note: + Example with relative attention window size 2 + input = [a, b, c, d, e] + rel_attn_embeddings = [e(t-2), e(t-1), e(t+1), e(t+2)] + + So it learns 4 embedding vectors (in total 8) separately for key and value vectors. + + Considering the input c + e(t-2) corresponds to c -> a + e(t-2) corresponds to c -> b + e(t-2) corresponds to c -> d + e(t-2) corresponds to c -> e + + These embeddings are shared among different time steps. So input a, b, d and e also uses + the same embeddings. + + Embeddings are ignored when the relative window is out of limit for the first and the last + n items. + + Args: + channels (int): input and inner layer channels. + out_channels (int): output channels. + num_heads (int): number of attention heads. + rel_attn_window_size (int, optional): relation attention window size. + If 4, for each time step next and previous 4 time steps are attended. + If default, relative encoding is disabled and it is a regular transformer. + Defaults to None. + heads_share (bool, optional): [description]. Defaults to True. + dropout_p (float, optional): dropout rate. Defaults to 0.. + input_length (int, optional): intput length for positional encoding. Defaults to None. + proximal_bias (bool, optional): enable/disable proximal bias as in the paper. Defaults to False. + proximal_init (bool, optional): enable/disable poximal init as in the paper. + Init key and query layer weights the same. Defaults to False. 
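+
+    Example:
+        A minimal self-attention sketch; channel and time sizes are illustrative.
+
+            import torch
+            from TTS.tts.layers.glow_tts.transformer import RelativePositionMultiHeadAttention
+
+            attn = RelativePositionMultiHeadAttention(channels=192, out_channels=192,
+                                                      num_heads=2, rel_attn_window_size=4)
+            x = torch.randn(2, 192, 60)       # [B, C, T]
+            o = attn(x, x, attn_mask=None)    # query and key/value are the same tensor -> [2, 192, 60]
+            # attn.attn holds the attention map with shape [B, num_heads, T, T]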
+ """ + def __init__(self, + channels, + out_channels, + num_heads, + rel_attn_window_size=None, + heads_share=True, + dropout_p=0., + input_length=None, + proximal_bias=False, + proximal_init=False): + + super().__init__() + assert channels % num_heads == 0, " [!] channels should be divisible by num_heads." + # class attributes + self.channels = channels + self.out_channels = out_channels + self.num_heads = num_heads + self.rel_attn_window_size = rel_attn_window_size + self.heads_share = heads_share + self.input_length = input_length + self.proximal_bias = proximal_bias + self.dropout_p = dropout_p + self.attn = None + # query, key, value layers + self.k_channels = channels // num_heads + self.conv_q = nn.Conv1d(channels, channels, 1) + self.conv_k = nn.Conv1d(channels, channels, 1) + self.conv_v = nn.Conv1d(channels, channels, 1) + # output layers + self.conv_o = nn.Conv1d(channels, out_channels, 1) + self.dropout = nn.Dropout(dropout_p) + # relative positional encoding layers + if rel_attn_window_size is not None: + n_heads_rel = 1 if heads_share else num_heads + rel_stddev = self.k_channels**-0.5 + emb_rel_k = nn.Parameter( + torch.randn(n_heads_rel, rel_attn_window_size * 2 + 1, + self.k_channels) * rel_stddev) + emb_rel_v = nn.Parameter( + torch.randn(n_heads_rel, rel_attn_window_size * 2 + 1, + self.k_channels) * rel_stddev) + self.register_parameter('emb_rel_k', emb_rel_k) + self.register_parameter('emb_rel_v', emb_rel_v) + + # init layers + nn.init.xavier_uniform_(self.conv_q.weight) + nn.init.xavier_uniform_(self.conv_k.weight) + # proximal bias + if proximal_init: + self.conv_k.weight.data.copy_(self.conv_q.weight.data) + self.conv_k.bias.data.copy_(self.conv_q.bias.data) + nn.init.xavier_uniform_(self.conv_v.weight) + + def forward(self, x, c, attn_mask=None): + q = self.conv_q(x) + k = self.conv_k(c) + v = self.conv_v(c) + x, self.attn = self.attention(q, k, v, mask=attn_mask) + x = self.conv_o(x) + return x + + def attention(self, query, key, value, mask=None): + # reshape [b, d, t] -> [b, n_h, t, d_k] + b, d, t_s, t_t = (*key.size(), query.size(2)) + query = query.view(b, self.num_heads, self.k_channels, + t_t).transpose(2, 3) + key = key.view(b, self.num_heads, self.k_channels, t_s).transpose(2, 3) + value = value.view(b, self.num_heads, self.k_channels, + t_s).transpose(2, 3) + # compute raw attention scores + scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt( + self.k_channels) + # relative positional encoding for scores + if self.rel_attn_window_size is not None: + assert t_s == t_t, "Relative attention is only available for self-attention." + # get relative key embeddings + key_relative_embeddings = self._get_relative_embeddings( + self.emb_rel_k, t_s) + rel_logits = self._matmul_with_relative_keys( + query, key_relative_embeddings) + rel_logits = self._relative_position_to_absolute_position( + rel_logits) + scores_local = rel_logits / math.sqrt(self.k_channels) + scores = scores + scores_local + # proximan bias + if self.proximal_bias: + assert t_s == t_t, "Proximal bias is only available for self-attention." + scores = scores + self._attn_proximity_bias(t_s).to( + device=scores.device, dtype=scores.dtype) + # attention score masking + if mask is not None: + # add small value to prevent oor error. 
+ scores = scores.masked_fill(mask == 0, -1e4) + if self.input_length is not None: + block_mask = torch.ones_like(scores).triu( + -1 * self.input_length).tril(self.input_length) + scores = scores * block_mask + -1e4 * (1 - block_mask) + # attention score normalization + p_attn = F.softmax(scores, dim=-1) # [b, n_h, t_t, t_s] + # apply dropout to attention weights + p_attn = self.dropout(p_attn) + # compute output + output = torch.matmul(p_attn, value) + # relative positional encoding for values + if self.rel_attn_window_size is not None: + relative_weights = self._absolute_position_to_relative_position( + p_attn) + value_relative_embeddings = self._get_relative_embeddings( + self.emb_rel_v, t_s) + output = output + self._matmul_with_relative_values( + relative_weights, value_relative_embeddings) + output = output.transpose(2, 3).contiguous().view( + b, d, t_t) # [b, n_h, t_t, d_k] -> [b, d, t_t] + return output, p_attn + + @staticmethod + def _matmul_with_relative_values(p_attn, re): + """ + Args: + p_attn (Tensor): attention weights. + re (Tensor): relative value embedding vector. (a_(i,j)^V) + + Shapes: + p_attn: [B, H, T, V] + re: [H or 1, V, D] + logits: [B, H, T, D] + """ + logits = torch.matmul(p_attn, re.unsqueeze(0)) + return logits + + @staticmethod + def _matmul_with_relative_keys(query, re): + """ + Args: + query (Tensor): batch of query vectors. (x*W^Q) + re (Tensor): relative key embedding vector. (a_(i,j)^K) + + Shapes: + query: [B, H, T, D] + re: [H or 1, V, D] + logits: [B, H, T, V] + """ + # logits = torch.einsum('bhld, kmd -> bhlm', [query, re.to(query.dtype)]) + logits = torch.matmul(query, re.unsqueeze(0).transpose(-2, -1)) + return logits + + def _get_relative_embeddings(self, relative_embeddings, length): + """Convert embedding vestors to a tensor of embeddings + """ + # Pad first before slice to avoid using cond ops. + pad_length = max(length - (self.rel_attn_window_size + 1), 0) + slice_start_position = max((self.rel_attn_window_size + 1) - length, 0) + slice_end_position = slice_start_position + 2 * length - 1 + if pad_length > 0: + padded_relative_embeddings = F.pad( + relative_embeddings, [0, 0, pad_length, pad_length, 0, 0]) + else: + padded_relative_embeddings = relative_embeddings + used_relative_embeddings = padded_relative_embeddings[:, + slice_start_position: + slice_end_position] + return used_relative_embeddings + + @staticmethod + def _relative_position_to_absolute_position(x): + """Converts tensor from relative to absolute indexing for local attention. + Args: + x: [B, D, length, 2 * length - 1] + Returns: + A Tensor of shape [B, D, length, length] + """ + batch, heads, length, _ = x.size() + # Pad to shift from relative to absolute indexing. + x = F.pad(x, [0, 1, 0, 0, 0, 0, 0, 0]) + # Pad extra elements so to add up to shape (len+1, 2*len-1). + x_flat = x.view([batch, heads, length * 2 * length]) + x_flat = F.pad(x_flat, [0, length - 1, 0, 0, 0, 0]) + # Reshape and slice out the padded elements. 
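+        # (editor's note) shape walk-through: [B, H, L*2L + (L-1)] -> [B, H, L+1, 2L-1], then keep
+        # the first L rows and the last L columns -> [B, H, L, L] (L = length).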
+ x_final = x_flat.view([batch, heads, length + 1, + 2 * length - 1])[:, :, :length, length - 1:] + return x_final + + @staticmethod + def _absolute_position_to_relative_position(x): + """ + x: [B, H, T, T] + ret: [B, H, T, 2*T-1] + """ + batch, heads, length, _ = x.size() + # padd along column + x = F.pad(x, [0, length - 1, 0, 0, 0, 0, 0, 0]) + x_flat = x.view([batch, heads, length**2 + length * (length - 1)]) + # add 0's in the beginning that will skew the elements after reshape + x_flat = F.pad(x_flat, [length, 0, 0, 0, 0, 0]) + x_final = x_flat.view([batch, heads, length, 2 * length])[:, :, :, 1:] + return x_final + + @staticmethod + def _attn_proximity_bias(length): + """Produce an attention mask that discourages distant + attention values. + Args: + length (int): an integer scalar. + Returns: + a Tensor with shape [1, 1, length, length] + """ + # L + r = torch.arange(length, dtype=torch.float32) + # L x L + diff = torch.unsqueeze(r, 0) - torch.unsqueeze(r, 1) + # scale mask values + diff = -torch.log1p(torch.abs(diff)) + # 1 x 1 x L x L + return diff.unsqueeze(0).unsqueeze(0) + + +class FeedForwardNetwork(nn.Module): + """Feed Forward Inner layers for Transformer. + + Args: + in_channels (int): input tensor channels. + out_channels (int): output tensor channels. + hidden_channels (int): inner layers hidden channels. + kernel_size (int): conv1d filter kernel size. + dropout_p (float, optional): dropout rate. Defaults to 0. + """ + def __init__(self, + in_channels, + out_channels, + hidden_channels, + kernel_size, + dropout_p=0.): + + super().__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.hidden_channels = hidden_channels + self.kernel_size = kernel_size + self.dropout_p = dropout_p + + self.conv_1 = nn.Conv1d(in_channels, + hidden_channels, + kernel_size, + padding=kernel_size // 2) + self.conv_2 = nn.Conv1d(hidden_channels, + out_channels, + kernel_size, + padding=kernel_size // 2) + self.dropout = nn.Dropout(dropout_p) + + def forward(self, x, x_mask): + x = self.conv_1(x * x_mask) + x = torch.relu(x) + x = self.dropout(x) + x = self.conv_2(x * x_mask) + return x * x_mask + + +class RelativePositionTransformer(nn.Module): + """Transformer with Relative Potional Encoding. + https://arxiv.org/abs/1803.02155 + + Args: + in_channels (int): number of channels of the input tensor. + out_chanels (int): number of channels of the output tensor. + hidden_channels (int): model hidden channels. + hidden_channels_ffn (int): hidden channels of FeedForwardNetwork. + num_heads (int): number of attention heads. + num_layers (int): number of transformer layers. + kernel_size (int, optional): kernel size of feed-forward inner layers. Defaults to 1. + dropout_p (float, optional): dropout rate for self-attention and feed-forward inner layers_per_stack. Defaults to 0. + rel_attn_window_size (int, optional): relation attention window size. + If 4, for each time step next and previous 4 time steps are attended. + If default, relative encoding is disabled and it is a regular transformer. + Defaults to None. + input_length (int, optional): input lenght to limit position encoding. Defaults to None. 
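+
+    Example (editor's sketch, not part of the original code; channel sizes are illustrative):
+        >>> import torch
+        >>> from TTS.tts.layers.glow_tts.transformer import RelativePositionTransformer
+        >>> model = RelativePositionTransformer(192, 192, 192, hidden_channels_ffn=768,
+        ...                                     num_heads=2, num_layers=6, kernel_size=3,
+        ...                                     rel_attn_window_size=4)
+        >>> x = torch.rand(2, 192, 50)     # [B, C, T]
+        >>> x_mask = torch.ones(2, 1, 50)  # [B, 1, T]
+        >>> o = model(x, x_mask)           # [B, C, T]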
+ """ + def __init__(self, + in_channels, + out_channels, + hidden_channels, + hidden_channels_ffn, + num_heads, + num_layers, + kernel_size=1, + dropout_p=0., + rel_attn_window_size=None, + input_length=None): + super().__init__() + self.hidden_channels = hidden_channels + self.hidden_channels_ffn = hidden_channels_ffn + self.num_heads = num_heads + self.num_layers = num_layers + self.kernel_size = kernel_size + self.dropout_p = dropout_p + self.rel_attn_window_size = rel_attn_window_size + + self.dropout = nn.Dropout(dropout_p) + self.attn_layers = nn.ModuleList() + self.norm_layers_1 = nn.ModuleList() + self.ffn_layers = nn.ModuleList() + self.norm_layers_2 = nn.ModuleList() + + for idx in range(self.num_layers): + self.attn_layers.append( + RelativePositionMultiHeadAttention( + hidden_channels if idx != 0 else in_channels, + hidden_channels, + num_heads, + rel_attn_window_size=rel_attn_window_size, + dropout_p=dropout_p, + input_length=input_length)) + self.norm_layers_1.append(LayerNorm(hidden_channels)) + + if hidden_channels != out_channels and (idx + 1) == self.num_layers: + self.proj = nn.Conv1d(hidden_channels, out_channels, 1) + + self.ffn_layers.append( + FeedForwardNetwork(hidden_channels, + hidden_channels if (idx + 1) != self.num_layers else out_channels, + hidden_channels_ffn, + kernel_size, + dropout_p=dropout_p)) + + self.norm_layers_2.append( + LayerNorm(hidden_channels if ( + idx + 1) != self.num_layers else out_channels)) + + def forward(self, x, x_mask): + """ + Shapes: + x: [B, C, T] + x_mask: [B, 1, T] + """ + attn_mask = x_mask.unsqueeze(2) * x_mask.unsqueeze(-1) + for i in range(self.num_layers): + x = x * x_mask + y = self.attn_layers[i](x, x, attn_mask) + y = self.dropout(y) + x = self.norm_layers_1[i](x + y) + + y = self.ffn_layers[i](x, x_mask) + y = self.dropout(y) + + if (i + 1) == self.num_layers and hasattr(self, 'proj'): + x = self.proj(x) + + x = self.norm_layers_2[i](x + y) + x = x * x_mask + return x diff --git a/TTS/tts/layers/gst_layers.py b/TTS/tts/layers/gst_layers.py new file mode 100644 index 0000000000000000000000000000000000000000..381d881ae2b25bc2090c13f11b8365ebbbc9f238 --- /dev/null +++ b/TTS/tts/layers/gst_layers.py @@ -0,0 +1,176 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class GST(nn.Module): + """Global Style Token Module for factorizing prosody in speech. + + See https://arxiv.org/pdf/1803.09017""" + + def __init__(self, num_mel, num_heads, num_style_tokens, gst_embedding_dim, speaker_embedding_dim=None): + super().__init__() + self.encoder = ReferenceEncoder(num_mel, gst_embedding_dim) + self.style_token_layer = StyleTokenLayer(num_heads, num_style_tokens, + gst_embedding_dim, speaker_embedding_dim) + + def forward(self, inputs, speaker_embedding=None): + enc_out = self.encoder(inputs) + # concat speaker_embedding + if speaker_embedding is not None: + enc_out = torch.cat([enc_out, speaker_embedding], dim=-1) + style_embed = self.style_token_layer(enc_out) + + return style_embed + + +class ReferenceEncoder(nn.Module): + """NN module creating a fixed size prosody embedding from a spectrogram. 
+ + inputs: mel spectrograms [batch_size, num_spec_frames, num_mel] + outputs: [batch_size, embedding_dim] + """ + + def __init__(self, num_mel, embedding_dim): + + super().__init__() + self.num_mel = num_mel + filters = [1] + [32, 32, 64, 64, 128, 128] + num_layers = len(filters) - 1 + convs = [ + nn.Conv2d( + in_channels=filters[i], + out_channels=filters[i + 1], + kernel_size=(3, 3), + stride=(2, 2), + padding=(1, 1)) for i in range(num_layers) + ] + self.convs = nn.ModuleList(convs) + self.bns = nn.ModuleList([ + nn.BatchNorm2d(num_features=filter_size) + for filter_size in filters[1:] + ]) + + post_conv_height = self.calculate_post_conv_height( + num_mel, 3, 2, 1, num_layers) + self.recurrence = nn.GRU( + input_size=filters[-1] * post_conv_height, + hidden_size=embedding_dim // 2, + batch_first=True) + + def forward(self, inputs): + batch_size = inputs.size(0) + x = inputs.view(batch_size, 1, -1, self.num_mel) + # x: 4D tensor [batch_size, num_channels==1, num_frames, num_mel] + for conv, bn in zip(self.convs, self.bns): + x = conv(x) + x = bn(x) + x = F.relu(x) + + x = x.transpose(1, 2) + # x: 4D tensor [batch_size, post_conv_width, + # num_channels==128, post_conv_height] + post_conv_width = x.size(1) + x = x.contiguous().view(batch_size, post_conv_width, -1) + # x: 3D tensor [batch_size, post_conv_width, + # num_channels*post_conv_height] + self.recurrence.flatten_parameters() + memory, out = self.recurrence(x) + # out: 3D tensor [seq_len==1, batch_size, encoding_size=128] + + return out.squeeze(0) + + @staticmethod + def calculate_post_conv_height(height, kernel_size, stride, pad, + n_convs): + """Height of spec after n convolutions with fixed kernel/stride/pad.""" + for _ in range(n_convs): + height = (height - kernel_size + 2 * pad) // stride + 1 + return height + + +class StyleTokenLayer(nn.Module): + """NN Module attending to style tokens based on prosody encodings.""" + + def __init__(self, num_heads, num_style_tokens, + embedding_dim, speaker_embedding_dim=None): + super().__init__() + + self.query_dim = embedding_dim // 2 + + if speaker_embedding_dim: + self.query_dim += speaker_embedding_dim + + self.key_dim = embedding_dim // num_heads + self.style_tokens = nn.Parameter( + torch.FloatTensor(num_style_tokens, self.key_dim)) + nn.init.normal_(self.style_tokens, mean=0, std=0.5) + self.attention = MultiHeadAttention( + query_dim=self.query_dim, + key_dim=self.key_dim, + num_units=embedding_dim, + num_heads=num_heads) + + def forward(self, inputs): + batch_size = inputs.size(0) + prosody_encoding = inputs.unsqueeze(1) + # prosody_encoding: 3D tensor [batch_size, 1, encoding_size==128] + tokens = torch.tanh(self.style_tokens) \ + .unsqueeze(0) \ + .expand(batch_size, -1, -1) + # tokens: 3D tensor [batch_size, num tokens, token embedding size] + style_embed = self.attention(prosody_encoding, tokens) + + return style_embed + +class MultiHeadAttention(nn.Module): + ''' + input: + query --- [N, T_q, query_dim] + key --- [N, T_k, key_dim] + output: + out --- [N, T_q, num_units] + ''' + + def __init__(self, query_dim, key_dim, num_units, num_heads): + + super().__init__() + self.num_units = num_units + self.num_heads = num_heads + self.key_dim = key_dim + + self.W_query = nn.Linear( + in_features=query_dim, out_features=num_units, bias=False) + self.W_key = nn.Linear( + in_features=key_dim, out_features=num_units, bias=False) + self.W_value = nn.Linear( + in_features=key_dim, out_features=num_units, bias=False) + + def forward(self, query, key): + queries = self.W_query(query) # [N, 
T_q, num_units] + keys = self.W_key(key) # [N, T_k, num_units] + values = self.W_value(key) + + split_size = self.num_units // self.num_heads + queries = torch.stack( + torch.split(queries, split_size, dim=2), + dim=0) # [h, N, T_q, num_units/h] + keys = torch.stack( + torch.split(keys, split_size, dim=2), + dim=0) # [h, N, T_k, num_units/h] + values = torch.stack( + torch.split(values, split_size, dim=2), + dim=0) # [h, N, T_k, num_units/h] + + # score = softmax(QK^T / (d_k ** 0.5)) + scores = torch.matmul(queries, keys.transpose(2, 3)) # [h, N, T_q, T_k] + scores = scores / (self.key_dim**0.5) + scores = F.softmax(scores, dim=3) + + # out = score * V + out = torch.matmul(scores, values) # [h, N, T_q, num_units/h] + out = torch.cat( + torch.split(out, 1, dim=0), + dim=3).squeeze(0) # [N, T_q, num_units] + + return out \ No newline at end of file diff --git a/TTS/tts/layers/losses.py b/TTS/tts/layers/losses.py new file mode 100644 index 0000000000000000000000000000000000000000..ef68d1d0eb7c42c0060ae0aa3e4104544dfc65a6 --- /dev/null +++ b/TTS/tts/layers/losses.py @@ -0,0 +1,447 @@ +import math +import numpy as np +import torch +from torch import nn +from inspect import signature +from torch.nn import functional +from TTS.tts.utils.generic_utils import sequence_mask +from TTS.tts.utils.ssim import ssim + + +# pylint: disable=abstract-method Method +# relates https://github.com/pytorch/pytorch/issues/42305 +class L1LossMasked(nn.Module): + def __init__(self, seq_len_norm): + super().__init__() + self.seq_len_norm = seq_len_norm + + def forward(self, x, target, length): + """ + Args: + x: A Variable containing a FloatTensor of size + (batch, max_len, dim) which contains the + unnormalized probability for each class. + target: A Variable containing a LongTensor of size + (batch, max_len, dim) which contains the index of the true + class for each corresponding step. + length: A Variable containing a LongTensor of size (batch,) + which contains the length of each data in a batch. + Shapes: + x: B x T X D + target: B x T x D + length: B + Returns: + loss: An average loss value in range [0, 1] masked by the length. + """ + # mask: (batch, max_len, 1) + target.requires_grad = False + mask = sequence_mask(sequence_length=length, + max_len=target.size(1)).unsqueeze(2).float() + if self.seq_len_norm: + norm_w = mask / mask.sum(dim=1, keepdim=True) + out_weights = norm_w.div(target.shape[0] * target.shape[2]) + mask = mask.expand_as(x) + loss = functional.l1_loss(x * mask, + target * mask, + reduction='none') + loss = loss.mul(out_weights.to(loss.device)).sum() + else: + mask = mask.expand_as(x) + loss = functional.l1_loss(x * mask, target * mask, reduction='sum') + loss = loss / mask.sum() + return loss + + +class MSELossMasked(nn.Module): + def __init__(self, seq_len_norm): + super(MSELossMasked, self).__init__() + self.seq_len_norm = seq_len_norm + + def forward(self, x, target, length): + """ + Args: + x: A Variable containing a FloatTensor of size + (batch, max_len, dim) which contains the + unnormalized probability for each class. + target: A Variable containing a LongTensor of size + (batch, max_len, dim) which contains the index of the true + class for each corresponding step. + length: A Variable containing a LongTensor of size (batch,) + which contains the length of each data in a batch. + Shapes: + x: B x T X D + target: B x T x D + length: B + Returns: + loss: An average loss value in range [0, 1] masked by the length. 
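+
+        Example (editor's sketch, not part of the original code; sizes are illustrative):
+            >>> import torch
+            >>> from TTS.tts.layers.losses import MSELossMasked
+            >>> criterion = MSELossMasked(seq_len_norm=False)
+            >>> x = torch.rand(4, 100, 80)                # predictions  [B, T, D]
+            >>> target = torch.rand(4, 100, 80)           # ground truth [B, T, D]
+            >>> length = torch.tensor([100, 95, 62, 30])  # valid frames per sample
+            >>> loss = criterion(x, target, length)       # scalar; frames beyond `length` are masked out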
+ """ + # mask: (batch, max_len, 1) + target.requires_grad = False + mask = sequence_mask(sequence_length=length, + max_len=target.size(1)).unsqueeze(2).float() + if self.seq_len_norm: + norm_w = mask / mask.sum(dim=1, keepdim=True) + out_weights = norm_w.div(target.shape[0] * target.shape[2]) + mask = mask.expand_as(x) + loss = functional.mse_loss(x * mask, + target * mask, + reduction='none') + loss = loss.mul(out_weights.to(loss.device)).sum() + else: + mask = mask.expand_as(x) + loss = functional.mse_loss(x * mask, + target * mask, + reduction='sum') + loss = loss / mask.sum() + return loss + + +class SSIMLoss(torch.nn.Module): + """SSIM loss as explained here https://en.wikipedia.org/wiki/Structural_similarity""" + def __init__(self): + super().__init__() + self.loss_func = ssim + + def forward(self, y_hat, y, length=None): + """ + Args: + y_hat (tensor): model prediction values. + y (tensor): target values. + length (tensor): length of each sample in a batch. + Shapes: + y_hat: B x T X D + y: B x T x D + length: B + Returns: + loss: An average loss value in range [0, 1] masked by the length. + """ + if length is not None: + m = sequence_mask(sequence_length=length, + max_len=y.size(1)).unsqueeze(2).float().to( + y_hat.device) + y_hat, y = y_hat * m, y * m + return 1 - self.loss_func(y_hat.unsqueeze(1), y.unsqueeze(1)) + + +class AttentionEntropyLoss(nn.Module): + # pylint: disable=R0201 + def forward(self, align): + """ + Forces attention to be more decisive by penalizing + soft attention weights + + TODO: arguments + TODO: unit_test + """ + entropy = torch.distributions.Categorical(probs=align).entropy() + loss = (entropy / np.log(align.shape[1])).mean() + return loss + + +class BCELossMasked(nn.Module): + def __init__(self, pos_weight): + super(BCELossMasked, self).__init__() + self.pos_weight = pos_weight + + def forward(self, x, target, length): + """ + Args: + x: A Variable containing a FloatTensor of size + (batch, max_len) which contains the + unnormalized probability for each class. + target: A Variable containing a LongTensor of size + (batch, max_len) which contains the index of the true + class for each corresponding step. + length: A Variable containing a LongTensor of size (batch,) + which contains the length of each data in a batch. + Shapes: + x: B x T + target: B x T + length: B + Returns: + loss: An average loss value in range [0, 1] masked by the length. + """ + # mask: (batch, max_len, 1) + target.requires_grad = False + if length is not None: + mask = sequence_mask(sequence_length=length, + max_len=target.size(1)).float() + x = x * mask + target = target * mask + num_items = mask.sum() + else: + num_items = torch.numel(x) + loss = functional.binary_cross_entropy_with_logits( + x, + target, + pos_weight=self.pos_weight, + reduction='sum') + loss = loss / num_items + return loss + + +class DifferentailSpectralLoss(nn.Module): + """Differential Spectral Loss + https://arxiv.org/ftp/arxiv/papers/1909/1909.10302.pdf""" + + def __init__(self, loss_func): + super().__init__() + self.loss_func = loss_func + + def forward(self, x, target, length=None): + """ + Shapes: + x: B x T + target: B x T + length: B + Returns: + loss: An average loss value in range [0, 1] masked by the length. 
+ """ + x_diff = x[:, 1:] - x[:, :-1] + target_diff = target[:, 1:] - target[:, :-1] + if length is None: + return self.loss_func(x_diff, target_diff) + return self.loss_func(x_diff, target_diff, length-1) + + +class GuidedAttentionLoss(torch.nn.Module): + def __init__(self, sigma=0.4): + super(GuidedAttentionLoss, self).__init__() + self.sigma = sigma + + def _make_ga_masks(self, ilens, olens): + B = len(ilens) + max_ilen = max(ilens) + max_olen = max(olens) + ga_masks = torch.zeros((B, max_olen, max_ilen)) + for idx, (ilen, olen) in enumerate(zip(ilens, olens)): + ga_masks[idx, :olen, :ilen] = self._make_ga_mask( + ilen, olen, self.sigma) + return ga_masks + + def forward(self, att_ws, ilens, olens): + ga_masks = self._make_ga_masks(ilens, olens).to(att_ws.device) + seq_masks = self._make_masks(ilens, olens).to(att_ws.device) + losses = ga_masks * att_ws + loss = torch.mean(losses.masked_select(seq_masks)) + return loss + + @staticmethod + def _make_ga_mask(ilen, olen, sigma): + grid_x, grid_y = torch.meshgrid(torch.arange(olen).to(olen), torch.arange(ilen).to(ilen)) + grid_x, grid_y = grid_x.float(), grid_y.float() + return 1.0 - torch.exp(-(grid_y / ilen - grid_x / olen)**2 / + (2 * (sigma**2))) + + @staticmethod + def _make_masks(ilens, olens): + in_masks = sequence_mask(ilens) + out_masks = sequence_mask(olens) + return out_masks.unsqueeze(-1) & in_masks.unsqueeze(-2) + + +class Huber(nn.Module): + # pylint: disable=R0201 + def forward(self, x, y, length=None): + """ + Shapes: + x: B x T + y: B x T + length: B + """ + mask = sequence_mask(sequence_length=length, max_len=y.size(1)).float() + return torch.nn.functional.smooth_l1_loss( + x * mask, y * mask, reduction='sum') / mask.sum() + + +######################## +# MODEL LOSS LAYERS +######################## + +class TacotronLoss(torch.nn.Module): + """Collection of Tacotron set-up based on provided config.""" + def __init__(self, c, stopnet_pos_weight=10, ga_sigma=0.4): + super(TacotronLoss, self).__init__() + self.stopnet_pos_weight = stopnet_pos_weight + self.ga_alpha = c.ga_alpha + self.decoder_diff_spec_alpha = c.decoder_diff_spec_alpha + self.postnet_diff_spec_alpha = c.postnet_diff_spec_alpha + self.decoder_alpha = c.decoder_loss_alpha + self.postnet_alpha = c.postnet_loss_alpha + self.decoder_ssim_alpha = c.decoder_ssim_alpha + self.postnet_ssim_alpha = c.postnet_ssim_alpha + self.config = c + + # postnet and decoder loss + if c.loss_masking: + self.criterion = L1LossMasked(c.seq_len_norm) if c.model in [ + "Tacotron" + ] else MSELossMasked(c.seq_len_norm) + else: + self.criterion = nn.L1Loss() if c.model in ["Tacotron" + ] else nn.MSELoss() + # guided attention loss + if c.ga_alpha > 0: + self.criterion_ga = GuidedAttentionLoss(sigma=ga_sigma) + # differential spectral loss + if c.postnet_diff_spec_alpha > 0 or c.decoder_diff_spec_alpha > 0: + self.criterion_diff_spec = DifferentailSpectralLoss(loss_func=self.criterion) + # ssim loss + if c.postnet_ssim_alpha > 0 or c.decoder_ssim_alpha > 0: + self.criterion_ssim = SSIMLoss() + # stopnet loss + # pylint: disable=not-callable + self.criterion_st = BCELossMasked( + pos_weight=torch.tensor(stopnet_pos_weight)) if c.stopnet else None + + def forward(self, postnet_output, decoder_output, mel_input, linear_input, + stopnet_output, stopnet_target, output_lens, decoder_b_output, + alignments, alignment_lens, alignments_backwards, input_lens): + + return_dict = {} + # remove lengths if no masking is applied + if not self.config.loss_masking: + output_lens = None + # decoder and 
postnet losses + if self.config.loss_masking: + if self.decoder_alpha > 0: + decoder_loss = self.criterion(decoder_output, mel_input, + output_lens) + if self.postnet_alpha > 0: + if self.config.model in ["Tacotron", "TacotronGST"]: + postnet_loss = self.criterion(postnet_output, linear_input, + output_lens) + else: + postnet_loss = self.criterion(postnet_output, mel_input, + output_lens) + else: + if self.decoder_alpha > 0: + decoder_loss = self.criterion(decoder_output, mel_input) + if self.postnet_alpha > 0: + if self.config.model in ["Tacotron", "TacotronGST"]: + postnet_loss = self.criterion(postnet_output, linear_input) + else: + postnet_loss = self.criterion(postnet_output, mel_input) + loss = self.decoder_alpha * decoder_loss + self.postnet_alpha * postnet_loss + return_dict['decoder_loss'] = decoder_loss + return_dict['postnet_loss'] = postnet_loss + + # stopnet loss + stop_loss = self.criterion_st( + stopnet_output, stopnet_target, + output_lens) if self.config.stopnet else torch.zeros(1) + if not self.config.separate_stopnet and self.config.stopnet: + loss += stop_loss + return_dict['stopnet_loss'] = stop_loss + + # backward decoder loss (if enabled) + if self.config.bidirectional_decoder: + if self.config.loss_masking: + decoder_b_loss = self.criterion( + torch.flip(decoder_b_output, dims=(1, )), mel_input, + output_lens) + else: + decoder_b_loss = self.criterion(torch.flip(decoder_b_output, dims=(1, )), mel_input) + decoder_c_loss = torch.nn.functional.l1_loss(torch.flip(decoder_b_output, dims=(1, )), decoder_output) + loss += self.decoder_alpha * (decoder_b_loss + decoder_c_loss) + return_dict['decoder_b_loss'] = decoder_b_loss + return_dict['decoder_c_loss'] = decoder_c_loss + + # double decoder consistency loss (if enabled) + if self.config.double_decoder_consistency: + if self.config.loss_masking: + decoder_b_loss = self.criterion(decoder_b_output, mel_input, + output_lens) + else: + decoder_b_loss = self.criterion(decoder_b_output, mel_input) + # decoder_c_loss = torch.nn.functional.l1_loss(decoder_b_output, decoder_output) + attention_c_loss = torch.nn.functional.l1_loss(alignments, alignments_backwards) + loss += self.decoder_alpha * (decoder_b_loss + attention_c_loss) + return_dict['decoder_coarse_loss'] = decoder_b_loss + return_dict['decoder_ddc_loss'] = attention_c_loss + + # guided attention loss (if enabled) + if self.config.ga_alpha > 0: + ga_loss = self.criterion_ga(alignments, input_lens, alignment_lens) + loss += ga_loss * self.ga_alpha + return_dict['ga_loss'] = ga_loss + + # decoder differential spectral loss + if self.config.decoder_diff_spec_alpha > 0: + decoder_diff_spec_loss = self.criterion_diff_spec(decoder_output, mel_input, output_lens) + loss += decoder_diff_spec_loss * self.decoder_diff_spec_alpha + return_dict['decoder_diff_spec_loss'] = decoder_diff_spec_loss + + # postnet differential spectral loss + if self.config.postnet_diff_spec_alpha > 0: + postnet_diff_spec_loss = self.criterion_diff_spec(postnet_output, mel_input, output_lens) + loss += postnet_diff_spec_loss * self.postnet_diff_spec_alpha + return_dict['postnet_diff_spec_loss'] = postnet_diff_spec_loss + + # decoder ssim loss + if self.config.decoder_ssim_alpha > 0: + decoder_ssim_loss = self.criterion_ssim(decoder_output, mel_input, output_lens) + loss += decoder_ssim_loss * self.postnet_ssim_alpha + return_dict['decoder_ssim_loss'] = decoder_ssim_loss + + # postnet ssim loss + if self.config.postnet_ssim_alpha > 0: + postnet_ssim_loss = self.criterion_ssim(postnet_output, mel_input, 
output_lens) + loss += postnet_ssim_loss * self.postnet_ssim_alpha + return_dict['postnet_ssim_loss'] = postnet_ssim_loss + + return_dict['loss'] = loss + + # check if any loss is NaN + for key, loss in return_dict.items(): + if torch.isnan(loss): + raise RuntimeError(f" [!] NaN loss with {key}.") + return return_dict + + +class GlowTTSLoss(torch.nn.Module): + def __init__(self): + super().__init__() + self.constant_factor = 0.5 * math.log(2 * math.pi) + + def forward(self, z, means, scales, log_det, y_lengths, o_dur_log, + o_attn_dur, x_lengths): + return_dict = {} + # flow loss - neg log likelihood + pz = torch.sum(scales) + 0.5 * torch.sum( + torch.exp(-2 * scales) * (z - means)**2) + log_mle = self.constant_factor + (pz - torch.sum(log_det)) / ( + torch.sum(y_lengths) * z.shape[1]) + # duration loss - MSE + # loss_dur = torch.sum((o_dur_log - o_attn_dur)**2) / torch.sum(x_lengths) + # duration loss - huber loss + loss_dur = torch.nn.functional.smooth_l1_loss( + o_dur_log, o_attn_dur, reduction='sum') / torch.sum(x_lengths) + return_dict['loss'] = log_mle + loss_dur + return_dict['log_mle'] = log_mle + return_dict['loss_dur'] = loss_dur + + # check if any loss is NaN + for key, loss in return_dict.items(): + if torch.isnan(loss): + raise RuntimeError(f" [!] NaN loss with {key}.") + return return_dict + + +class SpeedySpeechLoss(nn.Module): + def __init__(self, c): + super().__init__() + self.l1 = L1LossMasked(False) + self.ssim = SSIMLoss() + self.huber = Huber() + + self.ssim_alpha = c.ssim_alpha + self.huber_alpha = c.huber_alpha + self.l1_alpha = c.l1_alpha + + def forward(self, decoder_output, decoder_target, decoder_output_lens, dur_output, dur_target, input_lens): + l1_loss = self.l1(decoder_output, decoder_target, decoder_output_lens) + ssim_loss = self.ssim(decoder_output, decoder_target, decoder_output_lens) + huber_loss = self.huber(dur_output, dur_target, input_lens) + loss = l1_loss + ssim_loss + huber_loss + return {'loss': loss, 'loss_l1': l1_loss, 'loss_ssim': ssim_loss, 'loss_dur': huber_loss} diff --git a/TTS/tts/layers/speedy_speech/__init__.py b/TTS/tts/layers/speedy_speech/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/TTS/tts/layers/speedy_speech/decoder.py b/TTS/tts/layers/speedy_speech/decoder.py new file mode 100644 index 0000000000000000000000000000000000000000..5ffb3339ff4766c86ee3203f323630ae07a18524 --- /dev/null +++ b/TTS/tts/layers/speedy_speech/decoder.py @@ -0,0 +1,192 @@ +import torch +from torch import nn +from TTS.tts.layers.generic.res_conv_bn import Conv1dBNBlock, ResidualConv1dBNBlock, Conv1dBN +from TTS.tts.layers.generic.wavenet import WNBlocks +from TTS.tts.layers.glow_tts.transformer import RelativePositionTransformer + + +class WaveNetDecoder(nn.Module): + """WaveNet based decoder with a prenet and a postnet. + + prenet: conv1d_1x1 + postnet: 3 x [conv1d_1x1 -> relu] -> conv1d_1x1 + + TODO: Integrate speaker conditioning vector. + + Note: + default wavenet parameters; + params = { + "num_blocks": 12, + "hidden_channels":192, + "kernel_size": 5, + "dilation_rate": 1, + "num_layers": 4, + "dropout_p": 0.05 + } + + Args: + in_channels (int): number of input channels. + out_channels (int): number of output channels. + hidden_channels (int): number of hidden channels for prenet and postnet. + params (dict): dictionary for residual convolutional blocks. 
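+
+    Example (editor's sketch, not part of the original code; uses the default WaveNet params
+    listed above, channel sizes are illustrative):
+        >>> import torch
+        >>> from TTS.tts.layers.speedy_speech.decoder import WaveNetDecoder
+        >>> params = {"num_blocks": 12, "hidden_channels": 192, "kernel_size": 5,
+        ...           "dilation_rate": 1, "num_layers": 4, "dropout_p": 0.05}
+        >>> decoder = WaveNetDecoder(128, 80, 128, c_in_channels=0, params=params)
+        >>> x = torch.rand(2, 128, 37)     # expanded encoder outputs [B, C, T]
+        >>> x_mask = torch.ones(2, 1, 37)  # [B, 1, T]
+        >>> o = decoder(x, x_mask)         # [B, out_channels, T]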
+ """ + def __init__(self, in_channels, out_channels, hidden_channels, c_in_channels, params): + super().__init__() + # prenet + self.prenet = torch.nn.Conv1d(in_channels, params['hidden_channels'], 1) + # wavenet layers + self.wn = WNBlocks(params['hidden_channels'], c_in_channels=c_in_channels, **params) + # postnet + self.postnet = [ + torch.nn.Conv1d(params['hidden_channels'], hidden_channels, 1), + torch.nn.ReLU(), + torch.nn.Conv1d(hidden_channels, hidden_channels, 1), + torch.nn.ReLU(), + torch.nn.Conv1d(hidden_channels, hidden_channels, 1), + torch.nn.ReLU(), + torch.nn.Conv1d(hidden_channels, out_channels, 1), + ] + self.postnet = nn.Sequential(*self.postnet) + + def forward(self, x, x_mask=None, g=None): + x = self.prenet(x) * x_mask + x = self.wn(x, x_mask, g) + o = self.postnet(x) * x_mask + return o + + +class RelativePositionTransformerDecoder(nn.Module): + """Decoder with Relative Positional Transformer. + + Note: + Default params + params={ + 'hidden_channels_ffn': 128, + 'num_heads': 2, + "kernel_size": 3, + "dropout_p": 0.1, + "num_layers": 8, + "rel_attn_window_size": 4, + "input_length": None + } + + Args: + in_channels (int): number of input channels. + out_channels (int): number of output channels. + hidden_channels (int): number of hidden channels including Transformer layers. + params (dict): dictionary for residual convolutional blocks. + """ + def __init__(self, in_channels, out_channels, hidden_channels, params): + + super().__init__() + self.prenet = Conv1dBN(in_channels, hidden_channels, 1, 1) + self.rel_pos_transformer = RelativePositionTransformer( + in_channels, out_channels, hidden_channels, **params) + + def forward(self, x, x_mask=None, g=None): # pylint: disable=unused-argument + o = self.prenet(x) * x_mask + o = self.rel_pos_transformer(o, x_mask) + return o + + +class ResidualConv1dBNDecoder(nn.Module): + """Residual Convolutional Decoder as in the original Speedy Speech paper + + TODO: Integrate speaker conditioning vector. + + Note: + Default params + params = { + "kernel_size": 4, + "dilations": 4 * [1, 2, 4, 8] + [1], + "num_conv_blocks": 2, + "num_res_blocks": 17 + } + + Args: + in_channels (int): number of input channels. + out_channels (int): number of output channels. + hidden_channels (int): number of hidden channels including ResidualConv1dBNBlock layers. + params (dict): dictionary for residual convolutional blocks. + """ + def __init__(self, in_channels, out_channels, hidden_channels, params): + super().__init__() + self.res_conv_block = ResidualConv1dBNBlock(in_channels, + hidden_channels, + hidden_channels, **params) + self.post_conv = nn.Conv1d(hidden_channels, hidden_channels, 1) + self.postnet = nn.Sequential( + Conv1dBNBlock(hidden_channels, + hidden_channels, + hidden_channels, + params['kernel_size'], + 1, + num_conv_blocks=2), + nn.Conv1d(hidden_channels, out_channels, 1), + ) + + def forward(self, x, x_mask=None, g=None): # pylint: disable=unused-argument + o = self.res_conv_block(x, x_mask) + o = self.post_conv(o) + x + return self.postnet(o) * x_mask + + +class Decoder(nn.Module): + """Decodes the expanded phoneme encoding into spectrograms + Args: + out_channels (int): number of output channels. + in_hidden_channels (int): input and hidden channels. Model keeps the input channels for the intermediate layers. + decoder_type (str): decoder layer types. 'transformers' or 'residual_conv_bn'. Default 'residual_conv_bn'. + decoder_params (dict): model parameters for specified decoder type. 
+ c_in_channels (int): number of channels for conditional input. + + Shapes: + - input: (B, C, T) + """ + + # pylint: disable=dangerous-default-value + def __init__( + self, + out_channels, + in_hidden_channels, + decoder_type='residual_conv_bn', + decoder_params={ + "kernel_size": 4, + "dilations": 4 * [1, 2, 4, 8] + [1], + "num_conv_blocks": 2, + "num_res_blocks": 17 + }, + c_in_channels=0): + super().__init__() + + if decoder_type == 'transformer': + self.decoder = RelativePositionTransformerDecoder( + in_channels=in_hidden_channels, + out_channels=out_channels, + hidden_channels=in_hidden_channels, + params=decoder_params) + elif decoder_type == 'residual_conv_bn': + self.decoder = ResidualConv1dBNDecoder( + in_channels=in_hidden_channels, + out_channels=out_channels, + hidden_channels=in_hidden_channels, + params=decoder_params) + elif decoder_type == 'wavenet': + self.decoder = WaveNetDecoder(in_channels=in_hidden_channels, + out_channels=out_channels, + hidden_channels=in_hidden_channels, + c_in_channels=c_in_channels, + params=decoder_params) + else: + raise ValueError(f'[!] Unknown decoder type - {decoder_type}') + + def forward(self, x, x_mask, g=None): # pylint: disable=unused-argument + """ + Args: + x: [B, C, T] + x_mask: [B, 1, T] + g: [B, C_g, 1] + """ + # TODO: implement multi-speaker + o = self.decoder(x, x_mask, g) + return o \ No newline at end of file diff --git a/TTS/tts/layers/speedy_speech/duration_predictor.py b/TTS/tts/layers/speedy_speech/duration_predictor.py new file mode 100644 index 0000000000000000000000000000000000000000..5c5c4f3a274d1667caa65646b9da26326cb902f4 --- /dev/null +++ b/TTS/tts/layers/speedy_speech/duration_predictor.py @@ -0,0 +1,39 @@ +from torch import nn + +from TTS.tts.layers.generic.res_conv_bn import Conv1dBN + + +class DurationPredictor(nn.Module): + """Speedy Speech duration predictor model. + Predicts phoneme durations from encoder outputs. + + Note: + Outputs interpreted as log(durations) + To get actual durations, do exp transformation + + conv_BN_4x1 -> conv_BN_3x1 -> conv_BN_1x1 -> conv_1x1 + + Args: + hidden_channels (int): number of channels in the inner layers. + """ + def __init__(self, hidden_channels): + + super().__init__() + + self.layers = nn.ModuleList([ + Conv1dBN(hidden_channels, hidden_channels, 4, 1), + Conv1dBN(hidden_channels, hidden_channels, 3, 1), + Conv1dBN(hidden_channels, hidden_channels, 1, 1), + nn.Conv1d(hidden_channels, 1, 1) + ]) + + def forward(self, x, x_mask): + """ + Shapes: + x: [B, C, T] + x_mask: [B, 1, T] + """ + o = x + for layer in self.layers: + o = layer(o) * x_mask + return o diff --git a/TTS/tts/layers/speedy_speech/encoder.py b/TTS/tts/layers/speedy_speech/encoder.py new file mode 100644 index 0000000000000000000000000000000000000000..d26b306c6d5b5ad646878a8b1dbfbe28ea4b08b1 --- /dev/null +++ b/TTS/tts/layers/speedy_speech/encoder.py @@ -0,0 +1,209 @@ +import math +import torch +from torch import nn + +from TTS.tts.layers.glow_tts.transformer import RelativePositionTransformer +from TTS.tts.layers.generic.res_conv_bn import ResidualConv1dBNBlock + + + +class PositionalEncoding(nn.Module): + """Sinusoidal positional encoding for non-recurrent neural networks. 
+ Implementation based on "Attention Is All You Need" + Args: + channels (int): embedding size + dropout (float): dropout parameter + """ + def __init__(self, channels, dropout=0.0, max_len=5000): + super().__init__() + if channels % 2 != 0: + raise ValueError( + "Cannot use sin/cos positional encoding with " + "odd channels (got channels={:d})".format(channels)) + pe = torch.zeros(max_len, channels) + position = torch.arange(0, max_len).unsqueeze(1) + div_term = torch.exp((torch.arange(0, channels, 2, dtype=torch.float) * + -(math.log(10000.0) / channels))) + pe[:, 0::2] = torch.sin(position.float() * div_term) + pe[:, 1::2] = torch.cos(position.float() * div_term) + pe = pe.unsqueeze(0).transpose(1, 2) + self.register_buffer('pe', pe) + if dropout > 0: + self.dropout = nn.Dropout(p=dropout) + self.channels = channels + + def forward(self, x, mask=None, first_idx=None, last_idx=None): + """ + Shapes: + x: [B, C, T] + mask: [B, 1, T] + first_idx: int + last_idx: int + """ + + x = x * math.sqrt(self.channels) + if first_idx is None: + if self.pe.size(2) < x.size(2): + raise RuntimeError( + f"Sequence is {x.size(2)} but PositionalEncoding is" + f" limited to {self.pe.size(2)}. See max_len argument.") + if mask is not None: + pos_enc = (self.pe[:, :, :x.size(2)] * mask) + else: + pos_enc = self.pe[:, :, :x.size(2)] + x = x + pos_enc + else: + x = x + self.pe[:, :, first_idx:last_idx] + if hasattr(self, 'dropout'): + x = self.dropout(x) + return x + + +class RelativePositionTransformerEncoder(nn.Module): + """Speedy speech encoder built on Transformer with Relative Position encoding. + + TODO: Integrate speaker conditioning vector. + + Args: + in_channels (int): number of input channels. + out_channels (int): number of output channels. + hidden_channels (int): number of hidden channels + params (dict): dictionary for residual convolutional blocks. + """ + def __init__(self, in_channels, out_channels, hidden_channels, params): + super().__init__() + self.prenet = ResidualConv1dBNBlock(in_channels, + hidden_channels, + hidden_channels, + kernel_size=5, + num_res_blocks=3, + num_conv_blocks=1, + dilations=[1, 1, 1] + ) + self.rel_pos_transformer = RelativePositionTransformer( + hidden_channels, out_channels, hidden_channels, **params) + + def forward(self, x, x_mask=None, g=None): # pylint: disable=unused-argument + if x_mask is None: + x_mask = 1 + o = self.prenet(x) * x_mask + o = self.rel_pos_transformer(o, x_mask) + return o + + +class ResidualConv1dBNEncoder(nn.Module): + """Residual Convolutional Encoder as in the original Speedy Speech paper + + TODO: Integrate speaker conditioning vector. + + Args: + in_channels (int): number of input channels. + out_channels (int): number of output channels. + hidden_channels (int): number of hidden channels + params (dict): dictionary for residual convolutional blocks. 
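+
+    Example (editor's sketch, not part of the original code; uses the default residual_conv_bn
+    params documented in the Encoder factory below, sizes are illustrative):
+        >>> import torch
+        >>> from TTS.tts.layers.speedy_speech.encoder import ResidualConv1dBNEncoder
+        >>> params = {"kernel_size": 4, "dilations": 4 * [1, 2, 4] + [1],
+        ...           "num_conv_blocks": 2, "num_res_blocks": 13}
+        >>> enc = ResidualConv1dBNEncoder(128, 128, 128, params=params)
+        >>> x = torch.rand(2, 128, 44)     # embedded character sequence [B, C, T]
+        >>> x_mask = torch.ones(2, 1, 44)  # [B, 1, T]
+        >>> o = enc(x, x_mask)             # [B, out_channels, T]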
+ """ + def __init__(self, in_channels, out_channels, hidden_channels, params): + super().__init__() + self.prenet = nn.Sequential( + nn.Conv1d(in_channels, hidden_channels, 1), + nn.ReLU()) + self.res_conv_block = ResidualConv1dBNBlock(hidden_channels, + hidden_channels, + hidden_channels, **params) + + self.postnet = nn.Sequential(*[ + nn.Conv1d(hidden_channels, hidden_channels, 1), + nn.ReLU(), + nn.BatchNorm1d(hidden_channels), + nn.Conv1d(hidden_channels, out_channels, 1) + ]) + + def forward(self, x, x_mask=None, g=None): # pylint: disable=unused-argument + if x_mask is None: + x_mask = 1 + o = self.prenet(x) * x_mask + o = self.res_conv_block(o, x_mask) + o = self.postnet(o + x) * x_mask + return o * x_mask + + +class Encoder(nn.Module): + # pylint: disable=dangerous-default-value + """Factory class for Speedy Speech encoder enables different encoder types internally. + + Args: + num_chars (int): number of characters. + out_channels (int): number of output channels. + in_hidden_channels (int): input and hidden channels. Model keeps the input channels for the intermediate layers. + encoder_type (str): encoder layer types. 'transformers' or 'residual_conv_bn'. Default 'residual_conv_bn'. + encoder_params (dict): model parameters for specified encoder type. + c_in_channels (int): number of channels for conditional input. + + Note: + Default encoder_params... + + for 'transformer' + encoder_params={ + 'hidden_channels_ffn': 128, + 'num_heads': 2, + "kernel_size": 3, + "dropout_p": 0.1, + "num_layers": 6, + "rel_attn_window_size": 4, + "input_length": None + }, + + for 'residual_conv_bn' + encoder_params = { + "kernel_size": 4, + "dilations": 4 * [1, 2, 4] + [1], + "num_conv_blocks": 2, + "num_res_blocks": 13 + } + """ + def __init__( + self, + in_hidden_channels, + out_channels, + encoder_type='residual_conv_bn', + encoder_params={ + "kernel_size": 4, + "dilations": 4 * [1, 2, 4] + [1], + "num_conv_blocks": 2, + "num_res_blocks": 13 + }, + c_in_channels=0): + super().__init__() + self.out_channels = out_channels + self.in_channels = in_hidden_channels + self.hidden_channels = in_hidden_channels + self.encoder_type = encoder_type + self.c_in_channels = c_in_channels + + # init encoder + if encoder_type.lower() == "transformer": + # text encoder + self.encoder = RelativePositionTransformerEncoder(in_hidden_channels, + out_channels, + in_hidden_channels, + encoder_params) # pylint: disable=unexpected-keyword-arg + elif encoder_type.lower() == 'residual_conv_bn': + self.encoder = ResidualConv1dBNEncoder(in_hidden_channels, + out_channels, + in_hidden_channels, + encoder_params) + else: + raise NotImplementedError(' [!] unknown encoder type.') + + # final projection layers + + + def forward(self, x, x_mask, g=None): # pylint: disable=unused-argument + """ + Shapes: + x: [B, C, T] + x_mask: [B, 1, T] + g: [B, C, 1] + """ + o = self.encoder(x, x_mask) + return o * x_mask diff --git a/TTS/tts/layers/tacotron.py b/TTS/tts/layers/tacotron.py new file mode 100644 index 0000000000000000000000000000000000000000..c79edcc3008a9cecbf3c7fe75c1b306936560e65 --- /dev/null +++ b/TTS/tts/layers/tacotron.py @@ -0,0 +1,515 @@ +# coding: utf-8 +import torch +from torch import nn +from .common_layers import Prenet +from .attentions import init_attn + + +class BatchNormConv1d(nn.Module): + r"""A wrapper for Conv1d with BatchNorm. It sets the activation + function between Conv and BatchNorm layers. BatchNorm layer + is initialized with the TF default values for momentum and eps. 
+ + Args: + in_channels: size of each input sample + out_channels: size of each output samples + kernel_size: kernel size of conv filters + stride: stride of conv filters + padding: padding of conv filters + activation: activation function set b/w Conv1d and BatchNorm + + Shapes: + - input: (B, D) + - output: (B, D) + """ + + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride, + padding, + activation=None): + + super(BatchNormConv1d, self).__init__() + self.padding = padding + self.padder = nn.ConstantPad1d(padding, 0) + self.conv1d = nn.Conv1d( + in_channels, + out_channels, + kernel_size=kernel_size, + stride=stride, + padding=0, + bias=False) + # Following tensorflow's default parameters + self.bn = nn.BatchNorm1d(out_channels, momentum=0.99, eps=1e-3) + self.activation = activation + # self.init_layers() + + def init_layers(self): + if isinstance(self.activation, torch.nn.ReLU): + w_gain = 'relu' + elif isinstance(self.activation, torch.nn.Tanh): + w_gain = 'tanh' + elif self.activation is None: + w_gain = 'linear' + else: + raise RuntimeError('Unknown activation function') + torch.nn.init.xavier_uniform_( + self.conv1d.weight, gain=torch.nn.init.calculate_gain(w_gain)) + + def forward(self, x): + x = self.padder(x) + x = self.conv1d(x) + x = self.bn(x) + if self.activation is not None: + x = self.activation(x) + return x + + +class Highway(nn.Module): + r"""Highway layers as explained in https://arxiv.org/abs/1505.00387 + + Args: + in_features (int): size of each input sample + out_feature (int): size of each output sample + + Shapes: + - input: (B, *, H_in) + - output: (B, *, H_out) + """ + + # TODO: Try GLU layer + def __init__(self, in_features, out_feature): + super(Highway, self).__init__() + self.H = nn.Linear(in_features, out_feature) + self.H.bias.data.zero_() + self.T = nn.Linear(in_features, out_feature) + self.T.bias.data.fill_(-1) + self.relu = nn.ReLU() + self.sigmoid = nn.Sigmoid() + # self.init_layers() + + def init_layers(self): + torch.nn.init.xavier_uniform_( + self.H.weight, gain=torch.nn.init.calculate_gain('relu')) + torch.nn.init.xavier_uniform_( + self.T.weight, gain=torch.nn.init.calculate_gain('sigmoid')) + + def forward(self, inputs): + H = self.relu(self.H(inputs)) + T = self.sigmoid(self.T(inputs)) + return H * T + inputs * (1.0 - T) + + +class CBHG(nn.Module): + """CBHG module: a recurrent neural network composed of: + - 1-d convolution banks + - Highway networks + residual connections + - Bidirectional gated recurrent units + + Args: + in_features (int): sample size + K (int): max filter size in conv bank + projections (list): conv channel sizes for conv projections + num_highways (int): number of highways layers + + Shapes: + - input: (B, C, T_in) + - output: (B, T_in, C*2) + """ + #pylint: disable=dangerous-default-value + def __init__(self, + in_features, + K=16, + conv_bank_features=128, + conv_projections=[128, 128], + highway_features=128, + gru_features=128, + num_highways=4): + super(CBHG, self).__init__() + self.in_features = in_features + self.conv_bank_features = conv_bank_features + self.highway_features = highway_features + self.gru_features = gru_features + self.conv_projections = conv_projections + self.relu = nn.ReLU() + # list of conv1d bank with filter size k=1...K + # TODO: try dilational layers instead + self.conv1d_banks = nn.ModuleList([ + BatchNormConv1d(in_features, + conv_bank_features, + kernel_size=k, + stride=1, + padding=[(k - 1) // 2, k // 2], + activation=self.relu) for k in range(1, K + 1) + ]) + # 
max pooling of conv bank, with padding + # TODO: try average pooling OR larger kernel size + out_features = [K * conv_bank_features] + conv_projections[:-1] + activations = [self.relu] * (len(conv_projections) - 1) + activations += [None] + # setup conv1d projection layers + layer_set = [] + for (in_size, out_size, ac) in zip(out_features, conv_projections, + activations): + layer = BatchNormConv1d(in_size, + out_size, + kernel_size=3, + stride=1, + padding=[1, 1], + activation=ac) + layer_set.append(layer) + self.conv1d_projections = nn.ModuleList(layer_set) + # setup Highway layers + if self.highway_features != conv_projections[-1]: + self.pre_highway = nn.Linear(conv_projections[-1], + highway_features, + bias=False) + self.highways = nn.ModuleList([ + Highway(highway_features, highway_features) + for _ in range(num_highways) + ]) + # bi-directional GPU layer + self.gru = nn.GRU(gru_features, + gru_features, + 1, + batch_first=True, + bidirectional=True) + + def forward(self, inputs): + # (B, in_features, T_in) + x = inputs + # (B, hid_features*K, T_in) + # Concat conv1d bank outputs + outs = [] + for conv1d in self.conv1d_banks: + out = conv1d(x) + outs.append(out) + x = torch.cat(outs, dim=1) + assert x.size(1) == self.conv_bank_features * len(self.conv1d_banks) + for conv1d in self.conv1d_projections: + x = conv1d(x) + x += inputs + x = x.transpose(1, 2) + if self.highway_features != self.conv_projections[-1]: + x = self.pre_highway(x) + # Residual connection + # TODO: try residual scaling as in Deep Voice 3 + # TODO: try plain residual layers + for highway in self.highways: + x = highway(x) + # (B, T_in, hid_features*2) + # TODO: replace GRU with convolution as in Deep Voice 3 + self.gru.flatten_parameters() + outputs, _ = self.gru(x) + return outputs + + +class EncoderCBHG(nn.Module): + r"""CBHG module with Encoder specific arguments""" + + def __init__(self): + super(EncoderCBHG, self).__init__() + self.cbhg = CBHG( + 128, + K=16, + conv_bank_features=128, + conv_projections=[128, 128], + highway_features=128, + gru_features=128, + num_highways=4) + + def forward(self, x): + return self.cbhg(x) + + +class Encoder(nn.Module): + r"""Stack Prenet and CBHG module for encoder + Args: + inputs (FloatTensor): embedding features + + Shapes: + - inputs: (B, T, D_in) + - outputs: (B, T, 128 * 2) + """ + + def __init__(self, in_features): + super(Encoder, self).__init__() + self.prenet = Prenet(in_features, out_features=[256, 128]) + self.cbhg = EncoderCBHG() + + def forward(self, inputs): + # B x T x prenet_dim + outputs = self.prenet(inputs) + outputs = self.cbhg(outputs.transpose(1, 2)) + return outputs + + +class PostCBHG(nn.Module): + def __init__(self, mel_dim): + super(PostCBHG, self).__init__() + self.cbhg = CBHG( + mel_dim, + K=8, + conv_bank_features=128, + conv_projections=[256, mel_dim], + highway_features=128, + gru_features=128, + num_highways=4) + + def forward(self, x): + return self.cbhg(x) + + +class Decoder(nn.Module): + """Tacotron decoder. + + Args: + in_channels (int): number of input channels. + frame_channels (int): number of feature frame channels. + r (int): number of outputs per time step (reduction rate). + memory_size (int): size of the past window. if <= 0 memory_size = r + attn_type (string): type of attention used in decoder. + attn_windowing (bool): if true, define an attention window centered to maximum + attention response. It provides more robust attention alignment especially + at interence time. + attn_norm (string): attention normalization function. 
'sigmoid' or 'softmax'. + prenet_type (string): 'original' or 'bn'. + prenet_dropout (float): prenet dropout rate. + forward_attn (bool): if true, use forward attention method. https://arxiv.org/abs/1807.06736 + trans_agent (bool): if true, use transition agent. https://arxiv.org/abs/1807.06736 + forward_attn_mask (bool): if true, mask attention values smaller than a threshold. + location_attn (bool): if true, use location sensitive attention. + attn_K (int): number of attention heads for GravesAttention. + separate_stopnet (bool): if true, detach stopnet input to prevent gradient flow. + speaker_embedding_dim (int): size of speaker embedding vector, for multi-speaker training. + """ + + # Pylint gets confused by PyTorch conventions here + # pylint: disable=attribute-defined-outside-init + + def __init__(self, in_channels, frame_channels, r, memory_size, attn_type, attn_windowing, + attn_norm, prenet_type, prenet_dropout, forward_attn, + trans_agent, forward_attn_mask, location_attn, attn_K, + separate_stopnet): + super(Decoder, self).__init__() + self.r_init = r + self.r = r + self.in_channels = in_channels + self.max_decoder_steps = 500 + self.use_memory_queue = memory_size > 0 + self.memory_size = memory_size if memory_size > 0 else r + self.frame_channels = frame_channels + self.separate_stopnet = separate_stopnet + self.query_dim = 256 + # memory -> |Prenet| -> processed_memory + prenet_dim = frame_channels * self.memory_size if self.use_memory_queue else frame_channels + self.prenet = Prenet( + prenet_dim, + prenet_type, + prenet_dropout, + out_features=[256, 128]) + # processed_inputs, processed_memory -> |Attention| -> Attention, attention, RNN_State + # attention_rnn generates queries for the attention mechanism + self.attention_rnn = nn.GRUCell(in_channels + 128, self.query_dim) + + self.attention = init_attn(attn_type=attn_type, + query_dim=self.query_dim, + embedding_dim=in_channels, + attention_dim=128, + location_attention=location_attn, + attention_location_n_filters=32, + attention_location_kernel_size=31, + windowing=attn_windowing, + norm=attn_norm, + forward_attn=forward_attn, + trans_agent=trans_agent, + forward_attn_mask=forward_attn_mask, + attn_K=attn_K) + # (processed_memory | attention context) -> |Linear| -> decoder_RNN_input + self.project_to_decoder_in = nn.Linear(256 + in_channels, 256) + # decoder_RNN_input -> |RNN| -> RNN_state + self.decoder_rnns = nn.ModuleList( + [nn.GRUCell(256, 256) for _ in range(2)]) + # RNN_state -> |Linear| -> mel_spec + self.proj_to_mel = nn.Linear(256, frame_channels * self.r_init) + # learn init values instead of zero init. 
+ self.stopnet = StopNet(256 + frame_channels * self.r_init) + + def set_r(self, new_r): + self.r = new_r + + def _reshape_memory(self, memory): + """ + Reshape the spectrograms for given 'r' + """ + # Grouping multiple frames if necessary + if memory.size(-1) == self.frame_channels: + memory = memory.view(memory.shape[0], memory.size(1) // self.r, -1) + # Time first (T_decoder, B, frame_channels) + memory = memory.transpose(0, 1) + return memory + + def _init_states(self, inputs): + """ + Initialization of decoder states + """ + B = inputs.size(0) + # go frame as zeros matrix + if self.use_memory_queue: + self.memory_input = torch.zeros(1, device=inputs.device).repeat(B, self.frame_channels * self.memory_size) + else: + self.memory_input = torch.zeros(1, device=inputs.device).repeat(B, self.frame_channels) + # decoder states + self.attention_rnn_hidden = torch.zeros(1, device=inputs.device).repeat(B, 256) + self.decoder_rnn_hiddens = [ + torch.zeros(1, device=inputs.device).repeat(B, 256) + for idx in range(len(self.decoder_rnns)) + ] + self.context_vec = inputs.data.new(B, self.in_channels).zero_() + # cache attention inputs + self.processed_inputs = self.attention.preprocess_inputs(inputs) + + def _parse_outputs(self, outputs, attentions, stop_tokens): + # Back to batch first + attentions = torch.stack(attentions).transpose(0, 1) + stop_tokens = torch.stack(stop_tokens).transpose(0, 1) + outputs = torch.stack(outputs).transpose(0, 1).contiguous() + outputs = outputs.view( + outputs.size(0), -1, self.frame_channels) + outputs = outputs.transpose(1, 2) + return outputs, attentions, stop_tokens + + def decode(self, inputs, mask=None): + # Prenet + processed_memory = self.prenet(self.memory_input) + # Attention RNN + self.attention_rnn_hidden = self.attention_rnn( + torch.cat((processed_memory, self.context_vec), -1), + self.attention_rnn_hidden) + self.context_vec = self.attention( + self.attention_rnn_hidden, inputs, self.processed_inputs, mask) + # Concat RNN output and attention context vector + decoder_input = self.project_to_decoder_in( + torch.cat((self.attention_rnn_hidden, self.context_vec), -1)) + + # Pass through the decoder RNNs + for idx in range(len(self.decoder_rnns)): + self.decoder_rnn_hiddens[idx] = self.decoder_rnns[idx]( + decoder_input, self.decoder_rnn_hiddens[idx]) + # Residual connection + decoder_input = self.decoder_rnn_hiddens[idx] + decoder_input + decoder_output = decoder_input + + # predict mel vectors from decoder vectors + output = self.proj_to_mel(decoder_output) + # output = torch.sigmoid(output) + # predict stop token + stopnet_input = torch.cat([decoder_output, output], -1) + if self.separate_stopnet: + stop_token = self.stopnet(stopnet_input.detach()) + else: + stop_token = self.stopnet(stopnet_input) + output = output[:, : self.r * self.frame_channels] + return output, stop_token, self.attention.attention_weights + + def _update_memory_input(self, new_memory): + if self.use_memory_queue: + if self.memory_size > self.r: + # memory queue size is larger than number of frames per decoder iter + self.memory_input = torch.cat([ + new_memory, self.memory_input[:, :( + self.memory_size - self.r) * self.frame_channels].clone() + ], dim=-1) + else: + # memory queue size smaller than number of frames per decoder iter + self.memory_input = new_memory[:, :self.memory_size * self.frame_channels] + else: + # use only the last frame prediction + # assert new_memory.shape[-1] == self.r * self.frame_channels + self.memory_input = new_memory[:, self.frame_channels * 
(self.r - 1):] + + def forward(self, inputs, memory, mask): + """ + Args: + inputs: Encoder outputs. + memory: Decoder memory (autoregression. If None (at eval-time), + decoder outputs are used as decoder inputs. If None, it uses the last + output as the input. + mask: Attention mask for sequence padding. + + Shapes: + - inputs: (B, T, D_out_enc) + - memory: (B, T_mel, D_mel) + """ + # Run greedy decoding if memory is None + memory = self._reshape_memory(memory) + outputs = [] + attentions = [] + stop_tokens = [] + t = 0 + self._init_states(inputs) + self.attention.init_states(inputs) + while len(outputs) < memory.size(0): + if t > 0: + new_memory = memory[t - 1] + self._update_memory_input(new_memory) + + output, stop_token, attention = self.decode(inputs, mask) + outputs += [output] + attentions += [attention] + stop_tokens += [stop_token.squeeze(1)] + t += 1 + return self._parse_outputs(outputs, attentions, stop_tokens) + + def inference(self, inputs): + """ + Args: + inputs: encoder outputs. + Shapes: + - inputs: batch x time x encoder_out_dim + """ + outputs = [] + attentions = [] + stop_tokens = [] + t = 0 + self._init_states(inputs) + self.attention.init_win_idx() + self.attention.init_states(inputs) + while True: + if t > 0: + new_memory = outputs[-1] + self._update_memory_input(new_memory) + output, stop_token, attention = self.decode(inputs, None) + stop_token = torch.sigmoid(stop_token.data) + outputs += [output] + attentions += [attention] + stop_tokens += [stop_token] + t += 1 + if t > inputs.shape[1] / 4 and (stop_token > 0.6 + or attention[:, -1].item() > 0.6): + break + if t > self.max_decoder_steps: + print(" | > Decoder stopped with 'max_decoder_steps") + break + return self._parse_outputs(outputs, attentions, stop_tokens) + + +class StopNet(nn.Module): + r"""Stopnet signalling decoder to stop inference. + Args: + in_features (int): feature dimension of input. + """ + + def __init__(self, in_features): + super(StopNet, self).__init__() + self.dropout = nn.Dropout(0.1) + self.linear = nn.Linear(in_features, 1) + torch.nn.init.xavier_uniform_( + self.linear.weight, gain=torch.nn.init.calculate_gain('linear')) + + def forward(self, inputs): + outputs = self.dropout(inputs) + outputs = self.linear(outputs) + return outputs diff --git a/TTS/tts/layers/tacotron2.py b/TTS/tts/layers/tacotron2.py new file mode 100644 index 0000000000000000000000000000000000000000..8e6dbc1510608ca5dae296f1d67e31873463783f --- /dev/null +++ b/TTS/tts/layers/tacotron2.py @@ -0,0 +1,424 @@ +import torch +from torch import nn +from torch.nn import functional as F +from .common_layers import Prenet, Linear +from .attentions import init_attn + +# NOTE: linter has a problem with the current TF release +#pylint: disable=no-value-for-parameter +#pylint: disable=unexpected-keyword-arg +class ConvBNBlock(nn.Module): + r"""Convolutions with Batch Normalization and non-linear activation. + + Args: + in_channels (int): number of input channels. + out_channels (int): number of output channels. + kernel_size (int): convolution kernel size. + activation (str): 'relu', 'tanh', None (linear). 
+ + Shapes: + - input: (B, C_in, T) + - output: (B, C_out, T) + """ + def __init__(self, in_channels, out_channels, kernel_size, activation=None): + super(ConvBNBlock, self).__init__() + assert (kernel_size - 1) % 2 == 0 + padding = (kernel_size - 1) // 2 + self.convolution1d = nn.Conv1d(in_channels, + out_channels, + kernel_size, + padding=padding) + self.batch_normalization = nn.BatchNorm1d(out_channels, momentum=0.1, eps=1e-5) + self.dropout = nn.Dropout(p=0.5) + if activation == 'relu': + self.activation = nn.ReLU() + elif activation == 'tanh': + self.activation = nn.Tanh() + else: + self.activation = nn.Identity() + + def forward(self, x): + o = self.convolution1d(x) + o = self.batch_normalization(o) + o = self.activation(o) + o = self.dropout(o) + return o + + +class Postnet(nn.Module): + r"""Tacotron2 Postnet + + Args: + in_out_channels (int): number of output channels. + + Shapes: + - input: (B, C_in, T) + - output: (B, C_in, T) + """ + def __init__(self, in_out_channels, num_convs=5): + super(Postnet, self).__init__() + self.convolutions = nn.ModuleList() + self.convolutions.append( + ConvBNBlock(in_out_channels, 512, kernel_size=5, activation='tanh')) + for _ in range(1, num_convs - 1): + self.convolutions.append( + ConvBNBlock(512, 512, kernel_size=5, activation='tanh')) + self.convolutions.append( + ConvBNBlock(512, in_out_channels, kernel_size=5, activation=None)) + + def forward(self, x): + o = x + for layer in self.convolutions: + o = layer(o) + return o + + +class Encoder(nn.Module): + r"""Tacotron2 Encoder + + Args: + in_out_channels (int): number of input and output channels. + + Shapes: + - input: (B, C_in, T) + - output: (B, C_in, T) + """ + def __init__(self, in_out_channels=512): + super(Encoder, self).__init__() + self.convolutions = nn.ModuleList() + for _ in range(3): + self.convolutions.append( + ConvBNBlock(in_out_channels, in_out_channels, 5, 'relu')) + self.lstm = nn.LSTM(in_out_channels, + int(in_out_channels / 2), + num_layers=1, + batch_first=True, + bias=True, + bidirectional=True) + self.rnn_state = None + + def forward(self, x, input_lengths): + o = x + for layer in self.convolutions: + o = layer(o) + o = o.transpose(1, 2) + o = nn.utils.rnn.pack_padded_sequence(o, + input_lengths.cpu(), + batch_first=True) + self.lstm.flatten_parameters() + o, _ = self.lstm(o) + o, _ = nn.utils.rnn.pad_packed_sequence(o, batch_first=True) + return o + + def inference(self, x): + o = x + for layer in self.convolutions: + o = layer(o) + o = o.transpose(1, 2) + # self.lstm.flatten_parameters() + o, _ = self.lstm(o) + return o + + +# adapted from https://github.com/NVIDIA/tacotron2/ +class Decoder(nn.Module): + """Tacotron2 decoder. We don't use Zoneout but Dropout between RNN layers. + + Args: + in_channels (int): number of input channels. + frame_channels (int): number of feature frame channels. + r (int): number of outputs per time step (reduction rate). + memory_size (int): size of the past window. if <= 0 memory_size = r + attn_type (string): type of attention used in decoder. + attn_win (bool): if true, define an attention window centered to maximum + attention response. It provides more robust attention alignment especially + at interence time. + attn_norm (string): attention normalization function. 'sigmoid' or 'softmax'. + prenet_type (string): 'original' or 'bn'. + prenet_dropout (float): prenet dropout rate. + forward_attn (bool): if true, use forward attention method. https://arxiv.org/abs/1807.06736 + trans_agent (bool): if true, use transition agent. 
https://arxiv.org/abs/1807.06736 + forward_attn_mask (bool): if true, mask attention values smaller than a threshold. + location_attn (bool): if true, use location sensitive attention. + attn_K (int): number of attention heads for GravesAttention. + separate_stopnet (bool): if true, detach stopnet input to prevent gradient flow. + """ + # Pylint gets confused by PyTorch conventions here + #pylint: disable=attribute-defined-outside-init + def __init__(self, in_channels, frame_channels, r, attn_type, attn_win, attn_norm, + prenet_type, prenet_dropout, forward_attn, trans_agent, + forward_attn_mask, location_attn, attn_K, separate_stopnet): + super(Decoder, self).__init__() + self.frame_channels = frame_channels + self.r_init = r + self.r = r + self.encoder_embedding_dim = in_channels + self.separate_stopnet = separate_stopnet + self.max_decoder_steps = 1000 + self.stop_threshold = 0.5 + + # model dimensions + self.query_dim = 1024 + self.decoder_rnn_dim = 1024 + self.prenet_dim = 256 + self.attn_dim = 128 + self.p_attention_dropout = 0.1 + self.p_decoder_dropout = 0.1 + + # memory -> |Prenet| -> processed_memory + prenet_dim = self.frame_channels + self.prenet = Prenet(prenet_dim, + prenet_type, + prenet_dropout, + out_features=[self.prenet_dim, self.prenet_dim], + bias=False) + + self.attention_rnn = nn.LSTMCell(self.prenet_dim + in_channels, + self.query_dim, + bias=True) + + self.attention = init_attn(attn_type=attn_type, + query_dim=self.query_dim, + embedding_dim=in_channels, + attention_dim=128, + location_attention=location_attn, + attention_location_n_filters=32, + attention_location_kernel_size=31, + windowing=attn_win, + norm=attn_norm, + forward_attn=forward_attn, + trans_agent=trans_agent, + forward_attn_mask=forward_attn_mask, + attn_K=attn_K) + + self.decoder_rnn = nn.LSTMCell(self.query_dim + in_channels, + self.decoder_rnn_dim, + bias=True) + + self.linear_projection = Linear(self.decoder_rnn_dim + in_channels, + self.frame_channels * self.r_init) + + self.stopnet = nn.Sequential( + nn.Dropout(0.1), + Linear(self.decoder_rnn_dim + self.frame_channels * self.r_init, + 1, + bias=True, + init_gain='sigmoid')) + self.memory_truncated = None + + def set_r(self, new_r): + self.r = new_r + + def get_go_frame(self, inputs): + B = inputs.size(0) + memory = torch.zeros(1, device=inputs.device).repeat( + B, self.frame_channels * self.r) + return memory + + def _init_states(self, inputs, mask, keep_states=False): + B = inputs.size(0) + # T = inputs.size(1) + if not keep_states: + self.query = torch.zeros(1, device=inputs.device).repeat( + B, self.query_dim) + self.attention_rnn_cell_state = torch.zeros( + 1, device=inputs.device).repeat(B, self.query_dim) + self.decoder_hidden = torch.zeros(1, device=inputs.device).repeat( + B, self.decoder_rnn_dim) + self.decoder_cell = torch.zeros(1, device=inputs.device).repeat( + B, self.decoder_rnn_dim) + self.context = torch.zeros(1, device=inputs.device).repeat( + B, self.encoder_embedding_dim) + self.inputs = inputs + self.processed_inputs = self.attention.preprocess_inputs(inputs) + self.mask = mask + + def _reshape_memory(self, memory): + """ + Reshape the spectrograms for given 'r' + """ + # Grouping multiple frames if necessary + if memory.size(-1) == self.frame_channels: + memory = memory.view(memory.shape[0], memory.size(1) // self.r, -1) + # Time first (T_decoder, B, frame_channels) + memory = memory.transpose(0, 1) + return memory + + def _parse_outputs(self, outputs, stop_tokens, alignments): + alignments = 
torch.stack(alignments).transpose(0, 1) + stop_tokens = torch.stack(stop_tokens).transpose(0, 1) + outputs = torch.stack(outputs).transpose(0, 1).contiguous() + outputs = outputs.view(outputs.size(0), -1, self.frame_channels) + outputs = outputs.transpose(1, 2) + return outputs, stop_tokens, alignments + + def _update_memory(self, memory): + if len(memory.shape) == 2: + return memory[:, self.frame_channels * (self.r - 1):] + return memory[:, :, self.frame_channels * (self.r - 1):] + + def decode(self, memory): + ''' + shapes: + - memory: B x r * self.frame_channels + ''' + # self.context: B x D_en + # query_input: B x D_en + (r * self.frame_channels) + query_input = torch.cat((memory, self.context), -1) + # self.query and self.attention_rnn_cell_state : B x D_attn_rnn + self.query, self.attention_rnn_cell_state = self.attention_rnn( + query_input, (self.query, self.attention_rnn_cell_state)) + self.query = F.dropout(self.query, self.p_attention_dropout, + self.training) + self.attention_rnn_cell_state = F.dropout( + self.attention_rnn_cell_state, self.p_attention_dropout, + self.training) + # B x D_en + self.context = self.attention(self.query, self.inputs, + self.processed_inputs, self.mask) + # B x (D_en + D_attn_rnn) + decoder_rnn_input = torch.cat((self.query, self.context), -1) + # self.decoder_hidden and self.decoder_cell: B x D_decoder_rnn + self.decoder_hidden, self.decoder_cell = self.decoder_rnn( + decoder_rnn_input, (self.decoder_hidden, self.decoder_cell)) + self.decoder_hidden = F.dropout(self.decoder_hidden, + self.p_decoder_dropout, self.training) + # B x (D_decoder_rnn + D_en) + decoder_hidden_context = torch.cat((self.decoder_hidden, self.context), + dim=1) + # B x (self.r * self.frame_channels) + decoder_output = self.linear_projection(decoder_hidden_context) + # B x (D_decoder_rnn + (self.r * self.frame_channels)) + stopnet_input = torch.cat((self.decoder_hidden, decoder_output), dim=1) + if self.separate_stopnet: + stop_token = self.stopnet(stopnet_input.detach()) + else: + stop_token = self.stopnet(stopnet_input) + # select outputs for the reduction rate self.r + decoder_output = decoder_output[:, :self.r * self.frame_channels] + return decoder_output, self.attention.attention_weights, stop_token + + def forward(self, inputs, memories, mask): + r"""Train Decoder with teacher forcing. + Args: + inputs: Encoder outputs. + memories: Feature frames for teacher-forcing. + mask: Attention mask for sequence padding. + + Shapes: + - inputs: (B, T, D_out_enc) + - memory: (B, T_mel, D_mel) + - outputs: (B, T_mel, D_mel) + - alignments: (B, T_in, T_out) + - stop_tokens: (B, T_out) + """ + memory = self.get_go_frame(inputs).unsqueeze(0) + memories = self._reshape_memory(memories) + memories = torch.cat((memory, memories), dim=0) + memories = self._update_memory(memories) + memories = self.prenet(memories) + + self._init_states(inputs, mask=mask) + self.attention.init_states(inputs) + + outputs, stop_tokens, alignments = [], [], [] + while len(outputs) < memories.size(0) - 1: + memory = memories[len(outputs)] + decoder_output, attention_weights, stop_token = self.decode(memory) + outputs += [decoder_output.squeeze(1)] + stop_tokens += [stop_token.squeeze(1)] + alignments += [attention_weights] + + outputs, stop_tokens, alignments = self._parse_outputs( + outputs, stop_tokens, alignments) + return outputs, alignments, stop_tokens + + def inference(self, inputs): + r"""Decoder inference without teacher forcing and use + Stopnet to stop decoder. + Args: + inputs: Encoder outputs. 
+ + Shapes: + - inputs: (B, T, D_out_enc) + - outputs: (B, T_mel, D_mel) + - alignments: (B, T_in, T_out) + - stop_tokens: (B, T_out) + """ + memory = self.get_go_frame(inputs) + memory = self._update_memory(memory) + + self._init_states(inputs, mask=None) + self.attention.init_states(inputs) + + outputs, stop_tokens, alignments, t = [], [], [], 0 + while True: + memory = self.prenet(memory) + decoder_output, alignment, stop_token = self.decode(memory) + stop_token = torch.sigmoid(stop_token.data) + outputs += [decoder_output.squeeze(1)] + stop_tokens += [stop_token] + alignments += [alignment] + + if stop_token > self.stop_threshold and t > inputs.shape[0] // 2: + break + if len(outputs) == self.max_decoder_steps: + print(" | > Decoder stopped with 'max_decoder_steps") + break + + memory = self._update_memory(decoder_output) + t += 1 + + outputs, stop_tokens, alignments = self._parse_outputs( + outputs, stop_tokens, alignments) + + return outputs, alignments, stop_tokens + + def inference_truncated(self, inputs): + """ + Preserve decoder states for continuous inference + """ + if self.memory_truncated is None: + self.memory_truncated = self.get_go_frame(inputs) + self._init_states(inputs, mask=None, keep_states=False) + else: + self._init_states(inputs, mask=None, keep_states=True) + + self.attention.init_win_idx() + self.attention.init_states(inputs) + outputs, stop_tokens, alignments, t = [], [], [], 0 + while True: + memory = self.prenet(self.memory_truncated) + decoder_output, alignment, stop_token = self.decode(memory) + stop_token = torch.sigmoid(stop_token.data) + outputs += [decoder_output.squeeze(1)] + stop_tokens += [stop_token] + alignments += [alignment] + + if stop_token > 0.7: + break + if len(outputs) == self.max_decoder_steps: + print(" | > Decoder stopped with 'max_decoder_steps") + break + + self.memory_truncated = decoder_output + t += 1 + + outputs, stop_tokens, alignments = self._parse_outputs( + outputs, stop_tokens, alignments) + + return outputs, alignments, stop_tokens + + def inference_step(self, inputs, t, memory=None): + """ + For debug purposes + """ + if t == 0: + memory = self.get_go_frame(inputs) + self._init_states(inputs, mask=None) + + memory = self.prenet(memory) + decoder_output, stop_token, alignment = self.decode(memory) + stop_token = torch.sigmoid(stop_token.data) + memory = decoder_output + return decoder_output, stop_token, alignment diff --git a/TTS/tts/models/__init__.py b/TTS/tts/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/TTS/tts/models/glow_tts.py b/TTS/tts/models/glow_tts.py new file mode 100644 index 0000000000000000000000000000000000000000..2f9b6f9be2db822570dcfbb549bba97b273f5d82 --- /dev/null +++ b/TTS/tts/models/glow_tts.py @@ -0,0 +1,233 @@ +import math +import torch +from torch import nn +from torch.nn import functional as F + +from TTS.tts.layers.glow_tts.encoder import Encoder +from TTS.tts.layers.glow_tts.decoder import Decoder +from TTS.tts.utils.generic_utils import sequence_mask +from TTS.tts.layers.glow_tts.monotonic_align import maximum_path, generate_path + + +class GlowTts(nn.Module): + """Glow TTS models from https://arxiv.org/abs/2005.11129 + + Args: + num_chars (int): number of embedding characters. + hidden_channels_enc (int): number of embedding and encoder channels. + hidden_channels_dec (int): number of decoder channels. + use_encoder_prenet (bool): enable/disable prenet for encoder. 
Prenet modules are hard-coded for each alternative encoder. + hidden_channels_dp (int): number of duration predictor channels. + out_channels (int): number of output channels. It should be equal to the number of spectrogram filters. + num_flow_blocks_dec (int): number of decoder blocks. + kernel_size_dec (int): decoder kernel size. + dilation_rate (int): rate at which the dilation grows in each layer of a decoder block. + num_block_layers (int): number of decoder layers in each decoder block. + dropout_p_dp (float): dropout rate for the duration predictor. + dropout_p_dec (float): dropout rate for the decoder. + num_speakers (int): number of speakers, used to set the size of the speaker embedding layer. + c_in_channels (int): number of speaker embedding channels. It is set to 512 if embeddings are learned. + num_splits (int): number of split levels in the invertible conv1x1 operation. + num_squeeze (int): number of squeeze levels. Squeezing increases the number of channels and reduces the number of time steps by a factor of 'num_squeeze'. + sigmoid_scale (bool): enable/disable sigmoid scaling in the decoder. + mean_only (bool): if True, the encoder only computes the mean and uses a constant variance for each time step. + encoder_type (str): encoder module type. + encoder_params (dict): encoder module parameters. + external_speaker_embedding_dim (int): channels of external speaker embedding vectors. + """ + def __init__(self, + num_chars, + hidden_channels_enc, + hidden_channels_dec, + use_encoder_prenet, + hidden_channels_dp, + out_channels, + num_flow_blocks_dec=12, + kernel_size_dec=5, + dilation_rate=5, + num_block_layers=4, + dropout_p_dp=0.1, + dropout_p_dec=0.05, + num_speakers=0, + c_in_channels=0, + num_splits=4, + num_squeeze=1, + sigmoid_scale=False, + mean_only=False, + encoder_type="transformer", + encoder_params=None, + external_speaker_embedding_dim=None): + + super().__init__() + self.num_chars = num_chars + self.hidden_channels_dp = hidden_channels_dp + self.hidden_channels_enc = hidden_channels_enc + self.hidden_channels_dec = hidden_channels_dec + self.out_channels = out_channels + self.num_flow_blocks_dec = num_flow_blocks_dec + self.kernel_size_dec = kernel_size_dec + self.dilation_rate = dilation_rate + self.num_block_layers = num_block_layers + self.dropout_p_dec = dropout_p_dec + self.num_speakers = num_speakers + self.c_in_channels = c_in_channels + self.num_splits = num_splits + self.num_squeeze = num_squeeze + self.sigmoid_scale = sigmoid_scale + self.mean_only = mean_only + self.use_encoder_prenet = use_encoder_prenet + + # model constants. + self.noise_scale = 0.33 # defines the noise variance applied to the random z vector at inference. + self.length_scale = 1. # scaler for the duration predictor. The larger it is, the slower the speech.
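Since `noise_scale` and `length_scale` are plain attributes rather than constructor arguments, they can be adjusted after the model is built to trade off prosody variation against speaking rate at inference. The snippet below is only a hedged sketch: `model` is assumed to be an already constructed `GlowTts` instance, and the tensor shapes are placeholders rather than values from this diff.

```python
import torch

# `model` is assumed to be a built GlowTts instance (see __init__ above).
model.length_scale = 1.2   # >1 stretches predicted durations -> slower speech
model.noise_scale = 0.5    # larger values sample a noisier latent z -> more varied prosody

token_ids = torch.randint(0, model.num_chars, (1, 42))    # [B, T]
token_lengths = torch.tensor([42])                        # [B]
mel, logdet, y_mean, y_log_scale, attn, o_dur_log, o_attn_dur = model.inference(
    token_ids, token_lengths)
```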
+ self.external_speaker_embedding_dim = external_speaker_embedding_dim + + # if is a multispeaker and c_in_channels is 0, set to 256 + if num_speakers > 1: + if self.c_in_channels == 0 and not self.external_speaker_embedding_dim: + self.c_in_channels = 512 + elif self.external_speaker_embedding_dim: + self.c_in_channels = self.external_speaker_embedding_dim + + self.encoder = Encoder(num_chars, + out_channels=out_channels, + hidden_channels=hidden_channels_enc, + hidden_channels_dp=hidden_channels_dp, + encoder_type=encoder_type, + encoder_params=encoder_params, + mean_only=mean_only, + use_prenet=use_encoder_prenet, + dropout_p_dp=dropout_p_dp, + c_in_channels=self.c_in_channels) + + self.decoder = Decoder(out_channels, + hidden_channels_dec, + kernel_size_dec, + dilation_rate, + num_flow_blocks_dec, + num_block_layers, + dropout_p=dropout_p_dec, + num_splits=num_splits, + num_squeeze=num_squeeze, + sigmoid_scale=sigmoid_scale, + c_in_channels=self.c_in_channels) + + if num_speakers > 1 and not external_speaker_embedding_dim: + # speaker embedding layer + self.emb_g = nn.Embedding(num_speakers, self.c_in_channels) + nn.init.uniform_(self.emb_g.weight, -0.1, 0.1) + + @staticmethod + def compute_outputs(attn, o_mean, o_log_scale, x_mask): + # compute final values with the computed alignment + y_mean = torch.matmul( + attn.squeeze(1).transpose(1, 2), o_mean.transpose(1, 2)).transpose( + 1, 2) # [b, t', t], [b, t, d] -> [b, d, t'] + y_log_scale = torch.matmul( + attn.squeeze(1).transpose(1, 2), o_log_scale.transpose( + 1, 2)).transpose(1, 2) # [b, t', t], [b, t, d] -> [b, d, t'] + # compute total duration with adjustment + o_attn_dur = torch.log(1 + torch.sum(attn, -1)) * x_mask + return y_mean, y_log_scale, o_attn_dur + + def forward(self, x, x_lengths, y=None, y_lengths=None, attn=None, g=None): + """ + Shapes: + x: [B, T] + x_lenghts: B + y: [B, C, T] + y_lengths: B + g: [B, C] or B + """ + y_max_length = y.size(2) + # norm speaker embeddings + if g is not None: + if self.external_speaker_embedding_dim: + g = F.normalize(g).unsqueeze(-1) + else: + g = F.normalize(self.emb_g(g)).unsqueeze(-1)# [b, h, 1] + + # embedding pass + o_mean, o_log_scale, o_dur_log, x_mask = self.encoder(x, + x_lengths, + g=g) + # drop redisual frames wrt num_squeeze and set y_lengths. 
+ y, y_lengths, y_max_length, attn = self.preprocess( + y, y_lengths, y_max_length, None) + # create masks + y_mask = torch.unsqueeze(sequence_mask(y_lengths, y_max_length), + 1).to(x_mask.dtype) + attn_mask = torch.unsqueeze(x_mask, -1) * torch.unsqueeze(y_mask, 2) + # decoder pass + z, logdet = self.decoder(y, y_mask, g=g, reverse=False) + # find the alignment path + with torch.no_grad(): + o_scale = torch.exp(-2 * o_log_scale) + logp1 = torch.sum(-0.5 * math.log(2 * math.pi) - o_log_scale, + [1]).unsqueeze(-1) # [b, t, 1] + logp2 = torch.matmul(o_scale.transpose(1, 2), -0.5 * + (z**2)) # [b, t, d] x [b, d, t'] = [b, t, t'] + logp3 = torch.matmul((o_mean * o_scale).transpose(1, 2), + z) # [b, t, d] x [b, d, t'] = [b, t, t'] + logp4 = torch.sum(-0.5 * (o_mean**2) * o_scale, + [1]).unsqueeze(-1) # [b, t, 1] + logp = logp1 + logp2 + logp3 + logp4 # [b, t, t'] + attn = maximum_path(logp, + attn_mask.squeeze(1)).unsqueeze(1).detach() + y_mean, y_log_scale, o_attn_dur = self.compute_outputs( + attn, o_mean, o_log_scale, x_mask) + attn = attn.squeeze(1).permute(0, 2, 1) + return z, logdet, y_mean, y_log_scale, attn, o_dur_log, o_attn_dur + + @torch.no_grad() + def inference(self, x, x_lengths, g=None): + if g is not None: + if self.external_speaker_embedding_dim: + g = F.normalize(g).unsqueeze(-1) + else: + g = F.normalize(self.emb_g(g)).unsqueeze(-1) # [b, h] + + # embedding pass + o_mean, o_log_scale, o_dur_log, x_mask = self.encoder(x, + x_lengths, + g=g) + # compute output durations + w = (torch.exp(o_dur_log) - 1) * x_mask * self.length_scale + w_ceil = torch.ceil(w) + y_lengths = torch.clamp_min(torch.sum(w_ceil, [1, 2]), 1).long() + y_max_length = None + # compute masks + y_mask = torch.unsqueeze(sequence_mask(y_lengths, y_max_length), + 1).to(x_mask.dtype) + attn_mask = torch.unsqueeze(x_mask, -1) * torch.unsqueeze(y_mask, 2) + # compute attention mask + attn = generate_path(w_ceil.squeeze(1), + attn_mask.squeeze(1)).unsqueeze(1) + y_mean, y_log_scale, o_attn_dur = self.compute_outputs( + attn, o_mean, o_log_scale, x_mask) + + z = (y_mean + torch.exp(y_log_scale) * torch.randn_like(y_mean) * + self.noise_scale) * y_mask + # decoder pass + y, logdet = self.decoder(z, y_mask, g=g, reverse=True) + attn = attn.squeeze(1).permute(0, 2, 1) + return y, logdet, y_mean, y_log_scale, attn, o_dur_log, o_attn_dur + + def preprocess(self, y, y_lengths, y_max_length, attn=None): + if y_max_length is not None: + y_max_length = (y_max_length // self.num_squeeze) * self.num_squeeze + y = y[:, :, :y_max_length] + if attn is not None: + attn = attn[:, :, :, :y_max_length] + y_lengths = (y_lengths // self.num_squeeze) * self.num_squeeze + return y, y_lengths, y_max_length, attn + + def store_inverse(self): + self.decoder.store_inverse() + + def load_checkpoint(self, config, checkpoint_path, eval=False): # pylint: disable=unused-argument, redefined-builtin + state = torch.load(checkpoint_path, map_location=torch.device('cpu')) + self.load_state_dict(state['model']) + if eval: + self.eval() + self.store_inverse() + assert not self.training diff --git a/TTS/tts/models/speedy_speech.py b/TTS/tts/models/speedy_speech.py new file mode 100644 index 0000000000000000000000000000000000000000..93496d59a68df996bed338701861522873c0ff93 --- /dev/null +++ b/TTS/tts/models/speedy_speech.py @@ -0,0 +1,199 @@ +import torch +from torch import nn +from TTS.tts.layers.speedy_speech.decoder import Decoder +from TTS.tts.layers.speedy_speech.duration_predictor import DurationPredictor +from TTS.tts.layers.speedy_speech.encoder 
import Encoder, PositionalEncoding +from TTS.tts.utils.generic_utils import sequence_mask +from TTS.tts.layers.glow_tts.monotonic_align import generate_path + + +class SpeedySpeech(nn.Module): + """Speedy Speech model + https://arxiv.org/abs/2008.03802 + + Encoder -> DurationPredictor -> Decoder + + This model achieves reasonable performance with only + ~3M parameters and purely convolutional layers. + + This model requires precomputed phoneme durations to train the duration predictor. At inference + it uses the duration predictor to compute durations and expand the encoder outputs accordingly. + + Args: + num_chars (int): number of unique input characters. + out_channels (int): number of output tensor channels. It should equal the expected number of spectrogram channels. + hidden_channels (int): number of channels in all the model layers. + positional_encoding (bool, optional): enable/disable positional encoding on encoder outputs. Defaults to True. + length_scale (int, optional): coefficient that scales the predicted durations; values >1 slow the speech down, values <1 speed it up. Defaults to 1. + encoder_type (str, optional): set the encoder type. Defaults to 'residual_conv_bn'. + encoder_params (dict, optional): set encoder parameters depending on 'encoder_type'. Defaults to { "kernel_size": 4, "dilations": 4 * [1, 2, 4] + [1], "num_conv_blocks": 2, "num_res_blocks": 13 }. + decoder_type (str, optional): decoder type. Defaults to 'residual_conv_bn'. + decoder_params (dict, optional): set decoder parameters depending on 'decoder_type'. Defaults to { "kernel_size": 4, "dilations": 4 * [1, 2, 4, 8] + [1], "num_conv_blocks": 2, "num_res_blocks": 17 }. + num_speakers (int, optional): number of speakers for multi-speaker training. Defaults to 0. + external_c (bool, optional): enable external speaker embeddings. Defaults to False. + c_in_channels (int, optional): number of channels in speaker embedding vectors. Defaults to 0.
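As the docstring notes, training needs precomputed per-character durations while inference derives them from the duration predictor. The sketch below contrasts the two call signatures; the hyperparameters and tensor shapes are illustrative assumptions with randomly initialised weights, not project defaults beyond what the signature above shows.

```python
import torch
from TTS.tts.models.speedy_speech import SpeedySpeech

# Assumed, illustrative hyperparameters.
model = SpeedySpeech(num_chars=120, out_channels=80, hidden_channels=128)

B, T_in = 2, 33
x = torch.randint(0, 120, (B, T_in))            # character ids
x_lengths = torch.tensor([33, 21])
dr = torch.randint(1, 4, (B, T_in)).float()     # precomputed durations (frames per char)
y_lengths = dr.sum(1).long()                    # resulting frames per sample

# Training-style forward pass: ground-truth durations are given.
o_de, o_dr_log, attn = model(x, x_lengths, y_lengths, dr)
print(o_de.shape)                               # [B, 80, T_out]

# Inference: durations come from the duration predictor instead.
mel, attn = model.inference(x, x_lengths)
```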
+ """ +# pylint: disable=dangerous-default-value + + def __init__( + self, + num_chars, + out_channels, + hidden_channels, + positional_encoding=True, + length_scale=1, + encoder_type='residual_conv_bn', + encoder_params={ + "kernel_size": 4, + "dilations": 4 * [1, 2, 4] + [1], + "num_conv_blocks": 2, + "num_res_blocks": 13 + }, + decoder_type='residual_conv_bn', + decoder_params={ + "kernel_size": 4, + "dilations": 4 * [1, 2, 4, 8] + [1], + "num_conv_blocks": 2, + "num_res_blocks": 17 + }, + num_speakers=0, + external_c=False, + c_in_channels=0): + + super().__init__() + self.length_scale = float(length_scale) if isinstance(length_scale, int) else length_scale + self.emb = nn.Embedding(num_chars, hidden_channels) + self.encoder = Encoder(hidden_channels, hidden_channels, encoder_type, + encoder_params, c_in_channels) + if positional_encoding: + self.pos_encoder = PositionalEncoding(hidden_channels) + self.decoder = Decoder(out_channels, hidden_channels, + decoder_type, decoder_params) + self.duration_predictor = DurationPredictor(hidden_channels + c_in_channels) + + if num_speakers > 1 and not external_c: + # speaker embedding layer + self.emb_g = nn.Embedding(num_speakers, c_in_channels) + nn.init.uniform_(self.emb_g.weight, -0.1, 0.1) + + if c_in_channels > 0 and c_in_channels != hidden_channels: + self.proj_g = nn.Conv1d(c_in_channels, hidden_channels, 1) + + @staticmethod + def expand_encoder_outputs(en, dr, x_mask, y_mask): + """Generate attention alignment map from durations and + expand encoder outputs + + Example: + encoder output: [a,b,c,d] + durations: [1, 3, 2, 1] + + expanded: [a, b, b, b, c, c, d] + attention map: [[0, 0, 0, 0, 0, 0, 1], + [0, 0, 0, 0, 1, 1, 0], + [0, 1, 1, 1, 0, 0, 0], + [1, 0, 0, 0, 0, 0, 0]] + """ + attn_mask = torch.unsqueeze(x_mask, -1) * torch.unsqueeze(y_mask, 2) + attn = generate_path(dr, attn_mask.squeeze(1)).to(en.dtype) + o_en_ex = torch.matmul( + attn.squeeze(1).transpose(1, 2), en.transpose(1, + 2)).transpose(1, 2) + return o_en_ex, attn + + def format_durations(self, o_dr_log, x_mask): + o_dr = (torch.exp(o_dr_log) - 1) * x_mask * self.length_scale + o_dr[o_dr < 1] = 1.0 + o_dr = torch.round(o_dr) + return o_dr + + @staticmethod + def _concat_speaker_embedding(o_en, g): + g_exp = g.expand(-1, -1, o_en.size(-1)) # [B, C, T_en] + o_en = torch.cat([o_en, g_exp], 1) + return o_en + + def _sum_speaker_embedding(self, x, g): + # project g to decoder dim. 
+ if hasattr(self, 'proj_g'): + g = self.proj_g(g) + return x + g + + def _forward_encoder(self, x, x_lengths, g=None): + if hasattr(self, 'emb_g'): + g = nn.functional.normalize(self.emb_g(g)) # [B, C, 1] + + if g is not None: + g = g.unsqueeze(-1) + + # [B, T, C] + x_emb = self.emb(x) + # [B, C, T] + x_emb = torch.transpose(x_emb, 1, -1) + + # compute sequence masks + x_mask = torch.unsqueeze(sequence_mask(x_lengths, x.shape[1]), + 1).to(x.dtype) + + # encoder pass + o_en = self.encoder(x_emb, x_mask) + + # speaker conditioning for duration predictor + if g is not None: + o_en_dp = self._concat_speaker_embedding(o_en, g) + else: + o_en_dp = o_en + return o_en, o_en_dp, x_mask, g + + def _forward_decoder(self, o_en, o_en_dp, dr, x_mask, y_lengths, g): + y_mask = torch.unsqueeze(sequence_mask(y_lengths, None), + 1).to(o_en_dp.dtype) + # expand o_en with durations + o_en_ex, attn = self.expand_encoder_outputs(o_en, dr, x_mask, y_mask) + # positional encoding + if hasattr(self, 'pos_encoder'): + o_en_ex = self.pos_encoder(o_en_ex, y_mask) + # speaker embedding + if g is not None: + o_en_ex = self._sum_speaker_embedding(o_en_ex, g) + # decoder pass + o_de = self.decoder(o_en_ex, y_mask, g=g) + return o_de, attn.transpose(1, 2) + + def forward(self, x, x_lengths, y_lengths, dr, g=None): # pylint: disable=unused-argument + """ + Shapes: + x: [B, T_max] + x_lengths: [B] + y_lengths: [B] + dr: [B, T_max] + g: [B, C] + """ + o_en, o_en_dp, x_mask, g = self._forward_encoder(x, x_lengths, g) + o_dr_log = self.duration_predictor(o_en_dp.detach(), x_mask) + o_de, attn= self._forward_decoder(o_en, o_en_dp, dr, x_mask, y_lengths, g=g) + return o_de, o_dr_log.squeeze(1), attn + + def inference(self, x, x_lengths, g=None): # pylint: disable=unused-argument + """ + Shapes: + x: [B, T_max] + x_lengths: [B] + g: [B, C] + """ + # pad input to prevent dropping the last word + x = torch.nn.functional.pad(x, pad=(0, 5), mode='constant', value=0) + o_en, o_en_dp, x_mask, g = self._forward_encoder(x, x_lengths, g) + # duration predictor pass + o_dr_log = self.duration_predictor(o_en_dp.detach(), x_mask) + o_dr = self.format_durations(o_dr_log, x_mask).squeeze(1) + y_lengths = o_dr.sum(1) + o_de, attn = self._forward_decoder(o_en, o_en_dp, o_dr, x_mask, y_lengths, g=g) + return o_de, attn + + def load_checkpoint(self, config, checkpoint_path, eval=False): # pylint: disable=unused-argument, redefined-builtin + state = torch.load(checkpoint_path, map_location=torch.device('cpu')) + self.load_state_dict(state['model']) + if eval: + self.eval() + assert not self.training diff --git a/TTS/tts/models/tacotron.py b/TTS/tts/models/tacotron.py new file mode 100644 index 0000000000000000000000000000000000000000..0b68a96c8a0a2f07ad5e26e399d8c1ea33f83b7c --- /dev/null +++ b/TTS/tts/models/tacotron.py @@ -0,0 +1,213 @@ +# coding: utf-8 +import torch +from torch import nn + +from TTS.tts.layers.gst_layers import GST +from TTS.tts.layers.tacotron import Decoder, Encoder, PostCBHG +from TTS.tts.models.tacotron_abstract import TacotronAbstract + + +class Tacotron(TacotronAbstract): + """Tacotron as in https://arxiv.org/abs/1703.10135 + + It's an autoregressive encoder-attention-decoder-postnet architecture. + + Args: + num_chars (int): number of input characters to define the size of embedding layer. + num_speakers (int): number of speakers in the dataset. >1 enables multi-speaker training and model learns speaker embeddings. + r (int): initial model reduction rate. + postnet_output_dim (int, optional): postnet output channels. 
Defaults to 1025. + decoder_output_dim (int, optional): decoder output channels. Defaults to 80. + attn_type (str, optional): attention type. Check ```TTS.tts.layers.attentions.init_attn```. Defaults to 'original'. + attn_win (bool, optional): enable/disable attention windowing. + It is especially useful at inference for keeping the attention alignment diagonal. Defaults to False. + attn_norm (str, optional): Attention normalization method. "sigmoid" or "softmax". Defaults to "sigmoid". + prenet_type (str, optional): prenet type for the decoder. Defaults to "original". + prenet_dropout (bool, optional): enable/disable prenet dropout. Defaults to True. + forward_attn (bool, optional): enable/disable forward attention. + It is only valid if ```attn_type``` is ```original```. Defaults to False. + trans_agent (bool, optional): enable/disable transition agent in forward attention. Defaults to False. + forward_attn_mask (bool, optional): enable/disable extra masking over forward attention. Defaults to False. + location_attn (bool, optional): enable/disable location sensitive attention. + It is only valid if ```attn_type``` is ```original```. Defaults to True. + attn_K (int, optional): Number of attention heads for GMM attention. Defaults to 5. + separate_stopnet (bool, optional): if enabled, detach the stopnet input so that no gradient + flows from the stopnet back to the rest of the model. Defaults to True. + bidirectional_decoder (bool, optional): enable/disable bidirectional decoding. Defaults to False. + double_decoder_consistency (bool, optional): enable/disable double decoder consistency. Defaults to False. + ddc_r (int, optional): reduction rate for the coarse decoder of double decoder consistency. Defaults to None. + encoder_in_features (int, optional): input channels for the encoder. Defaults to 256. + decoder_in_features (int, optional): input channels for the decoder. Defaults to 256. + speaker_embedding_dim (int, optional): external speaker conditioning vector channels. Defaults to None. + gst (bool, optional): enable/disable global style token learning. Defaults to False. + gst_embedding_dim (int, optional): size of channels for GST vectors. Defaults to 256. + gst_num_heads (int, optional): number of attention heads for GST. Defaults to 4. + gst_style_tokens (int, optional): number of GST tokens. Defaults to 10. + gst_use_speaker_embedding (bool, optional): enable/disable feeding the speaker embedding to GST. Defaults to False. + memory_size (int, optional): size of the history queue fed to the prenet. The model feeds the last ```memory_size``` + output frames to the prenet. Defaults to 5.
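A minimal usage sketch may help tie the arguments above to the forward/inference API defined below. Everything here is illustrative: the model is randomly initialised, the sizes are assumptions, and the run only checks shapes (decoding will typically stop at `max_decoder_steps` on untrained weights).

```python
import torch
from TTS.tts.models.tacotron import Tacotron

model = Tacotron(num_chars=120, num_speakers=1, r=5)   # single speaker, other args left at their defaults
model.eval()

characters = torch.randint(0, 120, (1, 25))            # one sentence of 25 symbol ids
decoder_out, postnet_out, alignments, stop_tokens = model.inference(characters)

print(decoder_out.shape)   # [1, T_out, decoder_output_dim]  (mel frames)
print(postnet_out.shape)   # [1, T_out, postnet_output_dim]  (linear spectrogram)
```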
+ """ + def __init__(self, + num_chars, + num_speakers, + r=5, + postnet_output_dim=1025, + decoder_output_dim=80, + attn_type='original', + attn_win=False, + attn_norm="sigmoid", + prenet_type="original", + prenet_dropout=True, + forward_attn=False, + trans_agent=False, + forward_attn_mask=False, + location_attn=True, + attn_K=5, + separate_stopnet=True, + bidirectional_decoder=False, + double_decoder_consistency=False, + ddc_r=None, + encoder_in_features=256, + decoder_in_features=256, + speaker_embedding_dim=None, + gst=False, + gst_embedding_dim=256, + gst_num_heads=4, + gst_style_tokens=10, + memory_size=5, + gst_use_speaker_embedding=False): + super(Tacotron, + self).__init__(num_chars, num_speakers, r, postnet_output_dim, + decoder_output_dim, attn_type, attn_win, + attn_norm, prenet_type, prenet_dropout, + forward_attn, trans_agent, forward_attn_mask, + location_attn, attn_K, separate_stopnet, + bidirectional_decoder, double_decoder_consistency, + ddc_r, encoder_in_features, decoder_in_features, + speaker_embedding_dim, gst, gst_embedding_dim, + gst_num_heads, gst_style_tokens, gst_use_speaker_embedding) + + # speaker embedding layers + if self.num_speakers > 1: + if not self.embeddings_per_sample: + speaker_embedding_dim = 256 + self.speaker_embedding = nn.Embedding(self.num_speakers, speaker_embedding_dim) + self.speaker_embedding.weight.data.normal_(0, 0.3) + + # speaker and gst embeddings is concat in decoder input + if self.num_speakers > 1: + self.decoder_in_features += speaker_embedding_dim # add speaker embedding dim + + # embedding layer + self.embedding = nn.Embedding(num_chars, 256, padding_idx=0) + self.embedding.weight.data.normal_(0, 0.3) + + # base model layers + self.encoder = Encoder(self.encoder_in_features) + self.decoder = Decoder(self.decoder_in_features, decoder_output_dim, r, + memory_size, attn_type, attn_win, attn_norm, + prenet_type, prenet_dropout, forward_attn, + trans_agent, forward_attn_mask, location_attn, + attn_K, separate_stopnet) + self.postnet = PostCBHG(decoder_output_dim) + self.last_linear = nn.Linear(self.postnet.cbhg.gru_features * 2, + postnet_output_dim) + + # global style token layers + if self.gst: + self.gst_layer = GST(num_mel=80, + num_heads=gst_num_heads, + num_style_tokens=gst_style_tokens, + gst_embedding_dim=self.gst_embedding_dim, + speaker_embedding_dim=speaker_embedding_dim if self.embeddings_per_sample and self.gst_use_speaker_embedding else None) + # backward pass decoder + if self.bidirectional_decoder: + self._init_backward_decoder() + # setup DDC + if self.double_decoder_consistency: + self.coarse_decoder = Decoder( + self.decoder_in_features, decoder_output_dim, ddc_r, memory_size, + attn_type, attn_win, attn_norm, prenet_type, prenet_dropout, + forward_attn, trans_agent, forward_attn_mask, location_attn, + attn_K, separate_stopnet) + + def forward(self, characters, text_lengths, mel_specs, mel_lengths=None, speaker_ids=None, speaker_embeddings=None): + """ + Shapes: + characters: [B, T_in] + text_lengths: [B] + mel_specs: [B, T_out, C] + mel_lengths: [B] + speaker_ids: [B, 1] + speaker_embeddings: [B, C] + """ + input_mask, output_mask = self.compute_masks(text_lengths, mel_lengths) + # B x T_in x embed_dim + inputs = self.embedding(characters) + # B x T_in x encoder_in_features + encoder_outputs = self.encoder(inputs) + # sequence masking + encoder_outputs = encoder_outputs * input_mask.unsqueeze(2).expand_as(encoder_outputs) + # global style token + if self.gst: + # B x gst_dim + encoder_outputs = 
self.compute_gst(encoder_outputs, + mel_specs, + speaker_embeddings if self.gst_use_speaker_embedding else None) + # speaker embedding + if self.num_speakers > 1: + if not self.embeddings_per_sample: + # B x 1 x speaker_embed_dim + speaker_embeddings = self.speaker_embedding(speaker_ids)[:, None] + else: + # B x 1 x speaker_embed_dim + speaker_embeddings = torch.unsqueeze(speaker_embeddings, 1) + encoder_outputs = self._concat_speaker_embedding(encoder_outputs, speaker_embeddings) + # decoder_outputs: B x decoder_in_features x T_out + # alignments: B x T_in x encoder_in_features + # stop_tokens: B x T_in + decoder_outputs, alignments, stop_tokens = self.decoder( + encoder_outputs, mel_specs, input_mask) + # sequence masking + if output_mask is not None: + decoder_outputs = decoder_outputs * output_mask.unsqueeze(1).expand_as(decoder_outputs) + # B x T_out x decoder_in_features + postnet_outputs = self.postnet(decoder_outputs) + # sequence masking + if output_mask is not None: + postnet_outputs = postnet_outputs * output_mask.unsqueeze(2).expand_as(postnet_outputs) + # B x T_out x posnet_dim + postnet_outputs = self.last_linear(postnet_outputs) + # B x T_out x decoder_in_features + decoder_outputs = decoder_outputs.transpose(1, 2).contiguous() + if self.bidirectional_decoder: + decoder_outputs_backward, alignments_backward = self._backward_pass(mel_specs, encoder_outputs, input_mask) + return decoder_outputs, postnet_outputs, alignments, stop_tokens, decoder_outputs_backward, alignments_backward + if self.double_decoder_consistency: + decoder_outputs_backward, alignments_backward = self._coarse_decoder_pass(mel_specs, encoder_outputs, alignments, input_mask) + return decoder_outputs, postnet_outputs, alignments, stop_tokens, decoder_outputs_backward, alignments_backward + return decoder_outputs, postnet_outputs, alignments, stop_tokens + + @torch.no_grad() + def inference(self, characters, speaker_ids=None, style_mel=None, speaker_embeddings=None): + inputs = self.embedding(characters) + encoder_outputs = self.encoder(inputs) + if self.gst: + # B x gst_dim + encoder_outputs = self.compute_gst(encoder_outputs, + style_mel, + speaker_embeddings if self.gst_use_speaker_embedding else None) + if self.num_speakers > 1: + if not self.embeddings_per_sample: + # B x 1 x speaker_embed_dim + speaker_embeddings = self.speaker_embedding(speaker_ids)[:, None] + else: + # B x 1 x speaker_embed_dim + speaker_embeddings = torch.unsqueeze(speaker_embeddings, 1) + encoder_outputs = self._concat_speaker_embedding(encoder_outputs, speaker_embeddings) + decoder_outputs, alignments, stop_tokens = self.decoder.inference( + encoder_outputs) + postnet_outputs = self.postnet(decoder_outputs) + postnet_outputs = self.last_linear(postnet_outputs) + decoder_outputs = decoder_outputs.transpose(1, 2) + return decoder_outputs, postnet_outputs, alignments, stop_tokens diff --git a/TTS/tts/models/tacotron2.py b/TTS/tts/models/tacotron2.py new file mode 100644 index 0000000000000000000000000000000000000000..e56e4ca06911a2bd5a877839e1e81dad677e7fc6 --- /dev/null +++ b/TTS/tts/models/tacotron2.py @@ -0,0 +1,235 @@ +import torch +from torch import nn + +from TTS.tts.layers.gst_layers import GST +from TTS.tts.layers.tacotron2 import Decoder, Encoder, Postnet +from TTS.tts.models.tacotron_abstract import TacotronAbstract + +# TODO: match function arguments with tacotron +class Tacotron2(TacotronAbstract): + """Tacotron2 as in https://arxiv.org/abs/1712.05884 + + It's an autoregressive encoder-attention-decoder-postnet 
architecture. + + Args: + num_chars (int): number of input characters, used to set the size of the embedding layer. + num_speakers (int): number of speakers in the dataset. >1 enables multi-speaker training and the model learns speaker embeddings. + r (int): initial model reduction rate. + postnet_output_dim (int, optional): postnet output channels. Defaults to 80. + decoder_output_dim (int, optional): decoder output channels. Defaults to 80. + attn_type (str, optional): attention type. Check ```TTS.tts.layers.attentions.init_attn```. Defaults to 'original'. + attn_win (bool, optional): enable/disable attention windowing. + It is especially useful at inference for keeping the attention alignment diagonal. Defaults to False. + attn_norm (str, optional): Attention normalization method. "sigmoid" or "softmax". Defaults to "softmax". + prenet_type (str, optional): prenet type for the decoder. Defaults to "original". + prenet_dropout (bool, optional): enable/disable prenet dropout. Defaults to True. + forward_attn (bool, optional): enable/disable forward attention. + It is only valid if ```attn_type``` is ```original```. Defaults to False. + trans_agent (bool, optional): enable/disable transition agent in forward attention. Defaults to False. + forward_attn_mask (bool, optional): enable/disable extra masking over forward attention. Defaults to False. + location_attn (bool, optional): enable/disable location sensitive attention. + It is only valid if ```attn_type``` is ```original```. Defaults to True. + attn_K (int, optional): Number of attention heads for GMM attention. Defaults to 5. + separate_stopnet (bool, optional): if enabled, detach the stopnet input so that no gradient + flows from the stopnet back to the rest of the model. Defaults to True. + bidirectional_decoder (bool, optional): enable/disable bidirectional decoding. Defaults to False. + double_decoder_consistency (bool, optional): enable/disable double decoder consistency. Defaults to False. + ddc_r (int, optional): reduction rate for the coarse decoder of double decoder consistency. Defaults to None. + encoder_in_features (int, optional): input channels for the encoder. Defaults to 512. + decoder_in_features (int, optional): input channels for the decoder. Defaults to 512. + speaker_embedding_dim (int, optional): external speaker conditioning vector channels. Defaults to None. + gst (bool, optional): enable/disable global style token learning. Defaults to False. + gst_embedding_dim (int, optional): size of channels for GST vectors. Defaults to 512. + gst_num_heads (int, optional): number of attention heads for GST. Defaults to 4. + gst_style_tokens (int, optional): number of GST tokens. Defaults to 10. + gst_use_speaker_embedding (bool, optional): enable/disable feeding the speaker embedding to GST. Defaults to False.
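For contrast with the Tacotron sketch above, the following hedged example exercises the teacher-forced training call of `Tacotron2` defined below. The sizes are arbitrary assumptions (`T_mel` is chosen divisible by `r`), the weights are random, and the text lengths are sorted in descending order as the encoder's packed LSTM expects.

```python
import torch
from TTS.tts.models.tacotron2 import Tacotron2

model = Tacotron2(num_chars=120, num_speakers=1, r=2)

B, T_in, T_mel = 2, 27, 100
text = torch.randint(0, 120, (B, T_in))
text_lengths = torch.tensor([27, 19])        # descending, longest sample first
mel_specs = torch.rand(B, T_mel, 80)
mel_lengths = torch.tensor([100, 80])

decoder_out, postnet_out, alignments, stop_tokens = model(
    text, text_lengths, mel_specs, mel_lengths)

print(decoder_out.shape)    # [B, T_mel, 80]
print(alignments.shape)     # [B, T_mel // r, T_in] -- one attention step per r frames
```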
+ """ + def __init__(self, + num_chars, + num_speakers, + r, + postnet_output_dim=80, + decoder_output_dim=80, + attn_type='original', + attn_win=False, + attn_norm="softmax", + prenet_type="original", + prenet_dropout=True, + forward_attn=False, + trans_agent=False, + forward_attn_mask=False, + location_attn=True, + attn_K=5, + separate_stopnet=True, + bidirectional_decoder=False, + double_decoder_consistency=False, + ddc_r=None, + encoder_in_features=512, + decoder_in_features=512, + speaker_embedding_dim=None, + gst=False, + gst_embedding_dim=512, + gst_num_heads=4, + gst_style_tokens=10, + gst_use_speaker_embedding=False): + super(Tacotron2, + self).__init__(num_chars, num_speakers, r, postnet_output_dim, + decoder_output_dim, attn_type, attn_win, + attn_norm, prenet_type, prenet_dropout, + forward_attn, trans_agent, forward_attn_mask, + location_attn, attn_K, separate_stopnet, + bidirectional_decoder, double_decoder_consistency, + ddc_r, encoder_in_features, decoder_in_features, + speaker_embedding_dim, gst, gst_embedding_dim, + gst_num_heads, gst_style_tokens, gst_use_speaker_embedding) + + # speaker embedding layer + if self.num_speakers > 1: + if not self.embeddings_per_sample: + speaker_embedding_dim = 512 + self.speaker_embedding = nn.Embedding(self.num_speakers, speaker_embedding_dim) + self.speaker_embedding.weight.data.normal_(0, 0.3) + + # speaker and gst embeddings is concat in decoder input + if self.num_speakers > 1: + self.decoder_in_features += speaker_embedding_dim # add speaker embedding dim + + # embedding layer + self.embedding = nn.Embedding(num_chars, 512, padding_idx=0) + + # base model layers + self.encoder = Encoder(self.encoder_in_features) + self.decoder = Decoder(self.decoder_in_features, self.decoder_output_dim, r, attn_type, attn_win, + attn_norm, prenet_type, prenet_dropout, + forward_attn, trans_agent, forward_attn_mask, + location_attn, attn_K, separate_stopnet) + self.postnet = Postnet(self.postnet_output_dim) + + # global style token layers + if self.gst: + self.gst_layer = GST(num_mel=80, + num_heads=self.gst_num_heads, + num_style_tokens=self.gst_style_tokens, + gst_embedding_dim=self.gst_embedding_dim, + speaker_embedding_dim=speaker_embedding_dim if self.embeddings_per_sample and self.gst_use_speaker_embedding else None) + # backward pass decoder + if self.bidirectional_decoder: + self._init_backward_decoder() + # setup DDC + if self.double_decoder_consistency: + self.coarse_decoder = Decoder( + self.decoder_in_features, self.decoder_output_dim, ddc_r, attn_type, + attn_win, attn_norm, prenet_type, prenet_dropout, forward_attn, + trans_agent, forward_attn_mask, location_attn, attn_K, + separate_stopnet) + + @staticmethod + def shape_outputs(mel_outputs, mel_outputs_postnet, alignments): + mel_outputs = mel_outputs.transpose(1, 2) + mel_outputs_postnet = mel_outputs_postnet.transpose(1, 2) + return mel_outputs, mel_outputs_postnet, alignments + + def forward(self, text, text_lengths, mel_specs=None, mel_lengths=None, speaker_ids=None, speaker_embeddings=None): + """ + Shapes: + text: [B, T_in] + text_lengths: [B] + mel_specs: [B, T_out, C] + mel_lengths: [B] + speaker_ids: [B, 1] + speaker_embeddings: [B, C] + """ + # compute mask for padding + # B x T_in_max (boolean) + input_mask, output_mask = self.compute_masks(text_lengths, mel_lengths) + # B x D_embed x T_in_max + embedded_inputs = self.embedding(text).transpose(1, 2) + # B x T_in_max x D_en + encoder_outputs = self.encoder(embedded_inputs, text_lengths) + if self.gst: + # B x gst_dim + 
encoder_outputs = self.compute_gst(encoder_outputs, + mel_specs, + speaker_embeddings if self.gst_use_speaker_embedding else None) + if self.num_speakers > 1: + if not self.embeddings_per_sample: + # B x 1 x speaker_embed_dim + speaker_embeddings = self.speaker_embedding(speaker_ids)[:, None] + else: + # B x 1 x speaker_embed_dim + speaker_embeddings = torch.unsqueeze(speaker_embeddings, 1) + encoder_outputs = self._concat_speaker_embedding(encoder_outputs, speaker_embeddings) + + encoder_outputs = encoder_outputs * input_mask.unsqueeze(2).expand_as(encoder_outputs) + + # B x mel_dim x T_out -- B x T_out//r x T_in -- B x T_out//r + decoder_outputs, alignments, stop_tokens = self.decoder( + encoder_outputs, mel_specs, input_mask) + # sequence masking + if mel_lengths is not None: + decoder_outputs = decoder_outputs * output_mask.unsqueeze(1).expand_as(decoder_outputs) + # B x mel_dim x T_out + postnet_outputs = self.postnet(decoder_outputs) + postnet_outputs = decoder_outputs + postnet_outputs + # sequence masking + if output_mask is not None: + postnet_outputs = postnet_outputs * output_mask.unsqueeze(1).expand_as(postnet_outputs) + # B x T_out x mel_dim -- B x T_out x mel_dim -- B x T_out//r x T_in + decoder_outputs, postnet_outputs, alignments = self.shape_outputs( + decoder_outputs, postnet_outputs, alignments) + if self.bidirectional_decoder: + decoder_outputs_backward, alignments_backward = self._backward_pass(mel_specs, encoder_outputs, input_mask) + return decoder_outputs, postnet_outputs, alignments, stop_tokens, decoder_outputs_backward, alignments_backward + if self.double_decoder_consistency: + decoder_outputs_backward, alignments_backward = self._coarse_decoder_pass(mel_specs, encoder_outputs, alignments, input_mask) + return decoder_outputs, postnet_outputs, alignments, stop_tokens, decoder_outputs_backward, alignments_backward + return decoder_outputs, postnet_outputs, alignments, stop_tokens + + @torch.no_grad() + def inference(self, text, speaker_ids=None, style_mel=None, speaker_embeddings=None): + embedded_inputs = self.embedding(text).transpose(1, 2) + encoder_outputs = self.encoder.inference(embedded_inputs) + + if self.gst: + # B x gst_dim + encoder_outputs = self.compute_gst(encoder_outputs, + style_mel, + speaker_embeddings if self.gst_use_speaker_embedding else None) + if self.num_speakers > 1: + if not self.embeddings_per_sample: + speaker_embeddings = self.speaker_embedding(speaker_ids)[:, None] + encoder_outputs = self._concat_speaker_embedding(encoder_outputs, speaker_embeddings) + + decoder_outputs, alignments, stop_tokens = self.decoder.inference( + encoder_outputs) + postnet_outputs = self.postnet(decoder_outputs) + postnet_outputs = decoder_outputs + postnet_outputs + decoder_outputs, postnet_outputs, alignments = self.shape_outputs( + decoder_outputs, postnet_outputs, alignments) + return decoder_outputs, postnet_outputs, alignments, stop_tokens + + def inference_truncated(self, text, speaker_ids=None, style_mel=None, speaker_embeddings=None): + """ + Preserve model states for continuous inference + """ + embedded_inputs = self.embedding(text).transpose(1, 2) + encoder_outputs = self.encoder.inference_truncated(embedded_inputs) + + if self.gst: + # B x gst_dim + encoder_outputs = self.compute_gst(encoder_outputs, + style_mel, + speaker_embeddings if self.gst_use_speaker_embedding else None) + + if self.num_speakers > 1: + if not self.embeddings_per_sample: + speaker_embeddings = self.speaker_embedding(speaker_ids)[:, None] + encoder_outputs = 
self._concat_speaker_embedding(encoder_outputs, speaker_embeddings) + + mel_outputs, alignments, stop_tokens = self.decoder.inference_truncated( + encoder_outputs) + mel_outputs_postnet = self.postnet(mel_outputs) + mel_outputs_postnet = mel_outputs + mel_outputs_postnet + mel_outputs, mel_outputs_postnet, alignments = self.shape_outputs( + mel_outputs, mel_outputs_postnet, alignments) + return mel_outputs, mel_outputs_postnet, alignments, stop_tokens diff --git a/TTS/tts/models/tacotron_abstract.py b/TTS/tts/models/tacotron_abstract.py new file mode 100644 index 0000000000000000000000000000000000000000..10953269663a02fe52c181f8d7cfeeafe1240502 --- /dev/null +++ b/TTS/tts/models/tacotron_abstract.py @@ -0,0 +1,225 @@ +import copy +from abc import ABC, abstractmethod + +import torch +from torch import nn + +from TTS.tts.utils.generic_utils import sequence_mask + + +class TacotronAbstract(ABC, nn.Module): + def __init__(self, + num_chars, + num_speakers, + r, + postnet_output_dim=80, + decoder_output_dim=80, + attn_type='original', + attn_win=False, + attn_norm="softmax", + prenet_type="original", + prenet_dropout=True, + forward_attn=False, + trans_agent=False, + forward_attn_mask=False, + location_attn=True, + attn_K=5, + separate_stopnet=True, + bidirectional_decoder=False, + double_decoder_consistency=False, + ddc_r=None, + encoder_in_features=512, + decoder_in_features=512, + speaker_embedding_dim=None, + gst=False, + gst_embedding_dim=512, + gst_num_heads=4, + gst_style_tokens=10, + gst_use_speaker_embedding=False): + """ Abstract Tacotron class """ + super().__init__() + self.num_chars = num_chars + self.r = r + self.decoder_output_dim = decoder_output_dim + self.postnet_output_dim = postnet_output_dim + self.gst = gst + self.gst_embedding_dim = gst_embedding_dim + self.gst_num_heads = gst_num_heads + self.gst_style_tokens = gst_style_tokens + self.gst_use_speaker_embedding = gst_use_speaker_embedding + self.num_speakers = num_speakers + self.bidirectional_decoder = bidirectional_decoder + self.double_decoder_consistency = double_decoder_consistency + self.ddc_r = ddc_r + self.attn_type = attn_type + self.attn_win = attn_win + self.attn_norm = attn_norm + self.prenet_type = prenet_type + self.prenet_dropout = prenet_dropout + self.forward_attn = forward_attn + self.trans_agent = trans_agent + self.forward_attn_mask = forward_attn_mask + self.location_attn = location_attn + self.attn_K = attn_K + self.separate_stopnet = separate_stopnet + self.encoder_in_features = encoder_in_features + self.decoder_in_features = decoder_in_features + self.speaker_embedding_dim = speaker_embedding_dim + + # layers + self.embedding = None + self.encoder = None + self.decoder = None + self.postnet = None + + # multispeaker + if self.speaker_embedding_dim is None: + # if speaker_embedding_dim is None we need use the nn.Embedding, with default speaker_embedding_dim + self.embeddings_per_sample = False + else: + # if speaker_embedding_dim is not None we need use speaker embedding per sample + self.embeddings_per_sample = True + + # global style token + if self.gst: + self.decoder_in_features += gst_embedding_dim # add gst embedding dim + self.gst_layer = None + + # model states + self.speaker_embeddings = None + self.speaker_embeddings_projected = None + + # additional layers + self.decoder_backward = None + self.coarse_decoder = None + + ############################# + # INIT FUNCTIONS + ############################# + + def _init_states(self): + self.speaker_embeddings = None + 
self.speaker_embeddings_projected = None + + def _init_backward_decoder(self): + self.decoder_backward = copy.deepcopy(self.decoder) + + def _init_coarse_decoder(self): + self.coarse_decoder = copy.deepcopy(self.decoder) + self.coarse_decoder.r_init = self.ddc_r + self.coarse_decoder.set_r(self.ddc_r) + + ############################# + # CORE FUNCTIONS + ############################# + + @abstractmethod + def forward(self): + pass + + @abstractmethod + def inference(self): + pass + + def load_checkpoint(self, config, checkpoint_path, eval=False): # pylint: disable=unused-argument, redefined-builtin + state = torch.load(checkpoint_path, map_location=torch.device('cpu')) + self.load_state_dict(state['model']) + self.decoder.set_r(state['r']) + if eval: + self.eval() + assert not self.training + + ############################# + # COMMON COMPUTE FUNCTIONS + ############################# + + def compute_masks(self, text_lengths, mel_lengths): + """Compute masks against sequence paddings.""" + # B x T_in_max (boolean) + device = text_lengths.device + input_mask = sequence_mask(text_lengths).to(device) + output_mask = None + if mel_lengths is not None: + max_len = mel_lengths.max() + r = self.decoder.r + max_len = max_len + (r - (max_len % r)) if max_len % r > 0 else max_len + output_mask = sequence_mask(mel_lengths, max_len=max_len).to(device) + return input_mask, output_mask + + def _backward_pass(self, mel_specs, encoder_outputs, mask): + """ Run backwards decoder """ + decoder_outputs_b, alignments_b, _ = self.decoder_backward( + encoder_outputs, torch.flip(mel_specs, dims=(1,)), mask, + self.speaker_embeddings_projected) + decoder_outputs_b = decoder_outputs_b.transpose(1, 2).contiguous() + return decoder_outputs_b, alignments_b + + def _coarse_decoder_pass(self, mel_specs, encoder_outputs, alignments, + input_mask): + """ Double Decoder Consistency """ + T = mel_specs.shape[1] + if T % self.coarse_decoder.r > 0: + padding_size = self.coarse_decoder.r - (T % self.coarse_decoder.r) + mel_specs = torch.nn.functional.pad(mel_specs, + (0, 0, 0, padding_size, 0, 0)) + decoder_outputs_backward, alignments_backward, _ = self.coarse_decoder( + encoder_outputs.detach(), mel_specs, input_mask) + # scale_factor = self.decoder.r_init / self.decoder.r + alignments_backward = torch.nn.functional.interpolate( + alignments_backward.transpose(1, 2), + size=alignments.shape[1], + mode='nearest').transpose(1, 2) + decoder_outputs_backward = decoder_outputs_backward.transpose(1, 2) + decoder_outputs_backward = decoder_outputs_backward[:, :T, :] + return decoder_outputs_backward, alignments_backward + + ############################# + # EMBEDDING FUNCTIONS + ############################# + + def compute_speaker_embedding(self, speaker_ids): + """ Compute speaker embedding vectors """ + if hasattr(self, "speaker_embedding") and speaker_ids is None: + raise RuntimeError( + " [!] 
Model has speaker embedding layer but speaker_id is not provided" + ) + if hasattr(self, "speaker_embedding") and speaker_ids is not None: + self.speaker_embeddings = self.speaker_embedding(speaker_ids).unsqueeze(1) + if hasattr(self, "speaker_project_mel") and speaker_ids is not None: + self.speaker_embeddings_projected = self.speaker_project_mel( + self.speaker_embeddings).squeeze(1) + + def compute_gst(self, inputs, style_input, speaker_embedding=None): + """ Compute global style token """ + device = inputs.device + if isinstance(style_input, dict): + query = torch.zeros(1, 1, self.gst_embedding_dim//2).to(device) + if speaker_embedding is not None: + query = torch.cat([query, speaker_embedding.reshape(1, 1, -1)], dim=-1) + + _GST = torch.tanh(self.gst_layer.style_token_layer.style_tokens) + gst_outputs = torch.zeros(1, 1, self.gst_embedding_dim).to(device) + for k_token, v_amplifier in style_input.items(): + key = _GST[int(k_token)].unsqueeze(0).expand(1, -1, -1) + gst_outputs_att = self.gst_layer.style_token_layer.attention(query, key) + gst_outputs = gst_outputs + gst_outputs_att * v_amplifier + elif style_input is None: + gst_outputs = torch.zeros(1, 1, self.gst_embedding_dim).to(device) + else: + gst_outputs = self.gst_layer(style_input, speaker_embedding) # pylint: disable=not-callable + inputs = self._concat_speaker_embedding(inputs, gst_outputs) + return inputs + + @staticmethod + def _add_speaker_embedding(outputs, speaker_embeddings): + speaker_embeddings_ = speaker_embeddings.expand( + outputs.size(0), outputs.size(1), -1) + outputs = outputs + speaker_embeddings_ + return outputs + + @staticmethod + def _concat_speaker_embedding(outputs, speaker_embeddings): + speaker_embeddings_ = speaker_embeddings.expand( + outputs.size(0), outputs.size(1), -1) + outputs = torch.cat([outputs, speaker_embeddings_], dim=-1) + return outputs diff --git a/TTS/tts/tf/README.md b/TTS/tts/tf/README.md new file mode 100644 index 0000000000000000000000000000000000000000..0f9d58e905f9b85fb83c2be20c8166ef6ab403dd --- /dev/null +++ b/TTS/tts/tf/README.md @@ -0,0 +1,20 @@ +## Utilities to Convert Models to Tensorflow2 +Here are experimental utilities to convert trained Torch models to Tensorflow (>=2.2). + +Converting Torch models to TF enables the whole TF toolkit to be used for better deployment and device-specific optimizations. + +Note that we do not plan to share training scripts for Tensorflow in the near future, but any contribution in that direction is more than welcome. + +To see how you can use the TF model at inference, check the notebook. + +This is an experimental release. If you encounter an error, please open an issue or, even better, send a PR; but you are mostly on your own. + + +### Converting a Model +- Run ```convert_tacotron2_torch_to_tf.py --torch_model_path /path/to/torch/model.pth.tar --config_path /path/to/model/config.json --output_path /path/to/output/tf/model``` with the right arguments. + +### Known issues and limitations +- We use a custom model load/save mechanism which enables us to store model-related information with the model weights (similar to Torch). However, it is prone to random errors. +- The current TF model implementation is slightly slower than the Torch model. Hopefully, it'll get better with improving TF support for eager mode and ```tf.function```. +- The TF implementation of Tacotron2 only supports regular Tacotron2 as in the paper. +- You can only convert models trained after the TF model implementation was added, since the Torch model layers have been updated.
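For quick reference, here is a minimal, hypothetical sketch of running a converted model at inference time; the notebook mentioned above remains the supported path. It assumes the converter wrote a pickle checkpoint readable by `TTS.tts.tf.utils.io.load_checkpoint`, that `load_config` is importable from `TTS.utils.io`, and that `num_chars` matches the symbol set of the original Torch model; all file paths and the `num_chars` value are placeholders.

```python
# Hypothetical sketch of TF inference with a converted model; not the official workflow.
import numpy as np
import tensorflow as tf

from TTS.utils.io import load_config                  # assumed location of the config loader
from TTS.tts.tf.utils.generic_utils import setup_model
from TTS.tts.tf.utils.io import load_checkpoint

c = load_config('config.json')                         # config of the original Torch model (placeholder path)
num_chars = 61                                         # assumption: must equal the training symbol-set size
model = setup_model(num_chars, num_speakers=0, c=c)    # builds the TF Tacotron2 from TTS/tts/tf/models
model.build_inference()                                # dummy forward pass so the layer weights get created
model = load_checkpoint(model, 'tf_model.pkl')         # pickle checkpoint written by the converter (placeholder)

# Dummy character ids; real usage converts text with TTS.tts.utils.text.text_to_sequence.
characters = tf.convert_to_tensor(np.random.randint(0, num_chars, size=(1, 32)).astype(np.int32))
decoder_frames, postnet_frames, alignments, stop_tokens = model.inference(characters)
```

`setup_model`, `build_inference` and `load_checkpoint` are the helpers introduced under `TTS/tts/tf/` in this change; everything else in the sketch is illustrative.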
diff --git a/TTS/tts/tf/__init__.py b/TTS/tts/tf/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/TTS/tts/tf/layers/common_layers.py b/TTS/tts/tf/layers/common_layers.py new file mode 100644 index 0000000000000000000000000000000000000000..ad18b9fc6b34ede942a7dfdb67c8476f765aa35f --- /dev/null +++ b/TTS/tts/tf/layers/common_layers.py @@ -0,0 +1,288 @@ +import tensorflow as tf +from tensorflow import keras +from tensorflow.python.ops import math_ops +# from tensorflow_addons.seq2seq import BahdanauAttention + +# NOTE: linter has a problem with the current TF release +#pylint: disable=no-value-for-parameter +#pylint: disable=unexpected-keyword-arg + +class Linear(keras.layers.Layer): + def __init__(self, units, use_bias, **kwargs): + super(Linear, self).__init__(**kwargs) + self.linear_layer = keras.layers.Dense(units, use_bias=use_bias, name='linear_layer') + self.activation = keras.layers.ReLU() + + def call(self, x): + """ + shapes: + x: B x T x C + """ + return self.activation(self.linear_layer(x)) + + +class LinearBN(keras.layers.Layer): + def __init__(self, units, use_bias, **kwargs): + super(LinearBN, self).__init__(**kwargs) + self.linear_layer = keras.layers.Dense(units, use_bias=use_bias, name='linear_layer') + self.batch_normalization = keras.layers.BatchNormalization(axis=-1, momentum=0.90, epsilon=1e-5, name='batch_normalization') + self.activation = keras.layers.ReLU() + + def call(self, x, training=None): + """ + shapes: + x: B x T x C + """ + out = self.linear_layer(x) + out = self.batch_normalization(out, training=training) + return self.activation(out) + + +class Prenet(keras.layers.Layer): + def __init__(self, + prenet_type, + prenet_dropout, + units, + bias, + **kwargs): + super(Prenet, self).__init__(**kwargs) + self.prenet_type = prenet_type + self.prenet_dropout = prenet_dropout + self.linear_layers = [] + if prenet_type == "bn": + self.linear_layers += [LinearBN(unit, use_bias=bias, name=f'linear_layer_{idx}') for idx, unit in enumerate(units)] + elif prenet_type == "original": + self.linear_layers += [Linear(unit, use_bias=bias, name=f'linear_layer_{idx}') for idx, unit in enumerate(units)] + else: + raise RuntimeError(' [!] 
Unknown prenet type.') + if prenet_dropout: + self.dropout = keras.layers.Dropout(rate=0.5) + + def call(self, x, training=None): + """ + shapes: + x: B x T x C + """ + for linear in self.linear_layers: + if self.prenet_dropout: + x = self.dropout(linear(x), training=training) + else: + x = linear(x) + return x + + +def _sigmoid_norm(score): + attn_weights = tf.nn.sigmoid(score) + attn_weights = attn_weights / tf.reduce_sum(attn_weights, axis=1, keepdims=True) + return attn_weights + + +class Attention(keras.layers.Layer): + """TODO: implement forward_attention + TODO: location sensitive attention + TODO: implement attention windowing """ + def __init__(self, attn_dim, use_loc_attn, loc_attn_n_filters, + loc_attn_kernel_size, use_windowing, norm, use_forward_attn, + use_trans_agent, use_forward_attn_mask, **kwargs): + super(Attention, self).__init__(**kwargs) + self.use_loc_attn = use_loc_attn + self.loc_attn_n_filters = loc_attn_n_filters + self.loc_attn_kernel_size = loc_attn_kernel_size + self.use_windowing = use_windowing + self.norm = norm + self.use_forward_attn = use_forward_attn + self.use_trans_agent = use_trans_agent + self.use_forward_attn_mask = use_forward_attn_mask + self.query_layer = tf.keras.layers.Dense(attn_dim, use_bias=False, name='query_layer/linear_layer') + self.inputs_layer = tf.keras.layers.Dense(attn_dim, use_bias=False, name=f'{self.name}/inputs_layer/linear_layer') + self.v = tf.keras.layers.Dense(1, use_bias=True, name='v/linear_layer') + if use_loc_attn: + self.location_conv1d = keras.layers.Conv1D( + filters=loc_attn_n_filters, + kernel_size=loc_attn_kernel_size, + padding='same', + use_bias=False, + name='location_layer/location_conv1d') + self.location_dense = keras.layers.Dense(attn_dim, use_bias=False, name='location_layer/location_dense') + if norm == 'softmax': + self.norm_func = tf.nn.softmax + elif norm == 'sigmoid': + self.norm_func = _sigmoid_norm + else: + raise ValueError("Unknown value for attention norm type") + + def init_states(self, batch_size, value_length): + states = [] + if self.use_loc_attn: + attention_cum = tf.zeros([batch_size, value_length]) + attention_old = tf.zeros([batch_size, value_length]) + states = [attention_cum, attention_old] + if self.use_forward_attn: + alpha = tf.concat([ + tf.ones([batch_size, 1]), + tf.zeros([batch_size, value_length])[:, :-1] + 1e-7 + ], 1) + states.append(alpha) + return tuple(states) + + def process_values(self, values): + """ cache values for decoder iterations """ + #pylint: disable=attribute-defined-outside-init + self.processed_values = self.inputs_layer(values) + self.values = values + + def get_loc_attn(self, query, states): + """ compute location attention, query layer and + unnorm. 
attention weights""" + attention_cum, attention_old = states[:2] + attn_cat = tf.stack([attention_old, attention_cum], axis=2) + + processed_query = self.query_layer(tf.expand_dims(query, 1)) + processed_attn = self.location_dense(self.location_conv1d(attn_cat)) + score = self.v( + tf.nn.tanh(self.processed_values + processed_query + + processed_attn)) + score = tf.squeeze(score, axis=2) + return score, processed_query + + def get_attn(self, query): + """ compute query layer and unnormalized attention weights """ + processed_query = self.query_layer(tf.expand_dims(query, 1)) + score = self.v(tf.nn.tanh(self.processed_values + processed_query)) + score = tf.squeeze(score, axis=2) + return score, processed_query + + def apply_score_masking(self, score, mask): #pylint: disable=no-self-use + """ ignore sequence paddings """ + padding_mask = tf.expand_dims(math_ops.logical_not(mask), 2) + # Bias so padding positions do not contribute to attention distribution. + score -= 1.e9 * math_ops.cast(padding_mask, dtype=tf.float32) + return score + + def apply_forward_attention(self, alignment, alpha): #pylint: disable=no-self-use + # forward attention + fwd_shifted_alpha = tf.pad(alpha[:, :-1], ((0, 0), (1, 0)), constant_values=0.0) + # compute transition potentials + new_alpha = ((1 - 0.5) * alpha + 0.5 * fwd_shifted_alpha + 1e-8) * alignment + # renormalize attention weights + new_alpha = new_alpha / tf.reduce_sum(new_alpha, axis=1, keepdims=True) + return new_alpha + + def update_states(self, old_states, scores_norm, attn_weights, new_alpha=None): + states = [] + if self.use_loc_attn: + states = [old_states[0] + scores_norm, attn_weights] + if self.use_forward_attn: + states.append(new_alpha) + return tuple(states) + + def call(self, query, states): + """ + shapes: + query: B x D + """ + if self.use_loc_attn: + score, _ = self.get_loc_attn(query, states) + else: + score, _ = self.get_attn(query) + + # TODO: masking + # if mask is not None: + # self.apply_score_masking(score, mask) + # attn_weights shape == (batch_size, max_length, 1) + + # normalize attention scores + scores_norm = self.norm_func(score) + attn_weights = scores_norm + + # apply forward attention + new_alpha = None + if self.use_forward_attn: + new_alpha = self.apply_forward_attention(attn_weights, states[-1]) + attn_weights = new_alpha + + # update states tuple + # states = (cum_attn_weights, attn_weights, new_alpha) + states = self.update_states(states, scores_norm, attn_weights, new_alpha) + + # context_vector shape after sum == (batch_size, hidden_size) + context_vector = tf.matmul(tf.expand_dims(attn_weights, axis=2), self.values, transpose_a=True, transpose_b=False) + context_vector = tf.squeeze(context_vector, axis=1) + return context_vector, attn_weights, states + + +# def _location_sensitive_score(processed_query, keys, processed_loc, attention_v, attention_b): +# dtype = processed_query.dtype +# num_units = keys.shape[-1].value or array_ops.shape(keys)[-1] +# return tf.reduce_sum(attention_v * tf.tanh(keys + processed_query + processed_loc + attention_b), [2]) + + +# class LocationSensitiveAttention(BahdanauAttention): +# def __init__(self, +# units, +# memory=None, +# memory_sequence_length=None, +# normalize=False, +# probability_fn="softmax", +# kernel_initializer="glorot_uniform", +# dtype=None, +# name="LocationSensitiveAttention", +# location_attention_filters=32, +# location_attention_kernel_size=31): + +# super(LocationSensitiveAttention, +# self).__init__(units=units, +# memory=memory, +# 
memory_sequence_length=memory_sequence_length, +# normalize=normalize, +# probability_fn='softmax', ## parent module default +# kernel_initializer=kernel_initializer, +# dtype=dtype, +# name=name) +# if probability_fn == 'sigmoid': +# self.probability_fn = lambda score, _: self._sigmoid_normalization(score) +# self.location_conv = keras.layers.Conv1D(filters=location_attention_filters, kernel_size=location_attention_kernel_size, padding='same', use_bias=False) +# self.location_dense = keras.layers.Dense(units, use_bias=False) +# # self.v = keras.layers.Dense(1, use_bias=True) + +# def _location_sensitive_score(self, processed_query, keys, processed_loc): +# processed_query = tf.expand_dims(processed_query, 1) +# return tf.reduce_sum(self.attention_v * tf.tanh(keys + processed_query + processed_loc), [2]) + +# def _location_sensitive(self, alignment_cum, alignment_old): +# alignment_cat = tf.stack([alignment_cum, alignment_old], axis=2) +# return self.location_dense(self.location_conv(alignment_cat)) + +# def _sigmoid_normalization(self, score): +# return tf.nn.sigmoid(score) / tf.reduce_sum(tf.nn.sigmoid(score), axis=-1, keepdims=True) + +# # def _apply_masking(self, score, mask): +# # padding_mask = tf.expand_dims(math_ops.logical_not(mask), 2) +# # # Bias so padding positions do not contribute to attention distribution. +# # score -= 1.e9 * math_ops.cast(padding_mask, dtype=tf.float32) +# # return score + +# def _calculate_attention(self, query, state): +# alignment_cum, alignment_old = state[:2] +# processed_query = self.query_layer( +# query) if self.query_layer else query +# processed_loc = self._location_sensitive(alignment_cum, alignment_old) +# score = self._location_sensitive_score( +# processed_query, +# self.keys, +# processed_loc) +# alignment = self.probability_fn(score, state) +# alignment_cum = alignment_cum + alignment +# state[0] = alignment_cum +# state[1] = alignment +# return alignment, state + +# def compute_context(self, alignments): +# expanded_alignments = tf.expand_dims(alignments, 1) +# context = tf.matmul(expanded_alignments, self.values) +# context = tf.squeeze(context, [1]) +# return context + +# # def call(self, query, state): +# # alignment, next_state = self._calculate_attention(query, state) +# # return alignment, next_state diff --git a/TTS/tts/tf/layers/tacotron2.py b/TTS/tts/tf/layers/tacotron2.py new file mode 100644 index 0000000000000000000000000000000000000000..50a766a9a1c378356cb841cda41236c4ca2391fa --- /dev/null +++ b/TTS/tts/tf/layers/tacotron2.py @@ -0,0 +1,302 @@ +import tensorflow as tf +from tensorflow import keras +from TTS.tts.tf.utils.tf_utils import shape_list +from TTS.tts.tf.layers.common_layers import Prenet, Attention + + +# NOTE: linter has a problem with the current TF release +#pylint: disable=no-value-for-parameter +#pylint: disable=unexpected-keyword-arg +class ConvBNBlock(keras.layers.Layer): + def __init__(self, filters, kernel_size, activation, **kwargs): + super(ConvBNBlock, self).__init__(**kwargs) + self.convolution1d = keras.layers.Conv1D(filters, kernel_size, padding='same', name='convolution1d') + self.batch_normalization = keras.layers.BatchNormalization(axis=2, momentum=0.90, epsilon=1e-5, name='batch_normalization') + self.dropout = keras.layers.Dropout(rate=0.5, name='dropout') + self.activation = keras.layers.Activation(activation, name='activation') + + def call(self, x, training=None): + o = self.convolution1d(x) + o = self.batch_normalization(o, training=training) + o = self.activation(o) + o = self.dropout(o, 
training=training) + return o + + +class Postnet(keras.layers.Layer): + def __init__(self, output_filters, num_convs, **kwargs): + super(Postnet, self).__init__(**kwargs) + self.convolutions = [] + self.convolutions.append(ConvBNBlock(512, 5, 'tanh', name='convolutions_0')) + for idx in range(1, num_convs - 1): + self.convolutions.append(ConvBNBlock(512, 5, 'tanh', name=f'convolutions_{idx}')) + self.convolutions.append(ConvBNBlock(output_filters, 5, 'linear', name=f'convolutions_{idx+1}')) + + def call(self, x, training=None): + o = x + for layer in self.convolutions: + o = layer(o, training=training) + return o + + +class Encoder(keras.layers.Layer): + def __init__(self, output_input_dim, **kwargs): + super(Encoder, self).__init__(**kwargs) + self.convolutions = [] + for idx in range(3): + self.convolutions.append(ConvBNBlock(output_input_dim, 5, 'relu', name=f'convolutions_{idx}')) + self.lstm = keras.layers.Bidirectional(keras.layers.LSTM(output_input_dim // 2, return_sequences=True, use_bias=True), name='lstm') + + def call(self, x, training=None): + o = x + for layer in self.convolutions: + o = layer(o, training=training) + o = self.lstm(o) + return o + + +class Decoder(keras.layers.Layer): + #pylint: disable=unused-argument + def __init__(self, frame_dim, r, attn_type, use_attn_win, attn_norm, prenet_type, + prenet_dropout, use_forward_attn, use_trans_agent, use_forward_attn_mask, + use_location_attn, attn_K, separate_stopnet, speaker_emb_dim, enable_tflite, **kwargs): + super(Decoder, self).__init__(**kwargs) + self.frame_dim = frame_dim + self.r_init = tf.constant(r, dtype=tf.int32) + self.r = tf.constant(r, dtype=tf.int32) + self.output_dim = r * self.frame_dim + self.separate_stopnet = separate_stopnet + self.enable_tflite = enable_tflite + + # layer constants + self.max_decoder_steps = tf.constant(1000, dtype=tf.int32) + self.stop_thresh = tf.constant(0.5, dtype=tf.float32) + + # model dimensions + self.query_dim = 1024 + self.decoder_rnn_dim = 1024 + self.prenet_dim = 256 + self.attn_dim = 128 + self.p_attention_dropout = 0.1 + self.p_decoder_dropout = 0.1 + + self.prenet = Prenet(prenet_type, + prenet_dropout, + [self.prenet_dim, self.prenet_dim], + bias=False, + name='prenet') + self.attention_rnn = keras.layers.LSTMCell(self.query_dim, use_bias=True, name='attention_rnn', ) + self.attention_rnn_dropout = keras.layers.Dropout(0.5) + + # TODO: implement other attn options + self.attention = Attention(attn_dim=self.attn_dim, + use_loc_attn=True, + loc_attn_n_filters=32, + loc_attn_kernel_size=31, + use_windowing=False, + norm=attn_norm, + use_forward_attn=use_forward_attn, + use_trans_agent=use_trans_agent, + use_forward_attn_mask=use_forward_attn_mask, + name='attention') + self.decoder_rnn = keras.layers.LSTMCell(self.decoder_rnn_dim, use_bias=True, name='decoder_rnn') + self.decoder_rnn_dropout = keras.layers.Dropout(0.5) + self.linear_projection = keras.layers.Dense(self.frame_dim * r, name='linear_projection/linear_layer') + self.stopnet = keras.layers.Dense(1, name='stopnet/linear_layer') + + + def set_max_decoder_steps(self, new_max_steps): + self.max_decoder_steps = tf.constant(new_max_steps, dtype=tf.int32) + + def set_r(self, new_r): + self.r = tf.constant(new_r, dtype=tf.int32) + self.output_dim = self.frame_dim * new_r + + def build_decoder_initial_states(self, batch_size, memory_dim, memory_length): + zero_frame = tf.zeros([batch_size, self.frame_dim]) + zero_context = tf.zeros([batch_size, memory_dim]) + attention_rnn_state = 
self.attention_rnn.get_initial_state(batch_size=batch_size, dtype=tf.float32) + decoder_rnn_state = self.decoder_rnn.get_initial_state(batch_size=batch_size, dtype=tf.float32) + attention_states = self.attention.init_states(batch_size, memory_length) + return zero_frame, zero_context, attention_rnn_state, decoder_rnn_state, attention_states + + def step(self, prenet_next, states, + memory_seq_length=None, training=None): + _, context_next, attention_rnn_state, decoder_rnn_state, attention_states = states + attention_rnn_input = tf.concat([prenet_next, context_next], -1) + attention_rnn_output, attention_rnn_state = \ + self.attention_rnn(attention_rnn_input, + attention_rnn_state, training=training) + attention_rnn_output = self.attention_rnn_dropout(attention_rnn_output, training=training) + context, attention, attention_states = self.attention(attention_rnn_output, attention_states, training=training) + decoder_rnn_input = tf.concat([attention_rnn_output, context], -1) + decoder_rnn_output, decoder_rnn_state = \ + self.decoder_rnn(decoder_rnn_input, decoder_rnn_state, training=training) + decoder_rnn_output = self.decoder_rnn_dropout(decoder_rnn_output, training=training) + linear_projection_input = tf.concat([decoder_rnn_output, context], -1) + output_frame = self.linear_projection(linear_projection_input, training=training) + stopnet_input = tf.concat([decoder_rnn_output, output_frame], -1) + stopnet_output = self.stopnet(stopnet_input, training=training) + output_frame = output_frame[:, :self.r * self.frame_dim] + states = (output_frame[:, self.frame_dim * (self.r - 1):], context, attention_rnn_state, decoder_rnn_state, attention_states) + return output_frame, stopnet_output, states, attention + + def decode(self, memory, states, frames, memory_seq_length=None): + B, _, _ = shape_list(memory) + num_iter = shape_list(frames)[1] // self.r + # init states + frame_zero = tf.expand_dims(states[0], 1) + frames = tf.concat([frame_zero, frames], axis=1) + outputs = tf.TensorArray(dtype=tf.float32, size=num_iter) + attentions = tf.TensorArray(dtype=tf.float32, size=num_iter) + stop_tokens = tf.TensorArray(dtype=tf.float32, size=num_iter) + # pre-computes + self.attention.process_values(memory) + prenet_output = self.prenet(frames, training=True) + step_count = tf.constant(0, dtype=tf.int32) + + def _body(step, memory, prenet_output, states, outputs, stop_tokens, attentions): + prenet_next = prenet_output[:, step] + output, stop_token, states, attention = self.step(prenet_next, + states, + memory_seq_length) + outputs = outputs.write(step, output) + attentions = attentions.write(step, attention) + stop_tokens = stop_tokens.write(step, stop_token) + return step + 1, memory, prenet_output, states, outputs, stop_tokens, attentions + _, memory, _, states, outputs, stop_tokens, attentions = \ + tf.while_loop(lambda *arg: True, + _body, + loop_vars=(step_count, memory, prenet_output, + states, outputs, stop_tokens, attentions), + parallel_iterations=32, + swap_memory=True, + maximum_iterations=num_iter) + + outputs = outputs.stack() + attentions = attentions.stack() + stop_tokens = stop_tokens.stack() + outputs = tf.transpose(outputs, [1, 0, 2]) + attentions = tf.transpose(attentions, [1, 0, 2]) + stop_tokens = tf.transpose(stop_tokens, [1, 0, 2]) + stop_tokens = tf.squeeze(stop_tokens, axis=2) + outputs = tf.reshape(outputs, [B, -1, self.frame_dim]) + return outputs, stop_tokens, attentions + + def decode_inference(self, memory, states): + B, _, _ = shape_list(memory) + # init states + outputs = 
tf.TensorArray(dtype=tf.float32, size=0, clear_after_read=False, dynamic_size=True) + attentions = tf.TensorArray(dtype=tf.float32, size=0, clear_after_read=False, dynamic_size=True) + stop_tokens = tf.TensorArray(dtype=tf.float32, size=0, clear_after_read=False, dynamic_size=True) + + # pre-computes + self.attention.process_values(memory) + + # iter vars + stop_flag = tf.constant(False, dtype=tf.bool) + step_count = tf.constant(0, dtype=tf.int32) + + def _body(step, memory, states, outputs, stop_tokens, attentions, stop_flag): + frame_next = states[0] + prenet_next = self.prenet(frame_next, training=False) + output, stop_token, states, attention = self.step(prenet_next, + states, + None, + training=False) + stop_token = tf.math.sigmoid(stop_token) + outputs = outputs.write(step, output) + attentions = attentions.write(step, attention) + stop_tokens = stop_tokens.write(step, stop_token) + stop_flag = tf.greater(stop_token, self.stop_thresh) + stop_flag = tf.reduce_all(stop_flag) + return step + 1, memory, states, outputs, stop_tokens, attentions, stop_flag + + cond = lambda step, m, s, o, st, a, stop_flag: tf.equal(stop_flag, tf.constant(False, dtype=tf.bool)) + _, memory, states, outputs, stop_tokens, attentions, stop_flag = \ + tf.while_loop(cond, + _body, + loop_vars=(step_count, memory, states, outputs, + stop_tokens, attentions, stop_flag), + parallel_iterations=32, + swap_memory=True, + maximum_iterations=self.max_decoder_steps) + + outputs = outputs.stack() + attentions = attentions.stack() + stop_tokens = stop_tokens.stack() + + outputs = tf.transpose(outputs, [1, 0, 2]) + attentions = tf.transpose(attentions, [1, 0, 2]) + stop_tokens = tf.transpose(stop_tokens, [1, 0, 2]) + stop_tokens = tf.squeeze(stop_tokens, axis=2) + outputs = tf.reshape(outputs, [B, -1, self.frame_dim]) + return outputs, stop_tokens, attentions + + def decode_inference_tflite(self, memory, states): + """Inference with TF-Lite compatibility. 
It assumes + batch_size is 1""" + # init states + # dynamic_shape is not supported in TFLite + outputs = tf.TensorArray(dtype=tf.float32, + size=self.max_decoder_steps, + element_shape=tf.TensorShape( + [self.output_dim]), + clear_after_read=False, + dynamic_size=False) + # stop_flags = tf.TensorArray(dtype=tf.bool, + # size=self.max_decoder_steps, + # element_shape=tf.TensorShape( + # []), + # clear_after_read=False, + # dynamic_size=False) + attentions = () + stop_tokens = () + + # pre-computes + self.attention.process_values(memory) + + # iter vars + stop_flag = tf.constant(False, dtype=tf.bool) + step_count = tf.constant(0, dtype=tf.int32) + + def _body(step, memory, states, outputs, stop_flag): + frame_next = states[0] + prenet_next = self.prenet(frame_next, training=False) + output, stop_token, states, _ = self.step(prenet_next, + states, + None, + training=False) + stop_token = tf.math.sigmoid(stop_token) + stop_flag = tf.greater(stop_token, self.stop_thresh) + stop_flag = tf.reduce_all(stop_flag) + # stop_flags = stop_flags.write(step, tf.logical_not(stop_flag)) + + outputs = outputs.write(step, tf.reshape(output, [-1])) + return step + 1, memory, states, outputs, stop_flag + + cond = lambda step, m, s, o, stop_flag: tf.equal(stop_flag, tf.constant(False, dtype=tf.bool)) + step_count, memory, states, outputs, stop_flag = \ + tf.while_loop(cond, + _body, + loop_vars=(step_count, memory, states, outputs, + stop_flag), + parallel_iterations=32, + swap_memory=True, + maximum_iterations=self.max_decoder_steps) + + + outputs = outputs.stack() + outputs = tf.gather(outputs, tf.range(step_count)) # pylint: disable=no-value-for-parameter + outputs = tf.expand_dims(outputs, axis=[0]) + outputs = tf.transpose(outputs, [1, 0, 2]) + outputs = tf.reshape(outputs, [1, -1, self.frame_dim]) + return outputs, stop_tokens, attentions + + + def call(self, memory, states, frames=None, memory_seq_length=None, training=False): + if training: + return self.decode(memory, states, frames, memory_seq_length) + if self.enable_tflite: + return self.decode_inference_tflite(memory, states) + return self.decode_inference(memory, states) diff --git a/TTS/tts/tf/models/tacotron2.py b/TTS/tts/tf/models/tacotron2.py new file mode 100644 index 0000000000000000000000000000000000000000..9d470b09bd9288c23f8f0836dfd10174a0e0546e --- /dev/null +++ b/TTS/tts/tf/models/tacotron2.py @@ -0,0 +1,107 @@ +import tensorflow as tf +from tensorflow import keras + +from TTS.tts.tf.layers.tacotron2 import Encoder, Decoder, Postnet +from TTS.tts.tf.utils.tf_utils import shape_list + + +#pylint: disable=too-many-ancestors, abstract-method +class Tacotron2(keras.models.Model): + def __init__(self, + num_chars, + num_speakers, + r, + postnet_output_dim=80, + decoder_output_dim=80, + attn_type='original', + attn_win=False, + attn_norm="softmax", + attn_K=4, + prenet_type="original", + prenet_dropout=True, + forward_attn=False, + trans_agent=False, + forward_attn_mask=False, + location_attn=True, + separate_stopnet=True, + bidirectional_decoder=False, + enable_tflite=False): + super(Tacotron2, self).__init__() + self.r = r + self.decoder_output_dim = decoder_output_dim + self.postnet_output_dim = postnet_output_dim + self.bidirectional_decoder = bidirectional_decoder + self.num_speakers = num_speakers + self.speaker_embed_dim = 256 + self.enable_tflite = enable_tflite + + self.embedding = keras.layers.Embedding(num_chars, 512, name='embedding') + self.encoder = Encoder(512, name='encoder') + # TODO: most of the decoder args have no use at 
the momment + self.decoder = Decoder(decoder_output_dim, + r, + attn_type=attn_type, + use_attn_win=attn_win, + attn_norm=attn_norm, + prenet_type=prenet_type, + prenet_dropout=prenet_dropout, + use_forward_attn=forward_attn, + use_trans_agent=trans_agent, + use_forward_attn_mask=forward_attn_mask, + use_location_attn=location_attn, + attn_K=attn_K, + separate_stopnet=separate_stopnet, + speaker_emb_dim=self.speaker_embed_dim, + name='decoder', + enable_tflite=enable_tflite) + self.postnet = Postnet(postnet_output_dim, 5, name='postnet') + + @tf.function(experimental_relax_shapes=True) + def call(self, characters, text_lengths=None, frames=None, training=None): + if training: + return self.training(characters, text_lengths, frames) + if not training: + return self.inference(characters) + raise RuntimeError(' [!] Set model training mode True or False') + + def training(self, characters, text_lengths, frames): + B, T = shape_list(characters) + embedding_vectors = self.embedding(characters, training=True) + encoder_output = self.encoder(embedding_vectors, training=True) + decoder_states = self.decoder.build_decoder_initial_states(B, 512, T) + decoder_frames, stop_tokens, attentions = self.decoder(encoder_output, decoder_states, frames, text_lengths, training=True) + postnet_frames = self.postnet(decoder_frames, training=True) + output_frames = decoder_frames + postnet_frames + return decoder_frames, output_frames, attentions, stop_tokens + + def inference(self, characters): + B, T = shape_list(characters) + embedding_vectors = self.embedding(characters, training=False) + encoder_output = self.encoder(embedding_vectors, training=False) + decoder_states = self.decoder.build_decoder_initial_states(B, 512, T) + decoder_frames, stop_tokens, attentions = self.decoder(encoder_output, decoder_states, training=False) + postnet_frames = self.postnet(decoder_frames, training=False) + output_frames = decoder_frames + postnet_frames + print(output_frames.shape) + return decoder_frames, output_frames, attentions, stop_tokens + + @tf.function( + experimental_relax_shapes=True, + input_signature=[ + tf.TensorSpec([1, None], dtype=tf.int32), + ],) + def inference_tflite(self, characters): + B, T = shape_list(characters) + embedding_vectors = self.embedding(characters, training=False) + encoder_output = self.encoder(embedding_vectors, training=False) + decoder_states = self.decoder.build_decoder_initial_states(B, 512, T) + decoder_frames, stop_tokens, attentions = self.decoder(encoder_output, decoder_states, training=False) + postnet_frames = self.postnet(decoder_frames, training=False) + output_frames = decoder_frames + postnet_frames + print(output_frames.shape) + return decoder_frames, output_frames, attentions, stop_tokens + + def build_inference(self, ): + # TODO: issue https://github.com/PyCQA/pylint/issues/3613 + input_ids = tf.random.uniform(shape=[1, 4], maxval=10, dtype=tf.int32) #pylint: disable=unexpected-keyword-arg + self(input_ids) diff --git a/TTS/tts/tf/utils/convert_torch_to_tf_utils.py b/TTS/tts/tf/utils/convert_torch_to_tf_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..03b418036627cf9ec72cf4228c761f6f683ff71e --- /dev/null +++ b/TTS/tts/tf/utils/convert_torch_to_tf_utils.py @@ -0,0 +1,84 @@ +import numpy as np +import tensorflow as tf + +# NOTE: linter has a problem with the current TF release +#pylint: disable=no-value-for-parameter +#pylint: disable=unexpected-keyword-arg + +def tf_create_dummy_inputs(): + """ Create dummy inputs for TF Tacotron2 model """ + 
batch_size = 4 + max_input_length = 32 + max_mel_length = 128 + pad = 1 + n_chars = 24 + input_ids = tf.random.uniform([batch_size, max_input_length + pad], maxval=n_chars, dtype=tf.int32) + input_lengths = np.random.randint(0, high=max_input_length+1 + pad, size=[batch_size]) + input_lengths[-1] = max_input_length + input_lengths = tf.convert_to_tensor(input_lengths, dtype=tf.int32) + mel_outputs = tf.random.uniform(shape=[batch_size, max_mel_length + pad, 80]) + mel_lengths = np.random.randint(0, high=max_mel_length+1 + pad, size=[batch_size]) + mel_lengths[-1] = max_mel_length + mel_lengths = tf.convert_to_tensor(mel_lengths, dtype=tf.int32) + return input_ids, input_lengths, mel_outputs, mel_lengths + + +def compare_torch_tf(torch_tensor, tf_tensor): + """ Compute the average absolute difference b/w torch and tf tensors """ + return abs(torch_tensor.detach().numpy() - tf_tensor.numpy()).mean() + + +def convert_tf_name(tf_name): + """ Convert certain patterns in TF layer names to Torch patterns """ + tf_name_tmp = tf_name + tf_name_tmp = tf_name_tmp.replace(':0', '') + tf_name_tmp = tf_name_tmp.replace('/forward_lstm/lstm_cell_1/recurrent_kernel', '/weight_hh_l0') + tf_name_tmp = tf_name_tmp.replace('/forward_lstm/lstm_cell_2/kernel', '/weight_ih_l1') + tf_name_tmp = tf_name_tmp.replace('/recurrent_kernel', '/weight_hh') + tf_name_tmp = tf_name_tmp.replace('/kernel', '/weight') + tf_name_tmp = tf_name_tmp.replace('/gamma', '/weight') + tf_name_tmp = tf_name_tmp.replace('/beta', '/bias') + tf_name_tmp = tf_name_tmp.replace('/', '.') + return tf_name_tmp + + +def transfer_weights_torch_to_tf(tf_vars, var_map_dict, state_dict): + """ Transfer weigths from torch state_dict to TF variables """ + print(" > Passing weights from Torch to TF ...") + for tf_var in tf_vars: + torch_var_name = var_map_dict[tf_var.name] + print(f' | > {tf_var.name} <-- {torch_var_name}') + # if tuple, it is a bias variable + if not isinstance(torch_var_name, tuple): + torch_layer_name = '.'.join(torch_var_name.split('.')[-2:]) + torch_weight = state_dict[torch_var_name] + if 'convolution1d/kernel' in tf_var.name or 'conv1d/kernel' in tf_var.name: + # out_dim, in_dim, filter -> filter, in_dim, out_dim + numpy_weight = torch_weight.permute([2, 1, 0]).detach().cpu().numpy() + elif 'lstm_cell' in tf_var.name and 'kernel' in tf_var.name: + numpy_weight = torch_weight.transpose(0, 1).detach().cpu().numpy() + # if variable is for bidirectional lstm and it is a bias vector there + # needs to be pre-defined two matching torch bias vectors + elif '_lstm/lstm_cell_' in tf_var.name and 'bias' in tf_var.name: + bias_vectors = [value for key, value in state_dict.items() if key in torch_var_name] + assert len(bias_vectors) == 2 + numpy_weight = bias_vectors[0] + bias_vectors[1] + elif 'rnn' in tf_var.name and 'kernel' in tf_var.name: + numpy_weight = torch_weight.transpose(0, 1).detach().cpu().numpy() + elif 'rnn' in tf_var.name and 'bias' in tf_var.name: + bias_vectors = [value for key, value in state_dict.items() if torch_var_name[:-2] in key] + assert len(bias_vectors) == 2 + numpy_weight = bias_vectors[0] + bias_vectors[1] + elif 'linear_layer' in torch_layer_name and 'weight' in torch_var_name: + numpy_weight = torch_weight.transpose(0, 1).detach().cpu().numpy() + else: + numpy_weight = torch_weight.detach().cpu().numpy() + assert np.all(tf_var.shape == numpy_weight.shape), f" [!] 
weight shapes do not match: {tf_var.name} vs {torch_var_name} --> {tf_var.shape} vs {numpy_weight.shape}" + tf.keras.backend.set_value(tf_var, numpy_weight) + return tf_vars + + +def load_tf_vars(model_tf, tf_vars): + for tf_var in tf_vars: + model_tf.get_layer(tf_var.name).set_weights(tf_var) + return model_tf diff --git a/TTS/tts/tf/utils/generic_utils.py b/TTS/tts/tf/utils/generic_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..7eba946b16aafd0a09a80ae6f255f329c97469e1 --- /dev/null +++ b/TTS/tts/tf/utils/generic_utils.py @@ -0,0 +1,103 @@ +import datetime +import importlib +import pickle +import numpy as np +import tensorflow as tf + + +def save_checkpoint(model, optimizer, current_step, epoch, r, output_path, **kwargs): + state = { + 'model': model.weights, + 'optimizer': optimizer, + 'step': current_step, + 'epoch': epoch, + 'date': datetime.date.today().strftime("%B %d, %Y"), + 'r': r + } + state.update(kwargs) + pickle.dump(state, open(output_path, 'wb')) + + +def load_checkpoint(model, checkpoint_path): + checkpoint = pickle.load(open(checkpoint_path, 'rb')) + chkp_var_dict = {var.name: var.numpy() for var in checkpoint['model']} + tf_vars = model.weights + for tf_var in tf_vars: + layer_name = tf_var.name + try: + chkp_var_value = chkp_var_dict[layer_name] + except KeyError: + class_name = list(chkp_var_dict.keys())[0].split("/")[0] + layer_name = f"{class_name}/{layer_name}" + chkp_var_value = chkp_var_dict[layer_name] + + tf.keras.backend.set_value(tf_var, chkp_var_value) + if 'r' in checkpoint.keys(): + model.decoder.set_r(checkpoint['r']) + return model + + +def sequence_mask(sequence_length, max_len=None): + """Compute a B x T_max boolean padding mask from a 1D array of sequence lengths (numpy version).""" + sequence_length = np.asarray(sequence_length) + if max_len is None: + max_len = sequence_length.max() + # B x T_max + return np.arange(max_len)[None, :] < sequence_length[:, None] + + +# @tf.custom_gradient +def check_gradient(x, grad_clip): + x_normed = tf.clip_by_norm(x, grad_clip) + grad_norm = tf.norm(grad_clip) + return x_normed, grad_norm + + +def count_parameters(model, c): + try: + return model.count_params() + except RuntimeError: + input_dummy = tf.convert_to_tensor(np.random.rand(8, 128).astype('int32')) + input_lengths = np.random.randint(100, 129, (8, )) + input_lengths[-1] = 128 + input_lengths = tf.convert_to_tensor(input_lengths.astype('int32')) + mel_spec = np.random.rand(8, 2 * c.r, + c.audio['num_mels']).astype('float32') + mel_spec = tf.convert_to_tensor(mel_spec) + speaker_ids = np.random.randint( + 0, 5, (8, )) if c.use_speaker_embedding else None + _ = model(input_dummy, input_lengths, mel_spec, speaker_ids=speaker_ids) + return model.count_params() + + +def setup_model(num_chars, num_speakers, c, enable_tflite=False): + print(" > Using model: {}".format(c.model)) + MyModel = importlib.import_module('TTS.tts.tf.models.' + c.model.lower()) + MyModel = getattr(MyModel, c.model) + if c.model.lower() in "tacotron": + raise NotImplementedError(' [!]
Tacotron model is not ready.') + # tacotron2 + model = MyModel(num_chars=num_chars, + num_speakers=num_speakers, + r=c.r, + postnet_output_dim=c.audio['num_mels'], + decoder_output_dim=c.audio['num_mels'], + attn_type=c.attention_type, + attn_win=c.windowing, + attn_norm=c.attention_norm, + prenet_type=c.prenet_type, + prenet_dropout=c.prenet_dropout, + forward_attn=c.use_forward_attn, + trans_agent=c.transition_agent, + forward_attn_mask=c.forward_attn_mask, + location_attn=c.location_attn, + attn_K=c.attention_heads, + separate_stopnet=c.separate_stopnet, + bidirectional_decoder=c.bidirectional_decoder, + enable_tflite=enable_tflite) + return model diff --git a/TTS/tts/tf/utils/io.py b/TTS/tts/tf/utils/io.py new file mode 100644 index 0000000000000000000000000000000000000000..143422d279a58b8ad1d639a723a9f0be7035a7a3 --- /dev/null +++ b/TTS/tts/tf/utils/io.py @@ -0,0 +1,41 @@ +import pickle +import datetime +import tensorflow as tf + + +def save_checkpoint(model, optimizer, current_step, epoch, r, output_path, **kwargs): + state = { + 'model': model.weights, + 'optimizer': optimizer, + 'step': current_step, + 'epoch': epoch, + 'date': datetime.date.today().strftime("%B %d, %Y"), + 'r': r + } + state.update(kwargs) + pickle.dump(state, open(output_path, 'wb')) + + +def load_checkpoint(model, checkpoint_path): + checkpoint = pickle.load(open(checkpoint_path, 'rb')) + chkp_var_dict = {var.name: var.numpy() for var in checkpoint['model']} + tf_vars = model.weights + for tf_var in tf_vars: + layer_name = tf_var.name + try: + chkp_var_value = chkp_var_dict[layer_name] + except KeyError: + class_name = list(chkp_var_dict.keys())[0].split("/")[0] + layer_name = f"{class_name}/{layer_name}" + chkp_var_value = chkp_var_dict[layer_name] + + tf.keras.backend.set_value(tf_var, chkp_var_value) + if 'r' in checkpoint.keys(): + model.decoder.set_r(checkpoint['r']) + return model + + +def load_tflite_model(tflite_path): + tflite_model = tf.lite.Interpreter(model_path=tflite_path) + tflite_model.allocate_tensors() + return tflite_model diff --git a/TTS/tts/tf/utils/tf_utils.py b/TTS/tts/tf/utils/tf_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..558936d5476e07b84ec41ab3297a77f48dee025d --- /dev/null +++ b/TTS/tts/tf/utils/tf_utils.py @@ -0,0 +1,8 @@ +import tensorflow as tf + + +def shape_list(x): + """Deal with dynamic shape in tensorflow cleanly.""" + static = x.shape.as_list() + dynamic = tf.shape(x) + return [dynamic[i] if s is None else s for i, s in enumerate(static)] diff --git a/TTS/tts/tf/utils/tflite.py b/TTS/tts/tf/utils/tflite.py new file mode 100644 index 0000000000000000000000000000000000000000..b8daf2542949a64bd7f753b198f6818e4ddf84a5 --- /dev/null +++ b/TTS/tts/tf/utils/tflite.py @@ -0,0 +1,31 @@ +import tensorflow as tf + + +def convert_tacotron2_to_tflite(model, + output_path=None, + experimental_converter=True): + """Convert Tensorflow Tacotron2 model to TFLite. 
Save a binary file if output_path is + provided, else return TFLite model.""" + + concrete_function = model.inference_tflite.get_concrete_function() + converter = tf.lite.TFLiteConverter.from_concrete_functions( + [concrete_function]) + converter.experimental_new_converter = experimental_converter + converter.optimizations = [tf.lite.Optimize.DEFAULT] + converter.target_spec.supported_ops = [ + tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS + ] + tflite_model = converter.convert() + print(f'Tflite Model size is {len(tflite_model) / (1024.0 * 1024.0)} MBs.') + if output_path is not None: + # same model binary if outputpath is provided + with open(output_path, 'wb') as f: + f.write(tflite_model) + return None + return tflite_model + + +def load_tflite_model(tflite_path): + tflite_model = tf.lite.Interpreter(model_path=tflite_path) + tflite_model.allocate_tensors() + return tflite_model diff --git a/TTS/tts/utils/__init__.py b/TTS/tts/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/TTS/tts/utils/data.py b/TTS/tts/utils/data.py new file mode 100644 index 0000000000000000000000000000000000000000..a75410b484155af1d6a7a9c9711c2890a4ada365 --- /dev/null +++ b/TTS/tts/utils/data.py @@ -0,0 +1,76 @@ +import numpy as np + + +def _pad_data(x, length): + _pad = 0 + assert x.ndim == 1 + return np.pad( + x, (0, length - x.shape[0]), mode='constant', constant_values=_pad) + + +def prepare_data(inputs): + max_len = max((len(x) for x in inputs)) + return np.stack([_pad_data(x, max_len) for x in inputs]) + + +def _pad_tensor(x, length): + _pad = 0. + assert x.ndim == 2 + x = np.pad( + x, [[0, 0], [0, length - x.shape[1]]], + mode='constant', + constant_values=_pad) + return x + + +def prepare_tensor(inputs, out_steps): + max_len = max((x.shape[1] for x in inputs)) + remainder = max_len % out_steps + pad_len = max_len + (out_steps - remainder) if remainder > 0 else max_len + return np.stack([_pad_tensor(x, pad_len) for x in inputs]) + + +def _pad_stop_target(x, length): + _pad = 0. + assert x.ndim == 1 + return np.pad( + x, (0, length - x.shape[0]), mode='constant', constant_values=_pad) + + +def prepare_stop_target(inputs, out_steps): + """ Pad row vectors with 1. 
""" + max_len = max((x.shape[0] for x in inputs)) + remainder = max_len % out_steps + pad_len = max_len + (out_steps - remainder) if remainder > 0 else max_len + return np.stack([_pad_stop_target(x, pad_len) for x in inputs]) + + +def pad_per_step(inputs, pad_len): + return np.pad( + inputs, [[0, 0], [0, 0], [0, pad_len]], + mode='constant', + constant_values=0.0) + + +# pylint: disable=attribute-defined-outside-init +class StandardScaler(): + + def set_stats(self, mean, scale): + self.mean_ = mean + self.scale_ = scale + + def reset_stats(self): + delattr(self, 'mean_') + delattr(self, 'scale_') + + def transform(self, X): + X = np.asarray(X) + X -= self.mean_ + X /= self.scale_ + return X + + def inverse_transform(self, X): + X = np.asarray(X) + X *= self.scale_ + X += self.mean_ + return X diff --git a/TTS/tts/utils/generic_utils.py b/TTS/tts/utils/generic_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..7a4c3a30df6280b0d6ba4bb72079e559f5ca7a93 --- /dev/null +++ b/TTS/tts/utils/generic_utils.py @@ -0,0 +1,292 @@ +import re +import torch +import importlib +import numpy as np +from collections import Counter + +from TTS.utils.generic_utils import check_argument + + +def split_dataset(items): + speakers = [item[-1] for item in items] + is_multi_speaker = len(set(speakers)) > 1 + eval_split_size = min(500, int(len(items) * 0.01)) + assert eval_split_size > 0, " [!] You do not have enough samples to train. You need at least 100 samples." + np.random.seed(0) + np.random.shuffle(items) + if is_multi_speaker: + items_eval = [] + speakers = [item[-1] for item in items] + speaker_counter = Counter(speakers) + while len(items_eval) < eval_split_size: + item_idx = np.random.randint(0, len(items)) + speaker_to_be_removed = items[item_idx][-1] + if speaker_counter[speaker_to_be_removed] > 1: + items_eval.append(items[item_idx]) + speaker_counter[speaker_to_be_removed] -= 1 + del items[item_idx] + return items_eval, items + return items[:eval_split_size], items[eval_split_size:] + +# from https://gist.github.com/jihunchoi/f1434a77df9db1bb337417854b398df1 +def sequence_mask(sequence_length, max_len=None): + if max_len is None: + max_len = sequence_length.data.max() + seq_range = torch.arange(max_len, + dtype=sequence_length.dtype, + device=sequence_length.device) + # B x T_max + return seq_range.unsqueeze(0) < sequence_length.unsqueeze(1) + + +def to_camel(text): + text = text.capitalize() + return re.sub(r'(?!^)_([a-zA-Z])', lambda m: m.group(1).upper(), text) + + +def setup_model(num_chars, num_speakers, c, speaker_embedding_dim=None): + print(" > Using model: {}".format(c.model)) + MyModel = importlib.import_module('TTS.tts.models.' 
+ c.model.lower()) + MyModel = getattr(MyModel, to_camel(c.model)) + if c.model.lower() in "tacotron": + model = MyModel(num_chars=num_chars + getattr(c, "add_blank", False), + num_speakers=num_speakers, + r=c.r, + postnet_output_dim=int(c.audio['fft_size'] / 2 + 1), + decoder_output_dim=c.audio['num_mels'], + gst=c.use_gst, + gst_embedding_dim=c.gst['gst_embedding_dim'], + gst_num_heads=c.gst['gst_num_heads'], + gst_style_tokens=c.gst['gst_style_tokens'], + gst_use_speaker_embedding=c.gst['gst_use_speaker_embedding'], + memory_size=c.memory_size, + attn_type=c.attention_type, + attn_win=c.windowing, + attn_norm=c.attention_norm, + prenet_type=c.prenet_type, + prenet_dropout=c.prenet_dropout, + forward_attn=c.use_forward_attn, + trans_agent=c.transition_agent, + forward_attn_mask=c.forward_attn_mask, + location_attn=c.location_attn, + attn_K=c.attention_heads, + separate_stopnet=c.separate_stopnet, + bidirectional_decoder=c.bidirectional_decoder, + double_decoder_consistency=c.double_decoder_consistency, + ddc_r=c.ddc_r, + speaker_embedding_dim=speaker_embedding_dim) + elif c.model.lower() == "tacotron2": + model = MyModel(num_chars=num_chars + getattr(c, "add_blank", False), + num_speakers=num_speakers, + r=c.r, + postnet_output_dim=c.audio['num_mels'], + decoder_output_dim=c.audio['num_mels'], + gst=c.use_gst, + gst_embedding_dim=c.gst['gst_embedding_dim'], + gst_num_heads=c.gst['gst_num_heads'], + gst_style_tokens=c.gst['gst_style_tokens'], + gst_use_speaker_embedding=c.gst['gst_use_speaker_embedding'], + attn_type=c.attention_type, + attn_win=c.windowing, + attn_norm=c.attention_norm, + prenet_type=c.prenet_type, + prenet_dropout=c.prenet_dropout, + forward_attn=c.use_forward_attn, + trans_agent=c.transition_agent, + forward_attn_mask=c.forward_attn_mask, + location_attn=c.location_attn, + attn_K=c.attention_heads, + separate_stopnet=c.separate_stopnet, + bidirectional_decoder=c.bidirectional_decoder, + double_decoder_consistency=c.double_decoder_consistency, + ddc_r=c.ddc_r, + speaker_embedding_dim=speaker_embedding_dim) + elif c.model.lower() == "glow_tts": + model = MyModel(num_chars=num_chars + getattr(c, "add_blank", False), + hidden_channels_enc=c['hidden_channels_encoder'], + hidden_channels_dec=c['hidden_channels_decoder'], + hidden_channels_dp=c['hidden_channels_duration_predictor'], + out_channels=c.audio['num_mels'], + encoder_type=c.encoder_type, + encoder_params=c.encoder_params, + use_encoder_prenet=c["use_encoder_prenet"], + num_flow_blocks_dec=12, + kernel_size_dec=5, + dilation_rate=1, + num_block_layers=4, + dropout_p_dec=0.05, + num_speakers=num_speakers, + c_in_channels=0, + num_splits=4, + num_squeeze=2, + sigmoid_scale=False, + mean_only=True, + external_speaker_embedding_dim=speaker_embedding_dim) + elif c.model.lower() == "speedy_speech": + model = MyModel(num_chars=num_chars + getattr(c, "add_blank", False), + out_channels=c.audio['num_mels'], + hidden_channels=c['hidden_channels'], + positional_encoding=c['positional_encoding'], + encoder_type=c['encoder_type'], + encoder_params=c['encoder_params'], + decoder_type=c['decoder_type'], + decoder_params=c['decoder_params'], + c_in_channels=0) + return model + +def is_tacotron(c): + return False if c['model'] in ['speedy_speech', 'glow_tts'] else True + +def check_config_tts(c): + check_argument('model', c, enum_list=['tacotron', 'tacotron2', 'glow_tts', 'speedy_speech'], restricted=True, val_type=str) + check_argument('run_name', c, restricted=True, val_type=str) + check_argument('run_description', c, 
val_type=str) + + # AUDIO + check_argument('audio', c, restricted=True, val_type=dict) + + # audio processing parameters + check_argument('num_mels', c['audio'], restricted=True, val_type=int, min_val=10, max_val=2056) + check_argument('fft_size', c['audio'], restricted=True, val_type=int, min_val=128, max_val=4058) + check_argument('sample_rate', c['audio'], restricted=True, val_type=int, min_val=512, max_val=100000) + check_argument('frame_length_ms', c['audio'], restricted=True, val_type=float, min_val=10, max_val=1000, alternative='win_length') + check_argument('frame_shift_ms', c['audio'], restricted=True, val_type=float, min_val=1, max_val=1000, alternative='hop_length') + check_argument('preemphasis', c['audio'], restricted=True, val_type=float, min_val=0, max_val=1) + check_argument('min_level_db', c['audio'], restricted=True, val_type=int, min_val=-1000, max_val=10) + check_argument('ref_level_db', c['audio'], restricted=True, val_type=int, min_val=0, max_val=1000) + check_argument('power', c['audio'], restricted=True, val_type=float, min_val=1, max_val=5) + check_argument('griffin_lim_iters', c['audio'], restricted=True, val_type=int, min_val=10, max_val=1000) + + # vocabulary parameters + check_argument('characters', c, restricted=False, val_type=dict) + check_argument('pad', c['characters'] if 'characters' in c.keys() else {}, restricted='characters' in c.keys(), val_type=str) + check_argument('eos', c['characters'] if 'characters' in c.keys() else {}, restricted='characters' in c.keys(), val_type=str) + check_argument('bos', c['characters'] if 'characters' in c.keys() else {}, restricted='characters' in c.keys(), val_type=str) + check_argument('characters', c['characters'] if 'characters' in c.keys() else {}, restricted='characters' in c.keys(), val_type=str) + check_argument('phonemes', c['characters'] if 'characters' in c.keys() else {}, restricted='characters' in c.keys(), val_type=str) + check_argument('punctuations', c['characters'] if 'characters' in c.keys() else {}, restricted='characters' in c.keys(), val_type=str) + + # normalization parameters + check_argument('signal_norm', c['audio'], restricted=True, val_type=bool) + check_argument('symmetric_norm', c['audio'], restricted=True, val_type=bool) + check_argument('max_norm', c['audio'], restricted=True, val_type=float, min_val=0.1, max_val=1000) + check_argument('clip_norm', c['audio'], restricted=True, val_type=bool) + check_argument('mel_fmin', c['audio'], restricted=True, val_type=float, min_val=0.0, max_val=1000) + check_argument('mel_fmax', c['audio'], restricted=True, val_type=float, min_val=500.0) + check_argument('spec_gain', c['audio'], restricted=True, val_type=[int, float], min_val=1, max_val=100) + check_argument('do_trim_silence', c['audio'], restricted=True, val_type=bool) + check_argument('trim_db', c['audio'], restricted=True, val_type=int) + + # training parameters + check_argument('batch_size', c, restricted=True, val_type=int, min_val=1) + check_argument('eval_batch_size', c, restricted=True, val_type=int, min_val=1) + check_argument('r', c, restricted=True, val_type=int, min_val=1) + check_argument('gradual_training', c, restricted=False, val_type=list) + check_argument('mixed_precision', c, restricted=False, val_type=bool) + # check_argument('grad_accum', c, restricted=True, val_type=int, min_val=1, max_val=100) + + # loss parameters + check_argument('loss_masking', c, restricted=True, val_type=bool) + if c['model'].lower() in ['tacotron', 'tacotron2']: + check_argument('decoder_loss_alpha', c, 
restricted=True, val_type=float, min_val=0) + check_argument('postnet_loss_alpha', c, restricted=True, val_type=float, min_val=0) + check_argument('postnet_diff_spec_alpha', c, restricted=True, val_type=float, min_val=0) + check_argument('decoder_diff_spec_alpha', c, restricted=True, val_type=float, min_val=0) + check_argument('decoder_ssim_alpha', c, restricted=True, val_type=float, min_val=0) + check_argument('postnet_ssim_alpha', c, restricted=True, val_type=float, min_val=0) + check_argument('ga_alpha', c, restricted=True, val_type=float, min_val=0) + if c['model'].lower() == "speedy_speech": + check_argument('ssim_alpha', c, restricted=True, val_type=float, min_val=0) + check_argument('l1_alpha', c, restricted=True, val_type=float, min_val=0) + check_argument('huber_alpha', c, restricted=True, val_type=float, min_val=0) + + # validation parameters + check_argument('run_eval', c, restricted=True, val_type=bool) + check_argument('test_delay_epochs', c, restricted=True, val_type=int, min_val=0) + check_argument('test_sentences_file', c, restricted=False, val_type=str) + + # optimizer + check_argument('noam_schedule', c, restricted=False, val_type=bool) + check_argument('grad_clip', c, restricted=True, val_type=float, min_val=0.0) + check_argument('epochs', c, restricted=True, val_type=int, min_val=1) + check_argument('lr', c, restricted=True, val_type=float, min_val=0) + check_argument('wd', c, restricted=is_tacotron(c), val_type=float, min_val=0) + check_argument('warmup_steps', c, restricted=True, val_type=int, min_val=0) + check_argument('seq_len_norm', c, restricted=is_tacotron(c), val_type=bool) + + # tacotron prenet + check_argument('memory_size', c, restricted=is_tacotron(c), val_type=int, min_val=-1) + check_argument('prenet_type', c, restricted=is_tacotron(c), val_type=str, enum_list=['original', 'bn']) + check_argument('prenet_dropout', c, restricted=is_tacotron(c), val_type=bool) + + # attention + check_argument('attention_type', c, restricted=is_tacotron(c), val_type=str, enum_list=['graves', 'original', 'dynamic_convolution']) + check_argument('attention_heads', c, restricted=is_tacotron(c), val_type=int) + check_argument('attention_norm', c, restricted=is_tacotron(c), val_type=str, enum_list=['sigmoid', 'softmax']) + check_argument('windowing', c, restricted=is_tacotron(c), val_type=bool) + check_argument('use_forward_attn', c, restricted=is_tacotron(c), val_type=bool) + check_argument('forward_attn_mask', c, restricted=is_tacotron(c), val_type=bool) + check_argument('transition_agent', c, restricted=is_tacotron(c), val_type=bool) + check_argument('location_attn', c, restricted=is_tacotron(c), val_type=bool) + check_argument('bidirectional_decoder', c, restricted=is_tacotron(c), val_type=bool) + check_argument('double_decoder_consistency', c, restricted=is_tacotron(c), val_type=bool) + check_argument('ddc_r', c, restricted='double_decoder_consistency' in c.keys(), min_val=1, max_val=7, val_type=int) + + if c['model'].lower() in ['tacotron', 'tacotron2']: + # stopnet + check_argument('stopnet', c, restricted=is_tacotron(c), val_type=bool) + check_argument('separate_stopnet', c, restricted=is_tacotron(c), val_type=bool) + + # Model Parameters for non-tacotron models + if c['model'].lower() == "speedy_speech": + check_argument('positional_encoding', c, restricted=True, val_type=type) + check_argument('encoder_type', c, restricted=True, val_type=str) + check_argument('encoder_params', c,
restricted=True, val_type=dict) + check_argument('decoder_residual_conv_bn_params', c, restricted=True, val_type=dict) + + # GlowTTS parameters + check_argument('encoder_type', c, restricted=not is_tacotron(c), val_type=str) + + # tensorboard + check_argument('print_step', c, restricted=True, val_type=int, min_val=1) + check_argument('tb_plot_step', c, restricted=True, val_type=int, min_val=1) + check_argument('save_step', c, restricted=True, val_type=int, min_val=1) + check_argument('checkpoint', c, restricted=True, val_type=bool) + check_argument('tb_model_param_stats', c, restricted=True, val_type=bool) + + # dataloading + # pylint: disable=import-outside-toplevel + from TTS.tts.utils.text import cleaners + check_argument('text_cleaner', c, restricted=True, val_type=str, enum_list=dir(cleaners)) + check_argument('enable_eos_bos_chars', c, restricted=True, val_type=bool) + check_argument('num_loader_workers', c, restricted=True, val_type=int, min_val=0) + check_argument('num_val_loader_workers', c, restricted=True, val_type=int, min_val=0) + check_argument('batch_group_size', c, restricted=True, val_type=int, min_val=0) + check_argument('min_seq_len', c, restricted=True, val_type=int, min_val=0) + check_argument('max_seq_len', c, restricted=True, val_type=int, min_val=10) + check_argument('compute_input_seq_cache', c, restricted=True, val_type=bool) + + # paths + check_argument('output_path', c, restricted=True, val_type=str) + + # multi-speaker and gst + check_argument('use_speaker_embedding', c, restricted=True, val_type=bool) + check_argument('use_external_speaker_embedding_file', c, restricted=c['use_speaker_embedding'], val_type=bool) + check_argument('external_speaker_embedding_file', c, restricted=c['use_external_speaker_embedding_file'], val_type=str) + if c['model'].lower() in ['tacotron', 'tacotron2'] and c['use_gst']: + check_argument('use_gst', c, restricted=is_tacotron(c), val_type=bool) + check_argument('gst', c, restricted=is_tacotron(c), val_type=dict) + check_argument('gst_style_input', c['gst'], restricted=is_tacotron(c), val_type=[str, dict]) + check_argument('gst_embedding_dim', c['gst'], restricted=is_tacotron(c), val_type=int, min_val=0, max_val=1000) + check_argument('gst_use_speaker_embedding', c['gst'], restricted=is_tacotron(c), val_type=bool) + check_argument('gst_num_heads', c['gst'], restricted=is_tacotron(c), val_type=int, min_val=2, max_val=10) + check_argument('gst_style_tokens', c['gst'], restricted=is_tacotron(c), val_type=int, min_val=1, max_val=1000) + + # datasets - checking only the first entry + check_argument('datasets', c, restricted=True, val_type=list) + for dataset_entry in c['datasets']: + check_argument('name', dataset_entry, restricted=True, val_type=str) + check_argument('path', dataset_entry, restricted=True, val_type=str) + check_argument('meta_file_train', dataset_entry, restricted=True, val_type=[str, list]) + check_argument('meta_file_val', dataset_entry, restricted=True, val_type=str) diff --git a/TTS/tts/utils/io.py b/TTS/tts/utils/io.py new file mode 100644 index 0000000000000000000000000000000000000000..63e042833ff24a12e6106c3b9bfffc3b5057ac6a --- /dev/null +++ b/TTS/tts/utils/io.py @@ -0,0 +1,112 @@ +import os +import torch +import datetime +import pickle as pickle_tts + +from TTS.utils.io import RenamingUnpickler + + + +def load_checkpoint(model, checkpoint_path, amp=None, use_cuda=False, eval=False): + """Load ```TTS.tts.models``` checkpoints. + + Args: + model (TTS.tts.models): model object to load the weights for. 
+ checkpoint_path (string): checkpoint file path. + amp (apex.amp, optional): Apex amp abject to load apex related state vars. Defaults to None. + use_cuda (bool, optional): load model to GPU if True. Defaults to False. + + Returns: + [type]: [description] + """ + try: + state = torch.load(checkpoint_path, map_location=torch.device('cpu')) + except ModuleNotFoundError: + pickle_tts.Unpickler = RenamingUnpickler + state = torch.load(checkpoint_path, map_location=torch.device('cpu'), pickle_module=pickle_tts) + model.load_state_dict(state['model']) + if amp and 'amp' in state: + amp.load_state_dict(state['amp']) + if use_cuda: + model.cuda() + # set model stepsize + if hasattr(model.decoder, 'r'): + model.decoder.set_r(state['r']) + print(" > Model r: ", state['r']) + if eval: + model.eval() + return model, state + + +def save_model(model, optimizer, current_step, epoch, r, output_path, amp_state_dict=None, **kwargs): + """Save ```TTS.tts.models``` states with extra fields. + + Args: + model (TTS.tts.models.Model): models object to be saved. + optimizer (torch.optim.optimizers.Optimizer): model optimizer used for training. + current_step (int): current number of training steps. + epoch (int): current number of training epochs. + r (int): model reduction rate for Tacotron models. + output_path (str): output path to save the model file. + amp_state_dict (state_dict, optional): Apex.amp state dict if Apex is enabled. Defaults to None. + """ + if hasattr(model, 'module'): + model_state = model.module.state_dict() + else: + model_state = model.state_dict() + state = { + 'model': model_state, + 'optimizer': optimizer.state_dict() if optimizer is not None else None, + 'step': current_step, + 'epoch': epoch, + 'date': datetime.date.today().strftime("%B %d, %Y"), + 'r': r + } + if amp_state_dict: + state['amp'] = amp_state_dict + state.update(kwargs) + torch.save(state, output_path) + + +def save_checkpoint(model, optimizer, current_step, epoch, r, output_folder, **kwargs): + """Save model checkpoint, intended for saving checkpoints at training. + + Args: + model (TTS.tts.models.Model): models object to be saved. + optimizer (torch.optim.optimizers.Optimizer): model optimizer used for training. + current_step (int): current number of training steps. + epoch (int): current number of training epochs. + r (int): model reduction rate for Tacotron models. + output_path (str): output path to save the model file. + """ + file_name = 'checkpoint_{}.pth.tar'.format(current_step) + checkpoint_path = os.path.join(output_folder, file_name) + print(" > CHECKPOINT : {}".format(checkpoint_path)) + save_model(model, optimizer, current_step, epoch, r, checkpoint_path, **kwargs) + + +def save_best_model(target_loss, best_loss, model, optimizer, current_step, epoch, r, output_folder, **kwargs): + """Save model checkpoint, intended for saving the best model after each epoch. + It compares the current model loss with the best loss so far and saves the + model if the current loss is better. + + Args: + target_loss (float): current model loss. + best_loss (float): best loss so far. + model (TTS.tts.models.Model): models object to be saved. + optimizer (torch.optim.optimizers.Optimizer): model optimizer used for training. + current_step (int): current number of training steps. + epoch (int): current number of training epochs. + r (int): model reduction rate for Tacotron models. + output_path (str): output path to save the model file. + + Returns: + float: updated current best loss. 
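As a usage sketch only (not part of this patch): the checkpoint helpers above are meant to be called from a training loop roughly like the following, where the model, optimizer, losses, and output folder are placeholder assumptions.

```python
import os
import torch
from TTS.tts.utils.io import save_checkpoint, save_best_model

model = torch.nn.Linear(10, 10)               # stand-in for a TTS.tts.models model
optimizer = torch.optim.Adam(model.parameters())
output_folder = "checkpoints"                 # hypothetical output folder
os.makedirs(output_folder, exist_ok=True)

best_loss = float("inf")
for epoch in range(2):                        # illustrative loop only
    current_step = (epoch + 1) * 100
    eval_loss = 1.0 / (epoch + 1)             # pretend evaluation loss
    save_checkpoint(model, optimizer, current_step, epoch, 1, output_folder)
    # saves only when the loss improves and returns the updated best loss
    best_loss = save_best_model(eval_loss, best_loss, model, optimizer,
                                current_step, epoch, 1, output_folder)
```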
+ """ + if target_loss < best_loss: + file_name = 'best_model.pth.tar' + checkpoint_path = os.path.join(output_folder, file_name) + print(" >> BEST MODEL : {}".format(checkpoint_path)) + save_model(model, optimizer, current_step, epoch, r, checkpoint_path, model_loss=target_loss, **kwargs) + best_loss = target_loss + return best_loss diff --git a/TTS/tts/utils/measures.py b/TTS/tts/utils/measures.py new file mode 100644 index 0000000000000000000000000000000000000000..fdd31242cd2620811d84737c5c96781779d6bb03 --- /dev/null +++ b/TTS/tts/utils/measures.py @@ -0,0 +1,15 @@ +def alignment_diagonal_score(alignments, binary=False): + """ + Compute how diagonal alignment predictions are. It is useful + to measure the alignment consistency of a model + Args: + alignments (torch.Tensor): batch of alignments. + binary (bool): if True, ignore scores and consider attention + as a binary mask. + Shape: + alignments : batch x decoder_steps x encoder_steps + """ + maxs = alignments.max(dim=1)[0] + if binary: + maxs[maxs > 0] = 1 + return maxs.mean(dim=1).mean(dim=0).item() diff --git a/TTS/tts/utils/speakers.py b/TTS/tts/utils/speakers.py new file mode 100644 index 0000000000000000000000000000000000000000..43bb1f6a0d972afccebfa14242019a8a91e7628d --- /dev/null +++ b/TTS/tts/utils/speakers.py @@ -0,0 +1,73 @@ +import os +import json + + +def make_speakers_json_path(out_path): + """Returns conventional speakers.json location.""" + return os.path.join(out_path, "speakers.json") + + +def load_speaker_mapping(out_path): + """Loads speaker mapping if already present.""" + try: + if os.path.splitext(out_path)[1] == '.json': + json_file = out_path + else: + json_file = make_speakers_json_path(out_path) + with open(json_file) as f: + return json.load(f) + except FileNotFoundError: + return {} + +def save_speaker_mapping(out_path, speaker_mapping): + """Saves speaker mapping if not yet present.""" + speakers_json_path = make_speakers_json_path(out_path) + with open(speakers_json_path, "w") as f: + json.dump(speaker_mapping, f, indent=4) + + +def get_speakers(items): + """Returns a sorted, unique list of speakers in a given dataset.""" + speakers = {e[2] for e in items} + return sorted(speakers) + +def parse_speakers(c, args, meta_data_train, OUT_PATH): + """ Returns number of speakers, speaker embedding shape and speaker mapping""" + if c.use_speaker_embedding: + speakers = get_speakers(meta_data_train) + if args.restore_path: + if c.use_external_speaker_embedding_file: # if restore checkpoint and use External Embedding file + prev_out_path = os.path.dirname(args.restore_path) + speaker_mapping = load_speaker_mapping(prev_out_path) + if not speaker_mapping: + print("WARNING: speakers.json was not found in restore_path, trying to use CONFIG.external_speaker_embedding_file") + speaker_mapping = load_speaker_mapping(c.external_speaker_embedding_file) + if not speaker_mapping: + raise RuntimeError("You must copy the file speakers.json to restore_path, or set a valid file in CONFIG.external_speaker_embedding_file") + speaker_embedding_dim = len(speaker_mapping[list(speaker_mapping.keys())[0]]['embedding']) + elif not c.use_external_speaker_embedding_file: # if restore checkpoint and don't use External Embedding file + prev_out_path = os.path.dirname(args.restore_path) + speaker_mapping = load_speaker_mapping(prev_out_path) + speaker_embedding_dim = None + assert all([speaker in speaker_mapping + for speaker in speakers]), "As of now you, you cannot " \ + "introduce new speakers to " \ + "a previously trained model." 
+        elif c.use_external_speaker_embedding_file and c.external_speaker_embedding_file:  # starting a new training run with an external embedding file
+            speaker_mapping = load_speaker_mapping(c.external_speaker_embedding_file)
+            speaker_embedding_dim = len(speaker_mapping[list(speaker_mapping.keys())[0]]['embedding'])
+        elif c.use_external_speaker_embedding_file and not c.external_speaker_embedding_file:  # starting a new training run with an external embedding file, but no file was given
+            raise RuntimeError("use_external_speaker_embedding_file is True, so you need to pass an external speaker embedding file. Run the GE2E-Speaker_Encoder-ExtractSpeakerEmbeddings-by-sample.ipynb or AngularPrototypical-Speaker_Encoder-ExtractSpeakerEmbeddings-by-sample.ipynb notebook in the notebooks/ folder.")
+        else:  # starting a new training run without an external embedding file
+            speaker_mapping = {name: i for i, name in enumerate(speakers)}
+            speaker_embedding_dim = None
+            save_speaker_mapping(OUT_PATH, speaker_mapping)
+        num_speakers = len(speaker_mapping)
+        print(" > Training with {} speakers: {}".format(len(speakers),
+                                                        ", ".join(speakers)))
+    else:
+        num_speakers = 0
+        speaker_embedding_dim = None
+        speaker_mapping = None
+
+    return num_speakers, speaker_embedding_dim, speaker_mapping
\ No newline at end of file
diff --git a/TTS/tts/utils/ssim.py b/TTS/tts/utils/ssim.py
new file mode 100644
index 0000000000000000000000000000000000000000..399d0898de75eb3d747557143abd48fcd05f4dc3
--- /dev/null
+++ b/TTS/tts/utils/ssim.py
@@ -0,0 +1,75 @@
+# taken from https://github.com/Po-Hsun-Su/pytorch-ssim
+
+from math import exp
+
+import torch
+import torch.nn.functional as F
+from torch.autograd import Variable
+
+
+def gaussian(window_size, sigma):
+    gauss = torch.Tensor([exp(-(x - window_size//2)**2/float(2*sigma**2)) for x in range(window_size)])
+    return gauss/gauss.sum()
+
+def create_window(window_size, channel):
+    _1D_window = gaussian(window_size, 1.5).unsqueeze(1)
+    _2D_window = _1D_window.mm(_1D_window.t()).float().unsqueeze(0).unsqueeze(0)
+    window = Variable(_2D_window.expand(channel, 1, window_size, window_size).contiguous())
+    return window
+
+def _ssim(img1, img2, window, window_size, channel, size_average = True):
+    mu1 = F.conv2d(img1, window, padding = window_size//2, groups = channel)
+    mu2 = F.conv2d(img2, window, padding = window_size//2, groups = channel)
+
+    mu1_sq = mu1.pow(2)
+    mu2_sq = mu2.pow(2)
+    mu1_mu2 = mu1*mu2
+
+    sigma1_sq = F.conv2d(img1*img1, window, padding = window_size//2, groups = channel) - mu1_sq
+    sigma2_sq = F.conv2d(img2*img2, window, padding = window_size//2, groups = channel) - mu2_sq
+    sigma12 = F.conv2d(img1*img2, window, padding = window_size//2, groups = channel) - mu1_mu2
+
+    C1 = 0.01**2
+    C2 = 0.03**2
+
+    ssim_map = ((2*mu1_mu2 + C1)*(2*sigma12 + C2))/((mu1_sq + mu2_sq + C1)*(sigma1_sq + sigma2_sq + C2))
+
+    if size_average:
+        return ssim_map.mean()
+    return ssim_map.mean(1).mean(1).mean(1)
+
+class SSIM(torch.nn.Module):
+    def __init__(self, window_size = 11, size_average = True):
+        super().__init__()
+        self.window_size = window_size
+        self.size_average = size_average
+        self.channel = 1
+        self.window = create_window(window_size, self.channel)
+
+    def forward(self, img1, img2):
+        (_, channel, _, _) = img1.size()
+
+        if channel == self.channel and self.window.data.type() == img1.data.type():
+            window = self.window
+        else:
+            window = create_window(self.window_size, channel)
+
+            if img1.is_cuda:
+                window = window.cuda(img1.get_device())
+            window = window.type_as(img1)
+
+            self.window = window
+
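As a quick, hedged sanity check of the SSIM code above (shapes and values are illustrative assumptions): spectrograms are treated as single-channel images of shape [batch, 1, frames, bins].

```python
import torch
from TTS.tts.utils.ssim import SSIM, ssim

pred = torch.rand(2, 1, 120, 80)              # e.g. predicted mel spectrograms
target = pred + 0.05 * torch.rand(2, 1, 120, 80)

print(ssim(pred, target))                     # functional form, scalar tensor
print(SSIM(window_size=11)(pred, target))     # module form caches the Gaussian window
print(ssim(pred, pred))                       # identical inputs give a value of ~1.0
```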
self.channel = channel + + + return _ssim(img1, img2, window, self.window_size, channel, self.size_average) + +def ssim(img1, img2, window_size = 11, size_average = True): + (_, channel, _, _) = img1.size() + window = create_window(window_size, channel) + + if img1.is_cuda: + window = window.cuda(img1.get_device()) + window = window.type_as(img1) + + return _ssim(img1, img2, window, window_size, channel, size_average) diff --git a/TTS/tts/utils/synthesis.py b/TTS/tts/utils/synthesis.py new file mode 100644 index 0000000000000000000000000000000000000000..7e71df64668049e6430944c2631abf493ae594cd --- /dev/null +++ b/TTS/tts/utils/synthesis.py @@ -0,0 +1,281 @@ +import pkg_resources +installed = {pkg.key for pkg in pkg_resources.working_set} #pylint: disable=not-an-iterable +if 'tensorflow' in installed or 'tensorflow-gpu' in installed: + import tensorflow as tf +import torch +import numpy as np +from .text import text_to_sequence, phoneme_to_sequence + + +def text_to_seqvec(text, CONFIG): + text_cleaner = [CONFIG.text_cleaner] + # text ot phonemes to sequence vector + if CONFIG.use_phonemes: + seq = np.asarray( + phoneme_to_sequence(text, text_cleaner, CONFIG.phoneme_language, + CONFIG.enable_eos_bos_chars, + tp=CONFIG.characters if 'characters' in CONFIG.keys() else None, + add_blank=CONFIG['add_blank'] if 'add_blank' in CONFIG.keys() else False), + dtype=np.int32) + else: + seq = np.asarray( + text_to_sequence(text, text_cleaner, tp=CONFIG.characters if 'characters' in CONFIG.keys() else None, + add_blank=CONFIG['add_blank'] if 'add_blank' in CONFIG.keys() else False), dtype=np.int32) + return seq + + +def numpy_to_torch(np_array, dtype, cuda=False): + if np_array is None: + return None + tensor = torch.as_tensor(np_array, dtype=dtype) + if cuda: + return tensor.cuda() + return tensor + + +def numpy_to_tf(np_array, dtype): + if np_array is None: + return None + tensor = tf.convert_to_tensor(np_array, dtype=dtype) + return tensor + + +def compute_style_mel(style_wav, ap, cuda=False): + style_mel = torch.FloatTensor(ap.melspectrogram( + ap.load_wav(style_wav, sr=ap.sample_rate))).unsqueeze(0) + if cuda: + return style_mel.cuda() + return style_mel + + +def run_model_torch(model, inputs, CONFIG, truncated, speaker_id=None, style_mel=None, speaker_embeddings=None): + if 'tacotron' in CONFIG.model.lower(): + if CONFIG.use_gst: + decoder_output, postnet_output, alignments, stop_tokens = model.inference( + inputs, style_mel=style_mel, speaker_ids=speaker_id, speaker_embeddings=speaker_embeddings) + else: + if truncated: + decoder_output, postnet_output, alignments, stop_tokens = model.inference_truncated( + inputs, speaker_ids=speaker_id, speaker_embeddings=speaker_embeddings) + else: + decoder_output, postnet_output, alignments, stop_tokens = model.inference( + inputs, speaker_ids=speaker_id, speaker_embeddings=speaker_embeddings) + elif 'glow' in CONFIG.model.lower(): + inputs_lengths = torch.tensor(inputs.shape[1:2]).to(inputs.device) # pylint: disable=not-callable + if hasattr(model, 'module'): + # distributed model + postnet_output, _, _, _, alignments, _, _ = model.module.inference(inputs, inputs_lengths, g=speaker_id if speaker_id is not None else speaker_embeddings) + else: + postnet_output, _, _, _, alignments, _, _ = model.inference(inputs, inputs_lengths, g=speaker_id if speaker_id is not None else speaker_embeddings) + postnet_output = postnet_output.permute(0, 2, 1) + # these only belong to tacotron models. 
+ decoder_output = None + stop_tokens = None + elif 'speedy_speech' in CONFIG.model.lower(): + inputs_lengths = torch.tensor(inputs.shape[1:2]).to(inputs.device) # pylint: disable=not-callable + if hasattr(model, 'module'): + # distributed model + postnet_output, alignments= model.module.inference(inputs, inputs_lengths, g=speaker_id if speaker_id is not None else speaker_embeddings) + else: + postnet_output, alignments= model.inference(inputs, inputs_lengths, g=speaker_id if speaker_id is not None else speaker_embeddings) + postnet_output = postnet_output.permute(0, 2, 1) + # these only belong to tacotron models. + decoder_output = None + stop_tokens = None + return decoder_output, postnet_output, alignments, stop_tokens + + +def run_model_tf(model, inputs, CONFIG, truncated, speaker_id=None, style_mel=None): + if CONFIG.use_gst and style_mel is not None: + raise NotImplementedError(' [!] GST inference not implemented for TF') + if truncated: + raise NotImplementedError(' [!] Truncated inference not implemented for TF') + if speaker_id is not None: + raise NotImplementedError(' [!] Multi-Speaker not implemented for TF') + # TODO: handle multispeaker case + decoder_output, postnet_output, alignments, stop_tokens = model( + inputs, training=False) + return decoder_output, postnet_output, alignments, stop_tokens + + +def run_model_tflite(model, inputs, CONFIG, truncated, speaker_id=None, style_mel=None): + if CONFIG.use_gst and style_mel is not None: + raise NotImplementedError(' [!] GST inference not implemented for TfLite') + if truncated: + raise NotImplementedError(' [!] Truncated inference not implemented for TfLite') + if speaker_id is not None: + raise NotImplementedError(' [!] Multi-Speaker not implemented for TfLite') + # get input and output details + input_details = model.get_input_details() + output_details = model.get_output_details() + # reshape input tensor for the new input shape + model.resize_tensor_input(input_details[0]['index'], inputs.shape) + model.allocate_tensors() + detail = input_details[0] + # input_shape = detail['shape'] + model.set_tensor(detail['index'], inputs) + # run the model + model.invoke() + # collect outputs + decoder_output = model.get_tensor(output_details[0]['index']) + postnet_output = model.get_tensor(output_details[1]['index']) + # tflite model only returns feature frames + return decoder_output, postnet_output, None, None + + +def parse_outputs_torch(postnet_output, decoder_output, alignments, stop_tokens): + postnet_output = postnet_output[0].data.cpu().numpy() + decoder_output = None if decoder_output is None else decoder_output[0].data.cpu().numpy() + alignment = alignments[0].cpu().data.numpy() + stop_tokens = None if stop_tokens is None else stop_tokens[0].cpu().numpy() + return postnet_output, decoder_output, alignment, stop_tokens + + +def parse_outputs_tf(postnet_output, decoder_output, alignments, stop_tokens): + postnet_output = postnet_output[0].numpy() + decoder_output = decoder_output[0].numpy() + alignment = alignments[0].numpy() + stop_tokens = stop_tokens[0].numpy() + return postnet_output, decoder_output, alignment, stop_tokens + + +def parse_outputs_tflite(postnet_output, decoder_output): + postnet_output = postnet_output[0] + decoder_output = decoder_output[0] + return postnet_output, decoder_output + + +def trim_silence(wav, ap): + return wav[:ap.find_endpoint(wav)] + + +def inv_spectrogram(postnet_output, ap, CONFIG): + if CONFIG.model.lower() in ["tacotron"]: + wav = ap.inv_spectrogram(postnet_output.T) + else: + wav = 
ap.inv_melspectrogram(postnet_output.T) + return wav + + +def id_to_torch(speaker_id, cuda=False): + if speaker_id is not None: + speaker_id = np.asarray(speaker_id) + # TODO: test this for tacotron models + speaker_id = torch.from_numpy(speaker_id) + if cuda: + return speaker_id.cuda() + return speaker_id + + +def embedding_to_torch(speaker_embedding, cuda=False): + if speaker_embedding is not None: + speaker_embedding = np.asarray(speaker_embedding) + speaker_embedding = torch.from_numpy(speaker_embedding).unsqueeze(0).type(torch.FloatTensor) + if cuda: + return speaker_embedding.cuda() + return speaker_embedding + + +# TODO: perform GL with pytorch for batching +def apply_griffin_lim(inputs, input_lens, CONFIG, ap): + '''Apply griffin-lim to each sample iterating throught the first dimension. + Args: + inputs (Tensor or np.Array): Features to be converted by GL. First dimension is the batch size. + input_lens (Tensor or np.Array): 1D array of sample lengths. + CONFIG (Dict): TTS config. + ap (AudioProcessor): TTS audio processor. + ''' + wavs = [] + for idx, spec in enumerate(inputs): + wav_len = (input_lens[idx] * ap.hop_length) - ap.hop_length # inverse librosa padding + wav = inv_spectrogram(spec, ap, CONFIG) + # assert len(wav) == wav_len, f" [!] wav lenght: {len(wav)} vs expected: {wav_len}" + wavs.append(wav[:wav_len]) + return wavs + + +def synthesis(model, + text, + CONFIG, + use_cuda, + ap, + speaker_id=None, + style_wav=None, + truncated=False, + enable_eos_bos_chars=False, #pylint: disable=unused-argument + use_griffin_lim=False, + do_trim_silence=False, + speaker_embedding=None, + backend='torch'): + """Synthesize voice for the given text. + + Args: + model (TTS.tts.models): model to synthesize. + text (str): target text + CONFIG (dict): config dictionary to be loaded from config.json. + use_cuda (bool): enable cuda. + ap (TTS.tts.utils.audio.AudioProcessor): audio processor to process + model outputs. + speaker_id (int): id of speaker + style_wav (str): Uses for style embedding of GST. + truncated (bool): keep model states after inference. It can be used + for continuous inference at long texts. + enable_eos_bos_chars (bool): enable special chars for end of sentence and start of sentence. + do_trim_silence (bool): trim silence after synthesis. 
+ backend (str): tf or torch + """ + # GST processing + style_mel = None + if 'use_gst' in CONFIG.keys() and CONFIG.use_gst and style_wav is not None: + if isinstance(style_wav, dict): + style_mel = style_wav + else: + style_mel = compute_style_mel(style_wav, ap, cuda=use_cuda) + # preprocess the given text + inputs = text_to_seqvec(text, CONFIG) + # pass tensors to backend + if backend == 'torch': + if speaker_id is not None: + speaker_id = id_to_torch(speaker_id, cuda=use_cuda) + + if speaker_embedding is not None: + speaker_embedding = embedding_to_torch(speaker_embedding, cuda=use_cuda) + + if not isinstance(style_mel, dict): + style_mel = numpy_to_torch(style_mel, torch.float, cuda=use_cuda) + inputs = numpy_to_torch(inputs, torch.long, cuda=use_cuda) + inputs = inputs.unsqueeze(0) + elif backend == 'tf': + # TODO: handle speaker id for tf model + style_mel = numpy_to_tf(style_mel, tf.float32) + inputs = numpy_to_tf(inputs, tf.int32) + inputs = tf.expand_dims(inputs, 0) + elif backend == 'tflite': + style_mel = numpy_to_tf(style_mel, tf.float32) + inputs = numpy_to_tf(inputs, tf.int32) + inputs = tf.expand_dims(inputs, 0) + # synthesize voice + if backend == 'torch': + decoder_output, postnet_output, alignments, stop_tokens = run_model_torch( + model, inputs, CONFIG, truncated, speaker_id, style_mel, speaker_embeddings=speaker_embedding) + postnet_output, decoder_output, alignment, stop_tokens = parse_outputs_torch( + postnet_output, decoder_output, alignments, stop_tokens) + elif backend == 'tf': + decoder_output, postnet_output, alignments, stop_tokens = run_model_tf( + model, inputs, CONFIG, truncated, speaker_id, style_mel) + postnet_output, decoder_output, alignment, stop_tokens = parse_outputs_tf( + postnet_output, decoder_output, alignments, stop_tokens) + elif backend == 'tflite': + decoder_output, postnet_output, alignment, stop_tokens = run_model_tflite( + model, inputs, CONFIG, truncated, speaker_id, style_mel) + postnet_output, decoder_output = parse_outputs_tflite( + postnet_output, decoder_output) + # convert outputs to numpy + # plot results + wav = None + if use_griffin_lim: + wav = inv_spectrogram(postnet_output, ap, CONFIG) + # trim silence + if do_trim_silence: + wav = trim_silence(wav, ap) + return wav, alignment, decoder_output, postnet_output, stop_tokens, inputs diff --git a/TTS/tts/utils/text/__init__.py b/TTS/tts/utils/text/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..29f4af1d4ac2db9973f6429e874750e3ab767ad1 --- /dev/null +++ b/TTS/tts/utils/text/__init__.py @@ -0,0 +1,203 @@ +# -*- coding: utf-8 -*- + +import re +from packaging import version +import phonemizer +from phonemizer.phonemize import phonemize +from TTS.tts.utils.text import cleaners +from TTS.tts.utils.text.symbols import make_symbols, symbols, phonemes, _phoneme_punctuations, _bos, \ + _eos + +# pylint: disable=unnecessary-comprehension +# Mappings from symbol to numeric ID and vice versa: +_symbol_to_id = {s: i for i, s in enumerate(symbols)} +_id_to_symbol = {i: s for i, s in enumerate(symbols)} + +_phonemes_to_id = {s: i for i, s in enumerate(phonemes)} +_id_to_phonemes = {i: s for i, s in enumerate(phonemes)} + +_symbols = symbols +_phonemes = phonemes +# Regular expression matching text enclosed in curly braces: +_CURLY_RE = re.compile(r'(.*?)\{(.+?)\}(.*)') + +# Regular expression matching punctuations, ignoring empty space +PHONEME_PUNCTUATION_PATTERN = r'['+_phoneme_punctuations+']+' + + +def text2phone(text, language): + ''' + Convert graphemes to 
phonemes. + ''' + seperator = phonemizer.separator.Separator(' |', '', '|') + #try: + punctuations = re.findall(PHONEME_PUNCTUATION_PATTERN, text) + if version.parse(phonemizer.__version__) < version.parse('2.1'): + ph = phonemize(text, separator=seperator, strip=False, njobs=1, backend='espeak', language=language) + ph = ph[:-1].strip() # skip the last empty character + # phonemizer does not tackle punctuations. Here we do. + # Replace \n with matching punctuations. + if punctuations: + # if text ends with a punctuation. + if text[-1] == punctuations[-1]: + for punct in punctuations[:-1]: + ph = ph.replace('| |\n', '|'+punct+'| |', 1) + ph = ph + punctuations[-1] + else: + for punct in punctuations: + ph = ph.replace('| |\n', '|'+punct+'| |', 1) + elif version.parse(phonemizer.__version__) >= version.parse('2.1'): + ph = phonemize(text, separator=seperator, strip=False, njobs=1, backend='espeak', language=language, preserve_punctuation=True, language_switch='remove-flags') + # this is a simple fix for phonemizer. + # https://github.com/bootphon/phonemizer/issues/32 + if punctuations: + for punctuation in punctuations: + ph = ph.replace(f"| |{punctuation} ", f"|{punctuation}| |").replace(f"| |{punctuation}", f"|{punctuation}| |") + ph = ph[:-3] + else: + raise RuntimeError(" [!] Use 'phonemizer' version 2.1 or older.") + + return ph + +def intersperse(sequence, token): + result = [token] * (len(sequence) * 2 + 1) + result[1::2] = sequence + return result + +def pad_with_eos_bos(phoneme_sequence, tp=None): + # pylint: disable=global-statement + global _phonemes_to_id, _bos, _eos + if tp: + _bos = tp['bos'] + _eos = tp['eos'] + _, _phonemes = make_symbols(**tp) + _phonemes_to_id = {s: i for i, s in enumerate(_phonemes)} + + return [_phonemes_to_id[_bos]] + list(phoneme_sequence) + [_phonemes_to_id[_eos]] + +def phoneme_to_sequence(text, cleaner_names, language, enable_eos_bos=False, tp=None, add_blank=False): + # pylint: disable=global-statement + global _phonemes_to_id, _phonemes + if tp: + _, _phonemes = make_symbols(**tp) + _phonemes_to_id = {s: i for i, s in enumerate(_phonemes)} + + sequence = [] + clean_text = _clean_text(text, cleaner_names) + to_phonemes = text2phone(clean_text, language) + if to_phonemes is None: + print("!! After phoneme conversion the result is None. -- {} ".format(clean_text)) + # iterate by skipping empty strings - NOTE: might be useful to keep it to have a better intonation. + for phoneme in filter(None, to_phonemes.split('|')): + sequence += _phoneme_to_sequence(phoneme) + # Append EOS char + if enable_eos_bos: + sequence = pad_with_eos_bos(sequence, tp=tp) + if add_blank: + sequence = intersperse(sequence, len(_phonemes)) # add a blank token (new), whose id number is len(_phonemes) + return sequence + + +def sequence_to_phoneme(sequence, tp=None, add_blank=False): + # pylint: disable=global-statement + '''Converts a sequence of IDs back to a string''' + global _id_to_phonemes, _phonemes + if add_blank: + sequence = list(filter(lambda x: x != len(_phonemes), sequence)) + result = '' + if tp: + _, _phonemes = make_symbols(**tp) + _id_to_phonemes = {i: s for i, s in enumerate(_phonemes)} + + for symbol_id in sequence: + if symbol_id in _id_to_phonemes: + s = _id_to_phonemes[symbol_id] + result += s + return result.replace('}{', ' ') + + +def text_to_sequence(text, cleaner_names, tp=None, add_blank=False): + '''Converts a string of text to a sequence of IDs corresponding to the symbols in the text. 
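A rough usage sketch of the phoneme helpers defined above; it assumes phonemizer and espeak are installed, and the sentence and language code are examples only.

```python
from TTS.tts.utils.text import text2phone, phoneme_to_sequence, sequence_to_phoneme

text = "Hello world."
print(text2phone(text, "en-us"))              # raw phonemizer output, '|' separated

# cleaner names refer to functions in TTS.tts.utils.text.cleaners
seq = phoneme_to_sequence(text, ["phoneme_cleaners"], "en-us",
                          enable_eos_bos=False, add_blank=False)
print(seq)                                    # list of integer phoneme IDs
print(sequence_to_phoneme(seq))               # approximate round trip back to phonemes
```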
+ + The text can optionally have ARPAbet sequences enclosed in curly braces embedded + in it. For example, "Turn left on {HH AW1 S S T AH0 N} Street." + + Args: + text: string to convert to a sequence + cleaner_names: names of the cleaner functions to run the text through + + Returns: + List of integers corresponding to the symbols in the text + ''' + # pylint: disable=global-statement + global _symbol_to_id, _symbols + if tp: + _symbols, _ = make_symbols(**tp) + _symbol_to_id = {s: i for i, s in enumerate(_symbols)} + + sequence = [] + # Check for curly braces and treat their contents as ARPAbet: + while text: + m = _CURLY_RE.match(text) + if not m: + sequence += _symbols_to_sequence(_clean_text(text, cleaner_names)) + break + sequence += _symbols_to_sequence( + _clean_text(m.group(1), cleaner_names)) + sequence += _arpabet_to_sequence(m.group(2)) + text = m.group(3) + + if add_blank: + sequence = intersperse(sequence, len(_symbols)) # add a blank token (new), whose id number is len(_symbols) + return sequence + + +def sequence_to_text(sequence, tp=None, add_blank=False): + '''Converts a sequence of IDs back to a string''' + # pylint: disable=global-statement + global _id_to_symbol, _symbols + if add_blank: + sequence = list(filter(lambda x: x != len(_symbols), sequence)) + + if tp: + _symbols, _ = make_symbols(**tp) + _id_to_symbol = {i: s for i, s in enumerate(_symbols)} + + result = '' + for symbol_id in sequence: + if symbol_id in _id_to_symbol: + s = _id_to_symbol[symbol_id] + # Enclose ARPAbet back in curly braces: + if len(s) > 1 and s[0] == '@': + s = '{%s}' % s[1:] + result += s + return result.replace('}{', ' ') + + +def _clean_text(text, cleaner_names): + for name in cleaner_names: + cleaner = getattr(cleaners, name) + if not cleaner: + raise Exception('Unknown cleaner: %s' % name) + text = cleaner(text) + return text + + +def _symbols_to_sequence(syms): + return [_symbol_to_id[s] for s in syms if _should_keep_symbol(s)] + + +def _phoneme_to_sequence(phons): + return [_phonemes_to_id[s] for s in list(phons) if _should_keep_phoneme(s)] + + +def _arpabet_to_sequence(text): + return _symbols_to_sequence(['@' + s for s in text.split()]) + + +def _should_keep_symbol(s): + return s in _symbol_to_id and s not in ['~', '^', '_'] + + +def _should_keep_phoneme(p): + return p in _phonemes_to_id and p not in ['~', '^', '_'] diff --git a/TTS/tts/utils/text/abbreviations.py b/TTS/tts/utils/text/abbreviations.py new file mode 100644 index 0000000000000000000000000000000000000000..d14426e157c5518d21189af4bc66c3a29d44f26d --- /dev/null +++ b/TTS/tts/utils/text/abbreviations.py @@ -0,0 +1,61 @@ +import re + +# List of (regular expression, replacement) pairs for abbreviations in english: +abbreviations_en = [(re.compile('\\b%s\\.' % x[0], re.IGNORECASE), x[1]) + for x in [ + ('mrs', 'misess'), + ('mr', 'mister'), + ('dr', 'doctor'), + ('st', 'saint'), + ('co', 'company'), + ('jr', 'junior'), + ('maj', 'major'), + ('gen', 'general'), + ('drs', 'doctors'), + ('rev', 'reverend'), + ('lt', 'lieutenant'), + ('hon', 'honorable'), + ('sgt', 'sergeant'), + ('capt', 'captain'), + ('esq', 'esquire'), + ('ltd', 'limited'), + ('col', 'colonel'), + ('ft', 'fort'), + ]] + +# List of (regular expression, replacement) pairs for abbreviations in french: +abbreviations_fr = [(re.compile('\\b%s\\.?' 
% x[0], re.IGNORECASE), x[1]) + for x in [ + ('M', 'monsieur'), + ('Mlle', 'mademoiselle'), + ('Mlles', 'mesdemoiselles'), + ('Mme', 'Madame'), + ('Mmes', 'Mesdames'), + ('N.B', 'nota bene'), + ('M', 'monsieur'), + ('p.c.q', 'parce que'), + ('Pr', 'professeur'), + ('qqch', 'quelque chose'), + ('rdv', 'rendez-vous'), + ('max', 'maximum'), + ('min', 'minimum'), + ('no', 'numéro'), + ('adr', 'adresse'), + ('dr', 'docteur'), + ('st', 'saint'), + ('co', 'companie'), + ('jr', 'junior'), + ('sgt', 'sergent'), + ('capt', 'capitain'), + ('col', 'colonel'), + ('av', 'avenue'), + ('av. J.-C', 'avant Jésus-Christ'), + ('apr. J.-C', 'après Jésus-Christ'), + ('art', 'article'), + ('boul', 'boulevard'), + ('c.-à-d', 'c’est-à-dire'), + ('etc', 'et cetera'), + ('ex', 'exemple'), + ('excl', 'exclusivement'), + ('boul', 'boulevard'), + ]] \ No newline at end of file diff --git a/TTS/tts/utils/text/cleaners.py b/TTS/tts/utils/text/cleaners.py new file mode 100644 index 0000000000000000000000000000000000000000..7c3f1017ca720641c124fa93e1a950c70ff7018c --- /dev/null +++ b/TTS/tts/utils/text/cleaners.py @@ -0,0 +1,133 @@ +''' +Cleaners are transformations that run over the input text at both training and eval time. + +Cleaners can be selected by passing a comma-delimited list of cleaner names as the "cleaners" +hyperparameter. Some cleaners are English-specific. You'll typically want to use: + 1. "english_cleaners" for English text + 2. "transliteration_cleaners" for non-English text that can be transliterated to ASCII using + the Unidecode library (https://pypi.python.org/pypi/Unidecode) + 3. "basic_cleaners" if you do not want to transliterate (in this case, you should also update + the symbols in symbols.py to match your data). +''' + +import re +from unidecode import unidecode +from .number_norm import normalize_numbers +from .abbreviations import abbreviations_en, abbreviations_fr +from .time import expand_time_english + +# Regular expression matching whitespace: +_whitespace_re = re.compile(r'\s+') + + +def expand_abbreviations(text, lang='en'): + if lang == 'en': + _abbreviations = abbreviations_en + elif lang == 'fr': + _abbreviations = abbreviations_fr + for regex, replacement in _abbreviations: + text = re.sub(regex, replacement, text) + return text + + +def expand_numbers(text): + return normalize_numbers(text) + + +def lowercase(text): + return text.lower() + + +def collapse_whitespace(text): + return re.sub(_whitespace_re, ' ', text).strip() + + +def convert_to_ascii(text): + return unidecode(text) + + +def remove_aux_symbols(text): + text = re.sub(r'[\<\>\(\)\[\]\"]+', '', text) + return text + +def replace_symbols(text, lang='en'): + text = text.replace(';', ',') + text = text.replace('-', ' ') + text = text.replace(':', ',') + if lang == 'en': + text = text.replace('&', ' and ') + elif lang == 'fr': + text = text.replace('&', ' et ') + elif lang == 'pt': + text = text.replace('&', ' e ') + return text + +def basic_cleaners(text): + '''Basic pipeline that lowercases and collapses whitespace without transliteration.''' + text = lowercase(text) + text = collapse_whitespace(text) + return text + + +def transliteration_cleaners(text): + '''Pipeline for non-English text that transliterates to ASCII.''' + text = convert_to_ascii(text) + text = lowercase(text) + text = collapse_whitespace(text) + return text + + +def basic_german_cleaners(text): + '''Pipeline for German text''' + text = lowercase(text) + text = collapse_whitespace(text) + return text + + +# TODO: elaborate it +def 
basic_turkish_cleaners(text): + '''Pipeline for Turkish text''' + text = text.replace("I", "ı") + text = lowercase(text) + text = collapse_whitespace(text) + return text + +def english_cleaners(text): + '''Pipeline for English text, including number and abbreviation expansion.''' + text = convert_to_ascii(text) + text = lowercase(text) + text = expand_time_english(text) + text = expand_numbers(text) + text = expand_abbreviations(text) + text = replace_symbols(text) + text = remove_aux_symbols(text) + text = collapse_whitespace(text) + return text + +def french_cleaners(text): + '''Pipeline for French text. There is no need to expand numbers, phonemizer already does that''' + text = lowercase(text) + text = expand_abbreviations(text, lang='fr') + text = replace_symbols(text, lang='fr') + text = remove_aux_symbols(text) + text = collapse_whitespace(text) + return text + +def portuguese_cleaners(text): + '''Basic pipeline for Portuguese text. There is no need to expand abbreviation and + numbers, phonemizer already does that''' + text = lowercase(text) + text = replace_symbols(text, lang='pt') + text = remove_aux_symbols(text) + text = collapse_whitespace(text) + return text + +def phoneme_cleaners(text): + '''Pipeline for phonemes mode, including number and abbreviation expansion.''' + text = expand_numbers(text) + text = convert_to_ascii(text) + text = expand_abbreviations(text) + text = replace_symbols(text) + text = remove_aux_symbols(text) + text = collapse_whitespace(text) + return text diff --git a/TTS/tts/utils/text/cmudict.py b/TTS/tts/utils/text/cmudict.py new file mode 100644 index 0000000000000000000000000000000000000000..c0f23406f57536ff18594653757bd8ee3dd55112 --- /dev/null +++ b/TTS/tts/utils/text/cmudict.py @@ -0,0 +1,78 @@ +# -*- coding: utf-8 -*- + +import re + +VALID_SYMBOLS = [ + 'AA', 'AA0', 'AA1', 'AA2', 'AE', 'AE0', 'AE1', 'AE2', 'AH', 'AH0', 'AH1', + 'AH2', 'AO', 'AO0', 'AO1', 'AO2', 'AW', 'AW0', 'AW1', 'AW2', 'AY', 'AY0', + 'AY1', 'AY2', 'B', 'CH', 'D', 'DH', 'EH', 'EH0', 'EH1', 'EH2', 'ER', 'ER0', + 'ER1', 'ER2', 'EY', 'EY0', 'EY1', 'EY2', 'F', 'G', 'HH', 'IH', 'IH0', + 'IH1', 'IH2', 'IY', 'IY0', 'IY1', 'IY2', 'JH', 'K', 'L', 'M', 'N', 'NG', + 'OW', 'OW0', 'OW1', 'OW2', 'OY', 'OY0', 'OY1', 'OY2', 'P', 'R', 'S', 'SH', + 'T', 'TH', 'UH', 'UH0', 'UH1', 'UH2', 'UW', 'UW0', 'UW1', 'UW2', 'V', 'W', + 'Y', 'Z', 'ZH' +] + + +class CMUDict: + '''Thin wrapper around CMUDict data. 
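For orientation, a small example of what the English cleaning pipeline above does to raw text; the input string is made up, and the exact output depends on the installed inflect/unidecode versions.

```python
from TTS.tts.utils.text.cleaners import english_cleaners

print(english_cleaners("Dr. Smith paid $5 at 10:30 am."))
# roughly: "doctor smith paid five dollars at ten thirty a m."
```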
http://www.speech.cs.cmu.edu/cgi-bin/cmudict''' + + def __init__(self, file_or_path, keep_ambiguous=True): + if isinstance(file_or_path, str): + with open(file_or_path, encoding='latin-1') as f: + entries = _parse_cmudict(f) + else: + entries = _parse_cmudict(file_or_path) + if not keep_ambiguous: + entries = { + word: pron + for word, pron in entries.items() if len(pron) == 1 + } + self._entries = entries + + def __len__(self): + return len(self._entries) + + def lookup(self, word): + '''Returns list of ARPAbet pronunciations of the given word.''' + return self._entries.get(word.upper()) + + @staticmethod + def get_arpabet(word, cmudict, punctuation_symbols): + first_symbol, last_symbol = '', '' + if word and word[0] in punctuation_symbols: + first_symbol = word[0] + word = word[1:] + if word and word[-1] in punctuation_symbols: + last_symbol = word[-1] + word = word[:-1] + arpabet = cmudict.lookup(word) + if arpabet is not None: + return first_symbol + '{%s}' % arpabet[0] + last_symbol + return first_symbol + word + last_symbol + + +_alt_re = re.compile(r'\([0-9]+\)') + + +def _parse_cmudict(file): + cmudict = {} + for line in file: + if line and (line[0] >= 'A' and line[0] <= 'Z' or line[0] == "'"): + parts = line.split(' ') + word = re.sub(_alt_re, '', parts[0]) + pronunciation = _get_pronunciation(parts[1]) + if pronunciation: + if word in cmudict: + cmudict[word].append(pronunciation) + else: + cmudict[word] = [pronunciation] + return cmudict + + +def _get_pronunciation(s): + parts = s.strip().split(' ') + for part in parts: + if part not in VALID_SYMBOLS: + return None + return ' '.join(parts) diff --git a/TTS/tts/utils/text/number_norm.py b/TTS/tts/utils/text/number_norm.py new file mode 100644 index 0000000000000000000000000000000000000000..2b83c271cd8e8b15176f912ab296f895fcbd2c47 --- /dev/null +++ b/TTS/tts/utils/text/number_norm.py @@ -0,0 +1,99 @@ +""" from https://github.com/keithito/tacotron """ + +import inflect +import re +from typing import Dict + +_inflect = inflect.engine() +_comma_number_re = re.compile(r'([0-9][0-9\,]+[0-9])') +_decimal_number_re = re.compile(r'([0-9]+\.[0-9]+)') +_currency_re = re.compile(r'(£|\$|¥)([0-9\,\.]*[0-9]+)') +_ordinal_re = re.compile(r'[0-9]+(st|nd|rd|th)') +_number_re = re.compile(r'-?[0-9]+') + + +def _remove_commas(m): + return m.group(1).replace(',', '') + + +def _expand_decimal_point(m): + return m.group(1).replace('.', ' point ') + + +def __expand_currency(value: str, inflection: Dict[float, str]) -> str: + parts = value.replace(",", "").split('.') + if len(parts) > 2: + return f"{value} {inflection[2]}" # Unexpected format + text = [] + integer = int(parts[0]) if parts[0] else 0 + if integer > 0: + integer_unit = inflection.get(integer, inflection[2]) + text.append(f"{integer} {integer_unit}") + fraction = int(parts[1]) if len(parts) > 1 and parts[1] else 0 + if fraction > 0: + fraction_unit = inflection.get(fraction/100, inflection[0.02]) + text.append(f"{fraction} {fraction_unit}") + if len(text) == 0: + return f"zero {inflection[2]}" + return " ".join(text) + + +def _expand_currency(m: "re.Match") -> str: + currencies = { + "$": { + 0.01: "cent", + 0.02: "cents", + 1: "dollar", + 2: "dollars", + }, + "€": { + 0.01: "cent", + 0.02: "cents", + 1: "euro", + 2: "euros", + }, + "£": { + 0.01: "penny", + 0.02: "pence", + 1: "pound sterling", + 2: "pounds sterling", + }, + "¥": { + # TODO rin + 0.02: "sen", + 2: "yen", + } + } + unit = m.group(1) + currency = currencies[unit] + value = m.group(2) + return __expand_currency(value, 
currency) + + +def _expand_ordinal(m): + return _inflect.number_to_words(m.group(0)) + + +def _expand_number(m): + num = int(m.group(0)) + if 1000 < num < 3000: + if num == 2000: + return 'two thousand' + if 2000 < num < 2010: + return 'two thousand ' + _inflect.number_to_words(num % 100) + if num % 100 == 0: + return _inflect.number_to_words(num // 100) + ' hundred' + return _inflect.number_to_words(num, + andword='', + zero='oh', + group=2).replace(', ', ' ') + return _inflect.number_to_words(num, andword='') + + +def normalize_numbers(text): + text = re.sub(_comma_number_re, _remove_commas, text) + text = re.sub(_currency_re, _expand_currency, text) + text = re.sub(_decimal_number_re, _expand_decimal_point, text) + text = re.sub(_ordinal_re, _expand_ordinal, text) + text = re.sub(_number_re, _expand_number, text) + return text diff --git a/TTS/tts/utils/text/symbols.py b/TTS/tts/utils/text/symbols.py new file mode 100644 index 0000000000000000000000000000000000000000..544277c51bf5283089912e1068fa664dd875e2a9 --- /dev/null +++ b/TTS/tts/utils/text/symbols.py @@ -0,0 +1,47 @@ +# -*- coding: utf-8 -*- +''' +Defines the set of symbols used in text input to the model. + +The default is a set of ASCII characters that works well for English or text that has been run +through Unidecode. For other data, you can modify _characters. See TRAINING_DATA.md for details. +''' +def make_symbols(characters, phonemes, punctuations='!\'(),-.:;? ', pad='_', eos='~', bos='^'):# pylint: disable=redefined-outer-name + ''' Function to create symbols and phonemes ''' + _phonemes_sorted = sorted(list(phonemes)) + + # Prepend "@" to ARPAbet symbols to ensure uniqueness (some are the same as uppercase letters): + _arpabet = ['@' + s for s in _phonemes_sorted] + + # Export all symbols: + _symbols = [pad, eos, bos] + list(characters) + _arpabet + _phonemes = [pad, eos, bos] + list(_phonemes_sorted) + list(punctuations) + + return _symbols, _phonemes + +_pad = '_' +_eos = '~' +_bos = '^' +_characters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!\'(),-.:;? ' +_punctuations = '!\'(),-.:;? ' +_phoneme_punctuations = '.!;:,?' + +# Phonemes definition +_vowels = 'iyɨʉɯuɪʏʊeøɘəɵɤoɛœɜɞʌɔæɐaɶɑɒᵻ' +_non_pulmonic_consonants = 'ʘɓǀɗǃʄǂɠǁʛ' +_pulmonic_consonants = 'pbtdʈɖcɟkɡqɢʔɴŋɲɳnɱmʙrʀⱱɾɽɸβfvθðszʃʒʂʐçʝxɣχʁħʕhɦɬɮʋɹɻjɰlɭʎʟ' +_suprasegmentals = 'ˈˌːˑ' +_other_symbols = 'ʍwɥʜʢʡɕʑɺɧ' +_diacrilics = 'ɚ˞ɫ' +_phonemes = _vowels + _non_pulmonic_consonants + _pulmonic_consonants + _suprasegmentals + _other_symbols + _diacrilics + +symbols, phonemes = make_symbols(_characters, _phonemes, _punctuations, _pad, _eos, _bos) + +# Generate ALIEN language +# from random import shuffle +# shuffle(phonemes) + +if __name__ == '__main__': + print(" > TTS symbols {}".format(len(symbols))) + print(symbols) + print(" > TTS phonemes {}".format(len(phonemes))) + print(phonemes) diff --git a/TTS/tts/utils/text/time.py b/TTS/tts/utils/text/time.py new file mode 100644 index 0000000000000000000000000000000000000000..55ecbd8c0668b5a5c37cd52eca1bb67eaef8ae4c --- /dev/null +++ b/TTS/tts/utils/text/time.py @@ -0,0 +1,44 @@ +import re +import inflect + +_inflect = inflect.engine() + +_time_re = re.compile(r"""\b + ((0?[0-9])|(1[0-1])|(1[2-9])|(2[0-3])) # hours + : + ([0-5][0-9]) # minutes + \s*(a\\.m\\.|am|pm|p\\.m\\.|a\\.m|p\\.m)? 
# am/pm + \b""", + re.IGNORECASE | re.X) + + +def _expand_num(n: int) -> str: + return _inflect.number_to_words(n) + + +def _expand_time_english(match: "re.Match") -> str: + hour = int(match.group(1)) + past_noon = hour >= 12 + time = [] + if hour > 12: + hour -= 12 + elif hour == 0: + hour = 12 + past_noon = True + time.append(_expand_num(hour)) + + minute = int(match.group(6)) + if minute > 0: + if minute < 10: + time.append("oh") + time.append(_expand_num(minute)) + am_pm = match.group(7) + if am_pm is None: + time.append("p m" if past_noon else "a m") + else: + time.extend(list(am_pm.replace(".", ""))) + return " ".join(time) + + +def expand_time_english(text: str) -> str: + return re.sub(_time_re, _expand_time_english, text) diff --git a/TTS/tts/utils/visual.py b/TTS/tts/utils/visual.py new file mode 100644 index 0000000000000000000000000000000000000000..e5bb5891463fbdbe842ad00e38a19f648c25e392 --- /dev/null +++ b/TTS/tts/utils/visual.py @@ -0,0 +1,140 @@ +import librosa +import matplotlib +import numpy as np +import torch + +matplotlib.use('Agg') +import matplotlib.pyplot as plt +from TTS.tts.utils.text import phoneme_to_sequence, sequence_to_phoneme + + +def plot_alignment(alignment, + info=None, + fig_size=(16, 10), + title=None, + output_fig=False): + if isinstance(alignment, torch.Tensor): + alignment_ = alignment.detach().cpu().numpy().squeeze() + else: + alignment_ = alignment + alignment_ = alignment_.astype( + np.float32) if alignment_.dtype == np.float16 else alignment_ + fig, ax = plt.subplots(figsize=fig_size) + im = ax.imshow(alignment_.T, + aspect='auto', + origin='lower', + interpolation='none') + fig.colorbar(im, ax=ax) + xlabel = 'Decoder timestep' + if info is not None: + xlabel += '\n\n' + info + plt.xlabel(xlabel) + plt.ylabel('Encoder timestep') + # plt.yticks(range(len(text)), list(text)) + plt.tight_layout() + if title is not None: + plt.title(title) + if not output_fig: + plt.close() + return fig + + +def plot_spectrogram(spectrogram, + ap=None, + fig_size=(16, 10), + output_fig=False): + if isinstance(spectrogram, torch.Tensor): + spectrogram_ = spectrogram.detach().cpu().numpy().squeeze().T + else: + spectrogram_ = spectrogram.T + spectrogram_ = spectrogram_.astype( + np.float32) if spectrogram_.dtype == np.float16 else spectrogram_ + if ap is not None: + spectrogram_ = ap.denormalize(spectrogram_) # pylint: disable=protected-access + fig = plt.figure(figsize=fig_size) + plt.imshow(spectrogram_, aspect="auto", origin="lower") + plt.colorbar() + plt.tight_layout() + if not output_fig: + plt.close() + return fig + + +def visualize(alignment, + postnet_output, + text, + hop_length, + CONFIG, + stop_tokens=None, + decoder_output=None, + output_path=None, + figsize=(8, 24), + output_fig=False): + + if decoder_output is not None: + num_plot = 4 + else: + num_plot = 3 + + label_fontsize = 16 + fig = plt.figure(figsize=figsize) + + plt.subplot(num_plot, 1, 1) + plt.imshow(alignment.T, aspect="auto", origin="lower", interpolation=None) + plt.xlabel("Decoder timestamp", fontsize=label_fontsize) + plt.ylabel("Encoder timestamp", fontsize=label_fontsize) + # compute phoneme representation and back + if CONFIG.use_phonemes: + seq = phoneme_to_sequence( + text, [CONFIG.text_cleaner], + CONFIG.phoneme_language, + CONFIG.enable_eos_bos_chars, + tp=CONFIG.characters if 'characters' in CONFIG.keys() else None) + text = sequence_to_phoneme( + seq, + tp=CONFIG.characters if 'characters' in CONFIG.keys() else None) + print(text) + plt.yticks(range(len(text)), list(text)) + 
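A brief sketch of calling the two plotting helpers defined above; the random arrays stand in for a real alignment matrix and mel spectrogram.

```python
import numpy as np
from TTS.tts.utils.visual import plot_alignment, plot_spectrogram

alignment = np.random.rand(120, 42)           # decoder steps x encoder steps
mel = np.random.rand(120, 80)                 # frames x mel bins

fig1 = plot_alignment(alignment, info="illustrative alignment", output_fig=True)
fig2 = plot_spectrogram(mel, ap=None, output_fig=True)  # pass an AudioProcessor to denormalize
fig1.savefig("alignment.png")
fig2.savefig("spectrogram.png")
```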
plt.colorbar() + + if stop_tokens is not None: + # plot stopnet predictions + plt.subplot(num_plot, 1, 2) + plt.plot(range(len(stop_tokens)), list(stop_tokens)) + + # plot postnet spectrogram + plt.subplot(num_plot, 1, 3) + librosa.display.specshow(postnet_output.T, + sr=CONFIG.audio['sample_rate'], + hop_length=hop_length, + x_axis="time", + y_axis="linear", + fmin=CONFIG.audio['mel_fmin'], + fmax=CONFIG.audio['mel_fmax']) + + plt.xlabel("Time", fontsize=label_fontsize) + plt.ylabel("Hz", fontsize=label_fontsize) + plt.tight_layout() + plt.colorbar() + + if decoder_output is not None: + plt.subplot(num_plot, 1, 4) + librosa.display.specshow(decoder_output.T, + sr=CONFIG.audio['sample_rate'], + hop_length=hop_length, + x_axis="time", + y_axis="linear", + fmin=CONFIG.audio['mel_fmin'], + fmax=CONFIG.audio['mel_fmax']) + plt.xlabel("Time", fontsize=label_fontsize) + plt.ylabel("Hz", fontsize=label_fontsize) + plt.tight_layout() + plt.colorbar() + + if output_path: + print(output_path) + fig.savefig(output_path) + plt.close() + + if not output_fig: + plt.close() diff --git a/TTS/utils/__init__.py b/TTS/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/TTS/utils/audio.py b/TTS/utils/audio.py new file mode 100644 index 0000000000000000000000000000000000000000..93093d1a915fcb6ef3d8cbde5396b10928c1403c --- /dev/null +++ b/TTS/utils/audio.py @@ -0,0 +1,375 @@ +import librosa +import soundfile as sf +import numpy as np +import scipy.io.wavfile +import scipy.signal +# import pyworld as pw + +from TTS.tts.utils.data import StandardScaler + +#pylint: disable=too-many-public-methods +class AudioProcessor(object): + def __init__(self, + sample_rate=None, + resample=False, + num_mels=None, + min_level_db=None, + frame_shift_ms=None, + frame_length_ms=None, + hop_length=None, + win_length=None, + ref_level_db=None, + fft_size=1024, + power=None, + preemphasis=0.0, + signal_norm=None, + symmetric_norm=None, + max_norm=None, + mel_fmin=None, + mel_fmax=None, + spec_gain=20, + stft_pad_mode='reflect', + clip_norm=True, + griffin_lim_iters=None, + do_trim_silence=False, + trim_db=60, + do_sound_norm=False, + stats_path=None, + verbose=True, + **_): + + # setup class attributed + self.sample_rate = sample_rate + self.resample = resample + self.num_mels = num_mels + self.min_level_db = min_level_db or 0 + self.frame_shift_ms = frame_shift_ms + self.frame_length_ms = frame_length_ms + self.ref_level_db = ref_level_db + self.fft_size = fft_size + self.power = power + self.preemphasis = preemphasis + self.griffin_lim_iters = griffin_lim_iters + self.signal_norm = signal_norm + self.symmetric_norm = symmetric_norm + self.mel_fmin = mel_fmin or 0 + self.mel_fmax = mel_fmax + self.spec_gain = float(spec_gain) + self.stft_pad_mode = stft_pad_mode + self.max_norm = 1.0 if max_norm is None else float(max_norm) + self.clip_norm = clip_norm + self.do_trim_silence = do_trim_silence + self.trim_db = trim_db + self.do_sound_norm = do_sound_norm + self.stats_path = stats_path + # setup stft parameters + if hop_length is None: + # compute stft parameters from given time values + self.hop_length, self.win_length = self._stft_parameters() + else: + # use stft parameters from config file + self.hop_length = hop_length + self.win_length = win_length + assert min_level_db != 0.0, " [!] min_level_db is 0" + assert self.win_length <= self.fft_size, " [!] 
win_length cannot be larger than fft_size" + members = vars(self) + if verbose: + print(" > Setting up Audio Processor...") + for key, value in members.items(): + print(" | > {}:{}".format(key, value)) + # create spectrogram utils + self.mel_basis = self._build_mel_basis() + self.inv_mel_basis = np.linalg.pinv(self._build_mel_basis()) + # setup scaler + if stats_path: + mel_mean, mel_std, linear_mean, linear_std, _ = self.load_stats(stats_path) + self.setup_scaler(mel_mean, mel_std, linear_mean, linear_std) + self.signal_norm = True + self.max_norm = None + self.clip_norm = None + self.symmetric_norm = None + + ### setting up the parameters ### + def _build_mel_basis(self, ): + if self.mel_fmax is not None: + assert self.mel_fmax <= self.sample_rate // 2 + return librosa.filters.mel( + self.sample_rate, + self.fft_size, + n_mels=self.num_mels, + fmin=self.mel_fmin, + fmax=self.mel_fmax) + + def _stft_parameters(self, ): + """Compute necessary stft parameters with given time values""" + factor = self.frame_length_ms / self.frame_shift_ms + assert (factor).is_integer(), " [!] frame_shift_ms should divide frame_length_ms" + hop_length = int(self.frame_shift_ms / 1000.0 * self.sample_rate) + win_length = int(hop_length * factor) + return hop_length, win_length + + ### normalization ### + def normalize(self, S): + """Put values in [0, self.max_norm] or [-self.max_norm, self.max_norm]""" + #pylint: disable=no-else-return + S = S.copy() + if self.signal_norm: + # mean-var scaling + if hasattr(self, 'mel_scaler'): + if S.shape[0] == self.num_mels: + return self.mel_scaler.transform(S.T).T + elif S.shape[0] == self.fft_size / 2: + return self.linear_scaler.transform(S.T).T + else: + raise RuntimeError(' [!] Mean-Var stats does not match the given feature dimensions.') + # range normalization + S -= self.ref_level_db # discard certain range of DB assuming it is air noise + S_norm = ((S - self.min_level_db) / (-self.min_level_db)) + if self.symmetric_norm: + S_norm = ((2 * self.max_norm) * S_norm) - self.max_norm + if self.clip_norm: + S_norm = np.clip(S_norm, -self.max_norm, self.max_norm) # pylint: disable=invalid-unary-operand-type + return S_norm + else: + S_norm = self.max_norm * S_norm + if self.clip_norm: + S_norm = np.clip(S_norm, 0, self.max_norm) + return S_norm + else: + return S + + def denormalize(self, S): + """denormalize values""" + #pylint: disable=no-else-return + S_denorm = S.copy() + if self.signal_norm: + # mean-var scaling + if hasattr(self, 'mel_scaler'): + if S_denorm.shape[0] == self.num_mels: + return self.mel_scaler.inverse_transform(S_denorm.T).T + elif S_denorm.shape[0] == self.fft_size / 2: + return self.linear_scaler.inverse_transform(S_denorm.T).T + else: + raise RuntimeError(' [!] 
Mean-Var stats does not match the given feature dimensions.') + if self.symmetric_norm: + if self.clip_norm: + S_denorm = np.clip(S_denorm, -self.max_norm, self.max_norm) #pylint: disable=invalid-unary-operand-type + S_denorm = ((S_denorm + self.max_norm) * -self.min_level_db / (2 * self.max_norm)) + self.min_level_db + return S_denorm + self.ref_level_db + else: + if self.clip_norm: + S_denorm = np.clip(S_denorm, 0, self.max_norm) + S_denorm = (S_denorm * -self.min_level_db / + self.max_norm) + self.min_level_db + return S_denorm + self.ref_level_db + else: + return S_denorm + + ### Mean-STD scaling ### + def load_stats(self, stats_path): + stats = np.load(stats_path, allow_pickle=True).item() #pylint: disable=unexpected-keyword-arg + mel_mean = stats['mel_mean'] + mel_std = stats['mel_std'] + linear_mean = stats['linear_mean'] + linear_std = stats['linear_std'] + stats_config = stats['audio_config'] + # check all audio parameters used for computing stats + skip_parameters = ['griffin_lim_iters', 'stats_path', 'do_trim_silence', 'ref_level_db', 'power'] + for key in stats_config.keys(): + if key in skip_parameters: + continue + if key not in ['sample_rate', 'trim_db']: + assert stats_config[key] == self.__dict__[key],\ + f" [!] Audio param {key} does not match the value used for computing mean-var stats. {stats_config[key]} vs {self.__dict__[key]}" + return mel_mean, mel_std, linear_mean, linear_std, stats_config + + # pylint: disable=attribute-defined-outside-init + def setup_scaler(self, mel_mean, mel_std, linear_mean, linear_std): + self.mel_scaler = StandardScaler() + self.mel_scaler.set_stats(mel_mean, mel_std) + self.linear_scaler = StandardScaler() + self.linear_scaler.set_stats(linear_mean, linear_std) + + ### DB and AMP conversion ### + # pylint: disable=no-self-use + def _amp_to_db(self, x): + return self.spec_gain * np.log10(np.maximum(1e-5, x)) + + # pylint: disable=no-self-use + def _db_to_amp(self, x): + return np.power(10.0, x / self.spec_gain) + + ### Preemphasis ### + def apply_preemphasis(self, x): + if self.preemphasis == 0: + raise RuntimeError(" [!] Preemphasis is set 0.0.") + return scipy.signal.lfilter([1, -self.preemphasis], [1], x) + + def apply_inv_preemphasis(self, x): + if self.preemphasis == 0: + raise RuntimeError(" [!] 
Preemphasis is set 0.0.") + return scipy.signal.lfilter([1], [1, -self.preemphasis], x) + + ### SPECTROGRAMs ### + def _linear_to_mel(self, spectrogram): + return np.dot(self.mel_basis, spectrogram) + + def _mel_to_linear(self, mel_spec): + return np.maximum(1e-10, np.dot(self.inv_mel_basis, mel_spec)) + + def spectrogram(self, y): + if self.preemphasis != 0: + D = self._stft(self.apply_preemphasis(y)) + else: + D = self._stft(y) + S = self._amp_to_db(np.abs(D)) + return self.normalize(S) + + def melspectrogram(self, y): + if self.preemphasis != 0: + D = self._stft(self.apply_preemphasis(y)) + else: + D = self._stft(y) + S = self._amp_to_db(self._linear_to_mel(np.abs(D))) + return self.normalize(S) + + def inv_spectrogram(self, spectrogram): + """Converts spectrogram to waveform using librosa""" + S = self.denormalize(spectrogram) + S = self._db_to_amp(S) + # Reconstruct phase + if self.preemphasis != 0: + return self.apply_inv_preemphasis(self._griffin_lim(S**self.power)) + return self._griffin_lim(S**self.power) + + def inv_melspectrogram(self, mel_spectrogram): + '''Converts melspectrogram to waveform using librosa''' + D = self.denormalize(mel_spectrogram) + S = self._db_to_amp(D) + S = self._mel_to_linear(S) # Convert back to linear + if self.preemphasis != 0: + return self.apply_inv_preemphasis(self._griffin_lim(S**self.power)) + return self._griffin_lim(S**self.power) + + def out_linear_to_mel(self, linear_spec): + S = self.denormalize(linear_spec) + S = self._db_to_amp(S) + S = self._linear_to_mel(np.abs(S)) + S = self._amp_to_db(S) + mel = self.normalize(S) + return mel + + ### STFT and ISTFT ### + def _stft(self, y): + return librosa.stft( + y=y, + n_fft=self.fft_size, + hop_length=self.hop_length, + win_length=self.win_length, + pad_mode=self.stft_pad_mode, + ) + + def _istft(self, y): + return librosa.istft( + y, hop_length=self.hop_length, win_length=self.win_length) + + def _griffin_lim(self, S): + angles = np.exp(2j * np.pi * np.random.rand(*S.shape)) + S_complex = np.abs(S).astype(np.complex) + y = self._istft(S_complex * angles) + for _ in range(self.griffin_lim_iters): + angles = np.exp(1j * np.angle(self._stft(y))) + y = self._istft(S_complex * angles) + return y + + def compute_stft_paddings(self, x, pad_sides=1): + '''compute right padding (final frame) or both sides padding (first and final frames) + ''' + assert pad_sides in (1, 2) + pad = (x.shape[0] // self.hop_length + 1) * self.hop_length - x.shape[0] + if pad_sides == 1: + return 0, pad + return pad // 2, pad // 2 + pad % 2 + + ### Compute F0 ### + # def compute_f0(self, x): + # f0, t = pw.dio( + # x.astype(np.double), + # fs=self.sample_rate, + # f0_ceil=self.mel_fmax, + # frame_period=1000 * self.hop_length / self.sample_rate, + # ) + # f0 = pw.stonemask(x.astype(np.double), f0, t, self.sample_rate) + # return f0 + + ### Audio Processing ### + def find_endpoint(self, wav, threshold_db=-40, min_silence_sec=0.8): + window_length = int(self.sample_rate * min_silence_sec) + hop_length = int(window_length / 4) + threshold = self._db_to_amp(threshold_db) + for x in range(hop_length, len(wav) - window_length, hop_length): + if np.max(wav[x:x + window_length]) < threshold: + return x + hop_length + return len(wav) + + def trim_silence(self, wav): + """ Trim silent parts with a threshold and 0.01 sec margin """ + margin = int(self.sample_rate * 0.01) + wav = wav[margin:-margin] + return librosa.effects.trim( + wav, top_db=self.trim_db, frame_length=self.win_length, hop_length=self.hop_length)[0] + + @staticmethod + def 
sound_norm(x): + return x / abs(x).max() * 0.9 + + ### save and load ### + def load_wav(self, filename, sr=None): + if self.resample: + x, sr = librosa.load(filename, sr=self.sample_rate) + elif sr is None: + x, sr = sf.read(filename) + assert self.sample_rate == sr, "%s vs %s"%(self.sample_rate, sr) + else: + x, sr = librosa.load(filename, sr=sr) + if self.do_trim_silence: + try: + x = self.trim_silence(x) + except ValueError: + print(f' [!] File cannot be trimmed for silence - {filename}') + if self.do_sound_norm: + x = self.sound_norm(x) + return x + + def save_wav(self, wav, path): + wav_norm = wav * (32767 / max(0.01, np.max(np.abs(wav)))) + scipy.io.wavfile.write(path, self.sample_rate, wav_norm.astype(np.int16)) + + @staticmethod + def mulaw_encode(wav, qc): + mu = 2 ** qc - 1 + # wav_abs = np.minimum(np.abs(wav), 1.0) + signal = np.sign(wav) * np.log(1 + mu * np.abs(wav)) / np.log(1. + mu) + # Quantize signal to the specified number of levels. + signal = (signal + 1) / 2 * mu + 0.5 + return np.floor(signal,) + + @staticmethod + def mulaw_decode(wav, qc): + """Recovers waveform from quantized values.""" + mu = 2 ** qc - 1 + x = np.sign(wav) / mu * ((1 + mu) ** np.abs(wav) - 1) + return x + + + @staticmethod + def encode_16bits(x): + return np.clip(x * 2**15, -2**15, 2**15 - 1).astype(np.int16) + + @staticmethod + def quantize(x, bits): + return (x + 1.) * (2**bits - 1) / 2 + + @staticmethod + def dequantize(x, bits): + return 2 * x / (2**bits - 1) - 1 diff --git a/TTS/utils/console_logger.py b/TTS/utils/console_logger.py new file mode 100644 index 0000000000000000000000000000000000000000..3affd6afc31042d8c4f534e63f20ca1710666090 --- /dev/null +++ b/TTS/utils/console_logger.py @@ -0,0 +1,102 @@ +import datetime +from TTS.utils.io import AttrDict + + +tcolors = AttrDict({ + 'OKBLUE': '\033[94m', + 'HEADER': '\033[95m', + 'OKGREEN': '\033[92m', + 'WARNING': '\033[93m', + 'FAIL': '\033[91m', + 'ENDC': '\033[0m', + 'BOLD': '\033[1m', + 'UNDERLINE': '\033[4m' +}) + + +class ConsoleLogger(): + def __init__(self): + # TODO: color code for value changes + # use these to compare values between iterations + self.old_train_loss_dict = None + self.old_epoch_loss_dict = None + self.old_eval_loss_dict = None + + # pylint: disable=no-self-use + def get_time(self): + now = datetime.datetime.now() + return now.strftime("%Y-%m-%d %H:%M:%S") + + def print_epoch_start(self, epoch, max_epoch): + print("\n{}{} > EPOCH: {}/{}{}".format(tcolors.UNDERLINE, tcolors.BOLD, + epoch, max_epoch, tcolors.ENDC), + flush=True) + + def print_train_start(self): + print(f"\n{tcolors.BOLD} > TRAINING ({self.get_time()}) {tcolors.ENDC}") + + def print_train_step(self, batch_steps, step, global_step, log_dict, + loss_dict, avg_loss_dict): + indent = " | > " + print() + log_text = "{} --> STEP: {}/{} -- GLOBAL_STEP: {}{}\n".format( + tcolors.BOLD, step, batch_steps, global_step, tcolors.ENDC) + for key, value in loss_dict.items(): + # print the avg value if given + if f'avg_{key}' in avg_loss_dict.keys(): + log_text += "{}{}: {:.5f} ({:.5f})\n".format(indent, key, value, avg_loss_dict[f'avg_{key}']) + else: + log_text += "{}{}: {:.5f} \n".format(indent, key, value) + for idx, (key, value) in enumerate(log_dict.items()): + if isinstance(value, list): + log_text += f"{indent}{key}: {value[0]:.{value[1]}f}" + else: + log_text += f"{indent}{key}: {value}" + if idx < len(log_dict)-1: + log_text += "\n" + print(log_text, flush=True) + + # pylint: disable=unused-argument + def print_train_epoch_end(self, global_step, epoch, 
epoch_time, + print_dict): + indent = " | > " + log_text = f"\n{tcolors.BOLD} --> TRAIN PERFORMACE -- EPOCH TIME: {epoch_time:.2f} sec -- GLOBAL_STEP: {global_step}{tcolors.ENDC}\n" + for key, value in print_dict.items(): + log_text += "{}{}: {:.5f}\n".format(indent, key, value) + print(log_text, flush=True) + + def print_eval_start(self): + print(f"{tcolors.BOLD} > EVALUATION {tcolors.ENDC}\n") + + def print_eval_step(self, step, loss_dict, avg_loss_dict): + indent = " | > " + print() + log_text = f"{tcolors.BOLD} --> STEP: {step}{tcolors.ENDC}\n" + for key, value in loss_dict.items(): + # print the avg value if given + if f'avg_{key}' in avg_loss_dict.keys(): + log_text += "{}{}: {:.5f} ({:.5f})\n".format(indent, key, value, avg_loss_dict[f'avg_{key}']) + else: + log_text += "{}{}: {:.5f} \n".format(indent, key, value) + print(log_text, flush=True) + + def print_epoch_end(self, epoch, avg_loss_dict): + indent = " | > " + log_text = " {}--> EVAL PERFORMANCE{}\n".format( + tcolors.BOLD, tcolors.ENDC) + for key, value in avg_loss_dict.items(): + # print the avg value if given + color = '' + sign = '+' + diff = 0 + if self.old_eval_loss_dict is not None and key in self.old_eval_loss_dict: + diff = value - self.old_eval_loss_dict[key] + if diff < 0: + color = tcolors.OKGREEN + sign = '' + elif diff > 0: + color = tcolors.FAIL + sign = '+' + log_text += "{}{}:{} {:.5f} {}({}{:.5f})\n".format(indent, key, color, value, tcolors.ENDC, sign, diff) + self.old_eval_loss_dict = avg_loss_dict + print(log_text, flush=True) diff --git a/TTS/utils/distribute.py b/TTS/utils/distribute.py new file mode 100644 index 0000000000000000000000000000000000000000..89d4efec0049614982d3f503bbdcbcd5c4408fba --- /dev/null +++ b/TTS/utils/distribute.py @@ -0,0 +1,116 @@ +# edited from https://github.com/fastai/imagenet-fast/blob/master/imagenet_nv/distributed.py +import math + +import torch +import torch.distributed as dist +from torch._utils import _flatten_dense_tensors, _unflatten_dense_tensors +from torch.autograd import Variable +from torch.utils.data.sampler import Sampler + + +class DistributedSampler(Sampler): + """ + Non shuffling Distributed Sampler + """ + + def __init__(self, dataset, num_replicas=None, rank=None): + super(DistributedSampler, self).__init__(dataset) + if num_replicas is None: + if not dist.is_available(): + raise RuntimeError("Requires distributed package to be available") + num_replicas = dist.get_world_size() + if rank is None: + if not dist.is_available(): + raise RuntimeError("Requires distributed package to be available") + rank = dist.get_rank() + self.dataset = dataset + self.num_replicas = num_replicas + self.rank = rank + self.epoch = 0 + self.num_samples = int(math.ceil(len(self.dataset) * 1.0 / self.num_replicas)) + self.total_size = self.num_samples * self.num_replicas + + def __iter__(self): + indices = torch.arange(len(self.dataset)).tolist() + + # add extra samples to make it evenly divisible + indices += indices[:(self.total_size - len(indices))] + assert len(indices) == self.total_size + + # subsample + indices = indices[self.rank:self.total_size:self.num_replicas] + assert len(indices) == self.num_samples + + return iter(indices) + + def __len__(self): + return self.num_samples + + def set_epoch(self, epoch): + self.epoch = epoch + + +def reduce_tensor(tensor, num_gpus): + rt = tensor.clone() + dist.all_reduce(rt, op=dist.reduce_op.SUM) + rt /= num_gpus + return rt + + +def init_distributed(rank, num_gpus, group_name, dist_backend, dist_url): + assert 
torch.cuda.is_available(), "Distributed mode requires CUDA." + + # Set cuda device so everything is done on the right GPU. + torch.cuda.set_device(rank % torch.cuda.device_count()) + + # Initialize distributed communication + dist.init_process_group( + dist_backend, + init_method=dist_url, + world_size=num_gpus, + rank=rank, + group_name=group_name) + + +def apply_gradient_allreduce(module): + + # sync model parameters + for p in module.state_dict().values(): + if not torch.is_tensor(p): + continue + dist.broadcast(p, 0) + + def allreduce_params(): + if module.needs_reduction: + module.needs_reduction = False + # bucketing params based on value types + buckets = {} + for param in module.parameters(): + if param.requires_grad and param.grad is not None: + tp = type(param.data) + if tp not in buckets: + buckets[tp] = [] + buckets[tp].append(param) + for tp in buckets: + bucket = buckets[tp] + grads = [param.grad.data for param in bucket] + coalesced = _flatten_dense_tensors(grads) + dist.all_reduce(coalesced, op=dist.reduce_op.SUM) + coalesced /= dist.get_world_size() + for buf, synced in zip( + grads, _unflatten_dense_tensors(coalesced, grads)): + buf.copy_(synced) + + for param in list(module.parameters()): + + def allreduce_hook(*_): + Variable._execution_engine.queue_callback(allreduce_params) #pylint: disable=protected-access + + if param.requires_grad: + param.register_hook(allreduce_hook) + + def set_needs_reduction(self, *_): + self.needs_reduction = True + + module.register_forward_hook(set_needs_reduction) + return module diff --git a/TTS/utils/generic_utils.py b/TTS/utils/generic_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..5890f04d630dc66e74923dc28e00d88ea154ee81 --- /dev/null +++ b/TTS/utils/generic_utils.py @@ -0,0 +1,177 @@ +import datetime +import glob +import os +import shutil +import subprocess +import sys +from pathlib import Path + +import torch + + +def get_git_branch(): + try: + out = subprocess.check_output(["git", "branch"]).decode("utf8") + current = next(line for line in out.split("\n") + if line.startswith("*")) + current.replace("* ", "") + except subprocess.CalledProcessError: + current = "inside_docker" + return current + + +def get_commit_hash(): + """https://stackoverflow.com/questions/14989858/get-the-current-git-hash-in-a-python-script""" + # try: + # subprocess.check_output(['git', 'diff-index', '--quiet', + # 'HEAD']) # Verify client is clean + # except: + # raise RuntimeError( + # " !! 
Commit before training to get the commit hash.") + try: + commit = subprocess.check_output( + ['git', 'rev-parse', '--short', 'HEAD']).decode().strip() + # Not copying .git folder into docker container + except subprocess.CalledProcessError: + commit = "0000000" + print(' > Git Hash: {}'.format(commit)) + return commit + + +def create_experiment_folder(root_path, model_name, debug): + """ Create a folder with the current date and time """ + date_str = datetime.datetime.now().strftime("%B-%d-%Y_%I+%M%p") + if debug: + commit_hash = 'debug' + else: + commit_hash = get_commit_hash() + output_folder = os.path.join( + root_path, model_name + '-' + date_str + '-' + commit_hash) + os.makedirs(output_folder, exist_ok=True) + print(" > Experiment folder: {}".format(output_folder)) + return output_folder + + +def remove_experiment_folder(experiment_path): + """Check folder if there is a checkpoint, otherwise remove the folder""" + + checkpoint_files = glob.glob(experiment_path + "/*.pth.tar") + if not checkpoint_files: + if os.path.exists(experiment_path): + shutil.rmtree(experiment_path, ignore_errors=True) + print(" ! Run is removed from {}".format(experiment_path)) + else: + print(" ! Run is kept in {}".format(experiment_path)) + + +def count_parameters(model): + r"""Count number of trainable parameters in a network""" + return sum(p.numel() for p in model.parameters() if p.requires_grad) + + +def get_user_data_dir(appname): + if sys.platform == "win32": + import winreg # pylint: disable=import-outside-toplevel + key = winreg.OpenKey( + winreg.HKEY_CURRENT_USER, + r"Software\Microsoft\Windows\CurrentVersion\Explorer\Shell Folders" + ) + dir_, _ = winreg.QueryValueEx(key, "Local AppData") + ans = Path(dir_).resolve(strict=False) + elif sys.platform == 'darwin': + ans = Path('~/Library/Application Support/').expanduser() + else: + ans = Path.home().joinpath('.local/share') + return ans.joinpath(appname) + + +def set_init_dict(model_dict, checkpoint_state, c): + # Partial initialization: if there is a mismatch with new and old layer, it is skipped. + for k, v in checkpoint_state.items(): + if k not in model_dict: + print(" | > Layer missing in the model definition: {}".format(k)) + # 1. filter out unnecessary keys + pretrained_dict = { + k: v + for k, v in checkpoint_state.items() if k in model_dict + } + # 2. filter out different size layers + pretrained_dict = { + k: v + for k, v in pretrained_dict.items() + if v.numel() == model_dict[k].numel() + } + # 3. skip reinit layers + if c.reinit_layers is not None: + for reinit_layer_name in c.reinit_layers: + pretrained_dict = { + k: v + for k, v in pretrained_dict.items() + if reinit_layer_name not in k + } + # 4. 
overwrite entries in the existing state dict + model_dict.update(pretrained_dict) + print(" | > {} / {} layers are restored.".format(len(pretrained_dict), + len(model_dict))) + return model_dict + + +class KeepAverage(): + def __init__(self): + self.avg_values = {} + self.iters = {} + + def __getitem__(self, key): + return self.avg_values[key] + + def items(self): + return self.avg_values.items() + + def add_value(self, name, init_val=0, init_iter=0): + self.avg_values[name] = init_val + self.iters[name] = init_iter + + def update_value(self, name, value, weighted_avg=False): + if name not in self.avg_values: + # add value if not exist before + self.add_value(name, init_val=value) + else: + # else update existing value + if weighted_avg: + self.avg_values[name] = 0.99 * self.avg_values[name] + 0.01 * value + self.iters[name] += 1 + else: + self.avg_values[name] = self.avg_values[name] * \ + self.iters[name] + value + self.iters[name] += 1 + self.avg_values[name] /= self.iters[name] + + def add_values(self, name_dict): + for key, value in name_dict.items(): + self.add_value(key, init_val=value) + + def update_values(self, value_dict): + for key, value in value_dict.items(): + self.update_value(key, value) + + +def check_argument(name, c, enum_list=None, max_val=None, min_val=None, restricted=False, val_type=None, alternative=None): + if alternative in c.keys() and c[alternative] is not None: + return + if restricted: + assert name in c.keys(), f' [!] {name} not defined in config.json' + if name in c.keys(): + if max_val: + assert c[name] <= max_val, f' [!] {name} is larger than max value {max_val}' + if min_val: + assert c[name] >= min_val, f' [!] {name} is smaller than min value {min_val}' + if enum_list: + assert c[name].lower() in enum_list, f' [!] {name} is not a valid value' + if isinstance(val_type, list): + is_valid = False + for typ in val_type: + if isinstance(c[name], typ): + is_valid = True + assert is_valid or c[name] is None, f' [!] {name} has wrong type - {type(c[name])} vs {val_type}' + elif val_type: + assert isinstance(c[name], val_type) or c[name] is None, f' [!] {name} has wrong type - {type(c[name])} vs {val_type}' diff --git a/TTS/utils/io.py b/TTS/utils/io.py new file mode 100644 index 0000000000000000000000000000000000000000..46abf1c825b499be2f980f4d3f6ef313fc952930 --- /dev/null +++ b/TTS/utils/io.py @@ -0,0 +1,78 @@ +import os +import re +import json +import yaml +import pickle as pickle_tts +from shutil import copyfile + + +class RenamingUnpickler(pickle_tts.Unpickler): + """Overload default pickler to solve module renaming problem""" + def find_class(self, module, name): + return super().find_class(module.replace('mozilla_voice_tts', 'TTS'), name) + + +class AttrDict(dict): + """A custom dict which converts dict keys + to class attributes""" + def __init__(self, *args, **kwargs): + super(AttrDict, self).__init__(*args, **kwargs) + self.__dict__ = self + + +def read_json_with_comments(json_path): + # fallback to json + with open(json_path, "r") as f: + input_str = f.read() + # handle comments + input_str = re.sub(r'\\\n', '', input_str) + input_str = re.sub(r'//.*\n', '\n', input_str) + data = json.loads(input_str) + return data + +def load_config(config_path: str) -> AttrDict: + """Load config files and discard comments + + Args: + config_path (str): path to config file. 
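+
+    Example (a minimal sketch; the file name is illustrative and the loaded
+    config is assumed to contain an "audio" section)::
+
+        >>> c = load_config("config.json")
+        >>> c.audio["sample_rate"]  # top-level keys are exposed as attributes by AttrDict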
+ """ + config = AttrDict() + + ext = os.path.splitext(config_path)[1] + if ext in (".yml", ".yaml"): + with open(config_path, "r") as f: + data = yaml.safe_load(f) + else: + data = read_json_with_comments(config_path) + config.update(data) + return config + + +def copy_model_files(c, config_file, out_path, new_fields): + """Copy config.json and other model files to training folder and add + new fields. + + Args: + c (dict): model config from config.json. + config_file (str): path to config file. + out_path (str): output path to copy the file. + new_fields (dict): new fileds to be added or edited + in the config file. + """ + # copy config.json + copy_config_path = os.path.join(out_path, 'config.json') + config_lines = open(config_file, "r").readlines() + # add extra information fields + for key, value in new_fields.items(): + if isinstance(value, str): + new_line = '"{}":"{}",\n'.format(key, value) + else: + new_line = '"{}":{},\n'.format(key, value) + config_lines.insert(1, new_line) + config_out_file = open(copy_config_path, "w") + config_out_file.writelines(config_lines) + config_out_file.close() + # copy model stats file if available + if c.audio['stats_path'] is not None: + copy_stats_path = os.path.join(out_path, 'scale_stats.npy') + copyfile(c.audio['stats_path'], copy_stats_path) diff --git a/TTS/utils/manage.py b/TTS/utils/manage.py new file mode 100644 index 0000000000000000000000000000000000000000..25b3d797ab9da641db3726dfd5313a63d64d12be --- /dev/null +++ b/TTS/utils/manage.py @@ -0,0 +1,103 @@ +import json +import gdown +from pathlib import Path +import os + +from TTS.utils.io import load_config +from TTS.utils.generic_utils import get_user_data_dir + +class ModelManager(object): + """Manage TTS models defined in .models.json. + It provides an interface to list and download + models defines in '.model.json' + + Models are downloaded under '.TTS' folder in the user's + home path. + + Args: + models_file (str): path to .model.json + """ + def __init__(self, models_file): + super().__init__() + self.output_prefix = get_user_data_dir('tts') + self.url_prefix = "https://drive.google.com/uc?id=" + self.models_dict = None + self.read_models_file(models_file) + + def read_models_file(self, file_path): + """Read .models.json as a dict + + Args: + file_path (str): path to .models.json. + """ + with open(file_path) as json_file: + self.models_dict = json.load(json_file) + + def list_langs(self): + print(" Name format: type/language") + for model_type in self.models_dict: + for lang in self.models_dict[model_type]: + print(f" >: {model_type}/{lang} ") + + def list_datasets(self): + print(" Name format: type/language/dataset") + for model_type in self.models_dict: + for lang in self.models_dict[model_type]: + for dataset in self.models_dict[model_type][lang]: + print(f" >: {model_type}/{lang}/{dataset}") + + def list_models(self): + print(" Name format: type/language/dataset/model") + for model_type in self.models_dict: + for lang in self.models_dict[model_type]: + for dataset in self.models_dict[model_type][lang]: + for model in self.models_dict[model_type][lang][dataset]: + print(f" >: {model_type}/{lang}/{dataset}/{model} ") + + def download_model(self, model_name): + """Download model files given the full model name. + Model name is in the format + 'type/language/dataset/model' + e.g. 'tts_model/en/ljspeech/tacotron' + + Args: + model_name (str): model name as explained above. 
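+
+        Example (a minimal sketch; the model name below follows the format
+        above and must be listed in '.models.json')::
+
+            >>> manager = ModelManager(".models.json")
+            >>> model_path, config_path = manager.download_model("tts_model/en/ljspeech/tacotron")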
+ + TODO: support multi-speaker models + """ + # fetch model info from the dict + model_type, lang, dataset, model = model_name.split("/") + model_full_name = f"{model_type}--{lang}--{dataset}--{model}" + model_item = self.models_dict[model_type][lang][dataset][model] + # set the model specific output path + output_path = os.path.join(self.output_prefix, model_full_name) + output_model_path = os.path.join(output_path, "model_file.pth.tar") + output_config_path = os.path.join(output_path, "config.json") + if os.path.exists(output_path): + print(f" > {model_name} is already downloaded.") + else: + os.makedirs(output_path, exist_ok=True) + print(f" > Downloading model to {output_path}") + output_stats_path = None + # download files to the output path + self._download_file(model_item['model_file'], output_model_path) + self._download_file(model_item['config_file'], output_config_path) + if model_item['stats_file'] is not None and len(model_item['stats_file']) > 1: + output_stats_path = os.path.join(output_path, 'scale_stats.npy') + self._download_file(model_item['stats_file'], output_stats_path) + # set scale stats path in config.json + config_path = output_config_path + config = load_config(config_path) + config["audio"]['stats_path'] = output_stats_path + with open(config_path, "w") as jf: + json.dump(config, jf) + return output_model_path, output_config_path + + def _download_file(self, idx, output): + gdown.download(f"{self.url_prefix}{idx}", output=output) + + + + + + diff --git a/TTS/utils/radam.py b/TTS/utils/radam.py new file mode 100644 index 0000000000000000000000000000000000000000..58cec9205a90b5607d4ac4792efdb6545f5168d3 --- /dev/null +++ b/TTS/utils/radam.py @@ -0,0 +1,97 @@ +# from https://github.com/LiyuanLucasLiu/RAdam + +import math +import torch +from torch.optim.optimizer import Optimizer + + +class RAdam(Optimizer): + + def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0, degenerated_to_sgd=True): + if lr < 0.0: + raise ValueError("Invalid learning rate: {}".format(lr)) + if eps < 0.0: + raise ValueError("Invalid epsilon value: {}".format(eps)) + if not 0.0 <= betas[0] < 1.0: + raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0])) + if not 0.0 <= betas[1] < 1.0: + raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1])) + + self.degenerated_to_sgd = degenerated_to_sgd + if isinstance(params, (list, tuple)) and len(params) > 0 and isinstance(params[0], dict): + for param in params: + if 'betas' in param and (param['betas'][0] != betas[0] or param['betas'][1] != betas[1]): + param['buffer'] = [[None, None, None] for _ in range(10)] + defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay, buffer=[[None, None, None] for _ in range(10)]) + super(RAdam, self).__init__(params, defaults) + + def __setstate__(self, state): # pylint: disable=useless-super-delegation + super(RAdam, self).__setstate__(state) + + def step(self, closure=None): + + loss = None + if closure is not None: + loss = closure() + + for group in self.param_groups: + + for p in group['params']: + if p.grad is None: + continue + grad = p.grad.data.float() + if grad.is_sparse: + raise RuntimeError('RAdam does not support sparse gradients') + + p_data_fp32 = p.data.float() + + state = self.state[p] + + if len(state) == 0: + state['step'] = 0 + state['exp_avg'] = torch.zeros_like(p_data_fp32) + state['exp_avg_sq'] = torch.zeros_like(p_data_fp32) + else: + state['exp_avg'] = state['exp_avg'].type_as(p_data_fp32) + state['exp_avg_sq'] = 
state['exp_avg_sq'].type_as(p_data_fp32) + + exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq'] + beta1, beta2 = group['betas'] + + exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2) + exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1) + + state['step'] += 1 + buffered = group['buffer'][int(state['step'] % 10)] + if state['step'] == buffered[0]: + N_sma, step_size = buffered[1], buffered[2] + else: + buffered[0] = state['step'] + beta2_t = beta2 ** state['step'] + N_sma_max = 2 / (1 - beta2) - 1 + N_sma = N_sma_max - 2 * state['step'] * beta2_t / (1 - beta2_t) + buffered[1] = N_sma + + # more conservative since it's an approximated value + if N_sma >= 5: + step_size = math.sqrt((1 - beta2_t) * (N_sma - 4) / (N_sma_max - 4) * (N_sma - 2) / N_sma * N_sma_max / (N_sma_max - 2)) / (1 - beta1 ** state['step']) + elif self.degenerated_to_sgd: + step_size = 1.0 / (1 - beta1 ** state['step']) + else: + step_size = -1 + buffered[2] = step_size + + # more conservative since it's an approximated value + if N_sma >= 5: + if group['weight_decay'] != 0: + p_data_fp32.add_(p_data_fp32, alpha=-group['weight_decay'] * group['lr']) + denom = exp_avg_sq.sqrt().add_(group['eps']) + p_data_fp32.addcdiv_(exp_avg, denom, value=-step_size * group['lr']) + p.data.copy_(p_data_fp32) + elif step_size > 0: + if group['weight_decay'] != 0: + p_data_fp32.add_(p_data_fp32, alpha=-group['weight_decay'] * group['lr']) + p_data_fp32.add_(exp_avg, alpha=-step_size * group['lr']) + p.data.copy_(p_data_fp32) + + return loss diff --git a/TTS/utils/synthesizer.py b/TTS/utils/synthesizer.py new file mode 100644 index 0000000000000000000000000000000000000000..615e0d1d5431783f9baf2bf97ae6de4ad0cf38a9 --- /dev/null +++ b/TTS/utils/synthesizer.py @@ -0,0 +1,169 @@ +import time + +import numpy as np +import torch +import pysbd + +from TTS.utils.audio import AudioProcessor +from TTS.utils.io import load_config +from TTS.tts.utils.generic_utils import setup_model +from TTS.tts.utils.speakers import load_speaker_mapping +from TTS.vocoder.utils.generic_utils import setup_generator, interpolate_vocoder_input +# pylint: disable=unused-wildcard-import +# pylint: disable=wildcard-import +from TTS.tts.utils.synthesis import * + +from TTS.tts.utils.text import make_symbols, phonemes, symbols + + +class Synthesizer(object): + def __init__(self, tts_checkpoint, tts_config, vocoder_checkpoint=None, vocoder_config=None, use_cuda=False): + """Encapsulation of tts and vocoder models for inference. + + TODO: handle multi-speaker and GST inference. + + Args: + tts_checkpoint (str): path to the tts model file. + tts_config (str): path to the tts config file. + vocoder_checkpoint (str, optional): path to the vocoder model file. Defaults to None. + vocoder_config (str, optional): path to the vocoder config file. Defaults to None. + use_cuda (bool, optional): enable/disable cuda. Defaults to False. + """ + self.tts_checkpoint = tts_checkpoint + self.tts_config = tts_config + self.vocoder_checkpoint = vocoder_checkpoint + self.vocoder_config = vocoder_config + self.use_cuda = use_cuda + self.wavernn = None + self.vocoder_model = None + self.num_speakers = 0 + self.tts_speakers = None + self.speaker_embedding_dim = None + self.seg = self.get_segmenter("en") + self.use_cuda = use_cuda + if self.use_cuda: + assert torch.cuda.is_available(), "CUDA is not availabe on this machine." 
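+        # Load the TTS model first. The vocoder is optional: when no vocoder
+        # checkpoint is given, tts() runs with use_gl=True (Griffin-Lim based
+        # waveform reconstruction instead of a neural vocoder).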
+ self.load_tts(tts_checkpoint, tts_config, + use_cuda) + if vocoder_checkpoint: + self.load_vocoder(vocoder_checkpoint, vocoder_config, use_cuda) + + @staticmethod + def get_segmenter(lang): + return pysbd.Segmenter(language=lang, clean=True) + + def load_speakers(self): + # load speakers + if self.model_config.use_speaker_embedding is not None: + self.tts_speakers = load_speaker_mapping(self.tts_config.tts_speakers_json) + self.num_speakers = len(self.tts_speakers) + else: + self.num_speakers = 0 + # set external speaker embedding + if self.tts_config.use_external_speaker_embedding_file: + speaker_embedding = self.tts_speakers[list(self.tts_speakers.keys())[0]]['embedding'] + self.speaker_embedding_dim = len(speaker_embedding) + + def init_speaker(self, speaker_idx): + # load speakers + speaker_embedding = None + if hasattr(self, 'tts_speakers') and speaker_idx is not None: + assert speaker_idx < len(self.tts_speakers), f" [!] speaker_idx is out of the range. {speaker_idx} vs {len(self.tts_speakers)}" + if self.tts_config.use_external_speaker_embedding_file: + speaker_embedding = self.tts_speakers[speaker_idx]['embedding'] + return speaker_embedding + + def load_tts(self, tts_checkpoint, tts_config, use_cuda): + # pylint: disable=global-statement + global symbols, phonemes + + self.tts_config = load_config(tts_config) + self.use_phonemes = self.tts_config.use_phonemes + self.ap = AudioProcessor(**self.tts_config.audio) + + if 'characters' in self.tts_config.keys(): + symbols, phonemes = make_symbols(**self.tts_config.characters) + + if self.use_phonemes: + self.input_size = len(phonemes) + else: + self.input_size = len(symbols) + + self.tts_model = setup_model(self.input_size, num_speakers=self.num_speakers, c=self.tts_config) + self.tts_model.load_checkpoint(tts_config, tts_checkpoint, eval=True) + if use_cuda: + self.tts_model.cuda() + + def load_vocoder(self, model_file, model_config, use_cuda): + self.vocoder_config = load_config(model_config) + self.vocoder_ap = AudioProcessor(**self.vocoder_config['audio']) + self.vocoder_model = setup_generator(self.vocoder_config) + self.vocoder_model.load_checkpoint(self.vocoder_config, model_file, eval=True) + if use_cuda: + self.vocoder_model.cuda() + + def save_wav(self, wav, path): + wav = np.array(wav) + self.ap.save_wav(wav, path) + + def split_into_sentences(self, text): + return self.seg.segment(text) + + def tts(self, text, speaker_idx=None): + start_time = time.time() + wavs = [] + sens = self.split_into_sentences(text) + print(" > Text splitted to sentences.") + print(sens) + + speaker_embedding = self.init_speaker(speaker_idx) + use_gl = self.vocoder_model is None + + for sen in sens: + # synthesize voice + waveform, _, _, mel_postnet_spec, _, _ = synthesis( + self.tts_model, + sen, + self.tts_config, + self.use_cuda, + self.ap, + speaker_idx, + None, + False, + self.tts_config.enable_eos_bos_chars, + use_gl, + speaker_embedding=speaker_embedding) + if not use_gl: + # denormalize tts output based on tts audio config + mel_postnet_spec = self.ap.denormalize(mel_postnet_spec.T).T + device_type = "cuda" if self.use_cuda else "cpu" + # renormalize spectrogram based on vocoder config + vocoder_input = self.vocoder_ap.normalize(mel_postnet_spec.T) + # compute scale factor for possible sample rate mismatch + scale_factor = [1, self.vocoder_config['audio']['sample_rate'] / self.ap.sample_rate] + if scale_factor[1] != 1: + print(" > interpolating tts model output.") + vocoder_input = interpolate_vocoder_input(scale_factor, vocoder_input) + 
else: + vocoder_input = torch.tensor(vocoder_input).unsqueeze(0) # pylint: disable=not-callable + # run vocoder model + # [1, T, C] + waveform = self.vocoder_model.inference(vocoder_input.to(device_type)) + if self.use_cuda and not use_gl: + waveform = waveform.cpu() + if not use_gl: + waveform = waveform.numpy() + waveform = waveform.squeeze() + + # trim silence + waveform = trim_silence(waveform, self.ap) + + wavs += list(waveform) + wavs += [0] * 10000 + + # compute stats + process_time = time.time() - start_time + audio_time = len(wavs) / self.tts_config.audio['sample_rate'] + print(f" > Processing time: {process_time}") + print(f" > Real-time factor: {process_time / audio_time}") + return wavs diff --git a/TTS/utils/tensorboard_logger.py b/TTS/utils/tensorboard_logger.py new file mode 100644 index 0000000000000000000000000000000000000000..4ee12d74ec39510a00d7d658831fb15b338b527f --- /dev/null +++ b/TTS/utils/tensorboard_logger.py @@ -0,0 +1,81 @@ +import traceback +from tensorboardX import SummaryWriter + + +class TensorboardLogger(object): + def __init__(self, log_dir, model_name): + self.model_name = model_name + self.writer = SummaryWriter(log_dir) + self.train_stats = {} + self.eval_stats = {} + + def tb_model_weights(self, model, step): + layer_num = 1 + for name, param in model.named_parameters(): + if param.numel() == 1: + self.writer.add_scalar( + "layer{}-{}/value".format(layer_num, name), + param.max(), step) + else: + self.writer.add_scalar( + "layer{}-{}/max".format(layer_num, name), + param.max(), step) + self.writer.add_scalar( + "layer{}-{}/min".format(layer_num, name), + param.min(), step) + self.writer.add_scalar( + "layer{}-{}/mean".format(layer_num, name), + param.mean(), step) + self.writer.add_scalar( + "layer{}-{}/std".format(layer_num, name), + param.std(), step) + self.writer.add_histogram( + "layer{}-{}/param".format(layer_num, name), param, step) + self.writer.add_histogram( + "layer{}-{}/grad".format(layer_num, name), param.grad, step) + layer_num += 1 + + def dict_to_tb_scalar(self, scope_name, stats, step): + for key, value in stats.items(): + self.writer.add_scalar('{}/{}'.format(scope_name, key), value, step) + + def dict_to_tb_figure(self, scope_name, figures, step): + for key, value in figures.items(): + self.writer.add_figure('{}/{}'.format(scope_name, key), value, step) + + def dict_to_tb_audios(self, scope_name, audios, step, sample_rate): + for key, value in audios.items(): + try: + self.writer.add_audio('{}/{}'.format(scope_name, key), value, step, sample_rate=sample_rate) + except RuntimeError: + traceback.print_exc() + + def tb_train_iter_stats(self, step, stats): + self.dict_to_tb_scalar(f"{self.model_name}_TrainIterStats", stats, step) + + def tb_train_epoch_stats(self, step, stats): + self.dict_to_tb_scalar(f"{self.model_name}_TrainEpochStats", stats, step) + + def tb_train_figures(self, step, figures): + self.dict_to_tb_figure(f"{self.model_name}_TrainFigures", figures, step) + + def tb_train_audios(self, step, audios, sample_rate): + self.dict_to_tb_audios(f"{self.model_name}_TrainAudios", audios, step, sample_rate) + + def tb_eval_stats(self, step, stats): + self.dict_to_tb_scalar(f"{self.model_name}_EvalStats", stats, step) + + def tb_eval_figures(self, step, figures): + self.dict_to_tb_figure(f"{self.model_name}_EvalFigures", figures, step) + + def tb_eval_audios(self, step, audios, sample_rate): + self.dict_to_tb_audios(f"{self.model_name}_EvalAudios", audios, step, sample_rate) + + def tb_test_audios(self, step, audios, sample_rate): + 
self.dict_to_tb_audios(f"{self.model_name}_TestAudios", audios, step, sample_rate) + + def tb_test_figures(self, step, figures): + self.dict_to_tb_figure(f"{self.model_name}_TestFigures", figures, step) + + def tb_add_text(self, title, text, step): + self.writer.add_text(title, text, step) diff --git a/TTS/utils/training.py b/TTS/utils/training.py new file mode 100644 index 0000000000000000000000000000000000000000..8166562ccb0caa4f11da1754215cc7412722f799 --- /dev/null +++ b/TTS/utils/training.py @@ -0,0 +1,116 @@ +import torch +import numpy as np + + +def setup_torch_training_env(cudnn_enable, cudnn_benchmark): + torch.backends.cudnn.enabled = cudnn_enable + torch.backends.cudnn.benchmark = cudnn_benchmark + torch.manual_seed(54321) + use_cuda = torch.cuda.is_available() + num_gpus = torch.cuda.device_count() + print(" > Using CUDA: ", use_cuda) + print(" > Number of GPUs: ", num_gpus) + return use_cuda, num_gpus + + +def check_update(model, grad_clip, ignore_stopnet=False, amp_opt_params=None): + r'''Check model gradient against unexpected jumps and failures''' + skip_flag = False + if ignore_stopnet: + if not amp_opt_params: + grad_norm = torch.nn.utils.clip_grad_norm_( + [param for name, param in model.named_parameters() if 'stopnet' not in name], grad_clip) + else: + grad_norm = torch.nn.utils.clip_grad_norm_(amp_opt_params, grad_clip) + else: + if not amp_opt_params: + grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip) + else: + grad_norm = torch.nn.utils.clip_grad_norm_(amp_opt_params, grad_clip) + + # compatibility with different torch versions + if isinstance(grad_norm, float): + if np.isinf(grad_norm): + print(" | > Gradient is INF !!") + skip_flag = True + else: + if torch.isinf(grad_norm): + print(" | > Gradient is INF !!") + skip_flag = True + return grad_norm, skip_flag + + +def lr_decay(init_lr, global_step, warmup_steps): + r'''from https://github.com/r9y9/tacotron_pytorch/blob/master/train.py''' + warmup_steps = float(warmup_steps) + step = global_step + 1. + lr = init_lr * warmup_steps**0.5 * np.minimum(step * warmup_steps**-1.5, + step**-0.5) + return lr + + +def adam_weight_decay(optimizer): + """ + Custom weight decay operation, not effecting grad values. + """ + for group in optimizer.param_groups: + for param in group['params']: + current_lr = group['lr'] + weight_decay = group['weight_decay'] + factor = -weight_decay * group['lr'] + param.data = param.data.add(param.data, + alpha=factor) + return optimizer, current_lr + +# pylint: disable=dangerous-default-value +def set_weight_decay(model, weight_decay, skip_list={"decoder.attention.v", "rnn", "lstm", "gru", "embedding"}): + """ + Skip biases, BatchNorm parameters, rnns. + and attention projection layer v + """ + decay = [] + no_decay = [] + for name, param in model.named_parameters(): + if not param.requires_grad: + continue + + if len(param.shape) == 1 or any([skip_name in name for skip_name in skip_list]): + no_decay.append(param) + else: + decay.append(param) + return [{ + 'params': no_decay, + 'weight_decay': 0. 
+ }, { + 'params': decay, + 'weight_decay': weight_decay + }] + + +# pylint: disable=protected-access +class NoamLR(torch.optim.lr_scheduler._LRScheduler): + def __init__(self, optimizer, warmup_steps=0.1, last_epoch=-1): + self.warmup_steps = float(warmup_steps) + super(NoamLR, self).__init__(optimizer, last_epoch) + + def get_lr(self): + step = max(self.last_epoch, 1) + return [ + base_lr * self.warmup_steps**0.5 * + min(step * self.warmup_steps**-1.5, step**-0.5) + for base_lr in self.base_lrs + ] + + +def gradual_training_scheduler(global_step, config): + """Setup the gradual training schedule wrt number + of active GPUs""" + num_gpus = torch.cuda.device_count() + if num_gpus == 0: + num_gpus = 1 + new_values = None + # we set the scheduling wrt num_gpus + for values in config.gradual_training: + if global_step * num_gpus >= values[0]: + new_values = values + return new_values[1], new_values[2] diff --git a/TTS/vocoder/README.md b/TTS/vocoder/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e0ae8f21376658541dc2df1b2894c9d8f57ea185 --- /dev/null +++ b/TTS/vocoder/README.md @@ -0,0 +1,39 @@ +# Mozilla TTS Vocoders (Experimental) + +Here there are vocoder model implementations which can be combined with the other TTS models. + +Currently, following models are implemented: + +- Melgan +- MultiBand-Melgan +- ParallelWaveGAN +- GAN-TTS (Discriminator Only) + +It is also very easy to adapt different vocoder models as we provide a flexible and modular (but not too modular) framework. + +## Training a model + +You can see here an example (Soon)[Colab Notebook]() training MelGAN with LJSpeech dataset. + +In order to train a new model, you need to gather all wav files into a folder and give this folder to `data_path` in '''config.json''' + +You need to define other relevant parameters in your ```config.json``` and then start traning with the following command. + +```CUDA_VISIBLE_DEVICES='0' python tts/bin/train_vocoder.py --config_path path/to/config.json``` + +Example config files can be found under `tts/vocoder/configs/` folder. + +You can continue a previous training run by the following command. + +```CUDA_VISIBLE_DEVICES='0' python tts/bin/train_vocoder.py --continue_path path/to/your/model/folder``` + +You can fine-tune a pre-trained model by the following command. + +```CUDA_VISIBLE_DEVICES='0' python tts/bin/train_vocoder.py --restore_path path/to/your/model.pth.tar``` + +Restoring a model starts a new training in a different folder. It only restores model weights with the given checkpoint file. However, continuing a training starts from the same directory where the previous training run left off. + +You can also follow your training runs on Tensorboard as you do with our TTS models. + +## Acknowledgement +Thanks to @kan-bayashi for his [repository](https://github.com/kan-bayashi/ParallelWaveGAN) being the start point of our work. 
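To make the vocoder training workflow described in the README above concrete, here is a minimal sketch. The copied config name and the `data_path`/`output_path` values are placeholders; the commands themselves are the ones given in the README.

```bash
# copy an example config and point it at your own wav folder
cp TTS/vocoder/configs/multiband_melgan_config.json my_vocoder_config.json
# edit "data_path", "output_path" and (if used) "stats_path" in my_vocoder_config.json

# start training on GPU 0
CUDA_VISIBLE_DEVICES='0' python tts/bin/train_vocoder.py --config_path my_vocoder_config.json

# continue a stopped run from its output folder
CUDA_VISIBLE_DEVICES='0' python tts/bin/train_vocoder.py --continue_path path/to/your/model/folder
```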
diff --git a/TTS/vocoder/__init__.py b/TTS/vocoder/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/TTS/vocoder/configs/multiband-melgan_and_rwd_config.json b/TTS/vocoder/configs/multiband-melgan_and_rwd_config.json new file mode 100644 index 0000000000000000000000000000000000000000..0b7518548ecbf778eedfbf0be87760c280e8d224 --- /dev/null +++ b/TTS/vocoder/configs/multiband-melgan_and_rwd_config.json @@ -0,0 +1,151 @@ +{ + "run_name": "multiband-melgan-rwd", + "run_description": "multiband melgan with random window discriminator from https://arxiv.org/pdf/1909.11646.pdf", + + // AUDIO PARAMETERS + "audio":{ + // stft parameters + "num_freq": 513, // number of stft frequency levels. Size of the linear spectogram frame. + "win_length": 1024, // stft window length in ms. + "hop_length": 256, // stft window hop-lengh in ms. + "frame_length_ms": null, // stft window length in ms.If null, 'win_length' is used. + "frame_shift_ms": null, // stft window hop-lengh in ms. If null, 'hop_length' is used. + + // Audio processing parameters + "sample_rate": 22050, // DATASET-RELATED: wav sample-rate. If different than the original data, it is resampled. + "preemphasis": 0.0, // pre-emphasis to reduce spec noise and make it more structured. If 0.0, no -pre-emphasis. + "ref_level_db": 20, // reference level db, theoretically 20db is the sound of air. + + // Silence trimming + "do_trim_silence": true,// enable trimming of slience of audio as you load it. LJspeech (false), TWEB (false), Nancy (true) + "trim_db": 60, // threshold for timming silence. Set this according to your dataset. + + // Griffin-Lim + "power": 1.5, // value to sharpen wav signals after GL algorithm. + "griffin_lim_iters": 60,// #griffin-lim iterations. 30-60 is a good range. Larger the value, slower the generation. + + // MelSpectrogram parameters + "num_mels": 80, // size of the mel spec frame. + "mel_fmin": 0.0, // minimum freq level for mel-spec. ~50 for male and ~95 for female voices. Tune for dataset!! + "mel_fmax": 8000.0, // maximum freq level for mel-spec. Tune for dataset!! + + // Normalization parameters + "signal_norm": true, // normalize spec values. Mean-Var normalization if 'stats_path' is defined otherwise range normalization defined by the other params. + "min_level_db": -100, // lower bound for normalization + "symmetric_norm": true, // move normalization to range [-1, 1] + "max_norm": 4.0, // scale normalization to range [-max_norm, max_norm] or [0, max_norm] + "clip_norm": true, // clip normalized values into the range. + "stats_path": null // DO NOT USE WITH MULTI_SPEAKER MODEL. scaler stats file computed by 'compute_statistics.py'. 
If it is defined, mean-std based notmalization is used and other normalization params are ignored + }, + + // DISTRIBUTED TRAINING + // "distributed":{ + // "backend": "nccl", + // "url": "tcp:\/\/localhost:54321" + // }, + + // MODEL PARAMETERS + "use_pqmf": true, + + // LOSS PARAMETERS + "use_stft_loss": true, + "use_subband_stft_loss": true, + "use_mse_gan_loss": true, + "use_hinge_gan_loss": false, + "use_feat_match_loss": false, // use only with melgan discriminators + + // loss weights + "stft_loss_weight": 0.5, + "subband_stft_loss_weight": 0.5, + "mse_G_loss_weight": 2.5, + "hinge_G_loss_weight": 2.5, + "feat_match_loss_weight": 25, + + // multiscale stft loss parameters + "stft_loss_params": { + "n_ffts": [1024, 2048, 512], + "hop_lengths": [120, 240, 50], + "win_lengths": [600, 1200, 240] + }, + + // subband multiscale stft loss parameters + "subband_stft_loss_params":{ + "n_ffts": [384, 683, 171], + "hop_lengths": [30, 60, 10], + "win_lengths": [150, 300, 60] + }, + + "target_loss": "avg_G_loss", // loss value to pick the best model to save after each epoch + + // DISCRIMINATOR + "discriminator_model": "random_window_discriminator", + "discriminator_model_params":{ + "uncond_disc_donwsample_factors": [8, 4], + "cond_disc_downsample_factors": [[8, 4, 2, 2, 2], [8, 4, 2, 2], [8, 4, 2], [8, 4], [4, 2, 2]], + "cond_disc_out_channels": [[128, 128, 256, 256], [128, 256, 256], [128, 256], [256], [128, 256]], + "window_sizes": [512, 1024, 2048, 4096, 8192] + }, + "steps_to_start_discriminator": 200000, // steps required to start GAN trainining.1 + + // GENERATOR + "generator_model": "multiband_melgan_generator", + "generator_model_params": { + "upsample_factors":[8, 4, 2], + "num_res_blocks": 4 + }, + + // DATASET + "data_path": "/home/erogol/Data/LJSpeech-1.1/wavs/", + "seq_len": 16384, + "pad_short": 2000, + "conv_pad": 0, + "use_noise_augment": false, + "use_cache": true, + + "reinit_layers": [], // give a list of layer names to restore from the given checkpoint. If not defined, it reloads all heuristically matching layers. + + // TRAINING + "batch_size": 64, // Batch size for training. Lower values than 32 might cause hard to learn attention. It is overwritten by 'gradual_training'. + + // VALIDATION + "run_eval": true, + "test_delay_epochs": 10, //Until attention is aligned, testing only wastes computation time. + "test_sentences_file": null, // set a file to load sentences to be used for testing. If it is null then we use default english sentences. + + // OPTIMIZER + "noam_schedule": false, // use noam warmup and lr schedule. + "warmup_steps_gen": 4000, // Noam decay steps to increase the learning rate from 0 to "lr" + "warmup_steps_disc": 4000, + "epochs": 10000, // total number of epochs to train. + "wd": 0.0, // Weight decay weight. + "gen_clip_grad": -1, // Generator gradient clipping threshold. Apply gradient clipping if > 0 + "disc_clip_grad": -1, // Discriminator gradient clipping threshold. + "lr_scheduler_gen": "MultiStepLR", // one of the schedulers from https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate + "lr_scheduler_gen_params": { + "gamma": 0.5, + "milestones": [100000, 200000, 300000, 400000, 500000, 600000] + }, + "lr_scheduler_disc": "MultiStepLR", // one of the schedulers from https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate + "lr_scheduler_disc_params": { + "gamma": 0.5, + "milestones": [100000, 200000, 300000, 400000, 500000, 600000] + }, + "lr_gen": 1e-4, // Initial learning rate. 
If Noam decay is active, maximum learning rate. + "lr_disc": 1e-4, + + // TENSORBOARD and LOGGING + "print_step": 25, // Number of steps to log traning on console. + "print_eval": false, // If True, it prints loss values for each step in eval run. + "save_step": 25000, // Number of training steps expected to plot training stats on TB and save model checkpoints. + "checkpoint": true, // If true, it saves checkpoints per "save_step" + "tb_model_param_stats": false, // true, plots param stats per layer on tensorboard. Might be memory consuming, but good for debugging. + + // DATA LOADING + "num_loader_workers": 4, // number of training data loader processes. Don't set it too big. 4-8 are good values. + "num_val_loader_workers": 4, // number of evaluation data loader processes. + "eval_split_size": 10, + + // PATHS + "output_path": "/home/erogol/Models/LJSpeech/" +} + diff --git a/TTS/vocoder/configs/multiband_melgan_config.json b/TTS/vocoder/configs/multiband_melgan_config.json new file mode 100644 index 0000000000000000000000000000000000000000..7a5a13e3989dbf1476a78232cc049b00ff0e70b6 --- /dev/null +++ b/TTS/vocoder/configs/multiband_melgan_config.json @@ -0,0 +1,141 @@ +{ + "run_name": "multiband-melgan", + "run_description": "multiband melgan mean-var scaling", + + // AUDIO PARAMETERS + "audio":{ + "fft_size": 1024, // number of stft frequency levels. Size of the linear spectogram frame. + "win_length": 1024, // stft window length in ms. + "hop_length": 256, // stft window hop-lengh in ms. + "frame_length_ms": null, // stft window length in ms.If null, 'win_length' is used. + "frame_shift_ms": null, // stft window hop-lengh in ms. If null, 'hop_length' is used. + + // Audio processing parameters + "sample_rate": 22050, // DATASET-RELATED: wav sample-rate. If different than the original data, it is resampled. + "preemphasis": 0.0, // pre-emphasis to reduce spec noise and make it more structured. If 0.0, no -pre-emphasis. + "ref_level_db": 0, // reference level db, theoretically 20db is the sound of air. + + // Silence trimming + "do_trim_silence": true,// enable trimming of slience of audio as you load it. LJspeech (false), TWEB (false), Nancy (true) + "trim_db": 60, // threshold for timming silence. Set this according to your dataset. + + // MelSpectrogram parameters + "num_mels": 80, // size of the mel spec frame. + "mel_fmin": 50.0, // minimum freq level for mel-spec. ~50 for male and ~95 for female voices. Tune for dataset!! + "mel_fmax": 7600.0, // maximum freq level for mel-spec. Tune for dataset!! + "spec_gain": 1.0, // scaler value appplied after log transform of spectrogram. + + // Normalization parameters + "signal_norm": true, // normalize spec values. Mean-Var normalization if 'stats_path' is defined otherwise range normalization defined by the other params. + "min_level_db": -100, // lower bound for normalization + "symmetric_norm": true, // move normalization to range [-1, 1] + "max_norm": 4.0, // scale normalization to range [-max_norm, max_norm] or [0, max_norm] + "clip_norm": true, // clip normalized values into the range. + "stats_path": "/home/erogol/Data/LJSpeech-1.1/scale_stats.npy" // DO NOT USE WITH MULTI_SPEAKER MODEL. scaler stats file computed by 'compute_statistics.py'. 
If it is defined, mean-std based notmalization is used and other normalization params are ignored + }, + + // DISTRIBUTED TRAINING + // "distributed":{ + // "backend": "nccl", + // "url": "tcp:\/\/localhost:54321" + // }, + + // LOSS PARAMETERS + "use_stft_loss": true, + "use_subband_stft_loss": true, // use only with multi-band models. + "use_mse_gan_loss": true, + "use_hinge_gan_loss": false, + "use_feat_match_loss": false, // use only with melgan discriminators + + // loss weights + "stft_loss_weight": 0.5, + "subband_stft_loss_weight": 0.5, + "mse_G_loss_weight": 2.5, + "hinge_G_loss_weight": 2.5, + "feat_match_loss_weight": 25, + + // multiscale stft loss parameters + "stft_loss_params": { + "n_ffts": [1024, 2048, 512], + "hop_lengths": [120, 240, 50], + "win_lengths": [600, 1200, 240] + }, + + // subband multiscale stft loss parameters + "subband_stft_loss_params":{ + "n_ffts": [384, 683, 171], + "hop_lengths": [30, 60, 10], + "win_lengths": [150, 300, 60] + }, + + "target_loss": "avg_G_loss", // loss value to pick the best model to save after each epoch + + // DISCRIMINATOR + "discriminator_model": "melgan_multiscale_discriminator", + "discriminator_model_params":{ + "base_channels": 16, + "max_channels":512, + "downsample_factors":[4, 4, 4] + }, + "steps_to_start_discriminator": 200000, // steps required to start GAN trainining.1 + + // GENERATOR + "generator_model": "multiband_melgan_generator", + "generator_model_params": { + "upsample_factors":[8, 4, 2], + "num_res_blocks": 4 + }, + + // DATASET + "data_path": "/home/erogol/Data/LJSpeech-1.1/wavs/", + "feature_path": null, + "seq_len": 16384, + "pad_short": 2000, + "conv_pad": 0, + "use_noise_augment": false, + "use_cache": true, + + "reinit_layers": [], // give a list of layer names to restore from the given checkpoint. If not defined, it reloads all heuristically matching layers. + + // TRAINING + "batch_size": 64, // Batch size for training. Lower values than 32 might cause hard to learn attention. It is overwritten by 'gradual_training'. + + // VALIDATION + "run_eval": true, + "test_delay_epochs": 10, //Until attention is aligned, testing only wastes computation time. + "test_sentences_file": null, // set a file to load sentences to be used for testing. If it is null then we use default english sentences. + + // OPTIMIZER + "epochs": 10000, // total number of epochs to train. + "wd": 0.0, // Weight decay weight. + "gen_clip_grad": -1, // Generator gradient clipping threshold. Apply gradient clipping if > 0 + "disc_clip_grad": -1, // Discriminator gradient clipping threshold. + "lr_scheduler_gen": "MultiStepLR", // one of the schedulers from https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate + "lr_scheduler_gen_params": { + "gamma": 0.5, + "milestones": [100000, 200000, 300000, 400000, 500000, 600000] + }, + "lr_scheduler_disc": "MultiStepLR", // one of the schedulers from https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate + "lr_scheduler_disc_params": { + "gamma": 0.5, + "milestones": [100000, 200000, 300000, 400000, 500000, 600000] + }, + "lr_gen": 1e-4, // Initial learning rate. If Noam decay is active, maximum learning rate. + "lr_disc": 1e-4, + + // TENSORBOARD and LOGGING + "print_step": 25, // Number of steps to log traning on console. + "print_eval": false, // If True, it prints loss values for each step in eval run. + "save_step": 25000, // Number of training steps expected to plot training stats on TB and save model checkpoints. 
+ "checkpoint": true, // If true, it saves checkpoints per "save_step" + "tb_model_param_stats": false, // true, plots param stats per layer on tensorboard. Might be memory consuming, but good for debugging. + + // DATA LOADING + "num_loader_workers": 4, // number of training data loader processes. Don't set it too big. 4-8 are good values. + "num_val_loader_workers": 4, // number of evaluation data loader processes. + "eval_split_size": 10, + + // PATHS + "output_path": "/home/erogol/Models/LJSpeech/" +} + diff --git a/TTS/vocoder/configs/multiband_melgan_config_mozilla.json b/TTS/vocoder/configs/multiband_melgan_config_mozilla.json new file mode 100644 index 0000000000000000000000000000000000000000..4978d42f0df13936a77516bd5bfa0ab8768204c4 --- /dev/null +++ b/TTS/vocoder/configs/multiband_melgan_config_mozilla.json @@ -0,0 +1,154 @@ +{ + "run_name": "multiband-melgan", + "run_description": "multiband melgan mean-var scaling", + + // AUDIO PARAMETERS + "audio":{ + "fft_size": 1024, // number of stft frequency levels. Size of the linear spectogram frame. + "win_length": 1024, // stft window length in ms. + "hop_length": 256, // stft window hop-lengh in ms. + "frame_length_ms": null, // stft window length in ms.If null, 'win_length' is used. + "frame_shift_ms": null, // stft window hop-lengh in ms. If null, 'hop_length' is used. + + // Audio processing parameters + "sample_rate": 22050, // DATASET-RELATED: wav sample-rate. If different than the original data, it is resampled. + "preemphasis": 0.0, // pre-emphasis to reduce spec noise and make it more structured. If 0.0, no -pre-emphasis. + "ref_level_db": 0, // reference level db, theoretically 20db is the sound of air. + + // Silence trimming + "do_trim_silence": true,// enable trimming of slience of audio as you load it. LJspeech (false), TWEB (false), Nancy (true) + "trim_db": 60, // threshold for timming silence. Set this according to your dataset. + + // MelSpectrogram parameters + "num_mels": 80, // size of the mel spec frame. + "mel_fmin": 50.0, // minimum freq level for mel-spec. ~50 for male and ~95 for female voices. Tune for dataset!! + "mel_fmax": 7600.0, // maximum freq level for mel-spec. Tune for dataset!! + "spec_gain": 1.0, // scaler value appplied after log transform of spectrogram. + + // Normalization parameters + "signal_norm": true, // normalize spec values. Mean-Var normalization if 'stats_path' is defined otherwise range normalization defined by the other params. + "min_level_db": -100, // lower bound for normalization + "symmetric_norm": true, // move normalization to range [-1, 1] + "max_norm": 4.0, // scale normalization to range [-max_norm, max_norm] or [0, max_norm] + "clip_norm": true, // clip normalized values into the range. + "stats_path": "/home/erogol/Data/MozillaMerged22050/scale_stats.npy" // DO NOT USE WITH MULTI_SPEAKER MODEL. scaler stats file computed by 'compute_statistics.py'. 
If it is defined, mean-std based notmalization is used and other normalization params are ignored + }, + + // DISTRIBUTED TRAINING + // "distributed":{ + // "backend": "nccl", + // "url": "tcp:\/\/localhost:54321" + // }, + + // MODEL PARAMETERS + "use_pqmf": true, + + // LOSS PARAMETERS + "use_stft_loss": true, + "use_subband_stft_loss": true, + "use_mse_gan_loss": true, + "use_hinge_gan_loss": false, + "use_feat_match_loss": false, // use only with melgan discriminators + + // loss weights + "stft_loss_weight": 0.5, + "subband_stft_loss_weight": 0.5, + "mse_G_loss_weight": 2.5, + "hinge_G_loss_weight": 2.5, + "feat_match_loss_weight": 25, + + // multiscale stft loss parameters + "stft_loss_params": { + "n_ffts": [1024, 2048, 512], + "hop_lengths": [120, 240, 50], + "win_lengths": [600, 1200, 240] + }, + + // subband multiscale stft loss parameters + "subband_stft_loss_params":{ + "n_ffts": [384, 683, 171], + "hop_lengths": [30, 60, 10], + "win_lengths": [150, 300, 60] + }, + + "target_loss": "avg_G_loss", // loss value to pick the best model to save after each epoch + + // DISCRIMINATOR + "discriminator_model": "melgan_multiscale_discriminator", + "discriminator_model_params":{ + "base_channels": 16, + "max_channels":512, + "downsample_factors":[4, 4, 4] + }, + "steps_to_start_discriminator": 200000, // steps required to start GAN trainining.1 + + // GENERATOR + "generator_model": "multiband_melgan_generator", + "generator_model_params": { + "upsample_factors":[8, 4, 2], + "num_res_blocks": 4 + }, + + // DATASET + "data_path": "/home/erogol/Data/MozillaMerged22050/wavs/", + "feature_path": null, + "seq_len": 6144, + "pad_short": 500, + "conv_pad": 0, + "use_noise_augment": false, + "use_cache": true, + + "reinit_layers": [], // give a list of layer names to restore from the given checkpoint. If not defined, it reloads all heuristically matching layers. + + // TRAINING + "batch_size": 64, // Batch size for training. Lower values than 32 might cause hard to learn attention. It is overwritten by 'gradual_training'. + "train_noise_schedule":{ + "min_val": 1e-6, + "max_val": 1e-2, + "num_steps": 1000 + }, + "test_noise_schedule":{ + "min_val": 1e-6, + "max_val": 1e-2, + "num_steps": 50 + } + + // VALIDATION + "run_eval": true, + "test_delay_epochs": 10, //Until attention is aligned, testing only wastes computation time. + "test_sentences_file": null, // set a file to load sentences to be used for testing. If it is null then we use default english sentences. + + // OPTIMIZER + "epochs": 10000, // total number of epochs to train. + "wd": 0.0, // Weight decay weight. + "gen_clip_grad": -1, // Generator gradient clipping threshold. Apply gradient clipping if > 0 + "disc_clip_grad": -1, // Discriminator gradient clipping threshold. + "lr_scheduler_gen": "MultiStepLR", // one of the schedulers from https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate + "lr_scheduler_gen_params": { + "gamma": 0.5, + "milestones": [100000, 200000, 300000, 400000, 500000, 600000] + }, + "lr_scheduler_disc": "MultiStepLR", // one of the schedulers from https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate + "lr_scheduler_disc_params": { + "gamma": 0.5, + "milestones": [100000, 200000, 300000, 400000, 500000, 600000] + }, + "lr_gen": 1e-4, // Initial learning rate. If Noam decay is active, maximum learning rate. + "lr_disc": 1e-4, + + // TENSORBOARD and LOGGING + "print_step": 25, // Number of steps to log traning on console. 
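A note on the normalization comments repeated in these configs: when "stats_path" is set, spectrograms are standardised with dataset-level mean and variance instead of the min/max range scaling. The sketch below only illustrates the idea, using synthetic stand-in statistics; in the repo the real statistics come from compute_statistics.py.

```python
import numpy as np

# Illustration only: mean-variance scaling as enabled by "stats_path".
dataset_mels = [np.abs(np.random.randn(80, 200)) for _ in range(16)]  # stand-in mels
all_frames = np.concatenate(dataset_mels, axis=1)
mel_mean = all_frames.mean(axis=1, keepdims=True)    # per-bin mean over the dataset
mel_std = all_frames.std(axis=1, keepdims=True)      # per-bin std over the dataset

mel = dataset_mels[0]
mel_norm = (mel - mel_mean) / (mel_std + 1e-8)       # zero mean, unit variance per bin
```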
+ "print_eval": false, // If True, it prints loss values for each step in eval run. + "save_step": 25000, // Number of training steps expected to plot training stats on TB and save model checkpoints. + "checkpoint": true, // If true, it saves checkpoints per "save_step" + "tb_model_param_stats": false, // true, plots param stats per layer on tensorboard. Might be memory consuming, but good for debugging. + + // DATA LOADING + "num_loader_workers": 4, // number of training data loader processes. Don't set it too big. 4-8 are good values. + "num_val_loader_workers": 4, // number of evaluation data loader processes. + "eval_split_size": 10, + + // PATHS + "output_path": "/home/erogol/Models/Mozilla/" +} + diff --git a/TTS/vocoder/configs/parallel_wavegan_config.json b/TTS/vocoder/configs/parallel_wavegan_config.json new file mode 100644 index 0000000000000000000000000000000000000000..fcd765bd5b1e2223481f803a4af9f638006c4f86 --- /dev/null +++ b/TTS/vocoder/configs/parallel_wavegan_config.json @@ -0,0 +1,143 @@ +{ + "run_name": "pwgan", + "run_description": "parallel-wavegan training", + + // AUDIO PARAMETERS + "audio":{ + "fft_size": 1024, // number of stft frequency levels. Size of the linear spectogram frame. + "win_length": 1024, // stft window length in ms. + "hop_length": 256, // stft window hop-lengh in ms. + "frame_length_ms": null, // stft window length in ms.If null, 'win_length' is used. + "frame_shift_ms": null, // stft window hop-lengh in ms. If null, 'hop_length' is used. + + // Audio processing parameters + "sample_rate": 22050, // DATASET-RELATED: wav sample-rate. If different than the original data, it is resampled. + "preemphasis": 0.0, // pre-emphasis to reduce spec noise and make it more structured. If 0.0, no -pre-emphasis. + "ref_level_db": 0, // reference level db, theoretically 20db is the sound of air. + + // Silence trimming + "do_trim_silence": true,// enable trimming of slience of audio as you load it. LJspeech (false), TWEB (false), Nancy (true) + "trim_db": 60, // threshold for timming silence. Set this according to your dataset. + + // MelSpectrogram parameters + "num_mels": 80, // size of the mel spec frame. + "mel_fmin": 50.0, // minimum freq level for mel-spec. ~50 for male and ~95 for female voices. Tune for dataset!! + "mel_fmax": 7600.0, // maximum freq level for mel-spec. Tune for dataset!! + "spec_gain": 1.0, // scaler value appplied after log transform of spectrogram. + + // Normalization parameters + "signal_norm": true, // normalize spec values. Mean-Var normalization if 'stats_path' is defined otherwise range normalization defined by the other params. + "min_level_db": -100, // lower bound for normalization + "symmetric_norm": true, // move normalization to range [-1, 1] + "max_norm": 4.0, // scale normalization to range [-max_norm, max_norm] or [0, max_norm] + "clip_norm": true, // clip normalized values into the range. + "stats_path": "/home/erogol/Data/LJSpeech-1.1/scale_stats.npy" // DO NOT USE WITH MULTI_SPEAKER MODEL. scaler stats file computed by 'compute_statistics.py'. 
If it is defined, mean-std based notmalization is used and other normalization params are ignored + }, + + // DISTRIBUTED TRAINING + // "distributed":{ + // "backend": "nccl", + // "url": "tcp:\/\/localhost:54321" + // }, + + // MODEL PARAMETERS + "use_pqmf": true, + + // LOSS PARAMETERS + "use_stft_loss": true, + "use_subband_stft_loss": false, // USE ONLY WITH MULTIBAND MODELS + "use_mse_gan_loss": true, + "use_hinge_gan_loss": false, + "use_feat_match_loss": false, // use only with melgan discriminators + + // loss weights + "stft_loss_weight": 0.5, + "subband_stft_loss_weight": 0.5, + "mse_G_loss_weight": 2.5, + "hinge_G_loss_weight": 2.5, + "feat_match_loss_weight": 25, + + // multiscale stft loss parameters + "stft_loss_params": { + "n_ffts": [1024, 2048, 512], + "hop_lengths": [120, 240, 50], + "win_lengths": [600, 1200, 240] + }, + + // subband multiscale stft loss parameters + "subband_stft_loss_params":{ + "n_ffts": [384, 683, 171], + "hop_lengths": [30, 60, 10], + "win_lengths": [150, 300, 60] + }, + + "target_loss": "avg_G_loss", // loss value to pick the best model to save after each epoch + + // DISCRIMINATOR + "discriminator_model": "parallel_wavegan_discriminator", + "discriminator_model_params":{ + "num_layers": 10 + }, + "steps_to_start_discriminator": 200000, // steps required to start GAN trainining.1 + + // GENERATOR + "generator_model": "parallel_wavegan_generator", + "generator_model_params": { + "upsample_factors":[4, 4, 4, 4], + "stacks": 3, + "num_res_blocks": 30 + }, + + // DATASET + "data_path": "/home/erogol/Data/LJSpeech-1.1/wavs/", + "feature_path": null, + "seq_len": 25600, + "pad_short": 2000, + "conv_pad": 0, + "use_noise_augment": false, + "use_cache": true, + + "reinit_layers": [], // give a list of layer names to restore from the given checkpoint. If not defined, it reloads all heuristically matching layers. + + // TRAINING + "batch_size": 6, // Batch size for training. Lower values than 32 might cause hard to learn attention. It is overwritten by 'gradual_training'. + + // VALIDATION + "run_eval": true, + "test_delay_epochs": 10, //Until attention is aligned, testing only wastes computation time. + "test_sentences_file": null, // set a file to load sentences to be used for testing. If it is null then we use default english sentences. + + // OPTIMIZER + "epochs": 10000, // total number of epochs to train. + "wd": 0.0, // Weight decay weight. + "gen_clip_grad": -1, // Generator gradient clipping threshold. Apply gradient clipping if > 0 + "disc_clip_grad": -1, // Discriminator gradient clipping threshold. + "lr_scheduler_gen": "MultiStepLR", // one of the schedulers from https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate + "lr_scheduler_gen_params": { + "gamma": 0.5, + "milestones": [100000, 200000, 300000, 400000, 500000, 600000] + }, + "lr_scheduler_disc": "MultiStepLR", // one of the schedulers from https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate + "lr_scheduler_disc_params": { + "gamma": 0.5, + "milestones": [100000, 200000, 300000, 400000, 500000, 600000] + }, + "lr_gen": 1e-4, // Initial learning rate. If Noam decay is active, maximum learning rate. + "lr_disc": 1e-4, + + // TENSORBOARD and LOGGING + "print_step": 25, // Number of steps to log traning on console. + "print_eval": false, // If True, it prints loss values for each step in eval run. + "save_step": 25000, // Number of training steps expected to plot training stats on TB and save model checkpoints. 
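One implicit constraint worth spelling out for the generator settings above: each mel frame has to be upsampled back to "hop_length" audio samples, so the product of "upsample_factors" must equal hop_length, or hop_length divided by the number of PQMF subbands for the multi-band model. A quick check using values from these configs:

```python
from functools import reduce
from operator import mul

hop_length = 256                                    # "hop_length" in the audio sections
assert reduce(mul, [4, 4, 4, 4]) == hop_length      # parallel_wavegan_generator
# multiband_melgan_generator only upsamples to hop_length / 4; PQMF synthesis
# with 4 subbands restores the remaining factor of 4.
assert reduce(mul, [8, 4, 2]) * 4 == hop_length
```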
+ "checkpoint": true, // If true, it saves checkpoints per "save_step" + "tb_model_param_stats": false, // true, plots param stats per layer on tensorboard. Might be memory consuming, but good for debugging. + + // DATA LOADING + "num_loader_workers": 4, // number of training data loader processes. Don't set it too big. 4-8 are good values. + "num_val_loader_workers": 4, // number of evaluation data loader processes. + "eval_split_size": 10, + + // PATHS + "output_path": "/home/erogol/Models/LJSpeech/" +} + diff --git a/TTS/vocoder/configs/universal_fullband_melgan.json b/TTS/vocoder/configs/universal_fullband_melgan.json new file mode 100644 index 0000000000000000000000000000000000000000..fe4433c24b580d72efa462cac120c50d5c42e8d2 --- /dev/null +++ b/TTS/vocoder/configs/universal_fullband_melgan.json @@ -0,0 +1,138 @@ +{ + "run_name": "fullband-melgan", + "run_description": "fullband melgan mean-var scaling", + + // AUDIO PARAMETERS + "audio":{ + "fft_size": 1024, // number of stft frequency levels. Size of the linear spectogram frame. + "win_length": 1024, // stft window length in ms. + "hop_length": 256, // stft window hop-lengh in ms. + "frame_length_ms": null, // stft window length in ms.If null, 'win_length' is used. + "frame_shift_ms": null, // stft window hop-lengh in ms. If null, 'hop_length' is used. + + // Audio processing parameters + "sample_rate": 24000, // DATASET-RELATED: wav sample-rate. If different than the original data, it is resampled. + "preemphasis": 0.0, // pre-emphasis to reduce spec noise and make it more structured. If 0.0, no -pre-emphasis. + "ref_level_db": 0, // reference level db, theoretically 20db is the sound of air. + + // Silence trimming + "do_trim_silence": true,// enable trimming of slience of audio as you load it. LJspeech (false), TWEB (false), Nancy (true) + "trim_db": 60, // threshold for timming silence. Set this according to your dataset. + + // MelSpectrogram parameters + "num_mels": 80, // size of the mel spec frame. + "mel_fmin": 50.0, // minimum freq level for mel-spec. ~50 for male and ~95 for female voices. Tune for dataset!! + "mel_fmax": 7600.0, // maximum freq level for mel-spec. Tune for dataset!! + "spec_gain": 1.0, // scaler value appplied after log transform of spectrogram. + + // Normalization parameters + "signal_norm": true, // normalize spec values. Mean-Var normalization if 'stats_path' is defined otherwise range normalization defined by the other params. + "min_level_db": -100, // lower bound for normalization + "symmetric_norm": true, // move normalization to range [-1, 1] + "max_norm": 4.0, // scale normalization to range [-max_norm, max_norm] or [0, max_norm] + "clip_norm": true, // clip normalized values into the range. + "stats_path": "/home/erogol/Data/libritts/LibriTTS/scale_stats.npy" // DO NOT USE WITH MULTI_SPEAKER MODEL. scaler stats file computed by 'compute_statistics.py'. 
If it is defined, mean-std based notmalization is used and other normalization params are ignored + }, + + // DISTRIBUTED TRAINING + "distributed":{ + "backend": "nccl", + "url": "tcp:\/\/localhost:54324" + }, + + // MODEL PARAMETERS + "use_pqmf": false, + + // LOSS PARAMETERS + "use_stft_loss": true, + "use_subband_stft_loss": false, + "use_mse_gan_loss": true, + "use_hinge_gan_loss": false, + "use_feat_match_loss": false, // use only with melgan discriminators + + // loss weights + "stft_loss_weight": 0.5, + "subband_stft_loss_weight": 0.5, + "mse_G_loss_weight": 2.5, + "hinge_G_loss_weight": 2.5, + "feat_match_loss_weight": 25, + + // multiscale stft loss parameters + "stft_loss_params": { + "n_ffts": [1024, 2048, 512], + "hop_lengths": [120, 240, 50], + "win_lengths": [600, 1200, 240] + }, + + "target_loss": "avg_G_loss", // loss value to pick the best model to save after each epoch + + // DISCRIMINATOR + "discriminator_model": "melgan_multiscale_discriminator", + "discriminator_model_params":{ + "base_channels": 16, + "max_channels":512, + "downsample_factors":[4, 4, 4] + }, + "steps_to_start_discriminator": 200000, // steps required to start GAN trainining.1 + + // GENERATOR + "generator_model": "fullband_melgan_generator", + "generator_model_params": { + "upsample_factors":[8, 8, 4], + "num_res_blocks": 4 + }, + + // DATASET + "data_path": "/home/erogol/Data/libritts/LibriTTS/train-clean-360/", + "feature_path": null, + "seq_len": 16384, + "pad_short": 2000, + "conv_pad": 0, + "use_noise_augment": false, + "use_cache": true, + + "reinit_layers": [], // give a list of layer names to restore from the given checkpoint. If not defined, it reloads all heuristically matching layers. + + // TRAINING + "batch_size": 48, // Batch size for training. Lower values than 32 might cause hard to learn attention. It is overwritten by 'gradual_training'. + + // VALIDATION + "run_eval": true, + "test_delay_epochs": 10, //Until attention is aligned, testing only wastes computation time. + "test_sentences_file": null, // set a file to load sentences to be used for testing. If it is null then we use default english sentences. + + // OPTIMIZER + "epochs": 10000, // total number of epochs to train. + "wd": 0.0, // Weight decay weight. + "gen_clip_grad": -1, // Generator gradient clipping threshold. Apply gradient clipping if > 0 + "disc_clip_grad": -1, // Discriminator gradient clipping threshold. + "lr_scheduler_gen": "MultiStepLR", // one of the schedulers from https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate + "lr_scheduler_gen_params": { + "gamma": 0.5, + "milestones": [100000, 200000, 300000, 400000, 500000, 600000] + }, + "lr_scheduler_disc": "MultiStepLR", // one of the schedulers from https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate + "lr_scheduler_disc_params": { + "gamma": 0.5, + "milestones": [100000, 200000, 300000, 400000, 500000, 600000] + }, + "lr_gen": 0.000015625, // Initial learning rate. If Noam decay is active, maximum learning rate. + "lr_disc": 0.000015625, + + // TENSORBOARD and LOGGING + "print_step": 25, // Number of steps to log traning on console. + "print_eval": false, // If True, it prints loss values for each step in eval run. + "save_step": 25000, // Number of training steps expected to plot training stats on TB and save model checkpoints. + "checkpoint": true, // If true, it saves checkpoints per "save_step" + "tb_model_param_stats": false, // true, plots param stats per layer on tensorboard. 
Might be memory consuming, but good for debugging. + + // DATA LOADING + "num_loader_workers": 4, // number of training data loader processes. Don't set it too big. 4-8 are good values. + "num_val_loader_workers": 4, // number of evaluation data loader processes. + "eval_split_size": 10, + + // PATHS + "output_path": "/home/erogol/Models/" +} + + diff --git a/TTS/vocoder/configs/wavegrad_libritts.json b/TTS/vocoder/configs/wavegrad_libritts.json new file mode 100644 index 0000000000000000000000000000000000000000..a271ce33b4c290499352c914558fe7311c51e373 --- /dev/null +++ b/TTS/vocoder/configs/wavegrad_libritts.json @@ -0,0 +1,116 @@ +{ + "run_name": "wavegrad-libritts", + "run_description": "wavegrad libritts", + + "audio":{ + "fft_size": 1024, // number of stft frequency levels. Size of the linear spectogram frame. + "win_length": 1024, // stft window length in ms. + "hop_length": 256, // stft window hop-lengh in ms. + "frame_length_ms": null, // stft window length in ms.If null, 'win_length' is used. + "frame_shift_ms": null, // stft window hop-lengh in ms. If null, 'hop_length' is used. + + // Audio processing parameters + "sample_rate": 24000, // DATASET-RELATED: wav sample-rate. If different than the original data, it is resampled. + "preemphasis": 0.0, // pre-emphasis to reduce spec noise and make it more structured. If 0.0, no -pre-emphasis. + "ref_level_db": 0, // reference level db, theoretically 20db is the sound of air. + + // Silence trimming + "do_trim_silence": true,// enable trimming of slience of audio as you load it. LJspeech (false), TWEB (false), Nancy (true) + "trim_db": 60, // threshold for timming silence. Set this according to your dataset. + + // MelSpectrogram parameters + "num_mels": 80, // size of the mel spec frame. + "mel_fmin": 50.0, // minimum freq level for mel-spec. ~50 for male and ~95 for female voices. Tune for dataset!! + "mel_fmax": 7600.0, // maximum freq level for mel-spec. Tune for dataset!! + "spec_gain": 1.0, // scaler value appplied after log transform of spectrogram. + + // Normalization parameters + "signal_norm": true, // normalize spec values. Mean-Var normalization if 'stats_path' is defined otherwise range normalization defined by the other params. + "min_level_db": -100, // lower bound for normalization + "symmetric_norm": true, // move normalization to range [-1, 1] + "max_norm": 4.0, // scale normalization to range [-max_norm, max_norm] or [0, max_norm] + "clip_norm": true, // clip normalized values into the range. + "stats_path": "/home/erogol/Data/libritts/LibriTTS/scale_stats_wavegrad.npy" // DO NOT USE WITH MULTI_SPEAKER MODEL. scaler stats file computed by 'compute_statistics.py'. 
If it is defined, mean-std based notmalization is used and other normalization params are ignored + }, + + // DISTRIBUTED TRAINING + "mixed_precision": true, // enable torch mixed precision training (true, false) + "distributed":{ + "backend": "nccl", + "url": "tcp:\/\/localhost:54322" + }, + + "target_loss": "avg_wavegrad_loss", // loss value to pick the best model to save after each epoch + + // MODEL PARAMETERS + "generator_model": "wavegrad", + "model_params":{ + "use_weight_norm": true, + "y_conv_channels":32, + "x_conv_channels":768, + "ublock_out_channels": [512, 512, 256, 128, 128], + "dblock_out_channels": [128, 128, 256, 512], + "upsample_factors": [4, 4, 4, 2, 2], + "upsample_dilations": [ + [1, 2, 1, 2], + [1, 2, 1, 2], + [1, 2, 4, 8], + [1, 2, 4, 8], + [1, 2, 4, 8]] + }, + + // DATASET + "data_path": "/home/erogol/Data/libritts/LibriTTS/train-clean-360/", // root data path. It finds all wav files recursively from there. + "feature_path": null, // if you use precomputed features + "seq_len": 6144, // 24 * hop_length + "pad_short": 0, // additional padding for short wavs + "conv_pad": 0, // additional padding against convolutions applied to spectrograms + "use_noise_augment": false, // add noise to the audio signal for augmentation + "use_cache": false, // use in memory cache to keep the computed features. This might cause OOM. + + "reinit_layers": [], // give a list of layer names to restore from the given checkpoint. If not defined, it reloads all heuristically matching layers. + + // TRAINING + "batch_size": 96, // Batch size for training. + + // NOISE SCHEDULE PARAMS - Only effective at training time. + "train_noise_schedule":{ + "min_val": 1e-6, + "max_val": 1e-2, + "num_steps": 1000 + }, + "test_noise_schedule":{ + "min_val": 1e-6, + "max_val": 1e-2, + "num_steps": 50 + }, + + // VALIDATION + "run_eval": true, // enable/disable evaluation run + + // OPTIMIZER + "epochs": 10000, // total number of epochs to train. + "clip_grad": 1.0, // Generator gradient clipping threshold. Apply gradient clipping if > 0 + "lr_scheduler": "MultiStepLR", // one of the schedulers from https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate + "lr_scheduler_params": { + "gamma": 0.5, + "milestones": [100000, 200000, 300000, 400000, 500000, 600000] + }, + "lr": 1e-4, // Initial learning rate. If Noam decay is active, maximum learning rate. + + // TENSORBOARD and LOGGING + "print_step": 50, // Number of steps to log traning on console. + "print_eval": false, // If True, it prints loss values for each step in eval run. + "save_step": 5000, // Number of training steps expected to plot training stats on TB and save model checkpoints. + "checkpoint": true, // If true, it saves checkpoints per "save_step" + "tb_model_param_stats": true, // true, plots param stats per layer on tensorboard. Might be memory consuming, but good for debugging. + + // DATA LOADING + "num_loader_workers": 4, // number of training data loader processes. Don't set it too big. 4-8 are good values. + "num_val_loader_workers": 4, // number of evaluation data loader processes. 
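The "train_noise_schedule" and "test_noise_schedule" blocks above only specify a beta range and a step count. The sketch below assumes a linear beta schedule (the usual WaveGrad-style construction, not something stated in this config); the cumulative product of (1 - beta) then gives the noise level used to corrupt clean audio at each diffusion step.

```python
import numpy as np

# Sketch under the assumption of a linear beta schedule.
min_val, max_val, num_steps = 1e-6, 1e-2, 1000      # "train_noise_schedule"
betas = np.linspace(min_val, max_val, num_steps)
alphas = 1.0 - betas
noise_level = np.sqrt(np.cumprod(alphas))           # decays from ~1.0 towards 0
print(noise_level[0], noise_level[-1])
```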
+ "eval_split_size": 256, + + // PATHS + "output_path": "/home/erogol/Models/LJSpeech/" +} + diff --git a/TTS/vocoder/configs/wavernn_config.json b/TTS/vocoder/configs/wavernn_config.json new file mode 100644 index 0000000000000000000000000000000000000000..58667b699052c548b59c874ece591add78febba6 --- /dev/null +++ b/TTS/vocoder/configs/wavernn_config.json @@ -0,0 +1,98 @@ +{ + "run_name": "wavernn_librittts", + "run_description": "wavernn libritts training from LJSpeech model", + +// AUDIO PARAMETERS + "audio": { + "fft_size": 1024, // number of stft frequency levels. Size of the linear spectogram frame. + "win_length": 1024, // stft window length in ms. + "hop_length": 256, // stft window hop-lengh in ms. + "frame_length_ms": null, // stft window length in ms.If null, 'win_length' is used. + "frame_shift_ms": null, // stft window hop-lengh in ms. If null, 'hop_length' is used. + // Audio processing parameters + "sample_rate": 24000, // DATASET-RELATED: wav sample-rate. If different than the original data, it is resampled. + "preemphasis": 0.98, // pre-emphasis to reduce spec noise and make it more structured. If 0.0, no -pre-emphasis. + "ref_level_db": 20, // reference level db, theoretically 20db is the sound of air. + // Silence trimming + "do_trim_silence": false, // enable trimming of slience of audio as you load it. LJspeech (false), TWEB (false), Nancy (true) + "trim_db": 60, // threshold for timming silence. Set this according to your dataset. + // MelSpectrogram parameters + "num_mels": 80, // size of the mel spec frame. + "mel_fmin": 40.0, // minimum freq level for mel-spec. ~50 for male and ~95 for female voices. Tune for dataset!! + "mel_fmax": 8000.0, // maximum freq level for mel-spec. Tune for dataset!! + "spec_gain": 20.0, // scaler value appplied after log transform of spectrogram. + // Normalization parameters + "signal_norm": true, // normalize spec values. Mean-Var normalization if 'stats_path' is defined otherwise range normalization defined by the other params. + "min_level_db": -100, // lower bound for normalization + "symmetric_norm": true, // move normalization to range [-1, 1] + "max_norm": 4.0, // scale normalization to range [-max_norm, max_norm] or [0, max_norm] + "clip_norm": true, // clip normalized values into the range. + "stats_path": null // DO NOT USE WITH MULTI_SPEAKER MODEL. scaler stats file computed by 'compute_statistics.py'. 
If it is defined, mean-std based notmalization is used and other normalization params are ignored + }, + +// Generating / Synthesizing + "batched": true, + "target_samples": 11000, // target number of samples to be generated in each batch entry + "overlap_samples": 550, // number of samples for crossfading between batches + // DISTRIBUTED TRAINING + // "distributed":{ + // "backend": "nccl", + // "url": "tcp:\/\/localhost:54321" + // }, + +// MODEL MODE + "mode": "mold", // mold [string], gauss [string], bits [int] + "mulaw": true, // apply mulaw if mode is bits + +// MODEL PARAMETERS + "wavernn_model_params": { + "rnn_dims": 512, + "fc_dims": 512, + "compute_dims": 128, + "res_out_dims": 128, + "num_res_blocks": 10, + "use_aux_net": true, + "use_upsample_net": true, + "upsample_factors": [4, 8, 8] // this needs to correctly factorise hop_length + }, + +// DATASET + //"use_gta": true, // use computed gta features from the tts model + "data_path": "/home/erogol/Data/libritts/LibriTTS/train-clean-360/", // path containing training wav files + "feature_path": null, // path containing computed features from wav files if null compute them + "seq_len": 1280, // has to be devideable by hop_length + "padding": 2, // pad the input for resnet to see wider input length + +// TRAINING + "batch_size": 256, // Batch size for training. + "epochs": 10000, // total number of epochs to train. + "mixed_precision": true, // enable/ disable mixed precision training + +// VALIDATION + "run_eval": true, + "test_every_epochs": 10, // Test after set number of epochs (Test every 10 epochs for example) + +// OPTIMIZER + "grad_clip": 4, // apply gradient clipping if > 0 + "lr_scheduler": "MultiStepLR", // one of the schedulers from https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate + "lr_scheduler_params": { + "gamma": 0.5, + "milestones": [200000, 400000, 600000] + }, + "lr": 1e-4, // initial learning rate + +// TENSORBOARD and LOGGING + "print_step": 25, // Number of steps to log traning on console. + "print_eval": false, // If True, it prints loss values for each step in eval run. + "save_step": 25000, // Number of training steps expected to plot training stats on TB and save model checkpoints. + "checkpoint": true, // If true, it saves checkpoints per "save_step" + "tb_model_param_stats": false, // true, plots param stats per layer on tensorboard. Might be memory consuming, but good for debugging. + +// DATA LOADING + "num_loader_workers": 4, // number of training data loader processes. Don't set it too big. 4-8 are good values. + "num_val_loader_workers": 4, // number of evaluation data loader processes. 
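Two details in this WaveRNN config deserve a quick illustration: the "mulaw"/"mode" pair (mu-law companding before quantising to an integer bit depth) and the divisibility constraints noted in the comments. The mu-law function below is the textbook formula, not the repo's AudioProcessor implementation.

```python
import numpy as np

def mulaw_encode(x, bits=10):
    # Textbook mu-law companding followed by quantisation to [0, 2**bits - 1].
    mu = 2 ** bits - 1
    y = np.sign(x) * np.log1p(mu * np.abs(x)) / np.log1p(mu)
    return ((y + 1.0) / 2.0 * mu + 0.5).astype(np.int64)

audio = np.clip(np.random.randn(22050) * 0.1, -1.0, 1.0)   # stand-in waveform in [-1, 1]
labels = mulaw_encode(audio, bits=10)
assert 0 <= labels.min() and labels.max() <= 2 ** 10 - 1

# Constraints called out in the comments above:
assert 1280 % 256 == 0           # "seq_len" must be divisible by "hop_length"
assert 4 * 8 * 8 == 256          # "upsample_factors" must factorise "hop_length"
```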
+ "eval_split_size": 50, // number of samples for testing + +// PATHS + "output_path": "/home/erogol/Models/LJSpeech/" +} diff --git a/TTS/vocoder/datasets/__init__.py b/TTS/vocoder/datasets/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/TTS/vocoder/datasets/gan_dataset.py b/TTS/vocoder/datasets/gan_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..af23fbf23cdb712dbf4c7dbfdd43d6ab10b07112 --- /dev/null +++ b/TTS/vocoder/datasets/gan_dataset.py @@ -0,0 +1,127 @@ +import os +import glob +import torch +import random +import numpy as np +from torch.utils.data import Dataset +from multiprocessing import Manager + + +class GANDataset(Dataset): + """ + GAN Dataset searchs for all the wav files under root path + and converts them to acoustic features on the fly and returns + random segments of (audio, feature) couples. + """ + def __init__(self, + ap, + items, + seq_len, + hop_len, + pad_short, + conv_pad=2, + is_training=True, + return_segments=True, + use_noise_augment=False, + use_cache=False, + verbose=False): + + self.ap = ap + self.item_list = items + self.compute_feat = not isinstance(items[0], (tuple, list)) + self.seq_len = seq_len + self.hop_len = hop_len + self.pad_short = pad_short + self.conv_pad = conv_pad + self.is_training = is_training + self.return_segments = return_segments + self.use_cache = use_cache + self.use_noise_augment = use_noise_augment + self.verbose = verbose + + assert seq_len % hop_len == 0, " [!] seq_len has to be a multiple of hop_len." + self.feat_frame_len = seq_len // hop_len + (2 * conv_pad) + + # map G and D instances + self.G_to_D_mappings = list(range(len(self.item_list))) + self.shuffle_mapping() + + # cache acoustic features + if use_cache: + self.create_feature_cache() + + def create_feature_cache(self): + self.manager = Manager() + self.cache = self.manager.list() + self.cache += [None for _ in range(len(self.item_list))] + + @staticmethod + def find_wav_files(path): + return glob.glob(os.path.join(path, '**', '*.wav'), recursive=True) + + def __len__(self): + return len(self.item_list) + + def __getitem__(self, idx): + """ Return different items for Generator and Discriminator and + cache acoustic features """ + if self.return_segments: + idx2 = self.G_to_D_mappings[idx] + item1 = self.load_item(idx) + item2 = self.load_item(idx2) + return item1, item2 + item1 = self.load_item(idx) + return item1 + + def shuffle_mapping(self): + random.shuffle(self.G_to_D_mappings) + + def load_item(self, idx): + """ load (audio, feat) couple """ + if self.compute_feat: + # compute features from wav + wavpath = self.item_list[idx] + # print(wavpath) + + if self.use_cache and self.cache[idx] is not None: + audio, mel = self.cache[idx] + else: + audio = self.ap.load_wav(wavpath) + + if len(audio) < self.seq_len + self.pad_short: + audio = np.pad(audio, (0, self.seq_len + self.pad_short - len(audio)), \ + mode='constant', constant_values=0.0) + + mel = self.ap.melspectrogram(audio) + else: + + # load precomputed features + wavpath, feat_path = self.item_list[idx] + + if self.use_cache and self.cache[idx] is not None: + audio, mel = self.cache[idx] + else: + audio = self.ap.load_wav(wavpath) + mel = np.load(feat_path) + + # correct the audio length wrt padding applied in stft + audio = np.pad(audio, (0, self.hop_len), mode="edge") + audio = audio[:mel.shape[-1] * self.hop_len] + assert mel.shape[-1] * self.hop_len == audio.shape[-1], f' [!] 
{mel.shape[-1] * self.hop_len} vs {audio.shape[-1]}' + + audio = torch.from_numpy(audio).float().unsqueeze(0) + mel = torch.from_numpy(mel).float().squeeze(0) + + if self.return_segments: + max_mel_start = mel.shape[1] - self.feat_frame_len + mel_start = random.randint(0, max_mel_start) + mel_end = mel_start + self.feat_frame_len + mel = mel[:, mel_start:mel_end] + + audio_start = mel_start * self.hop_len + audio = audio[:, audio_start:audio_start + + self.seq_len] + + if self.use_noise_augment and self.is_training and self.return_segments: + audio = audio + (1 / 32768) * torch.randn_like(audio) + return (mel, audio) diff --git a/TTS/vocoder/datasets/preprocess.py b/TTS/vocoder/datasets/preprocess.py new file mode 100644 index 0000000000000000000000000000000000000000..afea45fd32062cff5142deeb3aff9b4ccd47290c --- /dev/null +++ b/TTS/vocoder/datasets/preprocess.py @@ -0,0 +1,62 @@ +import glob +import os +from pathlib import Path +from tqdm import tqdm + +import numpy as np + + +def preprocess_wav_files(out_path, config, ap): + os.makedirs(os.path.join(out_path, "quant"), exist_ok=True) + os.makedirs(os.path.join(out_path, "mel"), exist_ok=True) + wav_files = find_wav_files(config.data_path) + for path in tqdm(wav_files): + wav_name = Path(path).stem + quant_path = os.path.join(out_path, "quant", wav_name + ".npy") + mel_path = os.path.join(out_path, "mel", wav_name + ".npy") + y = ap.load_wav(path) + mel = ap.melspectrogram(y) + np.save(mel_path, mel) + if isinstance(config.mode, int): + quant = ( + ap.mulaw_encode(y, qc=config.mode) + if config.mulaw + else ap.quantize(y, bits=config.mode) + ) + np.save(quant_path, quant) + + +def find_wav_files(data_path): + wav_paths = glob.glob(os.path.join(data_path, "**", "*.wav"), recursive=True) + return wav_paths + + +def find_feat_files(data_path): + feat_paths = glob.glob(os.path.join(data_path, "**", "*.npy"), recursive=True) + return feat_paths + + +def load_wav_data(data_path, eval_split_size): + wav_paths = find_wav_files(data_path) + np.random.seed(0) + np.random.shuffle(wav_paths) + return wav_paths[:eval_split_size], wav_paths[eval_split_size:] + + +def load_wav_feat_data(data_path, feat_path, eval_split_size): + wav_paths = find_wav_files(data_path) + feat_paths = find_feat_files(feat_path) + + wav_paths.sort(key=lambda x: Path(x).stem) + feat_paths.sort(key=lambda x: Path(x).stem) + + assert len(wav_paths) == len(feat_paths) + for wav, feat in zip(wav_paths, feat_paths): + wav_name = Path(wav).stem + feat_name = Path(feat).stem + assert wav_name == feat_name + + items = list(zip(wav_paths, feat_paths)) + np.random.seed(0) + np.random.shuffle(items) + return items[:eval_split_size], items[eval_split_size:] diff --git a/TTS/vocoder/datasets/wavegrad_dataset.py b/TTS/vocoder/datasets/wavegrad_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..22f2af32f7cb02af10103718489b1491b0d4f209 --- /dev/null +++ b/TTS/vocoder/datasets/wavegrad_dataset.py @@ -0,0 +1,131 @@ +import os +import glob +import torch +import random +import numpy as np +from torch.utils.data import Dataset +from multiprocessing import Manager + + +class WaveGradDataset(Dataset): + """ + WaveGrad Dataset searchs for all the wav files under root path + and converts them to acoustic features on the fly and returns + random segments of (audio, feature) couples. 
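A short usage sketch for the helpers in TTS/vocoder/datasets/preprocess.py above; the paths are placeholders. Both loaders shuffle with a fixed seed and return the eval split first, then the training split.

```python
from TTS.vocoder.datasets.preprocess import load_wav_data, load_wav_feat_data

eval_wavs, train_wavs = load_wav_data("/path/to/wavs/", eval_split_size=10)

# With precomputed features, each item becomes a (wav_path, feat_path) pair
# whose file stems are asserted to match.
eval_items, train_items = load_wav_feat_data(
    "/path/to/wavs/", "/path/to/feats/", eval_split_size=10)
```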
+ """ + def __init__(self, + ap, + items, + seq_len, + hop_len, + pad_short, + conv_pad=2, + is_training=True, + return_segments=True, + use_noise_augment=False, + use_cache=False, + verbose=False): + + self.ap = ap + self.item_list = items + self.seq_len = seq_len if return_segments else None + self.hop_len = hop_len + self.pad_short = pad_short + self.conv_pad = conv_pad + self.is_training = is_training + self.return_segments = return_segments + self.use_cache = use_cache + self.use_noise_augment = use_noise_augment + self.verbose = verbose + + if return_segments: + assert seq_len % hop_len == 0, " [!] seq_len has to be a multiple of hop_len." + self.feat_frame_len = seq_len // hop_len + (2 * conv_pad) + + # cache acoustic features + if use_cache: + self.create_feature_cache() + + def create_feature_cache(self): + self.manager = Manager() + self.cache = self.manager.list() + self.cache += [None for _ in range(len(self.item_list))] + + @staticmethod + def find_wav_files(path): + return glob.glob(os.path.join(path, '**', '*.wav'), recursive=True) + + def __len__(self): + return len(self.item_list) + + def __getitem__(self, idx): + item = self.load_item(idx) + return item + + def load_test_samples(self, num_samples): + samples = [] + return_segments = self.return_segments + self.return_segments = False + for idx in range(num_samples): + mel, audio = self.load_item(idx) + samples.append([mel, audio]) + self.return_segments = return_segments + return samples + + def load_item(self, idx): + """ load (audio, feat) couple """ + # compute features from wav + wavpath = self.item_list[idx] + + if self.use_cache and self.cache[idx] is not None: + audio = self.cache[idx] + else: + audio = self.ap.load_wav(wavpath) + + if self.return_segments: + # correct audio length wrt segment length + if audio.shape[-1] < self.seq_len + self.pad_short: + audio = np.pad(audio, (0, self.seq_len + self.pad_short - len(audio)), \ + mode='constant', constant_values=0.0) + assert audio.shape[-1] >= self.seq_len + self.pad_short, f"{audio.shape[-1]} vs {self.seq_len + self.pad_short}" + + # correct the audio length wrt hop length + p = (audio.shape[-1] // self.hop_len + 1) * self.hop_len - audio.shape[-1] + audio = np.pad(audio, (0, p), mode='constant', constant_values=0.0) + + if self.use_cache: + self.cache[idx] = audio + + if self.return_segments: + max_start = len(audio) - self.seq_len + start = random.randint(0, max_start) + end = start + self.seq_len + audio = audio[start:end] + + if self.use_noise_augment and self.is_training and self.return_segments: + audio = audio + (1 / 32768) * torch.randn_like(audio) + + mel = self.ap.melspectrogram(audio) + mel = mel[..., :-1] # ignore the padding + + audio = torch.from_numpy(audio).float() + mel = torch.from_numpy(mel).float().squeeze(0) + return (mel, audio) + + @staticmethod + def collate_full_clips(batch): + """This is used in tune_wavegrad.py. 
+ It pads sequences to the max length.""" + max_mel_length = max([b[0].shape[1] for b in batch]) if len(batch) > 1 else batch[0][0].shape[1] + max_audio_length = max([b[1].shape[0] for b in batch]) if len(batch) > 1 else batch[0][1].shape[0] + + mels = torch.zeros([len(batch), batch[0][0].shape[0], max_mel_length]) + audios = torch.zeros([len(batch), max_audio_length]) + + for idx, b in enumerate(batch): + mel = b[0] + audio = b[1] + mels[idx, :, :mel.shape[1]] = mel + audios[idx, :audio.shape[0]] = audio + + return mels, audios diff --git a/TTS/vocoder/datasets/wavernn_dataset.py b/TTS/vocoder/datasets/wavernn_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..257800b006c72e304d3f118f1bcd5cf799f345ba --- /dev/null +++ b/TTS/vocoder/datasets/wavernn_dataset.py @@ -0,0 +1,118 @@ +import torch +import numpy as np +from torch.utils.data import Dataset + + +class WaveRNNDataset(Dataset): + """ + WaveRNN Dataset searchs for all the wav files under root path + and converts them to acoustic features on the fly. + """ + + def __init__(self, + ap, + items, + seq_len, + hop_len, + pad, + mode, + mulaw, + is_training=True, + verbose=False, + ): + + self.ap = ap + self.compute_feat = not isinstance(items[0], (tuple, list)) + self.item_list = items + self.seq_len = seq_len + self.hop_len = hop_len + self.mel_len = seq_len // hop_len + self.pad = pad + self.mode = mode + self.mulaw = mulaw + self.is_training = is_training + self.verbose = verbose + + assert self.seq_len % self.hop_len == 0 + + def __len__(self): + return len(self.item_list) + + def __getitem__(self, index): + item = self.load_item(index) + return item + + def load_item(self, index): + """ + load (audio, feat) couple if feature_path is set + else compute it on the fly + """ + if self.compute_feat: + + wavpath = self.item_list[index] + audio = self.ap.load_wav(wavpath) + min_audio_len = 2 * self.seq_len + (2 * self.pad * self.hop_len) + if audio.shape[0] < min_audio_len: + print(" [!] Instance is too short! : {}".format(wavpath)) + audio = np.pad(audio, [0, min_audio_len - audio.shape[0] + self.hop_len]) + mel = self.ap.melspectrogram(audio) + + if self.mode in ["gauss", "mold"]: + x_input = audio + elif isinstance(self.mode, int): + x_input = (self.ap.mulaw_encode(audio, qc=self.mode) + if self.mulaw else self.ap.quantize(audio, bits=self.mode)) + else: + raise RuntimeError("Unknown dataset mode - ", self.mode) + + else: + + wavpath, feat_path = self.item_list[index] + mel = np.load(feat_path.replace("/quant/", "/mel/")) + + if mel.shape[-1] < self.mel_len + 2 * self.pad: + print(" [!] Instance is too short! 
: {}".format(wavpath)) + self.item_list[index] = self.item_list[index + 1] + feat_path = self.item_list[index] + mel = np.load(feat_path.replace("/quant/", "/mel/")) + if self.mode in ["gauss", "mold"]: + x_input = self.ap.load_wav(wavpath) + elif isinstance(self.mode, int): + x_input = np.load(feat_path.replace("/mel/", "/quant/")) + else: + raise RuntimeError("Unknown dataset mode - ", self.mode) + + return mel, x_input, wavpath + + def collate(self, batch): + mel_win = self.seq_len // self.hop_len + 2 * self.pad + max_offsets = [x[0].shape[-1] - + (mel_win + 2 * self.pad) for x in batch] + + mel_offsets = [np.random.randint(0, offset) for offset in max_offsets] + sig_offsets = [(offset + self.pad) * + self.hop_len for offset in mel_offsets] + + mels = [ + x[0][:, mel_offsets[i]: mel_offsets[i] + mel_win] + for i, x in enumerate(batch) + ] + + coarse = [ + x[1][sig_offsets[i]: sig_offsets[i] + self.seq_len + 1] + for i, x in enumerate(batch) + ] + + mels = np.stack(mels).astype(np.float32) + if self.mode in ["gauss", "mold"]: + coarse = np.stack(coarse).astype(np.float32) + coarse = torch.FloatTensor(coarse) + x_input = coarse[:, : self.seq_len] + elif isinstance(self.mode, int): + coarse = np.stack(coarse).astype(np.int64) + coarse = torch.LongTensor(coarse) + x_input = (2 * coarse[:, : self.seq_len].float() / + (2 ** self.mode - 1.0) - 1.0) + y_coarse = coarse[:, 1:] + mels = torch.FloatTensor(mels) + return x_input, mels, y_coarse diff --git a/TTS/vocoder/layers/__init__.py b/TTS/vocoder/layers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/TTS/vocoder/layers/losses.py b/TTS/vocoder/layers/losses.py new file mode 100644 index 0000000000000000000000000000000000000000..1107b3c5e67789557e1f252d218fbfd34aa01eb2 --- /dev/null +++ b/TTS/vocoder/layers/losses.py @@ -0,0 +1,312 @@ +import torch + +from torch import nn +from torch.nn import functional as F + + +class TorchSTFT(nn.Module): + def __init__(self, n_fft, hop_length, win_length, window='hann_window'): + """ Torch based STFT operation """ + super(TorchSTFT, self).__init__() + self.n_fft = n_fft + self.hop_length = hop_length + self.win_length = win_length + self.window = nn.Parameter(getattr(torch, window)(win_length), + requires_grad=False) + + def __call__(self, x): + # B x D x T x 2 + o = torch.stft(x, + self.n_fft, + self.hop_length, + self.win_length, + self.window, + center=True, + pad_mode="reflect", # compatible with audio.py + normalized=False, + onesided=True, + return_complex=False) + M = o[:, :, :, 0] + P = o[:, :, :, 1] + return torch.sqrt(torch.clamp(M ** 2 + P ** 2, min=1e-8)) + + +################################# +# GENERATOR LOSSES +################################# + + +class STFTLoss(nn.Module): + """ Single scale STFT Loss """ + def __init__(self, n_fft, hop_length, win_length): + super(STFTLoss, self).__init__() + self.n_fft = n_fft + self.hop_length = hop_length + self.win_length = win_length + self.stft = TorchSTFT(n_fft, hop_length, win_length) + + def forward(self, y_hat, y): + y_hat_M = self.stft(y_hat) + y_M = self.stft(y) + # magnitude loss + loss_mag = F.l1_loss(torch.log(y_M), torch.log(y_hat_M)) + # spectral convergence loss + loss_sc = torch.norm(y_M - y_hat_M, p="fro") / torch.norm(y_M, p="fro") + return loss_mag, loss_sc + +class MultiScaleSTFTLoss(torch.nn.Module): + """ Multi scale STFT loss """ + def __init__(self, + n_ffts=(1024, 2048, 512), + hop_lengths=(120, 240, 50), + win_lengths=(600, 1200, 240)): + 
super(MultiScaleSTFTLoss, self).__init__() + self.loss_funcs = torch.nn.ModuleList() + for n_fft, hop_length, win_length in zip(n_ffts, hop_lengths, win_lengths): + self.loss_funcs.append(STFTLoss(n_fft, hop_length, win_length)) + + def forward(self, y_hat, y): + N = len(self.loss_funcs) + loss_sc = 0 + loss_mag = 0 + for f in self.loss_funcs: + lm, lsc = f(y_hat, y) + loss_mag += lm + loss_sc += lsc + loss_sc /= N + loss_mag /= N + return loss_mag, loss_sc + + +class MultiScaleSubbandSTFTLoss(MultiScaleSTFTLoss): + """ Multiscale STFT loss for multi band model outputs """ + # pylint: disable=no-self-use + def forward(self, y_hat, y): + y_hat = y_hat.view(-1, 1, y_hat.shape[2]) + y = y.view(-1, 1, y.shape[2]) + return super().forward(y_hat.squeeze(1), y.squeeze(1)) + + +class MSEGLoss(nn.Module): + """ Mean Squared Generator Loss """ + # pylint: disable=no-self-use + def forward(self, score_real): + loss_fake = F.mse_loss(score_real, score_real.new_ones(score_real.shape)) + return loss_fake + + +class HingeGLoss(nn.Module): + """ Hinge Discriminator Loss """ + # pylint: disable=no-self-use + def forward(self, score_real): + # TODO: this might be wrong + loss_fake = torch.mean(F.relu(1. - score_real)) + return loss_fake + + +################################## +# DISCRIMINATOR LOSSES +################################## + + +class MSEDLoss(nn.Module): + """ Mean Squared Discriminator Loss """ + def __init__(self,): + super(MSEDLoss, self).__init__() + self.loss_func = nn.MSELoss() + + # pylint: disable=no-self-use + def forward(self, score_fake, score_real): + loss_real = self.loss_func(score_real, score_real.new_ones(score_real.shape)) + loss_fake = self.loss_func(score_fake, score_fake.new_zeros(score_fake.shape)) + loss_d = loss_real + loss_fake + return loss_d, loss_real, loss_fake + + +class HingeDLoss(nn.Module): + """ Hinge Discriminator Loss """ + # pylint: disable=no-self-use + def forward(self, score_fake, score_real): + loss_real = torch.mean(F.relu(1. - score_real)) + loss_fake = torch.mean(F.relu(1. 
+ score_fake)) + loss_d = loss_real + loss_fake + return loss_d, loss_real, loss_fake + + +class MelganFeatureLoss(nn.Module): + def __init__(self,): + super(MelganFeatureLoss, self).__init__() + self.loss_func = nn.L1Loss() + + # pylint: disable=no-self-use + def forward(self, fake_feats, real_feats): + loss_feats = 0 + for fake_feat, real_feat in zip(fake_feats, real_feats): + loss_feats += self.loss_func(fake_feat, real_feat) + loss_feats /= len(fake_feats) + len(real_feats) + return loss_feats + + +##################################### +# LOSS WRAPPERS +##################################### + + +def _apply_G_adv_loss(scores_fake, loss_func): + """ Compute G adversarial loss function + and normalize values """ + adv_loss = 0 + if isinstance(scores_fake, list): + for score_fake in scores_fake: + fake_loss = loss_func(score_fake) + adv_loss += fake_loss + adv_loss /= len(scores_fake) + else: + fake_loss = loss_func(scores_fake) + adv_loss = fake_loss + return adv_loss + + +def _apply_D_loss(scores_fake, scores_real, loss_func): + """ Compute D loss func and normalize loss values """ + loss = 0 + real_loss = 0 + fake_loss = 0 + if isinstance(scores_fake, list): + # multi-scale loss + for score_fake, score_real in zip(scores_fake, scores_real): + total_loss, real_loss, fake_loss = loss_func(score_fake=score_fake, score_real=score_real) + loss += total_loss + real_loss += real_loss + fake_loss += fake_loss + # normalize loss values with number of scales + loss /= len(scores_fake) + real_loss /= len(scores_real) + fake_loss /= len(scores_fake) + else: + # single scale loss + total_loss, real_loss, fake_loss = loss_func(scores_fake, scores_real) + loss = total_loss + return loss, real_loss, fake_loss + + +################################## +# MODEL LOSSES +################################## + + +class GeneratorLoss(nn.Module): + def __init__(self, C): + """ Compute Generator Loss values depending on training + configuration """ + super(GeneratorLoss, self).__init__() + assert not(C.use_mse_gan_loss and C.use_hinge_gan_loss),\ + " [!] Cannot use HingeGANLoss and MSEGANLoss together." 
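A usage sketch for the multi-scale STFT loss defined above, with random tensors standing in for generator output and target audio; the default scales match "stft_loss_params" in the configs.

```python
import torch
from TTS.vocoder.layers.losses import MultiScaleSTFTLoss

y_hat = torch.randn(4, 16384)        # B x T generator output (channel dim squeezed)
y = torch.randn(4, 16384)            # B x T target audio
criterion = MultiScaleSTFTLoss()     # default n_ffts / hop_lengths / win_lengths
loss_mag, loss_sc = criterion(y_hat, y)   # log-magnitude L1 and spectral convergence
print(loss_mag.item(), loss_sc.item())
```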
+ + self.use_stft_loss = C.use_stft_loss + self.use_subband_stft_loss = C.use_subband_stft_loss + self.use_mse_gan_loss = C.use_mse_gan_loss + self.use_hinge_gan_loss = C.use_hinge_gan_loss + self.use_feat_match_loss = C.use_feat_match_loss + + self.stft_loss_weight = C.stft_loss_weight + self.subband_stft_loss_weight = C.subband_stft_loss_weight + self.mse_gan_loss_weight = C.mse_G_loss_weight + self.hinge_gan_loss_weight = C.hinge_G_loss_weight + self.feat_match_loss_weight = C.feat_match_loss_weight + + if C.use_stft_loss: + self.stft_loss = MultiScaleSTFTLoss(**C.stft_loss_params) + if C.use_subband_stft_loss: + self.subband_stft_loss = MultiScaleSubbandSTFTLoss(**C.subband_stft_loss_params) + if C.use_mse_gan_loss: + self.mse_loss = MSEGLoss() + if C.use_hinge_gan_loss: + self.hinge_loss = HingeGLoss() + if C.use_feat_match_loss: + self.feat_match_loss = MelganFeatureLoss() + + def forward(self, y_hat=None, y=None, scores_fake=None, feats_fake=None, feats_real=None, y_hat_sub=None, y_sub=None): + gen_loss = 0 + adv_loss = 0 + return_dict = {} + + # STFT Loss + if self.use_stft_loss: + stft_loss_mg, stft_loss_sc = self.stft_loss(y_hat.squeeze(1), y.squeeze(1)) + return_dict['G_stft_loss_mg'] = stft_loss_mg + return_dict['G_stft_loss_sc'] = stft_loss_sc + gen_loss += self.stft_loss_weight * (stft_loss_mg + stft_loss_sc) + + # subband STFT Loss + if self.use_subband_stft_loss: + subband_stft_loss_mg, subband_stft_loss_sc = self.subband_stft_loss(y_hat_sub, y_sub) + return_dict['G_subband_stft_loss_mg'] = subband_stft_loss_mg + return_dict['G_subband_stft_loss_sc'] = subband_stft_loss_sc + gen_loss += self.subband_stft_loss_weight * (subband_stft_loss_mg + subband_stft_loss_sc) + + # multiscale MSE adversarial loss + if self.use_mse_gan_loss and scores_fake is not None: + mse_fake_loss = _apply_G_adv_loss(scores_fake, self.mse_loss) + return_dict['G_mse_fake_loss'] = mse_fake_loss + adv_loss += self.mse_gan_loss_weight * mse_fake_loss + + # multiscale Hinge adversarial loss + if self.use_hinge_gan_loss and not scores_fake is not None: + hinge_fake_loss = _apply_G_adv_loss(scores_fake, self.hinge_loss) + return_dict['G_hinge_fake_loss'] = hinge_fake_loss + adv_loss += self.hinge_gan_loss_weight * hinge_fake_loss + + # Feature Matching Loss + if self.use_feat_match_loss and not feats_fake: + feat_match_loss = self.feat_match_loss(feats_fake, feats_real) + return_dict['G_feat_match_loss'] = feat_match_loss + adv_loss += self.feat_match_loss_weight * feat_match_loss + return_dict['G_loss'] = gen_loss + adv_loss + return_dict['G_gen_loss'] = gen_loss + return_dict['G_adv_loss'] = adv_loss + return return_dict + + +class DiscriminatorLoss(nn.Module): + """ Compute Discriminator Loss values depending on training + configuration """ + def __init__(self, C): + super(DiscriminatorLoss, self).__init__() + assert not(C.use_mse_gan_loss and C.use_hinge_gan_loss),\ + " [!] Cannot use HingeGANLoss and MSEGANLoss together." 
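For reference, the LSGAN terms that GeneratorLoss and DiscriminatorLoss wrap can be exercised in isolation; random scores stand in for real discriminator outputs here.

```python
import torch
from TTS.vocoder.layers.losses import MSEGLoss, MSEDLoss

score_real = torch.rand(4, 1, 64)    # stand-in discriminator outputs
score_fake = torch.rand(4, 1, 64)

g_adv = MSEGLoss()(score_fake)                               # push D(fake) towards 1
d_loss, d_real, d_fake = MSEDLoss()(score_fake, score_real)  # real -> 1, fake -> 0
```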
+ + self.use_mse_gan_loss = C.use_mse_gan_loss + self.use_hinge_gan_loss = C.use_hinge_gan_loss + + if C.use_mse_gan_loss: + self.mse_loss = MSEDLoss() + if C.use_hinge_gan_loss: + self.hinge_loss = HingeDLoss() + + def forward(self, scores_fake, scores_real): + loss = 0 + return_dict = {} + + if self.use_mse_gan_loss: + mse_D_loss, mse_D_real_loss, mse_D_fake_loss = _apply_D_loss( + scores_fake=scores_fake, + scores_real=scores_real, + loss_func=self.mse_loss) + return_dict['D_mse_gan_loss'] = mse_D_loss + return_dict['D_mse_gan_real_loss'] = mse_D_real_loss + return_dict['D_mse_gan_fake_loss'] = mse_D_fake_loss + loss += mse_D_loss + + if self.use_hinge_gan_loss: + hinge_D_loss, hinge_D_real_loss, hinge_D_fake_loss = _apply_D_loss( + scores_fake=scores_fake, + scores_real=scores_real, + loss_func=self.hinge_loss) + return_dict['D_hinge_gan_loss'] = hinge_D_loss + return_dict['D_hinge_gan_real_loss'] = hinge_D_real_loss + return_dict['D_hinge_gan_fake_loss'] = hinge_D_fake_loss + loss += hinge_D_loss + + return_dict['D_loss'] = loss + return return_dict diff --git a/TTS/vocoder/layers/melgan.py b/TTS/vocoder/layers/melgan.py new file mode 100644 index 0000000000000000000000000000000000000000..58c12a2eb86c1ce9713cf14561a68579debc7bd1 --- /dev/null +++ b/TTS/vocoder/layers/melgan.py @@ -0,0 +1,45 @@ +from torch import nn +from torch.nn.utils import weight_norm + + +class ResidualStack(nn.Module): + def __init__(self, channels, num_res_blocks, kernel_size): + super(ResidualStack, self).__init__() + + assert (kernel_size - 1) % 2 == 0, " [!] kernel_size has to be odd." + base_padding = (kernel_size - 1) // 2 + + self.blocks = nn.ModuleList() + for idx in range(num_res_blocks): + layer_kernel_size = kernel_size + layer_dilation = layer_kernel_size**idx + layer_padding = base_padding * layer_dilation + self.blocks += [nn.Sequential( + nn.LeakyReLU(0.2), + nn.ReflectionPad1d(layer_padding), + weight_norm( + nn.Conv1d(channels, + channels, + kernel_size=kernel_size, + dilation=layer_dilation, + bias=True)), + nn.LeakyReLU(0.2), + weight_norm( + nn.Conv1d(channels, channels, kernel_size=1, bias=True)), + )] + + self.shortcuts = nn.ModuleList([ + weight_norm(nn.Conv1d(channels, channels, kernel_size=1, + bias=True)) for i in range(num_res_blocks) + ]) + + def forward(self, x): + for block, shortcut in zip(self.blocks, self.shortcuts): + x = shortcut(x) + block(x) + return x + + def remove_weight_norm(self): + for block, shortcut in zip(self.blocks, self.shortcuts): + nn.utils.remove_weight_norm(block[2]) + nn.utils.remove_weight_norm(block[4]) + nn.utils.remove_weight_norm(shortcut) diff --git a/TTS/vocoder/layers/parallel_wavegan.py b/TTS/vocoder/layers/parallel_wavegan.py new file mode 100644 index 0000000000000000000000000000000000000000..bedfe5519aa3595e4b628b247cdde5d678bfcc0f --- /dev/null +++ b/TTS/vocoder/layers/parallel_wavegan.py @@ -0,0 +1,87 @@ +import torch +from torch.nn import functional as F + + +class ResidualBlock(torch.nn.Module): + """Residual block module in WaveNet.""" + def __init__(self, + kernel_size=3, + res_channels=64, + gate_channels=128, + skip_channels=64, + aux_channels=80, + dropout=0.0, + dilation=1, + bias=True, + use_causal_conv=False): + super(ResidualBlock, self).__init__() + self.dropout = dropout + # no future time stamps available + if use_causal_conv: + padding = (kernel_size - 1) * dilation + else: + assert (kernel_size - + 1) % 2 == 0, "Not support even number kernel size." 
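A shape sketch for the MelGAN ResidualStack added above in TTS/vocoder/layers/melgan.py: dilated convolutions with matching reflection padding plus 1x1 shortcuts, so the time dimension is preserved.

```python
import torch
from TTS.vocoder.layers.melgan import ResidualStack

x = torch.randn(2, 32, 100)                                   # B x channels x T
stack = ResidualStack(channels=32, num_res_blocks=3, kernel_size=3)
print(stack(x).shape)                                         # torch.Size([2, 32, 100])
```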
+ padding = (kernel_size - 1) // 2 * dilation + self.use_causal_conv = use_causal_conv + + # dilation conv + self.conv = torch.nn.Conv1d(res_channels, + gate_channels, + kernel_size, + padding=padding, + dilation=dilation, + bias=bias) + + # local conditioning + if aux_channels > 0: + self.conv1x1_aux = torch.nn.Conv1d(aux_channels, + gate_channels, + 1, + bias=False) + else: + self.conv1x1_aux = None + + # conv output is split into two groups + gate_out_channels = gate_channels // 2 + self.conv1x1_out = torch.nn.Conv1d(gate_out_channels, + res_channels, + 1, + bias=bias) + self.conv1x1_skip = torch.nn.Conv1d(gate_out_channels, + skip_channels, + 1, + bias=bias) + + def forward(self, x, c): + """ + x: B x D_res x T + c: B x D_aux x T + """ + residual = x + x = F.dropout(x, p=self.dropout, training=self.training) + x = self.conv(x) + + # remove future time steps if use_causal_conv conv + x = x[:, :, :residual.size(-1)] if self.use_causal_conv else x + + # split into two part for gated activation + splitdim = 1 + xa, xb = x.split(x.size(splitdim) // 2, dim=splitdim) + + # local conditioning + if c is not None: + assert self.conv1x1_aux is not None + c = self.conv1x1_aux(c) + ca, cb = c.split(c.size(splitdim) // 2, dim=splitdim) + xa, xb = xa + ca, xb + cb + + x = torch.tanh(xa) * torch.sigmoid(xb) + + # for skip connection + s = self.conv1x1_skip(x) + + # for residual connection + x = (self.conv1x1_out(x) + residual) * (0.5**2) + + return x, s diff --git a/TTS/vocoder/layers/pqmf.py b/TTS/vocoder/layers/pqmf.py new file mode 100644 index 0000000000000000000000000000000000000000..d31953d628d11e5036fe0ab3f52d1bc26e1c7f68 --- /dev/null +++ b/TTS/vocoder/layers/pqmf.py @@ -0,0 +1,56 @@ +import numpy as np +import torch +import torch.nn.functional as F + +from scipy import signal as sig + + +# adapted from +# https://github.com/kan-bayashi/ParallelWaveGAN/tree/master/parallel_wavegan +class PQMF(torch.nn.Module): + def __init__(self, N=4, taps=62, cutoff=0.15, beta=9.0): + super(PQMF, self).__init__() + + self.N = N + self.taps = taps + self.cutoff = cutoff + self.beta = beta + + QMF = sig.firwin(taps + 1, cutoff, window=('kaiser', beta)) + H = np.zeros((N, len(QMF))) + G = np.zeros((N, len(QMF))) + for k in range(N): + constant_factor = (2 * k + 1) * (np.pi / + (2 * N)) * (np.arange(taps + 1) - + ((taps - 1) / 2)) # TODO: (taps - 1) -> taps + phase = (-1)**k * np.pi / 4 + H[k] = 2 * QMF * np.cos(constant_factor + phase) + + G[k] = 2 * QMF * np.cos(constant_factor - phase) + + H = torch.from_numpy(H[:, None, :]).float() + G = torch.from_numpy(G[None, :, :]).float() + + self.register_buffer("H", H) + self.register_buffer("G", G) + + updown_filter = torch.zeros((N, N, N)).float() + for k in range(N): + updown_filter[k, k, 0] = 1.0 + self.register_buffer("updown_filter", updown_filter) + self.N = N + + self.pad_fn = torch.nn.ConstantPad1d(taps // 2, 0.0) + + def forward(self, x): + return self.analysis(x) + + def analysis(self, x): + return F.conv1d(x, self.H, padding=self.taps // 2, stride=self.N) + + def synthesis(self, x): + x = F.conv_transpose1d(x, + self.updown_filter * self.N, + stride=self.N) + x = F.conv1d(x, self.G, padding=self.taps // 2) + return x diff --git a/TTS/vocoder/layers/qmf.dat b/TTS/vocoder/layers/qmf.dat new file mode 100644 index 0000000000000000000000000000000000000000..17eab1379de991c36897c2ce701802ef76849c0d --- /dev/null +++ b/TTS/vocoder/layers/qmf.dat @@ -0,0 +1,640 @@ + 0.0000000e+000 + -5.5252865e-004 + -5.6176926e-004 + -4.9475181e-004 + -4.8752280e-004 + 
-4.8937912e-004 + -5.0407143e-004 + -5.2265643e-004 + -5.4665656e-004 + -5.6778026e-004 + -5.8709305e-004 + -6.1327474e-004 + -6.3124935e-004 + -6.5403334e-004 + -6.7776908e-004 + -6.9416146e-004 + -7.1577365e-004 + -7.2550431e-004 + -7.4409419e-004 + -7.4905981e-004 + -7.6813719e-004 + -7.7248486e-004 + -7.8343323e-004 + -7.7798695e-004 + -7.8036647e-004 + -7.8014496e-004 + -7.7579773e-004 + -7.6307936e-004 + -7.5300014e-004 + -7.3193572e-004 + -7.2153920e-004 + -6.9179375e-004 + -6.6504151e-004 + -6.3415949e-004 + -5.9461189e-004 + -5.5645764e-004 + -5.1455722e-004 + -4.6063255e-004 + -4.0951215e-004 + -3.5011759e-004 + -2.8969812e-004 + -2.0983373e-004 + -1.4463809e-004 + -6.1733441e-005 + 1.3494974e-005 + 1.0943831e-004 + 2.0430171e-004 + 2.9495311e-004 + 4.0265402e-004 + 5.1073885e-004 + 6.2393761e-004 + 7.4580259e-004 + 8.6084433e-004 + 9.8859883e-004 + 1.1250155e-003 + 1.2577885e-003 + 1.3902495e-003 + 1.5443220e-003 + 1.6868083e-003 + 1.8348265e-003 + 1.9841141e-003 + 2.1461584e-003 + 2.3017255e-003 + 2.4625617e-003 + 2.6201759e-003 + 2.7870464e-003 + 2.9469448e-003 + 3.1125421e-003 + 3.2739613e-003 + 3.4418874e-003 + 3.6008268e-003 + 3.7603923e-003 + 3.9207432e-003 + 4.0819753e-003 + 4.2264269e-003 + 4.3730720e-003 + 4.5209853e-003 + 4.6606461e-003 + 4.7932561e-003 + 4.9137604e-003 + 5.0393023e-003 + 5.1407354e-003 + 5.2461166e-003 + 5.3471681e-003 + 5.4196776e-003 + 5.4876040e-003 + 5.5475715e-003 + 5.5938023e-003 + 5.6220643e-003 + 5.6455197e-003 + 5.6389200e-003 + 5.6266114e-003 + 5.5917129e-003 + 5.5404364e-003 + 5.4753783e-003 + 5.3838976e-003 + 5.2715759e-003 + 5.1382275e-003 + 4.9839688e-003 + 4.8109469e-003 + 4.6039530e-003 + 4.3801862e-003 + 4.1251642e-003 + 3.8456408e-003 + 3.5401247e-003 + 3.2091886e-003 + 2.8446758e-003 + 2.4508540e-003 + 2.0274176e-003 + 1.5784683e-003 + 1.0902329e-003 + 5.8322642e-004 + 2.7604519e-005 + -5.4642809e-004 + -1.1568136e-003 + -1.8039473e-003 + -2.4826724e-003 + -3.1933778e-003 + -3.9401124e-003 + -4.7222596e-003 + -5.5337211e-003 + -6.3792293e-003 + -7.2615817e-003 + -8.1798233e-003 + -9.1325330e-003 + -1.0115022e-002 + -1.1131555e-002 + -1.2185000e-002 + -1.3271822e-002 + -1.4390467e-002 + -1.5540555e-002 + -1.6732471e-002 + -1.7943338e-002 + -1.9187243e-002 + -2.0453179e-002 + -2.1746755e-002 + -2.3068017e-002 + -2.4416099e-002 + -2.5787585e-002 + -2.7185943e-002 + -2.8607217e-002 + -3.0050266e-002 + -3.1501761e-002 + -3.2975408e-002 + -3.4462095e-002 + -3.5969756e-002 + -3.7481285e-002 + -3.9005368e-002 + -4.0534917e-002 + -4.2064909e-002 + -4.3609754e-002 + -4.5148841e-002 + -4.6684303e-002 + -4.8216572e-002 + -4.9738576e-002 + -5.1255616e-002 + -5.2763075e-002 + -5.4245277e-002 + -5.5717365e-002 + -5.7161645e-002 + -5.8591568e-002 + -5.9983748e-002 + -6.1345517e-002 + -6.2685781e-002 + -6.3971590e-002 + -6.5224711e-002 + -6.6436751e-002 + -6.7607599e-002 + -6.8704383e-002 + -6.9763024e-002 + -7.0762871e-002 + -7.1700267e-002 + -7.2568258e-002 + -7.3362026e-002 + -7.4100364e-002 + -7.4745256e-002 + -7.5313734e-002 + -7.5800836e-002 + -7.6199248e-002 + -7.6499217e-002 + -7.6709349e-002 + -7.6817398e-002 + -7.6823001e-002 + -7.6720492e-002 + -7.6505072e-002 + -7.6174832e-002 + -7.5730576e-002 + -7.5157626e-002 + -7.4466439e-002 + -7.3640601e-002 + -7.2677464e-002 + -7.1582636e-002 + -7.0353307e-002 + -6.8966401e-002 + -6.7452502e-002 + -6.5769067e-002 + -6.3944481e-002 + -6.1960278e-002 + -5.9816657e-002 + -5.7515269e-002 + -5.5046003e-002 + -5.2409382e-002 + -4.9597868e-002 + -4.6630331e-002 + -4.3476878e-002 + -4.0145828e-002 + 
-3.6641812e-002 + -3.2958393e-002 + -2.9082401e-002 + -2.5030756e-002 + -2.0799707e-002 + -1.6370126e-002 + -1.1762383e-002 + -6.9636862e-003 + -1.9765601e-003 + 3.2086897e-003 + 8.5711749e-003 + 1.4128883e-002 + 1.9883413e-002 + 2.5822729e-002 + 3.1953127e-002 + 3.8277657e-002 + 4.4780682e-002 + 5.1480418e-002 + 5.8370533e-002 + 6.5440985e-002 + 7.2694330e-002 + 8.0137293e-002 + 8.7754754e-002 + 9.5553335e-002 + 1.0353295e-001 + 1.1168269e-001 + 1.2000780e-001 + 1.2850029e-001 + 1.3715518e-001 + 1.4597665e-001 + 1.5496071e-001 + 1.6409589e-001 + 1.7338082e-001 + 1.8281725e-001 + 1.9239667e-001 + 2.0212502e-001 + 2.1197359e-001 + 2.2196527e-001 + 2.3206909e-001 + 2.4230169e-001 + 2.5264803e-001 + 2.6310533e-001 + 2.7366340e-001 + 2.8432142e-001 + 2.9507167e-001 + 3.0590986e-001 + 3.1682789e-001 + 3.2781137e-001 + 3.3887227e-001 + 3.4999141e-001 + 3.6115899e-001 + 3.7237955e-001 + 3.8363500e-001 + 3.9492118e-001 + 4.0623177e-001 + 4.1756969e-001 + 4.2891199e-001 + 4.4025538e-001 + 4.5159965e-001 + 4.6293081e-001 + 4.7424532e-001 + 4.8552531e-001 + 4.9677083e-001 + 5.0798175e-001 + 5.1912350e-001 + 5.3022409e-001 + 5.4125534e-001 + 5.5220513e-001 + 5.6307891e-001 + 5.7385241e-001 + 5.8454032e-001 + 5.9511231e-001 + 6.0557835e-001 + 6.1591099e-001 + 6.2612427e-001 + 6.3619801e-001 + 6.4612697e-001 + 6.5590163e-001 + 6.6551399e-001 + 6.7496632e-001 + 6.8423533e-001 + 6.9332824e-001 + 7.0223887e-001 + 7.1094104e-001 + 7.1944626e-001 + 7.2774489e-001 + 7.3582118e-001 + 7.4368279e-001 + 7.5131375e-001 + 7.5870808e-001 + 7.6586749e-001 + 7.7277809e-001 + 7.7942875e-001 + 7.8583531e-001 + 7.9197358e-001 + 7.9784664e-001 + 8.0344858e-001 + 8.0876950e-001 + 8.1381913e-001 + 8.1857760e-001 + 8.2304199e-001 + 8.2722753e-001 + 8.3110385e-001 + 8.3469374e-001 + 8.3797173e-001 + 8.4095414e-001 + 8.4362383e-001 + 8.4598185e-001 + 8.4803158e-001 + 8.4978052e-001 + 8.5119715e-001 + 8.5230470e-001 + 8.5310209e-001 + 8.5357206e-001 + 8.5373856e-001 + 8.5357206e-001 + 8.5310209e-001 + 8.5230470e-001 + 8.5119715e-001 + 8.4978052e-001 + 8.4803158e-001 + 8.4598185e-001 + 8.4362383e-001 + 8.4095414e-001 + 8.3797173e-001 + 8.3469374e-001 + 8.3110385e-001 + 8.2722753e-001 + 8.2304199e-001 + 8.1857760e-001 + 8.1381913e-001 + 8.0876950e-001 + 8.0344858e-001 + 7.9784664e-001 + 7.9197358e-001 + 7.8583531e-001 + 7.7942875e-001 + 7.7277809e-001 + 7.6586749e-001 + 7.5870808e-001 + 7.5131375e-001 + 7.4368279e-001 + 7.3582118e-001 + 7.2774489e-001 + 7.1944626e-001 + 7.1094104e-001 + 7.0223887e-001 + 6.9332824e-001 + 6.8423533e-001 + 6.7496632e-001 + 6.6551399e-001 + 6.5590163e-001 + 6.4612697e-001 + 6.3619801e-001 + 6.2612427e-001 + 6.1591099e-001 + 6.0557835e-001 + 5.9511231e-001 + 5.8454032e-001 + 5.7385241e-001 + 5.6307891e-001 + 5.5220513e-001 + 5.4125534e-001 + 5.3022409e-001 + 5.1912350e-001 + 5.0798175e-001 + 4.9677083e-001 + 4.8552531e-001 + 4.7424532e-001 + 4.6293081e-001 + 4.5159965e-001 + 4.4025538e-001 + 4.2891199e-001 + 4.1756969e-001 + 4.0623177e-001 + 3.9492118e-001 + 3.8363500e-001 + 3.7237955e-001 + 3.6115899e-001 + 3.4999141e-001 + 3.3887227e-001 + 3.2781137e-001 + 3.1682789e-001 + 3.0590986e-001 + 2.9507167e-001 + 2.8432142e-001 + 2.7366340e-001 + 2.6310533e-001 + 2.5264803e-001 + 2.4230169e-001 + 2.3206909e-001 + 2.2196527e-001 + 2.1197359e-001 + 2.0212502e-001 + 1.9239667e-001 + 1.8281725e-001 + 1.7338082e-001 + 1.6409589e-001 + 1.5496071e-001 + 1.4597665e-001 + 1.3715518e-001 + 1.2850029e-001 + 1.2000780e-001 + 1.1168269e-001 + 1.0353295e-001 + 9.5553335e-002 + 8.7754754e-002 + 8.0137293e-002 + 
7.2694330e-002 + 6.5440985e-002 + 5.8370533e-002 + 5.1480418e-002 + 4.4780682e-002 + 3.8277657e-002 + 3.1953127e-002 + 2.5822729e-002 + 1.9883413e-002 + 1.4128883e-002 + 8.5711749e-003 + 3.2086897e-003 + -1.9765601e-003 + -6.9636862e-003 + -1.1762383e-002 + -1.6370126e-002 + -2.0799707e-002 + -2.5030756e-002 + -2.9082401e-002 + -3.2958393e-002 + -3.6641812e-002 + -4.0145828e-002 + -4.3476878e-002 + -4.6630331e-002 + -4.9597868e-002 + -5.2409382e-002 + -5.5046003e-002 + -5.7515269e-002 + -5.9816657e-002 + -6.1960278e-002 + -6.3944481e-002 + -6.5769067e-002 + -6.7452502e-002 + -6.8966401e-002 + -7.0353307e-002 + -7.1582636e-002 + -7.2677464e-002 + -7.3640601e-002 + -7.4466439e-002 + -7.5157626e-002 + -7.5730576e-002 + -7.6174832e-002 + -7.6505072e-002 + -7.6720492e-002 + -7.6823001e-002 + -7.6817398e-002 + -7.6709349e-002 + -7.6499217e-002 + -7.6199248e-002 + -7.5800836e-002 + -7.5313734e-002 + -7.4745256e-002 + -7.4100364e-002 + -7.3362026e-002 + -7.2568258e-002 + -7.1700267e-002 + -7.0762871e-002 + -6.9763024e-002 + -6.8704383e-002 + -6.7607599e-002 + -6.6436751e-002 + -6.5224711e-002 + -6.3971590e-002 + -6.2685781e-002 + -6.1345517e-002 + -5.9983748e-002 + -5.8591568e-002 + -5.7161645e-002 + -5.5717365e-002 + -5.4245277e-002 + -5.2763075e-002 + -5.1255616e-002 + -4.9738576e-002 + -4.8216572e-002 + -4.6684303e-002 + -4.5148841e-002 + -4.3609754e-002 + -4.2064909e-002 + -4.0534917e-002 + -3.9005368e-002 + -3.7481285e-002 + -3.5969756e-002 + -3.4462095e-002 + -3.2975408e-002 + -3.1501761e-002 + -3.0050266e-002 + -2.8607217e-002 + -2.7185943e-002 + -2.5787585e-002 + -2.4416099e-002 + -2.3068017e-002 + -2.1746755e-002 + -2.0453179e-002 + -1.9187243e-002 + -1.7943338e-002 + -1.6732471e-002 + -1.5540555e-002 + -1.4390467e-002 + -1.3271822e-002 + -1.2185000e-002 + -1.1131555e-002 + -1.0115022e-002 + -9.1325330e-003 + -8.1798233e-003 + -7.2615817e-003 + -6.3792293e-003 + -5.5337211e-003 + -4.7222596e-003 + -3.9401124e-003 + -3.1933778e-003 + -2.4826724e-003 + -1.8039473e-003 + -1.1568136e-003 + -5.4642809e-004 + 2.7604519e-005 + 5.8322642e-004 + 1.0902329e-003 + 1.5784683e-003 + 2.0274176e-003 + 2.4508540e-003 + 2.8446758e-003 + 3.2091886e-003 + 3.5401247e-003 + 3.8456408e-003 + 4.1251642e-003 + 4.3801862e-003 + 4.6039530e-003 + 4.8109469e-003 + 4.9839688e-003 + 5.1382275e-003 + 5.2715759e-003 + 5.3838976e-003 + 5.4753783e-003 + 5.5404364e-003 + 5.5917129e-003 + 5.6266114e-003 + 5.6389200e-003 + 5.6455197e-003 + 5.6220643e-003 + 5.5938023e-003 + 5.5475715e-003 + 5.4876040e-003 + 5.4196776e-003 + 5.3471681e-003 + 5.2461166e-003 + 5.1407354e-003 + 5.0393023e-003 + 4.9137604e-003 + 4.7932561e-003 + 4.6606461e-003 + 4.5209853e-003 + 4.3730720e-003 + 4.2264269e-003 + 4.0819753e-003 + 3.9207432e-003 + 3.7603923e-003 + 3.6008268e-003 + 3.4418874e-003 + 3.2739613e-003 + 3.1125421e-003 + 2.9469448e-003 + 2.7870464e-003 + 2.6201759e-003 + 2.4625617e-003 + 2.3017255e-003 + 2.1461584e-003 + 1.9841141e-003 + 1.8348265e-003 + 1.6868083e-003 + 1.5443220e-003 + 1.3902495e-003 + 1.2577885e-003 + 1.1250155e-003 + 9.8859883e-004 + 8.6084433e-004 + 7.4580259e-004 + 6.2393761e-004 + 5.1073885e-004 + 4.0265402e-004 + 2.9495311e-004 + 2.0430171e-004 + 1.0943831e-004 + 1.3494974e-005 + -6.1733441e-005 + -1.4463809e-004 + -2.0983373e-004 + -2.8969812e-004 + -3.5011759e-004 + -4.0951215e-004 + -4.6063255e-004 + -5.1455722e-004 + -5.5645764e-004 + -5.9461189e-004 + -6.3415949e-004 + -6.6504151e-004 + -6.9179375e-004 + -7.2153920e-004 + -7.3193572e-004 + -7.5300014e-004 + -7.6307936e-004 + -7.7579773e-004 + -7.8014496e-004 + 
-7.8036647e-004 + -7.7798695e-004 + -7.8343323e-004 + -7.7248486e-004 + -7.6813719e-004 + -7.4905981e-004 + -7.4409419e-004 + -7.2550431e-004 + -7.1577365e-004 + -6.9416146e-004 + -6.7776908e-004 + -6.5403334e-004 + -6.3124935e-004 + -6.1327474e-004 + -5.8709305e-004 + -5.6778026e-004 + -5.4665656e-004 + -5.2265643e-004 + -5.0407143e-004 + -4.8937912e-004 + -4.8752280e-004 + -4.9475181e-004 + -5.6176926e-004 + -5.5252865e-004 diff --git a/TTS/vocoder/layers/upsample.py b/TTS/vocoder/layers/upsample.py new file mode 100644 index 0000000000000000000000000000000000000000..1340687582d7c1efb8cb4546ce65aa0a3e8c0c0c --- /dev/null +++ b/TTS/vocoder/layers/upsample.py @@ -0,0 +1,101 @@ +import torch +from torch.nn import functional as F + + +class Stretch2d(torch.nn.Module): + def __init__(self, x_scale, y_scale, mode="nearest"): + super(Stretch2d, self).__init__() + self.x_scale = x_scale + self.y_scale = y_scale + self.mode = mode + + def forward(self, x): + """ + x (Tensor): Input tensor (B, C, F, T). + Tensor: Interpolated tensor (B, C, F * y_scale, T * x_scale), + """ + return F.interpolate( + x, scale_factor=(self.y_scale, self.x_scale), mode=self.mode) + + +class UpsampleNetwork(torch.nn.Module): + # pylint: disable=dangerous-default-value + def __init__(self, + upsample_factors, + nonlinear_activation=None, + nonlinear_activation_params={}, + interpolate_mode="nearest", + freq_axis_kernel_size=1, + use_causal_conv=False, + ): + super(UpsampleNetwork, self).__init__() + self.use_causal_conv = use_causal_conv + self.up_layers = torch.nn.ModuleList() + for scale in upsample_factors: + # interpolation layer + stretch = Stretch2d(scale, 1, interpolate_mode) + self.up_layers += [stretch] + + # conv layer + assert (freq_axis_kernel_size - 1) % 2 == 0, "Not support even number freq axis kernel size." + freq_axis_padding = (freq_axis_kernel_size - 1) // 2 + kernel_size = (freq_axis_kernel_size, scale * 2 + 1) + if use_causal_conv: + padding = (freq_axis_padding, scale * 2) + else: + padding = (freq_axis_padding, scale) + conv = torch.nn.Conv2d(1, 1, kernel_size=kernel_size, padding=padding, bias=False) + self.up_layers += [conv] + + # nonlinear + if nonlinear_activation is not None: + nonlinear = getattr(torch.nn, nonlinear_activation)(**nonlinear_activation_params) + self.up_layers += [nonlinear] + + def forward(self, c): + """ + c : (B, C, T_in). 
+ Tensor: (B, C, T_upsample) + """ + c = c.unsqueeze(1) # (B, 1, C, T) + for f in self.up_layers: + c = f(c) + return c.squeeze(1) # (B, C, T') + + +class ConvUpsample(torch.nn.Module): + # pylint: disable=dangerous-default-value + def __init__(self, + upsample_factors, + nonlinear_activation=None, + nonlinear_activation_params={}, + interpolate_mode="nearest", + freq_axis_kernel_size=1, + aux_channels=80, + aux_context_window=0, + use_causal_conv=False + ): + super(ConvUpsample, self).__init__() + self.aux_context_window = aux_context_window + self.use_causal_conv = use_causal_conv and aux_context_window > 0 + # To capture wide-context information in conditional features + kernel_size = aux_context_window + 1 if use_causal_conv else 2 * aux_context_window + 1 + # NOTE(kan-bayashi): Here do not use padding because the input is already padded + self.conv_in = torch.nn.Conv1d(aux_channels, aux_channels, kernel_size=kernel_size, bias=False) + self.upsample = UpsampleNetwork( + upsample_factors=upsample_factors, + nonlinear_activation=nonlinear_activation, + nonlinear_activation_params=nonlinear_activation_params, + interpolate_mode=interpolate_mode, + freq_axis_kernel_size=freq_axis_kernel_size, + use_causal_conv=use_causal_conv, + ) + + def forward(self, c): + """ + c : (B, C, T_in). + Tensor: (B, C, T_upsampled), + """ + c_ = self.conv_in(c) + c = c_[:, :, :-self.aux_context_window] if self.use_causal_conv else c_ + return self.upsample(c) diff --git a/TTS/vocoder/layers/wavegrad.py b/TTS/vocoder/layers/wavegrad.py new file mode 100644 index 0000000000000000000000000000000000000000..d09b49505ce94d59c0d898ae3d9a35a5ec2b90a4 --- /dev/null +++ b/TTS/vocoder/layers/wavegrad.py @@ -0,0 +1,175 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.nn.utils import weight_norm + + +class Conv1d(nn.Conv1d): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + nn.init.orthogonal_(self.weight) + nn.init.zeros_(self.bias) + + +class PositionalEncoding(nn.Module): + """Positional encoding with noise level conditioning""" + def __init__(self, n_channels, max_len=10000): + super().__init__() + self.n_channels = n_channels + self.max_len = max_len + self.C = 5000 + self.pe = torch.zeros(0, 0) + + def forward(self, x, noise_level): + if x.shape[2] > self.pe.shape[1]: + self.init_pe_matrix(x.shape[1] ,x.shape[2], x) + return x + noise_level[..., None, None] + self.pe[:, :x.size(2)].repeat(x.shape[0], 1, 1) / self.C + + def init_pe_matrix(self, n_channels, max_len, x): + pe = torch.zeros(max_len, n_channels) + position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1) + div_term = torch.pow(10000, torch.arange(0, n_channels, 2).float() / n_channels) + + pe[:, 0::2] = torch.sin(position / div_term) + pe[:, 1::2] = torch.cos(position / div_term) + self.pe = pe.transpose(0, 1).to(x) + + +class FiLM(nn.Module): + def __init__(self, input_size, output_size): + super().__init__() + self.encoding = PositionalEncoding(input_size) + self.input_conv = nn.Conv1d(input_size, input_size, 3, padding=1) + self.output_conv = nn.Conv1d(input_size, output_size * 2, 3, padding=1) + + nn.init.xavier_uniform_(self.input_conv.weight) + nn.init.xavier_uniform_(self.output_conv.weight) + nn.init.zeros_(self.input_conv.bias) + nn.init.zeros_(self.output_conv.bias) + + def forward(self, x, noise_scale): + o = self.input_conv(x) + o = F.leaky_relu(o, 0.2) + o = self.encoding(o, noise_scale) + shift, scale = torch.chunk(self.output_conv(o), 2, dim=1) + return shift, scale + 
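For orientation, the upsampling stack above exists to stretch frame-rate conditioning features to the audio sample rate before they drive sample-level layers. A rough standalone sketch of the shape arithmetic (plain PyTorch, not the repo's UpsampleNetwork; the factors below are assumed for illustration):

```python
import torch
import torch.nn.functional as F

# With upsample_factors = (4, 4, 4, 4) the total stretch is 256, i.e. one
# conditioning frame ends up covering 256 audio samples (the hop length).
upsample_factors = (4, 4, 4, 4)                   # assumed example values
total_scale = 1
for f in upsample_factors:
    total_scale *= f

c = torch.randn(2, 80, 50)                        # (B, aux_channels, T_frames)
c_up = F.interpolate(c, scale_factor=total_scale, mode="nearest")
assert c_up.shape == (2, 80, 50 * total_scale)    # now at the sample rate
```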
+ def remove_weight_norm(self): + nn.utils.remove_weight_norm(self.input_conv) + nn.utils.remove_weight_norm(self.output_conv) + + def apply_weight_norm(self): + self.input_conv = weight_norm(self.input_conv) + self.output_conv = weight_norm(self.output_conv) + + +@torch.jit.script +def shif_and_scale(x, scale, shift): + o = shift + scale * x + return o + + +class UBlock(nn.Module): + def __init__(self, input_size, hidden_size, factor, dilation): + super().__init__() + assert isinstance(dilation, (list, tuple)) + assert len(dilation) == 4 + + self.factor = factor + self.res_block = Conv1d(input_size, hidden_size, 1) + self.main_block = nn.ModuleList([ + Conv1d(input_size, + hidden_size, + 3, + dilation=dilation[0], + padding=dilation[0]), + Conv1d(hidden_size, + hidden_size, + 3, + dilation=dilation[1], + padding=dilation[1]) + ]) + self.out_block = nn.ModuleList([ + Conv1d(hidden_size, + hidden_size, + 3, + dilation=dilation[2], + padding=dilation[2]), + Conv1d(hidden_size, + hidden_size, + 3, + dilation=dilation[3], + padding=dilation[3]) + ]) + + def forward(self, x, shift, scale): + x_inter = F.interpolate(x, size=x.shape[-1] * self.factor) + res = self.res_block(x_inter) + o = F.leaky_relu(x_inter, 0.2) + o = F.interpolate(o, size=x.shape[-1] * self.factor) + o = self.main_block[0](o) + o = shif_and_scale(o, scale, shift) + o = F.leaky_relu(o, 0.2) + o = self.main_block[1](o) + res2 = res + o + o = shif_and_scale(res2, scale, shift) + o = F.leaky_relu(o, 0.2) + o = self.out_block[0](o) + o = shif_and_scale(o, scale, shift) + o = F.leaky_relu(o, 0.2) + o = self.out_block[1](o) + o = o + res2 + return o + + def remove_weight_norm(self): + nn.utils.remove_weight_norm(self.res_block) + for _, layer in enumerate(self.main_block): + if len(layer.state_dict()) != 0: + nn.utils.remove_weight_norm(layer) + for _, layer in enumerate(self.out_block): + if len(layer.state_dict()) != 0: + nn.utils.remove_weight_norm(layer) + + def apply_weight_norm(self): + self.res_block = weight_norm(self.res_block) + for idx, layer in enumerate(self.main_block): + if len(layer.state_dict()) != 0: + self.main_block[idx] = weight_norm(layer) + for idx, layer in enumerate(self.out_block): + if len(layer.state_dict()) != 0: + self.out_block[idx] = weight_norm(layer) + + +class DBlock(nn.Module): + def __init__(self, input_size, hidden_size, factor): + super().__init__() + self.factor = factor + self.res_block = Conv1d(input_size, hidden_size, 1) + self.main_block = nn.ModuleList([ + Conv1d(input_size, hidden_size, 3, dilation=1, padding=1), + Conv1d(hidden_size, hidden_size, 3, dilation=2, padding=2), + Conv1d(hidden_size, hidden_size, 3, dilation=4, padding=4), + ]) + + def forward(self, x): + size = x.shape[-1] // self.factor + res = self.res_block(x) + res = F.interpolate(res, size=size) + o = F.interpolate(x, size=size) + for layer in self.main_block: + o = F.leaky_relu(o, 0.2) + o = layer(o) + return o + res + + def remove_weight_norm(self): + nn.utils.remove_weight_norm(self.res_block) + for _, layer in enumerate(self.main_block): + if len(layer.state_dict()) != 0: + nn.utils.remove_weight_norm(layer) + + def apply_weight_norm(self): + self.res_block = weight_norm(self.res_block) + for idx, layer in enumerate(self.main_block): + if len(layer.state_dict()) != 0: + self.main_block[idx] = weight_norm(layer) + diff --git a/TTS/vocoder/models/__init__.py b/TTS/vocoder/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git 
a/TTS/vocoder/models/fullband_melgan_generator.py b/TTS/vocoder/models/fullband_melgan_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..52dcc75ee14a45f77b5fdf82b1374a4504f81a5d --- /dev/null +++ b/TTS/vocoder/models/fullband_melgan_generator.py @@ -0,0 +1,30 @@ +import torch + +from TTS.vocoder.models.melgan_generator import MelganGenerator + + +class FullbandMelganGenerator(MelganGenerator): + def __init__(self, + in_channels=80, + out_channels=1, + proj_kernel=7, + base_channels=512, + upsample_factors=(2, 8, 2, 2), + res_kernel=3, + num_res_blocks=4): + super().__init__(in_channels=in_channels, + out_channels=out_channels, + proj_kernel=proj_kernel, + base_channels=base_channels, + upsample_factors=upsample_factors, + res_kernel=res_kernel, + num_res_blocks=num_res_blocks) + + @torch.no_grad() + def inference(self, cond_features): + cond_features = cond_features.to(self.layers[1].weight.device) + cond_features = torch.nn.functional.pad( + cond_features, + (self.inference_padding, self.inference_padding), + 'replicate') + return self.layers(cond_features) diff --git a/TTS/vocoder/models/melgan_discriminator.py b/TTS/vocoder/models/melgan_discriminator.py new file mode 100644 index 0000000000000000000000000000000000000000..3847babb594938a86834bb9df6c15c3e60139994 --- /dev/null +++ b/TTS/vocoder/models/melgan_discriminator.py @@ -0,0 +1,78 @@ +import numpy as np +from torch import nn +from torch.nn.utils import weight_norm + + +class MelganDiscriminator(nn.Module): + def __init__(self, + in_channels=1, + out_channels=1, + kernel_sizes=(5, 3), + base_channels=16, + max_channels=1024, + downsample_factors=(4, 4, 4, 4)): + super(MelganDiscriminator, self).__init__() + self.layers = nn.ModuleList() + + layer_kernel_size = np.prod(kernel_sizes) + layer_padding = (layer_kernel_size - 1) // 2 + + # initial layer + self.layers += [ + nn.Sequential( + nn.ReflectionPad1d(layer_padding), + weight_norm( + nn.Conv1d(in_channels, + base_channels, + layer_kernel_size, + stride=1)), nn.LeakyReLU(0.2, inplace=True)) + ] + + # downsampling layers + layer_in_channels = base_channels + for downsample_factor in downsample_factors: + layer_out_channels = min(layer_in_channels * downsample_factor, + max_channels) + layer_kernel_size = downsample_factor * 10 + 1 + layer_padding = (layer_kernel_size - 1) // 2 + layer_groups = layer_in_channels // 4 + self.layers += [ + nn.Sequential( + weight_norm( + nn.Conv1d(layer_in_channels, + layer_out_channels, + kernel_size=layer_kernel_size, + stride=downsample_factor, + padding=layer_padding, + groups=layer_groups)), + nn.LeakyReLU(0.2, inplace=True)) + ] + layer_in_channels = layer_out_channels + + # last 2 layers + layer_padding1 = (kernel_sizes[0] - 1) // 2 + layer_padding2 = (kernel_sizes[1] - 1) // 2 + self.layers += [ + nn.Sequential( + weight_norm( + nn.Conv1d(layer_out_channels, + layer_out_channels, + kernel_size=kernel_sizes[0], + stride=1, + padding=layer_padding1)), + nn.LeakyReLU(0.2, inplace=True), + ), + weight_norm( + nn.Conv1d(layer_out_channels, + out_channels, + kernel_size=kernel_sizes[1], + stride=1, + padding=layer_padding2)), + ] + + def forward(self, x): + feats = [] + for layer in self.layers: + x = layer(x) + feats.append(x) + return x, feats diff --git a/TTS/vocoder/models/melgan_generator.py b/TTS/vocoder/models/melgan_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..3070eac779716164fd2e2fb2474545dabeb7ac36 --- /dev/null +++ b/TTS/vocoder/models/melgan_generator.py @@ -0,0 +1,105 
@@ +import torch +from torch import nn +from torch.nn.utils import weight_norm + +from TTS.vocoder.layers.melgan import ResidualStack + + +class MelganGenerator(nn.Module): + def __init__(self, + in_channels=80, + out_channels=1, + proj_kernel=7, + base_channels=512, + upsample_factors=(8, 8, 2, 2), + res_kernel=3, + num_res_blocks=3): + super(MelganGenerator, self).__init__() + + # assert model parameters + assert (proj_kernel - + 1) % 2 == 0, " [!] proj_kernel should be an odd number." + + # setup additional model parameters + base_padding = (proj_kernel - 1) // 2 + act_slope = 0.2 + self.inference_padding = 2 + + # initial layer + layers = [] + layers += [ + nn.ReflectionPad1d(base_padding), + weight_norm( + nn.Conv1d(in_channels, + base_channels, + kernel_size=proj_kernel, + stride=1, + bias=True)) + ] + + # upsampling layers and residual stacks + for idx, upsample_factor in enumerate(upsample_factors): + layer_in_channels = base_channels // (2**idx) + layer_out_channels = base_channels // (2**(idx + 1)) + layer_filter_size = upsample_factor * 2 + layer_stride = upsample_factor + layer_output_padding = upsample_factor % 2 + layer_padding = upsample_factor // 2 + layer_output_padding + layers += [ + nn.LeakyReLU(act_slope), + weight_norm( + nn.ConvTranspose1d(layer_in_channels, + layer_out_channels, + layer_filter_size, + stride=layer_stride, + padding=layer_padding, + output_padding=layer_output_padding, + bias=True)), + ResidualStack( + channels=layer_out_channels, + num_res_blocks=num_res_blocks, + kernel_size=res_kernel + ) + ] + + layers += [nn.LeakyReLU(act_slope)] + + # final layer + layers += [ + nn.ReflectionPad1d(base_padding), + weight_norm( + nn.Conv1d(layer_out_channels, + out_channels, + proj_kernel, + stride=1, + bias=True)), + nn.Tanh() + ] + self.layers = nn.Sequential(*layers) + + def forward(self, c): + return self.layers(c) + + def inference(self, c): + c = c.to(self.layers[1].weight.device) + c = torch.nn.functional.pad( + c, + (self.inference_padding, self.inference_padding), + 'replicate') + return self.layers(c) + + def remove_weight_norm(self): + for _, layer in enumerate(self.layers): + if len(layer.state_dict()) != 0: + try: + nn.utils.remove_weight_norm(layer) + except ValueError: + layer.remove_weight_norm() + + def load_checkpoint(self, config, checkpoint_path, eval=False): # pylint: disable=unused-argument, redefined-builtin + state = torch.load(checkpoint_path, map_location=torch.device('cpu')) + self.load_state_dict(state['model']) + if eval: + self.eval() + assert not self.training + self.remove_weight_norm() diff --git a/TTS/vocoder/models/melgan_multiscale_discriminator.py b/TTS/vocoder/models/melgan_multiscale_discriminator.py new file mode 100644 index 0000000000000000000000000000000000000000..0f9cca96724927b609353c6cd863badd6a6f2a4b --- /dev/null +++ b/TTS/vocoder/models/melgan_multiscale_discriminator.py @@ -0,0 +1,41 @@ +from torch import nn + +from TTS.vocoder.models.melgan_discriminator import MelganDiscriminator + + +class MelganMultiscaleDiscriminator(nn.Module): + def __init__(self, + in_channels=1, + out_channels=1, + num_scales=3, + kernel_sizes=(5, 3), + base_channels=16, + max_channels=1024, + downsample_factors=(4, 4, 4), + pooling_kernel_size=4, + pooling_stride=2, + pooling_padding=1): + super(MelganMultiscaleDiscriminator, self).__init__() + + self.discriminators = nn.ModuleList([ + MelganDiscriminator(in_channels=in_channels, + out_channels=out_channels, + kernel_sizes=kernel_sizes, + base_channels=base_channels, + 
max_channels=max_channels, + downsample_factors=downsample_factors) + for _ in range(num_scales) + ]) + + self.pooling = nn.AvgPool1d(kernel_size=pooling_kernel_size, stride=pooling_stride, padding=pooling_padding, count_include_pad=False) + + + def forward(self, x): + scores = list() + feats = list() + for disc in self.discriminators: + score, feat = disc(x) + scores.append(score) + feats.append(feat) + x = self.pooling(x) + return scores, feats diff --git a/TTS/vocoder/models/multiband_melgan_generator.py b/TTS/vocoder/models/multiband_melgan_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..15e7426e74eab61091478376aa43cca118e86eaf --- /dev/null +++ b/TTS/vocoder/models/multiband_melgan_generator.py @@ -0,0 +1,39 @@ +import torch + +from TTS.vocoder.models.melgan_generator import MelganGenerator +from TTS.vocoder.layers.pqmf import PQMF + + +class MultibandMelganGenerator(MelganGenerator): + def __init__(self, + in_channels=80, + out_channels=4, + proj_kernel=7, + base_channels=384, + upsample_factors=(2, 8, 2, 2), + res_kernel=3, + num_res_blocks=3): + super(MultibandMelganGenerator, + self).__init__(in_channels=in_channels, + out_channels=out_channels, + proj_kernel=proj_kernel, + base_channels=base_channels, + upsample_factors=upsample_factors, + res_kernel=res_kernel, + num_res_blocks=num_res_blocks) + self.pqmf_layer = PQMF(N=4, taps=62, cutoff=0.15, beta=9.0) + + def pqmf_analysis(self, x): + return self.pqmf_layer.analysis(x) + + def pqmf_synthesis(self, x): + return self.pqmf_layer.synthesis(x) + + @torch.no_grad() + def inference(self, cond_features): + cond_features = cond_features.to(self.layers[1].weight.device) + cond_features = torch.nn.functional.pad( + cond_features, + (self.inference_padding, self.inference_padding), + 'replicate') + return self.pqmf_synthesis(self.layers(cond_features)) diff --git a/TTS/vocoder/models/parallel_wavegan_discriminator.py b/TTS/vocoder/models/parallel_wavegan_discriminator.py new file mode 100644 index 0000000000000000000000000000000000000000..37c22695502cbb4ef7930479e06dceca8f52446b --- /dev/null +++ b/TTS/vocoder/models/parallel_wavegan_discriminator.py @@ -0,0 +1,197 @@ +import math +import torch +from torch import nn + +from TTS.vocoder.layers.parallel_wavegan import ResidualBlock + + +class ParallelWaveganDiscriminator(nn.Module): + """PWGAN discriminator as in https://arxiv.org/abs/1910.11480. + It classifies each audio window real/fake and returns a sequence + of predictions. + It is a stack of convolutional blocks with dilation. + """ + # pylint: disable=dangerous-default-value + def __init__(self, + in_channels=1, + out_channels=1, + kernel_size=3, + num_layers=10, + conv_channels=64, + dilation_factor=1, + nonlinear_activation="LeakyReLU", + nonlinear_activation_params={"negative_slope": 0.2}, + bias=True, + ): + super(ParallelWaveganDiscriminator, self).__init__() + assert (kernel_size - 1) % 2 == 0, " [!] does not support even number kernel size." + assert dilation_factor > 0, " [!] dilation factor must be > 0." 
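To make the multi-band setup above concrete: MultibandMelganGenerator predicts four sub-band signals at a quarter of the audio rate, and the PQMF filter bank recombines them into one full-rate waveform. A shape-only sketch using the PQMF layer added in this patch (assumes the package from this patch is importable; the waveform is random dummy data):

```python
import torch
from TTS.vocoder.layers.pqmf import PQMF

pqmf = PQMF(N=4, taps=62, cutoff=0.15, beta=9.0)  # same defaults as the generator
audio = torch.randn(1, 1, 4096)                   # (B, 1, T) dummy waveform
bands = pqmf.analysis(audio)                      # (B, 4, T // 4) sub-band signals
recon = pqmf.synthesis(bands)                     # (B, 1, T) near-perfect reconstruction
print(bands.shape, recon.shape)
```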
+ self.conv_layers = nn.ModuleList() + conv_in_channels = in_channels + for i in range(num_layers - 1): + if i == 0: + dilation = 1 + else: + dilation = i if dilation_factor == 1 else dilation_factor ** i + conv_in_channels = conv_channels + padding = (kernel_size - 1) // 2 * dilation + conv_layer = [ + nn.Conv1d(conv_in_channels, + conv_channels, + kernel_size=kernel_size, + padding=padding, + dilation=dilation, + bias=bias), + getattr(nn, + nonlinear_activation)(inplace=True, + **nonlinear_activation_params) + ] + self.conv_layers += conv_layer + padding = (kernel_size - 1) // 2 + last_conv_layer = nn.Conv1d( + conv_in_channels, out_channels, + kernel_size=kernel_size, padding=padding, bias=bias) + self.conv_layers += [last_conv_layer] + self.apply_weight_norm() + + def forward(self, x): + """ + x : (B, 1, T). + Returns: + Tensor: (B, 1, T) + """ + for f in self.conv_layers: + x = f(x) + return x + + def apply_weight_norm(self): + def _apply_weight_norm(m): + if isinstance(m, (torch.nn.Conv1d, torch.nn.Conv2d)): + torch.nn.utils.weight_norm(m) + self.apply(_apply_weight_norm) + + def remove_weight_norm(self): + def _remove_weight_norm(m): + try: + # print(f"Weight norm is removed from {m}.") + nn.utils.remove_weight_norm(m) + except ValueError: # this module didn't have weight norm + return + self.apply(_remove_weight_norm) + + +class ResidualParallelWaveganDiscriminator(nn.Module): + # pylint: disable=dangerous-default-value + def __init__(self, + in_channels=1, + out_channels=1, + kernel_size=3, + num_layers=30, + stacks=3, + res_channels=64, + gate_channels=128, + skip_channels=64, + dropout=0.0, + bias=True, + nonlinear_activation="LeakyReLU", + nonlinear_activation_params={"negative_slope": 0.2}, + ): + super(ResidualParallelWaveganDiscriminator, self).__init__() + assert (kernel_size - 1) % 2 == 0, "Not support even number kernel size." + + self.in_channels = in_channels + self.out_channels = out_channels + self.num_layers = num_layers + self.stacks = stacks + self.kernel_size = kernel_size + self.res_factor = math.sqrt(1.0 / num_layers) + + # check the number of num_layers and stacks + assert num_layers % stacks == 0 + layers_per_stack = num_layers // stacks + + # define first convolution + self.first_conv = nn.Sequential( + nn.Conv1d(in_channels, + res_channels, + kernel_size=1, + padding=0, + dilation=1, + bias=True), + getattr(nn, nonlinear_activation)(inplace=True, + **nonlinear_activation_params), + ) + + # define residual blocks + self.conv_layers = nn.ModuleList() + for layer in range(num_layers): + dilation = 2 ** (layer % layers_per_stack) + conv = ResidualBlock( + kernel_size=kernel_size, + res_channels=res_channels, + gate_channels=gate_channels, + skip_channels=skip_channels, + aux_channels=-1, + dilation=dilation, + dropout=dropout, + bias=bias, + use_causal_conv=False, + ) + self.conv_layers += [conv] + + # define output layers + self.last_conv_layers = nn.ModuleList([ + getattr(nn, nonlinear_activation)(inplace=True, + **nonlinear_activation_params), + nn.Conv1d(skip_channels, + skip_channels, + kernel_size=1, + padding=0, + dilation=1, + bias=True), + getattr(nn, nonlinear_activation)(inplace=True, + **nonlinear_activation_params), + nn.Conv1d(skip_channels, + out_channels, + kernel_size=1, + padding=0, + dilation=1, + bias=True), + ]) + + # apply weight norm + self.apply_weight_norm() + + def forward(self, x): + """ + x: (B, 1, T). 
+ """ + x = self.first_conv(x) + + skips = 0 + for f in self.conv_layers: + x, h = f(x, None) + skips += h + skips *= self.res_factor + + # apply final layers + x = skips + for f in self.last_conv_layers: + x = f(x) + return x + + def apply_weight_norm(self): + def _apply_weight_norm(m): + if isinstance(m, (torch.nn.Conv1d, torch.nn.Conv2d)): + torch.nn.utils.weight_norm(m) + self.apply(_apply_weight_norm) + + def remove_weight_norm(self): + def _remove_weight_norm(m): + try: + print(f"Weight norm is removed from {m}.") + nn.utils.remove_weight_norm(m) + except ValueError: # this module didn't have weight norm + return + + self.apply(_remove_weight_norm) diff --git a/TTS/vocoder/models/parallel_wavegan_generator.py b/TTS/vocoder/models/parallel_wavegan_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..1d1bcdcbf8057084c76956434e3765c01a6019fa --- /dev/null +++ b/TTS/vocoder/models/parallel_wavegan_generator.py @@ -0,0 +1,168 @@ +import math +import numpy as np +import torch + +from TTS.vocoder.layers.parallel_wavegan import ResidualBlock +from TTS.vocoder.layers.upsample import ConvUpsample + + +class ParallelWaveganGenerator(torch.nn.Module): + """PWGAN generator as in https://arxiv.org/pdf/1910.11480.pdf. + It is similar to WaveNet with no causal convolution. + It is conditioned on an aux feature (spectrogram) to generate + an output waveform from an input noise. + """ + # pylint: disable=dangerous-default-value + def __init__(self, + in_channels=1, + out_channels=1, + kernel_size=3, + num_res_blocks=30, + stacks=3, + res_channels=64, + gate_channels=128, + skip_channels=64, + aux_channels=80, + dropout=0.0, + bias=True, + use_weight_norm=True, + upsample_factors=[4, 4, 4, 4], + inference_padding=2): + + super(ParallelWaveganGenerator, self).__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.aux_channels = aux_channels + self.num_res_blocks = num_res_blocks + self.stacks = stacks + self.kernel_size = kernel_size + self.upsample_factors = upsample_factors + self.upsample_scale = np.prod(upsample_factors) + self.inference_padding = inference_padding + self.use_weight_norm = use_weight_norm + + # check the number of layers and stacks + assert num_res_blocks % stacks == 0 + layers_per_stack = num_res_blocks // stacks + + # define first convolution + self.first_conv = torch.nn.Conv1d(in_channels, + res_channels, + kernel_size=1, + bias=True) + + # define conv + upsampling network + self.upsample_net = ConvUpsample(upsample_factors=upsample_factors) + + # define residual blocks + self.conv_layers = torch.nn.ModuleList() + for layer in range(num_res_blocks): + dilation = 2**(layer % layers_per_stack) + conv = ResidualBlock( + kernel_size=kernel_size, + res_channels=res_channels, + gate_channels=gate_channels, + skip_channels=skip_channels, + aux_channels=aux_channels, + dilation=dilation, + dropout=dropout, + bias=bias, + ) + self.conv_layers += [conv] + + # define output layers + self.last_conv_layers = torch.nn.ModuleList([ + torch.nn.ReLU(inplace=True), + torch.nn.Conv1d(skip_channels, + skip_channels, + kernel_size=1, + bias=True), + torch.nn.ReLU(inplace=True), + torch.nn.Conv1d(skip_channels, + out_channels, + kernel_size=1, + bias=True), + ]) + + # apply weight norm + if use_weight_norm: + self.apply_weight_norm() + + def forward(self, c): + """ + c: (B, C ,T'). 
+ o: Output tensor (B, out_channels, T) + """ + # random noise + x = torch.randn([c.shape[0], 1, c.shape[2] * self.upsample_scale]) + x = x.to(self.first_conv.bias.device) + + # perform upsampling + if c is not None and self.upsample_net is not None: + c = self.upsample_net(c) + assert c.shape[-1] == x.shape[ + -1], f" [!] Upsampling scale does not match the expected output. {c.shape} vs {x.shape}" + + # encode to hidden representation + x = self.first_conv(x) + skips = 0 + for f in self.conv_layers: + x, h = f(x, c) + skips += h + skips *= math.sqrt(1.0 / len(self.conv_layers)) + + # apply final layers + x = skips + for f in self.last_conv_layers: + x = f(x) + + return x + + @torch.no_grad() + def inference(self, c): + c = c.to(self.first_conv.weight.device) + c = torch.nn.functional.pad( + c, (self.inference_padding, self.inference_padding), 'replicate') + return self.forward(c) + + def remove_weight_norm(self): + def _remove_weight_norm(m): + try: + # print(f"Weight norm is removed from {m}.") + torch.nn.utils.remove_weight_norm(m) + except ValueError: # this module didn't have weight norm + return + + self.apply(_remove_weight_norm) + + def apply_weight_norm(self): + def _apply_weight_norm(m): + if isinstance(m, (torch.nn.Conv1d, torch.nn.Conv2d)): + torch.nn.utils.weight_norm(m) + # print(f"Weight norm is applied to {m}.") + + self.apply(_apply_weight_norm) + + @staticmethod + def _get_receptive_field_size(layers, + stacks, + kernel_size, + dilation=lambda x: 2**x): + assert layers % stacks == 0 + layers_per_cycle = layers // stacks + dilations = [dilation(i % layers_per_cycle) for i in range(layers)] + return (kernel_size - 1) * sum(dilations) + 1 + + @property + def receptive_field_size(self): + return self._get_receptive_field_size(self.layers, self.stacks, + self.kernel_size) + + def load_checkpoint(self, config, checkpoint_path, eval=False): # pylint: disable=unused-argument, redefined-builtin + state = torch.load(checkpoint_path, map_location=torch.device('cpu')) + self.load_state_dict(state['model']) + if eval: + self.eval() + assert not self.training + if self.use_weight_norm: + self.remove_weight_norm() diff --git a/TTS/vocoder/models/random_window_discriminator.py b/TTS/vocoder/models/random_window_discriminator.py new file mode 100644 index 0000000000000000000000000000000000000000..3efd395e1de7fbb838f2296f4422145284f356c1 --- /dev/null +++ b/TTS/vocoder/models/random_window_discriminator.py @@ -0,0 +1,225 @@ +import numpy as np +from torch import nn + + +class GBlock(nn.Module): + def __init__(self, in_channels, cond_channels, downsample_factor): + super(GBlock, self).__init__() + + self.in_channels = in_channels + self.cond_channels = cond_channels + self.downsample_factor = downsample_factor + + self.start = nn.Sequential( + nn.AvgPool1d(downsample_factor, stride=downsample_factor), + nn.ReLU(), + nn.Conv1d(in_channels, in_channels * 2, kernel_size=3, padding=1)) + self.lc_conv1d = nn.Conv1d(cond_channels, + in_channels * 2, + kernel_size=1) + self.end = nn.Sequential( + nn.ReLU(), + nn.Conv1d(in_channels * 2, + in_channels * 2, + kernel_size=3, + dilation=2, + padding=2)) + self.residual = nn.Sequential( + nn.Conv1d(in_channels, in_channels * 2, kernel_size=1), + nn.AvgPool1d(downsample_factor, stride=downsample_factor)) + + def forward(self, inputs, conditions): + outputs = self.start(inputs) + self.lc_conv1d(conditions) + outputs = self.end(outputs) + residual_outputs = self.residual(inputs) + outputs = outputs + residual_outputs + + return outputs + + +class 
DBlock(nn.Module): + def __init__(self, in_channels, out_channels, downsample_factor): + super(DBlock, self).__init__() + + self.in_channels = in_channels + self.downsample_factor = downsample_factor + self.out_channels = out_channels + + self.donwsample_layer = nn.AvgPool1d(downsample_factor, + stride=downsample_factor) + self.layers = nn.Sequential( + nn.ReLU(), + nn.Conv1d(in_channels, out_channels, kernel_size=3, padding=1), + nn.ReLU(), + nn.Conv1d(out_channels, + out_channels, + kernel_size=3, + dilation=2, + padding=2)) + self.residual = nn.Sequential( + nn.Conv1d(in_channels, out_channels, kernel_size=1), ) + + def forward(self, inputs): + if self.downsample_factor > 1: + outputs = self.layers(self.donwsample_layer(inputs))\ + + self.donwsample_layer(self.residual(inputs)) + else: + outputs = self.layers(inputs) + self.residual(inputs) + return outputs + + +class ConditionalDiscriminator(nn.Module): + def __init__(self, + in_channels, + cond_channels, + downsample_factors=(2, 2, 2), + out_channels=(128, 256)): + super(ConditionalDiscriminator, self).__init__() + + assert len(downsample_factors) == len(out_channels) + 1 + + self.in_channels = in_channels + self.cond_channels = cond_channels + self.downsample_factors = downsample_factors + self.out_channels = out_channels + + self.pre_cond_layers = nn.ModuleList() + self.post_cond_layers = nn.ModuleList() + + # layers before condition features + self.pre_cond_layers += [DBlock(in_channels, 64, 1)] + in_channels = 64 + for (i, channel) in enumerate(out_channels): + self.pre_cond_layers.append( + DBlock(in_channels, channel, downsample_factors[i])) + in_channels = channel + + # condition block + self.cond_block = GBlock(in_channels, cond_channels, + downsample_factors[-1]) + + # layers after condition block + self.post_cond_layers += [ + DBlock(in_channels * 2, in_channels * 2, 1), + DBlock(in_channels * 2, in_channels * 2, 1), + nn.AdaptiveAvgPool1d(1), + nn.Conv1d(in_channels * 2, 1, kernel_size=1), + ] + + def forward(self, inputs, conditions): + batch_size = inputs.size()[0] + outputs = inputs.view(batch_size, self.in_channels, -1) + for layer in self.pre_cond_layers: + outputs = layer(outputs) + outputs = self.cond_block(outputs, conditions) + for layer in self.post_cond_layers: + outputs = layer(outputs) + + return outputs + + +class UnconditionalDiscriminator(nn.Module): + def __init__(self, + in_channels, + base_channels=64, + downsample_factors=(8, 4), + out_channels=(128, 256)): + super(UnconditionalDiscriminator, self).__init__() + + self.downsample_factors = downsample_factors + self.in_channels = in_channels + self.downsample_factors = downsample_factors + self.out_channels = out_channels + + self.layers = nn.ModuleList() + self.layers += [DBlock(self.in_channels, base_channels, 1)] + in_channels = base_channels + for (i, factor) in enumerate(downsample_factors): + self.layers.append(DBlock(in_channels, out_channels[i], factor)) + in_channels *= 2 + self.layers += [ + DBlock(in_channels, in_channels, 1), + DBlock(in_channels, in_channels, 1), + nn.AdaptiveAvgPool1d(1), + nn.Conv1d(in_channels, 1, kernel_size=1), + ] + + def forward(self, inputs): + batch_size = inputs.size()[0] + outputs = inputs.view(batch_size, self.in_channels, -1) + for layer in self.layers: + outputs = layer(outputs) + return outputs + + +class RandomWindowDiscriminator(nn.Module): + """Random Window Discriminator as described in + http://arxiv.org/abs/1909.11646""" + def __init__(self, + cond_channels, + hop_length, + 
uncond_disc_donwsample_factors=(8, 4), + cond_disc_downsample_factors=((8, 4, 2, 2, 2), (8, 4, 2, 2), + (8, 4, 2), (8, 4), (4, 2, 2)), + cond_disc_out_channels=((128, 128, 256, 256), (128, 256, 256), + (128, 256), (256, ), (128, 256)), + window_sizes=(512, 1024, 2048, 4096, 8192)): + + super(RandomWindowDiscriminator, self).__init__() + self.cond_channels = cond_channels + self.window_sizes = window_sizes + self.hop_length = hop_length + self.base_window_size = self.hop_length * 2 + self.ks = [ws // self.base_window_size for ws in window_sizes] + + # check arguments + assert len(cond_disc_downsample_factors) == len( + cond_disc_out_channels) == len(window_sizes) + for ws in window_sizes: + assert ws % hop_length == 0 + + for idx, cf in enumerate(cond_disc_downsample_factors): + assert np.prod(cf) == hop_length // self.ks[idx] + + # define layers + self.unconditional_discriminators = nn.ModuleList([]) + for k in self.ks: + layer = UnconditionalDiscriminator( + in_channels=k, + base_channels=64, + downsample_factors=uncond_disc_donwsample_factors) + self.unconditional_discriminators.append(layer) + + self.conditional_discriminators = nn.ModuleList([]) + for idx, k in enumerate(self.ks): + layer = ConditionalDiscriminator( + in_channels=k, + cond_channels=cond_channels, + downsample_factors=cond_disc_downsample_factors[idx], + out_channels=cond_disc_out_channels[idx]) + self.conditional_discriminators.append(layer) + + def forward(self, x, c): + scores = [] + feats = [] + # unconditional pass + for (window_size, layer) in zip(self.window_sizes, + self.unconditional_discriminators): + index = np.random.randint(x.shape[-1] - window_size) + + score = layer(x[:, :, index:index + window_size]) + scores.append(score) + + # conditional pass + for (window_size, layer) in zip(self.window_sizes, + self.conditional_discriminators): + frame_size = window_size // self.hop_length + lc_index = np.random.randint(c.shape[-1] - frame_size) + sample_index = lc_index * self.hop_length + x_sub = x[:, :, + sample_index:(lc_index + frame_size) * self.hop_length] + c_sub = c[:, :, lc_index:lc_index + frame_size] + + score = layer(x_sub, c_sub) + scores.append(score) + return scores, feats diff --git a/TTS/vocoder/models/wavegrad.py b/TTS/vocoder/models/wavegrad.py new file mode 100644 index 0000000000000000000000000000000000000000..f4a5faa3a0a5fd15acddb36120a59a980387a7d5 --- /dev/null +++ b/TTS/vocoder/models/wavegrad.py @@ -0,0 +1,196 @@ +import numpy as np +import torch +from torch import nn +from torch.nn.utils import weight_norm + +from ..layers.wavegrad import DBlock, FiLM, UBlock, Conv1d + + +class Wavegrad(nn.Module): + # pylint: disable=dangerous-default-value + def __init__(self, + in_channels=80, + out_channels=1, + use_weight_norm=False, + y_conv_channels=32, + x_conv_channels=768, + dblock_out_channels=[128, 128, 256, 512], + ublock_out_channels=[512, 512, 256, 128, 128], + upsample_factors=[5, 5, 3, 2, 2], + upsample_dilations=[[1, 2, 1, 2], [1, 2, 1, 2], [1, 2, 4, 8], + [1, 2, 4, 8], [1, 2, 4, 8]]): + super().__init__() + + self.use_weight_norm = use_weight_norm + self.hop_len = np.prod(upsample_factors) + self.noise_level = None + self.num_steps = None + self.beta = None + self.alpha = None + self.alpha_hat = None + self.noise_level = None + self.c1 = None + self.c2 = None + self.sigma = None + + # dblocks + self.y_conv = Conv1d(1, y_conv_channels, 5, padding=2) + self.dblocks = nn.ModuleList([]) + ic = y_conv_channels + for oc, df in zip(dblock_out_channels, reversed(upsample_factors)): + 
self.dblocks.append(DBlock(ic, oc, df)) + ic = oc + + # film + self.film = nn.ModuleList([]) + ic = y_conv_channels + for oc in reversed(ublock_out_channels): + self.film.append(FiLM(ic, oc)) + ic = oc + + # ublocks + self.ublocks = nn.ModuleList([]) + ic = x_conv_channels + for oc, uf, ud in zip(ublock_out_channels, upsample_factors, upsample_dilations): + self.ublocks.append(UBlock(ic, oc, uf, ud)) + ic = oc + + self.x_conv = Conv1d(in_channels, x_conv_channels, 3, padding=1) + self.out_conv = Conv1d(oc, out_channels, 3, padding=1) + + if use_weight_norm: + self.apply_weight_norm() + + def forward(self, x, spectrogram, noise_scale): + shift_and_scale = [] + + x = self.y_conv(x) + shift_and_scale.append(self.film[0](x, noise_scale)) + + for film, layer in zip(self.film[1:], self.dblocks): + x = layer(x) + shift_and_scale.append(film(x, noise_scale)) + + x = self.x_conv(spectrogram) + for layer, (film_shift, film_scale) in zip(self.ublocks, + reversed(shift_and_scale)): + x = layer(x, film_shift, film_scale) + x = self.out_conv(x) + return x + + def load_noise_schedule(self, path): + beta = np.load(path, allow_pickle=True).item()['beta'] + self.compute_noise_level(beta) + + @torch.no_grad() + def inference(self, x, y_n=None): + """ x: B x D X T """ + if y_n is None: + y_n = torch.randn(x.shape[0], 1, self.hop_len * x.shape[-1], dtype=torch.float32).to(x) + else: + y_n = torch.FloatTensor(y_n).unsqueeze(0).unsqueeze(0).to(x) + sqrt_alpha_hat = self.noise_level.to(x) + for n in range(len(self.alpha) - 1, -1, -1): + y_n = self.c1[n] * (y_n - + self.c2[n] * self.forward(y_n, x, sqrt_alpha_hat[n].repeat(x.shape[0]))) + if n > 0: + z = torch.randn_like(y_n) + y_n += self.sigma[n - 1] * z + y_n.clamp_(-1.0, 1.0) + return y_n + + + def compute_y_n(self, y_0): + """Compute noisy audio based on noise schedule""" + self.noise_level = self.noise_level.to(y_0) + if len(y_0.shape) == 3: + y_0 = y_0.squeeze(1) + s = torch.randint(0, self.num_steps - 1, [y_0.shape[0]]) + l_a, l_b = self.noise_level[s], self.noise_level[s+1] + noise_scale = l_a + torch.rand(y_0.shape[0]).to(y_0) * (l_b - l_a) + noise_scale = noise_scale.unsqueeze(1) + noise = torch.randn_like(y_0) + noisy_audio = noise_scale * y_0 + (1.0 - noise_scale**2)**0.5 * noise + return noise.unsqueeze(1), noisy_audio.unsqueeze(1), noise_scale[:, 0] + + def compute_noise_level(self, beta): + """Compute noise schedule parameters""" + self.num_steps = len(beta) + alpha = 1 - beta + alpha_hat = np.cumprod(alpha) + noise_level = np.concatenate([[1.0], alpha_hat ** 0.5], axis=0) + noise_level = alpha_hat ** 0.5 + + # pylint: disable=not-callable + self.beta = torch.tensor(beta.astype(np.float32)) + self.alpha = torch.tensor(alpha.astype(np.float32)) + self.alpha_hat = torch.tensor(alpha_hat.astype(np.float32)) + self.noise_level = torch.tensor(noise_level.astype(np.float32)) + + self.c1 = 1 / self.alpha**0.5 + self.c2 = (1 - self.alpha) / (1 - self.alpha_hat)**0.5 + self.sigma = ((1.0 - self.alpha_hat[:-1]) / (1.0 - self.alpha_hat[1:]) * self.beta[1:])**0.5 + + def remove_weight_norm(self): + for _, layer in enumerate(self.dblocks): + if len(layer.state_dict()) != 0: + try: + nn.utils.remove_weight_norm(layer) + except ValueError: + layer.remove_weight_norm() + + for _, layer in enumerate(self.film): + if len(layer.state_dict()) != 0: + try: + nn.utils.remove_weight_norm(layer) + except ValueError: + layer.remove_weight_norm() + + + for _, layer in enumerate(self.ublocks): + if len(layer.state_dict()) != 0: + try: + nn.utils.remove_weight_norm(layer) + 
except ValueError: + layer.remove_weight_norm() + + nn.utils.remove_weight_norm(self.x_conv) + nn.utils.remove_weight_norm(self.out_conv) + nn.utils.remove_weight_norm(self.y_conv) + + def apply_weight_norm(self): + for _, layer in enumerate(self.dblocks): + if len(layer.state_dict()) != 0: + layer.apply_weight_norm() + + for _, layer in enumerate(self.film): + if len(layer.state_dict()) != 0: + layer.apply_weight_norm() + + + for _, layer in enumerate(self.ublocks): + if len(layer.state_dict()) != 0: + layer.apply_weight_norm() + + self.x_conv = weight_norm(self.x_conv) + self.out_conv = weight_norm(self.out_conv) + self.y_conv = weight_norm(self.y_conv) + + + def load_checkpoint(self, config, checkpoint_path, eval=False): # pylint: disable=unused-argument, redefined-builtin + state = torch.load(checkpoint_path, map_location=torch.device('cpu')) + self.load_state_dict(state['model']) + if eval: + self.eval() + assert not self.training + if self.use_weight_norm: + self.remove_weight_norm() + betas = np.linspace(config['test_noise_schedule']['min_val'], + config['test_noise_schedule']['max_val'], + config['test_noise_schedule']['num_steps']) + self.compute_noise_level(betas) + else: + betas = np.linspace(config['train_noise_schedule']['min_val'], + config['train_noise_schedule']['max_val'], + config['train_noise_schedule']['num_steps']) + self.compute_noise_level(betas) diff --git a/TTS/vocoder/models/wavernn.py b/TTS/vocoder/models/wavernn.py new file mode 100644 index 0000000000000000000000000000000000000000..cb03deb35e35b3b621e85137f6631244885eee92 --- /dev/null +++ b/TTS/vocoder/models/wavernn.py @@ -0,0 +1,508 @@ +import sys +import torch +import torch.nn as nn +import numpy as np +import torch.nn.functional as F +import time + +# fix this +from TTS.utils.audio import AudioProcessor as ap +from TTS.vocoder.utils.distribution import ( + sample_from_gaussian, + sample_from_discretized_mix_logistic, +) + + +def stream(string, variables): + sys.stdout.write(f"\r{string}" % variables) + +# pylint: disable=abstract-method +# relates https://github.com/pytorch/pytorch/issues/42305 +class ResBlock(nn.Module): + def __init__(self, dims): + super().__init__() + self.conv1 = nn.Conv1d(dims, dims, kernel_size=1, bias=False) + self.conv2 = nn.Conv1d(dims, dims, kernel_size=1, bias=False) + self.batch_norm1 = nn.BatchNorm1d(dims) + self.batch_norm2 = nn.BatchNorm1d(dims) + + def forward(self, x): + residual = x + x = self.conv1(x) + x = self.batch_norm1(x) + x = F.relu(x) + x = self.conv2(x) + x = self.batch_norm2(x) + return x + residual + + +class MelResNet(nn.Module): + def __init__(self, num_res_blocks, in_dims, compute_dims, res_out_dims, pad): + super().__init__() + k_size = pad * 2 + 1 + self.conv_in = nn.Conv1d( + in_dims, compute_dims, kernel_size=k_size, bias=False) + self.batch_norm = nn.BatchNorm1d(compute_dims) + self.layers = nn.ModuleList() + for _ in range(num_res_blocks): + self.layers.append(ResBlock(compute_dims)) + self.conv_out = nn.Conv1d(compute_dims, res_out_dims, kernel_size=1) + + def forward(self, x): + x = self.conv_in(x) + x = self.batch_norm(x) + x = F.relu(x) + for f in self.layers: + x = f(x) + x = self.conv_out(x) + return x + + +class Stretch2d(nn.Module): + def __init__(self, x_scale, y_scale): + super().__init__() + self.x_scale = x_scale + self.y_scale = y_scale + + def forward(self, x): + b, c, h, w = x.size() + x = x.unsqueeze(-1).unsqueeze(3) + x = x.repeat(1, 1, 1, self.y_scale, 1, self.x_scale) + return x.view(b, c, h * self.y_scale, w * self.x_scale) + + 
+class UpsampleNetwork(nn.Module): + def __init__( + self, + feat_dims, + upsample_scales, + compute_dims, + num_res_blocks, + res_out_dims, + pad, + use_aux_net, + ): + super().__init__() + self.total_scale = np.cumproduct(upsample_scales)[-1] + self.indent = pad * self.total_scale + self.use_aux_net = use_aux_net + if use_aux_net: + self.resnet = MelResNet( + num_res_blocks, feat_dims, compute_dims, res_out_dims, pad + ) + self.resnet_stretch = Stretch2d(self.total_scale, 1) + self.up_layers = nn.ModuleList() + for scale in upsample_scales: + k_size = (1, scale * 2 + 1) + padding = (0, scale) + stretch = Stretch2d(scale, 1) + conv = nn.Conv2d(1, 1, kernel_size=k_size, + padding=padding, bias=False) + conv.weight.data.fill_(1.0 / k_size[1]) + self.up_layers.append(stretch) + self.up_layers.append(conv) + + def forward(self, m): + if self.use_aux_net: + aux = self.resnet(m).unsqueeze(1) + aux = self.resnet_stretch(aux) + aux = aux.squeeze(1) + aux = aux.transpose(1, 2) + else: + aux = None + m = m.unsqueeze(1) + for f in self.up_layers: + m = f(m) + m = m.squeeze(1)[:, :, self.indent: -self.indent] + return m.transpose(1, 2), aux + + +class Upsample(nn.Module): + def __init__( + self, scale, pad, num_res_blocks, feat_dims, compute_dims, res_out_dims, use_aux_net + ): + super().__init__() + self.scale = scale + self.pad = pad + self.indent = pad * scale + self.use_aux_net = use_aux_net + self.resnet = MelResNet(num_res_blocks, feat_dims, + compute_dims, res_out_dims, pad) + + def forward(self, m): + if self.use_aux_net: + aux = self.resnet(m) + aux = torch.nn.functional.interpolate( + aux, scale_factor=self.scale, mode="linear", align_corners=True + ) + aux = aux.transpose(1, 2) + else: + aux = None + m = torch.nn.functional.interpolate( + m, scale_factor=self.scale, mode="linear", align_corners=True + ) + m = m[:, :, self.indent: -self.indent] + m = m * 0.045 # empirically found + + return m.transpose(1, 2), aux + + +class WaveRNN(nn.Module): + def __init__(self, + rnn_dims, + fc_dims, + mode, + mulaw, + pad, + use_aux_net, + use_upsample_net, + upsample_factors, + feat_dims, + compute_dims, + res_out_dims, + num_res_blocks, + hop_length, + sample_rate, + ): + super().__init__() + self.mode = mode + self.mulaw = mulaw + self.pad = pad + self.use_upsample_net = use_upsample_net + self.use_aux_net = use_aux_net + if isinstance(self.mode, int): + self.n_classes = 2 ** self.mode + elif self.mode == "mold": + self.n_classes = 3 * 10 + elif self.mode == "gauss": + self.n_classes = 2 + else: + raise RuntimeError("Unknown model mode value - ", self.mode) + + self.rnn_dims = rnn_dims + self.aux_dims = res_out_dims // 4 + self.hop_length = hop_length + self.sample_rate = sample_rate + + if self.use_upsample_net: + assert ( + np.cumproduct(upsample_factors)[-1] == self.hop_length + ), " [!] 
upsample scales needs to be equal to hop_length" + self.upsample = UpsampleNetwork( + feat_dims, + upsample_factors, + compute_dims, + num_res_blocks, + res_out_dims, + pad, + use_aux_net, + ) + else: + self.upsample = Upsample( + hop_length, + pad, + num_res_blocks, + feat_dims, + compute_dims, + res_out_dims, + use_aux_net, + ) + if self.use_aux_net: + self.I = nn.Linear(feat_dims + self.aux_dims + 1, rnn_dims) + self.rnn1 = nn.GRU(rnn_dims, rnn_dims, batch_first=True) + self.rnn2 = nn.GRU(rnn_dims + self.aux_dims, + rnn_dims, batch_first=True) + self.fc1 = nn.Linear(rnn_dims + self.aux_dims, fc_dims) + self.fc2 = nn.Linear(fc_dims + self.aux_dims, fc_dims) + self.fc3 = nn.Linear(fc_dims, self.n_classes) + else: + self.I = nn.Linear(feat_dims + 1, rnn_dims) + self.rnn1 = nn.GRU(rnn_dims, rnn_dims, batch_first=True) + self.rnn2 = nn.GRU(rnn_dims, rnn_dims, batch_first=True) + self.fc1 = nn.Linear(rnn_dims, fc_dims) + self.fc2 = nn.Linear(fc_dims, fc_dims) + self.fc3 = nn.Linear(fc_dims, self.n_classes) + + def forward(self, x, mels): + bsize = x.size(0) + h1 = torch.zeros(1, bsize, self.rnn_dims).to(x.device) + h2 = torch.zeros(1, bsize, self.rnn_dims).to(x.device) + mels, aux = self.upsample(mels) + + if self.use_aux_net: + aux_idx = [self.aux_dims * i for i in range(5)] + a1 = aux[:, :, aux_idx[0]: aux_idx[1]] + a2 = aux[:, :, aux_idx[1]: aux_idx[2]] + a3 = aux[:, :, aux_idx[2]: aux_idx[3]] + a4 = aux[:, :, aux_idx[3]: aux_idx[4]] + + x = ( + torch.cat([x.unsqueeze(-1), mels, a1], dim=2) + if self.use_aux_net + else torch.cat([x.unsqueeze(-1), mels], dim=2) + ) + x = self.I(x) + res = x + self.rnn1.flatten_parameters() + x, _ = self.rnn1(x, h1) + + x = x + res + res = x + x = torch.cat([x, a2], dim=2) if self.use_aux_net else x + self.rnn2.flatten_parameters() + x, _ = self.rnn2(x, h2) + + x = x + res + x = torch.cat([x, a3], dim=2) if self.use_aux_net else x + x = F.relu(self.fc1(x)) + + x = torch.cat([x, a4], dim=2) if self.use_aux_net else x + x = F.relu(self.fc2(x)) + return self.fc3(x) + + def inference(self, mels, batched, target, overlap): + + self.eval() + device = mels.device + output = [] + start = time.time() + rnn1 = self.get_gru_cell(self.rnn1) + rnn2 = self.get_gru_cell(self.rnn2) + + with torch.no_grad(): + if isinstance(mels, np.ndarray): + mels = torch.FloatTensor(mels).to(device) + + if mels.ndim == 2: + mels = mels.unsqueeze(0) + wave_len = (mels.size(-1) - 1) * self.hop_length + + mels = self.pad_tensor(mels.transpose( + 1, 2), pad=self.pad, side="both") + mels, aux = self.upsample(mels.transpose(1, 2)) + + if batched: + mels = self.fold_with_overlap(mels, target, overlap) + if aux is not None: + aux = self.fold_with_overlap(aux, target, overlap) + + b_size, seq_len, _ = mels.size() + + h1 = torch.zeros(b_size, self.rnn_dims).to(device) + h2 = torch.zeros(b_size, self.rnn_dims).to(device) + x = torch.zeros(b_size, 1).to(device) + + if self.use_aux_net: + d = self.aux_dims + aux_split = [aux[:, :, d * i: d * (i + 1)] for i in range(4)] + + for i in range(seq_len): + + m_t = mels[:, i, :] + + if self.use_aux_net: + a1_t, a2_t, a3_t, a4_t = (a[:, i, :] for a in aux_split) + + x = ( + torch.cat([x, m_t, a1_t], dim=1) + if self.use_aux_net + else torch.cat([x, m_t], dim=1) + ) + x = self.I(x) + h1 = rnn1(x, h1) + + x = x + h1 + inp = torch.cat([x, a2_t], dim=1) if self.use_aux_net else x + h2 = rnn2(inp, h2) + + x = x + h2 + x = torch.cat([x, a3_t], dim=1) if self.use_aux_net else x + x = F.relu(self.fc1(x)) + + x = torch.cat([x, a4_t], dim=1) if self.use_aux_net else x + 
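+                    # fc2/fc3 below form the per-time-step output head: fc3
+                    # yields mixture-of-logistics parameters, Gaussian
+                    # parameters or class logits depending on self.mode, and
+                    # the value sampled from them is fed back as the next
+                    # autoregressive input.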
x = F.relu(self.fc2(x)) + + logits = self.fc3(x) + + if self.mode == "mold": + sample = sample_from_discretized_mix_logistic( + logits.unsqueeze(0).transpose(1, 2) + ) + output.append(sample.view(-1)) + x = sample.transpose(0, 1).to(device) + elif self.mode == "gauss": + sample = sample_from_gaussian( + logits.unsqueeze(0).transpose(1, 2)) + output.append(sample.view(-1)) + x = sample.transpose(0, 1).to(device) + elif isinstance(self.mode, int): + posterior = F.softmax(logits, dim=1) + distrib = torch.distributions.Categorical(posterior) + + sample = 2 * distrib.sample().float() / (self.n_classes - 1.0) - 1.0 + output.append(sample) + x = sample.unsqueeze(-1) + else: + raise RuntimeError( + "Unknown model mode value - ", self.mode) + + if i % 100 == 0: + self.gen_display(i, seq_len, b_size, start) + + output = torch.stack(output).transpose(0, 1) + output = output.cpu().numpy() + output = output.astype(np.float64) + + if batched: + output = self.xfade_and_unfold(output, target, overlap) + else: + output = output[0] + + if self.mulaw and isinstance(self.mode, int): + output = ap.mulaw_decode(output, self.mode) + + # Fade-out at the end to avoid signal cutting out suddenly + fade_out = np.linspace(1, 0, 20 * self.hop_length) + output = output[:wave_len] + + if wave_len > len(fade_out): + output[-20 * self.hop_length:] *= fade_out + + self.train() + return output + + def gen_display(self, i, seq_len, b_size, start): + gen_rate = (i + 1) / (time.time() - start) * b_size / 1000 + realtime_ratio = gen_rate * 1000 / self.sample_rate + stream( + "%i/%i -- batch_size: %i -- gen_rate: %.1f kHz -- x_realtime: %.1f ", + (i * b_size, seq_len * b_size, b_size, gen_rate, realtime_ratio), + ) + + def fold_with_overlap(self, x, target, overlap): + """Fold the tensor with overlap for quick batched inference. + Overlap will be used for crossfading in xfade_and_unfold() + Args: + x (tensor) : Upsampled conditioning features. + shape=(1, timesteps, features) + target (int) : Target timesteps for each index of batch + overlap (int) : Timesteps for both xfade and rnn warmup + Return: + (tensor) : shape=(num_folds, target + 2 * overlap, features) + Details: + x = [[h1, h2, ... 
hn]] + Where each h is a vector of conditioning features + Eg: target=2, overlap=1 with x.size(1)=10 + folded = [[h1, h2, h3, h4], + [h4, h5, h6, h7], + [h7, h8, h9, h10]] + """ + + _, total_len, features = x.size() + + # Calculate variables needed + num_folds = (total_len - overlap) // (target + overlap) + extended_len = num_folds * (overlap + target) + overlap + remaining = total_len - extended_len + + # Pad if some time steps poking out + if remaining != 0: + num_folds += 1 + padding = target + 2 * overlap - remaining + x = self.pad_tensor(x, padding, side="after") + + folded = torch.zeros(num_folds, target + 2 * + overlap, features).to(x.device) + + # Get the values for the folded tensor + for i in range(num_folds): + start = i * (target + overlap) + end = start + target + 2 * overlap + folded[i] = x[:, start:end, :] + + return folded + + @staticmethod + def get_gru_cell(gru): + gru_cell = nn.GRUCell(gru.input_size, gru.hidden_size) + gru_cell.weight_hh.data = gru.weight_hh_l0.data + gru_cell.weight_ih.data = gru.weight_ih_l0.data + gru_cell.bias_hh.data = gru.bias_hh_l0.data + gru_cell.bias_ih.data = gru.bias_ih_l0.data + return gru_cell + + @staticmethod + def pad_tensor(x, pad, side="both"): + # NB - this is just a quick method i need right now + # i.e., it won't generalise to other shapes/dims + b, t, c = x.size() + total = t + 2 * pad if side == "both" else t + pad + padded = torch.zeros(b, total, c).to(x.device) + if side in ("before", "both"): + padded[:, pad: pad + t, :] = x + elif side == "after": + padded[:, :t, :] = x + return padded + + @staticmethod + def xfade_and_unfold(y, target, overlap): + """Applies a crossfade and unfolds into a 1d array. + Args: + y (ndarry) : Batched sequences of audio samples + shape=(num_folds, target + 2 * overlap) + dtype=np.float64 + overlap (int) : Timesteps for both xfade and rnn warmup + Return: + (ndarry) : audio samples in a 1d array + shape=(total_len) + dtype=np.float64 + Details: + y = [[seq1], + [seq2], + [seq3]] + Apply a gain envelope at both ends of the sequences + y = [[seq1_in, seq1_target, seq1_out], + [seq2_in, seq2_target, seq2_out], + [seq3_in, seq3_target, seq3_out]] + Stagger and add up the groups of samples: + [seq1_in, seq1_target, (seq1_out + seq2_in), seq2_target, ...] 
+ """ + + num_folds, length = y.shape + target = length - 2 * overlap + total_len = num_folds * (target + overlap) + overlap + + # Need some silence for the rnn warmup + silence_len = overlap // 2 + fade_len = overlap - silence_len + silence = np.zeros((silence_len), dtype=np.float64) + + # Equal power crossfade + t = np.linspace(-1, 1, fade_len, dtype=np.float64) + fade_in = np.sqrt(0.5 * (1 + t)) + fade_out = np.sqrt(0.5 * (1 - t)) + + # Concat the silence to the fades + fade_in = np.concatenate([silence, fade_in]) + fade_out = np.concatenate([fade_out, silence]) + + # Apply the gain to the overlap samples + y[:, :overlap] *= fade_in + y[:, -overlap:] *= fade_out + + unfolded = np.zeros((total_len), dtype=np.float64) + + # Loop to add up all the samples + for i in range(num_folds): + start = i * (target + overlap) + end = start + target + 2 * overlap + unfolded[start:end] += y[i] + + return unfolded + + def load_checkpoint(self, config, checkpoint_path, eval=False): # pylint: disable=unused-argument, redefined-builtin + state = torch.load(checkpoint_path, map_location=torch.device('cpu')) + self.load_state_dict(state['model']) + if eval: + self.eval() + assert not self.training diff --git a/TTS/vocoder/pqmf_output.wav b/TTS/vocoder/pqmf_output.wav new file mode 100644 index 0000000000000000000000000000000000000000..8a77747b00198a4adfd6c398998517df5b4bdb8d Binary files /dev/null and b/TTS/vocoder/pqmf_output.wav differ diff --git a/TTS/vocoder/tf/layers/melgan.py b/TTS/vocoder/tf/layers/melgan.py new file mode 100644 index 0000000000000000000000000000000000000000..34b25d65cf10bca57e56d15356054650a66ecc17 --- /dev/null +++ b/TTS/vocoder/tf/layers/melgan.py @@ -0,0 +1,56 @@ +import tensorflow as tf + + +class ReflectionPad1d(tf.keras.layers.Layer): + def __init__(self, padding): + super(ReflectionPad1d, self).__init__() + self.padding = padding + + def call(self, x): + return tf.pad(x, [[0, 0], [self.padding, self.padding], [0, 0], [0, 0]], "REFLECT") + + +class ResidualStack(tf.keras.layers.Layer): + def __init__(self, channels, num_res_blocks, kernel_size, name): + super(ResidualStack, self).__init__(name=name) + + assert (kernel_size - 1) % 2 == 0, " [!] kernel_size has to be odd." 
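+        # Each residual block below uses a dilation of kernel_size**idx, so
+        # the reflection padding must grow with the block index to keep the
+        # time dimension unchanged (the Conv2D layers use 'valid' padding).
+        # For example, with kernel_size=3 the block at idx=2 gets dilation 9
+        # and reflection padding 9.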
+ base_padding = (kernel_size - 1) // 2 + + self.blocks = [] + num_layers = 2 + for idx in range(num_res_blocks): + layer_kernel_size = kernel_size + layer_dilation = layer_kernel_size**idx + layer_padding = base_padding * layer_dilation + block = [ + tf.keras.layers.LeakyReLU(0.2), + ReflectionPad1d(layer_padding), + tf.keras.layers.Conv2D(filters=channels, + kernel_size=(kernel_size, 1), + dilation_rate=(layer_dilation, 1), + use_bias=True, + padding='valid', + name=f'blocks.{idx}.{num_layers}'), + tf.keras.layers.LeakyReLU(0.2), + tf.keras.layers.Conv2D(filters=channels, + kernel_size=(1, 1), + use_bias=True, + name=f'blocks.{idx}.{num_layers + 2}') + ] + self.blocks.append(block) + self.shortcuts = [ + tf.keras.layers.Conv2D(channels, + kernel_size=1, + use_bias=True, + name=f'shortcuts.{i}') + for i in range(num_res_blocks) + ] + + def call(self, x): + for block, shortcut in zip(self.blocks, self.shortcuts): + res = shortcut(x) + for layer in block: + x = layer(x) + x += res + return x diff --git a/TTS/vocoder/tf/layers/pqmf.py b/TTS/vocoder/tf/layers/pqmf.py new file mode 100644 index 0000000000000000000000000000000000000000..c018971f3e0e43326dd84b1d8d734d42d7b940fd --- /dev/null +++ b/TTS/vocoder/tf/layers/pqmf.py @@ -0,0 +1,66 @@ +import numpy as np +import tensorflow as tf + +from scipy import signal as sig + + +class PQMF(tf.keras.layers.Layer): + def __init__(self, N=4, taps=62, cutoff=0.15, beta=9.0): + super(PQMF, self).__init__() + # define filter coefficient + self.N = N + self.taps = taps + self.cutoff = cutoff + self.beta = beta + + QMF = sig.firwin(taps + 1, cutoff, window=('kaiser', beta)) + H = np.zeros((N, len(QMF))) + G = np.zeros((N, len(QMF))) + for k in range(N): + constant_factor = (2 * k + 1) * (np.pi / + (2 * N)) * (np.arange(taps + 1) - + ((taps - 1) / 2)) + phase = (-1)**k * np.pi / 4 + H[k] = 2 * QMF * np.cos(constant_factor + phase) + + G[k] = 2 * QMF * np.cos(constant_factor - phase) + + # [N, 1, taps + 1] == [filter_width, in_channels, out_channels] + self.H = np.transpose(H[:, None, :], (2, 1, 0)).astype('float32') + self.G = np.transpose(G[None, :, :], (2, 1, 0)).astype('float32') + + # filter for downsampling & upsampling + updown_filter = np.zeros((N, N, N), dtype=np.float32) + for k in range(N): + updown_filter[0, k, k] = 1.0 + self.updown_filter = updown_filter.astype(np.float32) + + def analysis(self, x): + """ + x : B x 1 x T + """ + x = tf.transpose(x, perm=[0, 2, 1]) + x = tf.pad(x, [[0, 0], [self.taps // 2, self.taps // 2], [0, 0]], constant_values=0.0) + x = tf.nn.conv1d(x, self.H, stride=1, padding='VALID') + x = tf.nn.conv1d(x, + self.updown_filter, + stride=self.N, + padding='VALID') + x = tf.transpose(x, perm=[0, 2, 1]) + return x + + def synthesis(self, x): + """ + x : B x D x T + """ + x = tf.transpose(x, perm=[0, 2, 1]) + x = tf.nn.conv1d_transpose( + x, + self.updown_filter * self.N, + strides=self.N, + output_shape=(tf.shape(x)[0], tf.shape(x)[1] * self.N, + self.N)) + x = tf.pad(x, [[0, 0], [self.taps // 2, self.taps // 2], [0, 0]], constant_values=0.0) + x = tf.nn.conv1d(x, self.G, stride=1, padding="VALID") + x = tf.transpose(x, perm=[0, 2, 1]) + return x diff --git a/TTS/vocoder/tf/models/melgan_generator.py b/TTS/vocoder/tf/models/melgan_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..9a029df45a303b6a910c562347ba1478ff123eb8 --- /dev/null +++ b/TTS/vocoder/tf/models/melgan_generator.py @@ -0,0 +1,128 @@ +import logging +import os + +os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # FATAL 
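+# Together with TF_CPP_MIN_LOG_LEVEL above, the line below silences the
+# TensorFlow Python logger before `import tensorflow`, so inference-only use
+# of this module does not flood stdout with framework warnings.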
+logging.getLogger('tensorflow').setLevel(logging.FATAL) + +import tensorflow as tf +from TTS.vocoder.tf.layers.melgan import ResidualStack, ReflectionPad1d + + +#pylint: disable=too-many-ancestors +#pylint: disable=abstract-method +class MelganGenerator(tf.keras.models.Model): + """ Melgan Generator TF implementation dedicated for inference with no + weight norm """ + def __init__(self, + in_channels=80, + out_channels=1, + proj_kernel=7, + base_channels=512, + upsample_factors=(8, 8, 2, 2), + res_kernel=3, + num_res_blocks=3): + super(MelganGenerator, self).__init__() + + self.in_channels = in_channels + + # assert model parameters + assert (proj_kernel - + 1) % 2 == 0, " [!] proj_kernel should be an odd number." + + # setup additional model parameters + base_padding = (proj_kernel - 1) // 2 + act_slope = 0.2 + self.inference_padding = 2 + + # initial layer + self.initial_layer = [ + ReflectionPad1d(base_padding), + tf.keras.layers.Conv2D(filters=base_channels, + kernel_size=(proj_kernel, 1), + strides=1, + padding='valid', + use_bias=True, + name="1") + ] + num_layers = 3 # count number of layers for layer naming + + # upsampling layers and residual stacks + self.upsample_layers = [] + for idx, upsample_factor in enumerate(upsample_factors): + layer_out_channels = base_channels // (2**(idx + 1)) + layer_filter_size = upsample_factor * 2 + layer_stride = upsample_factor + # layer_output_padding = upsample_factor % 2 + self.upsample_layers += [ + tf.keras.layers.LeakyReLU(act_slope), + tf.keras.layers.Conv2DTranspose( + filters=layer_out_channels, + kernel_size=(layer_filter_size, 1), + strides=(layer_stride, 1), + padding='same', + # output_padding=layer_output_padding, + use_bias=True, + name=f'{num_layers}'), + ResidualStack(channels=layer_out_channels, + num_res_blocks=num_res_blocks, + kernel_size=res_kernel, + name=f'layers.{num_layers + 1}') + ] + num_layers += num_res_blocks - 1 + + self.upsample_layers += [tf.keras.layers.LeakyReLU(act_slope)] + + # final layer + self.final_layers = [ + ReflectionPad1d(base_padding), + tf.keras.layers.Conv2D(filters=out_channels, + kernel_size=(proj_kernel, 1), + use_bias=True, + name=f'layers.{num_layers + 1}'), + tf.keras.layers.Activation("tanh") + ] + + # self.model_layers = tf.keras.models.Sequential(self.initial_layer + self.upsample_layers + self.final_layers, name="layers") + self.model_layers = self.initial_layer + self.upsample_layers + self.final_layers + + @tf.function(experimental_relax_shapes=True) + def call(self, c, training=False): + """ + c : B x C x T + """ + if training: + raise NotImplementedError() + return self.inference(c) + + def inference(self, c): + c = tf.transpose(c, perm=[0, 2, 1]) + c = tf.expand_dims(c, 2) + # FIXME: TF had no replicate padding as in Torch + # c = tf.pad(c, [[0, 0], [self.inference_padding, self.inference_padding], [0, 0], [0, 0]], "REFLECT") + o = c + for layer in self.model_layers: + o = layer(o) + # o = self.model_layers(c) + o = tf.transpose(o, perm=[0, 3, 2, 1]) + return o[:, :, 0, :] + + def build_inference(self): + x = tf.random.uniform((1, self.in_channels, 4), dtype=tf.float32) + self(x, training=False) + + @tf.function( + experimental_relax_shapes=True, + input_signature=[ + tf.TensorSpec([1, None, None], dtype=tf.float32), + ],) + def inference_tflite(self, c): + c = tf.transpose(c, perm=[0, 2, 1]) + c = tf.expand_dims(c, 2) + # FIXME: TF had no replicate padding as in Torch + # c = tf.pad(c, [[0, 0], [self.inference_padding, self.inference_padding], [0, 0], [0, 0]], "REFLECT") + o = c + 
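+        # Same layer-by-layer pass as in `inference`, duplicated here because
+        # this method is traced with a fixed [1, None, None] input signature
+        # for TFLite conversion.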
for layer in self.model_layers: + o = layer(o) + # o = self.model_layers(c) + o = tf.transpose(o, perm=[0, 3, 2, 1]) + return o[:, :, 0, :] diff --git a/TTS/vocoder/tf/models/multiband_melgan_generator.py b/TTS/vocoder/tf/models/multiband_melgan_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..bdd333ed3a13c03213065a66778b5037338eb9f7 --- /dev/null +++ b/TTS/vocoder/tf/models/multiband_melgan_generator.py @@ -0,0 +1,60 @@ +import tensorflow as tf + +from TTS.vocoder.tf.models.melgan_generator import MelganGenerator +from TTS.vocoder.tf.layers.pqmf import PQMF + +#pylint: disable=too-many-ancestors +#pylint: disable=abstract-method +class MultibandMelganGenerator(MelganGenerator): + def __init__(self, + in_channels=80, + out_channels=4, + proj_kernel=7, + base_channels=384, + upsample_factors=(2, 8, 2, 2), + res_kernel=3, + num_res_blocks=3): + super(MultibandMelganGenerator, + self).__init__(in_channels=in_channels, + out_channels=out_channels, + proj_kernel=proj_kernel, + base_channels=base_channels, + upsample_factors=upsample_factors, + res_kernel=res_kernel, + num_res_blocks=num_res_blocks) + self.pqmf_layer = PQMF(N=4, taps=62, cutoff=0.15, beta=9.0) + + def pqmf_analysis(self, x): + return self.pqmf_layer.analysis(x) + + def pqmf_synthesis(self, x): + return self.pqmf_layer.synthesis(x) + + def inference(self, c): + c = tf.transpose(c, perm=[0, 2, 1]) + c = tf.expand_dims(c, 2) + # FIXME: TF had no replicate padding as in Torch + # c = tf.pad(c, [[0, 0], [self.inference_padding, self.inference_padding], [0, 0], [0, 0]], "REFLECT") + o = c + for layer in self.model_layers: + o = layer(o) + o = tf.transpose(o, perm=[0, 3, 2, 1]) + o = self.pqmf_layer.synthesis(o[:, :, 0, :]) + return o + + @tf.function( + experimental_relax_shapes=True, + input_signature=[ + tf.TensorSpec([1, 80, None], dtype=tf.float32), + ],) + def inference_tflite(self, c): + c = tf.transpose(c, perm=[0, 2, 1]) + c = tf.expand_dims(c, 2) + # FIXME: TF had no replicate padding as in Torch + # c = tf.pad(c, [[0, 0], [self.inference_padding, self.inference_padding], [0, 0], [0, 0]], "REFLECT") + o = c + for layer in self.model_layers: + o = layer(o) + o = tf.transpose(o, perm=[0, 3, 2, 1]) + o = self.pqmf_layer.synthesis(o[:, :, 0, :]) + return o diff --git a/TTS/vocoder/tf/utils/__init__.py b/TTS/vocoder/tf/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/TTS/vocoder/tf/utils/convert_torch_to_tf_utils.py b/TTS/vocoder/tf/utils/convert_torch_to_tf_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..25139cc3cee108ab1eba696ce253a6aea2e58395 --- /dev/null +++ b/TTS/vocoder/tf/utils/convert_torch_to_tf_utils.py @@ -0,0 +1,45 @@ +import numpy as np +import tensorflow as tf + + +def compare_torch_tf(torch_tensor, tf_tensor): + """ Compute the average absolute difference b/w torch and tf tensors """ + return abs(torch_tensor.detach().numpy() - tf_tensor.numpy()).mean() + + +def convert_tf_name(tf_name): + """ Convert certain patterns in TF layer names to Torch patterns """ + tf_name_tmp = tf_name + tf_name_tmp = tf_name_tmp.replace(':0', '') + tf_name_tmp = tf_name_tmp.replace('/forward_lstm/lstm_cell_1/recurrent_kernel', '/weight_hh_l0') + tf_name_tmp = tf_name_tmp.replace('/forward_lstm/lstm_cell_2/kernel', '/weight_ih_l1') + tf_name_tmp = tf_name_tmp.replace('/recurrent_kernel', '/weight_hh') + tf_name_tmp = tf_name_tmp.replace('/kernel', '/weight') + tf_name_tmp = 
tf_name_tmp.replace('/gamma', '/weight') + tf_name_tmp = tf_name_tmp.replace('/beta', '/bias') + tf_name_tmp = tf_name_tmp.replace('/', '.') + return tf_name_tmp + + +def transfer_weights_torch_to_tf(tf_vars, var_map_dict, state_dict): + """ Transfer weigths from torch state_dict to TF variables """ + print(" > Passing weights from Torch to TF ...") + for tf_var in tf_vars: + torch_var_name = var_map_dict[tf_var.name] + print(f' | > {tf_var.name} <-- {torch_var_name}') + # if tuple, it is a bias variable + if 'kernel' in tf_var.name: + torch_weight = state_dict[torch_var_name] + numpy_weight = torch_weight.permute([2, 1, 0]).numpy()[:, None, :, :] + if 'bias' in tf_var.name: + torch_weight = state_dict[torch_var_name] + numpy_weight = torch_weight + assert np.all(tf_var.shape == numpy_weight.shape), f" [!] weight shapes does not match: {tf_var.name} vs {torch_var_name} --> {tf_var.shape} vs {numpy_weight.shape}" + tf.keras.backend.set_value(tf_var, numpy_weight) + return tf_vars + + +def load_tf_vars(model_tf, tf_vars): + for tf_var in tf_vars: + model_tf.get_layer(tf_var.name).set_weights(tf_var) + return model_tf diff --git a/TTS/vocoder/tf/utils/generic_utils.py b/TTS/vocoder/tf/utils/generic_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..0daf2d6e139e1325bfb5edaa9d9cbc4ed89a1c8d --- /dev/null +++ b/TTS/vocoder/tf/utils/generic_utils.py @@ -0,0 +1,35 @@ +import re +import importlib + + +def to_camel(text): + text = text.capitalize() + return re.sub(r'(?!^)_([a-zA-Z])', lambda m: m.group(1).upper(), text) + + +def setup_generator(c): + print(" > Generator Model: {}".format(c.generator_model)) + MyModel = importlib.import_module('TTS.vocoder.tf.models.' + + c.generator_model.lower()) + MyModel = getattr(MyModel, to_camel(c.generator_model)) + if c.generator_model in 'melgan_generator': + model = MyModel( + in_channels=c.audio['num_mels'], + out_channels=1, + proj_kernel=7, + base_channels=512, + upsample_factors=c.generator_model_params['upsample_factors'], + res_kernel=3, + num_res_blocks=c.generator_model_params['num_res_blocks']) + if c.generator_model in 'melgan_fb_generator': + pass + if c.generator_model in 'multiband_melgan_generator': + model = MyModel( + in_channels=c.audio['num_mels'], + out_channels=4, + proj_kernel=7, + base_channels=384, + upsample_factors=c.generator_model_params['upsample_factors'], + res_kernel=3, + num_res_blocks=c.generator_model_params['num_res_blocks']) + return model diff --git a/TTS/vocoder/tf/utils/io.py b/TTS/vocoder/tf/utils/io.py new file mode 100644 index 0000000000000000000000000000000000000000..c73c9cd86a4a585bb09b4cbd3f15cf16c3ddc42d --- /dev/null +++ b/TTS/vocoder/tf/utils/io.py @@ -0,0 +1,27 @@ +import datetime +import pickle +import tensorflow as tf + + +def save_checkpoint(model, current_step, epoch, output_path, **kwargs): + """ Save TF Vocoder model """ + state = { + 'model': model.weights, + 'step': current_step, + 'epoch': epoch, + 'date': datetime.date.today().strftime("%B %d, %Y"), + } + state.update(kwargs) + pickle.dump(state, open(output_path, 'wb')) + + +def load_checkpoint(model, checkpoint_path): + """ Load TF Vocoder model """ + checkpoint = pickle.load(open(checkpoint_path, 'rb')) + chkp_var_dict = {var.name: var.numpy() for var in checkpoint['model']} + tf_vars = model.weights + for tf_var in tf_vars: + layer_name = tf_var.name + chkp_var_value = chkp_var_dict[layer_name] + tf.keras.backend.set_value(tf_var, chkp_var_value) + return model diff --git a/TTS/vocoder/tf/utils/tflite.py 
b/TTS/vocoder/tf/utils/tflite.py new file mode 100644 index 0000000000000000000000000000000000000000..d62a081a24fb3892881495c8f8a2dbc14d6c825a --- /dev/null +++ b/TTS/vocoder/tf/utils/tflite.py @@ -0,0 +1,31 @@ +import tensorflow as tf + + +def convert_melgan_to_tflite(model, + output_path=None, + experimental_converter=True): + """Convert Tensorflow MelGAN model to TFLite. Save a binary file if output_path is + provided, else return TFLite model.""" + + concrete_function = model.inference_tflite.get_concrete_function() + converter = tf.lite.TFLiteConverter.from_concrete_functions( + [concrete_function]) + converter.experimental_new_converter = experimental_converter + converter.optimizations = [] + converter.target_spec.supported_ops = [ + tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS + ] + tflite_model = converter.convert() + print(f'Tflite Model size is {len(tflite_model) / (1024.0 * 1024.0)} MBs.') + if output_path is not None: + # same model binary if outputpath is provided + with open(output_path, 'wb') as f: + f.write(tflite_model) + return None + return tflite_model + + +def load_tflite_model(tflite_path): + tflite_model = tf.lite.Interpreter(model_path=tflite_path) + tflite_model.allocate_tensors() + return tflite_model diff --git a/TTS/vocoder/utils/__init__.py b/TTS/vocoder/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/TTS/vocoder/utils/distribution.py b/TTS/vocoder/utils/distribution.py new file mode 100644 index 0000000000000000000000000000000000000000..6aba5e34e8e5176b5e45e7b6507f5655a3b8598a --- /dev/null +++ b/TTS/vocoder/utils/distribution.py @@ -0,0 +1,168 @@ +import numpy as np +import math +import torch +from torch.distributions.normal import Normal +import torch.nn.functional as F + + +def gaussian_loss(y_hat, y, log_std_min=-7.0): + assert y_hat.dim() == 3 + assert y_hat.size(2) == 2 + mean = y_hat[:, :, :1] + log_std = torch.clamp(y_hat[:, :, 1:], min=log_std_min) + # TODO: replace with pytorch dist + log_probs = -0.5 * ( + -math.log(2.0 * math.pi) + - 2.0 * log_std + - torch.pow(y - mean, 2) * torch.exp((-2.0 * log_std)) + ) + return log_probs.squeeze().mean() + + +def sample_from_gaussian(y_hat, log_std_min=-7.0, scale_factor=1.0): + assert y_hat.size(2) == 2 + mean = y_hat[:, :, :1] + log_std = torch.clamp(y_hat[:, :, 1:], min=log_std_min) + dist = Normal( + mean, + torch.exp(log_std), + ) + sample = dist.sample() + sample = torch.clamp(torch.clamp( + sample, min=-scale_factor), max=scale_factor) + del dist + return sample + + +def log_sum_exp(x): + """ numerically stable log_sum_exp implementation that prevents overflow """ + # TF ordering + axis = len(x.size()) - 1 + m, _ = torch.max(x, dim=axis) + m2, _ = torch.max(x, dim=axis, keepdim=True) + return m + torch.log(torch.sum(torch.exp(x - m2), dim=axis)) + + +# It is adapted from https://github.com/r9y9/wavenet_vocoder/blob/master/wavenet_vocoder/mixture.py +def discretized_mix_logistic_loss( + y_hat, y, num_classes=65536, log_scale_min=None, reduce=True +): + if log_scale_min is None: + log_scale_min = float(np.log(1e-14)) + y_hat = y_hat.permute(0, 2, 1) + assert y_hat.dim() == 3 + assert y_hat.size(1) % 3 == 0 + nr_mix = y_hat.size(1) // 3 + + # (B x T x C) + y_hat = y_hat.transpose(1, 2) + + # unpack parameters. 
(B, T, num_mixtures) x 3 + logit_probs = y_hat[:, :, :nr_mix] + means = y_hat[:, :, nr_mix: 2 * nr_mix] + log_scales = torch.clamp( + y_hat[:, :, 2 * nr_mix: 3 * nr_mix], min=log_scale_min) + + # B x T x 1 -> B x T x num_mixtures + y = y.expand_as(means) + + centered_y = y - means + inv_stdv = torch.exp(-log_scales) + plus_in = inv_stdv * (centered_y + 1.0 / (num_classes - 1)) + cdf_plus = torch.sigmoid(plus_in) + min_in = inv_stdv * (centered_y - 1.0 / (num_classes - 1)) + cdf_min = torch.sigmoid(min_in) + + # log probability for edge case of 0 (before scaling) + # equivalent: torch.log(F.sigmoid(plus_in)) + log_cdf_plus = plus_in - F.softplus(plus_in) + + # log probability for edge case of 255 (before scaling) + # equivalent: (1 - F.sigmoid(min_in)).log() + log_one_minus_cdf_min = -F.softplus(min_in) + + # probability for all other cases + cdf_delta = cdf_plus - cdf_min + + mid_in = inv_stdv * centered_y + # log probability in the center of the bin, to be used in extreme cases + # (not actually used in our code) + log_pdf_mid = mid_in - log_scales - 2.0 * F.softplus(mid_in) + + # tf equivalent + + # log_probs = tf.where(x < -0.999, log_cdf_plus, + # tf.where(x > 0.999, log_one_minus_cdf_min, + # tf.where(cdf_delta > 1e-5, + # tf.log(tf.maximum(cdf_delta, 1e-12)), + # log_pdf_mid - np.log(127.5)))) + + # TODO: cdf_delta <= 1e-5 actually can happen. How can we choose the value + # for num_classes=65536 case? 1e-7? not sure.. + inner_inner_cond = (cdf_delta > 1e-5).float() + + inner_inner_out = inner_inner_cond * torch.log( + torch.clamp(cdf_delta, min=1e-12) + ) + (1.0 - inner_inner_cond) * (log_pdf_mid - np.log((num_classes - 1) / 2)) + inner_cond = (y > 0.999).float() + inner_out = ( + inner_cond * log_one_minus_cdf_min + + (1.0 - inner_cond) * inner_inner_out + ) + cond = (y < -0.999).float() + log_probs = cond * log_cdf_plus + (1.0 - cond) * inner_out + + log_probs = log_probs + F.log_softmax(logit_probs, -1) + + if reduce: + return -torch.mean(log_sum_exp(log_probs)) + return -log_sum_exp(log_probs).unsqueeze(-1) + + +def sample_from_discretized_mix_logistic(y, log_scale_min=None): + """ + Sample from discretized mixture of logistic distributions + Args: + y (Tensor): B x C x T + log_scale_min (float): Log scale minimum value + Returns: + Tensor: sample in range of [-1, 1]. 
+ """ + if log_scale_min is None: + log_scale_min = float(np.log(1e-14)) + assert y.size(1) % 3 == 0 + nr_mix = y.size(1) // 3 + + # B x T x C + y = y.transpose(1, 2) + logit_probs = y[:, :, :nr_mix] + + # sample mixture indicator from softmax + temp = logit_probs.data.new(logit_probs.size()).uniform_(1e-5, 1.0 - 1e-5) + temp = logit_probs.data - torch.log(-torch.log(temp)) + _, argmax = temp.max(dim=-1) + + # (B, T) -> (B, T, nr_mix) + one_hot = to_one_hot(argmax, nr_mix) + # select logistic parameters + means = torch.sum(y[:, :, nr_mix: 2 * nr_mix] * one_hot, dim=-1) + log_scales = torch.clamp( + torch.sum(y[:, :, 2 * nr_mix: 3 * nr_mix] * one_hot, dim=-1), min=log_scale_min + ) + # sample from logistic & clip to interval + # we don't actually round to the nearest 8bit value when sampling + u = means.data.new(means.size()).uniform_(1e-5, 1.0 - 1e-5) + x = means + torch.exp(log_scales) * (torch.log(u) - torch.log(1.0 - u)) + + x = torch.clamp(torch.clamp(x, min=-1.0), max=1.0) + + return x + + +def to_one_hot(tensor, n, fill_with=1.0): + # we perform one hot encore with respect to the last axis + one_hot = torch.FloatTensor(tensor.size() + (n,)).zero_() + if tensor.is_cuda: + one_hot = one_hot.cuda() + one_hot.scatter_(len(tensor.size()), tensor.unsqueeze(-1), fill_with) + return one_hot diff --git a/TTS/vocoder/utils/generic_utils.py b/TTS/vocoder/utils/generic_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..fb943a3751a23da486ab8d4615dc4dc5c2150d90 --- /dev/null +++ b/TTS/vocoder/utils/generic_utils.py @@ -0,0 +1,216 @@ +import re +import torch +import importlib +import numpy as np +from matplotlib import pyplot as plt + +from TTS.tts.utils.visual import plot_spectrogram + + +def interpolate_vocoder_input(scale_factor, spec): + """Interpolate spectrogram by the scale factor. + It is mainly used to match the sampling rates of + the tts and vocoder models. + + Args: + scale_factor (float): scale factor to interpolate the spectrogram + spec (np.array): spectrogram to be interpolated + + Returns: + torch.tensor: interpolated spectrogram. 
+ """ + print(" > before interpolation :", spec.shape) + spec = torch.tensor(spec).unsqueeze(0).unsqueeze(0) # pylint: disable=not-callable + spec = torch.nn.functional.interpolate(spec, + scale_factor=scale_factor, + recompute_scale_factor=True, + mode='bilinear', + align_corners=False).squeeze(0) + print(" > after interpolation :", spec.shape) + return spec + + +def plot_results(y_hat, y, ap, global_step, name_prefix): + """ Plot vocoder model results """ + + # select an instance from batch + y_hat = y_hat[0].squeeze(0).detach().cpu().numpy() + y = y[0].squeeze(0).detach().cpu().numpy() + + spec_fake = ap.melspectrogram(y_hat).T + spec_real = ap.melspectrogram(y).T + spec_diff = np.abs(spec_fake - spec_real) + + # plot figure and save it + fig_wave = plt.figure() + plt.subplot(2, 1, 1) + plt.plot(y) + plt.title("groundtruth speech") + plt.subplot(2, 1, 2) + plt.plot(y_hat) + plt.title(f"generated speech @ {global_step} steps") + plt.tight_layout() + plt.close() + + figures = { + name_prefix + "spectrogram/fake": plot_spectrogram(spec_fake), + name_prefix + "spectrogram/real": plot_spectrogram(spec_real), + name_prefix + "spectrogram/diff": plot_spectrogram(spec_diff), + name_prefix + "speech_comparison": fig_wave, + } + return figures + + +def to_camel(text): + text = text.capitalize() + return re.sub(r'(?!^)_([a-zA-Z])', lambda m: m.group(1).upper(), text) + + +def setup_wavernn(c): + print(" > Model: WaveRNN") + MyModel = importlib.import_module("TTS.vocoder.models.wavernn") + MyModel = getattr(MyModel, "WaveRNN") + model = MyModel( + rnn_dims=c.wavernn_model_params['rnn_dims'], + fc_dims=c.wavernn_model_params['fc_dims'], + mode=c.mode, + mulaw=c.mulaw, + pad=c.padding, + use_aux_net=c.wavernn_model_params['use_aux_net'], + use_upsample_net=c.wavernn_model_params['use_upsample_net'], + upsample_factors=c.wavernn_model_params['upsample_factors'], + feat_dims=c.audio['num_mels'], + compute_dims=c.wavernn_model_params['compute_dims'], + res_out_dims=c.wavernn_model_params['res_out_dims'], + num_res_blocks=c.wavernn_model_params['num_res_blocks'], + hop_length=c.audio["hop_length"], + sample_rate=c.audio["sample_rate"], + ) + return model + + +def setup_generator(c): + print(" > Generator Model: {}".format(c.generator_model)) + MyModel = importlib.import_module('TTS.vocoder.models.' 
+ + c.generator_model.lower()) + MyModel = getattr(MyModel, to_camel(c.generator_model)) + if c.generator_model.lower() in 'melgan_generator': + model = MyModel( + in_channels=c.audio['num_mels'], + out_channels=1, + proj_kernel=7, + base_channels=512, + upsample_factors=c.generator_model_params['upsample_factors'], + res_kernel=3, + num_res_blocks=c.generator_model_params['num_res_blocks']) + if c.generator_model in 'melgan_fb_generator': + pass + if c.generator_model.lower() in 'multiband_melgan_generator': + model = MyModel( + in_channels=c.audio['num_mels'], + out_channels=4, + proj_kernel=7, + base_channels=384, + upsample_factors=c.generator_model_params['upsample_factors'], + res_kernel=3, + num_res_blocks=c.generator_model_params['num_res_blocks']) + if c.generator_model.lower() in 'fullband_melgan_generator': + model = MyModel( + in_channels=c.audio['num_mels'], + out_channels=1, + proj_kernel=7, + base_channels=512, + upsample_factors=c.generator_model_params['upsample_factors'], + res_kernel=3, + num_res_blocks=c.generator_model_params['num_res_blocks']) + if c.generator_model.lower() in 'parallel_wavegan_generator': + model = MyModel( + in_channels=1, + out_channels=1, + kernel_size=3, + num_res_blocks=c.generator_model_params['num_res_blocks'], + stacks=c.generator_model_params['stacks'], + res_channels=64, + gate_channels=128, + skip_channels=64, + aux_channels=c.audio['num_mels'], + dropout=0.0, + bias=True, + use_weight_norm=True, + upsample_factors=c.generator_model_params['upsample_factors']) + if c.generator_model.lower() in 'wavegrad': + model = MyModel( + in_channels=c['audio']['num_mels'], + out_channels=1, + use_weight_norm=c['model_params']['use_weight_norm'], + x_conv_channels=c['model_params']['x_conv_channels'], + y_conv_channels=c['model_params']['y_conv_channels'], + dblock_out_channels=c['model_params']['dblock_out_channels'], + ublock_out_channels=c['model_params']['ublock_out_channels'], + upsample_factors=c['model_params']['upsample_factors'], + upsample_dilations=c['model_params']['upsample_dilations']) + return model + + +def setup_discriminator(c): + print(" > Discriminator Model: {}".format(c.discriminator_model)) + if 'parallel_wavegan' in c.discriminator_model: + MyModel = importlib.import_module( + 'TTS.vocoder.models.parallel_wavegan_discriminator') + else: + MyModel = importlib.import_module('TTS.vocoder.models.' + + c.discriminator_model.lower()) + MyModel = getattr(MyModel, to_camel(c.discriminator_model.lower())) + if c.discriminator_model in 'random_window_discriminator': + model = MyModel( + cond_channels=c.audio['num_mels'], + hop_length=c.audio['hop_length'], + uncond_disc_donwsample_factors=c. + discriminator_model_params['uncond_disc_donwsample_factors'], + cond_disc_downsample_factors=c. + discriminator_model_params['cond_disc_downsample_factors'], + cond_disc_out_channels=c. + discriminator_model_params['cond_disc_out_channels'], + window_sizes=c.discriminator_model_params['window_sizes']) + if c.discriminator_model in 'melgan_multiscale_discriminator': + model = MyModel( + in_channels=1, + out_channels=1, + kernel_sizes=(5, 3), + base_channels=c.discriminator_model_params['base_channels'], + max_channels=c.discriminator_model_params['max_channels'], + downsample_factors=c. 
+ discriminator_model_params['downsample_factors']) + if c.discriminator_model == 'residual_parallel_wavegan_discriminator': + model = MyModel( + in_channels=1, + out_channels=1, + kernel_size=3, + num_layers=c.discriminator_model_params['num_layers'], + stacks=c.discriminator_model_params['stacks'], + res_channels=64, + gate_channels=128, + skip_channels=64, + dropout=0.0, + bias=True, + nonlinear_activation="LeakyReLU", + nonlinear_activation_params={"negative_slope": 0.2}, + ) + if c.discriminator_model == 'parallel_wavegan_discriminator': + model = MyModel( + in_channels=1, + out_channels=1, + kernel_size=3, + num_layers=c.discriminator_model_params['num_layers'], + conv_channels=64, + dilation_factor=1, + nonlinear_activation="LeakyReLU", + nonlinear_activation_params={"negative_slope": 0.2}, + bias=True + ) + return model + + +# def check_config(c): +# c = None +# pass diff --git a/TTS/vocoder/utils/io.py b/TTS/vocoder/utils/io.py new file mode 100644 index 0000000000000000000000000000000000000000..5c42dfcaa34c99dd638fa3dfd777cbe975911c7b --- /dev/null +++ b/TTS/vocoder/utils/io.py @@ -0,0 +1,83 @@ +import os +import torch +import datetime +import pickle as pickle_tts + +from TTS.utils.io import RenamingUnpickler + + +def load_checkpoint(model, checkpoint_path, use_cuda=False, eval=False): + try: + state = torch.load(checkpoint_path, map_location=torch.device('cpu')) + except ModuleNotFoundError: + pickle_tts.Unpickler = RenamingUnpickler + state = torch.load(checkpoint_path, map_location=torch.device('cpu'), pickle_module=pickle_tts) + model.load_state_dict(state['model']) + if use_cuda: + model.cuda() + if eval: + model.eval() + return model, state + + +def save_model(model, optimizer, scheduler, model_disc, optimizer_disc, + scheduler_disc, current_step, epoch, output_path, **kwargs): + if hasattr(model, 'module'): + model_state = model.module.state_dict() + else: + model_state = model.state_dict() + model_disc_state = model_disc.state_dict()\ + if model_disc is not None else None + optimizer_state = optimizer.state_dict()\ + if optimizer is not None else None + optimizer_disc_state = optimizer_disc.state_dict()\ + if optimizer_disc is not None else None + scheduler_state = scheduler.state_dict()\ + if scheduler is not None else None + scheduler_disc_state = scheduler_disc.state_dict()\ + if scheduler_disc is not None else None + state = { + 'model': model_state, + 'optimizer': optimizer_state, + 'scheduler': scheduler_state, + 'model_disc': model_disc_state, + 'optimizer_disc': optimizer_disc_state, + 'scheduler_disc': scheduler_disc_state, + 'step': current_step, + 'epoch': epoch, + 'date': datetime.date.today().strftime("%B %d, %Y"), + } + state.update(kwargs) + torch.save(state, output_path) + + +def save_checkpoint(model, optimizer, scheduler, model_disc, optimizer_disc, + scheduler_disc, current_step, epoch, output_folder, + **kwargs): + file_name = 'checkpoint_{}.pth.tar'.format(current_step) + checkpoint_path = os.path.join(output_folder, file_name) + print(" > CHECKPOINT : {}".format(checkpoint_path)) + save_model(model, optimizer, scheduler, model_disc, optimizer_disc, + scheduler_disc, current_step, epoch, checkpoint_path, **kwargs) + + +def save_best_model(target_loss, best_loss, model, optimizer, scheduler, + model_disc, optimizer_disc, scheduler_disc, current_step, + epoch, output_folder, **kwargs): + if target_loss < best_loss: + file_name = 'best_model.pth.tar' + checkpoint_path = os.path.join(output_folder, file_name) + print(" > BEST MODEL : 
{}".format(checkpoint_path)) + save_model(model, + optimizer, + scheduler, + model_disc, + optimizer_disc, + scheduler_disc, + current_step, + epoch, + checkpoint_path, + model_loss=target_loss, + **kwargs) + best_loss = target_loss + return best_loss diff --git a/images/example_model_output.png b/images/example_model_output.png new file mode 100644 index 0000000000000000000000000000000000000000..8e83531c117a626c7db8ea23cf994299a6d93fec Binary files /dev/null and b/images/example_model_output.png differ diff --git a/images/model.png b/images/model.png new file mode 100644 index 0000000000000000000000000000000000000000..e2c55269efe82fa8ab7e4d17eb089518823efcbe Binary files /dev/null and b/images/model.png differ diff --git a/images/tts_performance.png b/images/tts_performance.png new file mode 100644 index 0000000000000000000000000000000000000000..bdff06731e6b60ffb4806943aba5dc89363f3ab3 Binary files /dev/null and b/images/tts_performance.png differ diff --git a/notebooks/AngleProto-Speaker_Encoder- ExtractSpeakerEmbeddings-by-sample.ipynb b/notebooks/AngleProto-Speaker_Encoder- ExtractSpeakerEmbeddings-by-sample.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..15206130c6a20ada0e8c590a7972fe65ea8e3a16 --- /dev/null +++ b/notebooks/AngleProto-Speaker_Encoder- ExtractSpeakerEmbeddings-by-sample.ipynb @@ -0,0 +1,163 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is a noteboook used to generate the speaker embeddings with the AngleProto speaker encoder model for multi-speaker training.\n", + "\n", + "Before running this script please DON'T FORGET: \n", + "- to set file paths.\n", + "- to download related model files from TTS.\n", + "- download or clone related repos, linked below.\n", + "- setup the repositories. 
```python setup.py install```\n", + "- to checkout right commit versions (given next to the model) of TTS.\n", + "- to set the right paths in the cell below.\n", + "\n", + "Repository:\n", + "- TTS: https://github.com/mozilla/TTS" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "import os\n", + "import importlib\n", + "import random\n", + "import librosa\n", + "import torch\n", + "\n", + "import numpy as np\n", + "from tqdm import tqdm\n", + "from TTS.tts.utils.speakers import save_speaker_mapping, load_speaker_mapping\n", + "\n", + "# you may need to change this depending on your system\n", + "os.environ['CUDA_VISIBLE_DEVICES']='0'\n", + "\n", + "\n", + "from TTS.tts.utils.speakers import save_speaker_mapping, load_speaker_mapping\n", + "from TTS.utils.audio import AudioProcessor\n", + "from TTS.utils.io import load_config" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You should also adjust all the path constants to point at the relevant locations for you locally" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "MODEL_RUN_PATH = \"../../Mozilla-TTS/checkpoints/libritts_100+360-angleproto-June-06-2020_04+12PM-9c04d1f/\"\n", + "MODEL_PATH = MODEL_RUN_PATH + \"best_model.pth.tar\"\n", + "CONFIG_PATH = MODEL_RUN_PATH + \"config.json\"\n", + "\n", + "\n", + "DATASETS_NAME = ['vctk'] # list the datasets\n", + "DATASETS_PATH = ['../../../datasets/VCTK/']\n", + "DATASETS_METAFILE = ['']\n", + "\n", + "USE_CUDA = True" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Preprocess dataset\n", + "meta_data = []\n", + "for i in range(len(DATASETS_NAME)):\n", + " preprocessor = importlib.import_module('TTS.tts.datasets.preprocess')\n", + " preprocessor = getattr(preprocessor, DATASETS_NAME[i].lower())\n", + " meta_data += preprocessor(DATASETS_PATH[i],DATASETS_METAFILE[i])\n", + " \n", + "meta_data= list(meta_data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "c = load_config(CONFIG_PATH)\n", + "ap = AudioProcessor(**c['audio'])\n", + "\n", + "model = SpeakerEncoder(**c.model)\n", + "model.load_state_dict(torch.load(MODEL_PATH)['model'])\n", + "model.eval()\n", + "if USE_CUDA:\n", + " model.cuda()\n", + "\n", + "embeddings_dict = {}\n", + "len_meta_data= len(meta_data)\n", + "\n", + "for i in tqdm(range(len_meta_data)):\n", + " _, wav_file, speaker_id = meta_data[i]\n", + " wav_file_name = os.path.basename(wav_file)\n", + " mel_spec = ap.melspectrogram(ap.load_wav(wav_file)).T\n", + " mel_spec = torch.FloatTensor(mel_spec[None, :, :])\n", + " if USE_CUDA:\n", + " mel_spec = mel_spec.cuda()\n", + " embedd = model.compute_embedding(mel_spec).cpu().detach().numpy().reshape(-1)\n", + " embeddings_dict[wav_file_name] = [embedd,speaker_id]\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# create and export speakers.json\n", + "speaker_mapping = {sample: {'name': embeddings_dict[sample][1], 'embedding':embeddings_dict[sample][0].reshape(-1).tolist()} for i, sample in enumerate(embeddings_dict.keys())}\n", + "save_speaker_mapping(MODEL_RUN_PATH, speaker_mapping)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#test load 
integrity\n", + "speaker_mapping_load = load_speaker_mapping(MODEL_RUN_PATH)\n", + "assert speaker_mapping == speaker_mapping_load\n", + "print(\"The file speakers.json has been exported to \",MODEL_RUN_PATH, ' with ', len(embeddings_dict.keys()), ' speakers')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/DDC_TTS_and_MultiBand_MelGAN_Example.ipynb b/notebooks/DDC_TTS_and_MultiBand_MelGAN_Example.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..17403771bed31a87cf051104a6f70889cb7bfb24 --- /dev/null +++ b/notebooks/DDC_TTS_and_MultiBand_MelGAN_Example.ipynb @@ -0,0 +1,329 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "DDC-TTS_and_MultiBand-MelGAN_Example.ipynb", + "provenance": [], + "collapsed_sections": [], + "toc_visible": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "accelerator": "GPU" + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "6LWsNd3_M3MP", + "colab_type": "text" + }, + "source": [ + "# Mozilla TTS on CPU Real-Time Speech Synthesis " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FAqrSIWgLyP0", + "colab_type": "text" + }, + "source": [ + "We use Tacotron2 and MultiBand-Melgan models and LJSpeech dataset.\n", + "\n", + "Tacotron2 is trained using [Double Decoder Consistency](https://erogol.com/solving-attention-problems-of-tts-models-with-double-decoder-consistency/) (DDC) only for 130K steps (3 days) with a single GPU.\n", + "\n", + "MultiBand-Melgan is trained 1.45M steps with real spectrograms.\n", + "\n", + "Note that both model performances can be improved with more training." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ku-dA4DKoeXk", + "colab_type": "text" + }, + "source": [ + "### Download Models" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "jGIgnWhGsxU1", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 162 + }, + "outputId": "88725e41-a8dc-4885-b3bf-cac939f38abe", + "tags": [] + }, + "source": [ + "!gdown --id 1dntzjWFg7ufWaTaFy80nRz-Tu02xWZos -O data/tts_model.pth.tar\n", + "!gdown --id 18CQ6G6tBEOfvCHlPqP8EBI4xWbrr9dBc -O data/config.json" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "4dnpE0-kvTsu", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 235 + }, + "outputId": "76377c6d-789c-4995-ba00-a21a6e1c401e", + "tags": [] + }, + "source": [ + "!gdown --id 1Ty5DZdOc0F7OTGj9oJThYbL5iVu_2G0K -O data/vocoder_model.pth.tar\n", + "!gdown --id 1Rd0R_nRCrbjEdpOwq6XwZAktvugiBvmu -O data/config_vocoder.json\n", + "!gdown --id 11oY3Tv0kQtxK_JPgxrfesa99maVXHNxU -O data/scale_stats.npy" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Zlgi8fPdpRF0", + "colab_type": "text" + }, + "source": [ + "### Define TTS function" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "f-Yc42nQZG5A", + "colab_type": "code", + "colab": {} + }, + "source": [ + "def tts(model, text, CONFIG, use_cuda, ap, use_gl, figures=True):\n", + " t_1 = time.time()\n", + " waveform, alignment, mel_spec, mel_postnet_spec, stop_tokens, inputs = synthesis(model, text, CONFIG, use_cuda, ap, speaker_id, style_wav=None,\n", + " truncated=False, enable_eos_bos_chars=CONFIG.enable_eos_bos_chars)\n", + " # mel_postnet_spec = ap.denormalize(mel_postnet_spec.T)\n", + " if not use_gl:\n", + " waveform = vocoder_model.inference(torch.FloatTensor(mel_postnet_spec.T).unsqueeze(0))\n", + " waveform = waveform.flatten()\n", + " if use_cuda:\n", + " waveform = waveform.cpu()\n", + " waveform = waveform.numpy()\n", + " rtf = (time.time() - t_1) / (len(waveform) / ap.sample_rate)\n", + " tps = (time.time() - t_1) / len(waveform)\n", + " print(waveform.shape)\n", + " print(\" > Run-time: {}\".format(time.time() - t_1))\n", + " print(\" > Real-time factor: {}\".format(rtf))\n", + " print(\" > Time per step: {}\".format(tps))\n", + " IPython.display.display(IPython.display.Audio(waveform, rate=CONFIG.audio['sample_rate'])) \n", + " return alignment, mel_postnet_spec, stop_tokens, waveform" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZksegYQepkFg", + "colab_type": "text" + }, + "source": [ + "### Load Models" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "oVa0kOamprgj", + "colab_type": "code", + "colab": {} + }, + "source": [ + "import os\n", + "import torch\n", + "import time\n", + "import IPython\n", + "\n", + "from TTS.tts.utils.generic_utils import setup_model\n", + "from TTS.utils.io import load_config\n", + "from TTS.tts.utils.text.symbols import symbols, phonemes\n", + "from TTS.utils.audio import AudioProcessor\n", + "from TTS.tts.utils.synthesis import synthesis" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "EY-sHVO8IFSH", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# runtime settings\n", + "use_cuda = False" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": 
"_1aIUp2FpxOQ", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# model paths\n", + "TTS_MODEL = \"data/tts_model.pth.tar\"\n", + "TTS_CONFIG = \"data/config.json\"\n", + "VOCODER_MODEL = \"data/vocoder_model.pth.tar\"\n", + "VOCODER_CONFIG = \"data/config_vocoder.json\"" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "CpgmdBVQplbv", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# load configs\n", + "TTS_CONFIG = load_config(TTS_CONFIG)\n", + "VOCODER_CONFIG = load_config(VOCODER_CONFIG)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "zmrQxiozIUVE", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 471 + }, + "outputId": "60c4daa0-4c5b-4a2e-fe0d-be437d003a49", + "tags": [] + }, + "source": [ + "# load the audio processor\n", + "TTS_CONFIG.audio['stats_path'] = 'data/scale_stats.npy'\n", + "ap = AudioProcessor(**TTS_CONFIG.audio) " + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "8fLoI4ipqMeS", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + }, + "outputId": "b789066e-e305-42ad-b3ca-eba8d9267382", + "tags": [] + }, + "source": [ + "# LOAD TTS MODEL\n", + "# multi speaker \n", + "speaker_id = None\n", + "speakers = []\n", + "\n", + "# load the model\n", + "num_chars = len(phonemes) if TTS_CONFIG.use_phonemes else len(symbols)\n", + "model = setup_model(num_chars, len(speakers), TTS_CONFIG)\n", + "\n", + "# load model state\n", + "cp = torch.load(TTS_MODEL, map_location=torch.device('cpu'))\n", + "\n", + "# load the model\n", + "model.load_state_dict(cp['model'])\n", + "if use_cuda:\n", + " model.cuda()\n", + "model.eval()\n", + "\n", + "# set model stepsize\n", + "if 'r' in cp:\n", + " model.decoder.set_r(cp['r'])" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "zKoq0GgzqzhQ", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "outputId": "234efc61-f37a-40bc-95a3-b51896018ccb", + "tags": [] + }, + "source": [ + "from TTS.vocoder.utils.generic_utils import setup_generator\n", + "\n", + "# LOAD VOCODER MODEL\n", + "vocoder_model = setup_generator(VOCODER_CONFIG)\n", + "vocoder_model.load_state_dict(torch.load(VOCODER_MODEL, map_location=\"cpu\")[\"model\"])\n", + "vocoder_model.remove_weight_norm()\n", + "vocoder_model.inference_padding = 0\n", + "\n", + "ap_vocoder = AudioProcessor(**VOCODER_CONFIG['audio']) \n", + "if use_cuda:\n", + " vocoder_model.cuda()\n", + "vocoder_model.eval()" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ws_YkPKsLgo-", + "colab_type": "text" + }, + "source": [ + "## Run Inference" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "FuWxZ9Ey5Puj", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 134 + }, + "outputId": "9c06adad-5451-4393-89a1-a2e7dc39ab91", + "tags": [] + }, + "source": [ + "sentence = \"Bill got in the habit of asking himself “Is that thought true?” and if he wasn’t absolutely certain it was, he just let it go.\"\n", + "align, spec, stop_tokens, wav = tts(model, sentence, TTS_CONFIG, use_cuda, ap, use_gl=False, figures=True)" + ], + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file diff --git 
a/notebooks/DDC_TTS_and_MultiBand_MelGAN_TFLite_Example.ipynb b/notebooks/DDC_TTS_and_MultiBand_MelGAN_TFLite_Example.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..57d9261b026645852a5cdedf43c05a9613efd76c --- /dev/null +++ b/notebooks/DDC_TTS_and_MultiBand_MelGAN_TFLite_Example.ipynb @@ -0,0 +1,1328 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "DDC-TTS_and_MultiBand-MelGAN_TFLite_Example.ipynb", + "provenance": [], + "collapsed_sections": [], + "toc_visible": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "6LWsNd3_M3MP", + "colab_type": "text" + }, + "source": [ + "# Mozilla TTS on CPU Real-Time Speech Synthesis with TFLite" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FAqrSIWgLyP0", + "colab_type": "text" + }, + "source": [ + "**These models are converted from released [PyTorch models](https://colab.research.google.com/drive/1u_16ZzHjKYFn1HNVuA4Qf_i2MMFB9olY?usp=sharing) using our TF utilities provided in Mozilla TTS.**\n", + "\n", + "#### **Notebook Details**\n", + "These TFLite models support TF 2.3rc0 and for different versions you might need to regenerate them. \n", + "\n", + "TFLite optimizations degrades the TTS model performance and we do not apply\n", + "any optimization for the vocoder model due to the same reason. If you like to\n", + "keep the quality, consider to regenerate TFLite model accordingly.\n", + "\n", + "Models optimized with TFLite can be slow on a regular CPU since it is optimized\n", + "specifically for lower-end systems.\n", + "\n", + "---\n", + "\n", + "\n", + "\n", + "#### **Model Details** \n", + "We use Tacotron2 and MultiBand-Melgan models and LJSpeech dataset.\n", + "\n", + "Tacotron2 is trained using [Double Decoder Consistency](https://erogol.com/solving-attention-problems-of-tts-models-with-double-decoder-consistency/) (DDC) only for 130K steps (3 days) with a single GPU.\n", + "\n", + "MultiBand-Melgan is trained 1.45M steps with real spectrograms.\n", + "\n", + "Note that both model performances can be improved with more training.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ku-dA4DKoeXk", + "colab_type": "text" + }, + "source": [ + "### Download TF Models and configs" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "jGIgnWhGsxU1", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 162 + }, + "outputId": "57af701e-77ec-400d-fee5-64aa7603d357" + }, + "source": [ + "!gdown --id 17PYXCmTe0el_SLTwznrt3vOArNGMGo5v -O tts_model.tflite\n", + "!gdown --id 18CQ6G6tBEOfvCHlPqP8EBI4xWbrr9dBc -O config.json" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Downloading...\n", + "From: https://drive.google.com/uc?id=17PYXCmTe0el_SLTwznrt3vOArNGMGo5v\n", + "To: /content/tts_model.tflite\n", + "30.1MB [00:00, 36.8MB/s]\n", + "Downloading...\n", + "From: https://drive.google.com/uc?id=18CQ6G6tBEOfvCHlPqP8EBI4xWbrr9dBc\n", + "To: /content/config.json\n", + "100% 9.53k/9.53k [00:00<00:00, 7.38MB/s]\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "4dnpE0-kvTsu", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 235 + }, + "outputId": "6aab0622-9add-4ee4-b9f8-177d6ddc0e86" + }, + "source": [ + "!gdown --id 1aXveT-NjOM1mUr6tM4JfWjshq67GvVIO -O vocoder_model.tflite\n", + "!gdown --id 
1Rd0R_nRCrbjEdpOwq6XwZAktvugiBvmu -O config_vocoder.json\n", + "!gdown --id 11oY3Tv0kQtxK_JPgxrfesa99maVXHNxU -O scale_stats.npy" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Downloading...\n", + "From: https://drive.google.com/uc?id=1aXveT-NjOM1mUr6tM4JfWjshq67GvVIO\n", + "To: /content/vocoder_model.tflite\n", + "10.2MB [00:00, 16.5MB/s]\n", + "Downloading...\n", + "From: https://drive.google.com/uc?id=1Rd0R_nRCrbjEdpOwq6XwZAktvugiBvmu\n", + "To: /content/config_vocoder.json\n", + "100% 6.76k/6.76k [00:00<00:00, 11.4MB/s]\n", + "Downloading...\n", + "From: https://drive.google.com/uc?id=11oY3Tv0kQtxK_JPgxrfesa99maVXHNxU\n", + "To: /content/scale_stats.npy\n", + "100% 10.5k/10.5k [00:00<00:00, 16.6MB/s]\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_ZuDrj_ioqHE", + "colab_type": "text" + }, + "source": [ + "### Setup Libraries" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "X2axt5BYq7gv", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 964 + }, + "outputId": "aa53986f-f218-4d17-8667-0d74bb90c927" + }, + "source": [ + "# need it for char to phoneme conversion\n", + "! sudo apt-get install espeak" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Reading package lists... Done\n", + "Building dependency tree \n", + "Reading state information... Done\n", + "The following package was automatically installed and is no longer required:\n", + " libnvidia-common-440\n", + "Use 'sudo apt autoremove' to remove it.\n", + "The following additional packages will be installed:\n", + " espeak-data libespeak1 libportaudio2 libsonic0\n", + "The following NEW packages will be installed:\n", + " espeak espeak-data libespeak1 libportaudio2 libsonic0\n", + "0 upgraded, 5 newly installed, 0 to remove and 35 not upgraded.\n", + "Need to get 1,219 kB of archives.\n", + "After this operation, 3,031 kB of additional disk space will be used.\n", + "Get:1 http://archive.ubuntu.com/ubuntu bionic/universe amd64 libportaudio2 amd64 19.6.0-1 [64.6 kB]\n", + "Get:2 http://archive.ubuntu.com/ubuntu bionic/main amd64 libsonic0 amd64 0.2.0-6 [13.4 kB]\n", + "Get:3 http://archive.ubuntu.com/ubuntu bionic/universe amd64 espeak-data amd64 1.48.04+dfsg-5 [934 kB]\n", + "Get:4 http://archive.ubuntu.com/ubuntu bionic/universe amd64 libespeak1 amd64 1.48.04+dfsg-5 [145 kB]\n", + "Get:5 http://archive.ubuntu.com/ubuntu bionic/universe amd64 espeak amd64 1.48.04+dfsg-5 [61.6 kB]\n", + "Fetched 1,219 kB in 2s (498 kB/s)\n", + "debconf: unable to initialize frontend: Dialog\n", + "debconf: (No usable dialog-like program is installed, so the dialog based frontend cannot be used. at /usr/share/perl5/Debconf/FrontEnd/Dialog.pm line 76, <> line 5.)\n", + "debconf: falling back to frontend: Readline\n", + "debconf: unable to initialize frontend: Readline\n", + "debconf: (This frontend requires a controlling tty.)\n", + "debconf: falling back to frontend: Teletype\n", + "dpkg-preconfigure: unable to re-open stdin: \n", + "Selecting previously unselected package libportaudio2:amd64.\n", + "(Reading database ... 
144465 files and directories currently installed.)\n", + "Preparing to unpack .../libportaudio2_19.6.0-1_amd64.deb ...\n", + "Unpacking libportaudio2:amd64 (19.6.0-1) ...\n", + "Selecting previously unselected package libsonic0:amd64.\n", + "Preparing to unpack .../libsonic0_0.2.0-6_amd64.deb ...\n", + "Unpacking libsonic0:amd64 (0.2.0-6) ...\n", + "Selecting previously unselected package espeak-data:amd64.\n", + "Preparing to unpack .../espeak-data_1.48.04+dfsg-5_amd64.deb ...\n", + "Unpacking espeak-data:amd64 (1.48.04+dfsg-5) ...\n", + "Selecting previously unselected package libespeak1:amd64.\n", + "Preparing to unpack .../libespeak1_1.48.04+dfsg-5_amd64.deb ...\n", + "Unpacking libespeak1:amd64 (1.48.04+dfsg-5) ...\n", + "Selecting previously unselected package espeak.\n", + "Preparing to unpack .../espeak_1.48.04+dfsg-5_amd64.deb ...\n", + "Unpacking espeak (1.48.04+dfsg-5) ...\n", + "Setting up libportaudio2:amd64 (19.6.0-1) ...\n", + "Setting up espeak-data:amd64 (1.48.04+dfsg-5) ...\n", + "Setting up libsonic0:amd64 (0.2.0-6) ...\n", + "Setting up libespeak1:amd64 (1.48.04+dfsg-5) ...\n", + "Setting up espeak (1.48.04+dfsg-5) ...\n", + "Processing triggers for man-db (2.8.3-2ubuntu0.1) ...\n", + "Processing triggers for libc-bin (2.27-3ubuntu1) ...\n", + "/sbin/ldconfig.real: /usr/local/lib/python3.6/dist-packages/ideep4py/lib/libmkldnn.so.0 is not a symbolic link\n", + "\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "ZduAf-qYYEIT", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 144 + }, + "outputId": "c1fcac0d-b8f8-442c-d598-4f549c42b698" + }, + "source": [ + "!git clone https://github.com/mozilla/TTS" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Cloning into 'TTS'...\n", + "remote: Enumerating objects: 107, done.\u001b[K\n", + "remote: Counting objects: 100% (107/107), done.\u001b[K\n", + "remote: Compressing objects: 100% (79/79), done.\u001b[K\n", + "remote: Total 7252 (delta 51), reused 68 (delta 28), pack-reused 7145\u001b[K\n", + "Receiving objects: 100% (7252/7252), 115.36 MiB | 11.38 MiB/s, done.\n", + "Resolving deltas: 100% (4892/4892), done.\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "ofPCvPyjZEcT", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "outputId": "f3d3ea73-eae5-473c-db19-276bd0e721cc" + }, + "source": [ + "%cd TTS\n", + "!git checkout c7296b3\n", + "!pip install -r requirements.txt\n", + "!python setup.py install\n", + "!pip install tensorflow==2.3.0rc0\n", + "%cd .." + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "/content/TTS\n", + "Note: checking out 'c7296b3'.\n", + "\n", + "You are in 'detached HEAD' state. You can look around, make experimental\n", + "changes and commit them, and you can discard any commits you make in this\n", + "state without impacting any branches by performing another checkout.\n", + "\n", + "If you want to create a new branch to retain commits you create, you may\n", + "do so (now or later) by using -b with the checkout command again. 
Example:\n", + "\n", + " git checkout -b \n", + "\n", + "HEAD is now at c7296b3 add module requirement\n", + "Requirement already satisfied: numpy>=1.16.0 in /usr/local/lib/python3.6/dist-packages (from -r requirements.txt (line 1)) (1.18.5)\n", + "Requirement already satisfied: torch>=1.5 in /usr/local/lib/python3.6/dist-packages (from -r requirements.txt (line 2)) (1.5.1+cu101)\n", + "Requirement already satisfied: librosa>=0.5.1 in /usr/local/lib/python3.6/dist-packages (from -r requirements.txt (line 3)) (0.6.3)\n", + "Collecting Unidecode>=0.4.20\n", + "\u001b[?25l Downloading https://files.pythonhosted.org/packages/d0/42/d9edfed04228bacea2d824904cae367ee9efd05e6cce7ceaaedd0b0ad964/Unidecode-1.1.1-py2.py3-none-any.whl (238kB)\n", + "\u001b[K |████████████████████████████████| 245kB 2.7MB/s \n", + "\u001b[?25hRequirement already satisfied: tensorboard in /usr/local/lib/python3.6/dist-packages (from -r requirements.txt (line 5)) (2.2.2)\n", + "Collecting tensorboardX\n", + "\u001b[?25l Downloading https://files.pythonhosted.org/packages/af/0c/4f41bcd45db376e6fe5c619c01100e9b7531c55791b7244815bac6eac32c/tensorboardX-2.1-py2.py3-none-any.whl (308kB)\n", + "\u001b[K |████████████████████████████████| 317kB 11.6MB/s \n", + "\u001b[?25hRequirement already satisfied: matplotlib in /usr/local/lib/python3.6/dist-packages (from -r requirements.txt (line 7)) (3.2.2)\n", + "Requirement already satisfied: Pillow in /usr/local/lib/python3.6/dist-packages (from -r requirements.txt (line 8)) (7.0.0)\n", + "Requirement already satisfied: flask in /usr/local/lib/python3.6/dist-packages (from -r requirements.txt (line 9)) (1.1.2)\n", + "Requirement already satisfied: scipy in /usr/local/lib/python3.6/dist-packages (from -r requirements.txt (line 10)) (1.4.1)\n", + "Requirement already satisfied: tqdm in /usr/local/lib/python3.6/dist-packages (from -r requirements.txt (line 11)) (4.41.1)\n", + "Collecting soundfile\n", + " Downloading https://files.pythonhosted.org/packages/eb/f2/3cbbbf3b96fb9fa91582c438b574cff3f45b29c772f94c400e2c99ef5db9/SoundFile-0.10.3.post1-py2.py3-none-any.whl\n", + "Collecting phonemizer\n", + "\u001b[?25l Downloading https://files.pythonhosted.org/packages/14/93/b24323b7b7d99d65c41188685f423c66b2e53d0fd959851ac224c2aa2bfb/phonemizer-2.2-py3-none-any.whl (47kB)\n", + "\u001b[K |████████████████████████████████| 51kB 6.0MB/s \n", + "\u001b[?25hRequirement already satisfied: bokeh==1.4.0 in /usr/local/lib/python3.6/dist-packages (from -r requirements.txt (line 14)) (1.4.0)\n", + "Requirement already satisfied: inflect in /usr/local/lib/python3.6/dist-packages (from -r requirements.txt (line 15)) (2.1.0)\n", + "Requirement already satisfied: future in /usr/local/lib/python3.6/dist-packages (from torch>=1.5->-r requirements.txt (line 2)) (0.16.0)\n", + "Requirement already satisfied: numba>=0.38.0 in /usr/local/lib/python3.6/dist-packages (from librosa>=0.5.1->-r requirements.txt (line 3)) (0.48.0)\n", + "Requirement already satisfied: decorator>=3.0.0 in /usr/local/lib/python3.6/dist-packages (from librosa>=0.5.1->-r requirements.txt (line 3)) (4.4.2)\n", + "Requirement already satisfied: joblib>=0.12 in /usr/local/lib/python3.6/dist-packages (from librosa>=0.5.1->-r requirements.txt (line 3)) (0.16.0)\n", + "Requirement already satisfied: audioread>=2.0.0 in /usr/local/lib/python3.6/dist-packages (from librosa>=0.5.1->-r requirements.txt (line 3)) (2.1.8)\n", + "Requirement already satisfied: six>=1.3 in /usr/local/lib/python3.6/dist-packages (from librosa>=0.5.1->-r requirements.txt 
(line 3)) (1.12.0)\n", + "Requirement already satisfied: scikit-learn!=0.19.0,>=0.14.0 in /usr/local/lib/python3.6/dist-packages (from librosa>=0.5.1->-r requirements.txt (line 3)) (0.22.2.post1)\n", + "Requirement already satisfied: resampy>=0.2.0 in /usr/local/lib/python3.6/dist-packages (from librosa>=0.5.1->-r requirements.txt (line 3)) (0.2.2)\n", + "Requirement already satisfied: wheel>=0.26; python_version >= \"3\" in /usr/local/lib/python3.6/dist-packages (from tensorboard->-r requirements.txt (line 5)) (0.34.2)\n", + "Requirement already satisfied: protobuf>=3.6.0 in /usr/local/lib/python3.6/dist-packages (from tensorboard->-r requirements.txt (line 5)) (3.10.0)\n", + "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.6/dist-packages (from tensorboard->-r requirements.txt (line 5)) (3.2.2)\n", + "Requirement already satisfied: requests<3,>=2.21.0 in /usr/local/lib/python3.6/dist-packages (from tensorboard->-r requirements.txt (line 5)) (2.23.0)\n", + "Requirement already satisfied: absl-py>=0.4 in /usr/local/lib/python3.6/dist-packages (from tensorboard->-r requirements.txt (line 5)) (0.9.0)\n", + "Requirement already satisfied: google-auth<2,>=1.6.3 in /usr/local/lib/python3.6/dist-packages (from tensorboard->-r requirements.txt (line 5)) (1.17.2)\n", + "Requirement already satisfied: grpcio>=1.24.3 in /usr/local/lib/python3.6/dist-packages (from tensorboard->-r requirements.txt (line 5)) (1.30.0)\n", + "Requirement already satisfied: setuptools>=41.0.0 in /usr/local/lib/python3.6/dist-packages (from tensorboard->-r requirements.txt (line 5)) (49.1.0)\n", + "Requirement already satisfied: tensorboard-plugin-wit>=1.6.0 in /usr/local/lib/python3.6/dist-packages (from tensorboard->-r requirements.txt (line 5)) (1.7.0)\n", + "Requirement already satisfied: werkzeug>=0.11.15 in /usr/local/lib/python3.6/dist-packages (from tensorboard->-r requirements.txt (line 5)) (1.0.1)\n", + "Requirement already satisfied: google-auth-oauthlib<0.5,>=0.4.1 in /usr/local/lib/python3.6/dist-packages (from tensorboard->-r requirements.txt (line 5)) (0.4.1)\n", + "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib->-r requirements.txt (line 7)) (2.4.7)\n", + "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.6/dist-packages (from matplotlib->-r requirements.txt (line 7)) (0.10.0)\n", + "Requirement already satisfied: python-dateutil>=2.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib->-r requirements.txt (line 7)) (2.8.1)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib->-r requirements.txt (line 7)) (1.2.0)\n", + "Requirement already satisfied: click>=5.1 in /usr/local/lib/python3.6/dist-packages (from flask->-r requirements.txt (line 9)) (7.1.2)\n", + "Requirement already satisfied: Jinja2>=2.10.1 in /usr/local/lib/python3.6/dist-packages (from flask->-r requirements.txt (line 9)) (2.11.2)\n", + "Requirement already satisfied: itsdangerous>=0.24 in /usr/local/lib/python3.6/dist-packages (from flask->-r requirements.txt (line 9)) (1.1.0)\n", + "Requirement already satisfied: cffi>=1.0 in /usr/local/lib/python3.6/dist-packages (from soundfile->-r requirements.txt (line 12)) (1.14.0)\n", + "Requirement already satisfied: attrs>=18.1 in /usr/local/lib/python3.6/dist-packages (from phonemizer->-r requirements.txt (line 13)) (19.3.0)\n", + "Collecting segments\n", + " Downloading 
https://files.pythonhosted.org/packages/5b/a0/0c3fe64787745c39eb3f2f5f5f9ed8d008d9ef22e9d7f9f52f71ea4712f7/segments-2.1.3-py2.py3-none-any.whl\n", + "Requirement already satisfied: packaging>=16.8 in /usr/local/lib/python3.6/dist-packages (from bokeh==1.4.0->-r requirements.txt (line 14)) (20.4)\n", + "Requirement already satisfied: tornado>=4.3 in /usr/local/lib/python3.6/dist-packages (from bokeh==1.4.0->-r requirements.txt (line 14)) (4.5.3)\n", + "Requirement already satisfied: PyYAML>=3.10 in /usr/local/lib/python3.6/dist-packages (from bokeh==1.4.0->-r requirements.txt (line 14)) (3.13)\n", + "Requirement already satisfied: llvmlite<0.32.0,>=0.31.0dev0 in /usr/local/lib/python3.6/dist-packages (from numba>=0.38.0->librosa>=0.5.1->-r requirements.txt (line 3)) (0.31.0)\n", + "Requirement already satisfied: importlib-metadata; python_version < \"3.8\" in /usr/local/lib/python3.6/dist-packages (from markdown>=2.6.8->tensorboard->-r requirements.txt (line 5)) (1.7.0)\n", + "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard->-r requirements.txt (line 5)) (1.24.3)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard->-r requirements.txt (line 5)) (2020.6.20)\n", + "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard->-r requirements.txt (line 5)) (2.10)\n", + "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard->-r requirements.txt (line 5)) (3.0.4)\n", + "Requirement already satisfied: rsa<5,>=3.1.4; python_version >= \"3\" in /usr/local/lib/python3.6/dist-packages (from google-auth<2,>=1.6.3->tensorboard->-r requirements.txt (line 5)) (4.6)\n", + "Requirement already satisfied: cachetools<5.0,>=2.0.0 in /usr/local/lib/python3.6/dist-packages (from google-auth<2,>=1.6.3->tensorboard->-r requirements.txt (line 5)) (4.1.1)\n", + "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.6/dist-packages (from google-auth<2,>=1.6.3->tensorboard->-r requirements.txt (line 5)) (0.2.8)\n", + "Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.6/dist-packages (from google-auth-oauthlib<0.5,>=0.4.1->tensorboard->-r requirements.txt (line 5)) (1.3.0)\n", + "Requirement already satisfied: MarkupSafe>=0.23 in /usr/local/lib/python3.6/dist-packages (from Jinja2>=2.10.1->flask->-r requirements.txt (line 9)) (1.1.1)\n", + "Requirement already satisfied: pycparser in /usr/local/lib/python3.6/dist-packages (from cffi>=1.0->soundfile->-r requirements.txt (line 12)) (2.20)\n", + "Collecting clldutils>=1.7.3\n", + "\u001b[?25l Downloading https://files.pythonhosted.org/packages/7b/b3/05882a8d5c8a7f7c69a47500334ac99623928edca930278d6ab88ee6d99b/clldutils-3.5.2-py2.py3-none-any.whl (189kB)\n", + "\u001b[K |████████████████████████████████| 194kB 13.2MB/s \n", + "\u001b[?25hCollecting csvw>=1.5.6\n", + " Downloading https://files.pythonhosted.org/packages/d1/b6/8fef6788b8f05b21424a17ae3881eff916d42e5c7e87f57a85d9d7abf0a1/csvw-1.7.0-py2.py3-none-any.whl\n", + "Requirement already satisfied: regex in /usr/local/lib/python3.6/dist-packages (from segments->phonemizer->-r requirements.txt (line 13)) (2019.12.20)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.6/dist-packages (from importlib-metadata; 
python_version < \"3.8\"->markdown>=2.6.8->tensorboard->-r requirements.txt (line 5)) (3.1.0)\n", + "Requirement already satisfied: pyasn1>=0.1.3 in /usr/local/lib/python3.6/dist-packages (from rsa<5,>=3.1.4; python_version >= \"3\"->google-auth<2,>=1.6.3->tensorboard->-r requirements.txt (line 5)) (0.4.8)\n", + "Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.6/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<0.5,>=0.4.1->tensorboard->-r requirements.txt (line 5)) (3.1.0)\n", + "Requirement already satisfied: tabulate>=0.7.7 in /usr/local/lib/python3.6/dist-packages (from clldutils>=1.7.3->segments->phonemizer->-r requirements.txt (line 13)) (0.8.7)\n", + "Collecting colorlog\n", + " Downloading https://files.pythonhosted.org/packages/00/0d/22c73c2eccb21dd3498df7d22c0b1d4a30f5a5fb3feb64e1ce06bc247747/colorlog-4.1.0-py2.py3-none-any.whl\n", + "Requirement already satisfied: uritemplate>=3.0.0 in /usr/local/lib/python3.6/dist-packages (from csvw>=1.5.6->segments->phonemizer->-r requirements.txt (line 13)) (3.0.1)\n", + "Collecting isodate\n", + "\u001b[?25l Downloading https://files.pythonhosted.org/packages/9b/9f/b36f7774ff5ea8e428fdcfc4bb332c39ee5b9362ddd3d40d9516a55221b2/isodate-0.6.0-py2.py3-none-any.whl (45kB)\n", + "\u001b[K |████████████████████████████████| 51kB 6.7MB/s \n", + "\u001b[?25hCollecting rfc3986\n", + " Downloading https://files.pythonhosted.org/packages/78/be/7b8b99fd74ff5684225f50dd0e865393d2265656ef3b4ba9eaaaffe622b8/rfc3986-1.4.0-py2.py3-none-any.whl\n", + "Installing collected packages: Unidecode, tensorboardX, soundfile, isodate, rfc3986, csvw, colorlog, clldutils, segments, phonemizer\n", + "Successfully installed Unidecode-1.1.1 clldutils-3.5.2 colorlog-4.1.0 csvw-1.7.0 isodate-0.6.0 phonemizer-2.2 rfc3986-1.4.0 segments-2.1.3 soundfile-0.10.3.post1 tensorboardX-2.1\n", + "running install\n", + "running bdist_egg\n", + "running egg_info\n", + "creating tts_namespace/TTS.egg-info\n", + "writing tts_namespace/TTS.egg-info/PKG-INFO\n", + "writing dependency_links to tts_namespace/TTS.egg-info/dependency_links.txt\n", + "writing entry points to tts_namespace/TTS.egg-info/entry_points.txt\n", + "writing requirements to tts_namespace/TTS.egg-info/requires.txt\n", + "writing top-level names to tts_namespace/TTS.egg-info/top_level.txt\n", + "writing manifest file 'tts_namespace/TTS.egg-info/SOURCES.txt'\n", + "writing manifest file 'tts_namespace/TTS.egg-info/SOURCES.txt'\n", + "installing library code to build/bdist.linux-x86_64/egg\n", + "running install_lib\n", + "running build_py\n", + "-- Building version 0.0.3+c7296b3\n", + "creating temp_build\n", + "creating temp_build/TTS\n", + "copying tts_namespace/TTS/distribute.py -> temp_build/TTS\n", + "copying tts_namespace/TTS/train.py -> temp_build/TTS\n", + "copying tts_namespace/TTS/version.py -> temp_build/TTS\n", + "copying tts_namespace/TTS/compute_statistics.py -> temp_build/TTS\n", + "copying tts_namespace/TTS/__init__.py -> temp_build/TTS\n", + "copying tts_namespace/TTS/setup.py -> temp_build/TTS\n", + "copying tts_namespace/TTS/synthesize.py -> temp_build/TTS\n", + "creating temp_build/TTS/tests\n", + "copying tts_namespace/TTS/tests/test_demo_server.py -> temp_build/TTS/tests\n", + "copying tts_namespace/TTS/tests/test_text_processing.py -> temp_build/TTS/tests\n", + "copying tts_namespace/TTS/tests/test_preprocessors.py -> temp_build/TTS/tests\n", + "copying tts_namespace/TTS/tests/test_loader.py -> temp_build/TTS/tests\n", + "copying 
tts_namespace/TTS/tests/test_audio.py -> temp_build/TTS/tests\n", + "copying tts_namespace/TTS/tests/__init__.py -> temp_build/TTS/tests\n", + "copying tts_namespace/TTS/tests/test_tacotron2_model.py -> temp_build/TTS/tests\n", + "copying tts_namespace/TTS/tests/generic_utils_text.py -> temp_build/TTS/tests\n", + "copying tts_namespace/TTS/tests/test_tacotron_model.py -> temp_build/TTS/tests\n", + "copying tts_namespace/TTS/tests/symbols_tests.py -> temp_build/TTS/tests\n", + "copying tts_namespace/TTS/tests/test_layers.py -> temp_build/TTS/tests\n", + "creating temp_build/TTS/utils\n", + "copying tts_namespace/TTS/utils/data.py -> temp_build/TTS/utils\n", + "copying tts_namespace/TTS/utils/radam.py -> temp_build/TTS/utils\n", + "copying tts_namespace/TTS/utils/training.py -> temp_build/TTS/utils\n", + "copying tts_namespace/TTS/utils/console_logger.py -> temp_build/TTS/utils\n", + "copying tts_namespace/TTS/utils/__init__.py -> temp_build/TTS/utils\n", + "copying tts_namespace/TTS/utils/visual.py -> temp_build/TTS/utils\n", + "copying tts_namespace/TTS/utils/audio.py -> temp_build/TTS/utils\n", + "copying tts_namespace/TTS/utils/tensorboard_logger.py -> temp_build/TTS/utils\n", + "copying tts_namespace/TTS/utils/speakers.py -> temp_build/TTS/utils\n", + "copying tts_namespace/TTS/utils/measures.py -> temp_build/TTS/utils\n", + "copying tts_namespace/TTS/utils/generic_utils.py -> temp_build/TTS/utils\n", + "copying tts_namespace/TTS/utils/io.py -> temp_build/TTS/utils\n", + "copying tts_namespace/TTS/utils/synthesis.py -> temp_build/TTS/utils\n", + "creating temp_build/TTS/vocoder\n", + "copying tts_namespace/TTS/vocoder/train.py -> temp_build/TTS/vocoder\n", + "copying tts_namespace/TTS/vocoder/__init__.py -> temp_build/TTS/vocoder\n", + "copying tts_namespace/TTS/vocoder/compute_tts_features.py -> temp_build/TTS/vocoder\n", + "creating temp_build/TTS/speaker_encoder\n", + "copying tts_namespace/TTS/speaker_encoder/compute_embeddings.py -> temp_build/TTS/speaker_encoder\n", + "copying tts_namespace/TTS/speaker_encoder/loss.py -> temp_build/TTS/speaker_encoder\n", + "copying tts_namespace/TTS/speaker_encoder/train.py -> temp_build/TTS/speaker_encoder\n", + "copying tts_namespace/TTS/speaker_encoder/dataset.py -> temp_build/TTS/speaker_encoder\n", + "copying tts_namespace/TTS/speaker_encoder/__init__.py -> temp_build/TTS/speaker_encoder\n", + "copying tts_namespace/TTS/speaker_encoder/visual.py -> temp_build/TTS/speaker_encoder\n", + "copying tts_namespace/TTS/speaker_encoder/model.py -> temp_build/TTS/speaker_encoder\n", + "copying tts_namespace/TTS/speaker_encoder/tests.py -> temp_build/TTS/speaker_encoder\n", + "copying tts_namespace/TTS/speaker_encoder/generic_utils.py -> temp_build/TTS/speaker_encoder\n", + "creating temp_build/TTS/models\n", + "copying tts_namespace/TTS/models/tacotron.py -> temp_build/TTS/models\n", + "copying tts_namespace/TTS/models/__init__.py -> temp_build/TTS/models\n", + "copying tts_namespace/TTS/models/tacotron_abstract.py -> temp_build/TTS/models\n", + "copying tts_namespace/TTS/models/tacotron2.py -> temp_build/TTS/models\n", + "creating temp_build/TTS/layers\n", + "copying tts_namespace/TTS/layers/tacotron.py -> temp_build/TTS/layers\n", + "copying tts_namespace/TTS/layers/gst_layers.py -> temp_build/TTS/layers\n", + "copying tts_namespace/TTS/layers/losses.py -> temp_build/TTS/layers\n", + "copying tts_namespace/TTS/layers/__init__.py -> temp_build/TTS/layers\n", + "copying tts_namespace/TTS/layers/common_layers.py -> temp_build/TTS/layers\n", + "copying 
tts_namespace/TTS/layers/tacotron2.py -> temp_build/TTS/layers\n", + "creating temp_build/TTS/server\n", + "copying tts_namespace/TTS/server/server.py -> temp_build/TTS/server\n", + "copying tts_namespace/TTS/server/__init__.py -> temp_build/TTS/server\n", + "copying tts_namespace/TTS/server/synthesizer.py -> temp_build/TTS/server\n", + "creating temp_build/TTS/datasets\n", + "copying tts_namespace/TTS/datasets/TTSDataset.py -> temp_build/TTS/datasets\n", + "copying tts_namespace/TTS/datasets/preprocess.py -> temp_build/TTS/datasets\n", + "copying tts_namespace/TTS/datasets/__init__.py -> temp_build/TTS/datasets\n", + "creating temp_build/TTS/utils/text\n", + "copying tts_namespace/TTS/utils/text/symbols.py -> temp_build/TTS/utils/text\n", + "copying tts_namespace/TTS/utils/text/number_norm.py -> temp_build/TTS/utils/text\n", + "copying tts_namespace/TTS/utils/text/cmudict.py -> temp_build/TTS/utils/text\n", + "copying tts_namespace/TTS/utils/text/__init__.py -> temp_build/TTS/utils/text\n", + "copying tts_namespace/TTS/utils/text/cleaners.py -> temp_build/TTS/utils/text\n", + "creating temp_build/TTS/vocoder/tests\n", + "copying tts_namespace/TTS/vocoder/tests/test_losses.py -> temp_build/TTS/vocoder/tests\n", + "copying tts_namespace/TTS/vocoder/tests/test_pqmf.py -> temp_build/TTS/vocoder/tests\n", + "copying tts_namespace/TTS/vocoder/tests/test_datasets.py -> temp_build/TTS/vocoder/tests\n", + "copying tts_namespace/TTS/vocoder/tests/test_melgan_discriminator.py -> temp_build/TTS/vocoder/tests\n", + "copying tts_namespace/TTS/vocoder/tests/test_melgan_generator.py -> temp_build/TTS/vocoder/tests\n", + "copying tts_namespace/TTS/vocoder/tests/__init__.py -> temp_build/TTS/vocoder/tests\n", + "copying tts_namespace/TTS/vocoder/tests/test_rwd.py -> temp_build/TTS/vocoder/tests\n", + "creating temp_build/TTS/vocoder/utils\n", + "copying tts_namespace/TTS/vocoder/utils/console_logger.py -> temp_build/TTS/vocoder/utils\n", + "copying tts_namespace/TTS/vocoder/utils/__init__.py -> temp_build/TTS/vocoder/utils\n", + "copying tts_namespace/TTS/vocoder/utils/generic_utils.py -> temp_build/TTS/vocoder/utils\n", + "copying tts_namespace/TTS/vocoder/utils/io.py -> temp_build/TTS/vocoder/utils\n", + "creating temp_build/TTS/vocoder/models\n", + "copying tts_namespace/TTS/vocoder/models/melgan_discriminator.py -> temp_build/TTS/vocoder/models\n", + "copying tts_namespace/TTS/vocoder/models/random_window_discriminator.py -> temp_build/TTS/vocoder/models\n", + "copying tts_namespace/TTS/vocoder/models/__init__.py -> temp_build/TTS/vocoder/models\n", + "copying tts_namespace/TTS/vocoder/models/multiband_melgan_generator.py -> temp_build/TTS/vocoder/models\n", + "copying tts_namespace/TTS/vocoder/models/melgan_multiscale_discriminator.py -> temp_build/TTS/vocoder/models\n", + "copying tts_namespace/TTS/vocoder/models/melgan_generator.py -> temp_build/TTS/vocoder/models\n", + "creating temp_build/TTS/vocoder/layers\n", + "copying tts_namespace/TTS/vocoder/layers/pqmf.py -> temp_build/TTS/vocoder/layers\n", + "copying tts_namespace/TTS/vocoder/layers/losses.py -> temp_build/TTS/vocoder/layers\n", + "copying tts_namespace/TTS/vocoder/layers/__init__.py -> temp_build/TTS/vocoder/layers\n", + "copying tts_namespace/TTS/vocoder/layers/melgan.py -> temp_build/TTS/vocoder/layers\n", + "creating temp_build/TTS/vocoder/datasets\n", + "copying tts_namespace/TTS/vocoder/datasets/preprocess.py -> temp_build/TTS/vocoder/datasets\n", + "copying tts_namespace/TTS/vocoder/datasets/__init__.py -> 
temp_build/TTS/vocoder/datasets\n", + "copying tts_namespace/TTS/vocoder/datasets/gan_dataset.py -> temp_build/TTS/vocoder/datasets\n", + "creating temp_build/TTS/server/templates\n", + "copying tts_namespace/TTS/server/templates/index.html -> temp_build/TTS/server/templates\n", + "creating build\n", + "creating build/bdist.linux-x86_64\n", + "creating build/bdist.linux-x86_64/egg\n", + "creating build/bdist.linux-x86_64/egg/TTS\n", + "creating build/bdist.linux-x86_64/egg/TTS/tests\n", + "copying temp_build/TTS/tests/test_demo_server.py -> build/bdist.linux-x86_64/egg/TTS/tests\n", + "copying temp_build/TTS/tests/test_text_processing.py -> build/bdist.linux-x86_64/egg/TTS/tests\n", + "copying temp_build/TTS/tests/test_preprocessors.py -> build/bdist.linux-x86_64/egg/TTS/tests\n", + "copying temp_build/TTS/tests/test_loader.py -> build/bdist.linux-x86_64/egg/TTS/tests\n", + "copying temp_build/TTS/tests/test_audio.py -> build/bdist.linux-x86_64/egg/TTS/tests\n", + "copying temp_build/TTS/tests/__init__.py -> build/bdist.linux-x86_64/egg/TTS/tests\n", + "copying temp_build/TTS/tests/test_tacotron2_model.py -> build/bdist.linux-x86_64/egg/TTS/tests\n", + "copying temp_build/TTS/tests/generic_utils_text.py -> build/bdist.linux-x86_64/egg/TTS/tests\n", + "copying temp_build/TTS/tests/test_tacotron_model.py -> build/bdist.linux-x86_64/egg/TTS/tests\n", + "copying temp_build/TTS/tests/symbols_tests.py -> build/bdist.linux-x86_64/egg/TTS/tests\n", + "copying temp_build/TTS/tests/test_layers.py -> build/bdist.linux-x86_64/egg/TTS/tests\n", + "creating build/bdist.linux-x86_64/egg/TTS/utils\n", + "creating build/bdist.linux-x86_64/egg/TTS/utils/text\n", + "copying temp_build/TTS/utils/text/symbols.py -> build/bdist.linux-x86_64/egg/TTS/utils/text\n", + "copying temp_build/TTS/utils/text/number_norm.py -> build/bdist.linux-x86_64/egg/TTS/utils/text\n", + "copying temp_build/TTS/utils/text/cmudict.py -> build/bdist.linux-x86_64/egg/TTS/utils/text\n", + "copying temp_build/TTS/utils/text/__init__.py -> build/bdist.linux-x86_64/egg/TTS/utils/text\n", + "copying temp_build/TTS/utils/text/cleaners.py -> build/bdist.linux-x86_64/egg/TTS/utils/text\n", + "copying temp_build/TTS/utils/data.py -> build/bdist.linux-x86_64/egg/TTS/utils\n", + "copying temp_build/TTS/utils/radam.py -> build/bdist.linux-x86_64/egg/TTS/utils\n", + "copying temp_build/TTS/utils/training.py -> build/bdist.linux-x86_64/egg/TTS/utils\n", + "copying temp_build/TTS/utils/console_logger.py -> build/bdist.linux-x86_64/egg/TTS/utils\n", + "copying temp_build/TTS/utils/__init__.py -> build/bdist.linux-x86_64/egg/TTS/utils\n", + "copying temp_build/TTS/utils/visual.py -> build/bdist.linux-x86_64/egg/TTS/utils\n", + "copying temp_build/TTS/utils/audio.py -> build/bdist.linux-x86_64/egg/TTS/utils\n", + "copying temp_build/TTS/utils/tensorboard_logger.py -> build/bdist.linux-x86_64/egg/TTS/utils\n", + "copying temp_build/TTS/utils/speakers.py -> build/bdist.linux-x86_64/egg/TTS/utils\n", + "copying temp_build/TTS/utils/measures.py -> build/bdist.linux-x86_64/egg/TTS/utils\n", + "copying temp_build/TTS/utils/generic_utils.py -> build/bdist.linux-x86_64/egg/TTS/utils\n", + "copying temp_build/TTS/utils/io.py -> build/bdist.linux-x86_64/egg/TTS/utils\n", + "copying temp_build/TTS/utils/synthesis.py -> build/bdist.linux-x86_64/egg/TTS/utils\n", + "copying temp_build/TTS/distribute.py -> build/bdist.linux-x86_64/egg/TTS\n", + "copying temp_build/TTS/train.py -> build/bdist.linux-x86_64/egg/TTS\n", + "copying temp_build/TTS/version.py -> 
build/bdist.linux-x86_64/egg/TTS\n", + "copying temp_build/TTS/compute_statistics.py -> build/bdist.linux-x86_64/egg/TTS\n", + "copying temp_build/TTS/__init__.py -> build/bdist.linux-x86_64/egg/TTS\n", + "creating build/bdist.linux-x86_64/egg/TTS/vocoder\n", + "creating build/bdist.linux-x86_64/egg/TTS/vocoder/tests\n", + "copying temp_build/TTS/vocoder/tests/test_losses.py -> build/bdist.linux-x86_64/egg/TTS/vocoder/tests\n", + "copying temp_build/TTS/vocoder/tests/test_pqmf.py -> build/bdist.linux-x86_64/egg/TTS/vocoder/tests\n", + "copying temp_build/TTS/vocoder/tests/test_datasets.py -> build/bdist.linux-x86_64/egg/TTS/vocoder/tests\n", + "copying temp_build/TTS/vocoder/tests/test_melgan_discriminator.py -> build/bdist.linux-x86_64/egg/TTS/vocoder/tests\n", + "copying temp_build/TTS/vocoder/tests/test_melgan_generator.py -> build/bdist.linux-x86_64/egg/TTS/vocoder/tests\n", + "copying temp_build/TTS/vocoder/tests/__init__.py -> build/bdist.linux-x86_64/egg/TTS/vocoder/tests\n", + "copying temp_build/TTS/vocoder/tests/test_rwd.py -> build/bdist.linux-x86_64/egg/TTS/vocoder/tests\n", + "creating build/bdist.linux-x86_64/egg/TTS/vocoder/utils\n", + "copying temp_build/TTS/vocoder/utils/console_logger.py -> build/bdist.linux-x86_64/egg/TTS/vocoder/utils\n", + "copying temp_build/TTS/vocoder/utils/__init__.py -> build/bdist.linux-x86_64/egg/TTS/vocoder/utils\n", + "copying temp_build/TTS/vocoder/utils/generic_utils.py -> build/bdist.linux-x86_64/egg/TTS/vocoder/utils\n", + "copying temp_build/TTS/vocoder/utils/io.py -> build/bdist.linux-x86_64/egg/TTS/vocoder/utils\n", + "copying temp_build/TTS/vocoder/train.py -> build/bdist.linux-x86_64/egg/TTS/vocoder\n", + "copying temp_build/TTS/vocoder/__init__.py -> build/bdist.linux-x86_64/egg/TTS/vocoder\n", + "creating build/bdist.linux-x86_64/egg/TTS/vocoder/models\n", + "copying temp_build/TTS/vocoder/models/melgan_discriminator.py -> build/bdist.linux-x86_64/egg/TTS/vocoder/models\n", + "copying temp_build/TTS/vocoder/models/random_window_discriminator.py -> build/bdist.linux-x86_64/egg/TTS/vocoder/models\n", + "copying temp_build/TTS/vocoder/models/__init__.py -> build/bdist.linux-x86_64/egg/TTS/vocoder/models\n", + "copying temp_build/TTS/vocoder/models/multiband_melgan_generator.py -> build/bdist.linux-x86_64/egg/TTS/vocoder/models\n", + "copying temp_build/TTS/vocoder/models/melgan_multiscale_discriminator.py -> build/bdist.linux-x86_64/egg/TTS/vocoder/models\n", + "copying temp_build/TTS/vocoder/models/melgan_generator.py -> build/bdist.linux-x86_64/egg/TTS/vocoder/models\n", + "copying temp_build/TTS/vocoder/compute_tts_features.py -> build/bdist.linux-x86_64/egg/TTS/vocoder\n", + "creating build/bdist.linux-x86_64/egg/TTS/vocoder/layers\n", + "copying temp_build/TTS/vocoder/layers/pqmf.py -> build/bdist.linux-x86_64/egg/TTS/vocoder/layers\n", + "copying temp_build/TTS/vocoder/layers/losses.py -> build/bdist.linux-x86_64/egg/TTS/vocoder/layers\n", + "copying temp_build/TTS/vocoder/layers/__init__.py -> build/bdist.linux-x86_64/egg/TTS/vocoder/layers\n", + "copying temp_build/TTS/vocoder/layers/melgan.py -> build/bdist.linux-x86_64/egg/TTS/vocoder/layers\n", + "creating build/bdist.linux-x86_64/egg/TTS/vocoder/datasets\n", + "copying temp_build/TTS/vocoder/datasets/preprocess.py -> build/bdist.linux-x86_64/egg/TTS/vocoder/datasets\n", + "copying temp_build/TTS/vocoder/datasets/__init__.py -> build/bdist.linux-x86_64/egg/TTS/vocoder/datasets\n", + "copying temp_build/TTS/vocoder/datasets/gan_dataset.py -> 
build/bdist.linux-x86_64/egg/TTS/vocoder/datasets\n", + "creating build/bdist.linux-x86_64/egg/TTS/speaker_encoder\n", + "copying temp_build/TTS/speaker_encoder/compute_embeddings.py -> build/bdist.linux-x86_64/egg/TTS/speaker_encoder\n", + "copying temp_build/TTS/speaker_encoder/loss.py -> build/bdist.linux-x86_64/egg/TTS/speaker_encoder\n", + "copying temp_build/TTS/speaker_encoder/train.py -> build/bdist.linux-x86_64/egg/TTS/speaker_encoder\n", + "copying temp_build/TTS/speaker_encoder/dataset.py -> build/bdist.linux-x86_64/egg/TTS/speaker_encoder\n", + "copying temp_build/TTS/speaker_encoder/__init__.py -> build/bdist.linux-x86_64/egg/TTS/speaker_encoder\n", + "copying temp_build/TTS/speaker_encoder/visual.py -> build/bdist.linux-x86_64/egg/TTS/speaker_encoder\n", + "copying temp_build/TTS/speaker_encoder/model.py -> build/bdist.linux-x86_64/egg/TTS/speaker_encoder\n", + "copying temp_build/TTS/speaker_encoder/tests.py -> build/bdist.linux-x86_64/egg/TTS/speaker_encoder\n", + "copying temp_build/TTS/speaker_encoder/generic_utils.py -> build/bdist.linux-x86_64/egg/TTS/speaker_encoder\n", + "copying temp_build/TTS/setup.py -> build/bdist.linux-x86_64/egg/TTS\n", + "copying temp_build/TTS/synthesize.py -> build/bdist.linux-x86_64/egg/TTS\n", + "creating build/bdist.linux-x86_64/egg/TTS/models\n", + "copying temp_build/TTS/models/tacotron.py -> build/bdist.linux-x86_64/egg/TTS/models\n", + "copying temp_build/TTS/models/__init__.py -> build/bdist.linux-x86_64/egg/TTS/models\n", + "copying temp_build/TTS/models/tacotron_abstract.py -> build/bdist.linux-x86_64/egg/TTS/models\n", + "copying temp_build/TTS/models/tacotron2.py -> build/bdist.linux-x86_64/egg/TTS/models\n", + "creating build/bdist.linux-x86_64/egg/TTS/layers\n", + "copying temp_build/TTS/layers/tacotron.py -> build/bdist.linux-x86_64/egg/TTS/layers\n", + "copying temp_build/TTS/layers/gst_layers.py -> build/bdist.linux-x86_64/egg/TTS/layers\n", + "copying temp_build/TTS/layers/losses.py -> build/bdist.linux-x86_64/egg/TTS/layers\n", + "copying temp_build/TTS/layers/__init__.py -> build/bdist.linux-x86_64/egg/TTS/layers\n", + "copying temp_build/TTS/layers/common_layers.py -> build/bdist.linux-x86_64/egg/TTS/layers\n", + "copying temp_build/TTS/layers/tacotron2.py -> build/bdist.linux-x86_64/egg/TTS/layers\n", + "creating build/bdist.linux-x86_64/egg/TTS/server\n", + "copying temp_build/TTS/server/server.py -> build/bdist.linux-x86_64/egg/TTS/server\n", + "creating build/bdist.linux-x86_64/egg/TTS/server/templates\n", + "copying temp_build/TTS/server/templates/index.html -> build/bdist.linux-x86_64/egg/TTS/server/templates\n", + "copying temp_build/TTS/server/__init__.py -> build/bdist.linux-x86_64/egg/TTS/server\n", + "copying temp_build/TTS/server/synthesizer.py -> build/bdist.linux-x86_64/egg/TTS/server\n", + "creating build/bdist.linux-x86_64/egg/TTS/datasets\n", + "copying temp_build/TTS/datasets/TTSDataset.py -> build/bdist.linux-x86_64/egg/TTS/datasets\n", + "copying temp_build/TTS/datasets/preprocess.py -> build/bdist.linux-x86_64/egg/TTS/datasets\n", + "copying temp_build/TTS/datasets/__init__.py -> build/bdist.linux-x86_64/egg/TTS/datasets\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/tests/test_demo_server.py to test_demo_server.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/tests/test_text_processing.py to test_text_processing.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/tests/test_preprocessors.py to test_preprocessors.cpython-36.pyc\n", + "byte-compiling 
build/bdist.linux-x86_64/egg/TTS/tests/test_loader.py to test_loader.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/tests/test_audio.py to test_audio.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/tests/__init__.py to __init__.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/tests/test_tacotron2_model.py to test_tacotron2_model.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/tests/generic_utils_text.py to generic_utils_text.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/tests/test_tacotron_model.py to test_tacotron_model.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/tests/symbols_tests.py to symbols_tests.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/tests/test_layers.py to test_layers.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/utils/text/symbols.py to symbols.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/utils/text/number_norm.py to number_norm.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/utils/text/cmudict.py to cmudict.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/utils/text/__init__.py to __init__.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/utils/text/cleaners.py to cleaners.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/utils/data.py to data.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/utils/radam.py to radam.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/utils/training.py to training.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/utils/console_logger.py to console_logger.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/utils/__init__.py to __init__.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/utils/visual.py to visual.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/utils/audio.py to audio.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/utils/tensorboard_logger.py to tensorboard_logger.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/utils/speakers.py to speakers.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/utils/measures.py to measures.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/utils/generic_utils.py to generic_utils.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/utils/io.py to io.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/utils/synthesis.py to synthesis.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/distribute.py to distribute.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/train.py to train.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/version.py to version.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/compute_statistics.py to compute_statistics.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/__init__.py to __init__.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/vocoder/tests/test_losses.py to test_losses.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/vocoder/tests/test_pqmf.py to test_pqmf.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/vocoder/tests/test_datasets.py to test_datasets.cpython-36.pyc\n", + "byte-compiling 
build/bdist.linux-x86_64/egg/TTS/vocoder/tests/test_melgan_discriminator.py to test_melgan_discriminator.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/vocoder/tests/test_melgan_generator.py to test_melgan_generator.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/vocoder/tests/__init__.py to __init__.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/vocoder/tests/test_rwd.py to test_rwd.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/vocoder/utils/console_logger.py to console_logger.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/vocoder/utils/__init__.py to __init__.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/vocoder/utils/generic_utils.py to generic_utils.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/vocoder/utils/io.py to io.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/vocoder/train.py to train.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/vocoder/__init__.py to __init__.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/vocoder/models/melgan_discriminator.py to melgan_discriminator.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/vocoder/models/random_window_discriminator.py to random_window_discriminator.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/vocoder/models/__init__.py to __init__.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/vocoder/models/multiband_melgan_generator.py to multiband_melgan_generator.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/vocoder/models/melgan_multiscale_discriminator.py to melgan_multiscale_discriminator.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/vocoder/models/melgan_generator.py to melgan_generator.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/vocoder/compute_tts_features.py to compute_tts_features.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/vocoder/layers/pqmf.py to pqmf.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/vocoder/layers/losses.py to losses.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/vocoder/layers/__init__.py to __init__.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/vocoder/layers/melgan.py to melgan.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/vocoder/datasets/preprocess.py to preprocess.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/vocoder/datasets/__init__.py to __init__.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/vocoder/datasets/gan_dataset.py to gan_dataset.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/speaker_encoder/compute_embeddings.py to compute_embeddings.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/speaker_encoder/loss.py to loss.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/speaker_encoder/train.py to train.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/speaker_encoder/dataset.py to dataset.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/speaker_encoder/__init__.py to __init__.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/speaker_encoder/visual.py to visual.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/speaker_encoder/model.py 
to model.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/speaker_encoder/tests.py to tests.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/speaker_encoder/generic_utils.py to generic_utils.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/setup.py to setup.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/synthesize.py to synthesize.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/models/tacotron.py to tacotron.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/models/__init__.py to __init__.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/models/tacotron_abstract.py to tacotron_abstract.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/models/tacotron2.py to tacotron2.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/layers/tacotron.py to tacotron.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/layers/gst_layers.py to gst_layers.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/layers/losses.py to losses.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/layers/__init__.py to __init__.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/layers/common_layers.py to common_layers.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/layers/tacotron2.py to tacotron2.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/server/server.py to server.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/server/__init__.py to __init__.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/server/synthesizer.py to synthesizer.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/datasets/TTSDataset.py to TTSDataset.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/datasets/preprocess.py to preprocess.cpython-36.pyc\n", + "byte-compiling build/bdist.linux-x86_64/egg/TTS/datasets/__init__.py to __init__.cpython-36.pyc\n", + "creating build/bdist.linux-x86_64/egg/EGG-INFO\n", + "copying tts_namespace/TTS.egg-info/PKG-INFO -> build/bdist.linux-x86_64/egg/EGG-INFO\n", + "copying tts_namespace/TTS.egg-info/SOURCES.txt -> build/bdist.linux-x86_64/egg/EGG-INFO\n", + "copying tts_namespace/TTS.egg-info/dependency_links.txt -> build/bdist.linux-x86_64/egg/EGG-INFO\n", + "copying tts_namespace/TTS.egg-info/entry_points.txt -> build/bdist.linux-x86_64/egg/EGG-INFO\n", + "copying tts_namespace/TTS.egg-info/requires.txt -> build/bdist.linux-x86_64/egg/EGG-INFO\n", + "copying tts_namespace/TTS.egg-info/top_level.txt -> build/bdist.linux-x86_64/egg/EGG-INFO\n", + "zip_safe flag not set; analyzing archive contents...\n", + "TTS.__pycache__.setup.cpython-36: module references __file__\n", + "TTS.__pycache__.train.cpython-36: module references __file__\n", + "TTS.server.__pycache__.server.cpython-36: module references __file__\n", + "TTS.speaker_encoder.__pycache__.tests.cpython-36: module references __file__\n", + "TTS.speaker_encoder.__pycache__.train.cpython-36: module references __file__\n", + "TTS.tests.__pycache__.__init__.cpython-36: module references __file__\n", + "TTS.tests.__pycache__.test_loader.cpython-36: module references __file__\n", + "TTS.tests.__pycache__.test_tacotron2_model.cpython-36: module references __file__\n", + "TTS.tests.__pycache__.test_tacotron_model.cpython-36: module references __file__\n", + 
"TTS.vocoder.__pycache__.train.cpython-36: module references __file__\n", + "TTS.vocoder.tests.__pycache__.test_datasets.cpython-36: module references __file__\n", + "TTS.vocoder.tests.__pycache__.test_losses.cpython-36: module references __file__\n", + "creating dist\n", + "creating 'dist/TTS-0.0.3+c7296b3-py3.6.egg' and adding 'build/bdist.linux-x86_64/egg' to it\n", + "removing 'build/bdist.linux-x86_64/egg' (and everything under it)\n", + "Processing TTS-0.0.3+c7296b3-py3.6.egg\n", + "creating /usr/local/lib/python3.6/dist-packages/TTS-0.0.3+c7296b3-py3.6.egg\n", + "Extracting TTS-0.0.3+c7296b3-py3.6.egg to /usr/local/lib/python3.6/dist-packages\n", + "Adding TTS 0.0.3+c7296b3 to easy-install.pth file\n", + "Installing tts-server script to /usr/local/bin\n", + "\n", + "Installed /usr/local/lib/python3.6/dist-packages/TTS-0.0.3+c7296b3-py3.6.egg\n", + "Processing dependencies for TTS==0.0.3+c7296b3\n", + "Searching for attrdict\n", + "Reading https://pypi.org/simple/attrdict/\n", + "Downloading https://files.pythonhosted.org/packages/ef/97/28fe7e68bc7adfce67d4339756e85e9fcf3c6fd7f0c0781695352b70472c/attrdict-2.0.1-py2.py3-none-any.whl#sha256=9432e3498c74ff7e1b20b3d93b45d766b71cbffa90923496f82c4ae38b92be34\n", + "Best match: attrdict 2.0.1\n", + "Processing attrdict-2.0.1-py2.py3-none-any.whl\n", + "Installing attrdict-2.0.1-py2.py3-none-any.whl to /usr/local/lib/python3.6/dist-packages\n", + "Adding attrdict 2.0.1 to easy-install.pth file\n", + "\n", + "Installed /usr/local/lib/python3.6/dist-packages/attrdict-2.0.1-py3.6.egg\n", + "Searching for unidecode==0.4.20\n", + "Reading https://pypi.org/simple/unidecode/\n", + "Downloading https://files.pythonhosted.org/packages/c3/6f/05f5deb753d0594583aa1cc0d2fe9d631d9a00e9b28d0da49f8d3763755b/Unidecode-0.04.20-py2.py3-none-any.whl#sha256=eedac7bfd886f43484787206f6a141b232e2b2a58652c54d06499b187fd84660\n", + "Best match: Unidecode 0.4.20\n", + "Processing Unidecode-0.04.20-py2.py3-none-any.whl\n", + "Installing Unidecode-0.04.20-py2.py3-none-any.whl to /usr/local/lib/python3.6/dist-packages\n", + "Adding Unidecode 0.4.20 to easy-install.pth file\n", + "Installing unidecode script to /usr/local/bin\n", + "\n", + "Installed /usr/local/lib/python3.6/dist-packages/Unidecode-0.4.20-py3.6.egg\n", + "Searching for librosa==0.6.2\n", + "Reading https://pypi.org/simple/librosa/\n", + "Downloading https://files.pythonhosted.org/packages/09/b4/5b411f19de48f8fc1a0ff615555aa9124952e4156e94d4803377e50cfa4c/librosa-0.6.2.tar.gz#sha256=2aa868b8aade749b9904eeb7034fcf44115601c367969b6d01f5e1b4b9b6031d\n", + "Best match: librosa 0.6.2\n", + "Processing librosa-0.6.2.tar.gz\n", + "Writing /tmp/easy_install-3oxyyk5x/librosa-0.6.2/setup.cfg\n", + "Running librosa-0.6.2/setup.py -q bdist_egg --dist-dir /tmp/easy_install-3oxyyk5x/librosa-0.6.2/egg-dist-tmp-ky3tcqa8\n", + "zip_safe flag not set; analyzing archive contents...\n", + "librosa.util.__pycache__.deprecation.cpython-36: module MAY be using inspect.stack\n", + "creating /usr/local/lib/python3.6/dist-packages/librosa-0.6.2-py3.6.egg\n", + "Extracting librosa-0.6.2-py3.6.egg to /usr/local/lib/python3.6/dist-packages\n", + "Adding librosa 0.6.2 to easy-install.pth file\n", + "\n", + "Installed /usr/local/lib/python3.6/dist-packages/librosa-0.6.2-py3.6.egg\n", + "Searching for phonemizer==2.2\n", + "Best match: phonemizer 2.2\n", + "Adding phonemizer 2.2 to easy-install.pth file\n", + "Installing phonemize script to /usr/local/bin\n", + "\n", + "Using /usr/local/lib/python3.6/dist-packages\n", + "Searching for 
SoundFile==0.10.3.post1\n", + "Best match: SoundFile 0.10.3.post1\n", + "Adding SoundFile 0.10.3.post1 to easy-install.pth file\n", + "\n", + "Using /usr/local/lib/python3.6/dist-packages\n", + "Searching for bokeh==1.4.0\n", + "Best match: bokeh 1.4.0\n", + "Adding bokeh 1.4.0 to easy-install.pth file\n", + "Installing bokeh script to /usr/local/bin\n", + "\n", + "Using /usr/local/lib/python3.6/dist-packages\n", + "Searching for inflect==2.1.0\n", + "Best match: inflect 2.1.0\n", + "Adding inflect 2.1.0 to easy-install.pth file\n", + "\n", + "Using /usr/local/lib/python3.6/dist-packages\n", + "Searching for tqdm==4.41.1\n", + "Best match: tqdm 4.41.1\n", + "Adding tqdm 4.41.1 to easy-install.pth file\n", + "Installing tqdm script to /usr/local/bin\n", + "\n", + "Using /usr/local/lib/python3.6/dist-packages\n", + "Searching for Flask==1.1.2\n", + "Best match: Flask 1.1.2\n", + "Adding Flask 1.1.2 to easy-install.pth file\n", + "Installing flask script to /usr/local/bin\n", + "\n", + "Using /usr/local/lib/python3.6/dist-packages\n", + "Searching for Pillow==7.0.0\n", + "Best match: Pillow 7.0.0\n", + "Adding Pillow 7.0.0 to easy-install.pth file\n", + "\n", + "Using /usr/local/lib/python3.6/dist-packages\n", + "Searching for matplotlib==3.2.2\n", + "Best match: matplotlib 3.2.2\n", + "Adding matplotlib 3.2.2 to easy-install.pth file\n", + "\n", + "Using /usr/local/lib/python3.6/dist-packages\n", + "Searching for tensorboardX==2.1\n", + "Best match: tensorboardX 2.1\n", + "Adding tensorboardX 2.1 to easy-install.pth file\n", + "\n", + "Using /usr/local/lib/python3.6/dist-packages\n", + "Searching for numpy==1.18.5\n", + "Best match: numpy 1.18.5\n", + "Adding numpy 1.18.5 to easy-install.pth file\n", + "Installing f2py script to /usr/local/bin\n", + "Installing f2py3 script to /usr/local/bin\n", + "Installing f2py3.6 script to /usr/local/bin\n", + "\n", + "Using /usr/local/lib/python3.6/dist-packages\n", + "Searching for torch==1.5.1+cu101\n", + "Best match: torch 1.5.1+cu101\n", + "Adding torch 1.5.1+cu101 to easy-install.pth file\n", + "Installing convert-caffe2-to-onnx script to /usr/local/bin\n", + "Installing convert-onnx-to-caffe2 script to /usr/local/bin\n", + "\n", + "Using /usr/local/lib/python3.6/dist-packages\n", + "Searching for scipy==1.4.1\n", + "Best match: scipy 1.4.1\n", + "Adding scipy 1.4.1 to easy-install.pth file\n", + "\n", + "Using /usr/local/lib/python3.6/dist-packages\n", + "Searching for attrs==19.3.0\n", + "Best match: attrs 19.3.0\n", + "Adding attrs 19.3.0 to easy-install.pth file\n", + "\n", + "Using /usr/local/lib/python3.6/dist-packages\n", + "Searching for segments==2.1.3\n", + "Best match: segments 2.1.3\n", + "Adding segments 2.1.3 to easy-install.pth file\n", + "Installing segments script to /usr/local/bin\n", + "\n", + "Using /usr/local/lib/python3.6/dist-packages\n", + "Searching for joblib==0.16.0\n", + "Best match: joblib 0.16.0\n", + "Adding joblib 0.16.0 to easy-install.pth file\n", + "\n", + "Using /usr/local/lib/python3.6/dist-packages\n", + "Searching for cffi==1.14.0\n", + "Best match: cffi 1.14.0\n", + "Adding cffi 1.14.0 to easy-install.pth file\n", + "\n", + "Using /usr/local/lib/python3.6/dist-packages\n", + "Searching for python-dateutil==2.8.1\n", + "Best match: python-dateutil 2.8.1\n", + "Adding python-dateutil 2.8.1 to easy-install.pth file\n", + "\n", + "Using /usr/local/lib/python3.6/dist-packages\n", + "Searching for packaging==20.4\n", + "Best match: packaging 20.4\n", + "Adding packaging 20.4 to easy-install.pth file\n", + "\n", + 
"Using /usr/local/lib/python3.6/dist-packages\n", + "Searching for Jinja2==2.11.2\n", + "Best match: Jinja2 2.11.2\n", + "Adding Jinja2 2.11.2 to easy-install.pth file\n", + "\n", + "Using /usr/local/lib/python3.6/dist-packages\n", + "Searching for PyYAML==3.13\n", + "Best match: PyYAML 3.13\n", + "Adding PyYAML 3.13 to easy-install.pth file\n", + "\n", + "Using /usr/local/lib/python3.6/dist-packages\n", + "Searching for six==1.12.0\n", + "Best match: six 1.12.0\n", + "Adding six 1.12.0 to easy-install.pth file\n", + "\n", + "Using /usr/local/lib/python3.6/dist-packages\n", + "Searching for tornado==4.5.3\n", + "Best match: tornado 4.5.3\n", + "Adding tornado 4.5.3 to easy-install.pth file\n", + "\n", + "Using /usr/local/lib/python3.6/dist-packages\n", + "Searching for Werkzeug==1.0.1\n", + "Best match: Werkzeug 1.0.1\n", + "Adding Werkzeug 1.0.1 to easy-install.pth file\n", + "\n", + "Using /usr/local/lib/python3.6/dist-packages\n", + "Searching for itsdangerous==1.1.0\n", + "Best match: itsdangerous 1.1.0\n", + "Adding itsdangerous 1.1.0 to easy-install.pth file\n", + "\n", + "Using /usr/local/lib/python3.6/dist-packages\n", + "Searching for click==7.1.2\n", + "Best match: click 7.1.2\n", + "Adding click 7.1.2 to easy-install.pth file\n", + "\n", + "Using /usr/local/lib/python3.6/dist-packages\n", + "Searching for pyparsing==2.4.7\n", + "Best match: pyparsing 2.4.7\n", + "Adding pyparsing 2.4.7 to easy-install.pth file\n", + "\n", + "Using /usr/local/lib/python3.6/dist-packages\n", + "Searching for cycler==0.10.0\n", + "Best match: cycler 0.10.0\n", + "Adding cycler 0.10.0 to easy-install.pth file\n", + "\n", + "Using /usr/local/lib/python3.6/dist-packages\n", + "Searching for kiwisolver==1.2.0\n", + "Best match: kiwisolver 1.2.0\n", + "Adding kiwisolver 1.2.0 to easy-install.pth file\n", + "\n", + "Using /usr/local/lib/python3.6/dist-packages\n", + "Searching for protobuf==3.10.0\n", + "Best match: protobuf 3.10.0\n", + "Adding protobuf 3.10.0 to easy-install.pth file\n", + "\n", + "Using /usr/local/lib/python3.6/dist-packages\n", + "Searching for numba==0.48.0\n", + "Best match: numba 0.48.0\n", + "Adding numba 0.48.0 to easy-install.pth file\n", + "\n", + "Using /usr/local/lib/python3.6/dist-packages\n", + "Searching for resampy==0.2.2\n", + "Best match: resampy 0.2.2\n", + "Adding resampy 0.2.2 to easy-install.pth file\n", + "\n", + "Using /usr/local/lib/python3.6/dist-packages\n", + "Searching for decorator==4.4.2\n", + "Best match: decorator 4.4.2\n", + "Adding decorator 4.4.2 to easy-install.pth file\n", + "\n", + "Using /usr/local/lib/python3.6/dist-packages\n", + "Searching for scikit-learn==0.22.2.post1\n", + "Best match: scikit-learn 0.22.2.post1\n", + "Adding scikit-learn 0.22.2.post1 to easy-install.pth file\n", + "\n", + "Using /usr/local/lib/python3.6/dist-packages\n", + "Searching for audioread==2.1.8\n", + "Best match: audioread 2.1.8\n", + "Adding audioread 2.1.8 to easy-install.pth file\n", + "\n", + "Using /usr/local/lib/python3.6/dist-packages\n", + "Searching for future==0.16.0\n", + "Best match: future 0.16.0\n", + "Adding future 0.16.0 to easy-install.pth file\n", + "Installing futurize script to /usr/local/bin\n", + "Installing pasteurize script to /usr/local/bin\n", + "\n", + "Using /usr/local/lib/python3.6/dist-packages\n", + "Searching for clldutils==3.5.2\n", + "Best match: clldutils 3.5.2\n", + "Adding clldutils 3.5.2 to easy-install.pth file\n", + "\n", + "Using /usr/local/lib/python3.6/dist-packages\n", + "Searching for regex==2019.12.20\n", + "Best match: 
regex 2019.12.20\n", + "Adding regex 2019.12.20 to easy-install.pth file\n", + "\n", + "Using /usr/local/lib/python3.6/dist-packages\n", + "Searching for csvw==1.7.0\n", + "Best match: csvw 1.7.0\n", + "Adding csvw 1.7.0 to easy-install.pth file\n", + "\n", + "Using /usr/local/lib/python3.6/dist-packages\n", + "Searching for pycparser==2.20\n", + "Best match: pycparser 2.20\n", + "Adding pycparser 2.20 to easy-install.pth file\n", + "\n", + "Using /usr/local/lib/python3.6/dist-packages\n", + "Searching for MarkupSafe==1.1.1\n", + "Best match: MarkupSafe 1.1.1\n", + "Adding MarkupSafe 1.1.1 to easy-install.pth file\n", + "\n", + "Using /usr/local/lib/python3.6/dist-packages\n", + "Searching for setuptools==49.1.0\n", + "Best match: setuptools 49.1.0\n", + "Adding setuptools 49.1.0 to easy-install.pth file\n", + "Installing easy_install script to /usr/local/bin\n", + "Installing easy_install-3.8 script to /usr/local/bin\n", + "\n", + "Using /usr/local/lib/python3.6/dist-packages\n", + "Searching for llvmlite==0.31.0\n", + "Best match: llvmlite 0.31.0\n", + "Adding llvmlite 0.31.0 to easy-install.pth file\n", + "\n", + "Using /usr/local/lib/python3.6/dist-packages\n", + "Searching for tabulate==0.8.7\n", + "Best match: tabulate 0.8.7\n", + "Adding tabulate 0.8.7 to easy-install.pth file\n", + "Installing tabulate script to /usr/local/bin\n", + "\n", + "Using /usr/local/lib/python3.6/dist-packages\n", + "Searching for colorlog==4.1.0\n", + "Best match: colorlog 4.1.0\n", + "Adding colorlog 4.1.0 to easy-install.pth file\n", + "\n", + "Using /usr/local/lib/python3.6/dist-packages\n", + "Searching for rfc3986==1.4.0\n", + "Best match: rfc3986 1.4.0\n", + "Adding rfc3986 1.4.0 to easy-install.pth file\n", + "\n", + "Using /usr/local/lib/python3.6/dist-packages\n", + "Searching for isodate==0.6.0\n", + "Best match: isodate 0.6.0\n", + "Adding isodate 0.6.0 to easy-install.pth file\n", + "\n", + "Using /usr/local/lib/python3.6/dist-packages\n", + "Searching for uritemplate==3.0.1\n", + "Best match: uritemplate 3.0.1\n", + "Adding uritemplate 3.0.1 to easy-install.pth file\n", + "\n", + "Using /usr/local/lib/python3.6/dist-packages\n", + "Finished processing dependencies for TTS==0.0.3+c7296b3\n", + "Collecting tensorflow==2.3.0rc0\n", + "\u001b[?25l Downloading https://files.pythonhosted.org/packages/8b/68/7c6c8e2b65ad4a3ff5ef658c04a6c2802ff7fe55fc7eecacb6efee1abc40/tensorflow-2.3.0rc0-cp36-cp36m-manylinux2010_x86_64.whl (320.3MB)\n", + "\u001b[K |████████████████████████████████| 320.3MB 49kB/s \n", + "\u001b[?25hRequirement already satisfied: astunparse==1.6.3 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.3.0rc0) (1.6.3)\n", + "Requirement already satisfied: tensorboard<2.3.0,>=2.2.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.3.0rc0) (2.2.2)\n", + "Requirement already satisfied: wheel>=0.26 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.3.0rc0) (0.34.2)\n", + "Requirement already satisfied: numpy<1.19.0,>=1.16.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.3.0rc0) (1.18.5)\n", + "Requirement already satisfied: grpcio>=1.8.6 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.3.0rc0) (1.30.0)\n", + "Requirement already satisfied: absl-py>=0.7.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.3.0rc0) (0.9.0)\n", + "Requirement already satisfied: keras-preprocessing<1.2,>=1.1.1 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.3.0rc0) (1.1.2)\n", + "Requirement already satisfied: termcolor>=1.1.0 
in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.3.0rc0) (1.1.0)\n", + "Requirement already satisfied: six>=1.12.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.3.0rc0) (1.12.0)\n", + "Requirement already satisfied: gast==0.3.3 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.3.0rc0) (0.3.3)\n", + "Requirement already satisfied: h5py<2.11.0,>=2.10.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.3.0rc0) (2.10.0)\n", + "Requirement already satisfied: wrapt>=1.11.1 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.3.0rc0) (1.12.1)\n", + "Requirement already satisfied: scipy==1.4.1 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.3.0rc0) (1.4.1)\n", + "Collecting tf-estimator-nightly<2.3.0.dev2020062302,>=2.3.0.dev2020062301\n", + "\u001b[?25l Downloading https://files.pythonhosted.org/packages/17/3b/fb9aafd734da258411bff2a600cabff65c7d201782318791b72422bd973d/tf_estimator_nightly-2.3.0.dev2020062301-py2.py3-none-any.whl (459kB)\n", + "\u001b[K |████████████████████████████████| 460kB 35.1MB/s \n", + "\u001b[?25hRequirement already satisfied: google-pasta>=0.1.8 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.3.0rc0) (0.2.0)\n", + "Requirement already satisfied: protobuf>=3.9.2 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.3.0rc0) (3.10.0)\n", + "Requirement already satisfied: opt-einsum>=2.3.2 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.3.0rc0) (3.2.1)\n", + "Requirement already satisfied: requests<3,>=2.21.0 in /usr/local/lib/python3.6/dist-packages (from tensorboard<2.3.0,>=2.2.0->tensorflow==2.3.0rc0) (2.23.0)\n", + "Requirement already satisfied: werkzeug>=0.11.15 in /usr/local/lib/python3.6/dist-packages (from tensorboard<2.3.0,>=2.2.0->tensorflow==2.3.0rc0) (1.0.1)\n", + "Requirement already satisfied: google-auth-oauthlib<0.5,>=0.4.1 in /usr/local/lib/python3.6/dist-packages (from tensorboard<2.3.0,>=2.2.0->tensorflow==2.3.0rc0) (0.4.1)\n", + "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.6/dist-packages (from tensorboard<2.3.0,>=2.2.0->tensorflow==2.3.0rc0) (3.2.2)\n", + "Requirement already satisfied: setuptools>=41.0.0 in /usr/local/lib/python3.6/dist-packages (from tensorboard<2.3.0,>=2.2.0->tensorflow==2.3.0rc0) (49.1.0)\n", + "Requirement already satisfied: tensorboard-plugin-wit>=1.6.0 in /usr/local/lib/python3.6/dist-packages (from tensorboard<2.3.0,>=2.2.0->tensorflow==2.3.0rc0) (1.7.0)\n", + "Requirement already satisfied: google-auth<2,>=1.6.3 in /usr/local/lib/python3.6/dist-packages (from tensorboard<2.3.0,>=2.2.0->tensorflow==2.3.0rc0) (1.17.2)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard<2.3.0,>=2.2.0->tensorflow==2.3.0rc0) (2020.6.20)\n", + "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard<2.3.0,>=2.2.0->tensorflow==2.3.0rc0) (2.10)\n", + "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard<2.3.0,>=2.2.0->tensorflow==2.3.0rc0) (1.24.3)\n", + "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard<2.3.0,>=2.2.0->tensorflow==2.3.0rc0) (3.0.4)\n", + "Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.6/dist-packages (from 
google-auth-oauthlib<0.5,>=0.4.1->tensorboard<2.3.0,>=2.2.0->tensorflow==2.3.0rc0) (1.3.0)\n", + "Requirement already satisfied: importlib-metadata; python_version < \"3.8\" in /usr/local/lib/python3.6/dist-packages (from markdown>=2.6.8->tensorboard<2.3.0,>=2.2.0->tensorflow==2.3.0rc0) (1.7.0)\n", + "Requirement already satisfied: rsa<5,>=3.1.4; python_version >= \"3\" in /usr/local/lib/python3.6/dist-packages (from google-auth<2,>=1.6.3->tensorboard<2.3.0,>=2.2.0->tensorflow==2.3.0rc0) (4.6)\n", + "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.6/dist-packages (from google-auth<2,>=1.6.3->tensorboard<2.3.0,>=2.2.0->tensorflow==2.3.0rc0) (0.2.8)\n", + "Requirement already satisfied: cachetools<5.0,>=2.0.0 in /usr/local/lib/python3.6/dist-packages (from google-auth<2,>=1.6.3->tensorboard<2.3.0,>=2.2.0->tensorflow==2.3.0rc0) (4.1.1)\n", + "Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.6/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<0.5,>=0.4.1->tensorboard<2.3.0,>=2.2.0->tensorflow==2.3.0rc0) (3.1.0)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.6/dist-packages (from importlib-metadata; python_version < \"3.8\"->markdown>=2.6.8->tensorboard<2.3.0,>=2.2.0->tensorflow==2.3.0rc0) (3.1.0)\n", + "Requirement already satisfied: pyasn1>=0.1.3 in /usr/local/lib/python3.6/dist-packages (from rsa<5,>=3.1.4; python_version >= \"3\"->google-auth<2,>=1.6.3->tensorboard<2.3.0,>=2.2.0->tensorflow==2.3.0rc0) (0.4.8)\n", + "Installing collected packages: tf-estimator-nightly, tensorflow\n", + " Found existing installation: tensorflow 2.2.0\n", + " Uninstalling tensorflow-2.2.0:\n", + " Successfully uninstalled tensorflow-2.2.0\n", + "Successfully installed tensorflow-2.3.0rc0 tf-estimator-nightly-2.3.0.dev2020062301\n", + "/content\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Zlgi8fPdpRF0", + "colab_type": "text" + }, + "source": [ + "### Define TTS function" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "f-Yc42nQZG5A", + "colab_type": "code", + "colab": {} + }, + "source": [ + "def run_vocoder(mel_spec):\n", + " vocoder_inputs = mel_spec[None, :, :]\n", + " # get input and output details\n", + " input_details = vocoder_model.get_input_details()\n", + " # reshape input tensor for the new input shape\n", + " vocoder_model.resize_tensor_input(input_details[0]['index'], vocoder_inputs.shape)\n", + " vocoder_model.allocate_tensors()\n", + " detail = input_details[0]\n", + " vocoder_model.set_tensor(detail['index'], vocoder_inputs)\n", + " # run the model\n", + " vocoder_model.invoke()\n", + " # collect outputs\n", + " output_details = vocoder_model.get_output_details()\n", + " waveform = vocoder_model.get_tensor(output_details[0]['index'])\n", + " return waveform \n", + "\n", + "\n", + "def tts(model, text, CONFIG, p):\n", + " t_1 = time.time()\n", + " waveform, alignment, mel_spec, mel_postnet_spec, stop_tokens, inputs = synthesis(model, text, CONFIG, use_cuda, ap, speaker_id, style_wav=None,\n", + " truncated=False, enable_eos_bos_chars=CONFIG.enable_eos_bos_chars,\n", + " backend='tflite')\n", + " waveform = run_vocoder(mel_postnet_spec.T)\n", + " waveform = waveform[0, 0]\n", + " rtf = (time.time() - t_1) / (len(waveform) / ap.sample_rate)\n", + " tps = (time.time() - t_1) / len(waveform)\n", + " print(waveform.shape)\n", + " print(\" > Run-time: {}\".format(time.time() - t_1))\n", + " print(\" > Real-time factor: {}\".format(rtf))\n", + 
" print(\" > Time per step: {}\".format(tps))\n", + " IPython.display.display(IPython.display.Audio(waveform, rate=CONFIG.audio['sample_rate'])) \n", + " return alignment, mel_postnet_spec, stop_tokens, waveform" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZksegYQepkFg", + "colab_type": "text" + }, + "source": [ + "### Load TF Models" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "oVa0kOamprgj", + "colab_type": "code", + "colab": {} + }, + "source": [ + "import os\n", + "import torch\n", + "import time\n", + "import IPython\n", + "\n", + "from TTS.tf.utils.tflite import load_tflite_model\n", + "from TTS.tf.utils.io import load_checkpoint\n", + "from TTS.utils.io import load_config\n", + "from TTS.utils.text.symbols import symbols, phonemes\n", + "from TTS.utils.audio import AudioProcessor\n", + "from TTS.tts.utils.synthesis import synthesis" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "EY-sHVO8IFSH", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# runtime settings\n", + "use_cuda = False" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "_1aIUp2FpxOQ", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# model paths\n", + "TTS_MODEL = \"tts_model.tflite\"\n", + "TTS_CONFIG = \"config.json\"\n", + "VOCODER_MODEL = \"vocoder_model.tflite\"\n", + "VOCODER_CONFIG = \"config_vocoder.json\"" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "CpgmdBVQplbv", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# load configs\n", + "TTS_CONFIG = load_config(TTS_CONFIG)\n", + "VOCODER_CONFIG = load_config(VOCODER_CONFIG)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "zmrQxiozIUVE", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 471 + }, + "outputId": "ca7e9016-4c28-4cef-efe7-0613d399aa4c" + }, + "source": [ + "# load the audio processor\n", + "ap = AudioProcessor(**TTS_CONFIG.audio) " + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + " > Setting up Audio Processor...\n", + " | > sample_rate:22050\n", + " | > num_mels:80\n", + " | > min_level_db:-100\n", + " | > frame_shift_ms:None\n", + " | > frame_length_ms:None\n", + " | > ref_level_db:0\n", + " | > fft_size:1024\n", + " | > power:1.5\n", + " | > preemphasis:0.0\n", + " | > griffin_lim_iters:60\n", + " | > signal_norm:True\n", + " | > symmetric_norm:True\n", + " | > mel_fmin:50.0\n", + " | > mel_fmax:7600.0\n", + " | > spec_gain:1.0\n", + " | > stft_pad_mode:reflect\n", + " | > max_norm:4.0\n", + " | > clip_norm:True\n", + " | > do_trim_silence:True\n", + " | > trim_db:60\n", + " | > do_sound_norm:False\n", + " | > stats_path:./scale_stats.npy\n", + " | > hop_length:256\n", + " | > win_length:1024\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "8fLoI4ipqMeS", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# LOAD TTS MODEL\n", + "# multi speaker \n", + "speaker_id = None\n", + "speakers = []\n", + "\n", + "# load the models\n", + "model = load_tflite_model(TTS_MODEL)\n", + "vocoder_model = load_tflite_model(VOCODER_MODEL)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ws_YkPKsLgo-", + "colab_type": "text" + }, + 
"source": [ + "## Run Inference" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "FuWxZ9Ey5Puj", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 134 + }, + "outputId": "d1888ebd-3208-42a4-aaf9-78d0e3ec987d" + }, + "source": [ + "sentence = \"Bill got in the habit of asking himself “Is that thought true?” and if he wasn’t absolutely certain it was, he just let it go.\"\n", + "align, spec, stop_tokens, wav = tts(model, sentence, TTS_CONFIG, ap)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "(185856,)\n", + " > Run-time: 3.8069238662719727\n", + " > Real-time factor: 0.45162849859449977\n", + " > Time per step: 2.048206938938661e-05\n" + ], + "name": "stdout" + }, + { + "output_type": "display_data", + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [] + } + } + ] + } + ] +} \ No newline at end of file diff --git a/notebooks/DDC_TTS_and_MultiBand_MelGAN_TF_Example.ipynb b/notebooks/DDC_TTS_and_MultiBand_MelGAN_TF_Example.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..7f616138f3abfceb706d0c375b855a30b5d72350 --- /dev/null +++ b/notebooks/DDC_TTS_and_MultiBand_MelGAN_TF_Example.ipynb @@ -0,0 +1,346 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "Collapsed": "false", + "colab_type": "text", + "id": "6LWsNd3_M3MP" + }, + "source": [ + "# Mozilla TTS on CPU Real-Time Speech Synthesis with Tensorflow" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "Collapsed": "false", + "colab_type": "text", + "id": "FAqrSIWgLyP0" + }, + "source": [ + "**These models are converted from released [PyTorch models](https://colab.research.google.com/drive/1u_16ZzHjKYFn1HNVuA4Qf_i2MMFB9olY?usp=sharing) using our TF utilities provided in Mozilla TTS.**\n", + "\n", + "These TF models support TF 2.2 and for different versions you might need to\n", + "regenerate them. 
\n", + "\n", + "We use Tacotron2 and MultiBand-Melgan models and LJSpeech dataset.\n", + "\n", + "Tacotron2 is trained using [Double Decoder Consistency](https://erogol.com/solving-attention-problems-of-tts-models-with-double-decoder-consistency/) (DDC) only for 130K steps (3 days) with a single GPU.\n", + "\n", + "MultiBand-Melgan is trained 1.45M steps with real spectrograms.\n", + "\n", + "Note that both model performances can be improved with more training.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "Collapsed": "false", + "colab_type": "text", + "id": "Ku-dA4DKoeXk" + }, + "source": [ + "### Download Models" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 162 + }, + "colab_type": "code", + "id": "jGIgnWhGsxU1", + "outputId": "08b0dddd-4edf-48c9-e8e5-a419b36a5c3d", + "tags": [] + }, + "outputs": [], + "source": [ + "!gdown --id 1p7OSEEW_Z7ORxNgfZwhMy7IiLE1s0aH7 -O data/tts_model.pkl\n", + "!gdown --id 18CQ6G6tBEOfvCHlPqP8EBI4xWbrr9dBc -O data/config.json" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 235 + }, + "colab_type": "code", + "id": "4dnpE0-kvTsu", + "outputId": "2fe836eb-c7e7-4f1e-9352-0142126bb19f", + "tags": [] + }, + "outputs": [], + "source": [ + "!gdown --id 1rHmj7CqD3Sfa716Y3ub_vpIBrQg_b1yF -O data/vocoder_model.pkl\n", + "!gdown --id 1Rd0R_nRCrbjEdpOwq6XwZAktvugiBvmu -O data/config_vocoder.json\n", + "!gdown --id 11oY3Tv0kQtxK_JPgxrfesa99maVXHNxU -O data/scale_stats.npy" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "Collapsed": "false", + "colab_type": "text", + "id": "Zlgi8fPdpRF0" + }, + "source": [ + "### Define TTS function" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false", + "colab": {}, + "colab_type": "code", + "id": "f-Yc42nQZG5A" + }, + "outputs": [], + "source": [ + "def tts(model, text, CONFIG, p):\n", + " t_1 = time.time()\n", + " waveform, alignment, mel_spec, mel_postnet_spec, stop_tokens, inputs = synthesis(model, text, CONFIG, use_cuda, ap, speaker_id, style_wav=None,\n", + " truncated=False, enable_eos_bos_chars=CONFIG.enable_eos_bos_chars,\n", + " backend='tf')\n", + " waveform = vocoder_model.inference(torch.FloatTensor(mel_postnet_spec.T).unsqueeze(0))\n", + " waveform = waveform.numpy()[0, 0]\n", + " rtf = (time.time() - t_1) / (len(waveform) / ap.sample_rate)\n", + " tps = (time.time() - t_1) / len(waveform)\n", + " print(waveform.shape)\n", + " print(\" > Run-time: {}\".format(time.time() - t_1))\n", + " print(\" > Real-time factor: {}\".format(rtf))\n", + " print(\" > Time per step: {}\".format(tps))\n", + " IPython.display.display(IPython.display.Audio(waveform, rate=CONFIG.audio['sample_rate'])) \n", + " return alignment, mel_postnet_spec, stop_tokens, waveform" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "Collapsed": "false", + "colab_type": "text", + "id": "ZksegYQepkFg" + }, + "source": [ + "### Load Models" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false", + "colab": {}, + "colab_type": "code", + "id": "oVa0kOamprgj" + }, + "outputs": [], + "source": [ + "import os\n", + "import torch\n", + "import time\n", + "import IPython\n", + "\n", + "from TTS.tts.tf.utils.generic_utils import setup_model\n", + "from TTS.tts.tf.utils.io import load_checkpoint\n", + 
"from TTS.utils.io import load_config\n", + "from TTS.tts.utils.text.symbols import symbols, phonemes\n", + "from TTS.utils.audio import AudioProcessor\n", + "from TTS.tts.utils.synthesis import synthesis" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false", + "colab": {}, + "colab_type": "code", + "id": "EY-sHVO8IFSH" + }, + "outputs": [], + "source": [ + "# runtime settings\n", + "use_cuda = False" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false", + "colab": {}, + "colab_type": "code", + "id": "_1aIUp2FpxOQ" + }, + "outputs": [], + "source": [ + "# model paths\n", + "TTS_MODEL = \"data/tts_model.pkl\"\n", + "TTS_CONFIG = \"data/config.json\"\n", + "VOCODER_MODEL = \"data/vocoder_model.pkl\"\n", + "VOCODER_CONFIG = \"data/config_vocoder.json\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false", + "colab": {}, + "colab_type": "code", + "id": "CpgmdBVQplbv" + }, + "outputs": [], + "source": [ + "# load configs\n", + "TTS_CONFIG = load_config(TTS_CONFIG)\n", + "VOCODER_CONFIG = load_config(VOCODER_CONFIG)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 471 + }, + "colab_type": "code", + "id": "zmrQxiozIUVE", + "outputId": "fa71bd05-401f-4e5b-a6f7-60ae765966db", + "tags": [] + }, + "outputs": [], + "source": [ + "# load the audio processor\n", + "TTS_CONFIG.audio['stats_path'] = 'data/scale_stats.npy'\n", + "ap = AudioProcessor(**TTS_CONFIG.audio) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 72 + }, + "colab_type": "code", + "id": "8fLoI4ipqMeS", + "outputId": "595d990f-930d-4698-ee14-77796b5eed7d", + "tags": [] + }, + "outputs": [], + "source": [ + "# LOAD TTS MODEL\n", + "# multi speaker \n", + "speaker_id = None\n", + "speakers = []\n", + "\n", + "# load the model\n", + "num_chars = len(phonemes) if TTS_CONFIG.use_phonemes else len(symbols)\n", + "model = setup_model(num_chars, len(speakers), TTS_CONFIG)\n", + "model.build_inference()\n", + "model = load_checkpoint(model, TTS_MODEL)\n", + "model.decoder.set_max_decoder_steps(1000)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 489 + }, + "colab_type": "code", + "id": "zKoq0GgzqzhQ", + "outputId": "2cc3deae-144f-4465-da3b-98628d948506" + }, + "outputs": [], + "source": [ + "from TTS.vocoder.tf.utils.generic_utils import setup_generator\n", + "from TTS.vocoder.tf.utils.io import load_checkpoint\n", + "\n", + "# LOAD VOCODER MODEL\n", + "vocoder_model = setup_generator(VOCODER_CONFIG)\n", + "vocoder_model.build_inference()\n", + "vocoder_model = load_checkpoint(vocoder_model, VOCODER_MODEL)\n", + "vocoder_model.inference_padding = 0\n", + "\n", + "ap_vocoder = AudioProcessor(**VOCODER_CONFIG['audio']) " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "Collapsed": "false", + "colab_type": "text", + "id": "Ws_YkPKsLgo-" + }, + "source": [ + "## Run Inference" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 134 + }, + "colab_type": "code", + "id": "FuWxZ9Ey5Puj", + "outputId": 
"07ede6e5-06e6-4612-f687-7984d20e5254" + }, + "outputs": [], + "source": [ + "sentence = \"Bill got in the habit of asking himself “Is that thought true?” and if he wasn’t absolutely certain it was, he just let it go.\"\n", + "align, spec, stop_tokens, wav = tts(model, sentence, TTS_CONFIG, ap)" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "DDC-TTS_and_MultiBand-MelGAN_TF_Example.ipynb", + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.7" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/DDC_TTS_and_ParallelWaveGAN_Example.ipynb b/notebooks/DDC_TTS_and_ParallelWaveGAN_Example.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..35a257e04b4a55e38fe4ac18ecf98ac62c394549 --- /dev/null +++ b/notebooks/DDC_TTS_and_ParallelWaveGAN_Example.ipynb @@ -0,0 +1,329 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "DDC-TTS_and_MultiBand-MelGAN_Example.ipynb", + "provenance": [], + "collapsed_sections": [], + "toc_visible": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "accelerator": "GPU" + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "6LWsNd3_M3MP", + "colab_type": "text" + }, + "source": [ + "# Mozilla TTS on CPU Real-Time Speech Synthesis " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FAqrSIWgLyP0", + "colab_type": "text" + }, + "source": [ + "We use Tacotron2 and MultiBand-Melgan models and LJSpeech dataset.\n", + "\n", + "Tacotron2 is trained using [Double Decoder Consistency](https://erogol.com/solving-attention-problems-of-tts-models-with-double-decoder-consistency/) (DDC) only for 130K steps (3 days) with a single GPU.\n", + "\n", + "MultiBand-Melgan is trained 1.45M steps with real spectrograms.\n", + "\n", + "Note that both model performances can be improved with more training." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ku-dA4DKoeXk", + "colab_type": "text" + }, + "source": [ + "### Download Models" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "jGIgnWhGsxU1", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 162 + }, + "outputId": "88725e41-a8dc-4885-b3bf-cac939f38abe", + "tags": [] + }, + "source": [ + "!gdown --id 1dntzjWFg7ufWaTaFy80nRz-Tu02xWZos -O data/tts_model.pth.tar\n", + "!gdown --id 18CQ6G6tBEOfvCHlPqP8EBI4xWbrr9dBc -O data/config.json" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "4dnpE0-kvTsu", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 235 + }, + "outputId": "76377c6d-789c-4995-ba00-a21a6e1c401e", + "tags": [] + }, + "source": [ + "!gdown --id 1X09hHAyAJOnrplCUMAdW_t341Kor4YR4 -O data/vocoder_model.pth.tar\n", + "!gdown --id \"1qN7vQRIYkzvOX_DtiZtTajzoZ1eW1-Eg\" -O data/config_vocoder.json\n", + "!gdown --id 11oY3Tv0kQtxK_JPgxrfesa99maVXHNxU -O data/scale_stats.npy" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Zlgi8fPdpRF0", + "colab_type": "text" + }, + "source": [ + "### Define TTS function" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "f-Yc42nQZG5A", + "colab_type": "code", + "colab": {} + }, + "source": [ + "def tts(model, text, CONFIG, use_cuda, ap, use_gl, figures=True):\n", + " t_1 = time.time()\n", + " waveform, alignment, mel_spec, mel_postnet_spec, stop_tokens, inputs = synthesis(model, text, CONFIG, use_cuda, ap, speaker_id, style_wav=None,\n", + " truncated=False, enable_eos_bos_chars=CONFIG.enable_eos_bos_chars)\n", + " # mel_postnet_spec = ap.denormalize(mel_postnet_spec.T)\n", + " if not use_gl:\n", + " waveform = vocoder_model.inference(torch.FloatTensor(mel_postnet_spec.T).unsqueeze(0))\n", + " waveform = waveform.flatten()\n", + " if use_cuda:\n", + " waveform = waveform.cpu()\n", + " waveform = waveform.numpy()\n", + " rtf = (time.time() - t_1) / (len(waveform) / ap.sample_rate)\n", + " tps = (time.time() - t_1) / len(waveform)\n", + " print(waveform.shape)\n", + " print(\" > Run-time: {}\".format(time.time() - t_1))\n", + " print(\" > Real-time factor: {}\".format(rtf))\n", + " print(\" > Time per step: {}\".format(tps))\n", + " IPython.display.display(IPython.display.Audio(waveform, rate=CONFIG.audio['sample_rate'])) \n", + " return alignment, mel_postnet_spec, stop_tokens, waveform" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZksegYQepkFg", + "colab_type": "text" + }, + "source": [ + "### Load Models" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "oVa0kOamprgj", + "colab_type": "code", + "colab": {} + }, + "source": [ + "import os\n", + "import torch\n", + "import time\n", + "import IPython\n", + "\n", + "from TTS.tts.utils.generic_utils import setup_model\n", + "from TTS.utils.io import load_config\n", + "from TTS.tts.utils.text.symbols import symbols, phonemes\n", + "from TTS.utils.audio import AudioProcessor\n", + "from TTS.tts.utils.synthesis import synthesis" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "EY-sHVO8IFSH", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# runtime settings\n", + "use_cuda = False" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": 
"_1aIUp2FpxOQ", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# model paths\n", + "TTS_MODEL = \"data/tts_model.pth.tar\"\n", + "TTS_CONFIG = \"data/config.json\"\n", + "VOCODER_MODEL = \"data/vocoder_model.pth.tar\"\n", + "VOCODER_CONFIG = \"data/config_vocoder.json\"" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "CpgmdBVQplbv", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# load configs\n", + "TTS_CONFIG = load_config(TTS_CONFIG)\n", + "VOCODER_CONFIG = load_config(VOCODER_CONFIG)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "zmrQxiozIUVE", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 471 + }, + "outputId": "60c4daa0-4c5b-4a2e-fe0d-be437d003a49", + "tags": [] + }, + "source": [ + "# load the audio processor\n", + "TTS_CONFIG.audio['stats_path'] = 'data/scale_stats.npy'\n", + "ap = AudioProcessor(**TTS_CONFIG.audio) " + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "8fLoI4ipqMeS", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + }, + "outputId": "b789066e-e305-42ad-b3ca-eba8d9267382", + "tags": [] + }, + "source": [ + "# LOAD TTS MODEL\n", + "# multi speaker \n", + "speaker_id = None\n", + "speakers = []\n", + "\n", + "# load the model\n", + "num_chars = len(phonemes) if TTS_CONFIG.use_phonemes else len(symbols)\n", + "model = setup_model(num_chars, len(speakers), TTS_CONFIG)\n", + "\n", + "# load model state\n", + "cp = torch.load(TTS_MODEL, map_location=torch.device('cpu'))\n", + "\n", + "# load the model\n", + "model.load_state_dict(cp['model'])\n", + "if use_cuda:\n", + " model.cuda()\n", + "model.eval()\n", + "\n", + "# set model stepsize\n", + "if 'r' in cp:\n", + " model.decoder.set_r(cp['r'])" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "zKoq0GgzqzhQ", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "outputId": "234efc61-f37a-40bc-95a3-b51896018ccb", + "tags": [] + }, + "source": [ + "from TTS.vocoder.utils.generic_utils import setup_generator\n", + "\n", + "# LOAD VOCODER MODEL\n", + "vocoder_model = setup_generator(VOCODER_CONFIG)\n", + "vocoder_model.load_state_dict(torch.load(VOCODER_MODEL, map_location=\"cpu\")[\"model\"])\n", + "vocoder_model.remove_weight_norm()\n", + "vocoder_model.inference_padding = 0\n", + "\n", + "ap_vocoder = AudioProcessor(**VOCODER_CONFIG['audio']) \n", + "if use_cuda:\n", + " vocoder_model.cuda()\n", + "vocoder_model.eval()" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ws_YkPKsLgo-", + "colab_type": "text" + }, + "source": [ + "## Run Inference" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "FuWxZ9Ey5Puj", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 134 + }, + "outputId": "9c06adad-5451-4393-89a1-a2e7dc39ab91", + "tags": [] + }, + "source": [ + "sentence = \"Bill got in the habit of asking himself “Is that thought true?” and if he wasn’t absolutely certain it was, he just let it go.\"\n", + "align, spec, stop_tokens, wav = tts(model, sentence, TTS_CONFIG, use_cuda, ap, use_gl=False, figures=True)" + ], + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file diff --git 
a/notebooks/Demo_Mozilla_TTS_MultiSpeaker_jia_et_al_2018.ipynb b/notebooks/Demo_Mozilla_TTS_MultiSpeaker_jia_et_al_2018.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..458422c086915c25eaba7f6cd554a4f90ad13319 --- /dev/null +++ b/notebooks/Demo_Mozilla_TTS_MultiSpeaker_jia_et_al_2018.ipynb @@ -0,0 +1,637 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "Demo-Mozilla-TTS-MultiSpeaker-jia-et-al-2018.ipynb", + "provenance": [], + "collapsed_sections": [ + "vnV-FigfvsS2", + "hkvv7gRcx4WV", + "QJ6VgT2a4vHW" + ] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "accelerator": "GPU" + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "yZK6UdwSFnOO", + "colab_type": "text" + }, + "source": [ + "# **Download and install Mozilla TTS**" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "yvb0pX3WY6MN", + "colab_type": "code", + "colab": {} + }, + "source": [ + "import os \n", + "!git clone https://github.com/Edresson/TTS -b dev-gst-embeddings" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "iB9nl2UEG3SY", + "colab_type": "code", + "colab": {} + }, + "source": [ + "!apt-get install espeak\n", + "os.chdir('TTS')\n", + "!pip install -r requirements.txt\n", + "!python setup.py develop\n", + "os.chdir('..')" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "w6Krn8k1inC_", + "colab_type": "text" + }, + "source": [ + "\n", + "\n", + "**Download Checkpoint**\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "PiYHf3lKhi9z", + "colab_type": "code", + "colab": {} + }, + "source": [ + "!wget -c -q --show-progress -O ./TTS-checkpoint.zip https://github.com/Edresson/TTS/releases/download/v1.0.0/Checkpoints-TTS-MultiSpeaker-Jia-et-al-2018.zip\n", + "!unzip ./TTS-checkpoint.zip\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MpYNgqrZcJKn", + "colab_type": "text" + }, + "source": [ + "**Utils Functions**" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "4KZA4b_CbMqx", + "colab_type": "code", + "colab": {} + }, + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "import argparse\n", + "import json\n", + "# pylint: disable=redefined-outer-name, unused-argument\n", + "import os\n", + "import string\n", + "import time\n", + "import sys\n", + "import numpy as np\n", + "\n", + "TTS_PATH = \"../content/TTS\"\n", + "# add libraries into environment\n", + "sys.path.append(TTS_PATH) # set this if TTS is not installed globally\n", + "\n", + "import torch\n", + "\n", + "from TTS.tts.utils.generic_utils import setup_model\n", + "from TTS.tts.utils.synthesis import synthesis\n", + "from TTS.tts.utils.text.symbols import make_symbols, phonemes, symbols\n", + "from TTS.utils.audio import AudioProcessor\n", + "from TTS.utils.io import load_config\n", + "from TTS.vocoder.utils.generic_utils import setup_generator\n", + "\n", + "\n", + "def tts(model, vocoder_model, text, CONFIG, use_cuda, ap, use_gl, speaker_fileid, speaker_embedding=None):\n", + " t_1 = time.time()\n", + " waveform, _, _, mel_postnet_spec, _, _ = synthesis(model, text, CONFIG, use_cuda, ap, speaker_fileid, None, False, CONFIG.enable_eos_bos_chars, use_gl, speaker_embedding=speaker_embedding)\n", + " if CONFIG.model == \"Tacotron\" and not use_gl:\n", + " mel_postnet_spec = 
ap.out_linear_to_mel(mel_postnet_spec.T).T\n", + " if not use_gl:\n", + " waveform = vocoder_model.inference(torch.FloatTensor(mel_postnet_spec.T).unsqueeze(0))\n", + " if use_cuda and not use_gl:\n", + " waveform = waveform.cpu()\n", + " if not use_gl:\n", + " waveform = waveform.numpy()\n", + " waveform = waveform.squeeze()\n", + " rtf = (time.time() - t_1) / (len(waveform) / ap.sample_rate)\n", + " tps = (time.time() - t_1) / len(waveform)\n", + " print(\" > Run-time: {}\".format(time.time() - t_1))\n", + " print(\" > Real-time factor: {}\".format(rtf))\n", + " print(\" > Time per step: {}\".format(tps))\n", + " return waveform\n", + "\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ENA2OumIVeMA", + "colab_type": "text" + }, + "source": [ + "# **Vars definitions**\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "jPD0d_XpVXmY", + "colab_type": "code", + "colab": {} + }, + "source": [ + "TEXT = ''\n", + "OUT_PATH = 'tests-audios/'\n", + "# create output path\n", + "os.makedirs(OUT_PATH, exist_ok=True)\n", + "\n", + "SPEAKER_FILEID = None # if None use the first embedding from speakers.json\n", + "\n", + "# model vars \n", + "MODEL_PATH = 'best_model.pth.tar'\n", + "CONFIG_PATH = 'config.json'\n", + "SPEAKER_JSON = 'speakers.json'\n", + "\n", + "# vocoder vars\n", + "VOCODER_PATH = ''\n", + "VOCODER_CONFIG_PATH = ''\n", + "\n", + "USE_CUDA = True" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dV6cXXlfi72r", + "colab_type": "text" + }, + "source": [ + "# **Restore TTS Model**" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "x1WgLFauWUPe", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# load the config\n", + "C = load_config(CONFIG_PATH)\n", + "C.forward_attn_mask = True\n", + "\n", + "# load the audio processor\n", + "ap = AudioProcessor(**C.audio)\n", + "\n", + "# if the vocabulary was passed, replace the default\n", + "if 'characters' in C.keys():\n", + " symbols, phonemes = make_symbols(**C.characters)\n", + "\n", + "speaker_embedding = None\n", + "speaker_embedding_dim = None\n", + "num_speakers = 0\n", + "# load speakers\n", + "if SPEAKER_JSON != '':\n", + " speaker_mapping = json.load(open(SPEAKER_JSON, 'r'))\n", + " num_speakers = len(speaker_mapping)\n", + " if C.use_external_speaker_embedding_file:\n", + " if SPEAKER_FILEID is not None:\n", + " speaker_embedding = speaker_mapping[SPEAKER_FILEID]['embedding']\n", + " else: # if speaker_fileid is not specificated use the first sample in speakers.json\n", + " choise_speaker = list(speaker_mapping.keys())[0]\n", + " print(\" Speaker: \",choise_speaker.split('_')[0],'was chosen automatically', \"(this speaker seen in training)\")\n", + " speaker_embedding = speaker_mapping[choise_speaker]['embedding']\n", + " speaker_embedding_dim = len(speaker_embedding)\n", + "\n", + "# load the model\n", + "num_chars = len(phonemes) if C.use_phonemes else len(symbols)\n", + "model = setup_model(num_chars, num_speakers, C, speaker_embedding_dim)\n", + "cp = torch.load(MODEL_PATH, map_location=torch.device('cpu'))\n", + "model.load_state_dict(cp['model'])\n", + "model.eval()\n", + "\n", + "if USE_CUDA:\n", + " model.cuda()\n", + "\n", + "model.decoder.set_r(cp['r'])\n", + "\n", + "# load vocoder model\n", + "if VOCODER_PATH!= \"\":\n", + " VC = load_config(VOCODER_CONFIG_PATH)\n", + " vocoder_model = setup_generator(VC)\n", + " vocoder_model.load_state_dict(torch.load(VOCODER_PATH, 
map_location=\"cpu\")[\"model\"])\n", + " vocoder_model.remove_weight_norm()\n", + " if USE_CUDA:\n", + " vocoder_model.cuda()\n", + " vocoder_model.eval()\n", + "else:\n", + " vocoder_model = None\n", + " VC = None\n", + "\n", + "# synthesize voice\n", + "use_griffin_lim = VOCODER_PATH== \"\"\n", + "\n", + "if not C.use_external_speaker_embedding_file:\n", + " if SPEAKER_FILEID.isdigit():\n", + " SPEAKER_FILEID = int(SPEAKER_FILEID)\n", + " else:\n", + " SPEAKER_FILEID = None\n", + "else:\n", + " SPEAKER_FILEID = None\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tNvVEoE30qY6", + "colab_type": "text" + }, + "source": [ + "Synthesize sentence with Speaker\n", + "\n", + "> Stop running the cell to leave!\n", + "\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "2o8fXkVSyXOa", + "colab_type": "code", + "colab": {} + }, + "source": [ + "import IPython\n", + "from IPython.display import Audio\n", + "print(\"Synthesize sentence with Speaker: \",choise_speaker.split('_')[0], \"(this speaker seen in training)\")\n", + "while True:\n", + " TEXT = input(\"Enter sentence: \")\n", + " print(\" > Text: {}\".format(TEXT))\n", + " wav = tts(model, vocoder_model, TEXT, C, USE_CUDA, ap, use_griffin_lim, SPEAKER_FILEID, speaker_embedding=speaker_embedding)\n", + " IPython.display.display(Audio(wav, rate=ap.sample_rate))\n", + " # save the results\n", + " file_name = TEXT.replace(\" \", \"_\")\n", + " file_name = file_name.translate(\n", + " str.maketrans('', '', string.punctuation.replace('_', ''))) + '.wav'\n", + " out_path = os.path.join(OUT_PATH, file_name)\n", + " print(\" > Saving output to {}\".format(out_path))\n", + " ap.save_wav(wav, out_path)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vnV-FigfvsS2", + "colab_type": "text" + }, + "source": [ + "# **Select Speaker**\n", + "\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "RuCGOnJ_fgDV", + "colab_type": "code", + "colab": {} + }, + "source": [ + "\n", + "# VCTK speakers not seen in training (new speakers)\n", + "VCTK_test_Speakers = [\"p225\", \"p234\", \"p238\", \"p245\", \"p248\", \"p261\", \"p294\", \"p302\", \"p326\", \"p335\", \"p347\"]\n", + "\n", + "# VCTK speakers seen in training\n", + "VCTK_train_Speakers = ['p244', 'p300', 'p303', 'p273', 'p292', 'p252', 'p254', 'p269', 'p345', 'p274', 'p363', 'p285', 'p351', 'p361', 'p295', 'p266', 'p307', 'p230', 'p339', 'p253', 'p310', 'p241', 'p256', 'p323', 'p237', 'p229', 'p298', 'p336', 'p276', 'p305', 'p255', 'p278', 'p299', 'p265', 'p267', 'p280', 'p260', 'p272', 'p262', 'p334', 'p283', 'p247', 'p246', 'p374', 'p297', 'p249', 'p250', 'p304', 'p240', 'p236', 'p312', 'p286', 'p263', 'p258', 'p313', 'p376', 'p279', 'p340', 'p362', 'p284', 'p231', 'p308', 'p277', 'p275', 'p333', 'p314', 'p330', 'p264', 'p226', 'p288', 'p343', 'p239', 'p232', 'p268', 'p270', 'p329', 'p227', 'p271', 'p228', 'p311', 'p301', 'p293', 'p364', 'p251', 'p317', 'p360', 'p281', 'p243', 'p287', 'p233', 'p259', 'p316', 'p257', 'p282', 'p306', 'p341', 'p318']\n", + "\n", + "\n", + "num_samples_speaker = 2 # In theory the more samples of the speaker the more similar to the real voice it will be!\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hkvv7gRcx4WV", + "colab_type": "text" + }, + "source": [ + "## **Example select a VCTK seen speaker in training**" + ] + }, + { + "cell_type": "code", + "metadata": { + 
"id": "BviNMI9UyCYz", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# get embedding\n", + "Speaker_choise = VCTK_train_Speakers[0] # choise one of training speakers\n", + "# load speakers\n", + "if SPEAKER_JSON != '':\n", + " speaker_mapping = json.load(open(SPEAKER_JSON, 'r'))\n", + " if C.use_external_speaker_embedding_file:\n", + " speaker_embeddings = []\n", + " for key in list(speaker_mapping.keys()):\n", + " if Speaker_choise in key:\n", + " if len(speaker_embeddings) < num_samples_speaker:\n", + " speaker_embeddings.append(speaker_mapping[key]['embedding'])\n", + " # takes the average of the embedings samples of the announcers\n", + " speaker_embedding = np.mean(np.array(speaker_embeddings), axis=0).tolist()\n", + " " + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "5e5_XnLsx3jg", + "colab_type": "code", + "colab": {} + }, + "source": [ + "import IPython\n", + "from IPython.display import Audio\n", + "print(\"Synthesize sentence with Speaker: \",Speaker_choise.split('_')[0], \"(this speaker seen in training)\")\n", + "while True:\n", + " TEXT = input(\"Enter sentence: \")\n", + " print(\" > Text: {}\".format(TEXT))\n", + " wav = tts(model, vocoder_model, TEXT, C, USE_CUDA, ap, use_griffin_lim, SPEAKER_FILEID, speaker_embedding=speaker_embedding)\n", + " IPython.display.display(Audio(wav, rate=ap.sample_rate))\n", + " # save the results\n", + " file_name = TEXT.replace(\" \", \"_\")\n", + " file_name = file_name.translate(\n", + " str.maketrans('', '', string.punctuation.replace('_', ''))) + '.wav'\n", + " out_path = os.path.join(OUT_PATH, file_name)\n", + " print(\" > Saving output to {}\".format(out_path))\n", + " ap.save_wav(wav, out_path)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "QJ6VgT2a4vHW" + }, + "source": [ + "## **Example select a VCTK not seen speaker in training (new Speakers)**\n", + "\n", + "\n", + "> Fitting new Speakers :)\n", + "\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab_type": "code", + "id": "SZS57ZK-4vHa", + "colab": {} + }, + "source": [ + "# get embedding\n", + "Speaker_choise = VCTK_test_Speakers[0] # choise one of training speakers\n", + "# load speakers\n", + "if SPEAKER_JSON != '':\n", + " speaker_mapping = json.load(open(SPEAKER_JSON, 'r'))\n", + " if C.use_external_speaker_embedding_file:\n", + " speaker_embeddings = []\n", + " for key in list(speaker_mapping.keys()):\n", + " if Speaker_choise in key:\n", + " if len(speaker_embeddings) < num_samples_speaker:\n", + " speaker_embeddings.append(speaker_mapping[key]['embedding'])\n", + " # takes the average of the embedings samples of the announcers\n", + " speaker_embedding = np.mean(np.array(speaker_embeddings), axis=0).tolist()\n", + " " + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab_type": "code", + "id": "bbs85vzz4vHo", + "colab": {} + }, + "source": [ + "import IPython\n", + "from IPython.display import Audio\n", + "print(\"Synthesize sentence with Speaker: \",Speaker_choise.split('_')[0], \"(this speaker not seen in training (new speaker))\")\n", + "while True:\n", + " TEXT = input(\"Enter sentence: \")\n", + " print(\" > Text: {}\".format(TEXT))\n", + " wav = tts(model, vocoder_model, TEXT, C, USE_CUDA, ap, use_griffin_lim, SPEAKER_FILEID, speaker_embedding=speaker_embedding)\n", + " IPython.display.display(Audio(wav, rate=ap.sample_rate))\n", + " # save the 
results\n", + " file_name = TEXT.replace(\" \", \"_\")\n", + " file_name = file_name.translate(\n", + " str.maketrans('', '', string.punctuation.replace('_', ''))) + '.wav'\n", + " out_path = os.path.join(OUT_PATH, file_name)\n", + " print(\" > Saving output to {}\".format(out_path))\n", + " ap.save_wav(wav, out_path)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "LEE6mQLh5Who" + }, + "source": [ + "# **Example Synthesizing with your own voice :)**\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "La70gSB65nrs", + "colab_type": "text" + }, + "source": [ + " Download and load GE2E Speaker Encoder " + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "r0IEFZ0B5vQg", + "colab_type": "code", + "colab": {} + }, + "source": [ + "!wget -c -q --show-progress -O ./SpeakerEncoder-checkpoint.zip https://github.com/Edresson/TTS/releases/download/v1.0.0/GE2E-SpeakerEncoder-iter25k.zip\n", + "!unzip ./SpeakerEncoder-checkpoint.zip" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "jEH8HCTh5mF6", + "colab_type": "code", + "colab": {} + }, + "source": [ + "SE_MODEL_RUN_PATH = \"GE2E-SpeakerEncoder/\"\n", + "SE_MODEL_PATH = os.path.join(SE_MODEL_RUN_PATH, \"best_model.pth.tar\")\n", + "SE_CONFIG_PATH =os.path.join(SE_MODEL_RUN_PATH, \"config.json\")\n", + "USE_CUDA = True" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "tOwkfQqT6-Qo", + "colab_type": "code", + "colab": {} + }, + "source": [ + "from TTS.utils.audio import AudioProcessor\n", + "from TTS.speaker_encoder.model import SpeakerEncoder\n", + "se_config = load_config(SE_CONFIG_PATH)\n", + "se_ap = AudioProcessor(**se_config['audio'])\n", + "\n", + "se_model = SpeakerEncoder(**se_config.model)\n", + "se_model.load_state_dict(torch.load(SE_MODEL_PATH)['model'])\n", + "se_model.eval()\n", + "if USE_CUDA:\n", + " se_model.cuda()" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0TLlbUFG8O36", + "colab_type": "text" + }, + "source": [ + "Upload a wav audio file in your voice.\n", + "\n", + "\n", + "> We recommend files longer than 3 seconds, the bigger the file the closer to your voice :)\n", + "\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "_FWwHPjJ8NXl", + "colab_type": "code", + "colab": {} + }, + "source": [ + "from google.colab import files\n", + "file_list = files.upload()" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "WWOf6sgbBbGY", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# extract embedding from wav files\n", + "speaker_embeddings = []\n", + "for name in file_list.keys():\n", + " if '.wav' in name:\n", + " mel_spec = se_ap.melspectrogram(se_ap.load_wav(name, sr=se_ap.sample_rate)).T\n", + " mel_spec = torch.FloatTensor(mel_spec[None, :, :])\n", + " if USE_CUDA:\n", + " mel_spec = mel_spec.cuda()\n", + " embedd = se_model.compute_embedding(mel_spec).cpu().detach().numpy().reshape(-1)\n", + " speaker_embeddings.append(embedd)\n", + " else:\n", + " print(\" You need upload Wav files, others files is not supported !!\")\n", + "\n", + "# takes the average of the embedings samples of the announcers\n", + "speaker_embedding = np.mean(np.array(speaker_embeddings), axis=0).tolist()" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + 
"metadata": { + "colab_type": "code", + "id": "xmItcGac5WiG", + "colab": {} + }, + "source": [ + "import IPython\n", + "from IPython.display import Audio\n", + "print(\"Synthesize sentence with New Speaker using files: \",file_list.keys(), \"(this speaker not seen in training (new speaker))\")\n", + "while True:\n", + " TEXT = input(\"Enter sentence: \")\n", + " print(\" > Text: {}\".format(TEXT))\n", + " wav = tts(model, vocoder_model, TEXT, C, USE_CUDA, ap, use_griffin_lim, SPEAKER_FILEID, speaker_embedding=speaker_embedding)\n", + " IPython.display.display(Audio(wav, rate=ap.sample_rate))\n", + " # save the results\n", + " file_name = TEXT.replace(\" \", \"_\")\n", + " file_name = file_name.translate(\n", + " str.maketrans('', '', string.punctuation.replace('_', ''))) + '.wav'\n", + " out_path = os.path.join(OUT_PATH, file_name)\n", + " print(\" > Saving output to {}\".format(out_path))\n", + " ap.save_wav(wav, out_path)" + ], + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file diff --git a/notebooks/Demo_Mozilla_TTS_MultiSpeaker_jia_et_al_2018_With_GST.ipynb b/notebooks/Demo_Mozilla_TTS_MultiSpeaker_jia_et_al_2018_With_GST.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..e059461e7f23c2c7c271526f11b5c0fd81663042 --- /dev/null +++ b/notebooks/Demo_Mozilla_TTS_MultiSpeaker_jia_et_al_2018_With_GST.ipynb @@ -0,0 +1,834 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "Demo-Mozilla-TTS-MultiSpeaker-jia-et-al-2018-With-GST.ipynb", + "provenance": [], + "collapsed_sections": [ + "yZK6UdwSFnOO", + "ENA2OumIVeMA", + "dV6cXXlfi72r", + "vnV-FigfvsS2", + "g_G_HweN04W-", + "LEE6mQLh5Who" + ], + "toc_visible": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "accelerator": "GPU" + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "yZK6UdwSFnOO", + "colab_type": "text" + }, + "source": [ + "# **Download and install Mozilla TTS**" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "yvb0pX3WY6MN", + "colab_type": "code", + "colab": {} + }, + "source": [ + "import os \n", + "!git clone https://github.com/Edresson/TTS -b dev-gst-embeddings" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "iB9nl2UEG3SY", + "colab_type": "code", + "colab": {} + }, + "source": [ + "!apt-get install espeak\n", + "os.chdir('TTS')\n", + "!pip install -r requirements.txt\n", + "!python setup.py develop\n", + "os.chdir('..')" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "w6Krn8k1inC_", + "colab_type": "text" + }, + "source": [ + "\n", + "\n", + "**Download Checkpoint**\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "PiYHf3lKhi9z", + "colab_type": "code", + "colab": {} + }, + "source": [ + "!wget -c -q --show-progress -O ./TTS-checkpoint.zip https://github.com/Edresson/TTS/releases/download/v1.0.0/Checkpoints-TTS-MultiSpeaker-Jia-et-al-2018-with-GST.zip\n", + "!unzip ./TTS-checkpoint.zip\n", + "\n", + "# Download gst style example\n", + "!wget https://github.com/Edresson/TTS/releases/download/v1.0.0/gst-style-example.wav" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MpYNgqrZcJKn", + "colab_type": "text" + }, + "source": [ + "**Utils Functions**" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "4KZA4b_CbMqx", + "colab_type": "code", + "colab": {} + }, + 
"source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "import argparse\n", + "import json\n", + "# pylint: disable=redefined-outer-name, unused-argument\n", + "import os\n", + "import string\n", + "import time\n", + "import sys\n", + "import numpy as np\n", + "\n", + "TTS_PATH = \"../content/TTS\"\n", + "# add libraries into environment\n", + "sys.path.append(TTS_PATH) # set this if TTS is not installed globally\n", + "\n", + "import torch\n", + "\n", + "from TTS.tts.utils.generic_utils import setup_model\n", + "from TTS.tts.utils.synthesis import synthesis\n", + "from TTS.tts.utils.text.symbols import make_symbols, phonemes, symbols\n", + "from TTS.utils.audio import AudioProcessor\n", + "from TTS.utils.io import load_config\n", + "from TTS.vocoder.utils.generic_utils import setup_generator\n", + "\n", + "\n", + "def tts(model, vocoder_model, text, CONFIG, use_cuda, ap, use_gl, speaker_fileid, speaker_embedding=None, gst_style=None):\n", + " t_1 = time.time()\n", + " waveform, _, _, mel_postnet_spec, _, _ = synthesis(model, text, CONFIG, use_cuda, ap, speaker_fileid, gst_style, False, CONFIG.enable_eos_bos_chars, use_gl, speaker_embedding=speaker_embedding)\n", + " if CONFIG.model == \"Tacotron\" and not use_gl:\n", + " mel_postnet_spec = ap.out_linear_to_mel(mel_postnet_spec.T).T\n", + " if not use_gl:\n", + " waveform = vocoder_model.inference(torch.FloatTensor(mel_postnet_spec.T).unsqueeze(0))\n", + " if use_cuda and not use_gl:\n", + " waveform = waveform.cpu()\n", + " if not use_gl:\n", + " waveform = waveform.numpy()\n", + " waveform = waveform.squeeze()\n", + " rtf = (time.time() - t_1) / (len(waveform) / ap.sample_rate)\n", + " tps = (time.time() - t_1) / len(waveform)\n", + " print(\" > Run-time: {}\".format(time.time() - t_1))\n", + " print(\" > Real-time factor: {}\".format(rtf))\n", + " print(\" > Time per step: {}\".format(tps))\n", + " return waveform\n", + "\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ENA2OumIVeMA", + "colab_type": "text" + }, + "source": [ + "# **Vars definitions**\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "jPD0d_XpVXmY", + "colab_type": "code", + "colab": {} + }, + "source": [ + "TEXT = ''\n", + "OUT_PATH = 'tests-audios/'\n", + "# create output path\n", + "os.makedirs(OUT_PATH, exist_ok=True)\n", + "\n", + "SPEAKER_FILEID = None # if None use the first embedding from speakers.json\n", + "\n", + "# model vars \n", + "MODEL_PATH = 'best_model.pth.tar'\n", + "CONFIG_PATH = 'config.json'\n", + "SPEAKER_JSON = 'speakers.json'\n", + "\n", + "# vocoder vars\n", + "VOCODER_PATH = ''\n", + "VOCODER_CONFIG_PATH = ''\n", + "\n", + "USE_CUDA = True" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dV6cXXlfi72r", + "colab_type": "text" + }, + "source": [ + "# **Restore TTS Model**" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "x1WgLFauWUPe", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# load the config\n", + "C = load_config(CONFIG_PATH)\n", + "C.forward_attn_mask = True\n", + "\n", + "# load the audio processor\n", + "ap = AudioProcessor(**C.audio)\n", + "\n", + "# if the vocabulary was passed, replace the default\n", + "if 'characters' in C.keys():\n", + " symbols, phonemes = make_symbols(**C.characters)\n", + "\n", + "speaker_embedding = None\n", + "speaker_embedding_dim = None\n", + "num_speakers = 0\n", + "# load speakers\n", + "if SPEAKER_JSON != '':\n", + " speaker_mapping = 
json.load(open(SPEAKER_JSON, 'r'))\n", + " num_speakers = len(speaker_mapping)\n", + " if C.use_external_speaker_embedding_file:\n", + " if SPEAKER_FILEID is not None:\n", + " speaker_embedding = speaker_mapping[SPEAKER_FILEID]['embedding']\n", + " else: # if speaker_fileid is not specificated use the first sample in speakers.json\n", + " choise_speaker = list(speaker_mapping.keys())[0]\n", + " print(\" Speaker: \",choise_speaker.split('_')[0],'was chosen automatically', \"(this speaker seen in training)\")\n", + " speaker_embedding = speaker_mapping[choise_speaker]['embedding']\n", + " speaker_embedding_dim = len(speaker_embedding)\n", + "\n", + "# load the model\n", + "num_chars = len(phonemes) if C.use_phonemes else len(symbols)\n", + "model = setup_model(num_chars, num_speakers, C, speaker_embedding_dim)\n", + "cp = torch.load(MODEL_PATH, map_location=torch.device('cpu'))\n", + "model.load_state_dict(cp['model'])\n", + "model.eval()\n", + "\n", + "if USE_CUDA:\n", + " model.cuda()\n", + "\n", + "model.decoder.set_r(cp['r'])\n", + "\n", + "# load vocoder model\n", + "if VOCODER_PATH!= \"\":\n", + " VC = load_config(VOCODER_CONFIG_PATH)\n", + " vocoder_model = setup_generator(VC)\n", + " vocoder_model.load_state_dict(torch.load(VOCODER_PATH, map_location=\"cpu\")[\"model\"])\n", + " vocoder_model.remove_weight_norm()\n", + " if USE_CUDA:\n", + " vocoder_model.cuda()\n", + " vocoder_model.eval()\n", + "else:\n", + " vocoder_model = None\n", + " VC = None\n", + "\n", + "# synthesize voice\n", + "use_griffin_lim = VOCODER_PATH== \"\"\n", + "\n", + "if not C.use_external_speaker_embedding_file:\n", + " if SPEAKER_FILEID.isdigit():\n", + " SPEAKER_FILEID = int(SPEAKER_FILEID)\n", + " else:\n", + " SPEAKER_FILEID = None\n", + "else:\n", + " SPEAKER_FILEID = None\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tNvVEoE30qY6", + "colab_type": "text" + }, + "source": [ + "Synthesize sentence with Speaker\n", + "\n", + "> Stop running the cell to leave!\n", + "\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "2o8fXkVSyXOa", + "colab_type": "code", + "colab": {} + }, + "source": [ + "import IPython\n", + "from IPython.display import Audio\n", + "print(\"Synthesize sentence with Speaker: \",choise_speaker.split('_')[0], \"(this speaker seen in training)\")\n", + "gst_style = 'gst-style-example.wav'\n", + "while True:\n", + " TEXT = input(\"Enter sentence: \")\n", + " print(\" > Text: {}\".format(TEXT))\n", + " wav = tts(model, vocoder_model, TEXT, C, USE_CUDA, ap, use_griffin_lim, SPEAKER_FILEID, speaker_embedding=speaker_embedding, gst_style=gst_style)\n", + " IPython.display.display(Audio(wav, rate=ap.sample_rate))\n", + " # save the results\n", + " file_name = TEXT.replace(\" \", \"_\")\n", + " file_name = file_name.translate(\n", + " str.maketrans('', '', string.punctuation.replace('_', ''))) + '.wav'\n", + " out_path = os.path.join(OUT_PATH, file_name)\n", + " print(\" > Saving output to {}\".format(out_path))\n", + " ap.save_wav(wav, out_path)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vnV-FigfvsS2", + "colab_type": "text" + }, + "source": [ + "# **Select Speaker**\n", + "\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "RuCGOnJ_fgDV", + "colab_type": "code", + "colab": {} + }, + "source": [ + "\n", + "# VCTK speakers not seen in training (new speakers)\n", + "VCTK_test_Speakers = [\"p225\", \"p234\", \"p238\", \"p245\", \"p248\", 
\"p261\", \"p294\", \"p302\", \"p326\", \"p335\", \"p347\"]\n", + "\n", + "# VCTK speakers seen in training\n", + "VCTK_train_Speakers = ['p244', 'p300', 'p303', 'p273', 'p292', 'p252', 'p254', 'p269', 'p345', 'p274', 'p363', 'p285', 'p351', 'p361', 'p295', 'p266', 'p307', 'p230', 'p339', 'p253', 'p310', 'p241', 'p256', 'p323', 'p237', 'p229', 'p298', 'p336', 'p276', 'p305', 'p255', 'p278', 'p299', 'p265', 'p267', 'p280', 'p260', 'p272', 'p262', 'p334', 'p283', 'p247', 'p246', 'p374', 'p297', 'p249', 'p250', 'p304', 'p240', 'p236', 'p312', 'p286', 'p263', 'p258', 'p313', 'p376', 'p279', 'p340', 'p362', 'p284', 'p231', 'p308', 'p277', 'p275', 'p333', 'p314', 'p330', 'p264', 'p226', 'p288', 'p343', 'p239', 'p232', 'p268', 'p270', 'p329', 'p227', 'p271', 'p228', 'p311', 'p301', 'p293', 'p364', 'p251', 'p317', 'p360', 'p281', 'p243', 'p287', 'p233', 'p259', 'p316', 'p257', 'p282', 'p306', 'p341', 'p318']\n", + "\n", + "\n", + "num_samples_speaker = 2 # In theory the more samples of the speaker the more similar to the real voice it will be!\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hkvv7gRcx4WV", + "colab_type": "text" + }, + "source": [ + "## **Example select a VCTK seen speaker in training**" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "BviNMI9UyCYz", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# get embedding\n", + "Speaker_choise = VCTK_train_Speakers[0] # choise one of training speakers\n", + "# load speakers\n", + "if SPEAKER_JSON != '':\n", + " speaker_mapping = json.load(open(SPEAKER_JSON, 'r'))\n", + " if C.use_external_speaker_embedding_file:\n", + " speaker_embeddings = []\n", + " for key in list(speaker_mapping.keys()):\n", + " if Speaker_choise in key:\n", + " if len(speaker_embeddings) < num_samples_speaker:\n", + " speaker_embeddings.append(speaker_mapping[key]['embedding'])\n", + " # takes the average of the embedings samples of the announcers\n", + " speaker_embedding = np.mean(np.array(speaker_embeddings), axis=0).tolist()\n", + " " + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "5e5_XnLsx3jg", + "colab_type": "code", + "colab": {} + }, + "source": [ + "import IPython\n", + "from IPython.display import Audio\n", + "print(\"Synthesize sentence with Speaker: \",Speaker_choise.split('_')[0], \"(this speaker seen in training)\")\n", + "gst_style = 'gst-style-example.wav'\n", + "while True:\n", + " TEXT = input(\"Enter sentence: \")\n", + " print(\" > Text: {}\".format(TEXT))\n", + " wav = tts(model, vocoder_model, TEXT, C, USE_CUDA, ap, use_griffin_lim, SPEAKER_FILEID, speaker_embedding=speaker_embedding, gst_style=gst_style)\n", + " IPython.display.display(Audio(wav, rate=ap.sample_rate))\n", + " # save the results\n", + " file_name = TEXT.replace(\" \", \"_\")\n", + " file_name = file_name.translate(\n", + " str.maketrans('', '', string.punctuation.replace('_', ''))) + '.wav'\n", + " out_path = os.path.join(OUT_PATH, file_name)\n", + " print(\" > Saving output to {}\".format(out_path))\n", + " ap.save_wav(wav, out_path)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "QJ6VgT2a4vHW" + }, + "source": [ + "## **Example select a VCTK not seen speaker in training (new Speakers)**\n", + "\n", + "\n", + "> Fitting new Speakers :)\n", + "\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab_type": "code", + "id": "SZS57ZK-4vHa", + 
"colab": {} + }, + "source": [ + "# get embedding\n", + "Speaker_choise = VCTK_test_Speakers[0] # choise one of training speakers\n", + "# load speakers\n", + "if SPEAKER_JSON != '':\n", + " speaker_mapping = json.load(open(SPEAKER_JSON, 'r'))\n", + " if C.use_external_speaker_embedding_file:\n", + " speaker_embeddings = []\n", + " for key in list(speaker_mapping.keys()):\n", + " if Speaker_choise in key:\n", + " if len(speaker_embeddings) < num_samples_speaker:\n", + " speaker_embeddings.append(speaker_mapping[key]['embedding'])\n", + " # takes the average of the embedings samples of the announcers\n", + " speaker_embedding = np.mean(np.array(speaker_embeddings), axis=0).tolist()\n", + " " + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab_type": "code", + "id": "bbs85vzz4vHo", + "colab": {} + }, + "source": [ + "import IPython\n", + "from IPython.display import Audio\n", + "print(\"Synthesize sentence with Speaker: \",Speaker_choise.split('_')[0], \"(this speaker not seen in training (new speaker))\")\n", + "gst_style = 'gst-style-example.wav'\n", + "while True:\n", + " TEXT = input(\"Enter sentence: \")\n", + " print(\" > Text: {}\".format(TEXT))\n", + " wav = tts(model, vocoder_model, TEXT, C, USE_CUDA, ap, use_griffin_lim, SPEAKER_FILEID, speaker_embedding=speaker_embedding, gst_style=gst_style)\n", + " IPython.display.display(Audio(wav, rate=ap.sample_rate))\n", + " # save the results\n", + " file_name = TEXT.replace(\" \", \"_\")\n", + " file_name = file_name.translate(\n", + " str.maketrans('', '', string.punctuation.replace('_', ''))) + '.wav'\n", + " out_path = os.path.join(OUT_PATH, file_name)\n", + " print(\" > Saving output to {}\".format(out_path))\n", + " ap.save_wav(wav, out_path)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "g_G_HweN04W-", + "colab_type": "text" + }, + "source": [ + "# **Changing GST tokens manually (without wav reference)**" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jyFP5syW2bjt", + "colab_type": "text" + }, + "source": [ + "You can define tokens manually, this way you can increase/decrease the function of a given GST token. For example a token is responsible for the length of the speaker's pauses, if you increase the value of that token you will have longer pauses and if you decrease it you will have shorter pauses." 
+ ] + }, + { + "cell_type": "code", + "metadata": { + "id": "SpwjDjCM2a3Y", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# set gst tokens, in this model we have 5 tokens\n", + "gst_style = {\"0\": 0, \"1\": 0, \"3\": 0, \"4\": 0}" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "qWChMbI_0z5X", + "colab_type": "code", + "colab": {} + }, + "source": [ + "import IPython\n", + "from IPython.display import Audio\n", + "print(\"Synthesize sentence with Speaker: \",Speaker_choise.split('_')[0], \"(this speaker not seen in training (new speaker))\")\n", + "TEXT = input(\"Enter sentence: \")\n", + "print(\" > Text: {}\".format(TEXT))\n", + "wav = tts(model, vocoder_model, TEXT, C, USE_CUDA, ap, use_griffin_lim, SPEAKER_FILEID, speaker_embedding=speaker_embedding, gst_style=gst_style)\n", + "IPython.display.display(Audio(wav, rate=ap.sample_rate))\n", + "# save the results\n", + "file_name = TEXT.replace(\" \", \"_\")\n", + "file_name = file_name.translate(\n", + " str.maketrans('', '', string.punctuation.replace('_', ''))) + '.wav'\n", + "out_path = os.path.join(OUT_PATH, file_name)\n", + "print(\" > Saving output to {}\".format(out_path))\n", + "ap.save_wav(wav, out_path)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "uFjUi9xQ3mG3", + "colab_type": "code", + "colab": {} + }, + "source": [ + "gst_style = {\"0\": 0.9, \"1\": 0, \"3\": 0, \"4\": 0}\n", + "print(\"Synthesize sentence with Speaker: \",Speaker_choise.split('_')[0], \"(this speaker not seen in training (new speaker))\")\n", + "TEXT = input(\"Enter sentence: \")\n", + "print(\" > Text: {}\".format(TEXT))\n", + "wav = tts(model, vocoder_model, TEXT, C, USE_CUDA, ap, use_griffin_lim, SPEAKER_FILEID, speaker_embedding=speaker_embedding, gst_style=gst_style)\n", + "IPython.display.display(Audio(wav, rate=ap.sample_rate))\n", + "# save the results\n", + "file_name = TEXT.replace(\" \", \"_\")\n", + "file_name = file_name.translate(\n", + " str.maketrans('', '', string.punctuation.replace('_', ''))) + '.wav'\n", + "out_path = os.path.join(OUT_PATH, file_name)\n", + "print(\" > Saving output to {}\".format(out_path))\n", + "ap.save_wav(wav, out_path)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "Uw0d6gWg4L27", + "colab_type": "code", + "colab": {} + }, + "source": [ + "gst_style = {\"0\": -0.9, \"1\": 0, \"3\": 0, \"4\": 0}\n", + "print(\"Synthesize sentence with Speaker: \",Speaker_choise.split('_')[0], \"(this speaker not seen in training (new speaker))\")\n", + "TEXT = input(\"Enter sentence: \")\n", + "print(\" > Text: {}\".format(TEXT))\n", + "wav = tts(model, vocoder_model, TEXT, C, USE_CUDA, ap, use_griffin_lim, SPEAKER_FILEID, speaker_embedding=speaker_embedding, gst_style=gst_style)\n", + "IPython.display.display(Audio(wav, rate=ap.sample_rate))\n", + "# save the results\n", + "file_name = TEXT.replace(\" \", \"_\")\n", + "file_name = file_name.translate(\n", + " str.maketrans('', '', string.punctuation.replace('_', ''))) + '.wav'\n", + "out_path = os.path.join(OUT_PATH, file_name)\n", + "print(\" > Saving output to {}\".format(out_path))\n", + "ap.save_wav(wav, out_path)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "V9izw4-54-Tl", + "colab_type": "code", + "colab": {} + }, + "source": [ + "gst_style = {\"0\": 0, \"1\": 0.9, \"3\": 0, \"4\": 0}\n", + "print(\"Synthesize sentence with 
Speaker: \",Speaker_choise.split('_')[0], \"(this speaker not seen in training (new speaker))\")\n", + "TEXT = input(\"Enter sentence: \")\n", + "print(\" > Text: {}\".format(TEXT))\n", + "wav = tts(model, vocoder_model, TEXT, C, USE_CUDA, ap, use_griffin_lim, SPEAKER_FILEID, speaker_embedding=speaker_embedding, gst_style=gst_style)\n", + "IPython.display.display(Audio(wav, rate=ap.sample_rate))\n", + "# save the results\n", + "file_name = TEXT.replace(\" \", \"_\")\n", + "file_name = file_name.translate(\n", + " str.maketrans('', '', string.punctuation.replace('_', ''))) + '.wav'\n", + "out_path = os.path.join(OUT_PATH, file_name)\n", + "print(\" > Saving output to {}\".format(out_path))\n", + "ap.save_wav(wav, out_path)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "LEE6mQLh5Who" + }, + "source": [ + "# **Example Synthesizing with your own voice :)**\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "La70gSB65nrs", + "colab_type": "text" + }, + "source": [ + " Download and load GE2E Speaker Encoder " + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "r0IEFZ0B5vQg", + "colab_type": "code", + "colab": {} + }, + "source": [ + "!wget -c -q --show-progress -O ./SpeakerEncoder-checkpoint.zip https://github.com/Edresson/TTS/releases/download/v1.0.0/GE2E-SpeakerEncoder-iter25k.zip\n", + "!unzip ./SpeakerEncoder-checkpoint.zip" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "jEH8HCTh5mF6", + "colab_type": "code", + "colab": {} + }, + "source": [ + "SE_MODEL_RUN_PATH = \"GE2E-SpeakerEncoder/\"\n", + "SE_MODEL_PATH = os.path.join(SE_MODEL_RUN_PATH, \"best_model.pth.tar\")\n", + "SE_CONFIG_PATH =os.path.join(SE_MODEL_RUN_PATH, \"config.json\")\n", + "USE_CUDA = True" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "tOwkfQqT6-Qo", + "colab_type": "code", + "colab": {} + }, + "source": [ + "from TTS.utils.audio import AudioProcessor\n", + "from TTS.speaker_encoder.model import SpeakerEncoder\n", + "se_config = load_config(SE_CONFIG_PATH)\n", + "se_ap = AudioProcessor(**se_config['audio'])\n", + "\n", + "se_model = SpeakerEncoder(**se_config.model)\n", + "se_model.load_state_dict(torch.load(SE_MODEL_PATH)['model'])\n", + "se_model.eval()\n", + "if USE_CUDA:\n", + " se_model.cuda()" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0TLlbUFG8O36", + "colab_type": "text" + }, + "source": [ + "Upload one or more wav audio files in your voice.\n", + "\n", + "\n", + "> We recommend files longer than 3 seconds, the bigger the file the closer to your voice :)\n", + "\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "_FWwHPjJ8NXl", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# select one or more wav files\n", + "from google.colab import files\n", + "file_list = files.upload()" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "WWOf6sgbBbGY", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# extract embedding from wav files\n", + "speaker_embeddings = []\n", + "for name in file_list.keys():\n", + " if '.wav' in name:\n", + " mel_spec = se_ap.melspectrogram(se_ap.load_wav(name, sr=se_ap.sample_rate)).T\n", + " mel_spec = torch.FloatTensor(mel_spec[None, :, :])\n", + " if USE_CUDA:\n", + " mel_spec = mel_spec.cuda()\n", + " embedd 
= se_model.compute_embedding(mel_spec).cpu().detach().numpy().reshape(-1)\n", + " speaker_embeddings.append(embedd)\n", + " else:\n", + " print(\"You need upload Wav files, others files is not supported !!\")\n", + "\n", + "# takes the average of the embedings samples of the announcers\n", + "speaker_embedding = np.mean(np.array(speaker_embeddings), axis=0).tolist()" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "AQ7eP31d9yzq", + "colab_type": "code", + "colab": {} + }, + "source": [ + "import IPython\n", + "from IPython.display import Audio\n", + "print(\"Synthesize sentence with New Speaker using files: \",file_list.keys(), \"(this speaker not seen in training (new speaker))\")\n", + "gst_style = {\"0\": 0, \"1\": 0.0, \"3\": 0, \"4\": 0}\n", + "gst_style = 'gst-style-example.wav'\n", + "TEXT = input(\"Enter sentence: \")\n", + "print(\" > Text: {}\".format(TEXT))\n", + "wav = tts(model, vocoder_model, TEXT, C, USE_CUDA, ap, use_griffin_lim, SPEAKER_FILEID, speaker_embedding=speaker_embedding, gst_style=gst_style)\n", + "IPython.display.display(Audio(wav, rate=ap.sample_rate))\n", + "# save the results\n", + "file_name = TEXT.replace(\" \", \"_\")\n", + "file_name = file_name.translate(\n", + " str.maketrans('', '', string.punctuation.replace('_', ''))) + '.wav'\n", + "out_path = os.path.join(OUT_PATH, file_name)\n", + "print(\" > Saving output to {}\".format(out_path))\n", + "ap.save_wav(wav, out_path)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "11i10yE1-LMJ", + "colab_type": "text" + }, + "source": [ + "Uploading your own GST reference wav file" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "eKohSQG1-KkT", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# select one wav file for GST reference\n", + "from google.colab import files\n", + "file_list = files.upload()\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab_type": "code", + "id": "xmItcGac5WiG", + "colab": {} + }, + "source": [ + "print(\"Synthesize sentence with New Speaker using files: \",file_list.keys(), \"(this speaker not seen in training (new speaker))\")\n", + "gst_style = list(file_list.keys())[0]\n", + "TEXT = input(\"Enter sentence: \")\n", + "print(\" > Text: {}\".format(TEXT))\n", + "wav = tts(model, vocoder_model, TEXT, C, USE_CUDA, ap, use_griffin_lim, SPEAKER_FILEID, speaker_embedding=speaker_embedding, gst_style=gst_style)\n", + "IPython.display.display(Audio(wav, rate=ap.sample_rate))\n", + "# save the results\n", + "file_name = TEXT.replace(\" \", \"_\")\n", + "file_name = file_name.translate(\n", + " str.maketrans('', '', string.punctuation.replace('_', ''))) + '.wav'\n", + "out_path = os.path.join(OUT_PATH, file_name)\n", + "print(\" > Saving output to {}\".format(out_path))\n", + "ap.save_wav(wav, out_path)" + ], + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file diff --git a/notebooks/ExtractTTSpectrogram.ipynb b/notebooks/ExtractTTSpectrogram.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..b28489e0500355912d6ff9d45fa8e0837477b496 --- /dev/null +++ b/notebooks/ExtractTTSpectrogram.ipynb @@ -0,0 +1,354 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is a notebook to generate mel-spectrograms from a TTS model to be used for WaveRNN training." 
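As a rough sketch of the data layout this extraction produces for vocoder training, each source wav is paired with a model-generated mel spectrogram saved as `.npy`, and a `metadata.txt` file lists `wav_path|mel_path.npy` pairs, matching the conventions used further below. All paths and the mel array here are hypothetical stand-ins.

```python
# Rough sketch of the output layout (paths and the mel array are hypothetical stand-ins).
import os
import numpy as np

OUT_PATH = "/path/to/output"                          # hypothetical output folder
wav_path = "/path/to/LJSpeech/wavs/LJ001-0001.wav"    # hypothetical source wav
mel = np.random.rand(80, 500).astype(np.float32)      # stands in for a model-generated mel

file_name = os.path.splitext(os.path.basename(wav_path))[0]
mel_path = os.path.join(OUT_PATH, "mel", file_name)
os.makedirs(os.path.dirname(mel_path), exist_ok=True)
np.save(mel_path, mel)                                # np.save appends ".npy"

# one "wav|mel" line per utterance, consumed by the vocoder training scripts
with open(os.path.join(OUT_PATH, "metadata.txt"), "a") as f:
    f.write(f"{wav_path}|{mel_path + '.npy'}\n")
```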
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "import os\n", + "import sys\n", + "import torch\n", + "import importlib\n", + "import numpy as np\n", + "from tqdm import tqdm as tqdm\n", + "from torch.utils.data import DataLoader\n", + "from TTS.tts.datasets.TTSDataset import MyDataset\n", + "from TTS.tts.layers.losses import L1LossMasked\n", + "from TTS.tts.utils.audio import AudioProcessor\n", + "from TTS.tts.utils.visual import plot_spectrogram\n", + "from TTS.tts.utils.generic_utils import load_config, setup_model, sequence_mask\n", + "from TTS.tts.utils.text.symbols import make_symbols, symbols, phonemes\n", + "\n", + "%matplotlib inline\n", + "\n", + "import os\n", + "os.environ['CUDA_VISIBLE_DEVICES']='0'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def set_filename(wav_path, out_path):\n", + " wav_file = os.path.basename(wav_path)\n", + " file_name = wav_file.split('.')[0]\n", + " os.makedirs(os.path.join(out_path, \"quant\"), exist_ok=True)\n", + " os.makedirs(os.path.join(out_path, \"mel\"), exist_ok=True)\n", + " os.makedirs(os.path.join(out_path, \"wav_gl\"), exist_ok=True)\n", + " wavq_path = os.path.join(out_path, \"quant\", file_name)\n", + " mel_path = os.path.join(out_path, \"mel\", file_name)\n", + " wav_path = os.path.join(out_path, \"wav_gl\", file_name)\n", + " return file_name, wavq_path, mel_path, wav_path" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "OUT_PATH = \"/home/erogol/Data/LJSpeech-1.1/ljspeech-March-17-2020_01+16AM-871588c/\"\n", + "DATA_PATH = \"/home/erogol/Data/LJSpeech-1.1/\"\n", + "DATASET = \"ljspeech\"\n", + "METADATA_FILE = \"metadata.csv\"\n", + "CONFIG_PATH = \"/home/erogol/Models/LJSpeech/ljspeech-March-17-2020_01+16AM-871588c/config.json\"\n", + "MODEL_FILE = \"/home/erogol/Models/LJSpeech/ljspeech-March-17-2020_01+16AM-871588c/checkpoint_420000.pth.tar\"\n", + "BATCH_SIZE = 32\n", + "\n", + "QUANTIZED_WAV = False\n", + "QUANTIZE_BIT = 9\n", + "DRY_RUN = False # if False, does not generate output files, only computes loss and visuals.\n", + "\n", + "use_cuda = torch.cuda.is_available()\n", + "print(\" > CUDA enabled: \", use_cuda)\n", + "\n", + "C = load_config(CONFIG_PATH)\n", + "C.audio['do_trim_silence'] = False # IMPORTANT!!!!!!!!!!!!!!! 
disable to align mel specs with the wav files\n", + "ap = AudioProcessor(bits=QUANTIZE_BIT, **C.audio)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# if the vocabulary was passed, replace the default\n", + "if 'characters' in C.keys():\n", + " symbols, phonemes = make_symbols(**C.characters)\n", + "\n", + "# load the model\n", + "num_chars = len(phonemes) if C.use_phonemes else len(symbols)\n", + "# TODO: multiple speaker\n", + "model = setup_model(num_chars, num_speakers=0, c=C)\n", + "checkpoint = torch.load(MODEL_FILE)\n", + "model.load_state_dict(checkpoint['model'])\n", + "print(checkpoint['step'])\n", + "model.eval()\n", + "model.decoder.set_r(checkpoint['r'])\n", + "if use_cuda:\n", + " model = model.cuda()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "preprocessor = importlib.import_module('TTS.tts.datasets.preprocess')\n", + "preprocessor = getattr(preprocessor, DATASET.lower())\n", + "meta_data = preprocessor(DATA_PATH,METADATA_FILE)\n", + "dataset = MyDataset(checkpoint['r'], C.text_cleaner, False, ap, meta_data,tp=C.characters if 'characters' in C.keys() else None, use_phonemes=C.use_phonemes, phoneme_cache_path=C.phoneme_cache_path, enable_eos_bos=C.enable_eos_bos_chars)\n", + "loader = DataLoader(dataset, batch_size=BATCH_SIZE, num_workers=4, collate_fn=dataset.collate_fn, shuffle=False, drop_last=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Generate model outputs " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pickle\n", + "\n", + "file_idxs = []\n", + "metadata = []\n", + "losses = []\n", + "postnet_losses = []\n", + "criterion = L1LossMasked(seq_len_norm=C.seq_len_norm)\n", + "with torch.no_grad():\n", + " for data in tqdm(loader):\n", + " # setup input data\n", + " text_input = data[0]\n", + " text_lengths = data[1]\n", + " linear_input = data[3]\n", + " mel_input = data[4]\n", + " mel_lengths = data[5]\n", + " stop_targets = data[6]\n", + " item_idx = data[7]\n", + "\n", + " # dispatch data to GPU\n", + " if use_cuda:\n", + " text_input = text_input.cuda()\n", + " text_lengths = text_lengths.cuda()\n", + " mel_input = mel_input.cuda()\n", + " mel_lengths = mel_lengths.cuda()\n", + "\n", + " mask = sequence_mask(text_lengths)\n", + " mel_outputs, postnet_outputs, alignments, stop_tokens = model.forward(text_input, text_lengths, mel_input)\n", + " \n", + " # compute loss\n", + " loss = criterion(mel_outputs, mel_input, mel_lengths)\n", + " loss_postnet = criterion(postnet_outputs, mel_input, mel_lengths)\n", + " losses.append(loss.item())\n", + " postnet_losses.append(loss_postnet.item())\n", + "\n", + " # compute mel specs from linear spec if model is Tacotron\n", + " if C.model == \"Tacotron\":\n", + " mel_specs = []\n", + " postnet_outputs = postnet_outputs.data.cpu().numpy()\n", + " for b in range(postnet_outputs.shape[0]):\n", + " postnet_output = postnet_outputs[b]\n", + " mel_specs.append(torch.FloatTensor(ap.out_linear_to_mel(postnet_output.T).T).cuda())\n", + " postnet_outputs = torch.stack(mel_specs)\n", + " elif C.model == \"Tacotron2\":\n", + " postnet_outputs = postnet_outputs.detach().cpu().numpy()\n", + " alignments = alignments.detach().cpu().numpy()\n", + "\n", + " if not DRY_RUN:\n", + " for idx in range(text_input.shape[0]):\n", + " wav_file_path = item_idx[idx]\n", + " wav = 
ap.load_wav(wav_file_path)\n", + " file_name, wavq_path, mel_path, wav_path = set_filename(wav_file_path, OUT_PATH)\n", + " file_idxs.append(file_name)\n", + "\n", + " # quantize and save wav\n", + " if QUANTIZED_WAV:\n", + " wavq = ap.quantize(wav)\n", + " np.save(wavq_path, wavq)\n", + "\n", + " # save TTS mel\n", + " mel = postnet_outputs[idx]\n", + " mel_length = mel_lengths[idx]\n", + " mel = mel[:mel_length, :].T\n", + " np.save(mel_path, mel)\n", + "\n", + " metadata.append([wav_file_path, mel_path])\n", + "\n", + " # for wavernn\n", + " if not DRY_RUN:\n", + " pickle.dump(file_idxs, open(OUT_PATH+\"/dataset_ids.pkl\", \"wb\")) \n", + " \n", + " # for pwgan\n", + " with open(os.path.join(OUT_PATH, \"metadata.txt\"), \"w\") as f:\n", + " for data in metadata:\n", + " f.write(f\"{data[0]}|{data[1]+'.npy'}\\n\")\n", + "\n", + " print(np.mean(losses))\n", + " print(np.mean(postnet_losses))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# for pwgan\n", + "with open(os.path.join(OUT_PATH, \"metadata.txt\"), \"w\") as f:\n", + " for data in metadata:\n", + " f.write(f\"{data[0]}|{data[1]+'.npy'}\\n\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Sanity Check" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "idx = 1\n", + "ap.melspectrogram(ap.load_wav(item_idx[idx])).shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import soundfile as sf\n", + "wav, sr = sf.read(item_idx[idx])\n", + "mel_postnet = postnet_outputs[idx][:mel_lengths[idx], :]\n", + "mel_decoder = mel_outputs[idx][:mel_lengths[idx], :].detach().cpu().numpy()\n", + "mel_truth = ap.melspectrogram(wav)\n", + "print(mel_truth.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# plot posnet output\n", + "plot_spectrogram(mel_postnet, ap);\n", + "print(mel_postnet[:mel_lengths[idx], :].shape)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# plot decoder output\n", + "plot_spectrogram(mel_decoder, ap);\n", + "print(mel_decoder.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# plot GT specgrogram\n", + "print(mel_truth.shape)\n", + "plot_spectrogram(mel_truth.T, ap);" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# postnet, decoder diff\n", + "from matplotlib import pylab as plt\n", + "mel_diff = mel_decoder - mel_postnet\n", + "plt.figure(figsize=(16, 10))\n", + "plt.imshow(abs(mel_diff[:mel_lengths[idx],:]).T,aspect=\"auto\", origin=\"lower\");\n", + "plt.colorbar()\n", + "plt.tight_layout()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# PLOT GT SPECTROGRAM diff\n", + "from matplotlib import pylab as plt\n", + "mel_diff2 = mel_truth.T - mel_decoder\n", + "plt.figure(figsize=(16, 10))\n", + "plt.imshow(abs(mel_diff2).T,aspect=\"auto\", origin=\"lower\");\n", + "plt.colorbar()\n", + "plt.tight_layout()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# PLOT GT SPECTROGRAM diff\n", + "from matplotlib import pylab as plt\n", + "mel = postnet_outputs[idx]\n", + "mel_diff2 = 
mel_truth.T - mel[:mel_truth.shape[1]]\n", + "plt.figure(figsize=(16, 10))\n", + "plt.imshow(abs(mel_diff2).T,aspect=\"auto\", origin=\"lower\");\n", + "plt.colorbar()\n", + "plt.tight_layout()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/GE2E-CorentinJ-ExtractSpeakerEmbeddings-by-sample.ipynb b/notebooks/GE2E-CorentinJ-ExtractSpeakerEmbeddings-by-sample.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..576a95fed6fe5babb60357a91e060b44582c702e --- /dev/null +++ b/notebooks/GE2E-CorentinJ-ExtractSpeakerEmbeddings-by-sample.ipynb @@ -0,0 +1,25495 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is a noteboook used to generate the speaker embeddings with the CorentinJ GE2E model trained with Angular Prototypical loss for multi-speaker training.\n", + "\n", + "Before running this script please DON'T FORGET:\n", + "- to set the right paths in the cell below.\n", + "\n", + "Repositories:\n", + "- TTS: https://github.com/mozilla/TTS\n", + "- CorentinJ GE2E: https://github.com/Edresson/GE2E-Speaker-Encoder" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "import os\n", + "import importlib\n", + "import random\n", + "import librosa\n", + "import torch\n", + "\n", + "import numpy as np\n", + "from TTS.utils.io import load_config\n", + "from tqdm import tqdm\n", + "from TTS.tts.utils.speakers import save_speaker_mapping, load_speaker_mapping\n", + "\n", + "# you may need to change this depending on your system\n", + "os.environ['CUDA_VISIBLE_DEVICES']='0'" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Cloning into 'Real-Time-Voice-Cloning'...\n", + "remote: Enumerating objects: 5, done.\u001b[K\n", + "remote: Counting objects: 100% (5/5), done.\u001b[K\n", + "remote: Compressing objects: 100% (5/5), done.\u001b[K\n", + "remote: Total 2508 (delta 0), reused 3 (delta 0), pack-reused 2503\u001b[K\n", + "Receiving objects: 100% (2508/2508), 360.78 MiB | 17.84 MiB/s, done.\n", + "Resolving deltas: 100% (1387/1387), done.\n", + "Checking connectivity... done.\n" + ] + } + ], + "source": [ + "# Clone encoder \n", + "!git clone https://github.com/CorentinJ/Real-Time-Voice-Cloning.git\n", + "os.chdir('Real-Time-Voice-Cloning/')" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "#Install voxceleb_trainer Requeriments\n", + "!python -m pip install umap-learn visdom webrtcvad librosa>=0.5.1 matplotlib>=2.0.2 numpy>=1.14.0 scipy>=1.0.0 tqdm sounddevice Unidecode inflect multiprocess numba" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--2020-08-05 06:51:05-- https://github.com/Edresson/Real-Time-Voice-Cloning/releases/download/checkpoints/pretrained.zip\n", + "Resolving github.com (github.com)... 
18.231.5.6\n", + "Connecting to github.com (github.com)|18.231.5.6|:443... connected.\n", + "HTTP request sent, awaiting response... 301 Moved Permanently\n", + "Location: https://github.com/Edresson/GE2E-Speaker-Encoder/releases/download/checkpoints/pretrained.zip [following]\n", + "--2020-08-05 06:51:05-- https://github.com/Edresson/GE2E-Speaker-Encoder/releases/download/checkpoints/pretrained.zip\n", + "Reusing existing connection to github.com:443.\n", + "HTTP request sent, awaiting response... 302 Found\n", + "Location: https://github-production-release-asset-2e65be.s3.amazonaws.com/263893598/f7f31d80-96df-11ea-8345-261fc35f9849?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20200805%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20200805T101614Z&X-Amz-Expires=300&X-Amz-Signature=df7724c28668ebd5dfbcc6a9b51f6afb78193c30119f3a1c3eef678188aabd1e&X-Amz-SignedHeaders=host&actor_id=0&repo_id=263893598&response-content-disposition=attachment%3B%20filename%3Dpretrained.zip&response-content-type=application%2Foctet-stream [following]\n", + "--2020-08-05 06:51:05-- https://github-production-release-asset-2e65be.s3.amazonaws.com/263893598/f7f31d80-96df-11ea-8345-261fc35f9849?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20200805%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20200805T101614Z&X-Amz-Expires=300&X-Amz-Signature=df7724c28668ebd5dfbcc6a9b51f6afb78193c30119f3a1c3eef678188aabd1e&X-Amz-SignedHeaders=host&actor_id=0&repo_id=263893598&response-content-disposition=attachment%3B%20filename%3Dpretrained.zip&response-content-type=application%2Foctet-stream\n", + "Resolving github-production-release-asset-2e65be.s3.amazonaws.com (github-production-release-asset-2e65be.s3.amazonaws.com)... 52.216.18.24\n", + "Connecting to github-production-release-asset-2e65be.s3.amazonaws.com (github-production-release-asset-2e65be.s3.amazonaws.com)|52.216.18.24|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 383640573 (366M) [application/octet-stream]\n", + "Saving to: ‘pretrained.zip’\n", + "\n", + "pretrained.zip 100%[===================>] 365,87M 6,62MB/s in 56s \n", + "\n", + "2020-08-05 06:52:03 (6,48 MB/s) - ‘pretrained.zip’ saved [383640573/383640573]\n", + "\n", + "Archive: pretrained.zip\n", + " creating: encoder/saved_models/\n", + " inflating: encoder/saved_models/pretrained.pt \n", + " creating: synthesizer/saved_models/\n", + " creating: synthesizer/saved_models/logs-pretrained/\n", + " creating: synthesizer/saved_models/logs-pretrained/taco_pretrained/\n", + " extracting: synthesizer/saved_models/logs-pretrained/taco_pretrained/checkpoint \n", + " inflating: synthesizer/saved_models/logs-pretrained/taco_pretrained/tacotron_model.ckpt-278000.data-00000-of-00001 \n", + " inflating: synthesizer/saved_models/logs-pretrained/taco_pretrained/tacotron_model.ckpt-278000.index \n", + " inflating: synthesizer/saved_models/logs-pretrained/taco_pretrained/tacotron_model.ckpt-278000.meta \n", + " creating: vocoder/saved_models/\n", + " creating: vocoder/saved_models/pretrained/\n", + " inflating: vocoder/saved_models/pretrained/pretrained.pt \n" + ] + } + ], + "source": [ + "#Download encoder Checkpoint\n", + "!wget https://github.com/Edresson/Real-Time-Voice-Cloning/releases/download/checkpoints/pretrained.zip\n", + "!unzip pretrained.zip" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "from encoder import inference as encoder\n", + "from encoder.params_model import model_embedding_size as speaker_embedding_size\n", + "from pathlib import Path" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Preparing the encoder, the synthesizer and the vocoder...\n", + "Loaded encoder \"pretrained.pt\" trained to step 1564501\n", + "Testing your configuration with small inputs.\n", + "\tTesting the encoder...\n", + "(256,)\n" + ] + } + ], + "source": [ + "print(\"Preparing the encoder, the synthesizer and the vocoder...\")\n", + "encoder.load_model(Path('encoder/saved_models/pretrained.pt'))\n", + "print(\"Testing your configuration with small inputs.\")\n", + "# Forward an audio waveform of zeroes that lasts 1 second. Notice how we can get the encoder's\n", + "# sampling rate, which may differ.\n", + "# If you're unfamiliar with digital audio, know that it is encoded as an array of floats \n", + "# (or sometimes integers, but mostly floats in this projects) ranging from -1 to 1.\n", + "# The sampling rate is the number of values (samples) recorded per second, it is set to\n", + "# 16000 for the encoder. 
Creating an array of length will always correspond \n", + "# to an audio of 1 second.\n", + "print(\"\\tTesting the encoder...\")\n", + "\n", + "wav = np.zeros(encoder.sampling_rate) \n", + "embed = encoder.embed_utterance(wav)\n", + "print(embed.shape)\n", + "\n", + "# Embeddings are L2-normalized (this isn't important here, but if you want to make your own \n", + "# embeddings it will be).\n", + "#embed /= np.linalg.norm(embed) # for random embedding\n" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "SAVE_PATH = '../'" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "# Set constants\n", + "DATASETS_NAME = ['vctk'] # list the datasets\n", + "DATASETS_PATH = ['../../../../../datasets/VCTK-Corpus-removed-silence/']\n", + "DATASETS_METAFILE = ['']\n", + "USE_CUDA = True" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + " 0%| | 0/44063 [00:00>>>>>> dev + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import glob\n", + "import random\n", + "import numpy as np\n", + "import torch\n", + "import umap\n", + "\n", + "from TTS.speaker_encoder.model import SpeakerEncoder\n", +<<<<<<< HEAD + "from TTS.utils.audio import AudioProcessor\n", + "from TTS.utils.io import load_config\n", +======= + "from TTS.tts.utils.audio import AudioProcessor\n", + "from TTS.tts.utils.generic_utils import load_config\n", +>>>>>>> dev + "\n", + "from bokeh.io import output_notebook, show\n", + "from bokeh.plotting import figure\n", + "from bokeh.models import HoverTool, ColumnDataSource, BoxZoomTool, ResetTool, OpenURL, TapTool\n", + "from bokeh.transform import factor_cmap, factor_mark\n", + "from bokeh.palettes import Category10" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For larger sets of speakers, you can use **Category20**, but you need to change it in the **pal** variable too\n", + "\n", + "List of Bokeh palettes here: http://docs.bokeh.org/en/1.4.0/docs/reference/palettes.html\n", + "\n", + "**NB:** if you have problems with other palettes, first see https://stackoverflow.com/questions/48333820/why-do-some-bokeh-palettes-raise-a-valueerror-when-used-in-factor-cmap" + ] + }, + { + "cell_type": "code", +<<<<<<< HEAD + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + " \n", + " Loading BokehJS ...\n", + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "\n", + "(function(root) {\n", + " function now() {\n", + " return new Date();\n", + " }\n", + "\n", + " var force = true;\n", + "\n", + " if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n", + " root._bokeh_onload_callbacks = [];\n", + " root._bokeh_is_loading = undefined;\n", + " }\n", + "\n", + " var JS_MIME_TYPE = 'application/javascript';\n", + " var HTML_MIME_TYPE = 'text/html';\n", + " var EXEC_MIME_TYPE = 'application/vnd.bokehjs_exec.v0+json';\n", + " var CLASS_NAME = 'output_bokeh rendered_html';\n", + "\n", + " /**\n", + " * Render data to the DOM node\n", + " */\n", + " function render(props, node) {\n", + " var script = document.createElement(\"script\");\n", + " node.appendChild(script);\n", + " }\n", + "\n", + " /**\n", + " * Handle when an output is cleared or removed\n", + " */\n", + " function handleClearOutput(event, handle) {\n", + " var cell = handle.cell;\n", + "\n", + " var id = cell.output_area._bokeh_element_id;\n", + " var server_id = cell.output_area._bokeh_server_id;\n", + " // Clean up Bokeh references\n", + " if (id != null && id in Bokeh.index) {\n", + " Bokeh.index[id].model.document.clear();\n", + " delete Bokeh.index[id];\n", + " }\n", + "\n", + " if (server_id !== undefined) {\n", + " // Clean up Bokeh references\n", + " var cmd = \"from bokeh.io.state import curstate; print(curstate().uuid_to_server['\" + server_id + \"'].get_sessions()[0].document.roots[0]._id)\";\n", + " cell.notebook.kernel.execute(cmd, {\n", + " iopub: {\n", + " output: function(msg) {\n", + " var id = msg.content.text.trim();\n", + " if (id in Bokeh.index) {\n", + " Bokeh.index[id].model.document.clear();\n", + " delete Bokeh.index[id];\n", + " }\n", + " }\n", + " }\n", + " });\n", + " // Destroy server and session\n", + " var cmd = \"import bokeh.io.notebook as ion; ion.destroy_server('\" + server_id + \"')\";\n", + " cell.notebook.kernel.execute(cmd);\n", + " }\n", + " }\n", + "\n", + " /**\n", + " * Handle when a new output is added\n", + " */\n", + " function handleAddOutput(event, handle) {\n", + " var output_area = handle.output_area;\n", + " var output = handle.output;\n", + "\n", + " // limit handleAddOutput to display_data with EXEC_MIME_TYPE content only\n", + " if ((output.output_type != \"display_data\") || (!output.data.hasOwnProperty(EXEC_MIME_TYPE))) {\n", + " return\n", + " }\n", + "\n", + " var toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n", + "\n", + " if (output.metadata[EXEC_MIME_TYPE][\"id\"] !== undefined) {\n", + " toinsert[toinsert.length - 1].firstChild.textContent = output.data[JS_MIME_TYPE];\n", + " // store reference to embed id on output_area\n", + " output_area._bokeh_element_id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n", + " }\n", + " if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n", + " var bk_div = document.createElement(\"div\");\n", + " bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n", + " var script_attrs = bk_div.children[0].attributes;\n", + " for (var i = 0; i < script_attrs.length; i++) {\n", + " toinsert[toinsert.length - 1].firstChild.setAttribute(script_attrs[i].name, script_attrs[i].value);\n", + " }\n", + " // store reference to server id on output_area\n", + " output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n", + " }\n", + " }\n", + "\n", + " function register_renderer(events, OutputArea) {\n", + "\n", + " 
function append_mime(data, metadata, element) {\n", + " // create a DOM node to render to\n", + " var toinsert = this.create_output_subarea(\n", + " metadata,\n", + " CLASS_NAME,\n", + " EXEC_MIME_TYPE\n", + " );\n", + " this.keyboard_manager.register_events(toinsert);\n", + " // Render to node\n", + " var props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n", + " render(props, toinsert[toinsert.length - 1]);\n", + " element.append(toinsert);\n", + " return toinsert\n", + " }\n", + "\n", + " /* Handle when an output is cleared or removed */\n", + " events.on('clear_output.CodeCell', handleClearOutput);\n", + " events.on('delete.Cell', handleClearOutput);\n", + "\n", + " /* Handle when a new output is added */\n", + " events.on('output_added.OutputArea', handleAddOutput);\n", + "\n", + " /**\n", + " * Register the mime type and append_mime function with output_area\n", + " */\n", + " OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n", + " /* Is output safe? */\n", + " safe: true,\n", + " /* Index of renderer in `output_area.display_order` */\n", + " index: 0\n", + " });\n", + " }\n", + "\n", + " // register the mime type if in Jupyter Notebook environment and previously unregistered\n", + " if (root.Jupyter !== undefined) {\n", + " var events = require('base/js/events');\n", + " var OutputArea = require('notebook/js/outputarea').OutputArea;\n", + "\n", + " if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n", + " register_renderer(events, OutputArea);\n", + " }\n", + " }\n", + "\n", + " \n", + " if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n", + " root._bokeh_timeout = Date.now() + 5000;\n", + " root._bokeh_failed_load = false;\n", + " }\n", + "\n", + " var NB_LOAD_WARNING = {'data': {'text/html':\n", + " \"
\\n\"+\n", + " \"

\\n\"+\n", + " \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n", + " \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n", + " \"

\\n\"+\n", + " \"
    \\n\"+\n", + " \"
  • re-rerun `output_notebook()` to attempt to load from CDN again, or
  • \\n\"+\n", + " \"
  • use INLINE resources instead, as so:
  • \\n\"+\n", + " \"
\\n\"+\n", + " \"\\n\"+\n", + " \"from bokeh.resources import INLINE\\n\"+\n", + " \"output_notebook(resources=INLINE)\\n\"+\n", + " \"\\n\"+\n", + " \"
\"}};\n", + "\n", + " function display_loaded() {\n", + " var el = document.getElementById(\"1001\");\n", + " if (el != null) {\n", + " el.textContent = \"BokehJS is loading...\";\n", + " }\n", + " if (root.Bokeh !== undefined) {\n", + " if (el != null) {\n", + " el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n", + " }\n", + " } else if (Date.now() < root._bokeh_timeout) {\n", + " setTimeout(display_loaded, 100)\n", + " }\n", + " }\n", + "\n", + "\n", + " function run_callbacks() {\n", + " try {\n", + " root._bokeh_onload_callbacks.forEach(function(callback) {\n", + " if (callback != null)\n", + " callback();\n", + " });\n", + " } finally {\n", + " delete root._bokeh_onload_callbacks\n", + " }\n", + " console.debug(\"Bokeh: all callbacks have finished\");\n", + " }\n", + "\n", + " function load_libs(css_urls, js_urls, callback) {\n", + " if (css_urls == null) css_urls = [];\n", + " if (js_urls == null) js_urls = [];\n", + "\n", + " root._bokeh_onload_callbacks.push(callback);\n", + " if (root._bokeh_is_loading > 0) {\n", + " console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n", + " return null;\n", + " }\n", + " if (js_urls == null || js_urls.length === 0) {\n", + " run_callbacks();\n", + " return null;\n", + " }\n", + " console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n", + " root._bokeh_is_loading = css_urls.length + js_urls.length;\n", + "\n", + " function on_load() {\n", + " root._bokeh_is_loading--;\n", + " if (root._bokeh_is_loading === 0) {\n", + " console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n", + " run_callbacks()\n", + " }\n", + " }\n", + "\n", + " function on_error() {\n", + " console.error(\"failed to load \" + url);\n", + " }\n", + "\n", + " for (var i = 0; i < css_urls.length; i++) {\n", + " var url = css_urls[i];\n", + " const element = document.createElement(\"link\");\n", + " element.onload = on_load;\n", + " element.onerror = on_error;\n", + " element.rel = \"stylesheet\";\n", + " element.type = \"text/css\";\n", + " element.href = url;\n", + " console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n", + " document.body.appendChild(element);\n", + " }\n", + "\n", + " for (var i = 0; i < js_urls.length; i++) {\n", + " var url = js_urls[i];\n", + " var element = document.createElement('script');\n", + " element.onload = on_load;\n", + " element.onerror = on_error;\n", + " element.async = false;\n", + " element.src = url;\n", + " console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n", + " document.head.appendChild(element);\n", + " }\n", + " };var element = document.getElementById(\"1001\");\n", + " if (element == null) {\n", + " console.error(\"Bokeh: ERROR: autoload.js configured with elementid '1001' but no matching script tag was found. 
\")\n", + " return false;\n", + " }\n", + "\n", + " function inject_raw_css(css) {\n", + " const element = document.createElement(\"style\");\n", + " element.appendChild(document.createTextNode(css));\n", + " document.body.appendChild(element);\n", + " }\n", + "\n", + " \n", + " var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-gl-1.4.0.min.js\"];\n", + " var css_urls = [];\n", + " \n", + "\n", + " var inline_js = [\n", + " function(Bokeh) {\n", + " Bokeh.set_log_level(\"info\");\n", + " },\n", + " function(Bokeh) {\n", + " \n", + " \n", + " }\n", + " ];\n", + "\n", + " function run_inline_js() {\n", + " \n", + " if (root.Bokeh !== undefined || force === true) {\n", + " \n", + " for (var i = 0; i < inline_js.length; i++) {\n", + " inline_js[i].call(root, root.Bokeh);\n", + " }\n", + " if (force === true) {\n", + " display_loaded();\n", + " }} else if (Date.now() < root._bokeh_timeout) {\n", + " setTimeout(run_inline_js, 100);\n", + " } else if (!root._bokeh_failed_load) {\n", + " console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n", + " root._bokeh_failed_load = true;\n", + " } else if (force !== true) {\n", + " var cell = $(document.getElementById(\"1001\")).parents('.cell').data().cell;\n", + " cell.output_area.append_execute_result(NB_LOAD_WARNING)\n", + " }\n", + "\n", + " }\n", + "\n", + " if (root._bokeh_is_loading === 0) {\n", + " console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n", + " run_inline_js();\n", + " } else {\n", + " load_libs(css_urls, js_urls, function() {\n", + " console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n", + " run_inline_js();\n", + " });\n", + " }\n", + "}(window));" + ], + "application/vnd.bokehjs_load.v0+json": "\n(function(root) {\n function now() {\n return new Date();\n }\n\n var force = true;\n\n if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n root._bokeh_onload_callbacks = [];\n root._bokeh_is_loading = undefined;\n }\n\n \n\n \n if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_failed_load = false;\n }\n\n var NB_LOAD_WARNING = {'data': {'text/html':\n \"
\\n\"+\n \"

\\n\"+\n \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n \"

\\n\"+\n \"
    \\n\"+\n \"
  • re-rerun `output_notebook()` to attempt to load from CDN again, or
  • \\n\"+\n \"
  • use INLINE resources instead, as so:
  • \\n\"+\n \"
\\n\"+\n \"\\n\"+\n \"from bokeh.resources import INLINE\\n\"+\n \"output_notebook(resources=INLINE)\\n\"+\n \"\\n\"+\n \"
\"}};\n\n function display_loaded() {\n var el = document.getElementById(\"1001\");\n if (el != null) {\n el.textContent = \"BokehJS is loading...\";\n }\n if (root.Bokeh !== undefined) {\n if (el != null) {\n el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n }\n } else if (Date.now() < root._bokeh_timeout) {\n setTimeout(display_loaded, 100)\n }\n }\n\n\n function run_callbacks() {\n try {\n root._bokeh_onload_callbacks.forEach(function(callback) {\n if (callback != null)\n callback();\n });\n } finally {\n delete root._bokeh_onload_callbacks\n }\n console.debug(\"Bokeh: all callbacks have finished\");\n }\n\n function load_libs(css_urls, js_urls, callback) {\n if (css_urls == null) css_urls = [];\n if (js_urls == null) js_urls = [];\n\n root._bokeh_onload_callbacks.push(callback);\n if (root._bokeh_is_loading > 0) {\n console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n return null;\n }\n if (js_urls == null || js_urls.length === 0) {\n run_callbacks();\n return null;\n }\n console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n root._bokeh_is_loading = css_urls.length + js_urls.length;\n\n function on_load() {\n root._bokeh_is_loading--;\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n run_callbacks()\n }\n }\n\n function on_error() {\n console.error(\"failed to load \" + url);\n }\n\n for (var i = 0; i < css_urls.length; i++) {\n var url = css_urls[i];\n const element = document.createElement(\"link\");\n element.onload = on_load;\n element.onerror = on_error;\n element.rel = \"stylesheet\";\n element.type = \"text/css\";\n element.href = url;\n console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n document.body.appendChild(element);\n }\n\n for (var i = 0; i < js_urls.length; i++) {\n var url = js_urls[i];\n var element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n };var element = document.getElementById(\"1001\");\n if (element == null) {\n console.error(\"Bokeh: ERROR: autoload.js configured with elementid '1001' but no matching script tag was found. 
\")\n return false;\n }\n\n function inject_raw_css(css) {\n const element = document.createElement(\"style\");\n element.appendChild(document.createTextNode(css));\n document.body.appendChild(element);\n }\n\n \n var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-gl-1.4.0.min.js\"];\n var css_urls = [];\n \n\n var inline_js = [\n function(Bokeh) {\n Bokeh.set_log_level(\"info\");\n },\n function(Bokeh) {\n \n \n }\n ];\n\n function run_inline_js() {\n \n if (root.Bokeh !== undefined || force === true) {\n \n for (var i = 0; i < inline_js.length; i++) {\n inline_js[i].call(root, root.Bokeh);\n }\n if (force === true) {\n display_loaded();\n }} else if (Date.now() < root._bokeh_timeout) {\n setTimeout(run_inline_js, 100);\n } else if (!root._bokeh_failed_load) {\n console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n root._bokeh_failed_load = true;\n } else if (force !== true) {\n var cell = $(document.getElementById(\"1001\")).parents('.cell').data().cell;\n cell.output_area.append_execute_result(NB_LOAD_WARNING)\n }\n\n }\n\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n run_inline_js();\n } else {\n load_libs(css_urls, js_urls, function() {\n console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n run_inline_js();\n });\n }\n}(window));" + }, + "metadata": {}, + "output_type": "display_data" + } + ], +======= + "execution_count": null, + "metadata": {}, + "outputs": [], +>>>>>>> dev + "source": [ + "output_notebook()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You should also adjust all the path constants to point at the relevant locations for you locally" + ] + }, + { + "cell_type": "code", +<<<<<<< HEAD + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "#MODEL_RUN_PATH = \"libritts_360-half-October-31-2019_04+54PM-19d2f5f/\"\n", + "MODEL_RUN_PATH = \"libritts_360-half-September-28-2019_10+46AM-8565c50/\"\n", +======= + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "MODEL_RUN_PATH = \"/media/erogol/data_ssd/Models/libri_tts/speaker_encoder/libritts_360-half-October-31-2019_04+54PM-19d2f5f/\"\n", +>>>>>>> dev + "MODEL_PATH = MODEL_RUN_PATH + \"best_model.pth.tar\"\n", + "CONFIG_PATH = MODEL_RUN_PATH + \"config.json\"\n", + "\n", + "# My single speaker locations\n", + "#EMBED_PATH = \"/home/neil/main/Projects/TTS3/embeddings/neil14/\"\n", + "#AUDIO_PATH = \"/home/neil/data/Projects/NeilTTS/neil14/wavs/\"\n", + "\n", + "# My multi speaker locations\n", + "EMBED_PATH = \"/home/erogol/Data/Libri-TTS/train-clean-360-embed_128/\"\n", +<<<<<<< HEAD + "AUDIO_PATH = \"datasets/LibriTTS/test-clean/\"" +======= + "AUDIO_PATH = \"/home/erogol/Data/Libri-TTS/train-clean-360/\"" +>>>>>>> dev + ] + }, + { + "cell_type": "code", +<<<<<<< HEAD + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "best_model.pth.tar\r\n", + "config.json\r\n", + "events.out.tfevents.1569660396.erogol-desktop\r\n" + ] + } + ], +======= + "execution_count": null, + "metadata": {}, + "outputs": [], +>>>>>>> dev + "source": [ + "!ls -1 $MODEL_RUN_PATH" + ] + }, + { + "cell_type": "code", +<<<<<<< HEAD + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": 
"stdout", + "output_type": "stream", + "text": [ + " > Setting up Audio Processor...\n", + " | > sample_rate:16000\n", + " | > num_mels:40\n", + " | > min_level_db:-100\n", + " | > frame_shift_ms:12.5\n", + " | > frame_length_ms:50\n", + " | > ref_level_db:20\n", + " | > fft_size:1024\n", + " | > power:None\n", + " | > preemphasis:0.98\n", + " | > griffin_lim_iters:None\n", + " | > signal_norm:True\n", + " | > symmetric_norm:True\n", + " | > mel_fmin:0\n", + " | > mel_fmax:8000.0\n", + " | > spec_gain:20.0\n", + " | > stft_pad_mode:reflect\n", + " | > max_norm:4.0\n", + " | > clip_norm:True\n", + " | > do_trim_silence:False\n", + " | > trim_db:60\n", + " | > do_sound_norm:False\n", + " | > stats_path:None\n", + " | > hop_length:200\n", + " | > win_length:800\n" + ] + } + ], +======= + "execution_count": null, + "metadata": {}, + "outputs": [], +>>>>>>> dev + "source": [ + "CONFIG = load_config(CONFIG_PATH)\n", + "ap = AudioProcessor(**CONFIG['audio'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Bring in the embeddings created by **compute_embeddings.py**" + ] + }, + { + "cell_type": "code", +<<<<<<< HEAD + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Embeddings found: 0\n" + ] + } + ], +======= + "execution_count": null, + "metadata": {}, + "outputs": [], +>>>>>>> dev + "source": [ + "embed_files = glob.glob(EMBED_PATH+\"/**/*.npy\", recursive=True)\n", + "print(f'Embeddings found: {len(embed_files)}')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Check that we did indeed find an embedding" + ] + }, + { + "cell_type": "code", +<<<<<<< HEAD + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "ename": "IndexError", + "evalue": "list index out of range", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0membed_files\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mIndexError\u001b[0m: list index out of range" + ] + } + ], +======= + "execution_count": null, + "metadata": {}, + "outputs": [], +>>>>>>> dev + "source": [ + "embed_files[0]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Process the speakers\n", + "\n", + "Assumes count of **speaker_paths** corresponds to number of speakers (so a corpus in just one directory would be treated like a single speaker and the multiple directories of LibriTTS are treated as distinct speakers)" + ] + }, + { + "cell_type": "code", +<<<<<<< HEAD + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Speaker count: 0\n" + ] + } + ], +======= + "execution_count": null, + "metadata": {}, + "outputs": [], +>>>>>>> dev + "source": [ + "speaker_paths = list(set([os.path.dirname(os.path.dirname(embed_file)) for embed_file in embed_files]))\n", + "speaker_to_utter = {}\n", + "for embed_file in embed_files:\n", + " speaker_path = os.path.dirname(os.path.dirname(embed_file))\n", + " try:\n", + " speaker_to_utter[speaker_path].append(embed_file)\n", + " except:\n", + " speaker_to_utter[speaker_path]=[embed_file]\n", + "print(f'Speaker count: 
{len(speaker_paths)}')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Set up the embeddings\n", + "\n", + "Adjust the number of speakers to select and the number of utterances from each speaker and they will be randomly sampled from the corpus" + ] + }, + { + "cell_type": "code", +<<<<<<< HEAD + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "ename": "ValueError", + "evalue": "'a' cannot be empty unless no samples are taken", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 14\u001b[0;31m \u001b[0mspeaker_idxs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrandom\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mchoice\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mspeaker_paths\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnum_speakers\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreplace\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m \u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 15\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mspeaker_num\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mspeaker_idx\u001b[0m \u001b[0;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mspeaker_idxs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32mmtrand.pyx\u001b[0m in \u001b[0;36mnumpy.random.mtrand.RandomState.choice\u001b[0;34m()\u001b[0m\n", + "\u001b[0;31mValueError\u001b[0m: 'a' cannot be empty unless no samples are taken" + ] + } + ], + "source": [ + "ttsembeds = []\n", +======= + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "embeds = []\n", +>>>>>>> dev + "labels = []\n", + "locations = []\n", + "\n", + "# single speaker \n", + "#num_speakers = 1\n", + "#num_utters = 1000\n", + "\n", + "# multi speaker\n", + "num_speakers = 10\n", + "num_utters = 20\n", + "\n", + "\n", + "speaker_idxs = np.random.choice(range(len(speaker_paths)), num_speakers, replace=False )\n", + "\n", + "for speaker_num, speaker_idx in enumerate(speaker_idxs):\n", + " speaker_path = speaker_paths[speaker_idx]\n", + " speakers_utter = speaker_to_utter[speaker_path]\n", + " utter_idxs = np.random.randint(0, len(speakers_utter) , num_utters)\n", + " for utter_idx in utter_idxs:\n", + " embed_path = speaker_to_utter[speaker_path][utter_idx]\n", + " embed = np.load(embed_path)\n", + " embeds.append(embed)\n", + " labels.append(str(speaker_num))\n", +<<<<<<< HEAD + " #locations.append(embed_path.replace(EMBED_PATH, '').replace('.npy','.wav'))\n", +======= + " locations.append(embed_path.replace(EMBED_PATH, '').replace('.npy','.wav'))\n", +>>>>>>> dev + "embeds = np.concatenate(embeds)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Load embeddings with UMAP" + ] + }, + { + "cell_type": "code", +<<<<<<< HEAD + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "ename": "AttributeError", + "evalue": "module 'umap' has no attribute 
'UMAP'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mmodel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mumap\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mUMAP\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mprojection\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit_transform\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0membeds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mAttributeError\u001b[0m: module 'umap' has no attribute 'UMAP'" + ] + } + ], +======= + "execution_count": null, + "metadata": {}, + "outputs": [], +>>>>>>> dev + "source": [ + "model = umap.UMAP()\n", + "projection = model.fit_transform(embeds)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Interactively charting the data in Bokeh\n", + "\n", + "Set up various details for Bokeh to plot the data\n", + "\n", + "You can use the regular Bokeh [tools](http://docs.bokeh.org/en/1.4.0/docs/user_guide/tools.html?highlight=tools) to explore the data, with reset setting it back to normal\n", + "\n", + "Once you have started the local server (see cell below) you can then click on plotted points which will open a tab to play the audio for that point, enabling easy exploration of your corpus\n", + "\n", + "File location in the tooltip is given relative to **AUDIO_PATH**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "source_wav_stems = ColumnDataSource(\n", + " data=dict(\n", + " x = projection.T[0].tolist(),\n", + " y = projection.T[1].tolist(),\n", + " desc=locations,\n", + " label=labels\n", + " )\n", + " )\n", + "\n", + "hover = HoverTool(\n", + " tooltips=[\n", + " (\"file\", \"@desc\"),\n", + " (\"speaker\", \"@label\"),\n", + " ]\n", + " )\n", + "\n", + "# optionally consider adding these to the tooltips if you want additional detail\n", + "# for the coordinates: (\"(x,y)\", \"($x, $y)\"),\n", + "# for the index of the embedding / wav file: (\"index\", \"$index\"),\n", + "\n", + "factors = list(set(labels))\n", + "pal_size = max(len(factors), 3)\n", + "pal = Category10[pal_size]\n", + "\n", + "p = figure(plot_width=600, plot_height=400, tools=[hover,BoxZoomTool(), ResetTool(), TapTool()])\n", + "\n", + "\n", + "p.circle('x', 'y', source=source_wav_stems, color=factor_cmap('label', palette=pal, factors=factors),)\n", + "\n", + "url = \"http://localhost:8000/@desc\"\n", + "taptool = p.select(type=TapTool)\n", + "taptool.callback = OpenURL(url=url)\n", + "\n", + "show(p)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Local server to serve wav files from corpus\n", + "\n", + "This is required so that when you click on a data point the hyperlink associated with it will be served the file locally.\n", + "\n", + "There are other ways to serve this if you prefer and you can also run the commands manually on the command line\n", + "\n", + "The server will continue to run until stopped. 
To stop it, simply interrupt the kernel (i.e. the square button, or via the Kernel menu)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%cd $AUDIO_PATH\n", + "%pwd\n", + "!python -m http.server" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", +<<<<<<< HEAD + "version": "3.8.5" +======= + "version": "3.7.4" +>>>>>>> dev + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/TestAttention.ipynb b/notebooks/TestAttention.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..de58e3755a2a3901b8e737ec9c735d3388dd4334 --- /dev/null +++ b/notebooks/TestAttention.ipynb @@ -0,0 +1,189 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "Collapsed": "false" + }, + "source": [ + "This notebook tests the attention performance of a TTS model on a list of sentences taken from the DeepVoice paper.\n", + "### Features of this notebook\n", + "- You can see visually how your model performs on each sentence and try to discern common problems.\n", + "- At the end, the final attention score is printed, showing the overall performance of your model. You can use this value for model selection.\n", + "- You can change the list of sentences by providing a different sentence file." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false", + "scrolled": true + }, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "import os, sys, json\n", + "import torch \n", + "import time\n", + "import numpy as np\n", + "from matplotlib import pylab as plt\n", + "\n", + "%pylab inline\n", + "plt.rcParams[\"figure.figsize\"] = (16,5)\n", + "\n", + "import librosa\n", + "import librosa.display\n", + "\n", + "from TTS.tts.layers import *\n", + "from TTS.tts.utils.audio import AudioProcessor\n", + "from TTS.tts.utils.generic_utils import setup_model\n", + "from TTS.tts.utils.io import load_config\n", + "from TTS.tts.utils.text import text_to_sequence\n", + "from TTS.tts.utils.synthesis import synthesis\n", + "from TTS.tts.utils.visual import plot_alignment\n", + "from TTS.tts.utils.measures import alignment_diagonal_score\n", + "\n", + "import IPython\n", + "from IPython.display import Audio\n", + "\n", + "os.environ['CUDA_VISIBLE_DEVICES']='1'\n", + "\n", + "def tts(model, text, CONFIG, use_cuda, ap):\n", + " t_1 = time.time()\n", + " # run the model\n", + " waveform, alignment, mel_spec, mel_postnet_spec, stop_tokens, inputs = synthesis(model, text, CONFIG, use_cuda, ap, speaker_id, None, False, CONFIG.enable_eos_bos_chars, True)\n", + " if CONFIG.model == \"Tacotron\" and not use_gl:\n", + " mel_postnet_spec = ap.out_linear_to_mel(mel_postnet_spec.T).T\n", + " # plotting\n", + " attn_score = alignment_diagonal_score(torch.FloatTensor(alignment).unsqueeze(0))\n", + " print(f\" > {text}\")\n", + " IPython.display.display(IPython.display.Audio(waveform, rate=ap.sample_rate))\n", + " fig = plot_alignment(alignment, fig_size=(8, 5))\n", + " IPython.display.display(fig)\n", + " #saving results\n", + " os.makedirs(OUT_FOLDER, exist_ok=True)\n", + " file_name = text[:200].replace(\" \", \"_\").replace(\".\",\"\") + \".wav\"\n", + " out_path = 
os.path.join(OUT_FOLDER, file_name)\n", + " ap.save_wav(waveform, out_path)\n", + " return attn_score\n", + "\n", + "# Set constants\n", + "ROOT_PATH = '/home/erogol/Models/LJSpeech/ljspeech-May-20-2020_12+29PM-1835628/'\n", + "MODEL_PATH = ROOT_PATH + '/best_model.pth.tar'\n", + "CONFIG_PATH = ROOT_PATH + '/config.json'\n", + "OUT_FOLDER = './hard_sentences/'\n", + "CONFIG = load_config(CONFIG_PATH)\n", + "SENTENCES_PATH = 'sentences.txt'\n", + "use_cuda = True\n", + "\n", + "# Set some config fields manually for testing\n", + "# CONFIG.windowing = False\n", + "# CONFIG.prenet_dropout = False\n", + "# CONFIG.separate_stopnet = True\n", + "CONFIG.use_forward_attn = False\n", + "# CONFIG.forward_attn_mask = True\n", + "# CONFIG.stopnet = True" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "# LOAD TTS MODEL\n", + "from TTS.tts.utils.text.symbols import make_symbols, symbols, phonemes\n", + "\n", + "# multi speaker \n", + "if CONFIG.use_speaker_embedding:\n", + " speakers = json.load(open(f\"{ROOT_PATH}/speakers.json\", 'r'))\n", + " speakers_idx_to_id = {v: k for k, v in speakers.items()}\n", + "else:\n", + " speakers = []\n", + " speaker_id = None\n", + "\n", + "# if the vocabulary was passed, replace the default\n", + "if 'characters' in CONFIG.keys():\n", + " symbols, phonemes = make_symbols(**CONFIG.characters)\n", + "\n", + "# load the model\n", + "num_chars = len(phonemes) if CONFIG.use_phonemes else len(symbols)\n", + "model = setup_model(num_chars, len(speakers), CONFIG)\n", + "\n", + "# load the audio processor\n", + "ap = AudioProcessor(**CONFIG.audio) \n", + "\n", + "\n", + "# load model state\n", + "if use_cuda:\n", + " cp = torch.load(MODEL_PATH)\n", + "else:\n", + " cp = torch.load(MODEL_PATH, map_location=lambda storage, loc: storage)\n", + "\n", + "# load the model\n", + "model.load_state_dict(cp['model'])\n", + "if use_cuda:\n", + " model.cuda()\n", + "model.eval()\n", + "print(cp['step'])\n", + "print(cp['r'])\n", + "\n", + "# set model stepsize\n", + "if 'r' in cp:\n", + " model.decoder.set_r(cp['r'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "model.decoder.max_decoder_steps=3000\n", + "attn_scores = []\n", + "with open(SENTENCES_PATH, 'r') as f:\n", + " for text in f:\n", + " attn_score = tts(model, text, CONFIG, use_cuda, ap)\n", + " attn_scores.append(attn_score)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "np.mean(attn_scores)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.7" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/Tutorial_Converting_PyTorch_to_TF_to_TFlite.ipynb b/notebooks/Tutorial_Converting_PyTorch_to_TF_to_TFlite.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..0a9090e865e983da4bb8869997c52cbf32aec4b8 --- /dev/null +++ b/notebooks/Tutorial_Converting_PyTorch_to_TF_to_TFlite.ipynb @@ -0,0 +1,412 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": 
"Tutorial_Converting_PyTorch_to_TF_to_TFlite.ipynb", + "provenance": [], + "collapsed_sections": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "6LWsNd3_M3MP", + "colab_type": "text" + }, + "source": [ + "# Converting Pytorch models to Tensorflow and TFLite by MozillaTTS" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FAqrSIWgLyP0", + "colab_type": "text" + }, + "source": [ + "This is a tutorial demonstrating Mozilla TTS capabilities to convert \n", + "trained PyTorch models to Tensorflow and Tflite.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MBJjGYnoEo4v", + "colab_type": "text" + }, + "source": [ + "# Installation" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ku-dA4DKoeXk", + "colab_type": "text" + }, + "source": [ + "### Download TF Models and configs" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "jGIgnWhGsxU1", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 162 + }, + "outputId": "b461952f-8507-4dd2-af06-4e6b8692765d", + "tags": [] + }, + "source": [ + "!gdown --id 1dntzjWFg7ufWaTaFy80nRz-Tu02xWZos -O data/tts_model.pth.tar\n", + "!gdown --id 18CQ6G6tBEOfvCHlPqP8EBI4xWbrr9dBc -O data/config.json" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "4dnpE0-kvTsu", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 235 + }, + "outputId": "f67c3138-bda0-4b3e-ffcc-647f9feec23e", + "tags": [] + }, + "source": [ + "!gdown --id 1Ty5DZdOc0F7OTGj9oJThYbL5iVu_2G0K -O data/vocoder_model.pth.tar\n", + "!gdown --id 1Rd0R_nRCrbjEdpOwq6XwZAktvugiBvmu -O data/config_vocoder.json\n", + "!gdown --id 11oY3Tv0kQtxK_JPgxrfesa99maVXHNxU -O data/scale_stats.npy" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3IGvvCRMEwqn", + "colab_type": "text" + }, + "source": [ + "# Model Conversion PyTorch -> TF -> TFLite" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tLhz8SAf8Pgp", + "colab_type": "text" + }, + "source": [ + "## Converting PyTorch to Tensorflow\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Xsrvr_WQ8Ib5", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "outputId": "dae96616-e5f7-41b6-cdb9-5026cfcd3214", + "tags": [] + }, + "source": [ + "# convert TTS model to Tensorflow\n", + "!python ../TTS/bin/convert_tacotron2_torch_to_tf.py --config_path data/config.json --torch_model_path data/tts_model.pth.tar --output_path data/tts_model_tf.pkl" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "VJ4NA5If9ljv", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "outputId": "1520dca8-1db8-4e07-bc0c-b1d5941c775e", + "tags": [] + }, + "source": [ + "# convert Vocoder model to Tensorflow\n", + "!python ../TTS/bin/convert_melgan_torch_to_tf.py --config_path data/config_vocoder.json --torch_model_path data/vocoder_model.pth.tar --output_path data/vocoder_model_tf.pkl" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7d5vTkBZ-BYQ", + "colab_type": "text" + }, + "source": [ + "## Converting Tensorflow to TFLite" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "33hTfpuU99cg", + 
"colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 927 + }, + "outputId": "8a0e5be1-23a2-4128-ee37-8232adcb8ff0", + "tags": [] + }, + "source": [ + "# convert TTS model to TFLite\n", + "!python ../TTS/bin/convert_tacotron2_tflite.py --config_path data/config.json --tf_model data/tts_model_tf.pkl --output_path data/tts_model.tflite" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "e00Hm75Y-wZ2", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 364 + }, + "outputId": "42381b05-3c9d-44f0-dac7-d81efd95eadf", + "tags": [] + }, + "source": [ + "# convert Vocoder model to TFLite\n", + "!python ../TTS/bin/convert_melgan_tflite.py --config_path data/config_vocoder.json --tf_model data/vocoder_model_tf.pkl --output_path data/vocoder_model.tflite" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Zlgi8fPdpRF0", + "colab_type": "text" + }, + "source": [ + "# Run Inference with TFLite " + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "f-Yc42nQZG5A", + "colab_type": "code", + "colab": {} + }, + "source": [ + "def run_vocoder(mel_spec):\n", + " vocoder_inputs = mel_spec[None, :, :]\n", + " # get input and output details\n", + " input_details = vocoder_model.get_input_details()\n", + " # reshape input tensor for the new input shape\n", + " vocoder_model.resize_tensor_input(input_details[0]['index'], vocoder_inputs.shape)\n", + " vocoder_model.allocate_tensors()\n", + " detail = input_details[0]\n", + " vocoder_model.set_tensor(detail['index'], vocoder_inputs)\n", + " # run the model\n", + " vocoder_model.invoke()\n", + " # collect outputs\n", + " output_details = vocoder_model.get_output_details()\n", + " waveform = vocoder_model.get_tensor(output_details[0]['index'])\n", + " return waveform \n", + "\n", + "\n", + "def tts(model, text, CONFIG, p):\n", + " t_1 = time.time()\n", + " waveform, alignment, mel_spec, mel_postnet_spec, stop_tokens, inputs = synthesis(model, text, CONFIG, use_cuda, ap, speaker_id, style_wav=None,\n", + " truncated=False, enable_eos_bos_chars=CONFIG.enable_eos_bos_chars,\n", + " backend='tflite')\n", + " waveform = run_vocoder(mel_postnet_spec.T)\n", + " waveform = waveform[0, 0]\n", + " rtf = (time.time() - t_1) / (len(waveform) / ap.sample_rate)\n", + " tps = (time.time() - t_1) / len(waveform)\n", + " print(waveform.shape)\n", + " print(\" > Run-time: {}\".format(time.time() - t_1))\n", + " print(\" > Real-time factor: {}\".format(rtf))\n", + " print(\" > Time per step: {}\".format(tps))\n", + " IPython.display.display(IPython.display.Audio(waveform, rate=CONFIG.audio['sample_rate'])) \n", + " return alignment, mel_postnet_spec, stop_tokens, waveform" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZksegYQepkFg", + "colab_type": "text" + }, + "source": [ + "### Load TF Models" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "oVa0kOamprgj", + "colab_type": "code", + "colab": {} + }, + "source": [ + "import os\n", + "import torch\n", + "import time\n", + "import IPython\n", + "\n", + "from TTS.tts.tf.utils.tflite import load_tflite_model\n", + "from TTS.tts.tf.utils.io import load_checkpoint\n", + "from TTS.utils.io import load_config\n", + "from TTS.tts.utils.text.symbols import symbols, phonemes\n", + "from TTS.utils.audio import AudioProcessor\n", + "from TTS.tts.utils.synthesis import synthesis" + ], + 
"execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "EY-sHVO8IFSH", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# runtime settings\n", + "use_cuda = False" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "_1aIUp2FpxOQ", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# model paths\n", + "TTS_MODEL = \"data/tts_model.tflite\"\n", + "TTS_CONFIG = \"data/config.json\"\n", + "VOCODER_MODEL = \"data/vocoder_model.tflite\"\n", + "VOCODER_CONFIG = \"data/config_vocoder.json\"" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "CpgmdBVQplbv", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# load configs\n", + "TTS_CONFIG = load_config(TTS_CONFIG)\n", + "VOCODER_CONFIG = load_config(VOCODER_CONFIG)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "zmrQxiozIUVE", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 471 + }, + "outputId": "21cda136-de87-4d55-fd46-7d5306103d90", + "tags": [] + }, + "source": [ + "# load the audio processor\n", + "TTS_CONFIG.audio['stats_path'] = 'data/scale_stats.npy'\n", + "ap = AudioProcessor(**TTS_CONFIG.audio) " + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "8fLoI4ipqMeS", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# LOAD TTS MODEL\n", + "# multi speaker \n", + "speaker_id = None\n", + "speakers = []\n", + "\n", + "# load the models\n", + "model = load_tflite_model(TTS_MODEL)\n", + "vocoder_model = load_tflite_model(VOCODER_MODEL)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ws_YkPKsLgo-", + "colab_type": "text" + }, + "source": [ + "## Run Sample Sentence" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "FuWxZ9Ey5Puj", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 134 + }, + "outputId": "535c2df1-c27c-458b-e14b-41a977635aa1", + "tags": [] + }, + "source": [ + "sentence = \"Bill got in the habit of asking himself “Is that thought true?” and if he wasn’t absolutely certain it was, he just let it go.\"\n", + "align, spec, stop_tokens, wav = tts(model, sentence, TTS_CONFIG, ap)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ] +} \ No newline at end of file diff --git a/notebooks/dataset_analysis/AnalyzeDataset-Copy1.ipynb b/notebooks/dataset_analysis/AnalyzeDataset-Copy1.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..390b20e2fd229bd976c17da4161697dfd2d17803 --- /dev/null +++ b/notebooks/dataset_analysis/AnalyzeDataset-Copy1.ipynb @@ -0,0 +1,3406 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "TTS_PATH = \"/home/erogol/projects/\"" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "import os\n", + "import sys\n", + "sys.path.append(TTS_PATH) # set this if TTS is not installed globally\n", + "import glob\n", + "import librosa\n", + "import numpy as np\n", + "import pandas as pd\n", + "from scipy.stats import norm\n", + "from tqdm import tqdm_notebook as tqdm\n", + 
"from multiprocessing import Pool\n", + "from matplotlib import pylab as plt\n", + "from collections import Counter\n", + "from TTS.tts.datasets.preprocess import *\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "DATA_PATH = \"/home/erogol/Data/Spectie/audio/output/\"\n", + "META_DATA = \"metadata.txt\"\n", + "NUM_PROC = 8" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "Collapsed": "false" + }, + "outputs": [ + { + "ename": "FileNotFoundError", + "evalue": "[Errno 2] No such file or directory: '/home/erogol/Data/Spectie/audio/output/metadata.txt'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# use your own preprocessor at this stage - TTS/datasets/proprocess.py\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mitems\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmozilla_de\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mDATA_PATH\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mMETA_DATA\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\" > Number of audio files: {}\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/Projects/TTS/tts_namespace/TTS/datasets/preprocess.py\u001b[0m in \u001b[0;36mmozilla_de\u001b[0;34m(root_path, meta_file)\u001b[0m\n\u001b[1;32m 81\u001b[0m \u001b[0mitems\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 82\u001b[0m \u001b[0mspeaker_name\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"mozilla\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 83\u001b[0;31m \u001b[0;32mwith\u001b[0m \u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtxt_file\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'r'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencoding\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"ISO 8859-1\"\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mttf\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 84\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mline\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mttf\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 85\u001b[0m \u001b[0mcols\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mline\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstrip\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'|'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: '/home/erogol/Data/Spectie/audio/output/metadata.txt'" + ] + } + ], + "source": [ + "# use your own preprocessor at this stage - TTS/datasets/proprocess.py\n", + 
"items = mozilla_de(DATA_PATH, META_DATA)\n", + "print(\" > Number of audio files: {}\".format(len(items)))" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "# check wavs if exist\n", + "wav_files = []\n", + "for item in items:\n", + " wav_file = item[1].strip()\n", + " wav_files.append(wav_file)\n", + " if not os.path.exists(wav_file):\n", + " print(wav_file)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "Collapsed": "false" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_2_FINAL/2_119.wav']\n" + ] + } + ], + "source": [ + "# show duplicate items\n", + "c = Counter(wav_files)\n", + "duplicates = [item for item, count in c.items() if count > 1]\n", + "print(duplicates)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "folders = [w.split('/')[5] for w in wav_files]" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "Collapsed": "false" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'BATCH_10_FINAL',\n", + " 'BATCH_11_FINAL',\n", + " 'BATCH_12_FINAL',\n", + " 'BATCH_13_FINAL',\n", + " 'BATCH_14_FINAL',\n", + " 'BATCH_15_FINAL',\n", + " 'BATCH_16_FINAL',\n", + " 'BATCH_17_FINAL',\n", + " 'BATCH_18_FINAL',\n", + " 'BATCH_19_FINAL',\n", + " 'BATCH_1_FINAL',\n", + " 'BATCH_20_FINAL',\n", + " 'BATCH_2_FINAL',\n", + " 'BATCH_3_FINAL',\n", + " 'BATCH_4_FINAL',\n", + " 'BATCH_5_FINAL',\n", + " 'BATCH_6_FINAL',\n", + " 'BATCH_7_FINAL',\n", + " 'BATCH_8_FINAL',\n", + " 'BATCH_9_FINAL'}" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "set(folders)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "Collapsed": "false" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/erogol/miniconda3/lib/python3.7/site-packages/ipykernel_launcher.py:18: TqdmDeprecationWarning: This function will be removed in tqdm==5.0.0\n", + "Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "afbb94c274fe4913b256a8756584c0f6", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=0.0, max=14610.0), HTML(value='')))" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "def load_item(item):\n", + " file_name = item[1].strip()\n", + " text = item[0].strip()\n", + " audio = librosa.load(file_name, sr=None)\n", + " sr = audio[1]\n", + " audio = audio[0]\n", + " audio_len = len(audio) / sr\n", + " text_len = len(text)\n", + " return file_name, text, text_len, audio, audio_len\n", + "\n", + "# This will take a while depending on size of dataset\n", + "if NUM_PROC == 1:\n", + " data = []\n", + " for m in tqdm(items):\n", + " data += [load_item(m)]\n", + "else:\n", + " with Pool(8) as p:\n", + " data = list(tqdm(p.imap(load_item, items), total=len(items)))" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "Collapsed": "false" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/erogol/miniconda3/lib/python3.7/site-packages/ipykernel_launcher.py:3: 
TqdmDeprecationWarning: This function will be removed in tqdm==5.0.0\n", + "Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`\n", + " This is separate from the ipykernel package so we can avoid doing imports until\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "53b7f6adb4db47279927ec064addb3c7", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=0.0, max=14610.0), HTML(value='')))" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + " > Number of words: 27102\n" + ] + } + ], + "source": [ + "# count words in the dataset\n", + "w_count = Counter()\n", + "for item in tqdm(data):\n", + " text = item[1].lower().strip()\n", + " for word in text.split():\n", + " w_count[word] += 1\n", + "print(\" > Number of words: {}\".format(len(w_count)))" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "Collapsed": "false" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/erogol/miniconda3/lib/python3.7/site-packages/ipykernel_launcher.py:3: TqdmDeprecationWarning: This function will be removed in tqdm==5.0.0\n", + "Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`\n", + " This is separate from the ipykernel package so we can avoid doing imports until\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "8b48c3415e2a4ac1a174502c2308501d", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=0.0, max=14610.0), HTML(value='')))" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "text_vs_durs = {} # text length vs audio duration\n", + "text_len_counter = Counter() # number of sentences with the keyed length\n", + "for item in tqdm(data):\n", + " text = item[1].lower().strip()\n", + " text_len = len(text)\n", + " text_len_counter[text_len] += 1\n", + " audio_len = item[-1]\n", + " try:\n", + " text_vs_durs[text_len] += [audio_len]\n", + " except:\n", + " text_vs_durs[text_len] = [audio_len]" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "# text_len vs avg_audio_len, median_audio_len, std_audio_len\n", + "text_vs_avg = {}\n", + "text_vs_median = {}\n", + "text_vs_std = {}\n", + "for key, durs in text_vs_durs.items():\n", + " text_vs_avg[key] = np.mean(durs)\n", + " text_vs_median[key] = np.median(durs)\n", + " text_vs_std[key] = np.std(durs)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "Collapsed": "false" + }, + "source": [ + "### Avg audio length per char" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "Collapsed": "false", + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_10_FINAL/10_7.wav', 'Schickes Heimkino!', 18, array([1.28518932e-05, 1.68334354e-05, 1.03571265e-05, ...,\n", + " 2.77877753e-05, 1.10460878e-05, 2.05760971e-05], dtype=float32), 1.5862083333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_10_FINAL/10_12.wav', 'Das sieht ihm ähnlich.', 23, array([7.6380376e-05, 9.3327515e-05, 6.1386294e-05, ..., 3.4380835e-05,\n", + " 2.6692895e-05, 2.2882025e-06], dtype=float32), 
1.6567083333333332)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_10_FINAL/10_20.wav', 'Oh, das Programm ist mir neu.', 29, array([-3.6327918e-05, -5.8332487e-05, -5.0294046e-05, ...,\n", + " -3.2606560e-05, -5.3037817e-05, -3.6754736e-05], dtype=float32), 1.8241458333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_10_FINAL/10_21.wav', 'Niemand ist ein Alleskönner.', 29, array([2.5469655e-05, 1.5675920e-05, 2.6378759e-05, ..., 3.4840865e-05,\n", + " 3.4687979e-05, 2.3448023e-05], dtype=float32), 1.9034583333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_10_FINAL/10_25.wav', 'Dagegen ist kein Kraut gewachsen.', 33, array([8.6409571e-05, 1.6211446e-04, 1.2149933e-04, ..., 1.4264301e-05,\n", + " 2.6473885e-05, 4.1174495e-05], dtype=float32), 1.91225)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_10_FINAL/10_39.wav', 'Seid gegrüÃ\\x9ft!', 15, array([-4.95165441e-05, -9.18527076e-05, -1.06668835e-04, ...,\n", + " -4.00948884e-05, -6.23805026e-05, -4.42093369e-05], dtype=float32), 1.1808541666666668)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_10_FINAL/10_43.wav', 'Nicht mit dem FuÃ\\x9f!', 19, array([-2.4153460e-05, -9.5195399e-05, -1.8093537e-04, ...,\n", + " 2.0667248e-05, 2.7399163e-05, 5.0344559e-05], dtype=float32), 1.4363958333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_10_FINAL/10_44.wav', 'Wissen ist Macht.', 17, array([-1.9221216e-05, -2.1811753e-05, -4.0165878e-06, ...,\n", + " -5.0537183e-06, -1.3825783e-05, -2.8384518e-05], dtype=float32), 1.8329583333333332)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_10_FINAL/10_45.wav', 'Guck mal, ein Eichhörnchen!', 28, array([-8.8387278e-05, -7.1484370e-05, -9.1183894e-05, ...,\n", + " -2.6602589e-05, 1.1369466e-05, -1.4236821e-06], dtype=float32), 1.5245208333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_10_FINAL/10_58.wav', 'Ich bin mein eigener Hund.', 26, array([-1.3441265e-05, -1.3771249e-05, 2.1415319e-06, ...,\n", + " -2.9998329e-05, 6.4692267e-06, 1.6420488e-05], dtype=float32), 1.91225)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_10_FINAL/10_68.wav', 'Lach ich, oder was?', 19, array([1.20631594e-04, 2.69133277e-04, 3.61918297e-04, ...,\n", + " 2.52288628e-05, 1.12787602e-05, 2.01150815e-05], dtype=float32), 1.7272083333333332)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_10_FINAL/10_76.wav', 'Moment mal, das ist neu.', 24, array([-4.0444505e-05, -5.6087447e-05, -7.0869857e-05, ...,\n", + " -5.9735464e-07, 1.4513580e-05, 1.7241922e-05], dtype=float32), 1.6743333333333332)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_10_FINAL/10_79.wav', 'Wie lange zieht der Tee schon?', 30, array([ 1.3359761e-05, 1.4845427e-06, -8.4266394e-06, ...,\n", + " 8.4090761e-06, 5.6682808e-07, 1.4266146e-06], dtype=float32), 1.8858333333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_10_FINAL/10_95.wav', 'Schlaf gut!', 11, array([-8.3705861e-05, -1.3769916e-04, -1.0772650e-04, ...,\n", + " -1.2876300e-05, -3.5042558e-05, -1.5538299e-05], dtype=float32), 1.0839166666666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_10_FINAL/10_97.wav', 'Entschuldigen Sie die Verwechslung!', 35, array([-4.3585667e-05, -4.9360351e-05, -2.4610319e-05, ...,\n", + " -1.4282005e-05, -7.0760620e-07, -2.8634834e-06], dtype=float32), 1.9210833333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_10_FINAL/10_99.wav', 'Schönes Ding!', 14, array([-4.9598326e-05, -4.2029962e-05, -2.2566113e-05, ...,\n", + " 7.5142352e-06, -3.1275456e-05, -1.8421564e-05], 
dtype=float32), 0.9252916666666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_10_FINAL/10_101.wav', 'Dann nichts wie weg hier!', 25, array([ 1.2582598e-05, 1.4227808e-05, 1.0588883e-05, ...,\n", + " 1.8725707e-07, -4.0784824e-05, -7.0644560e-06], dtype=float32), 1.7095833333333332)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_10_FINAL/10_120.wav', \"Wie geht's?\", 11, array([ 3.6131805e-05, 2.3445213e-05, 4.7948160e-05, ...,\n", + " -3.3656095e-05, -4.0791183e-05, -4.5296023e-05], dtype=float32), 0.9341041666666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_10_FINAL/10_179.wav', 'Das ganze Haus hat gewackelt.', 29, array([ 1.31893430e-05, -2.02163919e-05, -5.92077959e-06, ...,\n", + " -8.03239527e-06, -1.91841791e-05, -1.46886205e-05], dtype=float32), 1.9034583333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_10_FINAL/10_186.wav', 'Woher kommt all der Hass?', 25, array([-1.0393358e-05, -4.2540119e-05, -1.8952907e-05, ...,\n", + " 1.9931360e-05, 2.8833035e-06, 2.6874868e-06], dtype=float32), 1.8858333333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_10_FINAL/10_189.wav', 'Stillgestanden!', 15, array([ 4.4343769e-06, 1.3210945e-05, 1.7683087e-05, ...,\n", + " 2.6131744e-05, -5.4923967e-06, 9.4311863e-06], dtype=float32), 1.2689791666666668)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_10_FINAL/10_199.wav', 'Eine Sache zur Zeit.', 20, array([5.1501018e-05, 6.3279913e-05, 7.3763011e-05, ..., 1.0348874e-05,\n", + " 1.0562905e-05, 3.0424892e-05], dtype=float32), 1.4804583333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_10_FINAL/10_218.wav', 'Nichts für ungut!', 18, array([-4.0355466e-05, -4.5107645e-05, -7.7510209e-05, ...,\n", + " -2.0305148e-05, -3.0419576e-05, -1.7718892e-05], dtype=float32), 1.2337291666666668)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_10_FINAL/10_220.wav', 'Sieh genau hin!', 15, array([-1.2045763e-02, -1.6849384e-02, -1.4799301e-02, ...,\n", + " 1.6059141e-06, -1.4713467e-05, 1.0609662e-05], dtype=float32), 1.3042291666666668)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_10_FINAL/10_224.wav', 'Und welches Baujahr?', 20, array([-3.5566740e-05, -2.3342436e-05, -2.8526230e-05, ...,\n", + " 3.1306794e-05, 3.2872085e-05, 2.9171426e-05], dtype=float32), 1.6743333333333332)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_10_FINAL/10_226.wav', 'Sofort umkehren!', 16, array([ 1.2734158e-04, 1.4998924e-04, 1.2418727e-04, ...,\n", + " -6.3872926e-06, -5.1714401e-06, -1.2052229e-05], dtype=float32), 1.3923541666666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_10_FINAL/10_232.wav', 'Da muss man locker bleiben.', 27, array([-3.2585725e-05, -3.3840271e-05, 1.3126293e-05, ...,\n", + " -1.8632261e-05, -6.3017387e-06, -5.6675367e-06], dtype=float32), 1.6567083333333332)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_10_FINAL/10_237.wav', 'Probier es mal mit Aceton.', 26, array([ 7.5771743e-05, 1.0223542e-04, 1.0343192e-04, ...,\n", + " -2.1570906e-05, -3.1918564e-05, -1.1135696e-05], dtype=float32), 1.8858125)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_10_FINAL/10_244.wav', 'Kommt drauf an.', 15, array([ 2.7207607e-05, 1.8057373e-05, 1.2512723e-05, ...,\n", + " -6.0103289e-06, -2.1828011e-05, -8.1472344e-06], dtype=float32), 1.3571041666666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_10_FINAL/10_246.wav', 'Man darf gespannt sein.', 23, array([-2.3668355e-03, -3.7321844e-03, -3.6732492e-03, ...,\n", + " 1.7768043e-06, 2.0778492e-05, 5.1516781e-06], 
dtype=float32), 1.5685833333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_10_FINAL/10_251.wav', 'Daran scheiden sich die Geister.', 32, array([-2.39492147e-05, -4.70898958e-05, -2.53186899e-05, ...,\n", + " -4.88899059e-06, -1.34801885e-05, 1.04552892e-05], dtype=float32), 1.8153333333333332)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_10_FINAL/10_258.wav', 'Was habt ihr heute erlebt?', 26, array([ 3.5868085e-05, 8.2530729e-05, 4.6677309e-05, ...,\n", + " -8.4167405e-06, -2.0942105e-05, -6.2113932e-06], dtype=float32), 1.7888958333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_10_FINAL/10_265.wav', 'Lass das sein!', 14, array([2.4356419e-05, 5.5347311e-05, 5.1189338e-05, ..., 2.7182332e-05,\n", + " 1.6106302e-05, 2.1714099e-05], dtype=float32), 1.2425208333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_10_FINAL/10_267.wav', 'Auch heute noch.', 16, array([ 1.6202603e-05, 1.8275598e-05, 1.5345126e-05, ...,\n", + " -9.9319268e-06, -1.4463866e-05, 7.9376441e-06], dtype=float32), 1.4363958333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_10_FINAL/10_270.wav', 'Wir sehen uns in Bielefeld.', 27, array([5.0975410e-05, 4.6619494e-05, 5.2299667e-05, ..., 2.4641362e-05,\n", + " 2.0409352e-05, 1.7508868e-05], dtype=float32), 1.8065208333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_10_FINAL/10_274.wav', 'Gerald muss Dampf ablassen.', 27, array([-1.4112990e-04, -2.2197423e-04, -2.2060136e-04, ...,\n", + " -4.0291343e-05, -3.2744192e-05, -1.7507429e-05], dtype=float32), 1.7712708333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_10_FINAL/10_277.wav', 'Sehen Sie selbst!', 17, array([-3.6524234e-05, -2.8097162e-05, 4.4066533e-06, ...,\n", + " 2.1528131e-06, -1.2273627e-05, -8.5409883e-06], dtype=float32), 1.4275833333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_10_FINAL/10_282.wav', 'Haben wir jemanden vergessen?', 29, array([-2.1900923e-05, -8.0311016e-05, -4.5058856e-05, ...,\n", + " 8.6369282e-06, 2.3358027e-05, 1.4141980e-05], dtype=float32), 1.6919583333333332)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_10_FINAL/10_300.wav', 'Oh, der Besuch ist da!', 22, array([-1.1763951e-06, -6.4509544e-07, -2.1343028e-05, ...,\n", + " 8.3751611e-06, -2.0755753e-05, -3.9365756e-07], dtype=float32), 1.5157083333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_10_FINAL/10_303.wav', 'Kannst du das bitte übernehmen?', 32, array([1.9790201e-05, 2.5795589e-05, 2.3016226e-05, ..., 4.4700668e-05,\n", + " 2.9440445e-05, 4.1151830e-05], dtype=float32), 1.965125)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_10_FINAL/10_309.wav', 'Ich muss verrückt sein.', 24, array([-3.7773843e-05, -2.5238944e-05, -4.5549310e-05, ...,\n", + " -1.4228171e-05, -1.3738420e-05, -2.5079733e-05], dtype=float32), 1.4099583333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_10_FINAL/10_312.wav', 'Gestrichen!', 11, array([4.6765574e-05, 8.2428312e-05, 6.1315681e-05, ..., 1.7959255e-06,\n", + " 5.7119927e-08, 3.7900886e-06], dtype=float32), 0.9693541666666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_10_FINAL/10_321.wav', 'Gott atmet nicht.', 17, array([3.9337472e-05, 4.7041980e-05, 5.6819965e-05, ..., 1.6601467e-05,\n", + " 1.5404070e-05, 3.0179035e-05], dtype=float32), 1.6831458333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_10_FINAL/10_327.wav', 'Das ist mir auch klar.', 22, array([ 6.4578126e-05, 9.0902526e-05, 7.7864941e-05, ...,\n", + " -1.0411938e-05, -3.7324537e-06, 1.4365208e-05], 
dtype=float32), 1.5421458333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_10_FINAL/10_329.wav', 'Es sieht nach Unsinn aus.', 25, array([ 1.1480927e-06, 7.0667493e-06, -3.8140864e-05, ...,\n", + " 5.6332779e-06, 3.7668069e-05, 7.3043757e-06], dtype=float32), 1.9827708333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_10_FINAL/10_333.wav', 'Das ist nur von auÃ\\x9fen.', 23, array([-3.8521201e-05, -4.7468315e-05, -3.4236415e-05, ...,\n", + " 5.2493826e-05, 3.7984686e-05, 3.3584591e-05], dtype=float32), 1.9915625)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_10_FINAL/10_334.wav', 'Ich habe gerade ein DéjÃ\\xa0-vu.', 30, array([ 4.4728897e-04, 3.7400136e-04, -4.0894563e-04, ...,\n", + " 2.4757979e-05, 1.1479871e-05, 2.5551706e-05], dtype=float32), 1.9387083333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_10_FINAL/10_336.wav', 'Ich muss mich verzählt haben.', 30, array([-3.9173494e-05, -2.9986420e-05, -1.9012801e-05, ...,\n", + " -6.0724019e-06, 2.7600961e-05, -3.4350986e-05], dtype=float32), 1.6831458333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_10_FINAL/10_342.wav', 'So kann man sich täuschen.', 27, array([-3.5296402e-05, -6.0332448e-05, -5.2051670e-05, ...,\n", + " -1.2274999e-05, -6.2373409e-05, 1.2240975e-05], dtype=float32), 1.5068958333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_10_FINAL/10_345.wav', 'Ich weiÃ\\x9f nicht woher.', 22, array([-2.05518299e-05, -1.30783865e-05, -1.48754107e-05, ...,\n", + " -5.49699544e-05, -3.01012133e-05, -1.70801268e-05], dtype=float32), 1.4980833333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_10_FINAL/10_352.wav', 'Bist du jetzt beleidigt?', 24, array([-1.0385954e-05, 1.1672010e-05, -2.3844843e-05, ...,\n", + " 6.0053999e-06, -2.3204884e-05, -9.7573111e-06], dtype=float32), 1.9298958333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_10_FINAL/10_357.wav', 'Gib mir zwei Minuten, ja?', 25, array([-1.8705783e-05, -3.0273133e-05, -2.4814160e-05, ...,\n", + " 1.4705538e-05, 9.7520942e-06, 1.7873571e-06], dtype=float32), 1.8065208333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_10_FINAL/10_360.wav', 'Voll der Psycho-Blick!', 22, array([ 5.0691519e-06, 1.2665058e-05, 1.4902340e-06, ...,\n", + " 9.9865492e-06, -2.0948526e-05, -1.1750392e-05], dtype=float32), 1.4980833333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_10_FINAL/10_365.wav', 'Mein Freund ist Musiker.', 24, array([ 4.2413834e-05, 2.3999601e-05, 1.0646096e-05, ...,\n", + " -1.9632445e-05, -2.5183452e-05, -1.8877656e-05], dtype=float32), 1.7272083333333332)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_10_FINAL/10_386.wav', 'Hast du Knoblauch gegessen?', 27, array([ 4.2124993e-06, 1.6061234e-05, 1.6008022e-05, ...,\n", + " 4.7057729e-05, -5.8230005e-05, -6.6850065e-05], dtype=float32), 1.7977083333333332)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_10_FINAL/10_391.wav', 'Ist mir gar nicht aufgefallen.', 30, array([-1.2801524e-04, -1.8332504e-04, -1.6864720e-04, ...,\n", + " -1.7935792e-05, 1.3743926e-05, 4.5144670e-06], dtype=float32), 1.6390833333333332)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_10_FINAL/10_396.wav', 'Verdammt noch mal!', 18, array([-1.9188805e-05, 2.9282862e-06, 3.1274089e-06, ...,\n", + " 3.8011989e-05, 4.4447512e-05, 3.0465781e-05], dtype=float32), 1.3218541666666668)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_10_FINAL/10_403.wav', 'Klingt moralisch einwandfrei.', 29, array([-1.5154625e-06, -1.1907745e-05, 
[Notebook output truncated. The cell prints one tuple per clip of the form
(wav_path, transcript, transcript_length_in_chars, float32 waveform array, duration_in_seconds),
e.g. ('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_10_FINAL/10_419.wav', 'Ich kann nichts erkennen.', 25, array([...], dtype=float32), 1.436),
covering batches BATCH_10_FINAL through BATCH_13_FINAL under /home/erogol/Data/Mozilla_DE_Thomas3/.
Clip durations range from roughly 0.7 s to 2.0 s. In the raw dump, transcripts containing ß, Ü, or Ö are
mis-encoded (e.g. 'GrüÃ\x9f dich!' for 'Grüß dich!'), and the printed character counts appear to have been
taken from that raw, mis-encoded form.]
dtype=float32), 1.7851666666666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_13_FINAL/13_711.wav', 'Warum nämlich?', 15, array([ 5.9231312e-05, 5.9231312e-05, 3.1590032e-05, ...,\n", + " 1.1846262e-05, -5.9231311e-06, -7.5026328e-05], dtype=float32), 1.3688645833333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_13_FINAL/13_712.wav', 'Das hätte ich beinahe vergessen.', 33, array([-2.1125835e-04, -2.4482276e-04, -1.4610391e-04, ...,\n", + " 9.0821341e-05, 1.7966831e-04, 1.0661636e-04], dtype=float32), 1.9192291666666668)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_13_FINAL/13_726.wav', 'Möchtest du auch einen Muffin?', 31, array([-3.9487539e-05, -2.7641279e-05, 6.3180065e-05, ...,\n", + " 1.7769393e-05, 6.7128822e-05, 7.1077571e-05], dtype=float32), 1.9545104166666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_13_FINAL/13_727.wav', 'Es hat nichts mit dir zu tun.', 29, array([-1.6584767e-04, -1.9348894e-04, -2.7641279e-04, ...,\n", + " 6.5154440e-05, 4.3436296e-05, 1.2438576e-04], dtype=float32), 1.7569479166666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_13_FINAL/13_732.wav', 'Vielleicht war ich etwas vorschnell.', 36, array([1.7177081e-04, 1.6584767e-04, 8.6872591e-05, ..., 1.9546332e-04,\n", + " 1.8954019e-04, 1.5597578e-04], dtype=float32), 1.9192291666666668)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_13_FINAL/13_735.wav', 'Hatschi!', 8, array([ 1.2043700e-04, -1.7769393e-05, -1.9743770e-06, ...,\n", + " -1.1846262e-05, -4.5410670e-05, -7.7000703e-05], dtype=float32), 0.8114375)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_13_FINAL/13_739.wav', 'Ich bleibe dabei.', 17, array([-2.0533521e-04, -1.2438576e-04, -5.5282559e-05, ...,\n", + " 4.5410670e-05, -1.3820640e-05, -7.7000703e-05], dtype=float32), 1.2418541666666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_13_FINAL/13_762.wav', 'Nicht zu fassen!', 16, array([1.7414006e-03, 1.4353720e-03, 9.6547039e-04, ..., 6.3180065e-05,\n", + " 1.8164268e-04, 8.0949460e-05], dtype=float32), 1.1712916666666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_13_FINAL/13_773.wav', 'Gute Besserung!', 15, array([-5.1333802e-05, 0.0000000e+00, 2.1718148e-05, ...,\n", + " -1.2636013e-04, -1.9546332e-04, -1.4215514e-04], dtype=float32), 1.2771354166666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_13_FINAL/13_775.wav', 'Ja, so ist es wohl.', 19, array([-1.61898919e-04, 1.97437694e-05, 1.02667604e-04, ...,\n", + " -6.51544397e-05, -1.26360130e-04, -6.71288217e-05], dtype=float32), 1.44646875)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_13_FINAL/13_778.wav', 'Mich selbst hat das überrascht.', 32, array([7.7000703e-05, 1.1846262e-04, 1.2241138e-04, ..., 1.3820639e-04,\n", + " 9.8718847e-06, 1.3820640e-05], dtype=float32), 1.8275)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_13_FINAL/13_782.wav', 'Wer kennt das nicht?', 20, array([-2.7641279e-05, 7.8975081e-06, -3.7513164e-05, ...,\n", + " -2.3297648e-04, -2.2902773e-04, -2.4087400e-04], dtype=float32), 1.72871875)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_13_FINAL/13_787.wav', 'Ich liebe diese Musik!', 22, array([-1.8361707e-04, -6.9103196e-05, -9.0821341e-05, ...,\n", + " 5.6862057e-04, 6.2587752e-04, 5.3110742e-04], dtype=float32), 1.8580729166666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_13_FINAL/13_800.wav', 'Na endlich!', 11, array([-1.1846262e-04, -1.5202703e-04, -8.4898209e-05, ...,\n", + " 9.0821341e-05, -9.0821341e-05, -7.8975081e-06], dtype=float32), 0.91021875)\n", + 
"('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_13_FINAL/13_805.wav', 'Juliane gruselt sich.', 21, array([1.3425764e-04, 7.1077571e-05, 6.5154440e-05, ..., 9.8718854e-05,\n", + " 8.6872591e-05, 5.1333802e-05], dtype=float32), 1.86278125)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_13_FINAL/13_808.wav', 'Der andere nimmt.', 17, array([-8.6872591e-05, -1.1451387e-04, -8.2923834e-05, ...,\n", + " 2.5666901e-05, -7.3051953e-05, -7.5026328e-05], dtype=float32), 1.52409375)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_13_FINAL/13_814.wav', 'Wieso ich?', 10, array([-1.14513867e-04, -1.02667604e-04, -1.77693932e-04, ...,\n", + " -1.18462622e-05, 0.00000000e+00, 1.38206397e-05], dtype=float32), 0.9031666666666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_13_FINAL/13_816.wav', 'Die Haare müssen ab.', 21, array([ 1.9546332e-04, 1.2636013e-04, 2.1125835e-04, ...,\n", + " 9.8718847e-06, -4.1461917e-05, -5.5282559e-05], dtype=float32), 1.2065729166666668)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_13_FINAL/13_844.wav', 'Die ganze Woche steht das schon an.', 35, array([ 1.0602404e-03, 1.1017023e-03, 9.0031594e-04, ...,\n", + " -3.3564411e-05, -3.5538786e-05, 0.0000000e+00], dtype=float32), 1.8839479166666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_7.wav', 'Meinen Respekt hast du.', 23, array([-8.1613541e-07, 3.6258320e-05, 5.8615900e-05, ...,\n", + " -3.0361010e-05, 4.6051988e-05, 6.1613529e-05], dtype=float32), 1.568)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_12.wav', 'Mein SchweiÃ\\x9f stinkt nicht.', 27, array([1.2758464e-03, 1.4472028e-03, 1.4819785e-03, ..., 1.1448720e-05,\n", + " 2.5002395e-05, 5.3266147e-05], dtype=float32), 1.872)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_24.wav', 'So sieht es jedenfalls aus.', 27, array([ 3.5462443e-05, -3.6511621e-05, -2.4387444e-05, ...,\n", + " 7.4399744e-05, 7.2159133e-07, 2.3660252e-05], dtype=float32), 1.808)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_31.wav', 'Es brennt lichterloh.', 21, array([-7.8527468e-05, -1.9054073e-04, -1.8275550e-04, ...,\n", + " -1.4771417e-05, 2.4868292e-05, -1.4910699e-05], dtype=float32), 1.8986666666666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_36.wav', 'Hat jemand Deo dabei?', 21, array([5.0298637e-05, 4.8803475e-05, 5.4532258e-05, ..., 3.4226623e-06,\n", + " 9.2322180e-06, 3.0618612e-05], dtype=float32), 1.7386666666666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_44.wav', 'Der Hund will raus.', 19, array([-8.2374172e-05, -8.4805586e-05, -9.4096496e-05, ...,\n", + " 2.0108973e-05, 3.4747383e-05, -3.9627314e-05], dtype=float32), 1.5413333333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_46.wav', 'Nur Fliegen ist schöner.', 25, array([-2.5430196e-05, -6.4560918e-05, -6.8181558e-05, ...,\n", + " 6.0105547e-05, 9.7991426e-05, 2.9888753e-05], dtype=float32), 1.6693333333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_57.wav', 'Endlich wieder Nachschub!', 25, array([-3.0662410e-05, -3.7799236e-05, -1.0512020e-04, ...,\n", + " -1.2799338e-04, -3.7069469e-05, 3.4687200e-05], dtype=float32), 1.568)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_63.wav', \"Jetzt langt's dann aber.\", 24, array([ 1.3113129e-06, -5.7142366e-05, 3.9664551e-06, ...,\n", + " 4.8476216e-04, 4.0935431e-04, 5.0957059e-04], dtype=float32), 1.8453333333333333)\n", + 
"('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_76.wav', 'Ist hier noch ein Platz frei?', 29, array([ 4.6084756e-06, 2.1333383e-06, 1.0840034e-05, ...,\n", + " 4.7717163e-05, -4.3301993e-06, 5.9024904e-07], dtype=float32), 1.7653333333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_91.wav', 'Möchten Sie durch?', 19, array([5.3242915e-05, 1.1775635e-04, 9.1564674e-05, ..., 6.9772730e-05,\n", + " 3.2825061e-05, 5.5504606e-05], dtype=float32), 1.1786666666666668)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_94.wav', 'Du hast sie angemalt.', 21, array([-8.2009647e-06, -7.8560508e-05, -1.1781590e-04, ...,\n", + " 5.8809797e-05, 3.5827401e-05, -3.8682600e-05], dtype=float32), 1.5946666666666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_97.wav', 'Anfassen heiÃ\\x9ft kaufen.', 23, array([ 6.7132327e-04, 6.4567651e-04, 4.5344225e-04, ...,\n", + " -2.1742040e-05, -1.2411790e-04, -3.8199389e-05], dtype=float32), 1.472)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_104.wav', 'Warum nicht lieber hier?', 24, array([-1.0701143e-05, -1.5738879e-06, 6.8153045e-06, ...,\n", + " -6.3156702e-05, -1.6941859e-04, -6.0139148e-05], dtype=float32), 1.4986666666666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_112.wav', 'Das war ein Abenteuer.', 22, array([ 2.6408197e-05, -6.0915321e-05, -9.1295704e-05, ...,\n", + " -5.6715970e-05, -3.1489210e-05, 1.5612791e-06], dtype=float32), 1.9466666666666668)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_127.wav', 'Das wäre fatal.', 16, array([ 4.4660061e-05, -6.5924425e-05, -5.6830704e-05, ...,\n", + " -5.5352357e-06, 3.0260082e-05, 9.7271128e-05], dtype=float32), 1.4666666666666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_141.wav', 'Nicht doch!', 11, array([-1.4546166e-04, -1.4626759e-04, -9.7611184e-05, ...,\n", + " 9.3360104e-05, 3.5025540e-05, -1.6926177e-06], dtype=float32), 0.928)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_146.wav', 'Heiliger Strohsack!', 19, array([-3.7175673e-04, -2.1206291e-04, -8.9090288e-05, ...,\n", + " 1.0547445e-04, 1.0614831e-04, 5.8346381e-05], dtype=float32), 1.376)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_153.wav', 'Gehen wir in die Eisdiele?', 26, array([-3.72752729e-05, -6.43968451e-05, -1.19852075e-05, ...,\n", + " 6.90084271e-05, -1.81738214e-05, -2.24471933e-05], dtype=float32), 1.4826666666666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_158.wav', 'Das ist halt so.', 16, array([ 2.1661433e-05, -9.2656213e-05, -2.0038491e-05, ...,\n", + " 3.4980503e-06, 8.1309692e-05, -1.6156602e-05], dtype=float32), 1.2853333333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_174.wav', 'Ich habe dich noch nie gesehen.', 31, array([ 1.68298247e-05, 2.35711445e-06, -1.13152724e-04, ...,\n", + " -5.31522637e-05, 5.38938584e-05, 1.89053408e-05], dtype=float32), 1.8773333333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_179.wav', 'Das muss hart für dich sein.', 29, array([-9.2038817e-06, -9.7612574e-06, -6.3460277e-05, ...,\n", + " -5.0950723e-05, 2.0168585e-05, -1.5738755e-05], dtype=float32), 1.5893333333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_183.wav', \"Packen wir's!\", 13, array([-2.2114466e-05, 6.0876686e-05, -8.3392551e-05, ...,\n", + " 3.5826326e-06, -1.4385004e-05, -5.6348257e-05], dtype=float32), 0.9546666666666667)\n", + 
"('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_194.wav', 'Wir werden siegen!', 18, array([ 1.6911860e-04, 7.4598174e-05, 1.0261347e-04, ...,\n", + " 6.5378241e-05, 3.2076507e-06, -6.6169787e-06], dtype=float32), 1.3333333333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_197.wav', 'Darf ich mal bei dir abbeiÃ\\x9fen?', 31, array([-1.0340806e-05, 7.1646286e-06, 3.3313339e-05, ...,\n", + " -7.5323747e-05, -2.6892374e-07, -3.3816039e-05], dtype=float32), 1.76)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_199.wav', 'Das ging aber fix!', 18, array([-9.3143040e-05, -4.3784836e-05, -1.1206182e-04, ...,\n", + " 8.7669920e-05, 1.0557293e-05, 4.2041685e-07], dtype=float32), 1.328)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_211.wav', 'Ich habe nachgedacht.', 21, array([ 5.0232731e-05, 1.2072114e-04, 1.8210443e-04, ...,\n", + " -6.5402834e-05, -5.1763345e-05, -6.0046054e-06], dtype=float32), 1.5093333333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_229.wav', 'Wir lassen uns nicht erpressen.', 31, array([1.37981799e-04, 1.52958339e-04, 1.10953624e-04, ...,\n", + " 6.50644288e-05, 8.02592767e-05, 1.01248879e-04], dtype=float32), 1.7493333333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_236.wav', 'Sag du es mir.', 14, array([ 7.4462928e-06, -2.0409609e-05, -3.6314952e-05, ...,\n", + " -2.1986765e-05, -8.3042978e-05, 8.2145634e-06], dtype=float32), 1.216)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_240.wav', 'Ich vermisse ihn seit gestern.', 30, array([ 2.9365596e-04, 3.4678026e-04, 3.5397714e-04, ...,\n", + " -1.5735781e-05, -2.9272232e-05, 4.2558597e-05], dtype=float32), 1.9893333333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_257.wav', 'So kannte ich sie gar nicht.', 28, array([ 4.4733344e-05, 7.7341829e-05, 1.1480036e-04, ...,\n", + " -1.8965245e-04, -1.4387793e-04, -1.2223862e-04], dtype=float32), 1.8133333333333332)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_258.wav', 'Dem Kind geht es gut.', 21, array([ 2.3389544e-05, -1.0488247e-05, 1.0429079e-05, ...,\n", + " -8.0030593e-05, -9.8967379e-05, -4.5314195e-05], dtype=float32), 1.3066666666666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_260.wav', 'Lasst es krachen!', 17, array([-2.1083563e-04, -8.3892046e-05, -3.2037347e-05, ...,\n", + " -6.8306355e-05, -1.3884228e-04, -6.5104126e-05], dtype=float32), 1.2)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_266.wav', 'Wie sehen Sie überhaupt aus?', 29, array([-1.0680479e-05, -1.9320854e-05, -7.0852952e-06, ...,\n", + " -1.0408241e-05, 3.3198389e-06, 2.1512881e-06], dtype=float32), 1.8826666666666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_281.wav', 'Damit könnte es klappen.', 25, array([-2.3432081e-05, -2.4900844e-05, -1.3450766e-04, ...,\n", + " 2.1617279e-05, 3.1534404e-05, -2.2315735e-05], dtype=float32), 1.488)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_307.wav', 'Tut das Husten weh?', 19, array([ 9.1145994e-06, 1.5820089e-05, 5.0116945e-05, ...,\n", + " 1.9206882e-05, -2.6969181e-05, -2.7526901e-05], dtype=float32), 1.5626666666666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_310.wav', 'Und jetzt kräftig kurbeln!', 27, array([-8.4867512e-05, -1.3528325e-05, 6.7344299e-05, ...,\n", + " -5.5355646e-05, 3.2757125e-05, -1.3706725e-05], dtype=float32), 1.968)\n", + 
"('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_311.wav', 'Und was bekommt man geboten?', 28, array([-9.42486338e-07, -6.20736901e-05, -1.13615904e-04, ...,\n", + " 1.05647247e-04, 4.75407724e-05, 7.68981190e-05], dtype=float32), 1.9626666666666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_313.wav', 'Nimm doch mal den Hut ab!', 25, array([-1.4411381e-06, 1.8580539e-04, 1.8933907e-04, ...,\n", + " -1.0257358e-04, -9.1900030e-05, -2.2193763e-04], dtype=float32), 1.5733333333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_322.wav', 'Der ist sauber.', 15, array([1.3459381e-04, 1.1068168e-04, 1.4088971e-04, ..., 1.4206764e-04,\n", + " 1.0958829e-05, 9.0381429e-05], dtype=float32), 1.344)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_333.wav', 'Danke für nichts!', 18, array([-2.6258719e-04, -2.9124424e-04, -4.0630574e-04, ...,\n", + " 9.1923815e-05, -9.6123731e-06, 3.9555922e-05], dtype=float32), 1.408)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_351.wav', 'Hier ist sie.', 13, array([-3.23740860e-05, -1.03745086e-04, -6.84802653e-05, ...,\n", + " 6.36538107e-06, 6.47425259e-05, -2.68384956e-05], dtype=float32), 1.2693333333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_354.wav', 'Ist sie international bekannt?', 30, array([ 1.5060005e-05, 5.7448578e-05, 1.3811006e-04, ...,\n", + " 6.0413648e-05, -4.7934391e-05, -1.9190535e-05], dtype=float32), 1.9626666666666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_363.wav', 'Ich meine ja nur.', 17, array([ 5.6321147e-05, 9.9655284e-05, -8.9936962e-05, ...,\n", + " 1.1549123e-05, 3.7268135e-05, 7.3645397e-06], dtype=float32), 1.1253333333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_390.wav', 'Gib mal die Seriennummer durch.', 31, array([ 7.2849958e-05, 9.1718932e-05, 5.6555116e-05, ...,\n", + " -2.9702240e-05, 3.8465154e-05, 2.2035034e-05], dtype=float32), 1.9466666666666668)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_399.wav', 'Steht das Wasser auf dem Herd?', 30, array([6.5801214e-05, 1.3084775e-04, 8.1372353e-05, ..., 6.8494905e-05,\n", + " 2.1234882e-06, 2.7409065e-05], dtype=float32), 1.84)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_401.wav', 'Oh ja!', 6, array([ 2.2632883e-05, -2.7574149e-05, 2.7717488e-05, ...,\n", + " 2.9032512e-07, 1.7548422e-05, -1.3465881e-05], dtype=float32), 0.7146666666666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_409.wav', 'Ja oder nein?', 13, array([ 3.4988134e-05, -6.8858870e-05, -8.5955844e-06, ...,\n", + " -4.4800227e-06, 1.7184280e-05, 3.7901282e-05], dtype=float32), 1.4346666666666668)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_411.wav', 'Ist doch Jacke wie Hose.', 24, array([ 1.1507938e-04, 5.0565839e-05, -2.7287895e-05, ...,\n", + " 3.7775626e-05, -1.4040452e-05, 1.4159415e-06], dtype=float32), 1.664)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_413.wav', 'Ich habe es nie gelernt.', 24, array([ 2.58978853e-05, 6.50478396e-05, -1.03702390e-04, ...,\n", + " 8.01785427e-05, 3.00699157e-05, -1.05522995e-04], dtype=float32), 1.776)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_429.wav', 'Nicht schon wieder eine Razzia!', 31, array([-5.1378167e-05, -2.5352152e-05, -3.2764001e-05, ...,\n", + " 2.1145966e-05, 5.4651609e-05, -7.9359561e-05], dtype=float32), 1.888)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_431.wav', 
'Niemand will es gewesen sein.', 29, array([6.13634029e-06, 1.00043821e-04, 1.26646410e-04, ...,\n", + " 4.00160025e-05, 6.57281998e-05, 1.20079676e-04], dtype=float32), 1.6426666666666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_443.wav', 'Ihr seid doch bloÃ\\x9f neidisch.', 29, array([ 4.71922749e-06, -1.42986255e-05, 4.10590292e-05, ...,\n", + " -1.13690789e-04, -4.82848300e-05, 3.64537264e-05], dtype=float32), 1.7493333333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_453.wav', 'Lesen lohnt sich.', 17, array([-1.1143904e-04, -9.7466742e-05, -1.4505965e-04, ...,\n", + " -1.1429377e-04, -8.0892445e-05, -8.6921274e-05], dtype=float32), 1.6426666666666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_458.wav', 'Oder er wurde dabei gestört.', 29, array([-1.8823694e-05, -3.1060394e-05, -9.3846960e-05, ...,\n", + " -1.2105788e-05, -3.4755056e-05, 3.5802004e-05], dtype=float32), 1.84)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_459.wav', 'Die Seele baumeln lassen.', 25, array([-4.6934008e-05, -1.4115409e-04, -1.9004452e-04, ...,\n", + " -4.7015623e-05, -2.2894224e-07, -4.3300730e-05], dtype=float32), 1.6746666666666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_468.wav', 'Der Nächste, bitte!', 20, array([ 8.1093880e-05, 2.9958397e-05, -3.9947310e-05, ...,\n", + " 6.6704742e-05, 1.2609754e-04, 1.1871241e-04], dtype=float32), 1.3386666666666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_469.wav', 'Wird schon schiefgehen.', 23, array([-1.8012641e-05, -6.1548446e-05, -1.2534855e-04, ...,\n", + " -2.9845067e-05, 3.1653948e-05, 1.2874776e-04], dtype=float32), 1.552)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_476.wav', 'Keine falsche Bewegung!', 23, array([-1.3065083e-04, -1.9577878e-04, -9.6719399e-05, ...,\n", + " 9.7838973e-05, -1.6546634e-05, 3.1119489e-05], dtype=float32), 1.7706666666666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_484.wav', 'Danach geht es ins Bett.', 24, array([1.4125947e-04, 1.4533960e-04, 1.3352933e-04, ..., 4.6569412e-06,\n", + " 8.5400243e-06, 1.0347654e-04], dtype=float32), 1.8826666666666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_495.wav', 'Vorwärts immer, rückwarts nimmer!', 35, array([ 9.8868964e-05, 1.4638813e-04, 8.2029030e-05, ...,\n", + " 3.1947344e-05, -3.3244356e-05, -8.5653497e-05], dtype=float32), 1.5893333333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_508.wav', 'Ein Spanngurt ist gerissen.', 27, array([-1.3210842e-05, 5.2183852e-05, 1.1509426e-05, ...,\n", + " -6.6147322e-06, -1.3790486e-05, 4.0188141e-05], dtype=float32), 1.952)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_511.wav', 'Das musst du gerade sagen!', 26, array([ 8.16162283e-05, 1.48853534e-04, 1.20252385e-04, ...,\n", + " -2.43115683e-05, 3.36854064e-05, -3.11621625e-05], dtype=float32), 1.9893333333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_517.wav', 'Lösen Sie das Captcha!', 23, array([-3.2288870e-05, 5.6598521e-05, 4.2188087e-05, ...,\n", + " 7.7064447e-05, -4.7475376e-05, 4.4163811e-05], dtype=float32), 1.6746666666666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_520.wav', 'Ihr werdet schon sehen.', 23, array([-6.5363100e-05, 4.7253379e-05, 5.9942446e-05, ...,\n", + " 3.2326661e-05, 8.2957842e-05, 7.4098658e-05], dtype=float32), 1.7973333333333332)\n", + 
"('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_521.wav', 'Ich erkläre es dir.', 20, array([ 5.3491673e-05, -1.2072490e-05, 3.4197161e-05, ...,\n", + " -3.4515979e-05, -5.6132449e-05, 1.3709931e-04], dtype=float32), 1.5093333333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_525.wav', 'Hau rein!', 9, array([ 2.57931824e-04, 2.11816674e-04, 1.78339556e-04, ...,\n", + " 7.76832676e-05, 1.51795175e-05, -4.37384588e-05], dtype=float32), 1.104)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_526.wav', 'Tief durchatmen!', 16, array([-2.6787920e-05, -3.2204316e-05, -5.5490927e-05, ...,\n", + " 2.2508255e-05, 5.4639313e-05, 1.8989524e-05], dtype=float32), 1.5253333333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_538.wav', 'Und was bringt das?', 19, array([-5.9224880e-05, -4.4477289e-05, 3.8521583e-05, ...,\n", + " 9.5605545e-05, 1.2830349e-06, 1.5070126e-05], dtype=float32), 1.6213333333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_541.wav', 'Karnickelfangschlag?', 20, array([3.9227842e-05, 3.2782922e-05, 4.6346566e-05, ..., 1.3389443e-05,\n", + " 3.6067817e-05, 6.0468155e-05], dtype=float32), 1.728)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_562.wav', 'Ist ja mega!', 12, array([-1.1508126e-04, -1.5385580e-04, -1.8046032e-04, ...,\n", + " -4.1180385e-05, 2.7804810e-05, -9.9901524e-07], dtype=float32), 0.992)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_570.wav', 'Jasmin, du bist dran.', 21, array([-6.0017886e-05, 3.1120195e-05, 1.0854354e-04, ...,\n", + " -2.5416332e-06, 4.4546370e-05, -4.6334655e-05], dtype=float32), 1.7173333333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_579.wav', 'Läuft es separat ab?', 21, array([ 2.2939121e-05, 2.0304271e-05, 4.7305216e-06, ...,\n", + " -4.0958774e-05, 8.3991254e-06, -4.0800154e-05], dtype=float32), 1.7813333333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_594.wav', 'Ich zitiere!', 12, array([ 7.3269119e-05, 4.1316580e-06, -7.5483302e-05, ...,\n", + " 4.5700057e-05, 1.0702889e-06, 1.2143076e-05], dtype=float32), 1.2853333333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_597.wav', 'Die Karten sind ja markiert!', 28, array([-7.7787427e-06, 1.3373171e-05, 1.1130486e-04, ...,\n", + " -3.4429740e-05, -9.2525712e-05, -3.0399795e-05], dtype=float32), 1.8613333333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_600.wav', 'Weniger ist manchmal mehr.', 26, array([-3.2105188e-05, -1.2411436e-04, -1.7373836e-04, ...,\n", + " 1.9536817e-05, 4.0033923e-05, -4.9835093e-05], dtype=float32), 1.6693333333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_610.wav', 'Zur Hölle mit ihm!', 19, array([ 4.1287938e-05, -1.5668693e-05, -4.7829257e-05, ...,\n", + " 1.2091287e-04, 3.0301053e-05, 5.0707073e-05], dtype=float32), 1.28)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_622.wav', 'Sonst kommt die Polizei.', 24, array([ 1.33967542e-05, -2.86651575e-05, 1.20430150e-05, ...,\n", + " -4.97728324e-05, -9.77511445e-05, -1.07504595e-04], dtype=float32), 1.9786666666666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_623.wav', 'Papa fährt immer schneller.', 28, array([-4.1551000e-05, 1.8333099e-05, -4.5995697e-05, ...,\n", + " 7.4864365e-05, -2.8456698e-05, -3.1763777e-06], dtype=float32), 1.7653333333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_627.wav', 'Das 
Problem kenne ich.', 22, array([-1.6575548e-06, -6.4681786e-05, -2.4183499e-05, ...,\n", + " -6.1924133e-05, 4.0877181e-05, -4.8742072e-06], dtype=float32), 1.3973333333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_646.wav', 'Gerade jetzt wird es spannend.', 30, array([-7.0382644e-05, -2.6976499e-05, -8.4537001e-05, ...,\n", + " 1.9848225e-05, 1.8570287e-05, 1.1454727e-04], dtype=float32), 1.952)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_650.wav', 'Pass mal auf!', 13, array([ 8.8038476e-05, 6.2287538e-05, 8.6767104e-05, ...,\n", + " -4.7867183e-05, 1.7106903e-06, -2.8001863e-05], dtype=float32), 1.0773333333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_653.wav', 'Führe mich nicht in Versuchung!', 32, array([ 1.5389375e-04, 8.4856605e-05, 1.1764471e-04, ...,\n", + " -4.1702488e-06, 4.8200640e-05, 3.7042355e-05], dtype=float32), 1.8986666666666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_658.wav', 'Dabei soll es bleiben.', 22, array([-6.8817273e-05, -1.4116750e-04, -2.5068663e-04, ...,\n", + " 3.3109423e-05, -1.2034771e-05, 5.3297503e-05], dtype=float32), 1.3653333333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_677.wav', 'Ich denke nicht daran.', 22, array([ 2.7965652e-06, -8.1217448e-05, -1.5171595e-04, ...,\n", + " -6.0021226e-05, 5.8105360e-07, -2.3721210e-05], dtype=float32), 1.472)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_702.wav', 'Sieh zu, dass du Land gewinnst!', 31, array([-3.9686485e-05, -4.1371659e-05, -5.1444043e-05, ...,\n", + " -6.5746033e-05, -6.9277223e-05, -3.0258396e-05], dtype=float32), 1.9466666666666668)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_705.wav', 'Was sagt uns das?', 17, array([-1.11950721e-04, -1.12432775e-04, -1.54395209e-04, ...,\n", + " 1.18786911e-05, -6.98161457e-05, -2.93514750e-05], dtype=float32), 1.6426666666666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_715.wav', 'Von nichts komm nichts.', 23, array([ 5.0694278e-05, -1.0824220e-04, -7.8278521e-05, ...,\n", + " 5.2878531e-05, 3.1005864e-05, 2.5896241e-05], dtype=float32), 1.984)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_718.wav', 'Warum auch?', 11, array([ 2.5824769e-05, 7.0119269e-05, 3.9937982e-05, ...,\n", + " 1.3905319e-05, -2.6308078e-05, -5.1800267e-05], dtype=float32), 0.9493333333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_721.wav', 'Wo wohne ich noch mal?', 22, array([ 1.1702570e-04, 1.8368529e-04, 1.5237987e-04, ...,\n", + " -3.3846823e-05, -4.2944125e-06, 2.2590933e-05], dtype=float32), 1.6)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_725.wav', 'Zum Wohl!', 9, array([-2.1576473e-06, 2.8079157e-05, -2.9355248e-05, ...,\n", + " -2.9330091e-05, -3.0764484e-05, -1.3724362e-05], dtype=float32), 0.7466666666666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_735.wav', 'Wie geht es dir?', 16, array([ 3.0780422e-05, -4.9582297e-05, -8.5829226e-05, ...,\n", + " 2.1407772e-05, -4.8474238e-05, -4.5784309e-05], dtype=float32), 1.232)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_746.wav', 'Einmal drücken reicht.', 23, array([-4.4286557e-05, -5.6155724e-05, -5.2055671e-05, ...,\n", + " -5.5887984e-05, 1.7236773e-05, 9.8498596e-05], dtype=float32), 1.4373333333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_761.wav', 'Ersatz muss her.', 16, array([ 8.3686442e-05, 9.1279635e-06, 
-8.3661522e-05, ...,\n", + " 3.3542208e-05, 9.7035401e-05, -4.7421363e-05], dtype=float32), 1.3333333333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_769.wav', 'Kennen Sie diesen Eisbären?', 28, array([ 1.8226114e-04, 1.1602399e-04, 8.7942906e-05, ...,\n", + " -3.1415253e-05, 6.8828485e-05, 2.8598015e-05], dtype=float32), 1.7173333333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_774.wav', 'Du tüdelst wohl!', 17, array([4.2244592e-05, 4.7479767e-05, 4.4327684e-05, ..., 2.9398587e-05,\n", + " 1.3265206e-04, 9.8947305e-05], dtype=float32), 1.312)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_776.wav', 'Einen Versuch ist es wert.', 26, array([-2.0919964e-05, -8.0129103e-05, -7.8644814e-05, ...,\n", + " 3.4572986e-05, 8.1091166e-05, 5.6626621e-05], dtype=float32), 1.984)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_779.wav', 'Kruzifix noch mal!', 18, array([ 5.9276794e-05, 7.1346542e-05, 1.3115312e-05, ...,\n", + " -7.0933937e-05, 2.6771322e-05, 3.3997876e-05], dtype=float32), 1.792)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_781.wav', 'Sind die echt?', 14, array([-3.2039690e-05, -4.8189206e-05, -9.0187306e-05, ...,\n", + " 2.1210299e-05, 9.5539394e-07, -6.0049209e-05], dtype=float32), 1.1946666666666668)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_789.wav', 'Wie war euer Jahrgangstreffen?', 30, array([ 9.86098894e-05, 1.05807514e-04, 1.31781504e-04, ...,\n", + " -6.47349443e-05, 5.55652514e-06, 6.68639914e-05], dtype=float32), 1.9946666666666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_796.wav', 'Langt das?', 10, array([-2.58835917e-05, -1.11602596e-04, -2.00994928e-05, ...,\n", + " 3.40378210e-05, 4.15314862e-05, -2.47353237e-05], dtype=float32), 1.2586666666666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_851.wav', 'Nein, das gehört so.', 21, array([4.30460314e-05, 1.00948644e-04, 1.14135793e-04, ...,\n", + " 2.88395531e-04, 1.62498865e-04, 8.75307087e-05], dtype=float32), 1.7493333333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_852.wav', 'Stellen Sie Blickkontakt her.', 29, array([-2.3877754e-05, -3.1883523e-05, -1.3378897e-04, ...,\n", + " -3.8810729e-05, 4.3067663e-05, 3.8920269e-05], dtype=float32), 1.9946666666666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_858.wav', 'Also echt jetzt!', 16, array([ 1.62354499e-05, 4.22473058e-05, -1.46273105e-05, ...,\n", + " -2.93930316e-05, 5.34094252e-05, 7.98595574e-05], dtype=float32), 1.216)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_15_FINAL/15_7.wav', 'Ich glaube nicht.', 17, array([-1.0143876e-05, -3.8619244e-05, 8.2748767e-05, ...,\n", + " -9.9806406e-05, -4.3946784e-05, 6.9558562e-05], dtype=float32), 1.1946666666666668)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_15_FINAL/15_18.wav', 'Hier ist es sicherer.', 21, array([ 4.6870970e-05, 9.9823235e-05, -4.0877108e-05, ...,\n", + " -1.4616339e-05, 7.3614872e-05, 1.0970575e-04], dtype=float32), 1.7706666666666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_15_FINAL/15_49.wav', 'Ja ja, als ob!', 14, array([ 5.3198488e-05, 1.8346685e-04, -2.1753046e-06, ...,\n", + " 1.7834389e-05, 5.3522737e-05, 8.4725587e-05], dtype=float32), 1.7706666666666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_15_FINAL/15_59.wav', 'Geh, such deine Schwester!', 26, array([ 9.13840049e-05, 1.68439132e-04, 3.04173911e-04, ...,\n", + " -8.56241095e-05, 
-1.02150196e-04, 8.91289255e-06], dtype=float32), 1.9626666666666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_15_FINAL/15_94.wav', 'Gib mir meinen Becher wieder!', 29, array([-2.1092707e-04, -2.3195105e-04, -2.0152969e-04, ...,\n", + " 8.9153917e-05, -2.4260396e-06, 5.9283586e-05], dtype=float32), 1.8453333333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_15_FINAL/15_100.wav', 'Das führt doch zu nichts.', 26, array([-1.0273771e-04, -8.6229462e-05, -1.2574486e-04, ...,\n", + " 2.4963025e-05, 4.4582037e-05, 4.7964921e-05], dtype=float32), 1.9733333333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_15_FINAL/15_105.wav', 'Wo denn?', 8, array([-4.0845240e-05, 1.0149255e-04, 5.9910049e-05, ...,\n", + " -3.8421931e-05, 2.8110459e-05, 1.7339922e-05], dtype=float32), 0.9493333333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_15_FINAL/15_106.wav', 'Du sitzt hinten.', 16, array([ 1.1350374e-04, 1.3197908e-04, 5.9344729e-05, ...,\n", + " -1.6409816e-04, -7.1399249e-05, -4.2459251e-05], dtype=float32), 1.44)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_15_FINAL/15_112.wav', 'Das kann ich nicht.', 19, array([-9.4199102e-05, -3.3980414e-05, 9.0330948e-05, ...,\n", + " 1.1509175e-04, 2.2319029e-05, 5.1328014e-05], dtype=float32), 1.4186666666666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_15_FINAL/15_139.wav', 'Das hat sie gelernt.', 20, array([ 1.5456244e-04, 3.1872053e-04, 3.7880472e-04, ...,\n", + " -8.6764321e-06, -1.7240205e-05, -5.7155878e-05], dtype=float32), 1.4826666666666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_15_FINAL/15_153.wav', 'Nicht alle Teenager sind so.', 28, array([7.9220721e-05, 5.8759109e-05, 1.1493213e-04, ..., 6.8786328e-05,\n", + " 1.5815135e-04, 8.5130850e-05], dtype=float32), 1.9946666666666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_15_FINAL/15_156.wav', 'Frische Seeluft macht gesund.', 29, array([ 1.8124521e-04, 1.7306159e-04, 5.9669415e-05, ...,\n", + " 4.9480139e-05, 1.2296322e-04, -5.5897519e-05], dtype=float32), 1.984)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_15_FINAL/15_164.wav', 'Gönn dir!', 10, array([ 5.2993961e-05, 2.8179937e-05, 7.8242076e-05, ...,\n", + " -4.9057824e-05, 1.8003910e-05, 8.8817593e-05], dtype=float32), 0.9386666666666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_15_FINAL/15_176.wav', 'Sag ich doch!', 13, array([ 4.2398951e-05, 5.6847359e-05, 7.0788061e-05, ...,\n", + " -3.2739328e-05, 9.7135853e-05, 6.0795941e-05], dtype=float32), 1.2373333333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_15_FINAL/15_177.wav', 'Das darf doch nicht wahr sein.', 30, array([-5.1426803e-05, -5.0517308e-05, 4.6803252e-05, ...,\n", + " -8.1146150e-05, 2.9068062e-05, 7.5193479e-05], dtype=float32), 1.8773333333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_15_FINAL/15_199.wav', 'Jetzt sind wir quitt.', 21, array([-2.4918138e-05, 8.0159109e-05, -7.1328832e-05, ...,\n", + " -2.1099215e-04, -3.0862509e-05, -3.5725458e-05], dtype=float32), 1.664)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_15_FINAL/15_207.wav', 'Eben ging das noch.', 19, array([-5.0324921e-05, 1.3549793e-04, -3.3347860e-05, ...,\n", + " 9.8024408e-05, 1.5384333e-04, 1.5966935e-04], dtype=float32), 1.53875)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_15_FINAL/15_208.wav', 'Bug oder Feature?', 17, array([-3.7243055e-06, 6.9413843e-05, 7.5392752e-05, ...,\n", + " 5.2070121e-05, 2.8219682e-05, 8.4193009e-05], dtype=float32), 1.8053020833333333)\n", + 
"('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_15_FINAL/15_274.wav', 'Wir brauchen mehr davon!', 24, array([-2.0753406e-04, -1.9484414e-05, -2.8117347e-04, ...,\n", + " 1.2726737e-04, 2.6360145e-04, 2.9073044e-04], dtype=float32), 1.91434375)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_15_FINAL/15_280.wav', 'Lass uns raus gehen.', 20, array([ 1.03469618e-04, 1.97744346e-04, -7.93442814e-06, ...,\n", + " 8.44921742e-05, 2.30915975e-05, -1.33781205e-05], dtype=float32), 1.5508645833333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_15_FINAL/15_286.wav', 'SchluÃ\\x9f mit lustig.', 19, array([ 2.99623178e-04, 2.43378381e-04, 1.65333462e-04, ...,\n", + " -2.71533063e-05, 7.85075972e-05, -1.17198346e-04], dtype=float32), 1.9264583333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_15_FINAL/15_302.wav', 'Woher nehmt ihr eure Bildung?', 29, array([1.7700881e-04, 2.1893253e-04, 1.3036304e-04, ..., 1.3868474e-04,\n", + " 1.0062666e-04, 8.4173589e-05], dtype=float32), 1.9749270833333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_15_FINAL/15_308.wav', 'Du fährst, ich schieÃ\\x9fe!', 25, array([1.5563566e-04, 1.4856170e-04, 2.2446582e-04, ..., 6.8505600e-05,\n", + " 2.0769508e-04, 1.1925176e-04], dtype=float32), 1.99915625)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_15_FINAL/15_328.wav', 'Wirkt die Betäubung noch?', 26, array([-8.7537330e-05, -3.0825776e-04, -2.8424736e-04, ...,\n", + " 1.1261477e-04, 2.0012977e-04, 1.0000553e-04], dtype=float32), 1.9022291666666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_15_FINAL/15_385.wav', 'Es kann nur einen geben!', 24, array([-1.8947560e-04, -2.3450297e-05, -1.2145152e-04, ...,\n", + " -6.9378242e-05, -1.1301338e-04, -2.5457976e-04], dtype=float32), 1.8901145833333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_15_FINAL/15_400.wav', 'Wer weiÃ\\x9f es?', 13, array([ 8.2401210e-05, 1.2261249e-05, 1.3193028e-04, ...,\n", + " -9.9374527e-05, -2.4473227e-05, 7.3499345e-05], dtype=float32), 1.49028125)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_15_FINAL/15_406.wav', 'Tja, das ist Pech.', 18, array([2.4313416e-04, 4.7331341e-05, 1.6022228e-04, ..., 3.0806483e-04,\n", + " 2.9170502e-04, 3.0395557e-04], dtype=float32), 1.7810729166666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_15_FINAL/15_412.wav', 'Alles muss raus.', 16, array([2.3146431e-04, 2.1641712e-04, 1.4716707e-04, ..., 1.4341300e-04,\n", + " 3.7975753e-06, 9.1287213e-05], dtype=float32), 1.708375)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_15_FINAL/15_415.wav', 'Stell die Heizung höher.', 25, array([-3.96930409e-05, 1.02812344e-04, 1.21250734e-04, ...,\n", + " -3.47016321e-05, -2.01824150e-04, -9.76954325e-05], dtype=float32), 1.74471875)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_15_FINAL/15_418.wav', 'Etwa über mich?', 16, array([-0.00020996, -0.00011494, -0.00010331, ..., -0.00017556,\n", + " -0.00020319, -0.00027111], dtype=float32), 1.7689479166666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_15_FINAL/15_421.wav', 'Das ist natürlich bitter.', 26, array([-3.3627803e-04, -2.5203897e-04, -2.3072124e-04, ...,\n", + " 4.6018063e-06, 1.7239379e-05, 4.0267703e-05], dtype=float32), 1.878)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_15_FINAL/15_440.wav', 'Hier knicken.', 13, array([-0.000481 , -0.00023708, -0.00018911, ..., -0.00022185,\n", + " -0.00025873, -0.00026997], dtype=float32), 1.30853125)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_15_FINAL/15_464.wav', 'Alles Lügen!', 13, 
array([-0.00027017, -0.00016623, -0.00022159, ..., -0.00033337,\n", + " -0.00044782, -0.00022404], dtype=float32), 1.4175833333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_15_FINAL/15_465.wav', 'Alles oder nichts!', 18, array([2.8375158e-05, 6.5034241e-05, 9.6457785e-05, ..., 1.0699107e-04,\n", + " 9.6596435e-05, 1.2572719e-04], dtype=float32), 1.7931875)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_15_FINAL/15_467.wav', 'Warum bleibst du stehen?', 24, array([-1.4808709e-04, -1.8631479e-04, -1.2836477e-04, ...,\n", + " -6.0794730e-05, -1.5104183e-05, -2.5347929e-04], dtype=float32), 1.91434375)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_15_FINAL/15_473.wav', 'Zumindest ein bisschen.', 23, array([-0.00024013, -0.00025727, -0.00025987, ..., -0.00023257,\n", + " -0.00033333, -0.00025996], dtype=float32), 1.5993229166666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_15_FINAL/15_474.wav', 'Sprich mir nach!', 16, array([-1.7584162e-04, -1.6248986e-04, -8.6785782e-05, ...,\n", + " 3.5318243e-04, 3.7314874e-04, 3.2366288e-04], dtype=float32), 1.4175833333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_15_FINAL/15_500.wav', 'Sehr witzig!', 12, array([ 7.5077987e-05, 1.1926649e-04, 1.8323194e-04, ...,\n", + " -3.8680941e-04, -3.2216642e-04, -3.3234112e-04], dtype=float32), 1.39334375)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_15_FINAL/15_502.wav', 'Achtung, Achtung!', 17, array([-4.0950408e-04, -2.9606355e-04, -3.7786187e-04, ...,\n", + " -2.1742952e-05, 3.0543149e-05, 8.8129680e-05], dtype=float32), 1.5145104166666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_15_FINAL/15_512.wav', 'Wo bitte schön steht das?', 26, array([ 2.2647387e-04, 1.4740237e-04, 1.2381608e-04, ...,\n", + " -1.1670060e-04, -5.8438465e-05, -5.2704141e-05], dtype=float32), 1.9264583333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_15_FINAL/15_513.wav', 'SchlieÃ\\x9fen Sie bitte die Luke.', 30, array([ 0.00012086, 0.00019177, 0.00012352, ..., -0.00014259,\n", + " -0.00024671, -0.00014045], dtype=float32), 1.69625)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_15_FINAL/15_549.wav', 'Ich hasse meinen Wecker.', 24, array([-1.9575720e-05, -1.5009989e-04, -1.6873972e-04, ...,\n", + " -6.5268898e-05, -1.8595096e-04, -1.7330179e-04], dtype=float32), 1.6235625)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_15_FINAL/15_576.wav', 'Nicht so laut!', 14, array([-1.6541444e-04, -8.3816949e-06, -1.0135791e-04, ...,\n", + " 3.1510697e-04, 4.1878404e-04, 3.6531710e-04], dtype=float32), 1.4539375)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_15_FINAL/15_595.wav', 'Ich tu mein Bestes.', 19, array([ 8.3501960e-05, 1.7197721e-04, 2.2250456e-04, ...,\n", + " -1.2569079e-04, -1.3276993e-04, -2.5823418e-04], dtype=float32), 1.74471875)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_15_FINAL/15_597.wav', 'Alle guten Dinge sind drei.', 27, array([-1.1909505e-05, -8.7172106e-05, -1.2401433e-04, ...,\n", + " -1.4987224e-04, -1.3219267e-05, -7.9211000e-05], dtype=float32), 1.7568333333333332)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_15_FINAL/15_607.wav', 'Welche Vase?', 12, array([-1.8119848e-04, -2.7736003e-04, -1.8833524e-04, ...,\n", + " 5.6385907e-05, 1.3869893e-04, 1.9968288e-04], dtype=float32), 1.4539375)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_15_FINAL/15_651.wav', 'Zeig mal deine Muckis.', 22, array([-0.00038406, -0.0003124 , -0.00026326, ..., 0.00032153,\n", + " 0.00029355, 0.0004676 ], dtype=float32), 1.82953125)\n", 
+ "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_15_FINAL/15_661.wav', 'Wir sind umzingelt.', 19, array([ 4.0317194e-05, 2.1714004e-04, 1.5210512e-04, ...,\n", + " 1.1821459e-04, 9.8579549e-05, -3.1008281e-06], dtype=float32), 1.57509375)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_15_FINAL/15_669.wav', 'Du zitterst ja!', 15, array([-0.0002655 , -0.00018808, -0.00023504, ..., 0.00028222,\n", + " 0.00025013, 0.00041103], dtype=float32), 1.2116145833333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_15_FINAL/15_687.wav', 'Ob sie schon Hunger haben?', 26, array([-7.1925861e-05, 1.8567745e-06, -5.7103756e-05, ...,\n", + " 2.6770154e-04, 7.6355340e-05, 2.2662200e-05], dtype=float32), 1.8416458333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_15_FINAL/15_699.wav', 'Das wird schon wieder.', 22, array([-2.5816666e-04, -8.4095438e-05, -1.2401373e-05, ...,\n", + " -1.9085000e-04, -2.3972438e-04, -1.5835713e-04], dtype=float32), 1.69625)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_15_FINAL/15_700.wav', 'Köpfe runter!', 14, array([ 8.14295272e-05, 1.14302085e-04, 1.28549975e-04, ...,\n", + " -2.10746948e-04, -2.65351351e-04, -3.40027531e-04], dtype=float32), 1.32065625)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_15_FINAL/15_712.wav', 'Sie sollten sich schämen!', 26, array([ 2.6346499e-04, 9.5443167e-05, 1.6159609e-04, ...,\n", + " -2.1241463e-04, -1.5395934e-04, -8.9938527e-05], dtype=float32), 1.6477916666666668)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_15_FINAL/15_732.wav', 'Schwund ist überall.', 21, array([-0.00039054, -0.00025168, -0.00026237, ..., 0.00020222,\n", + " 0.0002156 , 0.00019633], dtype=float32), 1.6356666666666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_15_FINAL/15_734.wav', 'Schon fertig?', 13, array([-6.8748363e-06, 5.9082297e-05, -3.8726441e-05, ...,\n", + " -1.3909466e-04, -2.0350730e-04, -1.0977411e-04], dtype=float32), 1.2237291666666668)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_15_FINAL/15_743.wav', 'Musst du da reinschieÃ\\x9fen?', 26, array([0.00038867, 0.00026221, 0.0002308 , ..., 0.0001513 , 0.00017203,\n", + " 0.00012958], dtype=float32), 1.91434375)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_15_FINAL/15_764.wav', 'Das wäre mir neu.', 18, array([-1.6335897e-04, -1.3920359e-04, -6.9949492e-05, ...,\n", + " 3.2939854e-05, 3.5769459e-05, -3.7220154e-05], dtype=float32), 1.91434375)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_15_FINAL/15_773.wav', 'Mission gescheitert!', 20, array([ 5.22215014e-05, 1.20894714e-04, 1.96668057e-04, ...,\n", + " -2.58956774e-04, -1.39872835e-04, -1.39142721e-04], dtype=float32), 1.82953125)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_15_FINAL/15_782.wav', 'Dir kann geholfen werden.', 25, array([-5.4091932e-05, -2.9271763e-05, 1.2364880e-04, ...,\n", + " -1.4125406e-04, -2.3545137e-04, -2.5170582e-04], dtype=float32), 1.7810625)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_15_FINAL/15_790.wav', 'Vertraust du mir blind?', 23, array([-1.3496955e-04, -4.5282133e-05, 1.7263924e-04, ...,\n", + " 1.0330205e-05, -1.9022463e-04, -1.3715150e-04], dtype=float32), 1.6235520833333332)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_15_FINAL/15_793.wav', 'Wie stellen Sie sich das vor?', 29, array([5.7090012e-05, 9.3246163e-05, 1.4314597e-04, ..., 1.8600497e-04,\n", + " 1.2342732e-04, 2.2610810e-04], dtype=float32), 1.8901145833333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_15_FINAL/15_802.wav', 'Ist es nicht so?', 16, array([ 
8.5881460e-05, 1.9039282e-04, 2.1635044e-04, ...,\n", +    " 1.2600829e-04, 4.5968747e-05, -1.7667595e-05], dtype=float32), 1.4297083333333334)\n", +
    "[... output truncated: several hundred further entries of the same form, e.g. ('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_16_FINAL/16_8.wav', 'Erkennst du ihn wieder?', 23, array([...], dtype=float32), 1.69625); each entry is a tuple of (wav path, German transcript, character count, truncated float32 waveform array, duration in seconds), drawn from BATCH_1 through BATCH_20 of Mozilla_DE_Thomas3. Durations range from roughly 0.75 s to 2.0 s, and some transcripts show UTF-8 text decoded as Latin-1 (e.g. 'GroÃ\x9fer' for 'Großer') ...]\n",
"('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_2_FINAL/2_349.wav', 'Immer dasselbe mit euch!', 24, array([-2.3236821e-04, -3.3517351e-04, -3.0884243e-04, ...,\n", + " 8.0186677e-05, 1.6797509e-05, -1.6808892e-05], dtype=float32), 1.9652708333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_2_FINAL/2_373.wav', 'Kennen wir uns?', 15, array([-5.0764916e-06, -7.3543859e-05, 1.1312031e-05, ...,\n", + " -3.2780910e-05, -1.3342450e-04, -8.3744824e-05], dtype=float32), 1.2833125)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_2_FINAL/2_378.wav', 'Redet ihr nicht miteinander?', 28, array([ 3.3598881e-05, 2.8617033e-05, -4.8224880e-05, ...,\n", + " 7.4195086e-06, -4.8723170e-05, 6.5784006e-05], dtype=float32), 1.9491458333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_2_FINAL/2_420.wav', 'Ich hasse Rituale.', 18, array([ 7.1912136e-06, 3.0618376e-06, 8.3010753e-05, ...,\n", + " -1.4567961e-05, 1.1762774e-05, 3.1641615e-05], dtype=float32), 1.9995833333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_2_FINAL/2_486.wav', 'Wie groÃ\\x9f ist er denn?', 22, array([ 3.0858202e-05, 7.4509022e-05, 1.3619277e-04, ...,\n", + " -3.3022930e-06, 9.8051796e-06, -2.7459086e-05], dtype=float32), 1.867625)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_2_FINAL/2_537.wav', 'Es ist zum Heulen.', 18, array([-1.91718082e-05, 6.43216190e-05, 1.19517106e-04, ...,\n", + " 1.98961898e-05, 2.61543628e-05, -1.34301990e-06], dtype=float32), 1.9879583333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_2_FINAL/2_544.wav', 'Nimm es ihm nicht übel.', 24, array([ 4.2532893e-08, -6.0193088e-05, 4.5228205e-07, ...,\n", + " 1.0533330e-04, 4.6245714e-05, -1.5597003e-05], dtype=float32), 1.7243333333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_2_FINAL/2_547.wav', 'Um Gottes Willen!', 17, array([-1.3659755e-05, -1.1149528e-04, -7.7302495e-05, ...,\n", + " -5.2225241e-05, -6.4986933e-05, -1.9107327e-05], dtype=float32), 1.5258125)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_2_FINAL/2_570.wav', 'Voll der Lauch!', 15, array([ 2.3544633e-05, -8.2356913e-05, -8.4443280e-05, ...,\n", + " -8.3270104e-05, -1.1799393e-04, -4.4736080e-05], dtype=float32), 1.8773958333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_2_FINAL/2_587.wav', 'Das will ich meinen!', 20, array([ 1.15228731e-05, -1.00152036e-04, -3.91713802e-05, ...,\n", + " -3.00788033e-05, -2.60362140e-05, -2.54406623e-05], dtype=float32), 1.823375)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_2_FINAL/2_595.wav', 'Gib dir keine Mühe!', 20, array([1.1918874e-05, 7.7710565e-06, 2.2653954e-05, ..., 1.2088865e-06,\n", + " 7.3900424e-05, 4.7324560e-05], dtype=float32), 1.7467083333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_2_FINAL/2_612.wav', 'Entschuldige', 12, array([-3.3377805e-06, -1.3742609e-05, -3.8612947e-05, ...,\n", + " -4.1617693e-07, -5.6907498e-05, -6.3263155e-06], dtype=float32), 1.096375)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_3_FINAL/3_21.wav', 'Ich glaube, ja.', 15, array([-8.5291895e-06, -1.9790486e-05, 2.0588757e-05, ...,\n", + " 4.3540977e-06, 3.3659559e-05, 2.8167133e-05], dtype=float32), 1.7166458333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_3_FINAL/3_48.wav', 'Was denn jetzt?', 15, array([3.3551037e-06, 7.2315837e-05, 9.8261240e-05, ..., 1.8147666e-04,\n", + " 1.3495231e-04, 1.4128252e-05], dtype=float32), 1.5235625)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_3_FINAL/3_53.wav', 'Ist es das wert?', 16, array([ 6.972987e-06, 
-6.975743e-05, -8.996664e-05, ..., -8.399185e-06,\n", + " -8.876120e-05, -7.246290e-05], dtype=float32), 1.8518125)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_3_FINAL/3_118.wav', 'Findest du?', 11, array([-1.12564965e-04, -6.36710465e-05, -1.04058718e-05, ...,\n", + " 9.31948132e-04, 8.68959934e-04, 9.69569141e-04], dtype=float32), 1.664)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_3_FINAL/3_139.wav', \"Wohl bekommt's.\", 15, array([-5.15776883e-05, -1.17497526e-04, -1.66595215e-04, ...,\n", + " 2.18412912e-04, 1.14814145e-04, 9.11775787e-05], dtype=float32), 1.792)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_3_FINAL/3_242.wav', 'An die Latte!', 13, array([-2.9736115e-05, 6.2128674e-05, -1.7713173e-06, ...,\n", + " -9.5688220e-06, -3.3155960e-05, -2.0475885e-05], dtype=float32), 1.3866666666666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_3_FINAL/3_256.wav', 'Wie lange noch?', 15, array([-2.0701043e-05, 4.3786262e-05, -9.4478482e-06, ...,\n", + " -5.2062300e-05, -2.7314949e-05, -9.1643757e-05], dtype=float32), 1.728)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_3_FINAL/3_287.wav', 'Halt die Klappe!', 16, array([5.4399417e-05, 1.7967819e-04, 1.5970672e-04, ..., 6.5669185e-05,\n", + " 5.5145654e-05, 4.6019220e-05], dtype=float32), 1.984)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_3_FINAL/3_320.wav', 'Mach selber!', 12, array([-6.9740723e-05, 4.4339331e-06, -8.3184044e-05, ...,\n", + " 1.4031340e-05, 1.2219901e-05, 7.0223352e-05], dtype=float32), 1.7706666666666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_3_FINAL/3_406.wav', 'Alles frisch?', 13, array([-1.15522525e-04, -1.33178124e-04, -1.96026522e-04, ...,\n", + " 5.01462309e-05, 9.76682568e-05, 2.38532848e-05], dtype=float32), 1.4626666666666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_3_FINAL/3_577.wav', 'Nun ja.', 7, array([-4.87583275e-05, -1.09872217e-05, -2.24729556e-05, ...,\n", + " 4.66253441e-05, 1.96394685e-04, 1.52344255e-05], dtype=float32), 1.2373333333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_3_FINAL/3_584.wav', 'Wer macht Kaffee?', 17, array([ 3.8115049e-05, -9.6357744e-06, 7.8119905e-05, ...,\n", + " -2.0809734e-04, -1.8620661e-04, -1.3914006e-04], dtype=float32), 1.792)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_3_FINAL/3_666.wav', 'Verflixt noch mal!', 18, array([-2.2882066e-04, -2.9250007e-04, -2.8351255e-04, ...,\n", + " 1.1955178e-04, 1.7373663e-04, 7.4429918e-05], dtype=float32), 1.9626666666666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_4_FINAL/4_30.wav', 'Schweigen Sie!', 14, array([-3.1788008e-05, -3.4064793e-05, -2.7987528e-05, ...,\n", + " -1.5091732e-05, -2.6680038e-05, -3.8527149e-05], dtype=float32), 1.7066666666666668)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_4_FINAL/4_152.wav', 'Danke für die Blumen.', 22, array([ 1.7122936e-06, 6.9385942e-06, 3.6246149e-07, ...,\n", + " -1.4888439e-05, 2.3918087e-06, -7.6587348e-06], dtype=float32), 1.8791666666666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_4_FINAL/4_218.wav', 'Und das stimmt sogar.', 21, array([ 4.1728057e-05, 5.5362845e-05, 6.8501140e-05, ...,\n", + " -2.8829272e-05, -9.4307861e-06, -1.7323953e-05], dtype=float32), 1.77075)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_4_FINAL/4_228.wav', 'Oder etwa doch?', 15, array([-1.9058538e-05, -1.6082793e-05, -2.4990761e-05, ...,\n", + " -3.7682898e-05, -2.6903717e-05, -2.3563476e-05], dtype=float32), 1.8430416666666667)\n", + 
"('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_4_FINAL/4_235.wav', 'Lass es gut sein.', 17, array([2.5800218e-05, 2.4886122e-05, 2.6301905e-05, ..., 2.0628368e-05,\n", + " 1.3992375e-05, 1.1405512e-05], dtype=float32), 1.8430416666666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_4_FINAL/4_243.wav', 'Was für ein Schwachsinn!', 25, array([-3.7606616e-05, -4.6087491e-05, -5.2579282e-05, ...,\n", + " -9.6937197e-07, -2.7171711e-05, -4.9796104e-06], dtype=float32), 1.79625)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_4_FINAL/4_247.wav', 'Meinen Sie etwa mich?', 21, array([3.4092998e-05, 2.4871710e-05, 3.1290274e-05, ..., 3.8184229e-05,\n", + " 3.8311930e-05, 1.9864283e-05], dtype=float32), 1.7936666666666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_4_FINAL/4_266.wav', 'Doch, der kommt mit.', 20, array([-8.7682038e-06, 3.3905403e-06, -2.5130439e-06, ...,\n", + " -7.3065071e-06, -4.2862930e-06, -2.6758978e-06], dtype=float32), 1.9898125)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_4_FINAL/4_324.wav', 'Du willst eine Revanche?', 24, array([ 7.33632942e-06, 5.97303369e-06, 5.83600695e-06, ...,\n", + " 1.49849775e-05, 1.08204476e-05, -3.58769762e-06], dtype=float32), 1.9875833333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_4_FINAL/4_359.wav', 'Achtung, Lebensgefahr!', 22, array([ 1.4763166e-05, 2.4559184e-05, -6.1735605e-06, ...,\n", + " -4.0966352e-06, -3.3091931e-06, -8.6383498e-06], dtype=float32), 1.9786666666666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_4_FINAL/4_368.wav', 'Sag doch was!', 13, array([ 2.2444649e-06, 7.6022111e-06, 4.6965952e-06, ...,\n", + " -3.8131137e-05, -2.2596261e-05, -3.6410544e-05], dtype=float32), 1.6553333333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_4_FINAL/4_377.wav', 'Klar geht das!', 14, array([ 7.9997551e-07, 7.2854018e-06, 1.5502587e-06, ...,\n", + " 4.2983497e-06, 1.1067883e-06, -6.2062031e-06], dtype=float32), 1.6706666666666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_4_FINAL/4_399.wav', 'Ganz wie ihre Mutter!', 21, array([-1.3625373e-05, -1.5324851e-05, -8.2329316e-06, ...,\n", + " -3.1325493e-05, -3.4243036e-05, -3.8296192e-05], dtype=float32), 1.664)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_4_FINAL/4_404.wav', \"Und ab geht's!\", 14, array([-1.6434673e-05, -4.6597820e-06, -3.0193429e-05, ...,\n", + " 5.6945028e-06, 4.0367054e-06, 2.6991445e-06], dtype=float32), 1.7606666666666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_4_FINAL/4_409.wav', 'Mahlzeit!', 9, array([-1.6801674e-05, -1.1057600e-05, -2.5246043e-05, ...,\n", + " -5.8098987e-08, -1.3756068e-05, 7.1873791e-07], dtype=float32), 1.536)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_4_FINAL/4_417.wav', 'Was für ein Ding?', 18, array([ 6.9620419e-06, 2.2064933e-05, -7.5111966e-06, ...,\n", + " -2.0811036e-05, -7.9874835e-06, -4.7895933e-06], dtype=float32), 1.6473333333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_4_FINAL/4_468.wav', 'Genau einen.', 12, array([-7.29009771e-05, -8.52458907e-05, -1.06200605e-04, ...,\n", + " -5.32185413e-06, -1.07338547e-05, -8.40487064e-06], dtype=float32), 1.3666666666666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_4_FINAL/4_514.wav', 'Zu Befehl!', 10, array([-2.3591008e-05, -3.5732090e-05, -3.4227767e-05, ...,\n", + " -2.8442626e-05, 1.2019399e-05, -1.3777444e-05], dtype=float32), 1.728)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_4_FINAL/4_611.wav', 'So viel dazu.', 13, array([ 7.4472086e-06, 7.6988908e-06, 
1.9191646e-05, ...,\n", + " -3.9837760e-06, -5.9473659e-06, -1.5347923e-05], dtype=float32), 1.7493333333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_4_FINAL/4_633.wav', 'Doch nicht diese!', 17, array([-1.5188496e-05, -1.3384078e-05, -2.5278267e-05, ...,\n", + " -9.0744479e-06, -1.7723884e-05, -8.7737453e-06], dtype=float32), 1.664)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_4_FINAL/4_637.wav', 'Da musste durch.', 16, array([-6.1405983e-05, -6.6703440e-05, -6.7519111e-05, ...,\n", + " -3.0437115e-05, -1.0807975e-05, -2.7072128e-05], dtype=float32), 1.752)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_4_FINAL/4_660.wav', 'Bitte haben Sie Geduld.', 23, array([-5.3847558e-05, -7.3710136e-05, -6.7579982e-05, ...,\n", + " -1.0283680e-05, -3.1539796e-05, -2.2386694e-05], dtype=float32), 1.7706666666666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_4_FINAL/4_668.wav', 'Na logo!', 8, array([-2.3636436e-05, -1.5810723e-05, -2.8241622e-05, ...,\n", + " -1.3751334e-06, 1.1204750e-05, 6.0684874e-06], dtype=float32), 0.992)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_4_FINAL/4_676.wav', 'Ich bin Student.', 16, array([ 7.12830888e-06, -1.04677674e-05, 5.06380366e-06, ...,\n", + " 2.56778890e-06, 2.41716316e-06, 1.42220715e-05], dtype=float32), 1.952)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_4_FINAL/4_721.wav', 'Warum glaubst du ihm?', 21, array([-2.8855115e-05, -2.1601849e-05, -4.5714023e-05, ...,\n", + " 1.0700950e-06, -8.6324471e-06, -1.1586128e-05], dtype=float32), 1.888)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_4_FINAL/4_767.wav', 'Alle Lichter einschalten', 24, array([ 3.82986327e-05, 4.59369221e-05, 5.11867729e-05, ...,\n", + " -3.22036831e-05, -1.03011635e-05, -3.75456489e-06], dtype=float32), 1.984)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_4_FINAL/4_775.wav', 'Schlaf dich gesund!', 19, array([ 8.9927544e-06, 3.7294924e-07, 2.0666816e-07, ...,\n", + " -1.4574092e-05, 9.9155943e-07, -1.1447136e-05], dtype=float32), 1.8826666666666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_4_FINAL/4_785.wav', 'Wer spricht da?', 15, array([-5.0560098e-05, -5.3028423e-05, -5.4164509e-05, ...,\n", + " 1.4739732e-05, 9.2475852e-07, 2.9554553e-06], dtype=float32), 1.8953333333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_5_FINAL/5_9.wav', 'Kannst du häkeln?', 18, array([ 5.7386926e-05, 8.2160957e-05, 5.5038501e-05, ...,\n", + " -4.3172963e-06, 4.1677453e-05, 4.7943948e-05], dtype=float32), 1.6993333333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_5_FINAL/5_44.wav', 'Bitte kommen!', 13, array([1.0956727e-04, 1.5614097e-04, 1.3331856e-04, ..., 1.3650022e-05,\n", + " 1.1109641e-05, 1.3527738e-06], dtype=float32), 1.536)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_5_FINAL/5_53.wav', 'Hör zu!', 8, array([-6.0608932e-06, -4.1002470e-05, 2.2774377e-05, ...,\n", + " -8.5628499e-06, -1.7102975e-05, -5.2866948e-05], dtype=float32), 1.3013333333333332)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_5_FINAL/5_54.wav', 'Bitte, bleib da.', 16, array([ 3.5020625e-05, 5.4955650e-05, 8.0653575e-05, ...,\n", + " -2.3735600e-05, 3.2219548e-05, -2.8188835e-05], dtype=float32), 1.3893333333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_5_FINAL/5_64.wav', 'Was piept hier so?', 18, array([4.8969712e-05, 1.0184415e-04, 1.0672094e-04, ..., 1.0047335e-04,\n", + " 8.2428909e-05, 7.4903524e-05], dtype=float32), 1.476)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_5_FINAL/5_65.wav', 'Die Tränen 
sind echt.', 22, array([-2.5628888e-04, -3.2446094e-04, -2.8078147e-04, ...,\n", + " 6.0525483e-05, 4.5224155e-05, 3.3287215e-05], dtype=float32), 1.6746666666666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_5_FINAL/5_66.wav', 'Oh, wie ist das schön!', 23, array([-1.3561957e-04, -2.9620592e-04, -1.1127204e-04, ...,\n", + " -1.3441611e-05, -2.0591922e-05, -4.1845051e-05], dtype=float32), 1.9373333333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_5_FINAL/5_70.wav', 'Nein, die andere.', 17, array([1.08759763e-04, 2.17104956e-04, 2.50456098e-04, ...,\n", + " 1.99571132e-05, 1.15319264e-04, 1.09982837e-04], dtype=float32), 1.536)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_5_FINAL/5_73.wav', 'Der Hunger treibt es hinein!', 28, array([-7.6006359e-04, -1.0618430e-03, -9.1635465e-04, ...,\n", + " -2.1929874e-05, -3.9133694e-05, -2.3749919e-05], dtype=float32), 1.8006666666666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_5_FINAL/5_81.wav', 'Dann machen alle Mann kehrt.', 28, array([-1.5950583e-04, -1.6477516e-04, -1.3784993e-04, ...,\n", + " 6.2336148e-05, 1.8180552e-05, 9.2034599e-05], dtype=float32), 1.952)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_5_FINAL/5_90.wav', 'Komm mal klar.', 14, array([2.0439363e-04, 2.6905714e-04, 1.8548965e-04, ..., 3.1710202e-05,\n", + " 2.3530252e-05, 2.1564969e-05], dtype=float32), 1.4186666666666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_5_FINAL/5_95.wav', 'Ist noch alles dran?', 20, array([-2.2047247e-04, -3.2201153e-04, -2.8738266e-04, ...,\n", + " -7.7452714e-05, -4.3362299e-05, 7.5945250e-06], dtype=float32), 1.632)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_5_FINAL/5_99.wav', 'Nie glaubt sie mir.', 19, array([ 1.5801163e-05, 5.7899309e-05, 3.1942949e-05, ...,\n", + " -3.0608622e-05, -8.0015372e-05, -3.3063152e-05], dtype=float32), 1.5613333333333332)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_5_FINAL/5_207.wav', 'Sperrt sie ein!', 15, array([1.7913821e-04, 3.0638310e-04, 2.4345164e-04, ..., 5.7913669e-05,\n", + " 2.3223187e-05, 5.4880878e-05], dtype=float32), 1.984)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_5_FINAL/5_249.wav', 'Ja was geht denn ab?', 20, array([-1.0661902e-04, -9.4065879e-05, -6.9818758e-05, ...,\n", + " -3.3508950e-05, 3.7770699e-06, 2.3758860e-06], dtype=float32), 1.9973333333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_5_FINAL/5_250.wav', 'Dümmste Ausrede ever!', 22, array([ 3.16905534e-05, 3.74705655e-06, -2.55898794e-05, ...,\n", + " 4.44019097e-05, 2.41961206e-05, 1.06514235e-05], dtype=float32), 1.9806666666666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_5_FINAL/5_251.wav', 'Wir sind hier ja unter uns.', 27, array([-3.3862656e-04, -5.0057843e-04, -4.7798100e-04, ...,\n", + " 3.9128430e-05, -4.0246316e-05, -1.3086459e-05], dtype=float32), 1.984)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_5_FINAL/5_278.wav', 'Er ist ein User!', 16, array([ 5.7516689e-05, 4.9558192e-05, 6.3942927e-05, ...,\n", + " -2.3214375e-06, 1.1798247e-05, 3.6477853e-05], dtype=float32), 1.9626666666666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_5_FINAL/5_282.wav', 'Zurückbleiben, bitte!', 22, array([ 1.8404999e-04, 2.6386097e-04, 3.0643051e-04, ...,\n", + " -6.5650514e-05, -5.8646885e-05, -6.5778695e-05], dtype=float32), 1.8986666666666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_5_FINAL/5_287.wav', 'Gut getrollt.', 13, array([-3.0470208e-05, -6.1425657e-05, -3.8205933e-05, ...,\n", + " 6.9129404e-05, 
1.1258064e-04, 1.2031732e-04], dtype=float32), 1.728)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_5_FINAL/5_336.wav', 'Ganz sicher sogar.', 18, array([2.2912030e-04, 2.5114618e-04, 1.9525687e-04, ..., 8.7549386e-05,\n", + " 8.5029111e-05, 7.8950601e-05], dtype=float32), 1.8986666666666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_5_FINAL/5_341.wav', 'Wohl kaum.', 10, array([ 1.6102573e-04, 1.7911245e-04, 1.5706589e-04, ...,\n", + " -2.9753184e-05, -4.4280365e-05, 3.1124373e-06], dtype=float32), 1.2586666666666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_5_FINAL/5_405.wav', 'Wie geht das?', 13, array([-1.6796951e-04, -1.9163813e-04, -1.9830326e-04, ...,\n", + " -5.0582935e-06, 1.2309533e-05, -2.6891148e-05], dtype=float32), 1.536)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_5_FINAL/5_407.wav', 'Befehl ist Befehl!', 18, array([ 9.3892188e-05, 1.0890782e-04, 9.6308002e-05, ...,\n", + " -3.0468544e-05, -2.8461071e-05, -7.1021976e-05], dtype=float32), 1.792)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_5_FINAL/5_412.wav', 'Mit wem spreche ich?', 20, array([ 7.7782068e-05, 9.2144561e-05, 2.8574361e-05, ...,\n", + " -1.1466493e-05, 5.7958755e-06, 6.2275390e-06], dtype=float32), 1.7813333333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_5_FINAL/5_422.wav', 'An schlechten Tagen ja.', 23, array([ 4.2690190e-05, -2.3120232e-05, -2.5523063e-05, ...,\n", + " 2.1898361e-05, -2.7946093e-05, 4.6620054e-05], dtype=float32), 1.9833333333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_5_FINAL/5_460.wav', 'Sie haben richtig geraten!', 26, array([-9.0950904e-05, -1.4647168e-04, -7.1847418e-05, ...,\n", + " 2.8589966e-05, -2.2244849e-05, 1.1577226e-05], dtype=float32), 1.9626666666666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_5_FINAL/5_476.wav', 'Alle sprechen so leise.', 23, array([-6.9834332e-06, -3.1972188e-05, -3.9375213e-05, ...,\n", + " -2.6475973e-05, 1.4716678e-05, -4.5046556e-05], dtype=float32), 1.92)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_5_FINAL/5_477.wav', 'Woher willst du das wissen?', 27, array([-2.12417421e-04, -2.56415573e-04, -2.42886104e-04, ...,\n", + " 9.67599408e-05, 9.51452384e-05, 1.15144765e-04], dtype=float32), 1.9413333333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_5_FINAL/5_488.wav', 'Anders als man denkt.', 21, array([ 1.8948530e-04, 3.4113604e-04, 1.9700162e-04, ...,\n", + " -7.6619792e-05, -3.6041514e-05, -1.6451453e-06], dtype=float32), 1.9413333333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_5_FINAL/5_495.wav', 'Runter mit den Waffen!', 22, array([ 1.12369155e-04, 4.44092657e-05, 8.84383553e-05, ...,\n", + " -7.52444794e-06, -4.84231314e-05, -4.22670855e-05], dtype=float32), 1.8986666666666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_5_FINAL/5_504.wav', 'Und jetzt?', 10, array([-5.6267181e-06, -5.9708807e-05, -3.4106170e-06, ...,\n", + " -1.0430286e-04, -1.2670284e-04, -1.4261479e-04], dtype=float32), 1.344)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_5_FINAL/5_511.wav', 'Jein.', 5, array([ 5.89297160e-05, 1.19100565e-04, 6.77589633e-05, ...,\n", + " -1.61726966e-05, -7.95948727e-05, -2.88161173e-05], dtype=float32), 1.0453333333333332)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_5_FINAL/5_522.wav', 'Vorsicht Stufe!', 15, array([ 6.2581657e-06, 4.7380847e-05, 8.6832886e-05, ...,\n", + " 6.6710568e-06, 2.2640632e-05, -3.9922857e-06], dtype=float32), 1.3866666666666667)\n", + 
"('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_5_FINAL/5_526.wav', 'War ich zu zickig?', 18, array([1.6193213e-03, 2.2825657e-03, 2.0064272e-03, ..., 6.6650551e-05,\n", + " 7.2444294e-05, 8.5881074e-05], dtype=float32), 1.728)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_5_FINAL/5_530.wav', 'Wo drückt der Schuh?', 21, array([-1.46389175e-05, 3.62552214e-06, -9.26516877e-05, ...,\n", + " -3.03967099e-05, -1.01135854e-04, 3.96938458e-06], dtype=float32), 1.536)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_5_FINAL/5_534.wav', 'Kann das noch warten?', 21, array([1.74110639e-04, 1.80995979e-04, 2.26840231e-04, ...,\n", + " 1.18193166e-04, 7.83515134e-05, 5.11603030e-05], dtype=float32), 1.664)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_5_FINAL/5_539.wav', 'Passen die Sätze so?', 21, array([-3.1769360e-04, -4.7089945e-04, -4.3369626e-04, ...,\n", + " 1.6810809e-04, 5.3649095e-05, 1.4577823e-04], dtype=float32), 1.8346666666666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_5_FINAL/5_542.wav', 'Ã\\x9cbermorgen.', 12, array([-2.4301407e-04, -3.5653665e-04, -2.1825638e-04, ...,\n", + " 6.1351508e-05, 9.2918686e-05, 8.8779299e-05], dtype=float32), 1.1306666666666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_5_FINAL/5_589.wav', 'Ich mag deinen Mantel.', 22, array([-2.1532472e-04, -3.8814778e-04, -2.9697348e-04, ...,\n", + " -3.1324416e-05, -3.5802710e-05, 8.7614599e-06], dtype=float32), 1.6746666666666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_5_FINAL/5_604.wav', 'Wie macht er das bloÃ\\x9f?', 23, array([-1.8150010e-04, -2.0398400e-04, -1.5460433e-04, ...,\n", + " -3.4698380e-05, -6.5080814e-05, -1.8794183e-06], dtype=float32), 1.8986666666666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_5_FINAL/5_666.wav', 'Was soll ich sagen?', 19, array([-8.8535160e-07, -7.4019059e-05, 7.4082243e-05, ...,\n", + " -6.2706102e-05, 2.9464120e-06, -1.1627621e-05], dtype=float32), 1.7493333333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_5_FINAL/5_694.wav', 'Wie misst man das?', 18, array([ 2.5176766e-04, 1.8225121e-04, 3.6178919e-04, ...,\n", + " 2.0104897e-06, 5.5382880e-05, -2.6957323e-05], dtype=float32), 1.92)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_5_FINAL/5_704.wav', 'Mir fehlen die Worte.', 21, array([ 1.7020236e-04, 3.3776514e-04, 3.4704659e-04, ...,\n", + " 4.7222587e-05, -1.5073445e-05, -1.6250522e-05], dtype=float32), 1.7493333333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_5_FINAL/5_714.wav', 'Gehen wir?', 10, array([ 1.5890028e-04, 1.6513607e-04, 1.7650245e-04, ...,\n", + " 1.3219027e-05, 3.1738135e-05, -9.3036484e-05], dtype=float32), 1.3226666666666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_5_FINAL/5_723.wav', 'Ich komme noch mal dran.', 24, array([-4.6879621e-05, -1.1869792e-04, -5.2995206e-06, ...,\n", + " 1.0155864e-05, -8.1713588e-05, -3.8661747e-05], dtype=float32), 1.8773333333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_5_FINAL/5_742.wav', 'Bitte schön!', 13, array([-3.4623430e-04, -4.4416677e-04, -3.0297900e-04, ...,\n", + " 5.3006592e-05, 5.1509913e-05, 7.1368544e-05], dtype=float32), 1.1733333333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_5_FINAL/5_743.wav', 'Das haben sie gesagt.', 21, array([-2.3902958e-05, 4.5714452e-05, 7.7266725e-07, ...,\n", + " -5.0056198e-05, 3.0718882e-05, 6.8078203e-05], dtype=float32), 1.8346666666666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_5_FINAL/5_799.wav', 'Ist der Kugelschreiber blau?', 28, 
array([-1.6907173e-04, -2.9390136e-04, -2.4633619e-04, ...,\n", + " 5.9892503e-05, 6.6163295e-05, 1.4039288e-04], dtype=float32), 1.984)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_6_FINAL/6_21.wav', 'Alles nach vorne!', 17, array([2.0106880e-04, 3.4844220e-04, 2.3129249e-04, ..., 9.6451986e-05,\n", + " 7.4439027e-05, 9.3146300e-05], dtype=float32), 1.5786666666666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_6_FINAL/6_55.wav', 'Nichts dergleichen.', 19, array([-2.7673854e-04, -3.7996779e-04, -2.6658855e-04, ...,\n", + " -4.9654176e-07, -4.3088527e-05, -2.0399790e-05], dtype=float32), 1.5786666666666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_6_FINAL/6_66.wav', \"Langsam nervt's.\", 16, array([ 7.7058452e-05, 4.7672478e-05, 2.6094380e-05, ...,\n", + " -6.2562191e-05, 2.7688688e-07, -1.2926825e-05], dtype=float32), 1.7493333333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_6_FINAL/6_98.wav', 'Seid ihr verrückt?', 19, array([-1.3435316e-04, -1.8146966e-04, -1.6307829e-04, ...,\n", + " -3.7551112e-07, 1.6737657e-05, 1.7336246e-05], dtype=float32), 1.6426666666666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_6_FINAL/6_110.wav', 'Gib mir fünf!', 14, array([1.9428060e-04, 2.9409130e-04, 2.5521498e-04, ..., 1.9916235e-05,\n", + " 3.7017526e-05, 2.2721317e-05], dtype=float32), 1.3653333333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_6_FINAL/6_140.wav', 'Auch wieder wahr.', 17, array([-6.21244908e-05, -1.39888449e-04, -1.16935575e-04, ...,\n", + " -9.32170296e-05, -7.70114566e-05, -1.37492418e-04], dtype=float32), 1.3653333333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_6_FINAL/6_167.wav', 'Sicher ist sicher.', 18, array([ 1.6774700e-04, 2.7458806e-04, 1.3175888e-04, ...,\n", + " -3.9984116e-05, -4.5541576e-05, 2.3846082e-05], dtype=float32), 1.792)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_6_FINAL/6_175.wav', 'Wie soll ich sagen?', 19, array([-2.0688836e-05, -6.4790765e-05, -1.1548823e-05, ...,\n", + " -1.0844359e-05, -3.6513706e-05, -4.4623717e-05], dtype=float32), 1.6213333333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_6_FINAL/6_200.wav', 'Ist doch Ehrensache!', 20, array([ 1.07319385e-04, 1.08591557e-04, 6.78624638e-05, ...,\n", + " 3.66282293e-05, -4.84154953e-05, -2.46383879e-05], dtype=float32), 1.92)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_6_FINAL/6_258.wav', 'Jeder Mensch ist anders.', 24, array([ 9.4392788e-05, 1.3444535e-04, 1.5623294e-04, ...,\n", + " -9.0343368e-05, -1.2968398e-04, -2.8964683e-05], dtype=float32), 1.8986666666666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_6_FINAL/6_262.wav', 'Nächstes Mal vielleicht.', 25, array([-4.9963495e-04, -7.3549181e-04, -5.7168922e-04, ...,\n", + " 5.7476438e-05, 8.7852583e-05, 6.3541149e-05], dtype=float32), 1.76)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_6_FINAL/6_290.wav', 'Ich wollte nur nett sein.', 25, array([-3.0248266e-04, -4.1539475e-04, -4.3182663e-04, ...,\n", + " -6.8298694e-05, -3.5496461e-05, -8.2268067e-05], dtype=float32), 1.856)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_6_FINAL/6_293.wav', 'Sie haben Post.', 15, array([7.0743052e-05, 1.5683858e-04, 7.2936782e-05, ..., 3.4985551e-05,\n", + " 2.5512374e-05, 4.4657580e-05], dtype=float32), 1.6)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_6_FINAL/6_302.wav', 'Mit dem Raumschiff bitte!', 25, array([-3.3868386e-05, -4.2923082e-05, 2.2873657e-05, ...,\n", + " 2.9917417e-05, -9.9794874e-05, -1.3378082e-04], dtype=float32), 
1.5470625)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_6_FINAL/6_309.wav', 'Hä, wieso das denn?', 20, array([-4.2834796e-05, -1.3094838e-04, -2.1130700e-05, ...,\n", + " -4.5203033e-05, -6.0939405e-05, -4.7152938e-05], dtype=float32), 1.9385)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_6_FINAL/6_321.wav', 'Lass dich nicht so hängen!', 27, array([ 3.3312430e-05, 1.1557561e-04, 1.7304946e-04, ...,\n", + " -5.3516556e-05, -6.5977452e-05, -8.5248823e-05], dtype=float32), 1.6589166666666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_6_FINAL/6_333.wav', 'Sehen wir uns in der Bib?', 25, array([1.8330962e-04, 1.0809512e-04, 2.0564985e-04, ..., 5.3472275e-05,\n", + " 1.1819158e-04, 1.3498007e-04], dtype=float32), 1.9571458333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_6_FINAL/6_340.wav', 'Klingt logisch.', 15, array([-4.9080444e-07, -4.6037778e-05, -1.0552061e-04, ...,\n", + " -7.5399061e-05, -1.1574150e-04, -1.1011600e-04], dtype=float32), 1.137)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_6_FINAL/6_361.wav', 'Lies mir etwas vor!', 19, array([-5.9860780e-05, -1.2714561e-04, -4.6063276e-05, ...,\n", + " 1.3993531e-04, 1.7140653e-04, 1.5545388e-04], dtype=float32), 1.5284375)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_6_FINAL/6_368.wav', 'Nö, nicht wirklich.', 20, array([1.4233610e-05, 5.8029418e-05, 2.2922040e-05, ..., 2.8016962e-04,\n", + " 1.9504840e-04, 1.6919435e-04], dtype=float32), 1.77075)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_6_FINAL/6_401.wav', 'Besser als gar nichts.', 22, array([-1.9661777e-04, -3.8629526e-04, -3.8140707e-04, ...,\n", + " 4.2625456e-06, 9.6469674e-05, 2.5569330e-05], dtype=float32), 1.7055)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_6_FINAL/6_402.wav', 'Lass mich doch mal träumen.', 28, array([5.1605228e-05, 2.0454232e-05, 5.4702823e-06, ..., 1.0539140e-04,\n", + " 9.8325436e-05, 6.1908002e-05], dtype=float32), 1.87325)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_6_FINAL/6_407.wav', 'Wochenende!', 11, array([-7.1158116e-05, -1.3735623e-04, -1.4360537e-04, ...,\n", + " 7.2980845e-05, -2.7338607e-05, -2.3744215e-06], dtype=float32), 1.0251666666666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_6_FINAL/6_410.wav', 'Ich habe dich gewarnt.', 22, array([-2.9008405e-04, -3.9160642e-04, -3.8535651e-04, ...,\n", + " -8.1862388e-05, -2.1166212e-04, -1.1729619e-04], dtype=float32), 1.5563958333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_6_FINAL/6_420.wav', 'Kann schon sein.', 16, array([8.5848145e-04, 1.2030958e-03, 1.0428407e-03, ..., 9.0862151e-05,\n", + " 1.8885999e-04, 1.3144755e-04], dtype=float32), 1.2395208333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_6_FINAL/6_430.wav', 'Schön gespielt.', 16, array([-3.1265599e-04, -3.5982658e-04, -3.4920897e-04, ...,\n", + " -5.9947542e-05, -2.8197737e-05, -8.6103646e-05], dtype=float32), 1.3606666666666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_6_FINAL/6_432.wav', 'Gut geschlafen?', 15, array([-4.6266021e-05, -4.5735891e-05, -1.5800438e-04, ...,\n", + " -5.1101240e-05, -4.5094261e-05, -1.9669098e-05], dtype=float32), 1.2488333333333332)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_6_FINAL/6_435.wav', 'Auf das Wetter natürlich auch.', 31, array([-3.5034932e-04, -4.7157385e-04, -4.0150300e-04, ...,\n", + " 1.4378574e-04, 3.5348174e-05, 1.3807646e-04], dtype=float32), 1.9664583333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_6_FINAL/6_437.wav', 'Komm, geh weg!', 14, array([ 
3.15589714e-05, 1.08517845e-04, 6.59165744e-05, ...,\n", + " -1.43856349e-04, -9.36611250e-05, -1.37200404e-04], dtype=float32), 1.4119375)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_6_FINAL/6_469.wav', 'Schluss mit lustig!', 19, array([ 1.0199297e-04, 1.2600295e-04, 1.6211855e-04, ...,\n", + " -1.5054672e-04, -7.8931960e-05, 6.7272131e-06], dtype=float32), 1.4259166666666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_6_FINAL/6_483.wav', 'Das spart Geschirr.', 19, array([ 6.6607544e-04, 7.1844418e-04, 6.1214896e-04, ...,\n", + " -3.3901462e-05, 1.3226962e-04, 3.8378406e-05], dtype=float32), 1.8080208333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_6_FINAL/6_486.wav', 'Das haben Recherchen ergeben.', 29, array([-9.0566078e-05, -2.1272554e-04, -1.9089306e-04, ...,\n", + " 9.4858078e-05, 8.9547662e-05, 7.4881907e-05], dtype=float32), 1.9571458333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_6_FINAL/6_487.wav', 'Frohes Schaffen!', 16, array([ 6.8461159e-05, 1.5294057e-04, 2.2618793e-04, ...,\n", + " -2.1603348e-05, -5.1863241e-05, -6.0653092e-06], dtype=float32), 1.337375)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_6_FINAL/6_500.wav', 'Sie sind ja noch blutjung!', 26, array([-0.00065145, -0.00103323, -0.00116705, ..., -0.0001188 ,\n", + " -0.00014697, -0.00013791], dtype=float32), 1.8639375)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_6_FINAL/6_516.wav', 'Lebe ich noch?', 14, array([-4.3064877e-04, -5.6503405e-04, -4.1817623e-04, ...,\n", + " -1.6641241e-04, -1.2653919e-04, -8.6205284e-05], dtype=float32), 1.1090416666666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_6_FINAL/6_527.wav', 'Nicht dafür!', 13, array([ 3.5247151e-04, 4.8163909e-04, 3.9777748e-04, ...,\n", + " -5.2257688e-05, -3.3391923e-05, -1.8325276e-05], dtype=float32), 1.137)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_6_FINAL/6_534.wav', 'Genau hundert Stück.', 21, array([-0.00059065, -0.00093307, -0.00079542, ..., 0.00016691,\n", + " 0.00026112, 0.00016139], dtype=float32), 1.8732708333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_6_FINAL/6_535.wav', 'Wie ist das möglich?', 21, array([ 3.7494919e-04, 5.0490367e-04, 3.7185123e-04, ...,\n", + " 4.3858363e-06, -5.6393877e-05, -6.9622547e-05], dtype=float32), 1.3886458333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_6_FINAL/6_536.wav', 'Alles wiederholt sich.', 22, array([-7.8303702e-03, -9.4565414e-03, 4.3799067e-03, ...,\n", + " -7.5256619e-05, -4.4781635e-05, -4.8768667e-05], dtype=float32), 1.37)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_6_FINAL/6_538.wav', 'Der Klügere gibt nach.', 23, array([-3.3002507e-04, -4.8394629e-04, -4.5790782e-04, ...,\n", + " -1.5844591e-04, -3.2335000e-05, -1.1339883e-04], dtype=float32), 1.4259166666666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_6_FINAL/6_559.wav', 'Schwing die Hufe!', 17, array([-0.00077766, -0.00118464, -0.00101971, ..., -0.00019519,\n", + " -0.00011075, -0.00013927], dtype=float32), 1.3233958333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_6_FINAL/6_561.wav', 'Was das wieder kostet!', 22, array([ 8.5937936e-04, 1.1237016e-03, 9.1907283e-04, ...,\n", + " 2.4701139e-05, -1.2547316e-04, -5.1732359e-06], dtype=float32), 1.6775416666666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_6_FINAL/6_563.wav', 'Wieso immer ich?', 16, array([4.5056498e-04, 7.2014128e-04, 6.0793286e-04, ..., 8.4482606e-05,\n", + " 9.7867851e-05, 2.6745778e-05], dtype=float32), 1.5843541666666667)\n", + 
"('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_6_FINAL/6_582.wav', 'Dann gäbe es dich jetzt nicht.', 31, array([-2.4657813e-04, -3.9872411e-04, -3.3457237e-04, ...,\n", + " 1.6457469e-05, -1.5761821e-05, 1.1328906e-04], dtype=float32), 1.9944166666666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_6_FINAL/6_585.wav', 'Dem werde ich Beine machen!', 27, array([0.00027461, 0.00040794, 0.00034263, ..., 0.00012492, 0.00024055,\n", + " 0.00019042], dtype=float32), 1.9850833333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_6_FINAL/6_588.wav', 'Wollen wir Ihn herein lassen?', 29, array([-3.2398489e-04, -4.3375781e-04, -3.6100275e-04, ...,\n", + " 1.1542152e-04, 9.4435090e-05, 1.1465035e-04], dtype=float32), 1.9198541666666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_6_FINAL/6_590.wav', 'Richtig geraten!', 16, array([-2.6969259e-04, -4.4567345e-04, -5.3715584e-04, ...,\n", + " 6.1917281e-06, 1.5911644e-05, 3.0031568e-05], dtype=float32), 1.2954375)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_6_FINAL/6_634.wav', 'Nun sag schon!', 14, array([-0.00074525, -0.0010401 , -0.00091129, ..., 0.00015909,\n", + " 0.00022603, 0.00013058], dtype=float32), 1.0997291666666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_6_FINAL/6_658.wav', 'Mit Vergnügen!', 15, array([-1.9300323e-04, -2.6942717e-04, -2.3031878e-04, ...,\n", + " 6.9992027e-05, 5.8482234e-05, 1.2584617e-04], dtype=float32), 1.1929166666666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_6_FINAL/6_659.wav', 'Komm sofort her!', 16, array([ 5.0228823e-04, 8.3419622e-04, 7.3006074e-04, ...,\n", + " 4.1768268e-05, -4.2891694e-05, -7.8192716e-05], dtype=float32), 1.4725208333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_6_FINAL/6_674.wav', 'Chill mal!', 10, array([ 3.6116564e-04, 5.9050595e-04, 4.8674442e-04, ...,\n", + " -1.4056740e-04, -6.9539550e-05, -1.2587184e-04], dtype=float32), 1.0624583333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_6_FINAL/6_677.wav', 'Jetzt mal Butter bei die Fische.', 32, array([-0.00017322, -0.00025202, -0.0003011 , ..., -0.00014372,\n", + " -0.00011187, -0.00014939], dtype=float32), 1.9198541666666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_6_FINAL/6_705.wav', 'Das wird Macken geben.', 22, array([ 2.6667553e-06, 2.4150137e-05, 6.4756452e-05, ...,\n", + " -7.3486663e-05, -7.0459449e-05, 4.1346510e-05], dtype=float32), 1.7334583333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_6_FINAL/6_707.wav', 'Hilf mir mal auf die Sprünge.', 30, array([ 3.0066914e-04, 4.8592529e-04, 4.8968260e-04, ...,\n", + " -2.9595327e-05, -4.5949713e-05, -2.5512512e-05], dtype=float32), 1.8452916666666668)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_6_FINAL/6_747.wav', 'Versuch macht klug.', 19, array([-6.13919692e-04, -8.45544797e-04, -7.43770273e-04, ...,\n", + " 9.61075566e-05, -8.48421769e-05, -1.16592164e-04], dtype=float32), 1.7624583333333332)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_6_FINAL/6_768.wav', 'Kapiere ich nicht.', 18, array([ 4.0008963e-04, 6.7968445e-04, 6.0982589e-04, ...,\n", + " -7.4681542e-05, 2.5036192e-05, -4.9270067e-05], dtype=float32), 1.3747083333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_6_FINAL/6_776.wav', 'Der ist ja mickrig!', 19, array([-1.7217337e-04, -2.9700578e-04, -2.6711932e-04, ...,\n", + " -1.2146128e-04, -3.9679853e-05, -5.6118748e-05], dtype=float32), 1.3747083333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_6_FINAL/6_777.wav', 'Ja, sogar mehrere.', 
18, array([ 1.1276272e-03, 1.6285295e-03, 1.3798362e-03, ...,\n", + " -2.8823823e-05, 3.4296296e-05, -5.9779604e-06], dtype=float32), 1.8329583333333332)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_6_FINAL/6_778.wav', 'Fünf oder lieber sechs?', 24, array([-0.00051076, -0.00086243, -0.00095237, ..., -0.00015284,\n", + " -0.00011934, -0.00010978], dtype=float32), 1.9475)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_1.wav', 'Wen interessiert das schon?', 27, array([-2.0386204e-04, -1.6595512e-04, -3.4064340e-04, ...,\n", + " -5.8528771e-05, -4.0259012e-05, -2.3960278e-05], dtype=float32), 1.9034583333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_4.wav', 'Das sieht man sofort.', 21, array([-4.7220071e-04, -6.1083253e-04, -5.2480790e-04, ...,\n", + " 3.0703570e-05, 5.0339484e-05, -4.0401741e-05], dtype=float32), 1.7007708333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_9.wav', 'Kannst du ein Instrument spielen?', 33, array([-5.8206980e-04, -9.0975891e-04, -9.2016242e-04, ...,\n", + " -3.6644913e-05, -8.9309695e-05, 5.9820622e-06], dtype=float32), 1.9519166666666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_17.wav', 'Nein, hör mir zu!', 18, array([1.8352878e-04, 2.3541819e-04, 1.9473537e-04, ..., 3.8015917e-06,\n", + " 3.0260228e-05, 4.7941758e-05], dtype=float32), 1.6038333333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_24.wav', 'Sowas ist schade.', 17, array([ 5.2204914e-04, 7.2680251e-04, 7.3363306e-04, ...,\n", + " -3.0053505e-05, -6.5714506e-05, -9.0218302e-05], dtype=float32), 1.5509583333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_48.wav', 'Ich will zocken!', 16, array([-0.00016469, -0.00039593, -0.00179843, ..., 0.00018615,\n", + " 0.00012972, 0.00017355], dtype=float32), 1.5773958333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_55.wav', 'Ein Insider berichtet.', 22, array([ 3.7575817e-05, 2.7695228e-04, 1.8994253e-04, ...,\n", + " 2.4524426e-05, 4.0446877e-05, -2.5534926e-05], dtype=float32), 1.8505833333333332)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_65.wav', 'Evelyn ist seekrank.', 20, array([0.00062829, 0.00093936, 0.0008276 , ..., 0.00017747, 0.00012535,\n", + " 0.00013539], dtype=float32), 1.7712708333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_66.wav', 'Zunächst der Blick aufs Wetter.', 32, array([-0.00092968, -0.00141539, -0.00128506, ..., 0.00019455,\n", + " 0.00034253, 0.00020309], dtype=float32), 1.8593958333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_77.wav', 'Was schmeckt am besten?', 23, array([6.4622820e-04, 1.0704662e-03, 1.1439651e-03, ..., 1.9296777e-04,\n", + " 9.2506059e-05, 4.9435432e-05], dtype=float32), 1.6567083333333332)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_78.wav', 'Wir rufen Sie dann auf.', 23, array([-1.0261516e-03, -1.4563096e-03, -1.2881490e-03, ...,\n", + " 5.2330338e-06, 6.4821052e-06, -3.7749737e-06], dtype=float32), 1.6655208333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_84.wav', 'Das Essen war vorzüglich.', 26, array([-5.0324254e-04, -7.2285999e-04, -5.4835685e-04, ...,\n", + " -4.1776315e-05, -4.3907283e-05, 3.2214456e-07], dtype=float32), 1.9959791666666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_89.wav', 'SüÃ\\x9fes oder Saures!', 20, array([-2.1448301e-04, -3.2685092e-04, -1.9420320e-04, ...,\n", + " 5.3501964e-05, 3.9838564e-05, 
9.8899181e-05], dtype=float32), 1.5641875)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_96.wav', 'Woran das wohl liegt?', 21, array([7.9406239e-04, 1.0801835e-03, 8.6238224e-04, ..., 1.5784081e-04,\n", + " 1.3262879e-04, 7.3408869e-06], dtype=float32), 1.7977083333333332)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_118.wav', 'Hier hast du deinen Fisch.', 26, array([0.00047934, 0.0008143 , 0.00071459, ..., 0.00040429, 0.00026866,\n", + " 0.00011292], dtype=float32), 1.7624583333333332)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_135.wav', 'Und zwar hochverdient!', 22, array([-3.4465449e-04, -5.7459215e-04, -4.8516967e-04, ...,\n", + " 2.8431052e-05, 9.6089265e-05, 2.6090011e-05], dtype=float32), 1.9475)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_144.wav', 'Da kräht kein Hahn nach.', 25, array([-2.4579404e-05, -2.7367115e-04, -1.3865142e-04, ...,\n", + " 6.7543602e-05, 4.0894251e-05, 2.7544003e-05], dtype=float32), 1.7095833333333332)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_148.wav', 'Du zuerst.', 10, array([-8.7500273e-05, -8.8356370e-05, 3.9270883e-05, ...,\n", + " -1.0109833e-04, 5.8080084e-05, -1.4014350e-04], dtype=float32), 1.3658958333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_149.wav', 'Hier mal eine Faustregel.', 25, array([7.6173781e-04, 9.7895204e-04, 8.7399769e-04, ..., 5.2696447e-05,\n", + " 1.8836032e-06, 6.7383153e-06], dtype=float32), 1.7624583333333332)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_150.wav', 'Ich sehe kein Leerzeichen.', 26, array([-2.0238354e-05, -3.9017228e-05, -1.8151976e-04, ...,\n", + " -2.8073411e-05, -8.1482809e-05, -9.7252036e-05], dtype=float32), 1.8329583333333332)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_151.wav', 'Hast du mal einen Fünfziger?', 29, array([-6.5894198e-04, -9.4568409e-04, -8.3610136e-04, ...,\n", + " -1.5597163e-04, -1.5190896e-04, -4.1842508e-05], dtype=float32), 1.8770208333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_155.wav', 'Mit Pommes?', 11, array([ 0.0003422 , 0.0003448 , 0.00032375, ..., -0.00023719,\n", + " -0.00028336, -0.00012051], dtype=float32), 0.9252916666666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_172.wav', 'Noch fünf Minuten bitte, Schatz!', 33, array([-4.4656807e-04, -5.2705233e-04, -5.8281276e-04, ...,\n", + " -1.7271057e-05, 3.9541996e-05, 1.4292495e-05], dtype=float32), 1.9387083333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_174.wav', 'Es ist wie verhext.', 19, array([3.7680543e-04, 6.3684850e-04, 4.2467855e-04, ..., 1.3614137e-05,\n", + " 8.9109992e-05, 1.3674991e-04], dtype=float32), 1.9563125)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_179.wav', 'Unter uns ist ein Verräter.', 28, array([-2.2123450e-04, -3.2310621e-04, -2.8145462e-04, ...,\n", + " -1.0567834e-04, 3.1090029e-05, 6.3631160e-05], dtype=float32), 1.8682083333333332)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_190.wav', 'Nimm die Maske ab!', 18, array([4.6733877e-04, 6.9651386e-04, 5.4769457e-04, ..., 1.6475593e-04,\n", + " 7.5979711e-05, 7.9883583e-05], dtype=float32), 1.2337291666666668)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_193.wav', 'Nicht dass ich wüsste.', 23, array([ 0.0001971 , 0.00045662, 0.00023958, ..., -0.00011544,\n", + " -0.00016933, -0.00016841], dtype=float32), 1.5862083333333334)\n", + 
"('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_194.wav', 'Der Tee zieht noch.', 19, array([-0.00024223, -0.00046848, -0.00045602, ..., -0.00014842,\n", + " -0.00016475, -0.00012201], dtype=float32), 1.6390833333333332)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_210.wav', 'Tu es für mich!', 16, array([4.4054058e-04, 7.1835978e-04, 6.8089634e-04, ..., 6.5819913e-05,\n", + " 6.3534033e-05, 2.4601215e-04], dtype=float32), 1.5685833333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_216.wav', 'Bölken Sie woanders herum!', 27, array([-4.3733866e-04, -5.8234221e-04, -6.0285319e-04, ...,\n", + " -2.0549475e-04, -5.1659747e-05, -6.9836286e-05], dtype=float32), 1.9827708333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_217.wav', 'So, so.', 7, array([5.1622407e-04, 8.1000535e-04, 6.2310486e-04, ..., 1.1862206e-04,\n", + " 7.1799346e-05, 3.3523640e-06], dtype=float32), 1.3747291666666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_220.wav', 'Leicht verdientes Geld.', 23, array([ 1.47327999e-04, 1.87759506e-04, -1.56362767e-05, ...,\n", + " 1.08211556e-04, 8.50987126e-05, -3.97509648e-05], dtype=float32), 1.7360208333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_226.wav', 'Wie lautet der Zwischenstand?', 29, array([ 5.1066454e-04, 7.2763517e-04, 6.3450093e-04, ...,\n", + " -8.1010330e-05, -1.8156270e-05, -5.7707053e-05], dtype=float32), 1.9827708333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_273.wav', 'Was hat ihn geritten?', 21, array([-3.4532882e-04, -5.6787761e-04, -6.2309759e-04, ...,\n", + " -3.4597360e-05, -1.2706745e-05, -1.1419446e-04], dtype=float32), 1.6214583333333332)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_282.wav', 'So nicht, Freundchen!', 21, array([-2.2482723e-03, -3.3393281e-03, -3.0241525e-03, ...,\n", + " 8.9230271e-05, 8.0567042e-05, -1.7856433e-05], dtype=float32), 1.7800833333333332)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_287.wav', 'So ein feiner Hund!', 19, array([-0.00024811, -0.00028893, -0.00043056, ..., -0.0001634 ,\n", + " -0.00015287, -0.00012142], dtype=float32), 1.4628333333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_297.wav', 'Ah, die Feuerwehr!', 18, array([-5.8479345e-05, 1.3606872e-06, -3.1950235e-04, ...,\n", + " 4.5466539e-04, 4.1461250e-04, 3.1427949e-04], dtype=float32), 1.8329583333333332)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_298.wav', 'Nachricht bitte faxen!', 22, array([-3.4957391e-04, -4.1374876e-04, -4.3978900e-04, ...,\n", + " -1.4674234e-04, -2.0285949e-04, -3.0548752e-05], dtype=float32), 1.8858333333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_330.wav', 'Alter Verwalter!', 16, array([0.00058996, 0.00086262, 0.00074697, ..., 0.00030815, 0.00029123,\n", + " 0.00018931], dtype=float32), 1.8615833333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_333.wav', 'Was will man mehr?', 18, array([-8.3821319e-04, -1.1214241e-03, -1.0474359e-03, ...,\n", + " -4.0887986e-05, 1.7188730e-05, 6.5576496e-05], dtype=float32), 1.3570833333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_362.wav', 'Ganz der Papa!', 14, array([ 1.0614250e-06, 1.0387501e-04, 2.6466480e-05, ...,\n", + " -3.6802659e-05, 4.0980707e-05, 7.8629993e-05], dtype=float32), 1.3042291666666668)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_368.wav', 'Es geht schon, danke!', 21, 
array([-3.1714016e-04, -4.7203674e-04, -3.6235168e-04, ...,\n", + " 7.8341058e-05, 4.7649206e-05, 1.9486140e-05], dtype=float32), 1.6919583333333332)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_404.wav', 'Notieren Sie sich das.', 22, array([-3.1276091e-04, -4.1585916e-04, -4.4194568e-04, ...,\n", + " -1.9349645e-04, -6.0014678e-05, 2.7422161e-07], dtype=float32), 1.8153333333333332)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_416.wav', 'Mach das Licht an!', 18, array([ 7.4020500e-04, 9.9551259e-04, 7.7506527e-04, ...,\n", + " -9.4190882e-06, -5.5277683e-06, 6.0646169e-05], dtype=float32), 1.273375)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_418.wav', 'Gebt mir ein O!', 15, array([ 2.55384133e-04, 2.99102190e-04, 3.85188963e-04, ...,\n", + " -6.97520736e-05, -1.12780595e-04, -5.84875634e-05], dtype=float32), 1.5641875)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_425.wav', 'Wir haben unsere Vorschriften.', 30, array([-0.0014397 , -0.00206455, -0.00194661, ..., 0.00017973,\n", + " 0.00031227, 0.00029818], dtype=float32), 1.9563125)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_428.wav', 'Dort spielt die Musik!', 22, array([0.00064248, 0.00109204, 0.00095334, ..., 0.00016345, 0.00021933,\n", + " 0.00016792], dtype=float32), 1.9386875)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_439.wav', 'Runter von der Couch!', 21, array([0.00032077, 0.0003695 , 0.00031393, ..., 0.00016823, 0.00027614,\n", + " 0.00030219], dtype=float32), 1.4716458333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_440.wav', 'Geh, Martin. Geh!', 17, array([-0.0006147 , -0.00096355, -0.00084441, ..., -0.00019064,\n", + " -0.00014664, -0.0001376 ], dtype=float32), 1.4011458333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_442.wav', 'Dann ist doch alles paletti.', 28, array([-0.0003903 , -0.00051721, -0.00051659, ..., 0.00044963,\n", + " 0.00069829, 0.00057605], dtype=float32), 1.7915833333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_464.wav', 'Hören Sie erst einmal zu!', 26, array([-2.3209564e-03, -3.7553089e-03, -3.8581355e-03, ...,\n", + " 4.0617133e-06, 6.2217005e-05, 1.8342262e-05], dtype=float32), 1.7977083333333332)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_475.wav', 'Ich will die Hände sehen!', 26, array([-1.1517418e-03, -1.5774536e-03, -1.5022659e-03, ...,\n", + " 8.5659660e-05, 1.5909245e-04, 1.0823877e-04], dtype=float32), 1.4804583333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_481.wav', 'Du kennst doch Tessa.', 21, array([-9.7565542e-05, -8.4838466e-05, -2.1631434e-04, ...,\n", + " -9.0966016e-05, -9.0894253e-05, -1.5524645e-04], dtype=float32), 1.7624583333333332)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_487.wav', 'Angeber und Neidhammel.', 23, array([-4.2524905e-04, -5.5071624e-04, -4.9216941e-04, ...,\n", + " -9.1045105e-05, -3.0268184e-05, -1.0583480e-04], dtype=float32), 1.8593958333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_491.wav', 'Können diese Augen lügen?', 27, array([-1.04710832e-03, -1.57430710e-03, -1.43215503e-03, ...,\n", + " 1.43472225e-05, 1.20743534e-05, -1.07111417e-04], dtype=float32), 1.8241458333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_499.wav', 'Kann man hier denn nicht lüften?', 33, array([-9.1343711e-04, -1.1802320e-03, -9.9357730e-04, ...,\n", + " 7.8159035e-05, 2.3012167e-04, 3.3637294e-05], 
dtype=float32), 1.9871666666666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_500.wav', 'Der Mann ist vom Leben gezeichnet.', 34, array([ 1.06765685e-04, 2.15540877e-05, -9.11364405e-05, ...,\n", + " -5.42830057e-05, -9.09425871e-05, -3.43727625e-05], dtype=float32), 1.7712708333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_506.wav', 'Wollen Sie mich aushorchen?', 27, array([ 0.00060325, 0.00087957, 0.00074186, ..., -0.00021219,\n", + " -0.00024823, -0.00017538], dtype=float32), 1.9739375)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_531.wav', 'Je eher, desto besser.', 22, array([5.7826861e-04, 7.7570765e-04, 6.1795511e-04, ..., 8.9765228e-05,\n", + " 4.5600675e-05, 1.4581751e-04], dtype=float32), 1.7800833333333332)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_538.wav', 'Och, komm schon her!', 20, array([-4.9066258e-04, -7.3491497e-04, -5.5824185e-04, ...,\n", + " 8.5976262e-06, 1.0786976e-04, 1.2791457e-04], dtype=float32), 1.8593958333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_542.wav', 'Nimm deine Maske endlich ab!', 28, array([ 5.4343470e-04, 7.2278164e-04, 7.2296784e-04, ...,\n", + " -3.4153378e-05, -3.6221893e-05, -8.8784982e-05], dtype=float32), 1.9386875)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_574.wav', 'Wollt ihr mich ärgern?', 23, array([0.00089293, 0.00139316, 0.0012052 , ..., 0.00011375, 0.00022351,\n", + " 0.00014075], dtype=float32), 1.6567083333333332)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_599.wav', 'Das ist knorke.', 15, array([-8.0439750e-06, -4.1563135e-06, -3.6478632e-05, ...,\n", + " -1.6141655e-04, -8.8675122e-05, -1.2264083e-04], dtype=float32), 1.3394583333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_603.wav', 'Suchst du Ã\\x84rger?', 17, array([ 1.8951594e-04, 3.2533749e-04, 2.3231433e-04, ...,\n", + " -1.0691231e-05, -6.9874281e-05, -4.5488341e-05], dtype=float32), 1.6038333333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_615.wav', 'Hör nicht auf diese Schwätzer!', 32, array([ 0.00019477, 0.00020745, 0.00017311, ..., 0.00030501,\n", + " -0.00018354, 0.00024707], dtype=float32), 1.9739375)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_618.wav', \"Gleich geht's weiter!\", 21, array([-6.4648612e-04, -1.0017229e-03, -9.2825363e-04, ...,\n", + " -4.5593577e-05, -6.6424482e-06, 1.4339538e-05], dtype=float32), 1.4452083333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_641.wav', 'Herr, erbarme dich!', 19, array([ 9.9213721e-06, 1.8233144e-05, -3.5843041e-05, ...,\n", + " -5.0301041e-05, -1.3241796e-04, -2.0356404e-04], dtype=float32), 1.7624583333333332)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_649.wav', 'Sammelt Holz für das Feuer!', 28, array([-0.00024918, -0.00046716, -0.00041068, ..., 0.00016901,\n", + " 0.0001653 , 0.00017449], dtype=float32), 1.9387083333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_692.wav', 'Erst will ich noch duschen.', 27, array([ 2.7669812e-04, 5.0494721e-04, 5.6616898e-04, ...,\n", + " 4.0362014e-05, -7.8570345e-05, 6.2029525e-05], dtype=float32), 1.6082291666666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_703.wav', 'Was kommt als nächstes?', 24, array([ 5.5248733e-04, 8.9842337e-04, 6.7765010e-04, ...,\n", + " -1.3254551e-04, -9.5152573e-05, -2.1063161e-05], dtype=float32), 1.7977083333333332)\n", + 
"('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_718.wav', 'Setzen, sechs!', 14, array([-1.2044140e-04, -2.0982703e-04, -2.7291384e-04, ...,\n", + " 1.7828704e-04, 9.6640695e-05, 1.3019536e-05], dtype=float32), 1.2689791666666668)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_720.wav', 'Dann nehmen wir meinen Wagen.', 29, array([ 1.2858727e-04, 1.7004457e-04, -5.1648447e-05, ...,\n", + " 2.5735653e-04, 2.8828968e-04, 1.9113944e-04], dtype=float32), 1.9915833333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_724.wav', 'Lach mal wieder.', 16, array([2.5169516e-04, 3.1780155e-04, 2.4175562e-04, ..., 1.8466891e-04,\n", + " 9.4025556e-05, 1.4185447e-04], dtype=float32), 1.3570833333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_726.wav', 'Lass uns mal Fieber messen!', 27, array([ 4.6217057e-04, 7.1049004e-04, 5.8858085e-04, ...,\n", + " -2.7612457e-06, -4.4886579e-05, -1.3602876e-06], dtype=float32), 1.8858333333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_733.wav', 'Ja, du hast ja Recht!', 21, array([-0.00065709, -0.00095549, -0.00067059, ..., 0.00023162,\n", + " 0.00042249, 0.00021008], dtype=float32), 1.8241458333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_745.wav', 'Kannst du bitte das Licht anlassen?', 35, array([-4.5024044e-05, -6.6272514e-05, -1.4942518e-04, ...,\n", + " -1.0059726e-04, -8.9730158e-05, -4.9335773e-05], dtype=float32), 1.8593958333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_7_FINAL/7_755.wav', 'Jetzt wird gefeiert!', 20, array([ 6.5074948e-04, 8.2373072e-04, 6.9322297e-04, ...,\n", + " 2.5613972e-05, -7.3600226e-05, 9.0847658e-05], dtype=float32), 1.4892708333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_21.wav', 'Oh, ein Blechschaden!', 21, array([ 2.7968596e-05, 2.5622614e-05, 5.5850909e-05, ...,\n", + " -3.6388674e-06, -1.3192165e-05, -5.8324472e-06], dtype=float32), 1.7536458333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_24.wav', 'Woran erkennt man sie?', 22, array([-1.6248678e-05, -2.0881544e-05, 2.2568598e-05, ...,\n", + " -1.0051125e-06, -4.4804568e-05, -3.8311518e-05], dtype=float32), 1.8770208333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_57.wav', 'Wo hast Du den Ludenmantel her?', 31, array([ 9.4084098e-05, 6.2570427e-05, 8.1058839e-05, ...,\n", + " -3.1764132e-05, -4.2468575e-05, -3.3772998e-05], dtype=float32), 1.9915833333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_59.wav', 'Bingo!', 6, array([ 4.7897654e-05, 2.7239477e-05, 3.7255515e-05, ...,\n", + " -1.7023414e-05, -2.9687346e-05, -3.9503360e-05], dtype=float32), 1.1456041666666668)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_64.wav', 'Schreibt man das so?', 20, array([-1.6650798e-04, -2.2954465e-04, -2.1082905e-04, ...,\n", + " 5.5576045e-05, 1.4893518e-05, 2.0421723e-05], dtype=float32), 1.7272083333333332)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_81.wav', 'Halt mal kurz mein Bier.', 24, array([-8.2688921e-06, -1.1980872e-05, -4.0169580e-06, ...,\n", + " 8.8575485e-05, 1.3926605e-04, 3.6588870e-05], dtype=float32), 1.8417708333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_83.wav', 'Doch Hilfe naht bereits.', 24, array([ 4.9734876e-06, 5.2194659e-06, 1.2122488e-05, ...,\n", + " -1.8982364e-05, -4.2752654e-05, -8.2323677e-05], dtype=float32), 1.98275)\n", + 
"('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_84.wav', 'Formation einnehmen!', 20, array([8.1898354e-05, 7.4887575e-05, 6.6653323e-05, ..., 7.7452451e-06,\n", + " 2.1070047e-05, 3.0395060e-05], dtype=float32), 1.8682083333333332)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_87.wav', 'Holt mich hier raus!', 20, array([ 6.53247334e-05, -2.15428197e-04, -5.42638707e-04, ...,\n", + " -1.15612675e-05, 2.72592151e-05, 1.50995202e-05], dtype=float32), 1.5509583333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_88.wav', 'Da vorne kommt mein Ex.', 23, array([-2.0436737e-04, -7.5976342e-05, 9.7310134e-05, ...,\n", + " 8.3587765e-06, -3.2081423e-06, 1.7971579e-05], dtype=float32), 1.8505833333333332)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_113.wav', 'Glaube es mir einfach.', 22, array([-2.7944061e-05, 1.0844935e-05, -1.5047234e-05, ...,\n", + " -2.7743961e-05, 2.9569403e-06, -3.5605283e-06], dtype=float32), 1.5333333333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_115.wav', 'Was die Leute immer haben!', 26, array([-5.4329084e-05, -8.8018889e-05, -7.1306808e-05, ...,\n", + " 7.3982832e-05, 5.8832418e-05, 6.6730849e-05], dtype=float32), 1.9431041666666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_124.wav', 'BloÃ\\x9f nicht!', 12, array([1.01506448e-04, 1.75192414e-04, 1.12130554e-04, ...,\n", + " 3.55834927e-05, 4.65009398e-05, 5.75332670e-05], dtype=float32), 1.0310416666666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_126.wav', \"Ich tu' immer nur rein.\", 23, array([ 3.21958287e-05, 2.19840458e-05, 1.46883485e-05, ...,\n", + " -8.37586867e-06, -5.43750639e-06, -1.22217643e-05], dtype=float32), 1.7448333333333332)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_138.wav', 'Wann lief diese Sendung?', 24, array([-1.7348650e-05, 1.9956657e-05, 3.1632226e-05, ...,\n", + " 1.5858004e-05, 1.8046559e-05, -4.8364400e-05], dtype=float32), 1.9563333333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_143.wav', 'Gehen Sie aus dem Weg!', 22, array([1.80967872e-05, 1.12411635e-05, 1.61865628e-05, ...,\n", + " 6.79703808e-05, 7.41552940e-05, 9.28417285e-05], dtype=float32), 1.3923333333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_145.wav', 'Es geht drunter und drüber.', 28, array([ 5.3915655e-06, 8.5220972e-06, -3.3527529e-05, ...,\n", + " -1.0693114e-05, -6.3991156e-06, 1.2663132e-05], dtype=float32), 1.9915833333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_149.wav', 'Suchscheinwerfer einschalten!', 29, array([-4.3899286e-06, 1.1313143e-05, -7.2204307e-06, ...,\n", + " -3.3424400e-05, -1.3328722e-05, -2.6314769e-05], dtype=float32), 1.8858333333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_152.wav', 'Ihre Uhr geht vor.', 18, array([ 1.1011517e-05, -3.0811309e-05, -2.2571772e-05, ...,\n", + " 8.1292972e-05, 7.4179443e-05, 7.1086802e-06], dtype=float32), 1.3394791666666668)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_153.wav', 'Wir suchen noch Freiwillige.', 28, array([-5.6182507e-06, -3.0251003e-05, 5.1053936e-05, ...,\n", + " -5.0866500e-05, -1.7348602e-05, -4.6226152e-05], dtype=float32), 1.9298958333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_157.wav', 'Halten Sie sofort an!', 21, array([ 2.3082459e-04, 2.3086018e-04, -2.2280088e-05, ...,\n", + " -4.5649995e-05, -3.0157349e-05, -1.7121181e-05], dtype=float32), 
1.6501041666666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_190.wav', 'Zeig uns mal, wo der Hammer hängt!', 35, array([ 4.6486733e-05, 5.3618060e-05, 4.0510302e-05, ...,\n", + " -1.0646369e-04, -7.5534314e-05, -1.2183484e-04], dtype=float32), 1.91225)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_196.wav', 'Da kann man auch parken.', 24, array([-1.0925556e-05, -3.7278984e-05, -1.0163063e-05, ...,\n", + " -6.9978710e-06, -3.4896555e-06, -6.6393928e-05], dtype=float32), 1.7624583333333332)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_204.wav', 'Eine gute Stunde ist rum.', 25, array([-2.2296244e-05, -5.8680125e-06, -5.0762057e-05, ...,\n", + " -4.8879232e-05, -8.5942098e-05, -6.8862631e-05], dtype=float32), 1.6214583333333332)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_205.wav', 'Oh ja, das fetzt!', 17, array([ 3.4871216e-06, -4.8185248e-06, 1.2310127e-05, ...,\n", + " -1.7998637e-04, -4.5437564e-04, -3.7538476e-04], dtype=float32), 1.5025)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_225.wav', 'Sag das Zauberwort!', 19, array([ 3.0607847e-05, 4.5160428e-05, 1.8997842e-05, ...,\n", + " -1.6968366e-05, 1.1446763e-05, -3.4663015e-05], dtype=float32), 1.6743333333333332)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_242.wav', 'Die Arme!', 9, array([4.2858810e-05, 7.1904920e-05, 2.9656387e-05, ..., 5.8210357e-05,\n", + " 4.0901028e-05, 3.2474836e-05], dtype=float32), 0.8636041666666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_246.wav', 'Schläfst du schon?', 19, array([ 3.2191518e-05, 5.0761428e-05, 4.3220087e-05, ...,\n", + " -4.0423780e-07, 1.7892495e-05, 5.0407853e-06], dtype=float32), 1.1456041666666668)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_247.wav', 'Ja da schau her!', 16, array([-3.9683233e-05, -9.2827155e-05, -5.1356539e-05, ...,\n", + " 8.5207663e-05, 5.3869204e-05, 8.1267404e-05], dtype=float32), 1.3394583333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_251.wav', 'Moment, das ging anders.', 24, array([-7.3496245e-05, -9.7117241e-05, -9.9846256e-05, ...,\n", + " -2.2075654e-05, -5.6377292e-05, -3.1324758e-05], dtype=float32), 1.9475208333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_254.wav', 'Weiter zum nächsten Kapitel.', 29, array([ 2.7818656e-05, 2.9083269e-05, 2.7292099e-05, ...,\n", + " -1.4497251e-05, 1.6704771e-05, 1.8156856e-05], dtype=float32), 1.7624583333333332)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_255.wav', 'Holla die Waldfee!', 18, array([ 3.2722608e-05, -3.4862321e-06, 2.1344584e-05, ...,\n", + " -3.5852513e-06, -1.3345180e-05, 1.8042003e-06], dtype=float32), 1.2777916666666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_278.wav', 'Lass mich nicht allein.', 23, array([-6.2487576e-05, -5.1307488e-05, 3.3147335e-05, ...,\n", + " -1.3666711e-06, -1.6965050e-05, 1.0842440e-05], dtype=float32), 1.7448333333333332)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_283.wav', 'Warst du beim Frisör?', 22, array([-5.1554598e-05, -2.8181448e-05, -2.1276550e-05, ...,\n", + " 5.1014787e-05, 6.0253118e-05, 4.9681836e-05], dtype=float32), 1.60825)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_291.wav', 'Warum bin ich so fröhlich?', 27, array([-9.2893220e-05, -9.0468158e-05, -8.4269959e-05, ...,\n", + " 5.6945123e-06, 2.3743269e-05, -1.5906717e-07], dtype=float32), 1.5862083333333334)\n", + 
"('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_307.wav', 'Das klingt sehr gut.', 20, array([ 8.8375481e-07, 1.4093188e-06, -8.0541049e-06, ...,\n", + " -6.2088387e-05, -3.6809190e-05, -5.5097131e-05], dtype=float32), 1.4804583333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_310.wav', 'Kann man das mitessen?', 22, array([-1.8419527e-05, -2.5431269e-05, -8.9255473e-06, ...,\n", + " 2.5581608e-05, 3.7564107e-05, 2.2521937e-05], dtype=float32), 1.5421458333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_311.wav', 'Wo liegt das Problem?', 21, array([-1.7069402e-05, 2.2379625e-06, -8.6348446e-06, ...,\n", + " 2.4881610e-05, -2.6925150e-06, 1.8407424e-06], dtype=float32), 1.8065208333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_313.wav', 'Wo kann man sich ausloggen?', 27, array([-2.94713544e-07, -2.60781735e-06, 2.09315767e-05, ...,\n", + " -1.10319825e-05, -5.37709784e-05, -2.63888141e-05], dtype=float32), 1.7888958333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_319.wav', 'Wo kommt das nur her?', 21, array([-3.2170439e-05, -2.5212325e-05, -3.7200436e-05, ...,\n", + " -9.3722010e-06, -3.0964005e-05, -1.5780270e-05], dtype=float32), 1.9298958333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_337.wav', 'Und wenn man die nicht hat?', 27, array([-4.4960318e-05, 5.2144351e-05, -2.9507015e-05, ...,\n", + " -3.9032249e-05, 3.4188946e-05, -2.3692317e-05], dtype=float32), 1.8329583333333332)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_347.wav', 'Sag ihr das bloÃ\\x9f nicht!', 24, array([8.6986920e-06, 4.4441199e-06, 3.0283294e-05, ..., 9.9162316e-05,\n", + " 7.8216704e-05, 9.9542762e-05], dtype=float32), 1.6126458333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_348.wav', 'Wo bleibst du?', 14, array([ 3.3125209e-05, 5.7069548e-05, 3.6280937e-05, ...,\n", + " -2.4643228e-05, -2.7121812e-05, -1.5307731e-05], dtype=float32), 1.2998125)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_352.wav', 'Jetzt oder nie!', 15, array([-1.7882267e-05, 1.5871639e-05, -7.5667369e-05, ...,\n", + " -3.7708491e-05, 7.9740630e-06, -7.9073770e-06], dtype=float32), 1.3747291666666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_368.wav', 'Hinsetzen und FüÃ\\x9fe hoch!', 26, array([-3.0999392e-05, -7.2621566e-05, -4.7179296e-05, ...,\n", + " -2.5928295e-05, -3.2266624e-05, 1.4868124e-05], dtype=float32), 1.7624583333333332)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_394.wav', 'Nimm dir mal eine Pause!', 24, array([-2.7986377e-04, -3.0645030e-04, -2.3860915e-04, ...,\n", + " -3.2176635e-05, -4.1073359e-05, -1.7371191e-05], dtype=float32), 1.9519166666666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_427.wav', 'Vom Kinde verschmäht.', 22, array([2.4927745e-05, 5.9401387e-05, 5.5517099e-05, ..., 8.8263223e-05,\n", + " 3.5481713e-05, 1.4234082e-05], dtype=float32), 1.9739375)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_435.wav', 'Kann ich dir helfen?', 20, array([-7.2613778e-04, 1.4254064e-03, 4.3165400e-03, ...,\n", + " 9.7870041e-05, 6.2070317e-06, 1.0954802e-04], dtype=float32), 1.3923333333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_449.wav', 'Woher will sie das wissen?', 26, array([ 6.6095758e-08, -2.7216944e-05, -1.6521408e-05, ...,\n", + " 3.0345358e-05, -5.6843191e-06, -4.2101074e-05], dtype=float32), 1.7272083333333332)\n", + 
"('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_468.wav', 'Das lass mal meine Sorge sein.', 30, array([-4.7126541e-05, -5.9281327e-05, -3.5599784e-05, ...,\n", + " 2.0367926e-05, 4.0726398e-05, 1.8718367e-05], dtype=float32), 1.9342916666666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_500.wav', 'Ich habe heute Geburtstag.', 26, array([-5.1001366e-06, 4.8161728e-05, 1.0626727e-05, ...,\n", + " -8.0793325e-05, -6.0714734e-05, -7.9644029e-05], dtype=float32), 1.9387083333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_513.wav', 'Fertig werden!', 14, array([-2.4789182e-05, -1.4137984e-05, -4.8843711e-05, ...,\n", + " 2.4393246e-05, 2.7856760e-05, 6.9619755e-06], dtype=float32), 1.3615)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_525.wav', 'Jeder trauert anders.', 21, array([-6.3906264e-06, -2.4861220e-05, -3.1557371e-05, ...,\n", + " -5.3394677e-05, 5.5594451e-06, -4.3505042e-05], dtype=float32), 1.8593958333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_530.wav', 'Wir sprechen uns später.', 25, array([ 1.9607371e-05, 1.2742041e-05, 5.9507223e-05, ...,\n", + " -1.0580019e-06, -1.0849526e-05, -2.2735680e-05], dtype=float32), 1.5950208333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_534.wav', 'Den Schuss nicht hören.', 24, array([1.0162838e-04, 1.3316146e-04, 1.3368837e-04, ..., 5.8495625e-06,\n", + " 7.8353441e-05, 3.3752654e-05], dtype=float32), 1.8726041666666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_543.wav', 'Wer tut das nicht?', 18, array([-1.5056261e-05, -2.7894443e-05, -8.4756257e-06, ...,\n", + " -4.3981410e-05, -3.8667356e-05, -4.8794256e-05], dtype=float32), 1.5773958333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_547.wav', 'Zu so später Stunde?', 21, array([-1.2750152e-04, 1.9311530e-05, -6.8482601e-05, ...,\n", + " -8.0274267e-06, 3.7486578e-05, -4.1844236e-05], dtype=float32), 1.6478958333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_569.wav', 'Ein Zirkus ohne Tiere?', 22, array([-2.8725301e-05, -5.8967784e-05, -4.7625667e-06, ...,\n", + " 5.3123777e-06, -7.1301661e-06, -2.9527286e-05], dtype=float32), 1.8461666666666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_570.wav', 'Sag schon, was ist drin?', 24, array([ 1.10985304e-04, 5.97430153e-05, 9.55062278e-05, ...,\n", + " 6.52888993e-05, -5.82730863e-05, 6.85385385e-05], dtype=float32), 1.8373541666666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_574.wav', 'Wir zählen auf dich.', 21, array([-7.5106524e-05, -9.9009638e-05, -7.9571801e-05, ...,\n", + " 3.8461326e-06, 8.2744657e-05, 5.6746823e-05], dtype=float32), 1.9210833333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_585.wav', 'Das funktioniert auch.', 22, array([ 5.4934342e-05, 1.7679840e-05, -5.7660582e-05, ...,\n", + " 4.9520886e-06, -2.5478117e-05, -6.3567706e-05], dtype=float32), 1.4628333333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_589.wav', 'Was drauf?', 10, array([ 1.5172187e-05, 3.5768371e-05, -4.6845405e-05, ...,\n", + " 2.3743922e-05, -3.8076912e-05, 2.2450782e-05], dtype=float32), 1.2072916666666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_594.wav', 'Einer zur Zeit!', 15, array([-1.9068037e-05, -2.0037192e-05, -8.8215660e-05, ...,\n", + " -1.8433493e-05, -3.3125831e-05, 3.5209345e-05], dtype=float32), 1.4099583333333334)\n", + 
"('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_606.wav', 'Okay, und nun?', 14, array([-1.2366170e-05, 2.3954278e-06, -1.8647337e-05, ...,\n", + " -2.4212586e-06, 6.3337334e-06, -2.5126603e-06], dtype=float32), 1.5597708333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_607.wav', 'Das verstehst du noch nicht.', 28, array([ 1.6215906e-04, 2.5805720e-04, 2.2398161e-04, ...,\n", + " -5.9032095e-06, -1.2547288e-06, -1.8913257e-05], dtype=float32), 1.7095833333333332)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_609.wav', 'Wie uncool!', 11, array([-1.1241895e-05, -3.2969092e-05, -5.8745212e-05, ...,\n", + " 8.5234688e-06, 1.9909365e-05, 1.7495377e-05], dtype=float32), 1.0927291666666668)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_624.wav', 'Setzt dich gerade!', 18, array([-1.7491520e-05, 6.7394591e-05, 5.0117076e-05, ...,\n", + " -2.1143003e-05, -1.6165326e-05, -1.6601503e-05], dtype=float32), 1.3835208333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_638.wav', 'Nicht schlecht der Specht!', 26, array([-1.0250892e-05, 1.4861113e-05, -5.1604333e-05, ...,\n", + " 7.6938113e-06, 2.0211788e-05, 4.5162437e-06], dtype=float32), 1.8153333333333332)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_647.wav', 'Was haben die vor?', 18, array([ 1.2927976e-06, -4.4330540e-05, -4.2087355e-05, ...,\n", + " 1.2652035e-04, -7.1286093e-05, -1.9011653e-06], dtype=float32), 1.4628333333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_648.wav', 'Ich habe gar nichts mitbekommen.', 32, array([ 1.6062468e-05, 4.4314598e-05, 1.1317232e-05, ...,\n", + " -8.4248430e-05, -4.8613791e-05, -4.1891144e-05], dtype=float32), 1.9915833333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_664.wav', 'Je mehr, desto besser.', 22, array([-1.0978662e-05, 2.8232571e-06, -2.7930673e-05, ...,\n", + " 5.0805535e-05, 3.9726485e-05, 6.7175766e-05], dtype=float32), 1.8505833333333332)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_677.wav', 'Da vorne links!', 15, array([ 3.8325859e-05, 3.2421449e-05, 1.5961947e-05, ...,\n", + " 2.6722651e-05, -3.3873417e-05, 3.2344939e-05], dtype=float32), 1.4363958333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_682.wav', 'Jetzt mal halblang!', 19, array([-2.0417360e-06, -1.3626728e-05, -2.8990502e-05, ...,\n", + " -2.2435464e-05, -3.3464916e-05, 2.5530893e-05], dtype=float32), 1.4892708333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_687.wav', 'Gib uns ein Beispiel!', 21, array([-8.4907850e-05, -5.6986839e-05, 3.7472455e-06, ...,\n", + " -1.4217812e-05, -2.3697576e-05, -2.4605337e-05], dtype=float32), 1.6567083333333332)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_689.wav', 'Von wegen!', 10, array([-2.7207323e-05, -6.9836324e-06, -9.1906164e-05, ...,\n", + " 6.5761873e-05, 5.3384709e-05, 3.5098144e-06], dtype=float32), 0.8547916666666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_691.wav', 'Das finde ich ziemlich doof.', 28, array([-2.9834633e-05, 5.6474819e-06, -2.5375591e-06, ...,\n", + " -3.2603730e-06, -5.9017879e-05, -9.6670803e-05], dtype=float32), 1.7977083333333332)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_697.wav', 'Das trifft sich gut.', 20, array([ 7.9529818e-06, 3.9593842e-06, 3.0517844e-05, ...,\n", + " -4.2052940e-05, -3.0681629e-05, -2.6093589e-05], dtype=float32), 1.8241458333333334)\n", + 
"('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_704.wav', 'Jetzt gibt es Zoff.', 19, array([ 1.7251841e-05, 3.0525447e-05, 4.0081544e-05, ...,\n", + " -2.7181366e-05, -6.4996988e-05, -2.0187828e-05], dtype=float32), 1.6655208333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_708.wav', 'Liebe ist kein Verbrechen.', 26, array([ 1.3942296e-03, 2.0183886e-03, 1.7392144e-03, ...,\n", + " 4.2136421e-06, 1.5667934e-05, -1.1447505e-05], dtype=float32), 1.8329583333333332)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_725.wav', 'Auch das noch!', 14, array([ 6.9235853e-06, 1.0541713e-05, -6.9821567e-06, ...,\n", + " -6.0647875e-05, -3.7899004e-05, 1.4291401e-05], dtype=float32), 1.2337083333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_728.wav', 'Toller Hengst!', 14, array([ 1.5415973e-05, 1.2052349e-05, 2.2745300e-05, ...,\n", + " -5.1455394e-05, -8.6221211e-05, -2.3398878e-05], dtype=float32), 1.1632291666666668)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_748.wav', 'Reine Gewöhungssache.', 22, array([-7.46887818e-05, 3.63702893e-05, 2.65028193e-05, ...,\n", + " 1.14920855e-04, 8.75776823e-05, 7.50372201e-05], dtype=float32), 1.4452083333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_764.wav', 'Siehe weiter unten.', 19, array([ 1.0315010e-04, 1.2668683e-04, 1.3160890e-04, ...,\n", + " 3.5362529e-05, -4.0091851e-05, 3.1800329e-05], dtype=float32), 1.5509583333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_778.wav', 'Hilfe ein Ã\\x9cberfall!', 20, array([-9.1011774e-05, -1.6054764e-04, -6.9503607e-05, ...,\n", + " -3.2605390e-06, -1.1628125e-05, -4.9398786e-05], dtype=float32), 1.4011458333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_8_FINAL/8_804.wav', \"Wen wundert's?\", 14, array([ 1.8174978e-05, 1.0757233e-05, 1.4760263e-05, ...,\n", + " -4.7010188e-05, -6.0861544e-06, -1.5782018e-05], dtype=float32), 1.2601666666666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_19.wav', 'Bis die Schwarte kracht.', 24, array([ 3.15900324e-05, -1.30308879e-04, 3.94875406e-06, ...,\n", + " 3.35644108e-05, 1.02667604e-04, 4.54106703e-05], dtype=float32), 1.7536354166666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_26.wav', 'Auch das wäre möglich.', 24, array([-4.5410670e-05, 1.9743770e-06, -1.9743769e-05, ...,\n", + " 4.3436296e-05, -1.9743770e-06, 3.5538786e-05], dtype=float32), 1.5421458333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_28.wav', 'Was geht gar nicht?', 19, array([ 1.9743770e-06, 9.8718847e-06, -3.3564411e-05, ...,\n", + " 1.2241138e-04, -4.5410670e-05, 0.0000000e+00], dtype=float32), 1.4065104166666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_33.wav', 'Die Geschichte geht anders.', 27, array([-3.3564411e-05, -7.7000703e-05, -8.2923834e-05, ...,\n", + " 3.3564411e-05, -3.9487541e-06, 3.1590032e-05], dtype=float32), 1.5333333333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_42.wav', 'Welche SchuhgröÃ\\x9fe?', 20, array([ 0.0000000e+00, -1.1846262e-05, -5.9231311e-06, ...,\n", + " 4.5410670e-05, -3.9487539e-05, 2.9615656e-05], dtype=float32), 1.5685833333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_51.wav', 'Mediathek aufrufen!', 19, array([-3.6328536e-04, 1.9941208e-04, -8.4898209e-05, ...,\n", + " 5.9231311e-06, -5.7256933e-05, -4.9359427e-05], dtype=float32), 1.9739479166666667)\n", + 
"('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_54.wav', 'Es tut ihr furchtbar leid.', 26, array([-3.3564411e-05, -4.9359427e-05, 1.1846262e-05, ...,\n", + " 5.1333802e-05, -8.8846966e-05, 5.7256933e-05], dtype=float32), 1.9563229166666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_62.wav', 'Noch mal von vorne, bitte.', 26, array([ 2.5666901e-05, -2.9615656e-05, -3.7513164e-05, ...,\n", + " 8.6872591e-05, -5.7256933e-05, 6.9103196e-05], dtype=float32), 1.8417604166666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_63.wav', 'Oh jemine!', 10, array([-1.7769393e-05, -6.9103196e-05, -3.7513164e-05, ...,\n", + " 5.7256933e-05, 5.1333802e-05, 3.9487539e-05], dtype=float32), 1.20728125)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_84.wav', 'Reich mir den mal rüber.', 25, array([-1.5795016e-05, -9.8718847e-06, 6.7128822e-05, ...,\n", + " 0.0000000e+00, -1.2833450e-04, 3.3564411e-05], dtype=float32), 1.7448333333333332)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_101.wav', 'Findest du nicht auch?', 22, array([-7.3051953e-05, -9.8718847e-06, 5.9231311e-06, ...,\n", + " 2.5666901e-05, -5.3308180e-05, 1.1451387e-04], dtype=float32), 1.32184375)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_113.wav', 'Alles korrekt.', 14, array([ 1.1846262e-05, 2.9615656e-05, 1.2833450e-04, ...,\n", + " -1.9743769e-05, 2.7641279e-05, -1.7769393e-05], dtype=float32), 1.3923333333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_128.wav', 'Alles wird gut.', 15, array([-9.2795723e-05, -3.1590032e-05, 8.2923834e-05, ...,\n", + " 1.3820640e-05, -4.7385049e-05, 1.1846262e-05], dtype=float32), 1.6038333333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_156.wav', 'Würde ich auch machen.', 23, array([-6.1205690e-05, -5.3308180e-05, -5.5282559e-05, ...,\n", + " -9.8718847e-06, -1.1648824e-04, -6.1205690e-05], dtype=float32), 1.3747083333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_173.wav', 'Gib Gas!', 8, array([ 3.7513164e-05, 7.1077571e-05, -1.9743770e-06, ...,\n", + " 5.9231312e-05, -3.0405406e-04, 4.5410672e-04], dtype=float32), 1.03984375)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_221.wav', 'Weil er es kann.', 16, array([ 3.9487541e-06, -3.1590032e-05, 2.1718148e-05, ...,\n", + " -9.6744472e-05, -3.9487539e-05, -6.3180065e-05], dtype=float32), 1.4011458333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_225.wav', 'Was ist der Sinn des Lebens?', 28, array([-4.7385049e-05, -9.0821341e-05, 8.6872591e-05, ...,\n", + " 7.8975081e-06, -1.3820640e-05, -2.0730958e-04], dtype=float32), 1.9563229166666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_247.wav', 'Es ist kalt.', 12, array([-5.33081802e-05, -1.14513867e-04, 2.36925243e-05, ...,\n", + " -4.34362955e-05, 5.92313108e-06, -1.08590735e-04], dtype=float32), 1.17203125)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_259.wav', 'ScheiÃ\\x9f drauf!', 14, array([ 9.8718854e-05, 3.9487541e-06, 5.9231312e-05, ...,\n", + " -3.3564411e-05, -1.7769393e-05, -1.1253949e-04], dtype=float32), 1.19846875)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_271.wav', 'Katzen haben sieben Leben.', 26, array([-1.57950162e-05, 7.89750811e-06, -6.12056901e-05, ...,\n", + " -1.04641986e-04, -7.30519532e-05, -5.92313108e-06], dtype=float32), 1.8593854166666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_285.wav', 'Nicht so lasch!', 15, 
array([ 3.3564411e-05, 1.4412952e-04, -8.8846966e-05, ...,\n", + " 5.9231311e-06, -1.4610391e-04, -3.1590032e-05], dtype=float32), 1.4187708333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_309.wav', 'Ich gehe dann mal kicken.', 25, array([-7.8975077e-05, -5.1333802e-05, 2.1718148e-05, ...,\n", + " -9.8718847e-06, 0.0000000e+00, 1.7769393e-05], dtype=float32), 1.6743229166666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_313.wav', 'Versuchen Sie es später noch einmal!', 37, array([1.7769393e-04, 1.5597578e-04, 7.7000703e-05, ..., 1.7769393e-05,\n", + " 2.5666901e-05, 0.0000000e+00], dtype=float32), 1.9563125)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_327.wav', 'Ulrike muss es ja wissen.', 25, array([ 1.3820640e-05, -4.3436296e-05, -2.5666901e-05, ...,\n", + " 8.0949460e-05, 3.1590032e-05, -1.5795016e-05], dtype=float32), 1.5553645833333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_329.wav', 'So viele schon?', 15, array([-1.1451387e-04, -9.4770097e-05, 1.3820640e-05, ...,\n", + " -9.8718847e-06, 7.8975081e-06, 3.3564411e-05], dtype=float32), 1.32625)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_339.wav', 'Noch nicht.', 11, array([-1.1569849e-03, -1.1234205e-03, -1.1056511e-03, ...,\n", + " -4.1461917e-05, -1.9743770e-06, -2.3692524e-05], dtype=float32), 0.9473229166666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_351.wav', 'So alt bin ich dann auch wieder nicht.', 38, array([ 1.0957792e-03, 8.6082838e-04, 5.8836438e-04, ...,\n", + " -7.7000703e-05, -1.0661636e-04, -5.3308180e-05], dtype=float32), 1.9100520833333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_352.wav', 'Diese Gelegenheit kann man nutzen.', 34, array([ 1.2043700e-04, 1.9743769e-05, 7.5026328e-05, ...,\n", + " 3.1590032e-05, 6.5154440e-05, -5.1333802e-05], dtype=float32), 1.9981770833333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_361.wav', 'Bist neidisch, was?', 19, array([ 3.9487539e-05, 1.7769393e-05, -2.9615656e-05, ...,\n", + " -5.9231311e-06, 1.9743770e-06, -3.1590032e-05], dtype=float32), 1.4848645833333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_364.wav', 'Puh, das Quiz ist schwer!', 25, array([-5.9231312e-05, -6.9103196e-05, -8.2923834e-05, ...,\n", + " 1.3623202e-04, 1.3030888e-04, 2.1520710e-04], dtype=float32), 1.9497083333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_371.wav', 'Alles klärchen!', 16, array([-3.9487541e-06, 3.3564411e-05, 1.5795016e-05, ...,\n", + " 5.1333802e-05, 6.1205690e-05, 3.5538786e-05], dtype=float32), 1.5068854166666668)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_392.wav', 'Zeig mal dein Piercing!', 23, array([ 5.7256933e-05, 1.3820640e-05, 3.5538786e-05, ...,\n", + " -6.1205690e-05, -9.8718847e-06, 5.5282559e-05], dtype=float32), 1.7712604166666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_393.wav', 'Parlieren Sie doch im Park!', 27, array([-7.1077571e-05, -5.3308180e-05, -5.7256933e-05, ...,\n", + " 1.5795015e-04, 1.1253949e-04, 1.0069323e-04], dtype=float32), 1.8505729166666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_400.wav', 'Hä, was? 
400', 13, array([ 3.5538786e-04, 2.7443841e-04, 2.5469463e-04, ...,\n", + " -6.3180065e-05, -1.7769393e-05, -5.9231311e-06], dtype=float32), 0.98696875)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_431.wav', 'Tun Sie nicht so überrascht!', 29, array([-2.2310461e-04, -2.6259213e-04, -3.0800281e-04, ...,\n", + " -7.7000703e-05, -1.0661636e-04, -1.1451387e-04], dtype=float32), 1.6743229166666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_438.wav', 'Was lernen wir daraus?', 22, array([ 3.35644108e-05, -7.70007027e-05, -7.30519532e-05, ...,\n", + " -1.02667604e-04, -8.68725911e-05, -2.76412793e-05], dtype=float32), 1.9078541666666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_439.wav', 'Was will sie denn noch?', 23, array([ 5.3308180e-05, 5.7256933e-05, -3.9487539e-05, ...,\n", + " -3.9487541e-06, 5.5282559e-05, 6.9103196e-05], dtype=float32), 1.7624479166666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_441.wav', 'Ich stecke fest.', 16, array([-1.6584767e-04, -1.5795015e-04, -1.3030888e-04, ...,\n", + " 9.2795723e-05, 7.5026328e-05, 7.5026328e-05], dtype=float32), 1.2976041666666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_451.wav', 'Es riecht nach Sonnencreme.', 27, array([-4.1461917e-05, -3.7513164e-05, 2.1718148e-05, ...,\n", + " -2.7641279e-05, -1.0661636e-04, -1.0069323e-04], dtype=float32), 1.7007604166666668)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_452.wav', 'Da entlang!', 11, array([-1.7769393e-05, 5.9231311e-06, 1.7769393e-05, ...,\n", + " -7.8975081e-06, 7.8975081e-06, 0.0000000e+00], dtype=float32), 0.97815625)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_453.wav', 'Tja, Thaddäus!', 15, array([-9.87188541e-05, -1.46103906e-04, -1.24385755e-04, ...,\n", + " 1.02667604e-04, 1.97437703e-06, -2.76412793e-05], dtype=float32), 1.6787395833333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_458.wav', 'Das Leben ist voller Ã\\x9cberraschungen.', 37, array([-3.9487539e-05, 1.1846262e-05, -1.3820640e-05, ...,\n", + " 6.1205690e-05, 3.1590032e-05, 1.9743770e-06], dtype=float32), 1.9739375)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_460.wav', 'Danke der Nachfrage!460', 23, array([ 8.29238343e-05, 1.16488241e-04, 9.67444721e-05, ...,\n", + " -1.12539492e-04, -1.08590735e-04, -1.42155142e-04], dtype=float32), 1.4275833333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_465.wav', 'Ich will auch so ein Pferd.', 27, array([ 0.0000000e+00, 1.9743770e-06, -3.1590032e-05, ...,\n", + " 7.8975077e-05, -3.9487539e-05, -5.7256933e-05], dtype=float32), 1.5404791666666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_473.wav', 'Was hat das zu bedeuten?', 24, array([-1.26360130e-04, -1.08590735e-04, -1.16488241e-04, ...,\n", + " 8.29238343e-05, 2.36925243e-05, -1.57950162e-05], dtype=float32), 1.8241354166666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_492.wav', 'Die Narkose wirkt nicht.', 24, array([ 1.7571956e-04, 1.6782204e-04, 7.8975077e-05, ...,\n", + " 3.1590032e-05, -2.1718148e-05, -2.7641279e-05], dtype=float32), 1.965125)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_495.wav', 'Dein Bruder ist echt krass drauf.', 33, array([-7.10775712e-05, -3.35644108e-05, -2.17181478e-05, ...,\n", + " 1.16488241e-04, 1.02667604e-04, 7.89750775e-05], dtype=float32), 1.9563125)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_498.wav', 'Das behaupten alle.', 
19, array([-1.1253949e-04, -1.1846262e-04, -9.8718854e-05, ...,\n", + " 5.5282559e-05, -1.1846262e-05, 4.5410670e-05], dtype=float32), 1.3658958333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_500.wav', 'So einfach ist es nicht.', 24, array([-0.00019349, -0.00019744, -0.00022113, ..., -0.00021521,\n", + " -0.0002231 , -0.00020534], dtype=float32), 1.7976979166666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_503.wav', 'Das gibt ihm den Rest.', 22, array([-3.94875406e-06, -6.71288217e-05, -1.20436998e-04, ...,\n", + " 1.04641986e-04, 1.24385755e-04, 1.14513867e-04], dtype=float32), 1.5068958333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_507.wav', 'Was fällt euch ein?', 20, array([9.8718854e-05, 9.0821341e-05, 6.7128822e-05, ..., 1.7374518e-04,\n", + " 2.0730958e-04, 1.5795015e-04], dtype=float32), 1.5421458333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_511.wav', 'Lass die Glucke in Ruhe!', 24, array([2.6851529e-04, 2.3692525e-04, 8.2923834e-05, ..., 9.2795723e-05,\n", + " 6.3180065e-05, 6.1205690e-05], dtype=float32), 1.4099583333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_512.wav', 'Wieso denn das nicht?', 21, array([-3.9487541e-06, -2.5666901e-05, -6.9103196e-05, ...,\n", + " 3.1590032e-05, -1.9743770e-06, 1.3820640e-05], dtype=float32), 1.4804583333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_542.wav', 'Na gut, ich komme mit.', 22, array([ 6.3180065e-05, -3.9487541e-06, 4.3436296e-05, ...,\n", + " -6.9103196e-05, -6.5154440e-05, 7.8975081e-06], dtype=float32), 1.8329479166666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_543.wav', 'Entschuldige dich bei ihr.', 26, array([-9.6744472e-05, -7.8975077e-05, -5.1333802e-05, ...,\n", + " -7.7000703e-05, -1.2241138e-04, -5.9231312e-05], dtype=float32), 1.7624479166666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_544.wav', 'Das reimt sich ja.', 18, array([ 1.1056512e-04, 8.4898209e-05, 1.1648824e-04, ...,\n", + " -9.0821341e-05, -1.1451387e-04, -1.1253949e-04], dtype=float32), 1.25134375)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_549.wav', 'Sie kamen, um zu bleiben.', 25, array([ 3.9487539e-05, 7.8975081e-06, 3.3564411e-05, ...,\n", + " 2.1718148e-05, -2.7641279e-05, -9.6744472e-05], dtype=float32), 1.8593854166666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_555.wav', 'Sie nimmt kein Blatt vor den Mund.', 34, array([-1.1569849e-03, -7.0287823e-04, -5.3308180e-05, ...,\n", + " 2.5666901e-05, 1.5795016e-05, -1.9743769e-05], dtype=float32), 1.8021041666666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_556.wav', 'Hoffentlich geht es ihm gut.', 28, array([1.0187785e-03, 1.1372411e-03, 1.2616270e-03, ..., 3.5538786e-05,\n", + " 7.8975081e-06, 5.9231312e-05], dtype=float32), 1.9342916666666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_557.wav', 'Vergiss deine Schoner nicht!', 28, array([ 3.9487539e-05, -5.5282559e-05, -2.0336083e-04, ...,\n", + " -6.9103196e-05, -7.1077571e-05, -7.1077571e-05], dtype=float32), 1.8461666666666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_590.wav', 'Wenn du schon so fragst!', 24, array([ 1.4610391e-04, 1.4807828e-04, 1.7966831e-04, ...,\n", + " 1.7769393e-05, -4.3436296e-05, -2.7641279e-05], dtype=float32), 1.8329479166666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_595.wav', 'Was muss ich einkaufen?', 23, array([ 
0.00016387, 0.00012636, 0.00011254, ..., -0.00010464,\n", + " -0.00011649, -0.00010464], dtype=float32), 1.6038229166666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_599.wav', 'Der tut nichts!', 15, array([ 9.2795723e-05, 6.1205690e-05, 2.5666901e-05, ...,\n", + " -1.1648824e-04, -9.8718854e-05, -7.8975077e-05], dtype=float32), 1.16321875)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_605.wav', 'Natürlich war es das.', 22, array([3.1590032e-05, 1.9743769e-05, 7.8975077e-05, ..., 1.4610391e-04,\n", + " 1.6782204e-04, 1.4412952e-04], dtype=float32), 1.5157083333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_612.wav', 'Sprechen Sie deutsch?', 21, array([ 2.05335207e-04, 1.91514569e-04, 1.57950155e-04, ...,\n", + " 2.96156559e-05, -6.31800649e-05, -1.02667604e-04], dtype=float32), 1.5157083333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_616.wav', 'Gleich hole ich Anne ab.', 24, array([6.3180065e-05, 7.1077571e-05, 1.2636013e-04, ..., 2.1718148e-05,\n", + " 3.1590032e-05, 1.3820640e-05], dtype=float32), 1.4099583333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_646.wav', 'Ich bin ganz hin und weg!', 25, array([-3.5143911e-04, -2.5666901e-04, -1.6979642e-04, ...,\n", + " -4.3436296e-05, -6.1205690e-05, 4.3436296e-05], dtype=float32), 1.7800729166666667)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_647.wav', 'Frische Luft tut gut.', 21, array([ 1.04641986e-04, 1.97437703e-06, -7.89750811e-06, ...,\n", + " 8.48982090e-05, 1.38206397e-05, -7.89750811e-06], dtype=float32), 1.6655104166666668)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_652.wav', 'Nein, du Genie!', 15, array([1.3623202e-04, 1.2833450e-04, 1.2833450e-04, ..., 2.7641279e-05,\n", + " 4.5410670e-05, 5.1333802e-05], dtype=float32), 1.5068958333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_675.wav', 'Das kann doch wohl nicht wahr sein!', 35, array([-1.9743769e-05, -3.3564411e-05, 3.1590032e-05, ...,\n", + " 9.2795723e-05, 9.6744472e-05, 1.2043700e-04], dtype=float32), 1.9827604166666666)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_684.wav', 'Jedes Kind weiÃ\\x9f das.', 21, array([-1.204370e-04, -1.382064e-04, -9.674447e-05, ..., -1.461039e-04,\n", + " -1.382064e-04, -8.489821e-05], dtype=float32), 1.4716458333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_711.wav', 'Die werden ja nicht schlecht.', 29, array([-1.3030888e-04, -1.0069323e-04, -8.2923834e-05, ...,\n", + " -1.3228325e-04, -1.1253949e-04, -9.6744472e-05], dtype=float32), 1.4716458333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_719.wav', 'Das Leben ist schön!', 21, array([ 2.5666901e-05, 4.7385049e-05, 2.9615656e-05, ...,\n", + " -3.3564411e-05, 3.3564411e-05, 7.8975077e-05], dtype=float32), 1.2953958333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_720.wav', 'Was machst du jetzt?', 20, array([-1.5597578e-04, -1.2833450e-04, -1.3425764e-04, ...,\n", + " -5.3308180e-05, -3.9487541e-06, 3.9487541e-06], dtype=float32), 1.4099583333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_728.wav', 'Falsche Antwort.', 16, array([-5.1333802e-05, -1.1846262e-05, 9.8718847e-06, ...,\n", + " -9.8718847e-06, -4.7385049e-05, -5.3308180e-05], dtype=float32), 1.3923333333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_743.wav', 'Oder wir gehen Burger essen.', 28, array([-2.9615656e-05, -4.7385049e-05, 
-3.1590032e-05, ...,\n", + " -8.2923834e-05, -5.1333802e-05, 6.3180065e-05], dtype=float32), 1.7007604166666668)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_744.wav', 'Lasst mich allein!', 18, array([-0.00018757, -0.00018757, -0.00024877, ..., -0.00011846,\n", + " -0.00011057, -0.00013031], dtype=float32), 1.7007604166666668)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_745.wav', 'Da sind wir wieder.', 19, array([7.2459638e-04, 7.7395578e-04, 8.3911023e-04, ..., 0.0000000e+00,\n", + " 1.3820640e-05, 1.7769393e-05], dtype=float32), 1.4804583333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_754.wav', 'So weit, so gut.', 16, array([-1.7769393e-05, -3.5538786e-05, 3.5538786e-05, ...,\n", + " 9.8718847e-06, -5.3308180e-05, -4.3436296e-05], dtype=float32), 1.5245208333333333)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_756.wav', 'Alles war voller Qualm.', 23, array([ 6.3180065e-05, 2.9615656e-05, 3.7513164e-05, ...,\n", + " -3.1590032e-05, -3.3564411e-05, 2.1718148e-05], dtype=float32), 1.5333333333333334)\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_9_FINAL/9_765.wav', 'Fick dich!', 10, array([-3.9487539e-05, -8.0949460e-05, -5.7256933e-05, ...,\n", + " 3.9487539e-05, 7.8975077e-05, 9.4770097e-05], dtype=float32), 0.9076666666666666)\n" + ] + } + ], + "source": [ + "# print clips shorter than 2 sec\n", + "for item in data:\n", + " if item[-1] < 2:\n", + " print(item)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "sec_per_chars = []\n", + "for item in data:\n", + " text = item[1]\n", + " dur = item[-1]\n", + " sec_per_char = dur / len(text)\n", + " sec_per_chars.append(sec_per_char)\n", + "# sec_per_char /= len(data)\n", + "# print(sec_per_char)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "Collapsed": "false" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " > Average durations per char: 0.07641993439576344\n", + " > STD duration per char: 0.015251748851166484\n" + ] + } + ], + "source": [ + "mean = np.mean(sec_per_chars)\n", + "std = np.std(sec_per_chars)\n", + "print(\" > Average durations per char: \", mean)\n", + "print(\" > STD duration per char: \", std)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "# fit a distribution\n", + "dist = norm(mean, std)\n", + "\n", + "# find irregular instances long or short voice durations\n", + "items =[]\n", + "pdfs = []\n", + "for item in data:\n", + " text = item[1]\n", + " dur = item[-1]\n", + " sec_per_char = dur / len(text)\n", + " pdf = norm.pdf(sec_per_char)\n", + " pdfs.append(pdf)\n", + " items.append(item)\n", + "# if pdf < 0.395:\n", + "# print(item)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "Collapsed": "false" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAA6gAAAOFCAYAAABnc8/AAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADt0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjByYzMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy9h23ruAAAgAElEQVR4nOzdd2AUZd4H8O+zm957QgqkUkJCKAmh9x4VEATECqJYALuCYANFLKfevbaznXp3WO70VMTeRVSqCCggQpDeOwRIMu8fWzK7O7M7u9nNzm6+n38gu7Ozz87Ozjy/p/weIUkSiIiIiIiIiPzN4O8CEBEREREREQEMUImIiIiIiEgnGKASERERERGRLjBAJSIiIiIiIl1ggEpERERERES6wACViIiIiIiIdCHE3wWwl5KSIuXm5vq7GEREREREROQDK1euPCBJUqrSc7oLUHNzc7FixQp/F4OIiIiIiIh8QAixTe05DvElIiIiIiIiXWCASkRERERERLrAAJWIiIiIiIh0gQEqERERERER6QIDVCIiIiIiItIFBqhERERERESkCwxQiYiIiIiISBcYoBIREREREZEuMEAlIiIiIiIiXWCASkRERERERLrAAJWIiIiIiIh0gQEqERERERER6QIDVCIiIiIiItIFBqhERERERESkCwxQiYiIiIiISBcYoBIREREREZEuMEAlIiIiIiIiXWCASkRERERERLrAAJWIiIiIiIh0gQEqERERERER6YKmAFUIMUwIsVEIsVkIMdPJdmOEEJIQolz22Czz6zYKIYZ6o9BEREREREQUfEJcbSCEMAJ4GsBgADsALBdCvC9J0q9228UCuBHAT7LHigFMANAeQCaAz4UQrSVJqvPeRyAiIiIiIqJgoKUHtSuAzZIkbZEk6SyANwCMVNhuHoCHAdTIHhsJ4A1Jks5IkrQVwGbz/oiIiIiIiIhsaAlQswBsl/29w/yYlRCiM4AcSZIWu/taIiIiIiIiIsALSZKEEAYAjwO4tRH7uEYIsUIIsWL//v2NLRIREREREREFIC0B6k4AObK/s82PWcQCKAHwtRCiGkA3AO+bEyW5ei0AQJKk5yVJKpckqTw1NdW9T0BERERERERBQUuAuhxAkRAiTwgRBlPSo/ctT0qSdFSSpBRJknIlScoF8COACyRJWmHeboIQIlwIkQegCMAyr38KIiIiIiIiCngus/hKklQrhJgG4BMARgAvS5K0XggxF8AKSZLed/La9UKItwD8CqAWwA3M4EtERERERERKhCRJ/i6DjfLycmnFihX+LgYRERERERH5gBBipSRJ5UrPNTpJEhEREREREZE3MEAlIiIiIiIiXWCASkRERERERLrAAJWIiIiIiIh0gQEqERERERER6QIDVCIiIiIiItIFBqhERERERESkCwxQiYiIiIh8rOZcHWrO1fm7GES6xwCViIiIiMjH2t79MbrM+8zfxSDSPQaoRERERERN4OTZ5tmD+vve4/jvyh3+LgYFCAaoRD6yctthnDpb6+9i6NqBE2eQO3Mx3lq+3d9FISIiIh8Z/MS3uO0/a/xdDAoQDFCbyI7Dp3DhM9/j8Mmzml+zovpQowKc1X8exvTXV6O+XvJ4H/6291gNTpxpOAZfbdiHpX8ccHs/l7+8DDe9sdqt19TW1WN59SEAwJLfDyB35mLkzVqM02fr8P6aXfhxy0HVMu85WoMxzy7FzW/+7HZZ/e273/djy/4TLrc7V1eP2rp6t/YtSZLN+fj4Z5sAAHe8/Yvqa87U1mHf8Rq33qep7bM7T4mIiPTkyw17cbbWvXs2+UbNuTos+b2hLnvk1FkcPXXOo329tGQrcmcuxoY9x7xVPF1ggOqBunoJy6sPofyBz9B6zkc4dbYW63YexaOfbFD98T/3zR9Y9ecRfLB2t+LzP245iDpzxX3P0Rq8tXw7xj73A255cw1OnKnFXf9bi3U7j2ou333vr8foZ5Zi0ZpdOHDyjMPzdS6C1u2HTuG9n3cqPrdu51HsPWYbMLy8ZCt+3n4Es975xa0LYG1dPSRJvSyV87/ABf+3xPr3pFeWY+ILP2HeB7/ik/V7sGHPMVzx8jK8urQa0xauQs05U/A48qklOHrqHL7ZtB8A8O2m/Xj35104cuos1u08ityZi/HQR78BMAVAM15fjRXVh/Dxuj3YdvAkvtq4D4WzP8JFz/2AO//7C95fYzoWkgTMeXcdZry+GhOe/xHTFq7Cwp/+tJavvl5C5fwvMPVfKwEAa3c0fGfbDp5EzTlTsLVl/wl8tXEfXvxuC06aA5tNe49DkiQcPX0O76yyHQbz/eYD+HWX84tPXb3kcCy3HzqFuYt+tQkKf9lxBC9+twUTnv8BNefqIEmm1207eBIb9xzHZS8tw4C/fAPAFHidqa2z7vf3vcfx2a97Me65H1A0+yMUzv4Im/fZBrNXv7YCT335u2IZ+z76NTrN+wy/7T6GI6fOYv/xhnNz0OPf4MnPN+HCZ763ec3Nb/6Mrg9+YS3DlFeX4+UlWwGYLvJfbdjndqDsbV3nf4Hz/vZdk76nJElevSH9tOUgPrS7Pj3+6UZcZz6XPXXwxBm88O0W1d/57qOnnTbErdx2GFNeXeHxzftsbT3+/dM267Xmm037NTXafbR2N3JnLsYf+09Yr5cHT5xBXb2ErQdO4qO1u/HLjiMuf5eBzlL56ffoV26/dsfhU/jnj9twrOYcth08CcB0jfxt9zGcqa3Dpr3H8cr3W1FXL2HxL7uxbudR/LH/BCrnf449R/XdKEUNmrIBseZcHd5asd1pvUGPjp46h0MaOijeWbVD9Vq3/dApXPbST9Y6gxb15nrB8upDmPzKCsx85xf8uOUgTpypxX9WbHe7A+PPg6dQW1ePpX8cwAvfbkHuzMXWpE/n6urx189/d9mx8s8ft+Hjdcp1YblzdfU4dPIsfvjjoE05D588i+M1nt0PLH7ddQxt7/4Ifx485dbrdh89jZ+3H3G6zbm6epyzq5Os23kUkiThxy0HceMbqzH6maW49KWfsH6XqY7Yce5nKJv7KbYdPInZ/1uLl5dsxeUvL8OBE2ew/dAp3PW/tXhpyVas33UUv+46hl1HTmPjnuMAgIc/3gAAGPbkd/hDQ+dCoBB6+5GXl5dLK1as8HcxnLr85WX41hz4AEDPwmR8v9nUmzaoXTqev6wLDAYBANh64CT+u3I7nv7qD+v2c6raITYiBHe+vRbZiZGYN7IEk15Zjj6tU232ay88xIDf5g7Dol924bwOmTAI4Np/rcT9F5TgndU7MKhdOlqnx2LZ1kMY9/cfrK9bdtdARIWH4Nhp0w+632NfQwDY+MBwnKurx/PfbsFVvfIQEWoEYGrJ6Tr/C5ytrUf1giqHcuTOXAwAWHf/UMSEh2DnkdPoueBL6/MzBhbhb1/8ju/u6I+cpCiH1//zx23o3DIB7TLikH/Xh5jcMw83DS7C2dp6pMSEAwBWbjuEgtQYdJxrSiYwtku2prkLseEhOG538b68eyu89sM2xe3X3D
MEZXM/dblfV6b2zcffv9mC7vnJ+EGlZ1WLEaUZOHjiLH7aeghzqtrh1Nk6dGmViEte/AkAMLg4HS9cXo6xzy5FvSRh4dXdIEnAul1HcdFzDd95iEGgVnZBf39aT3TITgDQ8P3JGQTg7D4VFWbE1D4FeOLzTU7LP6hdGj7/bR8AoGteEspbJeJMbT1mDW+L4zW16KQxOUTvohTsPHwaT03sjBHmwG/xjF44evocJr5gOhaf3twHQ574FoDpuC35/QBendwVsREhqJeAf/6wDbOr2mHbwVNYt/MoxnTJBmBqCHjk443o1yYVY7tkIyLUiK827MPbq3bgqYmdsedoDQ6cOIOSrHhNZf1j/wkMNAf0lt/Lym2HcejkWQwuTgdgOubJ0WFYefdgSJKEt1Zsx95jZzB9QCG2HzqNrMRIGM3XDFe2HjiJd1fvRE5SFG77zxrMHdkex2tqcX2/Aghhu4+jp8/h0U82YPaIYkSGGRX3V3OuDgdOnEGvh00ByHs39ERZju25svWhEXjis024pFsrpMdFYM32IyjJiodBAF/8tg9xkaHompeEpX8cQJjRgPLcJOv+r/zHMny9cb91v3uP1eDJzzfh9WXb0TYjFhv2HEfHnAS8e0NPAEDV375DWmw4/n5ZOQwCKJz9kXVfmx8cjhCjY7vq8ZpzmPXOWkzs2hLd8pOx5cAJRIeH4NTZOlz92gps2W8KjrISIrHzyGnrd7V2x1EkRIUiIz4CdfWS9Rr48Mcb8OzXf9i8x8KrK63nnj3L977wpz/x75+2YfGM3tbnTp+tQ0SoATsOn8aqPw9jZMcsm9cePnkWB06cwT+WVmNAmzQYjQKb9hzHQx9twEc39ka7FnHWbc/U1uH3vSeQHBOGFvGRAICvN+5DcWYcwowG/LT1EIrSYnDdv1bhoTGl6NwyETXn6iAEEB5ixKGTZ3Hk1Fnkp8YAAE6drcXkV5bjxy2HEBcRgn9NqcTy6sNokx6LXkUpmPfBr3jJ3BgEAJGhRiybPRBCCIx8agkeu6gMnVom2nye8X//AeeVZeKRjzfgeI3ttXjL/BHIv+tDh+P34OgSzP7fOofHZw5vi9GdspAeF6F43L3hreXbUZmfhFbJ0YrPS5KE+xf9igldc9A2I05xm2DQbf4X2HOsBo+M6YBxFTn48+ApJEaHIjYiFOfq6rH3WA3++eM2/P2bLdgwb5j1t/LJ+j2Y+s+VWDilEj0KU9x6zw/X7sazX/+B88taoKpDJrISIm2e37zvBHKTo2x+85Zz8qUryjGwXbrHn3fVn4fRMTsBWw6cwLGaWnS2O4+VSJKEmnP1OFdfjzCjwXoMauvqYTQICCHw6tJqbNl/AvePLMHxmnPYdvAUUmPDUTn/C5t9bZg3DLX1EmLCQ6yPrdl+BCOfNjXQ9i5KwT+vqrR5jfze/dhFZRhrvqfJnTxTi192HEWoUSAi1Ijz/m8JJvfMQ4+CZEx5zbFu/dCFpRjYLg1xEaGY9c5a9GuTanON+nbTfuSlRCMnKcpa17PUdywsdb03l/+JO99ei6l98jFrRDtrI4IQApIkIW+W7W+/ekEV6uolfLVhn7Vsf53QETHhIdh//AxmvrPWuu2NA4tw8+DW2HusxnosHx9Xht1Ha3BFj1zU1UsIMQjUnKvDoMe/wU2DWmPPsRpU5iWhX5s01NWbGuEt174b31iN937eBQB4ZEwHjO6chcMnzyI2ItTmXmmpT1vuX/mzFqNeAq7pk4826bHWegUAVB84iSWbD2D+h78hPMSAJ8Z3xJX/WI6+rVPxzab9mDeqBHe/a3udm9a/ENf2K0DJvZ84fDdaVOQmYnn1Yevfj48rw6iOWVi9/Qi6tHJ9TvubEGKlJEnlis8xQHXP019txqOfbHS53fQBhTAaBJ78XLknyVc+mN4Le4/V4KpXG47hP66swKRXljtsO6pjJrq0SsTd761HUVoMPrulL37ZcQQXPNXQg/X5LX1RmBaDunoJry6txvllmah48HPr8/NHl6I4Mw6jnm54TVJ0mLWl8KUryvHp+r24bWgbGASw6s8juNp8Ifr8lj4Y9Pi3NmX6z7XdbQItcvTVbf3Q/7Gv3XpNbESIQ2WxuUmPC8eXt/ZDeyc3gv+7uBOmv24aCn5+WSZKMuPQITsB4aEGtE6PxZ6jp3Gmth61dRLKchJwprYOd/73F7xrvtHdOLAI1/YtQLt7PgYAfHN7P9z93nprw1P1giosWrPL+h4x4SE4caYWEytb4u6qYrz2QzXaZMTivZ93YWj7dNTWS5i2cDW+ub2ftQKt1MAgd+vg1qjMT8bSPw7gpSVbcbymFh1zEnDv+cXoZA5Ypv5zpXV0gRaW4KxHQTLKchLw7Nd/4PahbbD3WI218ef2oW2s18aY8BC8MqkC6XERGPbktzh5tg5PjC/DBWVZKFAIUABTg1e9JKHDfc4bjDY+MAzhIaYKxMpth/HNpv34aO1u/G7uyb+uX4FDcKmkMi8JP201DeHPS4nG1gOmINZoEC5HmNhrmxGL8FAj1qi0rPdvk4qvNpqOd3mrRFzfvwC3vrUG1/crxIMf/uZ03wYBdGmViHmjSnDRcz9Yf8cXds7CT1sOWQNuJePKs/HWClPDXnxkKI6aGym/vb0/Bj3+Dc46GXlwfb8CPKNwHOXHKjEqFKvvGQLA1FN9+mydRw1+V/bIxStLqxWfi4sIwS/3DXV7n3JTXl2BgydNDUKTXzHdfzq3TMDb1/VA3qwPrZ/jeM05vPbDNlzXt8DayLxu51Gc939LkBYbjmWzBzWqHE3peM05xISH4N8//YmV2w5jaPt0ZMRHItQokBkfiTpJQlxEKMru/xTX9rVtfPzbxZ0ww3yNurp3Hl74bqvNvgvTYnDgxBmsvnsw5n/4G174bisGtUvHriOn8fZ1PSBBwsKf/kRyTBhOn61H/7apSIwKswZ0/125A3kpURjzbMO9PsxowNe390OPBV+ia14S5o5sj2FPfocre+QiOToMU3rnIzLMiBmvr8b7a3ZhXHk2xpXnoEurRJytq8cL327B5T1yERcRat3nb7uP4fd9J9C3KBWb9h1HRW4STp2txbwPfsPry/60OZefu7QzhpW0cHpMX/uhGve8tx4A0K5FHD66sTcOnzyLTvM+w4yBRbi8eyuUP/C59RiFGg34bbfnIyyeHN8Re4/VYPO+E9i074Ti9WVQu3QkRYdixsAiPP7ZJryzSnnkm5r8lGhsOXASpVnxWGsepffKpAr8tPUQ9h6twTurdyLUKFCQGoO+bVJtAlOL1ukxuPf89tiy/wTuNh8fua0PjUC7ez5GzTnH643adUaJJ3UfoKHTBAA+vqk3QgwGDHr8G5ttIkIN1vJ9c3s/3PW/tchNjsa/ZSPknPni1r7Whmq9iA4zYv3cYf4uhlMMUL3IVeUwGG1+cLhNLwYRAavuHozOTbRcwLOXdMbw0hbYfugUej/i/lBLiyt75OLDtbux7/gZ1xu7EGoUOFfn3v3DncqImpeuKMeGPcfx1y9+53wqHXjn+h7o3DLRp/fGNfcMQXxUqMPjlt6Uf
0+pRE9Z790Hv+xC78JUfLlxL9pnxltHWth7dGwH3P5f0/z39plxWG8ert06PQYlmfEIMQprgG8JUGvO1eHQybOolyTsPVaDLq2SFPf99cZ96FmYglCFHv8dh08hMtSIZPOIITU15+oQZjRYg2Wt9h2rQVe7Hjs9sIw08PRc+fmewZi2cDWWbG6Yu/f0xM7Ysv8E/mLOZ2AZxZSfGm0dOWHxxPgy3PymcpKe7vnJeP2abli38yheXVqNR8Z2sPb8/e2LzRhemuFwHslH8ZApKLef8kP+tXLOIJfXGX9igOpFzTFAJSL/+/LWvta5wUTNyfllmfi/izs5PP7h2t24/t+rEB1mRN82qZg5rB1uWLjK2hPkbfKeGAulaTA/bTmI8c//iKl98zFreDsAwFNf/o6zdRKGFKfjPHNeBflrtx08icOnzqGjeXi9ZUjkxV1b4qELS63bbT90CkaDQKZsOOzOI6dxoqYWbTJisX7XUTy4+Dcs/cPzqSa+Ih9l4S1T++Tj79869uo11oWds/D4uI6s81FAmzuyPS7vnuvvYqhyFqCGKD1IRET6wuCUmquP1u4GFAJUi5Nn6/Dh2j34cO0en5bDPjhVc+CEaYrL9kOnzMlRgMc+3aS6D/nUoWn9C9G2Rax1OY7Xl/2J+aNLrPPLLSMotswfAYPB1MMnzwGhZ94OTgH4JDgFgHdW7cRXG/b5ZN9ETUU+xznQBG7JiYiIKOgZhPIQ121uZuD0hRe/24LxFTmINc99PHGmFhvN2bVdBc2vLq1G6/RYm8Dtqa82O2z32KcbEWo0oGtew3Din3ccQXZCJJ7UGDST+w57mDmciBqPASoRERHpllIypyW/H7Aur+BPDyz+DQ8s/g2RoUZM6pnr1hzre99fj6cndna5nXwVAIspr67QtGwJEVEg4jqoFFQKUpWXCmiMGQMKvb5PIiLSzrJe4Lurd2L7oVO6W5T+9Lk6jxKA3bBwlUfvx+CUiIIZA1SdaJ8ZvOurNaUPb+yt+HiKkyxm4SHOfwa3DGnTqDJpNagRa7o9dlGZF0tCRKQvVX9bgnU7j+KmN3/G6Ge+xwOLnS/PQ0TU3A1om+bvIniMAapO9C5KtVmQnTyjNldpUs9c1df8dUInLJxSqfq8mkXTeilmcLQYUZph/X9OUqTqdhbTFHpq/62xXG3SYzVt522PXVSGR8Z28Mt7E1HzYsl+a0lCREQU6MIUOkm+u6O/V/adEBXmlf34AwNUnbiuXwEeH+e8FywpOnBPNE90yI53+zVK4WnvohRU5CqvVWcioYdsDT0A6Ncm1ebvq3rl2fxdvaAKpS7K540VnORr+7VKjlJ/L3j2ZkqB80cqvdD2vrujP8Z2MS2Wbu+vEzpq2sfknnmuNyIiIqJmxZM6YCDqU5Tq8FhOknp9r7lggOpn1QuqUL2gCvGRoUiPi3C6rdKatUotL8HCkwAvxGjA9f0KbB4bqHGIw2c397H+f/7oUpvn7j6v2O2y1Ms+wFU981CaFY/iRvSSPzq2DO9P66n4nCQBg4vdHyL8wTTHYFRrT778AvrjrIG4c1hb3Ht+Me4Y1gYjO2Zp2od9h/cH03spbrfsroGICjM6PJ4a2zB0W22ucFyE/3PBhRqVe/ZJ34a2d/83JR85Qdr87eJO+GB6L873JyKrzHjXI888MaeqnU/2q9UQu7pap5YJ+OdVXf1UGv0K3ujGT+aNbO+0gr/sroHW/2cn2v74kqLDbJL82M+bvKG/7c179d2DsfruwS7LFIiV495FKa43UnFx15YOj6mM/LVRJBsmK18E3VPyALtFQiQWTe+FitxEm23u0RD4lmaZWhG75iU57UV/aqL6OoGqZYSE5y/rgoVXm4YSv3eDcgAMACvmDLL+X34eA0BGfASu61eAST3zcH0/03n65jXdcGEnbYGqRduMWPRt7diaGBsRal3AXs0Ehe8dAIwG2y9/at98t8qkRzcNKvJ3EZqF8BDHRhGLhKhQxcefuaSLW+/xv+t74LwOLTCuPNut1wWTC8oyUZIVj8vsFpSXN0CRb2m5RxI1pY4tG+759tOpehQke7zfKb211QF+sqvneMvzl5c7PNY9X/3z9CpMwbT+rhvv7Nc8DfR7CgPURrKvqF/WPRcLp1QiIlT50KbJekmVbgh5KQ0Bqn0wY59EJzE6DNEqi/C2zWgItr68tZ/iNno1ojQDr07qqnrDfPu67tj60Aj0Mg9/fVHhx27Pvje2qkML2V/u3Zn/c213TdvJe10t72802J4Xk3rmukyQtWh6L2x9aITNfpTIK9PvT+uJPgqBnr16CRjSPgM9ClJQvaAKZeYg0L4HuWVSlE2DSZqL3n4AqMxPtgn6ldh/HqNB4InxjsODhVAOQOXfnNr8Y4MQNje3WcPbOSTG0nKsGsMbw73lbhrU2rs7bAT74e/BxP6USo0NR89CU0ViYFv3e1dLshx/651aJuKpiZ1RlOZ6HrnaaIBvbu/ndlkCQVenUzNIK2fTQyw2PziiCUriyBeNEME2isGujRXPXuJ8aSL7KUp6dHVv5fvGpd0a7vMdsuLx0hXlWHPPEACmnkYLrfk5hpdkYOtDIzCxUrkBW03bjFikx0U4zTPiichQx0bPzIQICIX6S765w+pfUypxpZM8KmoeGRvYyTMZoDaSfe8MYAocf5jZ+JYX+ZDNt6Z2t1bAc5Iivf6j8cSs4W1dbiPvddPKaDDAoHBcJ/XMRfWCKnRplQQhBJ65tDNev7obBhWn49e5Q/HDrAE228eEh2B0pyxcJJsjmZ0YiRsHFuGWwfIKfkP0MK1/ofVzrZgzyKEBIj81WnU+66JpvfC/63tY/85JirIGz5b5qvZzRYUQiucQAJvhvEoXL9v9NPw/MtSIDtkJmnovlYaNA8DEypa4tFtLPDG+DNf1K8Arkypc7stVuZSkxNr2CAshVHuJXTUj2B/GEJXjCgDLZtuel+1a+CfJVFPR0vrqKW8H3wBwcVfHec3e5KxRK1al0Q8Aylsl4qUrKvDpzX1Ue1ABoLOsItWzMBnnl2XikTEd0LqRycwu7OzYIp4RF4FWyd5fXssfHK4X7NVrtE0PDNeURM9oEEh0ck77ii++YhFkJ84lla2w5l5TkPb61d3Q3UXvYYSTkR/uktdptJjcMw+fyqZLKYkND8G1fQscHv/lviF4YFSp9dpqNAgMbJeOePPfl3dvZd3Wvk4kv+bK1dVLEEIgPtL23HaV3ba/wvPPuGgYUNIi3rYx31KXekd2XEd1zFI8Y9+5rgcWzzBNe5I///ktfXD70KZZXcKfGKB6gX23OqDemyOXpTiMtOF1F3bOxsc39cZ3d/Q3De+MMVXcJ1Q4bwma1DMXGeYfxUVdfNfFf35ZpuLj1Quq8ODoEozpnI2UmHC3x9bHRypXEO8cZhsQx0WEWi/UUWEhaGE3XyEhKhRPjO9o08ucEReBmwe3RkFqjMM8AAC4bWgbTDVfOFNiwm16CpfNHohF05TnSAKmILRTS7te7+J0VC+oUvmuneuQ7XjBVTqthhSnoyTTFAC/fGW59eYwqlMWvr29
P567VH3IYb2T4OKBUaUY3Skbdw5ri/zUGADAuzf0xFe39dP8GS5QOUe+nzkAj4ztgOxE7YkABheno3PLBJsETPLjIb9hfTijN941D1dWOmbxkaG284olU2Knz2/pq7k8Wl3Xz/FGLO91vHNYW8X5td4yp6odEn2YYG1C1xzVaQTuNGx0adXw28lPiVHdTmsSLzXVC6owqDjd/H07VqRszimF10eEGtE6PdZpYP60rCJTmBqD/7u4E8ZV5LgVzGu9dgfT0Ez7j+KsscBdtw5ujekDCjG8JLh611wRAppT6Dm7H6hp6aVkLk8qjJzxBft6RCBIiQlHfGQoqhdUoXtBssvriNr9xNWSekrs6zSAadpYsso9JcQoXDbEdc1zPjKitcpoktGdsvHjrIHWYN1ZI6Glfjrc3Jtufx+2X30gLyUa388cgOoFVfhh1gDcprC84PCSDFzWrVNBo9EAACAASURBVJXD40osPaUVuUk2wbGlbt5ZdlzVOiASosLQPtMxUVR2YpTDlL9gxADVQ5aTTwLw5a2mSq28w8bg5Mj2aZ2KLq0S8azCXKUrephO/revM7WutM2IsyajiQkPwZb5IxySANlrmRSFEHMBBmlMnPPcpe63DGXERai2KF1S2Qp/MWcldjaPS8ldI5QnsEcoDI1wl/J1QFsNLy02QnVItVbe7m2KDQ/B85eXW3ucB7RNt0le1DI5ylrxT4oOw9MTbb8vdyu3HXMSbIahu5KZEGmdQyuXlRCpmP1XjRCm7/+d63vaJGB6bXKlzTYWxZlxsqBM+UNO7plr7UmXAIzsmIXCNMfAyNkSRa5kxkfgzmFtHSqI8uC4VXKUR5W8R8Z0sF4nXFHrKfeG1umx+F1leGC/NtrXYEuJ0RZEe2s5LtP37VgRklcWXI1cUBNmVL4BuPM9KLXgO6N1vTv59A9nVt09WNMoGS2U8gIosT/eUxV6WTw1tjwbtw5pg/svaO+1fQYCAd+McrCQ92p5wjLE19V9pbyVY6CkRmkElsU1ffJd5jPwFlfv89rkrhjUzvXv1j64cvV19m6dgikKUy+8de3cOG84VqrkP6k3t3K8Nrkr/n6ZeuO45bfubiNURnyENeD72Tz0V8nfJnTE7w8Ox+hOpoa+uAjbYNb+DPnvtd2tHQkt4iMVR7YJITBvVImmKUGWMiZEhWLNvUPwyqQK5CZHWQNUx3273KVT9jlrggEDVA/JW6gsww9CZFGpfQ/qVb3ysGHeMACmH+7b1/VQ7NXoXZRqHsaqfDE2GIRblSZfbGl9hQBGlLbAA6NKGt2rIRcV5rusq764Uc8a3lZzL7GrlkN3RbrR82YQDUOMO7dMwLxRJQFzUVMbstUmI9baWGT/m3N1RgshrC3K8sBhah/bBAr3nt/eYUi9u3P+6l2ceFpGXNgbV5Gjep3QakqvPF1MF7Dnj15By7ng7L21lkveEyW/XlsefmK867lB8lPGUrEUwrT0gnxEhmW7v1zk3flGSdFhqtMPXLFPthanMioGsJ2DaP9u6XGeXZ+UpglYPktqbDhu1tEcbl8zXVuUrz/28+E8acgqa2Sw9/CYDnhwdInLJUX+q7ExDgAursjBZd1aKZ4/RoPA3JHOGynGKAyl98S7TpIOAqb6wItXmAKX6SoZrOMjQx1Wa3D2PQ1rn4Gh7TMw57xim9Fbb17Tzc3Sq3PWAGC59vVpnYqh7ZVHKwjR8BlCZKNv7PfamOqaEAKhKg2FluctSrPikexGXeiBkSXW/88fXaqp979fmzR8fXt/1Q4bTxpDLflQpg8oxMc39dbc+BgoGKB6yHIuSRJgNP/RRnZy2Fc427WI80oPoDPuTgKXs59Ur6ViYPlBXdqtlVstc4lRodZMqvZDA2cM9Ed20sZFrVP7FqC3bB2ry7q1wg39lVv+R5S2wPLZ7s/LtWe5P2i5pjXczIT15tEiIVLzUJXG8nWwYQm67e+ZQsMxkv+OLWaNaOcyaHNnaLL9/qtKWzg874+AbEJFDqZ78HtTKr8WK+YMcjo8XN4I0djAGzD1yDwwqsT1hmaWQClJtrC5/dciD3ymKCT5sCRuU6tAWh7WOkfuv9d2x8NjSjHWPNxXkoD3p/XC9zMHOGybGB2muiyOliQ53uRO0CI/EvLfQXpcuMeNlc4afIQQuLEZZcE2GIRqw6zRIPDO9T2s+Q582dOqJj4yFJdUtoIQwmG+nqe6FyRj3qgSh3mHWj12UQdsme/7pFGW0/Tr2/vb5cUw+eb2fvha4ZrpbCj2c5d1UfzdKK1MkO/GaCgLpXLKqa3JLm+Uk49EUwzMPLgf5rr5WRpzy5WvH5+XEo1RCvk+lOoW9v5xZQX+72L3V1+w7LsiNwm/3DcEtw5pg5SYcM3ZiQMFA1QPWS58BmFae/ONa7rhtckNvWiWE8hoEFg0rRfGdHZvuQ1PhHrY2g2Yhk9Gy3rjFk3rpZplLSsh0iGjsDuGlWQg0VwJnNyz4T0qchNtLn6WH/ZVvfKczv3Um3mjSnD7UPWhcd7IWtgQcrr+zq3bylotA23amv09TD6szHKztr/RaTk2Y7uYeiGvUjnXVcvj4nl3so86Sy/vrlEdlef82i9pBQALxnRQrcDNHdker1/tvRZ3wDQESevwcKV5T3JaslS+N60nRrgRTFuuN9eah9MpDc+bPaJhaHZmQqTD57HMPVWrk8h/i64IAZTnJmG8i5wD8gqh2jmvVMnVorG9Y1YaAx9vJLe5ZXBrh8Yqb+072AhhmgunlO9AK28GtZZ1sMfazb+2ny/oiqveKPsy29+ThRCKvYSWuY9yagmB7KdiXdgpC0nRYTajreTnpFKZWyVHK460S4oOQ2VeEh66sNThOXe4us5ayO+3alNzLEkJ1ZITWobaAsDckdobDl2xrMhQlBaLZbOdJyd98fJy3He+6RouP9xqQbUaIYTD6gv2X9/oTlno1yYV05ys7dy/bZpqLhdn5L2w8qHLaquHBKrg+jRN6LWrKnH/Be2twwK65SfbXEjkLbil2fEez2XyhNo7RYYasWhaL6y9T3nc/rr7hzb0EDgp7qB2aXjxCtdLu6hxd/7HBWWZ1iy4vuHfiosn7265wWppk2jotQGGFGdgULt0zPTS3DJf+fmewVglm+Ni/zHnjixp6OV0cSycHaKk6DC8fV0PhwRbrjj7OY/smIkHR9vegOepDCnrXZSCxOgwr/SgVi+owl0qC5APbZ+Bt6Z2t1teqUFhWozNcM7Lu+eqZoq8po//W2kvqXTd+x8XEerWb8vSeGMZ6ms0CIeh2fZD6v93fQ98clNDBdVSQVCrtFv2p+V+oLQPVy9Te97T+49axnJPVbqa4qBQzEfdDE5mDCzyeGhysFKrfnuSNMeXkmPCseTO/g7LnNkHRa6yxLpiGVEgn6plCVycUZov2To9VnHEjX3w9/j4jlh192D0Lkq1BnGeXveNBoE3p3a3jthwRv4e9nUvrYGZloByXEUOru6dp2kUXHxkqNN3tgw91/I7zpb1DKfFOu+BH1ScjitlnSJaKSVWs/QCW47vb3OH2TwfExGCVyZ1Rbq
GJfncobbKAQD0KUrFHcOCJ7uvvq5OAeDCzlnWOUBX9MhV3c7yu/JkCIW7Xp3c1dry6ExyTBhKs+NVhxoLoa2NubHB9qB26ZpaXX0R09sn0gGgOmm9qbjTdmeZi2LpuclS6Blz3L+lUmyqYL94RbnbQ1S9SUurb0JUmM2F2Nk51/D5lLfxzXkkVIePVpW2sAYyljJd1j3XYSjPmnuG4KUrTFluQ5xlVXOD2u9KCOF0/vPnt/S1zmdx1YDktV41D1gSVmn9SuXf/dKZA1Qb54CGCoelcVGSJNS6SGuaEBVmM7XDcgWVzxmzOf9kjUXOyuopbT2z/gveXCV+sunVMB8rT+YDKi1h4W4vSTD4x5Wm64vakPN/2a0l6WquvJz9Ou3ekp0Y5TDn0p7m37/KlglRYaheUGUzssBZfc66P41vnJscZbNMoD3L0nfOhqJrGaWmpTyW3s/E6DCvNEyrvWeY0YDZVcWIjXBvWLXS7h67qAw3DSrSlhhLYQdqS87I2czxz3A+Re3ZS7uoTvux7MfXU/i0MBgEru8XPNl9GaC66fFxHfG+huGmIUYD/nFlBRZ6eZickr6tU1GSFW8dg98iIdI6/CAjviHzrOXHHmIQqsN3nbEsEt/Y+o39LdDTeSJqws29GJalUWzeW/bm0wcU4b0bejZZRj9vSIuNwNMTO+Pt63rg6Ymd8ffLXPdkuzvvzdvs39WT4eHOSq7Wm9zYCuljF5U5TaZyabdWNq2ma+4ZghkDizDQ1eczlzM+qiH5hSfzUJS4U8G054/5Z3JCOK4ZZ8/d36r8nM9MiHRaeXr5ygrcOaytTRkGasiwafN+5rdLUpkP2i3f1EhgWYC9MT6c0RuZ8RF4fJxs2SUPfuNFCpmr5RbP6KVpTWVvaGzg3slcMb2iR67j+o125/fa+4bgCnPlPVjXFLQ0CKj9tAvs7pFK2ykto6dFQlSoLhOwyVl66JKjwxQbbu6V9ao+e0lnTY073fKT8NKVzpfXemBUCdbfP1S1h3DZXQPx9CWu7wn2Aa587XSLa/oUoHpBFWLCQ1STBvUuSvFZg4Ma5REips+TGhuOmwa11nS8LcvdWJacWTFnkKZ6t/zt57mRqyAQvHtDT69P0fEH36VLJbeXCWisK7rnok16LHoUpmBQu3QMbJdmnVvy8U29kWtezF0IgdlVxfjf6p04cOKszT6UWrABU89FVkIkHlj8m9uVoLKcePRvk4rV24/gyKlzCDUYbN7n+cu6YPzzP7r3YZ1Ii43AK5Mq0NlF65vRIPzaG+QpyzBNteGa9tyZ99YUvN2TYdmbJ5lwnbGfB6VEPucjPirUIYGEYs+FwkNKc0Q90Zie2FhzA1SuQkKd6gVVOFNb51Cp2PTAcJf7/fyWvvht9zFNZVg0vRd2HTmt+rxlXV3LV92/TSq+2rhffYcaTolLKluif5s05CRF4bp+BVhRfcj63OhO2RjaPgPF93yiqfzyt7ttSBt8sn4vzuvQMMfo0m6tMLg4wyujNooz47B0lvM5V0rsz0lXP5v2mfEYVpKBd1bvtD42uWceXv5+q/b31Lid7RI/jo+54k4gHRsRisiwhkbX+y9oj3vfX++wXVWHFlj8y27N+71tSGuUZifgipeXaX6NhUG4Xou0Z2Eyvt98UPG5e88vxv2LfnV4vDGNT+vuH4o1249g5NPf2zx+TZ8CLK9egdbpzhs4fMHSmLX7aI3L7ZxJjgnHggtLNS2HZRlxNaEiBxd0zMTEF35S3O6Na7o7PGY/X9FoEKpL1hWmxdisv+6M5fOlxIRhxRzlZV+8zdvVCCFMnSauRquomdwzDwlRYRhrHmmhdWUCy3UwMSq0Ub2f8nNs3f1D8cjHG/DaD9s82tfCqysRajRomrrlTCB1ujjDHtQgYjAI9DDPSTAahE3ig7YZnmcRntonH3Oqiht64tz88YSHGPGPSV3xzW398cqkCsRHhdrcMNUqIJbW3ZgI99tR+rVJs5k8blnmwBs9F4EmwtxL56010BrNg/uQs3OuYV6fh+XRASGEw7wrJdFhRsxwknTBZQIuJ8e+dXosXri8HPNVhmCHhxg9uoYUpsVoTgSREhPuNGGL5Vqh9bvWst2Do0udrhcdFRaCb2/vb13v2vn7NbxhkXlumjwjsRDCGpx2zU1ymKvcWFd6sGavpcHx4TGlePlK5yMyOrVMQLsWcR6vDezq62jMTzg7MRKXOslMrnTqW6cHQFh7XwHlnih5g5XSEGKLK3vmeTwqSMtw5n9Pcb9nRO1nb3+81UZgKDXkDi5OR/WCKiREKc+Jc/Zdall33VnGb0C4Pc94dKcs9C5ynLM5oWtLtxqMFozpgB4Frud+Wqy7fyjese/NV/HGNd3cWg7G16Oibhviu+WYwrxULwkxGjCuPMfp0jdKGhruG3cM5T+ZmPCQRi2T2KMgBRW5SejSynE6jqVBw9k1LtiwB5VUWX53KTHhNglDPG3diY8KVWypVJsfM390KUZ3ynIYhuSJthlxeHVyV9dJOoJQckw43rimG0qyfJloyrXU2HDsP35GdR0wZ5zOQVUZwqz0+Hkae52VvHlNN4eefiEEHh5Tih2HlXv95OW23IiHqCwFMrGyJSZWtkTuzMU2jxekRuOP/ScBAOvtEjG4y1XvtdK8KWdJGfTeKOCt4rXUuEyLO9fGt6517GkBTD3Zx2tqUeRBz5SWpEZl2QnYsOe49e9hJRnYuPc4+rROdZksLDk6DC9e4Xz4oru8dQ6lxIRruk7YPuhYhtKseGTIerAs9yd5T+HM4W3x3Dd/KL6Pp0NiAVPwc0HHTFz2kvu9r86o3WN9OR/Z2b61JKVzlvFbvuuK3EQsrz5s/VseDMrL8ISGtSp9wZ3zoZubGd3d/focNpedFkrTH6YNcJ7w6IdZA9D9oS/dK4RZfGQo/nNtd7TNiEWnuZ95tI/GkCePDAQRoUZsfcj3Sx/pCQNUUmXfY2oZgeHtoZRqIsOM6NPa9XISWvX14r4Cjbs3Pq8yny9/Hd8R0eEhiI/S3rsQZjTgbF29pm1Vs/iaH9/0wHDVFPhadFRJvKC0DIglCG+Z1BDYFKbFYMO8YW73Qn58Ux8Uzf7IrdeocXeo31tTuzfZGprDFDIluuKLabONubx5o8J/UZcc3DS4yGYEiFqA4YmreufhzRXbrX9P7ZuPq3rn2byfOuefb9G0Xta5YK7IG16se2/E4fvLuDLXG9lpWK5L/bdRmBYLYI91ioyvTOtfCKNBOJ0n7Wp94MaeJt6ch95UFf+h7TOsAapBmJbNaQq9ClOwZPOBJnkvNd6sij12URn+s2I7Hvpog4s3bfivvKHBk2uUt7OEu8OSwdnTJRN7FaZg2dZDyExQ7n33RU4Hfya48wcGqKRZuXkSvdryE7cObu3+cFzR/H50zVVkmNHtOb+LpvfC1xv3adrWvuHE/v7gKjOkK+4Mp0qNDceLl5dbfzMWngyR9eavw91hUM4y/wLeK9vUvvkY2VH7/EHLd+HvxE6+oi1Y9Iz9d2YQwpoAr7FyU6IUAyylyuu/p3RDt4e+sCubZ2fUgL
ZpLkfaKI0ekDRMD7ioSzYGyfI5+EpPDUuGpMSoj2aYU9VO9d5s8drkrrjcPDc21CgcE8t58fd093mul23xlNrX9Te7ZHONvT6N7ZKtOKcXMK2eUCebN/n61d2sU4mairu/F4fzXPZ3UnQYpvYtcB2g2nnsojLc9p81yElqfENmU9YEo8ND8OOsgUh28ptyZlr/Qozpko2sBO/kjyBHDFBJs4rcJPw6d6jqGPvpGta/ouZncs9c3PjGz057IPq3SVVca61NRqzNMh7OqK4BqenV3udsXqO/pJvXiZvswVpwSrzVuHR591w331j9qTuGtcGS3/3bs+Ft3jjOcREhGF7iOMRdy661xi1qo2uU8hekxIShNCset8ky6Hr6MR/X0HvqLGuos4q+EPB5cDqhIschuCzLjseaHUe176NrS9XhpPLj/94NPZGZEOl6vnojXaBx3rk7jAaBunpJ9ffg7TUnYyNC0TIpCn8eOqVYFvk8WFeNA7rkST4Iu9/KmM5ZSI0NR2+VBpZr+xaoDof3t8YkqzMYBINTH2OSJHJLYyaAy1nm4fniJkb6MrJjFqoXVCHRyVzG6PAQj1tgp/QyBVv2lRbLIvSF6doCXFf81dHvzREGF3Y29VJe1t3/iRZev7obFl5diXvOK/b4Rq9Uv7q+X2GTLO/lbZas7xd09M018Zf7huLhsR0cjpk7UzZcberyeVnlNsRowKLpvbwy9UIpUY/99UTpXLmhfyEmVOTgkm4Nw/SFsN3WnXOzLNv5PP9IlREUvYt8O/1EngyqLCdBNTj1Vob16gVVqqM1OmlYo1KNJdu5AHCZOVmMsx4wb1w6g2n9XPt7idIn07o6gHyffVunqn7fM4e31f1yQ6RPDFDJIQW6hS8vzAWpMaheUIX2mfFenV9Fzc+c84oVb4ApMeH411WVeGqid9YYDQYlWfGoXlDlNAGJOxpT/+tekIweBSmY3Mv93lzL+7q6dgTS9AHLNdHXSwTYHzKvDiFXOd6WrLZqS2tYy+LFwqTEhKN6QRUynfSSxEeGYsGYDqoNr6mxzhMv2XvPxRrpavdUb9xrnZVS6y3WMqT/yh65jS6PkkXTeuHVyV09fr38c0zta1rfU/7dxXppqHpzIz/HG5OngcibGKASXpnk/IYRSJU8IrleRSlem8/nr19Bc/31PTG+Ydjm+9N64sMZva1/N8U1SU/NZt5sxLMPhuyPZZjR+9WCqX0LcO/5xRhfkeN0O18um+HqGPrq+76mT771/x4u9eiRhVdXWv+vdYm4RHNPdGN6Oe29fV1DVt3S7PhGXY+tPcGyz2H5bOEhBrTNsG1sZ9XFfZbkQWp4TJ3LiDONTkjVuB4rqWOA2owVpZmGPsorJPJ7eFN1bDIANuFR0Dd/nad6/nk4K1uiG9malYzu1LAmZIfsBBSrjPSgxrH/Cr+7sz8Wz7DtCdR6L1DrQQ0LMWBSzzyXa1f64lx/fHxHVOYl2Swd47QMKv/31BD5XHSV4+iNe63l2FmWUpMH+w1DfJueq8zD7lBbUgxQztLvzQYPX685qhezRrTz23vr+V6n1eXdc/HMJZ1xUbnrNY3JOY6HaMaeu6wL1u44qrjsh/xi7OtrBof4OvrnVZ4Pg6Lm5d7zi/HWih3+LoaDXC8NI1bSMMTXZ2+hK95sHLEcs8SoUDw6tsxh7lh6XIRqshlXpWhsMX1xr+mWn4w3pyqvOSvXFPchpaG8iVGh6OHjBDvWjxZgAcDUvvlon9kwr7eqtAX+/u0Wt5Yqc8e8USU4XnPOJ/sOFK56uAPsFGpyBoPAiFLP11unBgxQm7H4yFD0KnKd2p58y5LgI1TWk+3rpBnkvqa+MQthqli6Ck4m9czDJC9l5XWXs7L5MgGa5W1dzd0LlsqUL4Kn9LgIr2eabuwa2f4cTZNmDsp7F6WqNnykxYbjkkrPE4yFGg04V1dn89jqe4Z4vD8lSkVviE9d9WB7dvzzU6Kx5cBJ1xu6adZw2968O4a1xfX9Cq1zmoGG5e+uUpjLPr4iB2t3as+EbEm8RET+xwCVXPJ1naG5D/F9cnxHLFqzC+1aeCfbLAWHRdN64dP1e/xdDEVtM2KxYc9x1ee/nznAaXKaxlKqaIcaBTrleG84IclpC5AbeyWXv35M56YdIpeVEImlMwcgPS4C+4+fMZXH7gMtmz1I077UGhRSY8Ox7aDjkiXuWn33YJytq0fl/IZ1ZO1/E0q3VVe3Wk8bQj6+qQ/q6iW0u+djj16vldEgHHpPLcmwlIztko05767zaZmIyDcYoJKNZh4r+kVSdBiu8FHWRPKepv5tlGTFoyTL+bIV/rLw6m7YtFc5QH1kTIcmWx9OXp/+/cERXtprcF8EvdUZmxYbjn3mQM6i0UN8Za+/bUgb9Q19JNPH562WY68lSEyMDkNtXb0bb2z6x1dndlgI05n4Q6Q5oVH3Au0j4f53fQ+MfmYpgIZzzZ3zojEdCn+d0BHLqw95/HpqXnhVIVWcG0rk6Ib+Bf4uglssS0d4U1J0GLrlK8+bG+ciU6s3WIf4+uAS1dK8fuaFTdyDp4U3ErV4a/kwpd5Eb46GUVtXsSkkRYchNiIEc6qKvbpfXy7dZj30Cm/RkP3W8yG+eSnRTdbw5C1eWQdVx9WgmPAQfHFrXzw6toPm13RqmejVxFXuGNkxCw+MKlV9vrkkoiJtGKCSKss8yE4tOWyOSAiB6gVVuH1oW38XRbM5Ve3QIdu362r6gy+rMamx4dj60AhcWtlS82v+clEZ+rXx/bxxbwY47gSTasuUXNqtJUZ29N5cYyEEuuUn4blLu3htn54ICzFg7X1Dcb6TedT/udZ14iWLCRU5LpfvsND6vbgMNmX/1xpkOWuU/uq2flg03fk6r9T0ClJjEBGq7dyyaExDA0NIaioc4kuqBhWn47e5w6zDSIh8xVvV7qrSFoiLDOzL2oSKHHy0Tp9zT/XGZZIkD2tT7vYEjumSjTFd9NfjqsSbPUKW3pD3ft7ltX2+cY32wM+fshO1V/IXjOmABWM6oNfDX7rc1lcJsQDXa1z6A6cVEZGSwK7Jkc8xOCVf8nbd5OlLOnt5j03PUpklsufddR2pqXkz9lT7/pQabR4aU4oBbdNczml31TDji+BZz0No5YIpkG7Igk6kXwxQifwkKTrM30XwO94gySM+nINKjrQuU6LFxV1zsH7XsUbvx98aeyx6FaYgQWkNclkkdOewtmibEYsztXW49l+rPH6vuIjQgOnh15vpAwpx59trkRIT7u+i+Iw7wXcwBeqkbwxQifxg1d2DmflQhvc8cgeTaQSuhy5svqMD2mfGYeeR0wCA4aUZimuqynspr+tnSsj20drdLvcdqIGD3ss9vqIlxldon48eCPTasKf3c4GaFmvIRH6QFB2GmHC2DxE1hqt6FgNZYGrffJu/1RIekXvsh9J+clMfl695ckJHtE6PAdD4c1Pt+2v4fgPjC/ZlsMTfv/fxmFJTYYBKREQBxdmSGsFoaPsMAMC4CveHac4a3s7m74YlR9wvR4DEPH7RJiPW5TZRYSHobM6Kr3Ysl
QJLLce9KQIHVz+3RdN64ZVJFT4vBzUOf8cUCNiFQzb8MfSjfWYcMuMjAmr5DiLyn+ZWv8pJikL1giqv7pM9IY3j7Pi1zYjzeL+BvP54abbzJExKGCz5TwCfatQMMEAlRU1504gOD8HSWQOb7g2JKCh4c13Q5sKTSikrss5d2SPX5m+vZL+X3YSVjr/aEF5rQqsACfx4bvmf/amSmxyFonSVEQEBcl5R4GOASkREAcVSOWfl1n2NCWDUXpOXEo2tB056XKbmRPM56+bJHSgBKenf17f393cRiDgHlZSx4kekH1kJkYjS2CszuDg96CurWtfxC/bj0FSyEyMBAB2yExSf/2B6LyybrTwK5vLujplqm5OcpCibv61zgN3YR1Odx5ae4O75yYrP+6JewN9oYOH3RU2FPahkgxcfIv357o7+mgezvnB5OeYu+hUvf7/Vp2XyJ16mmlZZTgI+vbkPClNjFJ+PDg9BtEpW8vsvaO/LouleelyE4uMu77Uuhvg6bG7+Nz7StLZqqNH9/ocurRK9PtfZFZ9m8eWFgihgMUAlItI5g4E1LSWBnFDGXyzHzN0zqrXanDQXAmW5E3f5fP6zwrmtJbHVYxeV4Z1VO1DmQcIial549SQ9Y4BKREQBResQX3LCx4HjnKp26NIq0afvEYga06ZiHxQnRoXi8KlzABoaApKiwzCld77Da/UqSNsvAoo7jUi+/Lp4LpAcA1QiHbn3dUbqwgAAIABJREFU/GKsqD7s72I0OQYa3qXn7LZvXtMN5+oaV74Qg2n4YmSoF7KlBjGluYRNdWYEUpDkicYu0+Py9bLaulrFffU9Q5A/azHqJUvPuG9r+Hq+rhBRcGGASqQjk3rmYVLPPH8Xo8mwwbT5qVRJwOKODtnxuGVwa0yoyEHX+V94oVTB6eo+jtcSSw8ef3v+4UmIF8wj2dXm6JI+BeuQfdIfBqhEREGmsb07eieEwIyBRf4uhq4tuLAU/dukOTweYp7PHB3O3me/8mSZHycvCsTAoTQr3qNkTkQU/HhlICIiCjITurZUDFo6ZMfjtiGt8cT4jn4oFTWG0hDbwcXpAAKzR9zXMXUgHhMiMmEPKhH5TRCPXPOr5JgwAECceckJIgshBKYNCKze54VTKjHxxZ/8XQyv8PZw3b9d3AkHT5xtmkzfvGAHFU+yoPs0SRKbFEiGPahE5He8LXnXNX3y8fCYUoztnO3vovhVAI56JAU9ClMwpZdpPu2VPXKtjxe3iMMH03v5qVSNo3ZqGs2BZniIY/VMqQIfHmJEZkKkN4tGzQwvk6RH7EElG8GcjIGouQg1GjC+oqW/i0HkdWGywC0nKRIlWYG13qerTLilWfGYMaAQEytbNVGJqLmZXdUOIQYDBrRNwwe/7PZ3cYgUMUAlRex5ICIicu3qPm4sqWPJoqxykxVC4JYhbVRe2nQtyJd2a4my7IQmez9qOmmxEfjLuDJ8/utet1/LuiE1FQaoREREFHD0MGctPS4cWUE4xPaBUaUOj1l6r3OSgu/zEpG+cA4qKeJQXyIKdHoIYMg7zivLBAD0a51qfawpexT1wN/nc0JUGF6ZVIEPpvX2azm0CsSld5qSJ78eX56D/LpIjj2oZIMXCCIKJH1bp+KbTfv9XQzysY45CaheUIV9x2v8XRSvCNRbbT+FtXVdWT57EE6drfVBacgbWO8jPWKASkREAeuVSRUc8dGMyHtw/N2j6AlPTtXIMCMAIDE6zLuFaSKpseEAwv1dDCIKIAxQiYgoYAkh2ANAAcedc7Zv61TMG1WCCztl+a5ARBrwWktNhQEqERERBZwbBxX57b0tCYNaJUX7/L2EELisG5edIf8RgrlJqGkxQCUiv+N9j3yhObb2f3FrX/y665i/i+FzkaFGtGsR57f3T4oOwwuXl6MiN9Gt10ms5ZNO8FwkPWOASkR+0wzjByKfKkiNQUFqjL+L0SwMLk53+zWWkKA5Np40NR5irXikSH+4zAwREREFlEAP8AIxwRMRUVNhDyoRERGRF62cMwiGQI+iiezwlKamwgCViPyGM2DIldgIz29TrEuRvyTHcFkVCh4CvF9T02KASjY4Z578gYEEKfny1r5IiArMtR/JNwK9B4f3WApkHJpOTYUBKikK9EoAEQW+fCb7oSDDJEmkF2wrIT1jkiQiIiIiomaIjSWkRwxQSRGHIRERkV6xTk2uMPDyPh5TaioMUMkGLz5EFCwEL2ikMxJbfykA+fJaOrGyJQAw6zXZYIBKRERE1ITYeEKByBdn7X3nt8eGecNgNPA3QQ2YJImIiIgCSqAGeOw/Jb3wpDPfF+evwSAQYTD6YM8UyNiDSkRERNSEAjO8pmDEc5H0iAEqEfkdexXIF1jxCl6BOpdzWv9C5CRFondRir+LQkSkWwxQichvGEAQkTsC/ZrRrkUcvrtjABKiwvxdlKAXqMPAm0qvohSUZcfj9qFtXG7bJj0WQOD//ihwcA4qERERBRQGH0SNExMegvem9dK07b+mVOK33ccQYmS/FjUNnmlERERERKQoKToMPQs5LJ2aDgNUIiIiIiIi0gUGqGQjQPNOEBE54CjQ4MWvlogoeDFAJUWs2FFTYHsIEREREckxQCUiv2N7CBEREREBDFBJBYf6ElGwKEiN9ncRiIiISCMuM0M2OLSXiIKFEAKvTu6K4hZx/i4KeQmXlyEiCn4MUImIKGj1bZ3q7yIQERGRGzjEl4iIiAILO1KJiIKWpgBVCDFMCLFRCLFZCDFT4flrhRBrhRA/CyGWCCGKzY+HCSH+YX5ujRCin5fLT0RERESkaGj7dH8XgYjc5HKIrxDCCOBpAIMB7ACwXAjxviRJv8o2WyhJ0nPm7S8A8DiAYQCuBgBJkkqFEGkAPhJCVEiSVO/lz0FEREREZLVs9kDER4b6uxhE5CYtPahdAWyWJGmLJElnAbwBYKR8A0mSjsn+jEbD8obFAL40b7MPwBEA5Y0tNBEFFyaNJiJ3cIQvaZEWG4HwEKO/i0FEbtISoGYB2C77e4f5MRtCiBuEEH8AeATADPPDawBcIIQIEULkAegCIKdxRSaiYMFKJhERERHJeS1JkiRJT0uSVADgTgBzzA+/DFNAuwLAkwCWAqizf60Q4hohxAohxIr9+/d7q0hEREREREQUQLQEqDth2+uZbX5MzRsARgGAJEm1kiTdLElSR0mSRgJIALDJ/gWSJD0vSVK5JEnlqalcEoCIiIiIiKg50hKgLgdQJITIE0KEAZgA4H35BkKIItmfVQB+Nz8eJYSINv9/MIBau+RKRERERJpwWgARUfBzmcVXkqRaIcQ0AJ8AMAJ4WZKk9UKIuQBWSJL0PoBpQohBAM4BOAzgCvPL0wB8IoSoh6nX9TJffAjyHonZaoiIiIiIyE9cBqgAIEnShwA+tHvsHtn/b1R5XTWANo0oH/mJYDM1ERERERE1Ma8lSaLgwp5Uago8zYjIE4KtqEREQYsBKtngPZ/8gacdEREREQEMUImIiIiIiEgnGKASERFRQOFoHyKi4MUAlYiIiAIK8yQQEQUvBqhERERE
RESkCwxQiYiIKKBwiC8RUfBigEpEfsfRekREREQEMEAlIj9iJwgRuYONWUREwY8BKhEREQUUNm4REQUvBqhERERERESkCwxQyQZT9xMRERERkb8wQCUiIqKAILEVlYgo6DFAJRtM3U9ERERERP7CAJWI/IZ9IUTkDsFWVCKioMcAlYj8jlVOItKCQ3wDX1J0GACgLDvBzyUhIr0K8XcBiIiIiNzBntTAlZMUhQ9n9EZhWoy/i0JEOsUAlYiIiIiaTHFmnL+LQEQ6xiG+REREFBA4wJeIKPgxQCUiIqKAwgG+RETBiwEqEfkde0WIiIiICGCASkR+xF4QIiIiIpJjgEpERERERES6wACViIiIiIiIdIEBKtngGuhERKRXvEcREQU/BqhEREQUUAQnsBMRBS0GqGSDN30iIiIiIvIXBqhERERERESkCwxQiYiIKCBIXDWZiCjoMUAlIr9hVZOIPMP5KEREwYoBKhH5HauaROQeNm8REQUrBqhERERERESkCwxQiYiIKMBw3AURUbBigEpERERERES6wACViPyOs8mISBNeLIiIgh4DVCLyGw7SIyJPCF48iIiCFgNUIiIiIiIi0gUGqGRD4vApIiLSKd6iiIiCHwNUIiIiIiIi0gUGqGSD83qIiEiveIsiIgp+DFCJiIiIiIhIFxigEhERUUDgHFQiouDHAJWIiIgCCof6EhEFLwaoROQ37A0hIiIiIjkGqETkd+wNISItuBQaEVHwY4BKREREAYUZ54mIghcDVCIiIiIiItIFBqhE5HcctUdEWsRGhAAARnbM8nNJiIjIV0L8XQAiar44So+I3BEdHoK19w1BdBirL0REwYpXeCIiIgoYsRGh/i4CERH5EIf4kg1mSCQiIiIiIn9hgEpERERERES6wACVbDB1PxERERER+QsDVCIiIiIiItIFBqhERERERESkCwxQiYiIiIiISBcYoBIREREREZEuMEAlIiIiIiIiXWCASkR+w2V3iYiIiEiOASoR+R1XNyIiIiIigAEqERERERER6QQDVCLyOw71JSIiIiKAASoR+RGH9hIRERGRHANUsiGxK4uIiIiIiPyEASoRERERERHpAgNUsiE45pKIiIiIiPyEASoRERERERHpAgNUIiIiIiIi0gUGqERERERERKQLDFCJiIiIiIhIFxigEhERERERkS4wQCUiIiIiIiJdYIBKREREREREusAAlYj8RvJ3AYiIiIhIVxigEpHfCX8XgIiIiIh0gQEqEfkde1KJiIiICGCASkR+xJ5TIiIiIpJjgEpERERERES6wACViIiIiIiIdIEBKhEREREREekCA1QiIiIiIiLSBQaoREREREREpAsMUImIiIiIiEgXGKASERERERGRLjBAJSIiIiIiIl1ggEpERERERES6wACViIiIiIiIdIEBKhH5jeTvAhARERGRrjBAJSIiIiIiIl1ggEpEfiP8XQAiIiIi0hUGqGTjjqFtAQDxkaF+LgkRERERETU3If4uAOnLuIocjKvI8XcxiIiIiIioGWIPKhEREREREekCA1QiIiIiIiLSBQaoREREREREpAsMUImIiIiIiEgXGKASERERERGRLjBAJSIiIiIiIl1ggEpERERERES6wACViIiIiIiIdIEBKhEREREREekCA1QiIiIiIiLSBQaoROQ37TPjAAAjSlr4uSREREREpAch/i4AETVf+akx2DJ/BAwG4e+iEBEREZEOsAeViPyKwSkRERERWWgKUIUQw4QQG4UQm4UQMxWev1YIsVYI8bMQYokQotj8eKgQ4lXzc78JIWZ5+wMQERERERFRcHAZoAohjACeBjAcQDGAiy0BqMxCSZJKJUnqCOARAI+bH78IQLgkSaUAugCYKoTI9VLZiYiIiIiIKIho6UHtCmCzJElbJEk6C+ANACPlG0iSdEz2ZzQAyfIUgGghRAiASABnAci3JSIiIiIiIgKgLUlSFv6/vfsPtvyu6zv+enfTiMQqULdWkyhLZm0nSifQbaBTpRZSDMYxccZpg4zG2g6TSoZU7ECsTEbzn2knto47Ykbj1B9xtWjbHbs2ta2tQ6fSrBoIAWM2AUkyWFaIpJTWJPLuH+e7cLjdm72wd7nve/fxmDmz3+/n+z2Xz+GT7+597jn3u8mja/uPJXnZxpOq6g1J3pTkwiSvXIbfnlXMfijJc5N8b3d/9GwmDAAAwN60bTdJ6u7D3X1ZkrckeesyfGWSP03yFUkOJPm+qnrRxudW1eur6nhVHT958uR2TQkAAIBdZCuB+niSS9f2L1nGNnMkyXXL9rcn+ffd/XR3fzjJf0tyaOMTuvvO7j7U3Yf279+/tZkDAACwp2wlUO9NcrCqDlTVhUmuT3J0/YSqOri2e02Sh5btD2b5uG9VXZTk5Ul+72wnDQAAwN5zxp9B7e5nquqmJPck2Zfkru5+oKpuS3K8u48muamqrkrydJInktywPP1wkp+uqgeSVJKf7u53n4sXAgAAwO62lZskpbuPJTm2YezWte2bN3nex7P6p2YAAADgWW3bTZIAAADgbAhUAAAARhCoAAAAjCBQAQAAGEGgAgAAMIJABQAAYASBCgAAwAgCFQAAgBEEKgAAACMIVAAAAEYQqAAAAIwgUAEAABhBoAIAADCCQAUAAGAEgQoAAMAIAhUAAIARBCoAAAAjCFQAAABGEKgAAACMIFABAAAYQaACAAAwgkAFAABgBIEKAADACAIVAACAEQQqAAAAIwhUAAAARhCoAAAAjCBQAQAAGEGgAgAAMIJABQAAYASBCgAAwAgCFQAAgBEEKgAAACMIVAAAAEYQqAAAAIwgUAEAABhBoAIAADCCQAUAAGAEgQoAAMAIAhUAAIARBCoAAAAjCFQAAABGEKgAAACMIFABAAAYQaACAAAwgkAFAABgBIEKAADACAIVAACAEQQqAAAAIwhUAAAARhCoAAAAjCBQAQAAGEGgAgAAMIJABQAAYASBCgAAwAgCFQAAgBEEKgAAACMIVAAAAEYQqAAAAIwgUAEAABhBoAIAADCCQAUAAGAEgQoAAMAIAhUAAIARBCoAAAAjCFQAAABGEKgAAACMIFABAAAYQaACAAAwgkAFAABgBIEKAADACAIVAACAEQQqAAAAIwhUAAAARhCoAAAAjCBQAQAAGEGgAgAAMIJABQAAYASBCgAAwAgCFQAAgBEEKgAAACMIVAAAAEYQqAAAAIwgUAEAABhBoAIAADCCQAUAAGAEgQoAAMAIAhUAAIARBCoAAAAjCFQAAABGEKgAAACMIFABAAAYQaACAAAwgkAFAABgBIEKAADACAIVAACAEQQqAAAAIwhUAAAARhCoAAAAjCBQAQAAGEGgAgAAMIJABQAAYASBCgAAwAgCFQAAgBG2FKhVdXVVPVhVJ6rqltMcv7Gq7q+q+6rqHVV1+TL+umXs1OOTVXXFdr8IAAAAdr8zBmpV7UtyOMlrklye5LWnAnTN3d394u6+IsntSe5Iku7++e6+Yhn/jiTv7+77tvUVAAAAsCds5R3UK5Oc6O5HuvupJEeSXLt
+Qnc/ubZ7UZI+zdd57fJcAAAA+P9csIVzLk7y6Nr+Y0letvGkqnpDkjcluTDJK0/zdf5uNoQtAAAAnLJtN0nq7sPdfVmStyR56/qxqnpZkk9093tO99yqen1VHa+q4ydPntyuKQEAALCLbCVQH09y6dr+JcvYZo4kuW7D2PVJfmGzJ3T3nd19qLsP7d+/fwtTAgAAYK/ZSqDem+RgVR2oqguzis2j6ydU1cG13WuSPLR27M8k+Tvx86cAAAA8izP+DGp3P1NVNyW5J8m+JHd19wNVdVuS4919NMlNVXVVkqeTPJHkhrUv8Yokj3b3I9s/fQAAAPaKrdwkKd19LMmxDWO3rm3f/CzP/S9JXv45zg8AAIDzxLbdJAkAAADOhkAFAABgBIEKAADACAIVAACAEQQqAAAAIwhUAAAARhCoAAAAjCBQAQAAGEGgAgAAMIJABQAAYASBCgAAwAgCFQAAgBEEKgAAACMIVAAAAEYQqAAAAIwgUAEAABhBoAIAADCCQAUAAGAEgQoAAMAIAhUAAIARBCoAAAAjCFQAAABGEKgAAACMIFABAAAYQaACAAAwgkAFAABgBIEKAADACAIVAACAEQQqAAAAIwhUAAAARhCoAAAAjCBQAQAAGEGgAgAAMIJABQAAYASBCgAAwAgCFQAAgBEEKgAAACMIVAAAAEYQqAAAAIwgUAEAABhBoAIAADCCQAUAAGAEgQoAAMAIAhUAAIARBCoAAAAjCFQAAABGEKgAAACMIFABAAAYQaACAAAwgkAFAABgBIEKAADACAIVAACAEQQqAAAAIwhUAAAARhCoAAAAjCBQAQAAGEGgAgAAMIJABQAAYASBCgAAwAgCFQAAgBEEKgAAACMIVAAAAEYQqAAAAIwgUAEAABhBoAIAADCCQAUAAGAEgQoAAMAIAhUAAIARBCoAAAAjCFQAAABGEKgAAACMIFABAAAYQaACAAAwgkAFAABgBIEKAADACAIVAACAEQQqAAAAIwhUAAAARhCoAAAAjCBQAQAAGEGgAgAAMIJABQAAYASBCgAAwAgCFQAAgBEEKgAAACMIVAAAAEYQqAAAAIwgUAEAABhBoAIAADCCQAUAAGAEgQoAAMAIAhUAAIARBCoAAAAjCFQAAABGEKgAAACMIFABAAAYQaACAAAwgkAFAABgBIEKAADACAIVAACAEQQqAAAAIwhUAAAARhCoAAAAjCBQAQAAGEGgAgAAMMKWArWqrq6qB6vqRFXdcprjN1bV/VV1X1W9o6ouXzv2V6rqv1fVA8s5z9nOFwAAAMDecMZArap9SQ4neU2Sy5O8dj1AF3d394u7+4oktye5Y3nuBUl+LsmN3f01Sb4hydPbN30AAAD2iq28g3plkhPd/Uh3P5XkSJJr10/o7ifXdi9K0sv2q5O8u7vftZz3ke7+07OfNgAAAHvNVgL14iSPru0/tox9hqp6Q1U9nNU7qG9chr86SVfVPVX1O1X15rOdMAAAAHvTtt0kqbsPd/dlSd6S5K3L8AVJvi7J65Zfv7WqXrXxuVX1+qo6XlXHT548uV1TAgAAYBfZSqA+nuTStf1LlrHNHEly3bL9WJLf7O4/6u5PJDmW5KUbn9Ddd3b3oe4+tH///q3NHAAAgD1lK4F6b5KDVXWgqi5Mcn2So+snVNXBtd1rkjy0bN+T5MVV9dzlhkl/M8l7z37aAAAA7DUXnOmE7n6mqm7KKjb3Jbmrux+oqtuSHO/uo0luqqqrsrpD7xNJblie+0RV3ZFV5HaSY939787RawEAAGAXO2OgJkl3H8vq47nrY7eubd/8LM/9uaz+qRkAAADY1LbdJAkAAADOhkAFAABgBIEKAADACAIVAACAEQQqAAAAIwhUAAAARhCoAAAAjCBQAQAAGEGgAgAAMIJABQAAYASBCgAAwAgCFQAAgBEEKgAAACMIVAAAAEYQqAAAAIwgUAEAABhBoAIAADCCQAUAAGAEgQoAAMAIAhUAAIARBCoAAAAjCFQAAABGEKgAAACMIFABAAAYQaACAAAwgkAFAABgBIEKAADACAIVAACAEQQqAAAAIwhUAAAARhCoAAAAjCBQAQAAGEGgAgAAMIJABQAAYASBCgAAwAgCFQAAgBEEKgAAACMIVAAAAEYQqAAAAIwgUAEAABhBoAIAADCCQAUAAGAEgQoAAMAIAhUAAIARBCoAAAAjCFQAAABGEKgAAACMIFABAAAYQaACAAAwgkAFAABgBIEKAADACAIVAACAEQQqAAAAIwhUAAAARhCoAAAAjCBQAQAAGEGgAgAAMIJABQAAYASBCgAAwAgCFQAAgBEEKgAAACMIVAAAAEYQqAAAAIwgUAEAABhBoAIAADCCQAUAAGAEgQoAAMAIAhUAAIARBCoAAAAjCFQAAABGEKgAAACMIFABAAAYQaACAAAwgkAFAABgBIEKAADACAIVAACAEQQqAAAAIwhUAAAARhCoAAAAjCBQAQAAGEGgAgAAMIJABQAAYASBCgAAwAgCFQAAgBEEKgAAACMIVAAAAEYQqAAAAIwgUAEAABhBoAIAADCCQAUAAGAEgQoAAMAIAhUAAIARBCoAAAAjCFQAAABGEKgAAACMIFABAAAYQaACAAAwgkAFAABgBIEKAADACAIVAACAEQQqAAAAIwhUAAAARhCoAAAAjCBQAQAAGGFLgVpVV1fVg1V1oqpuOc3xG6vq/qq6r6reUVWXL+MvrKr/s4zfV1Vv2+4XAAAAwN5wwZlOqKp9SQ4n+dtJHktyb1Ud7e73rp12d3e/bTn/W5LckeTq5djD3X3F9k4bAACAvWYr76BemeREdz/S3U8lOZLk2vUTuvvJtd2LkvT2TREAAIDzwVYC9eIkj67tP7aMfYaqekNVPZzk9iRvXDt0oKp+t6r+a1V9/VnNFgAAgD1r226S1N2Hu/uyJG9J8tZl+ENJvrK7X5LkTUnurqov3vjcqnp9VR2vquMnT57crikBAACwi2wlUB9Pcuna/iXL2GaOJLkuSbr7T7r7I8v2byd5OMlXb3xCd9/Z3Ye6+9D+/fu3OncAAAD2kK0E6r1JDlbVgaq6MMn1SY6un1BVB9d2r0ny0DK+f7nJUqrqRUkOJnlkOyYOAADA3nLGu/h29zNVdVOSe5LsS3JXdz9QVbclOd7dR5PcVFVXJXk6yRNJblie/ookt1XV00k+meTG7v7ouXghAAAA7G5nDNQk6e5jSY5tGLt1bfvmTZ73y0l++WwmCAAAwPlh226SBAAAAGdDoAIAADCCQAUAAGAEgQoAAMAIAhUAAIARBCoAAAAjCFQAAABGEKgAAACMIFABAAAYQaACAAAwgkAFAABgBIEKAADACAIVAACAEQQqAAAAIwhUAAAARhCoAAAAjCBQAQAAGEGgAgAAMIJABQAAYASBCgAAwAgCFQAAgBEEKgAAACMIVAAAAEYQqAAAAIwgUAEAABhBoAIAADCCQAUAAGAEgQoAAMAIAhUAAIARBCoAAAAjCFQAAABGEKgAAACMIFABAAAYQaACAAAwgkAFAABgBIEKAADACAIVAACAEQ
QqAAAAIwhUAAAARhCoAAAAjCBQAQAAGEGgAgAAMIJABQAAYASBCgAAwAgCFQAAgBEEKgAAACMIVAAAAEYQqAAAAIwgUAEAABhBoAIAADCCQAUAAGAEgQoAAMAIAhUAAIARBCoAAAAjCFQAAABGEKgAAACMIFABAAAYQaACAAAwgkAFAABgBIEKAADACAIVAACAEQQqAAAAIwhUAAAARhCoAAAAjCBQAQAAGEGgAgAAMIJABQAAYASBCgAAwAgCFQAAgBEEKgAAACMIVAAAAEYQqAAAAIwgUAEAABhBoAIAADCCQAUAAGAEgQoAAMAIAhUAAIARBCoAAAAjCFQAAABGEKgAAACMIFABAAAYQaACAAAwgkAFAABgBIEKAADACAIVAACAEQQqAAAAIwhUAAAARhCoAAAAjCBQAQAAGEGgAgAAMIJABQAAYASBCgAAwAgCFQAAgBEEKgAAACMIVAAAAEYQqAAAAIwgUAEAABhBoAIAADCCQAUAAGAEgQoAAMAIAhUAAIARBCoAAAAjCFQAAABGEKgAAACMIFABAAAYYUuBWlVXV9WDVXWiqm45zfEbq+r+qrqvqt5RVZdvOP6VVfXxqvrH2zVxAAAA9pYzBmpV7UtyOMlrklye5LUbAzTJ3d394u6+IsntSe7YcPyOJL+2DfMFAABgj9rKO6hXJjnR3Y9091NJjiS5dv2E7n5ybfeiJH1qp6quS/L+JA+c/XQBAADYq7YSqBcneXRt/7Fl7DNU1Ruq6uGs3kF94zL2RUnekuSHnu1/oKpeX1XHq+r4yZMntzp3AAAA9pBtu0lSdx/u7suyCtK3LsM/mORHuvvjZ3jund19qLsP7d+/f7umBAAAwC5ywRbOeTzJpWv7lyxjmzmS5MeX7Zcl+baquj3J85J8sqr+b3f/2OcyWQAAAPaurQTqvUkOVtWBrML0+iTfvn5CVR3s7oeW3WuSPJQk3f31a+f8YJKPi1MAAABO54yB2t3PVNVNSe5Jsi+9xAB9AAAMgklEQVTJXd39QFXdluR4dx9NclNVXZXk6SRPJLnhXE4aAACAvWcr76Cmu48lObZh7Na17Zu38DV+8LOdHAAAAOePbbtJEgAAAJwNgQoAAMAIAhUAAIARBCoAAAAjCFQAAABGEKgAAACMIFABAAAYQaACAAAwgkAFAABgBIEKAADACAIVAACAEQQqAAAAIwhUAAAARhCoAAAAjCBQAQAAGEGgAgAAMIJABQAAYASBCgAAwAgCFQAAgBEEKgAAACMIVAAAAEYQqAAAAIwgUAEAABhBoAIAADCCQAUAAGAEgQoAAMAIAhUAAIARBCoAAAAjCFQAAABGEKgAAACMIFABAAAYQaACAAAwgkAFAABgBIEKAADACAIVAACAEQQqAAAAIwhUAAAARhCoAAAAjCBQAQAAGEGgAgAAMIJABQAAYASBCgAAwAgCFQAAgBEEKgAAACMIVAAAAEYQqAAAAIwgUAEAABhBoAIAADCCQAUAAGAEgQoAAMAIAhUAAIARBCoAAAAjCFQAAABGEKgAAACMIFABAAAYQaACAAAwgkAFAABgBIEKAADACAIVAACAEQQqAAAAIwhUAAAARhCoAAAAjCBQAQAAGEGgAgAAMIJABQAAYASBCgAAwAgCFQAAgBEEKgAAACMIVAAAAEYQqAAAAIwgUAEAABhBoAIAADCCQAUAAGAEgQoAAMAIAhUAAIARBCoAAAAjCFQAAABGEKgAAACMIFABAAAYQaACAAAwgkAFAABgBIEKAADACAIVAACAEQQqAAAAIwhUAAAARhCoAAAAjCBQAQAAGEGgAgAAMIJABQAAYASBCgAAwAgCFQAAgBEEKgAAACMIVAAAAEYQqAAAAIwgUAEAABhBoAIAADCCQAUAAGAEgQoAAMAIAhUAAIARBCoAAAAjCFQAAABGEKgAAACMIFABAAAYQaACAAAwwpYCtaqurqoHq+pEVd1ymuM3VtX9VXVfVb2jqi5fxq9cxu6rqndV1bdu9wsAAABgbzhjoFbVviSHk7wmyeVJXnsqQNfc3d0v7u4rktye5I5l/D1JDi3jVyf5iaq6YNtmDwAAwJ6xlXdQr0xyorsf6e6nkhxJcu36Cd395NruRUl6Gf9Edz+zjD/n1DgAAABstJV3My9O8uja/mNJXrbxpKp6Q5I3JbkwySvXxl+W5K4kX5XkO9aCFQAAAD5l226S1N2Hu/uyJG9J8ta18Xd299ck+WtJvr+qnrPxuVX1+qo6XlXHT548uV1TAgAAYBfZSqA+nuTStf1LlrHNHEly3cbB7n5fko8n+drTHLuzuw9196H9+/dvYUoAAADsNVsJ1HuTHKyqA1V1YZLrkxxdP6GqDq7tXpPkoWX8wKmbIlXVVyX5y0k+sA3zBgAAYI8548+gdvczVXVTknuS7EtyV3c/UFW3JTne3UeT3FRVVyV5OskTSW5Ynv51SW6pqqeTfDLJ93T3H52LFwIAAMDutqV/8qW7jyU5tmHs1rXtmzd53s8m+dmzmSAAAADnh227SRIAAACcDYEKAADACAIVAACAEQQqAAAAIwhUAAAARtjSXXwB4Nl85Quem6+9+It3ehoAwC4nUAE4a7/55r+101MAAPYAH/EFAABgBIEKAADACAIVAACAEQQqAAAAIwhUAAAARhCoAAAAjCBQAQAAGEGgAgAAMIJABQAAYASBCgAAwAgCFQAAgBEEKgAAACMIVAAAAEYQqAAAAIwgUAEAABhBoAIAADCCQAUAAGAEgQoAAMAIAhUAAIARBCoAAAAjCFQAAABGEKgAAACMIFABAAAYQaACAAAwgkAFAABgBIEKAADACAIVAACAEQQqAAAAIwhUAAAARhCoAAAAjCBQAQAAGEGgAgAAMIJABQAAYASBCgAAwAgCFQAAgBEEKgAAACMIVAAAAEYQqAAAAIxQ3b3Tc/gMVXUyyR/s9DzO4EuT/NFOT4LPibXbvazd7mXtdi9rt3tZu93N+u1e1m5rvqq795/uwLhA3Q2q6nh3H9rpefDZs3a7l7Xbvazd7mXtdi9rt7tZv93L2p09H/EFAABgBIEKAADACAL1c3PnTk+Az5m1272s3e5l7XYva7d7WbvdzfrtXtbuLPkZVAAAAEbwDioAAAAjCNTPQlVdXVUPVtWJqrplp+dDUlWXVtVvVNV7q+qBqrp5GX9BVf16VT20/Pr8Zbyq6keXNXx3Vb107WvdsJz/UFXdsFOv6XxTVfuq6ner6leX/QNV9c5ljX6xqi5cxr9g2T+xHH/h2tf4/mX8war6xp15JeeXqnpeVb29qn6vqt5XVX/ddbc7VNX3Lr9fvqeqfqGqnuO6m6uq7qqqD1fVe9bGtu1aq6q/WlX3L8/50aqqz+8r3Ls2Wbt/uvy++e6q+tdV9by1Y6e9pjb7/nOz65azd7q1Wzv2fVXVVfWly77rbrt1t8cWHkn2JXk4yYuSXJjkXUku3+l5ne+PJF+e5KXL9p9L8vtJLk9ye5JblvFbkvzwsv1NSX4tSSV5eZJ3LuMvSPLI8uvzl+3n7/TrOx8eSd6U5O4kv
7rs/1KS65fttyX5h8v29yR527J9fZJfXLYvX67HL0hyYLlO9+3069rrjyT/Msk/WLYvTPI81938R5KLk7w/yRcu+7+U5Ltcd3MfSV6R5KVJ3rM2tm3XWpL/sZxby3Nfs9Ovea88Nlm7Vye5YNn+4bW1O+01lWf5/nOz69bj3KzdMn5pknuS/EGSL13GXHfb/PAO6tZdmeREdz/S3U8lOZLk2h2e03mvuz/U3b+zbP+vJO/L6huwa7P6BjrLr9ct29cm+Zle+a0kz6uqL0/yjUl+vbs/2t1PJPn1JFd/Hl/KeamqLklyTZKfXPYrySuTvH05ZePanVrTtyd51XL+tUmOdPefdPf7k5zI6nrlHKmqL8nqD++fSpLufqq7/ziuu93igiRfWFUXJHlukg/FdTdWd/9mko9uGN6Wa2059sXd/Vu9+q75Z9a+FmfpdGvX3f+hu59Zdn8rySXL9mbX1Gm//zzDn5ecpU2uuyT5kSRvTrJ+Ex/X3TYTqFt3cZJH1/YfW8YYYvno2UuSvDPJl3X3h5ZDf5jky5btzdbR+u6Mf57Vb/SfXPb/fJI/XvvDe30dPrVGy/GPLedbu8+/A0lOJvnpWn08+yer6qK47sbr7seT/LMkH8wqTD+W5Lfjuttttutau3jZ3jjO58d3Z/XuWfLZr92z/XnJOVBV1yZ5vLvfteGQ626bCVT2hKr6oiS/nOQfdfeT68eWv51yu+phquqbk3y4u397p+fCZ+2CrD769OPd/ZIk/zurjxl+iutupuVnFa/N6i8ZviLJRfGu9a7mWtudquoHkjyT5Od3ei6cWVU9N8k/SXLrTs/lfCBQt+7xrD53fsolyxg7rKr+bFZx+vPd/SvL8P9cPkKR5dcPL+ObraP1/fz7G0m+pao+kNVHll6Z5F9k9dGYC5Zz1tfhU2u0HP+SJB+JtdsJjyV5rLvfuey/Patgdd3Nd1WS93f3ye5+OsmvZHUtuu52l+261h7Ppz9iuj7OOVRV35Xkm5O8bvkLhuSzX7uPZPPrlu13WVZ/sfeu5fuWS5L8TlX9xbjutp1A3bp7kxxc7ph2YVY3izi6w3M67y0/g/FTSd7X3XesHTqa5NTd0m5I8m/Xxr9zuePay5N8bPmY1D1JXl1Vz1/eYXj1MsY50t3f392XdPcLs7qe/nN3vy7JbyT5tuW0jWt3ak2/bTm/l/Hra3W30QNJDmZ18wHOke7+wySPVtVfWoZeleS9cd3tBh9M8vKqeu7y++eptXPd7S7bcq0tx56sqpcv/z1859rX4hyoqquz+tGWb+nuT6wd2uyaOu33n8t1uNl1yzbr7vu7+y909wuX71sey+omnX8Y1932O9d3YdpLj6zu0vX7Wd1N7Qd2ej4enSRfl9VHm96d5L7l8U1Z/WzGf0ryUJL/mOQFy/mV5PCyhvcnObT2tb47q5sSnEjy93b6tZ1PjyTfkE/fxfdFWf2hfCLJv0ryBcv4c5b9E8vxF609/weWNX0w7oT3+VqzK5IcX669f5PVHQpdd7vgkeSHkvxekvck+dms7hrquhv6SPILWf288NNZfVP897fzWktyaPlv4eEkP5akdvo175XHJmt3IqufSzz1Pcvb1s4/7TWVTb7/3Oy69Tg3a7fh+Afy6bv4uu62+VHL/0kAAACwo3zEFwAAgBEEKgAAACMIVAAAAEYQqAAAAIwgUAEAABhBoAIAADCCQAUAAGAEgQoAAMAI/w+a7XNaKksz5AAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# plot pdf values too see outliers\n", + "plt.figure(figsize=[16,16])\n", + "plt.plot(pdfs)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "Collapsed": "false" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_11_FINAL/11_227.wav', 'Q-R-S-T-U-V-W-X-Y-Z macht es komplett!', 38, array([-4.0032621e-04, -3.3042193e-04, -3.4537757e-04, ...,\n", + " 7.7704317e-06, 2.7401828e-05, 7.1041533e-05], dtype=float32), 11.323739583333333) 0.38161673291429454\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_14_FINAL/14_496.wav', 'Ist der Kuli blau?', 18, array([ 1.2363373e-05, -3.6298752e-05, 2.1456377e-05, ...,\n", + " 3.9692618e-06, -6.7328816e-05, -9.5399046e-05], dtype=float32), 5.530666666666667) 0.38054811432758695\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_17_FINAL/17_426.wav', 'H-I-J-K-L-M-N-O-P!', 18, array([ 4.7872534e-05, -3.4164757e-05, -2.1835160e-04, ...,\n", + " -4.3899294e-05, -7.5021897e-05, -3.4489829e-05], dtype=float32), 11.167979166666667) 0.32909346861901806\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_2_FINAL/2_119.wav', 'Kann ich mich irgendwie revanchieren?', 37, array([-5.1586820e-05, -9.1837741e-05, -9.9342957e-05, ...,\n", + " -1.4234778e-04, -1.2327779e-04, -1.4810068e-04], dtype=float32), 9.728) 0.3853891360487213\n", + "('/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_5_FINAL/5_41.wav', 'Ja, eben.', 9, array([ 8.6438486e-05, 1.5554321e-04, 1.1511238e-04, ...,\n", + " -1.3761004e-05, -2.3534812e-05, -5.6318945e-06], dtype=float32), 2.1033333333333335) 0.38819509492217963\n" + ] + } + ], + "source": [ + "# print outliers\n", + "threshold = 0.39\n", + "for item, pdf in zip(items, pdfs):\n", + " if pdf < threshold:\n", + " print(item, pdf)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "Collapsed": "false" + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from IPython.display import Audio\n", + "Audio(\"/home/erogol/Data/Mozilla_DE_Thomas3/BATCH_2_FINAL/2_119.wav\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "Collapsed": "false" + }, + "source": [ + "### Plot Dataset Statistics" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "Collapsed": "false" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXAAAAEICAYAAABGaK+TAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADt0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjByYzMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy9h23ruAAAd00lEQVR4nO3dfZRcdZ3n8fcnnQYqQekgGaQbMIyyYZERoj2zODoji2gQETIehoeVGRBczuyZGcTBsERnFWedhTkRhTnj4GRUUEEEMUZkxQzrw3F1EDexwYCQgeEpdHholQaBRjrJd/+4t5LqSlV1Pde9XZ/XOTnpulV977dvuj518/397r2KCMzMLH/m9boAMzNrjgPczCynHOBmZjnlADczyykHuJlZTjnAzcxyygE+x0g6RtJjPdr2JZKu7cW25zJJSySFpPnp41slndWmdYek17RjXXVur221mwO8KyQ9LOm4rK2rxTp69kHR7yLiHRHxhV7XMZtKH+h5qT0vHOBm1rDi/wastxzgHSbpS8DBwDclPSfponT50ZL+VdKkpLskHZMu/31Jv5B0UPr4SElPSzqs2rpm2f6wpK9JmpD0kKTzS567RNKNkr4o6deS7pE0WvL86yWNpc99VdINkj4uaSFwKzCc1vGcpOH02/aotr6yuq6S9ImyZd+Q9Ffp1/9d0ni6ns2S3lplPddI+sf0v+bPSfqRpFdKuiLdb/dJWlbn/vg9Sben/yaPS/oHSXuUPB+S/kzS/elrPi1JVeqquq7ylki67PuS3pd+PSDpE+nvwYPAO8vWXfraeZL+WtIjkp5K9/0+lWpKX78yrWerpHOqrTd9fLakH5b9/H8u6X7g/nTZlZK2SHpW0kZJf5AuPx74EHBa+u9yVyO1l+yjsyQ9mu6LD1f7ufpWRPhPh/8ADwPHlTweAX4JnEDyIfq29PHi9Pm/Bb4LFIBNwF9UW1eFbR0DPJZ+PQ/YCHwE2AP4beBBYHn6/CXAi2kdA8ClwI/T5/YAHgHeDwwC7wZeAj5evp2SbVddX4U6/xDYAih9vAiYAoaBpelzw+lzS4BXV1nPNcAvgDcAe6X77SHgT9MaPg58r8798QbgaGB+us17gQtKthXALcAQyQfpBHB8lbqqrit9HMD8ktd/H3hf+vWfAfcBBwH7At8rfX3Za88BHkh/lr2BtcCXqtR0PPAkcASwEPhyut7XlK83fXw28MOyn/+2tKZCuuxM4BXpz3kh8ASwV8nvw7VlNdRVe8k++meS98GRwG+A/9jr93OW/vgIvDfOBL4VEd+KiB0RcRuwgST4IPnF3wf4CTAOfLrJ7fwuyYfC30TESxHxIMkb4vSS1/wwrWM78CWSNwrsCp+/j4jpiFib1jObausr939J3qB/kD4+Bbg9IrYC24E9gcMlDUbEwxHx7zW2+fWI2BgRLwJfB16MiC+mNdwAFI/Aa+6PdB0/johtEfEw8E/AW8q2dVlETEbEoyTBelSlgupcVzWnAldExJaI+BXJB2E17wE+GREPRsRzwCrgdFVucZwKXB0Rd0fE8yS/Z426NCJ+FRFTABFxbUT8Mv05Lyf5d1ta57rqqf1jETEVEXcBd1H996kvOcB741XAH6f/vZ6UNAm8GTgAICKmSY4sjwAuj/SQpMntDJdt50PA/iWveaLk6xeAvdI30DAwXrbtLXVss9r6ZkjX+xXgjHTRfwGuS597ALiAJGCekvSVkhZNJU+WfD1V4fHe6dc194ek/yDpFklPSHoW+F/AfrP8fHtTQZ3rqmaYmfv6kVleW/r8IyQfvPtXeW29661mxu+ApA9KulfSM+n+3IfGfs7Zaq9rf/crB3h3lAfwFpL/Kg6V/FkYEZcBSBoBPgpcDVwuac8a66plC/BQ2XZeFhEnzPqd8DgwUtbjPajJOqq5HjhF0quA/wR8befKI74cEW8mCd0A/q4N25ttf1xF0ro4NCJeThLuFXvcdai1rufTvxeUvP6VJV8/zsx9fXCN7Wwl2Uelr93GzA+xetf7fI2ainb+u6f97otIjuwXRcQQ8Ay7fs7Zfkcaqd0qcIB3x5Mkfb6ia4F3SVqeDljtpWRa3oFpYF4DfA44l+RN9z9rrKuWnwC/VjIgWEi3dYSk363je28naWX8haT5kk4Gfq+sjlfUGjCbTUSMkfSvPwusj4hJAElLJR2bfnC9SHIUvaPZ7ZSYbX+8DHgWeE7SYcB/a2FbVdcVERMkrbEz0xrOAV5d8r03Auenvw+LgItrbOd64AOSDpG0N8mR/g0Rsa3Ca28EzpZ0uKQFJAcJpe4E3i1pgZK54efW8TNuIxkLmC/pI8DLS55/ElgiqVrONFK7VeAA745Lgb9O/9v+wYjYApxMclQ2QXJkuJLk3+N84LeA/5G2Gd4LvLc4ul++rlobTXvAJ5L0aR9iV1jOGroR8RLJwOW5wCRJ3/4WkoEkIuI+kjfgg2kttVoctXwZOC79u2hP4LK03idI9seqJte/Ux3744MkrZxfk/TGb2hhc7Ot67+S/Jv/Engt8K8lz/0zsJ6k5/tTksG9aj5PMtbwA5Kf6UXgLyu9MCJuBa4gGeh9IP271KdIBqqfBL5A2tKqYT3wbeDfSNofLzKzxfLV9O9fSvppK7VbZcUZAGazknQH8JmIuLrXtZiZj8CtBklvUTKner6S059fR3LEZWYZ4LOprJalJH3ThSTzpU+JiMd7W5KZFbmFYmaWU26hmJnlVFdbKPvtt18sWbKkm5s0M8u9jRs3/iIiFpcv72qAL1myhA0bNnRzk2ZmuSep4lmzbqGYmeWUA9zMLKcc4GZmOeUANzPLKQe4mVlO+UxMM+updWPjrF6/ma2TUwwPFVi5fCkrlo30uqxccICbWc+sGxtn1dpNTE1vB2B8copVazcBOMTr4BaKmfXM6vWbd4Z30dT0dlav39yjivLFAW5mPbN1cqqh5TaTA9zMemZ4qNDQcpvJAW5mPbNy+VIKgwMzlhUGB1i5vN4b2/c3D2KaWc8UByo9C6U5DnAz66kVy0Yc2E2atYUi6fOSnpJ0d8my1ZLuk/QzSV+XNNTZMs3MrFw9PfBrgOPLlt0GHBERryO5I3XLdww3M7PGzBrgEfED4Fdly/4lIralD38MHNiB2szMrIZ2zEI5B7i1DesxM7MGtBTgkj4MbAOuq/Ga8yRtkLRhYmKilc2ZmVmJpgNc0tnAicB7osat7SNiTUSMRsTo4sW73dLNzMya1NQ0QknHAxcBb4mIF9pbkpmZ1aOeaYTXA7cDSyU9Julc4B+AlwG3SbpT0mc6XKeZmZWZ9Qg8Is6osPhzHajFzMwa4GuhmJnllAPczCynHOBmZjnlADczyykHuJlZTjnAzcxyygFuZpZTDnAzs5xygJuZ5ZQD3MwspxzgZmY55QA3M8spB7iZWU45wM3McsoBbmaWU03dkcfMzOqzbmyc1es3s3VyiuGhAiuXL2XFspG2rNsBbmbWIevGxlm1dhNT09sBGJ+cYtXaTQBtCXG3UMzMOmT1+s07w7toano7q9
dvbsv6HeBmZh2ydXKqoeWNcoCbmXXI8FChoeWNcoCbmXXIyuVLKQwOzFhWGBxg5fKlbVm/BzHNzDqkOFDpWShmZjm0YtlI2wK7nFsoZmY55QA3M8spB7iZWU7NGuCSPi/pKUl3lyzbV9Jtku5P/17U2TLNzKxcPUfg1wDHly27GPhORBwKfCd9bGZmXTRrgEfED4BflS0+GfhC+vUXgBVtrsvMzGbRbA98/4h4PP36CWD/ai+UdJ6kDZI2TExMNLk5MzMr1/IgZkQEEDWeXxMRoxExunjx4lY3Z2ZmqWYD/ElJBwCkfz/VvpLMzKwezQb4zcBZ6ddnAd9oTzlmZlaveqYRXg/cDiyV9Jikc4HLgLdJuh84Ln1sZmZdNOu1UCLijCpPvbXNtZiZWQN8JqaZWU45wM3McsoBbmaWUw5wM7Oc8g0dzKyr1o2Nd+wONf3GAW5mXbNubJxVazcxNb0dgPHJKVat3QTgEG+CA9zM2ma2o+vV6zfvDO+iqentrF6/2QHeBAe4mbVFPUfXWyenKn5vteVWmwcxzawtah1dFw0PFSp+b7XlVpsD3Mzaop6j65XLl1IYHJjxfGFwgJXLl9a1jXVj47zpsu9yyMX/mzdd9l3WjY03X/Ac4BaKmbXF8FCB8QohXnp0XWylNDMLxQOgu3OAm1lbrFy+dEbAQuWj6xXLRpoKXA+A7s4BbmZtUevouh1zvz0AujsHuJm1TaWj63a1Pupp0fQbD2KaWUfVMzulHq0OgM5FPgI3s45qV+ujlQHQucoBbmYd1c7WR7MDoHOVWyhm1lGdaH14PnjCR+Bm1lHtbn14PvguDnAz67h2tj48H3wXt1DMLFc8H3wXH4GbWSbUe7KP54Pv4gA3s6YUA3d8cooBie0RjDTZ326kr13vKfv9cOcfB7iZNWTd2DiX3HwPk1PTO5dtjwCaH1BspK9dz6Bovwx0OsDNrG7lwVhJvQOKpUfIUeU11frasw2K9stAZ0sBLukDwPuAADYB742IF9tRmJllT6VgrKQYvNXaGPV8EEDzfe1+GehsOsAljQDnA4dHxJSkG4HTgWvaVJuZZUy9ATg8VKjZxqjng6CVk336ZaCz1WmE84GCpPnAAmBr6yWZWVbVE4DF4K3Vxqj1QSBgZKjApe/+nabbHf1y4aumAzwixoFPAI8CjwPPRMS/lL9O0nmSNkjaMDEx0XylZtZzlYIRYJ6Sv0uDt1Ybo9oHwchQgYcueyc/uvjYlnrVK5aNcOm7f4eRoUJbPhCyqpUWyiLgZOAQYBL4qqQzI+La0tdFxBpgDcDo6Gi1sQozy4FGTouv1caodypgq7W2M7CzOC2xlUHM44CHImICQNJa4PeBa2t+l5nlWj3BuG5snOd/s2235cWQztulYbM6LbGVAH8UOFrSAmAKeCuwoS1VmVnm1HsEWm2GyaIFg3z0Xa/d+T15ujRsVqclNh3gEXGHpJuAnwLbgDHSVomZZU8rLYBGjkCrzTBZsMf83AR2uaxOS2xpFkpEfDQiDouIIyLiTyLiN+0qzMzapxjA4+lJM8UArvc62o3cFi2rYdeKaoOuvZ6W6KsRmvWBVu9L2UgoZzXsWpHVaYkOcLM+0OpRcSOhnNWwa0VWpyX6WihmfaDeMxMr9cmBmjNKyuVthkm9sjjoqojuTc0eHR2NDRs8UcWs2+qZGVLpNYMDgoDpHVH1+6zzJG2MiNHy5T4CN+sDxaAtvwzs0y9M84Eb7uSCG+7ceU3vUtPbKx/g5XlGyVziHrhZjjVyd/YVy0ZYuOfux2zFiC4P71ryPKNkLvERuFlO1Ts3u57rbjcqzzNK5hIHuFlOzXZ24LqxcT72zXt4+oXpKmuYXaUeeN5nlMwlDnCznKo1NbDeGybMZnp7sGjBIBHwzNT0nJlRMlc4wM1yqtrUwHkSH/vmPXWFt2DWtsrTL0xTGBzgU6cd5eDOGA9imuVUtWtzb4+YtW0yMlTg4cveyadOO4oBadZtNXLWpnWPA9wsp4pnB9YTwKVKe9grlo1w+alHVvwgKOeZJ9njADfLsRXLRtjRwPS/ocLgbqeAl58mXu0DwTNPssc9cLMcKp0aOK/CCTjlBiQuP/XIqj3s0tPEKw2AeuZJNjnAzXKmPGDrOQFnR0TdA5Bz9Vomc5ED3Cxnqt0wodKp8EWNtj+yeOEm250D3CzjStsl+xQGZ1zLpNSOCK447Si3P/qIA9wsY8oD+/mXtu28qFS18IbkKNvtj/7iADfLkPL+dq3ALlU+NdCB3R8c4GYZsW5snAtvvKuhqwIWZeHuMNZ9DnCzHls3Nr7bdbobMVLSOrH+4gA364Fin3t8cqqu65FU4wHK/uYAN+uy8j53o+G9aMEgky/4yoDmADfrqlb63JC0S3508bFtrsryygFu1gXtuLmC2yVWrqWLWUkaknSTpPsk3Svpje0qzGyuKLZMGgnvRQsGOfPog3deYGpkqOCZJrabVo/ArwS+HRGnSNoDWNCGmsxyrfREnOGhAs//ZtusN1coDmSOuK9tDWg6wCXtA/whcDZARLwEvNSesszyqdKNhmcz25UCzapppYVyCDABXC1pTNJnJS0sf5Gk8yRtkLRhYmKihc2ZZV+1C01VUxgccHhb01oJ8PnA64GrImIZ8DxwcfmLImJNRIxGxOjixYtb2JxZtq0bG6/riLuo0s0VzBrRSg/8MeCxiLgjfXwTFQLcbK6acTKOoJGZgVf4BsHWBk0HeEQ8IWmLpKURsRl4K/Dz9pVmlh3lVwh8adt2XpjesfP5RsLbp75bu7Q6C+UvgevSGSgPAu9tvSSzbGn2CoGVeC63tVNLAR4RdwKjbarFLDMavedkNUOFQRbuOd/X5raO8JmYZmWauedkJYXBAS456bUObOsYB7hZmUanAlayaMEgH32Xw9s6ywFufa/ee07WUpyF4jMprZsc4NbXmh2g9CVdLQsc4NbXmmmX+JKulhUtXY3QLO+2NnDmJHgaoGWLj8Ct75SeQVmPAYkdEW6XWOY4wK1vNHPz4MLggK9XYpnlALc5r5G74QgY8gCl5YQD3Oa08lkm9Rj7yNs7WJFZ+zjAbU4o7WsPpKe+DzRxCvzwUKFDFZq1nwPccq/aqe+NhrdnmFjeOMAt19aNjXPhjXc1fb2SeYIdPoPScsoBbrkz40YKJDcDbtRQYdAXmrLcc4BbrpS3SxoNbx9p21ziALfcaKVd4vncNhc5wC2zWm2VFGeh+Kjb5ioHuGVSs60SH2lbP3GAW+Y02iopHp37SNv6jQPcMqHZdsmAxOWnHunQtr7kALeec7vErDkOcOuJRi/pWs73nDRzgFuXNXJlwErc5zbbxQFuHdfqdEC3Sswqc4BbRzXb3/bMErPZtRzgkgaADcB4RJzYekk2VzR75qRnlpjVpx1H4O8H7gVe3oZ12RzQSp/b7RKz+rUU4JIOBN4J/C3wV22pyHKrmXtOAkgQvqSrWcNaPQK/ArgIeFm1F0g6DzgP4OCDD25xc5ZVjdy6zP1ts/ZoOsAlnQg8FREbJR1T7XURsQZYAzA6OtrcVfctc4ozS
7ZOTu28CXA9/7jub5u1TytH4G8CTpJ0ArAX8HJJ10bEme0pzbKoUn+73l63+9tm7TWv2W+MiFURcWBELAFOB77r8J7bim2SZgYnhwqDDm+zNvM8cKvJp7ybZVdbAjwivg98vx3rst5rxz0n3es26zwfgdsMrd5zEtzrNusWB7jNsHr95rqmApZaMDiPPeYP8MzUNMOeGmjWNQ5w22nd2HhDvW63Scx6ywHex2b0utOzIevlNolZ7znA+0TpiTf7FAaZ3r6D51/a1SqpFd7FgUzf5d0sWxzgfaB8YLLRa5V86rSjHNZmGdT0iTyWH80MTBaNDBUc3mYZ5QCf4xodmCxVGBxg5fKlba7IzNrFLZQ5qtlLuxYNFQa55CSfQWmWZQ7wOWC2Acp6+JrcZvnjAM+5ZgcoF6WXgPWJN2b55QDPsWbvOTkyVOBHFx/boarMrFsc4DnUSn/bA5Nmc4cDPEdaHZj0pV3N5hYHeA60cpd38IwSs7nKAZ4xpdcnKZ663sg1uQU771HpAUqzuc0BnhGVjrKLg5P1hrcvMGXWXxzgGVA+FbAZ7m+b9R8HeI81OxWwyMFt1r8c4D3QjntOemDSzBzgXVSpz93I4GTgU93NbBcHeBc0Ow3QoW1mtTjAO6CVW5WB7zVpZvVxgLdJtb52o+HtqYBmVi8HeIsqnd7e6KCkWyVm1oymA1zSQcAXgf1J8mdNRFzZrsLyoB3ztz2bxMya1coR+Dbgwoj4qaSXARsl3RYRP29TbZnW6vxtB7eZtarpAI+Ix4HH069/LeleYATIfYCX3uGm/HoirV5YyifemFm7KJo8gpyxEmkJ8APgiIh4tuy584DzAA4++OA3PPLIIy1vr5MqtUUG54m995rP0y9M133iTfF1xQtSub9tZs2StDEiRsuXtzyIKWlv4GvABeXhDRARa4A1AKOjo61/WnTY6vWbd+tpT++InUfc9fwAPso2s25oKcAlDZKE93URsbY9JfXW1smppr/X87fNrJtamYUi4HPAvRHxyfaV1H2lPe95acujUZ6/bWbd1soR+JuAPwE2SbozXfahiPhW62V1XrUTb5oJb88oMbNeaGUWyg9Jxupyp3yg0ifemFke9eWZmJUGKuvlAUozy4q+CfDSlkmjBL6/pJllzpwP8ErXKmnEyFCBH118bJurMjNr3ZwO8FavVVIYHGDl8qVtrsrMrD3mdIA30uu+4rSjdn5PpVPozcyyJpcBXn6tkv982GK+d98E45NTO09dH2hgPvfIUGFnUDuwzSwvchfg5W2R8ckprv3xozufL4Z2veHtNomZ5VXuAryVKYBFxduceR63meVZ7gK8lWuVOLDNbC7JfICX97v3KQw2NSXQ0wHNbK7JdIBX6nc3w31uM5uLMh3gzfS7fSMFM+sXmQ7w2frdQ4VBFu453/O2zawvZTrAh4cKNdsmz0xNc+dH397FiszMsmNerwuoZeXypRQGB6o+PzxU6GI1ZmbZkukj8GI7pNJd4D0waWb9LtNH4JCE+NhH3s4Vpx3FyFABkUwJ9O3LzKzfZfoIvNSKZSMObDOzEpk/Ajczs8oc4GZmOeUANzPLKQe4mVlOOcDNzHJKUeeND9qyMWkCeKRk0X7AL7pWQOvyVi/kr+a81Qv5qzlv9UL+am53va+KiMXlC7sa4LttXNoQEaM9K6BBeasX8ldz3uqF/NWct3ohfzV3q163UMzMcsoBbmaWU70O8DU93n6j8lYv5K/mvNUL+as5b/VC/mruSr097YGbmVnzen0EbmZmTXKAm5nlVM8CXNLxkjZLekDSxb2qoxpJB0n6nqSfS7pH0vvT5ftKuk3S/enfi3pdaylJA5LGJN2SPj5E0h3pfr5B0h69rrGUpCFJN0m6T9K9kt6Y5X0s6QPp78Pdkq6XtFfW9rGkz0t6StLdJcsq7lMl/j6t/WeSXp+RelenvxM/k/R1SUMlz61K690saXm3661Wc8lzF0oKSfuljzu2j3sS4JIGgE8D7wAOB86QdHgvaqlhG3BhRBwOHA38eVrjxcB3IuJQ4Dvp4yx5P3BvyeO/Az4VEa8BngbO7UlV1V0JfDsiDgOOJKk9k/tY0ghwPjAaEUcAA8DpZG8fXwMcX7as2j59B3Bo+uc84Kou1VjqGnav9zbgiIh4HfBvwCqA9D14OvDa9Hv+Mc2TbruG3WtG0kHA24FHSxZ3bh9HRNf/AG8E1pc8XgWs6kUtDdT8DeBtwGbggHTZAcDmXtdWUuOBJG/OY4FbAJGcDTa/0n7v9R9gH+Ah0sH0kuWZ3MfACLAF2JfkWvq3AMuzuI+BJcDds+1T4J+AMyq9rpf1lj33R8B16dczsgJYD7wxC/s4XXYTyYHIw8B+nd7HvWqhFN8IRY+lyzJJ0hJgGXAHsH9EPJ4+9QSwf4/KquQK4CJgR/r4FcBkRGxLH2dtPx8CTABXp22fz0paSEb3cUSMA58gObp6HHgG2Ei293FRtX2ah/fiOcCt6deZrVfSycB4RNxV9lTHavYg5iwk7Q18DbggIp4tfS6Sj9NMzMOUdCLwVERs7HUtDZgPvB64KiKWAc9T1i7J2D5eBJxM8sEzDCykwn+jsy5L+3Q2kj5M0s68rte11CJpAfAh4CPd3G6vAnwcOKjk8YHpskyRNEgS3tdFxNp08ZOSDkifPwB4qlf1lXkTcJKkh4GvkLRRrgSGJBVvnZe1/fwY8FhE3JE+vokk0LO6j48DHoqIiYiYBtaS7Pcs7+Oiavs0s+9FSWcDJwLvST90ILv1vprkg/2u9D14IPBTSa+kgzX3KsD/H3BoOnq/B8mgxM09qqUiSQI+B9wbEZ8seepm4Kz067NIeuM9FxGrIuLAiFhCsj+/GxHvAb4HnJK+LDP1AkTEE8AWSUvTRW8Ffk5G9zFJ6+RoSQvS349ivZndxyWq7dObgT9NZ0ocDTxT0mrpGUnHk7QDT4qIF0qeuhk4XdKekg4hGRj8SS9qLBURmyLityJiSfoefAx4ffo73rl93Ivmf/phegLJ6PK/Ax/uVR016nszyX8zfwbcmf45gaSv/B3gfuD/APv2utYKtR8D3JJ+/dskv+APAF8F9ux1fWW1HgVsSPfzOmBRlvcx8DHgPuBu4EvAnlnbx8D1JD366TRIzq22T0kGuj+dvg83kcywyUK9D5D0jYvvvc+UvP7Dab2bgXdkZR+XPf8wuwYxO7aPfSq9mVlOeRDTzCynHOBmZjnlADczyykHuJlZTjnAzcxyygFuZpZTDnAzs5z6//Sn0SdRnW0yAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.title(\"text length vs mean audio duration\")\n", + "plt.scatter(list(text_vs_avg.keys()), list(text_vs_avg.values()))" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "Collapsed": "false" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXAAAAEICAYAAABGaK+TAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADt0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjByYzMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy9h23ruAAAepElEQVR4nO3dfZRcdZ3n8fcnnQYqoHSQiKQhhlE3jqAQbWdxcUYXdYIOAuM4iCsrCG7O7M74tBg30RmBWWdhTkTBM44O4wMqiKDEiPgQWdHj6CBuYsCAkIEBJHR4iEqjkI50ku/+cW8l1ZV6rttd91Z9Xuf0SdW9t+/91i9V3/r19/e79yoiMDOz4pnT6wDMzKwzTuBmZgXlBG5mVlBO4GZmBeUEbmZWUE7gZmYF5QReUJJeKenBHh37AklX9uLYM6G6LSXdIemVPQypKUmLJYWkuenzb0k6K6N9h6TnZrGvFo+XWeyDxgk8Q5Lul/TqvO2ryzh69kXRKxFxdER8v9dxtCMiXhsRn+t1HM3U+vIvSux55ARuZpko/zVgs8cJPCOSvgAsAr4u6QlJ70uXHy/pXyVNSLqt/Ke5pP8k6ZeSjkyfHyvpMUnPr7evJsdfKOk6Sdsk3SfpnRXrLpB0raTPS/ptWiIYq1j/Ykkb03VflnSNpA9JOhD4FrAwjeMJSQvTX9uv3v6q4vqEpA9XLfuapP+ZPv5fksbT/WyW9Ko6+7lC0j+mf24/IelHkp4l6dK03e6StLTF9iil+3tM0s+Bl1Yda89fP5L+QNLN6f/fQ5L+QdJ+FduGpL+QdHe6zcclqc5rqLuv6pJIuuz7kt6ePh6S9OH0PXMv8CdV+67cdo6kv5b0C0mPpv9PB9eKKd1+RRrPVknn1Ntv+vxsST+sev1/Kelu4O502WWStkj6jaQNkv4wXX4S8H7gTen/4W3txF7RRmdJeiBtiw/Ue10DISL8k9EPcD/w6orno8CvgNeRfFm+Jn2+IF3/d8BNQAnYBPxVvX3VONYrgQfTx3OADcAHgf2A3wPuBZal6y8AdqRxDAEXAT9O1+0H/AJ4FzAMvAF4CvhQ9XEqjl13fzXi/CNgC6D0+XxgElgILEnXLUzXLQaeU2c/VwC/BF4CHJC2233AW9MYPgR8r8X2uBj4F+AQ4Ejg9srXWNn26fGOB+am8d0JvLti2wBuAEZIvnS3ASfVeQ1195U+D2BuxfbfB96ePv4L4K403kOA71VuX7XtOcA96es+CFgDfKFOTCcBjwDHAAcCX0z3+9zq/abPzwZ+WPX6b0xjKqXLzgSekb7O84CHgQMq3jtXVsXQUuwVbfTPJJ+ZY4HfAb/f689+r37cA59ZZwLfjIhvRsTuiLgRWE+S+CB5Mx8M/AQYBz7e4XFeSvKl8LcR8VRE3EvyJj+jYpsfpnHsAr5A8uaHvQnlYxExFRFr0niaqbe/av9C8qH7w/T5G4GbI2IrsAvYH3iBpOGIuD8i/r3BMb8aERsiYgfwVWBHRHw+jeEaoNwDb9YepwN/FxG/jogtwMfqHTA93o8jYmdE3A/8E/CKqs0ujoiJiHiAJLEe18W+6jkduDQitkTEr0m+NOt5C/CRiLg3Ip4AVgFnqHaJ43TgsxFxe0Q8SfKebNdFaVtOAkTElRHxq/R1XkLyf7ykxX21EvuFETEZEbcBt1H/vdf3nMBn1rOBP0//ZJ6QNAG8HDgcICKmSHqWxwCXRNrN6PA4C6uO837gsIptHq54vB04IP1QLATGq469pYVj1tvfNOl+vwS8OV30X4Cr0nX3AO8mSRqPSvpSRYmmlkcqHk/WeH5Q+rhZeyxk+mv8Rb0DSvoPkm6Q9LCk3wD/Bzi0arPqtjiIGlrcVz0tx5xuW7n+FyRf0ofV2bbV/dYz7f0i6b2S7pT0eNr2B9Pe62wWe0vtPQicwLNVnYC3kPz5N1Lxc2BEXAwgaRQ4H/gscImk/Rvsq5EtwH1Vx3laRLyu6W/CQ8BoVd32yA7jqOdq4I2Sng38R+C6PTuP+GJEvJwk6Qbw9xkcr1l7PMT017iowb4+QVK6eF5EPJ3ki6BmjbsFjfb1ZPrvvIrtn1XxuJ2Yt5K0Z+W2O5n+hdfqfp9sEFPZnvdIWu9+H0nPfn5EjACPs/d1Nns/tRP7wHMCz9YjJLW7siuB10talg5CHaBkWt4RacK8Avg0cC7JB+l/N9hXIz8BfqtkQLCUHusYSS9t+ptwM0kp468kzZV0KvAHVXE8o9EgWDMRsZGkfv0pYF1ETABIWiLpxPSLawdJL3p3p8ep0Kw9rgVWSZov6QjgHQ329TTgN8ATkp4P/Pcu4qq7r4jYRlJGOzON9xzgORW/ey3wzvS9Mx9Y2eA4VwPvkXSUpINIevrXRMTOGtteC5wt6QWS5pF0KCrdCrxB0jwlc8PPbeE17iQZC5gr6YPA0yvWPwIsllQv97QT+8BzAs/WRcBfp3+2vzetr55K0tPaRtIzXEHS7u8Engn8TVpmeBvwtvKIffW+Gh00rQGfTFJ7vY+9ybJp0o2Ip0gGLs8FJkjq9jeQDA4REXeRfKjuTWNpVOJo5IvAq9N/y/YnGVD8Jcmfxc8kqXl2pYX2uJDkT/P7gO+Q1PDreS9J2ee3JHX0a7oIrdm+/hvJ++NXwNHAv1as+2dgHUnN96ckg3v1fIbkNf2A5DXuoM6XVER8C7iUZFD4nvTfSh8lGdR+BPgcafmrgXXAt4F/I2njHUwvsXw5/fdXkn7aTey2d2aA2R6SbgE+GRGf7XUsZlafe+CGpFcomVM9V8kpzS8i6UWZWY75zCmDZIrXtSTzgO8F3hgRD/U2JDNrxiUUM7OCcgnFzKygZrWEcuihh8bixYtn85BmZoW3YcOGX0bEgurls5rAFy9ezPr162fzkGZmhSep5hmyLqGYmRWUE7iZWUE5gZuZFZQTuJlZQTmBm5kVlM/ENLOeWrtxnNXrNrN1YpKFIyVWLFvCaUtHex1WITiBm1nPrN04zqo1m5ic2gXA+
MQkq9ZsAnASb4FLKGbWM6vXbd6TvMsmp3axet3mHkVULE7gZtYzWycm21pu0zmBm1nPLBwptbXcpnMCN7OeWbFsCaXhoWnLSsNDrFjW6k3sB5sHMc2sZ8oDlZ6F0hkncDPrqdOWjjphd6hpCUXSZyQ9Kun2imWrJd0l6WeSvippZGbDNDOzaq3UwK8ATqpadiNwTES8iOTu013fSdzMzNrTNIFHxA+AX1ct+05E7Eyf/hg4YgZiMzOzBrKYhXIO8K0M9mNmZm3oKoFL+gCwE7iqwTbLJa2XtH7btm3dHM7MzCp0nMAlnQ2cDLwlGtzaPiIuj4ixiBhbsGCfW7qZmVmHOppGKOkk4H3AKyJie7YhmZlZK1qZRng1cDOwRNKDks4F/gF4GnCjpFslfXKG4zQzsypNe+AR8eYaiz89A7GYmVkbfC0UM7OCcgI3MysoJ3Azs4JyAjczKygncDOzgnICNzMrKCdwM7OCcgI3MysoJ3Azs4JyAjczKygncDOzgnICNzMrKCdwM7OCcgI3MysoJ3Azs4Lq6I48ZmbWmrUbx1m9bjNbJyZZOFJixbIlnLZ0NJN9O4Gbmc2QtRvHWbVmE5NTuwAYn5hk1ZpNAJkkcZdQzMxmyOp1m/ck77LJqV2sXrc5k/07gZuZzZCtE5NtLW+XE7iZ2QxZOFJqa3m7nMDNzGbIimVLKA0PTVtWGh5ixbIlmezfg5hmZjOkPFDpWShmZgV02tLRzBJ2NZdQzMwKygnczKygmiZwSZ+R9Kik2yuWHSLpRkl3p//On9kwzcysWis98CuAk6qWrQS+GxHPA76bPjczs1nUNIFHxA+AX1ctPhX4XPr4c8BpGcdlZmZNdFoDPywiHkofPwwcllE8ZmbWoq4HMSMigKi3XtJySeslrd+2bVu3hzMzs1SnCfwRSYcDpP8+Wm/DiLg8IsYiYmzBggUdHs7MzKp1msCvB85KH58FfC2bcMzMrFWtTCO8GrgZWCLpQUnnAhcDr5F0N/Dq9LmZmc2ipqfSR8Sb66x6VcaxmJlZG3wmpplZQTmBm5kVlBO4mVlBOYGbmRWUE7iZWUH5hg5mNqvWbhyfsTvUDBoncDObNWs3jrNqzSYmp3YBMD4xyao1mwCcxDvgBG5mmWnWu169bvOe5F02ObWL1es2O4F3wAnczDLRSu9668Rkzd+tt9wa8yCmmWWiUe+6bOFIqebv1ltujTmBm1kmWuldr1i2hNLw0LT1peEhVixb0tIx1m4c54SLb+Kold/ghItvYu3G8c4D7gMuoZhZJhaOlBivkcQre9flUkons1A8ALovJ3Azy8SKZUumJVio3bs+beloRwnXA6D7cgI3s0w06l1nMffbA6D7cgI3s8zU6l1nVfpopUQzaDyIaWYzqpXZKa3odgC0H7kHbmYzKqvSRzcDoP3KCdzMZlSWpY9OB0D7lUsoZjajZqL04fngCffAzWxGZV368HzwvZzAzWzGZVn68HzwvZzAzSwXWp0r7vnge7kGbmY9Vy6LjE9MEuwti9SqbfuCWHs5gZtZV7IYUGxnrnirg6KDMNDpEoqZdSyrAcV2yiKtDIoOykBnVwlc0nuAtwMBbALeFhE7sgjMzPIvqwHFdueKNxsUHZSBzo5LKJJGgXcCYxFxDDAEnJFVYGaWf1kNKGY9V3xQBjq7LaHMBUqSpoB5wNbuQzKzomjWc251ZknWc8UH5cJXHSfwiBiX9GHgAWAS+E5EfCezyMwslyqT8sGlYYaHxNSu2LO+3HNuVoeuldx/tPLETGJs9drkRddxApc0HzgVOAqYAL4s6cyIuLJqu+XAcoBFixZ1EaqZ9dLajeNc+PU7eGz71J5lE5NTDM8R8+cNM7F9alrP+YSLb2o4s2QmBxkH5cJXiojmW9X6RenPgZMi4tz0+VuB4yPif9T7nbGxsVi/fn1HxzOz3qnuTVcbkrjk9GOnJcijVn6DWtlF1C9xjI6UMuuFZy2Lm1J0StKGiBirXt5NDfwB4HhJ80hKKK8CnJ3N+lCtWR2VdkXsUx6ZI7GrRgdx4UipcIOMeZ2W2PEslIi4BfgK8FOSKYRzgMszisvMcqSVxFouj5STXa3kXa5DF+1syqxuSpG1rmahRMT5wPkZxWJmPVavTFCv5FFt68Rk3d76kMRFb3jhnh5rkQYZ8/oXg0+lNzOg8fVIas3TrqVReWR3xJ7kfdrSUS56wwsZHSkhktp3ZXLPm7z+xeBT6c0MaFwmKA8slnvnI/OGeWLHTqZ27zt9cPW6zS3NwS7S3XXyOi3RCdzMgPrlgPGJSU64+KY9ZZWPvum4uvO4i1geaUVepyU6gZsZUH9qn2DP8urZF7NxVmVe5PEvho7ngXfC88DNeqfZPOZac70FNedy53m+dj+qNw/cg5hmA6DZDRPKyX1yahdDEpAk6Xrdu17PvrCEE7jZAGg0QFmZ3CE5Kadcsx7N6ewLSziBmxVYq3edaTRAecH1d9RN7llf5tWy5QRuVlBZ3EcSkgtS1TI+Mcl7rrmVA4bnMFIaLsR87UHjBG5WUN3eR7IVATy2fYrf7dzNR990HD9aeaKTd454GqFZQTU7vbs8MDk+MclQnQtLtaofb0fWD9wDNyuoRqd31xqY7JZnnuSPE7hZQdUrizz5u51c+PV9Bya75Zkn+eMSilkBVc7bniOouCRJ3UHJVo2Uhvndzt19dSp8v3IP3KxgqssjuzuojoyOlLj0TcfVnCJ4wSlHF+pKgYPMPXCzgml2d5xmyr3pZtcsccLOPydws4JpZzCxPPuk/O9ojSTtRF1cTuBmBdPq3XFg72nxLoH0J9fAzXKm2enxtWafDA+JkdJwzf3l4d6NNjPcAzfLkVp3P1/x5du48Ot3MLF9ioNLw0jsuWpgdVnkqJXfqHkFQc/h7k/ugZvlSK0ByqndwWPbpwiSKYKPbU+mCVZeNbBcHsnrvRttZjiBm/VAvTJJuz3l6vKIrx44WFxCMZtltcok77nmVt59za0d7a8y6ffr7cysNidws1lWq0zSzZVKiny3d+uOSyhmsyzLAUWXRwZbVz1wSSPAp4BjSDoR50TEzVkEZtZPKi/t2o3S8BwOGB5iYvuUyyPWdQnlMuDbEfFGSfsB8zKIyayv1Lrbe6uGJHZHOFlbTR0ncEkHA38EnA0QEU8BT2UTlln/6PTaJT6D0prppgd+FLAN+KykY4ENwLsi4slMIjMrqHK5pDwLpJWyyWi6Xb1rlpjV0k0Cnwu8GHhHRNwi6TJgJfA3lRtJWg4sB1i0aFEXhzPLv1pTBEXjWSajIyV+tPLEWYnP+ks3s1AeBB6MiFvS518hSejTRMTlETEWEWMLFizo4nBm+bZ24zjnXXtbW1MEPYvEutFxDzwiHpa0RdKSiNgMvAr4eXahmRXD2o3jXHD9HS3fCafcI3eZxLrV7SyUdwBXpTNQ7gXe1n1IZvlWWeMemTfMEzt2MtXGbXEWumRiGekqgUfErcBYRrGY5V51jbt8Yal2+MqAlhWfSm9WQ/VMknKpo9vbmYGvDGjZcQI3q9Lomtzt9LiH5wgEU7v2
llc8aGlZcgI3q9Lomtytmj9vmPNff/Se/fnKgDYTnMDNqnRTox6SuOT0Y6claSdsmylO4DbwKuvdB5eGaXrmTR0+9d1mmxO4DbTqenerc7kBRkrDHLj/XJdHrGecwG2gdXOhqQtOOdoJ23rKCdwGWif17vIApZO39ZoTuA2cypr3nPTqf434mtyWV07gNjBqXbOkWfL2wKTlmRO4DYRW74rjW5ZZkTiBW1+pvPdk+eYI8+cNM7F9qqWZgYccuL8vNGWF4QRufWHtxvF9TnUvl0faOYPSF5qyInECt8Lr5qbB1XyhKSuSbu7IY5YLWVwhEHyhKSseJ3ArtLUbx1u6aXC1IYkzj1/E6EgJkdwdx7NNrGhcQrHCqDVAqQ7246mB1i+cwK0Qquvc5QHKRjNLJIhIzpyMgMcnPTXQ+osTuOVOrZ72UAtnTFa69E3HOUlb33MCt1yp19NuJ3mPjpScvG0geBDTcqXbGSWeSWKDxD1wy4XKskm7yvdfGHV92waME7j1XDcn4jhp2yBzArdZVWuAshOjIyVfs8QGnhO4zZp6A5SN1Lo9pevcZomuE7ikIWA9MB4RJ3cfkvWbTuvb5V525Q0YPI/bbK8seuDvAu4Enp7BvqzPdFrfruxln7Z01AnbrIauphFKOgL4E+BT2YRj/WTtxnHOu/a2tpP3kORT3c1a0G0P/FLgfcDTMojF+kSta3O3ytcpMWtdxwlc0snAoxGxQdIrG2y3HFgOsGjRok4PZwVQ656TjVSfJu8pgWbt6aYHfgJwiqTXAQcAT5d0ZUScWblRRFwOXA4wNjbW2Zwxy712at3uZZtlo+MEHhGrgFUAaQ/8vdXJ2/pPt/ecdH3bLDueB24tqzePu9Vat3veZtnKJIFHxPeB72exL8uXyjnYc7o4c3KkNMwFpxzt5G2WIffAra5OzpysNn/eMOe/3onbbCY4gds+urkyYNmQxCWnH+vEbTaDnMBtmm6uDFjmWrfZ7HACtz3KZ062WyrxPSfNesMJ3Do+c9KXdDXrLSfwAdbumZOVfElXs95zAh8QldMBDy4NM7VrN08+1VqduzQ8xJ+9ZJTv3bXNl3Q1yxEn8AFQPTDZTo/bZ06a5ZcTeJ/rdGASPJvELO+cwPtUN/Vt8JmTZkXgBN5nurkWN/jMSbMicQLvA5VnTta6CXAr3OM2Kx4n8AKrVSZpJXkLGEkvAesZJWbF5QReQN3O3/bApFl/cAIvkG4HJl3fNusvTuAF0O3ApOvbZv3JCTznOrk6YHkg0zcJNutvTuA5U+uek+1wmcRscDiB50StMkk7yduJ22zwOIH3mE+8MbNOOYH3gE+8MbMsOIHPolq97XaTtwcmzazMCXwWdFsmAZ+AY2b7cgKfAdNKJIIOruQKeDqgmTXmBJ6x6nnbnSRv17fNrBUdJ3BJRwKfBw4j6SheHhGXZRVY0VT2utvlnraZdaKbHvhO4LyI+KmkpwEbJN0YET/PKLZC8I0TzKxXOk7gEfEQ8FD6+LeS7gRGgYFI4L4+iZn1WiY1cEmLgaXALVnsL8984o2Z5UXXCVzSQcB1wLsj4jc11i8HlgMsWrSo28P1RDcn3pSvZ+L6tpllrasELmmYJHlfFRFram0TEZcDlwOMjY11OKGud/aZVdLi73netpnNtG5moQj4NHBnRHwku5B6r5sZJeAyiZnNjm564CcA/xXYJOnWdNn7I+Kb3YfVO51cf7vMA5NmNpu6mYXyQ5IpzH1j7cZxzrv2travwe3EbWa9MLBnYpbLJFsnJjm4NMzUrt08+VRrvW6feGNmeTCQCby6TNLOSThDEpecfqyTtpn13MAk8Moe95wOblUGnlliZvkyEAm8usfdSfJ2ucTM8qaQCbyyN72wSWLtdGAS3OM2s3wrXAKv7k2PT0yyas2mPesrE/t/fv4Crtsw3lHy9swSM8u7wiXw1es27zNHe3JqFxd+/Q52TO2eltiv+vEDLZ85OX/eMBPbp5r26M3M8qJwCXxrnbMja11cqtXkPTpS4kcrT+wiKjOz2Ten1wG0a+FIKdP9lYaHWLFsSab7NDObDbnvgVcPWJbr2p2c6g4wPCQO3G8uj0+6XGJmxZbrBF5rwPK6DeP82UtGufLHD7S9P08FNLN+kusEXm/A8upbtjB/3nBbN1VwndvM+k2ua+D1Bix3RfDEjp0MD7V2LS3Xuc2sH+U6gTcasJzaHRy431xG022GlCTz0ZESZx6/iNGREkqf+2QcM+tHuS6hrFi2pOG1uR+fnOLW8/94lqMyM8uHXPfAT1s6ykVveOGe3nW1rKcUmpkVSa4TOCRJ/JLTj6U0PDRtuevaZjbocl1CKSvXr1u9gJWZ2SAoRAKHJIk7YZuZ7ZX7EoqZmdXmBG5mVlBO4GZmBeUEbmZWUE7gZmYFpejgdmMdH0zaBvyiYtGhwC9nLYDuFS1eKF7MRYsXihdz0eKF4sWcdbzPjogF1QtnNYHvc3BpfUSM9SyANhUtXihezEWLF4oXc9HiheLFPFvxuoRiZlZQTuBmZgXV6wR+eY+P366ixQvFi7lo8ULxYi5avFC8mGcl3p7WwM3MrHO97oGbmVmHnMDNzAqqZwlc0kmSNku6R9LKXsVRj6QjJX1P0s8l3SHpXenyQyTdKOnu9N/5vY61kqQhSRsl3ZA+P0rSLWk7XyNpv17HWEnSiKSvSLpL0p2SXpbnNpb0nvT9cLukqyUdkLc2lvQZSY9Kur1iWc02VeJjaew/k/TinMS7On1P/EzSVyWNVKxblca7WdKy2Y63XswV686TFJIOTZ/PWBv3JIFLGgI+DrwWeAHwZkkv6EUsDewEzouIFwDHA3+ZxrgS+G5EPA/4bvo8T94F3Fnx/O+Bj0bEc4HHgHN7ElV9lwHfjojnA8eSxJ7LNpY0CrwTGIuIY4Ah4Azy18ZXACdVLavXpq8Fnpf+LAc+MUsxVrqCfeO9ETgmIl4E/BuwCiD9DJ4BHJ3+zj+m+WS2XcG+MSPpSOCPgQcqFs9cG0fErP8ALwPWVTxfBazqRSxtxPw14DXAZuDwdNnhwOZex1YR4xEkH84TgRsAkZwNNrdWu/f6BzgYuI90ML1ieS7bGBgFtgCHkFxL/wZgWR7bGFgM3N6sTYF/At5ca7texlu17k+Bq9LH03IFsA54WR7aOF32FZKOyP3AoTPdxr0qoZQ/CGUPpstySdJiYClwC3BYRDyUrnoYOKxHYdVyKfA+YHf6/BnARETsTJ/nrZ2PArYBn03LPp+SdCA5beOIGAc+TNK7egh4HNhAvtu4rF6bFuGzeA7wrfRxbuOVdCowHhG3Va2asZg9iNmEpIOA64B3R8RvKtdF8nWai3mYkk4GHo2IDb2OpQ1zgRcDn4iIpcCTVJVLctbG84FTSb54FgIHUuPP6LzLU5s2I+kDJOXMq3odSyOS5gHvBz44m8ftVQIfB46seH5EuixXJA2TJO+rImJNuvgRSYen6w8HHu1VfFVOAE6RdD/wJZIyymXAiKTyrfPy1s4PAg9GxC3p86+QJPS8tvGrgfsiYltETAFrSNo9z21cVq9
Nc/tZlHQ2cDLwlvRLB/Ib73NIvthvSz+DRwA/lfQsZjDmXiXw/wc8Lx29349kUOL6HsVSkyQBnwbujIiPVKy6HjgrfXwWSW285yJiVUQcERGLSdrzpoh4C/A94I3pZrmJFyAiHga2SFqSLnoV8HNy2sYkpZPjJc1L3x/leHPbxhXqten1wFvTmRLHA49XlFp6RtJJJOXAUyJie8Wq64EzJO0v6SiSgcGf9CLGShGxKSKeGRGL08/gg8CL0/f4zLVxL4r/6Zfp60hGl/8d+ECv4mgQ38tJ/sz8GXBr+vM6krryd4G7gf8LHNLrWGvE/krghvTx75G8we8Bvgzs3+v4qmI9DliftvNaYH6e2xi4ELgLuB34ArB/3toYuJqkRj+VJpJz67UpyUD3x9PP4SaSGTZ5iPcekrpx+bP3yYrtP5DGuxl4bV7auGr9/ewdxJyxNvap9GZmBeVBTDOzgnICNzMrKCdwM7OCcgI3MysoJ3Azs4JyAjczKygncDOzgvr/34J4JquheD8AAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.title(\"text length vs median audio duration\")\n", + "plt.scatter(list(text_vs_median.keys()), list(text_vs_median.values()))" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "Collapsed": "false" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAEICAYAAABPgw/pAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADt0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjByYzMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy9h23ruAAAgAElEQVR4nO3df5QlZX3n8fdnepqhZ3RtlEnC9DAOybKwIgra/lqSFX8giAYImhWM2ZhgOJsNG00MyRATRZMcxiWJmrNEQ4xhYwxo1IwTQckP8CRrlNCTARUEMwF0psEwKk0SZpSeme/+UXXHO7er6tb9Xbfu53XOHPreqlv13KLv9z79ref5PooIzMxs/K0adQPMzKw/HNDNzGrCAd3MrCYc0M3MasIB3cysJhzQzcxqwgHdxoKkMyXtGdG5r5T0J6M4t1knHNCta5IekPTSqh2rx3aM7IujpR0bJX1M0jckPSrpS5JeL+mHJP17+u8xSdH0+N8lbZL0GUnflvRvkv5V0g5JWyStGfX7ssFyQDerpg8Cu4GnAk8Bfhz4l4j4u4h4QkQ8ATgl3Xe28VxEfC197rKIeCJwHPBm4CLgJkka7tuwYXJAt65I+iCwCfiLtGf4S+nzz5f095KWJN0p6cz0+f+S9jaPTx8/U9Ijkk7OO1ab829Ie7B7Jd0v6eeatl0p6SOS/jjtpd4lab5p+7Mk7Uy3/ZmkD0v6DUnrgE8BG5p6vBvSlx2Vd7yWdr1X0m+1PPcJSb+Q/vzLkhbT49wr6SU5b/E5wHUR8VhEHIiInRHxqXbXpVX6+s8A5wEvAF7R6TFsjESE//lfV/+AB4CXNj2eA74JnEvSWTgrfbw+3f6bwC3ADPBFkl5k5rEyznUmsCf9eRWwA3grcBTw/cB9wNnp9iuBb6ftmAKuAj6fbjsK+CrwRmAauBB4HPiN1vM0nTv3eBnt/K8kPWulj48B9gMbgJPSbRvSbZuBH8g5zl8DnyXpWW/K2WczEMDqluc/A7whY/+/Bd456t8b/xvcP/fQrZ9eB9wUETdFxKGI+CtggSQQQhIYnwT8A7AIXNPleZ5D8iXxjoh4PCLuA/6AJPg1/L+0HQdJ0hfPTJ9/PrAa+N2IWI6Ij6ftaSfveK3+jiTI/lD6+NXA5yLiQeAgsAZ4mqTpiHggIv455zg/mh7r14D7Jd0h6Tkl2lnkQeDJPR7DKswB3frpqcCPpumWJUlLwA+S5HGJiGXgOuDpwG9HRLeV4Z5KkhZpPs+vAN/btM/Xm37eBxwtaTVJT3mx5dy7S5wz73hHSI97A3Bx+tRrgQ+l23YBbyL5YntY0g1NKZ3W4zwSEVsi4pT0fd0BbOsxBz4HfKuH11vFOaBbL1oD8m7ggxEx2/RvXURsBZA0B7wN+CPgt1tGXXQS3HcD97ec54kRcW7bV8JDwFxLYDy+y3bkuR54taSnAs8DPnb44BF/GhE/SPKlFMA72x0sIr4B/BbJl1FXPez03sWzSXr9VlMO6NaLfyHJXzf8CfDDks6WNCXp6HQY4MY0gF4H/CFwCUlg/fWCYxX5B+Df0huMM+m5nl4yJfE5ktTHZZJWSzofeG5LO54i6Ukl27JCROwEvgG8H7g5IpYAJJ0k6cXpF9m3SXLrh7KOIemd6XtaLemJwM8AuyLim520RdJaSS8EPkFy3W7q9n1Z9TmgWy+uAn41TXv8YkTsBs4nSX/sJelJX07ye/ZzwPcAv5amJX4S+ElJP5R1rKKTpnnsVwKnAffz3eDZNghHxOMkN0IvAZZI8v6fBL6Tbr+HpId9X9qWzJRICX8KvDT9b8MaYGva3q+TXI8rcl6/FvjztI33kfToz+vg/P9H0r+RfEG9m+SvhHMiIvMLxOpB0XUa06weJN0GvC8i/mjUbTHrhXvoNnEkvVDS96XpjJ8AngF8etTtMuvVirv0ZhPgJOAjwDqSdMarI+Kh0TbJrHdOuZiZ1YRTLmZmNTGylMuxxx4bmzdvHtXpzczG0o4dO74REeuzto0soG/evJmFhYVRnd7MbCxJ+mreNqdczMxqwgHdzKwmHNDNzGrCAd3MrCYc0M3MasIzRc0m0Ladi1x98708uLSfDbMzXH72SVxw+tyom2U9ckA3mzDbdi5yxce/yP7lgwAsLu3nio9/EcBBfcw55WI2Ya6++d7Dwbxh//JBrr753hG1yPrFAd1swjy4tL+j5218OKCbTZgNszMdPW/jwwHdbMJcfvZJzExPHfHczPQUl5990ohaZP3im6JmE6Zx49OjXOrHAd1sAl1w+pwDeA21TblI+oCkhyV9KWf7j0n6gqQvSvp7Sc/sfzPNzKydMjn064BzCrbfD7wwIk4Ffh24tg/tMjOzDrVNuUTE30raXLD975sefh7Y2HuzzMysU/0e5XIJ8Km8jZIulbQgaWHv3r19PrWZ2WTrW0CX9CKSgP7LeftExLURMR8R8+vXZ66gZGZmXerLKBdJzwDeD7w8Ir7Zj2OamVlneu6hS9oEfBz48Yj4Su9NMjOzbrTtoUu6HjgTOFbSHuBtwDRARLwPeCvwFOD3JAEciIj5QTV42Fxm1Kw9f06qocwol4vbbH8D8Ia+tahCXGbUrD1/TqrDtVwKuMyoWXv+nFSHA3oBlxk1a8+fk+pwQC/gMqNm7flzUh0O6AVcZtSsPX9OqsPVFgu4zKhZe/6cVIciYiQnnp+fj4WFhZGc28xsXEnakTc03CkXM7OacMrFzLrmCUXV4oBuZl3xhKLqccrFzLriCUXV44BuZl3xhKLqcUA3s654QlH1OKCbWVc8oah6fFPUzLriCUXV44BuZl274PQ5B/AKcUA3qwmPCTcHdLMa8JhwA98UNasFjwk3cEA3qwWPCTdwQDerBY8JN3BAN6sFjwk38E1Rs1rwmHCDEgFd0geAVwIPR8TTM7YLeA9wLrAPeH1E/GO/G2pmxTwm3MqkXK4DzinY/nLgxPTfpcB7e2+WmZl1qm1Aj4i/Bb5VsMv5wB9H4
[... base64 PNG data truncated (scatter plot: "text length vs STD") ...]\n",
+       "text/plain": [
+        "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.title(\"text length vs STD\")\n", + "plt.scatter(list(text_vs_std.keys()), list(text_vs_std.values()))" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "Collapsed": "false" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAEICAYAAACktLTqAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADt0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjByYzMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy9h23ruAAAgAElEQVR4nO3df5xcdX3v8dc7mwUX8LIgW4QlkVQpCCIJbBUb2wL+iICVFBWwatHSm95efVRaSg3aW/FHH8SiYn3cFi+KgooCFRop0CIl2F65oiaG35AaJZisAYKygGaFTfK5f5wzydnJObuzs/PjzMz7+XjsY2fOOXPmu2dmP/Od7/fz/X4VEZiZWXeZ0+4CmJlZ4zm4m5l1IQd3M7Mu5OBuZtaFHNzNzLqQg7uZWRdycLeGk3SCpE1teu4LJX2lHc89U5L+VdLZ7S6HdScH9x4haYOk15btXLMsR9s+RIpI+jtJy9LbGyTtW3RsRJwcEVfO8vneJenbszmHdScHd7PGOg5YLWkImIiIp9pdIOtNDu49QNKXgfnAv0j6haS/SrcfL+n/SRqTdLekE9LtvyXpCUnz0vvHSHpS0hFF55rm+Q+WdJ2kLZIelvRnmX0XSrpW0pckPSPpfkkjmf3HSlqb7vsnSddI+pikvYF/BQ5Oy/ELSQenD9uj6HxV5bpU0ieqtn1D0l+kt98vaTQ9zzpJr5nm7xRwFHAfMAKsneb4b0n64/T2uyR9W9In0mv9sKSTM8e+S9KP07I8LOntkl4KfBZ4Vfr3j6XHnppes6clbZR0YeY8h0oKSWdL+kn6On8ws79P0gck/Sh9rjWZ98ERkm6V9PP0epyRedwpkh5IHzMq6S+n+tutBSLCPz3wA2wAXpu5Pwz8DDiF5EP+den9oXT/3wKrgAHgXuC9RefKea4TgE3p7TnAGuBvgD2AXwd+DCxJ918I/CotRx9wEXBnum8P4BHgfUA/cDrwHPCx6ufJPHfh+XLK+TvARkDp/f2AceBg4PB038HpvkOBFxec5zBgDHga2Jbe/lV6rjHgnQWP+xbwx+ntdwETwH9Py/2nwE8BAXun5z48PfYg4KjM476dc/2PTq/9y4HHgKWZvyOAz6Wv7THAs8BL0/3np6/34elzHwO8IC3DRuDdwFxgEfAEcGT6uM3Ab2eu47Htfs/3+o9r7r3rHcDNEXFzROyIiFuB1SRBEZIguS/wPWAU+Ic6n+c3ST4wPhIRz0XEj0kCy1mZY76dlmM78GWSgAJwPEkg+UxETETE9Wl5plN0vmr/lyTQ/XZ6/y3AdyLip8B2YE/gSEn9EbEhIn6Ud5KI+GFEDAKfAc4jCW7/BbwkIgYj4ss1lBngkYj4XFruK0mC+IHpvh3AyyQNRMTmiLi/6CQR8a2IuDd9Xe8Bvgb8btVhH46I8Yi4G7ibXdfoj4G/joh1kbg7In4GvBHYEBFfjIhtEbEWuA54a/q4ifRa/beIeDIiflDj32xN4uDeu14EvDVtkhlLv9K/miSgEBETwBXAy4BPRkS9M8y9iKTpJPs8H2BX0AJ4NHN7K/A8SXNJatCjVc+9sYbnLDrfJOl5rwbelm76A+CqdN964FySD7nHJV2dafaZpNK0BVwAfISklv1S4H5JX6+hvLuVOyK2pjf3iYhfAmcC/wPYLOkmSUcUnUTSKyXdnjaDPZU+7oCi5yK5Rvukt+cBeR9iLwJeWfU6vh14Ybr/zSQVg0ck/YekV9XyB1vzOLj3jurgvBH4clqzrPzsHRErACQNAx8Cvgh8UtKeU5xrKhuBh6ue5/kRccq0j0y+6g+nbdkV8+osR5GvAW+R9CLglSS10eTkEV+NiFeTBLYAPp53goj4LeAI4IcRsS/w18DH07/1LQ0oIxFxS0S8juTD9yGSbz+Qfw2+CtwAzEvL81mSJpZabAReXLD9P6pex30i4k/T8n0/Ik4Dfg1YCVxb699mzeHg3jseI2nvrvgK8HuSlqSdaM9Tklp4SBpMrwAuB84hCbIfneJcU/ke8EzaOTmQPtfLJP1mDY/9DknzyHslzZV0GvCKqnK8QFOkG04nbV54Avg8cEtEVDolD5d0UvqhVmk/3zHFqY5jVwfqsSRNXA0h6UBJp6WdyM8Cv8iU5THgEEl7ZB7yfODnEfErSa8g+UZSq88DH5V0mBIvl/QC4EbgNyS9U1J/+vObkl4qaY+0g3ff9Bvf00x9rawFHNx7x0XAX6dfqf8yIjYCp5E0kWwhqZmdT/Ke+DOSGtj/Spsu3g28W9Jv551rqidN24/fCCwEHmZXIJ02IEfEcySdqOeQdEy+gyTIPJvuf4ik5v3jtCy5zSY1+Crw2vR3xZ7AirS8j5JcjwumOMdxQKWd+ViSTuRGmQP8BUkH689J2s//NN23CrgfeFTSE+m2/wl8RNIzJB3ZM6lFfyo9/pskQfpyYCAingFeT9JX8lOSa/JxkusE8E5gg6SnSZqB3j7zP9MaqZIlYNYRJH0X+GxEfLHdZTErM9fcrdQk/a6kF6bNMmeTpPb9W7vLZVZ2u2UQmJXM4STNBHuT5Me/JSI2t7dIZuU3bbOMpOcB/0nStjYX+HpEfEjSApI0sheQtC++MyKeSzugvkTSBvkz4MyI2NC8P8HMzKrV0izzLHBSRBxD0in2BknHk3SmXBIRLwGeJOn0Iv39ZLr9EgrSx8zMrHlm1KEqaS/g2yQ99TcBL4yIbemAhQsjYomkW9Lb30kHjjxKMkKx8IkOOOCAOPTQQ2fzd5iZ9Zw1a9Y8ERFDeftqanOX1EfS9PISkmHoPwLGImJbesgmkrlKSH9vBEgD/1MkTTdPVJ1zGbAMYP78+axe3bC0YDOzniDpkaJ9NWXLRMT2iFgIHEIyiKRw6HOtIuKyiBiJiJGhodwPHjMzq9OMUiHT0Xu3A68CBjPzdRxCMrkU6e/KFKFzSQar/KwhpTUzs5pMG9wlDUkaTG8PkEwN+yBJkK/Mm3E28I309g3pfdL9q2Yx6ZSZmdWhljb3g4Ar03b3OcC1EXGjp
[... base64 PNG data truncated (scatter plot: "text length vs # instances") ...]\n",
+       "text/plain": [
+        "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.title(\"text length vs # instances\")\n", + "plt.scatter(list(text_len_counter.keys()), list(text_len_counter.values()))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "Collapsed": "false" + }, + "source": [ + "### Check words frequencies" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "w_count_df = pd.DataFrame.from_dict(w_count, orient='index')\n", + "w_count_df.sort_values(0, ascending=False, inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "Collapsed": "false", + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
27102 rows × 1 columns
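The word-frequency check above boils down to a few pandas calls on the `w_count` dictionary that earlier cells of the notebook build from the dataset transcripts. Below is a minimal standalone sketch of the same steps; the `transcripts` list and the sample strings are placeholders for illustration, while `w_count`, `text_len_counter`, and `w_count_df` mirror the names used in the notebook.

```python
# Minimal sketch of the frequency checks above (placeholder data; the notebook
# builds w_count and text_len_counter from the dataset metadata in earlier cells).
from collections import Counter

import matplotlib.pyplot as plt
import pandas as pd

transcripts = [
    "das ist ein beispiel.",
    "das ist nicht der einzige satz.",
]

# word occurrence counts and per-text-length instance counts
w_count = Counter(w for text in transcripts for w in text.lower().split())
text_len_counter = Counter(len(text) for text in transcripts)

# word frequency table, sorted as in the notebook
w_count_df = pd.DataFrame.from_dict(w_count, orient="index")
w_count_df.sort_values(0, ascending=False, inplace=True)
print(w_count_df.head())

# look up a single word's count (the notebook checks w_count_df.at['auto', 0])
print(w_count_df.at["das", 0])

# "text length vs # instances" scatter, as plotted above
plt.title("text length vs # instances")
plt.scatter(list(text_len_counter.keys()), list(text_len_counter.values()))
plt.show()
```

Words that appear only once (as in the tail of the table above) are often the first place to look for transcription or encoding issues in the metadata.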

\n", + "
" + ], + "text/plain": [ + " 0\n", + "die 3066\n", + "der 2362\n", + "das 1794\n", + "ist 1767\n", + "nicht 1467\n", + "... ...\n", + "wertvollsten, 1\n", + "blutgruppe 1\n", + "gelenkschmerzen 1\n", + "entgeltbefreiung 1\n", + "anrã¼cken. 1\n", + "\n", + "[27102 rows x 1 columns]" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "w_count_df" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": { + "Collapsed": "false" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "18" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# check a certain word\n", + "w_count_df.at['auto', 0]" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "Collapsed": "false" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/erogol/miniconda3/lib/python3.7/site-packages/matplotlib-3.2.0rc3-py3.7-linux-x86_64.egg/matplotlib/backends/backend_agg.py:214: RuntimeWarning: Glyph 159 missing from current font.\n", + " font.set_text(s, 0.0, flags=flags)\n", + "/home/erogol/miniconda3/lib/python3.7/site-packages/matplotlib-3.2.0rc3-py3.7-linux-x86_64.egg/matplotlib/backends/backend_agg.py:214: RuntimeWarning: Glyph 156 missing from current font.\n", + " font.set_text(s, 0.0, flags=flags)\n", + "/home/erogol/miniconda3/lib/python3.7/site-packages/matplotlib-3.2.0rc3-py3.7-linux-x86_64.egg/matplotlib/backends/backend_agg.py:183: RuntimeWarning: Glyph 159 missing from current font.\n", + " font.set_text(s, 0, flags=flags)\n", + "/home/erogol/miniconda3/lib/python3.7/site-packages/matplotlib-3.2.0rc3-py3.7-linux-x86_64.egg/matplotlib/backends/backend_agg.py:183: RuntimeWarning: Glyph 156 missing from current font.\n", + " font.set_text(s, 0, flags=flags)\n" + ] + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYIAAAG5CAYAAACDRzPnAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADt0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjByYzMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy9h23ruAAAgAElEQVR4nOy9e7hdVXX3/xm5h6tcAkKCXCOCFblEtGq13kCxBa0Vwb6Vqr9ivdVq1R+2r0Xb+mp98V61pRVFCwK2KgiIIoIKKCQgJBAICYRAQkhC7vecnDPeP8aYrnV29t5nn+SEc5L9/TzPfvbea80111xzzTnGnGPMi7k7QgghupdRw50AIYQQw4sUgRBCdDlSBEII0eVIEQghRJcjRSCEEF2OFIEQQnQ5Y4Y7Ae048MAD/YgjjhjuZAghxC7FXXfd9ZS7T+o0/IhWBEcccQQzZswY7mQIIcQuhZktGEx4mYaEEKLLkSIQQoguR4pACCG6nBHtIxBCiOGip6eHhQsXsmnTpuFOSksmTJjAlClTGDt27A7FI0UghBBNWLhwIXvvvTdHHHEEZjbcydkGd2f58uUsXLiQI488cofikmlICCGasGnTJg444IARqQQAzIwDDjhgSHosUgRCCNGCkaoECkOVPikCIYQYwdxwww0ce+yxHHPMMXzmM5/ZKfeQj0AIITrgiAuuG9L4Hv3M6wcM09vby3vf+15uvPFGpkyZwgte8ALOPPNMjj/++CFNy4A9AjObYGZ3mtm9Zna/mX0yjx9pZneY2Twzu9LMxuXx8fl/Xp4/ohbXx/L4HDM7fUifRAghdjPuvPNOjjnmGI466ijGjRvHOeecw9VXXz3k9+nENLQZeKW7Px84EXitmb0I+BfgC+5+DLASeGeGfyewMo9/IcNhZscD5wDPBV4LfM3MRg/lwwghxO7EokWLOOyww373f8qUKSxatGjI7zOgIvBgXf4dmx8HXgn8dx6/FHhD/j4r/5PnX2Xh0TgLuMLdN7v7fGAecOqQPIUQQojtpiNnsZmNNrN7gKXAjcDDwCp335pBFgKT8/dk4HGAPL8aOKB+vMk1QgghGpg8eTKPP16JzYULFzJ58tCLzY4Ugbv3uvuJwBSiFf+cIU9JYmbnm9kMM5uxbNmynXUbIYQY8bzgBS9g7ty5zJ8/ny1btnDFFVdw5plnDvl9BjV81N1XATcDvw88w8zKqKMpQDFcLQIOA8jz+wLL68ebXFO/x8XuPs3dp02a1PFy2kIIsdsxZswY/vVf/5XTTz+d4447jrPPPpvnPve5Q3+fgQKY2SSgx91XmdlE4DWEA/hm4E+BK4DzgOLKvib//zrP/9zd3cyuAS43s88DhwJTgTuH+HmEEGKn0Mlwz53BGWecwRlnnLFT79HJPIJDgEtzhM8o4Cp3v9bMZgNXmNk/A78FvpHhvwF8x8zmASuIkUK4+/1mdhUwG9gKvNfde4f2cYQQQgyWARWBu88ETmpy/BGajPpx903Am1vE9SngU4NPphBCiJ2FlpgQQoguR4pACCFa4O7DnYS2DFX6pAiEEKIJEyZMYPny5SNWGZT9CCZMmLDDcWnROSGEaMKUKVNYuHAhI3k+U9mhbEeRIhBCiCaMHTt2h3f+2lWQaUgIIbocKQIhhOhypAiEEKLLkSIQQoguR4pACCG6HCkCIYTocqQIhBCiy5EiEEKILkeKQAghuhwpAiGE6HKkCIQQosuRIhBCiC5HikAIIbocKQIhhOhypAiEEKLLkSIQQoguR4pACCG6HCkCIYTocqQIhBCiy5EiEEKILmdARWBmh5nZzWY228zuN7MP5PFPmNkiM7snP2fUrvmYmc0zszlmdnrt+Gvz2Dwzu2DnPJIQQojBMKaDMFuBv3X3u81sb+AuM7sxz33B3S+qBzaz44FzgOcChwI/M7Nn5+mvAq8BFgLTzewad589FA8ihBBi+xhQEbj7YmBx/l5rZg8Ak9tcchZwhbtvBuab2Tzg1Dw3z90fATCzKzKsFIEQQgwjg/IRmNkRwEnAHXnofWY208wuMbP98thk4PHaZQvzWKvjjfc438xmmNmMZcuWDSZ5QgghtoOOFYGZ7QX8D/A37r4G+DpwNHAi0WP43FAkyN0vdvdp7j5t0qRJQxGlEEKINnTiI8DMxhJK4DJ3/z6Auy+pnf8P4Nr8uwg4rHb5lDxGm+NCCCGGiU5GDRnwDeABd/987fghtWBvBO7L39cA55jZeDM7EpgK3AlMB6aa2ZFmNo5wKF8zNI8hhBBie+mkR/AS4M+BWWZ2Tx77O+BcMzsRcOBR4F0A7n6/mV1FOIG3Au91914AM3sf8BNgNHCJu98/hM8ihBBiOzB3H+40tGTatGk+Y8aM4U6GEELsUpjZXe4+rdPwmlkshBBdjhSBEEJ0OVIEQgjR5UgRCCFElyNFIIQQXY4UgRBCdDlSBEII0eVIEQghRJcjRSCEEF2OFIEQQnQ5UgRCCNHlSBEIIUSXI0UghBBdjhSBEEJ0OVIEQgjR5UgRCCFElyNFIIQQXY4UgRBCdDlSBEII0eVIEQghRJcjRSCEEF2OFIEQQnQ5UgRCCNHlSBEIIUSXI0UghBBdzoCKwMwOM7ObzWy2md1vZh/I4/ub2Y1mNje/98vjZmZfNrN5ZjbTzE6uxXVehp9rZuftvMcSQgjRKZ30CLYCf+vuxwMvAt5rZscDFwA3uftU4Kb8D/A6YGp+zge+DqE4gAuBFwKnAhcW5SGEEGL4GFARuPtid787f68FHgAmA2cBl2awS4E35O+zgG978BvgGWZ2CHA6cKO7r3D3lcCNwGuH9GmEEEIMmkH5CMzsCOAk4A7gYHdfnKeeBA7O35OBx2uXLcxjrY433uN8M5thZjOWLVs2mOQJIYTYDjpWBGa2F/A/wN+4+5r6OXd3wIciQe5+sbtPc/dpkyZNGooohRBCtKEjRWBmYwklcJm7fz8PL0mTD/m9NI8vAg6rXT4lj7U6LoQQYhjpZNSQAd8AHnD3z9dOXQOUkT/nAVfXjr8tRw+9CFidJqSfAKeZ2X7pJD4tjwkhhBhGxnQQ5iXAnwOzzOyePPZ3wGeAq8zsncAC4Ow8dz1wBjAP2AC8HcDdV5jZPwHTM9w/uvuKIXkKIYQQ242FeX9kMm3aNJ8xY8ZwJ0MIIXYpzOwud5/WaXjNLBZCiC5HikAIIbocKQIhhOhypAiEEKLLkSIQQoguR4pACCG6HCkCIYTocqQIhBCiy5EiEEKILkeKQAghuhwpAiGE6HKkCIQQosuRIhBCiC5HikAIIbocKQIhhOhypAiEEKLLkSIQQoguR4pACCG6HCkCIYTocqQIhBCiy5EiEEKILkeKQAghuhwpAiGE6HKkCIQQossZUBGY2SVmttTM7qsd+4SZLTKze/JzRu3cx8xsnpnNMbPTa8dfm8fmmdkFQ/8oQgghtodOegTfAl7b5PgX3P3E/FwPYGbHA+cAz81rvmZmo81sNPBV4HXA8cC5GVYIIcQwM2agAO7+SzM7osP4zgKucPfNwHwzmw
V0PGk2movXg6ZDpeA9xiZi/SlbO/WhTKEdQuZN2b2Le3B7CC4FdIY9gHlRScYWZ3eAMlBQPy9vn+qKB9TT6jJmJF8l2V4WeEPAKU/PgScBV613oVvWl2SxpBL8FU4GMN4I/u/lrJ991GoSSseLDlLClY47ztIScz+yvw9YLv6ix337XVfTcDOc6kbqsE9gZCnkaRqqXl0XdJI+gluPvt3TSpJwolYQWCdS0puLP3gKG1QFnRB2kI7XqXvwBcGnwFEHxXbeq7GYicSR2XR2BmFyKuoT0Rqd/BtMkKkARB56KeKJSEFQvLX1JQ+DGZj+ANRAQ3tlmDLEPBd3Uplb6rD6BrWxGwvMWBWondAkXLLHc/xcx+jCrftRzJNNTB6C4KJWHFRDDzHQ2cAGzg7qvVeVycjGPkS8xBiFQWLQskMLNYB6E0g9rdj2hV381GiNaLeQSTOyGPAMDMprrYUe9B1NnzUS3wTVvdd9IIOhghbGyFyNhM6B62/CUF66azaDbc/RRY5rvaMee7Ohm4oZV9Nxvu/jBiy+00XB+CSc5E772j6mktR9IIEhLaBDM7AU38PS4pGM7Ta4EEgQdnO3ePdXZXA2Z5G4qnrEwI97V/u/IzkkaQkNAmuPtZTTpVbwYSJN9Vi2Bmd6EEvTtRKcu2JekljSAhYQWDmX0H0SznJ+Mr3P1Hbeo/+a5aADPbBN3X3RH9zFJU12N8y/tOgiAhYcVDmozfngh1E/ZAv+2ewJPt4PFKgiAhISGhA2BmjwP/Bi5H5qGZ7t6WHIckCBISEhI6AGY2DhiN6ik/jPwFd7j74y3vOwmChISEhM6BmQ0GjkF5JsPcvW/L+0yCICEhIaH3ETKJR6NSr1OAu5CzeG7L+06CICEhIaH3YWYHo4n/X23vOwmChISEhM6Amb0TGE4ux8vd72h1vymhLCEhIaEDYGanA58A5gBvht0OtFwQJI0gISEhoQNQpO9oJ/q0u8OEhISEhFLMRTWL245kGkpISEjoRZjZecgE9AowMxQwWqYVuPvxrR5DEgQJCQkJvYv7w99pwHW9MYDkI0hISEjoAJjZIOBVd38zbPcFVnP3V1rdd/IRJCQkJHQGJgMDctsDgFvb0XESBAkJCQmdgf7u/nLcCP8PbEfHSRAkJCQkdAYWB3pxAMxsJ2BJOzpOPoKEhISEDkCY+K8Ang27NgQOdfdpre47RQ0lJCQkdAY2Ad4LbAwcCOyCwkpbjmQaSkhISOgMnOTui4ChqDrZ+cAF7eg4CYKEhISEzkDkF9oP+IW73wCs2o6OkyBISEhI6Aw8Y2YXAYcCN5rZarRpjk7O4oSEhIQOgJkNBD4MzHb3R0Mh+/e4+80t7zsJgoSEhISVG8k0lJCQkLCSIwmChISEhJUcSRAkJCQkrORIgiAhISFhJUcSBAkJCQkrOf4/rkQyWNNX7+8AAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# fequency bar plot - it takes time!!\n", + "w_count_df.plot.bar()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/dataset_analysis/AnalyzeDataset.ipynb b/notebooks/dataset_analysis/AnalyzeDataset.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..e17153510699e339473583b5b551df38b053984d --- /dev/null +++ b/notebooks/dataset_analysis/AnalyzeDataset.ipynb @@ -0,0 +1,401 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "TTS_PATH = \"/home/erogol/projects/\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "import os\n", + "import sys\n", + "sys.path.append(TTS_PATH) # set this if TTS is not installed globally\n", + "import glob\n", + "import librosa\n", + "import numpy as np\n", + "import pandas as pd\n", + "from scipy.stats import norm\n", + "from tqdm import tqdm_notebook as tqdm\n", + "from multiprocessing import Pool\n", + "from matplotlib import pylab as plt\n", + "from collections import Counter\n", + "from TTS.tts.datasets.preprocess import *\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "DATA_PATH = \"/home/erogol/Data/m-ai-labs/de_DE/by_book/male/karlsson/\"\n", + "META_DATA = [\"kleinzaches/metadata.csv\",\n", + " \"spiegel_kaetzchen/metadata.csv\",\n", + " \"herrnarnesschatz/metadata.csv\",\n", + " \"maedchen_von_moorhof/metadata.csv\",\n", + " \"koenigsgaukler/metadata.csv\",\n", + " \"altehous/metadata.csv\",\n", + " \"odysseus/metadata.csv\",\n", + " \"undine/metadata.csv\",\n", + " \"reise_tilsit/metadata.csv\",\n", + " \"schmied_seines_glueckes/metadata.csv\",\n", + " \"kammmacher/metadata.csv\",\n", + " \"unterm_birnbaum/metadata.csv\",\n", + " \"liebesbriefe/metadata.csv\",\n", + " \"sandmann/metadata.csv\"]\n", + "NUM_PROC = 8" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "# use your own preprocessor at this stage - TTS/datasets/proprocess.py\n", + "items = mailabs(DATA_PATH, META_DATA)\n", + "print(\" > Number of audio files: {}\".format(len(items)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "# check wavs if exist\n", + "wav_files = []\n", + "for item in items:\n", + " wav_file = item[1].strip()\n", + " wav_files.append(wav_file)\n", + " if not os.path.exists(wav_file):\n", + " print(waf_path)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "# show duplicate items\n", + "c = Counter(wav_files)\n", + "print([item 
for item, count in c.items() if count > 1])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "def load_item(item):\n", + " file_name = item[1].strip()\n", + " text = item[0].strip()\n", + " audio = librosa.load(file_name, sr=None)\n", + " sr = audio[1]\n", + " audio = audio[0]\n", + " audio_len = len(audio) / sr\n", + " text_len = len(text)\n", + " return file_name, text, text_len, audio, audio_len\n", + "\n", + "# This will take a while depending on size of dataset\n", + "if NUM_PROC == 1:\n", + " data = []\n", + " for m in tqdm(items):\n", + " data += [load_item(m)]\n", + "else:\n", + " with Pool(8) as p:\n", + " data = list(tqdm(p.imap(load_item, items), total=len(items)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "# count words in the dataset\n", + "w_count = Counter()\n", + "for item in tqdm(data):\n", + " text = item[1].lower().strip()\n", + " for word in text.split():\n", + " w_count[word] += 1\n", + "print(\" > Number of words: {}\".format(len(w_count)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "text_vs_durs = {} # text length vs audio duration\n", + "text_len_counter = Counter() # number of sentences with the keyed length\n", + "for item in tqdm(data):\n", + " text = item[1].lower().strip()\n", + " text_len = len(text)\n", + " text_len_counter[text_len] += 1\n", + " audio_len = item[-1]\n", + " try:\n", + " text_vs_durs[text_len] += [audio_len]\n", + " except:\n", + " text_vs_durs[text_len] = [audio_len]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "# text_len vs avg_audio_len, median_audio_len, std_audio_len\n", + "text_vs_avg = {}\n", + "text_vs_median = {}\n", + "text_vs_std = {}\n", + "for key, durs in text_vs_durs.items():\n", + " text_vs_avg[key] = np.mean(durs)\n", + " text_vs_median[key] = np.median(durs)\n", + " text_vs_std[key] = np.std(durs)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "Collapsed": "false" + }, + "source": [ + "### Avg audio length per char" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "for item in data:\n", + " if item[-1] < 2:\n", + " print(item)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "sec_per_chars = []\n", + "for item in data:\n", + " text = item[1]\n", + " dur = item[-1]\n", + " sec_per_char = dur / len(text)\n", + " sec_per_chars.append(sec_per_char)\n", + "# sec_per_char /= len(data)\n", + "# print(sec_per_char)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "mean = np.mean(sec_per_chars)\n", + "std = np.std(sec_per_chars)\n", + "print(mean)\n", + "print(std)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "dist = norm(mean, std)\n", + "\n", + "# find irregular instances long or short voice durations\n", + "for item in data:\n", + " text = item[1]\n", + " dur = item[-1]\n", + " sec_per_char = dur / len(text)\n", + " pdf =norm.pdf(sec_per_char)\n", + " if pdf < 0.39:\n", + " 
print(item)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "Collapsed": "false" + }, + "source": [ + "### Plot Dataset Statistics" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "plt.title(\"text length vs mean audio duration\")\n", + "plt.scatter(list(text_vs_avg.keys()), list(text_vs_avg.values()))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "plt.title(\"text length vs median audio duration\")\n", + "plt.scatter(list(text_vs_median.keys()), list(text_vs_median.values()))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "plt.title(\"text length vs STD\")\n", + "plt.scatter(list(text_vs_std.keys()), list(text_vs_std.values()))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "plt.title(\"text length vs # instances\")\n", + "plt.scatter(list(text_len_counter.keys()), list(text_len_counter.values()))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "Collapsed": "false" + }, + "source": [ + "### Check words frequencies" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "w_count_df = pd.DataFrame.from_dict(w_count, orient='index')\n", + "w_count_df.sort_values(0, ascending=False, inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false", + "scrolled": true + }, + "outputs": [], + "source": [ + "w_count_df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "# check a certain word\n", + "w_count_df.at['minute', 0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "# fequency bar plot - it takes time!!\n", + "w_count_df.plot.bar()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.7" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/dataset_analysis/CheckDatasetSNR.ipynb b/notebooks/dataset_analysis/CheckDatasetSNR.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..0aa0734335b751b76feda7278196fdd61f3479da --- /dev/null +++ b/notebooks/dataset_analysis/CheckDatasetSNR.ipynb @@ -0,0 +1,239 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This notebook computes the average SNR a given Voice Dataset. If the SNR is too low, that might reduce the performance or prevent model to learn.\n", + "\n", + "To use this notebook, you need:\n", + "- WADA SNR estimation: http://www.cs.cmu.edu/~robust/archive/algorithms/WADA_SNR_IS_2008/\n", + " 1. extract in the same folder as this notebook\n", + " 2. under MacOS you'll have to rebuild the executable. 
In the build folder: 1) remove existing .o files and 2) run make\n", + "\n", + "\n", + "- FFMPEG: ```sudo apt-get install ffmpeg ``` \n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true, + "jupyter": { + "outputs_hidden": true + } + }, + "outputs": [], + "source": [ + "import os, sys\n", + "import glob\n", + "import subprocess\n", + "import tempfile\n", + "import IPython\n", + "import soundfile as sf\n", + "import numpy as np\n", + "from tqdm import tqdm\n", + "from multiprocessing import Pool\n", + "from matplotlib import pylab as plt\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true, + "jupyter": { + "outputs_hidden": true + } + }, + "outputs": [], + "source": [ + "# Set the meta parameters\n", + "DATA_PATH = \"/home/erogol/Data/m-ai-labs/de_DE/by_book/female/eva_k/\"\n", + "NUM_PROC = 1\n", + "CURRENT_PATH = os.getcwd()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true, + "jupyter": { + "outputs_hidden": true + } + }, + "outputs": [], + "source": [ + "def compute_file_snr(file_path):\n", + " \"\"\" Convert given file to required format with FFMPEG and process with WADA.\"\"\"\n", + " _, sr = sf.read(file_path)\n", + " new_file = file_path.replace(\".wav\", \"_tmp.wav\")\n", + " if sr != 16000:\n", + " command = f'ffmpeg -i \"{file_path}\" -ac 1 -acodec pcm_s16le -y -ar 16000 \"{new_file}\"'\n", + " else:\n", + " command = f'cp \"{file_path}\" \"{new_file}\"'\n", + " os.system(command)\n", + " command = [f'\"{CURRENT_PATH}/WadaSNR/Exe/WADASNR\"', f'-i \"{new_file}\"', f'-t \"{CURRENT_PATH}/WadaSNR/Exe/Alpha0.400000.txt\"', '-ifmt mswav']\n", + " output = subprocess.check_output(\" \".join(command), shell=True)\n", + " try:\n", + " output = float(output.split()[-3].decode(\"utf-8\"))\n", + " except:\n", + " raise RuntimeError(\" \".join(command))\n", + " os.system(f'rm \"{new_file}\"')\n", + " return output, file_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true, + "jupyter": { + "outputs_hidden": true + } + }, + "outputs": [], + "source": [ + "wav_file = \"/home/erogol/Data/LJSpeech-1.1/wavs/LJ001-0001.wav\"\n", + "output = compute_file_snr(wav_file)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "wav_files = glob.glob(f\"{DATA_PATH}/**/*.wav\", recursive=True)\n", + "print(f\" > Number of wav files {len(wav_files)}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if NUM_PROC == 1:\n", + " file_snrs = [None] * len(wav_files) \n", + " for idx, wav_file in tqdm(enumerate(wav_files)):\n", + " tup = compute_file_snr(wav_file)\n", + " file_snrs[idx] = tup\n", + "else:\n", + " with Pool(NUM_PROC) as pool:\n", + " file_snrs = list(tqdm(pool.imap(compute_file_snr, wav_files), total=len(wav_files)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "snrs = [tup[0] for tup in file_snrs]\n", + "\n", + "error_idxs = np.where(np.isnan(snrs) == True)[0]\n", + "error_files = [file_names[idx] for idx in error_idxs]\n", + "\n", + "file_snrs = [i for j, i in enumerate(file_snrs) if j not in error_idxs]\n", + "file_names = [tup[1] for tup in file_snrs]\n", + "snrs = [tup[0] for tup in file_snrs]\n", + "file_idxs = np.argsort(snrs)\n", + "\n", + "\n", + "print(f\" > Average SNR 
of the dataset:{np.mean(snrs)}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true, + "jupyter": { + "outputs_hidden": true + } + }, + "outputs": [], + "source": [ + "def output_snr_with_audio(idx):\n", + " file_idx = file_idxs[idx]\n", + " file_name = file_names[file_idx]\n", + " wav, sr = sf.read(file_name)\n", + " # multi channel to single channel\n", + " if len(wav.shape) == 2:\n", + " wav = wav[:, 0]\n", + " print(f\" > {file_name} - snr:{snrs[file_idx]}\")\n", + " IPython.display.display(IPython.display.Audio(wav, rate=sr))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# find worse SNR files\n", + "N = 10 # number of files to fetch\n", + "for i in range(N):\n", + " output_snr_with_audio(i)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# find best recordings\n", + "N = 10 # number of files to fetch\n", + "for i in range(N):\n", + " output_snr_with_audio(-i-1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.hist(snrs, bins=100)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true, + "jupyter": { + "outputs_hidden": true + } + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/dataset_analysis/CheckSpectrograms.ipynb b/notebooks/dataset_analysis/CheckSpectrograms.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..4d4ba57acf2928c2aad5af99dd3c5009a9a66280 --- /dev/null +++ b/notebooks/dataset_analysis/CheckSpectrograms.ipynb @@ -0,0 +1,384 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "from tts.utils.audio import AudioProcessor\n", + "from tts.tts.utils.visual import plot_spectrogram\n", + "from tts.utils.io import load_config\n", + "import glob " + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "config_path = \"/home/erogol/Projects/TTS/tts/tts/config_thorsten_de.json\"\n", + "data_path = \"/home/erogol/Data/thorsten-german/\"\n", + "file_paths = glob.glob(data_path + \"/**/*.wav\", recursive=True)\n", + "CONFIG = load_config(config_path)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "Collapsed": "false" + }, + "source": [ + "### Setup Audio Processor\n", + "Play with the AP parameters until you find a good fit with the synthesis speech below. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "Collapsed": "false" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " > Setting up Audio Processor...\n", + " | > sample_rate:22050\n", + " | > num_mels:80\n", + " | > min_level_db:-100\n", + " | > frame_shift_ms:None\n", + " | > frame_length_ms:None\n", + " | > ref_level_db:20\n", + " | > fft_size:1024\n", + " | > power:1.5\n", + " | > preemphasis:0.0\n", + " | > griffin_lim_iters:60\n", + " | > signal_norm:True\n", + " | > symmetric_norm:True\n", + " | > mel_fmin:0\n", + " | > mel_fmax:8000.0\n", + " | > spec_gain:1.0\n", + " | > stft_pad_mode:reflect\n", + " | > max_norm:4.0\n", + " | > clip_norm:True\n", + " | > do_trim_silence:True\n", + " | > trim_db:60\n", + " | > do_sound_norm:True\n", + " | > stats_path:None\n", + " | > hop_length:256\n", + " | > win_length:1024\n" + ] + } + ], + "source": [ + "# audio={\n", + "# 'audio_processor': 'audio',\n", + "# 'num_mels': 80, # In general, you don'tneed to change it \n", + "# 'fft_size': 1024, # In general, you don'tneed to change it \n", + "# 'sample_rate': 22050, # It depends to the sample rate of the dataset.\n", + "# 'hop_length': 256, # In general, you don'tneed to change it \n", + "# 'win_length': 1024, # In general, you don'tneed to change it \n", + "# 'preemphasis': 0.98, # In general, 0 gives better voice recovery but makes traning harder. If your model does not train, try 0.97 - 0.99.\n", + "# 'min_level_db': -100,\n", + "# 'ref_level_db': 20, # It is the base DB, higher until you remove the background noise in the spectrogram and then lower until you hear a better speech below.\n", + "# 'power': 1.5, # Change this value and listen the synthesized voice. 1.2 - 1.5 are some resonable values.\n", + "# 'griffin_lim_iters': 60, # It does not give any imporvement for values > 60\n", + "# 'signal_norm': True, # This is more about your model. It does not give any change for the synthsis performance.\n", + "# 'symmetric_norm': False, # Same as above\n", + "# 'max_norm': 1, # Same as above\n", + "# 'clip_norm': True, # Same as above\n", + "# 'mel_fmin': 0.0, # You can play with this and check mel-spectrogram based voice synthesis below.\n", + "# 'mel_fmax': 8000.0, # You can play with this and check mel-spectrogram based voice synthesis below.\n", + "# 'do_trim_silence': True} # If you dataset has some silience at the beginning or end, this trims it. 
Check the AP.load_wav() below,if it causes any difference for the loaded audio file.\n", + "\n", + "AP = AudioProcessor(**CONFIG.audio);" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "Collapsed": "false" + }, + "source": [ + "### Check audio loading " + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "Collapsed": "false" + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "wav = AP.load_wav(file_paths[10])\n", + "ipd.Audio(data=wav, rate=AP.sample_rate) " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "Collapsed": "false" + }, + "source": [ + "### Generate Mel-Spectrogram and Re-synthesis with GL" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [], + "source": [ + "AP.power = 1.0" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "Collapsed": "false" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Max: 2.4340844\n", + "Min: 2.0181823\n", + "Mean: 2.2137265\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mel = AP.melspectrogram(wav)\n", + "print(\"Max:\", mel.max())\n", + "print(\"Min:\", mel.min())\n", + "print(\"Mean:\", mel.mean())\n", + "plot_spectrogram(mel.T, AP);\n", + "\n", + "wav_gen = AP.inv_melspectrogram(mel)\n", + "ipd.Audio(wav_gen, rate=AP.sample_rate)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "Collapsed": "false" + }, + "source": [ + "### Generate Linear-Spectrogram and Re-synthesis with GL" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "Collapsed": "false" + }, + "outputs": [ + { + "ename": "RuntimeError", + "evalue": " [!] 
Mean-Var stats does not match the given feature dimensions.", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mspec\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mAP\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mspectrogram\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mwav\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Max:\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mspec\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmax\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Min:\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mspec\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Mean:\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mspec\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mplot_spectrogram\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mspec\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mT\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mAP\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m;\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/Projects/TTS/tts/utils/audio.py\u001b[0m in \u001b[0;36mspectrogram\u001b[0;34m(self, y)\u001b[0m\n\u001b[1;32m 218\u001b[0m \u001b[0mD\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_stft\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 219\u001b[0m \u001b[0mS\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_amp_to_db\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mabs\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mD\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 220\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnormalize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mS\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 221\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 222\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mmelspectrogram\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/Projects/TTS/tts/utils/audio.py\u001b[0m in \u001b[0;36mnormalize\u001b[0;34m(self, S)\u001b[0m\n\u001b[1;32m 117\u001b[0m \u001b[0;32mreturn\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlinear_scaler\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtransform\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mS\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mT\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mT\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 118\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 119\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mRuntimeError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m' [!] Mean-Var stats does not match the given feature dimensions.'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 120\u001b[0m \u001b[0;31m# range normalization\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 121\u001b[0m \u001b[0mS\u001b[0m \u001b[0;34m-=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mref_level_db\u001b[0m \u001b[0;31m# discard certain range of DB assuming it is air noise\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mRuntimeError\u001b[0m: [!] Mean-Var stats does not match the given feature dimensions." + ] + } + ], + "source": [ + "spec = AP.spectrogram(wav)\n", + "print(\"Max:\", spec.max())\n", + "print(\"Min:\", spec.min())\n", + "print(\"Mean:\", spec.mean())\n", + "plot_spectrogram(spec.T, AP);\n", + "\n", + "wav_gen = AP.inv_spectrogram(spec)\n", + "ipd.Audio(wav_gen, rate=AP.sample_rate)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "Collapsed": "false" + }, + "source": [ + "### Compare values for a certain parameter\n", + "\n", + "Optimize your parameters by comparing different values per parameter at a time." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "audio={\n", + " 'audio_processor': 'audio',\n", + " 'num_mels': 80, # In general, you don'tneed to change it \n", + " 'num_freq': 1025, # In general, you don'tneed to change it \n", + " 'sample_rate': 22050, # It depends to the sample rate of the dataset.\n", + " 'frame_length_ms': 50, # In general, you don'tneed to change it \n", + " 'frame_shift_ms': 12.5, # In general, you don'tneed to change it \n", + " 'preemphasis': 0.98, # In general, 0 gives better voice recovery but makes traning harder. If your model does not train, try 0.97 - 0.99.\n", + " 'min_level_db': -100,\n", + " 'ref_level_db': 20, # It is the base DB, higher until you remove the background noise in the spectrogram and then lower until you hear a better speech below.\n", + " 'power': 1.5, # Change this value and listen the synthesized voice. 1.2 - 1.5 are some resonable values.\n", + " 'griffin_lim_iters': 60, # It does not give any imporvement for values > 60\n", + " 'signal_norm': True, # This is more about your model. It does not give any change for the synthsis performance.\n", + " 'symmetric_norm': False, # Same as above\n", + " 'max_norm': 1, # Same as above\n", + " 'clip_norm': True, # Same as above\n", + " 'mel_fmin': 0.0, # You can play with this and check mel-spectrogram based voice synthesis below.\n", + " 'mel_fmax': 8000.0, # You can play with this and check mel-spectrogram based voice synthesis below.\n", + " 'do_trim_silence': True} # If you dataset has some silience at the beginning or end, this trims it. 
Check the AP.load_wav() below,if it causes any difference for the loaded audio file.\n", + "\n", + "AP = AudioProcessor(**audio);" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "from librosa import display\n", + "from matplotlib import pylab as plt\n", + "import IPython\n", + "plt.rcParams['figure.figsize'] = (20.0, 16.0)\n", + "\n", + "def compare_values(attribute, values, file):\n", + " \"\"\"\n", + " attributes (str): the names of the attribute you like to test.\n", + " values (list): list of values to compare.\n", + " file (str): file name to perform the tests.\n", + " \"\"\"\n", + " wavs = []\n", + " for idx, val in enumerate(values):\n", + " set_val_cmd = \"AP.{}={}\".format(attribute, val)\n", + " exec(set_val_cmd)\n", + " wav = AP.load_wav(file)\n", + " spec = AP.spectrogram(wav)\n", + " spec_norm = AP.denormalize(spec.T)\n", + " plt.subplot(len(values), 2, 2*idx + 1)\n", + " plt.imshow(spec_norm.T, aspect=\"auto\", origin=\"lower\")\n", + " # plt.colorbar()\n", + " plt.tight_layout()\n", + " wav_gen = AP.inv_spectrogram(spec)\n", + " wavs.append(wav_gen)\n", + " plt.subplot(len(values), 2, 2*idx + 2)\n", + " display.waveplot(wav, alpha=0.5)\n", + " display.waveplot(wav_gen, alpha=0.25)\n", + " plt.title(\"{}={}\".format(attribute, val))\n", + " plt.tight_layout()\n", + " \n", + " wav = AP.load_wav(file)\n", + " print(\" > Ground-truth\")\n", + " IPython.display.display(IPython.display.Audio(wav, rate=AP.sample_rate))\n", + " \n", + " for idx, wav_gen in enumerate(wavs):\n", + " val = values[idx]\n", + " print(\" > {} = {}\".format(attribute, val))\n", + " IPython.display.display(IPython.display.Audio(wav_gen, rate=AP.sample_rate))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "compare_values(\"preemphasis\", [0, 0.5, 0.97, 0.98, 0.99], file_paths[10])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "compare_values(\"ref_level_db\", [10, 15, 20, 25, 30, 35, 40], file_paths[10])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.7" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/dataset_analysis/PhonemeCoverage.ipynb b/notebooks/dataset_analysis/PhonemeCoverage.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..af00deafdf9850bc5a635035618ba30550ee19af --- /dev/null +++ b/notebooks/dataset_analysis/PhonemeCoverage.ipynb @@ -0,0 +1,251 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "Collapsed": "false" + }, + "source": [ + "# Jupyter Notbook for phoneme coverage analysis\n", + "\n", + "This jupyter notebook checks dataset configured in config.json for phoneme coverage.\n", + "As mentioned here https://github.com/mozilla/TTS/wiki/Dataset#what-makes-a-good-dataset a good phoneme coverage is recommended.\n", + "\n", + "Most parameters will be taken from config.json file in mozilla tts repo so please ensure it's configured correctly for your dataset.\n", + "This notebook used lots of existring code from the TTS repo to ensure 
future compatibility.\n", + "\n", + "Many thanks to Neil Stoker supporting me on this topic :-).\n", + "\n", + "I provide this notebook without any warrenty but it's hopefully useful for your dataset analysis.\n", + "\n", + "Happy TTS'ing :-)\n", + "\n", + "Thorsten Müller\n", + "\n", + "* https://github.com/thorstenMueller/deep-learning-german-tts\n", + "* https://discourse.mozilla.org/t/contributing-my-german-voice-for-tts/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "# set some vars\n", + "# TTS_PATH = \"/home/thorsten/___dev/tts/mozilla/TTS\"\n", + "CONFIG_FILE = \"/path/to/config/config.json\"\n", + "CHARS_TO_REMOVE = \".,:!?'\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "# import stuff\n", + "from TTS.utils.io import load_config\n", + "from TTS.tts.datasets.preprocess import load_meta_data\n", + "from TTS.tts.utils.text import phoneme_to_sequence, sequence_to_phoneme\n", + "from tqdm import tqdm\n", + "from matplotlib import pylab as plt\n", + "from multiprocessing import Pool, cpu_count\n", + "\n", + "# extra imports that might not be included in requirements.txt\n", + "import collections\n", + "import operator\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false", + "tags": [] + }, + "outputs": [], + "source": [ + "# Load config.json properties\n", + "CONFIG = load_config(CONFIG_FILE)\n", + "\n", + "# Load some properties from config.json\n", + "CONFIG_METADATA = sorted(load_meta_data(CONFIG.datasets)[0])\n", + "CONFIG_METADATA = CONFIG_METADATA\n", + "CONFIG_DATASET = CONFIG.datasets[0]\n", + "CONFIG_PHONEME_LANGUAGE = CONFIG.phoneme_language\n", + "CONFIG_TEXT_CLEANER = CONFIG.text_cleaner\n", + "CONFIG_ENABLE_EOS_BOS_CHARS = CONFIG.enable_eos_bos_chars\n", + "\n", + "# Will be printed on generated output graph\n", + "CONFIG_RUN_NAME = CONFIG.run_name\n", + "CONFIG_RUN_DESC = CONFIG.run_description" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false", + "tags": [] + }, + "outputs": [], + "source": [ + "# print some debug information on loaded config values\n", + "print(\" > Run name: \" + CONFIG_RUN_NAME + \" (\" + CONFIG_RUN_DESC + \")\")\n", + "print(\" > Dataset files: \" + str(len(CONFIG_METADATA)))\n", + "print(\" > Phoneme language: \" + CONFIG_PHONEME_LANGUAGE)\n", + "print(\" > Used text cleaner: \" + CONFIG_TEXT_CLEANER)\n", + "print(\" > Enable eos bos chars: \" + str(CONFIG_ENABLE_EOS_BOS_CHARS))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def get_phoneme_from_sequence(text):\n", + " temp_list = []\n", + " if len(text[0]) > 0:\n", + " temp_text = text[0].rstrip('\\n')\n", + " for rm_bad_chars in CHARS_TO_REMOVE:\n", + " temp_text = temp_text.replace(rm_bad_chars,\"\")\n", + " seq = phoneme_to_sequence(temp_text, [CONFIG_TEXT_CLEANER], CONFIG_PHONEME_LANGUAGE, CONFIG_ENABLE_EOS_BOS_CHARS)\n", + " text = sequence_to_phoneme(seq)\n", + " text = text.replace(\" \",\"\")\n", + " temp_list.append(text)\n", + " return temp_list" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false", + "tags": [] + }, + "outputs": [], + "source": [ + "# Get phonemes from metadata\n", + "phonemes = []\n", + "\n", + "with Pool(cpu_count()-1) as p:\n", + " \n", + " phonemes = 
list(tqdm(p.imap(get_phoneme_from_sequence, CONFIG_METADATA), total=len(CONFIG_METADATA)))\n", + " phonemes = [i for sub in phonemes for i in sub]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false", + "tags": [] + }, + "outputs": [], + "source": [ + "s = \"\"\n", + "phonemeString = s.join(phonemes)\n", + "\n", + "d = {}\n", + "collections._count_elements(d, phonemeString)\n", + "sorted_d = dict(sorted(d.items(), key=operator.itemgetter(1),reverse=True))\n", + "\n", + "# remove useless keys\n", + "sorted_d.pop(' ', None)\n", + "sorted_d.pop('ˈ', None)\n", + "\n", + "phonemesSum = len(phonemeString.replace(\" \",\"\"))\n", + "\n", + "print(\"Dataset contains \" + str(len(sorted_d)) + \" different ipa phonemes.\")\n", + "print(\"Dataset consists of \" + str(phonemesSum) + \" phonemes\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false", + "tags": [] + }, + "outputs": [], + "source": [ + "print(\"5 rarest phonemes\")\n", + "\n", + "rareList = dict(sorted(sorted_d.items(), key=operator.itemgetter(1), reverse=False)[:5])\n", + "for key, value in rareList.items():\n", + " print(key + \" --> \" + str(value) + \" occurrences\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "# create plot from analysis result\n", + "\n", + "x = []\n", + "y = []\n", + "\n", + "for key, value in sorted_d.items():\n", + " x.append(key)\n", + " y.append(value)\n", + "\n", + "plt.figure(figsize=(50,50))\n", + "plt.title(\"Phoneme coverage for \" + CONFIG_RUN_NAME + \" (\" + CONFIG_RUN_DESC + \")\", fontsize=50)\n", + "plt.xticks(fontsize=50)\n", + "plt.yticks(fontsize=50)\n", + "plt.barh(x,y, align='center', alpha=1.0)\n", + "plt.gca().invert_yaxis()\n", + "plt.ylabel('phoneme', fontsize=50)\n", + "plt.xlabel('occurrences', fontsize=50)\n", + "\n", + "for i, v in enumerate(y):\n", + " plt.text(v + 2, i - .2, str(v), fontsize=20)\n", + " plt.text(v + 2, i + .2, \"(\" + str(round(100/phonemesSum * v,2)) + \"%)\", fontsize=20)\n", + " \n", + " \n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9-final" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/notebooks/dataset_analysis/README.md b/notebooks/dataset_analysis/README.md new file mode 100644 index 0000000000000000000000000000000000000000..79faf5215951c996e7b15cc960a93195fd9034a8 --- /dev/null +++ b/notebooks/dataset_analysis/README.md @@ -0,0 +1,7 @@ +## Simple Notebook to Analyze a Dataset + +By the use of this notebook, you can easily analyze a brand new dataset, find exceptional cases and define your training set. + +What we are looking in here is reasonable distribution of instances in terms of sequence-length, audio-length and word-coverage. 
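+
+For a quick standalone version of the checks in AnalyzeDataset.ipynb, the sketch below flags clips whose seconds-per-character ratio sits far from the dataset mean. It is a minimal illustration only, assuming `data` holds the `(file_name, text, text_len, audio, audio_len)` tuples built by that notebook; the function name and the `z_threshold` value are arbitrary examples, not part of the notebook.
+
+```python
+import numpy as np
+
+def find_length_outliers(data, z_threshold=2.0):
+    """Return (file_name, sec_per_char) pairs that deviate strongly from the dataset mean."""
+    # seconds of audio per character of text, per item
+    ratios = np.array([item[-1] / len(item[1]) for item in data])
+    mean, std = ratios.mean(), ratios.std()
+    return [(item[0], ratio) for item, ratio in zip(data, ratios)
+            if abs(ratio - mean) > z_threshold * std]
+```
+
+Listening to the flagged clips (typically too fast, too slow, or mis-transcribed) is usually enough to decide whether to drop them from the training set.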
+ +This notebook is inspired from https://github.com/MycroftAI/mimic2 diff --git a/notebooks/dataset_analysis/analyze.py b/notebooks/dataset_analysis/analyze.py new file mode 100644 index 0000000000000000000000000000000000000000..161e2ae3bee1fe4e2ff7f2a707385152179c5125 --- /dev/null +++ b/notebooks/dataset_analysis/analyze.py @@ -0,0 +1,215 @@ +# visualisation tools for mimic2 +import matplotlib.pyplot as plt +from statistics import stdev, mode, mean, median +from statistics import StatisticsError +import argparse +import os +import csv +import seaborn as sns +import random +from text.cmudict import CMUDict + +def get_audio_seconds(frames): + return (frames*12.5)/1000 + + +def append_data_statistics(meta_data): + # get data statistics + for char_cnt in meta_data: + data = meta_data[char_cnt]["data"] + audio_len_list = [d["audio_len"] for d in data] + mean_audio_len = mean(audio_len_list) + try: + mode_audio_list = [round(d["audio_len"], 2) for d in data] + mode_audio_len = mode(mode_audio_list) + except StatisticsError: + mode_audio_len = audio_len_list[0] + median_audio_len = median(audio_len_list) + + try: + std = stdev( + d["audio_len"] for d in data + ) + except StatisticsError: + std = 0 + + meta_data[char_cnt]["mean"] = mean_audio_len + meta_data[char_cnt]["median"] = median_audio_len + meta_data[char_cnt]["mode"] = mode_audio_len + meta_data[char_cnt]["std"] = std + return meta_data + + +def process_meta_data(path): + meta_data = {} + + # load meta data + with open(path, 'r') as f: + data = csv.reader(f, delimiter='|') + for row in data: + frames = int(row[2]) + utt = row[3] + audio_len = get_audio_seconds(frames) + char_count = len(utt) + if not meta_data.get(char_count): + meta_data[char_count] = { + "data": [] + } + + meta_data[char_count]["data"].append( + { + "utt": utt, + "frames": frames, + "audio_len": audio_len, + "row": "{}|{}|{}|{}".format(row[0], row[1], row[2], row[3]) + } + ) + + meta_data = append_data_statistics(meta_data) + + return meta_data + + +def get_data_points(meta_data): + x = meta_data + y_avg = [meta_data[d]['mean'] for d in meta_data] + y_mode = [meta_data[d]['mode'] for d in meta_data] + y_median = [meta_data[d]['median'] for d in meta_data] + y_std = [meta_data[d]['std'] for d in meta_data] + y_num_samples = [len(meta_data[d]['data']) for d in meta_data] + return { + "x": x, + "y_avg": y_avg, + "y_mode": y_mode, + "y_median": y_median, + "y_std": y_std, + "y_num_samples": y_num_samples + } + + +def save_training(file_path, meta_data): + rows = [] + for char_cnt in meta_data: + data = meta_data[char_cnt]['data'] + for d in data: + rows.append(d['row'] + "\n") + + random.shuffle(rows) + with open(file_path, 'w+') as f: + for row in rows: + f.write(row) + + +def plot(meta_data, save_path=None): + save = False + if save_path: + save = True + + graph_data = get_data_points(meta_data) + x = graph_data['x'] + y_avg = graph_data['y_avg'] + y_std = graph_data['y_std'] + y_mode = graph_data['y_mode'] + y_median = graph_data['y_median'] + y_num_samples = graph_data['y_num_samples'] + + plt.figure() + plt.plot(x, y_avg, 'ro') + plt.xlabel("character lengths", fontsize=30) + plt.ylabel("avg seconds", fontsize=30) + if save: + name = "char_len_vs_avg_secs" + plt.savefig(os.path.join(save_path, name)) + + plt.figure() + plt.plot(x, y_mode, 'ro') + plt.xlabel("character lengths", fontsize=30) + plt.ylabel("mode seconds", fontsize=30) + if save: + name = "char_len_vs_mode_secs" + plt.savefig(os.path.join(save_path, name)) + + plt.figure() + plt.plot(x, y_median, 'ro') 
+ plt.xlabel("character lengths", fontsize=30) + plt.ylabel("median seconds", fontsize=30) + if save: + name = "char_len_vs_med_secs" + plt.savefig(os.path.join(save_path, name)) + + plt.figure() + plt.plot(x, y_std, 'ro') + plt.xlabel("character lengths", fontsize=30) + plt.ylabel("standard deviation", fontsize=30) + if save: + name = "char_len_vs_std" + plt.savefig(os.path.join(save_path, name)) + + plt.figure() + plt.plot(x, y_num_samples, 'ro') + plt.xlabel("character lengths", fontsize=30) + plt.ylabel("number of samples", fontsize=30) + if save: + name = "char_len_vs_num_samples" + plt.savefig(os.path.join(save_path, name)) + + +def plot_phonemes(train_path, cmu_dict_path, save_path): + cmudict = CMUDict(cmu_dict_path) + + phonemes = {} + + with open(train_path, 'r') as f: + data = csv.reader(f, delimiter='|') + phonemes["None"] = 0 + for row in data: + words = row[3].split() + for word in words: + pho = cmudict.lookup(word) + if pho: + indie = pho[0].split() + for nemes in indie: + if phonemes.get(nemes): + phonemes[nemes] += 1 + else: + phonemes[nemes] = 1 + else: + phonemes["None"] += 1 + + x, y = [], [] + for key in phonemes: + x.append(key) + y.append(phonemes[key]) + + plt.figure() + plt.rcParams["figure.figsize"] = (50, 20) + barplot = sns.barplot(x, y) + if save_path: + fig = barplot.get_figure() + fig.savefig(os.path.join(save_path, "phoneme_dist")) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument( + '--train_file_path', required=True, + help='this is the path to the train.txt file that the preprocess.py script creates' + ) + parser.add_argument( + '--save_to', help='path to save charts of data to' + ) + parser.add_argument( + '--cmu_dict_path', help='give cmudict-0.7b to see phoneme distribution' + ) + args = parser.parse_args() + meta_data = process_meta_data(args.train_file_path) + plt.rcParams["figure.figsize"] = (10, 5) + plot(meta_data, save_path=args.save_to) + if args.cmu_dict_path: + plt.rcParams["figure.figsize"] = (30, 10) + plot_phonemes(args.train_file_path, args.cmu_dict_path, args.save_to) + + plt.show() + +if __name__ == '__main__': + main() diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000000000000000000000000000000000000..8b8da28dbf828ec38d8f078d14639701ccadf43d --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,2 @@ +[build-system] +requires = ["setuptools", "wheel", "Cython", "numpy==1.17.5"] \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..7a0d9f76eae7519d68072b02c890f8dcd0e8f265 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,25 @@ +torch>=1.5 +tensorflow==2.3.1 +numpy==1.17.5 +scipy>=0.19.0 +numba==0.48 +librosa==0.7.2 +phonemizer>=2.2.0 +unidecode==0.4.20 +tensorboardX +matplotlib +Pillow +flask +tqdm +inflect +bokeh==1.4.0 +pysbd +pyworld +soundfile +nose==1.3.7 +cardboardlint==1.3.0 +pylint==2.5.3 +gdown +umap-learn +cython +pyyaml \ No newline at end of file diff --git a/requirements_tests.txt b/requirements_tests.txt new file mode 100644 index 0000000000000000000000000000000000000000..5b833858f3b66a03ec6ad39267eb95d3a4119367 --- /dev/null +++ b/requirements_tests.txt @@ -0,0 +1,21 @@ +torch>=1.5 +tensorflow==2.3.1 +numpy>=1.16.0 +scipy>=0.19.0 +numba==0.48 +librosa==0.7.2 +phonemizer>=2.2.0 +unidecode==0.4.20 +attrdict +tensorboardX +matplotlib +Pillow +flask +tqdm +inflect +pysbd +bokeh==1.4.0 +soundfile +nose==1.3.7 +cardboardlint==1.3.0 +cython \ No newline at end of file diff --git a/run_tests.sh 
b/run_tests.sh new file mode 100644 index 0000000000000000000000000000000000000000..abfc53d4ce8329bbe7b8ae4e22b58afa705d9aba --- /dev/null +++ b/run_tests.sh @@ -0,0 +1,17 @@ +set -e +TF_CPP_MIN_LOG_LEVEL=3 + +# tests +nosetests tests -x &&\ + +# runtime tests +./tests/test_server_package.sh && \ +./tests/test_tacotron_train.sh && \ +./tests/test_glow-tts_train.sh && \ +./tests/test_vocoder_gan_train.sh && \ +./tests/test_vocoder_wavernn_train.sh && \ +./tests/test_vocoder_wavegrad_train.sh && \ +./tests/test_speedy_speech_train.sh && \ + +# linter check +cardboardlinter --refspec master \ No newline at end of file diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000000000000000000000000000000000000..2344c8b289944835fe78c6ecaf467c1197b8e2e7 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,8 @@ +[build_py] +build-lib=temp_build + +[bdist_wheel] +bdist-dir=temp_build + +[install_lib] +build-dir=temp_build diff --git a/setup.py b/setup.py new file mode 100644 index 0000000000000000000000000000000000000000..1f3be4b4ecd6bfe8b41ba251d427b8b856032886 --- /dev/null +++ b/setup.py @@ -0,0 +1,150 @@ +#!/usr/bin/env python + +import argparse +import os +import shutil +import subprocess +import sys + +import numpy +import setuptools.command.build_py +import setuptools.command.develop + +from setuptools import find_packages, setup +from distutils.extension import Extension +from Cython.Build import cythonize + +# parameters for wheeling server. +parser = argparse.ArgumentParser(add_help=False, allow_abbrev=False) +parser.add_argument('--checkpoint', + type=str, + help='Path to checkpoint file to embed in wheel.') +parser.add_argument('--model_config', + type=str, + help='Path to model configuration file to embed in wheel.') +args, unknown_args = parser.parse_known_args() + +# Remove our arguments from argv so that setuptools doesn't see them +sys.argv = [sys.argv[0]] + unknown_args + +version = '0.0.9.2' +cwd = os.path.dirname(os.path.abspath(__file__)) + +# Handle Cython code +# def find_pyx(path='.'): +# pyx_files = [] +# for root, _, filenames in os.walk(path): +# for fname in filenames: +# if fname.endswith('.pyx'): +# pyx_files.append(os.path.join(root, fname)) +# return pyx_files + + +# def find_cython_extensions(path="."): +# exts = cythonize(find_pyx(path), language_level=3) +# for ext in exts: +# ext.include_dirs = [numpy.get_include()] + +# return exts + + +class build_py(setuptools.command.build_py.build_py): # pylint: disable=too-many-ancestors + def run(self): + self.create_version_file() + setuptools.command.build_py.build_py.run(self) + + @staticmethod + def create_version_file(): + print('-- Building version ' + version) + version_path = os.path.join(cwd, 'version.py') + with open(version_path, 'w') as f: + f.write("__version__ = '{}'\n".format(version)) + + +class develop(setuptools.command.develop.develop): + def run(self): + build_py.create_version_file() + setuptools.command.develop.develop.run(self) + + +# The documentation for this feature is in server/README.md +package_data = ['TTS/server/templates/*'] + +if 'bdist_wheel' in unknown_args and args.checkpoint and args.model_config: + print('Embedding model in wheel file...') + model_dir = os.path.join('TTS', 'server', 'model') + tts_dir = os.path.join(model_dir, 'tts') + os.makedirs(tts_dir, exist_ok=True) + embedded_checkpoint_path = os.path.join(tts_dir, 'checkpoint.pth.tar') + shutil.copy(args.checkpoint, embedded_checkpoint_path) + embedded_config_path = os.path.join(tts_dir, 'config.json') + 
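+    # Ship the config next to the checkpoint and register both as package
+    # data so they end up inside the built wheel (see server/README.md).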
shutil.copy(args.model_config, embedded_config_path) + package_data.extend([embedded_checkpoint_path, embedded_config_path]) + + +def pip_install(package_name): + subprocess.call([sys.executable, '-m', 'pip', 'install', package_name]) + + +requirements = open(os.path.join(cwd, 'requirements.txt'), 'r').readlines() +with open('README.md', "r", encoding="utf-8") as readme_file: + README = readme_file.read() + +exts = [Extension(name='TTS.tts.layers.glow_tts.monotonic_align.core', + sources=["TTS/tts/layers/glow_tts/monotonic_align/core.pyx"])] +setup( + name='TTS', + version=version, + url='https://github.com/mozilla/TTS', + author='Eren Gölge', + author_email='egolge@mozilla.com', + description='Text to Speech with Deep Learning', + long_description=README, + long_description_content_type="text/markdown", + license='MPL-2.0', + # cython + include_dirs=numpy.get_include(), + ext_modules=cythonize(exts, language_level=3), + # ext_modules=find_cython_extensions(), + # package + include_package_data=True, + packages=find_packages(include=['TTS*']), + project_urls={ + 'Documentation': 'https://github.com/mozilla/TTS/wiki', + 'Tracker': 'https://github.com/mozilla/TTS/issues', + 'Repository': 'https://github.com/mozilla/TTS', + 'Discussions': 'https://discourse.mozilla.org/c/tts', + }, + cmdclass={ + 'build_py': build_py, + 'develop': develop, + # 'build_ext': build_ext + }, + install_requires=requirements, + python_requires='>=3.6.0, <3.9', + entry_points={ + 'console_scripts': [ + 'tts=TTS.bin.synthesize:main', + 'tts-server = TTS.server.server:main' + ] + }, + classifiers=[ + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + 'Development Status :: 3 - Alpha', + "Intended Audience :: Science/Research", + "Intended Audience :: Developers", + "Operating System :: POSIX :: Linux", + 'License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)', + "Topic :: Software Development", + "Topic :: Software Development :: Libraries :: Python Modules", + "Topic :: Multimedia :: Sound/Audio :: Speech", + "Topic :: Multimedia :: Sound/Audio", + "Topic :: Multimedia", + "Topic :: Scientific/Engineering :: Artificial Intelligence" + ], + zip_safe=False +) diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..487a5519527b8c406a783644f4f007b2eaea668f --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1,16 @@ +import os + + +def get_tests_path(): + """Returns the path to the test directory.""" + return os.path.dirname(os.path.realpath(__file__)) + + +def get_tests_input_path(): + """Returns the path to the test data directory.""" + return os.path.join(get_tests_path(), "inputs") + + +def get_tests_output_path(): + """Returns the path to the directory for test outputs.""" + return os.path.join(get_tests_path(), "outputs") diff --git a/tests/data/ljspeech/metadata.csv b/tests/data/ljspeech/metadata.csv new file mode 100644 index 0000000000000000000000000000000000000000..8f7832b59bdfa0d8f5d92b603fc9d152b45b8221 --- /dev/null +++ b/tests/data/ljspeech/metadata.csv @@ -0,0 +1,32 @@ +LJ001-0001|Printing, in the only sense with which we are at present concerned, differs from most if not from all the arts and crafts represented in the Exhibition|Printing, in the only sense with which we are at present concerned, differs from most if not from all the arts and crafts represented in the Exhibition 
+LJ001-0002|in being comparatively modern.|in being comparatively modern. +LJ001-0003|For although the Chinese took impressions from wood blocks engraved in relief for centuries before the woodcutters of the Netherlands, by a similar process|For although the Chinese took impressions from wood blocks engraved in relief for centuries before the woodcutters of the Netherlands, by a similar process +LJ001-0004|produced the block books, which were the immediate predecessors of the true printed book,|produced the block books, which were the immediate predecessors of the true printed book, +LJ001-0005|the invention of movable metal letters in the middle of the fifteenth century may justly be considered as the invention of the art of printing.|the invention of movable metal letters in the middle of the fifteenth century may justly be considered as the invention of the art of printing. +LJ001-0006|And it is worth mention in passing that, as an example of fine typography,|And it is worth mention in passing that, as an example of fine typography, +LJ001-0007|the earliest book printed with movable types, the Gutenberg, or "forty-two line Bible" of about 1455,|the earliest book printed with movable types, the Gutenberg, or "forty-two line Bible" of about fourteen fifty-five, +LJ001-0008|has never been surpassed.|has never been surpassed. +LJ001-0009|Printing, then, for our purpose, may be considered as the art of making books by means of movable types.|Printing, then, for our purpose, may be considered as the art of making books by means of movable types. +LJ001-0010|Now, as all books not primarily intended as picture-books consist principally of types composed to form letterpress,|Now, as all books not primarily intended as picture-books consist principally of types composed to form letterpress, +LJ001-0011|it is of the first importance that the letter used should be fine in form;|it is of the first importance that the letter used should be fine in form; +LJ001-0012|especially as no more time is occupied, or cost incurred, in casting, setting, or printing beautiful letters|especially as no more time is occupied, or cost incurred, in casting, setting, or printing beautiful letters +LJ001-0013|than in the same operations with ugly ones.|than in the same operations with ugly ones. +LJ001-0014|And it was a matter of course that in the Middle Ages, when the craftsmen took care that beautiful form should always be a part of their productions whatever they were,|And it was a matter of course that in the Middle Ages, when the craftsmen took care that beautiful form should always be a part of their productions whatever they were, +LJ001-0015|the forms of printed letters should be beautiful, and that their arrangement on the page should be reasonable and a help to the shapeliness of the letters themselves.|the forms of printed letters should be beautiful, and that their arrangement on the page should be reasonable and a help to the shapeliness of the letters themselves. +LJ001-0016|The Middle Ages brought calligraphy to perfection, and it was natural therefore|The Middle Ages brought calligraphy to perfection, and it was natural therefore +LJ001-0017|that the forms of printed letters should follow more or less closely those of the written character, and they followed them very closely.|that the forms of printed letters should follow more or less closely those of the written character, and they followed them very closely. +LJ001-0018|The first books were printed in black letter, i.e. 
the letter which was a Gothic development of the ancient Roman character,|The first books were printed in black letter, i.e. the letter which was a Gothic development of the ancient Roman character, +LJ001-0019|and which developed more completely and satisfactorily on the side of the "lower-case" than the capital letters;|and which developed more completely and satisfactorily on the side of the "lower-case" than the capital letters; +LJ001-0020|the "lower-case" being in fact invented in the early Middle Ages.|the "lower-case" being in fact invented in the early Middle Ages. +LJ001-0021|The earliest book printed with movable type, the aforesaid Gutenberg Bible, is printed in letters which are an exact imitation|The earliest book printed with movable type, the aforesaid Gutenberg Bible, is printed in letters which are an exact imitation +LJ001-0022|of the more formal ecclesiastical writing which obtained at that time; this has since been called "missal type,"|of the more formal ecclesiastical writing which obtained at that time; this has since been called "missal type," +LJ001-0023|and was in fact the kind of letter used in the many splendid missals, psalters, etc., produced by printing in the fifteenth century.|and was in fact the kind of letter used in the many splendid missals, psalters, etc., produced by printing in the fifteenth century. +LJ001-0024|But the first Bible actually dated (which also was printed at Maintz by Peter Schoeffer in the year 1462)|But the first Bible actually dated (which also was printed at Maintz by Peter Schoeffer in the year fourteen sixty-two) +LJ001-0025|imitates a much freer hand, simpler, rounder, and less spiky, and therefore far pleasanter and easier to read.|imitates a much freer hand, simpler, rounder, and less spiky, and therefore far pleasanter and easier to read. +LJ001-0026|On the whole the type of this book may be considered the ne-plus-ultra of Gothic type,|On the whole the type of this book may be considered the ne-plus-ultra of Gothic type, +LJ001-0027|especially as regards the lower-case letters; and type very similar was used during the next fifteen or twenty years not only by Schoeffer,|especially as regards the lower-case letters; and type very similar was used during the next fifteen or twenty years not only by Schoeffer, +LJ001-0028|but by printers in Strasburg, Basle, Paris, Lubeck, and other cities.|but by printers in Strasburg, Basle, Paris, Lubeck, and other cities. +LJ001-0029|But though on the whole, except in Italy, Gothic letter was most often used|But though on the whole, except in Italy, Gothic letter was most often used +LJ001-0030|a very few years saw the birth of Roman character not only in Italy, but in Germany and France.|a very few years saw the birth of Roman character not only in Italy, but in Germany and France. 
+LJ001-0031|In 1465 Sweynheim and Pannartz began printing in the monastery of Subiaco near Rome,|In fourteen sixty-five Sweynheim and Pannartz began printing in the monastery of Subiaco near Rome, +LJ001-0032|and used an exceedingly beautiful type, which is indeed to look at a transition between Gothic and Roman,|and used an exceedingly beautiful type, which is indeed to look at a transition between Gothic and Roman, \ No newline at end of file diff --git a/tests/data/ljspeech/metadata_attn_mask.txt b/tests/data/ljspeech/metadata_attn_mask.txt new file mode 100644 index 0000000000000000000000000000000000000000..eef9a5f19e14b1cd67454f830121d292e21a7f51 --- /dev/null +++ b/tests/data/ljspeech/metadata_attn_mask.txt @@ -0,0 +1,13100 @@ +tests/data/ljspeech/wavs/LJ022-0002.wav|tests/data/ljspeech/wavs/LJ022-0002.npy +tests/data/ljspeech/wavs/LJ001-0045.wav|tests/data/ljspeech/wavs/LJ001-0045.npy +tests/data/ljspeech/wavs/LJ039-0156.wav|tests/data/ljspeech/wavs/LJ039-0156.npy +tests/data/ljspeech/wavs/LJ047-0148.wav|tests/data/ljspeech/wavs/LJ047-0148.npy +tests/data/ljspeech/wavs/LJ035-0209.wav|tests/data/ljspeech/wavs/LJ035-0209.npy +tests/data/ljspeech/wavs/LJ019-0225.wav|tests/data/ljspeech/wavs/LJ019-0225.npy +tests/data/ljspeech/wavs/LJ049-0154.wav|tests/data/ljspeech/wavs/LJ049-0154.npy +tests/data/ljspeech/wavs/LJ013-0082.wav|tests/data/ljspeech/wavs/LJ013-0082.npy +tests/data/ljspeech/wavs/LJ018-0092.wav|tests/data/ljspeech/wavs/LJ018-0092.npy +tests/data/ljspeech/wavs/LJ045-0058.wav|tests/data/ljspeech/wavs/LJ045-0058.npy +tests/data/ljspeech/wavs/LJ028-0060.wav|tests/data/ljspeech/wavs/LJ028-0060.npy +tests/data/ljspeech/wavs/LJ018-0218.wav|tests/data/ljspeech/wavs/LJ018-0218.npy +tests/data/ljspeech/wavs/LJ029-0107.wav|tests/data/ljspeech/wavs/LJ029-0107.npy +tests/data/ljspeech/wavs/LJ009-0160.wav|tests/data/ljspeech/wavs/LJ009-0160.npy +tests/data/ljspeech/wavs/LJ002-0020.wav|tests/data/ljspeech/wavs/LJ002-0020.npy +tests/data/ljspeech/wavs/LJ032-0155.wav|tests/data/ljspeech/wavs/LJ032-0155.npy +tests/data/ljspeech/wavs/LJ033-0135.wav|tests/data/ljspeech/wavs/LJ033-0135.npy +tests/data/ljspeech/wavs/LJ031-0024.wav|tests/data/ljspeech/wavs/LJ031-0024.npy +tests/data/ljspeech/wavs/LJ032-0100.wav|tests/data/ljspeech/wavs/LJ032-0100.npy +tests/data/ljspeech/wavs/LJ037-0219.wav|tests/data/ljspeech/wavs/LJ037-0219.npy +tests/data/ljspeech/wavs/LJ009-0126.wav|tests/data/ljspeech/wavs/LJ009-0126.npy +tests/data/ljspeech/wavs/LJ009-0074.wav|tests/data/ljspeech/wavs/LJ009-0074.npy +tests/data/ljspeech/wavs/LJ028-0208.wav|tests/data/ljspeech/wavs/LJ028-0208.npy +tests/data/ljspeech/wavs/LJ038-0003.wav|tests/data/ljspeech/wavs/LJ038-0003.npy +tests/data/ljspeech/wavs/LJ009-0294.wav|tests/data/ljspeech/wavs/LJ009-0294.npy +tests/data/ljspeech/wavs/LJ012-0199.wav|tests/data/ljspeech/wavs/LJ012-0199.npy +tests/data/ljspeech/wavs/LJ011-0028.wav|tests/data/ljspeech/wavs/LJ011-0028.npy +tests/data/ljspeech/wavs/LJ006-0281.wav|tests/data/ljspeech/wavs/LJ006-0281.npy +tests/data/ljspeech/wavs/LJ010-0019.wav|tests/data/ljspeech/wavs/LJ010-0019.npy +tests/data/ljspeech/wavs/LJ040-0062.wav|tests/data/ljspeech/wavs/LJ040-0062.npy +tests/data/ljspeech/wavs/LJ037-0003.wav|tests/data/ljspeech/wavs/LJ037-0003.npy +tests/data/ljspeech/wavs/LJ043-0159.wav|tests/data/ljspeech/wavs/LJ043-0159.npy +tests/data/ljspeech/wavs/LJ039-0036.wav|tests/data/ljspeech/wavs/LJ039-0036.npy +tests/data/ljspeech/wavs/LJ028-0255.wav|tests/data/ljspeech/wavs/LJ028-0255.npy 
+tests/data/ljspeech/wavs/LJ032-0058.wav|tests/data/ljspeech/wavs/LJ032-0058.npy +tests/data/ljspeech/wavs/LJ038-0304.wav|tests/data/ljspeech/wavs/LJ038-0304.npy +tests/data/ljspeech/wavs/LJ032-0239.wav|tests/data/ljspeech/wavs/LJ032-0239.npy +tests/data/ljspeech/wavs/LJ032-0250.wav|tests/data/ljspeech/wavs/LJ032-0250.npy +tests/data/ljspeech/wavs/LJ047-0056.wav|tests/data/ljspeech/wavs/LJ047-0056.npy +tests/data/ljspeech/wavs/LJ038-0079.wav|tests/data/ljspeech/wavs/LJ038-0079.npy +tests/data/ljspeech/wavs/LJ050-0101.wav|tests/data/ljspeech/wavs/LJ050-0101.npy +tests/data/ljspeech/wavs/LJ008-0033.wav|tests/data/ljspeech/wavs/LJ008-0033.npy +tests/data/ljspeech/wavs/LJ037-0157.wav|tests/data/ljspeech/wavs/LJ037-0157.npy +tests/data/ljspeech/wavs/LJ038-0273.wav|tests/data/ljspeech/wavs/LJ038-0273.npy +tests/data/ljspeech/wavs/LJ043-0004.wav|tests/data/ljspeech/wavs/LJ043-0004.npy +tests/data/ljspeech/wavs/LJ002-0035.wav|tests/data/ljspeech/wavs/LJ002-0035.npy +tests/data/ljspeech/wavs/LJ007-0217.wav|tests/data/ljspeech/wavs/LJ007-0217.npy +tests/data/ljspeech/wavs/LJ039-0151.wav|tests/data/ljspeech/wavs/LJ039-0151.npy +tests/data/ljspeech/wavs/LJ039-0027.wav|tests/data/ljspeech/wavs/LJ039-0027.npy +tests/data/ljspeech/wavs/LJ016-0398.wav|tests/data/ljspeech/wavs/LJ016-0398.npy +tests/data/ljspeech/wavs/LJ028-0389.wav|tests/data/ljspeech/wavs/LJ028-0389.npy +tests/data/ljspeech/wavs/LJ034-0003.wav|tests/data/ljspeech/wavs/LJ034-0003.npy +tests/data/ljspeech/wavs/LJ044-0144.wav|tests/data/ljspeech/wavs/LJ044-0144.npy +tests/data/ljspeech/wavs/LJ036-0097.wav|tests/data/ljspeech/wavs/LJ036-0097.npy +tests/data/ljspeech/wavs/LJ020-0064.wav|tests/data/ljspeech/wavs/LJ020-0064.npy +tests/data/ljspeech/wavs/LJ045-0177.wav|tests/data/ljspeech/wavs/LJ045-0177.npy +tests/data/ljspeech/wavs/LJ017-0277.wav|tests/data/ljspeech/wavs/LJ017-0277.npy +tests/data/ljspeech/wavs/LJ021-0165.wav|tests/data/ljspeech/wavs/LJ021-0165.npy +tests/data/ljspeech/wavs/LJ030-0137.wav|tests/data/ljspeech/wavs/LJ030-0137.npy +tests/data/ljspeech/wavs/LJ045-0149.wav|tests/data/ljspeech/wavs/LJ045-0149.npy +tests/data/ljspeech/wavs/LJ049-0075.wav|tests/data/ljspeech/wavs/LJ049-0075.npy +tests/data/ljspeech/wavs/LJ040-0096.wav|tests/data/ljspeech/wavs/LJ040-0096.npy +tests/data/ljspeech/wavs/LJ036-0105.wav|tests/data/ljspeech/wavs/LJ036-0105.npy +tests/data/ljspeech/wavs/LJ037-0076.wav|tests/data/ljspeech/wavs/LJ037-0076.npy +tests/data/ljspeech/wavs/LJ047-0092.wav|tests/data/ljspeech/wavs/LJ047-0092.npy +tests/data/ljspeech/wavs/LJ021-0002.wav|tests/data/ljspeech/wavs/LJ021-0002.npy +tests/data/ljspeech/wavs/LJ040-0058.wav|tests/data/ljspeech/wavs/LJ040-0058.npy +tests/data/ljspeech/wavs/LJ033-0200.wav|tests/data/ljspeech/wavs/LJ033-0200.npy +tests/data/ljspeech/wavs/LJ041-0147.wav|tests/data/ljspeech/wavs/LJ041-0147.npy +tests/data/ljspeech/wavs/LJ033-0164.wav|tests/data/ljspeech/wavs/LJ033-0164.npy +tests/data/ljspeech/wavs/LJ034-0204.wav|tests/data/ljspeech/wavs/LJ034-0204.npy +tests/data/ljspeech/wavs/LJ045-0047.wav|tests/data/ljspeech/wavs/LJ045-0047.npy +tests/data/ljspeech/wavs/LJ024-0003.wav|tests/data/ljspeech/wavs/LJ024-0003.npy +tests/data/ljspeech/wavs/LJ013-0148.wav|tests/data/ljspeech/wavs/LJ013-0148.npy +tests/data/ljspeech/wavs/LJ012-0033.wav|tests/data/ljspeech/wavs/LJ012-0033.npy +tests/data/ljspeech/wavs/LJ044-0004.wav|tests/data/ljspeech/wavs/LJ044-0004.npy +tests/data/ljspeech/wavs/LJ005-0121.wav|tests/data/ljspeech/wavs/LJ005-0121.npy 
+tests/data/ljspeech/wavs/LJ014-0259.wav|tests/data/ljspeech/wavs/LJ014-0259.npy +tests/data/ljspeech/wavs/LJ041-0050.wav|tests/data/ljspeech/wavs/LJ041-0050.npy +tests/data/ljspeech/wavs/LJ041-0112.wav|tests/data/ljspeech/wavs/LJ041-0112.npy +tests/data/ljspeech/wavs/LJ034-0144.wav|tests/data/ljspeech/wavs/LJ034-0144.npy +tests/data/ljspeech/wavs/LJ010-0107.wav|tests/data/ljspeech/wavs/LJ010-0107.npy +tests/data/ljspeech/wavs/LJ019-0143.wav|tests/data/ljspeech/wavs/LJ019-0143.npy +tests/data/ljspeech/wavs/LJ034-0165.wav|tests/data/ljspeech/wavs/LJ034-0165.npy +tests/data/ljspeech/wavs/LJ016-0426.wav|tests/data/ljspeech/wavs/LJ016-0426.npy +tests/data/ljspeech/wavs/LJ010-0182.wav|tests/data/ljspeech/wavs/LJ010-0182.npy +tests/data/ljspeech/wavs/LJ015-0265.wav|tests/data/ljspeech/wavs/LJ015-0265.npy +tests/data/ljspeech/wavs/LJ029-0091.wav|tests/data/ljspeech/wavs/LJ029-0091.npy +tests/data/ljspeech/wavs/LJ017-0278.wav|tests/data/ljspeech/wavs/LJ017-0278.npy +tests/data/ljspeech/wavs/LJ009-0256.wav|tests/data/ljspeech/wavs/LJ009-0256.npy +tests/data/ljspeech/wavs/LJ014-0186.wav|tests/data/ljspeech/wavs/LJ014-0186.npy +tests/data/ljspeech/wavs/LJ023-0112.wav|tests/data/ljspeech/wavs/LJ023-0112.npy +tests/data/ljspeech/wavs/LJ016-0144.wav|tests/data/ljspeech/wavs/LJ016-0144.npy +tests/data/ljspeech/wavs/LJ016-0378.wav|tests/data/ljspeech/wavs/LJ016-0378.npy +tests/data/ljspeech/wavs/LJ007-0008.wav|tests/data/ljspeech/wavs/LJ007-0008.npy +tests/data/ljspeech/wavs/LJ011-0152.wav|tests/data/ljspeech/wavs/LJ011-0152.npy +tests/data/ljspeech/wavs/LJ032-0158.wav|tests/data/ljspeech/wavs/LJ032-0158.npy +tests/data/ljspeech/wavs/LJ039-0205.wav|tests/data/ljspeech/wavs/LJ039-0205.npy +tests/data/ljspeech/wavs/LJ029-0048.wav|tests/data/ljspeech/wavs/LJ029-0048.npy +tests/data/ljspeech/wavs/LJ009-0170.wav|tests/data/ljspeech/wavs/LJ009-0170.npy +tests/data/ljspeech/wavs/LJ033-0112.wav|tests/data/ljspeech/wavs/LJ033-0112.npy +tests/data/ljspeech/wavs/LJ010-0255.wav|tests/data/ljspeech/wavs/LJ010-0255.npy +tests/data/ljspeech/wavs/LJ036-0116.wav|tests/data/ljspeech/wavs/LJ036-0116.npy +tests/data/ljspeech/wavs/LJ048-0058.wav|tests/data/ljspeech/wavs/LJ048-0058.npy +tests/data/ljspeech/wavs/LJ037-0227.wav|tests/data/ljspeech/wavs/LJ037-0227.npy +tests/data/ljspeech/wavs/LJ004-0127.wav|tests/data/ljspeech/wavs/LJ004-0127.npy +tests/data/ljspeech/wavs/LJ041-0201.wav|tests/data/ljspeech/wavs/LJ041-0201.npy +tests/data/ljspeech/wavs/LJ048-0272.wav|tests/data/ljspeech/wavs/LJ048-0272.npy +tests/data/ljspeech/wavs/LJ050-0023.wav|tests/data/ljspeech/wavs/LJ050-0023.npy +tests/data/ljspeech/wavs/LJ031-0026.wav|tests/data/ljspeech/wavs/LJ031-0026.npy +tests/data/ljspeech/wavs/LJ033-0012.wav|tests/data/ljspeech/wavs/LJ033-0012.npy +tests/data/ljspeech/wavs/LJ038-0135.wav|tests/data/ljspeech/wavs/LJ038-0135.npy +tests/data/ljspeech/wavs/LJ024-0068.wav|tests/data/ljspeech/wavs/LJ024-0068.npy +tests/data/ljspeech/wavs/LJ047-0105.wav|tests/data/ljspeech/wavs/LJ047-0105.npy +tests/data/ljspeech/wavs/LJ030-0023.wav|tests/data/ljspeech/wavs/LJ030-0023.npy +tests/data/ljspeech/wavs/LJ021-0039.wav|tests/data/ljspeech/wavs/LJ021-0039.npy +tests/data/ljspeech/wavs/LJ022-0019.wav|tests/data/ljspeech/wavs/LJ022-0019.npy +tests/data/ljspeech/wavs/LJ018-0363.wav|tests/data/ljspeech/wavs/LJ018-0363.npy +tests/data/ljspeech/wavs/LJ011-0047.wav|tests/data/ljspeech/wavs/LJ011-0047.npy +tests/data/ljspeech/wavs/LJ048-0091.wav|tests/data/ljspeech/wavs/LJ048-0091.npy 
+tests/data/ljspeech/wavs/LJ010-0072.wav|tests/data/ljspeech/wavs/LJ010-0072.npy +tests/data/ljspeech/wavs/LJ040-0017.wav|tests/data/ljspeech/wavs/LJ040-0017.npy +tests/data/ljspeech/wavs/LJ037-0182.wav|tests/data/ljspeech/wavs/LJ037-0182.npy +tests/data/ljspeech/wavs/LJ007-0228.wav|tests/data/ljspeech/wavs/LJ007-0228.npy +tests/data/ljspeech/wavs/LJ035-0059.wav|tests/data/ljspeech/wavs/LJ035-0059.npy +tests/data/ljspeech/wavs/LJ009-0027.wav|tests/data/ljspeech/wavs/LJ009-0027.npy +tests/data/ljspeech/wavs/LJ016-0233.wav|tests/data/ljspeech/wavs/LJ016-0233.npy +tests/data/ljspeech/wavs/LJ036-0104.wav|tests/data/ljspeech/wavs/LJ036-0104.npy +tests/data/ljspeech/wavs/LJ037-0142.wav|tests/data/ljspeech/wavs/LJ037-0142.npy +tests/data/ljspeech/wavs/LJ012-0250.wav|tests/data/ljspeech/wavs/LJ012-0250.npy +tests/data/ljspeech/wavs/LJ047-0131.wav|tests/data/ljspeech/wavs/LJ047-0131.npy +tests/data/ljspeech/wavs/LJ043-0110.wav|tests/data/ljspeech/wavs/LJ043-0110.npy +tests/data/ljspeech/wavs/LJ036-0120.wav|tests/data/ljspeech/wavs/LJ036-0120.npy +tests/data/ljspeech/wavs/LJ040-0082.wav|tests/data/ljspeech/wavs/LJ040-0082.npy +tests/data/ljspeech/wavs/LJ011-0097.wav|tests/data/ljspeech/wavs/LJ011-0097.npy +tests/data/ljspeech/wavs/LJ023-0031.wav|tests/data/ljspeech/wavs/LJ023-0031.npy +tests/data/ljspeech/wavs/LJ027-0144.wav|tests/data/ljspeech/wavs/LJ027-0144.npy +tests/data/ljspeech/wavs/LJ048-0015.wav|tests/data/ljspeech/wavs/LJ048-0015.npy +tests/data/ljspeech/wavs/LJ030-0097.wav|tests/data/ljspeech/wavs/LJ030-0097.npy +tests/data/ljspeech/wavs/LJ028-0322.wav|tests/data/ljspeech/wavs/LJ028-0322.npy +tests/data/ljspeech/wavs/LJ029-0010.wav|tests/data/ljspeech/wavs/LJ029-0010.npy +tests/data/ljspeech/wavs/LJ029-0170.wav|tests/data/ljspeech/wavs/LJ029-0170.npy +tests/data/ljspeech/wavs/LJ039-0060.wav|tests/data/ljspeech/wavs/LJ039-0060.npy +tests/data/ljspeech/wavs/LJ030-0086.wav|tests/data/ljspeech/wavs/LJ030-0086.npy +tests/data/ljspeech/wavs/LJ050-0037.wav|tests/data/ljspeech/wavs/LJ050-0037.npy +tests/data/ljspeech/wavs/LJ031-0020.wav|tests/data/ljspeech/wavs/LJ031-0020.npy +tests/data/ljspeech/wavs/LJ028-0109.wav|tests/data/ljspeech/wavs/LJ028-0109.npy +tests/data/ljspeech/wavs/LJ039-0231.wav|tests/data/ljspeech/wavs/LJ039-0231.npy +tests/data/ljspeech/wavs/LJ009-0076.wav|tests/data/ljspeech/wavs/LJ009-0076.npy +tests/data/ljspeech/wavs/LJ036-0193.wav|tests/data/ljspeech/wavs/LJ036-0193.npy +tests/data/ljspeech/wavs/LJ050-0195.wav|tests/data/ljspeech/wavs/LJ050-0195.npy +tests/data/ljspeech/wavs/LJ011-0030.wav|tests/data/ljspeech/wavs/LJ011-0030.npy +tests/data/ljspeech/wavs/LJ038-0163.wav|tests/data/ljspeech/wavs/LJ038-0163.npy +tests/data/ljspeech/wavs/LJ021-0172.wav|tests/data/ljspeech/wavs/LJ021-0172.npy +tests/data/ljspeech/wavs/LJ045-0025.wav|tests/data/ljspeech/wavs/LJ045-0025.npy +tests/data/ljspeech/wavs/LJ003-0339.wav|tests/data/ljspeech/wavs/LJ003-0339.npy +tests/data/ljspeech/wavs/LJ005-0172.wav|tests/data/ljspeech/wavs/LJ005-0172.npy +tests/data/ljspeech/wavs/LJ030-0152.wav|tests/data/ljspeech/wavs/LJ030-0152.npy +tests/data/ljspeech/wavs/LJ030-0111.wav|tests/data/ljspeech/wavs/LJ030-0111.npy +tests/data/ljspeech/wavs/LJ048-0147.wav|tests/data/ljspeech/wavs/LJ048-0147.npy +tests/data/ljspeech/wavs/LJ028-0048.wav|tests/data/ljspeech/wavs/LJ028-0048.npy +tests/data/ljspeech/wavs/LJ015-0052.wav|tests/data/ljspeech/wavs/LJ015-0052.npy +tests/data/ljspeech/wavs/LJ023-0002.wav|tests/data/ljspeech/wavs/LJ023-0002.npy 
+tests/data/ljspeech/wavs/LJ047-0141.wav|tests/data/ljspeech/wavs/LJ047-0141.npy +tests/data/ljspeech/wavs/LJ018-0223.wav|tests/data/ljspeech/wavs/LJ018-0223.npy +tests/data/ljspeech/wavs/LJ038-0255.wav|tests/data/ljspeech/wavs/LJ038-0255.npy +tests/data/ljspeech/wavs/LJ026-0002.wav|tests/data/ljspeech/wavs/LJ026-0002.npy +tests/data/ljspeech/wavs/LJ038-0098.wav|tests/data/ljspeech/wavs/LJ038-0098.npy +tests/data/ljspeech/wavs/LJ050-0146.wav|tests/data/ljspeech/wavs/LJ050-0146.npy +tests/data/ljspeech/wavs/LJ032-0104.wav|tests/data/ljspeech/wavs/LJ032-0104.npy +tests/data/ljspeech/wavs/LJ017-0273.wav|tests/data/ljspeech/wavs/LJ017-0273.npy +tests/data/ljspeech/wavs/LJ030-0109.wav|tests/data/ljspeech/wavs/LJ030-0109.npy +tests/data/ljspeech/wavs/LJ046-0179.wav|tests/data/ljspeech/wavs/LJ046-0179.npy +tests/data/ljspeech/wavs/LJ043-0133.wav|tests/data/ljspeech/wavs/LJ043-0133.npy +tests/data/ljspeech/wavs/LJ007-0009.wav|tests/data/ljspeech/wavs/LJ007-0009.npy +tests/data/ljspeech/wavs/LJ047-0101.wav|tests/data/ljspeech/wavs/LJ047-0101.npy +tests/data/ljspeech/wavs/LJ003-0050.wav|tests/data/ljspeech/wavs/LJ003-0050.npy +tests/data/ljspeech/wavs/LJ045-0147.wav|tests/data/ljspeech/wavs/LJ045-0147.npy +tests/data/ljspeech/wavs/LJ039-0098.wav|tests/data/ljspeech/wavs/LJ039-0098.npy +tests/data/ljspeech/wavs/LJ028-0062.wav|tests/data/ljspeech/wavs/LJ028-0062.npy +tests/data/ljspeech/wavs/LJ037-0229.wav|tests/data/ljspeech/wavs/LJ037-0229.npy +tests/data/ljspeech/wavs/LJ038-0058.wav|tests/data/ljspeech/wavs/LJ038-0058.npy +tests/data/ljspeech/wavs/LJ036-0156.wav|tests/data/ljspeech/wavs/LJ036-0156.npy +tests/data/ljspeech/wavs/LJ009-0057.wav|tests/data/ljspeech/wavs/LJ009-0057.npy +tests/data/ljspeech/wavs/LJ045-0171.wav|tests/data/ljspeech/wavs/LJ045-0171.npy +tests/data/ljspeech/wavs/LJ008-0148.wav|tests/data/ljspeech/wavs/LJ008-0148.npy +tests/data/ljspeech/wavs/LJ045-0024.wav|tests/data/ljspeech/wavs/LJ045-0024.npy +tests/data/ljspeech/wavs/LJ017-0105.wav|tests/data/ljspeech/wavs/LJ017-0105.npy +tests/data/ljspeech/wavs/LJ039-0025.wav|tests/data/ljspeech/wavs/LJ039-0025.npy +tests/data/ljspeech/wavs/LJ049-0004.wav|tests/data/ljspeech/wavs/LJ049-0004.npy +tests/data/ljspeech/wavs/LJ016-0091.wav|tests/data/ljspeech/wavs/LJ016-0091.npy +tests/data/ljspeech/wavs/LJ001-0008.wav|tests/data/ljspeech/wavs/LJ001-0008.npy +tests/data/ljspeech/wavs/LJ032-0240.wav|tests/data/ljspeech/wavs/LJ032-0240.npy +tests/data/ljspeech/wavs/LJ048-0033.wav|tests/data/ljspeech/wavs/LJ048-0033.npy +tests/data/ljspeech/wavs/LJ032-0247.wav|tests/data/ljspeech/wavs/LJ032-0247.npy +tests/data/ljspeech/wavs/LJ025-0050.wav|tests/data/ljspeech/wavs/LJ025-0050.npy +tests/data/ljspeech/wavs/LJ029-0201.wav|tests/data/ljspeech/wavs/LJ029-0201.npy +tests/data/ljspeech/wavs/LJ047-0161.wav|tests/data/ljspeech/wavs/LJ047-0161.npy +tests/data/ljspeech/wavs/LJ031-0192.wav|tests/data/ljspeech/wavs/LJ031-0192.npy +tests/data/ljspeech/wavs/LJ018-0106.wav|tests/data/ljspeech/wavs/LJ018-0106.npy +tests/data/ljspeech/wavs/LJ041-0186.wav|tests/data/ljspeech/wavs/LJ041-0186.npy +tests/data/ljspeech/wavs/LJ021-0077.wav|tests/data/ljspeech/wavs/LJ021-0077.npy +tests/data/ljspeech/wavs/LJ039-0054.wav|tests/data/ljspeech/wavs/LJ039-0054.npy +tests/data/ljspeech/wavs/LJ045-0056.wav|tests/data/ljspeech/wavs/LJ045-0056.npy +tests/data/ljspeech/wavs/LJ015-0138.wav|tests/data/ljspeech/wavs/LJ015-0138.npy +tests/data/ljspeech/wavs/LJ039-0129.wav|tests/data/ljspeech/wavs/LJ039-0129.npy 
+tests/data/ljspeech/wavs/LJ025-0110.wav|tests/data/ljspeech/wavs/LJ025-0110.npy +tests/data/ljspeech/wavs/LJ048-0154.wav|tests/data/ljspeech/wavs/LJ048-0154.npy +tests/data/ljspeech/wavs/LJ021-0186.wav|tests/data/ljspeech/wavs/LJ021-0186.npy +tests/data/ljspeech/wavs/LJ039-0005.wav|tests/data/ljspeech/wavs/LJ039-0005.npy +tests/data/ljspeech/wavs/LJ011-0208.wav|tests/data/ljspeech/wavs/LJ011-0208.npy +tests/data/ljspeech/wavs/LJ028-0413.wav|tests/data/ljspeech/wavs/LJ028-0413.npy +tests/data/ljspeech/wavs/LJ029-0070.wav|tests/data/ljspeech/wavs/LJ029-0070.npy +tests/data/ljspeech/wavs/LJ042-0014.wav|tests/data/ljspeech/wavs/LJ042-0014.npy +tests/data/ljspeech/wavs/LJ045-0195.wav|tests/data/ljspeech/wavs/LJ045-0195.npy +tests/data/ljspeech/wavs/LJ028-0462.wav|tests/data/ljspeech/wavs/LJ028-0462.npy +tests/data/ljspeech/wavs/LJ033-0075.wav|tests/data/ljspeech/wavs/LJ033-0075.npy +tests/data/ljspeech/wavs/LJ046-0050.wav|tests/data/ljspeech/wavs/LJ046-0050.npy +tests/data/ljspeech/wavs/LJ046-0124.wav|tests/data/ljspeech/wavs/LJ046-0124.npy +tests/data/ljspeech/wavs/LJ016-0051.wav|tests/data/ljspeech/wavs/LJ016-0051.npy +tests/data/ljspeech/wavs/LJ014-0231.wav|tests/data/ljspeech/wavs/LJ014-0231.npy +tests/data/ljspeech/wavs/LJ022-0048.wav|tests/data/ljspeech/wavs/LJ022-0048.npy +tests/data/ljspeech/wavs/LJ033-0003.wav|tests/data/ljspeech/wavs/LJ033-0003.npy +tests/data/ljspeech/wavs/LJ018-0069.wav|tests/data/ljspeech/wavs/LJ018-0069.npy +tests/data/ljspeech/wavs/LJ046-0003.wav|tests/data/ljspeech/wavs/LJ046-0003.npy +tests/data/ljspeech/wavs/LJ008-0022.wav|tests/data/ljspeech/wavs/LJ008-0022.npy +tests/data/ljspeech/wavs/LJ029-0182.wav|tests/data/ljspeech/wavs/LJ029-0182.npy +tests/data/ljspeech/wavs/LJ032-0018.wav|tests/data/ljspeech/wavs/LJ032-0018.npy +tests/data/ljspeech/wavs/LJ024-0143.wav|tests/data/ljspeech/wavs/LJ024-0143.npy +tests/data/ljspeech/wavs/LJ044-0042.wav|tests/data/ljspeech/wavs/LJ044-0042.npy +tests/data/ljspeech/wavs/LJ033-0160.wav|tests/data/ljspeech/wavs/LJ033-0160.npy +tests/data/ljspeech/wavs/LJ004-0008.wav|tests/data/ljspeech/wavs/LJ004-0008.npy +tests/data/ljspeech/wavs/LJ009-0054.wav|tests/data/ljspeech/wavs/LJ009-0054.npy +tests/data/ljspeech/wavs/LJ024-0019.wav|tests/data/ljspeech/wavs/LJ024-0019.npy +tests/data/ljspeech/wavs/LJ034-0039.wav|tests/data/ljspeech/wavs/LJ034-0039.npy +tests/data/ljspeech/wavs/LJ015-0010.wav|tests/data/ljspeech/wavs/LJ015-0010.npy +tests/data/ljspeech/wavs/LJ039-0095.wav|tests/data/ljspeech/wavs/LJ039-0095.npy +tests/data/ljspeech/wavs/LJ047-0205.wav|tests/data/ljspeech/wavs/LJ047-0205.npy +tests/data/ljspeech/wavs/LJ030-0073.wav|tests/data/ljspeech/wavs/LJ030-0073.npy +tests/data/ljspeech/wavs/LJ025-0012.wav|tests/data/ljspeech/wavs/LJ025-0012.npy +tests/data/ljspeech/wavs/LJ007-0149.wav|tests/data/ljspeech/wavs/LJ007-0149.npy +tests/data/ljspeech/wavs/LJ047-0017.wav|tests/data/ljspeech/wavs/LJ047-0017.npy +tests/data/ljspeech/wavs/LJ009-0073.wav|tests/data/ljspeech/wavs/LJ009-0073.npy +tests/data/ljspeech/wavs/LJ017-0212.wav|tests/data/ljspeech/wavs/LJ017-0212.npy +tests/data/ljspeech/wavs/LJ018-0312.wav|tests/data/ljspeech/wavs/LJ018-0312.npy +tests/data/ljspeech/wavs/LJ044-0161.wav|tests/data/ljspeech/wavs/LJ044-0161.npy +tests/data/ljspeech/wavs/LJ016-0197.wav|tests/data/ljspeech/wavs/LJ016-0197.npy +tests/data/ljspeech/wavs/LJ023-0077.wav|tests/data/ljspeech/wavs/LJ023-0077.npy +tests/data/ljspeech/wavs/LJ030-0067.wav|tests/data/ljspeech/wavs/LJ030-0067.npy 
+tests/data/ljspeech/wavs/LJ034-0202.wav|tests/data/ljspeech/wavs/LJ034-0202.npy +tests/data/ljspeech/wavs/LJ030-0050.wav|tests/data/ljspeech/wavs/LJ030-0050.npy +tests/data/ljspeech/wavs/LJ049-0016.wav|tests/data/ljspeech/wavs/LJ049-0016.npy +tests/data/ljspeech/wavs/LJ035-0029.wav|tests/data/ljspeech/wavs/LJ035-0029.npy +tests/data/ljspeech/wavs/LJ048-0060.wav|tests/data/ljspeech/wavs/LJ048-0060.npy +tests/data/ljspeech/wavs/LJ037-0113.wav|tests/data/ljspeech/wavs/LJ037-0113.npy +tests/data/ljspeech/wavs/LJ016-0022.wav|tests/data/ljspeech/wavs/LJ016-0022.npy +tests/data/ljspeech/wavs/LJ032-0122.wav|tests/data/ljspeech/wavs/LJ032-0122.npy +tests/data/ljspeech/wavs/LJ032-0056.wav|tests/data/ljspeech/wavs/LJ032-0056.npy +tests/data/ljspeech/wavs/LJ025-0038.wav|tests/data/ljspeech/wavs/LJ025-0038.npy +tests/data/ljspeech/wavs/LJ046-0153.wav|tests/data/ljspeech/wavs/LJ046-0153.npy +tests/data/ljspeech/wavs/LJ035-0073.wav|tests/data/ljspeech/wavs/LJ035-0073.npy +tests/data/ljspeech/wavs/LJ021-0153.wav|tests/data/ljspeech/wavs/LJ021-0153.npy +tests/data/ljspeech/wavs/LJ050-0137.wav|tests/data/ljspeech/wavs/LJ050-0137.npy +tests/data/ljspeech/wavs/LJ042-0189.wav|tests/data/ljspeech/wavs/LJ042-0189.npy +tests/data/ljspeech/wavs/LJ022-0116.wav|tests/data/ljspeech/wavs/LJ022-0116.npy +tests/data/ljspeech/wavs/LJ018-0196.wav|tests/data/ljspeech/wavs/LJ018-0196.npy +tests/data/ljspeech/wavs/LJ037-0006.wav|tests/data/ljspeech/wavs/LJ037-0006.npy +tests/data/ljspeech/wavs/LJ030-0103.wav|tests/data/ljspeech/wavs/LJ030-0103.npy +tests/data/ljspeech/wavs/LJ047-0022.wav|tests/data/ljspeech/wavs/LJ047-0022.npy +tests/data/ljspeech/wavs/LJ022-0169.wav|tests/data/ljspeech/wavs/LJ022-0169.npy +tests/data/ljspeech/wavs/LJ013-0031.wav|tests/data/ljspeech/wavs/LJ013-0031.npy +tests/data/ljspeech/wavs/LJ021-0199.wav|tests/data/ljspeech/wavs/LJ021-0199.npy +tests/data/ljspeech/wavs/LJ009-0243.wav|tests/data/ljspeech/wavs/LJ009-0243.npy +tests/data/ljspeech/wavs/LJ033-0068.wav|tests/data/ljspeech/wavs/LJ033-0068.npy +tests/data/ljspeech/wavs/LJ011-0107.wav|tests/data/ljspeech/wavs/LJ011-0107.npy +tests/data/ljspeech/wavs/LJ008-0110.wav|tests/data/ljspeech/wavs/LJ008-0110.npy +tests/data/ljspeech/wavs/LJ043-0041.wav|tests/data/ljspeech/wavs/LJ043-0041.npy +tests/data/ljspeech/wavs/LJ020-0008.wav|tests/data/ljspeech/wavs/LJ020-0008.npy +tests/data/ljspeech/wavs/LJ030-0003.wav|tests/data/ljspeech/wavs/LJ030-0003.npy +tests/data/ljspeech/wavs/LJ043-0007.wav|tests/data/ljspeech/wavs/LJ043-0007.npy +tests/data/ljspeech/wavs/LJ009-0260.wav|tests/data/ljspeech/wavs/LJ009-0260.npy +tests/data/ljspeech/wavs/LJ030-0162.wav|tests/data/ljspeech/wavs/LJ030-0162.npy +tests/data/ljspeech/wavs/LJ027-0051.wav|tests/data/ljspeech/wavs/LJ027-0051.npy +tests/data/ljspeech/wavs/LJ024-0089.wav|tests/data/ljspeech/wavs/LJ024-0089.npy +tests/data/ljspeech/wavs/LJ024-0120.wav|tests/data/ljspeech/wavs/LJ024-0120.npy +tests/data/ljspeech/wavs/LJ008-0294.wav|tests/data/ljspeech/wavs/LJ008-0294.npy +tests/data/ljspeech/wavs/LJ038-0174.wav|tests/data/ljspeech/wavs/LJ038-0174.npy +tests/data/ljspeech/wavs/LJ008-0197.wav|tests/data/ljspeech/wavs/LJ008-0197.npy +tests/data/ljspeech/wavs/LJ045-0109.wav|tests/data/ljspeech/wavs/LJ045-0109.npy +tests/data/ljspeech/wavs/LJ018-0222.wav|tests/data/ljspeech/wavs/LJ018-0222.npy +tests/data/ljspeech/wavs/LJ043-0097.wav|tests/data/ljspeech/wavs/LJ043-0097.npy +tests/data/ljspeech/wavs/LJ002-0125.wav|tests/data/ljspeech/wavs/LJ002-0125.npy 
+tests/data/ljspeech/wavs/LJ049-0055.wav|tests/data/ljspeech/wavs/LJ049-0055.npy +tests/data/ljspeech/wavs/LJ012-0154.wav|tests/data/ljspeech/wavs/LJ012-0154.npy +tests/data/ljspeech/wavs/LJ042-0004.wav|tests/data/ljspeech/wavs/LJ042-0004.npy +tests/data/ljspeech/wavs/LJ043-0142.wav|tests/data/ljspeech/wavs/LJ043-0142.npy +tests/data/ljspeech/wavs/LJ020-0069.wav|tests/data/ljspeech/wavs/LJ020-0069.npy +tests/data/ljspeech/wavs/LJ030-0035.wav|tests/data/ljspeech/wavs/LJ030-0035.npy +tests/data/ljspeech/wavs/LJ009-0168.wav|tests/data/ljspeech/wavs/LJ009-0168.npy +tests/data/ljspeech/wavs/LJ020-0067.wav|tests/data/ljspeech/wavs/LJ020-0067.npy +tests/data/ljspeech/wavs/LJ015-0135.wav|tests/data/ljspeech/wavs/LJ015-0135.npy +tests/data/ljspeech/wavs/LJ028-0361.wav|tests/data/ljspeech/wavs/LJ028-0361.npy +tests/data/ljspeech/wavs/LJ001-0002.wav|tests/data/ljspeech/wavs/LJ001-0002.npy +tests/data/ljspeech/wavs/LJ002-0234.wav|tests/data/ljspeech/wavs/LJ002-0234.npy +tests/data/ljspeech/wavs/LJ019-0274.wav|tests/data/ljspeech/wavs/LJ019-0274.npy +tests/data/ljspeech/wavs/LJ049-0080.wav|tests/data/ljspeech/wavs/LJ049-0080.npy +tests/data/ljspeech/wavs/LJ015-0293.wav|tests/data/ljspeech/wavs/LJ015-0293.npy +tests/data/ljspeech/wavs/LJ030-0083.wav|tests/data/ljspeech/wavs/LJ030-0083.npy +tests/data/ljspeech/wavs/LJ049-0068.wav|tests/data/ljspeech/wavs/LJ049-0068.npy +tests/data/ljspeech/wavs/LJ032-0013.wav|tests/data/ljspeech/wavs/LJ032-0013.npy +tests/data/ljspeech/wavs/LJ015-0032.wav|tests/data/ljspeech/wavs/LJ015-0032.npy +tests/data/ljspeech/wavs/LJ035-0164.wav|tests/data/ljspeech/wavs/LJ035-0164.npy +tests/data/ljspeech/wavs/LJ014-0314.wav|tests/data/ljspeech/wavs/LJ014-0314.npy +tests/data/ljspeech/wavs/LJ050-0002.wav|tests/data/ljspeech/wavs/LJ050-0002.npy +tests/data/ljspeech/wavs/LJ033-0105.wav|tests/data/ljspeech/wavs/LJ033-0105.npy +tests/data/ljspeech/wavs/LJ020-0072.wav|tests/data/ljspeech/wavs/LJ020-0072.npy +tests/data/ljspeech/wavs/LJ016-0138.wav|tests/data/ljspeech/wavs/LJ016-0138.npy +tests/data/ljspeech/wavs/LJ023-0063.wav|tests/data/ljspeech/wavs/LJ023-0063.npy +tests/data/ljspeech/wavs/LJ005-0210.wav|tests/data/ljspeech/wavs/LJ005-0210.npy +tests/data/ljspeech/wavs/LJ010-0262.wav|tests/data/ljspeech/wavs/LJ010-0262.npy +tests/data/ljspeech/wavs/LJ039-0032.wav|tests/data/ljspeech/wavs/LJ039-0032.npy +tests/data/ljspeech/wavs/LJ019-0020.wav|tests/data/ljspeech/wavs/LJ019-0020.npy +tests/data/ljspeech/wavs/LJ039-0170.wav|tests/data/ljspeech/wavs/LJ039-0170.npy +tests/data/ljspeech/wavs/LJ016-0183.wav|tests/data/ljspeech/wavs/LJ016-0183.npy +tests/data/ljspeech/wavs/LJ038-0271.wav|tests/data/ljspeech/wavs/LJ038-0271.npy +tests/data/ljspeech/wavs/LJ037-0082.wav|tests/data/ljspeech/wavs/LJ037-0082.npy +tests/data/ljspeech/wavs/LJ047-0066.wav|tests/data/ljspeech/wavs/LJ047-0066.npy +tests/data/ljspeech/wavs/LJ044-0194.wav|tests/data/ljspeech/wavs/LJ044-0194.npy +tests/data/ljspeech/wavs/LJ032-0197.wav|tests/data/ljspeech/wavs/LJ032-0197.npy +tests/data/ljspeech/wavs/LJ028-0333.wav|tests/data/ljspeech/wavs/LJ028-0333.npy +tests/data/ljspeech/wavs/LJ015-0036.wav|tests/data/ljspeech/wavs/LJ015-0036.npy +tests/data/ljspeech/wavs/LJ044-0069.wav|tests/data/ljspeech/wavs/LJ044-0069.npy +tests/data/ljspeech/wavs/LJ001-0104.wav|tests/data/ljspeech/wavs/LJ001-0104.npy +tests/data/ljspeech/wavs/LJ016-0286.wav|tests/data/ljspeech/wavs/LJ016-0286.npy +tests/data/ljspeech/wavs/LJ031-0037.wav|tests/data/ljspeech/wavs/LJ031-0037.npy 
+tests/data/ljspeech/wavs/LJ014-0035.wav|tests/data/ljspeech/wavs/LJ014-0035.npy +tests/data/ljspeech/wavs/LJ001-0078.wav|tests/data/ljspeech/wavs/LJ001-0078.npy +tests/data/ljspeech/wavs/LJ014-0088.wav|tests/data/ljspeech/wavs/LJ014-0088.npy +tests/data/ljspeech/wavs/LJ002-0208.wav|tests/data/ljspeech/wavs/LJ002-0208.npy +tests/data/ljspeech/wavs/LJ028-0515.wav|tests/data/ljspeech/wavs/LJ028-0515.npy +tests/data/ljspeech/wavs/LJ035-0007.wav|tests/data/ljspeech/wavs/LJ035-0007.npy +tests/data/ljspeech/wavs/LJ048-0107.wav|tests/data/ljspeech/wavs/LJ048-0107.npy +tests/data/ljspeech/wavs/LJ022-0121.wav|tests/data/ljspeech/wavs/LJ022-0121.npy +tests/data/ljspeech/wavs/LJ014-0330.wav|tests/data/ljspeech/wavs/LJ014-0330.npy +tests/data/ljspeech/wavs/LJ013-0177.wav|tests/data/ljspeech/wavs/LJ013-0177.npy +tests/data/ljspeech/wavs/LJ050-0030.wav|tests/data/ljspeech/wavs/LJ050-0030.npy +tests/data/ljspeech/wavs/LJ024-0116.wav|tests/data/ljspeech/wavs/LJ024-0116.npy +tests/data/ljspeech/wavs/LJ031-0100.wav|tests/data/ljspeech/wavs/LJ031-0100.npy +tests/data/ljspeech/wavs/LJ001-0170.wav|tests/data/ljspeech/wavs/LJ001-0170.npy +tests/data/ljspeech/wavs/LJ050-0197.wav|tests/data/ljspeech/wavs/LJ050-0197.npy +tests/data/ljspeech/wavs/LJ050-0135.wav|tests/data/ljspeech/wavs/LJ050-0135.npy +tests/data/ljspeech/wavs/LJ011-0213.wav|tests/data/ljspeech/wavs/LJ011-0213.npy +tests/data/ljspeech/wavs/LJ037-0191.wav|tests/data/ljspeech/wavs/LJ037-0191.npy +tests/data/ljspeech/wavs/LJ047-0133.wav|tests/data/ljspeech/wavs/LJ047-0133.npy +tests/data/ljspeech/wavs/LJ034-0151.wav|tests/data/ljspeech/wavs/LJ034-0151.npy +tests/data/ljspeech/wavs/LJ036-0115.wav|tests/data/ljspeech/wavs/LJ036-0115.npy +tests/data/ljspeech/wavs/LJ036-0113.wav|tests/data/ljspeech/wavs/LJ036-0113.npy +tests/data/ljspeech/wavs/LJ037-0002.wav|tests/data/ljspeech/wavs/LJ037-0002.npy +tests/data/ljspeech/wavs/LJ023-0070.wav|tests/data/ljspeech/wavs/LJ023-0070.npy +tests/data/ljspeech/wavs/LJ032-0002.wav|tests/data/ljspeech/wavs/LJ032-0002.npy +tests/data/ljspeech/wavs/LJ023-0089.wav|tests/data/ljspeech/wavs/LJ023-0089.npy +tests/data/ljspeech/wavs/LJ032-0079.wav|tests/data/ljspeech/wavs/LJ032-0079.npy +tests/data/ljspeech/wavs/LJ039-0229.wav|tests/data/ljspeech/wavs/LJ039-0229.npy +tests/data/ljspeech/wavs/LJ003-0004.wav|tests/data/ljspeech/wavs/LJ003-0004.npy +tests/data/ljspeech/wavs/LJ037-0010.wav|tests/data/ljspeech/wavs/LJ037-0010.npy +tests/data/ljspeech/wavs/LJ048-0134.wav|tests/data/ljspeech/wavs/LJ048-0134.npy +tests/data/ljspeech/wavs/LJ003-0129.wav|tests/data/ljspeech/wavs/LJ003-0129.npy +tests/data/ljspeech/wavs/LJ037-0068.wav|tests/data/ljspeech/wavs/LJ037-0068.npy +tests/data/ljspeech/wavs/LJ017-0133.wav|tests/data/ljspeech/wavs/LJ017-0133.npy +tests/data/ljspeech/wavs/LJ028-0479.wav|tests/data/ljspeech/wavs/LJ028-0479.npy +tests/data/ljspeech/wavs/LJ021-0164.wav|tests/data/ljspeech/wavs/LJ021-0164.npy +tests/data/ljspeech/wavs/LJ031-0171.wav|tests/data/ljspeech/wavs/LJ031-0171.npy +tests/data/ljspeech/wavs/LJ048-0029.wav|tests/data/ljspeech/wavs/LJ048-0029.npy +tests/data/ljspeech/wavs/LJ050-0077.wav|tests/data/ljspeech/wavs/LJ050-0077.npy +tests/data/ljspeech/wavs/LJ047-0087.wav|tests/data/ljspeech/wavs/LJ047-0087.npy +tests/data/ljspeech/wavs/LJ009-0062.wav|tests/data/ljspeech/wavs/LJ009-0062.npy +tests/data/ljspeech/wavs/LJ004-0151.wav|tests/data/ljspeech/wavs/LJ004-0151.npy +tests/data/ljspeech/wavs/LJ030-0188.wav|tests/data/ljspeech/wavs/LJ030-0188.npy 
+tests/data/ljspeech/wavs/LJ028-0275.wav|tests/data/ljspeech/wavs/LJ028-0275.npy +tests/data/ljspeech/wavs/LJ010-0250.wav|tests/data/ljspeech/wavs/LJ010-0250.npy +tests/data/ljspeech/wavs/LJ012-0141.wav|tests/data/ljspeech/wavs/LJ012-0141.npy +tests/data/ljspeech/wavs/LJ028-0364.wav|tests/data/ljspeech/wavs/LJ028-0364.npy +tests/data/ljspeech/wavs/LJ019-0253.wav|tests/data/ljspeech/wavs/LJ019-0253.npy +tests/data/ljspeech/wavs/LJ037-0257.wav|tests/data/ljspeech/wavs/LJ037-0257.npy +tests/data/ljspeech/wavs/LJ005-0142.wav|tests/data/ljspeech/wavs/LJ005-0142.npy +tests/data/ljspeech/wavs/LJ042-0116.wav|tests/data/ljspeech/wavs/LJ042-0116.npy +tests/data/ljspeech/wavs/LJ003-0289.wav|tests/data/ljspeech/wavs/LJ003-0289.npy +tests/data/ljspeech/wavs/LJ038-0245.wav|tests/data/ljspeech/wavs/LJ038-0245.npy +tests/data/ljspeech/wavs/LJ036-0002.wav|tests/data/ljspeech/wavs/LJ036-0002.npy +tests/data/ljspeech/wavs/LJ033-0002.wav|tests/data/ljspeech/wavs/LJ033-0002.npy +tests/data/ljspeech/wavs/LJ039-0004.wav|tests/data/ljspeech/wavs/LJ039-0004.npy +tests/data/ljspeech/wavs/LJ047-0240.wav|tests/data/ljspeech/wavs/LJ047-0240.npy +tests/data/ljspeech/wavs/LJ044-0067.wav|tests/data/ljspeech/wavs/LJ044-0067.npy +tests/data/ljspeech/wavs/LJ034-0109.wav|tests/data/ljspeech/wavs/LJ034-0109.npy +tests/data/ljspeech/wavs/LJ003-0287.wav|tests/data/ljspeech/wavs/LJ003-0287.npy +tests/data/ljspeech/wavs/LJ030-0093.wav|tests/data/ljspeech/wavs/LJ030-0093.npy +tests/data/ljspeech/wavs/LJ038-0130.wav|tests/data/ljspeech/wavs/LJ038-0130.npy +tests/data/ljspeech/wavs/LJ020-0010.wav|tests/data/ljspeech/wavs/LJ020-0010.npy +tests/data/ljspeech/wavs/LJ019-0280.wav|tests/data/ljspeech/wavs/LJ019-0280.npy +tests/data/ljspeech/wavs/LJ019-0392.wav|tests/data/ljspeech/wavs/LJ019-0392.npy +tests/data/ljspeech/wavs/LJ037-0194.wav|tests/data/ljspeech/wavs/LJ037-0194.npy +tests/data/ljspeech/wavs/LJ025-0077.wav|tests/data/ljspeech/wavs/LJ025-0077.npy +tests/data/ljspeech/wavs/LJ030-0105.wav|tests/data/ljspeech/wavs/LJ030-0105.npy +tests/data/ljspeech/wavs/LJ018-0357.wav|tests/data/ljspeech/wavs/LJ018-0357.npy +tests/data/ljspeech/wavs/LJ038-0113.wav|tests/data/ljspeech/wavs/LJ038-0113.npy +tests/data/ljspeech/wavs/LJ032-0243.wav|tests/data/ljspeech/wavs/LJ032-0243.npy +tests/data/ljspeech/wavs/LJ031-0184.wav|tests/data/ljspeech/wavs/LJ031-0184.npy +tests/data/ljspeech/wavs/LJ022-0053.wav|tests/data/ljspeech/wavs/LJ022-0053.npy +tests/data/ljspeech/wavs/LJ008-0167.wav|tests/data/ljspeech/wavs/LJ008-0167.npy +tests/data/ljspeech/wavs/LJ020-0034.wav|tests/data/ljspeech/wavs/LJ020-0034.npy +tests/data/ljspeech/wavs/LJ026-0029.wav|tests/data/ljspeech/wavs/LJ026-0029.npy +tests/data/ljspeech/wavs/LJ045-0132.wav|tests/data/ljspeech/wavs/LJ045-0132.npy +tests/data/ljspeech/wavs/LJ011-0059.wav|tests/data/ljspeech/wavs/LJ011-0059.npy +tests/data/ljspeech/wavs/LJ048-0038.wav|tests/data/ljspeech/wavs/LJ048-0038.npy +tests/data/ljspeech/wavs/LJ048-0113.wav|tests/data/ljspeech/wavs/LJ048-0113.npy +tests/data/ljspeech/wavs/LJ013-0059.wav|tests/data/ljspeech/wavs/LJ013-0059.npy +tests/data/ljspeech/wavs/LJ005-0074.wav|tests/data/ljspeech/wavs/LJ005-0074.npy +tests/data/ljspeech/wavs/LJ037-0052.wav|tests/data/ljspeech/wavs/LJ037-0052.npy +tests/data/ljspeech/wavs/LJ033-0110.wav|tests/data/ljspeech/wavs/LJ033-0110.npy +tests/data/ljspeech/wavs/LJ022-0078.wav|tests/data/ljspeech/wavs/LJ022-0078.npy +tests/data/ljspeech/wavs/LJ043-0131.wav|tests/data/ljspeech/wavs/LJ043-0131.npy 
+tests/data/ljspeech/wavs/LJ021-0050.wav|tests/data/ljspeech/wavs/LJ021-0050.npy +tests/data/ljspeech/wavs/LJ008-0073.wav|tests/data/ljspeech/wavs/LJ008-0073.npy +tests/data/ljspeech/wavs/LJ016-0049.wav|tests/data/ljspeech/wavs/LJ016-0049.npy +tests/data/ljspeech/wavs/LJ049-0202.wav|tests/data/ljspeech/wavs/LJ049-0202.npy +tests/data/ljspeech/wavs/LJ012-0159.wav|tests/data/ljspeech/wavs/LJ012-0159.npy +tests/data/ljspeech/wavs/LJ014-0102.wav|tests/data/ljspeech/wavs/LJ014-0102.npy +tests/data/ljspeech/wavs/LJ033-0057.wav|tests/data/ljspeech/wavs/LJ033-0057.npy +tests/data/ljspeech/wavs/LJ024-0008.wav|tests/data/ljspeech/wavs/LJ024-0008.npy +tests/data/ljspeech/wavs/LJ049-0194.wav|tests/data/ljspeech/wavs/LJ049-0194.npy +tests/data/ljspeech/wavs/LJ024-0109.wav|tests/data/ljspeech/wavs/LJ024-0109.npy +tests/data/ljspeech/wavs/LJ043-0104.wav|tests/data/ljspeech/wavs/LJ043-0104.npy +tests/data/ljspeech/wavs/LJ024-0024.wav|tests/data/ljspeech/wavs/LJ024-0024.npy +tests/data/ljspeech/wavs/LJ003-0225.wav|tests/data/ljspeech/wavs/LJ003-0225.npy +tests/data/ljspeech/wavs/LJ012-0120.wav|tests/data/ljspeech/wavs/LJ012-0120.npy +tests/data/ljspeech/wavs/LJ016-0048.wav|tests/data/ljspeech/wavs/LJ016-0048.npy +tests/data/ljspeech/wavs/LJ014-0009.wav|tests/data/ljspeech/wavs/LJ014-0009.npy +tests/data/ljspeech/wavs/LJ021-0201.wav|tests/data/ljspeech/wavs/LJ021-0201.npy +tests/data/ljspeech/wavs/LJ008-0298.wav|tests/data/ljspeech/wavs/LJ008-0298.npy +tests/data/ljspeech/wavs/LJ016-0230.wav|tests/data/ljspeech/wavs/LJ016-0230.npy +tests/data/ljspeech/wavs/LJ017-0072.wav|tests/data/ljspeech/wavs/LJ017-0072.npy +tests/data/ljspeech/wavs/LJ037-0232.wav|tests/data/ljspeech/wavs/LJ037-0232.npy +tests/data/ljspeech/wavs/LJ017-0225.wav|tests/data/ljspeech/wavs/LJ017-0225.npy +tests/data/ljspeech/wavs/LJ016-0174.wav|tests/data/ljspeech/wavs/LJ016-0174.npy +tests/data/ljspeech/wavs/LJ038-0148.wav|tests/data/ljspeech/wavs/LJ038-0148.npy +tests/data/ljspeech/wavs/LJ009-0034.wav|tests/data/ljspeech/wavs/LJ009-0034.npy +tests/data/ljspeech/wavs/LJ032-0231.wav|tests/data/ljspeech/wavs/LJ032-0231.npy +tests/data/ljspeech/wavs/LJ002-0012.wav|tests/data/ljspeech/wavs/LJ002-0012.npy +tests/data/ljspeech/wavs/LJ004-0104.wav|tests/data/ljspeech/wavs/LJ004-0104.npy +tests/data/ljspeech/wavs/LJ024-0002.wav|tests/data/ljspeech/wavs/LJ024-0002.npy +tests/data/ljspeech/wavs/LJ037-0186.wav|tests/data/ljspeech/wavs/LJ037-0186.npy +tests/data/ljspeech/wavs/LJ032-0088.wav|tests/data/ljspeech/wavs/LJ032-0088.npy +tests/data/ljspeech/wavs/LJ018-0282.wav|tests/data/ljspeech/wavs/LJ018-0282.npy +tests/data/ljspeech/wavs/LJ034-0119.wav|tests/data/ljspeech/wavs/LJ034-0119.npy +tests/data/ljspeech/wavs/LJ017-0236.wav|tests/data/ljspeech/wavs/LJ017-0236.npy +tests/data/ljspeech/wavs/LJ042-0126.wav|tests/data/ljspeech/wavs/LJ042-0126.npy +tests/data/ljspeech/wavs/LJ011-0280.wav|tests/data/ljspeech/wavs/LJ011-0280.npy +tests/data/ljspeech/wavs/LJ031-0125.wav|tests/data/ljspeech/wavs/LJ031-0125.npy +tests/data/ljspeech/wavs/LJ032-0112.wav|tests/data/ljspeech/wavs/LJ032-0112.npy +tests/data/ljspeech/wavs/LJ033-0017.wav|tests/data/ljspeech/wavs/LJ033-0017.npy +tests/data/ljspeech/wavs/LJ030-0202.wav|tests/data/ljspeech/wavs/LJ030-0202.npy +tests/data/ljspeech/wavs/LJ040-0022.wav|tests/data/ljspeech/wavs/LJ040-0022.npy +tests/data/ljspeech/wavs/LJ027-0132.wav|tests/data/ljspeech/wavs/LJ027-0132.npy +tests/data/ljspeech/wavs/LJ041-0057.wav|tests/data/ljspeech/wavs/LJ041-0057.npy 
+tests/data/ljspeech/wavs/LJ033-0129.wav|tests/data/ljspeech/wavs/LJ033-0129.npy +tests/data/ljspeech/wavs/LJ028-0123.wav|tests/data/ljspeech/wavs/LJ028-0123.npy +tests/data/ljspeech/wavs/LJ011-0217.wav|tests/data/ljspeech/wavs/LJ011-0217.npy +tests/data/ljspeech/wavs/LJ008-0062.wav|tests/data/ljspeech/wavs/LJ008-0062.npy +tests/data/ljspeech/wavs/LJ002-0044.wav|tests/data/ljspeech/wavs/LJ002-0044.npy +tests/data/ljspeech/wavs/LJ007-0081.wav|tests/data/ljspeech/wavs/LJ007-0081.npy +tests/data/ljspeech/wavs/LJ016-0027.wav|tests/data/ljspeech/wavs/LJ016-0027.npy +tests/data/ljspeech/wavs/LJ048-0026.wav|tests/data/ljspeech/wavs/LJ048-0026.npy +tests/data/ljspeech/wavs/LJ014-0050.wav|tests/data/ljspeech/wavs/LJ014-0050.npy +tests/data/ljspeech/wavs/LJ035-0144.wav|tests/data/ljspeech/wavs/LJ035-0144.npy +tests/data/ljspeech/wavs/LJ009-0086.wav|tests/data/ljspeech/wavs/LJ009-0086.npy +tests/data/ljspeech/wavs/LJ009-0303.wav|tests/data/ljspeech/wavs/LJ009-0303.npy +tests/data/ljspeech/wavs/LJ016-0007.wav|tests/data/ljspeech/wavs/LJ016-0007.npy +tests/data/ljspeech/wavs/LJ049-0180.wav|tests/data/ljspeech/wavs/LJ049-0180.npy +tests/data/ljspeech/wavs/LJ022-0179.wav|tests/data/ljspeech/wavs/LJ022-0179.npy +tests/data/ljspeech/wavs/LJ013-0231.wav|tests/data/ljspeech/wavs/LJ013-0231.npy +tests/data/ljspeech/wavs/LJ046-0135.wav|tests/data/ljspeech/wavs/LJ046-0135.npy +tests/data/ljspeech/wavs/LJ036-0199.wav|tests/data/ljspeech/wavs/LJ036-0199.npy +tests/data/ljspeech/wavs/LJ008-0198.wav|tests/data/ljspeech/wavs/LJ008-0198.npy +tests/data/ljspeech/wavs/LJ031-0194.wav|tests/data/ljspeech/wavs/LJ031-0194.npy +tests/data/ljspeech/wavs/LJ036-0065.wav|tests/data/ljspeech/wavs/LJ036-0065.npy +tests/data/ljspeech/wavs/LJ050-0053.wav|tests/data/ljspeech/wavs/LJ050-0053.npy +tests/data/ljspeech/wavs/LJ019-0030.wav|tests/data/ljspeech/wavs/LJ019-0030.npy +tests/data/ljspeech/wavs/LJ014-0254.wav|tests/data/ljspeech/wavs/LJ014-0254.npy +tests/data/ljspeech/wavs/LJ018-0140.wav|tests/data/ljspeech/wavs/LJ018-0140.npy +tests/data/ljspeech/wavs/LJ045-0066.wav|tests/data/ljspeech/wavs/LJ045-0066.npy +tests/data/ljspeech/wavs/LJ027-0039.wav|tests/data/ljspeech/wavs/LJ027-0039.npy +tests/data/ljspeech/wavs/LJ011-0186.wav|tests/data/ljspeech/wavs/LJ011-0186.npy +tests/data/ljspeech/wavs/LJ048-0267.wav|tests/data/ljspeech/wavs/LJ048-0267.npy +tests/data/ljspeech/wavs/LJ022-0017.wav|tests/data/ljspeech/wavs/LJ022-0017.npy +tests/data/ljspeech/wavs/LJ034-0079.wav|tests/data/ljspeech/wavs/LJ034-0079.npy +tests/data/ljspeech/wavs/LJ003-0297.wav|tests/data/ljspeech/wavs/LJ003-0297.npy +tests/data/ljspeech/wavs/LJ019-0292.wav|tests/data/ljspeech/wavs/LJ019-0292.npy +tests/data/ljspeech/wavs/LJ018-0227.wav|tests/data/ljspeech/wavs/LJ018-0227.npy +tests/data/ljspeech/wavs/LJ041-0060.wav|tests/data/ljspeech/wavs/LJ041-0060.npy +tests/data/ljspeech/wavs/LJ045-0167.wav|tests/data/ljspeech/wavs/LJ045-0167.npy +tests/data/ljspeech/wavs/LJ022-0131.wav|tests/data/ljspeech/wavs/LJ022-0131.npy +tests/data/ljspeech/wavs/LJ033-0091.wav|tests/data/ljspeech/wavs/LJ033-0091.npy +tests/data/ljspeech/wavs/LJ008-0127.wav|tests/data/ljspeech/wavs/LJ008-0127.npy +tests/data/ljspeech/wavs/LJ021-0195.wav|tests/data/ljspeech/wavs/LJ021-0195.npy +tests/data/ljspeech/wavs/LJ019-0239.wav|tests/data/ljspeech/wavs/LJ019-0239.npy +tests/data/ljspeech/wavs/LJ028-0474.wav|tests/data/ljspeech/wavs/LJ028-0474.npy +tests/data/ljspeech/wavs/LJ018-0114.wav|tests/data/ljspeech/wavs/LJ018-0114.npy 
+tests/data/ljspeech/wavs/LJ006-0220.wav|tests/data/ljspeech/wavs/LJ006-0220.npy +tests/data/ljspeech/wavs/LJ039-0239.wav|tests/data/ljspeech/wavs/LJ039-0239.npy +tests/data/ljspeech/wavs/LJ018-0273.wav|tests/data/ljspeech/wavs/LJ018-0273.npy +tests/data/ljspeech/wavs/LJ038-0287.wav|tests/data/ljspeech/wavs/LJ038-0287.npy +tests/data/ljspeech/wavs/LJ050-0075.wav|tests/data/ljspeech/wavs/LJ050-0075.npy +tests/data/ljspeech/wavs/LJ033-0076.wav|tests/data/ljspeech/wavs/LJ033-0076.npy +tests/data/ljspeech/wavs/LJ036-0027.wav|tests/data/ljspeech/wavs/LJ036-0027.npy +tests/data/ljspeech/wavs/LJ044-0063.wav|tests/data/ljspeech/wavs/LJ044-0063.npy +tests/data/ljspeech/wavs/LJ046-0175.wav|tests/data/ljspeech/wavs/LJ046-0175.npy +tests/data/ljspeech/wavs/LJ007-0103.wav|tests/data/ljspeech/wavs/LJ007-0103.npy +tests/data/ljspeech/wavs/LJ037-0115.wav|tests/data/ljspeech/wavs/LJ037-0115.npy +tests/data/ljspeech/wavs/LJ015-0117.wav|tests/data/ljspeech/wavs/LJ015-0117.npy +tests/data/ljspeech/wavs/LJ021-0119.wav|tests/data/ljspeech/wavs/LJ021-0119.npy +tests/data/ljspeech/wavs/LJ020-0066.wav|tests/data/ljspeech/wavs/LJ020-0066.npy +tests/data/ljspeech/wavs/LJ031-0027.wav|tests/data/ljspeech/wavs/LJ031-0027.npy +tests/data/ljspeech/wavs/LJ046-0145.wav|tests/data/ljspeech/wavs/LJ046-0145.npy +tests/data/ljspeech/wavs/LJ038-0124.wav|tests/data/ljspeech/wavs/LJ038-0124.npy +tests/data/ljspeech/wavs/LJ048-0006.wav|tests/data/ljspeech/wavs/LJ048-0006.npy +tests/data/ljspeech/wavs/LJ038-0118.wav|tests/data/ljspeech/wavs/LJ038-0118.npy +tests/data/ljspeech/wavs/LJ009-0044.wav|tests/data/ljspeech/wavs/LJ009-0044.npy +tests/data/ljspeech/wavs/LJ028-0191.wav|tests/data/ljspeech/wavs/LJ028-0191.npy +tests/data/ljspeech/wavs/LJ008-0131.wav|tests/data/ljspeech/wavs/LJ008-0131.npy +tests/data/ljspeech/wavs/LJ018-0070.wav|tests/data/ljspeech/wavs/LJ018-0070.npy +tests/data/ljspeech/wavs/LJ028-0384.wav|tests/data/ljspeech/wavs/LJ028-0384.npy +tests/data/ljspeech/wavs/LJ043-0016.wav|tests/data/ljspeech/wavs/LJ043-0016.npy +tests/data/ljspeech/wavs/LJ032-0248.wav|tests/data/ljspeech/wavs/LJ032-0248.npy +tests/data/ljspeech/wavs/LJ040-0231.wav|tests/data/ljspeech/wavs/LJ040-0231.npy +tests/data/ljspeech/wavs/LJ027-0012.wav|tests/data/ljspeech/wavs/LJ027-0012.npy +tests/data/ljspeech/wavs/LJ032-0039.wav|tests/data/ljspeech/wavs/LJ032-0039.npy +tests/data/ljspeech/wavs/LJ014-0325.wav|tests/data/ljspeech/wavs/LJ014-0325.npy +tests/data/ljspeech/wavs/LJ047-0198.wav|tests/data/ljspeech/wavs/LJ047-0198.npy +tests/data/ljspeech/wavs/LJ023-0046.wav|tests/data/ljspeech/wavs/LJ023-0046.npy +tests/data/ljspeech/wavs/LJ018-0182.wav|tests/data/ljspeech/wavs/LJ018-0182.npy +tests/data/ljspeech/wavs/LJ049-0175.wav|tests/data/ljspeech/wavs/LJ049-0175.npy +tests/data/ljspeech/wavs/LJ025-0068.wav|tests/data/ljspeech/wavs/LJ025-0068.npy +tests/data/ljspeech/wavs/LJ016-0062.wav|tests/data/ljspeech/wavs/LJ016-0062.npy +tests/data/ljspeech/wavs/LJ014-0053.wav|tests/data/ljspeech/wavs/LJ014-0053.npy +tests/data/ljspeech/wavs/LJ044-0154.wav|tests/data/ljspeech/wavs/LJ044-0154.npy +tests/data/ljspeech/wavs/LJ033-0013.wav|tests/data/ljspeech/wavs/LJ033-0013.npy +tests/data/ljspeech/wavs/LJ029-0134.wav|tests/data/ljspeech/wavs/LJ029-0134.npy +tests/data/ljspeech/wavs/LJ039-0013.wav|tests/data/ljspeech/wavs/LJ039-0013.npy +tests/data/ljspeech/wavs/LJ038-0199.wav|tests/data/ljspeech/wavs/LJ038-0199.npy +tests/data/ljspeech/wavs/LJ034-0033.wav|tests/data/ljspeech/wavs/LJ034-0033.npy 
+tests/data/ljspeech/wavs/LJ040-0111.wav|tests/data/ljspeech/wavs/LJ040-0111.npy +tests/data/ljspeech/wavs/LJ024-0077.wav|tests/data/ljspeech/wavs/LJ024-0077.npy +tests/data/ljspeech/wavs/LJ015-0086.wav|tests/data/ljspeech/wavs/LJ015-0086.npy +tests/data/ljspeech/wavs/LJ018-0291.wav|tests/data/ljspeech/wavs/LJ018-0291.npy +tests/data/ljspeech/wavs/LJ026-0016.wav|tests/data/ljspeech/wavs/LJ026-0016.npy +tests/data/ljspeech/wavs/LJ046-0223.wav|tests/data/ljspeech/wavs/LJ046-0223.npy +tests/data/ljspeech/wavs/LJ040-0201.wav|tests/data/ljspeech/wavs/LJ040-0201.npy +tests/data/ljspeech/wavs/LJ018-0119.wav|tests/data/ljspeech/wavs/LJ018-0119.npy +tests/data/ljspeech/wavs/LJ049-0051.wav|tests/data/ljspeech/wavs/LJ049-0051.npy +tests/data/ljspeech/wavs/LJ016-0308.wav|tests/data/ljspeech/wavs/LJ016-0308.npy +tests/data/ljspeech/wavs/LJ040-0118.wav|tests/data/ljspeech/wavs/LJ040-0118.npy +tests/data/ljspeech/wavs/LJ028-0290.wav|tests/data/ljspeech/wavs/LJ028-0290.npy +tests/data/ljspeech/wavs/LJ034-0090.wav|tests/data/ljspeech/wavs/LJ034-0090.npy +tests/data/ljspeech/wavs/LJ014-0005.wav|tests/data/ljspeech/wavs/LJ014-0005.npy +tests/data/ljspeech/wavs/LJ039-0168.wav|tests/data/ljspeech/wavs/LJ039-0168.npy +tests/data/ljspeech/wavs/LJ048-0196.wav|tests/data/ljspeech/wavs/LJ048-0196.npy +tests/data/ljspeech/wavs/LJ040-0027.wav|tests/data/ljspeech/wavs/LJ040-0027.npy +tests/data/ljspeech/wavs/LJ028-0475.wav|tests/data/ljspeech/wavs/LJ028-0475.npy +tests/data/ljspeech/wavs/LJ049-0204.wav|tests/data/ljspeech/wavs/LJ049-0204.npy +tests/data/ljspeech/wavs/LJ035-0070.wav|tests/data/ljspeech/wavs/LJ035-0070.npy +tests/data/ljspeech/wavs/LJ028-0175.wav|tests/data/ljspeech/wavs/LJ028-0175.npy +tests/data/ljspeech/wavs/LJ003-0104.wav|tests/data/ljspeech/wavs/LJ003-0104.npy +tests/data/ljspeech/wavs/LJ014-0194.wav|tests/data/ljspeech/wavs/LJ014-0194.npy +tests/data/ljspeech/wavs/LJ014-0137.wav|tests/data/ljspeech/wavs/LJ014-0137.npy +tests/data/ljspeech/wavs/LJ050-0144.wav|tests/data/ljspeech/wavs/LJ050-0144.npy +tests/data/ljspeech/wavs/LJ016-0310.wav|tests/data/ljspeech/wavs/LJ016-0310.npy +tests/data/ljspeech/wavs/LJ036-0117.wav|tests/data/ljspeech/wavs/LJ036-0117.npy +tests/data/ljspeech/wavs/LJ044-0105.wav|tests/data/ljspeech/wavs/LJ044-0105.npy +tests/data/ljspeech/wavs/LJ035-0116.wav|tests/data/ljspeech/wavs/LJ035-0116.npy +tests/data/ljspeech/wavs/LJ043-0050.wav|tests/data/ljspeech/wavs/LJ043-0050.npy +tests/data/ljspeech/wavs/LJ048-0230.wav|tests/data/ljspeech/wavs/LJ048-0230.npy +tests/data/ljspeech/wavs/LJ022-0147.wav|tests/data/ljspeech/wavs/LJ022-0147.npy +tests/data/ljspeech/wavs/LJ036-0003.wav|tests/data/ljspeech/wavs/LJ036-0003.npy +tests/data/ljspeech/wavs/LJ044-0131.wav|tests/data/ljspeech/wavs/LJ044-0131.npy +tests/data/ljspeech/wavs/LJ029-0002.wav|tests/data/ljspeech/wavs/LJ029-0002.npy +tests/data/ljspeech/wavs/LJ030-0002.wav|tests/data/ljspeech/wavs/LJ030-0002.npy +tests/data/ljspeech/wavs/LJ047-0199.wav|tests/data/ljspeech/wavs/LJ047-0199.npy +tests/data/ljspeech/wavs/LJ024-0017.wav|tests/data/ljspeech/wavs/LJ024-0017.npy +tests/data/ljspeech/wavs/LJ033-0178.wav|tests/data/ljspeech/wavs/LJ033-0178.npy +tests/data/ljspeech/wavs/LJ043-0064.wav|tests/data/ljspeech/wavs/LJ043-0064.npy +tests/data/ljspeech/wavs/LJ006-0278.wav|tests/data/ljspeech/wavs/LJ006-0278.npy +tests/data/ljspeech/wavs/LJ002-0136.wav|tests/data/ljspeech/wavs/LJ002-0136.npy +tests/data/ljspeech/wavs/LJ038-0089.wav|tests/data/ljspeech/wavs/LJ038-0089.npy 
+tests/data/ljspeech/wavs/LJ048-0260.wav|tests/data/ljspeech/wavs/LJ048-0260.npy +tests/data/ljspeech/wavs/LJ034-0047.wav|tests/data/ljspeech/wavs/LJ034-0047.npy +tests/data/ljspeech/wavs/LJ019-0022.wav|tests/data/ljspeech/wavs/LJ019-0022.npy +tests/data/ljspeech/wavs/LJ018-0191.wav|tests/data/ljspeech/wavs/LJ018-0191.npy +tests/data/ljspeech/wavs/LJ006-0066.wav|tests/data/ljspeech/wavs/LJ006-0066.npy +tests/data/ljspeech/wavs/LJ030-0165.wav|tests/data/ljspeech/wavs/LJ030-0165.npy +tests/data/ljspeech/wavs/LJ023-0103.wav|tests/data/ljspeech/wavs/LJ023-0103.npy +tests/data/ljspeech/wavs/LJ033-0021.wav|tests/data/ljspeech/wavs/LJ033-0021.npy +tests/data/ljspeech/wavs/LJ003-0022.wav|tests/data/ljspeech/wavs/LJ003-0022.npy +tests/data/ljspeech/wavs/LJ019-0247.wav|tests/data/ljspeech/wavs/LJ019-0247.npy +tests/data/ljspeech/wavs/LJ031-0164.wav|tests/data/ljspeech/wavs/LJ031-0164.npy +tests/data/ljspeech/wavs/LJ043-0046.wav|tests/data/ljspeech/wavs/LJ043-0046.npy +tests/data/ljspeech/wavs/LJ041-0026.wav|tests/data/ljspeech/wavs/LJ041-0026.npy +tests/data/ljspeech/wavs/LJ008-0224.wav|tests/data/ljspeech/wavs/LJ008-0224.npy +tests/data/ljspeech/wavs/LJ016-0363.wav|tests/data/ljspeech/wavs/LJ016-0363.npy +tests/data/ljspeech/wavs/LJ038-0223.wav|tests/data/ljspeech/wavs/LJ038-0223.npy +tests/data/ljspeech/wavs/LJ034-0117.wav|tests/data/ljspeech/wavs/LJ034-0117.npy +tests/data/ljspeech/wavs/LJ013-0008.wav|tests/data/ljspeech/wavs/LJ013-0008.npy +tests/data/ljspeech/wavs/LJ045-0184.wav|tests/data/ljspeech/wavs/LJ045-0184.npy +tests/data/ljspeech/wavs/LJ026-0113.wav|tests/data/ljspeech/wavs/LJ026-0113.npy +tests/data/ljspeech/wavs/LJ032-0094.wav|tests/data/ljspeech/wavs/LJ032-0094.npy +tests/data/ljspeech/wavs/LJ017-0260.wav|tests/data/ljspeech/wavs/LJ017-0260.npy +tests/data/ljspeech/wavs/LJ042-0104.wav|tests/data/ljspeech/wavs/LJ042-0104.npy +tests/data/ljspeech/wavs/LJ036-0207.wav|tests/data/ljspeech/wavs/LJ036-0207.npy +tests/data/ljspeech/wavs/LJ029-0063.wav|tests/data/ljspeech/wavs/LJ029-0063.npy +tests/data/ljspeech/wavs/LJ020-0068.wav|tests/data/ljspeech/wavs/LJ020-0068.npy +tests/data/ljspeech/wavs/LJ010-0051.wav|tests/data/ljspeech/wavs/LJ010-0051.npy +tests/data/ljspeech/wavs/LJ003-0228.wav|tests/data/ljspeech/wavs/LJ003-0228.npy +tests/data/ljspeech/wavs/LJ009-0090.wav|tests/data/ljspeech/wavs/LJ009-0090.npy +tests/data/ljspeech/wavs/LJ037-0195.wav|tests/data/ljspeech/wavs/LJ037-0195.npy +tests/data/ljspeech/wavs/LJ030-0245.wav|tests/data/ljspeech/wavs/LJ030-0245.npy +tests/data/ljspeech/wavs/LJ015-0148.wav|tests/data/ljspeech/wavs/LJ015-0148.npy +tests/data/ljspeech/wavs/LJ038-0077.wav|tests/data/ljspeech/wavs/LJ038-0077.npy +tests/data/ljspeech/wavs/LJ039-0194.wav|tests/data/ljspeech/wavs/LJ039-0194.npy +tests/data/ljspeech/wavs/LJ031-0203.wav|tests/data/ljspeech/wavs/LJ031-0203.npy +tests/data/ljspeech/wavs/LJ048-0206.wav|tests/data/ljspeech/wavs/LJ048-0206.npy +tests/data/ljspeech/wavs/LJ014-0302.wav|tests/data/ljspeech/wavs/LJ014-0302.npy +tests/data/ljspeech/wavs/LJ043-0158.wav|tests/data/ljspeech/wavs/LJ043-0158.npy +tests/data/ljspeech/wavs/LJ050-0232.wav|tests/data/ljspeech/wavs/LJ050-0232.npy +tests/data/ljspeech/wavs/LJ037-0267.wav|tests/data/ljspeech/wavs/LJ037-0267.npy +tests/data/ljspeech/wavs/LJ009-0096.wav|tests/data/ljspeech/wavs/LJ009-0096.npy +tests/data/ljspeech/wavs/LJ018-0319.wav|tests/data/ljspeech/wavs/LJ018-0319.npy +tests/data/ljspeech/wavs/LJ002-0032.wav|tests/data/ljspeech/wavs/LJ002-0032.npy 
+tests/data/ljspeech/wavs/LJ003-0067.wav|tests/data/ljspeech/wavs/LJ003-0067.npy +tests/data/ljspeech/wavs/LJ016-0328.wav|tests/data/ljspeech/wavs/LJ016-0328.npy +tests/data/ljspeech/wavs/LJ050-0092.wav|tests/data/ljspeech/wavs/LJ050-0092.npy +tests/data/ljspeech/wavs/LJ011-0171.wav|tests/data/ljspeech/wavs/LJ011-0171.npy +tests/data/ljspeech/wavs/LJ017-0074.wav|tests/data/ljspeech/wavs/LJ017-0074.npy +tests/data/ljspeech/wavs/LJ002-0119.wav|tests/data/ljspeech/wavs/LJ002-0119.npy +tests/data/ljspeech/wavs/LJ010-0298.wav|tests/data/ljspeech/wavs/LJ010-0298.npy +tests/data/ljspeech/wavs/LJ048-0238.wav|tests/data/ljspeech/wavs/LJ048-0238.npy +tests/data/ljspeech/wavs/LJ031-0132.wav|tests/data/ljspeech/wavs/LJ031-0132.npy +tests/data/ljspeech/wavs/LJ021-0014.wav|tests/data/ljspeech/wavs/LJ021-0014.npy +tests/data/ljspeech/wavs/LJ021-0052.wav|tests/data/ljspeech/wavs/LJ021-0052.npy +tests/data/ljspeech/wavs/LJ014-0003.wav|tests/data/ljspeech/wavs/LJ014-0003.npy +tests/data/ljspeech/wavs/LJ045-0105.wav|tests/data/ljspeech/wavs/LJ045-0105.npy +tests/data/ljspeech/wavs/LJ048-0263.wav|tests/data/ljspeech/wavs/LJ048-0263.npy +tests/data/ljspeech/wavs/LJ004-0012.wav|tests/data/ljspeech/wavs/LJ004-0012.npy +tests/data/ljspeech/wavs/LJ047-0015.wav|tests/data/ljspeech/wavs/LJ047-0015.npy +tests/data/ljspeech/wavs/LJ014-0240.wav|tests/data/ljspeech/wavs/LJ014-0240.npy +tests/data/ljspeech/wavs/LJ050-0204.wav|tests/data/ljspeech/wavs/LJ050-0204.npy +tests/data/ljspeech/wavs/LJ001-0165.wav|tests/data/ljspeech/wavs/LJ001-0165.npy +tests/data/ljspeech/wavs/LJ018-0159.wav|tests/data/ljspeech/wavs/LJ018-0159.npy +tests/data/ljspeech/wavs/LJ002-0153.wav|tests/data/ljspeech/wavs/LJ002-0153.npy +tests/data/ljspeech/wavs/LJ020-0065.wav|tests/data/ljspeech/wavs/LJ020-0065.npy +tests/data/ljspeech/wavs/LJ014-0183.wav|tests/data/ljspeech/wavs/LJ014-0183.npy +tests/data/ljspeech/wavs/LJ013-0213.wav|tests/data/ljspeech/wavs/LJ013-0213.npy +tests/data/ljspeech/wavs/LJ021-0076.wav|tests/data/ljspeech/wavs/LJ021-0076.npy +tests/data/ljspeech/wavs/LJ021-0208.wav|tests/data/ljspeech/wavs/LJ021-0208.npy +tests/data/ljspeech/wavs/LJ016-0154.wav|tests/data/ljspeech/wavs/LJ016-0154.npy +tests/data/ljspeech/wavs/LJ043-0029.wav|tests/data/ljspeech/wavs/LJ043-0029.npy +tests/data/ljspeech/wavs/LJ050-0255.wav|tests/data/ljspeech/wavs/LJ050-0255.npy +tests/data/ljspeech/wavs/LJ018-0309.wav|tests/data/ljspeech/wavs/LJ018-0309.npy +tests/data/ljspeech/wavs/LJ037-0020.wav|tests/data/ljspeech/wavs/LJ037-0020.npy +tests/data/ljspeech/wavs/LJ032-0109.wav|tests/data/ljspeech/wavs/LJ032-0109.npy +tests/data/ljspeech/wavs/LJ032-0219.wav|tests/data/ljspeech/wavs/LJ032-0219.npy +tests/data/ljspeech/wavs/LJ014-0250.wav|tests/data/ljspeech/wavs/LJ014-0250.npy +tests/data/ljspeech/wavs/LJ018-0205.wav|tests/data/ljspeech/wavs/LJ018-0205.npy +tests/data/ljspeech/wavs/LJ021-0054.wav|tests/data/ljspeech/wavs/LJ021-0054.npy +tests/data/ljspeech/wavs/LJ050-0239.wav|tests/data/ljspeech/wavs/LJ050-0239.npy +tests/data/ljspeech/wavs/LJ039-0104.wav|tests/data/ljspeech/wavs/LJ039-0104.npy +tests/data/ljspeech/wavs/LJ036-0152.wav|tests/data/ljspeech/wavs/LJ036-0152.npy +tests/data/ljspeech/wavs/LJ043-0003.wav|tests/data/ljspeech/wavs/LJ043-0003.npy +tests/data/ljspeech/wavs/LJ034-0183.wav|tests/data/ljspeech/wavs/LJ034-0183.npy +tests/data/ljspeech/wavs/LJ038-0155.wav|tests/data/ljspeech/wavs/LJ038-0155.npy +tests/data/ljspeech/wavs/LJ005-0261.wav|tests/data/ljspeech/wavs/LJ005-0261.npy 
+tests/data/ljspeech/wavs/LJ045-0037.wav|tests/data/ljspeech/wavs/LJ045-0037.npy +tests/data/ljspeech/wavs/LJ027-0111.wav|tests/data/ljspeech/wavs/LJ027-0111.npy +tests/data/ljspeech/wavs/LJ025-0008.wav|tests/data/ljspeech/wavs/LJ025-0008.npy +tests/data/ljspeech/wavs/LJ024-0040.wav|tests/data/ljspeech/wavs/LJ024-0040.npy +tests/data/ljspeech/wavs/LJ019-0371.wav|tests/data/ljspeech/wavs/LJ019-0371.npy +tests/data/ljspeech/wavs/LJ023-0140.wav|tests/data/ljspeech/wavs/LJ023-0140.npy +tests/data/ljspeech/wavs/LJ025-0004.wav|tests/data/ljspeech/wavs/LJ025-0004.npy +tests/data/ljspeech/wavs/LJ006-0202.wav|tests/data/ljspeech/wavs/LJ006-0202.npy +tests/data/ljspeech/wavs/LJ032-0107.wav|tests/data/ljspeech/wavs/LJ032-0107.npy +tests/data/ljspeech/wavs/LJ006-0016.wav|tests/data/ljspeech/wavs/LJ006-0016.npy +tests/data/ljspeech/wavs/LJ027-0126.wav|tests/data/ljspeech/wavs/LJ027-0126.npy +tests/data/ljspeech/wavs/LJ041-0097.wav|tests/data/ljspeech/wavs/LJ041-0097.npy +tests/data/ljspeech/wavs/LJ036-0175.wav|tests/data/ljspeech/wavs/LJ036-0175.npy +tests/data/ljspeech/wavs/LJ017-0012.wav|tests/data/ljspeech/wavs/LJ017-0012.npy +tests/data/ljspeech/wavs/LJ047-0157.wav|tests/data/ljspeech/wavs/LJ047-0157.npy +tests/data/ljspeech/wavs/LJ023-0104.wav|tests/data/ljspeech/wavs/LJ023-0104.npy +tests/data/ljspeech/wavs/LJ023-0098.wav|tests/data/ljspeech/wavs/LJ023-0098.npy +tests/data/ljspeech/wavs/LJ004-0109.wav|tests/data/ljspeech/wavs/LJ004-0109.npy +tests/data/ljspeech/wavs/LJ027-0112.wav|tests/data/ljspeech/wavs/LJ027-0112.npy +tests/data/ljspeech/wavs/LJ031-0174.wav|tests/data/ljspeech/wavs/LJ031-0174.npy +tests/data/ljspeech/wavs/LJ013-0060.wav|tests/data/ljspeech/wavs/LJ013-0060.npy +tests/data/ljspeech/wavs/LJ029-0036.wav|tests/data/ljspeech/wavs/LJ029-0036.npy +tests/data/ljspeech/wavs/LJ002-0216.wav|tests/data/ljspeech/wavs/LJ002-0216.npy +tests/data/ljspeech/wavs/LJ024-0042.wav|tests/data/ljspeech/wavs/LJ024-0042.npy +tests/data/ljspeech/wavs/LJ004-0040.wav|tests/data/ljspeech/wavs/LJ004-0040.npy +tests/data/ljspeech/wavs/LJ046-0132.wav|tests/data/ljspeech/wavs/LJ046-0132.npy +tests/data/ljspeech/wavs/LJ034-0081.wav|tests/data/ljspeech/wavs/LJ034-0081.npy +tests/data/ljspeech/wavs/LJ023-0137.wav|tests/data/ljspeech/wavs/LJ023-0137.npy +tests/data/ljspeech/wavs/LJ042-0003.wav|tests/data/ljspeech/wavs/LJ042-0003.npy +tests/data/ljspeech/wavs/LJ017-0209.wav|tests/data/ljspeech/wavs/LJ017-0209.npy +tests/data/ljspeech/wavs/LJ025-0094.wav|tests/data/ljspeech/wavs/LJ025-0094.npy +tests/data/ljspeech/wavs/LJ024-0111.wav|tests/data/ljspeech/wavs/LJ024-0111.npy +tests/data/ljspeech/wavs/LJ006-0225.wav|tests/data/ljspeech/wavs/LJ006-0225.npy +tests/data/ljspeech/wavs/LJ015-0098.wav|tests/data/ljspeech/wavs/LJ015-0098.npy +tests/data/ljspeech/wavs/LJ036-0088.wav|tests/data/ljspeech/wavs/LJ036-0088.npy +tests/data/ljspeech/wavs/LJ038-0197.wav|tests/data/ljspeech/wavs/LJ038-0197.npy +tests/data/ljspeech/wavs/LJ045-0170.wav|tests/data/ljspeech/wavs/LJ045-0170.npy +tests/data/ljspeech/wavs/LJ022-0202.wav|tests/data/ljspeech/wavs/LJ022-0202.npy +tests/data/ljspeech/wavs/LJ044-0169.wav|tests/data/ljspeech/wavs/LJ044-0169.npy +tests/data/ljspeech/wavs/LJ032-0082.wav|tests/data/ljspeech/wavs/LJ032-0082.npy +tests/data/ljspeech/wavs/LJ023-0037.wav|tests/data/ljspeech/wavs/LJ023-0037.npy +tests/data/ljspeech/wavs/LJ049-0036.wav|tests/data/ljspeech/wavs/LJ049-0036.npy +tests/data/ljspeech/wavs/LJ018-0281.wav|tests/data/ljspeech/wavs/LJ018-0281.npy 
+tests/data/ljspeech/wavs/LJ018-0062.wav|tests/data/ljspeech/wavs/LJ018-0062.npy +tests/data/ljspeech/wavs/LJ010-0074.wav|tests/data/ljspeech/wavs/LJ010-0074.npy +tests/data/ljspeech/wavs/LJ020-0063.wav|tests/data/ljspeech/wavs/LJ020-0063.npy +tests/data/ljspeech/wavs/LJ038-0297.wav|tests/data/ljspeech/wavs/LJ038-0297.npy +tests/data/ljspeech/wavs/LJ009-0219.wav|tests/data/ljspeech/wavs/LJ009-0219.npy +tests/data/ljspeech/wavs/LJ008-0291.wav|tests/data/ljspeech/wavs/LJ008-0291.npy +tests/data/ljspeech/wavs/LJ006-0103.wav|tests/data/ljspeech/wavs/LJ006-0103.npy +tests/data/ljspeech/wavs/LJ026-0101.wav|tests/data/ljspeech/wavs/LJ026-0101.npy +tests/data/ljspeech/wavs/LJ023-0005.wav|tests/data/ljspeech/wavs/LJ023-0005.npy +tests/data/ljspeech/wavs/LJ046-0037.wav|tests/data/ljspeech/wavs/LJ046-0037.npy +tests/data/ljspeech/wavs/LJ012-0282.wav|tests/data/ljspeech/wavs/LJ012-0282.npy +tests/data/ljspeech/wavs/LJ010-0161.wav|tests/data/ljspeech/wavs/LJ010-0161.npy +tests/data/ljspeech/wavs/LJ040-0196.wav|tests/data/ljspeech/wavs/LJ040-0196.npy +tests/data/ljspeech/wavs/LJ013-0255.wav|tests/data/ljspeech/wavs/LJ013-0255.npy +tests/data/ljspeech/wavs/LJ002-0026.wav|tests/data/ljspeech/wavs/LJ002-0026.npy +tests/data/ljspeech/wavs/LJ008-0013.wav|tests/data/ljspeech/wavs/LJ008-0013.npy +tests/data/ljspeech/wavs/LJ047-0160.wav|tests/data/ljspeech/wavs/LJ047-0160.npy +tests/data/ljspeech/wavs/LJ031-0113.wav|tests/data/ljspeech/wavs/LJ031-0113.npy +tests/data/ljspeech/wavs/LJ035-0178.wav|tests/data/ljspeech/wavs/LJ035-0178.npy +tests/data/ljspeech/wavs/LJ002-0009.wav|tests/data/ljspeech/wavs/LJ002-0009.npy +tests/data/ljspeech/wavs/LJ049-0107.wav|tests/data/ljspeech/wavs/LJ049-0107.npy +tests/data/ljspeech/wavs/LJ028-0187.wav|tests/data/ljspeech/wavs/LJ028-0187.npy +tests/data/ljspeech/wavs/LJ031-0231.wav|tests/data/ljspeech/wavs/LJ031-0231.npy +tests/data/ljspeech/wavs/LJ010-0144.wav|tests/data/ljspeech/wavs/LJ010-0144.npy +tests/data/ljspeech/wavs/LJ003-0028.wav|tests/data/ljspeech/wavs/LJ003-0028.npy +tests/data/ljspeech/wavs/LJ013-0203.wav|tests/data/ljspeech/wavs/LJ013-0203.npy +tests/data/ljspeech/wavs/LJ018-0284.wav|tests/data/ljspeech/wavs/LJ018-0284.npy +tests/data/ljspeech/wavs/LJ050-0157.wav|tests/data/ljspeech/wavs/LJ050-0157.npy +tests/data/ljspeech/wavs/LJ028-0211.wav|tests/data/ljspeech/wavs/LJ028-0211.npy +tests/data/ljspeech/wavs/LJ004-0126.wav|tests/data/ljspeech/wavs/LJ004-0126.npy +tests/data/ljspeech/wavs/LJ039-0011.wav|tests/data/ljspeech/wavs/LJ039-0011.npy +tests/data/ljspeech/wavs/LJ040-0080.wav|tests/data/ljspeech/wavs/LJ040-0080.npy +tests/data/ljspeech/wavs/LJ013-0120.wav|tests/data/ljspeech/wavs/LJ013-0120.npy +tests/data/ljspeech/wavs/LJ002-0131.wav|tests/data/ljspeech/wavs/LJ002-0131.npy +tests/data/ljspeech/wavs/LJ039-0113.wav|tests/data/ljspeech/wavs/LJ039-0113.npy +tests/data/ljspeech/wavs/LJ024-0140.wav|tests/data/ljspeech/wavs/LJ024-0140.npy +tests/data/ljspeech/wavs/LJ021-0085.wav|tests/data/ljspeech/wavs/LJ021-0085.npy +tests/data/ljspeech/wavs/LJ034-0036.wav|tests/data/ljspeech/wavs/LJ034-0036.npy +tests/data/ljspeech/wavs/LJ040-0007.wav|tests/data/ljspeech/wavs/LJ040-0007.npy +tests/data/ljspeech/wavs/LJ011-0266.wav|tests/data/ljspeech/wavs/LJ011-0266.npy +tests/data/ljspeech/wavs/LJ023-0095.wav|tests/data/ljspeech/wavs/LJ023-0095.npy +tests/data/ljspeech/wavs/LJ010-0169.wav|tests/data/ljspeech/wavs/LJ010-0169.npy +tests/data/ljspeech/wavs/LJ013-0124.wav|tests/data/ljspeech/wavs/LJ013-0124.npy 
+tests/data/ljspeech/wavs/LJ030-0122.wav|tests/data/ljspeech/wavs/LJ030-0122.npy +tests/data/ljspeech/wavs/LJ023-0035.wav|tests/data/ljspeech/wavs/LJ023-0035.npy +tests/data/ljspeech/wavs/LJ018-0361.wav|tests/data/ljspeech/wavs/LJ018-0361.npy +tests/data/ljspeech/wavs/LJ037-0193.wav|tests/data/ljspeech/wavs/LJ037-0193.npy +tests/data/ljspeech/wavs/LJ039-0227.wav|tests/data/ljspeech/wavs/LJ039-0227.npy +tests/data/ljspeech/wavs/LJ035-0088.wav|tests/data/ljspeech/wavs/LJ035-0088.npy +tests/data/ljspeech/wavs/LJ029-0054.wav|tests/data/ljspeech/wavs/LJ029-0054.npy +tests/data/ljspeech/wavs/LJ002-0065.wav|tests/data/ljspeech/wavs/LJ002-0065.npy +tests/data/ljspeech/wavs/LJ022-0062.wav|tests/data/ljspeech/wavs/LJ022-0062.npy +tests/data/ljspeech/wavs/LJ009-0296.wav|tests/data/ljspeech/wavs/LJ009-0296.npy +tests/data/ljspeech/wavs/LJ021-0008.wav|tests/data/ljspeech/wavs/LJ021-0008.npy +tests/data/ljspeech/wavs/LJ032-0266.wav|tests/data/ljspeech/wavs/LJ032-0266.npy +tests/data/ljspeech/wavs/LJ006-0176.wav|tests/data/ljspeech/wavs/LJ006-0176.npy +tests/data/ljspeech/wavs/LJ042-0203.wav|tests/data/ljspeech/wavs/LJ042-0203.npy +tests/data/ljspeech/wavs/LJ014-0151.wav|tests/data/ljspeech/wavs/LJ014-0151.npy +tests/data/ljspeech/wavs/LJ032-0016.wav|tests/data/ljspeech/wavs/LJ032-0016.npy +tests/data/ljspeech/wavs/LJ015-0159.wav|tests/data/ljspeech/wavs/LJ015-0159.npy +tests/data/ljspeech/wavs/LJ010-0141.wav|tests/data/ljspeech/wavs/LJ010-0141.npy +tests/data/ljspeech/wavs/LJ025-0053.wav|tests/data/ljspeech/wavs/LJ025-0053.npy +tests/data/ljspeech/wavs/LJ043-0106.wav|tests/data/ljspeech/wavs/LJ043-0106.npy +tests/data/ljspeech/wavs/LJ009-0273.wav|tests/data/ljspeech/wavs/LJ009-0273.npy +tests/data/ljspeech/wavs/LJ027-0137.wav|tests/data/ljspeech/wavs/LJ027-0137.npy +tests/data/ljspeech/wavs/LJ050-0004.wav|tests/data/ljspeech/wavs/LJ050-0004.npy +tests/data/ljspeech/wavs/LJ045-0212.wav|tests/data/ljspeech/wavs/LJ045-0212.npy +tests/data/ljspeech/wavs/LJ014-0017.wav|tests/data/ljspeech/wavs/LJ014-0017.npy +tests/data/ljspeech/wavs/LJ033-0055.wav|tests/data/ljspeech/wavs/LJ033-0055.npy +tests/data/ljspeech/wavs/LJ037-0164.wav|tests/data/ljspeech/wavs/LJ037-0164.npy +tests/data/ljspeech/wavs/LJ035-0093.wav|tests/data/ljspeech/wavs/LJ035-0093.npy +tests/data/ljspeech/wavs/LJ020-0086.wav|tests/data/ljspeech/wavs/LJ020-0086.npy +tests/data/ljspeech/wavs/LJ046-0142.wav|tests/data/ljspeech/wavs/LJ046-0142.npy +tests/data/ljspeech/wavs/LJ026-0011.wav|tests/data/ljspeech/wavs/LJ026-0011.npy +tests/data/ljspeech/wavs/LJ002-0198.wav|tests/data/ljspeech/wavs/LJ002-0198.npy +tests/data/ljspeech/wavs/LJ010-0081.wav|tests/data/ljspeech/wavs/LJ010-0081.npy +tests/data/ljspeech/wavs/LJ016-0355.wav|tests/data/ljspeech/wavs/LJ016-0355.npy +tests/data/ljspeech/wavs/LJ009-0049.wav|tests/data/ljspeech/wavs/LJ009-0049.npy +tests/data/ljspeech/wavs/LJ009-0267.wav|tests/data/ljspeech/wavs/LJ009-0267.npy +tests/data/ljspeech/wavs/LJ044-0213.wav|tests/data/ljspeech/wavs/LJ044-0213.npy +tests/data/ljspeech/wavs/LJ039-0109.wav|tests/data/ljspeech/wavs/LJ039-0109.npy +tests/data/ljspeech/wavs/LJ002-0298.wav|tests/data/ljspeech/wavs/LJ002-0298.npy +tests/data/ljspeech/wavs/LJ010-0301.wav|tests/data/ljspeech/wavs/LJ010-0301.npy +tests/data/ljspeech/wavs/LJ049-0130.wav|tests/data/ljspeech/wavs/LJ049-0130.npy +tests/data/ljspeech/wavs/LJ024-0142.wav|tests/data/ljspeech/wavs/LJ024-0142.npy +tests/data/ljspeech/wavs/LJ028-0484.wav|tests/data/ljspeech/wavs/LJ028-0484.npy 
+tests/data/ljspeech/wavs/LJ046-0109.wav|tests/data/ljspeech/wavs/LJ046-0109.npy +tests/data/ljspeech/wavs/LJ016-0191.wav|tests/data/ljspeech/wavs/LJ016-0191.npy +tests/data/ljspeech/wavs/LJ027-0037.wav|tests/data/ljspeech/wavs/LJ027-0037.npy +tests/data/ljspeech/wavs/LJ004-0194.wav|tests/data/ljspeech/wavs/LJ004-0194.npy +tests/data/ljspeech/wavs/LJ005-0284.wav|tests/data/ljspeech/wavs/LJ005-0284.npy +tests/data/ljspeech/wavs/LJ016-0296.wav|tests/data/ljspeech/wavs/LJ016-0296.npy +tests/data/ljspeech/wavs/LJ044-0142.wav|tests/data/ljspeech/wavs/LJ044-0142.npy +tests/data/ljspeech/wavs/LJ013-0097.wav|tests/data/ljspeech/wavs/LJ013-0097.npy +tests/data/ljspeech/wavs/LJ021-0015.wav|tests/data/ljspeech/wavs/LJ021-0015.npy +tests/data/ljspeech/wavs/LJ045-0096.wav|tests/data/ljspeech/wavs/LJ045-0096.npy +tests/data/ljspeech/wavs/LJ038-0051.wav|tests/data/ljspeech/wavs/LJ038-0051.npy +tests/data/ljspeech/wavs/LJ026-0013.wav|tests/data/ljspeech/wavs/LJ026-0013.npy +tests/data/ljspeech/wavs/LJ012-0011.wav|tests/data/ljspeech/wavs/LJ012-0011.npy +tests/data/ljspeech/wavs/LJ019-0384.wav|tests/data/ljspeech/wavs/LJ019-0384.npy +tests/data/ljspeech/wavs/LJ013-0064.wav|tests/data/ljspeech/wavs/LJ013-0064.npy +tests/data/ljspeech/wavs/LJ017-0126.wav|tests/data/ljspeech/wavs/LJ017-0126.npy +tests/data/ljspeech/wavs/LJ046-0212.wav|tests/data/ljspeech/wavs/LJ046-0212.npy +tests/data/ljspeech/wavs/LJ029-0176.wav|tests/data/ljspeech/wavs/LJ029-0176.npy +tests/data/ljspeech/wavs/LJ012-0138.wav|tests/data/ljspeech/wavs/LJ012-0138.npy +tests/data/ljspeech/wavs/LJ029-0167.wav|tests/data/ljspeech/wavs/LJ029-0167.npy +tests/data/ljspeech/wavs/LJ028-0403.wav|tests/data/ljspeech/wavs/LJ028-0403.npy +tests/data/ljspeech/wavs/LJ023-0032.wav|tests/data/ljspeech/wavs/LJ023-0032.npy +tests/data/ljspeech/wavs/LJ028-0381.wav|tests/data/ljspeech/wavs/LJ028-0381.npy +tests/data/ljspeech/wavs/LJ013-0249.wav|tests/data/ljspeech/wavs/LJ013-0249.npy +tests/data/ljspeech/wavs/LJ028-0071.wav|tests/data/ljspeech/wavs/LJ028-0071.npy +tests/data/ljspeech/wavs/LJ036-0123.wav|tests/data/ljspeech/wavs/LJ036-0123.npy +tests/data/ljspeech/wavs/LJ037-0206.wav|tests/data/ljspeech/wavs/LJ037-0206.npy +tests/data/ljspeech/wavs/LJ030-0151.wav|tests/data/ljspeech/wavs/LJ030-0151.npy +tests/data/ljspeech/wavs/LJ029-0024.wav|tests/data/ljspeech/wavs/LJ029-0024.npy +tests/data/ljspeech/wavs/LJ050-0182.wav|tests/data/ljspeech/wavs/LJ050-0182.npy +tests/data/ljspeech/wavs/LJ034-0115.wav|tests/data/ljspeech/wavs/LJ034-0115.npy +tests/data/ljspeech/wavs/LJ026-0054.wav|tests/data/ljspeech/wavs/LJ026-0054.npy +tests/data/ljspeech/wavs/LJ039-0200.wav|tests/data/ljspeech/wavs/LJ039-0200.npy +tests/data/ljspeech/wavs/LJ015-0240.wav|tests/data/ljspeech/wavs/LJ015-0240.npy +tests/data/ljspeech/wavs/LJ020-0073.wav|tests/data/ljspeech/wavs/LJ020-0073.npy +tests/data/ljspeech/wavs/LJ039-0133.wav|tests/data/ljspeech/wavs/LJ039-0133.npy +tests/data/ljspeech/wavs/LJ035-0150.wav|tests/data/ljspeech/wavs/LJ035-0150.npy +tests/data/ljspeech/wavs/LJ038-0213.wav|tests/data/ljspeech/wavs/LJ038-0213.npy +tests/data/ljspeech/wavs/LJ016-0407.wav|tests/data/ljspeech/wavs/LJ016-0407.npy +tests/data/ljspeech/wavs/LJ038-0257.wav|tests/data/ljspeech/wavs/LJ038-0257.npy +tests/data/ljspeech/wavs/LJ029-0090.wav|tests/data/ljspeech/wavs/LJ029-0090.npy +tests/data/ljspeech/wavs/LJ035-0012.wav|tests/data/ljspeech/wavs/LJ035-0012.npy +tests/data/ljspeech/wavs/LJ041-0164.wav|tests/data/ljspeech/wavs/LJ041-0164.npy 
+tests/data/ljspeech/wavs/LJ005-0119.wav|tests/data/ljspeech/wavs/LJ005-0119.npy +tests/data/ljspeech/wavs/LJ024-0090.wav|tests/data/ljspeech/wavs/LJ024-0090.npy +tests/data/ljspeech/wavs/LJ002-0250.wav|tests/data/ljspeech/wavs/LJ002-0250.npy +tests/data/ljspeech/wavs/LJ013-0106.wav|tests/data/ljspeech/wavs/LJ013-0106.npy +tests/data/ljspeech/wavs/LJ033-0122.wav|tests/data/ljspeech/wavs/LJ033-0122.npy +tests/data/ljspeech/wavs/LJ050-0162.wav|tests/data/ljspeech/wavs/LJ050-0162.npy +tests/data/ljspeech/wavs/LJ007-0086.wav|tests/data/ljspeech/wavs/LJ007-0086.npy +tests/data/ljspeech/wavs/LJ013-0154.wav|tests/data/ljspeech/wavs/LJ013-0154.npy +tests/data/ljspeech/wavs/LJ045-0173.wav|tests/data/ljspeech/wavs/LJ045-0173.npy +tests/data/ljspeech/wavs/LJ014-0178.wav|tests/data/ljspeech/wavs/LJ014-0178.npy +tests/data/ljspeech/wavs/LJ005-0154.wav|tests/data/ljspeech/wavs/LJ005-0154.npy +tests/data/ljspeech/wavs/LJ021-0193.wav|tests/data/ljspeech/wavs/LJ021-0193.npy +tests/data/ljspeech/wavs/LJ033-0126.wav|tests/data/ljspeech/wavs/LJ033-0126.npy +tests/data/ljspeech/wavs/LJ043-0119.wav|tests/data/ljspeech/wavs/LJ043-0119.npy +tests/data/ljspeech/wavs/LJ034-0097.wav|tests/data/ljspeech/wavs/LJ034-0097.npy +tests/data/ljspeech/wavs/LJ037-0146.wav|tests/data/ljspeech/wavs/LJ037-0146.npy +tests/data/ljspeech/wavs/LJ011-0055.wav|tests/data/ljspeech/wavs/LJ011-0055.npy +tests/data/ljspeech/wavs/LJ042-0059.wav|tests/data/ljspeech/wavs/LJ042-0059.npy +tests/data/ljspeech/wavs/LJ010-0188.wav|tests/data/ljspeech/wavs/LJ010-0188.npy +tests/data/ljspeech/wavs/LJ044-0164.wav|tests/data/ljspeech/wavs/LJ044-0164.npy +tests/data/ljspeech/wavs/LJ013-0050.wav|tests/data/ljspeech/wavs/LJ013-0050.npy +tests/data/ljspeech/wavs/LJ006-0083.wav|tests/data/ljspeech/wavs/LJ006-0083.npy +tests/data/ljspeech/wavs/LJ040-0167.wav|tests/data/ljspeech/wavs/LJ040-0167.npy +tests/data/ljspeech/wavs/LJ021-0176.wav|tests/data/ljspeech/wavs/LJ021-0176.npy +tests/data/ljspeech/wavs/LJ026-0151.wav|tests/data/ljspeech/wavs/LJ026-0151.npy +tests/data/ljspeech/wavs/LJ046-0227.wav|tests/data/ljspeech/wavs/LJ046-0227.npy +tests/data/ljspeech/wavs/LJ008-0026.wav|tests/data/ljspeech/wavs/LJ008-0026.npy +tests/data/ljspeech/wavs/LJ013-0062.wav|tests/data/ljspeech/wavs/LJ013-0062.npy +tests/data/ljspeech/wavs/LJ026-0068.wav|tests/data/ljspeech/wavs/LJ026-0068.npy +tests/data/ljspeech/wavs/LJ031-0120.wav|tests/data/ljspeech/wavs/LJ031-0120.npy +tests/data/ljspeech/wavs/LJ009-0265.wav|tests/data/ljspeech/wavs/LJ009-0265.npy +tests/data/ljspeech/wavs/LJ018-0080.wav|tests/data/ljspeech/wavs/LJ018-0080.npy +tests/data/ljspeech/wavs/LJ002-0139.wav|tests/data/ljspeech/wavs/LJ002-0139.npy +tests/data/ljspeech/wavs/LJ011-0202.wav|tests/data/ljspeech/wavs/LJ011-0202.npy +tests/data/ljspeech/wavs/LJ024-0118.wav|tests/data/ljspeech/wavs/LJ024-0118.npy +tests/data/ljspeech/wavs/LJ009-0210.wav|tests/data/ljspeech/wavs/LJ009-0210.npy +tests/data/ljspeech/wavs/LJ001-0013.wav|tests/data/ljspeech/wavs/LJ001-0013.npy +tests/data/ljspeech/wavs/LJ039-0176.wav|tests/data/ljspeech/wavs/LJ039-0176.npy +tests/data/ljspeech/wavs/LJ045-0155.wav|tests/data/ljspeech/wavs/LJ045-0155.npy +tests/data/ljspeech/wavs/LJ028-0342.wav|tests/data/ljspeech/wavs/LJ028-0342.npy +tests/data/ljspeech/wavs/LJ006-0145.wav|tests/data/ljspeech/wavs/LJ006-0145.npy +tests/data/ljspeech/wavs/LJ014-0242.wav|tests/data/ljspeech/wavs/LJ014-0242.npy +tests/data/ljspeech/wavs/LJ002-0023.wav|tests/data/ljspeech/wavs/LJ002-0023.npy 
+tests/data/ljspeech/wavs/LJ031-0122.wav|tests/data/ljspeech/wavs/LJ031-0122.npy +tests/data/ljspeech/wavs/LJ028-0121.wav|tests/data/ljspeech/wavs/LJ028-0121.npy +tests/data/ljspeech/wavs/LJ036-0206.wav|tests/data/ljspeech/wavs/LJ036-0206.npy +tests/data/ljspeech/wavs/LJ050-0068.wav|tests/data/ljspeech/wavs/LJ050-0068.npy +tests/data/ljspeech/wavs/LJ043-0091.wav|tests/data/ljspeech/wavs/LJ043-0091.npy +tests/data/ljspeech/wavs/LJ011-0269.wav|tests/data/ljspeech/wavs/LJ011-0269.npy +tests/data/ljspeech/wavs/LJ016-0050.wav|tests/data/ljspeech/wavs/LJ016-0050.npy +tests/data/ljspeech/wavs/LJ029-0174.wav|tests/data/ljspeech/wavs/LJ029-0174.npy +tests/data/ljspeech/wavs/LJ008-0009.wav|tests/data/ljspeech/wavs/LJ008-0009.npy +tests/data/ljspeech/wavs/LJ048-0152.wav|tests/data/ljspeech/wavs/LJ048-0152.npy +tests/data/ljspeech/wavs/LJ047-0195.wav|tests/data/ljspeech/wavs/LJ047-0195.npy +tests/data/ljspeech/wavs/LJ010-0131.wav|tests/data/ljspeech/wavs/LJ010-0131.npy +tests/data/ljspeech/wavs/LJ005-0137.wav|tests/data/ljspeech/wavs/LJ005-0137.npy +tests/data/ljspeech/wavs/LJ049-0151.wav|tests/data/ljspeech/wavs/LJ049-0151.npy +tests/data/ljspeech/wavs/LJ048-0013.wav|tests/data/ljspeech/wavs/LJ048-0013.npy +tests/data/ljspeech/wavs/LJ016-0388.wav|tests/data/ljspeech/wavs/LJ016-0388.npy +tests/data/ljspeech/wavs/LJ006-0182.wav|tests/data/ljspeech/wavs/LJ006-0182.npy +tests/data/ljspeech/wavs/LJ018-0255.wav|tests/data/ljspeech/wavs/LJ018-0255.npy +tests/data/ljspeech/wavs/LJ047-0188.wav|tests/data/ljspeech/wavs/LJ047-0188.npy +tests/data/ljspeech/wavs/LJ028-0014.wav|tests/data/ljspeech/wavs/LJ028-0014.npy +tests/data/ljspeech/wavs/LJ037-0211.wav|tests/data/ljspeech/wavs/LJ037-0211.npy +tests/data/ljspeech/wavs/LJ038-0162.wav|tests/data/ljspeech/wavs/LJ038-0162.npy +tests/data/ljspeech/wavs/LJ018-0019.wav|tests/data/ljspeech/wavs/LJ018-0019.npy +tests/data/ljspeech/wavs/LJ035-0053.wav|tests/data/ljspeech/wavs/LJ035-0053.npy +tests/data/ljspeech/wavs/LJ008-0159.wav|tests/data/ljspeech/wavs/LJ008-0159.npy +tests/data/ljspeech/wavs/LJ037-0032.wav|tests/data/ljspeech/wavs/LJ037-0032.npy +tests/data/ljspeech/wavs/LJ028-0508.wav|tests/data/ljspeech/wavs/LJ028-0508.npy +tests/data/ljspeech/wavs/LJ015-0286.wav|tests/data/ljspeech/wavs/LJ015-0286.npy +tests/data/ljspeech/wavs/LJ048-0158.wav|tests/data/ljspeech/wavs/LJ048-0158.npy +tests/data/ljspeech/wavs/LJ002-0213.wav|tests/data/ljspeech/wavs/LJ002-0213.npy +tests/data/ljspeech/wavs/LJ028-0252.wav|tests/data/ljspeech/wavs/LJ028-0252.npy +tests/data/ljspeech/wavs/LJ011-0129.wav|tests/data/ljspeech/wavs/LJ011-0129.npy +tests/data/ljspeech/wavs/LJ018-0337.wav|tests/data/ljspeech/wavs/LJ018-0337.npy +tests/data/ljspeech/wavs/LJ046-0188.wav|tests/data/ljspeech/wavs/LJ046-0188.npy +tests/data/ljspeech/wavs/LJ043-0107.wav|tests/data/ljspeech/wavs/LJ043-0107.npy +tests/data/ljspeech/wavs/LJ032-0046.wav|tests/data/ljspeech/wavs/LJ032-0046.npy +tests/data/ljspeech/wavs/LJ046-0230.wav|tests/data/ljspeech/wavs/LJ046-0230.npy +tests/data/ljspeech/wavs/LJ040-0153.wav|tests/data/ljspeech/wavs/LJ040-0153.npy +tests/data/ljspeech/wavs/LJ002-0029.wav|tests/data/ljspeech/wavs/LJ002-0029.npy +tests/data/ljspeech/wavs/LJ002-0151.wav|tests/data/ljspeech/wavs/LJ002-0151.npy +tests/data/ljspeech/wavs/LJ050-0150.wav|tests/data/ljspeech/wavs/LJ050-0150.npy +tests/data/ljspeech/wavs/LJ038-0183.wav|tests/data/ljspeech/wavs/LJ038-0183.npy +tests/data/ljspeech/wavs/LJ033-0191.wav|tests/data/ljspeech/wavs/LJ033-0191.npy 
+tests/data/ljspeech/wavs/LJ020-0004.wav|tests/data/ljspeech/wavs/LJ020-0004.npy +tests/data/ljspeech/wavs/LJ023-0130.wav|tests/data/ljspeech/wavs/LJ023-0130.npy +tests/data/ljspeech/wavs/LJ022-0005.wav|tests/data/ljspeech/wavs/LJ022-0005.npy +tests/data/ljspeech/wavs/LJ015-0274.wav|tests/data/ljspeech/wavs/LJ015-0274.npy +tests/data/ljspeech/wavs/LJ046-0168.wav|tests/data/ljspeech/wavs/LJ046-0168.npy +tests/data/ljspeech/wavs/LJ028-0137.wav|tests/data/ljspeech/wavs/LJ028-0137.npy +tests/data/ljspeech/wavs/LJ016-0058.wav|tests/data/ljspeech/wavs/LJ016-0058.npy +tests/data/ljspeech/wavs/LJ004-0175.wav|tests/data/ljspeech/wavs/LJ004-0175.npy +tests/data/ljspeech/wavs/LJ024-0069.wav|tests/data/ljspeech/wavs/LJ024-0069.npy +tests/data/ljspeech/wavs/LJ037-0130.wav|tests/data/ljspeech/wavs/LJ037-0130.npy +tests/data/ljspeech/wavs/LJ023-0074.wav|tests/data/ljspeech/wavs/LJ023-0074.npy +tests/data/ljspeech/wavs/LJ022-0152.wav|tests/data/ljspeech/wavs/LJ022-0152.npy +tests/data/ljspeech/wavs/LJ001-0179.wav|tests/data/ljspeech/wavs/LJ001-0179.npy +tests/data/ljspeech/wavs/LJ023-0067.wav|tests/data/ljspeech/wavs/LJ023-0067.npy +tests/data/ljspeech/wavs/LJ024-0132.wav|tests/data/ljspeech/wavs/LJ024-0132.npy +tests/data/ljspeech/wavs/LJ015-0091.wav|tests/data/ljspeech/wavs/LJ015-0091.npy +tests/data/ljspeech/wavs/LJ009-0071.wav|tests/data/ljspeech/wavs/LJ009-0071.npy +tests/data/ljspeech/wavs/LJ024-0083.wav|tests/data/ljspeech/wavs/LJ024-0083.npy +tests/data/ljspeech/wavs/LJ002-0069.wav|tests/data/ljspeech/wavs/LJ002-0069.npy +tests/data/ljspeech/wavs/LJ028-0107.wav|tests/data/ljspeech/wavs/LJ028-0107.npy +tests/data/ljspeech/wavs/LJ006-0143.wav|tests/data/ljspeech/wavs/LJ006-0143.npy +tests/data/ljspeech/wavs/LJ038-0072.wav|tests/data/ljspeech/wavs/LJ038-0072.npy +tests/data/ljspeech/wavs/LJ001-0125.wav|tests/data/ljspeech/wavs/LJ001-0125.npy +tests/data/ljspeech/wavs/LJ031-0104.wav|tests/data/ljspeech/wavs/LJ031-0104.npy +tests/data/ljspeech/wavs/LJ007-0208.wav|tests/data/ljspeech/wavs/LJ007-0208.npy +tests/data/ljspeech/wavs/LJ027-0005.wav|tests/data/ljspeech/wavs/LJ027-0005.npy +tests/data/ljspeech/wavs/LJ042-0039.wav|tests/data/ljspeech/wavs/LJ042-0039.npy +tests/data/ljspeech/wavs/LJ048-0056.wav|tests/data/ljspeech/wavs/LJ048-0056.npy +tests/data/ljspeech/wavs/LJ014-0125.wav|tests/data/ljspeech/wavs/LJ014-0125.npy +tests/data/ljspeech/wavs/LJ011-0004.wav|tests/data/ljspeech/wavs/LJ011-0004.npy +tests/data/ljspeech/wavs/LJ007-0088.wav|tests/data/ljspeech/wavs/LJ007-0088.npy +tests/data/ljspeech/wavs/LJ018-0105.wav|tests/data/ljspeech/wavs/LJ018-0105.npy +tests/data/ljspeech/wavs/LJ036-0064.wav|tests/data/ljspeech/wavs/LJ036-0064.npy +tests/data/ljspeech/wavs/LJ002-0083.wav|tests/data/ljspeech/wavs/LJ002-0083.npy +tests/data/ljspeech/wavs/LJ013-0081.wav|tests/data/ljspeech/wavs/LJ013-0081.npy +tests/data/ljspeech/wavs/LJ048-0223.wav|tests/data/ljspeech/wavs/LJ048-0223.npy +tests/data/ljspeech/wavs/LJ041-0156.wav|tests/data/ljspeech/wavs/LJ041-0156.npy +tests/data/ljspeech/wavs/LJ039-0130.wav|tests/data/ljspeech/wavs/LJ039-0130.npy +tests/data/ljspeech/wavs/LJ006-0150.wav|tests/data/ljspeech/wavs/LJ006-0150.npy +tests/data/ljspeech/wavs/LJ013-0072.wav|tests/data/ljspeech/wavs/LJ013-0072.npy +tests/data/ljspeech/wavs/LJ017-0179.wav|tests/data/ljspeech/wavs/LJ017-0179.npy +tests/data/ljspeech/wavs/LJ002-0287.wav|tests/data/ljspeech/wavs/LJ002-0287.npy +tests/data/ljspeech/wavs/LJ007-0169.wav|tests/data/ljspeech/wavs/LJ007-0169.npy 
+tests/data/ljspeech/wavs/LJ006-0240.wav|tests/data/ljspeech/wavs/LJ006-0240.npy +tests/data/ljspeech/wavs/LJ005-0156.wav|tests/data/ljspeech/wavs/LJ005-0156.npy +tests/data/ljspeech/wavs/LJ020-0104.wav|tests/data/ljspeech/wavs/LJ020-0104.npy +tests/data/ljspeech/wavs/LJ036-0145.wav|tests/data/ljspeech/wavs/LJ036-0145.npy +tests/data/ljspeech/wavs/LJ031-0068.wav|tests/data/ljspeech/wavs/LJ031-0068.npy +tests/data/ljspeech/wavs/LJ017-0229.wav|tests/data/ljspeech/wavs/LJ017-0229.npy +tests/data/ljspeech/wavs/LJ035-0133.wav|tests/data/ljspeech/wavs/LJ035-0133.npy +tests/data/ljspeech/wavs/LJ017-0132.wav|tests/data/ljspeech/wavs/LJ017-0132.npy +tests/data/ljspeech/wavs/LJ037-0172.wav|tests/data/ljspeech/wavs/LJ037-0172.npy +tests/data/ljspeech/wavs/LJ034-0161.wav|tests/data/ljspeech/wavs/LJ034-0161.npy +tests/data/ljspeech/wavs/LJ002-0236.wav|tests/data/ljspeech/wavs/LJ002-0236.npy +tests/data/ljspeech/wavs/LJ034-0155.wav|tests/data/ljspeech/wavs/LJ034-0155.npy +tests/data/ljspeech/wavs/LJ050-0266.wav|tests/data/ljspeech/wavs/LJ050-0266.npy +tests/data/ljspeech/wavs/LJ044-0234.wav|tests/data/ljspeech/wavs/LJ044-0234.npy +tests/data/ljspeech/wavs/LJ039-0154.wav|tests/data/ljspeech/wavs/LJ039-0154.npy +tests/data/ljspeech/wavs/LJ015-0058.wav|tests/data/ljspeech/wavs/LJ015-0058.npy +tests/data/ljspeech/wavs/LJ002-0005.wav|tests/data/ljspeech/wavs/LJ002-0005.npy +tests/data/ljspeech/wavs/LJ021-0174.wav|tests/data/ljspeech/wavs/LJ021-0174.npy +tests/data/ljspeech/wavs/LJ034-0093.wav|tests/data/ljspeech/wavs/LJ034-0093.npy +tests/data/ljspeech/wavs/LJ049-0085.wav|tests/data/ljspeech/wavs/LJ049-0085.npy +tests/data/ljspeech/wavs/LJ011-0036.wav|tests/data/ljspeech/wavs/LJ011-0036.npy +tests/data/ljspeech/wavs/LJ017-0263.wav|tests/data/ljspeech/wavs/LJ017-0263.npy +tests/data/ljspeech/wavs/LJ030-0107.wav|tests/data/ljspeech/wavs/LJ030-0107.npy +tests/data/ljspeech/wavs/LJ028-0139.wav|tests/data/ljspeech/wavs/LJ028-0139.npy +tests/data/ljspeech/wavs/LJ042-0040.wav|tests/data/ljspeech/wavs/LJ042-0040.npy +tests/data/ljspeech/wavs/LJ016-0102.wav|tests/data/ljspeech/wavs/LJ016-0102.npy +tests/data/ljspeech/wavs/LJ025-0091.wav|tests/data/ljspeech/wavs/LJ025-0091.npy +tests/data/ljspeech/wavs/LJ011-0109.wav|tests/data/ljspeech/wavs/LJ011-0109.npy +tests/data/ljspeech/wavs/LJ006-0169.wav|tests/data/ljspeech/wavs/LJ006-0169.npy +tests/data/ljspeech/wavs/LJ008-0104.wav|tests/data/ljspeech/wavs/LJ008-0104.npy +tests/data/ljspeech/wavs/LJ034-0089.wav|tests/data/ljspeech/wavs/LJ034-0089.npy +tests/data/ljspeech/wavs/LJ013-0267.wav|tests/data/ljspeech/wavs/LJ013-0267.npy +tests/data/ljspeech/wavs/LJ050-0126.wav|tests/data/ljspeech/wavs/LJ050-0126.npy +tests/data/ljspeech/wavs/LJ014-0115.wav|tests/data/ljspeech/wavs/LJ014-0115.npy +tests/data/ljspeech/wavs/LJ046-0136.wav|tests/data/ljspeech/wavs/LJ046-0136.npy +tests/data/ljspeech/wavs/LJ041-0188.wav|tests/data/ljspeech/wavs/LJ041-0188.npy +tests/data/ljspeech/wavs/LJ036-0118.wav|tests/data/ljspeech/wavs/LJ036-0118.npy +tests/data/ljspeech/wavs/LJ009-0058.wav|tests/data/ljspeech/wavs/LJ009-0058.npy +tests/data/ljspeech/wavs/LJ013-0211.wav|tests/data/ljspeech/wavs/LJ013-0211.npy +tests/data/ljspeech/wavs/LJ028-0231.wav|tests/data/ljspeech/wavs/LJ028-0231.npy +tests/data/ljspeech/wavs/LJ017-0210.wav|tests/data/ljspeech/wavs/LJ017-0210.npy +tests/data/ljspeech/wavs/LJ013-0258.wav|tests/data/ljspeech/wavs/LJ013-0258.npy +tests/data/ljspeech/wavs/LJ017-0051.wav|tests/data/ljspeech/wavs/LJ017-0051.npy 
+tests/data/ljspeech/wavs/LJ006-0061.wav|tests/data/ljspeech/wavs/LJ006-0061.npy +tests/data/ljspeech/wavs/LJ018-0252.wav|tests/data/ljspeech/wavs/LJ018-0252.npy +tests/data/ljspeech/wavs/LJ045-0213.wav|tests/data/ljspeech/wavs/LJ045-0213.npy +tests/data/ljspeech/wavs/LJ043-0144.wav|tests/data/ljspeech/wavs/LJ043-0144.npy +tests/data/ljspeech/wavs/LJ040-0088.wav|tests/data/ljspeech/wavs/LJ040-0088.npy +tests/data/ljspeech/wavs/LJ025-0107.wav|tests/data/ljspeech/wavs/LJ025-0107.npy +tests/data/ljspeech/wavs/LJ032-0014.wav|tests/data/ljspeech/wavs/LJ032-0014.npy +tests/data/ljspeech/wavs/LJ031-0147.wav|tests/data/ljspeech/wavs/LJ031-0147.npy +tests/data/ljspeech/wavs/LJ038-0159.wav|tests/data/ljspeech/wavs/LJ038-0159.npy +tests/data/ljspeech/wavs/LJ026-0033.wav|tests/data/ljspeech/wavs/LJ026-0033.npy +tests/data/ljspeech/wavs/LJ011-0090.wav|tests/data/ljspeech/wavs/LJ011-0090.npy +tests/data/ljspeech/wavs/LJ035-0068.wav|tests/data/ljspeech/wavs/LJ035-0068.npy +tests/data/ljspeech/wavs/LJ022-0089.wav|tests/data/ljspeech/wavs/LJ022-0089.npy +tests/data/ljspeech/wavs/LJ004-0123.wav|tests/data/ljspeech/wavs/LJ004-0123.npy +tests/data/ljspeech/wavs/LJ028-0222.wav|tests/data/ljspeech/wavs/LJ028-0222.npy +tests/data/ljspeech/wavs/LJ028-0115.wav|tests/data/ljspeech/wavs/LJ028-0115.npy +tests/data/ljspeech/wavs/LJ004-0114.wav|tests/data/ljspeech/wavs/LJ004-0114.npy +tests/data/ljspeech/wavs/LJ019-0194.wav|tests/data/ljspeech/wavs/LJ019-0194.npy +tests/data/ljspeech/wavs/LJ028-0277.wav|tests/data/ljspeech/wavs/LJ028-0277.npy +tests/data/ljspeech/wavs/LJ011-0155.wav|tests/data/ljspeech/wavs/LJ011-0155.npy +tests/data/ljspeech/wavs/LJ038-0099.wav|tests/data/ljspeech/wavs/LJ038-0099.npy +tests/data/ljspeech/wavs/LJ019-0269.wav|tests/data/ljspeech/wavs/LJ019-0269.npy +tests/data/ljspeech/wavs/LJ002-0155.wav|tests/data/ljspeech/wavs/LJ002-0155.npy +tests/data/ljspeech/wavs/LJ044-0174.wav|tests/data/ljspeech/wavs/LJ044-0174.npy +tests/data/ljspeech/wavs/LJ041-0117.wav|tests/data/ljspeech/wavs/LJ041-0117.npy +tests/data/ljspeech/wavs/LJ018-0231.wav|tests/data/ljspeech/wavs/LJ018-0231.npy +tests/data/ljspeech/wavs/LJ003-0197.wav|tests/data/ljspeech/wavs/LJ003-0197.npy +tests/data/ljspeech/wavs/LJ010-0288.wav|tests/data/ljspeech/wavs/LJ010-0288.npy +tests/data/ljspeech/wavs/LJ030-0061.wav|tests/data/ljspeech/wavs/LJ030-0061.npy +tests/data/ljspeech/wavs/LJ039-0225.wav|tests/data/ljspeech/wavs/LJ039-0225.npy +tests/data/ljspeech/wavs/LJ014-0081.wav|tests/data/ljspeech/wavs/LJ014-0081.npy +tests/data/ljspeech/wavs/LJ042-0144.wav|tests/data/ljspeech/wavs/LJ042-0144.npy +tests/data/ljspeech/wavs/LJ028-0432.wav|tests/data/ljspeech/wavs/LJ028-0432.npy +tests/data/ljspeech/wavs/LJ018-0016.wav|tests/data/ljspeech/wavs/LJ018-0016.npy +tests/data/ljspeech/wavs/LJ030-0161.wav|tests/data/ljspeech/wavs/LJ030-0161.npy +tests/data/ljspeech/wavs/LJ025-0041.wav|tests/data/ljspeech/wavs/LJ025-0041.npy +tests/data/ljspeech/wavs/LJ005-0053.wav|tests/data/ljspeech/wavs/LJ005-0053.npy +tests/data/ljspeech/wavs/LJ007-0105.wav|tests/data/ljspeech/wavs/LJ007-0105.npy +tests/data/ljspeech/wavs/LJ017-0046.wav|tests/data/ljspeech/wavs/LJ017-0046.npy +tests/data/ljspeech/wavs/LJ050-0184.wav|tests/data/ljspeech/wavs/LJ050-0184.npy +tests/data/ljspeech/wavs/LJ023-0022.wav|tests/data/ljspeech/wavs/LJ023-0022.npy +tests/data/ljspeech/wavs/LJ013-0189.wav|tests/data/ljspeech/wavs/LJ013-0189.npy +tests/data/ljspeech/wavs/LJ048-0135.wav|tests/data/ljspeech/wavs/LJ048-0135.npy 
+tests/data/ljspeech/wavs/LJ019-0355.wav|tests/data/ljspeech/wavs/LJ019-0355.npy +tests/data/ljspeech/wavs/LJ036-0035.wav|tests/data/ljspeech/wavs/LJ036-0035.npy +tests/data/ljspeech/wavs/LJ017-0156.wav|tests/data/ljspeech/wavs/LJ017-0156.npy +tests/data/ljspeech/wavs/LJ017-0095.wav|tests/data/ljspeech/wavs/LJ017-0095.npy +tests/data/ljspeech/wavs/LJ023-0122.wav|tests/data/ljspeech/wavs/LJ023-0122.npy +tests/data/ljspeech/wavs/LJ028-0500.wav|tests/data/ljspeech/wavs/LJ028-0500.npy +tests/data/ljspeech/wavs/LJ042-0094.wav|tests/data/ljspeech/wavs/LJ042-0094.npy +tests/data/ljspeech/wavs/LJ013-0138.wav|tests/data/ljspeech/wavs/LJ013-0138.npy +tests/data/ljspeech/wavs/LJ002-0311.wav|tests/data/ljspeech/wavs/LJ002-0311.npy +tests/data/ljspeech/wavs/LJ028-0454.wav|tests/data/ljspeech/wavs/LJ028-0454.npy +tests/data/ljspeech/wavs/LJ035-0136.wav|tests/data/ljspeech/wavs/LJ035-0136.npy +tests/data/ljspeech/wavs/LJ007-0191.wav|tests/data/ljspeech/wavs/LJ007-0191.npy +tests/data/ljspeech/wavs/LJ018-0166.wav|tests/data/ljspeech/wavs/LJ018-0166.npy +tests/data/ljspeech/wavs/LJ017-0040.wav|tests/data/ljspeech/wavs/LJ017-0040.npy +tests/data/ljspeech/wavs/LJ018-0067.wav|tests/data/ljspeech/wavs/LJ018-0067.npy +tests/data/ljspeech/wavs/LJ007-0015.wav|tests/data/ljspeech/wavs/LJ007-0015.npy +tests/data/ljspeech/wavs/LJ017-0027.wav|tests/data/ljspeech/wavs/LJ017-0027.npy +tests/data/ljspeech/wavs/LJ047-0109.wav|tests/data/ljspeech/wavs/LJ047-0109.npy +tests/data/ljspeech/wavs/LJ034-0163.wav|tests/data/ljspeech/wavs/LJ034-0163.npy +tests/data/ljspeech/wavs/LJ028-0164.wav|tests/data/ljspeech/wavs/LJ028-0164.npy +tests/data/ljspeech/wavs/LJ023-0009.wav|tests/data/ljspeech/wavs/LJ023-0009.npy +tests/data/ljspeech/wavs/LJ034-0215.wav|tests/data/ljspeech/wavs/LJ034-0215.npy +tests/data/ljspeech/wavs/LJ015-0008.wav|tests/data/ljspeech/wavs/LJ015-0008.npy +tests/data/ljspeech/wavs/LJ044-0146.wav|tests/data/ljspeech/wavs/LJ044-0146.npy +tests/data/ljspeech/wavs/LJ032-0050.wav|tests/data/ljspeech/wavs/LJ032-0050.npy +tests/data/ljspeech/wavs/LJ045-0125.wav|tests/data/ljspeech/wavs/LJ045-0125.npy +tests/data/ljspeech/wavs/LJ037-0240.wav|tests/data/ljspeech/wavs/LJ037-0240.npy +tests/data/ljspeech/wavs/LJ016-0181.wav|tests/data/ljspeech/wavs/LJ016-0181.npy +tests/data/ljspeech/wavs/LJ021-0183.wav|tests/data/ljspeech/wavs/LJ021-0183.npy +tests/data/ljspeech/wavs/LJ025-0001.wav|tests/data/ljspeech/wavs/LJ025-0001.npy +tests/data/ljspeech/wavs/LJ032-0255.wav|tests/data/ljspeech/wavs/LJ032-0255.npy +tests/data/ljspeech/wavs/LJ031-0098.wav|tests/data/ljspeech/wavs/LJ031-0098.npy +tests/data/ljspeech/wavs/LJ029-0113.wav|tests/data/ljspeech/wavs/LJ029-0113.npy +tests/data/ljspeech/wavs/LJ005-0247.wav|tests/data/ljspeech/wavs/LJ005-0247.npy +tests/data/ljspeech/wavs/LJ014-0165.wav|tests/data/ljspeech/wavs/LJ014-0165.npy +tests/data/ljspeech/wavs/LJ024-0134.wav|tests/data/ljspeech/wavs/LJ024-0134.npy +tests/data/ljspeech/wavs/LJ038-0121.wav|tests/data/ljspeech/wavs/LJ038-0121.npy +tests/data/ljspeech/wavs/LJ006-0147.wav|tests/data/ljspeech/wavs/LJ006-0147.npy +tests/data/ljspeech/wavs/LJ031-0094.wav|tests/data/ljspeech/wavs/LJ031-0094.npy +tests/data/ljspeech/wavs/LJ015-0252.wav|tests/data/ljspeech/wavs/LJ015-0252.npy +tests/data/ljspeech/wavs/LJ021-0147.wav|tests/data/ljspeech/wavs/LJ021-0147.npy +tests/data/ljspeech/wavs/LJ010-0044.wav|tests/data/ljspeech/wavs/LJ010-0044.npy +tests/data/ljspeech/wavs/LJ045-0098.wav|tests/data/ljspeech/wavs/LJ045-0098.npy 
+tests/data/ljspeech/wavs/LJ016-0100.wav|tests/data/ljspeech/wavs/LJ016-0100.npy +tests/data/ljspeech/wavs/LJ015-0225.wav|tests/data/ljspeech/wavs/LJ015-0225.npy +tests/data/ljspeech/wavs/LJ004-0054.wav|tests/data/ljspeech/wavs/LJ004-0054.npy +tests/data/ljspeech/wavs/LJ004-0160.wav|tests/data/ljspeech/wavs/LJ004-0160.npy +tests/data/ljspeech/wavs/LJ018-0157.wav|tests/data/ljspeech/wavs/LJ018-0157.npy +tests/data/ljspeech/wavs/LJ010-0266.wav|tests/data/ljspeech/wavs/LJ010-0266.npy +tests/data/ljspeech/wavs/LJ027-0159.wav|tests/data/ljspeech/wavs/LJ027-0159.npy +tests/data/ljspeech/wavs/LJ034-0134.wav|tests/data/ljspeech/wavs/LJ034-0134.npy +tests/data/ljspeech/wavs/LJ010-0035.wav|tests/data/ljspeech/wavs/LJ010-0035.npy +tests/data/ljspeech/wavs/LJ014-0037.wav|tests/data/ljspeech/wavs/LJ014-0037.npy +tests/data/ljspeech/wavs/LJ024-0091.wav|tests/data/ljspeech/wavs/LJ024-0091.npy +tests/data/ljspeech/wavs/LJ002-0129.wav|tests/data/ljspeech/wavs/LJ002-0129.npy +tests/data/ljspeech/wavs/LJ040-0121.wav|tests/data/ljspeech/wavs/LJ040-0121.npy +tests/data/ljspeech/wavs/LJ048-0221.wav|tests/data/ljspeech/wavs/LJ048-0221.npy +tests/data/ljspeech/wavs/LJ005-0057.wav|tests/data/ljspeech/wavs/LJ005-0057.npy +tests/data/ljspeech/wavs/LJ029-0180.wav|tests/data/ljspeech/wavs/LJ029-0180.npy +tests/data/ljspeech/wavs/LJ048-0232.wav|tests/data/ljspeech/wavs/LJ048-0232.npy +tests/data/ljspeech/wavs/LJ030-0242.wav|tests/data/ljspeech/wavs/LJ030-0242.npy +tests/data/ljspeech/wavs/LJ021-0089.wav|tests/data/ljspeech/wavs/LJ021-0089.npy +tests/data/ljspeech/wavs/LJ039-0140.wav|tests/data/ljspeech/wavs/LJ039-0140.npy +tests/data/ljspeech/wavs/LJ038-0006.wav|tests/data/ljspeech/wavs/LJ038-0006.npy +tests/data/ljspeech/wavs/LJ003-0164.wav|tests/data/ljspeech/wavs/LJ003-0164.npy +tests/data/ljspeech/wavs/LJ009-0207.wav|tests/data/ljspeech/wavs/LJ009-0207.npy +tests/data/ljspeech/wavs/LJ006-0257.wav|tests/data/ljspeech/wavs/LJ006-0257.npy +tests/data/ljspeech/wavs/LJ028-0193.wav|tests/data/ljspeech/wavs/LJ028-0193.npy +tests/data/ljspeech/wavs/LJ033-0147.wav|tests/data/ljspeech/wavs/LJ033-0147.npy +tests/data/ljspeech/wavs/LJ028-0119.wav|tests/data/ljspeech/wavs/LJ028-0119.npy +tests/data/ljspeech/wavs/LJ045-0160.wav|tests/data/ljspeech/wavs/LJ045-0160.npy +tests/data/ljspeech/wavs/LJ008-0230.wav|tests/data/ljspeech/wavs/LJ008-0230.npy +tests/data/ljspeech/wavs/LJ007-0196.wav|tests/data/ljspeech/wavs/LJ007-0196.npy +tests/data/ljspeech/wavs/LJ015-0220.wav|tests/data/ljspeech/wavs/LJ015-0220.npy +tests/data/ljspeech/wavs/LJ036-0112.wav|tests/data/ljspeech/wavs/LJ036-0112.npy +tests/data/ljspeech/wavs/LJ016-0420.wav|tests/data/ljspeech/wavs/LJ016-0420.npy +tests/data/ljspeech/wavs/LJ027-0073.wav|tests/data/ljspeech/wavs/LJ027-0073.npy +tests/data/ljspeech/wavs/LJ043-0086.wav|tests/data/ljspeech/wavs/LJ043-0086.npy +tests/data/ljspeech/wavs/LJ050-0025.wav|tests/data/ljspeech/wavs/LJ050-0025.npy +tests/data/ljspeech/wavs/LJ010-0149.wav|tests/data/ljspeech/wavs/LJ010-0149.npy +tests/data/ljspeech/wavs/LJ020-0028.wav|tests/data/ljspeech/wavs/LJ020-0028.npy +tests/data/ljspeech/wavs/LJ018-0332.wav|tests/data/ljspeech/wavs/LJ018-0332.npy +tests/data/ljspeech/wavs/LJ011-0150.wav|tests/data/ljspeech/wavs/LJ011-0150.npy +tests/data/ljspeech/wavs/LJ028-0380.wav|tests/data/ljspeech/wavs/LJ028-0380.npy +tests/data/ljspeech/wavs/LJ033-0006.wav|tests/data/ljspeech/wavs/LJ033-0006.npy +tests/data/ljspeech/wavs/LJ030-0140.wav|tests/data/ljspeech/wavs/LJ030-0140.npy 
+tests/data/ljspeech/wavs/LJ036-0139.wav|tests/data/ljspeech/wavs/LJ036-0139.npy +tests/data/ljspeech/wavs/LJ046-0125.wav|tests/data/ljspeech/wavs/LJ046-0125.npy +tests/data/ljspeech/wavs/LJ009-0154.wav|tests/data/ljspeech/wavs/LJ009-0154.npy +tests/data/ljspeech/wavs/LJ005-0132.wav|tests/data/ljspeech/wavs/LJ005-0132.npy +tests/data/ljspeech/wavs/LJ039-0144.wav|tests/data/ljspeech/wavs/LJ039-0144.npy +tests/data/ljspeech/wavs/LJ014-0011.wav|tests/data/ljspeech/wavs/LJ014-0011.npy +tests/data/ljspeech/wavs/LJ012-0161.wav|tests/data/ljspeech/wavs/LJ012-0161.npy +tests/data/ljspeech/wavs/LJ041-0071.wav|tests/data/ljspeech/wavs/LJ041-0071.npy +tests/data/ljspeech/wavs/LJ003-0061.wav|tests/data/ljspeech/wavs/LJ003-0061.npy +tests/data/ljspeech/wavs/LJ010-0297.wav|tests/data/ljspeech/wavs/LJ010-0297.npy +tests/data/ljspeech/wavs/LJ033-0082.wav|tests/data/ljspeech/wavs/LJ033-0082.npy +tests/data/ljspeech/wavs/LJ015-0030.wav|tests/data/ljspeech/wavs/LJ015-0030.npy +tests/data/ljspeech/wavs/LJ024-0123.wav|tests/data/ljspeech/wavs/LJ024-0123.npy +tests/data/ljspeech/wavs/LJ039-0222.wav|tests/data/ljspeech/wavs/LJ039-0222.npy +tests/data/ljspeech/wavs/LJ025-0160.wav|tests/data/ljspeech/wavs/LJ025-0160.npy +tests/data/ljspeech/wavs/LJ020-0015.wav|tests/data/ljspeech/wavs/LJ020-0015.npy +tests/data/ljspeech/wavs/LJ011-0056.wav|tests/data/ljspeech/wavs/LJ011-0056.npy +tests/data/ljspeech/wavs/LJ013-0023.wav|tests/data/ljspeech/wavs/LJ013-0023.npy +tests/data/ljspeech/wavs/LJ050-0203.wav|tests/data/ljspeech/wavs/LJ050-0203.npy +tests/data/ljspeech/wavs/LJ022-0108.wav|tests/data/ljspeech/wavs/LJ022-0108.npy +tests/data/ljspeech/wavs/LJ029-0072.wav|tests/data/ljspeech/wavs/LJ029-0072.npy +tests/data/ljspeech/wavs/LJ002-0076.wav|tests/data/ljspeech/wavs/LJ002-0076.npy +tests/data/ljspeech/wavs/LJ004-0143.wav|tests/data/ljspeech/wavs/LJ004-0143.npy +tests/data/ljspeech/wavs/LJ005-0207.wav|tests/data/ljspeech/wavs/LJ005-0207.npy +tests/data/ljspeech/wavs/LJ019-0200.wav|tests/data/ljspeech/wavs/LJ019-0200.npy +tests/data/ljspeech/wavs/LJ017-0087.wav|tests/data/ljspeech/wavs/LJ017-0087.npy +tests/data/ljspeech/wavs/LJ010-0007.wav|tests/data/ljspeech/wavs/LJ010-0007.npy +tests/data/ljspeech/wavs/LJ037-0030.wav|tests/data/ljspeech/wavs/LJ037-0030.npy +tests/data/ljspeech/wavs/LJ022-0139.wav|tests/data/ljspeech/wavs/LJ022-0139.npy +tests/data/ljspeech/wavs/LJ017-0123.wav|tests/data/ljspeech/wavs/LJ017-0123.npy +tests/data/ljspeech/wavs/LJ003-0017.wav|tests/data/ljspeech/wavs/LJ003-0017.npy +tests/data/ljspeech/wavs/LJ032-0017.wav|tests/data/ljspeech/wavs/LJ032-0017.npy +tests/data/ljspeech/wavs/LJ010-0306.wav|tests/data/ljspeech/wavs/LJ010-0306.npy +tests/data/ljspeech/wavs/LJ046-0163.wav|tests/data/ljspeech/wavs/LJ046-0163.npy +tests/data/ljspeech/wavs/LJ023-0021.wav|tests/data/ljspeech/wavs/LJ023-0021.npy +tests/data/ljspeech/wavs/LJ036-0171.wav|tests/data/ljspeech/wavs/LJ036-0171.npy +tests/data/ljspeech/wavs/LJ004-0061.wav|tests/data/ljspeech/wavs/LJ004-0061.npy +tests/data/ljspeech/wavs/LJ031-0117.wav|tests/data/ljspeech/wavs/LJ031-0117.npy +tests/data/ljspeech/wavs/LJ047-0039.wav|tests/data/ljspeech/wavs/LJ047-0039.npy +tests/data/ljspeech/wavs/LJ019-0298.wav|tests/data/ljspeech/wavs/LJ019-0298.npy +tests/data/ljspeech/wavs/LJ013-0101.wav|tests/data/ljspeech/wavs/LJ013-0101.npy +tests/data/ljspeech/wavs/LJ021-0092.wav|tests/data/ljspeech/wavs/LJ021-0092.npy +tests/data/ljspeech/wavs/LJ026-0111.wav|tests/data/ljspeech/wavs/LJ026-0111.npy 
+tests/data/ljspeech/wavs/LJ019-0204.wav|tests/data/ljspeech/wavs/LJ019-0204.npy +tests/data/ljspeech/wavs/LJ027-0017.wav|tests/data/ljspeech/wavs/LJ027-0017.npy +tests/data/ljspeech/wavs/LJ017-0138.wav|tests/data/ljspeech/wavs/LJ017-0138.npy +tests/data/ljspeech/wavs/LJ031-0177.wav|tests/data/ljspeech/wavs/LJ031-0177.npy +tests/data/ljspeech/wavs/LJ047-0121.wav|tests/data/ljspeech/wavs/LJ047-0121.npy +tests/data/ljspeech/wavs/LJ043-0155.wav|tests/data/ljspeech/wavs/LJ043-0155.npy +tests/data/ljspeech/wavs/LJ019-0059.wav|tests/data/ljspeech/wavs/LJ019-0059.npy +tests/data/ljspeech/wavs/LJ014-0191.wav|tests/data/ljspeech/wavs/LJ014-0191.npy +tests/data/ljspeech/wavs/LJ016-0287.wav|tests/data/ljspeech/wavs/LJ016-0287.npy +tests/data/ljspeech/wavs/LJ016-0341.wav|tests/data/ljspeech/wavs/LJ016-0341.npy +tests/data/ljspeech/wavs/LJ037-0200.wav|tests/data/ljspeech/wavs/LJ037-0200.npy +tests/data/ljspeech/wavs/LJ021-0178.wav|tests/data/ljspeech/wavs/LJ021-0178.npy +tests/data/ljspeech/wavs/LJ036-0214.wav|tests/data/ljspeech/wavs/LJ036-0214.npy +tests/data/ljspeech/wavs/LJ018-0339.wav|tests/data/ljspeech/wavs/LJ018-0339.npy +tests/data/ljspeech/wavs/LJ037-0097.wav|tests/data/ljspeech/wavs/LJ037-0097.npy +tests/data/ljspeech/wavs/LJ036-0218.wav|tests/data/ljspeech/wavs/LJ036-0218.npy +tests/data/ljspeech/wavs/LJ023-0085.wav|tests/data/ljspeech/wavs/LJ023-0085.npy +tests/data/ljspeech/wavs/LJ049-0124.wav|tests/data/ljspeech/wavs/LJ049-0124.npy +tests/data/ljspeech/wavs/LJ035-0135.wav|tests/data/ljspeech/wavs/LJ035-0135.npy +tests/data/ljspeech/wavs/LJ029-0155.wav|tests/data/ljspeech/wavs/LJ029-0155.npy +tests/data/ljspeech/wavs/LJ001-0174.wav|tests/data/ljspeech/wavs/LJ001-0174.npy +tests/data/ljspeech/wavs/LJ028-0363.wav|tests/data/ljspeech/wavs/LJ028-0363.npy +tests/data/ljspeech/wavs/LJ046-0134.wav|tests/data/ljspeech/wavs/LJ046-0134.npy +tests/data/ljspeech/wavs/LJ015-0129.wav|tests/data/ljspeech/wavs/LJ015-0129.npy +tests/data/ljspeech/wavs/LJ046-0161.wav|tests/data/ljspeech/wavs/LJ046-0161.npy +tests/data/ljspeech/wavs/LJ042-0043.wav|tests/data/ljspeech/wavs/LJ042-0043.npy +tests/data/ljspeech/wavs/LJ020-0071.wav|tests/data/ljspeech/wavs/LJ020-0071.npy +tests/data/ljspeech/wavs/LJ020-0025.wav|tests/data/ljspeech/wavs/LJ020-0025.npy +tests/data/ljspeech/wavs/LJ043-0071.wav|tests/data/ljspeech/wavs/LJ043-0071.npy +tests/data/ljspeech/wavs/LJ021-0189.wav|tests/data/ljspeech/wavs/LJ021-0189.npy +tests/data/ljspeech/wavs/LJ022-0065.wav|tests/data/ljspeech/wavs/LJ022-0065.npy +tests/data/ljspeech/wavs/LJ015-0102.wav|tests/data/ljspeech/wavs/LJ015-0102.npy +tests/data/ljspeech/wavs/LJ048-0050.wav|tests/data/ljspeech/wavs/LJ048-0050.npy +tests/data/ljspeech/wavs/LJ012-0274.wav|tests/data/ljspeech/wavs/LJ012-0274.npy +tests/data/ljspeech/wavs/LJ013-0002.wav|tests/data/ljspeech/wavs/LJ013-0002.npy +tests/data/ljspeech/wavs/LJ006-0227.wav|tests/data/ljspeech/wavs/LJ006-0227.npy +tests/data/ljspeech/wavs/LJ039-0072.wav|tests/data/ljspeech/wavs/LJ039-0072.npy +tests/data/ljspeech/wavs/LJ008-0226.wav|tests/data/ljspeech/wavs/LJ008-0226.npy +tests/data/ljspeech/wavs/LJ039-0080.wav|tests/data/ljspeech/wavs/LJ039-0080.npy +tests/data/ljspeech/wavs/LJ003-0134.wav|tests/data/ljspeech/wavs/LJ003-0134.npy +tests/data/ljspeech/wavs/LJ048-0150.wav|tests/data/ljspeech/wavs/LJ048-0150.npy +tests/data/ljspeech/wavs/LJ002-0191.wav|tests/data/ljspeech/wavs/LJ002-0191.npy +tests/data/ljspeech/wavs/LJ045-0030.wav|tests/data/ljspeech/wavs/LJ045-0030.npy 
+tests/data/ljspeech/wavs/LJ021-0032.wav|tests/data/ljspeech/wavs/LJ021-0032.npy +tests/data/ljspeech/wavs/LJ010-0118.wav|tests/data/ljspeech/wavs/LJ010-0118.npy +tests/data/ljspeech/wavs/LJ024-0033.wav|tests/data/ljspeech/wavs/LJ024-0033.npy +tests/data/ljspeech/wavs/LJ012-0002.wav|tests/data/ljspeech/wavs/LJ012-0002.npy +tests/data/ljspeech/wavs/LJ046-0014.wav|tests/data/ljspeech/wavs/LJ046-0014.npy +tests/data/ljspeech/wavs/LJ028-0265.wav|tests/data/ljspeech/wavs/LJ028-0265.npy +tests/data/ljspeech/wavs/LJ007-0006.wav|tests/data/ljspeech/wavs/LJ007-0006.npy +tests/data/ljspeech/wavs/LJ006-0291.wav|tests/data/ljspeech/wavs/LJ006-0291.npy +tests/data/ljspeech/wavs/LJ008-0218.wav|tests/data/ljspeech/wavs/LJ008-0218.npy +tests/data/ljspeech/wavs/LJ008-0180.wav|tests/data/ljspeech/wavs/LJ008-0180.npy +tests/data/ljspeech/wavs/LJ016-0204.wav|tests/data/ljspeech/wavs/LJ016-0204.npy +tests/data/ljspeech/wavs/LJ018-0130.wav|tests/data/ljspeech/wavs/LJ018-0130.npy +tests/data/ljspeech/wavs/LJ036-0077.wav|tests/data/ljspeech/wavs/LJ036-0077.npy +tests/data/ljspeech/wavs/LJ028-0134.wav|tests/data/ljspeech/wavs/LJ028-0134.npy +tests/data/ljspeech/wavs/LJ046-0057.wav|tests/data/ljspeech/wavs/LJ046-0057.npy +tests/data/ljspeech/wavs/LJ045-0141.wav|tests/data/ljspeech/wavs/LJ045-0141.npy +tests/data/ljspeech/wavs/LJ041-0003.wav|tests/data/ljspeech/wavs/LJ041-0003.npy +tests/data/ljspeech/wavs/LJ029-0154.wav|tests/data/ljspeech/wavs/LJ029-0154.npy +tests/data/ljspeech/wavs/LJ046-0170.wav|tests/data/ljspeech/wavs/LJ046-0170.npy +tests/data/ljspeech/wavs/LJ023-0025.wav|tests/data/ljspeech/wavs/LJ023-0025.npy +tests/data/ljspeech/wavs/LJ038-0035.wav|tests/data/ljspeech/wavs/LJ038-0035.npy +tests/data/ljspeech/wavs/LJ037-0239.wav|tests/data/ljspeech/wavs/LJ037-0239.npy +tests/data/ljspeech/wavs/LJ004-0101.wav|tests/data/ljspeech/wavs/LJ004-0101.npy +tests/data/ljspeech/wavs/LJ015-0110.wav|tests/data/ljspeech/wavs/LJ015-0110.npy +tests/data/ljspeech/wavs/LJ036-0127.wav|tests/data/ljspeech/wavs/LJ036-0127.npy +tests/data/ljspeech/wavs/LJ044-0143.wav|tests/data/ljspeech/wavs/LJ044-0143.npy +tests/data/ljspeech/wavs/LJ024-0020.wav|tests/data/ljspeech/wavs/LJ024-0020.npy +tests/data/ljspeech/wavs/LJ014-0288.wav|tests/data/ljspeech/wavs/LJ014-0288.npy +tests/data/ljspeech/wavs/LJ028-0336.wav|tests/data/ljspeech/wavs/LJ028-0336.npy +tests/data/ljspeech/wavs/LJ041-0005.wav|tests/data/ljspeech/wavs/LJ041-0005.npy +tests/data/ljspeech/wavs/LJ003-0058.wav|tests/data/ljspeech/wavs/LJ003-0058.npy +tests/data/ljspeech/wavs/LJ014-0227.wav|tests/data/ljspeech/wavs/LJ014-0227.npy +tests/data/ljspeech/wavs/LJ015-0209.wav|tests/data/ljspeech/wavs/LJ015-0209.npy +tests/data/ljspeech/wavs/LJ045-0172.wav|tests/data/ljspeech/wavs/LJ045-0172.npy +tests/data/ljspeech/wavs/LJ038-0083.wav|tests/data/ljspeech/wavs/LJ038-0083.npy +tests/data/ljspeech/wavs/LJ016-0126.wav|tests/data/ljspeech/wavs/LJ016-0126.npy +tests/data/ljspeech/wavs/LJ004-0099.wav|tests/data/ljspeech/wavs/LJ004-0099.npy +tests/data/ljspeech/wavs/LJ050-0205.wav|tests/data/ljspeech/wavs/LJ050-0205.npy +tests/data/ljspeech/wavs/LJ050-0236.wav|tests/data/ljspeech/wavs/LJ050-0236.npy +tests/data/ljspeech/wavs/LJ038-0279.wav|tests/data/ljspeech/wavs/LJ038-0279.npy +tests/data/ljspeech/wavs/LJ019-0389.wav|tests/data/ljspeech/wavs/LJ019-0389.npy +tests/data/ljspeech/wavs/LJ032-0147.wav|tests/data/ljspeech/wavs/LJ032-0147.npy +tests/data/ljspeech/wavs/LJ004-0092.wav|tests/data/ljspeech/wavs/LJ004-0092.npy 
+tests/data/ljspeech/wavs/LJ047-0070.wav|tests/data/ljspeech/wavs/LJ047-0070.npy +tests/data/ljspeech/wavs/LJ013-0013.wav|tests/data/ljspeech/wavs/LJ013-0013.npy +tests/data/ljspeech/wavs/LJ047-0176.wav|tests/data/ljspeech/wavs/LJ047-0176.npy +tests/data/ljspeech/wavs/LJ048-0002.wav|tests/data/ljspeech/wavs/LJ048-0002.npy +tests/data/ljspeech/wavs/LJ049-0213.wav|tests/data/ljspeech/wavs/LJ049-0213.npy +tests/data/ljspeech/wavs/LJ039-0082.wav|tests/data/ljspeech/wavs/LJ039-0082.npy +tests/data/ljspeech/wavs/LJ031-0146.wav|tests/data/ljspeech/wavs/LJ031-0146.npy +tests/data/ljspeech/wavs/LJ016-0312.wav|tests/data/ljspeech/wavs/LJ016-0312.npy +tests/data/ljspeech/wavs/LJ008-0114.wav|tests/data/ljspeech/wavs/LJ008-0114.npy +tests/data/ljspeech/wavs/LJ023-0062.wav|tests/data/ljspeech/wavs/LJ023-0062.npy +tests/data/ljspeech/wavs/LJ011-0020.wav|tests/data/ljspeech/wavs/LJ011-0020.npy +tests/data/ljspeech/wavs/LJ024-0064.wav|tests/data/ljspeech/wavs/LJ024-0064.npy +tests/data/ljspeech/wavs/LJ005-0037.wav|tests/data/ljspeech/wavs/LJ005-0037.npy +tests/data/ljspeech/wavs/LJ043-0112.wav|tests/data/ljspeech/wavs/LJ043-0112.npy +tests/data/ljspeech/wavs/LJ024-0038.wav|tests/data/ljspeech/wavs/LJ024-0038.npy +tests/data/ljspeech/wavs/LJ023-0011.wav|tests/data/ljspeech/wavs/LJ023-0011.npy +tests/data/ljspeech/wavs/LJ006-0208.wav|tests/data/ljspeech/wavs/LJ006-0208.npy +tests/data/ljspeech/wavs/LJ025-0051.wav|tests/data/ljspeech/wavs/LJ025-0051.npy +tests/data/ljspeech/wavs/LJ035-0187.wav|tests/data/ljspeech/wavs/LJ035-0187.npy +tests/data/ljspeech/wavs/LJ028-0160.wav|tests/data/ljspeech/wavs/LJ028-0160.npy +tests/data/ljspeech/wavs/LJ041-0091.wav|tests/data/ljspeech/wavs/LJ041-0091.npy +tests/data/ljspeech/wavs/LJ008-0161.wav|tests/data/ljspeech/wavs/LJ008-0161.npy +tests/data/ljspeech/wavs/LJ017-0042.wav|tests/data/ljspeech/wavs/LJ017-0042.npy +tests/data/ljspeech/wavs/LJ036-0151.wav|tests/data/ljspeech/wavs/LJ036-0151.npy +tests/data/ljspeech/wavs/LJ032-0229.wav|tests/data/ljspeech/wavs/LJ032-0229.npy +tests/data/ljspeech/wavs/LJ008-0271.wav|tests/data/ljspeech/wavs/LJ008-0271.npy +tests/data/ljspeech/wavs/LJ047-0210.wav|tests/data/ljspeech/wavs/LJ047-0210.npy +tests/data/ljspeech/wavs/LJ028-0224.wav|tests/data/ljspeech/wavs/LJ028-0224.npy +tests/data/ljspeech/wavs/LJ024-0034.wav|tests/data/ljspeech/wavs/LJ024-0034.npy +tests/data/ljspeech/wavs/LJ009-0088.wav|tests/data/ljspeech/wavs/LJ009-0088.npy +tests/data/ljspeech/wavs/LJ031-0062.wav|tests/data/ljspeech/wavs/LJ031-0062.npy +tests/data/ljspeech/wavs/LJ037-0237.wav|tests/data/ljspeech/wavs/LJ037-0237.npy +tests/data/ljspeech/wavs/LJ040-0128.wav|tests/data/ljspeech/wavs/LJ040-0128.npy +tests/data/ljspeech/wavs/LJ032-0061.wav|tests/data/ljspeech/wavs/LJ032-0061.npy +tests/data/ljspeech/wavs/LJ050-0107.wav|tests/data/ljspeech/wavs/LJ050-0107.npy +tests/data/ljspeech/wavs/LJ007-0033.wav|tests/data/ljspeech/wavs/LJ007-0033.npy +tests/data/ljspeech/wavs/LJ037-0222.wav|tests/data/ljspeech/wavs/LJ037-0222.npy +tests/data/ljspeech/wavs/LJ016-0409.wav|tests/data/ljspeech/wavs/LJ016-0409.npy +tests/data/ljspeech/wavs/LJ015-0179.wav|tests/data/ljspeech/wavs/LJ015-0179.npy +tests/data/ljspeech/wavs/LJ016-0187.wav|tests/data/ljspeech/wavs/LJ016-0187.npy +tests/data/ljspeech/wavs/LJ007-0071.wav|tests/data/ljspeech/wavs/LJ007-0071.npy +tests/data/ljspeech/wavs/LJ022-0060.wav|tests/data/ljspeech/wavs/LJ022-0060.npy +tests/data/ljspeech/wavs/LJ047-0135.wav|tests/data/ljspeech/wavs/LJ047-0135.npy 
+tests/data/ljspeech/wavs/LJ040-0090.wav|tests/data/ljspeech/wavs/LJ040-0090.npy +tests/data/ljspeech/wavs/LJ011-0044.wav|tests/data/ljspeech/wavs/LJ011-0044.npy +tests/data/ljspeech/wavs/LJ015-0279.wav|tests/data/ljspeech/wavs/LJ015-0279.npy +tests/data/ljspeech/wavs/LJ014-0237.wav|tests/data/ljspeech/wavs/LJ014-0237.npy +tests/data/ljspeech/wavs/LJ014-0202.wav|tests/data/ljspeech/wavs/LJ014-0202.npy +tests/data/ljspeech/wavs/LJ028-0112.wav|tests/data/ljspeech/wavs/LJ028-0112.npy +tests/data/ljspeech/wavs/LJ010-0094.wav|tests/data/ljspeech/wavs/LJ010-0094.npy +tests/data/ljspeech/wavs/LJ015-0224.wav|tests/data/ljspeech/wavs/LJ015-0224.npy +tests/data/ljspeech/wavs/LJ009-0301.wav|tests/data/ljspeech/wavs/LJ009-0301.npy +tests/data/ljspeech/wavs/LJ012-0228.wav|tests/data/ljspeech/wavs/LJ012-0228.npy +tests/data/ljspeech/wavs/LJ047-0219.wav|tests/data/ljspeech/wavs/LJ047-0219.npy +tests/data/ljspeech/wavs/LJ025-0130.wav|tests/data/ljspeech/wavs/LJ025-0130.npy +tests/data/ljspeech/wavs/LJ038-0033.wav|tests/data/ljspeech/wavs/LJ038-0033.npy +tests/data/ljspeech/wavs/LJ037-0061.wav|tests/data/ljspeech/wavs/LJ037-0061.npy +tests/data/ljspeech/wavs/LJ025-0105.wav|tests/data/ljspeech/wavs/LJ025-0105.npy +tests/data/ljspeech/wavs/LJ027-0064.wav|tests/data/ljspeech/wavs/LJ027-0064.npy +tests/data/ljspeech/wavs/LJ021-0059.wav|tests/data/ljspeech/wavs/LJ021-0059.npy +tests/data/ljspeech/wavs/LJ011-0234.wav|tests/data/ljspeech/wavs/LJ011-0234.npy +tests/data/ljspeech/wavs/LJ009-0010.wav|tests/data/ljspeech/wavs/LJ009-0010.npy +tests/data/ljspeech/wavs/LJ015-0161.wav|tests/data/ljspeech/wavs/LJ015-0161.npy +tests/data/ljspeech/wavs/LJ012-0025.wav|tests/data/ljspeech/wavs/LJ012-0025.npy +tests/data/ljspeech/wavs/LJ032-0075.wav|tests/data/ljspeech/wavs/LJ032-0075.npy +tests/data/ljspeech/wavs/LJ029-0185.wav|tests/data/ljspeech/wavs/LJ029-0185.npy +tests/data/ljspeech/wavs/LJ046-0151.wav|tests/data/ljspeech/wavs/LJ046-0151.npy +tests/data/ljspeech/wavs/LJ044-0205.wav|tests/data/ljspeech/wavs/LJ044-0205.npy +tests/data/ljspeech/wavs/LJ023-0096.wav|tests/data/ljspeech/wavs/LJ023-0096.npy +tests/data/ljspeech/wavs/LJ043-0052.wav|tests/data/ljspeech/wavs/LJ043-0052.npy +tests/data/ljspeech/wavs/LJ049-0003.wav|tests/data/ljspeech/wavs/LJ049-0003.npy +tests/data/ljspeech/wavs/LJ048-0156.wav|tests/data/ljspeech/wavs/LJ048-0156.npy +tests/data/ljspeech/wavs/LJ019-0180.wav|tests/data/ljspeech/wavs/LJ019-0180.npy +tests/data/ljspeech/wavs/LJ047-0002.wav|tests/data/ljspeech/wavs/LJ047-0002.npy +tests/data/ljspeech/wavs/LJ042-0118.wav|tests/data/ljspeech/wavs/LJ042-0118.npy +tests/data/ljspeech/wavs/LJ034-0105.wav|tests/data/ljspeech/wavs/LJ034-0105.npy +tests/data/ljspeech/wavs/LJ005-0178.wav|tests/data/ljspeech/wavs/LJ005-0178.npy +tests/data/ljspeech/wavs/LJ004-0052.wav|tests/data/ljspeech/wavs/LJ004-0052.npy +tests/data/ljspeech/wavs/LJ003-0123.wav|tests/data/ljspeech/wavs/LJ003-0123.npy +tests/data/ljspeech/wavs/LJ026-0105.wav|tests/data/ljspeech/wavs/LJ026-0105.npy +tests/data/ljspeech/wavs/LJ019-0126.wav|tests/data/ljspeech/wavs/LJ019-0126.npy +tests/data/ljspeech/wavs/LJ030-0075.wav|tests/data/ljspeech/wavs/LJ030-0075.npy +tests/data/ljspeech/wavs/LJ017-0258.wav|tests/data/ljspeech/wavs/LJ017-0258.npy +tests/data/ljspeech/wavs/LJ035-0103.wav|tests/data/ljspeech/wavs/LJ035-0103.npy +tests/data/ljspeech/wavs/LJ045-0221.wav|tests/data/ljspeech/wavs/LJ045-0221.npy +tests/data/ljspeech/wavs/LJ018-0008.wav|tests/data/ljspeech/wavs/LJ018-0008.npy 
+tests/data/ljspeech/wavs/LJ011-0132.wav|tests/data/ljspeech/wavs/LJ011-0132.npy +tests/data/ljspeech/wavs/LJ049-0018.wav|tests/data/ljspeech/wavs/LJ049-0018.npy +tests/data/ljspeech/wavs/LJ003-0279.wav|tests/data/ljspeech/wavs/LJ003-0279.npy +tests/data/ljspeech/wavs/LJ049-0010.wav|tests/data/ljspeech/wavs/LJ049-0010.npy +tests/data/ljspeech/wavs/LJ036-0061.wav|tests/data/ljspeech/wavs/LJ036-0061.npy +tests/data/ljspeech/wavs/LJ045-0084.wav|tests/data/ljspeech/wavs/LJ045-0084.npy +tests/data/ljspeech/wavs/LJ027-0040.wav|tests/data/ljspeech/wavs/LJ027-0040.npy +tests/data/ljspeech/wavs/LJ008-0069.wav|tests/data/ljspeech/wavs/LJ008-0069.npy +tests/data/ljspeech/wavs/LJ030-0240.wav|tests/data/ljspeech/wavs/LJ030-0240.npy +tests/data/ljspeech/wavs/LJ042-0011.wav|tests/data/ljspeech/wavs/LJ042-0011.npy +tests/data/ljspeech/wavs/LJ043-0134.wav|tests/data/ljspeech/wavs/LJ043-0134.npy +tests/data/ljspeech/wavs/LJ032-0024.wav|tests/data/ljspeech/wavs/LJ032-0024.npy +tests/data/ljspeech/wavs/LJ033-0113.wav|tests/data/ljspeech/wavs/LJ033-0113.npy +tests/data/ljspeech/wavs/LJ031-0070.wav|tests/data/ljspeech/wavs/LJ031-0070.npy +tests/data/ljspeech/wavs/LJ036-0177.wav|tests/data/ljspeech/wavs/LJ036-0177.npy +tests/data/ljspeech/wavs/LJ034-0020.wav|tests/data/ljspeech/wavs/LJ034-0020.npy +tests/data/ljspeech/wavs/LJ026-0126.wav|tests/data/ljspeech/wavs/LJ026-0126.npy +tests/data/ljspeech/wavs/LJ032-0023.wav|tests/data/ljspeech/wavs/LJ032-0023.npy +tests/data/ljspeech/wavs/LJ050-0019.wav|tests/data/ljspeech/wavs/LJ050-0019.npy +tests/data/ljspeech/wavs/LJ043-0167.wav|tests/data/ljspeech/wavs/LJ043-0167.npy +tests/data/ljspeech/wavs/LJ004-0150.wav|tests/data/ljspeech/wavs/LJ004-0150.npy +tests/data/ljspeech/wavs/LJ036-0014.wav|tests/data/ljspeech/wavs/LJ036-0014.npy +tests/data/ljspeech/wavs/LJ005-0231.wav|tests/data/ljspeech/wavs/LJ005-0231.npy +tests/data/ljspeech/wavs/LJ049-0025.wav|tests/data/ljspeech/wavs/LJ049-0025.npy +tests/data/ljspeech/wavs/LJ035-0051.wav|tests/data/ljspeech/wavs/LJ035-0051.npy +tests/data/ljspeech/wavs/LJ049-0090.wav|tests/data/ljspeech/wavs/LJ049-0090.npy +tests/data/ljspeech/wavs/LJ022-0192.wav|tests/data/ljspeech/wavs/LJ022-0192.npy +tests/data/ljspeech/wavs/LJ038-0063.wav|tests/data/ljspeech/wavs/LJ038-0063.npy +tests/data/ljspeech/wavs/LJ016-0055.wav|tests/data/ljspeech/wavs/LJ016-0055.npy +tests/data/ljspeech/wavs/LJ035-0014.wav|tests/data/ljspeech/wavs/LJ035-0014.npy +tests/data/ljspeech/wavs/LJ011-0008.wav|tests/data/ljspeech/wavs/LJ011-0008.npy +tests/data/ljspeech/wavs/LJ028-0018.wav|tests/data/ljspeech/wavs/LJ028-0018.npy +tests/data/ljspeech/wavs/LJ041-0094.wav|tests/data/ljspeech/wavs/LJ041-0094.npy +tests/data/ljspeech/wavs/LJ001-0090.wav|tests/data/ljspeech/wavs/LJ001-0090.npy +tests/data/ljspeech/wavs/LJ010-0283.wav|tests/data/ljspeech/wavs/LJ010-0283.npy +tests/data/ljspeech/wavs/LJ036-0136.wav|tests/data/ljspeech/wavs/LJ036-0136.npy +tests/data/ljspeech/wavs/LJ010-0286.wav|tests/data/ljspeech/wavs/LJ010-0286.npy +tests/data/ljspeech/wavs/LJ002-0007.wav|tests/data/ljspeech/wavs/LJ002-0007.npy +tests/data/ljspeech/wavs/LJ004-0137.wav|tests/data/ljspeech/wavs/LJ004-0137.npy +tests/data/ljspeech/wavs/LJ035-0196.wav|tests/data/ljspeech/wavs/LJ035-0196.npy +tests/data/ljspeech/wavs/LJ030-0029.wav|tests/data/ljspeech/wavs/LJ030-0029.npy +tests/data/ljspeech/wavs/LJ034-0146.wav|tests/data/ljspeech/wavs/LJ034-0146.npy +tests/data/ljspeech/wavs/LJ036-0074.wav|tests/data/ljspeech/wavs/LJ036-0074.npy 
+tests/data/ljspeech/wavs/LJ038-0164.wav|tests/data/ljspeech/wavs/LJ038-0164.npy +tests/data/ljspeech/wavs/LJ034-0009.wav|tests/data/ljspeech/wavs/LJ034-0009.npy +tests/data/ljspeech/wavs/LJ031-0003.wav|tests/data/ljspeech/wavs/LJ031-0003.npy +tests/data/ljspeech/wavs/LJ003-0220.wav|tests/data/ljspeech/wavs/LJ003-0220.npy +tests/data/ljspeech/wavs/LJ017-0271.wav|tests/data/ljspeech/wavs/LJ017-0271.npy +tests/data/ljspeech/wavs/LJ046-0116.wav|tests/data/ljspeech/wavs/LJ046-0116.npy +tests/data/ljspeech/wavs/LJ041-0162.wav|tests/data/ljspeech/wavs/LJ041-0162.npy +tests/data/ljspeech/wavs/LJ039-0030.wav|tests/data/ljspeech/wavs/LJ039-0030.npy +tests/data/ljspeech/wavs/LJ002-0142.wav|tests/data/ljspeech/wavs/LJ002-0142.npy +tests/data/ljspeech/wavs/LJ024-0051.wav|tests/data/ljspeech/wavs/LJ024-0051.npy +tests/data/ljspeech/wavs/LJ009-0025.wav|tests/data/ljspeech/wavs/LJ009-0025.npy +tests/data/ljspeech/wavs/LJ028-0461.wav|tests/data/ljspeech/wavs/LJ028-0461.npy +tests/data/ljspeech/wavs/LJ005-0034.wav|tests/data/ljspeech/wavs/LJ005-0034.npy +tests/data/ljspeech/wavs/LJ002-0239.wav|tests/data/ljspeech/wavs/LJ002-0239.npy +tests/data/ljspeech/wavs/LJ046-0024.wav|tests/data/ljspeech/wavs/LJ046-0024.npy +tests/data/ljspeech/wavs/LJ016-0193.wav|tests/data/ljspeech/wavs/LJ016-0193.npy +tests/data/ljspeech/wavs/LJ039-0152.wav|tests/data/ljspeech/wavs/LJ039-0152.npy +tests/data/ljspeech/wavs/LJ010-0192.wav|tests/data/ljspeech/wavs/LJ010-0192.npy +tests/data/ljspeech/wavs/LJ040-0099.wav|tests/data/ljspeech/wavs/LJ040-0099.npy +tests/data/ljspeech/wavs/LJ010-0042.wav|tests/data/ljspeech/wavs/LJ010-0042.npy +tests/data/ljspeech/wavs/LJ013-0216.wav|tests/data/ljspeech/wavs/LJ013-0216.npy +tests/data/ljspeech/wavs/LJ008-0233.wav|tests/data/ljspeech/wavs/LJ008-0233.npy +tests/data/ljspeech/wavs/LJ014-0104.wav|tests/data/ljspeech/wavs/LJ014-0104.npy +tests/data/ljspeech/wavs/LJ014-0311.wav|tests/data/ljspeech/wavs/LJ014-0311.npy +tests/data/ljspeech/wavs/LJ001-0185.wav|tests/data/ljspeech/wavs/LJ001-0185.npy +tests/data/ljspeech/wavs/LJ031-0160.wav|tests/data/ljspeech/wavs/LJ031-0160.npy +tests/data/ljspeech/wavs/LJ023-0055.wav|tests/data/ljspeech/wavs/LJ023-0055.npy +tests/data/ljspeech/wavs/LJ017-0025.wav|tests/data/ljspeech/wavs/LJ017-0025.npy +tests/data/ljspeech/wavs/LJ029-0136.wav|tests/data/ljspeech/wavs/LJ029-0136.npy +tests/data/ljspeech/wavs/LJ012-0136.wav|tests/data/ljspeech/wavs/LJ012-0136.npy +tests/data/ljspeech/wavs/LJ042-0127.wav|tests/data/ljspeech/wavs/LJ042-0127.npy +tests/data/ljspeech/wavs/LJ034-0110.wav|tests/data/ljspeech/wavs/LJ034-0110.npy +tests/data/ljspeech/wavs/LJ032-0066.wav|tests/data/ljspeech/wavs/LJ032-0066.npy +tests/data/ljspeech/wavs/LJ006-0007.wav|tests/data/ljspeech/wavs/LJ006-0007.npy +tests/data/ljspeech/wavs/LJ035-0074.wav|tests/data/ljspeech/wavs/LJ035-0074.npy +tests/data/ljspeech/wavs/LJ047-0045.wav|tests/data/ljspeech/wavs/LJ047-0045.npy +tests/data/ljspeech/wavs/LJ007-0073.wav|tests/data/ljspeech/wavs/LJ007-0073.npy +tests/data/ljspeech/wavs/LJ022-0148.wav|tests/data/ljspeech/wavs/LJ022-0148.npy +tests/data/ljspeech/wavs/LJ017-0150.wav|tests/data/ljspeech/wavs/LJ017-0150.npy +tests/data/ljspeech/wavs/LJ019-0380.wav|tests/data/ljspeech/wavs/LJ019-0380.npy +tests/data/ljspeech/wavs/LJ028-0260.wav|tests/data/ljspeech/wavs/LJ028-0260.npy +tests/data/ljspeech/wavs/LJ030-0094.wav|tests/data/ljspeech/wavs/LJ030-0094.npy +tests/data/ljspeech/wavs/LJ029-0128.wav|tests/data/ljspeech/wavs/LJ029-0128.npy 
+tests/data/ljspeech/wavs/LJ015-0053.wav|tests/data/ljspeech/wavs/LJ015-0053.npy +tests/data/ljspeech/wavs/LJ027-0043.wav|tests/data/ljspeech/wavs/LJ027-0043.npy +tests/data/ljspeech/wavs/LJ050-0238.wav|tests/data/ljspeech/wavs/LJ050-0238.npy +tests/data/ljspeech/wavs/LJ013-0099.wav|tests/data/ljspeech/wavs/LJ013-0099.npy +tests/data/ljspeech/wavs/LJ020-0006.wav|tests/data/ljspeech/wavs/LJ020-0006.npy +tests/data/ljspeech/wavs/LJ024-0021.wav|tests/data/ljspeech/wavs/LJ024-0021.npy +tests/data/ljspeech/wavs/LJ019-0305.wav|tests/data/ljspeech/wavs/LJ019-0305.npy +tests/data/ljspeech/wavs/LJ033-0158.wav|tests/data/ljspeech/wavs/LJ033-0158.npy +tests/data/ljspeech/wavs/LJ016-0088.wav|tests/data/ljspeech/wavs/LJ016-0088.npy +tests/data/ljspeech/wavs/LJ014-0338.wav|tests/data/ljspeech/wavs/LJ014-0338.npy +tests/data/ljspeech/wavs/LJ019-0249.wav|tests/data/ljspeech/wavs/LJ019-0249.npy +tests/data/ljspeech/wavs/LJ003-0259.wav|tests/data/ljspeech/wavs/LJ003-0259.npy +tests/data/ljspeech/wavs/LJ035-0151.wav|tests/data/ljspeech/wavs/LJ035-0151.npy +tests/data/ljspeech/wavs/LJ004-0221.wav|tests/data/ljspeech/wavs/LJ004-0221.npy +tests/data/ljspeech/wavs/LJ035-0081.wav|tests/data/ljspeech/wavs/LJ035-0081.npy +tests/data/ljspeech/wavs/LJ038-0301.wav|tests/data/ljspeech/wavs/LJ038-0301.npy +tests/data/ljspeech/wavs/LJ012-0194.wav|tests/data/ljspeech/wavs/LJ012-0194.npy +tests/data/ljspeech/wavs/LJ003-0200.wav|tests/data/ljspeech/wavs/LJ003-0200.npy +tests/data/ljspeech/wavs/LJ018-0316.wav|tests/data/ljspeech/wavs/LJ018-0316.npy +tests/data/ljspeech/wavs/LJ048-0041.wav|tests/data/ljspeech/wavs/LJ048-0041.npy +tests/data/ljspeech/wavs/LJ046-0131.wav|tests/data/ljspeech/wavs/LJ046-0131.npy +tests/data/ljspeech/wavs/LJ014-0230.wav|tests/data/ljspeech/wavs/LJ014-0230.npy +tests/data/ljspeech/wavs/LJ016-0350.wav|tests/data/ljspeech/wavs/LJ016-0350.npy +tests/data/ljspeech/wavs/LJ042-0251.wav|tests/data/ljspeech/wavs/LJ042-0251.npy +tests/data/ljspeech/wavs/LJ014-0304.wav|tests/data/ljspeech/wavs/LJ014-0304.npy +tests/data/ljspeech/wavs/LJ009-0246.wav|tests/data/ljspeech/wavs/LJ009-0246.npy +tests/data/ljspeech/wavs/LJ024-0050.wav|tests/data/ljspeech/wavs/LJ024-0050.npy +tests/data/ljspeech/wavs/LJ036-0188.wav|tests/data/ljspeech/wavs/LJ036-0188.npy +tests/data/ljspeech/wavs/LJ001-0081.wav|tests/data/ljspeech/wavs/LJ001-0081.npy +tests/data/ljspeech/wavs/LJ045-0223.wav|tests/data/ljspeech/wavs/LJ045-0223.npy +tests/data/ljspeech/wavs/LJ022-0182.wav|tests/data/ljspeech/wavs/LJ022-0182.npy +tests/data/ljspeech/wavs/LJ027-0151.wav|tests/data/ljspeech/wavs/LJ027-0151.npy +tests/data/ljspeech/wavs/LJ014-0290.wav|tests/data/ljspeech/wavs/LJ014-0290.npy +tests/data/ljspeech/wavs/LJ008-0137.wav|tests/data/ljspeech/wavs/LJ008-0137.npy +tests/data/ljspeech/wavs/LJ037-0126.wav|tests/data/ljspeech/wavs/LJ037-0126.npy +tests/data/ljspeech/wavs/LJ050-0230.wav|tests/data/ljspeech/wavs/LJ050-0230.npy +tests/data/ljspeech/wavs/LJ027-0148.wav|tests/data/ljspeech/wavs/LJ027-0148.npy +tests/data/ljspeech/wavs/LJ028-0369.wav|tests/data/ljspeech/wavs/LJ028-0369.npy +tests/data/ljspeech/wavs/LJ015-0270.wav|tests/data/ljspeech/wavs/LJ015-0270.npy +tests/data/ljspeech/wavs/LJ040-0226.wav|tests/data/ljspeech/wavs/LJ040-0226.npy +tests/data/ljspeech/wavs/LJ012-0222.wav|tests/data/ljspeech/wavs/LJ012-0222.npy +tests/data/ljspeech/wavs/LJ003-0044.wav|tests/data/ljspeech/wavs/LJ003-0044.npy +tests/data/ljspeech/wavs/LJ017-0005.wav|tests/data/ljspeech/wavs/LJ017-0005.npy 
+tests/data/ljspeech/wavs/LJ016-0289.wav|tests/data/ljspeech/wavs/LJ016-0289.npy +tests/data/ljspeech/wavs/LJ027-0080.wav|tests/data/ljspeech/wavs/LJ027-0080.npy +tests/data/ljspeech/wavs/LJ043-0056.wav|tests/data/ljspeech/wavs/LJ043-0056.npy +tests/data/ljspeech/wavs/LJ008-0290.wav|tests/data/ljspeech/wavs/LJ008-0290.npy +tests/data/ljspeech/wavs/LJ021-0136.wav|tests/data/ljspeech/wavs/LJ021-0136.npy +tests/data/ljspeech/wavs/LJ005-0288.wav|tests/data/ljspeech/wavs/LJ005-0288.npy +tests/data/ljspeech/wavs/LJ023-0034.wav|tests/data/ljspeech/wavs/LJ023-0034.npy +tests/data/ljspeech/wavs/LJ027-0150.wav|tests/data/ljspeech/wavs/LJ027-0150.npy +tests/data/ljspeech/wavs/LJ018-0294.wav|tests/data/ljspeech/wavs/LJ018-0294.npy +tests/data/ljspeech/wavs/LJ031-0115.wav|tests/data/ljspeech/wavs/LJ031-0115.npy +tests/data/ljspeech/wavs/LJ038-0103.wav|tests/data/ljspeech/wavs/LJ038-0103.npy +tests/data/ljspeech/wavs/LJ046-0193.wav|tests/data/ljspeech/wavs/LJ046-0193.npy +tests/data/ljspeech/wavs/LJ030-0101.wav|tests/data/ljspeech/wavs/LJ030-0101.npy +tests/data/ljspeech/wavs/LJ019-0084.wav|tests/data/ljspeech/wavs/LJ019-0084.npy +tests/data/ljspeech/wavs/LJ014-0293.wav|tests/data/ljspeech/wavs/LJ014-0293.npy +tests/data/ljspeech/wavs/LJ005-0076.wav|tests/data/ljspeech/wavs/LJ005-0076.npy +tests/data/ljspeech/wavs/LJ036-0083.wav|tests/data/ljspeech/wavs/LJ036-0083.npy +tests/data/ljspeech/wavs/LJ036-0025.wav|tests/data/ljspeech/wavs/LJ036-0025.npy +tests/data/ljspeech/wavs/LJ038-0187.wav|tests/data/ljspeech/wavs/LJ038-0187.npy +tests/data/ljspeech/wavs/LJ018-0163.wav|tests/data/ljspeech/wavs/LJ018-0163.npy +tests/data/ljspeech/wavs/LJ036-0130.wav|tests/data/ljspeech/wavs/LJ036-0130.npy +tests/data/ljspeech/wavs/LJ028-0367.wav|tests/data/ljspeech/wavs/LJ028-0367.npy +tests/data/ljspeech/wavs/LJ028-0168.wav|tests/data/ljspeech/wavs/LJ028-0168.npy +tests/data/ljspeech/wavs/LJ014-0095.wav|tests/data/ljspeech/wavs/LJ014-0095.npy +tests/data/ljspeech/wavs/LJ018-0082.wav|tests/data/ljspeech/wavs/LJ018-0082.npy +tests/data/ljspeech/wavs/LJ030-0174.wav|tests/data/ljspeech/wavs/LJ030-0174.npy +tests/data/ljspeech/wavs/LJ038-0141.wav|tests/data/ljspeech/wavs/LJ038-0141.npy +tests/data/ljspeech/wavs/LJ017-0045.wav|tests/data/ljspeech/wavs/LJ017-0045.npy +tests/data/ljspeech/wavs/LJ039-0041.wav|tests/data/ljspeech/wavs/LJ039-0041.npy +tests/data/ljspeech/wavs/LJ012-0280.wav|tests/data/ljspeech/wavs/LJ012-0280.npy +tests/data/ljspeech/wavs/LJ006-0168.wav|tests/data/ljspeech/wavs/LJ006-0168.npy +tests/data/ljspeech/wavs/LJ018-0304.wav|tests/data/ljspeech/wavs/LJ018-0304.npy +tests/data/ljspeech/wavs/LJ015-0184.wav|tests/data/ljspeech/wavs/LJ015-0184.npy +tests/data/ljspeech/wavs/LJ046-0018.wav|tests/data/ljspeech/wavs/LJ046-0018.npy +tests/data/ljspeech/wavs/LJ015-0050.wav|tests/data/ljspeech/wavs/LJ015-0050.npy +tests/data/ljspeech/wavs/LJ017-0152.wav|tests/data/ljspeech/wavs/LJ017-0152.npy +tests/data/ljspeech/wavs/LJ028-0199.wav|tests/data/ljspeech/wavs/LJ028-0199.npy +tests/data/ljspeech/wavs/LJ017-0192.wav|tests/data/ljspeech/wavs/LJ017-0192.npy +tests/data/ljspeech/wavs/LJ038-0228.wav|tests/data/ljspeech/wavs/LJ038-0228.npy +tests/data/ljspeech/wavs/LJ012-0259.wav|tests/data/ljspeech/wavs/LJ012-0259.npy +tests/data/ljspeech/wavs/LJ034-0121.wav|tests/data/ljspeech/wavs/LJ034-0121.npy +tests/data/ljspeech/wavs/LJ046-0235.wav|tests/data/ljspeech/wavs/LJ046-0235.npy +tests/data/ljspeech/wavs/LJ047-0077.wav|tests/data/ljspeech/wavs/LJ047-0077.npy 
+tests/data/ljspeech/wavs/LJ006-0023.wav|tests/data/ljspeech/wavs/LJ006-0023.npy +tests/data/ljspeech/wavs/LJ011-0112.wav|tests/data/ljspeech/wavs/LJ011-0112.npy +tests/data/ljspeech/wavs/LJ020-0070.wav|tests/data/ljspeech/wavs/LJ020-0070.npy +tests/data/ljspeech/wavs/LJ042-0148.wav|tests/data/ljspeech/wavs/LJ042-0148.npy +tests/data/ljspeech/wavs/LJ007-0230.wav|tests/data/ljspeech/wavs/LJ007-0230.npy +tests/data/ljspeech/wavs/LJ025-0100.wav|tests/data/ljspeech/wavs/LJ025-0100.npy +tests/data/ljspeech/wavs/LJ048-0016.wav|tests/data/ljspeech/wavs/LJ048-0016.npy +tests/data/ljspeech/wavs/LJ010-0193.wav|tests/data/ljspeech/wavs/LJ010-0193.npy +tests/data/ljspeech/wavs/LJ042-0072.wav|tests/data/ljspeech/wavs/LJ042-0072.npy +tests/data/ljspeech/wavs/LJ021-0028.wav|tests/data/ljspeech/wavs/LJ021-0028.npy +tests/data/ljspeech/wavs/LJ042-0080.wav|tests/data/ljspeech/wavs/LJ042-0080.npy +tests/data/ljspeech/wavs/LJ050-0017.wav|tests/data/ljspeech/wavs/LJ050-0017.npy +tests/data/ljspeech/wavs/LJ049-0224.wav|tests/data/ljspeech/wavs/LJ049-0224.npy +tests/data/ljspeech/wavs/LJ004-0068.wav|tests/data/ljspeech/wavs/LJ004-0068.npy +tests/data/ljspeech/wavs/LJ010-0135.wav|tests/data/ljspeech/wavs/LJ010-0135.npy +tests/data/ljspeech/wavs/LJ021-0105.wav|tests/data/ljspeech/wavs/LJ021-0105.npy +tests/data/ljspeech/wavs/LJ021-0063.wav|tests/data/ljspeech/wavs/LJ021-0063.npy +tests/data/ljspeech/wavs/LJ012-0220.wav|tests/data/ljspeech/wavs/LJ012-0220.npy +tests/data/ljspeech/wavs/LJ042-0111.wav|tests/data/ljspeech/wavs/LJ042-0111.npy +tests/data/ljspeech/wavs/LJ047-0084.wav|tests/data/ljspeech/wavs/LJ047-0084.npy +tests/data/ljspeech/wavs/LJ001-0126.wav|tests/data/ljspeech/wavs/LJ001-0126.npy +tests/data/ljspeech/wavs/LJ022-0018.wav|tests/data/ljspeech/wavs/LJ022-0018.npy +tests/data/ljspeech/wavs/LJ023-0008.wav|tests/data/ljspeech/wavs/LJ023-0008.npy +tests/data/ljspeech/wavs/LJ005-0280.wav|tests/data/ljspeech/wavs/LJ005-0280.npy +tests/data/ljspeech/wavs/LJ004-0243.wav|tests/data/ljspeech/wavs/LJ004-0243.npy +tests/data/ljspeech/wavs/LJ008-0112.wav|tests/data/ljspeech/wavs/LJ008-0112.npy +tests/data/ljspeech/wavs/LJ009-0279.wav|tests/data/ljspeech/wavs/LJ009-0279.npy +tests/data/ljspeech/wavs/LJ046-0084.wav|tests/data/ljspeech/wavs/LJ046-0084.npy +tests/data/ljspeech/wavs/LJ008-0123.wav|tests/data/ljspeech/wavs/LJ008-0123.npy +tests/data/ljspeech/wavs/LJ032-0026.wav|tests/data/ljspeech/wavs/LJ032-0026.npy +tests/data/ljspeech/wavs/LJ044-0065.wav|tests/data/ljspeech/wavs/LJ044-0065.npy +tests/data/ljspeech/wavs/LJ032-0220.wav|tests/data/ljspeech/wavs/LJ032-0220.npy +tests/data/ljspeech/wavs/LJ042-0031.wav|tests/data/ljspeech/wavs/LJ042-0031.npy +tests/data/ljspeech/wavs/LJ025-0079.wav|tests/data/ljspeech/wavs/LJ025-0079.npy +tests/data/ljspeech/wavs/LJ028-0420.wav|tests/data/ljspeech/wavs/LJ028-0420.npy +tests/data/ljspeech/wavs/LJ045-0003.wav|tests/data/ljspeech/wavs/LJ045-0003.npy +tests/data/ljspeech/wavs/LJ009-0047.wav|tests/data/ljspeech/wavs/LJ009-0047.npy +tests/data/ljspeech/wavs/LJ001-0141.wav|tests/data/ljspeech/wavs/LJ001-0141.npy +tests/data/ljspeech/wavs/LJ008-0317.wav|tests/data/ljspeech/wavs/LJ008-0317.npy +tests/data/ljspeech/wavs/LJ005-0166.wav|tests/data/ljspeech/wavs/LJ005-0166.npy +tests/data/ljspeech/wavs/LJ005-0276.wav|tests/data/ljspeech/wavs/LJ005-0276.npy +tests/data/ljspeech/wavs/LJ002-0281.wav|tests/data/ljspeech/wavs/LJ002-0281.npy +tests/data/ljspeech/wavs/LJ015-0310.wav|tests/data/ljspeech/wavs/LJ015-0310.npy 
+tests/data/ljspeech/wavs/LJ021-0159.wav|tests/data/ljspeech/wavs/LJ021-0159.npy +tests/data/ljspeech/wavs/LJ008-0288.wav|tests/data/ljspeech/wavs/LJ008-0288.npy +tests/data/ljspeech/wavs/LJ017-0008.wav|tests/data/ljspeech/wavs/LJ017-0008.npy +tests/data/ljspeech/wavs/LJ009-0248.wav|tests/data/ljspeech/wavs/LJ009-0248.npy +tests/data/ljspeech/wavs/LJ025-0010.wav|tests/data/ljspeech/wavs/LJ025-0010.npy +tests/data/ljspeech/wavs/LJ034-0031.wav|tests/data/ljspeech/wavs/LJ034-0031.npy +tests/data/ljspeech/wavs/LJ024-0086.wav|tests/data/ljspeech/wavs/LJ024-0086.npy +tests/data/ljspeech/wavs/LJ045-0161.wav|tests/data/ljspeech/wavs/LJ045-0161.npy +tests/data/ljspeech/wavs/LJ016-0158.wav|tests/data/ljspeech/wavs/LJ016-0158.npy +tests/data/ljspeech/wavs/LJ048-0074.wav|tests/data/ljspeech/wavs/LJ048-0074.npy +tests/data/ljspeech/wavs/LJ048-0271.wav|tests/data/ljspeech/wavs/LJ048-0271.npy +tests/data/ljspeech/wavs/LJ032-0191.wav|tests/data/ljspeech/wavs/LJ032-0191.npy +tests/data/ljspeech/wavs/LJ021-0182.wav|tests/data/ljspeech/wavs/LJ021-0182.npy +tests/data/ljspeech/wavs/LJ044-0008.wav|tests/data/ljspeech/wavs/LJ044-0008.npy +tests/data/ljspeech/wavs/LJ009-0231.wav|tests/data/ljspeech/wavs/LJ009-0231.npy +tests/data/ljspeech/wavs/LJ027-0059.wav|tests/data/ljspeech/wavs/LJ027-0059.npy +tests/data/ljspeech/wavs/LJ021-0135.wav|tests/data/ljspeech/wavs/LJ021-0135.npy +tests/data/ljspeech/wavs/LJ008-0024.wav|tests/data/ljspeech/wavs/LJ008-0024.npy +tests/data/ljspeech/wavs/LJ002-0127.wav|tests/data/ljspeech/wavs/LJ002-0127.npy +tests/data/ljspeech/wavs/LJ033-0025.wav|tests/data/ljspeech/wavs/LJ033-0025.npy +tests/data/ljspeech/wavs/LJ003-0281.wav|tests/data/ljspeech/wavs/LJ003-0281.npy +tests/data/ljspeech/wavs/LJ022-0146.wav|tests/data/ljspeech/wavs/LJ022-0146.npy +tests/data/ljspeech/wavs/LJ006-0141.wav|tests/data/ljspeech/wavs/LJ006-0141.npy +tests/data/ljspeech/wavs/LJ031-0058.wav|tests/data/ljspeech/wavs/LJ031-0058.npy +tests/data/ljspeech/wavs/LJ014-0069.wav|tests/data/ljspeech/wavs/LJ014-0069.npy +tests/data/ljspeech/wavs/LJ014-0155.wav|tests/data/ljspeech/wavs/LJ014-0155.npy +tests/data/ljspeech/wavs/LJ006-0132.wav|tests/data/ljspeech/wavs/LJ006-0132.npy +tests/data/ljspeech/wavs/LJ013-0193.wav|tests/data/ljspeech/wavs/LJ013-0193.npy +tests/data/ljspeech/wavs/LJ050-0209.wav|tests/data/ljspeech/wavs/LJ050-0209.npy +tests/data/ljspeech/wavs/LJ028-0144.wav|tests/data/ljspeech/wavs/LJ028-0144.npy +tests/data/ljspeech/wavs/LJ045-0143.wav|tests/data/ljspeech/wavs/LJ045-0143.npy +tests/data/ljspeech/wavs/LJ017-0100.wav|tests/data/ljspeech/wavs/LJ017-0100.npy +tests/data/ljspeech/wavs/LJ020-0027.wav|tests/data/ljspeech/wavs/LJ020-0027.npy +tests/data/ljspeech/wavs/LJ026-0007.wav|tests/data/ljspeech/wavs/LJ026-0007.npy +tests/data/ljspeech/wavs/LJ034-0138.wav|tests/data/ljspeech/wavs/LJ034-0138.npy +tests/data/ljspeech/wavs/LJ002-0045.wav|tests/data/ljspeech/wavs/LJ002-0045.npy +tests/data/ljspeech/wavs/LJ018-0310.wav|tests/data/ljspeech/wavs/LJ018-0310.npy +tests/data/ljspeech/wavs/LJ001-0061.wav|tests/data/ljspeech/wavs/LJ001-0061.npy +tests/data/ljspeech/wavs/LJ009-0127.wav|tests/data/ljspeech/wavs/LJ009-0127.npy +tests/data/ljspeech/wavs/LJ012-0261.wav|tests/data/ljspeech/wavs/LJ012-0261.npy +tests/data/ljspeech/wavs/LJ019-0171.wav|tests/data/ljspeech/wavs/LJ019-0171.npy +tests/data/ljspeech/wavs/LJ028-0181.wav|tests/data/ljspeech/wavs/LJ028-0181.npy +tests/data/ljspeech/wavs/LJ027-0180.wav|tests/data/ljspeech/wavs/LJ027-0180.npy 
+tests/data/ljspeech/wavs/LJ004-0167.wav|tests/data/ljspeech/wavs/LJ004-0167.npy +tests/data/ljspeech/wavs/LJ005-0204.wav|tests/data/ljspeech/wavs/LJ005-0204.npy +tests/data/ljspeech/wavs/LJ013-0172.wav|tests/data/ljspeech/wavs/LJ013-0172.npy +tests/data/ljspeech/wavs/LJ028-0058.wav|tests/data/ljspeech/wavs/LJ028-0058.npy +tests/data/ljspeech/wavs/LJ035-0106.wav|tests/data/ljspeech/wavs/LJ035-0106.npy +tests/data/ljspeech/wavs/LJ018-0385.wav|tests/data/ljspeech/wavs/LJ018-0385.npy +tests/data/ljspeech/wavs/LJ050-0264.wav|tests/data/ljspeech/wavs/LJ050-0264.npy +tests/data/ljspeech/wavs/LJ040-0086.wav|tests/data/ljspeech/wavs/LJ040-0086.npy +tests/data/ljspeech/wavs/LJ010-0228.wav|tests/data/ljspeech/wavs/LJ010-0228.npy +tests/data/ljspeech/wavs/LJ038-0194.wav|tests/data/ljspeech/wavs/LJ038-0194.npy +tests/data/ljspeech/wavs/LJ015-0014.wav|tests/data/ljspeech/wavs/LJ015-0014.npy +tests/data/ljspeech/wavs/LJ008-0015.wav|tests/data/ljspeech/wavs/LJ008-0015.npy +tests/data/ljspeech/wavs/LJ010-0113.wav|tests/data/ljspeech/wavs/LJ010-0113.npy +tests/data/ljspeech/wavs/LJ009-0144.wav|tests/data/ljspeech/wavs/LJ009-0144.npy +tests/data/ljspeech/wavs/LJ011-0263.wav|tests/data/ljspeech/wavs/LJ011-0263.npy +tests/data/ljspeech/wavs/LJ012-0045.wav|tests/data/ljspeech/wavs/LJ012-0045.npy +tests/data/ljspeech/wavs/LJ039-0046.wav|tests/data/ljspeech/wavs/LJ039-0046.npy +tests/data/ljspeech/wavs/LJ005-0136.wav|tests/data/ljspeech/wavs/LJ005-0136.npy +tests/data/ljspeech/wavs/LJ028-0348.wav|tests/data/ljspeech/wavs/LJ028-0348.npy +tests/data/ljspeech/wavs/LJ018-0212.wav|tests/data/ljspeech/wavs/LJ018-0212.npy +tests/data/ljspeech/wavs/LJ039-0020.wav|tests/data/ljspeech/wavs/LJ039-0020.npy +tests/data/ljspeech/wavs/LJ013-0095.wav|tests/data/ljspeech/wavs/LJ013-0095.npy +tests/data/ljspeech/wavs/LJ005-0014.wav|tests/data/ljspeech/wavs/LJ005-0014.npy +tests/data/ljspeech/wavs/LJ012-0128.wav|tests/data/ljspeech/wavs/LJ012-0128.npy +tests/data/ljspeech/wavs/LJ028-0331.wav|tests/data/ljspeech/wavs/LJ028-0331.npy +tests/data/ljspeech/wavs/LJ010-0269.wav|tests/data/ljspeech/wavs/LJ010-0269.npy +tests/data/ljspeech/wavs/LJ011-0024.wav|tests/data/ljspeech/wavs/LJ011-0024.npy +tests/data/ljspeech/wavs/LJ045-0072.wav|tests/data/ljspeech/wavs/LJ045-0072.npy +tests/data/ljspeech/wavs/LJ028-0207.wav|tests/data/ljspeech/wavs/LJ028-0207.npy +tests/data/ljspeech/wavs/LJ008-0220.wav|tests/data/ljspeech/wavs/LJ008-0220.npy +tests/data/ljspeech/wavs/LJ042-0179.wav|tests/data/ljspeech/wavs/LJ042-0179.npy +tests/data/ljspeech/wavs/LJ012-0055.wav|tests/data/ljspeech/wavs/LJ012-0055.npy +tests/data/ljspeech/wavs/LJ015-0035.wav|tests/data/ljspeech/wavs/LJ015-0035.npy +tests/data/ljspeech/wavs/LJ007-0203.wav|tests/data/ljspeech/wavs/LJ007-0203.npy +tests/data/ljspeech/wavs/LJ008-0168.wav|tests/data/ljspeech/wavs/LJ008-0168.npy +tests/data/ljspeech/wavs/LJ012-0114.wav|tests/data/ljspeech/wavs/LJ012-0114.npy +tests/data/ljspeech/wavs/LJ012-0243.wav|tests/data/ljspeech/wavs/LJ012-0243.npy +tests/data/ljspeech/wavs/LJ012-0216.wav|tests/data/ljspeech/wavs/LJ012-0216.npy +tests/data/ljspeech/wavs/LJ018-0354.wav|tests/data/ljspeech/wavs/LJ018-0354.npy +tests/data/ljspeech/wavs/LJ032-0198.wav|tests/data/ljspeech/wavs/LJ032-0198.npy +tests/data/ljspeech/wavs/LJ034-0148.wav|tests/data/ljspeech/wavs/LJ034-0148.npy +tests/data/ljspeech/wavs/LJ047-0062.wav|tests/data/ljspeech/wavs/LJ047-0062.npy +tests/data/ljspeech/wavs/LJ038-0231.wav|tests/data/ljspeech/wavs/LJ038-0231.npy 
+tests/data/ljspeech/wavs/LJ036-0122.wav|tests/data/ljspeech/wavs/LJ036-0122.npy +tests/data/ljspeech/wavs/LJ002-0272.wav|tests/data/ljspeech/wavs/LJ002-0272.npy +tests/data/ljspeech/wavs/LJ017-0245.wav|tests/data/ljspeech/wavs/LJ017-0245.npy +tests/data/ljspeech/wavs/LJ030-0169.wav|tests/data/ljspeech/wavs/LJ030-0169.npy +tests/data/ljspeech/wavs/LJ048-0009.wav|tests/data/ljspeech/wavs/LJ048-0009.npy +tests/data/ljspeech/wavs/LJ022-0113.wav|tests/data/ljspeech/wavs/LJ022-0113.npy +tests/data/ljspeech/wavs/LJ003-0135.wav|tests/data/ljspeech/wavs/LJ003-0135.npy +tests/data/ljspeech/wavs/LJ029-0122.wav|tests/data/ljspeech/wavs/LJ029-0122.npy +tests/data/ljspeech/wavs/LJ018-0150.wav|tests/data/ljspeech/wavs/LJ018-0150.npy +tests/data/ljspeech/wavs/LJ004-0129.wav|tests/data/ljspeech/wavs/LJ004-0129.npy +tests/data/ljspeech/wavs/LJ002-0019.wav|tests/data/ljspeech/wavs/LJ002-0019.npy +tests/data/ljspeech/wavs/LJ021-0065.wav|tests/data/ljspeech/wavs/LJ021-0065.npy +tests/data/ljspeech/wavs/LJ028-0098.wav|tests/data/ljspeech/wavs/LJ028-0098.npy +tests/data/ljspeech/wavs/LJ036-0161.wav|tests/data/ljspeech/wavs/LJ036-0161.npy +tests/data/ljspeech/wavs/LJ022-0085.wav|tests/data/ljspeech/wavs/LJ022-0085.npy +tests/data/ljspeech/wavs/LJ030-0212.wav|tests/data/ljspeech/wavs/LJ030-0212.npy +tests/data/ljspeech/wavs/LJ020-0042.wav|tests/data/ljspeech/wavs/LJ020-0042.npy +tests/data/ljspeech/wavs/LJ022-0154.wav|tests/data/ljspeech/wavs/LJ022-0154.npy +tests/data/ljspeech/wavs/LJ017-0250.wav|tests/data/ljspeech/wavs/LJ017-0250.npy +tests/data/ljspeech/wavs/LJ015-0076.wav|tests/data/ljspeech/wavs/LJ015-0076.npy +tests/data/ljspeech/wavs/LJ021-0161.wav|tests/data/ljspeech/wavs/LJ021-0161.npy +tests/data/ljspeech/wavs/LJ029-0162.wav|tests/data/ljspeech/wavs/LJ029-0162.npy +tests/data/ljspeech/wavs/LJ011-0134.wav|tests/data/ljspeech/wavs/LJ011-0134.npy +tests/data/ljspeech/wavs/LJ044-0224.wav|tests/data/ljspeech/wavs/LJ044-0224.npy +tests/data/ljspeech/wavs/LJ016-0120.wav|tests/data/ljspeech/wavs/LJ016-0120.npy +tests/data/ljspeech/wavs/LJ045-0238.wav|tests/data/ljspeech/wavs/LJ045-0238.npy +tests/data/ljspeech/wavs/LJ034-0129.wav|tests/data/ljspeech/wavs/LJ034-0129.npy +tests/data/ljspeech/wavs/LJ011-0254.wav|tests/data/ljspeech/wavs/LJ011-0254.npy +tests/data/ljspeech/wavs/LJ046-0059.wav|tests/data/ljspeech/wavs/LJ046-0059.npy +tests/data/ljspeech/wavs/LJ027-0002.wav|tests/data/ljspeech/wavs/LJ027-0002.npy +tests/data/ljspeech/wavs/LJ033-0050.wav|tests/data/ljspeech/wavs/LJ033-0050.npy +tests/data/ljspeech/wavs/LJ048-0194.wav|tests/data/ljspeech/wavs/LJ048-0194.npy +tests/data/ljspeech/wavs/LJ046-0079.wav|tests/data/ljspeech/wavs/LJ046-0079.npy +tests/data/ljspeech/wavs/LJ023-0136.wav|tests/data/ljspeech/wavs/LJ023-0136.npy +tests/data/ljspeech/wavs/LJ012-0203.wav|tests/data/ljspeech/wavs/LJ012-0203.npy +tests/data/ljspeech/wavs/LJ027-0010.wav|tests/data/ljspeech/wavs/LJ027-0010.npy +tests/data/ljspeech/wavs/LJ010-0186.wav|tests/data/ljspeech/wavs/LJ010-0186.npy +tests/data/ljspeech/wavs/LJ040-0132.wav|tests/data/ljspeech/wavs/LJ040-0132.npy +tests/data/ljspeech/wavs/LJ019-0197.wav|tests/data/ljspeech/wavs/LJ019-0197.npy +tests/data/ljspeech/wavs/LJ021-0133.wav|tests/data/ljspeech/wavs/LJ021-0133.npy +tests/data/ljspeech/wavs/LJ039-0115.wav|tests/data/ljspeech/wavs/LJ039-0115.npy +tests/data/ljspeech/wavs/LJ045-0092.wav|tests/data/ljspeech/wavs/LJ045-0092.npy +tests/data/ljspeech/wavs/LJ019-0055.wav|tests/data/ljspeech/wavs/LJ019-0055.npy 
+tests/data/ljspeech/wavs/LJ039-0147.wav|tests/data/ljspeech/wavs/LJ039-0147.npy +tests/data/ljspeech/wavs/LJ006-0081.wav|tests/data/ljspeech/wavs/LJ006-0081.npy +tests/data/ljspeech/wavs/LJ001-0160.wav|tests/data/ljspeech/wavs/LJ001-0160.npy +tests/data/ljspeech/wavs/LJ026-0006.wav|tests/data/ljspeech/wavs/LJ026-0006.npy +tests/data/ljspeech/wavs/LJ037-0048.wav|tests/data/ljspeech/wavs/LJ037-0048.npy +tests/data/ljspeech/wavs/LJ014-0140.wav|tests/data/ljspeech/wavs/LJ014-0140.npy +tests/data/ljspeech/wavs/LJ018-0178.wav|tests/data/ljspeech/wavs/LJ018-0178.npy +tests/data/ljspeech/wavs/LJ019-0328.wav|tests/data/ljspeech/wavs/LJ019-0328.npy +tests/data/ljspeech/wavs/LJ010-0171.wav|tests/data/ljspeech/wavs/LJ010-0171.npy +tests/data/ljspeech/wavs/LJ005-0218.wav|tests/data/ljspeech/wavs/LJ005-0218.npy +tests/data/ljspeech/wavs/LJ021-0109.wav|tests/data/ljspeech/wavs/LJ021-0109.npy +tests/data/ljspeech/wavs/LJ035-0054.wav|tests/data/ljspeech/wavs/LJ035-0054.npy +tests/data/ljspeech/wavs/LJ002-0109.wav|tests/data/ljspeech/wavs/LJ002-0109.npy +tests/data/ljspeech/wavs/LJ003-0284.wav|tests/data/ljspeech/wavs/LJ003-0284.npy +tests/data/ljspeech/wavs/LJ021-0142.wav|tests/data/ljspeech/wavs/LJ021-0142.npy +tests/data/ljspeech/wavs/LJ028-0201.wav|tests/data/ljspeech/wavs/LJ028-0201.npy +tests/data/ljspeech/wavs/LJ007-0120.wav|tests/data/ljspeech/wavs/LJ007-0120.npy +tests/data/ljspeech/wavs/LJ021-0067.wav|tests/data/ljspeech/wavs/LJ021-0067.npy +tests/data/ljspeech/wavs/LJ031-0211.wav|tests/data/ljspeech/wavs/LJ031-0211.npy +tests/data/ljspeech/wavs/LJ038-0268.wav|tests/data/ljspeech/wavs/LJ038-0268.npy +tests/data/ljspeech/wavs/LJ006-0166.wav|tests/data/ljspeech/wavs/LJ006-0166.npy +tests/data/ljspeech/wavs/LJ024-0027.wav|tests/data/ljspeech/wavs/LJ024-0027.npy +tests/data/ljspeech/wavs/LJ031-0052.wav|tests/data/ljspeech/wavs/LJ031-0052.npy +tests/data/ljspeech/wavs/LJ049-0229.wav|tests/data/ljspeech/wavs/LJ049-0229.npy +tests/data/ljspeech/wavs/LJ044-0186.wav|tests/data/ljspeech/wavs/LJ044-0186.npy +tests/data/ljspeech/wavs/LJ028-0251.wav|tests/data/ljspeech/wavs/LJ028-0251.npy +tests/data/ljspeech/wavs/LJ021-0017.wav|tests/data/ljspeech/wavs/LJ021-0017.npy +tests/data/ljspeech/wavs/LJ012-0223.wav|tests/data/ljspeech/wavs/LJ012-0223.npy +tests/data/ljspeech/wavs/LJ005-0045.wav|tests/data/ljspeech/wavs/LJ005-0045.npy +tests/data/ljspeech/wavs/LJ040-0047.wav|tests/data/ljspeech/wavs/LJ040-0047.npy +tests/data/ljspeech/wavs/LJ005-0104.wav|tests/data/ljspeech/wavs/LJ005-0104.npy +tests/data/ljspeech/wavs/LJ028-0267.wav|tests/data/ljspeech/wavs/LJ028-0267.npy +tests/data/ljspeech/wavs/LJ023-0024.wav|tests/data/ljspeech/wavs/LJ023-0024.npy +tests/data/ljspeech/wavs/LJ023-0028.wav|tests/data/ljspeech/wavs/LJ023-0028.npy +tests/data/ljspeech/wavs/LJ040-0108.wav|tests/data/ljspeech/wavs/LJ040-0108.npy +tests/data/ljspeech/wavs/LJ031-0067.wav|tests/data/ljspeech/wavs/LJ031-0067.npy +tests/data/ljspeech/wavs/LJ035-0011.wav|tests/data/ljspeech/wavs/LJ035-0011.npy +tests/data/ljspeech/wavs/LJ003-0241.wav|tests/data/ljspeech/wavs/LJ003-0241.npy +tests/data/ljspeech/wavs/LJ013-0109.wav|tests/data/ljspeech/wavs/LJ013-0109.npy +tests/data/ljspeech/wavs/LJ033-0104.wav|tests/data/ljspeech/wavs/LJ033-0104.npy +tests/data/ljspeech/wavs/LJ028-0220.wav|tests/data/ljspeech/wavs/LJ028-0220.npy +tests/data/ljspeech/wavs/LJ015-0106.wav|tests/data/ljspeech/wavs/LJ015-0106.npy +tests/data/ljspeech/wavs/LJ043-0130.wav|tests/data/ljspeech/wavs/LJ043-0130.npy 
+tests/data/ljspeech/wavs/LJ048-0052.wav|tests/data/ljspeech/wavs/LJ048-0052.npy +tests/data/ljspeech/wavs/LJ012-0009.wav|tests/data/ljspeech/wavs/LJ012-0009.npy +tests/data/ljspeech/wavs/LJ042-0220.wav|tests/data/ljspeech/wavs/LJ042-0220.npy +tests/data/ljspeech/wavs/LJ033-0101.wav|tests/data/ljspeech/wavs/LJ033-0101.npy +tests/data/ljspeech/wavs/LJ002-0002.wav|tests/data/ljspeech/wavs/LJ002-0002.npy +tests/data/ljspeech/wavs/LJ002-0150.wav|tests/data/ljspeech/wavs/LJ002-0150.npy +tests/data/ljspeech/wavs/LJ017-0158.wav|tests/data/ljspeech/wavs/LJ017-0158.npy +tests/data/ljspeech/wavs/LJ045-0190.wav|tests/data/ljspeech/wavs/LJ045-0190.npy +tests/data/ljspeech/wavs/LJ045-0136.wav|tests/data/ljspeech/wavs/LJ045-0136.npy +tests/data/ljspeech/wavs/LJ041-0178.wav|tests/data/ljspeech/wavs/LJ041-0178.npy +tests/data/ljspeech/wavs/LJ005-0116.wav|tests/data/ljspeech/wavs/LJ005-0116.npy +tests/data/ljspeech/wavs/LJ017-0163.wav|tests/data/ljspeech/wavs/LJ017-0163.npy +tests/data/ljspeech/wavs/LJ033-0146.wav|tests/data/ljspeech/wavs/LJ033-0146.npy +tests/data/ljspeech/wavs/LJ010-0023.wav|tests/data/ljspeech/wavs/LJ010-0023.npy +tests/data/ljspeech/wavs/LJ006-0031.wav|tests/data/ljspeech/wavs/LJ006-0031.npy +tests/data/ljspeech/wavs/LJ027-0014.wav|tests/data/ljspeech/wavs/LJ027-0014.npy +tests/data/ljspeech/wavs/LJ046-0251.wav|tests/data/ljspeech/wavs/LJ046-0251.npy +tests/data/ljspeech/wavs/LJ028-0063.wav|tests/data/ljspeech/wavs/LJ028-0063.npy +tests/data/ljspeech/wavs/LJ042-0068.wav|tests/data/ljspeech/wavs/LJ042-0068.npy +tests/data/ljspeech/wavs/LJ022-0118.wav|tests/data/ljspeech/wavs/LJ022-0118.npy +tests/data/ljspeech/wavs/LJ046-0177.wav|tests/data/ljspeech/wavs/LJ046-0177.npy +tests/data/ljspeech/wavs/LJ015-0306.wav|tests/data/ljspeech/wavs/LJ015-0306.npy +tests/data/ljspeech/wavs/LJ037-0034.wav|tests/data/ljspeech/wavs/LJ037-0034.npy +tests/data/ljspeech/wavs/LJ024-0015.wav|tests/data/ljspeech/wavs/LJ024-0015.npy +tests/data/ljspeech/wavs/LJ023-0110.wav|tests/data/ljspeech/wavs/LJ023-0110.npy +tests/data/ljspeech/wavs/LJ044-0012.wav|tests/data/ljspeech/wavs/LJ044-0012.npy +tests/data/ljspeech/wavs/LJ047-0174.wav|tests/data/ljspeech/wavs/LJ047-0174.npy +tests/data/ljspeech/wavs/LJ030-0136.wav|tests/data/ljspeech/wavs/LJ030-0136.npy +tests/data/ljspeech/wavs/LJ009-0242.wav|tests/data/ljspeech/wavs/LJ009-0242.npy +tests/data/ljspeech/wavs/LJ022-0144.wav|tests/data/ljspeech/wavs/LJ022-0144.npy +tests/data/ljspeech/wavs/LJ028-0492.wav|tests/data/ljspeech/wavs/LJ028-0492.npy +tests/data/ljspeech/wavs/LJ017-0203.wav|tests/data/ljspeech/wavs/LJ017-0203.npy +tests/data/ljspeech/wavs/LJ017-0081.wav|tests/data/ljspeech/wavs/LJ017-0081.npy +tests/data/ljspeech/wavs/LJ015-0288.wav|tests/data/ljspeech/wavs/LJ015-0288.npy +tests/data/ljspeech/wavs/LJ001-0129.wav|tests/data/ljspeech/wavs/LJ001-0129.npy +tests/data/ljspeech/wavs/LJ022-0185.wav|tests/data/ljspeech/wavs/LJ022-0185.npy +tests/data/ljspeech/wavs/LJ016-0012.wav|tests/data/ljspeech/wavs/LJ016-0012.npy +tests/data/ljspeech/wavs/LJ008-0150.wav|tests/data/ljspeech/wavs/LJ008-0150.npy +tests/data/ljspeech/wavs/LJ044-0203.wav|tests/data/ljspeech/wavs/LJ044-0203.npy +tests/data/ljspeech/wavs/LJ030-0255.wav|tests/data/ljspeech/wavs/LJ030-0255.npy +tests/data/ljspeech/wavs/LJ005-0241.wav|tests/data/ljspeech/wavs/LJ005-0241.npy +tests/data/ljspeech/wavs/LJ033-0145.wav|tests/data/ljspeech/wavs/LJ033-0145.npy +tests/data/ljspeech/wavs/LJ044-0096.wav|tests/data/ljspeech/wavs/LJ044-0096.npy 
+tests/data/ljspeech/wavs/LJ046-0182.wav|tests/data/ljspeech/wavs/LJ046-0182.npy +tests/data/ljspeech/wavs/LJ041-0195.wav|tests/data/ljspeech/wavs/LJ041-0195.npy +tests/data/ljspeech/wavs/LJ040-0070.wav|tests/data/ljspeech/wavs/LJ040-0070.npy +tests/data/ljspeech/wavs/LJ010-0123.wav|tests/data/ljspeech/wavs/LJ010-0123.npy +tests/data/ljspeech/wavs/LJ023-0080.wav|tests/data/ljspeech/wavs/LJ023-0080.npy +tests/data/ljspeech/wavs/LJ008-0210.wav|tests/data/ljspeech/wavs/LJ008-0210.npy +tests/data/ljspeech/wavs/LJ033-0148.wav|tests/data/ljspeech/wavs/LJ033-0148.npy +tests/data/ljspeech/wavs/LJ021-0122.wav|tests/data/ljspeech/wavs/LJ021-0122.npy +tests/data/ljspeech/wavs/LJ016-0179.wav|tests/data/ljspeech/wavs/LJ016-0179.npy +tests/data/ljspeech/wavs/LJ006-0041.wav|tests/data/ljspeech/wavs/LJ006-0041.npy +tests/data/ljspeech/wavs/LJ023-0121.wav|tests/data/ljspeech/wavs/LJ023-0121.npy +tests/data/ljspeech/wavs/LJ019-0304.wav|tests/data/ljspeech/wavs/LJ019-0304.npy +tests/data/ljspeech/wavs/LJ025-0174.wav|tests/data/ljspeech/wavs/LJ025-0174.npy +tests/data/ljspeech/wavs/LJ009-0102.wav|tests/data/ljspeech/wavs/LJ009-0102.npy +tests/data/ljspeech/wavs/LJ022-0022.wav|tests/data/ljspeech/wavs/LJ022-0022.npy +tests/data/ljspeech/wavs/LJ011-0250.wav|tests/data/ljspeech/wavs/LJ011-0250.npy +tests/data/ljspeech/wavs/LJ022-0199.wav|tests/data/ljspeech/wavs/LJ022-0199.npy +tests/data/ljspeech/wavs/LJ001-0079.wav|tests/data/ljspeech/wavs/LJ001-0079.npy +tests/data/ljspeech/wavs/LJ004-0191.wav|tests/data/ljspeech/wavs/LJ004-0191.npy +tests/data/ljspeech/wavs/LJ020-0080.wav|tests/data/ljspeech/wavs/LJ020-0080.npy +tests/data/ljspeech/wavs/LJ028-0504.wav|tests/data/ljspeech/wavs/LJ028-0504.npy +tests/data/ljspeech/wavs/LJ009-0164.wav|tests/data/ljspeech/wavs/LJ009-0164.npy +tests/data/ljspeech/wavs/LJ028-0422.wav|tests/data/ljspeech/wavs/LJ028-0422.npy +tests/data/ljspeech/wavs/LJ019-0368.wav|tests/data/ljspeech/wavs/LJ019-0368.npy +tests/data/ljspeech/wavs/LJ008-0108.wav|tests/data/ljspeech/wavs/LJ008-0108.npy +tests/data/ljspeech/wavs/LJ010-0088.wav|tests/data/ljspeech/wavs/LJ010-0088.npy +tests/data/ljspeech/wavs/LJ005-0224.wav|tests/data/ljspeech/wavs/LJ005-0224.npy +tests/data/ljspeech/wavs/LJ042-0143.wav|tests/data/ljspeech/wavs/LJ042-0143.npy +tests/data/ljspeech/wavs/LJ045-0151.wav|tests/data/ljspeech/wavs/LJ045-0151.npy +tests/data/ljspeech/wavs/LJ036-0209.wav|tests/data/ljspeech/wavs/LJ036-0209.npy +tests/data/ljspeech/wavs/LJ049-0049.wav|tests/data/ljspeech/wavs/LJ049-0049.npy +tests/data/ljspeech/wavs/LJ014-0238.wav|tests/data/ljspeech/wavs/LJ014-0238.npy +tests/data/ljspeech/wavs/LJ023-0116.wav|tests/data/ljspeech/wavs/LJ023-0116.npy +tests/data/ljspeech/wavs/LJ008-0287.wav|tests/data/ljspeech/wavs/LJ008-0287.npy +tests/data/ljspeech/wavs/LJ028-0099.wav|tests/data/ljspeech/wavs/LJ028-0099.npy +tests/data/ljspeech/wavs/LJ026-0106.wav|tests/data/ljspeech/wavs/LJ026-0106.npy +tests/data/ljspeech/wavs/LJ005-0139.wav|tests/data/ljspeech/wavs/LJ005-0139.npy +tests/data/ljspeech/wavs/LJ027-0028.wav|tests/data/ljspeech/wavs/LJ027-0028.npy +tests/data/ljspeech/wavs/LJ003-0113.wav|tests/data/ljspeech/wavs/LJ003-0113.npy +tests/data/ljspeech/wavs/LJ014-0189.wav|tests/data/ljspeech/wavs/LJ014-0189.npy +tests/data/ljspeech/wavs/LJ045-0133.wav|tests/data/ljspeech/wavs/LJ045-0133.npy +tests/data/ljspeech/wavs/LJ050-0174.wav|tests/data/ljspeech/wavs/LJ050-0174.npy +tests/data/ljspeech/wavs/LJ038-0092.wav|tests/data/ljspeech/wavs/LJ038-0092.npy 
+tests/data/ljspeech/wavs/LJ046-0046.wav|tests/data/ljspeech/wavs/LJ046-0046.npy +tests/data/ljspeech/wavs/LJ038-0138.wav|tests/data/ljspeech/wavs/LJ038-0138.npy +tests/data/ljspeech/wavs/LJ039-0068.wav|tests/data/ljspeech/wavs/LJ039-0068.npy +tests/data/ljspeech/wavs/LJ015-0027.wav|tests/data/ljspeech/wavs/LJ015-0027.npy +tests/data/ljspeech/wavs/LJ030-0113.wav|tests/data/ljspeech/wavs/LJ030-0113.npy +tests/data/ljspeech/wavs/LJ011-0192.wav|tests/data/ljspeech/wavs/LJ011-0192.npy +tests/data/ljspeech/wavs/LJ036-0102.wav|tests/data/ljspeech/wavs/LJ036-0102.npy +tests/data/ljspeech/wavs/LJ045-0117.wav|tests/data/ljspeech/wavs/LJ045-0117.npy +tests/data/ljspeech/wavs/LJ017-0088.wav|tests/data/ljspeech/wavs/LJ017-0088.npy +tests/data/ljspeech/wavs/LJ038-0247.wav|tests/data/ljspeech/wavs/LJ038-0247.npy +tests/data/ljspeech/wavs/LJ042-0170.wav|tests/data/ljspeech/wavs/LJ042-0170.npy +tests/data/ljspeech/wavs/LJ028-0286.wav|tests/data/ljspeech/wavs/LJ028-0286.npy +tests/data/ljspeech/wavs/LJ018-0322.wav|tests/data/ljspeech/wavs/LJ018-0322.npy +tests/data/ljspeech/wavs/LJ038-0097.wav|tests/data/ljspeech/wavs/LJ038-0097.npy +tests/data/ljspeech/wavs/LJ036-0012.wav|tests/data/ljspeech/wavs/LJ036-0012.npy +tests/data/ljspeech/wavs/LJ017-0003.wav|tests/data/ljspeech/wavs/LJ017-0003.npy +tests/data/ljspeech/wavs/LJ003-0043.wav|tests/data/ljspeech/wavs/LJ003-0043.npy +tests/data/ljspeech/wavs/LJ033-0049.wav|tests/data/ljspeech/wavs/LJ033-0049.npy +tests/data/ljspeech/wavs/LJ045-0176.wav|tests/data/ljspeech/wavs/LJ045-0176.npy +tests/data/ljspeech/wavs/LJ017-0199.wav|tests/data/ljspeech/wavs/LJ017-0199.npy +tests/data/ljspeech/wavs/LJ004-0192.wav|tests/data/ljspeech/wavs/LJ004-0192.npy +tests/data/ljspeech/wavs/LJ007-0202.wav|tests/data/ljspeech/wavs/LJ007-0202.npy +tests/data/ljspeech/wavs/LJ018-0018.wav|tests/data/ljspeech/wavs/LJ018-0018.npy +tests/data/ljspeech/wavs/LJ043-0113.wav|tests/data/ljspeech/wavs/LJ043-0113.npy +tests/data/ljspeech/wavs/LJ050-0081.wav|tests/data/ljspeech/wavs/LJ050-0081.npy +tests/data/ljspeech/wavs/LJ013-0128.wav|tests/data/ljspeech/wavs/LJ013-0128.npy +tests/data/ljspeech/wavs/LJ002-0332.wav|tests/data/ljspeech/wavs/LJ002-0332.npy +tests/data/ljspeech/wavs/LJ035-0124.wav|tests/data/ljspeech/wavs/LJ035-0124.npy +tests/data/ljspeech/wavs/LJ002-0114.wav|tests/data/ljspeech/wavs/LJ002-0114.npy +tests/data/ljspeech/wavs/LJ017-0282.wav|tests/data/ljspeech/wavs/LJ017-0282.npy +tests/data/ljspeech/wavs/LJ019-0325.wav|tests/data/ljspeech/wavs/LJ019-0325.npy +tests/data/ljspeech/wavs/LJ014-0127.wav|tests/data/ljspeech/wavs/LJ014-0127.npy +tests/data/ljspeech/wavs/LJ018-0061.wav|tests/data/ljspeech/wavs/LJ018-0061.npy +tests/data/ljspeech/wavs/LJ004-0156.wav|tests/data/ljspeech/wavs/LJ004-0156.npy +tests/data/ljspeech/wavs/LJ010-0069.wav|tests/data/ljspeech/wavs/LJ010-0069.npy +tests/data/ljspeech/wavs/LJ005-0195.wav|tests/data/ljspeech/wavs/LJ005-0195.npy +tests/data/ljspeech/wavs/LJ017-0048.wav|tests/data/ljspeech/wavs/LJ017-0048.npy +tests/data/ljspeech/wavs/LJ004-0179.wav|tests/data/ljspeech/wavs/LJ004-0179.npy +tests/data/ljspeech/wavs/LJ026-0048.wav|tests/data/ljspeech/wavs/LJ026-0048.npy +tests/data/ljspeech/wavs/LJ019-0057.wav|tests/data/ljspeech/wavs/LJ019-0057.npy +tests/data/ljspeech/wavs/LJ019-0048.wav|tests/data/ljspeech/wavs/LJ019-0048.npy +tests/data/ljspeech/wavs/LJ012-0294.wav|tests/data/ljspeech/wavs/LJ012-0294.npy +tests/data/ljspeech/wavs/LJ045-0210.wav|tests/data/ljspeech/wavs/LJ045-0210.npy 
+tests/data/ljspeech/wavs/LJ029-0078.wav|tests/data/ljspeech/wavs/LJ029-0078.npy +tests/data/ljspeech/wavs/LJ015-0296.wav|tests/data/ljspeech/wavs/LJ015-0296.npy +tests/data/ljspeech/wavs/LJ003-0172.wav|tests/data/ljspeech/wavs/LJ003-0172.npy +tests/data/ljspeech/wavs/LJ014-0184.wav|tests/data/ljspeech/wavs/LJ014-0184.npy +tests/data/ljspeech/wavs/LJ007-0066.wav|tests/data/ljspeech/wavs/LJ007-0066.npy +tests/data/ljspeech/wavs/LJ041-0053.wav|tests/data/ljspeech/wavs/LJ041-0053.npy +tests/data/ljspeech/wavs/LJ047-0069.wav|tests/data/ljspeech/wavs/LJ047-0069.npy +tests/data/ljspeech/wavs/LJ042-0196.wav|tests/data/ljspeech/wavs/LJ042-0196.npy +tests/data/ljspeech/wavs/LJ022-0021.wav|tests/data/ljspeech/wavs/LJ022-0021.npy +tests/data/ljspeech/wavs/LJ044-0003.wav|tests/data/ljspeech/wavs/LJ044-0003.npy +tests/data/ljspeech/wavs/LJ038-0011.wav|tests/data/ljspeech/wavs/LJ038-0011.npy +tests/data/ljspeech/wavs/LJ015-0189.wav|tests/data/ljspeech/wavs/LJ015-0189.npy +tests/data/ljspeech/wavs/LJ030-0119.wav|tests/data/ljspeech/wavs/LJ030-0119.npy +tests/data/ljspeech/wavs/LJ022-0165.wav|tests/data/ljspeech/wavs/LJ022-0165.npy +tests/data/ljspeech/wavs/LJ022-0028.wav|tests/data/ljspeech/wavs/LJ022-0028.npy +tests/data/ljspeech/wavs/LJ046-0004.wav|tests/data/ljspeech/wavs/LJ046-0004.npy +tests/data/ljspeech/wavs/LJ004-0217.wav|tests/data/ljspeech/wavs/LJ004-0217.npy +tests/data/ljspeech/wavs/LJ025-0007.wav|tests/data/ljspeech/wavs/LJ025-0007.npy +tests/data/ljspeech/wavs/LJ039-0117.wav|tests/data/ljspeech/wavs/LJ039-0117.npy +tests/data/ljspeech/wavs/LJ027-0096.wav|tests/data/ljspeech/wavs/LJ027-0096.npy +tests/data/ljspeech/wavs/LJ033-0047.wav|tests/data/ljspeech/wavs/LJ033-0047.npy +tests/data/ljspeech/wavs/LJ035-0083.wav|tests/data/ljspeech/wavs/LJ035-0083.npy +tests/data/ljspeech/wavs/LJ028-0151.wav|tests/data/ljspeech/wavs/LJ028-0151.npy +tests/data/ljspeech/wavs/LJ022-0034.wav|tests/data/ljspeech/wavs/LJ022-0034.npy +tests/data/ljspeech/wavs/LJ005-0174.wav|tests/data/ljspeech/wavs/LJ005-0174.npy +tests/data/ljspeech/wavs/LJ022-0114.wav|tests/data/ljspeech/wavs/LJ022-0114.npy +tests/data/ljspeech/wavs/LJ023-0030.wav|tests/data/ljspeech/wavs/LJ023-0030.npy +tests/data/ljspeech/wavs/LJ030-0191.wav|tests/data/ljspeech/wavs/LJ030-0191.npy +tests/data/ljspeech/wavs/LJ006-0009.wav|tests/data/ljspeech/wavs/LJ006-0009.npy +tests/data/ljspeech/wavs/LJ050-0244.wav|tests/data/ljspeech/wavs/LJ050-0244.npy +tests/data/ljspeech/wavs/LJ007-0236.wav|tests/data/ljspeech/wavs/LJ007-0236.npy +tests/data/ljspeech/wavs/LJ002-0275.wav|tests/data/ljspeech/wavs/LJ002-0275.npy +tests/data/ljspeech/wavs/LJ037-0254.wav|tests/data/ljspeech/wavs/LJ037-0254.npy +tests/data/ljspeech/wavs/LJ031-0092.wav|tests/data/ljspeech/wavs/LJ031-0092.npy +tests/data/ljspeech/wavs/LJ028-0325.wav|tests/data/ljspeech/wavs/LJ028-0325.npy +tests/data/ljspeech/wavs/LJ038-0049.wav|tests/data/ljspeech/wavs/LJ038-0049.npy +tests/data/ljspeech/wavs/LJ008-0134.wav|tests/data/ljspeech/wavs/LJ008-0134.npy +tests/data/ljspeech/wavs/LJ039-0188.wav|tests/data/ljspeech/wavs/LJ039-0188.npy +tests/data/ljspeech/wavs/LJ004-0145.wav|tests/data/ljspeech/wavs/LJ004-0145.npy +tests/data/ljspeech/wavs/LJ029-0077.wav|tests/data/ljspeech/wavs/LJ029-0077.npy +tests/data/ljspeech/wavs/LJ028-0517.wav|tests/data/ljspeech/wavs/LJ028-0517.npy +tests/data/ljspeech/wavs/LJ019-0166.wav|tests/data/ljspeech/wavs/LJ019-0166.npy +tests/data/ljspeech/wavs/LJ029-0151.wav|tests/data/ljspeech/wavs/LJ029-0151.npy 
+tests/data/ljspeech/wavs/LJ029-0125.wav|tests/data/ljspeech/wavs/LJ029-0125.npy +tests/data/ljspeech/wavs/LJ005-0105.wav|tests/data/ljspeech/wavs/LJ005-0105.npy +tests/data/ljspeech/wavs/LJ046-0254.wav|tests/data/ljspeech/wavs/LJ046-0254.npy +tests/data/ljspeech/wavs/LJ002-0159.wav|tests/data/ljspeech/wavs/LJ002-0159.npy +tests/data/ljspeech/wavs/LJ020-0029.wav|tests/data/ljspeech/wavs/LJ020-0029.npy +tests/data/ljspeech/wavs/LJ021-0096.wav|tests/data/ljspeech/wavs/LJ021-0096.npy +tests/data/ljspeech/wavs/LJ044-0208.wav|tests/data/ljspeech/wavs/LJ044-0208.npy +tests/data/ljspeech/wavs/LJ047-0130.wav|tests/data/ljspeech/wavs/LJ047-0130.npy +tests/data/ljspeech/wavs/LJ031-0089.wav|tests/data/ljspeech/wavs/LJ031-0089.npy +tests/data/ljspeech/wavs/LJ038-0216.wav|tests/data/ljspeech/wavs/LJ038-0216.npy +tests/data/ljspeech/wavs/LJ006-0175.wav|tests/data/ljspeech/wavs/LJ006-0175.npy +tests/data/ljspeech/wavs/LJ027-0103.wav|tests/data/ljspeech/wavs/LJ027-0103.npy +tests/data/ljspeech/wavs/LJ005-0078.wav|tests/data/ljspeech/wavs/LJ005-0078.npy +tests/data/ljspeech/wavs/LJ044-0014.wav|tests/data/ljspeech/wavs/LJ044-0014.npy +tests/data/ljspeech/wavs/LJ043-0093.wav|tests/data/ljspeech/wavs/LJ043-0093.npy +tests/data/ljspeech/wavs/LJ021-0022.wav|tests/data/ljspeech/wavs/LJ021-0022.npy +tests/data/ljspeech/wavs/LJ018-0383.wav|tests/data/ljspeech/wavs/LJ018-0383.npy +tests/data/ljspeech/wavs/LJ010-0275.wav|tests/data/ljspeech/wavs/LJ010-0275.npy +tests/data/ljspeech/wavs/LJ007-0048.wav|tests/data/ljspeech/wavs/LJ007-0048.npy +tests/data/ljspeech/wavs/LJ005-0190.wav|tests/data/ljspeech/wavs/LJ005-0190.npy +tests/data/ljspeech/wavs/LJ001-0037.wav|tests/data/ljspeech/wavs/LJ001-0037.npy +tests/data/ljspeech/wavs/LJ012-0255.wav|tests/data/ljspeech/wavs/LJ012-0255.npy +tests/data/ljspeech/wavs/LJ033-0026.wav|tests/data/ljspeech/wavs/LJ033-0026.npy +tests/data/ljspeech/wavs/LJ029-0102.wav|tests/data/ljspeech/wavs/LJ029-0102.npy +tests/data/ljspeech/wavs/LJ049-0219.wav|tests/data/ljspeech/wavs/LJ049-0219.npy +tests/data/ljspeech/wavs/LJ016-0066.wav|tests/data/ljspeech/wavs/LJ016-0066.npy +tests/data/ljspeech/wavs/LJ042-0029.wav|tests/data/ljspeech/wavs/LJ042-0029.npy +tests/data/ljspeech/wavs/LJ035-0188.wav|tests/data/ljspeech/wavs/LJ035-0188.npy +tests/data/ljspeech/wavs/LJ018-0180.wav|tests/data/ljspeech/wavs/LJ018-0180.npy +tests/data/ljspeech/wavs/LJ044-0113.wav|tests/data/ljspeech/wavs/LJ044-0113.npy +tests/data/ljspeech/wavs/LJ034-0143.wav|tests/data/ljspeech/wavs/LJ034-0143.npy +tests/data/ljspeech/wavs/LJ035-0080.wav|tests/data/ljspeech/wavs/LJ035-0080.npy +tests/data/ljspeech/wavs/LJ047-0203.wav|tests/data/ljspeech/wavs/LJ047-0203.npy +tests/data/ljspeech/wavs/LJ010-0201.wav|tests/data/ljspeech/wavs/LJ010-0201.npy +tests/data/ljspeech/wavs/LJ035-0061.wav|tests/data/ljspeech/wavs/LJ035-0061.npy +tests/data/ljspeech/wavs/LJ002-0037.wav|tests/data/ljspeech/wavs/LJ002-0037.npy +tests/data/ljspeech/wavs/LJ037-0049.wav|tests/data/ljspeech/wavs/LJ037-0049.npy +tests/data/ljspeech/wavs/LJ030-0129.wav|tests/data/ljspeech/wavs/LJ030-0129.npy +tests/data/ljspeech/wavs/LJ033-0095.wav|tests/data/ljspeech/wavs/LJ033-0095.npy +tests/data/ljspeech/wavs/LJ028-0155.wav|tests/data/ljspeech/wavs/LJ028-0155.npy +tests/data/ljspeech/wavs/LJ050-0193.wav|tests/data/ljspeech/wavs/LJ050-0193.npy +tests/data/ljspeech/wavs/LJ016-0109.wav|tests/data/ljspeech/wavs/LJ016-0109.npy +tests/data/ljspeech/wavs/LJ011-0286.wav|tests/data/ljspeech/wavs/LJ011-0286.npy 
+tests/data/ljspeech/wavs/LJ035-0031.wav|tests/data/ljspeech/wavs/LJ035-0031.npy +tests/data/ljspeech/wavs/LJ014-0033.wav|tests/data/ljspeech/wavs/LJ014-0033.npy +tests/data/ljspeech/wavs/LJ004-0089.wav|tests/data/ljspeech/wavs/LJ004-0089.npy +tests/data/ljspeech/wavs/LJ011-0011.wav|tests/data/ljspeech/wavs/LJ011-0011.npy +tests/data/ljspeech/wavs/LJ002-0202.wav|tests/data/ljspeech/wavs/LJ002-0202.npy +tests/data/ljspeech/wavs/LJ046-0106.wav|tests/data/ljspeech/wavs/LJ046-0106.npy +tests/data/ljspeech/wavs/LJ015-0182.wav|tests/data/ljspeech/wavs/LJ015-0182.npy +tests/data/ljspeech/wavs/LJ030-0019.wav|tests/data/ljspeech/wavs/LJ030-0019.npy +tests/data/ljspeech/wavs/LJ016-0268.wav|tests/data/ljspeech/wavs/LJ016-0268.npy +tests/data/ljspeech/wavs/LJ028-0305.wav|tests/data/ljspeech/wavs/LJ028-0305.npy +tests/data/ljspeech/wavs/LJ037-0248.wav|tests/data/ljspeech/wavs/LJ037-0248.npy +tests/data/ljspeech/wavs/LJ016-0104.wav|tests/data/ljspeech/wavs/LJ016-0104.npy +tests/data/ljspeech/wavs/LJ028-0026.wav|tests/data/ljspeech/wavs/LJ028-0026.npy +tests/data/ljspeech/wavs/LJ049-0064.wav|tests/data/ljspeech/wavs/LJ049-0064.npy +tests/data/ljspeech/wavs/LJ035-0175.wav|tests/data/ljspeech/wavs/LJ035-0175.npy +tests/data/ljspeech/wavs/LJ047-0146.wav|tests/data/ljspeech/wavs/LJ047-0146.npy +tests/data/ljspeech/wavs/LJ048-0139.wav|tests/data/ljspeech/wavs/LJ048-0139.npy +tests/data/ljspeech/wavs/LJ003-0307.wav|tests/data/ljspeech/wavs/LJ003-0307.npy +tests/data/ljspeech/wavs/LJ050-0115.wav|tests/data/ljspeech/wavs/LJ050-0115.npy +tests/data/ljspeech/wavs/LJ022-0093.wav|tests/data/ljspeech/wavs/LJ022-0093.npy +tests/data/ljspeech/wavs/LJ015-0124.wav|tests/data/ljspeech/wavs/LJ015-0124.npy +tests/data/ljspeech/wavs/LJ016-0167.wav|tests/data/ljspeech/wavs/LJ016-0167.npy +tests/data/ljspeech/wavs/LJ007-0012.wav|tests/data/ljspeech/wavs/LJ007-0012.npy +tests/data/ljspeech/wavs/LJ033-0034.wav|tests/data/ljspeech/wavs/LJ033-0034.npy +tests/data/ljspeech/wavs/LJ002-0212.wav|tests/data/ljspeech/wavs/LJ002-0212.npy +tests/data/ljspeech/wavs/LJ017-0198.wav|tests/data/ljspeech/wavs/LJ017-0198.npy +tests/data/ljspeech/wavs/LJ019-0123.wav|tests/data/ljspeech/wavs/LJ019-0123.npy +tests/data/ljspeech/wavs/LJ019-0283.wav|tests/data/ljspeech/wavs/LJ019-0283.npy +tests/data/ljspeech/wavs/LJ014-0119.wav|tests/data/ljspeech/wavs/LJ014-0119.npy +tests/data/ljspeech/wavs/LJ002-0078.wav|tests/data/ljspeech/wavs/LJ002-0078.npy +tests/data/ljspeech/wavs/LJ001-0175.wav|tests/data/ljspeech/wavs/LJ001-0175.npy +tests/data/ljspeech/wavs/LJ018-0073.wav|tests/data/ljspeech/wavs/LJ018-0073.npy +tests/data/ljspeech/wavs/LJ032-0047.wav|tests/data/ljspeech/wavs/LJ032-0047.npy +tests/data/ljspeech/wavs/LJ025-0134.wav|tests/data/ljspeech/wavs/LJ025-0134.npy +tests/data/ljspeech/wavs/LJ012-0014.wav|tests/data/ljspeech/wavs/LJ012-0014.npy +tests/data/ljspeech/wavs/LJ013-0045.wav|tests/data/ljspeech/wavs/LJ013-0045.npy +tests/data/ljspeech/wavs/LJ034-0073.wav|tests/data/ljspeech/wavs/LJ034-0073.npy +tests/data/ljspeech/wavs/LJ009-0067.wav|tests/data/ljspeech/wavs/LJ009-0067.npy +tests/data/ljspeech/wavs/LJ004-0178.wav|tests/data/ljspeech/wavs/LJ004-0178.npy +tests/data/ljspeech/wavs/LJ001-0095.wav|tests/data/ljspeech/wavs/LJ001-0095.npy +tests/data/ljspeech/wavs/LJ009-0070.wav|tests/data/ljspeech/wavs/LJ009-0070.npy +tests/data/ljspeech/wavs/LJ043-0031.wav|tests/data/ljspeech/wavs/LJ043-0031.npy +tests/data/ljspeech/wavs/LJ026-0049.wav|tests/data/ljspeech/wavs/LJ026-0049.npy 
+tests/data/ljspeech/wavs/LJ042-0155.wav|tests/data/ljspeech/wavs/LJ042-0155.npy +tests/data/ljspeech/wavs/LJ007-0213.wav|tests/data/ljspeech/wavs/LJ007-0213.npy +tests/data/ljspeech/wavs/LJ043-0178.wav|tests/data/ljspeech/wavs/LJ043-0178.npy +tests/data/ljspeech/wavs/LJ014-0285.wav|tests/data/ljspeech/wavs/LJ014-0285.npy +tests/data/ljspeech/wavs/LJ032-0054.wav|tests/data/ljspeech/wavs/LJ032-0054.npy +tests/data/ljspeech/wavs/LJ050-0095.wav|tests/data/ljspeech/wavs/LJ050-0095.npy +tests/data/ljspeech/wavs/LJ040-0151.wav|tests/data/ljspeech/wavs/LJ040-0151.npy +tests/data/ljspeech/wavs/LJ035-0065.wav|tests/data/ljspeech/wavs/LJ035-0065.npy +tests/data/ljspeech/wavs/LJ011-0282.wav|tests/data/ljspeech/wavs/LJ011-0282.npy +tests/data/ljspeech/wavs/LJ006-0097.wav|tests/data/ljspeech/wavs/LJ006-0097.npy +tests/data/ljspeech/wavs/LJ005-0228.wav|tests/data/ljspeech/wavs/LJ005-0228.npy +tests/data/ljspeech/wavs/LJ028-0319.wav|tests/data/ljspeech/wavs/LJ028-0319.npy +tests/data/ljspeech/wavs/LJ043-0121.wav|tests/data/ljspeech/wavs/LJ043-0121.npy +tests/data/ljspeech/wavs/LJ042-0249.wav|tests/data/ljspeech/wavs/LJ042-0249.npy +tests/data/ljspeech/wavs/LJ044-0179.wav|tests/data/ljspeech/wavs/LJ044-0179.npy +tests/data/ljspeech/wavs/LJ016-0152.wav|tests/data/ljspeech/wavs/LJ016-0152.npy +tests/data/ljspeech/wavs/LJ013-0197.wav|tests/data/ljspeech/wavs/LJ013-0197.npy +tests/data/ljspeech/wavs/LJ011-0033.wav|tests/data/ljspeech/wavs/LJ011-0033.npy +tests/data/ljspeech/wavs/LJ012-0148.wav|tests/data/ljspeech/wavs/LJ012-0148.npy +tests/data/ljspeech/wavs/LJ008-0152.wav|tests/data/ljspeech/wavs/LJ008-0152.npy +tests/data/ljspeech/wavs/LJ013-0201.wav|tests/data/ljspeech/wavs/LJ013-0201.npy +tests/data/ljspeech/wavs/LJ037-0175.wav|tests/data/ljspeech/wavs/LJ037-0175.npy +tests/data/ljspeech/wavs/LJ006-0105.wav|tests/data/ljspeech/wavs/LJ006-0105.npy +tests/data/ljspeech/wavs/LJ015-0038.wav|tests/data/ljspeech/wavs/LJ015-0038.npy +tests/data/ljspeech/wavs/LJ029-0138.wav|tests/data/ljspeech/wavs/LJ029-0138.npy +tests/data/ljspeech/wavs/LJ044-0027.wav|tests/data/ljspeech/wavs/LJ044-0027.npy +tests/data/ljspeech/wavs/LJ029-0117.wav|tests/data/ljspeech/wavs/LJ029-0117.npy +tests/data/ljspeech/wavs/LJ014-0158.wav|tests/data/ljspeech/wavs/LJ014-0158.npy +tests/data/ljspeech/wavs/LJ037-0148.wav|tests/data/ljspeech/wavs/LJ037-0148.npy +tests/data/ljspeech/wavs/LJ006-0100.wav|tests/data/ljspeech/wavs/LJ006-0100.npy +tests/data/ljspeech/wavs/LJ007-0188.wav|tests/data/ljspeech/wavs/LJ007-0188.npy +tests/data/ljspeech/wavs/LJ011-0021.wav|tests/data/ljspeech/wavs/LJ011-0021.npy +tests/data/ljspeech/wavs/LJ032-0264.wav|tests/data/ljspeech/wavs/LJ032-0264.npy +tests/data/ljspeech/wavs/LJ013-0159.wav|tests/data/ljspeech/wavs/LJ013-0159.npy +tests/data/ljspeech/wavs/LJ016-0148.wav|tests/data/ljspeech/wavs/LJ016-0148.npy +tests/data/ljspeech/wavs/LJ002-0101.wav|tests/data/ljspeech/wavs/LJ002-0101.npy +tests/data/ljspeech/wavs/LJ039-0171.wav|tests/data/ljspeech/wavs/LJ039-0171.npy +tests/data/ljspeech/wavs/LJ008-0191.wav|tests/data/ljspeech/wavs/LJ008-0191.npy +tests/data/ljspeech/wavs/LJ008-0002.wav|tests/data/ljspeech/wavs/LJ008-0002.npy +tests/data/ljspeech/wavs/LJ026-0115.wav|tests/data/ljspeech/wavs/LJ026-0115.npy +tests/data/ljspeech/wavs/LJ001-0107.wav|tests/data/ljspeech/wavs/LJ001-0107.npy +tests/data/ljspeech/wavs/LJ026-0130.wav|tests/data/ljspeech/wavs/LJ026-0130.npy +tests/data/ljspeech/wavs/LJ022-0014.wav|tests/data/ljspeech/wavs/LJ022-0014.npy 
+tests/data/ljspeech/wavs/LJ014-0041.wav|tests/data/ljspeech/wavs/LJ014-0041.npy +tests/data/ljspeech/wavs/LJ021-0129.wav|tests/data/ljspeech/wavs/LJ021-0129.npy +tests/data/ljspeech/wavs/LJ047-0091.wav|tests/data/ljspeech/wavs/LJ047-0091.npy +tests/data/ljspeech/wavs/LJ028-0393.wav|tests/data/ljspeech/wavs/LJ028-0393.npy +tests/data/ljspeech/wavs/LJ036-0092.wav|tests/data/ljspeech/wavs/LJ036-0092.npy +tests/data/ljspeech/wavs/LJ033-0138.wav|tests/data/ljspeech/wavs/LJ033-0138.npy +tests/data/ljspeech/wavs/LJ015-0113.wav|tests/data/ljspeech/wavs/LJ015-0113.npy +tests/data/ljspeech/wavs/LJ026-0118.wav|tests/data/ljspeech/wavs/LJ026-0118.npy +tests/data/ljspeech/wavs/LJ008-0186.wav|tests/data/ljspeech/wavs/LJ008-0186.npy +tests/data/ljspeech/wavs/LJ030-0040.wav|tests/data/ljspeech/wavs/LJ030-0040.npy +tests/data/ljspeech/wavs/LJ004-0196.wav|tests/data/ljspeech/wavs/LJ004-0196.npy +tests/data/ljspeech/wavs/LJ006-0298.wav|tests/data/ljspeech/wavs/LJ006-0298.npy +tests/data/ljspeech/wavs/LJ002-0193.wav|tests/data/ljspeech/wavs/LJ002-0193.npy +tests/data/ljspeech/wavs/LJ037-0179.wav|tests/data/ljspeech/wavs/LJ037-0179.npy +tests/data/ljspeech/wavs/LJ018-0201.wav|tests/data/ljspeech/wavs/LJ018-0201.npy +tests/data/ljspeech/wavs/LJ003-0106.wav|tests/data/ljspeech/wavs/LJ003-0106.npy +tests/data/ljspeech/wavs/LJ009-0135.wav|tests/data/ljspeech/wavs/LJ009-0135.npy +tests/data/ljspeech/wavs/LJ030-0177.wav|tests/data/ljspeech/wavs/LJ030-0177.npy +tests/data/ljspeech/wavs/LJ015-0213.wav|tests/data/ljspeech/wavs/LJ015-0213.npy +tests/data/ljspeech/wavs/LJ001-0114.wav|tests/data/ljspeech/wavs/LJ001-0114.npy +tests/data/ljspeech/wavs/LJ040-0177.wav|tests/data/ljspeech/wavs/LJ040-0177.npy +tests/data/ljspeech/wavs/LJ004-0201.wav|tests/data/ljspeech/wavs/LJ004-0201.npy +tests/data/ljspeech/wavs/LJ036-0011.wav|tests/data/ljspeech/wavs/LJ036-0011.npy +tests/data/ljspeech/wavs/LJ002-0223.wav|tests/data/ljspeech/wavs/LJ002-0223.npy +tests/data/ljspeech/wavs/LJ003-0137.wav|tests/data/ljspeech/wavs/LJ003-0137.npy +tests/data/ljspeech/wavs/LJ032-0006.wav|tests/data/ljspeech/wavs/LJ032-0006.npy +tests/data/ljspeech/wavs/LJ048-0220.wav|tests/data/ljspeech/wavs/LJ048-0220.npy +tests/data/ljspeech/wavs/LJ036-0191.wav|tests/data/ljspeech/wavs/LJ036-0191.npy +tests/data/ljspeech/wavs/LJ033-0174.wav|tests/data/ljspeech/wavs/LJ033-0174.npy +tests/data/ljspeech/wavs/LJ002-0052.wav|tests/data/ljspeech/wavs/LJ002-0052.npy +tests/data/ljspeech/wavs/LJ024-0073.wav|tests/data/ljspeech/wavs/LJ024-0073.npy +tests/data/ljspeech/wavs/LJ036-0179.wav|tests/data/ljspeech/wavs/LJ036-0179.npy +tests/data/ljspeech/wavs/LJ027-0098.wav|tests/data/ljspeech/wavs/LJ027-0098.npy +tests/data/ljspeech/wavs/LJ042-0244.wav|tests/data/ljspeech/wavs/LJ042-0244.npy +tests/data/ljspeech/wavs/LJ042-0158.wav|tests/data/ljspeech/wavs/LJ042-0158.npy +tests/data/ljspeech/wavs/LJ016-0173.wav|tests/data/ljspeech/wavs/LJ016-0173.npy +tests/data/ljspeech/wavs/LJ004-0077.wav|tests/data/ljspeech/wavs/LJ004-0077.npy +tests/data/ljspeech/wavs/LJ044-0084.wav|tests/data/ljspeech/wavs/LJ044-0084.npy +tests/data/ljspeech/wavs/LJ009-0103.wav|tests/data/ljspeech/wavs/LJ009-0103.npy +tests/data/ljspeech/wavs/LJ024-0048.wav|tests/data/ljspeech/wavs/LJ024-0048.npy +tests/data/ljspeech/wavs/LJ031-0224.wav|tests/data/ljspeech/wavs/LJ031-0224.npy +tests/data/ljspeech/wavs/LJ008-0100.wav|tests/data/ljspeech/wavs/LJ008-0100.npy +tests/data/ljspeech/wavs/LJ003-0019.wav|tests/data/ljspeech/wavs/LJ003-0019.npy 
+tests/data/ljspeech/wavs/LJ001-0039.wav|tests/data/ljspeech/wavs/LJ001-0039.npy +tests/data/ljspeech/wavs/LJ034-0169.wav|tests/data/ljspeech/wavs/LJ034-0169.npy +tests/data/ljspeech/wavs/LJ019-0327.wav|tests/data/ljspeech/wavs/LJ019-0327.npy +tests/data/ljspeech/wavs/LJ008-0172.wav|tests/data/ljspeech/wavs/LJ008-0172.npy +tests/data/ljspeech/wavs/LJ016-0395.wav|tests/data/ljspeech/wavs/LJ016-0395.npy +tests/data/ljspeech/wavs/LJ017-0036.wav|tests/data/ljspeech/wavs/LJ017-0036.npy +tests/data/ljspeech/wavs/LJ007-0027.wav|tests/data/ljspeech/wavs/LJ007-0027.npy +tests/data/ljspeech/wavs/LJ020-0095.wav|tests/data/ljspeech/wavs/LJ020-0095.npy +tests/data/ljspeech/wavs/LJ034-0179.wav|tests/data/ljspeech/wavs/LJ034-0179.npy +tests/data/ljspeech/wavs/LJ007-0146.wav|tests/data/ljspeech/wavs/LJ007-0146.npy +tests/data/ljspeech/wavs/LJ016-0446.wav|tests/data/ljspeech/wavs/LJ016-0446.npy +tests/data/ljspeech/wavs/LJ024-0082.wav|tests/data/ljspeech/wavs/LJ024-0082.npy +tests/data/ljspeech/wavs/LJ008-0088.wav|tests/data/ljspeech/wavs/LJ008-0088.npy +tests/data/ljspeech/wavs/LJ020-0032.wav|tests/data/ljspeech/wavs/LJ020-0032.npy +tests/data/ljspeech/wavs/LJ008-0266.wav|tests/data/ljspeech/wavs/LJ008-0266.npy +tests/data/ljspeech/wavs/LJ046-0130.wav|tests/data/ljspeech/wavs/LJ046-0130.npy +tests/data/ljspeech/wavs/LJ038-0243.wav|tests/data/ljspeech/wavs/LJ038-0243.npy +tests/data/ljspeech/wavs/LJ043-0088.wav|tests/data/ljspeech/wavs/LJ043-0088.npy +tests/data/ljspeech/wavs/LJ050-0051.wav|tests/data/ljspeech/wavs/LJ050-0051.npy +tests/data/ljspeech/wavs/LJ029-0192.wav|tests/data/ljspeech/wavs/LJ029-0192.npy +tests/data/ljspeech/wavs/LJ011-0118.wav|tests/data/ljspeech/wavs/LJ011-0118.npy +tests/data/ljspeech/wavs/LJ042-0185.wav|tests/data/ljspeech/wavs/LJ042-0185.npy +tests/data/ljspeech/wavs/LJ022-0128.wav|tests/data/ljspeech/wavs/LJ022-0128.npy +tests/data/ljspeech/wavs/LJ045-0163.wav|tests/data/ljspeech/wavs/LJ045-0163.npy +tests/data/ljspeech/wavs/LJ018-0254.wav|tests/data/ljspeech/wavs/LJ018-0254.npy +tests/data/ljspeech/wavs/LJ035-0203.wav|tests/data/ljspeech/wavs/LJ035-0203.npy +tests/data/ljspeech/wavs/LJ036-0216.wav|tests/data/ljspeech/wavs/LJ036-0216.npy +tests/data/ljspeech/wavs/LJ010-0011.wav|tests/data/ljspeech/wavs/LJ010-0011.npy +tests/data/ljspeech/wavs/LJ043-0173.wav|tests/data/ljspeech/wavs/LJ043-0173.npy +tests/data/ljspeech/wavs/LJ032-0041.wav|tests/data/ljspeech/wavs/LJ032-0041.npy +tests/data/ljspeech/wavs/LJ043-0161.wav|tests/data/ljspeech/wavs/LJ043-0161.npy +tests/data/ljspeech/wavs/LJ037-0007.wav|tests/data/ljspeech/wavs/LJ037-0007.npy +tests/data/ljspeech/wavs/LJ038-0111.wav|tests/data/ljspeech/wavs/LJ038-0111.npy +tests/data/ljspeech/wavs/LJ015-0217.wav|tests/data/ljspeech/wavs/LJ015-0217.npy +tests/data/ljspeech/wavs/LJ010-0101.wav|tests/data/ljspeech/wavs/LJ010-0101.npy +tests/data/ljspeech/wavs/LJ012-0026.wav|tests/data/ljspeech/wavs/LJ012-0026.npy +tests/data/ljspeech/wavs/LJ008-0314.wav|tests/data/ljspeech/wavs/LJ008-0314.npy +tests/data/ljspeech/wavs/LJ008-0308.wav|tests/data/ljspeech/wavs/LJ008-0308.npy +tests/data/ljspeech/wavs/LJ012-0285.wav|tests/data/ljspeech/wavs/LJ012-0285.npy +tests/data/ljspeech/wavs/LJ010-0128.wav|tests/data/ljspeech/wavs/LJ010-0128.npy +tests/data/ljspeech/wavs/LJ013-0259.wav|tests/data/ljspeech/wavs/LJ013-0259.npy +tests/data/ljspeech/wavs/LJ019-0066.wav|tests/data/ljspeech/wavs/LJ019-0066.npy +tests/data/ljspeech/wavs/LJ008-0222.wav|tests/data/ljspeech/wavs/LJ008-0222.npy 
+tests/data/ljspeech/wavs/LJ018-0083.wav|tests/data/ljspeech/wavs/LJ018-0083.npy +tests/data/ljspeech/wavs/LJ045-0006.wav|tests/data/ljspeech/wavs/LJ045-0006.npy +tests/data/ljspeech/wavs/LJ018-0262.wav|tests/data/ljspeech/wavs/LJ018-0262.npy +tests/data/ljspeech/wavs/LJ038-0261.wav|tests/data/ljspeech/wavs/LJ038-0261.npy +tests/data/ljspeech/wavs/LJ002-0148.wav|tests/data/ljspeech/wavs/LJ002-0148.npy +tests/data/ljspeech/wavs/LJ038-0175.wav|tests/data/ljspeech/wavs/LJ038-0175.npy +tests/data/ljspeech/wavs/LJ002-0179.wav|tests/data/ljspeech/wavs/LJ002-0179.npy +tests/data/ljspeech/wavs/LJ028-0184.wav|tests/data/ljspeech/wavs/LJ028-0184.npy +tests/data/ljspeech/wavs/LJ039-0202.wav|tests/data/ljspeech/wavs/LJ039-0202.npy +tests/data/ljspeech/wavs/LJ029-0110.wav|tests/data/ljspeech/wavs/LJ029-0110.npy +tests/data/ljspeech/wavs/LJ028-0300.wav|tests/data/ljspeech/wavs/LJ028-0300.npy +tests/data/ljspeech/wavs/LJ018-0207.wav|tests/data/ljspeech/wavs/LJ018-0207.npy +tests/data/ljspeech/wavs/LJ010-0264.wav|tests/data/ljspeech/wavs/LJ010-0264.npy +tests/data/ljspeech/wavs/LJ016-0444.wav|tests/data/ljspeech/wavs/LJ016-0444.npy +tests/data/ljspeech/wavs/LJ033-0051.wav|tests/data/ljspeech/wavs/LJ033-0051.npy +tests/data/ljspeech/wavs/LJ042-0237.wav|tests/data/ljspeech/wavs/LJ042-0237.npy +tests/data/ljspeech/wavs/LJ022-0077.wav|tests/data/ljspeech/wavs/LJ022-0077.npy +tests/data/ljspeech/wavs/LJ034-0086.wav|tests/data/ljspeech/wavs/LJ034-0086.npy +tests/data/ljspeech/wavs/LJ042-0052.wav|tests/data/ljspeech/wavs/LJ042-0052.npy +tests/data/ljspeech/wavs/LJ011-0182.wav|tests/data/ljspeech/wavs/LJ011-0182.npy +tests/data/ljspeech/wavs/LJ039-0150.wav|tests/data/ljspeech/wavs/LJ039-0150.npy +tests/data/ljspeech/wavs/LJ039-0198.wav|tests/data/ljspeech/wavs/LJ039-0198.npy +tests/data/ljspeech/wavs/LJ040-0209.wav|tests/data/ljspeech/wavs/LJ040-0209.npy +tests/data/ljspeech/wavs/LJ018-0378.wav|tests/data/ljspeech/wavs/LJ018-0378.npy +tests/data/ljspeech/wavs/LJ017-0234.wav|tests/data/ljspeech/wavs/LJ017-0234.npy +tests/data/ljspeech/wavs/LJ039-0056.wav|tests/data/ljspeech/wavs/LJ039-0056.npy +tests/data/ljspeech/wavs/LJ019-0191.wav|tests/data/ljspeech/wavs/LJ019-0191.npy +tests/data/ljspeech/wavs/LJ005-0212.wav|tests/data/ljspeech/wavs/LJ005-0212.npy +tests/data/ljspeech/wavs/LJ007-0193.wav|tests/data/ljspeech/wavs/LJ007-0193.npy +tests/data/ljspeech/wavs/LJ024-0097.wav|tests/data/ljspeech/wavs/LJ024-0097.npy +tests/data/ljspeech/wavs/LJ018-0344.wav|tests/data/ljspeech/wavs/LJ018-0344.npy +tests/data/ljspeech/wavs/LJ003-0182.wav|tests/data/ljspeech/wavs/LJ003-0182.npy +tests/data/ljspeech/wavs/LJ042-0175.wav|tests/data/ljspeech/wavs/LJ042-0175.npy +tests/data/ljspeech/wavs/LJ032-0138.wav|tests/data/ljspeech/wavs/LJ032-0138.npy +tests/data/ljspeech/wavs/LJ009-0113.wav|tests/data/ljspeech/wavs/LJ009-0113.npy +tests/data/ljspeech/wavs/LJ041-0116.wav|tests/data/ljspeech/wavs/LJ041-0116.npy +tests/data/ljspeech/wavs/LJ022-0159.wav|tests/data/ljspeech/wavs/LJ022-0159.npy +tests/data/ljspeech/wavs/LJ004-0146.wav|tests/data/ljspeech/wavs/LJ004-0146.npy +tests/data/ljspeech/wavs/LJ023-0039.wav|tests/data/ljspeech/wavs/LJ023-0039.npy +tests/data/ljspeech/wavs/LJ019-0347.wav|tests/data/ljspeech/wavs/LJ019-0347.npy +tests/data/ljspeech/wavs/LJ044-0148.wav|tests/data/ljspeech/wavs/LJ044-0148.npy +tests/data/ljspeech/wavs/LJ022-0171.wav|tests/data/ljspeech/wavs/LJ022-0171.npy +tests/data/ljspeech/wavs/LJ035-0017.wav|tests/data/ljspeech/wavs/LJ035-0017.npy 
+tests/data/ljspeech/wavs/LJ011-0016.wav|tests/data/ljspeech/wavs/LJ011-0016.npy +tests/data/ljspeech/wavs/LJ005-0017.wav|tests/data/ljspeech/wavs/LJ005-0017.npy +tests/data/ljspeech/wavs/LJ050-0143.wav|tests/data/ljspeech/wavs/LJ050-0143.npy +tests/data/ljspeech/wavs/LJ045-0241.wav|tests/data/ljspeech/wavs/LJ045-0241.npy +tests/data/ljspeech/wavs/LJ004-0241.wav|tests/data/ljspeech/wavs/LJ004-0241.npy +tests/data/ljspeech/wavs/LJ002-0254.wav|tests/data/ljspeech/wavs/LJ002-0254.npy +tests/data/ljspeech/wavs/LJ011-0079.wav|tests/data/ljspeech/wavs/LJ011-0079.npy +tests/data/ljspeech/wavs/LJ040-0233.wav|tests/data/ljspeech/wavs/LJ040-0233.npy +tests/data/ljspeech/wavs/LJ028-0498.wav|tests/data/ljspeech/wavs/LJ028-0498.npy +tests/data/ljspeech/wavs/LJ028-0407.wav|tests/data/ljspeech/wavs/LJ028-0407.npy +tests/data/ljspeech/wavs/LJ009-0205.wav|tests/data/ljspeech/wavs/LJ009-0205.npy +tests/data/ljspeech/wavs/LJ028-0113.wav|tests/data/ljspeech/wavs/LJ028-0113.npy +tests/data/ljspeech/wavs/LJ017-0114.wav|tests/data/ljspeech/wavs/LJ017-0114.npy +tests/data/ljspeech/wavs/LJ015-0015.wav|tests/data/ljspeech/wavs/LJ015-0015.npy +tests/data/ljspeech/wavs/LJ013-0020.wav|tests/data/ljspeech/wavs/LJ013-0020.npy +tests/data/ljspeech/wavs/LJ021-0131.wav|tests/data/ljspeech/wavs/LJ021-0131.npy +tests/data/ljspeech/wavs/LJ048-0021.wav|tests/data/ljspeech/wavs/LJ048-0021.npy +tests/data/ljspeech/wavs/LJ043-0156.wav|tests/data/ljspeech/wavs/LJ043-0156.npy +tests/data/ljspeech/wavs/LJ013-0024.wav|tests/data/ljspeech/wavs/LJ013-0024.npy +tests/data/ljspeech/wavs/LJ042-0160.wav|tests/data/ljspeech/wavs/LJ042-0160.npy +tests/data/ljspeech/wavs/LJ009-0262.wav|tests/data/ljspeech/wavs/LJ009-0262.npy +tests/data/ljspeech/wavs/LJ044-0117.wav|tests/data/ljspeech/wavs/LJ044-0117.npy +tests/data/ljspeech/wavs/LJ040-0084.wav|tests/data/ljspeech/wavs/LJ040-0084.npy +tests/data/ljspeech/wavs/LJ003-0142.wav|tests/data/ljspeech/wavs/LJ003-0142.npy +tests/data/ljspeech/wavs/LJ034-0113.wav|tests/data/ljspeech/wavs/LJ034-0113.npy +tests/data/ljspeech/wavs/LJ043-0135.wav|tests/data/ljspeech/wavs/LJ043-0135.npy +tests/data/ljspeech/wavs/LJ035-0006.wav|tests/data/ljspeech/wavs/LJ035-0006.npy +tests/data/ljspeech/wavs/LJ046-0118.wav|tests/data/ljspeech/wavs/LJ046-0118.npy +tests/data/ljspeech/wavs/LJ045-0145.wav|tests/data/ljspeech/wavs/LJ045-0145.npy +tests/data/ljspeech/wavs/LJ015-0077.wav|tests/data/ljspeech/wavs/LJ015-0077.npy +tests/data/ljspeech/wavs/LJ020-0007.wav|tests/data/ljspeech/wavs/LJ020-0007.npy +tests/data/ljspeech/wavs/LJ038-0128.wav|tests/data/ljspeech/wavs/LJ038-0128.npy +tests/data/ljspeech/wavs/LJ033-0028.wav|tests/data/ljspeech/wavs/LJ033-0028.npy +tests/data/ljspeech/wavs/LJ007-0221.wav|tests/data/ljspeech/wavs/LJ007-0221.npy +tests/data/ljspeech/wavs/LJ004-0027.wav|tests/data/ljspeech/wavs/LJ004-0027.npy +tests/data/ljspeech/wavs/LJ005-0094.wav|tests/data/ljspeech/wavs/LJ005-0094.npy +tests/data/ljspeech/wavs/LJ003-0232.wav|tests/data/ljspeech/wavs/LJ003-0232.npy +tests/data/ljspeech/wavs/LJ038-0068.wav|tests/data/ljspeech/wavs/LJ038-0068.npy +tests/data/ljspeech/wavs/LJ009-0121.wav|tests/data/ljspeech/wavs/LJ009-0121.npy +tests/data/ljspeech/wavs/LJ004-0010.wav|tests/data/ljspeech/wavs/LJ004-0010.npy +tests/data/ljspeech/wavs/LJ021-0033.wav|tests/data/ljspeech/wavs/LJ021-0033.npy +tests/data/ljspeech/wavs/LJ006-0089.wav|tests/data/ljspeech/wavs/LJ006-0089.npy +tests/data/ljspeech/wavs/LJ028-0436.wav|tests/data/ljspeech/wavs/LJ028-0436.npy 
+tests/data/ljspeech/wavs/LJ019-0316.wav|tests/data/ljspeech/wavs/LJ019-0316.npy +tests/data/ljspeech/wavs/LJ021-0048.wav|tests/data/ljspeech/wavs/LJ021-0048.npy +tests/data/ljspeech/wavs/LJ008-0285.wav|tests/data/ljspeech/wavs/LJ008-0285.npy +tests/data/ljspeech/wavs/LJ019-0338.wav|tests/data/ljspeech/wavs/LJ019-0338.npy +tests/data/ljspeech/wavs/LJ014-0147.wav|tests/data/ljspeech/wavs/LJ014-0147.npy +tests/data/ljspeech/wavs/LJ003-0149.wav|tests/data/ljspeech/wavs/LJ003-0149.npy +tests/data/ljspeech/wavs/LJ004-0206.wav|tests/data/ljspeech/wavs/LJ004-0206.npy +tests/data/ljspeech/wavs/LJ015-0172.wav|tests/data/ljspeech/wavs/LJ015-0172.npy +tests/data/ljspeech/wavs/LJ009-0236.wav|tests/data/ljspeech/wavs/LJ009-0236.npy +tests/data/ljspeech/wavs/LJ038-0144.wav|tests/data/ljspeech/wavs/LJ038-0144.npy +tests/data/ljspeech/wavs/LJ021-0102.wav|tests/data/ljspeech/wavs/LJ021-0102.npy +tests/data/ljspeech/wavs/LJ028-0433.wav|tests/data/ljspeech/wavs/LJ028-0433.npy +tests/data/ljspeech/wavs/LJ028-0087.wav|tests/data/ljspeech/wavs/LJ028-0087.npy +tests/data/ljspeech/wavs/LJ037-0197.wav|tests/data/ljspeech/wavs/LJ037-0197.npy +tests/data/ljspeech/wavs/LJ030-0159.wav|tests/data/ljspeech/wavs/LJ030-0159.npy +tests/data/ljspeech/wavs/LJ025-0013.wav|tests/data/ljspeech/wavs/LJ025-0013.npy +tests/data/ljspeech/wavs/LJ016-0276.wav|tests/data/ljspeech/wavs/LJ016-0276.npy +tests/data/ljspeech/wavs/LJ019-0206.wav|tests/data/ljspeech/wavs/LJ019-0206.npy +tests/data/ljspeech/wavs/LJ005-0158.wav|tests/data/ljspeech/wavs/LJ005-0158.npy +tests/data/ljspeech/wavs/LJ027-0162.wav|tests/data/ljspeech/wavs/LJ027-0162.npy +tests/data/ljspeech/wavs/LJ043-0183.wav|tests/data/ljspeech/wavs/LJ043-0183.npy +tests/data/ljspeech/wavs/LJ024-0104.wav|tests/data/ljspeech/wavs/LJ024-0104.npy +tests/data/ljspeech/wavs/LJ050-0164.wav|tests/data/ljspeech/wavs/LJ050-0164.npy +tests/data/ljspeech/wavs/LJ011-0233.wav|tests/data/ljspeech/wavs/LJ011-0233.npy +tests/data/ljspeech/wavs/LJ023-0065.wav|tests/data/ljspeech/wavs/LJ023-0065.npy +tests/data/ljspeech/wavs/LJ046-0032.wav|tests/data/ljspeech/wavs/LJ046-0032.npy +tests/data/ljspeech/wavs/LJ016-0347.wav|tests/data/ljspeech/wavs/LJ016-0347.npy +tests/data/ljspeech/wavs/LJ005-0182.wav|tests/data/ljspeech/wavs/LJ005-0182.npy +tests/data/ljspeech/wavs/LJ011-0237.wav|tests/data/ljspeech/wavs/LJ011-0237.npy +tests/data/ljspeech/wavs/LJ027-0168.wav|tests/data/ljspeech/wavs/LJ027-0168.npy +tests/data/ljspeech/wavs/LJ017-0167.wav|tests/data/ljspeech/wavs/LJ017-0167.npy +tests/data/ljspeech/wavs/LJ037-0086.wav|tests/data/ljspeech/wavs/LJ037-0086.npy +tests/data/ljspeech/wavs/LJ045-0250.wav|tests/data/ljspeech/wavs/LJ045-0250.npy +tests/data/ljspeech/wavs/LJ010-0251.wav|tests/data/ljspeech/wavs/LJ010-0251.npy +tests/data/ljspeech/wavs/LJ036-0068.wav|tests/data/ljspeech/wavs/LJ036-0068.npy +tests/data/ljspeech/wavs/LJ019-0282.wav|tests/data/ljspeech/wavs/LJ019-0282.npy +tests/data/ljspeech/wavs/LJ028-0141.wav|tests/data/ljspeech/wavs/LJ028-0141.npy +tests/data/ljspeech/wavs/LJ016-0281.wav|tests/data/ljspeech/wavs/LJ016-0281.npy +tests/data/ljspeech/wavs/LJ023-0102.wav|tests/data/ljspeech/wavs/LJ023-0102.npy +tests/data/ljspeech/wavs/LJ018-0056.wav|tests/data/ljspeech/wavs/LJ018-0056.npy +tests/data/ljspeech/wavs/LJ007-0171.wav|tests/data/ljspeech/wavs/LJ007-0171.npy +tests/data/ljspeech/wavs/LJ016-0393.wav|tests/data/ljspeech/wavs/LJ016-0393.npy +tests/data/ljspeech/wavs/LJ010-0213.wav|tests/data/ljspeech/wavs/LJ010-0213.npy 
+tests/data/ljspeech/wavs/LJ005-0297.wav|tests/data/ljspeech/wavs/LJ005-0297.npy +tests/data/ljspeech/wavs/LJ008-0122.wav|tests/data/ljspeech/wavs/LJ008-0122.npy +tests/data/ljspeech/wavs/LJ011-0074.wav|tests/data/ljspeech/wavs/LJ011-0074.npy +tests/data/ljspeech/wavs/LJ036-0185.wav|tests/data/ljspeech/wavs/LJ036-0185.npy +tests/data/ljspeech/wavs/LJ037-0095.wav|tests/data/ljspeech/wavs/LJ037-0095.npy +tests/data/ljspeech/wavs/LJ033-0124.wav|tests/data/ljspeech/wavs/LJ033-0124.npy +tests/data/ljspeech/wavs/LJ033-0069.wav|tests/data/ljspeech/wavs/LJ033-0069.npy +tests/data/ljspeech/wavs/LJ027-0125.wav|tests/data/ljspeech/wavs/LJ027-0125.npy +tests/data/ljspeech/wavs/LJ038-0290.wav|tests/data/ljspeech/wavs/LJ038-0290.npy +tests/data/ljspeech/wavs/LJ016-0232.wav|tests/data/ljspeech/wavs/LJ016-0232.npy +tests/data/ljspeech/wavs/LJ040-0179.wav|tests/data/ljspeech/wavs/LJ040-0179.npy +tests/data/ljspeech/wavs/LJ042-0137.wav|tests/data/ljspeech/wavs/LJ042-0137.npy +tests/data/ljspeech/wavs/LJ023-0048.wav|tests/data/ljspeech/wavs/LJ023-0048.npy +tests/data/ljspeech/wavs/LJ042-0070.wav|tests/data/ljspeech/wavs/LJ042-0070.npy +tests/data/ljspeech/wavs/LJ004-0117.wav|tests/data/ljspeech/wavs/LJ004-0117.npy +tests/data/ljspeech/wavs/LJ008-0237.wav|tests/data/ljspeech/wavs/LJ008-0237.npy +tests/data/ljspeech/wavs/LJ019-0336.wav|tests/data/ljspeech/wavs/LJ019-0336.npy +tests/data/ljspeech/wavs/LJ019-0334.wav|tests/data/ljspeech/wavs/LJ019-0334.npy +tests/data/ljspeech/wavs/LJ016-0177.wav|tests/data/ljspeech/wavs/LJ016-0177.npy +tests/data/ljspeech/wavs/LJ050-0251.wav|tests/data/ljspeech/wavs/LJ050-0251.npy +tests/data/ljspeech/wavs/LJ023-0052.wav|tests/data/ljspeech/wavs/LJ023-0052.npy +tests/data/ljspeech/wavs/LJ005-0279.wav|tests/data/ljspeech/wavs/LJ005-0279.npy +tests/data/ljspeech/wavs/LJ037-0063.wav|tests/data/ljspeech/wavs/LJ037-0063.npy +tests/data/ljspeech/wavs/LJ024-0028.wav|tests/data/ljspeech/wavs/LJ024-0028.npy +tests/data/ljspeech/wavs/LJ011-0231.wav|tests/data/ljspeech/wavs/LJ011-0231.npy +tests/data/ljspeech/wavs/LJ037-0129.wav|tests/data/ljspeech/wavs/LJ037-0129.npy +tests/data/ljspeech/wavs/LJ002-0309.wav|tests/data/ljspeech/wavs/LJ002-0309.npy +tests/data/ljspeech/wavs/LJ016-0176.wav|tests/data/ljspeech/wavs/LJ016-0176.npy +tests/data/ljspeech/wavs/LJ002-0096.wav|tests/data/ljspeech/wavs/LJ002-0096.npy +tests/data/ljspeech/wavs/LJ002-0252.wav|tests/data/ljspeech/wavs/LJ002-0252.npy +tests/data/ljspeech/wavs/LJ040-0158.wav|tests/data/ljspeech/wavs/LJ040-0158.npy +tests/data/ljspeech/wavs/LJ001-0043.wav|tests/data/ljspeech/wavs/LJ001-0043.npy +tests/data/ljspeech/wavs/LJ030-0197.wav|tests/data/ljspeech/wavs/LJ030-0197.npy +tests/data/ljspeech/wavs/LJ014-0130.wav|tests/data/ljspeech/wavs/LJ014-0130.npy +tests/data/ljspeech/wavs/LJ014-0272.wav|tests/data/ljspeech/wavs/LJ014-0272.npy +tests/data/ljspeech/wavs/LJ039-0169.wav|tests/data/ljspeech/wavs/LJ039-0169.npy +tests/data/ljspeech/wavs/LJ039-0093.wav|tests/data/ljspeech/wavs/LJ039-0093.npy +tests/data/ljspeech/wavs/LJ045-0134.wav|tests/data/ljspeech/wavs/LJ045-0134.npy +tests/data/ljspeech/wavs/LJ032-0092.wav|tests/data/ljspeech/wavs/LJ032-0092.npy +tests/data/ljspeech/wavs/LJ032-0040.wav|tests/data/ljspeech/wavs/LJ032-0040.npy +tests/data/ljspeech/wavs/LJ040-0048.wav|tests/data/ljspeech/wavs/LJ040-0048.npy +tests/data/ljspeech/wavs/LJ022-0109.wav|tests/data/ljspeech/wavs/LJ022-0109.npy +tests/data/ljspeech/wavs/LJ033-0197.wav|tests/data/ljspeech/wavs/LJ033-0197.npy 
+tests/data/ljspeech/wavs/LJ022-0051.wav|tests/data/ljspeech/wavs/LJ022-0051.npy +tests/data/ljspeech/wavs/LJ045-0079.wav|tests/data/ljspeech/wavs/LJ045-0079.npy +tests/data/ljspeech/wavs/LJ012-0268.wav|tests/data/ljspeech/wavs/LJ012-0268.npy +tests/data/ljspeech/wavs/LJ031-0106.wav|tests/data/ljspeech/wavs/LJ031-0106.npy +tests/data/ljspeech/wavs/LJ045-0119.wav|tests/data/ljspeech/wavs/LJ045-0119.npy +tests/data/ljspeech/wavs/LJ042-0231.wav|tests/data/ljspeech/wavs/LJ042-0231.npy +tests/data/ljspeech/wavs/LJ024-0062.wav|tests/data/ljspeech/wavs/LJ024-0062.npy +tests/data/ljspeech/wavs/LJ040-0203.wav|tests/data/ljspeech/wavs/LJ040-0203.npy +tests/data/ljspeech/wavs/LJ002-0070.wav|tests/data/ljspeech/wavs/LJ002-0070.npy +tests/data/ljspeech/wavs/LJ020-0091.wav|tests/data/ljspeech/wavs/LJ020-0091.npy +tests/data/ljspeech/wavs/LJ015-0005.wav|tests/data/ljspeech/wavs/LJ015-0005.npy +tests/data/ljspeech/wavs/LJ027-0084.wav|tests/data/ljspeech/wavs/LJ027-0084.npy +tests/data/ljspeech/wavs/LJ018-0206.wav|tests/data/ljspeech/wavs/LJ018-0206.npy +tests/data/ljspeech/wavs/LJ023-0094.wav|tests/data/ljspeech/wavs/LJ023-0094.npy +tests/data/ljspeech/wavs/LJ011-0162.wav|tests/data/ljspeech/wavs/LJ011-0162.npy +tests/data/ljspeech/wavs/LJ024-0006.wav|tests/data/ljspeech/wavs/LJ024-0006.npy +tests/data/ljspeech/wavs/LJ028-0043.wav|tests/data/ljspeech/wavs/LJ028-0043.npy +tests/data/ljspeech/wavs/LJ038-0205.wav|tests/data/ljspeech/wavs/LJ038-0205.npy +tests/data/ljspeech/wavs/LJ028-0080.wav|tests/data/ljspeech/wavs/LJ028-0080.npy +tests/data/ljspeech/wavs/LJ015-0222.wav|tests/data/ljspeech/wavs/LJ015-0222.npy +tests/data/ljspeech/wavs/LJ039-0166.wav|tests/data/ljspeech/wavs/LJ039-0166.npy +tests/data/ljspeech/wavs/LJ004-0239.wav|tests/data/ljspeech/wavs/LJ004-0239.npy +tests/data/ljspeech/wavs/LJ001-0123.wav|tests/data/ljspeech/wavs/LJ001-0123.npy +tests/data/ljspeech/wavs/LJ028-0065.wav|tests/data/ljspeech/wavs/LJ028-0065.npy +tests/data/ljspeech/wavs/LJ026-0045.wav|tests/data/ljspeech/wavs/LJ026-0045.npy +tests/data/ljspeech/wavs/LJ036-0005.wav|tests/data/ljspeech/wavs/LJ036-0005.npy +tests/data/ljspeech/wavs/LJ043-0080.wav|tests/data/ljspeech/wavs/LJ043-0080.npy +tests/data/ljspeech/wavs/LJ048-0247.wav|tests/data/ljspeech/wavs/LJ048-0247.npy +tests/data/ljspeech/wavs/LJ022-0203.wav|tests/data/ljspeech/wavs/LJ022-0203.npy +tests/data/ljspeech/wavs/LJ042-0016.wav|tests/data/ljspeech/wavs/LJ042-0016.npy +tests/data/ljspeech/wavs/LJ028-0443.wav|tests/data/ljspeech/wavs/LJ028-0443.npy +tests/data/ljspeech/wavs/LJ040-0227.wav|tests/data/ljspeech/wavs/LJ040-0227.npy +tests/data/ljspeech/wavs/LJ002-0118.wav|tests/data/ljspeech/wavs/LJ002-0118.npy +tests/data/ljspeech/wavs/LJ004-0042.wav|tests/data/ljspeech/wavs/LJ004-0042.npy +tests/data/ljspeech/wavs/LJ010-0230.wav|tests/data/ljspeech/wavs/LJ010-0230.npy +tests/data/ljspeech/wavs/LJ018-0285.wav|tests/data/ljspeech/wavs/LJ018-0285.npy +tests/data/ljspeech/wavs/LJ013-0243.wav|tests/data/ljspeech/wavs/LJ013-0243.npy +tests/data/ljspeech/wavs/LJ014-0030.wav|tests/data/ljspeech/wavs/LJ014-0030.npy +tests/data/ljspeech/wavs/LJ008-0251.wav|tests/data/ljspeech/wavs/LJ008-0251.npy +tests/data/ljspeech/wavs/LJ016-0315.wav|tests/data/ljspeech/wavs/LJ016-0315.npy +tests/data/ljspeech/wavs/LJ019-0119.wav|tests/data/ljspeech/wavs/LJ019-0119.npy +tests/data/ljspeech/wavs/LJ016-0333.wav|tests/data/ljspeech/wavs/LJ016-0333.npy +tests/data/ljspeech/wavs/LJ014-0072.wav|tests/data/ljspeech/wavs/LJ014-0072.npy 
+tests/data/ljspeech/wavs/LJ016-0321.wav|tests/data/ljspeech/wavs/LJ016-0321.npy +tests/data/ljspeech/wavs/LJ042-0234.wav|tests/data/ljspeech/wavs/LJ042-0234.npy +tests/data/ljspeech/wavs/LJ043-0074.wav|tests/data/ljspeech/wavs/LJ043-0074.npy +tests/data/ljspeech/wavs/LJ001-0094.wav|tests/data/ljspeech/wavs/LJ001-0094.npy +tests/data/ljspeech/wavs/LJ019-0105.wav|tests/data/ljspeech/wavs/LJ019-0105.npy +tests/data/ljspeech/wavs/LJ036-0081.wav|tests/data/ljspeech/wavs/LJ036-0081.npy +tests/data/ljspeech/wavs/LJ016-0279.wav|tests/data/ljspeech/wavs/LJ016-0279.npy +tests/data/ljspeech/wavs/LJ006-0178.wav|tests/data/ljspeech/wavs/LJ006-0178.npy +tests/data/ljspeech/wavs/LJ019-0073.wav|tests/data/ljspeech/wavs/LJ019-0073.npy +tests/data/ljspeech/wavs/LJ038-0026.wav|tests/data/ljspeech/wavs/LJ038-0026.npy +tests/data/ljspeech/wavs/LJ003-0140.wav|tests/data/ljspeech/wavs/LJ003-0140.npy +tests/data/ljspeech/wavs/LJ046-0012.wav|tests/data/ljspeech/wavs/LJ046-0012.npy +tests/data/ljspeech/wavs/LJ016-0275.wav|tests/data/ljspeech/wavs/LJ016-0275.npy +tests/data/ljspeech/wavs/LJ046-0192.wav|tests/data/ljspeech/wavs/LJ046-0192.npy +tests/data/ljspeech/wavs/LJ030-0147.wav|tests/data/ljspeech/wavs/LJ030-0147.npy +tests/data/ljspeech/wavs/LJ004-0024.wav|tests/data/ljspeech/wavs/LJ004-0024.npy +tests/data/ljspeech/wavs/LJ016-0169.wav|tests/data/ljspeech/wavs/LJ016-0169.npy +tests/data/ljspeech/wavs/LJ030-0058.wav|tests/data/ljspeech/wavs/LJ030-0058.npy +tests/data/ljspeech/wavs/LJ027-0120.wav|tests/data/ljspeech/wavs/LJ027-0120.npy +tests/data/ljspeech/wavs/LJ012-0153.wav|tests/data/ljspeech/wavs/LJ012-0153.npy +tests/data/ljspeech/wavs/LJ043-0040.wav|tests/data/ljspeech/wavs/LJ043-0040.npy +tests/data/ljspeech/wavs/LJ018-0142.wav|tests/data/ljspeech/wavs/LJ018-0142.npy +tests/data/ljspeech/wavs/LJ030-0185.wav|tests/data/ljspeech/wavs/LJ030-0185.npy +tests/data/ljspeech/wavs/LJ030-0041.wav|tests/data/ljspeech/wavs/LJ030-0041.npy +tests/data/ljspeech/wavs/LJ034-0217.wav|tests/data/ljspeech/wavs/LJ034-0217.npy +tests/data/ljspeech/wavs/LJ018-0220.wav|tests/data/ljspeech/wavs/LJ018-0220.npy +tests/data/ljspeech/wavs/LJ040-0224.wav|tests/data/ljspeech/wavs/LJ040-0224.npy +tests/data/ljspeech/wavs/LJ018-0287.wav|tests/data/ljspeech/wavs/LJ018-0287.npy +tests/data/ljspeech/wavs/LJ015-0056.wav|tests/data/ljspeech/wavs/LJ015-0056.npy +tests/data/ljspeech/wavs/LJ018-0393.wav|tests/data/ljspeech/wavs/LJ018-0393.npy +tests/data/ljspeech/wavs/LJ010-0115.wav|tests/data/ljspeech/wavs/LJ010-0115.npy +tests/data/ljspeech/wavs/LJ015-0108.wav|tests/data/ljspeech/wavs/LJ015-0108.npy +tests/data/ljspeech/wavs/LJ002-0182.wav|tests/data/ljspeech/wavs/LJ002-0182.npy +tests/data/ljspeech/wavs/LJ019-0079.wav|tests/data/ljspeech/wavs/LJ019-0079.npy +tests/data/ljspeech/wavs/LJ015-0165.wav|tests/data/ljspeech/wavs/LJ015-0165.npy +tests/data/ljspeech/wavs/LJ037-0118.wav|tests/data/ljspeech/wavs/LJ037-0118.npy +tests/data/ljspeech/wavs/LJ028-0313.wav|tests/data/ljspeech/wavs/LJ028-0313.npy +tests/data/ljspeech/wavs/LJ018-0049.wav|tests/data/ljspeech/wavs/LJ018-0049.npy +tests/data/ljspeech/wavs/LJ012-0186.wav|tests/data/ljspeech/wavs/LJ012-0186.npy +tests/data/ljspeech/wavs/LJ009-0148.wav|tests/data/ljspeech/wavs/LJ009-0148.npy +tests/data/ljspeech/wavs/LJ003-0120.wav|tests/data/ljspeech/wavs/LJ003-0120.npy +tests/data/ljspeech/wavs/LJ009-0156.wav|tests/data/ljspeech/wavs/LJ009-0156.npy +tests/data/ljspeech/wavs/LJ040-0115.wav|tests/data/ljspeech/wavs/LJ040-0115.npy 
+tests/data/ljspeech/wavs/LJ010-0065.wav|tests/data/ljspeech/wavs/LJ010-0065.npy +tests/data/ljspeech/wavs/LJ050-0216.wav|tests/data/ljspeech/wavs/LJ050-0216.npy +tests/data/ljspeech/wavs/LJ032-0118.wav|tests/data/ljspeech/wavs/LJ032-0118.npy +tests/data/ljspeech/wavs/LJ036-0169.wav|tests/data/ljspeech/wavs/LJ036-0169.npy +tests/data/ljspeech/wavs/LJ003-0071.wav|tests/data/ljspeech/wavs/LJ003-0071.npy +tests/data/ljspeech/wavs/LJ040-0029.wav|tests/data/ljspeech/wavs/LJ040-0029.npy +tests/data/ljspeech/wavs/LJ048-0045.wav|tests/data/ljspeech/wavs/LJ048-0045.npy +tests/data/ljspeech/wavs/LJ025-0120.wav|tests/data/ljspeech/wavs/LJ025-0120.npy +tests/data/ljspeech/wavs/LJ032-0223.wav|tests/data/ljspeech/wavs/LJ032-0223.npy +tests/data/ljspeech/wavs/LJ012-0208.wav|tests/data/ljspeech/wavs/LJ012-0208.npy +tests/data/ljspeech/wavs/LJ001-0054.wav|tests/data/ljspeech/wavs/LJ001-0054.npy +tests/data/ljspeech/wavs/LJ038-0226.wav|tests/data/ljspeech/wavs/LJ038-0226.npy +tests/data/ljspeech/wavs/LJ008-0086.wav|tests/data/ljspeech/wavs/LJ008-0086.npy +tests/data/ljspeech/wavs/LJ002-0111.wav|tests/data/ljspeech/wavs/LJ002-0111.npy +tests/data/ljspeech/wavs/LJ025-0063.wav|tests/data/ljspeech/wavs/LJ025-0063.npy +tests/data/ljspeech/wavs/LJ021-0011.wav|tests/data/ljspeech/wavs/LJ021-0011.npy +tests/data/ljspeech/wavs/LJ006-0210.wav|tests/data/ljspeech/wavs/LJ006-0210.npy +tests/data/ljspeech/wavs/LJ035-0056.wav|tests/data/ljspeech/wavs/LJ035-0056.npy +tests/data/ljspeech/wavs/LJ042-0053.wav|tests/data/ljspeech/wavs/LJ042-0053.npy +tests/data/ljspeech/wavs/LJ013-0141.wav|tests/data/ljspeech/wavs/LJ013-0141.npy +tests/data/ljspeech/wavs/LJ011-0257.wav|tests/data/ljspeech/wavs/LJ011-0257.npy +tests/data/ljspeech/wavs/LJ005-0244.wav|tests/data/ljspeech/wavs/LJ005-0244.npy +tests/data/ljspeech/wavs/LJ030-0052.wav|tests/data/ljspeech/wavs/LJ030-0052.npy +tests/data/ljspeech/wavs/LJ038-0061.wav|tests/data/ljspeech/wavs/LJ038-0061.npy +tests/data/ljspeech/wavs/LJ050-0089.wav|tests/data/ljspeech/wavs/LJ050-0089.npy +tests/data/ljspeech/wavs/LJ009-0132.wav|tests/data/ljspeech/wavs/LJ009-0132.npy +tests/data/ljspeech/wavs/LJ019-0130.wav|tests/data/ljspeech/wavs/LJ019-0130.npy +tests/data/ljspeech/wavs/LJ008-0310.wav|tests/data/ljspeech/wavs/LJ008-0310.npy +tests/data/ljspeech/wavs/LJ022-0201.wav|tests/data/ljspeech/wavs/LJ022-0201.npy +tests/data/ljspeech/wavs/LJ021-0042.wav|tests/data/ljspeech/wavs/LJ021-0042.npy +tests/data/ljspeech/wavs/LJ011-0167.wav|tests/data/ljspeech/wavs/LJ011-0167.npy +tests/data/ljspeech/wavs/LJ033-0117.wav|tests/data/ljspeech/wavs/LJ033-0117.npy +tests/data/ljspeech/wavs/LJ028-0410.wav|tests/data/ljspeech/wavs/LJ028-0410.npy +tests/data/ljspeech/wavs/LJ005-0135.wav|tests/data/ljspeech/wavs/LJ005-0135.npy +tests/data/ljspeech/wavs/LJ044-0156.wav|tests/data/ljspeech/wavs/LJ044-0156.npy +tests/data/ljspeech/wavs/LJ019-0076.wav|tests/data/ljspeech/wavs/LJ019-0076.npy +tests/data/ljspeech/wavs/LJ025-0028.wav|tests/data/ljspeech/wavs/LJ025-0028.npy +tests/data/ljspeech/wavs/LJ048-0200.wav|tests/data/ljspeech/wavs/LJ048-0200.npy +tests/data/ljspeech/wavs/LJ041-0039.wav|tests/data/ljspeech/wavs/LJ041-0039.npy +tests/data/ljspeech/wavs/LJ017-0090.wav|tests/data/ljspeech/wavs/LJ017-0090.npy +tests/data/ljspeech/wavs/LJ027-0146.wav|tests/data/ljspeech/wavs/LJ027-0146.npy +tests/data/ljspeech/wavs/LJ023-0040.wav|tests/data/ljspeech/wavs/LJ023-0040.npy +tests/data/ljspeech/wavs/LJ009-0015.wav|tests/data/ljspeech/wavs/LJ009-0015.npy 
+tests/data/ljspeech/wavs/LJ047-0113.wav|tests/data/ljspeech/wavs/LJ047-0113.npy +tests/data/ljspeech/wavs/LJ049-0031.wav|tests/data/ljspeech/wavs/LJ049-0031.npy +tests/data/ljspeech/wavs/LJ043-0150.wav|tests/data/ljspeech/wavs/LJ043-0150.npy +tests/data/ljspeech/wavs/LJ016-0059.wav|tests/data/ljspeech/wavs/LJ016-0059.npy +tests/data/ljspeech/wavs/LJ030-0228.wav|tests/data/ljspeech/wavs/LJ030-0228.npy +tests/data/ljspeech/wavs/LJ019-0265.wav|tests/data/ljspeech/wavs/LJ019-0265.npy +tests/data/ljspeech/wavs/LJ028-0206.wav|tests/data/ljspeech/wavs/LJ028-0206.npy +tests/data/ljspeech/wavs/LJ021-0117.wav|tests/data/ljspeech/wavs/LJ021-0117.npy +tests/data/ljspeech/wavs/LJ008-0215.wav|tests/data/ljspeech/wavs/LJ008-0215.npy +tests/data/ljspeech/wavs/LJ010-0234.wav|tests/data/ljspeech/wavs/LJ010-0234.npy +tests/data/ljspeech/wavs/LJ023-0051.wav|tests/data/ljspeech/wavs/LJ023-0051.npy +tests/data/ljspeech/wavs/LJ012-0249.wav|tests/data/ljspeech/wavs/LJ012-0249.npy +tests/data/ljspeech/wavs/LJ050-0274.wav|tests/data/ljspeech/wavs/LJ050-0274.npy +tests/data/ljspeech/wavs/LJ034-0195.wav|tests/data/ljspeech/wavs/LJ034-0195.npy +tests/data/ljspeech/wavs/LJ005-0282.wav|tests/data/ljspeech/wavs/LJ005-0282.npy +tests/data/ljspeech/wavs/LJ001-0020.wav|tests/data/ljspeech/wavs/LJ001-0020.npy +tests/data/ljspeech/wavs/LJ028-0398.wav|tests/data/ljspeech/wavs/LJ028-0398.npy +tests/data/ljspeech/wavs/LJ030-0047.wav|tests/data/ljspeech/wavs/LJ030-0047.npy +tests/data/ljspeech/wavs/LJ013-0215.wav|tests/data/ljspeech/wavs/LJ013-0215.npy +tests/data/ljspeech/wavs/LJ028-0428.wav|tests/data/ljspeech/wavs/LJ028-0428.npy +tests/data/ljspeech/wavs/LJ012-0240.wav|tests/data/ljspeech/wavs/LJ012-0240.npy +tests/data/ljspeech/wavs/LJ005-0269.wav|tests/data/ljspeech/wavs/LJ005-0269.npy +tests/data/ljspeech/wavs/LJ032-0142.wav|tests/data/ljspeech/wavs/LJ032-0142.npy +tests/data/ljspeech/wavs/LJ018-0117.wav|tests/data/ljspeech/wavs/LJ018-0117.npy +tests/data/ljspeech/wavs/LJ040-0077.wav|tests/data/ljspeech/wavs/LJ040-0077.npy +tests/data/ljspeech/wavs/LJ046-0011.wav|tests/data/ljspeech/wavs/LJ046-0011.npy +tests/data/ljspeech/wavs/LJ037-0153.wav|tests/data/ljspeech/wavs/LJ037-0153.npy +tests/data/ljspeech/wavs/LJ040-0074.wav|tests/data/ljspeech/wavs/LJ040-0074.npy +tests/data/ljspeech/wavs/LJ019-0109.wav|tests/data/ljspeech/wavs/LJ019-0109.npy +tests/data/ljspeech/wavs/LJ003-0153.wav|tests/data/ljspeech/wavs/LJ003-0153.npy +tests/data/ljspeech/wavs/LJ021-0149.wav|tests/data/ljspeech/wavs/LJ021-0149.npy +tests/data/ljspeech/wavs/LJ016-0441.wav|tests/data/ljspeech/wavs/LJ016-0441.npy +tests/data/ljspeech/wavs/LJ034-0021.wav|tests/data/ljspeech/wavs/LJ034-0021.npy +tests/data/ljspeech/wavs/LJ005-0103.wav|tests/data/ljspeech/wavs/LJ005-0103.npy +tests/data/ljspeech/wavs/LJ002-0049.wav|tests/data/ljspeech/wavs/LJ002-0049.npy +tests/data/ljspeech/wavs/LJ028-0166.wav|tests/data/ljspeech/wavs/LJ028-0166.npy +tests/data/ljspeech/wavs/LJ015-0046.wav|tests/data/ljspeech/wavs/LJ015-0046.npy +tests/data/ljspeech/wavs/LJ003-0273.wav|tests/data/ljspeech/wavs/LJ003-0273.npy +tests/data/ljspeech/wavs/LJ032-0143.wav|tests/data/ljspeech/wavs/LJ032-0143.npy +tests/data/ljspeech/wavs/LJ016-0069.wav|tests/data/ljspeech/wavs/LJ016-0069.npy +tests/data/ljspeech/wavs/LJ050-0088.wav|tests/data/ljspeech/wavs/LJ050-0088.npy +tests/data/ljspeech/wavs/LJ026-0076.wav|tests/data/ljspeech/wavs/LJ026-0076.npy +tests/data/ljspeech/wavs/LJ010-0240.wav|tests/data/ljspeech/wavs/LJ010-0240.npy 
+tests/data/ljspeech/wavs/LJ022-0029.wav|tests/data/ljspeech/wavs/LJ022-0029.npy +tests/data/ljspeech/wavs/LJ002-0261.wav|tests/data/ljspeech/wavs/LJ002-0261.npy +tests/data/ljspeech/wavs/LJ043-0146.wav|tests/data/ljspeech/wavs/LJ043-0146.npy +tests/data/ljspeech/wavs/LJ032-0188.wav|tests/data/ljspeech/wavs/LJ032-0188.npy +tests/data/ljspeech/wavs/LJ017-0220.wav|tests/data/ljspeech/wavs/LJ017-0220.npy +tests/data/ljspeech/wavs/LJ028-0229.wav|tests/data/ljspeech/wavs/LJ028-0229.npy +tests/data/ljspeech/wavs/LJ007-0069.wav|tests/data/ljspeech/wavs/LJ007-0069.npy +tests/data/ljspeech/wavs/LJ017-0173.wav|tests/data/ljspeech/wavs/LJ017-0173.npy +tests/data/ljspeech/wavs/LJ049-0117.wav|tests/data/ljspeech/wavs/LJ049-0117.npy +tests/data/ljspeech/wavs/LJ046-0113.wav|tests/data/ljspeech/wavs/LJ046-0113.npy +tests/data/ljspeech/wavs/LJ041-0136.wav|tests/data/ljspeech/wavs/LJ041-0136.npy +tests/data/ljspeech/wavs/LJ038-0221.wav|tests/data/ljspeech/wavs/LJ038-0221.npy +tests/data/ljspeech/wavs/LJ044-0190.wav|tests/data/ljspeech/wavs/LJ044-0190.npy +tests/data/ljspeech/wavs/LJ050-0035.wav|tests/data/ljspeech/wavs/LJ050-0035.npy +tests/data/ljspeech/wavs/LJ028-0032.wav|tests/data/ljspeech/wavs/LJ028-0032.npy +tests/data/ljspeech/wavs/LJ028-0294.wav|tests/data/ljspeech/wavs/LJ028-0294.npy +tests/data/ljspeech/wavs/LJ042-0065.wav|tests/data/ljspeech/wavs/LJ042-0065.npy +tests/data/ljspeech/wavs/LJ008-0006.wav|tests/data/ljspeech/wavs/LJ008-0006.npy +tests/data/ljspeech/wavs/LJ040-0025.wav|tests/data/ljspeech/wavs/LJ040-0025.npy +tests/data/ljspeech/wavs/LJ026-0127.wav|tests/data/ljspeech/wavs/LJ026-0127.npy +tests/data/ljspeech/wavs/LJ005-0110.wav|tests/data/ljspeech/wavs/LJ005-0110.npy +tests/data/ljspeech/wavs/LJ022-0084.wav|tests/data/ljspeech/wavs/LJ022-0084.npy +tests/data/ljspeech/wavs/LJ020-0090.wav|tests/data/ljspeech/wavs/LJ020-0090.npy +tests/data/ljspeech/wavs/LJ012-0049.wav|tests/data/ljspeech/wavs/LJ012-0049.npy +tests/data/ljspeech/wavs/LJ011-0103.wav|tests/data/ljspeech/wavs/LJ011-0103.npy +tests/data/ljspeech/wavs/LJ004-0248.wav|tests/data/ljspeech/wavs/LJ004-0248.npy +tests/data/ljspeech/wavs/LJ016-0200.wav|tests/data/ljspeech/wavs/LJ016-0200.npy +tests/data/ljspeech/wavs/LJ021-0192.wav|tests/data/ljspeech/wavs/LJ021-0192.npy +tests/data/ljspeech/wavs/LJ018-0280.wav|tests/data/ljspeech/wavs/LJ018-0280.npy +tests/data/ljspeech/wavs/LJ024-0071.wav|tests/data/ljspeech/wavs/LJ024-0071.npy +tests/data/ljspeech/wavs/LJ027-0058.wav|tests/data/ljspeech/wavs/LJ027-0058.npy +tests/data/ljspeech/wavs/LJ016-0273.wav|tests/data/ljspeech/wavs/LJ016-0273.npy +tests/data/ljspeech/wavs/LJ010-0179.wav|tests/data/ljspeech/wavs/LJ010-0179.npy +tests/data/ljspeech/wavs/LJ008-0262.wav|tests/data/ljspeech/wavs/LJ008-0262.npy +tests/data/ljspeech/wavs/LJ003-0188.wav|tests/data/ljspeech/wavs/LJ003-0188.npy +tests/data/ljspeech/wavs/LJ028-0250.wav|tests/data/ljspeech/wavs/LJ028-0250.npy +tests/data/ljspeech/wavs/LJ028-0506.wav|tests/data/ljspeech/wavs/LJ028-0506.npy +tests/data/ljspeech/wavs/LJ022-0008.wav|tests/data/ljspeech/wavs/LJ022-0008.npy +tests/data/ljspeech/wavs/LJ018-0245.wav|tests/data/ljspeech/wavs/LJ018-0245.npy +tests/data/ljspeech/wavs/LJ020-0050.wav|tests/data/ljspeech/wavs/LJ020-0050.npy +tests/data/ljspeech/wavs/LJ008-0077.wav|tests/data/ljspeech/wavs/LJ008-0077.npy +tests/data/ljspeech/wavs/LJ024-0072.wav|tests/data/ljspeech/wavs/LJ024-0072.npy +tests/data/ljspeech/wavs/LJ037-0259.wav|tests/data/ljspeech/wavs/LJ037-0259.npy 
+tests/data/ljspeech/wavs/LJ038-0220.wav|tests/data/ljspeech/wavs/LJ038-0220.npy +tests/data/ljspeech/wavs/LJ046-0129.wav|tests/data/ljspeech/wavs/LJ046-0129.npy +tests/data/ljspeech/wavs/LJ048-0032.wav|tests/data/ljspeech/wavs/LJ048-0032.npy +tests/data/ljspeech/wavs/LJ044-0111.wav|tests/data/ljspeech/wavs/LJ044-0111.npy +tests/data/ljspeech/wavs/LJ002-0164.wav|tests/data/ljspeech/wavs/LJ002-0164.npy +tests/data/ljspeech/wavs/LJ036-0059.wav|tests/data/ljspeech/wavs/LJ036-0059.npy +tests/data/ljspeech/wavs/LJ028-0385.wav|tests/data/ljspeech/wavs/LJ028-0385.npy +tests/data/ljspeech/wavs/LJ024-0102.wav|tests/data/ljspeech/wavs/LJ024-0102.npy +tests/data/ljspeech/wavs/LJ026-0065.wav|tests/data/ljspeech/wavs/LJ026-0065.npy +tests/data/ljspeech/wavs/LJ018-0156.wav|tests/data/ljspeech/wavs/LJ018-0156.npy +tests/data/ljspeech/wavs/LJ029-0127.wav|tests/data/ljspeech/wavs/LJ029-0127.npy +tests/data/ljspeech/wavs/LJ019-0113.wav|tests/data/ljspeech/wavs/LJ019-0113.npy +tests/data/ljspeech/wavs/LJ028-0038.wav|tests/data/ljspeech/wavs/LJ028-0038.npy +tests/data/ljspeech/wavs/LJ031-0173.wav|tests/data/ljspeech/wavs/LJ031-0173.npy +tests/data/ljspeech/wavs/LJ040-0159.wav|tests/data/ljspeech/wavs/LJ040-0159.npy +tests/data/ljspeech/wavs/LJ003-0252.wav|tests/data/ljspeech/wavs/LJ003-0252.npy +tests/data/ljspeech/wavs/LJ002-0166.wav|tests/data/ljspeech/wavs/LJ002-0166.npy +tests/data/ljspeech/wavs/LJ004-0172.wav|tests/data/ljspeech/wavs/LJ004-0172.npy +tests/data/ljspeech/wavs/LJ038-0207.wav|tests/data/ljspeech/wavs/LJ038-0207.npy +tests/data/ljspeech/wavs/LJ030-0184.wav|tests/data/ljspeech/wavs/LJ030-0184.npy +tests/data/ljspeech/wavs/LJ028-0339.wav|tests/data/ljspeech/wavs/LJ028-0339.npy +tests/data/ljspeech/wavs/LJ020-0039.wav|tests/data/ljspeech/wavs/LJ020-0039.npy +tests/data/ljspeech/wavs/LJ018-0145.wav|tests/data/ljspeech/wavs/LJ018-0145.npy +tests/data/ljspeech/wavs/LJ002-0204.wav|tests/data/ljspeech/wavs/LJ002-0204.npy +tests/data/ljspeech/wavs/LJ016-0298.wav|tests/data/ljspeech/wavs/LJ016-0298.npy +tests/data/ljspeech/wavs/LJ012-0062.wav|tests/data/ljspeech/wavs/LJ012-0062.npy +tests/data/ljspeech/wavs/LJ018-0202.wav|tests/data/ljspeech/wavs/LJ018-0202.npy +tests/data/ljspeech/wavs/LJ006-0123.wav|tests/data/ljspeech/wavs/LJ006-0123.npy +tests/data/ljspeech/wavs/LJ010-0138.wav|tests/data/ljspeech/wavs/LJ010-0138.npy +tests/data/ljspeech/wavs/LJ013-0090.wav|tests/data/ljspeech/wavs/LJ013-0090.npy +tests/data/ljspeech/wavs/LJ017-0272.wav|tests/data/ljspeech/wavs/LJ017-0272.npy +tests/data/ljspeech/wavs/LJ049-0024.wav|tests/data/ljspeech/wavs/LJ049-0024.npy +tests/data/ljspeech/wavs/LJ032-0036.wav|tests/data/ljspeech/wavs/LJ032-0036.npy +tests/data/ljspeech/wavs/LJ014-0280.wav|tests/data/ljspeech/wavs/LJ014-0280.npy +tests/data/ljspeech/wavs/LJ046-0138.wav|tests/data/ljspeech/wavs/LJ046-0138.npy +tests/data/ljspeech/wavs/LJ015-0143.wav|tests/data/ljspeech/wavs/LJ015-0143.npy +tests/data/ljspeech/wavs/LJ013-0003.wav|tests/data/ljspeech/wavs/LJ013-0003.npy +tests/data/ljspeech/wavs/LJ022-0180.wav|tests/data/ljspeech/wavs/LJ022-0180.npy +tests/data/ljspeech/wavs/LJ048-0046.wav|tests/data/ljspeech/wavs/LJ048-0046.npy +tests/data/ljspeech/wavs/LJ049-0212.wav|tests/data/ljspeech/wavs/LJ049-0212.npy +tests/data/ljspeech/wavs/LJ010-0021.wav|tests/data/ljspeech/wavs/LJ010-0021.npy +tests/data/ljspeech/wavs/LJ037-0090.wav|tests/data/ljspeech/wavs/LJ037-0090.npy +tests/data/ljspeech/wavs/LJ005-0024.wav|tests/data/ljspeech/wavs/LJ005-0024.npy 
+tests/data/ljspeech/wavs/LJ015-0152.wav|tests/data/ljspeech/wavs/LJ015-0152.npy +tests/data/ljspeech/wavs/LJ009-0092.wav|tests/data/ljspeech/wavs/LJ009-0092.npy +tests/data/ljspeech/wavs/LJ038-0028.wav|tests/data/ljspeech/wavs/LJ038-0028.npy +tests/data/ljspeech/wavs/LJ002-0302.wav|tests/data/ljspeech/wavs/LJ002-0302.npy +tests/data/ljspeech/wavs/LJ003-0108.wav|tests/data/ljspeech/wavs/LJ003-0108.npy +tests/data/ljspeech/wavs/LJ040-0106.wav|tests/data/ljspeech/wavs/LJ040-0106.npy +tests/data/ljspeech/wavs/LJ008-0315.wav|tests/data/ljspeech/wavs/LJ008-0315.npy +tests/data/ljspeech/wavs/LJ018-0237.wav|tests/data/ljspeech/wavs/LJ018-0237.npy +tests/data/ljspeech/wavs/LJ008-0244.wav|tests/data/ljspeech/wavs/LJ008-0244.npy +tests/data/ljspeech/wavs/LJ002-0039.wav|tests/data/ljspeech/wavs/LJ002-0039.npy +tests/data/ljspeech/wavs/LJ009-0271.wav|tests/data/ljspeech/wavs/LJ009-0271.npy +tests/data/ljspeech/wavs/LJ016-0006.wav|tests/data/ljspeech/wavs/LJ016-0006.npy +tests/data/ljspeech/wavs/LJ018-0064.wav|tests/data/ljspeech/wavs/LJ018-0064.npy +tests/data/ljspeech/wavs/LJ040-0229.wav|tests/data/ljspeech/wavs/LJ040-0229.npy +tests/data/ljspeech/wavs/LJ013-0250.wav|tests/data/ljspeech/wavs/LJ013-0250.npy +tests/data/ljspeech/wavs/LJ011-0073.wav|tests/data/ljspeech/wavs/LJ011-0073.npy +tests/data/ljspeech/wavs/LJ010-0037.wav|tests/data/ljspeech/wavs/LJ010-0037.npy +tests/data/ljspeech/wavs/LJ012-0112.wav|tests/data/ljspeech/wavs/LJ012-0112.npy +tests/data/ljspeech/wavs/LJ050-0170.wav|tests/data/ljspeech/wavs/LJ050-0170.npy +tests/data/ljspeech/wavs/LJ016-0438.wav|tests/data/ljspeech/wavs/LJ016-0438.npy +tests/data/ljspeech/wavs/LJ006-0229.wav|tests/data/ljspeech/wavs/LJ006-0229.npy +tests/data/ljspeech/wavs/LJ002-0010.wav|tests/data/ljspeech/wavs/LJ002-0010.npy +tests/data/ljspeech/wavs/LJ045-0216.wav|tests/data/ljspeech/wavs/LJ045-0216.npy +tests/data/ljspeech/wavs/LJ032-0074.wav|tests/data/ljspeech/wavs/LJ032-0074.npy +tests/data/ljspeech/wavs/LJ047-0177.wav|tests/data/ljspeech/wavs/LJ047-0177.npy +tests/data/ljspeech/wavs/LJ037-0054.wav|tests/data/ljspeech/wavs/LJ037-0054.npy +tests/data/ljspeech/wavs/LJ014-0226.wav|tests/data/ljspeech/wavs/LJ014-0226.npy +tests/data/ljspeech/wavs/LJ024-0004.wav|tests/data/ljspeech/wavs/LJ024-0004.npy +tests/data/ljspeech/wavs/LJ011-0116.wav|tests/data/ljspeech/wavs/LJ011-0116.npy +tests/data/ljspeech/wavs/LJ009-0108.wav|tests/data/ljspeech/wavs/LJ009-0108.npy +tests/data/ljspeech/wavs/LJ039-0018.wav|tests/data/ljspeech/wavs/LJ039-0018.npy +tests/data/ljspeech/wavs/LJ002-0080.wav|tests/data/ljspeech/wavs/LJ002-0080.npy +tests/data/ljspeech/wavs/LJ042-0241.wav|tests/data/ljspeech/wavs/LJ042-0241.npy +tests/data/ljspeech/wavs/LJ020-0038.wav|tests/data/ljspeech/wavs/LJ020-0038.npy +tests/data/ljspeech/wavs/LJ038-0131.wav|tests/data/ljspeech/wavs/LJ038-0131.npy +tests/data/ljspeech/wavs/LJ012-0152.wav|tests/data/ljspeech/wavs/LJ012-0152.npy +tests/data/ljspeech/wavs/LJ033-0107.wav|tests/data/ljspeech/wavs/LJ033-0107.npy +tests/data/ljspeech/wavs/LJ019-0360.wav|tests/data/ljspeech/wavs/LJ019-0360.npy +tests/data/ljspeech/wavs/LJ046-0244.wav|tests/data/ljspeech/wavs/LJ046-0244.npy +tests/data/ljspeech/wavs/LJ047-0060.wav|tests/data/ljspeech/wavs/LJ047-0060.npy +tests/data/ljspeech/wavs/LJ033-0171.wav|tests/data/ljspeech/wavs/LJ033-0171.npy +tests/data/ljspeech/wavs/LJ009-0138.wav|tests/data/ljspeech/wavs/LJ009-0138.npy +tests/data/ljspeech/wavs/LJ006-0018.wav|tests/data/ljspeech/wavs/LJ006-0018.npy 
+tests/data/ljspeech/wavs/LJ004-0081.wav|tests/data/ljspeech/wavs/LJ004-0081.npy +tests/data/ljspeech/wavs/LJ028-0097.wav|tests/data/ljspeech/wavs/LJ028-0097.npy +tests/data/ljspeech/wavs/LJ048-0274.wav|tests/data/ljspeech/wavs/LJ048-0274.npy +tests/data/ljspeech/wavs/LJ030-0203.wav|tests/data/ljspeech/wavs/LJ030-0203.npy +tests/data/ljspeech/wavs/LJ048-0086.wav|tests/data/ljspeech/wavs/LJ048-0086.npy +tests/data/ljspeech/wavs/LJ028-0455.wav|tests/data/ljspeech/wavs/LJ028-0455.npy +tests/data/ljspeech/wavs/LJ011-0015.wav|tests/data/ljspeech/wavs/LJ011-0015.npy +tests/data/ljspeech/wavs/LJ003-0271.wav|tests/data/ljspeech/wavs/LJ003-0271.npy +tests/data/ljspeech/wavs/LJ037-0124.wav|tests/data/ljspeech/wavs/LJ037-0124.npy +tests/data/ljspeech/wavs/LJ013-0173.wav|tests/data/ljspeech/wavs/LJ013-0173.npy +tests/data/ljspeech/wavs/LJ039-0201.wav|tests/data/ljspeech/wavs/LJ039-0201.npy +tests/data/ljspeech/wavs/LJ044-0017.wav|tests/data/ljspeech/wavs/LJ044-0017.npy +tests/data/ljspeech/wavs/LJ039-0014.wav|tests/data/ljspeech/wavs/LJ039-0014.npy +tests/data/ljspeech/wavs/LJ016-0252.wav|tests/data/ljspeech/wavs/LJ016-0252.npy +tests/data/ljspeech/wavs/LJ029-0096.wav|tests/data/ljspeech/wavs/LJ029-0096.npy +tests/data/ljspeech/wavs/LJ013-0052.wav|tests/data/ljspeech/wavs/LJ013-0052.npy +tests/data/ljspeech/wavs/LJ039-0116.wav|tests/data/ljspeech/wavs/LJ039-0116.npy +tests/data/ljspeech/wavs/LJ044-0078.wav|tests/data/ljspeech/wavs/LJ044-0078.npy +tests/data/ljspeech/wavs/LJ016-0348.wav|tests/data/ljspeech/wavs/LJ016-0348.npy +tests/data/ljspeech/wavs/LJ033-0060.wav|tests/data/ljspeech/wavs/LJ033-0060.npy +tests/data/ljspeech/wavs/LJ030-0179.wav|tests/data/ljspeech/wavs/LJ030-0179.npy +tests/data/ljspeech/wavs/LJ050-0148.wav|tests/data/ljspeech/wavs/LJ050-0148.npy +tests/data/ljspeech/wavs/LJ008-0143.wav|tests/data/ljspeech/wavs/LJ008-0143.npy +tests/data/ljspeech/wavs/LJ027-0031.wav|tests/data/ljspeech/wavs/LJ027-0031.npy +tests/data/ljspeech/wavs/LJ028-0261.wav|tests/data/ljspeech/wavs/LJ028-0261.npy +tests/data/ljspeech/wavs/LJ040-0012.wav|tests/data/ljspeech/wavs/LJ040-0012.npy +tests/data/ljspeech/wavs/LJ008-0068.wav|tests/data/ljspeech/wavs/LJ008-0068.npy +tests/data/ljspeech/wavs/LJ009-0264.wav|tests/data/ljspeech/wavs/LJ009-0264.npy +tests/data/ljspeech/wavs/LJ017-0224.wav|tests/data/ljspeech/wavs/LJ017-0224.npy +tests/data/ljspeech/wavs/LJ002-0116.wav|tests/data/ljspeech/wavs/LJ002-0116.npy +tests/data/ljspeech/wavs/LJ027-0038.wav|tests/data/ljspeech/wavs/LJ027-0038.npy +tests/data/ljspeech/wavs/LJ016-0081.wav|tests/data/ljspeech/wavs/LJ016-0081.npy +tests/data/ljspeech/wavs/LJ022-0031.wav|tests/data/ljspeech/wavs/LJ022-0031.npy +tests/data/ljspeech/wavs/LJ017-0195.wav|tests/data/ljspeech/wavs/LJ017-0195.npy +tests/data/ljspeech/wavs/LJ002-0237.wav|tests/data/ljspeech/wavs/LJ002-0237.npy +tests/data/ljspeech/wavs/LJ016-0082.wav|tests/data/ljspeech/wavs/LJ016-0082.npy +tests/data/ljspeech/wavs/LJ013-0093.wav|tests/data/ljspeech/wavs/LJ013-0093.npy +tests/data/ljspeech/wavs/LJ002-0245.wav|tests/data/ljspeech/wavs/LJ002-0245.npy +tests/data/ljspeech/wavs/LJ028-0496.wav|tests/data/ljspeech/wavs/LJ028-0496.npy +tests/data/ljspeech/wavs/LJ004-0125.wav|tests/data/ljspeech/wavs/LJ004-0125.npy +tests/data/ljspeech/wavs/LJ005-0176.wav|tests/data/ljspeech/wavs/LJ005-0176.npy +tests/data/ljspeech/wavs/LJ007-0035.wav|tests/data/ljspeech/wavs/LJ007-0035.npy +tests/data/ljspeech/wavs/LJ037-0203.wav|tests/data/ljspeech/wavs/LJ037-0203.npy 
+tests/data/ljspeech/wavs/LJ029-0013.wav|tests/data/ljspeech/wavs/LJ029-0013.npy +tests/data/ljspeech/wavs/LJ022-0155.wav|tests/data/ljspeech/wavs/LJ022-0155.npy +tests/data/ljspeech/wavs/LJ042-0056.wav|tests/data/ljspeech/wavs/LJ042-0056.npy +tests/data/ljspeech/wavs/LJ047-0025.wav|tests/data/ljspeech/wavs/LJ047-0025.npy +tests/data/ljspeech/wavs/LJ048-0080.wav|tests/data/ljspeech/wavs/LJ048-0080.npy +tests/data/ljspeech/wavs/LJ040-0068.wav|tests/data/ljspeech/wavs/LJ040-0068.npy +tests/data/ljspeech/wavs/LJ038-0280.wav|tests/data/ljspeech/wavs/LJ038-0280.npy +tests/data/ljspeech/wavs/LJ011-0247.wav|tests/data/ljspeech/wavs/LJ011-0247.npy +tests/data/ljspeech/wavs/LJ033-0192.wav|tests/data/ljspeech/wavs/LJ033-0192.npy +tests/data/ljspeech/wavs/LJ012-0039.wav|tests/data/ljspeech/wavs/LJ012-0039.npy +tests/data/ljspeech/wavs/LJ003-0086.wav|tests/data/ljspeech/wavs/LJ003-0086.npy +tests/data/ljspeech/wavs/LJ017-0170.wav|tests/data/ljspeech/wavs/LJ017-0170.npy +tests/data/ljspeech/wavs/LJ044-0215.wav|tests/data/ljspeech/wavs/LJ044-0215.npy +tests/data/ljspeech/wavs/LJ037-0008.wav|tests/data/ljspeech/wavs/LJ037-0008.npy +tests/data/ljspeech/wavs/LJ028-0258.wav|tests/data/ljspeech/wavs/LJ028-0258.npy +tests/data/ljspeech/wavs/LJ028-0350.wav|tests/data/ljspeech/wavs/LJ028-0350.npy +tests/data/ljspeech/wavs/LJ045-0204.wav|tests/data/ljspeech/wavs/LJ045-0204.npy +tests/data/ljspeech/wavs/LJ002-0180.wav|tests/data/ljspeech/wavs/LJ002-0180.npy +tests/data/ljspeech/wavs/LJ008-0213.wav|tests/data/ljspeech/wavs/LJ008-0213.npy +tests/data/ljspeech/wavs/LJ023-0131.wav|tests/data/ljspeech/wavs/LJ023-0131.npy +tests/data/ljspeech/wavs/LJ017-0102.wav|tests/data/ljspeech/wavs/LJ017-0102.npy +tests/data/ljspeech/wavs/LJ010-0226.wav|tests/data/ljspeech/wavs/LJ010-0226.npy +tests/data/ljspeech/wavs/LJ047-0183.wav|tests/data/ljspeech/wavs/LJ047-0183.npy +tests/data/ljspeech/wavs/LJ032-0259.wav|tests/data/ljspeech/wavs/LJ032-0259.npy +tests/data/ljspeech/wavs/LJ008-0256.wav|tests/data/ljspeech/wavs/LJ008-0256.npy +tests/data/ljspeech/wavs/LJ010-0258.wav|tests/data/ljspeech/wavs/LJ010-0258.npy +tests/data/ljspeech/wavs/LJ013-0183.wav|tests/data/ljspeech/wavs/LJ013-0183.npy +tests/data/ljspeech/wavs/LJ036-0149.wav|tests/data/ljspeech/wavs/LJ036-0149.npy +tests/data/ljspeech/wavs/LJ039-0224.wav|tests/data/ljspeech/wavs/LJ039-0224.npy +tests/data/ljspeech/wavs/LJ015-0281.wav|tests/data/ljspeech/wavs/LJ015-0281.npy +tests/data/ljspeech/wavs/LJ018-0278.wav|tests/data/ljspeech/wavs/LJ018-0278.npy +tests/data/ljspeech/wavs/LJ044-0075.wav|tests/data/ljspeech/wavs/LJ044-0075.npy +tests/data/ljspeech/wavs/LJ002-0033.wav|tests/data/ljspeech/wavs/LJ002-0033.npy +tests/data/ljspeech/wavs/LJ044-0052.wav|tests/data/ljspeech/wavs/LJ044-0052.npy +tests/data/ljspeech/wavs/LJ025-0117.wav|tests/data/ljspeech/wavs/LJ025-0117.npy +tests/data/ljspeech/wavs/LJ033-0084.wav|tests/data/ljspeech/wavs/LJ033-0084.npy +tests/data/ljspeech/wavs/LJ032-0091.wav|tests/data/ljspeech/wavs/LJ032-0091.npy +tests/data/ljspeech/wavs/LJ003-0115.wav|tests/data/ljspeech/wavs/LJ003-0115.npy +tests/data/ljspeech/wavs/LJ005-0215.wav|tests/data/ljspeech/wavs/LJ005-0215.npy +tests/data/ljspeech/wavs/LJ017-0060.wav|tests/data/ljspeech/wavs/LJ017-0060.npy +tests/data/ljspeech/wavs/LJ049-0142.wav|tests/data/ljspeech/wavs/LJ049-0142.npy +tests/data/ljspeech/wavs/LJ019-0321.wav|tests/data/ljspeech/wavs/LJ019-0321.npy +tests/data/ljspeech/wavs/LJ020-0092.wav|tests/data/ljspeech/wavs/LJ020-0092.npy 
+tests/data/ljspeech/wavs/LJ048-0095.wav|tests/data/ljspeech/wavs/LJ048-0095.npy +tests/data/ljspeech/wavs/LJ019-0276.wav|tests/data/ljspeech/wavs/LJ019-0276.npy +tests/data/ljspeech/wavs/LJ005-0260.wav|tests/data/ljspeech/wavs/LJ005-0260.npy +tests/data/ljspeech/wavs/LJ041-0069.wav|tests/data/ljspeech/wavs/LJ041-0069.npy +tests/data/ljspeech/wavs/LJ005-0185.wav|tests/data/ljspeech/wavs/LJ005-0185.npy +tests/data/ljspeech/wavs/LJ031-0012.wav|tests/data/ljspeech/wavs/LJ031-0012.npy +tests/data/ljspeech/wavs/LJ003-0034.wav|tests/data/ljspeech/wavs/LJ003-0034.npy +tests/data/ljspeech/wavs/LJ046-0093.wav|tests/data/ljspeech/wavs/LJ046-0093.npy +tests/data/ljspeech/wavs/LJ024-0022.wav|tests/data/ljspeech/wavs/LJ024-0022.npy +tests/data/ljspeech/wavs/LJ003-0320.wav|tests/data/ljspeech/wavs/LJ003-0320.npy +tests/data/ljspeech/wavs/LJ015-0155.wav|tests/data/ljspeech/wavs/LJ015-0155.npy +tests/data/ljspeech/wavs/LJ036-0142.wav|tests/data/ljspeech/wavs/LJ036-0142.npy +tests/data/ljspeech/wavs/LJ050-0005.wav|tests/data/ljspeech/wavs/LJ050-0005.npy +tests/data/ljspeech/wavs/LJ047-0193.wav|tests/data/ljspeech/wavs/LJ047-0193.npy +tests/data/ljspeech/wavs/LJ010-0017.wav|tests/data/ljspeech/wavs/LJ010-0017.npy +tests/data/ljspeech/wavs/LJ001-0112.wav|tests/data/ljspeech/wavs/LJ001-0112.npy +tests/data/ljspeech/wavs/LJ038-0236.wav|tests/data/ljspeech/wavs/LJ038-0236.npy +tests/data/ljspeech/wavs/LJ039-0215.wav|tests/data/ljspeech/wavs/LJ039-0215.npy +tests/data/ljspeech/wavs/LJ009-0234.wav|tests/data/ljspeech/wavs/LJ009-0234.npy +tests/data/ljspeech/wavs/LJ028-0212.wav|tests/data/ljspeech/wavs/LJ028-0212.npy +tests/data/ljspeech/wavs/LJ002-0130.wav|tests/data/ljspeech/wavs/LJ002-0130.npy +tests/data/ljspeech/wavs/LJ032-0053.wav|tests/data/ljspeech/wavs/LJ032-0053.npy +tests/data/ljspeech/wavs/LJ040-0060.wav|tests/data/ljspeech/wavs/LJ040-0060.npy +tests/data/ljspeech/wavs/LJ039-0110.wav|tests/data/ljspeech/wavs/LJ039-0110.npy +tests/data/ljspeech/wavs/LJ007-0224.wav|tests/data/ljspeech/wavs/LJ007-0224.npy +tests/data/ljspeech/wavs/LJ047-0020.wav|tests/data/ljspeech/wavs/LJ047-0020.npy +tests/data/ljspeech/wavs/LJ020-0030.wav|tests/data/ljspeech/wavs/LJ020-0030.npy +tests/data/ljspeech/wavs/LJ047-0223.wav|tests/data/ljspeech/wavs/LJ047-0223.npy +tests/data/ljspeech/wavs/LJ004-0205.wav|tests/data/ljspeech/wavs/LJ004-0205.npy +tests/data/ljspeech/wavs/LJ012-0254.wav|tests/data/ljspeech/wavs/LJ012-0254.npy +tests/data/ljspeech/wavs/LJ042-0008.wav|tests/data/ljspeech/wavs/LJ042-0008.npy +tests/data/ljspeech/wavs/LJ038-0013.wav|tests/data/ljspeech/wavs/LJ038-0013.npy +tests/data/ljspeech/wavs/LJ018-0335.wav|tests/data/ljspeech/wavs/LJ018-0335.npy +tests/data/ljspeech/wavs/LJ038-0209.wav|tests/data/ljspeech/wavs/LJ038-0209.npy +tests/data/ljspeech/wavs/LJ009-0194.wav|tests/data/ljspeech/wavs/LJ009-0194.npy +tests/data/ljspeech/wavs/LJ009-0099.wav|tests/data/ljspeech/wavs/LJ009-0099.npy +tests/data/ljspeech/wavs/LJ019-0312.wav|tests/data/ljspeech/wavs/LJ019-0312.npy +tests/data/ljspeech/wavs/LJ048-0104.wav|tests/data/ljspeech/wavs/LJ048-0104.npy +tests/data/ljspeech/wavs/LJ010-0238.wav|tests/data/ljspeech/wavs/LJ010-0238.npy +tests/data/ljspeech/wavs/LJ014-0298.wav|tests/data/ljspeech/wavs/LJ014-0298.npy +tests/data/ljspeech/wavs/LJ019-0326.wav|tests/data/ljspeech/wavs/LJ019-0326.npy +tests/data/ljspeech/wavs/LJ031-0214.wav|tests/data/ljspeech/wavs/LJ031-0214.npy +tests/data/ljspeech/wavs/LJ009-0216.wav|tests/data/ljspeech/wavs/LJ009-0216.npy 
+tests/data/ljspeech/wavs/LJ003-0138.wav|tests/data/ljspeech/wavs/LJ003-0138.npy +tests/data/ljspeech/wavs/LJ001-0181.wav|tests/data/ljspeech/wavs/LJ001-0181.npy +tests/data/ljspeech/wavs/LJ028-0372.wav|tests/data/ljspeech/wavs/LJ028-0372.npy +tests/data/ljspeech/wavs/LJ014-0256.wav|tests/data/ljspeech/wavs/LJ014-0256.npy +tests/data/ljspeech/wavs/LJ005-0129.wav|tests/data/ljspeech/wavs/LJ005-0129.npy +tests/data/ljspeech/wavs/LJ035-0165.wav|tests/data/ljspeech/wavs/LJ035-0165.npy +tests/data/ljspeech/wavs/LJ034-0141.wav|tests/data/ljspeech/wavs/LJ034-0141.npy +tests/data/ljspeech/wavs/LJ028-0386.wav|tests/data/ljspeech/wavs/LJ028-0386.npy +tests/data/ljspeech/wavs/LJ005-0004.wav|tests/data/ljspeech/wavs/LJ005-0004.npy +tests/data/ljspeech/wavs/LJ044-0072.wav|tests/data/ljspeech/wavs/LJ044-0072.npy +tests/data/ljspeech/wavs/LJ031-0064.wav|tests/data/ljspeech/wavs/LJ031-0064.npy +tests/data/ljspeech/wavs/LJ028-0069.wav|tests/data/ljspeech/wavs/LJ028-0069.npy +tests/data/ljspeech/wavs/LJ010-0285.wav|tests/data/ljspeech/wavs/LJ010-0285.npy +tests/data/ljspeech/wavs/LJ012-0144.wav|tests/data/ljspeech/wavs/LJ012-0144.npy +tests/data/ljspeech/wavs/LJ031-0005.wav|tests/data/ljspeech/wavs/LJ031-0005.npy +tests/data/ljspeech/wavs/LJ019-0209.wav|tests/data/ljspeech/wavs/LJ019-0209.npy +tests/data/ljspeech/wavs/LJ032-0257.wav|tests/data/ljspeech/wavs/LJ032-0257.npy +tests/data/ljspeech/wavs/LJ009-0281.wav|tests/data/ljspeech/wavs/LJ009-0281.npy +tests/data/ljspeech/wavs/LJ028-0418.wav|tests/data/ljspeech/wavs/LJ028-0418.npy +tests/data/ljspeech/wavs/LJ036-0095.wav|tests/data/ljspeech/wavs/LJ036-0095.npy +tests/data/ljspeech/wavs/LJ046-0115.wav|tests/data/ljspeech/wavs/LJ046-0115.npy +tests/data/ljspeech/wavs/LJ042-0119.wav|tests/data/ljspeech/wavs/LJ042-0119.npy +tests/data/ljspeech/wavs/LJ027-0131.wav|tests/data/ljspeech/wavs/LJ027-0131.npy +tests/data/ljspeech/wavs/LJ038-0188.wav|tests/data/ljspeech/wavs/LJ038-0188.npy +tests/data/ljspeech/wavs/LJ017-0160.wav|tests/data/ljspeech/wavs/LJ017-0160.npy +tests/data/ljspeech/wavs/LJ007-0040.wav|tests/data/ljspeech/wavs/LJ007-0040.npy +tests/data/ljspeech/wavs/LJ047-0003.wav|tests/data/ljspeech/wavs/LJ047-0003.npy +tests/data/ljspeech/wavs/LJ038-0177.wav|tests/data/ljspeech/wavs/LJ038-0177.npy +tests/data/ljspeech/wavs/LJ035-0097.wav|tests/data/ljspeech/wavs/LJ035-0097.npy +tests/data/ljspeech/wavs/LJ019-0146.wav|tests/data/ljspeech/wavs/LJ019-0146.npy +tests/data/ljspeech/wavs/LJ032-0156.wav|tests/data/ljspeech/wavs/LJ032-0156.npy +tests/data/ljspeech/wavs/LJ013-0055.wav|tests/data/ljspeech/wavs/LJ013-0055.npy +tests/data/ljspeech/wavs/LJ009-0028.wav|tests/data/ljspeech/wavs/LJ009-0028.npy +tests/data/ljspeech/wavs/LJ012-0235.wav|tests/data/ljspeech/wavs/LJ012-0235.npy +tests/data/ljspeech/wavs/LJ015-0004.wav|tests/data/ljspeech/wavs/LJ015-0004.npy +tests/data/ljspeech/wavs/LJ005-0277.wav|tests/data/ljspeech/wavs/LJ005-0277.npy +tests/data/ljspeech/wavs/LJ015-0060.wav|tests/data/ljspeech/wavs/LJ015-0060.npy +tests/data/ljspeech/wavs/LJ009-0282.wav|tests/data/ljspeech/wavs/LJ009-0282.npy +tests/data/ljspeech/wavs/LJ019-0213.wav|tests/data/ljspeech/wavs/LJ019-0213.npy +tests/data/ljspeech/wavs/LJ010-0110.wav|tests/data/ljspeech/wavs/LJ010-0110.npy +tests/data/ljspeech/wavs/LJ047-0196.wav|tests/data/ljspeech/wavs/LJ047-0196.npy +tests/data/ljspeech/wavs/LJ050-0219.wav|tests/data/ljspeech/wavs/LJ050-0219.npy +tests/data/ljspeech/wavs/LJ039-0165.wav|tests/data/ljspeech/wavs/LJ039-0165.npy 
+tests/data/ljspeech/wavs/LJ033-0183.wav|tests/data/ljspeech/wavs/LJ033-0183.npy +tests/data/ljspeech/wavs/LJ039-0001.wav|tests/data/ljspeech/wavs/LJ039-0001.npy +tests/data/ljspeech/wavs/LJ018-0369.wav|tests/data/ljspeech/wavs/LJ018-0369.npy +tests/data/ljspeech/wavs/LJ020-0018.wav|tests/data/ljspeech/wavs/LJ020-0018.npy +tests/data/ljspeech/wavs/LJ021-0005.wav|tests/data/ljspeech/wavs/LJ021-0005.npy +tests/data/ljspeech/wavs/LJ045-0124.wav|tests/data/ljspeech/wavs/LJ045-0124.npy +tests/data/ljspeech/wavs/LJ010-0157.wav|tests/data/ljspeech/wavs/LJ010-0157.npy +tests/data/ljspeech/wavs/LJ003-0010.wav|tests/data/ljspeech/wavs/LJ003-0010.npy +tests/data/ljspeech/wavs/LJ022-0064.wav|tests/data/ljspeech/wavs/LJ022-0064.npy +tests/data/ljspeech/wavs/LJ024-0075.wav|tests/data/ljspeech/wavs/LJ024-0075.npy +tests/data/ljspeech/wavs/LJ028-0446.wav|tests/data/ljspeech/wavs/LJ028-0446.npy +tests/data/ljspeech/wavs/LJ048-0240.wav|tests/data/ljspeech/wavs/LJ048-0240.npy +tests/data/ljspeech/wavs/LJ014-0207.wav|tests/data/ljspeech/wavs/LJ014-0207.npy +tests/data/ljspeech/wavs/LJ038-0132.wav|tests/data/ljspeech/wavs/LJ038-0132.npy +tests/data/ljspeech/wavs/LJ005-0096.wav|tests/data/ljspeech/wavs/LJ005-0096.npy +tests/data/ljspeech/wavs/LJ042-0102.wav|tests/data/ljspeech/wavs/LJ042-0102.npy +tests/data/ljspeech/wavs/LJ004-0034.wav|tests/data/ljspeech/wavs/LJ004-0034.npy +tests/data/ljspeech/wavs/LJ001-0028.wav|tests/data/ljspeech/wavs/LJ001-0028.npy +tests/data/ljspeech/wavs/LJ014-0235.wav|tests/data/ljspeech/wavs/LJ014-0235.npy +tests/data/ljspeech/wavs/LJ018-0209.wav|tests/data/ljspeech/wavs/LJ018-0209.npy +tests/data/ljspeech/wavs/LJ008-0058.wav|tests/data/ljspeech/wavs/LJ008-0058.npy +tests/data/ljspeech/wavs/LJ029-0158.wav|tests/data/ljspeech/wavs/LJ029-0158.npy +tests/data/ljspeech/wavs/LJ040-0208.wav|tests/data/ljspeech/wavs/LJ040-0208.npy +tests/data/ljspeech/wavs/LJ012-0129.wav|tests/data/ljspeech/wavs/LJ012-0129.npy +tests/data/ljspeech/wavs/LJ028-0013.wav|tests/data/ljspeech/wavs/LJ028-0013.npy +tests/data/ljspeech/wavs/LJ034-0101.wav|tests/data/ljspeech/wavs/LJ034-0101.npy +tests/data/ljspeech/wavs/LJ007-0136.wav|tests/data/ljspeech/wavs/LJ007-0136.npy +tests/data/ljspeech/wavs/LJ027-0091.wav|tests/data/ljspeech/wavs/LJ027-0091.npy +tests/data/ljspeech/wavs/LJ002-0141.wav|tests/data/ljspeech/wavs/LJ002-0141.npy +tests/data/ljspeech/wavs/LJ001-0157.wav|tests/data/ljspeech/wavs/LJ001-0157.npy +tests/data/ljspeech/wavs/LJ039-0007.wav|tests/data/ljspeech/wavs/LJ039-0007.npy +tests/data/ljspeech/wavs/LJ013-0142.wav|tests/data/ljspeech/wavs/LJ013-0142.npy +tests/data/ljspeech/wavs/LJ028-0052.wav|tests/data/ljspeech/wavs/LJ028-0052.npy +tests/data/ljspeech/wavs/LJ047-0172.wav|tests/data/ljspeech/wavs/LJ047-0172.npy +tests/data/ljspeech/wavs/LJ044-0038.wav|tests/data/ljspeech/wavs/LJ044-0038.npy +tests/data/ljspeech/wavs/LJ031-0072.wav|tests/data/ljspeech/wavs/LJ031-0072.npy +tests/data/ljspeech/wavs/LJ050-0027.wav|tests/data/ljspeech/wavs/LJ050-0027.npy +tests/data/ljspeech/wavs/LJ049-0097.wav|tests/data/ljspeech/wavs/LJ049-0097.npy +tests/data/ljspeech/wavs/LJ008-0052.wav|tests/data/ljspeech/wavs/LJ008-0052.npy +tests/data/ljspeech/wavs/LJ050-0029.wav|tests/data/ljspeech/wavs/LJ050-0029.npy +tests/data/ljspeech/wavs/LJ048-0100.wav|tests/data/ljspeech/wavs/LJ048-0100.npy +tests/data/ljspeech/wavs/LJ022-0102.wav|tests/data/ljspeech/wavs/LJ022-0102.npy +tests/data/ljspeech/wavs/LJ029-0169.wav|tests/data/ljspeech/wavs/LJ029-0169.npy 
+tests/data/ljspeech/wavs/LJ016-0292.wav|tests/data/ljspeech/wavs/LJ016-0292.npy +tests/data/ljspeech/wavs/LJ038-0046.wav|tests/data/ljspeech/wavs/LJ038-0046.npy +tests/data/ljspeech/wavs/LJ015-0173.wav|tests/data/ljspeech/wavs/LJ015-0173.npy +tests/data/ljspeech/wavs/LJ012-0207.wav|tests/data/ljspeech/wavs/LJ012-0207.npy +tests/data/ljspeech/wavs/LJ024-0100.wav|tests/data/ljspeech/wavs/LJ024-0100.npy +tests/data/ljspeech/wavs/LJ011-0146.wav|tests/data/ljspeech/wavs/LJ011-0146.npy +tests/data/ljspeech/wavs/LJ043-0068.wav|tests/data/ljspeech/wavs/LJ043-0068.npy +tests/data/ljspeech/wavs/LJ037-0103.wav|tests/data/ljspeech/wavs/LJ037-0103.npy +tests/data/ljspeech/wavs/LJ002-0163.wav|tests/data/ljspeech/wavs/LJ002-0163.npy +tests/data/ljspeech/wavs/LJ018-0261.wav|tests/data/ljspeech/wavs/LJ018-0261.npy +tests/data/ljspeech/wavs/LJ008-0259.wav|tests/data/ljspeech/wavs/LJ008-0259.npy +tests/data/ljspeech/wavs/LJ034-0048.wav|tests/data/ljspeech/wavs/LJ034-0048.npy +tests/data/ljspeech/wavs/LJ001-0144.wav|tests/data/ljspeech/wavs/LJ001-0144.npy +tests/data/ljspeech/wavs/LJ016-0362.wav|tests/data/ljspeech/wavs/LJ016-0362.npy +tests/data/ljspeech/wavs/LJ018-0375.wav|tests/data/ljspeech/wavs/LJ018-0375.npy +tests/data/ljspeech/wavs/LJ004-0208.wav|tests/data/ljspeech/wavs/LJ004-0208.npy +tests/data/ljspeech/wavs/LJ017-0171.wav|tests/data/ljspeech/wavs/LJ017-0171.npy +tests/data/ljspeech/wavs/LJ050-0011.wav|tests/data/ljspeech/wavs/LJ050-0011.npy +tests/data/ljspeech/wavs/LJ006-0079.wav|tests/data/ljspeech/wavs/LJ006-0079.npy +tests/data/ljspeech/wavs/LJ044-0011.wav|tests/data/ljspeech/wavs/LJ044-0011.npy +tests/data/ljspeech/wavs/LJ023-0139.wav|tests/data/ljspeech/wavs/LJ023-0139.npy +tests/data/ljspeech/wavs/LJ040-0002.wav|tests/data/ljspeech/wavs/LJ040-0002.npy +tests/data/ljspeech/wavs/LJ032-0204.wav|tests/data/ljspeech/wavs/LJ032-0204.npy +tests/data/ljspeech/wavs/LJ046-0200.wav|tests/data/ljspeech/wavs/LJ046-0200.npy +tests/data/ljspeech/wavs/LJ039-0022.wav|tests/data/ljspeech/wavs/LJ039-0022.npy +tests/data/ljspeech/wavs/LJ031-0034.wav|tests/data/ljspeech/wavs/LJ031-0034.npy +tests/data/ljspeech/wavs/LJ048-0068.wav|tests/data/ljspeech/wavs/LJ048-0068.npy +tests/data/ljspeech/wavs/LJ045-0061.wav|tests/data/ljspeech/wavs/LJ045-0061.npy +tests/data/ljspeech/wavs/LJ044-0122.wav|tests/data/ljspeech/wavs/LJ044-0122.npy +tests/data/ljspeech/wavs/LJ019-0290.wav|tests/data/ljspeech/wavs/LJ019-0290.npy +tests/data/ljspeech/wavs/LJ016-0366.wav|tests/data/ljspeech/wavs/LJ016-0366.npy +tests/data/ljspeech/wavs/LJ014-0160.wav|tests/data/ljspeech/wavs/LJ014-0160.npy +tests/data/ljspeech/wavs/LJ003-0288.wav|tests/data/ljspeech/wavs/LJ003-0288.npy +tests/data/ljspeech/wavs/LJ044-0074.wav|tests/data/ljspeech/wavs/LJ044-0074.npy +tests/data/ljspeech/wavs/LJ014-0253.wav|tests/data/ljspeech/wavs/LJ014-0253.npy +tests/data/ljspeech/wavs/LJ021-0074.wav|tests/data/ljspeech/wavs/LJ021-0074.npy +tests/data/ljspeech/wavs/LJ048-0119.wav|tests/data/ljspeech/wavs/LJ048-0119.npy +tests/data/ljspeech/wavs/LJ019-0243.wav|tests/data/ljspeech/wavs/LJ019-0243.npy +tests/data/ljspeech/wavs/LJ037-0078.wav|tests/data/ljspeech/wavs/LJ037-0078.npy +tests/data/ljspeech/wavs/LJ023-0134.wav|tests/data/ljspeech/wavs/LJ023-0134.npy +tests/data/ljspeech/wavs/LJ047-0168.wav|tests/data/ljspeech/wavs/LJ047-0168.npy +tests/data/ljspeech/wavs/LJ006-0092.wav|tests/data/ljspeech/wavs/LJ006-0092.npy +tests/data/ljspeech/wavs/LJ005-0213.wav|tests/data/ljspeech/wavs/LJ005-0213.npy 
+tests/data/ljspeech/wavs/LJ016-0127.wav|tests/data/ljspeech/wavs/LJ016-0127.npy +tests/data/ljspeech/wavs/LJ034-0171.wav|tests/data/ljspeech/wavs/LJ034-0171.npy +tests/data/ljspeech/wavs/LJ009-0098.wav|tests/data/ljspeech/wavs/LJ009-0098.npy +tests/data/ljspeech/wavs/LJ028-0085.wav|tests/data/ljspeech/wavs/LJ028-0085.npy +tests/data/ljspeech/wavs/LJ048-0069.wav|tests/data/ljspeech/wavs/LJ048-0069.npy +tests/data/ljspeech/wavs/LJ038-0286.wav|tests/data/ljspeech/wavs/LJ038-0286.npy +tests/data/ljspeech/wavs/LJ029-0099.wav|tests/data/ljspeech/wavs/LJ029-0099.npy +tests/data/ljspeech/wavs/LJ031-0074.wav|tests/data/ljspeech/wavs/LJ031-0074.npy +tests/data/ljspeech/wavs/LJ044-0237.wav|tests/data/ljspeech/wavs/LJ044-0237.npy +tests/data/ljspeech/wavs/LJ047-0194.wav|tests/data/ljspeech/wavs/LJ047-0194.npy +tests/data/ljspeech/wavs/LJ034-0084.wav|tests/data/ljspeech/wavs/LJ034-0084.npy +tests/data/ljspeech/wavs/LJ014-0051.wav|tests/data/ljspeech/wavs/LJ014-0051.npy +tests/data/ljspeech/wavs/LJ041-0167.wav|tests/data/ljspeech/wavs/LJ041-0167.npy +tests/data/ljspeech/wavs/LJ033-0121.wav|tests/data/ljspeech/wavs/LJ033-0121.npy +tests/data/ljspeech/wavs/LJ026-0047.wav|tests/data/ljspeech/wavs/LJ026-0047.npy +tests/data/ljspeech/wavs/LJ003-0321.wav|tests/data/ljspeech/wavs/LJ003-0321.npy +tests/data/ljspeech/wavs/LJ022-0142.wav|tests/data/ljspeech/wavs/LJ022-0142.npy +tests/data/ljspeech/wavs/LJ042-0218.wav|tests/data/ljspeech/wavs/LJ042-0218.npy +tests/data/ljspeech/wavs/LJ043-0023.wav|tests/data/ljspeech/wavs/LJ043-0023.npy +tests/data/ljspeech/wavs/LJ042-0209.wav|tests/data/ljspeech/wavs/LJ042-0209.npy +tests/data/ljspeech/wavs/LJ005-0300.wav|tests/data/ljspeech/wavs/LJ005-0300.npy +tests/data/ljspeech/wavs/LJ046-0127.wav|tests/data/ljspeech/wavs/LJ046-0127.npy +tests/data/ljspeech/wavs/LJ042-0010.wav|tests/data/ljspeech/wavs/LJ042-0010.npy +tests/data/ljspeech/wavs/LJ002-0178.wav|tests/data/ljspeech/wavs/LJ002-0178.npy +tests/data/ljspeech/wavs/LJ018-0256.wav|tests/data/ljspeech/wavs/LJ018-0256.npy +tests/data/ljspeech/wavs/LJ028-0024.wav|tests/data/ljspeech/wavs/LJ028-0024.npy +tests/data/ljspeech/wavs/LJ004-0132.wav|tests/data/ljspeech/wavs/LJ004-0132.npy +tests/data/ljspeech/wavs/LJ022-0191.wav|tests/data/ljspeech/wavs/LJ022-0191.npy +tests/data/ljspeech/wavs/LJ025-0070.wav|tests/data/ljspeech/wavs/LJ025-0070.npy +tests/data/ljspeech/wavs/LJ028-0344.wav|tests/data/ljspeech/wavs/LJ028-0344.npy +tests/data/ljspeech/wavs/LJ032-0037.wav|tests/data/ljspeech/wavs/LJ032-0037.npy +tests/data/ljspeech/wavs/LJ022-0039.wav|tests/data/ljspeech/wavs/LJ022-0039.npy +tests/data/ljspeech/wavs/LJ008-0318.wav|tests/data/ljspeech/wavs/LJ008-0318.npy +tests/data/ljspeech/wavs/LJ028-0158.wav|tests/data/ljspeech/wavs/LJ028-0158.npy +tests/data/ljspeech/wavs/LJ010-0041.wav|tests/data/ljspeech/wavs/LJ010-0041.npy +tests/data/ljspeech/wavs/LJ015-0257.wav|tests/data/ljspeech/wavs/LJ015-0257.npy +tests/data/ljspeech/wavs/LJ005-0099.wav|tests/data/ljspeech/wavs/LJ005-0099.npy +tests/data/ljspeech/wavs/LJ049-0185.wav|tests/data/ljspeech/wavs/LJ049-0185.npy +tests/data/ljspeech/wavs/LJ003-0110.wav|tests/data/ljspeech/wavs/LJ003-0110.npy +tests/data/ljspeech/wavs/LJ044-0099.wav|tests/data/ljspeech/wavs/LJ044-0099.npy +tests/data/ljspeech/wavs/LJ018-0392.wav|tests/data/ljspeech/wavs/LJ018-0392.npy +tests/data/ljspeech/wavs/LJ045-0008.wav|tests/data/ljspeech/wavs/LJ045-0008.npy +tests/data/ljspeech/wavs/LJ002-0246.wav|tests/data/ljspeech/wavs/LJ002-0246.npy 
+tests/data/ljspeech/wavs/LJ045-0002.wav|tests/data/ljspeech/wavs/LJ045-0002.npy +tests/data/ljspeech/wavs/LJ041-0002.wav|tests/data/ljspeech/wavs/LJ041-0002.npy +tests/data/ljspeech/wavs/LJ042-0210.wav|tests/data/ljspeech/wavs/LJ042-0210.npy +tests/data/ljspeech/wavs/LJ025-0072.wav|tests/data/ljspeech/wavs/LJ025-0072.npy +tests/data/ljspeech/wavs/LJ025-0074.wav|tests/data/ljspeech/wavs/LJ025-0074.npy +tests/data/ljspeech/wavs/LJ048-0089.wav|tests/data/ljspeech/wavs/LJ048-0089.npy +tests/data/ljspeech/wavs/LJ016-0412.wav|tests/data/ljspeech/wavs/LJ016-0412.npy +tests/data/ljspeech/wavs/LJ044-0048.wav|tests/data/ljspeech/wavs/LJ044-0048.npy +tests/data/ljspeech/wavs/LJ038-0292.wav|tests/data/ljspeech/wavs/LJ038-0292.npy +tests/data/ljspeech/wavs/LJ010-0202.wav|tests/data/ljspeech/wavs/LJ010-0202.npy +tests/data/ljspeech/wavs/LJ008-0004.wav|tests/data/ljspeech/wavs/LJ008-0004.npy +tests/data/ljspeech/wavs/LJ007-0042.wav|tests/data/ljspeech/wavs/LJ007-0042.npy +tests/data/ljspeech/wavs/LJ023-0014.wav|tests/data/ljspeech/wavs/LJ023-0014.npy +tests/data/ljspeech/wavs/LJ030-0222.wav|tests/data/ljspeech/wavs/LJ030-0222.npy +tests/data/ljspeech/wavs/LJ010-0256.wav|tests/data/ljspeech/wavs/LJ010-0256.npy +tests/data/ljspeech/wavs/LJ008-0296.wav|tests/data/ljspeech/wavs/LJ008-0296.npy +tests/data/ljspeech/wavs/LJ035-0114.wav|tests/data/ljspeech/wavs/LJ035-0114.npy +tests/data/ljspeech/wavs/LJ023-0017.wav|tests/data/ljspeech/wavs/LJ023-0017.npy +tests/data/ljspeech/wavs/LJ014-0028.wav|tests/data/ljspeech/wavs/LJ014-0028.npy +tests/data/ljspeech/wavs/LJ020-0093.wav|tests/data/ljspeech/wavs/LJ020-0093.npy +tests/data/ljspeech/wavs/LJ018-0373.wav|tests/data/ljspeech/wavs/LJ018-0373.npy +tests/data/ljspeech/wavs/LJ006-0186.wav|tests/data/ljspeech/wavs/LJ006-0186.npy +tests/data/ljspeech/wavs/LJ045-0081.wav|tests/data/ljspeech/wavs/LJ045-0081.npy +tests/data/ljspeech/wavs/LJ032-0005.wav|tests/data/ljspeech/wavs/LJ032-0005.npy +tests/data/ljspeech/wavs/LJ026-0037.wav|tests/data/ljspeech/wavs/LJ026-0037.npy +tests/data/ljspeech/wavs/LJ014-0056.wav|tests/data/ljspeech/wavs/LJ014-0056.npy +tests/data/ljspeech/wavs/LJ022-0072.wav|tests/data/ljspeech/wavs/LJ022-0072.npy +tests/data/ljspeech/wavs/LJ049-0040.wav|tests/data/ljspeech/wavs/LJ049-0040.npy +tests/data/ljspeech/wavs/LJ008-0253.wav|tests/data/ljspeech/wavs/LJ008-0253.npy +tests/data/ljspeech/wavs/LJ013-0074.wav|tests/data/ljspeech/wavs/LJ013-0074.npy +tests/data/ljspeech/wavs/LJ044-0021.wav|tests/data/ljspeech/wavs/LJ044-0021.npy +tests/data/ljspeech/wavs/LJ048-0040.wav|tests/data/ljspeech/wavs/LJ048-0040.npy +tests/data/ljspeech/wavs/LJ022-0042.wav|tests/data/ljspeech/wavs/LJ022-0042.npy +tests/data/ljspeech/wavs/LJ030-0011.wav|tests/data/ljspeech/wavs/LJ030-0011.npy +tests/data/ljspeech/wavs/LJ039-0035.wav|tests/data/ljspeech/wavs/LJ039-0035.npy +tests/data/ljspeech/wavs/LJ024-0076.wav|tests/data/ljspeech/wavs/LJ024-0076.npy +tests/data/ljspeech/wavs/LJ043-0014.wav|tests/data/ljspeech/wavs/LJ043-0014.npy +tests/data/ljspeech/wavs/LJ041-0190.wav|tests/data/ljspeech/wavs/LJ041-0190.npy +tests/data/ljspeech/wavs/LJ030-0099.wav|tests/data/ljspeech/wavs/LJ030-0099.npy +tests/data/ljspeech/wavs/LJ048-0280.wav|tests/data/ljspeech/wavs/LJ048-0280.npy +tests/data/ljspeech/wavs/LJ007-0241.wav|tests/data/ljspeech/wavs/LJ007-0241.npy +tests/data/ljspeech/wavs/LJ045-0153.wav|tests/data/ljspeech/wavs/LJ045-0153.npy +tests/data/ljspeech/wavs/LJ049-0062.wav|tests/data/ljspeech/wavs/LJ049-0062.npy 
+tests/data/ljspeech/wavs/LJ039-0048.wav|tests/data/ljspeech/wavs/LJ039-0048.npy +tests/data/ljspeech/wavs/LJ021-0045.wav|tests/data/ljspeech/wavs/LJ021-0045.npy +tests/data/ljspeech/wavs/LJ011-0275.wav|tests/data/ljspeech/wavs/LJ011-0275.npy +tests/data/ljspeech/wavs/LJ008-0059.wav|tests/data/ljspeech/wavs/LJ008-0059.npy +tests/data/ljspeech/wavs/LJ015-0242.wav|tests/data/ljspeech/wavs/LJ015-0242.npy +tests/data/ljspeech/wavs/LJ017-0183.wav|tests/data/ljspeech/wavs/LJ017-0183.npy +tests/data/ljspeech/wavs/LJ010-0277.wav|tests/data/ljspeech/wavs/LJ010-0277.npy +tests/data/ljspeech/wavs/LJ020-0089.wav|tests/data/ljspeech/wavs/LJ020-0089.npy +tests/data/ljspeech/wavs/LJ018-0242.wav|tests/data/ljspeech/wavs/LJ018-0242.npy +tests/data/ljspeech/wavs/LJ046-0030.wav|tests/data/ljspeech/wavs/LJ046-0030.npy +tests/data/ljspeech/wavs/LJ048-0241.wav|tests/data/ljspeech/wavs/LJ048-0241.npy +tests/data/ljspeech/wavs/LJ015-0097.wav|tests/data/ljspeech/wavs/LJ015-0097.npy +tests/data/ljspeech/wavs/LJ024-0045.wav|tests/data/ljspeech/wavs/LJ024-0045.npy +tests/data/ljspeech/wavs/LJ009-0036.wav|tests/data/ljspeech/wavs/LJ009-0036.npy +tests/data/ljspeech/wavs/LJ013-0076.wav|tests/data/ljspeech/wavs/LJ013-0076.npy +tests/data/ljspeech/wavs/LJ006-0069.wav|tests/data/ljspeech/wavs/LJ006-0069.npy +tests/data/ljspeech/wavs/LJ027-0079.wav|tests/data/ljspeech/wavs/LJ027-0079.npy +tests/data/ljspeech/wavs/LJ005-0252.wav|tests/data/ljspeech/wavs/LJ005-0252.npy +tests/data/ljspeech/wavs/LJ043-0115.wav|tests/data/ljspeech/wavs/LJ043-0115.npy +tests/data/ljspeech/wavs/LJ043-0032.wav|tests/data/ljspeech/wavs/LJ043-0032.npy +tests/data/ljspeech/wavs/LJ019-0062.wav|tests/data/ljspeech/wavs/LJ019-0062.npy +tests/data/ljspeech/wavs/LJ021-0023.wav|tests/data/ljspeech/wavs/LJ021-0023.npy +tests/data/ljspeech/wavs/LJ050-0186.wav|tests/data/ljspeech/wavs/LJ050-0186.npy +tests/data/ljspeech/wavs/LJ011-0136.wav|tests/data/ljspeech/wavs/LJ011-0136.npy +tests/data/ljspeech/wavs/LJ003-0318.wav|tests/data/ljspeech/wavs/LJ003-0318.npy +tests/data/ljspeech/wavs/LJ019-0216.wav|tests/data/ljspeech/wavs/LJ019-0216.npy +tests/data/ljspeech/wavs/LJ006-0267.wav|tests/data/ljspeech/wavs/LJ006-0267.npy +tests/data/ljspeech/wavs/LJ029-0116.wav|tests/data/ljspeech/wavs/LJ029-0116.npy +tests/data/ljspeech/wavs/LJ021-0026.wav|tests/data/ljspeech/wavs/LJ021-0026.npy +tests/data/ljspeech/wavs/LJ013-0202.wav|tests/data/ljspeech/wavs/LJ013-0202.npy +tests/data/ljspeech/wavs/LJ023-0123.wav|tests/data/ljspeech/wavs/LJ023-0123.npy +tests/data/ljspeech/wavs/LJ004-0119.wav|tests/data/ljspeech/wavs/LJ004-0119.npy +tests/data/ljspeech/wavs/LJ040-0015.wav|tests/data/ljspeech/wavs/LJ040-0015.npy +tests/data/ljspeech/wavs/LJ008-0312.wav|tests/data/ljspeech/wavs/LJ008-0312.npy +tests/data/ljspeech/wavs/LJ034-0018.wav|tests/data/ljspeech/wavs/LJ034-0018.npy +tests/data/ljspeech/wavs/LJ012-0263.wav|tests/data/ljspeech/wavs/LJ012-0263.npy +tests/data/ljspeech/wavs/LJ023-0133.wav|tests/data/ljspeech/wavs/LJ023-0133.npy +tests/data/ljspeech/wavs/LJ028-0047.wav|tests/data/ljspeech/wavs/LJ028-0047.npy +tests/data/ljspeech/wavs/LJ028-0501.wav|tests/data/ljspeech/wavs/LJ028-0501.npy +tests/data/ljspeech/wavs/LJ008-0231.wav|tests/data/ljspeech/wavs/LJ008-0231.npy +tests/data/ljspeech/wavs/LJ048-0212.wav|tests/data/ljspeech/wavs/LJ048-0212.npy +tests/data/ljspeech/wavs/LJ013-0187.wav|tests/data/ljspeech/wavs/LJ013-0187.npy +tests/data/ljspeech/wavs/LJ030-0065.wav|tests/data/ljspeech/wavs/LJ030-0065.npy 
+tests/data/ljspeech/wavs/LJ037-0177.wav|tests/data/ljspeech/wavs/LJ037-0177.npy +tests/data/ljspeech/wavs/LJ008-0092.wav|tests/data/ljspeech/wavs/LJ008-0092.npy +tests/data/ljspeech/wavs/LJ006-0253.wav|tests/data/ljspeech/wavs/LJ006-0253.npy +tests/data/ljspeech/wavs/LJ003-0003.wav|tests/data/ljspeech/wavs/LJ003-0003.npy +tests/data/ljspeech/wavs/LJ015-0232.wav|tests/data/ljspeech/wavs/LJ015-0232.npy +tests/data/ljspeech/wavs/LJ015-0070.wav|tests/data/ljspeech/wavs/LJ015-0070.npy +tests/data/ljspeech/wavs/LJ015-0234.wav|tests/data/ljspeech/wavs/LJ015-0234.npy +tests/data/ljspeech/wavs/LJ038-0122.wav|tests/data/ljspeech/wavs/LJ038-0122.npy +tests/data/ljspeech/wavs/LJ041-0173.wav|tests/data/ljspeech/wavs/LJ041-0173.npy +tests/data/ljspeech/wavs/LJ040-0044.wav|tests/data/ljspeech/wavs/LJ040-0044.npy +tests/data/ljspeech/wavs/LJ037-0241.wav|tests/data/ljspeech/wavs/LJ037-0241.npy +tests/data/ljspeech/wavs/LJ050-0048.wav|tests/data/ljspeech/wavs/LJ050-0048.npy +tests/data/ljspeech/wavs/LJ050-0208.wav|tests/data/ljspeech/wavs/LJ050-0208.npy +tests/data/ljspeech/wavs/LJ012-0047.wav|tests/data/ljspeech/wavs/LJ012-0047.npy +tests/data/ljspeech/wavs/LJ030-0168.wav|tests/data/ljspeech/wavs/LJ030-0168.npy +tests/data/ljspeech/wavs/LJ019-0045.wav|tests/data/ljspeech/wavs/LJ019-0045.npy +tests/data/ljspeech/wavs/LJ045-0234.wav|tests/data/ljspeech/wavs/LJ045-0234.npy +tests/data/ljspeech/wavs/LJ019-0110.wav|tests/data/ljspeech/wavs/LJ019-0110.npy +tests/data/ljspeech/wavs/LJ049-0115.wav|tests/data/ljspeech/wavs/LJ049-0115.npy +tests/data/ljspeech/wavs/LJ019-0315.wav|tests/data/ljspeech/wavs/LJ019-0315.npy +tests/data/ljspeech/wavs/LJ028-0315.wav|tests/data/ljspeech/wavs/LJ028-0315.npy +tests/data/ljspeech/wavs/LJ028-0010.wav|tests/data/ljspeech/wavs/LJ028-0010.npy +tests/data/ljspeech/wavs/LJ007-0108.wav|tests/data/ljspeech/wavs/LJ007-0108.npy +tests/data/ljspeech/wavs/LJ012-0019.wav|tests/data/ljspeech/wavs/LJ012-0019.npy +tests/data/ljspeech/wavs/LJ048-0117.wav|tests/data/ljspeech/wavs/LJ048-0117.npy +tests/data/ljspeech/wavs/LJ010-0260.wav|tests/data/ljspeech/wavs/LJ010-0260.npy +tests/data/ljspeech/wavs/LJ039-0138.wav|tests/data/ljspeech/wavs/LJ039-0138.npy +tests/data/ljspeech/wavs/LJ014-0101.wav|tests/data/ljspeech/wavs/LJ014-0101.npy +tests/data/ljspeech/wavs/LJ047-0103.wav|tests/data/ljspeech/wavs/LJ047-0103.npy +tests/data/ljspeech/wavs/LJ026-0155.wav|tests/data/ljspeech/wavs/LJ026-0155.npy +tests/data/ljspeech/wavs/LJ023-0088.wav|tests/data/ljspeech/wavs/LJ023-0088.npy +tests/data/ljspeech/wavs/LJ012-0093.wav|tests/data/ljspeech/wavs/LJ012-0093.npy +tests/data/ljspeech/wavs/LJ026-0092.wav|tests/data/ljspeech/wavs/LJ026-0092.npy +tests/data/ljspeech/wavs/LJ005-0169.wav|tests/data/ljspeech/wavs/LJ005-0169.npy +tests/data/ljspeech/wavs/LJ028-0270.wav|tests/data/ljspeech/wavs/LJ028-0270.npy +tests/data/ljspeech/wavs/LJ005-0020.wav|tests/data/ljspeech/wavs/LJ005-0020.npy +tests/data/ljspeech/wavs/LJ028-0477.wav|tests/data/ljspeech/wavs/LJ028-0477.npy +tests/data/ljspeech/wavs/LJ040-0130.wav|tests/data/ljspeech/wavs/LJ040-0130.npy +tests/data/ljspeech/wavs/LJ002-0088.wav|tests/data/ljspeech/wavs/LJ002-0088.npy +tests/data/ljspeech/wavs/LJ049-0128.wav|tests/data/ljspeech/wavs/LJ049-0128.npy +tests/data/ljspeech/wavs/LJ016-0211.wav|tests/data/ljspeech/wavs/LJ016-0211.npy +tests/data/ljspeech/wavs/LJ014-0117.wav|tests/data/ljspeech/wavs/LJ014-0117.npy +tests/data/ljspeech/wavs/LJ038-0024.wav|tests/data/ljspeech/wavs/LJ038-0024.npy 
+tests/data/ljspeech/wavs/LJ049-0190.wav|tests/data/ljspeech/wavs/LJ049-0190.npy +tests/data/ljspeech/wavs/LJ016-0085.wav|tests/data/ljspeech/wavs/LJ016-0085.npy +tests/data/ljspeech/wavs/LJ038-0179.wav|tests/data/ljspeech/wavs/LJ038-0179.npy +tests/data/ljspeech/wavs/LJ003-0203.wav|tests/data/ljspeech/wavs/LJ003-0203.npy +tests/data/ljspeech/wavs/LJ031-0176.wav|tests/data/ljspeech/wavs/LJ031-0176.npy +tests/data/ljspeech/wavs/LJ037-0055.wav|tests/data/ljspeech/wavs/LJ037-0055.npy +tests/data/ljspeech/wavs/LJ014-0265.wav|tests/data/ljspeech/wavs/LJ014-0265.npy +tests/data/ljspeech/wavs/LJ049-0198.wav|tests/data/ljspeech/wavs/LJ049-0198.npy +tests/data/ljspeech/wavs/LJ037-0236.wav|tests/data/ljspeech/wavs/LJ037-0236.npy +tests/data/ljspeech/wavs/LJ045-0027.wav|tests/data/ljspeech/wavs/LJ045-0027.npy +tests/data/ljspeech/wavs/LJ013-0108.wav|tests/data/ljspeech/wavs/LJ013-0108.npy +tests/data/ljspeech/wavs/LJ028-0076.wav|tests/data/ljspeech/wavs/LJ028-0076.npy +tests/data/ljspeech/wavs/LJ014-0277.wav|tests/data/ljspeech/wavs/LJ014-0277.npy +tests/data/ljspeech/wavs/LJ027-0157.wav|tests/data/ljspeech/wavs/LJ027-0157.npy +tests/data/ljspeech/wavs/LJ015-0290.wav|tests/data/ljspeech/wavs/LJ015-0290.npy +tests/data/ljspeech/wavs/LJ007-0130.wav|tests/data/ljspeech/wavs/LJ007-0130.npy +tests/data/ljspeech/wavs/LJ013-0026.wav|tests/data/ljspeech/wavs/LJ013-0026.npy +tests/data/ljspeech/wavs/LJ045-0192.wav|tests/data/ljspeech/wavs/LJ045-0192.npy +tests/data/ljspeech/wavs/LJ038-0284.wav|tests/data/ljspeech/wavs/LJ038-0284.npy +tests/data/ljspeech/wavs/LJ047-0027.wav|tests/data/ljspeech/wavs/LJ047-0027.npy +tests/data/ljspeech/wavs/LJ003-0348.wav|tests/data/ljspeech/wavs/LJ003-0348.npy +tests/data/ljspeech/wavs/LJ003-0178.wav|tests/data/ljspeech/wavs/LJ003-0178.npy +tests/data/ljspeech/wavs/LJ028-0023.wav|tests/data/ljspeech/wavs/LJ028-0023.npy +tests/data/ljspeech/wavs/LJ013-0226.wav|tests/data/ljspeech/wavs/LJ013-0226.npy +tests/data/ljspeech/wavs/LJ012-0174.wav|tests/data/ljspeech/wavs/LJ012-0174.npy +tests/data/ljspeech/wavs/LJ032-0154.wav|tests/data/ljspeech/wavs/LJ032-0154.npy +tests/data/ljspeech/wavs/LJ028-0200.wav|tests/data/ljspeech/wavs/LJ028-0200.npy +tests/data/ljspeech/wavs/LJ039-0228.wav|tests/data/ljspeech/wavs/LJ039-0228.npy +tests/data/ljspeech/wavs/LJ036-0121.wav|tests/data/ljspeech/wavs/LJ036-0121.npy +tests/data/ljspeech/wavs/LJ040-0186.wav|tests/data/ljspeech/wavs/LJ040-0186.npy +tests/data/ljspeech/wavs/LJ041-0118.wav|tests/data/ljspeech/wavs/LJ041-0118.npy +tests/data/ljspeech/wavs/LJ002-0267.wav|tests/data/ljspeech/wavs/LJ002-0267.npy +tests/data/ljspeech/wavs/LJ002-0124.wav|tests/data/ljspeech/wavs/LJ002-0124.npy +tests/data/ljspeech/wavs/LJ033-0150.wav|tests/data/ljspeech/wavs/LJ033-0150.npy +tests/data/ljspeech/wavs/LJ036-0147.wav|tests/data/ljspeech/wavs/LJ036-0147.npy +tests/data/ljspeech/wavs/LJ044-0033.wav|tests/data/ljspeech/wavs/LJ044-0033.npy +tests/data/ljspeech/wavs/LJ040-0197.wav|tests/data/ljspeech/wavs/LJ040-0197.npy +tests/data/ljspeech/wavs/LJ018-0079.wav|tests/data/ljspeech/wavs/LJ018-0079.npy +tests/data/ljspeech/wavs/LJ017-0266.wav|tests/data/ljspeech/wavs/LJ017-0266.npy +tests/data/ljspeech/wavs/LJ029-0129.wav|tests/data/ljspeech/wavs/LJ029-0129.npy +tests/data/ljspeech/wavs/LJ044-0158.wav|tests/data/ljspeech/wavs/LJ044-0158.npy +tests/data/ljspeech/wavs/LJ002-0004.wav|tests/data/ljspeech/wavs/LJ002-0004.npy +tests/data/ljspeech/wavs/LJ008-0261.wav|tests/data/ljspeech/wavs/LJ008-0261.npy 
+tests/data/ljspeech/wavs/LJ019-0176.wav|tests/data/ljspeech/wavs/LJ019-0176.npy +tests/data/ljspeech/wavs/LJ018-0226.wav|tests/data/ljspeech/wavs/LJ018-0226.npy +tests/data/ljspeech/wavs/LJ011-0012.wav|tests/data/ljspeech/wavs/LJ011-0012.npy +tests/data/ljspeech/wavs/LJ005-0193.wav|tests/data/ljspeech/wavs/LJ005-0193.npy +tests/data/ljspeech/wavs/LJ018-0003.wav|tests/data/ljspeech/wavs/LJ018-0003.npy +tests/data/ljspeech/wavs/LJ027-0047.wav|tests/data/ljspeech/wavs/LJ027-0047.npy +tests/data/ljspeech/wavs/LJ023-0118.wav|tests/data/ljspeech/wavs/LJ023-0118.npy +tests/data/ljspeech/wavs/LJ009-0051.wav|tests/data/ljspeech/wavs/LJ009-0051.npy +tests/data/ljspeech/wavs/LJ046-0053.wav|tests/data/ljspeech/wavs/LJ046-0053.npy +tests/data/ljspeech/wavs/LJ009-0033.wav|tests/data/ljspeech/wavs/LJ009-0033.npy +tests/data/ljspeech/wavs/LJ028-0375.wav|tests/data/ljspeech/wavs/LJ028-0375.npy +tests/data/ljspeech/wavs/LJ032-0136.wav|tests/data/ljspeech/wavs/LJ032-0136.npy +tests/data/ljspeech/wavs/LJ010-0015.wav|tests/data/ljspeech/wavs/LJ010-0015.npy +tests/data/ljspeech/wavs/LJ005-0089.wav|tests/data/ljspeech/wavs/LJ005-0089.npy +tests/data/ljspeech/wavs/LJ010-0206.wav|tests/data/ljspeech/wavs/LJ010-0206.npy +tests/data/ljspeech/wavs/LJ032-0261.wav|tests/data/ljspeech/wavs/LJ032-0261.npy +tests/data/ljspeech/wavs/LJ001-0084.wav|tests/data/ljspeech/wavs/LJ001-0084.npy +tests/data/ljspeech/wavs/LJ047-0023.wav|tests/data/ljspeech/wavs/LJ047-0023.npy +tests/data/ljspeech/wavs/LJ004-0120.wav|tests/data/ljspeech/wavs/LJ004-0120.npy +tests/data/ljspeech/wavs/LJ050-0062.wav|tests/data/ljspeech/wavs/LJ050-0062.npy +tests/data/ljspeech/wavs/LJ019-0101.wav|tests/data/ljspeech/wavs/LJ019-0101.npy +tests/data/ljspeech/wavs/LJ041-0080.wav|tests/data/ljspeech/wavs/LJ041-0080.npy +tests/data/ljspeech/wavs/LJ011-0098.wav|tests/data/ljspeech/wavs/LJ011-0098.npy +tests/data/ljspeech/wavs/LJ021-0158.wav|tests/data/ljspeech/wavs/LJ021-0158.npy +tests/data/ljspeech/wavs/LJ035-0024.wav|tests/data/ljspeech/wavs/LJ035-0024.npy +tests/data/ljspeech/wavs/LJ030-0149.wav|tests/data/ljspeech/wavs/LJ030-0149.npy +tests/data/ljspeech/wavs/LJ048-0012.wav|tests/data/ljspeech/wavs/LJ048-0012.npy +tests/data/ljspeech/wavs/LJ028-0510.wav|tests/data/ljspeech/wavs/LJ028-0510.npy +tests/data/ljspeech/wavs/LJ019-0009.wav|tests/data/ljspeech/wavs/LJ019-0009.npy +tests/data/ljspeech/wavs/LJ037-0038.wav|tests/data/ljspeech/wavs/LJ037-0038.npy +tests/data/ljspeech/wavs/LJ031-0128.wav|tests/data/ljspeech/wavs/LJ031-0128.npy +tests/data/ljspeech/wavs/LJ041-0066.wav|tests/data/ljspeech/wavs/LJ041-0066.npy +tests/data/ljspeech/wavs/LJ049-0069.wav|tests/data/ljspeech/wavs/LJ049-0069.npy +tests/data/ljspeech/wavs/LJ033-0008.wav|tests/data/ljspeech/wavs/LJ033-0008.npy +tests/data/ljspeech/wavs/LJ044-0184.wav|tests/data/ljspeech/wavs/LJ044-0184.npy +tests/data/ljspeech/wavs/LJ004-0157.wav|tests/data/ljspeech/wavs/LJ004-0157.npy +tests/data/ljspeech/wavs/LJ018-0184.wav|tests/data/ljspeech/wavs/LJ018-0184.npy +tests/data/ljspeech/wavs/LJ022-0070.wav|tests/data/ljspeech/wavs/LJ022-0070.npy +tests/data/ljspeech/wavs/LJ001-0053.wav|tests/data/ljspeech/wavs/LJ001-0053.npy +tests/data/ljspeech/wavs/LJ009-0223.wav|tests/data/ljspeech/wavs/LJ009-0223.npy +tests/data/ljspeech/wavs/LJ036-0086.wav|tests/data/ljspeech/wavs/LJ036-0086.npy +tests/data/ljspeech/wavs/LJ018-0225.wav|tests/data/ljspeech/wavs/LJ018-0225.npy +tests/data/ljspeech/wavs/LJ018-0124.wav|tests/data/ljspeech/wavs/LJ018-0124.npy 
+tests/data/ljspeech/wavs/LJ021-0177.wav|tests/data/ljspeech/wavs/LJ021-0177.npy +tests/data/ljspeech/wavs/LJ048-0136.wav|tests/data/ljspeech/wavs/LJ048-0136.npy +tests/data/ljspeech/wavs/LJ030-0192.wav|tests/data/ljspeech/wavs/LJ030-0192.npy +tests/data/ljspeech/wavs/LJ017-0146.wav|tests/data/ljspeech/wavs/LJ017-0146.npy +tests/data/ljspeech/wavs/LJ016-0352.wav|tests/data/ljspeech/wavs/LJ016-0352.npy +tests/data/ljspeech/wavs/LJ017-0222.wav|tests/data/ljspeech/wavs/LJ017-0222.npy +tests/data/ljspeech/wavs/LJ039-0247.wav|tests/data/ljspeech/wavs/LJ039-0247.npy +tests/data/ljspeech/wavs/LJ036-0194.wav|tests/data/ljspeech/wavs/LJ036-0194.npy +tests/data/ljspeech/wavs/LJ037-0231.wav|tests/data/ljspeech/wavs/LJ037-0231.npy +tests/data/ljspeech/wavs/LJ006-0122.wav|tests/data/ljspeech/wavs/LJ006-0122.npy +tests/data/ljspeech/wavs/LJ009-0175.wav|tests/data/ljspeech/wavs/LJ009-0175.npy +tests/data/ljspeech/wavs/LJ036-0192.wav|tests/data/ljspeech/wavs/LJ036-0192.npy +tests/data/ljspeech/wavs/LJ008-0030.wav|tests/data/ljspeech/wavs/LJ008-0030.npy +tests/data/ljspeech/wavs/LJ045-0129.wav|tests/data/ljspeech/wavs/LJ045-0129.npy +tests/data/ljspeech/wavs/LJ036-0072.wav|tests/data/ljspeech/wavs/LJ036-0072.npy +tests/data/ljspeech/wavs/LJ024-0001.wav|tests/data/ljspeech/wavs/LJ024-0001.npy +tests/data/ljspeech/wavs/LJ028-0090.wav|tests/data/ljspeech/wavs/LJ028-0090.npy +tests/data/ljspeech/wavs/LJ048-0215.wav|tests/data/ljspeech/wavs/LJ048-0215.npy +tests/data/ljspeech/wavs/LJ008-0276.wav|tests/data/ljspeech/wavs/LJ008-0276.npy +tests/data/ljspeech/wavs/LJ018-0077.wav|tests/data/ljspeech/wavs/LJ018-0077.npy +tests/data/ljspeech/wavs/LJ044-0030.wav|tests/data/ljspeech/wavs/LJ044-0030.npy +tests/data/ljspeech/wavs/LJ046-0076.wav|tests/data/ljspeech/wavs/LJ046-0076.npy +tests/data/ljspeech/wavs/LJ001-0151.wav|tests/data/ljspeech/wavs/LJ001-0151.npy +tests/data/ljspeech/wavs/LJ021-0170.wav|tests/data/ljspeech/wavs/LJ021-0170.npy +tests/data/ljspeech/wavs/LJ019-0342.wav|tests/data/ljspeech/wavs/LJ019-0342.npy +tests/data/ljspeech/wavs/LJ025-0171.wav|tests/data/ljspeech/wavs/LJ025-0171.npy +tests/data/ljspeech/wavs/LJ008-0281.wav|tests/data/ljspeech/wavs/LJ008-0281.npy +tests/data/ljspeech/wavs/LJ049-0126.wav|tests/data/ljspeech/wavs/LJ049-0126.npy +tests/data/ljspeech/wavs/LJ008-0070.wav|tests/data/ljspeech/wavs/LJ008-0070.npy +tests/data/ljspeech/wavs/LJ002-0092.wav|tests/data/ljspeech/wavs/LJ002-0092.npy +tests/data/ljspeech/wavs/LJ048-0243.wav|tests/data/ljspeech/wavs/LJ048-0243.npy +tests/data/ljspeech/wavs/LJ019-0319.wav|tests/data/ljspeech/wavs/LJ019-0319.npy +tests/data/ljspeech/wavs/LJ028-0005.wav|tests/data/ljspeech/wavs/LJ028-0005.npy +tests/data/ljspeech/wavs/LJ019-0232.wav|tests/data/ljspeech/wavs/LJ019-0232.npy +tests/data/ljspeech/wavs/LJ030-0220.wav|tests/data/ljspeech/wavs/LJ030-0220.npy +tests/data/ljspeech/wavs/LJ024-0057.wav|tests/data/ljspeech/wavs/LJ024-0057.npy +tests/data/ljspeech/wavs/LJ019-0132.wav|tests/data/ljspeech/wavs/LJ019-0132.npy +tests/data/ljspeech/wavs/LJ006-0232.wav|tests/data/ljspeech/wavs/LJ006-0232.npy +tests/data/ljspeech/wavs/LJ029-0187.wav|tests/data/ljspeech/wavs/LJ029-0187.npy +tests/data/ljspeech/wavs/LJ010-0152.wav|tests/data/ljspeech/wavs/LJ010-0152.npy +tests/data/ljspeech/wavs/LJ050-0079.wav|tests/data/ljspeech/wavs/LJ050-0079.npy +tests/data/ljspeech/wavs/LJ005-0019.wav|tests/data/ljspeech/wavs/LJ005-0019.npy +tests/data/ljspeech/wavs/LJ028-0447.wav|tests/data/ljspeech/wavs/LJ028-0447.npy 
+tests/data/ljspeech/wavs/LJ012-0231.wav|tests/data/ljspeech/wavs/LJ012-0231.npy +tests/data/ljspeech/wavs/LJ041-0142.wav|tests/data/ljspeech/wavs/LJ041-0142.npy +tests/data/ljspeech/wavs/LJ004-0207.wav|tests/data/ljspeech/wavs/LJ004-0207.npy +tests/data/ljspeech/wavs/LJ001-0167.wav|tests/data/ljspeech/wavs/LJ001-0167.npy +tests/data/ljspeech/wavs/LJ044-0107.wav|tests/data/ljspeech/wavs/LJ044-0107.npy +tests/data/ljspeech/wavs/LJ015-0157.wav|tests/data/ljspeech/wavs/LJ015-0157.npy +tests/data/ljspeech/wavs/LJ040-0237.wav|tests/data/ljspeech/wavs/LJ040-0237.npy +tests/data/ljspeech/wavs/LJ006-0107.wav|tests/data/ljspeech/wavs/LJ006-0107.npy +tests/data/ljspeech/wavs/LJ010-0031.wav|tests/data/ljspeech/wavs/LJ010-0031.npy +tests/data/ljspeech/wavs/LJ028-0050.wav|tests/data/ljspeech/wavs/LJ028-0050.npy +tests/data/ljspeech/wavs/LJ019-0214.wav|tests/data/ljspeech/wavs/LJ019-0214.npy +tests/data/ljspeech/wavs/LJ001-0161.wav|tests/data/ljspeech/wavs/LJ001-0161.npy +tests/data/ljspeech/wavs/LJ030-0211.wav|tests/data/ljspeech/wavs/LJ030-0211.npy +tests/data/ljspeech/wavs/LJ033-0079.wav|tests/data/ljspeech/wavs/LJ033-0079.npy +tests/data/ljspeech/wavs/LJ009-0269.wav|tests/data/ljspeech/wavs/LJ009-0269.npy +tests/data/ljspeech/wavs/LJ043-0084.wav|tests/data/ljspeech/wavs/LJ043-0084.npy +tests/data/ljspeech/wavs/LJ004-0017.wav|tests/data/ljspeech/wavs/LJ004-0017.npy +tests/data/ljspeech/wavs/LJ046-0247.wav|tests/data/ljspeech/wavs/LJ046-0247.npy +tests/data/ljspeech/wavs/LJ005-0114.wav|tests/data/ljspeech/wavs/LJ005-0114.npy +tests/data/ljspeech/wavs/LJ015-0066.wav|tests/data/ljspeech/wavs/LJ015-0066.npy +tests/data/ljspeech/wavs/LJ009-0299.wav|tests/data/ljspeech/wavs/LJ009-0299.npy +tests/data/ljspeech/wavs/LJ007-0004.wav|tests/data/ljspeech/wavs/LJ007-0004.npy +tests/data/ljspeech/wavs/LJ006-0045.wav|tests/data/ljspeech/wavs/LJ006-0045.npy +tests/data/ljspeech/wavs/LJ019-0395.wav|tests/data/ljspeech/wavs/LJ019-0395.npy +tests/data/ljspeech/wavs/LJ031-0162.wav|tests/data/ljspeech/wavs/LJ031-0162.npy +tests/data/ljspeech/wavs/LJ046-0249.wav|tests/data/ljspeech/wavs/LJ046-0249.npy +tests/data/ljspeech/wavs/LJ034-0132.wav|tests/data/ljspeech/wavs/LJ034-0132.npy +tests/data/ljspeech/wavs/LJ013-0073.wav|tests/data/ljspeech/wavs/LJ013-0073.npy +tests/data/ljspeech/wavs/LJ011-0279.wav|tests/data/ljspeech/wavs/LJ011-0279.npy +tests/data/ljspeech/wavs/LJ030-0158.wav|tests/data/ljspeech/wavs/LJ030-0158.npy +tests/data/ljspeech/wavs/LJ048-0127.wav|tests/data/ljspeech/wavs/LJ048-0127.npy +tests/data/ljspeech/wavs/LJ036-0210.wav|tests/data/ljspeech/wavs/LJ036-0210.npy +tests/data/ljspeech/wavs/LJ029-0193.wav|tests/data/ljspeech/wavs/LJ029-0193.npy +tests/data/ljspeech/wavs/LJ016-0151.wav|tests/data/ljspeech/wavs/LJ016-0151.npy +tests/data/ljspeech/wavs/LJ033-0153.wav|tests/data/ljspeech/wavs/LJ033-0153.npy +tests/data/ljspeech/wavs/LJ042-0009.wav|tests/data/ljspeech/wavs/LJ042-0009.npy +tests/data/ljspeech/wavs/LJ050-0085.wav|tests/data/ljspeech/wavs/LJ050-0085.npy +tests/data/ljspeech/wavs/LJ025-0034.wav|tests/data/ljspeech/wavs/LJ025-0034.npy +tests/data/ljspeech/wavs/LJ048-0235.wav|tests/data/ljspeech/wavs/LJ048-0235.npy +tests/data/ljspeech/wavs/LJ001-0041.wav|tests/data/ljspeech/wavs/LJ001-0041.npy +tests/data/ljspeech/wavs/LJ002-0288.wav|tests/data/ljspeech/wavs/LJ002-0288.npy +tests/data/ljspeech/wavs/LJ022-0074.wav|tests/data/ljspeech/wavs/LJ022-0074.npy +tests/data/ljspeech/wavs/LJ017-0217.wav|tests/data/ljspeech/wavs/LJ017-0217.npy 
+tests/data/ljspeech/wavs/LJ011-0100.wav|tests/data/ljspeech/wavs/LJ011-0100.npy +tests/data/ljspeech/wavs/LJ017-0280.wav|tests/data/ljspeech/wavs/LJ017-0280.npy +tests/data/ljspeech/wavs/LJ028-0298.wav|tests/data/ljspeech/wavs/LJ028-0298.npy +tests/data/ljspeech/wavs/LJ023-0069.wav|tests/data/ljspeech/wavs/LJ023-0069.npy +tests/data/ljspeech/wavs/LJ031-0119.wav|tests/data/ljspeech/wavs/LJ031-0119.npy +tests/data/ljspeech/wavs/LJ011-0091.wav|tests/data/ljspeech/wavs/LJ011-0091.npy +tests/data/ljspeech/wavs/LJ003-0266.wav|tests/data/ljspeech/wavs/LJ003-0266.npy +tests/data/ljspeech/wavs/LJ016-0141.wav|tests/data/ljspeech/wavs/LJ016-0141.npy +tests/data/ljspeech/wavs/LJ011-0194.wav|tests/data/ljspeech/wavs/LJ011-0194.npy +tests/data/ljspeech/wavs/LJ029-0194.wav|tests/data/ljspeech/wavs/LJ029-0194.npy +tests/data/ljspeech/wavs/LJ045-0198.wav|tests/data/ljspeech/wavs/LJ045-0198.npy +tests/data/ljspeech/wavs/LJ011-0138.wav|tests/data/ljspeech/wavs/LJ011-0138.npy +tests/data/ljspeech/wavs/LJ042-0027.wav|tests/data/ljspeech/wavs/LJ042-0027.npy +tests/data/ljspeech/wavs/LJ037-0135.wav|tests/data/ljspeech/wavs/LJ037-0135.npy +tests/data/ljspeech/wavs/LJ033-0142.wav|tests/data/ljspeech/wavs/LJ033-0142.npy +tests/data/ljspeech/wavs/LJ038-0088.wav|tests/data/ljspeech/wavs/LJ038-0088.npy +tests/data/ljspeech/wavs/LJ002-0225.wav|tests/data/ljspeech/wavs/LJ002-0225.npy +tests/data/ljspeech/wavs/LJ030-0160.wav|tests/data/ljspeech/wavs/LJ030-0160.npy +tests/data/ljspeech/wavs/LJ036-0176.wav|tests/data/ljspeech/wavs/LJ036-0176.npy +tests/data/ljspeech/wavs/LJ002-0034.wav|tests/data/ljspeech/wavs/LJ002-0034.npy +tests/data/ljspeech/wavs/LJ004-0028.wav|tests/data/ljspeech/wavs/LJ004-0028.npy +tests/data/ljspeech/wavs/LJ010-0018.wav|tests/data/ljspeech/wavs/LJ010-0018.npy +tests/data/ljspeech/wavs/LJ038-0074.wav|tests/data/ljspeech/wavs/LJ038-0074.npy +tests/data/ljspeech/wavs/LJ038-0015.wav|tests/data/ljspeech/wavs/LJ038-0015.npy +tests/data/ljspeech/wavs/LJ044-0040.wav|tests/data/ljspeech/wavs/LJ044-0040.npy +tests/data/ljspeech/wavs/LJ050-0045.wav|tests/data/ljspeech/wavs/LJ050-0045.npy +tests/data/ljspeech/wavs/LJ035-0137.wav|tests/data/ljspeech/wavs/LJ035-0137.npy +tests/data/ljspeech/wavs/LJ003-0309.wav|tests/data/ljspeech/wavs/LJ003-0309.npy +tests/data/ljspeech/wavs/LJ027-0056.wav|tests/data/ljspeech/wavs/LJ027-0056.npy +tests/data/ljspeech/wavs/LJ001-0006.wav|tests/data/ljspeech/wavs/LJ001-0006.npy +tests/data/ljspeech/wavs/LJ028-0132.wav|tests/data/ljspeech/wavs/LJ028-0132.npy +tests/data/ljspeech/wavs/LJ003-0161.wav|tests/data/ljspeech/wavs/LJ003-0161.npy +tests/data/ljspeech/wavs/LJ035-0154.wav|tests/data/ljspeech/wavs/LJ035-0154.npy +tests/data/ljspeech/wavs/LJ024-0126.wav|tests/data/ljspeech/wavs/LJ024-0126.npy +tests/data/ljspeech/wavs/LJ038-0127.wav|tests/data/ljspeech/wavs/LJ038-0127.npy +tests/data/ljspeech/wavs/LJ014-0047.wav|tests/data/ljspeech/wavs/LJ014-0047.npy +tests/data/ljspeech/wavs/LJ008-0170.wav|tests/data/ljspeech/wavs/LJ008-0170.npy +tests/data/ljspeech/wavs/LJ008-0146.wav|tests/data/ljspeech/wavs/LJ008-0146.npy +tests/data/ljspeech/wavs/LJ041-0055.wav|tests/data/ljspeech/wavs/LJ041-0055.npy +tests/data/ljspeech/wavs/LJ006-0307.wav|tests/data/ljspeech/wavs/LJ006-0307.npy +tests/data/ljspeech/wavs/LJ029-0156.wav|tests/data/ljspeech/wavs/LJ029-0156.npy +tests/data/ljspeech/wavs/LJ033-0214.wav|tests/data/ljspeech/wavs/LJ033-0214.npy +tests/data/ljspeech/wavs/LJ016-0304.wav|tests/data/ljspeech/wavs/LJ016-0304.npy 
+tests/data/ljspeech/wavs/LJ013-0186.wav|tests/data/ljspeech/wavs/LJ013-0186.npy +tests/data/ljspeech/wavs/LJ038-0151.wav|tests/data/ljspeech/wavs/LJ038-0151.npy +tests/data/ljspeech/wavs/LJ013-0208.wav|tests/data/ljspeech/wavs/LJ013-0208.npy +tests/data/ljspeech/wavs/LJ001-0011.wav|tests/data/ljspeech/wavs/LJ001-0011.npy +tests/data/ljspeech/wavs/LJ050-0102.wav|tests/data/ljspeech/wavs/LJ050-0102.npy +tests/data/ljspeech/wavs/LJ046-0061.wav|tests/data/ljspeech/wavs/LJ046-0061.npy +tests/data/ljspeech/wavs/LJ030-0051.wav|tests/data/ljspeech/wavs/LJ030-0051.npy +tests/data/ljspeech/wavs/LJ007-0100.wav|tests/data/ljspeech/wavs/LJ007-0100.npy +tests/data/ljspeech/wavs/LJ007-0099.wav|tests/data/ljspeech/wavs/LJ007-0099.npy +tests/data/ljspeech/wavs/LJ033-0186.wav|tests/data/ljspeech/wavs/LJ033-0186.npy +tests/data/ljspeech/wavs/LJ024-0023.wav|tests/data/ljspeech/wavs/LJ024-0023.npy +tests/data/ljspeech/wavs/LJ035-0079.wav|tests/data/ljspeech/wavs/LJ035-0079.npy +tests/data/ljspeech/wavs/LJ046-0005.wav|tests/data/ljspeech/wavs/LJ046-0005.npy +tests/data/ljspeech/wavs/LJ038-0235.wav|tests/data/ljspeech/wavs/LJ038-0235.npy +tests/data/ljspeech/wavs/LJ046-0208.wav|tests/data/ljspeech/wavs/LJ046-0208.npy +tests/data/ljspeech/wavs/LJ006-0109.wav|tests/data/ljspeech/wavs/LJ006-0109.npy +tests/data/ljspeech/wavs/LJ034-0062.wav|tests/data/ljspeech/wavs/LJ034-0062.npy +tests/data/ljspeech/wavs/LJ020-0044.wav|tests/data/ljspeech/wavs/LJ020-0044.npy +tests/data/ljspeech/wavs/LJ019-0117.wav|tests/data/ljspeech/wavs/LJ019-0117.npy +tests/data/ljspeech/wavs/LJ007-0142.wav|tests/data/ljspeech/wavs/LJ007-0142.npy +tests/data/ljspeech/wavs/LJ005-0036.wav|tests/data/ljspeech/wavs/LJ005-0036.npy +tests/data/ljspeech/wavs/LJ028-0066.wav|tests/data/ljspeech/wavs/LJ028-0066.npy +tests/data/ljspeech/wavs/LJ040-0037.wav|tests/data/ljspeech/wavs/LJ040-0037.npy +tests/data/ljspeech/wavs/LJ021-0180.wav|tests/data/ljspeech/wavs/LJ021-0180.npy +tests/data/ljspeech/wavs/LJ028-0301.wav|tests/data/ljspeech/wavs/LJ028-0301.npy +tests/data/ljspeech/wavs/LJ004-0188.wav|tests/data/ljspeech/wavs/LJ004-0188.npy +tests/data/ljspeech/wavs/LJ035-0125.wav|tests/data/ljspeech/wavs/LJ035-0125.npy +tests/data/ljspeech/wavs/LJ047-0111.wav|tests/data/ljspeech/wavs/LJ047-0111.npy +tests/data/ljspeech/wavs/LJ014-0188.wav|tests/data/ljspeech/wavs/LJ014-0188.npy +tests/data/ljspeech/wavs/LJ025-0137.wav|tests/data/ljspeech/wavs/LJ025-0137.npy +tests/data/ljspeech/wavs/LJ020-0001.wav|tests/data/ljspeech/wavs/LJ020-0001.npy +tests/data/ljspeech/wavs/LJ028-0209.wav|tests/data/ljspeech/wavs/LJ028-0209.npy +tests/data/ljspeech/wavs/LJ008-0236.wav|tests/data/ljspeech/wavs/LJ008-0236.npy +tests/data/ljspeech/wavs/LJ002-0093.wav|tests/data/ljspeech/wavs/LJ002-0093.npy +tests/data/ljspeech/wavs/LJ019-0148.wav|tests/data/ljspeech/wavs/LJ019-0148.npy +tests/data/ljspeech/wavs/LJ025-0124.wav|tests/data/ljspeech/wavs/LJ025-0124.npy +tests/data/ljspeech/wavs/LJ035-0108.wav|tests/data/ljspeech/wavs/LJ035-0108.npy +tests/data/ljspeech/wavs/LJ039-0063.wav|tests/data/ljspeech/wavs/LJ039-0063.npy +tests/data/ljspeech/wavs/LJ005-0134.wav|tests/data/ljspeech/wavs/LJ005-0134.npy +tests/data/ljspeech/wavs/LJ021-0029.wav|tests/data/ljspeech/wavs/LJ021-0029.npy +tests/data/ljspeech/wavs/LJ018-0121.wav|tests/data/ljspeech/wavs/LJ018-0121.npy +tests/data/ljspeech/wavs/LJ046-0139.wav|tests/data/ljspeech/wavs/LJ046-0139.npy +tests/data/ljspeech/wavs/LJ046-0112.wav|tests/data/ljspeech/wavs/LJ046-0112.npy 
+tests/data/ljspeech/wavs/LJ021-0091.wav|tests/data/ljspeech/wavs/LJ021-0091.npy +tests/data/ljspeech/wavs/LJ018-0390.wav|tests/data/ljspeech/wavs/LJ018-0390.npy +tests/data/ljspeech/wavs/LJ040-0194.wav|tests/data/ljspeech/wavs/LJ040-0194.npy +tests/data/ljspeech/wavs/LJ001-0135.wav|tests/data/ljspeech/wavs/LJ001-0135.npy +tests/data/ljspeech/wavs/LJ013-0225.wav|tests/data/ljspeech/wavs/LJ013-0225.npy +tests/data/ljspeech/wavs/LJ009-0107.wav|tests/data/ljspeech/wavs/LJ009-0107.npy +tests/data/ljspeech/wavs/LJ017-0097.wav|tests/data/ljspeech/wavs/LJ017-0097.npy +tests/data/ljspeech/wavs/LJ037-0158.wav|tests/data/ljspeech/wavs/LJ037-0158.npy +tests/data/ljspeech/wavs/LJ012-0291.wav|tests/data/ljspeech/wavs/LJ012-0291.npy +tests/data/ljspeech/wavs/LJ036-0173.wav|tests/data/ljspeech/wavs/LJ036-0173.npy +tests/data/ljspeech/wavs/LJ039-0052.wav|tests/data/ljspeech/wavs/LJ039-0052.npy +tests/data/ljspeech/wavs/LJ022-0044.wav|tests/data/ljspeech/wavs/LJ022-0044.npy +tests/data/ljspeech/wavs/LJ022-0132.wav|tests/data/ljspeech/wavs/LJ022-0132.npy +tests/data/ljspeech/wavs/LJ002-0226.wav|tests/data/ljspeech/wavs/LJ002-0226.npy +tests/data/ljspeech/wavs/LJ021-0145.wav|tests/data/ljspeech/wavs/LJ021-0145.npy +tests/data/ljspeech/wavs/LJ018-0379.wav|tests/data/ljspeech/wavs/LJ018-0379.npy +tests/data/ljspeech/wavs/LJ047-0239.wav|tests/data/ljspeech/wavs/LJ047-0239.npy +tests/data/ljspeech/wavs/LJ002-0057.wav|tests/data/ljspeech/wavs/LJ002-0057.npy +tests/data/ljspeech/wavs/LJ001-0083.wav|tests/data/ljspeech/wavs/LJ001-0083.npy +tests/data/ljspeech/wavs/LJ018-0072.wav|tests/data/ljspeech/wavs/LJ018-0072.npy +tests/data/ljspeech/wavs/LJ032-0169.wav|tests/data/ljspeech/wavs/LJ032-0169.npy +tests/data/ljspeech/wavs/LJ002-0282.wav|tests/data/ljspeech/wavs/LJ002-0282.npy +tests/data/ljspeech/wavs/LJ018-0388.wav|tests/data/ljspeech/wavs/LJ018-0388.npy +tests/data/ljspeech/wavs/LJ005-0101.wav|tests/data/ljspeech/wavs/LJ005-0101.npy +tests/data/ljspeech/wavs/LJ012-0021.wav|tests/data/ljspeech/wavs/LJ012-0021.npy +tests/data/ljspeech/wavs/LJ048-0249.wav|tests/data/ljspeech/wavs/LJ048-0249.npy +tests/data/ljspeech/wavs/LJ005-0075.wav|tests/data/ljspeech/wavs/LJ005-0075.npy +tests/data/ljspeech/wavs/LJ003-0295.wav|tests/data/ljspeech/wavs/LJ003-0295.npy +tests/data/ljspeech/wavs/LJ031-0191.wav|tests/data/ljspeech/wavs/LJ031-0191.npy +tests/data/ljspeech/wavs/LJ008-0313.wav|tests/data/ljspeech/wavs/LJ008-0313.npy +tests/data/ljspeech/wavs/LJ047-0041.wav|tests/data/ljspeech/wavs/LJ047-0041.npy +tests/data/ljspeech/wavs/LJ024-0066.wav|tests/data/ljspeech/wavs/LJ024-0066.npy +tests/data/ljspeech/wavs/LJ009-0060.wav|tests/data/ljspeech/wavs/LJ009-0060.npy +tests/data/ljspeech/wavs/LJ024-0055.wav|tests/data/ljspeech/wavs/LJ024-0055.npy +tests/data/ljspeech/wavs/LJ007-0233.wav|tests/data/ljspeech/wavs/LJ007-0233.npy +tests/data/ljspeech/wavs/LJ007-0055.wav|tests/data/ljspeech/wavs/LJ007-0055.npy +tests/data/ljspeech/wavs/LJ025-0085.wav|tests/data/ljspeech/wavs/LJ025-0085.npy +tests/data/ljspeech/wavs/LJ024-0029.wav|tests/data/ljspeech/wavs/LJ024-0029.npy +tests/data/ljspeech/wavs/LJ021-0046.wav|tests/data/ljspeech/wavs/LJ021-0046.npy +tests/data/ljspeech/wavs/LJ043-0169.wav|tests/data/ljspeech/wavs/LJ043-0169.npy +tests/data/ljspeech/wavs/LJ013-0175.wav|tests/data/ljspeech/wavs/LJ013-0175.npy +tests/data/ljspeech/wavs/LJ039-0064.wav|tests/data/ljspeech/wavs/LJ039-0064.npy +tests/data/ljspeech/wavs/LJ003-0213.wav|tests/data/ljspeech/wavs/LJ003-0213.npy 
+tests/data/ljspeech/wavs/LJ002-0053.wav|tests/data/ljspeech/wavs/LJ002-0053.npy +tests/data/ljspeech/wavs/LJ014-0248.wav|tests/data/ljspeech/wavs/LJ014-0248.npy +tests/data/ljspeech/wavs/LJ033-0088.wav|tests/data/ljspeech/wavs/LJ033-0088.npy +tests/data/ljspeech/wavs/LJ001-0137.wav|tests/data/ljspeech/wavs/LJ001-0137.npy +tests/data/ljspeech/wavs/LJ001-0029.wav|tests/data/ljspeech/wavs/LJ001-0029.npy +tests/data/ljspeech/wavs/LJ042-0091.wav|tests/data/ljspeech/wavs/LJ042-0091.npy +tests/data/ljspeech/wavs/LJ032-0009.wav|tests/data/ljspeech/wavs/LJ032-0009.npy +tests/data/ljspeech/wavs/LJ001-0155.wav|tests/data/ljspeech/wavs/LJ001-0155.npy +tests/data/ljspeech/wavs/LJ014-0197.wav|tests/data/ljspeech/wavs/LJ014-0197.npy +tests/data/ljspeech/wavs/LJ028-0147.wav|tests/data/ljspeech/wavs/LJ028-0147.npy +tests/data/ljspeech/wavs/LJ019-0396.wav|tests/data/ljspeech/wavs/LJ019-0396.npy +tests/data/ljspeech/wavs/LJ008-0141.wav|tests/data/ljspeech/wavs/LJ008-0141.npy +tests/data/ljspeech/wavs/LJ020-0105.wav|tests/data/ljspeech/wavs/LJ020-0105.npy +tests/data/ljspeech/wavs/LJ003-0323.wav|tests/data/ljspeech/wavs/LJ003-0323.npy +tests/data/ljspeech/wavs/LJ022-0123.wav|tests/data/ljspeech/wavs/LJ022-0123.npy +tests/data/ljspeech/wavs/LJ032-0093.wav|tests/data/ljspeech/wavs/LJ032-0093.npy +tests/data/ljspeech/wavs/LJ028-0126.wav|tests/data/ljspeech/wavs/LJ028-0126.npy +tests/data/ljspeech/wavs/LJ002-0248.wav|tests/data/ljspeech/wavs/LJ002-0248.npy +tests/data/ljspeech/wavs/LJ045-0215.wav|tests/data/ljspeech/wavs/LJ045-0215.npy +tests/data/ljspeech/wavs/LJ040-0211.wav|tests/data/ljspeech/wavs/LJ040-0211.npy +tests/data/ljspeech/wavs/LJ018-0066.wav|tests/data/ljspeech/wavs/LJ018-0066.npy +tests/data/ljspeech/wavs/LJ037-0168.wav|tests/data/ljspeech/wavs/LJ037-0168.npy +tests/data/ljspeech/wavs/LJ018-0328.wav|tests/data/ljspeech/wavs/LJ018-0328.npy +tests/data/ljspeech/wavs/LJ031-0045.wav|tests/data/ljspeech/wavs/LJ031-0045.npy +tests/data/ljspeech/wavs/LJ030-0183.wav|tests/data/ljspeech/wavs/LJ030-0183.npy +tests/data/ljspeech/wavs/LJ044-0071.wav|tests/data/ljspeech/wavs/LJ044-0071.npy +tests/data/ljspeech/wavs/LJ015-0186.wav|tests/data/ljspeech/wavs/LJ015-0186.npy +tests/data/ljspeech/wavs/LJ039-0244.wav|tests/data/ljspeech/wavs/LJ039-0244.npy +tests/data/ljspeech/wavs/LJ032-0166.wav|tests/data/ljspeech/wavs/LJ032-0166.npy +tests/data/ljspeech/wavs/LJ040-0138.wav|tests/data/ljspeech/wavs/LJ040-0138.npy +tests/data/ljspeech/wavs/LJ004-0078.wav|tests/data/ljspeech/wavs/LJ004-0078.npy +tests/data/ljspeech/wavs/LJ027-0022.wav|tests/data/ljspeech/wavs/LJ027-0022.npy +tests/data/ljspeech/wavs/LJ039-0237.wav|tests/data/ljspeech/wavs/LJ039-0237.npy +tests/data/ljspeech/wavs/LJ012-0006.wav|tests/data/ljspeech/wavs/LJ012-0006.npy +tests/data/ljspeech/wavs/LJ010-0129.wav|tests/data/ljspeech/wavs/LJ010-0129.npy +tests/data/ljspeech/wavs/LJ014-0039.wav|tests/data/ljspeech/wavs/LJ014-0039.npy +tests/data/ljspeech/wavs/LJ040-0155.wav|tests/data/ljspeech/wavs/LJ040-0155.npy +tests/data/ljspeech/wavs/LJ012-0077.wav|tests/data/ljspeech/wavs/LJ012-0077.npy +tests/data/ljspeech/wavs/LJ018-0133.wav|tests/data/ljspeech/wavs/LJ018-0133.npy +tests/data/ljspeech/wavs/LJ018-0300.wav|tests/data/ljspeech/wavs/LJ018-0300.npy +tests/data/ljspeech/wavs/LJ028-0055.wav|tests/data/ljspeech/wavs/LJ028-0055.npy +tests/data/ljspeech/wavs/LJ037-0250.wav|tests/data/ljspeech/wavs/LJ037-0250.npy +tests/data/ljspeech/wavs/LJ011-0160.wav|tests/data/ljspeech/wavs/LJ011-0160.npy 
+tests/data/ljspeech/wavs/LJ006-0159.wav|tests/data/ljspeech/wavs/LJ006-0159.npy +tests/data/ljspeech/wavs/LJ010-0080.wav|tests/data/ljspeech/wavs/LJ010-0080.npy +tests/data/ljspeech/wavs/LJ004-0169.wav|tests/data/ljspeech/wavs/LJ004-0169.npy +tests/data/ljspeech/wavs/LJ012-0227.wav|tests/data/ljspeech/wavs/LJ012-0227.npy +tests/data/ljspeech/wavs/LJ030-0091.wav|tests/data/ljspeech/wavs/LJ030-0091.npy +tests/data/ljspeech/wavs/LJ011-0018.wav|tests/data/ljspeech/wavs/LJ011-0018.npy +tests/data/ljspeech/wavs/LJ046-0237.wav|tests/data/ljspeech/wavs/LJ046-0237.npy +tests/data/ljspeech/wavs/LJ031-0033.wav|tests/data/ljspeech/wavs/LJ031-0033.npy +tests/data/ljspeech/wavs/LJ046-0242.wav|tests/data/ljspeech/wavs/LJ046-0242.npy +tests/data/ljspeech/wavs/LJ003-0047.wav|tests/data/ljspeech/wavs/LJ003-0047.npy +tests/data/ljspeech/wavs/LJ039-0111.wav|tests/data/ljspeech/wavs/LJ039-0111.npy +tests/data/ljspeech/wavs/LJ036-0168.wav|tests/data/ljspeech/wavs/LJ036-0168.npy +tests/data/ljspeech/wavs/LJ037-0152.wav|tests/data/ljspeech/wavs/LJ037-0152.npy +tests/data/ljspeech/wavs/LJ027-0081.wav|tests/data/ljspeech/wavs/LJ027-0081.npy +tests/data/ljspeech/wavs/LJ027-0020.wav|tests/data/ljspeech/wavs/LJ027-0020.npy +tests/data/ljspeech/wavs/LJ019-0028.wav|tests/data/ljspeech/wavs/LJ019-0028.npy +tests/data/ljspeech/wavs/LJ035-0033.wav|tests/data/ljspeech/wavs/LJ035-0033.npy +tests/data/ljspeech/wavs/LJ047-0201.wav|tests/data/ljspeech/wavs/LJ047-0201.npy +tests/data/ljspeech/wavs/LJ017-0168.wav|tests/data/ljspeech/wavs/LJ017-0168.npy +tests/data/ljspeech/wavs/LJ022-0176.wav|tests/data/ljspeech/wavs/LJ022-0176.npy +tests/data/ljspeech/wavs/LJ034-0014.wav|tests/data/ljspeech/wavs/LJ034-0014.npy +tests/data/ljspeech/wavs/LJ011-0082.wav|tests/data/ljspeech/wavs/LJ011-0082.npy +tests/data/ljspeech/wavs/LJ037-0127.wav|tests/data/ljspeech/wavs/LJ037-0127.npy +tests/data/ljspeech/wavs/LJ015-0085.wav|tests/data/ljspeech/wavs/LJ015-0085.npy +tests/data/ljspeech/wavs/LJ009-0199.wav|tests/data/ljspeech/wavs/LJ009-0199.npy +tests/data/ljspeech/wavs/LJ031-0187.wav|tests/data/ljspeech/wavs/LJ031-0187.npy +tests/data/ljspeech/wavs/LJ002-0317.wav|tests/data/ljspeech/wavs/LJ002-0317.npy +tests/data/ljspeech/wavs/LJ016-0160.wav|tests/data/ljspeech/wavs/LJ016-0160.npy +tests/data/ljspeech/wavs/LJ040-0236.wav|tests/data/ljspeech/wavs/LJ040-0236.npy +tests/data/ljspeech/wavs/LJ014-0335.wav|tests/data/ljspeech/wavs/LJ014-0335.npy +tests/data/ljspeech/wavs/LJ025-0046.wav|tests/data/ljspeech/wavs/LJ025-0046.npy +tests/data/ljspeech/wavs/LJ016-0115.wav|tests/data/ljspeech/wavs/LJ016-0115.npy +tests/data/ljspeech/wavs/LJ002-0260.wav|tests/data/ljspeech/wavs/LJ002-0260.npy +tests/data/ljspeech/wavs/LJ009-0293.wav|tests/data/ljspeech/wavs/LJ009-0293.npy +tests/data/ljspeech/wavs/LJ016-0291.wav|tests/data/ljspeech/wavs/LJ016-0291.npy +tests/data/ljspeech/wavs/LJ046-0231.wav|tests/data/ljspeech/wavs/LJ046-0231.npy +tests/data/ljspeech/wavs/LJ028-0188.wav|tests/data/ljspeech/wavs/LJ028-0188.npy +tests/data/ljspeech/wavs/LJ050-0249.wav|tests/data/ljspeech/wavs/LJ050-0249.npy +tests/data/ljspeech/wavs/LJ042-0017.wav|tests/data/ljspeech/wavs/LJ042-0017.npy +tests/data/ljspeech/wavs/LJ025-0045.wav|tests/data/ljspeech/wavs/LJ025-0045.npy +tests/data/ljspeech/wavs/LJ004-0002.wav|tests/data/ljspeech/wavs/LJ004-0002.npy +tests/data/ljspeech/wavs/LJ036-0172.wav|tests/data/ljspeech/wavs/LJ036-0172.npy +tests/data/ljspeech/wavs/LJ013-0011.wav|tests/data/ljspeech/wavs/LJ013-0011.npy 
+tests/data/ljspeech/wavs/LJ031-0102.wav|tests/data/ljspeech/wavs/LJ031-0102.npy +tests/data/ljspeech/wavs/LJ049-0135.wav|tests/data/ljspeech/wavs/LJ049-0135.npy +tests/data/ljspeech/wavs/LJ049-0218.wav|tests/data/ljspeech/wavs/LJ049-0218.npy +tests/data/ljspeech/wavs/LJ023-0041.wav|tests/data/ljspeech/wavs/LJ023-0041.npy +tests/data/ljspeech/wavs/LJ001-0103.wav|tests/data/ljspeech/wavs/LJ001-0103.npy +tests/data/ljspeech/wavs/LJ001-0110.wav|tests/data/ljspeech/wavs/LJ001-0110.npy +tests/data/ljspeech/wavs/LJ031-0175.wav|tests/data/ljspeech/wavs/LJ031-0175.npy +tests/data/ljspeech/wavs/LJ035-0025.wav|tests/data/ljspeech/wavs/LJ035-0025.npy +tests/data/ljspeech/wavs/LJ046-0233.wav|tests/data/ljspeech/wavs/LJ046-0233.npy +tests/data/ljspeech/wavs/LJ004-0229.wav|tests/data/ljspeech/wavs/LJ004-0229.npy +tests/data/ljspeech/wavs/LJ047-0047.wav|tests/data/ljspeech/wavs/LJ047-0047.npy +tests/data/ljspeech/wavs/LJ050-0054.wav|tests/data/ljspeech/wavs/LJ050-0054.npy +tests/data/ljspeech/wavs/LJ019-0050.wav|tests/data/ljspeech/wavs/LJ019-0050.npy +tests/data/ljspeech/wavs/LJ013-0122.wav|tests/data/ljspeech/wavs/LJ013-0122.npy +tests/data/ljspeech/wavs/LJ027-0006.wav|tests/data/ljspeech/wavs/LJ027-0006.npy +tests/data/ljspeech/wavs/LJ030-0133.wav|tests/data/ljspeech/wavs/LJ030-0133.npy +tests/data/ljspeech/wavs/LJ019-0155.wav|tests/data/ljspeech/wavs/LJ019-0155.npy +tests/data/ljspeech/wavs/LJ009-0151.wav|tests/data/ljspeech/wavs/LJ009-0151.npy +tests/data/ljspeech/wavs/LJ016-0301.wav|tests/data/ljspeech/wavs/LJ016-0301.npy +tests/data/ljspeech/wavs/LJ012-0167.wav|tests/data/ljspeech/wavs/LJ012-0167.npy +tests/data/ljspeech/wavs/LJ017-0101.wav|tests/data/ljspeech/wavs/LJ017-0101.npy +tests/data/ljspeech/wavs/LJ011-0039.wav|tests/data/ljspeech/wavs/LJ011-0039.npy +tests/data/ljspeech/wavs/LJ002-0293.wav|tests/data/ljspeech/wavs/LJ002-0293.npy +tests/data/ljspeech/wavs/LJ003-0322.wav|tests/data/ljspeech/wavs/LJ003-0322.npy +tests/data/ljspeech/wavs/LJ007-0002.wav|tests/data/ljspeech/wavs/LJ007-0002.npy +tests/data/ljspeech/wavs/LJ036-0181.wav|tests/data/ljspeech/wavs/LJ036-0181.npy +tests/data/ljspeech/wavs/LJ039-0031.wav|tests/data/ljspeech/wavs/LJ039-0031.npy +tests/data/ljspeech/wavs/LJ029-0084.wav|tests/data/ljspeech/wavs/LJ029-0084.npy +tests/data/ljspeech/wavs/LJ038-0296.wav|tests/data/ljspeech/wavs/LJ038-0296.npy +tests/data/ljspeech/wavs/LJ024-0044.wav|tests/data/ljspeech/wavs/LJ024-0044.npy +tests/data/ljspeech/wavs/LJ040-0039.wav|tests/data/ljspeech/wavs/LJ040-0039.npy +tests/data/ljspeech/wavs/LJ012-0020.wav|tests/data/ljspeech/wavs/LJ012-0020.npy +tests/data/ljspeech/wavs/LJ008-0120.wav|tests/data/ljspeech/wavs/LJ008-0120.npy +tests/data/ljspeech/wavs/LJ006-0106.wav|tests/data/ljspeech/wavs/LJ006-0106.npy +tests/data/ljspeech/wavs/LJ050-0178.wav|tests/data/ljspeech/wavs/LJ050-0178.npy +tests/data/ljspeech/wavs/LJ036-0053.wav|tests/data/ljspeech/wavs/LJ036-0053.npy +tests/data/ljspeech/wavs/LJ025-0164.wav|tests/data/ljspeech/wavs/LJ025-0164.npy +tests/data/ljspeech/wavs/LJ023-0066.wav|tests/data/ljspeech/wavs/LJ023-0066.npy +tests/data/ljspeech/wavs/LJ002-0183.wav|tests/data/ljspeech/wavs/LJ002-0183.npy +tests/data/ljspeech/wavs/LJ027-0061.wav|tests/data/ljspeech/wavs/LJ027-0061.npy +tests/data/ljspeech/wavs/LJ011-0188.wav|tests/data/ljspeech/wavs/LJ011-0188.npy +tests/data/ljspeech/wavs/LJ048-0257.wav|tests/data/ljspeech/wavs/LJ048-0257.npy +tests/data/ljspeech/wavs/LJ046-0238.wav|tests/data/ljspeech/wavs/LJ046-0238.npy 
+tests/data/ljspeech/wavs/LJ036-0031.wav|tests/data/ljspeech/wavs/LJ036-0031.npy +tests/data/ljspeech/wavs/LJ006-0236.wav|tests/data/ljspeech/wavs/LJ006-0236.npy +tests/data/ljspeech/wavs/LJ030-0230.wav|tests/data/ljspeech/wavs/LJ030-0230.npy +tests/data/ljspeech/wavs/LJ025-0030.wav|tests/data/ljspeech/wavs/LJ025-0030.npy +tests/data/ljspeech/wavs/LJ040-0038.wav|tests/data/ljspeech/wavs/LJ040-0038.npy +tests/data/ljspeech/wavs/LJ016-0389.wav|tests/data/ljspeech/wavs/LJ016-0389.npy +tests/data/ljspeech/wavs/LJ010-0237.wav|tests/data/ljspeech/wavs/LJ010-0237.npy +tests/data/ljspeech/wavs/LJ008-0111.wav|tests/data/ljspeech/wavs/LJ008-0111.npy +tests/data/ljspeech/wavs/LJ036-0182.wav|tests/data/ljspeech/wavs/LJ036-0182.npy +tests/data/ljspeech/wavs/LJ013-0043.wav|tests/data/ljspeech/wavs/LJ013-0043.npy +tests/data/ljspeech/wavs/LJ011-0126.wav|tests/data/ljspeech/wavs/LJ011-0126.npy +tests/data/ljspeech/wavs/LJ006-0251.wav|tests/data/ljspeech/wavs/LJ006-0251.npy +tests/data/ljspeech/wavs/LJ003-0192.wav|tests/data/ljspeech/wavs/LJ003-0192.npy +tests/data/ljspeech/wavs/LJ008-0118.wav|tests/data/ljspeech/wavs/LJ008-0118.npy +tests/data/ljspeech/wavs/LJ002-0008.wav|tests/data/ljspeech/wavs/LJ002-0008.npy +tests/data/ljspeech/wavs/LJ032-0051.wav|tests/data/ljspeech/wavs/LJ032-0051.npy +tests/data/ljspeech/wavs/LJ039-0009.wav|tests/data/ljspeech/wavs/LJ039-0009.npy +tests/data/ljspeech/wavs/LJ046-0191.wav|tests/data/ljspeech/wavs/LJ046-0191.npy +tests/data/ljspeech/wavs/LJ044-0125.wav|tests/data/ljspeech/wavs/LJ044-0125.npy +tests/data/ljspeech/wavs/LJ009-0020.wav|tests/data/ljspeech/wavs/LJ009-0020.npy +tests/data/ljspeech/wavs/LJ010-0165.wav|tests/data/ljspeech/wavs/LJ010-0165.npy +tests/data/ljspeech/wavs/LJ012-0214.wav|tests/data/ljspeech/wavs/LJ012-0214.npy +tests/data/ljspeech/wavs/LJ039-0081.wav|tests/data/ljspeech/wavs/LJ039-0081.npy +tests/data/ljspeech/wavs/LJ050-0124.wav|tests/data/ljspeech/wavs/LJ050-0124.npy +tests/data/ljspeech/wavs/LJ002-0036.wav|tests/data/ljspeech/wavs/LJ002-0036.npy +tests/data/ljspeech/wavs/LJ018-0217.wav|tests/data/ljspeech/wavs/LJ018-0217.npy +tests/data/ljspeech/wavs/LJ034-0147.wav|tests/data/ljspeech/wavs/LJ034-0147.npy +tests/data/ljspeech/wavs/LJ036-0021.wav|tests/data/ljspeech/wavs/LJ036-0021.npy +tests/data/ljspeech/wavs/LJ016-0003.wav|tests/data/ljspeech/wavs/LJ016-0003.npy +tests/data/ljspeech/wavs/LJ028-0002.wav|tests/data/ljspeech/wavs/LJ028-0002.npy +tests/data/ljspeech/wavs/LJ040-0149.wav|tests/data/ljspeech/wavs/LJ040-0149.npy +tests/data/ljspeech/wavs/LJ018-0250.wav|tests/data/ljspeech/wavs/LJ018-0250.npy +tests/data/ljspeech/wavs/LJ012-0098.wav|tests/data/ljspeech/wavs/LJ012-0098.npy +tests/data/ljspeech/wavs/LJ006-0129.wav|tests/data/ljspeech/wavs/LJ006-0129.npy +tests/data/ljspeech/wavs/LJ040-0213.wav|tests/data/ljspeech/wavs/LJ040-0213.npy +tests/data/ljspeech/wavs/LJ006-0302.wav|tests/data/ljspeech/wavs/LJ006-0302.npy +tests/data/ljspeech/wavs/LJ009-0110.wav|tests/data/ljspeech/wavs/LJ009-0110.npy +tests/data/ljspeech/wavs/LJ047-0051.wav|tests/data/ljspeech/wavs/LJ047-0051.npy +tests/data/ljspeech/wavs/LJ025-0138.wav|tests/data/ljspeech/wavs/LJ025-0138.npy +tests/data/ljspeech/wavs/LJ028-0103.wav|tests/data/ljspeech/wavs/LJ028-0103.npy +tests/data/ljspeech/wavs/LJ012-0201.wav|tests/data/ljspeech/wavs/LJ012-0201.npy +tests/data/ljspeech/wavs/LJ010-0091.wav|tests/data/ljspeech/wavs/LJ010-0091.npy +tests/data/ljspeech/wavs/LJ036-0029.wav|tests/data/ljspeech/wavs/LJ036-0029.npy 
+tests/data/ljspeech/wavs/LJ041-0122.wav|tests/data/ljspeech/wavs/LJ041-0122.npy +tests/data/ljspeech/wavs/LJ015-0214.wav|tests/data/ljspeech/wavs/LJ015-0214.npy +tests/data/ljspeech/wavs/LJ018-0353.wav|tests/data/ljspeech/wavs/LJ018-0353.npy +tests/data/ljspeech/wavs/LJ002-0015.wav|tests/data/ljspeech/wavs/LJ002-0015.npy +tests/data/ljspeech/wavs/LJ019-0010.wav|tests/data/ljspeech/wavs/LJ019-0010.npy +tests/data/ljspeech/wavs/LJ013-0125.wav|tests/data/ljspeech/wavs/LJ013-0125.npy +tests/data/ljspeech/wavs/LJ019-0352.wav|tests/data/ljspeech/wavs/LJ019-0352.npy +tests/data/ljspeech/wavs/LJ043-0111.wav|tests/data/ljspeech/wavs/LJ043-0111.npy +tests/data/ljspeech/wavs/LJ044-0013.wav|tests/data/ljspeech/wavs/LJ044-0013.npy +tests/data/ljspeech/wavs/LJ018-0055.wav|tests/data/ljspeech/wavs/LJ018-0055.npy +tests/data/ljspeech/wavs/LJ023-0127.wav|tests/data/ljspeech/wavs/LJ023-0127.npy +tests/data/ljspeech/wavs/LJ048-0067.wav|tests/data/ljspeech/wavs/LJ048-0067.npy +tests/data/ljspeech/wavs/LJ038-0154.wav|tests/data/ljspeech/wavs/LJ038-0154.npy +tests/data/ljspeech/wavs/LJ006-0152.wav|tests/data/ljspeech/wavs/LJ006-0152.npy +tests/data/ljspeech/wavs/LJ038-0076.wav|tests/data/ljspeech/wavs/LJ038-0076.npy +tests/data/ljspeech/wavs/LJ037-0014.wav|tests/data/ljspeech/wavs/LJ037-0014.npy +tests/data/ljspeech/wavs/LJ016-0424.wav|tests/data/ljspeech/wavs/LJ016-0424.npy +tests/data/ljspeech/wavs/LJ035-0200.wav|tests/data/ljspeech/wavs/LJ035-0200.npy +tests/data/ljspeech/wavs/LJ037-0264.wav|tests/data/ljspeech/wavs/LJ037-0264.npy +tests/data/ljspeech/wavs/LJ045-0225.wav|tests/data/ljspeech/wavs/LJ045-0225.npy +tests/data/ljspeech/wavs/LJ035-0171.wav|tests/data/ljspeech/wavs/LJ035-0171.npy +tests/data/ljspeech/wavs/LJ025-0083.wav|tests/data/ljspeech/wavs/LJ025-0083.npy +tests/data/ljspeech/wavs/LJ016-0323.wav|tests/data/ljspeech/wavs/LJ016-0323.npy +tests/data/ljspeech/wavs/LJ020-0075.wav|tests/data/ljspeech/wavs/LJ020-0075.npy +tests/data/ljspeech/wavs/LJ021-0047.wav|tests/data/ljspeech/wavs/LJ021-0047.npy +tests/data/ljspeech/wavs/LJ001-0051.wav|tests/data/ljspeech/wavs/LJ001-0051.npy +tests/data/ljspeech/wavs/LJ030-0218.wav|tests/data/ljspeech/wavs/LJ030-0218.npy +tests/data/ljspeech/wavs/LJ037-0027.wav|tests/data/ljspeech/wavs/LJ037-0027.npy +tests/data/ljspeech/wavs/LJ015-0101.wav|tests/data/ljspeech/wavs/LJ015-0101.npy +tests/data/ljspeech/wavs/LJ016-0376.wav|tests/data/ljspeech/wavs/LJ016-0376.npy +tests/data/ljspeech/wavs/LJ002-0090.wav|tests/data/ljspeech/wavs/LJ002-0090.npy +tests/data/ljspeech/wavs/LJ037-0210.wav|tests/data/ljspeech/wavs/LJ037-0210.npy +tests/data/ljspeech/wavs/LJ021-0037.wav|tests/data/ljspeech/wavs/LJ021-0037.npy +tests/data/ljspeech/wavs/LJ015-0271.wav|tests/data/ljspeech/wavs/LJ015-0271.npy +tests/data/ljspeech/wavs/LJ016-0280.wav|tests/data/ljspeech/wavs/LJ016-0280.npy +tests/data/ljspeech/wavs/LJ015-0029.wav|tests/data/ljspeech/wavs/LJ015-0029.npy +tests/data/ljspeech/wavs/LJ034-0061.wav|tests/data/ljspeech/wavs/LJ034-0061.npy +tests/data/ljspeech/wavs/LJ006-0108.wav|tests/data/ljspeech/wavs/LJ006-0108.npy +tests/data/ljspeech/wavs/LJ017-0256.wav|tests/data/ljspeech/wavs/LJ017-0256.npy +tests/data/ljspeech/wavs/LJ050-0225.wav|tests/data/ljspeech/wavs/LJ050-0225.npy +tests/data/ljspeech/wavs/LJ002-0094.wav|tests/data/ljspeech/wavs/LJ002-0094.npy +tests/data/ljspeech/wavs/LJ003-0030.wav|tests/data/ljspeech/wavs/LJ003-0030.npy +tests/data/ljspeech/wavs/LJ048-0284.wav|tests/data/ljspeech/wavs/LJ048-0284.npy 
+tests/data/ljspeech/wavs/LJ018-0048.wav|tests/data/ljspeech/wavs/LJ018-0048.npy +tests/data/ljspeech/wavs/LJ016-0032.wav|tests/data/ljspeech/wavs/LJ016-0032.npy +tests/data/ljspeech/wavs/LJ032-0201.wav|tests/data/ljspeech/wavs/LJ032-0201.npy +tests/data/ljspeech/wavs/LJ027-0068.wav|tests/data/ljspeech/wavs/LJ027-0068.npy +tests/data/ljspeech/wavs/LJ016-0195.wav|tests/data/ljspeech/wavs/LJ016-0195.npy +tests/data/ljspeech/wavs/LJ017-0034.wav|tests/data/ljspeech/wavs/LJ017-0034.npy +tests/data/ljspeech/wavs/LJ046-0221.wav|tests/data/ljspeech/wavs/LJ046-0221.npy +tests/data/ljspeech/wavs/LJ002-0203.wav|tests/data/ljspeech/wavs/LJ002-0203.npy +tests/data/ljspeech/wavs/LJ022-0087.wav|tests/data/ljspeech/wavs/LJ022-0087.npy +tests/data/ljspeech/wavs/LJ006-0164.wav|tests/data/ljspeech/wavs/LJ006-0164.npy +tests/data/ljspeech/wavs/LJ015-0062.wav|tests/data/ljspeech/wavs/LJ015-0062.npy +tests/data/ljspeech/wavs/LJ003-0139.wav|tests/data/ljspeech/wavs/LJ003-0139.npy +tests/data/ljspeech/wavs/LJ046-0007.wav|tests/data/ljspeech/wavs/LJ046-0007.npy +tests/data/ljspeech/wavs/LJ018-0101.wav|tests/data/ljspeech/wavs/LJ018-0101.npy +tests/data/ljspeech/wavs/LJ021-0083.wav|tests/data/ljspeech/wavs/LJ021-0083.npy +tests/data/ljspeech/wavs/LJ017-0142.wav|tests/data/ljspeech/wavs/LJ017-0142.npy +tests/data/ljspeech/wavs/LJ038-0110.wav|tests/data/ljspeech/wavs/LJ038-0110.npy +tests/data/ljspeech/wavs/LJ022-0151.wav|tests/data/ljspeech/wavs/LJ022-0151.npy +tests/data/ljspeech/wavs/LJ003-0264.wav|tests/data/ljspeech/wavs/LJ003-0264.npy +tests/data/ljspeech/wavs/LJ035-0118.wav|tests/data/ljspeech/wavs/LJ035-0118.npy +tests/data/ljspeech/wavs/LJ030-0141.wav|tests/data/ljspeech/wavs/LJ030-0141.npy +tests/data/ljspeech/wavs/LJ022-0033.wav|tests/data/ljspeech/wavs/LJ022-0033.npy +tests/data/ljspeech/wavs/LJ034-0102.wav|tests/data/ljspeech/wavs/LJ034-0102.npy +tests/data/ljspeech/wavs/LJ036-0103.wav|tests/data/ljspeech/wavs/LJ036-0103.npy +tests/data/ljspeech/wavs/LJ012-0139.wav|tests/data/ljspeech/wavs/LJ012-0139.npy +tests/data/ljspeech/wavs/LJ009-0249.wav|tests/data/ljspeech/wavs/LJ009-0249.npy +tests/data/ljspeech/wavs/LJ012-0097.wav|tests/data/ljspeech/wavs/LJ012-0097.npy +tests/data/ljspeech/wavs/LJ040-0173.wav|tests/data/ljspeech/wavs/LJ040-0173.npy +tests/data/ljspeech/wavs/LJ029-0031.wav|tests/data/ljspeech/wavs/LJ029-0031.npy +tests/data/ljspeech/wavs/LJ031-0155.wav|tests/data/ljspeech/wavs/LJ031-0155.npy +tests/data/ljspeech/wavs/LJ044-0239.wav|tests/data/ljspeech/wavs/LJ044-0239.npy +tests/data/ljspeech/wavs/LJ029-0196.wav|tests/data/ljspeech/wavs/LJ029-0196.npy +tests/data/ljspeech/wavs/LJ050-0049.wav|tests/data/ljspeech/wavs/LJ050-0049.npy +tests/data/ljspeech/wavs/LJ004-0199.wav|tests/data/ljspeech/wavs/LJ004-0199.npy +tests/data/ljspeech/wavs/LJ009-0003.wav|tests/data/ljspeech/wavs/LJ009-0003.npy +tests/data/ljspeech/wavs/LJ028-0272.wav|tests/data/ljspeech/wavs/LJ028-0272.npy +tests/data/ljspeech/wavs/LJ034-0025.wav|tests/data/ljspeech/wavs/LJ034-0025.npy +tests/data/ljspeech/wavs/LJ042-0168.wav|tests/data/ljspeech/wavs/LJ042-0168.npy +tests/data/ljspeech/wavs/LJ014-0093.wav|tests/data/ljspeech/wavs/LJ014-0093.npy +tests/data/ljspeech/wavs/LJ028-0373.wav|tests/data/ljspeech/wavs/LJ028-0373.npy +tests/data/ljspeech/wavs/LJ005-0087.wav|tests/data/ljspeech/wavs/LJ005-0087.npy +tests/data/ljspeech/wavs/LJ012-0015.wav|tests/data/ljspeech/wavs/LJ012-0015.npy +tests/data/ljspeech/wavs/LJ001-0069.wav|tests/data/ljspeech/wavs/LJ001-0069.npy 
+tests/data/ljspeech/wavs/LJ048-0064.wav|tests/data/ljspeech/wavs/LJ048-0064.npy +tests/data/ljspeech/wavs/LJ012-0171.wav|tests/data/ljspeech/wavs/LJ012-0171.npy +tests/data/ljspeech/wavs/LJ009-0250.wav|tests/data/ljspeech/wavs/LJ009-0250.npy +tests/data/ljspeech/wavs/LJ013-0212.wav|tests/data/ljspeech/wavs/LJ013-0212.npy +tests/data/ljspeech/wavs/LJ011-0052.wav|tests/data/ljspeech/wavs/LJ011-0052.npy +tests/data/ljspeech/wavs/LJ042-0191.wav|tests/data/ljspeech/wavs/LJ042-0191.npy +tests/data/ljspeech/wavs/LJ004-0211.wav|tests/data/ljspeech/wavs/LJ004-0211.npy +tests/data/ljspeech/wavs/LJ028-0439.wav|tests/data/ljspeech/wavs/LJ028-0439.npy +tests/data/ljspeech/wavs/LJ002-0290.wav|tests/data/ljspeech/wavs/LJ002-0290.npy +tests/data/ljspeech/wavs/LJ006-0285.wav|tests/data/ljspeech/wavs/LJ006-0285.npy +tests/data/ljspeech/wavs/LJ011-0225.wav|tests/data/ljspeech/wavs/LJ011-0225.npy +tests/data/ljspeech/wavs/LJ008-0035.wav|tests/data/ljspeech/wavs/LJ008-0035.npy +tests/data/ljspeech/wavs/LJ038-0259.wav|tests/data/ljspeech/wavs/LJ038-0259.npy +tests/data/ljspeech/wavs/LJ039-0192.wav|tests/data/ljspeech/wavs/LJ039-0192.npy +tests/data/ljspeech/wavs/LJ009-0284.wav|tests/data/ljspeech/wavs/LJ009-0284.npy +tests/data/ljspeech/wavs/LJ004-0249.wav|tests/data/ljspeech/wavs/LJ004-0249.npy +tests/data/ljspeech/wavs/LJ025-0113.wav|tests/data/ljspeech/wavs/LJ025-0113.npy +tests/data/ljspeech/wavs/LJ044-0061.wav|tests/data/ljspeech/wavs/LJ044-0061.npy +tests/data/ljspeech/wavs/LJ046-0085.wav|tests/data/ljspeech/wavs/LJ046-0085.npy +tests/data/ljspeech/wavs/LJ023-0083.wav|tests/data/ljspeech/wavs/LJ023-0083.npy +tests/data/ljspeech/wavs/LJ038-0041.wav|tests/data/ljspeech/wavs/LJ038-0041.npy +tests/data/ljspeech/wavs/LJ009-0077.wav|tests/data/ljspeech/wavs/LJ009-0077.npy +tests/data/ljspeech/wavs/LJ003-0127.wav|tests/data/ljspeech/wavs/LJ003-0127.npy +tests/data/ljspeech/wavs/LJ042-0145.wav|tests/data/ljspeech/wavs/LJ042-0145.npy +tests/data/ljspeech/wavs/LJ046-0091.wav|tests/data/ljspeech/wavs/LJ046-0091.npy +tests/data/ljspeech/wavs/LJ009-0288.wav|tests/data/ljspeech/wavs/LJ009-0288.npy +tests/data/ljspeech/wavs/LJ040-0098.wav|tests/data/ljspeech/wavs/LJ040-0098.npy +tests/data/ljspeech/wavs/LJ026-0041.wav|tests/data/ljspeech/wavs/LJ026-0041.npy +tests/data/ljspeech/wavs/LJ048-0180.wav|tests/data/ljspeech/wavs/LJ048-0180.npy +tests/data/ljspeech/wavs/LJ030-0232.wav|tests/data/ljspeech/wavs/LJ030-0232.npy +tests/data/ljspeech/wavs/LJ038-0044.wav|tests/data/ljspeech/wavs/LJ038-0044.npy +tests/data/ljspeech/wavs/LJ026-0031.wav|tests/data/ljspeech/wavs/LJ026-0031.npy +tests/data/ljspeech/wavs/LJ028-0074.wav|tests/data/ljspeech/wavs/LJ028-0074.npy +tests/data/ljspeech/wavs/LJ041-0104.wav|tests/data/ljspeech/wavs/LJ041-0104.npy +tests/data/ljspeech/wavs/LJ028-0246.wav|tests/data/ljspeech/wavs/LJ028-0246.npy +tests/data/ljspeech/wavs/LJ004-0219.wav|tests/data/ljspeech/wavs/LJ004-0219.npy +tests/data/ljspeech/wavs/LJ015-0174.wav|tests/data/ljspeech/wavs/LJ015-0174.npy +tests/data/ljspeech/wavs/LJ002-0117.wav|tests/data/ljspeech/wavs/LJ002-0117.npy +tests/data/ljspeech/wavs/LJ008-0246.wav|tests/data/ljspeech/wavs/LJ008-0246.npy +tests/data/ljspeech/wavs/LJ025-0033.wav|tests/data/ljspeech/wavs/LJ025-0033.npy +tests/data/ljspeech/wavs/LJ003-0327.wav|tests/data/ljspeech/wavs/LJ003-0327.npy +tests/data/ljspeech/wavs/LJ015-0099.wav|tests/data/ljspeech/wavs/LJ015-0099.npy +tests/data/ljspeech/wavs/LJ029-0041.wav|tests/data/ljspeech/wavs/LJ029-0041.npy 
+tests/data/ljspeech/wavs/LJ028-0031.wav|tests/data/ljspeech/wavs/LJ028-0031.npy +tests/data/ljspeech/wavs/LJ015-0175.wav|tests/data/ljspeech/wavs/LJ015-0175.npy +tests/data/ljspeech/wavs/LJ042-0071.wav|tests/data/ljspeech/wavs/LJ042-0071.npy +tests/data/ljspeech/wavs/LJ044-0036.wav|tests/data/ljspeech/wavs/LJ044-0036.npy +tests/data/ljspeech/wavs/LJ024-0058.wav|tests/data/ljspeech/wavs/LJ024-0058.npy +tests/data/ljspeech/wavs/LJ038-0192.wav|tests/data/ljspeech/wavs/LJ038-0192.npy +tests/data/ljspeech/wavs/LJ014-0244.wav|tests/data/ljspeech/wavs/LJ014-0244.npy +tests/data/ljspeech/wavs/LJ038-0055.wav|tests/data/ljspeech/wavs/LJ038-0055.npy +tests/data/ljspeech/wavs/LJ030-0022.wav|tests/data/ljspeech/wavs/LJ030-0022.npy +tests/data/ljspeech/wavs/LJ028-0215.wav|tests/data/ljspeech/wavs/LJ028-0215.npy +tests/data/ljspeech/wavs/LJ028-0077.wav|tests/data/ljspeech/wavs/LJ028-0077.npy +tests/data/ljspeech/wavs/LJ028-0460.wav|tests/data/ljspeech/wavs/LJ028-0460.npy +tests/data/ljspeech/wavs/LJ012-0042.wav|tests/data/ljspeech/wavs/LJ012-0042.npy +tests/data/ljspeech/wavs/LJ001-0035.wav|tests/data/ljspeech/wavs/LJ001-0035.npy +tests/data/ljspeech/wavs/LJ021-0040.wav|tests/data/ljspeech/wavs/LJ021-0040.npy +tests/data/ljspeech/wavs/LJ034-0083.wav|tests/data/ljspeech/wavs/LJ034-0083.npy +tests/data/ljspeech/wavs/LJ037-0215.wav|tests/data/ljspeech/wavs/LJ037-0215.npy +tests/data/ljspeech/wavs/LJ014-0216.wav|tests/data/ljspeech/wavs/LJ014-0216.npy +tests/data/ljspeech/wavs/LJ014-0116.wav|tests/data/ljspeech/wavs/LJ014-0116.npy +tests/data/ljspeech/wavs/LJ038-0145.wav|tests/data/ljspeech/wavs/LJ038-0145.npy +tests/data/ljspeech/wavs/LJ028-0125.wav|tests/data/ljspeech/wavs/LJ028-0125.npy +tests/data/ljspeech/wavs/LJ008-0107.wav|tests/data/ljspeech/wavs/LJ008-0107.npy +tests/data/ljspeech/wavs/LJ003-0191.wav|tests/data/ljspeech/wavs/LJ003-0191.npy +tests/data/ljspeech/wavs/LJ012-0177.wav|tests/data/ljspeech/wavs/LJ012-0177.npy +tests/data/ljspeech/wavs/LJ033-0189.wav|tests/data/ljspeech/wavs/LJ033-0189.npy +tests/data/ljspeech/wavs/LJ012-0210.wav|tests/data/ljspeech/wavs/LJ012-0210.npy +tests/data/ljspeech/wavs/LJ022-0106.wav|tests/data/ljspeech/wavs/LJ022-0106.npy +tests/data/ljspeech/wavs/LJ021-0058.wav|tests/data/ljspeech/wavs/LJ021-0058.npy +tests/data/ljspeech/wavs/LJ006-0102.wav|tests/data/ljspeech/wavs/LJ006-0102.npy +tests/data/ljspeech/wavs/LJ033-0032.wav|tests/data/ljspeech/wavs/LJ033-0032.npy +tests/data/ljspeech/wavs/LJ002-0200.wav|tests/data/ljspeech/wavs/LJ002-0200.npy +tests/data/ljspeech/wavs/LJ033-0185.wav|tests/data/ljspeech/wavs/LJ033-0185.npy +tests/data/ljspeech/wavs/LJ036-0057.wav|tests/data/ljspeech/wavs/LJ036-0057.npy +tests/data/ljspeech/wavs/LJ035-0066.wav|tests/data/ljspeech/wavs/LJ035-0066.npy +tests/data/ljspeech/wavs/LJ028-0257.wav|tests/data/ljspeech/wavs/LJ028-0257.npy +tests/data/ljspeech/wavs/LJ040-0064.wav|tests/data/ljspeech/wavs/LJ040-0064.npy +tests/data/ljspeech/wavs/LJ032-0052.wav|tests/data/ljspeech/wavs/LJ032-0052.npy +tests/data/ljspeech/wavs/LJ047-0036.wav|tests/data/ljspeech/wavs/LJ047-0036.npy +tests/data/ljspeech/wavs/LJ032-0059.wav|tests/data/ljspeech/wavs/LJ032-0059.npy +tests/data/ljspeech/wavs/LJ006-0179.wav|tests/data/ljspeech/wavs/LJ006-0179.npy +tests/data/ljspeech/wavs/LJ034-0063.wav|tests/data/ljspeech/wavs/LJ034-0063.npy +tests/data/ljspeech/wavs/LJ010-0252.wav|tests/data/ljspeech/wavs/LJ010-0252.npy +tests/data/ljspeech/wavs/LJ040-0076.wav|tests/data/ljspeech/wavs/LJ040-0076.npy 
+tests/data/ljspeech/wavs/LJ004-0039.wav|tests/data/ljspeech/wavs/LJ004-0039.npy +tests/data/ljspeech/wavs/LJ047-0049.wav|tests/data/ljspeech/wavs/LJ047-0049.npy +tests/data/ljspeech/wavs/LJ018-0132.wav|tests/data/ljspeech/wavs/LJ018-0132.npy +tests/data/ljspeech/wavs/LJ017-0182.wav|tests/data/ljspeech/wavs/LJ017-0182.npy +tests/data/ljspeech/wavs/LJ016-0368.wav|tests/data/ljspeech/wavs/LJ016-0368.npy +tests/data/ljspeech/wavs/LJ017-0185.wav|tests/data/ljspeech/wavs/LJ017-0185.npy +tests/data/ljspeech/wavs/LJ017-0227.wav|tests/data/ljspeech/wavs/LJ017-0227.npy +tests/data/ljspeech/wavs/LJ030-0170.wav|tests/data/ljspeech/wavs/LJ030-0170.npy +tests/data/ljspeech/wavs/LJ001-0177.wav|tests/data/ljspeech/wavs/LJ001-0177.npy +tests/data/ljspeech/wavs/LJ040-0105.wav|tests/data/ljspeech/wavs/LJ040-0105.npy +tests/data/ljspeech/wavs/LJ002-0286.wav|tests/data/ljspeech/wavs/LJ002-0286.npy +tests/data/ljspeech/wavs/LJ008-0241.wav|tests/data/ljspeech/wavs/LJ008-0241.npy +tests/data/ljspeech/wavs/LJ017-0086.wav|tests/data/ljspeech/wavs/LJ017-0086.npy +tests/data/ljspeech/wavs/LJ031-0097.wav|tests/data/ljspeech/wavs/LJ031-0097.npy +tests/data/ljspeech/wavs/LJ028-0346.wav|tests/data/ljspeech/wavs/LJ028-0346.npy +tests/data/ljspeech/wavs/LJ017-0255.wav|tests/data/ljspeech/wavs/LJ017-0255.npy +tests/data/ljspeech/wavs/LJ002-0273.wav|tests/data/ljspeech/wavs/LJ002-0273.npy +tests/data/ljspeech/wavs/LJ019-0017.wav|tests/data/ljspeech/wavs/LJ019-0017.npy +tests/data/ljspeech/wavs/LJ032-0232.wav|tests/data/ljspeech/wavs/LJ032-0232.npy +tests/data/ljspeech/wavs/LJ022-0076.wav|tests/data/ljspeech/wavs/LJ022-0076.npy +tests/data/ljspeech/wavs/LJ018-0053.wav|tests/data/ljspeech/wavs/LJ018-0053.npy +tests/data/ljspeech/wavs/LJ029-0006.wav|tests/data/ljspeech/wavs/LJ029-0006.npy +tests/data/ljspeech/wavs/LJ018-0010.wav|tests/data/ljspeech/wavs/LJ018-0010.npy +tests/data/ljspeech/wavs/LJ016-0182.wav|tests/data/ljspeech/wavs/LJ016-0182.npy +tests/data/ljspeech/wavs/LJ016-0095.wav|tests/data/ljspeech/wavs/LJ016-0095.npy +tests/data/ljspeech/wavs/LJ042-0201.wav|tests/data/ljspeech/wavs/LJ042-0201.npy +tests/data/ljspeech/wavs/LJ002-0232.wav|tests/data/ljspeech/wavs/LJ002-0232.npy +tests/data/ljspeech/wavs/LJ048-0217.wav|tests/data/ljspeech/wavs/LJ048-0217.npy +tests/data/ljspeech/wavs/LJ016-0140.wav|tests/data/ljspeech/wavs/LJ016-0140.npy +tests/data/ljspeech/wavs/LJ011-0229.wav|tests/data/ljspeech/wavs/LJ011-0229.npy +tests/data/ljspeech/wavs/LJ002-0172.wav|tests/data/ljspeech/wavs/LJ002-0172.npy +tests/data/ljspeech/wavs/LJ025-0097.wav|tests/data/ljspeech/wavs/LJ025-0097.npy +tests/data/ljspeech/wavs/LJ020-0056.wav|tests/data/ljspeech/wavs/LJ020-0056.npy +tests/data/ljspeech/wavs/LJ029-0032.wav|tests/data/ljspeech/wavs/LJ029-0032.npy +tests/data/ljspeech/wavs/LJ027-0100.wav|tests/data/ljspeech/wavs/LJ027-0100.npy +tests/data/ljspeech/wavs/LJ018-0321.wav|tests/data/ljspeech/wavs/LJ018-0321.npy +tests/data/ljspeech/wavs/LJ022-0196.wav|tests/data/ljspeech/wavs/LJ022-0196.npy +tests/data/ljspeech/wavs/LJ016-0098.wav|tests/data/ljspeech/wavs/LJ016-0098.npy +tests/data/ljspeech/wavs/LJ010-0108.wav|tests/data/ljspeech/wavs/LJ010-0108.npy +tests/data/ljspeech/wavs/LJ044-0039.wav|tests/data/ljspeech/wavs/LJ044-0039.npy +tests/data/ljspeech/wavs/LJ013-0129.wav|tests/data/ljspeech/wavs/LJ013-0129.npy +tests/data/ljspeech/wavs/LJ016-0317.wav|tests/data/ljspeech/wavs/LJ016-0317.npy +tests/data/ljspeech/wavs/LJ002-0050.wav|tests/data/ljspeech/wavs/LJ002-0050.npy 
+tests/data/ljspeech/wavs/LJ008-0190.wav|tests/data/ljspeech/wavs/LJ008-0190.npy +tests/data/ljspeech/wavs/LJ002-0173.wav|tests/data/ljspeech/wavs/LJ002-0173.npy +tests/data/ljspeech/wavs/LJ050-0201.wav|tests/data/ljspeech/wavs/LJ050-0201.npy +tests/data/ljspeech/wavs/LJ002-0038.wav|tests/data/ljspeech/wavs/LJ002-0038.npy +tests/data/ljspeech/wavs/LJ020-0012.wav|tests/data/ljspeech/wavs/LJ020-0012.npy +tests/data/ljspeech/wavs/LJ013-0079.wav|tests/data/ljspeech/wavs/LJ013-0079.npy +tests/data/ljspeech/wavs/LJ002-0336.wav|tests/data/ljspeech/wavs/LJ002-0336.npy +tests/data/ljspeech/wavs/LJ018-0193.wav|tests/data/ljspeech/wavs/LJ018-0193.npy +tests/data/ljspeech/wavs/LJ049-0077.wav|tests/data/ljspeech/wavs/LJ049-0077.npy +tests/data/ljspeech/wavs/LJ028-0374.wav|tests/data/ljspeech/wavs/LJ028-0374.npy +tests/data/ljspeech/wavs/LJ002-0143.wav|tests/data/ljspeech/wavs/LJ002-0143.npy +tests/data/ljspeech/wavs/LJ028-0378.wav|tests/data/ljspeech/wavs/LJ028-0378.npy +tests/data/ljspeech/wavs/LJ044-0016.wav|tests/data/ljspeech/wavs/LJ044-0016.npy +tests/data/ljspeech/wavs/LJ038-0260.wav|tests/data/ljspeech/wavs/LJ038-0260.npy +tests/data/ljspeech/wavs/LJ028-0180.wav|tests/data/ljspeech/wavs/LJ028-0180.npy +tests/data/ljspeech/wavs/LJ029-0021.wav|tests/data/ljspeech/wavs/LJ029-0021.npy +tests/data/ljspeech/wavs/LJ011-0228.wav|tests/data/ljspeech/wavs/LJ011-0228.npy +tests/data/ljspeech/wavs/LJ026-0096.wav|tests/data/ljspeech/wavs/LJ026-0096.npy +tests/data/ljspeech/wavs/LJ003-0039.wav|tests/data/ljspeech/wavs/LJ003-0039.npy +tests/data/ljspeech/wavs/LJ014-0317.wav|tests/data/ljspeech/wavs/LJ014-0317.npy +tests/data/ljspeech/wavs/LJ010-0217.wav|tests/data/ljspeech/wavs/LJ010-0217.npy +tests/data/ljspeech/wavs/LJ023-0128.wav|tests/data/ljspeech/wavs/LJ023-0128.npy +tests/data/ljspeech/wavs/LJ026-0136.wav|tests/data/ljspeech/wavs/LJ026-0136.npy +tests/data/ljspeech/wavs/LJ049-0167.wav|tests/data/ljspeech/wavs/LJ049-0167.npy +tests/data/ljspeech/wavs/LJ027-0141.wav|tests/data/ljspeech/wavs/LJ027-0141.npy +tests/data/ljspeech/wavs/LJ002-0187.wav|tests/data/ljspeech/wavs/LJ002-0187.npy +tests/data/ljspeech/wavs/LJ012-0191.wav|tests/data/ljspeech/wavs/LJ012-0191.npy +tests/data/ljspeech/wavs/LJ013-0158.wav|tests/data/ljspeech/wavs/LJ013-0158.npy +tests/data/ljspeech/wavs/LJ032-0128.wav|tests/data/ljspeech/wavs/LJ032-0128.npy +tests/data/ljspeech/wavs/LJ001-0052.wav|tests/data/ljspeech/wavs/LJ001-0052.npy +tests/data/ljspeech/wavs/LJ012-0253.wav|tests/data/ljspeech/wavs/LJ012-0253.npy +tests/data/ljspeech/wavs/LJ026-0137.wav|tests/data/ljspeech/wavs/LJ026-0137.npy +tests/data/ljspeech/wavs/LJ013-0033.wav|tests/data/ljspeech/wavs/LJ013-0033.npy +tests/data/ljspeech/wavs/LJ036-0106.wav|tests/data/ljspeech/wavs/LJ036-0106.npy +tests/data/ljspeech/wavs/LJ026-0097.wav|tests/data/ljspeech/wavs/LJ026-0097.npy +tests/data/ljspeech/wavs/LJ034-0185.wav|tests/data/ljspeech/wavs/LJ034-0185.npy +tests/data/ljspeech/wavs/LJ013-0191.wav|tests/data/ljspeech/wavs/LJ013-0191.npy +tests/data/ljspeech/wavs/LJ028-0441.wav|tests/data/ljspeech/wavs/LJ028-0441.npy +tests/data/ljspeech/wavs/LJ033-0199.wav|tests/data/ljspeech/wavs/LJ033-0199.npy +tests/data/ljspeech/wavs/LJ044-0001.wav|tests/data/ljspeech/wavs/LJ044-0001.npy +tests/data/ljspeech/wavs/LJ043-0001.wav|tests/data/ljspeech/wavs/LJ043-0001.npy +tests/data/ljspeech/wavs/LJ042-0001.wav|tests/data/ljspeech/wavs/LJ042-0001.npy +tests/data/ljspeech/wavs/LJ034-0001.wav|tests/data/ljspeech/wavs/LJ034-0001.npy 
+tests/data/ljspeech/wavs/LJ013-0163.wav|tests/data/ljspeech/wavs/LJ013-0163.npy +tests/data/ljspeech/wavs/LJ016-0330.wav|tests/data/ljspeech/wavs/LJ016-0330.npy +tests/data/ljspeech/wavs/LJ036-0178.wav|tests/data/ljspeech/wavs/LJ036-0178.npy +tests/data/ljspeech/wavs/LJ039-0102.wav|tests/data/ljspeech/wavs/LJ039-0102.npy +tests/data/ljspeech/wavs/LJ036-0180.wav|tests/data/ljspeech/wavs/LJ036-0180.npy +tests/data/ljspeech/wavs/LJ001-0122.wav|tests/data/ljspeech/wavs/LJ001-0122.npy +tests/data/ljspeech/wavs/LJ003-0177.wav|tests/data/ljspeech/wavs/LJ003-0177.npy +tests/data/ljspeech/wavs/LJ002-0100.wav|tests/data/ljspeech/wavs/LJ002-0100.npy +tests/data/ljspeech/wavs/LJ003-0122.wav|tests/data/ljspeech/wavs/LJ003-0122.npy +tests/data/ljspeech/wavs/LJ040-0217.wav|tests/data/ljspeech/wavs/LJ040-0217.npy +tests/data/ljspeech/wavs/LJ024-0129.wav|tests/data/ljspeech/wavs/LJ024-0129.npy +tests/data/ljspeech/wavs/LJ011-0207.wav|tests/data/ljspeech/wavs/LJ011-0207.npy +tests/data/ljspeech/wavs/LJ011-0165.wav|tests/data/ljspeech/wavs/LJ011-0165.npy +tests/data/ljspeech/wavs/LJ015-0176.wav|tests/data/ljspeech/wavs/LJ015-0176.npy +tests/data/ljspeech/wavs/LJ008-0018.wav|tests/data/ljspeech/wavs/LJ008-0018.npy +tests/data/ljspeech/wavs/LJ044-0166.wav|tests/data/ljspeech/wavs/LJ044-0166.npy +tests/data/ljspeech/wavs/LJ007-0141.wav|tests/data/ljspeech/wavs/LJ007-0141.npy +tests/data/ljspeech/wavs/LJ006-0005.wav|tests/data/ljspeech/wavs/LJ006-0005.npy +tests/data/ljspeech/wavs/LJ011-0086.wav|tests/data/ljspeech/wavs/LJ011-0086.npy +tests/data/ljspeech/wavs/LJ037-0043.wav|tests/data/ljspeech/wavs/LJ037-0043.npy +tests/data/ljspeech/wavs/LJ014-0148.wav|tests/data/ljspeech/wavs/LJ014-0148.npy +tests/data/ljspeech/wavs/LJ017-0261.wav|tests/data/ljspeech/wavs/LJ017-0261.npy +tests/data/ljspeech/wavs/LJ009-0115.wav|tests/data/ljspeech/wavs/LJ009-0115.npy +tests/data/ljspeech/wavs/LJ038-0105.wav|tests/data/ljspeech/wavs/LJ038-0105.npy +tests/data/ljspeech/wavs/LJ009-0162.wav|tests/data/ljspeech/wavs/LJ009-0162.npy +tests/data/ljspeech/wavs/LJ008-0096.wav|tests/data/ljspeech/wavs/LJ008-0096.npy +tests/data/ljspeech/wavs/LJ030-0172.wav|tests/data/ljspeech/wavs/LJ030-0172.npy +tests/data/ljspeech/wavs/LJ013-0018.wav|tests/data/ljspeech/wavs/LJ013-0018.npy +tests/data/ljspeech/wavs/LJ012-0245.wav|tests/data/ljspeech/wavs/LJ012-0245.npy +tests/data/ljspeech/wavs/LJ001-0016.wav|tests/data/ljspeech/wavs/LJ001-0016.npy +tests/data/ljspeech/wavs/LJ008-0099.wav|tests/data/ljspeech/wavs/LJ008-0099.npy +tests/data/ljspeech/wavs/LJ031-0001.wav|tests/data/ljspeech/wavs/LJ031-0001.npy +tests/data/ljspeech/wavs/LJ016-0284.wav|tests/data/ljspeech/wavs/LJ016-0284.npy +tests/data/ljspeech/wavs/LJ016-0242.wav|tests/data/ljspeech/wavs/LJ016-0242.npy +tests/data/ljspeech/wavs/LJ033-0176.wav|tests/data/ljspeech/wavs/LJ033-0176.npy +tests/data/ljspeech/wavs/LJ018-0165.wav|tests/data/ljspeech/wavs/LJ018-0165.npy +tests/data/ljspeech/wavs/LJ029-0183.wav|tests/data/ljspeech/wavs/LJ029-0183.npy +tests/data/ljspeech/wavs/LJ046-0185.wav|tests/data/ljspeech/wavs/LJ046-0185.npy +tests/data/ljspeech/wavs/LJ027-0165.wav|tests/data/ljspeech/wavs/LJ027-0165.npy +tests/data/ljspeech/wavs/LJ027-0001.wav|tests/data/ljspeech/wavs/LJ027-0001.npy +tests/data/ljspeech/wavs/LJ022-0101.wav|tests/data/ljspeech/wavs/LJ022-0101.npy +tests/data/ljspeech/wavs/LJ048-0191.wav|tests/data/ljspeech/wavs/LJ048-0191.npy +tests/data/ljspeech/wavs/LJ029-0092.wav|tests/data/ljspeech/wavs/LJ029-0092.npy 
+tests/data/ljspeech/wavs/LJ013-0169.wav|tests/data/ljspeech/wavs/LJ013-0169.npy +tests/data/ljspeech/wavs/LJ018-0088.wav|tests/data/ljspeech/wavs/LJ018-0088.npy +tests/data/ljspeech/wavs/LJ022-0117.wav|tests/data/ljspeech/wavs/LJ022-0117.npy +tests/data/ljspeech/wavs/LJ036-0204.wav|tests/data/ljspeech/wavs/LJ036-0204.npy +tests/data/ljspeech/wavs/LJ017-0065.wav|tests/data/ljspeech/wavs/LJ017-0065.npy +tests/data/ljspeech/wavs/LJ045-0076.wav|tests/data/ljspeech/wavs/LJ045-0076.npy +tests/data/ljspeech/wavs/LJ019-0295.wav|tests/data/ljspeech/wavs/LJ019-0295.npy +tests/data/ljspeech/wavs/LJ044-0089.wav|tests/data/ljspeech/wavs/LJ044-0089.npy +tests/data/ljspeech/wavs/LJ016-0060.wav|tests/data/ljspeech/wavs/LJ016-0060.npy +tests/data/ljspeech/wavs/LJ019-0257.wav|tests/data/ljspeech/wavs/LJ019-0257.npy +tests/data/ljspeech/wavs/LJ035-0180.wav|tests/data/ljspeech/wavs/LJ035-0180.npy +tests/data/ljspeech/wavs/LJ043-0037.wav|tests/data/ljspeech/wavs/LJ043-0037.npy +tests/data/ljspeech/wavs/LJ019-0261.wav|tests/data/ljspeech/wavs/LJ019-0261.npy +tests/data/ljspeech/wavs/LJ004-0095.wav|tests/data/ljspeech/wavs/LJ004-0095.npy +tests/data/ljspeech/wavs/LJ036-0070.wav|tests/data/ljspeech/wavs/LJ036-0070.npy +tests/data/ljspeech/wavs/LJ010-0210.wav|tests/data/ljspeech/wavs/LJ010-0210.npy +tests/data/ljspeech/wavs/LJ028-0280.wav|tests/data/ljspeech/wavs/LJ028-0280.npy +tests/data/ljspeech/wavs/LJ003-0065.wav|tests/data/ljspeech/wavs/LJ003-0065.npy +tests/data/ljspeech/wavs/LJ028-0202.wav|tests/data/ljspeech/wavs/LJ028-0202.npy +tests/data/ljspeech/wavs/LJ049-0001.wav|tests/data/ljspeech/wavs/LJ049-0001.npy +tests/data/ljspeech/wavs/LJ027-0172.wav|tests/data/ljspeech/wavs/LJ027-0172.npy +tests/data/ljspeech/wavs/LJ024-0133.wav|tests/data/ljspeech/wavs/LJ024-0133.npy +tests/data/ljspeech/wavs/LJ039-0193.wav|tests/data/ljspeech/wavs/LJ039-0193.npy +tests/data/ljspeech/wavs/LJ040-0239.wav|tests/data/ljspeech/wavs/LJ040-0239.npy +tests/data/ljspeech/wavs/LJ036-0047.wav|tests/data/ljspeech/wavs/LJ036-0047.npy +tests/data/ljspeech/wavs/LJ039-0044.wav|tests/data/ljspeech/wavs/LJ039-0044.npy +tests/data/ljspeech/wavs/LJ021-0068.wav|tests/data/ljspeech/wavs/LJ021-0068.npy +tests/data/ljspeech/wavs/LJ033-0203.wav|tests/data/ljspeech/wavs/LJ033-0203.npy +tests/data/ljspeech/wavs/LJ022-0195.wav|tests/data/ljspeech/wavs/LJ022-0195.npy +tests/data/ljspeech/wavs/LJ041-0024.wav|tests/data/ljspeech/wavs/LJ041-0024.npy +tests/data/ljspeech/wavs/LJ032-0130.wav|tests/data/ljspeech/wavs/LJ032-0130.npy +tests/data/ljspeech/wavs/LJ005-0112.wav|tests/data/ljspeech/wavs/LJ005-0112.npy +tests/data/ljspeech/wavs/LJ039-0120.wav|tests/data/ljspeech/wavs/LJ039-0120.npy +tests/data/ljspeech/wavs/LJ008-0157.wav|tests/data/ljspeech/wavs/LJ008-0157.npy +tests/data/ljspeech/wavs/LJ047-0185.wav|tests/data/ljspeech/wavs/LJ047-0185.npy +tests/data/ljspeech/wavs/LJ028-0029.wav|tests/data/ljspeech/wavs/LJ028-0029.npy +tests/data/ljspeech/wavs/LJ003-0131.wav|tests/data/ljspeech/wavs/LJ003-0131.npy +tests/data/ljspeech/wavs/LJ018-0021.wav|tests/data/ljspeech/wavs/LJ018-0021.npy +tests/data/ljspeech/wavs/LJ016-0431.wav|tests/data/ljspeech/wavs/LJ016-0431.npy +tests/data/ljspeech/wavs/LJ042-0228.wav|tests/data/ljspeech/wavs/LJ042-0228.npy +tests/data/ljspeech/wavs/LJ040-0135.wav|tests/data/ljspeech/wavs/LJ040-0135.npy +tests/data/ljspeech/wavs/LJ027-0007.wav|tests/data/ljspeech/wavs/LJ027-0007.npy +tests/data/ljspeech/wavs/LJ013-0220.wav|tests/data/ljspeech/wavs/LJ013-0220.npy 
+tests/data/ljspeech/wavs/LJ048-0190.wav|tests/data/ljspeech/wavs/LJ048-0190.npy +tests/data/ljspeech/wavs/LJ042-0193.wav|tests/data/ljspeech/wavs/LJ042-0193.npy +tests/data/ljspeech/wavs/LJ002-0244.wav|tests/data/ljspeech/wavs/LJ002-0244.npy +tests/data/ljspeech/wavs/LJ013-0014.wav|tests/data/ljspeech/wavs/LJ013-0014.npy +tests/data/ljspeech/wavs/LJ003-0240.wav|tests/data/ljspeech/wavs/LJ003-0240.npy +tests/data/ljspeech/wavs/LJ013-0235.wav|tests/data/ljspeech/wavs/LJ013-0235.npy +tests/data/ljspeech/wavs/LJ014-0025.wav|tests/data/ljspeech/wavs/LJ014-0025.npy +tests/data/ljspeech/wavs/LJ039-0002.wav|tests/data/ljspeech/wavs/LJ039-0002.npy +tests/data/ljspeech/wavs/LJ038-0001.wav|tests/data/ljspeech/wavs/LJ038-0001.npy +tests/data/ljspeech/wavs/LJ013-0162.wav|tests/data/ljspeech/wavs/LJ013-0162.npy +tests/data/ljspeech/wavs/LJ016-0434.wav|tests/data/ljspeech/wavs/LJ016-0434.npy +tests/data/ljspeech/wavs/LJ044-0070.wav|tests/data/ljspeech/wavs/LJ044-0070.npy +tests/data/ljspeech/wavs/LJ042-0163.wav|tests/data/ljspeech/wavs/LJ042-0163.npy +tests/data/ljspeech/wavs/LJ036-0045.wav|tests/data/ljspeech/wavs/LJ036-0045.npy +tests/data/ljspeech/wavs/LJ035-0063.wav|tests/data/ljspeech/wavs/LJ035-0063.npy +tests/data/ljspeech/wavs/LJ015-0194.wav|tests/data/ljspeech/wavs/LJ015-0194.npy +tests/data/ljspeech/wavs/LJ015-0071.wav|tests/data/ljspeech/wavs/LJ015-0071.npy +tests/data/ljspeech/wavs/LJ023-0108.wav|tests/data/ljspeech/wavs/LJ023-0108.npy +tests/data/ljspeech/wavs/LJ018-0330.wav|tests/data/ljspeech/wavs/LJ018-0330.npy +tests/data/ljspeech/wavs/LJ021-0162.wav|tests/data/ljspeech/wavs/LJ021-0162.npy +tests/data/ljspeech/wavs/LJ005-0267.wav|tests/data/ljspeech/wavs/LJ005-0267.npy +tests/data/ljspeech/wavs/LJ018-0232.wav|tests/data/ljspeech/wavs/LJ018-0232.npy +tests/data/ljspeech/wavs/LJ012-0213.wav|tests/data/ljspeech/wavs/LJ012-0213.npy +tests/data/ljspeech/wavs/LJ042-0107.wav|tests/data/ljspeech/wavs/LJ042-0107.npy +tests/data/ljspeech/wavs/LJ025-0018.wav|tests/data/ljspeech/wavs/LJ025-0018.npy +tests/data/ljspeech/wavs/LJ028-0499.wav|tests/data/ljspeech/wavs/LJ028-0499.npy +tests/data/ljspeech/wavs/LJ018-0160.wav|tests/data/ljspeech/wavs/LJ018-0160.npy +tests/data/ljspeech/wavs/LJ028-0040.wav|tests/data/ljspeech/wavs/LJ028-0040.npy +tests/data/ljspeech/wavs/LJ028-0486.wav|tests/data/ljspeech/wavs/LJ028-0486.npy +tests/data/ljspeech/wavs/LJ013-0190.wav|tests/data/ljspeech/wavs/LJ013-0190.npy +tests/data/ljspeech/wavs/LJ019-0083.wav|tests/data/ljspeech/wavs/LJ019-0083.npy +tests/data/ljspeech/wavs/LJ040-0190.wav|tests/data/ljspeech/wavs/LJ040-0190.npy +tests/data/ljspeech/wavs/LJ013-0161.wav|tests/data/ljspeech/wavs/LJ013-0161.npy +tests/data/ljspeech/wavs/LJ016-0387.wav|tests/data/ljspeech/wavs/LJ016-0387.npy +tests/data/ljspeech/wavs/LJ035-0085.wav|tests/data/ljspeech/wavs/LJ035-0085.npy +tests/data/ljspeech/wavs/LJ012-0292.wav|tests/data/ljspeech/wavs/LJ012-0292.npy +tests/data/ljspeech/wavs/LJ042-0066.wav|tests/data/ljspeech/wavs/LJ042-0066.npy +tests/data/ljspeech/wavs/LJ025-0093.wav|tests/data/ljspeech/wavs/LJ025-0093.npy +tests/data/ljspeech/wavs/LJ018-0168.wav|tests/data/ljspeech/wavs/LJ018-0168.npy +tests/data/ljspeech/wavs/LJ036-0034.wav|tests/data/ljspeech/wavs/LJ036-0034.npy +tests/data/ljspeech/wavs/LJ016-0092.wav|tests/data/ljspeech/wavs/LJ016-0092.npy +tests/data/ljspeech/wavs/LJ037-0018.wav|tests/data/ljspeech/wavs/LJ037-0018.npy +tests/data/ljspeech/wavs/LJ016-0034.wav|tests/data/ljspeech/wavs/LJ016-0034.npy 
+tests/data/ljspeech/wavs/LJ047-0147.wav|tests/data/ljspeech/wavs/LJ047-0147.npy +tests/data/ljspeech/wavs/LJ040-0156.wav|tests/data/ljspeech/wavs/LJ040-0156.npy +tests/data/ljspeech/wavs/LJ044-0032.wav|tests/data/ljspeech/wavs/LJ044-0032.npy +tests/data/ljspeech/wavs/LJ016-0004.wav|tests/data/ljspeech/wavs/LJ016-0004.npy +tests/data/ljspeech/wavs/LJ037-0138.wav|tests/data/ljspeech/wavs/LJ037-0138.npy +tests/data/ljspeech/wavs/LJ033-0063.wav|tests/data/ljspeech/wavs/LJ033-0063.npy +tests/data/ljspeech/wavs/LJ048-0279.wav|tests/data/ljspeech/wavs/LJ048-0279.npy +tests/data/ljspeech/wavs/LJ037-0133.wav|tests/data/ljspeech/wavs/LJ037-0133.npy +tests/data/ljspeech/wavs/LJ023-0141.wav|tests/data/ljspeech/wavs/LJ023-0141.npy +tests/data/ljspeech/wavs/LJ034-0016.wav|tests/data/ljspeech/wavs/LJ034-0016.npy +tests/data/ljspeech/wavs/LJ028-0008.wav|tests/data/ljspeech/wavs/LJ028-0008.npy +tests/data/ljspeech/wavs/LJ034-0010.wav|tests/data/ljspeech/wavs/LJ034-0010.npy +tests/data/ljspeech/wavs/LJ028-0406.wav|tests/data/ljspeech/wavs/LJ028-0406.npy +tests/data/ljspeech/wavs/LJ016-0192.wav|tests/data/ljspeech/wavs/LJ016-0192.npy +tests/data/ljspeech/wavs/LJ006-0051.wav|tests/data/ljspeech/wavs/LJ006-0051.npy +tests/data/ljspeech/wavs/LJ019-0035.wav|tests/data/ljspeech/wavs/LJ019-0035.npy +tests/data/ljspeech/wavs/LJ015-0146.wav|tests/data/ljspeech/wavs/LJ015-0146.npy +tests/data/ljspeech/wavs/LJ009-0258.wav|tests/data/ljspeech/wavs/LJ009-0258.npy +tests/data/ljspeech/wavs/LJ002-0174.wav|tests/data/ljspeech/wavs/LJ002-0174.npy +tests/data/ljspeech/wavs/LJ047-0086.wav|tests/data/ljspeech/wavs/LJ047-0086.npy +tests/data/ljspeech/wavs/LJ024-0119.wav|tests/data/ljspeech/wavs/LJ024-0119.npy +tests/data/ljspeech/wavs/LJ007-0198.wav|tests/data/ljspeech/wavs/LJ007-0198.npy +tests/data/ljspeech/wavs/LJ033-0064.wav|tests/data/ljspeech/wavs/LJ033-0064.npy +tests/data/ljspeech/wavs/LJ005-0008.wav|tests/data/ljspeech/wavs/LJ005-0008.npy +tests/data/ljspeech/wavs/LJ013-0168.wav|tests/data/ljspeech/wavs/LJ013-0168.npy +tests/data/ljspeech/wavs/LJ021-0100.wav|tests/data/ljspeech/wavs/LJ021-0100.npy +tests/data/ljspeech/wavs/LJ034-0015.wav|tests/data/ljspeech/wavs/LJ034-0015.npy +tests/data/ljspeech/wavs/LJ028-0497.wav|tests/data/ljspeech/wavs/LJ028-0497.npy +tests/data/ljspeech/wavs/LJ021-0079.wav|tests/data/ljspeech/wavs/LJ021-0079.npy +tests/data/ljspeech/wavs/LJ049-0100.wav|tests/data/ljspeech/wavs/LJ049-0100.npy +tests/data/ljspeech/wavs/LJ011-0252.wav|tests/data/ljspeech/wavs/LJ011-0252.npy +tests/data/ljspeech/wavs/LJ001-0098.wav|tests/data/ljspeech/wavs/LJ001-0098.npy +tests/data/ljspeech/wavs/LJ046-0189.wav|tests/data/ljspeech/wavs/LJ046-0189.npy +tests/data/ljspeech/wavs/LJ028-0182.wav|tests/data/ljspeech/wavs/LJ028-0182.npy +tests/data/ljspeech/wavs/LJ042-0199.wav|tests/data/ljspeech/wavs/LJ042-0199.npy +tests/data/ljspeech/wavs/LJ025-0002.wav|tests/data/ljspeech/wavs/LJ025-0002.npy +tests/data/ljspeech/wavs/LJ027-0062.wav|tests/data/ljspeech/wavs/LJ027-0062.npy +tests/data/ljspeech/wavs/LJ026-0152.wav|tests/data/ljspeech/wavs/LJ026-0152.npy +tests/data/ljspeech/wavs/LJ036-0190.wav|tests/data/ljspeech/wavs/LJ036-0190.npy +tests/data/ljspeech/wavs/LJ034-0050.wav|tests/data/ljspeech/wavs/LJ034-0050.npy +tests/data/ljspeech/wavs/LJ050-0248.wav|tests/data/ljspeech/wavs/LJ050-0248.npy +tests/data/ljspeech/wavs/LJ049-0014.wav|tests/data/ljspeech/wavs/LJ049-0014.npy +tests/data/ljspeech/wavs/LJ006-0198.wav|tests/data/ljspeech/wavs/LJ006-0198.npy 
+tests/data/ljspeech/wavs/LJ038-0056.wav|tests/data/ljspeech/wavs/LJ038-0056.npy +tests/data/ljspeech/wavs/LJ010-0233.wav|tests/data/ljspeech/wavs/LJ010-0233.npy +tests/data/ljspeech/wavs/LJ015-0012.wav|tests/data/ljspeech/wavs/LJ015-0012.npy +tests/data/ljspeech/wavs/LJ013-0040.wav|tests/data/ljspeech/wavs/LJ013-0040.npy +tests/data/ljspeech/wavs/LJ012-0288.wav|tests/data/ljspeech/wavs/LJ012-0288.npy +tests/data/ljspeech/wavs/LJ028-0128.wav|tests/data/ljspeech/wavs/LJ028-0128.npy +tests/data/ljspeech/wavs/LJ022-0137.wav|tests/data/ljspeech/wavs/LJ022-0137.npy +tests/data/ljspeech/wavs/LJ024-0074.wav|tests/data/ljspeech/wavs/LJ024-0074.npy +tests/data/ljspeech/wavs/LJ014-0112.wav|tests/data/ljspeech/wavs/LJ014-0112.npy +tests/data/ljspeech/wavs/LJ017-0194.wav|tests/data/ljspeech/wavs/LJ017-0194.npy +tests/data/ljspeech/wavs/LJ042-0069.wav|tests/data/ljspeech/wavs/LJ042-0069.npy +tests/data/ljspeech/wavs/LJ022-0103.wav|tests/data/ljspeech/wavs/LJ022-0103.npy +tests/data/ljspeech/wavs/LJ028-0072.wav|tests/data/ljspeech/wavs/LJ028-0072.npy +tests/data/ljspeech/wavs/LJ006-0265.wav|tests/data/ljspeech/wavs/LJ006-0265.npy +tests/data/ljspeech/wavs/LJ022-0091.wav|tests/data/ljspeech/wavs/LJ022-0091.npy +tests/data/ljspeech/wavs/LJ014-0032.wav|tests/data/ljspeech/wavs/LJ014-0032.npy +tests/data/ljspeech/wavs/LJ008-0221.wav|tests/data/ljspeech/wavs/LJ008-0221.npy +tests/data/ljspeech/wavs/LJ039-0207.wav|tests/data/ljspeech/wavs/LJ039-0207.npy +tests/data/ljspeech/wavs/LJ018-0012.wav|tests/data/ljspeech/wavs/LJ018-0012.npy +tests/data/ljspeech/wavs/LJ028-0084.wav|tests/data/ljspeech/wavs/LJ028-0084.npy +tests/data/ljspeech/wavs/LJ014-0022.wav|tests/data/ljspeech/wavs/LJ014-0022.npy +tests/data/ljspeech/wavs/LJ039-0164.wav|tests/data/ljspeech/wavs/LJ039-0164.npy +tests/data/ljspeech/wavs/LJ003-0036.wav|tests/data/ljspeech/wavs/LJ003-0036.npy +tests/data/ljspeech/wavs/LJ019-0387.wav|tests/data/ljspeech/wavs/LJ019-0387.npy +tests/data/ljspeech/wavs/LJ037-0096.wav|tests/data/ljspeech/wavs/LJ037-0096.npy +tests/data/ljspeech/wavs/LJ005-0031.wav|tests/data/ljspeech/wavs/LJ005-0031.npy +tests/data/ljspeech/wavs/LJ038-0193.wav|tests/data/ljspeech/wavs/LJ038-0193.npy +tests/data/ljspeech/wavs/LJ031-0198.wav|tests/data/ljspeech/wavs/LJ031-0198.npy +tests/data/ljspeech/wavs/LJ047-0097.wav|tests/data/ljspeech/wavs/LJ047-0097.npy +tests/data/ljspeech/wavs/LJ028-0274.wav|tests/data/ljspeech/wavs/LJ028-0274.npy +tests/data/ljspeech/wavs/LJ045-0045.wav|tests/data/ljspeech/wavs/LJ045-0045.npy +tests/data/ljspeech/wavs/LJ045-0071.wav|tests/data/ljspeech/wavs/LJ045-0071.npy +tests/data/ljspeech/wavs/LJ004-0154.wav|tests/data/ljspeech/wavs/LJ004-0154.npy +tests/data/ljspeech/wavs/LJ022-0181.wav|tests/data/ljspeech/wavs/LJ022-0181.npy +tests/data/ljspeech/wavs/LJ016-0146.wav|tests/data/ljspeech/wavs/LJ016-0146.npy +tests/data/ljspeech/wavs/LJ026-0161.wav|tests/data/ljspeech/wavs/LJ026-0161.npy +tests/data/ljspeech/wavs/LJ010-0315.wav|tests/data/ljspeech/wavs/LJ010-0315.npy +tests/data/ljspeech/wavs/LJ005-0059.wav|tests/data/ljspeech/wavs/LJ005-0059.npy +tests/data/ljspeech/wavs/LJ013-0222.wav|tests/data/ljspeech/wavs/LJ013-0222.npy +tests/data/ljspeech/wavs/LJ024-0078.wav|tests/data/ljspeech/wavs/LJ024-0078.npy +tests/data/ljspeech/wavs/LJ031-0086.wav|tests/data/ljspeech/wavs/LJ031-0086.npy +tests/data/ljspeech/wavs/LJ017-0094.wav|tests/data/ljspeech/wavs/LJ017-0094.npy +tests/data/ljspeech/wavs/LJ030-0143.wav|tests/data/ljspeech/wavs/LJ030-0143.npy 
+tests/data/ljspeech/wavs/LJ038-0146.wav|tests/data/ljspeech/wavs/LJ038-0146.npy +tests/data/ljspeech/wavs/LJ017-0252.wav|tests/data/ljspeech/wavs/LJ017-0252.npy +tests/data/ljspeech/wavs/LJ010-0263.wav|tests/data/ljspeech/wavs/LJ010-0263.npy +tests/data/ljspeech/wavs/LJ042-0090.wav|tests/data/ljspeech/wavs/LJ042-0090.npy +tests/data/ljspeech/wavs/LJ040-0065.wav|tests/data/ljspeech/wavs/LJ040-0065.npy +tests/data/ljspeech/wavs/LJ028-0249.wav|tests/data/ljspeech/wavs/LJ028-0249.npy +tests/data/ljspeech/wavs/LJ015-0204.wav|tests/data/ljspeech/wavs/LJ015-0204.npy +tests/data/ljspeech/wavs/LJ009-0005.wav|tests/data/ljspeech/wavs/LJ009-0005.npy +tests/data/ljspeech/wavs/LJ008-0274.wav|tests/data/ljspeech/wavs/LJ008-0274.npy +tests/data/ljspeech/wavs/LJ009-0013.wav|tests/data/ljspeech/wavs/LJ009-0013.npy +tests/data/ljspeech/wavs/LJ050-0210.wav|tests/data/ljspeech/wavs/LJ050-0210.npy +tests/data/ljspeech/wavs/LJ035-0199.wav|tests/data/ljspeech/wavs/LJ035-0199.npy +tests/data/ljspeech/wavs/LJ046-0156.wav|tests/data/ljspeech/wavs/LJ046-0156.npy +tests/data/ljspeech/wavs/LJ026-0103.wav|tests/data/ljspeech/wavs/LJ026-0103.npy +tests/data/ljspeech/wavs/LJ049-0048.wav|tests/data/ljspeech/wavs/LJ049-0048.npy +tests/data/ljspeech/wavs/LJ026-0159.wav|tests/data/ljspeech/wavs/LJ026-0159.npy +tests/data/ljspeech/wavs/LJ005-0145.wav|tests/data/ljspeech/wavs/LJ005-0145.npy +tests/data/ljspeech/wavs/LJ028-0045.wav|tests/data/ljspeech/wavs/LJ028-0045.npy +tests/data/ljspeech/wavs/LJ023-0090.wav|tests/data/ljspeech/wavs/LJ023-0090.npy +tests/data/ljspeech/wavs/LJ047-0118.wav|tests/data/ljspeech/wavs/LJ047-0118.npy +tests/data/ljspeech/wavs/LJ013-0087.wav|tests/data/ljspeech/wavs/LJ013-0087.npy +tests/data/ljspeech/wavs/LJ037-0112.wav|tests/data/ljspeech/wavs/LJ037-0112.npy +tests/data/ljspeech/wavs/LJ016-0033.wav|tests/data/ljspeech/wavs/LJ016-0033.npy +tests/data/ljspeech/wavs/LJ022-0075.wav|tests/data/ljspeech/wavs/LJ022-0075.npy +tests/data/ljspeech/wavs/LJ005-0153.wav|tests/data/ljspeech/wavs/LJ005-0153.npy +tests/data/ljspeech/wavs/LJ001-0134.wav|tests/data/ljspeech/wavs/LJ001-0134.npy +tests/data/ljspeech/wavs/LJ046-0205.wav|tests/data/ljspeech/wavs/LJ046-0205.npy +tests/data/ljspeech/wavs/LJ043-0021.wav|tests/data/ljspeech/wavs/LJ043-0021.npy +tests/data/ljspeech/wavs/LJ035-0018.wav|tests/data/ljspeech/wavs/LJ035-0018.npy +tests/data/ljspeech/wavs/LJ003-0066.wav|tests/data/ljspeech/wavs/LJ003-0066.npy +tests/data/ljspeech/wavs/LJ029-0178.wav|tests/data/ljspeech/wavs/LJ029-0178.npy +tests/data/ljspeech/wavs/LJ045-0180.wav|tests/data/ljspeech/wavs/LJ045-0180.npy +tests/data/ljspeech/wavs/LJ043-0125.wav|tests/data/ljspeech/wavs/LJ043-0125.npy +tests/data/ljspeech/wavs/LJ034-0030.wav|tests/data/ljspeech/wavs/LJ034-0030.npy +tests/data/ljspeech/wavs/LJ043-0164.wav|tests/data/ljspeech/wavs/LJ043-0164.npy +tests/data/ljspeech/wavs/LJ029-0065.wav|tests/data/ljspeech/wavs/LJ029-0065.npy +tests/data/ljspeech/wavs/LJ017-0107.wav|tests/data/ljspeech/wavs/LJ017-0107.npy +tests/data/ljspeech/wavs/LJ028-0465.wav|tests/data/ljspeech/wavs/LJ028-0465.npy +tests/data/ljspeech/wavs/LJ004-0203.wav|tests/data/ljspeech/wavs/LJ004-0203.npy +tests/data/ljspeech/wavs/LJ016-0162.wav|tests/data/ljspeech/wavs/LJ016-0162.npy +tests/data/ljspeech/wavs/LJ030-0208.wav|tests/data/ljspeech/wavs/LJ030-0208.npy +tests/data/ljspeech/wavs/LJ015-0122.wav|tests/data/ljspeech/wavs/LJ015-0122.npy +tests/data/ljspeech/wavs/LJ002-0126.wav|tests/data/ljspeech/wavs/LJ002-0126.npy 
+tests/data/ljspeech/wavs/LJ031-0208.wav|tests/data/ljspeech/wavs/LJ031-0208.npy +tests/data/ljspeech/wavs/LJ026-0134.wav|tests/data/ljspeech/wavs/LJ026-0134.npy +tests/data/ljspeech/wavs/LJ048-0228.wav|tests/data/ljspeech/wavs/LJ048-0228.npy +tests/data/ljspeech/wavs/LJ022-0100.wav|tests/data/ljspeech/wavs/LJ022-0100.npy +tests/data/ljspeech/wavs/LJ020-0033.wav|tests/data/ljspeech/wavs/LJ020-0033.npy +tests/data/ljspeech/wavs/LJ018-0358.wav|tests/data/ljspeech/wavs/LJ018-0358.npy +tests/data/ljspeech/wavs/LJ019-0061.wav|tests/data/ljspeech/wavs/LJ019-0061.npy +tests/data/ljspeech/wavs/LJ019-0090.wav|tests/data/ljspeech/wavs/LJ019-0090.npy +tests/data/ljspeech/wavs/LJ018-0350.wav|tests/data/ljspeech/wavs/LJ018-0350.npy +tests/data/ljspeech/wavs/LJ017-0193.wav|tests/data/ljspeech/wavs/LJ017-0193.npy +tests/data/ljspeech/wavs/LJ048-0226.wav|tests/data/ljspeech/wavs/LJ048-0226.npy +tests/data/ljspeech/wavs/LJ022-0050.wav|tests/data/ljspeech/wavs/LJ022-0050.npy +tests/data/ljspeech/wavs/LJ003-0296.wav|tests/data/ljspeech/wavs/LJ003-0296.npy +tests/data/ljspeech/wavs/LJ014-0043.wav|tests/data/ljspeech/wavs/LJ014-0043.npy +tests/data/ljspeech/wavs/LJ041-0153.wav|tests/data/ljspeech/wavs/LJ041-0153.npy +tests/data/ljspeech/wavs/LJ028-0171.wav|tests/data/ljspeech/wavs/LJ028-0171.npy +tests/data/ljspeech/wavs/LJ040-0078.wav|tests/data/ljspeech/wavs/LJ040-0078.npy +tests/data/ljspeech/wavs/LJ048-0044.wav|tests/data/ljspeech/wavs/LJ048-0044.npy +tests/data/ljspeech/wavs/LJ048-0145.wav|tests/data/ljspeech/wavs/LJ048-0145.npy +tests/data/ljspeech/wavs/LJ001-0063.wav|tests/data/ljspeech/wavs/LJ001-0063.npy +tests/data/ljspeech/wavs/LJ012-0184.wav|tests/data/ljspeech/wavs/LJ012-0184.npy +tests/data/ljspeech/wavs/LJ003-0249.wav|tests/data/ljspeech/wavs/LJ003-0249.npy +tests/data/ljspeech/wavs/LJ012-0185.wav|tests/data/ljspeech/wavs/LJ012-0185.npy +tests/data/ljspeech/wavs/LJ039-0134.wav|tests/data/ljspeech/wavs/LJ039-0134.npy +tests/data/ljspeech/wavs/LJ033-0213.wav|tests/data/ljspeech/wavs/LJ033-0213.npy +tests/data/ljspeech/wavs/LJ039-0175.wav|tests/data/ljspeech/wavs/LJ039-0175.npy +tests/data/ljspeech/wavs/LJ045-0018.wav|tests/data/ljspeech/wavs/LJ045-0018.npy +tests/data/ljspeech/wavs/LJ006-0201.wav|tests/data/ljspeech/wavs/LJ006-0201.npy +tests/data/ljspeech/wavs/LJ028-0016.wav|tests/data/ljspeech/wavs/LJ028-0016.npy +tests/data/ljspeech/wavs/LJ040-0220.wav|tests/data/ljspeech/wavs/LJ040-0220.npy +tests/data/ljspeech/wavs/LJ017-0021.wav|tests/data/ljspeech/wavs/LJ017-0021.npy +tests/data/ljspeech/wavs/LJ002-0194.wav|tests/data/ljspeech/wavs/LJ002-0194.npy +tests/data/ljspeech/wavs/LJ043-0141.wav|tests/data/ljspeech/wavs/LJ043-0141.npy +tests/data/ljspeech/wavs/LJ038-0157.wav|tests/data/ljspeech/wavs/LJ038-0157.npy +tests/data/ljspeech/wavs/LJ002-0048.wav|tests/data/ljspeech/wavs/LJ002-0048.npy +tests/data/ljspeech/wavs/LJ047-0137.wav|tests/data/ljspeech/wavs/LJ047-0137.npy +tests/data/ljspeech/wavs/LJ048-0261.wav|tests/data/ljspeech/wavs/LJ048-0261.npy +tests/data/ljspeech/wavs/LJ044-0045.wav|tests/data/ljspeech/wavs/LJ044-0045.npy +tests/data/ljspeech/wavs/LJ037-0057.wav|tests/data/ljspeech/wavs/LJ037-0057.npy +tests/data/ljspeech/wavs/LJ006-0288.wav|tests/data/ljspeech/wavs/LJ006-0288.npy +tests/data/ljspeech/wavs/LJ011-0120.wav|tests/data/ljspeech/wavs/LJ011-0120.npy +tests/data/ljspeech/wavs/LJ014-0143.wav|tests/data/ljspeech/wavs/LJ014-0143.npy +tests/data/ljspeech/wavs/LJ040-0147.wav|tests/data/ljspeech/wavs/LJ040-0147.npy 
+tests/data/ljspeech/wavs/LJ001-0156.wav|tests/data/ljspeech/wavs/LJ001-0156.npy +tests/data/ljspeech/wavs/LJ028-0089.wav|tests/data/ljspeech/wavs/LJ028-0089.npy +tests/data/ljspeech/wavs/LJ030-0194.wav|tests/data/ljspeech/wavs/LJ030-0194.npy +tests/data/ljspeech/wavs/LJ017-0054.wav|tests/data/ljspeech/wavs/LJ017-0054.npy +tests/data/ljspeech/wavs/LJ050-0246.wav|tests/data/ljspeech/wavs/LJ050-0246.npy +tests/data/ljspeech/wavs/LJ023-0073.wav|tests/data/ljspeech/wavs/LJ023-0073.npy +tests/data/ljspeech/wavs/LJ023-0071.wav|tests/data/ljspeech/wavs/LJ023-0071.npy +tests/data/ljspeech/wavs/LJ007-0111.wav|tests/data/ljspeech/wavs/LJ007-0111.npy +tests/data/ljspeech/wavs/LJ010-0132.wav|tests/data/ljspeech/wavs/LJ010-0132.npy +tests/data/ljspeech/wavs/LJ005-0106.wav|tests/data/ljspeech/wavs/LJ005-0106.npy +tests/data/ljspeech/wavs/LJ029-0208.wav|tests/data/ljspeech/wavs/LJ029-0208.npy +tests/data/ljspeech/wavs/LJ030-0127.wav|tests/data/ljspeech/wavs/LJ030-0127.npy +tests/data/ljspeech/wavs/LJ039-0246.wav|tests/data/ljspeech/wavs/LJ039-0246.npy +tests/data/ljspeech/wavs/LJ035-0048.wav|tests/data/ljspeech/wavs/LJ035-0048.npy +tests/data/ljspeech/wavs/LJ007-0179.wav|tests/data/ljspeech/wavs/LJ007-0179.npy +tests/data/ljspeech/wavs/LJ018-0198.wav|tests/data/ljspeech/wavs/LJ018-0198.npy +tests/data/ljspeech/wavs/LJ007-0186.wav|tests/data/ljspeech/wavs/LJ007-0186.npy +tests/data/ljspeech/wavs/LJ014-0163.wav|tests/data/ljspeech/wavs/LJ014-0163.npy +tests/data/ljspeech/wavs/LJ001-0139.wav|tests/data/ljspeech/wavs/LJ001-0139.npy +tests/data/ljspeech/wavs/LJ009-0139.wav|tests/data/ljspeech/wavs/LJ009-0139.npy +tests/data/ljspeech/wavs/LJ044-0020.wav|tests/data/ljspeech/wavs/LJ044-0020.npy +tests/data/ljspeech/wavs/LJ044-0055.wav|tests/data/ljspeech/wavs/LJ044-0055.npy +tests/data/ljspeech/wavs/LJ009-0174.wav|tests/data/ljspeech/wavs/LJ009-0174.npy +tests/data/ljspeech/wavs/LJ003-0070.wav|tests/data/ljspeech/wavs/LJ003-0070.npy +tests/data/ljspeech/wavs/LJ049-0095.wav|tests/data/ljspeech/wavs/LJ049-0095.npy +tests/data/ljspeech/wavs/LJ040-0129.wav|tests/data/ljspeech/wavs/LJ040-0129.npy +tests/data/ljspeech/wavs/LJ042-0110.wav|tests/data/ljspeech/wavs/LJ042-0110.npy +tests/data/ljspeech/wavs/LJ008-0199.wav|tests/data/ljspeech/wavs/LJ008-0199.npy +tests/data/ljspeech/wavs/LJ042-0051.wav|tests/data/ljspeech/wavs/LJ042-0051.npy +tests/data/ljspeech/wavs/LJ003-0190.wav|tests/data/ljspeech/wavs/LJ003-0190.npy +tests/data/ljspeech/wavs/LJ014-0087.wav|tests/data/ljspeech/wavs/LJ014-0087.npy +tests/data/ljspeech/wavs/LJ021-0049.wav|tests/data/ljspeech/wavs/LJ021-0049.npy +tests/data/ljspeech/wavs/LJ026-0022.wav|tests/data/ljspeech/wavs/LJ026-0022.npy +tests/data/ljspeech/wavs/LJ017-0058.wav|tests/data/ljspeech/wavs/LJ017-0058.npy +tests/data/ljspeech/wavs/LJ036-0170.wav|tests/data/ljspeech/wavs/LJ036-0170.npy +tests/data/ljspeech/wavs/LJ017-0226.wav|tests/data/ljspeech/wavs/LJ017-0226.npy +tests/data/ljspeech/wavs/LJ032-0146.wav|tests/data/ljspeech/wavs/LJ032-0146.npy +tests/data/ljspeech/wavs/LJ016-0429.wav|tests/data/ljspeech/wavs/LJ016-0429.npy +tests/data/ljspeech/wavs/LJ019-0267.wav|tests/data/ljspeech/wavs/LJ019-0267.npy +tests/data/ljspeech/wavs/LJ010-0276.wav|tests/data/ljspeech/wavs/LJ010-0276.npy +tests/data/ljspeech/wavs/LJ007-0170.wav|tests/data/ljspeech/wavs/LJ007-0170.npy +tests/data/ljspeech/wavs/LJ008-0085.wav|tests/data/ljspeech/wavs/LJ008-0085.npy +tests/data/ljspeech/wavs/LJ002-0040.wav|tests/data/ljspeech/wavs/LJ002-0040.npy 
+tests/data/ljspeech/wavs/LJ026-0109.wav|tests/data/ljspeech/wavs/LJ026-0109.npy +tests/data/ljspeech/wavs/LJ010-0203.wav|tests/data/ljspeech/wavs/LJ010-0203.npy +tests/data/ljspeech/wavs/LJ034-0068.wav|tests/data/ljspeech/wavs/LJ034-0068.npy +tests/data/ljspeech/wavs/LJ030-0244.wav|tests/data/ljspeech/wavs/LJ030-0244.npy +tests/data/ljspeech/wavs/LJ050-0073.wav|tests/data/ljspeech/wavs/LJ050-0073.npy +tests/data/ljspeech/wavs/LJ001-0056.wav|tests/data/ljspeech/wavs/LJ001-0056.npy +tests/data/ljspeech/wavs/LJ028-0086.wav|tests/data/ljspeech/wavs/LJ028-0086.npy +tests/data/ljspeech/wavs/LJ047-0208.wav|tests/data/ljspeech/wavs/LJ047-0208.npy +tests/data/ljspeech/wavs/LJ050-0041.wav|tests/data/ljspeech/wavs/LJ050-0041.npy +tests/data/ljspeech/wavs/LJ037-0208.wav|tests/data/ljspeech/wavs/LJ037-0208.npy +tests/data/ljspeech/wavs/LJ043-0073.wav|tests/data/ljspeech/wavs/LJ043-0073.npy +tests/data/ljspeech/wavs/LJ019-0302.wav|tests/data/ljspeech/wavs/LJ019-0302.npy +tests/data/ljspeech/wavs/LJ049-0209.wav|tests/data/ljspeech/wavs/LJ049-0209.npy +tests/data/ljspeech/wavs/LJ041-0074.wav|tests/data/ljspeech/wavs/LJ041-0074.npy +tests/data/ljspeech/wavs/LJ001-0062.wav|tests/data/ljspeech/wavs/LJ001-0062.npy +tests/data/ljspeech/wavs/LJ044-0091.wav|tests/data/ljspeech/wavs/LJ044-0091.npy +tests/data/ljspeech/wavs/LJ013-0240.wav|tests/data/ljspeech/wavs/LJ013-0240.npy +tests/data/ljspeech/wavs/LJ035-0002.wav|tests/data/ljspeech/wavs/LJ035-0002.npy +tests/data/ljspeech/wavs/LJ009-0141.wav|tests/data/ljspeech/wavs/LJ009-0141.npy +tests/data/ljspeech/wavs/LJ003-0231.wav|tests/data/ljspeech/wavs/LJ003-0231.npy +tests/data/ljspeech/wavs/LJ020-0096.wav|tests/data/ljspeech/wavs/LJ020-0096.npy +tests/data/ljspeech/wavs/LJ003-0080.wav|tests/data/ljspeech/wavs/LJ003-0080.npy +tests/data/ljspeech/wavs/LJ008-0136.wav|tests/data/ljspeech/wavs/LJ008-0136.npy +tests/data/ljspeech/wavs/LJ003-0126.wav|tests/data/ljspeech/wavs/LJ003-0126.npy +tests/data/ljspeech/wavs/LJ039-0040.wav|tests/data/ljspeech/wavs/LJ039-0040.npy +tests/data/ljspeech/wavs/LJ050-0166.wav|tests/data/ljspeech/wavs/LJ050-0166.npy +tests/data/ljspeech/wavs/LJ009-0041.wav|tests/data/ljspeech/wavs/LJ009-0041.npy +tests/data/ljspeech/wavs/LJ049-0206.wav|tests/data/ljspeech/wavs/LJ049-0206.npy +tests/data/ljspeech/wavs/LJ044-0115.wav|tests/data/ljspeech/wavs/LJ044-0115.npy +tests/data/ljspeech/wavs/LJ035-0005.wav|tests/data/ljspeech/wavs/LJ035-0005.npy +tests/data/ljspeech/wavs/LJ009-0221.wav|tests/data/ljspeech/wavs/LJ009-0221.npy +tests/data/ljspeech/wavs/LJ032-0081.wav|tests/data/ljspeech/wavs/LJ032-0081.npy +tests/data/ljspeech/wavs/LJ030-0057.wav|tests/data/ljspeech/wavs/LJ030-0057.npy +tests/data/ljspeech/wavs/LJ008-0071.wav|tests/data/ljspeech/wavs/LJ008-0071.npy +tests/data/ljspeech/wavs/LJ005-0133.wav|tests/data/ljspeech/wavs/LJ005-0133.npy +tests/data/ljspeech/wavs/LJ016-0416.wav|tests/data/ljspeech/wavs/LJ016-0416.npy +tests/data/ljspeech/wavs/LJ021-0041.wav|tests/data/ljspeech/wavs/LJ021-0041.npy +tests/data/ljspeech/wavs/LJ046-0006.wav|tests/data/ljspeech/wavs/LJ046-0006.npy +tests/data/ljspeech/wavs/LJ005-0025.wav|tests/data/ljspeech/wavs/LJ005-0025.npy +tests/data/ljspeech/wavs/LJ030-0171.wav|tests/data/ljspeech/wavs/LJ030-0171.npy +tests/data/ljspeech/wavs/LJ016-0381.wav|tests/data/ljspeech/wavs/LJ016-0381.npy +tests/data/ljspeech/wavs/LJ045-0137.wav|tests/data/ljspeech/wavs/LJ045-0137.npy +tests/data/ljspeech/wavs/LJ034-0067.wav|tests/data/ljspeech/wavs/LJ034-0067.npy 
+tests/data/ljspeech/wavs/LJ033-0188.wav|tests/data/ljspeech/wavs/LJ033-0188.npy +tests/data/ljspeech/wavs/LJ047-0085.wav|tests/data/ljspeech/wavs/LJ047-0085.npy +tests/data/ljspeech/wavs/LJ038-0043.wav|tests/data/ljspeech/wavs/LJ038-0043.npy +tests/data/ljspeech/wavs/LJ002-0162.wav|tests/data/ljspeech/wavs/LJ002-0162.npy +tests/data/ljspeech/wavs/LJ022-0164.wav|tests/data/ljspeech/wavs/LJ022-0164.npy +tests/data/ljspeech/wavs/LJ040-0109.wav|tests/data/ljspeech/wavs/LJ040-0109.npy +tests/data/ljspeech/wavs/LJ034-0057.wav|tests/data/ljspeech/wavs/LJ034-0057.npy +tests/data/ljspeech/wavs/LJ018-0043.wav|tests/data/ljspeech/wavs/LJ018-0043.npy +tests/data/ljspeech/wavs/LJ002-0274.wav|tests/data/ljspeech/wavs/LJ002-0274.npy +tests/data/ljspeech/wavs/LJ030-0231.wav|tests/data/ljspeech/wavs/LJ030-0231.npy +tests/data/ljspeech/wavs/LJ018-0301.wav|tests/data/ljspeech/wavs/LJ018-0301.npy +tests/data/ljspeech/wavs/LJ013-0113.wav|tests/data/ljspeech/wavs/LJ013-0113.npy +tests/data/ljspeech/wavs/LJ033-0011.wav|tests/data/ljspeech/wavs/LJ033-0011.npy +tests/data/ljspeech/wavs/LJ019-0036.wav|tests/data/ljspeech/wavs/LJ019-0036.npy +tests/data/ljspeech/wavs/LJ009-0095.wav|tests/data/ljspeech/wavs/LJ009-0095.npy +tests/data/ljspeech/wavs/LJ034-0042.wav|tests/data/ljspeech/wavs/LJ034-0042.npy +tests/data/ljspeech/wavs/LJ002-0123.wav|tests/data/ljspeech/wavs/LJ002-0123.npy +tests/data/ljspeech/wavs/LJ044-0082.wav|tests/data/ljspeech/wavs/LJ044-0082.npy +tests/data/ljspeech/wavs/LJ006-0261.wav|tests/data/ljspeech/wavs/LJ006-0261.npy +tests/data/ljspeech/wavs/LJ041-0111.wav|tests/data/ljspeech/wavs/LJ041-0111.npy +tests/data/ljspeech/wavs/LJ011-0236.wav|tests/data/ljspeech/wavs/LJ011-0236.npy +tests/data/ljspeech/wavs/LJ026-0124.wav|tests/data/ljspeech/wavs/LJ026-0124.npy +tests/data/ljspeech/wavs/LJ021-0057.wav|tests/data/ljspeech/wavs/LJ021-0057.npy +tests/data/ljspeech/wavs/LJ010-0121.wav|tests/data/ljspeech/wavs/LJ010-0121.npy +tests/data/ljspeech/wavs/LJ049-0007.wav|tests/data/ljspeech/wavs/LJ049-0007.npy +tests/data/ljspeech/wavs/LJ003-0041.wav|tests/data/ljspeech/wavs/LJ003-0041.npy +tests/data/ljspeech/wavs/LJ043-0018.wav|tests/data/ljspeech/wavs/LJ043-0018.npy +tests/data/ljspeech/wavs/LJ031-0181.wav|tests/data/ljspeech/wavs/LJ031-0181.npy +tests/data/ljspeech/wavs/LJ017-0283.wav|tests/data/ljspeech/wavs/LJ017-0283.npy +tests/data/ljspeech/wavs/LJ030-0056.wav|tests/data/ljspeech/wavs/LJ030-0056.npy +tests/data/ljspeech/wavs/LJ046-0023.wav|tests/data/ljspeech/wavs/LJ046-0023.npy +tests/data/ljspeech/wavs/LJ041-0137.wav|tests/data/ljspeech/wavs/LJ041-0137.npy +tests/data/ljspeech/wavs/LJ032-0031.wav|tests/data/ljspeech/wavs/LJ032-0031.npy +tests/data/ljspeech/wavs/LJ033-0149.wav|tests/data/ljspeech/wavs/LJ033-0149.npy +tests/data/ljspeech/wavs/LJ008-0195.wav|tests/data/ljspeech/wavs/LJ008-0195.npy +tests/data/ljspeech/wavs/LJ032-0210.wav|tests/data/ljspeech/wavs/LJ032-0210.npy +tests/data/ljspeech/wavs/LJ002-0091.wav|tests/data/ljspeech/wavs/LJ002-0091.npy +tests/data/ljspeech/wavs/LJ018-0346.wav|tests/data/ljspeech/wavs/LJ018-0346.npy +tests/data/ljspeech/wavs/LJ050-0046.wav|tests/data/ljspeech/wavs/LJ050-0046.npy +tests/data/ljspeech/wavs/LJ010-0216.wav|tests/data/ljspeech/wavs/LJ010-0216.npy +tests/data/ljspeech/wavs/LJ028-0400.wav|tests/data/ljspeech/wavs/LJ028-0400.npy +tests/data/ljspeech/wavs/LJ030-0182.wav|tests/data/ljspeech/wavs/LJ030-0182.npy +tests/data/ljspeech/wavs/LJ036-0129.wav|tests/data/ljspeech/wavs/LJ036-0129.npy 
+tests/data/ljspeech/wavs/LJ011-0173.wav|tests/data/ljspeech/wavs/LJ011-0173.npy +tests/data/ljspeech/wavs/LJ041-0099.wav|tests/data/ljspeech/wavs/LJ041-0099.npy +tests/data/ljspeech/wavs/LJ049-0006.wav|tests/data/ljspeech/wavs/LJ049-0006.npy +tests/data/ljspeech/wavs/LJ006-0024.wav|tests/data/ljspeech/wavs/LJ006-0024.npy +tests/data/ljspeech/wavs/LJ019-0078.wav|tests/data/ljspeech/wavs/LJ019-0078.npy +tests/data/ljspeech/wavs/LJ028-0481.wav|tests/data/ljspeech/wavs/LJ028-0481.npy +tests/data/ljspeech/wavs/LJ002-0051.wav|tests/data/ljspeech/wavs/LJ002-0051.npy +tests/data/ljspeech/wavs/LJ016-0125.wav|tests/data/ljspeech/wavs/LJ016-0125.npy +tests/data/ljspeech/wavs/LJ015-0061.wav|tests/data/ljspeech/wavs/LJ015-0061.npy +tests/data/ljspeech/wavs/LJ024-0012.wav|tests/data/ljspeech/wavs/LJ024-0012.npy +tests/data/ljspeech/wavs/LJ036-0008.wav|tests/data/ljspeech/wavs/LJ036-0008.npy +tests/data/ljspeech/wavs/LJ004-0079.wav|tests/data/ljspeech/wavs/LJ004-0079.npy +tests/data/ljspeech/wavs/LJ009-0035.wav|tests/data/ljspeech/wavs/LJ009-0035.npy +tests/data/ljspeech/wavs/LJ018-0094.wav|tests/data/ljspeech/wavs/LJ018-0094.npy +tests/data/ljspeech/wavs/LJ047-0206.wav|tests/data/ljspeech/wavs/LJ047-0206.npy +tests/data/ljspeech/wavs/LJ003-0038.wav|tests/data/ljspeech/wavs/LJ003-0038.npy +tests/data/ljspeech/wavs/LJ016-0337.wav|tests/data/ljspeech/wavs/LJ016-0337.npy +tests/data/ljspeech/wavs/LJ015-0278.wav|tests/data/ljspeech/wavs/LJ015-0278.npy +tests/data/ljspeech/wavs/LJ035-0148.wav|tests/data/ljspeech/wavs/LJ035-0148.npy +tests/data/ljspeech/wavs/LJ015-0254.wav|tests/data/ljspeech/wavs/LJ015-0254.npy +tests/data/ljspeech/wavs/LJ017-0015.wav|tests/data/ljspeech/wavs/LJ017-0015.npy +tests/data/ljspeech/wavs/LJ037-0238.wav|tests/data/ljspeech/wavs/LJ037-0238.npy +tests/data/ljspeech/wavs/LJ046-0159.wav|tests/data/ljspeech/wavs/LJ046-0159.npy +tests/data/ljspeech/wavs/LJ019-0054.wav|tests/data/ljspeech/wavs/LJ019-0054.npy +tests/data/ljspeech/wavs/LJ017-0092.wav|tests/data/ljspeech/wavs/LJ017-0092.npy +tests/data/ljspeech/wavs/LJ026-0150.wav|tests/data/ljspeech/wavs/LJ026-0150.npy +tests/data/ljspeech/wavs/LJ026-0119.wav|tests/data/ljspeech/wavs/LJ026-0119.npy +tests/data/ljspeech/wavs/LJ036-0038.wav|tests/data/ljspeech/wavs/LJ036-0038.npy +tests/data/ljspeech/wavs/LJ006-0074.wav|tests/data/ljspeech/wavs/LJ006-0074.npy +tests/data/ljspeech/wavs/LJ012-0038.wav|tests/data/ljspeech/wavs/LJ012-0038.npy +tests/data/ljspeech/wavs/LJ002-0128.wav|tests/data/ljspeech/wavs/LJ002-0128.npy +tests/data/ljspeech/wavs/LJ017-0104.wav|tests/data/ljspeech/wavs/LJ017-0104.npy +tests/data/ljspeech/wavs/LJ009-0100.wav|tests/data/ljspeech/wavs/LJ009-0100.npy +tests/data/ljspeech/wavs/LJ037-0023.wav|tests/data/ljspeech/wavs/LJ037-0023.npy +tests/data/ljspeech/wavs/LJ044-0101.wav|tests/data/ljspeech/wavs/LJ044-0101.npy +tests/data/ljspeech/wavs/LJ050-0269.wav|tests/data/ljspeech/wavs/LJ050-0269.npy +tests/data/ljspeech/wavs/LJ047-0246.wav|tests/data/ljspeech/wavs/LJ047-0246.npy +tests/data/ljspeech/wavs/LJ017-0175.wav|tests/data/ljspeech/wavs/LJ017-0175.npy +tests/data/ljspeech/wavs/LJ042-0151.wav|tests/data/ljspeech/wavs/LJ042-0151.npy +tests/data/ljspeech/wavs/LJ016-0354.wav|tests/data/ljspeech/wavs/LJ016-0354.npy +tests/data/ljspeech/wavs/LJ017-0022.wav|tests/data/ljspeech/wavs/LJ017-0022.npy +tests/data/ljspeech/wavs/LJ003-0310.wav|tests/data/ljspeech/wavs/LJ003-0310.npy +tests/data/ljspeech/wavs/LJ018-0210.wav|tests/data/ljspeech/wavs/LJ018-0210.npy 
+tests/data/ljspeech/wavs/LJ015-0300.wav|tests/data/ljspeech/wavs/LJ015-0300.npy +tests/data/ljspeech/wavs/LJ018-0097.wav|tests/data/ljspeech/wavs/LJ018-0097.npy +tests/data/ljspeech/wavs/LJ012-0037.wav|tests/data/ljspeech/wavs/LJ012-0037.npy +tests/data/ljspeech/wavs/LJ008-0208.wav|tests/data/ljspeech/wavs/LJ008-0208.npy +tests/data/ljspeech/wavs/LJ017-0178.wav|tests/data/ljspeech/wavs/LJ017-0178.npy +tests/data/ljspeech/wavs/LJ045-0236.wav|tests/data/ljspeech/wavs/LJ045-0236.npy +tests/data/ljspeech/wavs/LJ032-0038.wav|tests/data/ljspeech/wavs/LJ032-0038.npy +tests/data/ljspeech/wavs/LJ010-0034.wav|tests/data/ljspeech/wavs/LJ010-0034.npy +tests/data/ljspeech/wavs/LJ048-0237.wav|tests/data/ljspeech/wavs/LJ048-0237.npy +tests/data/ljspeech/wavs/LJ016-0205.wav|tests/data/ljspeech/wavs/LJ016-0205.npy +tests/data/ljspeech/wavs/LJ047-0035.wav|tests/data/ljspeech/wavs/LJ047-0035.npy +tests/data/ljspeech/wavs/LJ018-0238.wav|tests/data/ljspeech/wavs/LJ018-0238.npy +tests/data/ljspeech/wavs/LJ016-0001.wav|tests/data/ljspeech/wavs/LJ016-0001.npy +tests/data/ljspeech/wavs/LJ016-0135.wav|tests/data/ljspeech/wavs/LJ016-0135.npy +tests/data/ljspeech/wavs/LJ042-0096.wav|tests/data/ljspeech/wavs/LJ042-0096.npy +tests/data/ljspeech/wavs/LJ013-0146.wav|tests/data/ljspeech/wavs/LJ013-0146.npy +tests/data/ljspeech/wavs/LJ002-0205.wav|tests/data/ljspeech/wavs/LJ002-0205.npy +tests/data/ljspeech/wavs/LJ010-0071.wav|tests/data/ljspeech/wavs/LJ010-0071.npy +tests/data/ljspeech/wavs/LJ006-0194.wav|tests/data/ljspeech/wavs/LJ006-0194.npy +tests/data/ljspeech/wavs/LJ046-0041.wav|tests/data/ljspeech/wavs/LJ046-0041.npy +tests/data/ljspeech/wavs/LJ015-0312.wav|tests/data/ljspeech/wavs/LJ015-0312.npy +tests/data/ljspeech/wavs/LJ006-0156.wav|tests/data/ljspeech/wavs/LJ006-0156.npy +tests/data/ljspeech/wavs/LJ009-0004.wav|tests/data/ljspeech/wavs/LJ009-0004.npy +tests/data/ljspeech/wavs/LJ028-0183.wav|tests/data/ljspeech/wavs/LJ028-0183.npy +tests/data/ljspeech/wavs/LJ010-0295.wav|tests/data/ljspeech/wavs/LJ010-0295.npy +tests/data/ljspeech/wavs/LJ037-0100.wav|tests/data/ljspeech/wavs/LJ037-0100.npy +tests/data/ljspeech/wavs/LJ019-0008.wav|tests/data/ljspeech/wavs/LJ019-0008.npy +tests/data/ljspeech/wavs/LJ011-0174.wav|tests/data/ljspeech/wavs/LJ011-0174.npy +tests/data/ljspeech/wavs/LJ006-0266.wav|tests/data/ljspeech/wavs/LJ006-0266.npy +tests/data/ljspeech/wavs/LJ015-0313.wav|tests/data/ljspeech/wavs/LJ015-0313.npy +tests/data/ljspeech/wavs/LJ026-0110.wav|tests/data/ljspeech/wavs/LJ026-0110.npy +tests/data/ljspeech/wavs/LJ008-0252.wav|tests/data/ljspeech/wavs/LJ008-0252.npy +tests/data/ljspeech/wavs/LJ037-0093.wav|tests/data/ljspeech/wavs/LJ037-0093.npy +tests/data/ljspeech/wavs/LJ016-0122.wav|tests/data/ljspeech/wavs/LJ016-0122.npy +tests/data/ljspeech/wavs/LJ037-0181.wav|tests/data/ljspeech/wavs/LJ037-0181.npy +tests/data/ljspeech/wavs/LJ017-0228.wav|tests/data/ljspeech/wavs/LJ017-0228.npy +tests/data/ljspeech/wavs/LJ030-0017.wav|tests/data/ljspeech/wavs/LJ030-0017.npy +tests/data/ljspeech/wavs/LJ016-0030.wav|tests/data/ljspeech/wavs/LJ016-0030.npy +tests/data/ljspeech/wavs/LJ027-0161.wav|tests/data/ljspeech/wavs/LJ027-0161.npy +tests/data/ljspeech/wavs/LJ011-0246.wav|tests/data/ljspeech/wavs/LJ011-0246.npy +tests/data/ljspeech/wavs/LJ044-0083.wav|tests/data/ljspeech/wavs/LJ044-0083.npy +tests/data/ljspeech/wavs/LJ050-0240.wav|tests/data/ljspeech/wavs/LJ050-0240.npy +tests/data/ljspeech/wavs/LJ032-0116.wav|tests/data/ljspeech/wavs/LJ032-0116.npy 
+tests/data/ljspeech/wavs/LJ014-0209.wav|tests/data/ljspeech/wavs/LJ014-0209.npy +tests/data/ljspeech/wavs/LJ030-0025.wav|tests/data/ljspeech/wavs/LJ030-0025.npy +tests/data/ljspeech/wavs/LJ012-0149.wav|tests/data/ljspeech/wavs/LJ012-0149.npy +tests/data/ljspeech/wavs/LJ011-0242.wav|tests/data/ljspeech/wavs/LJ011-0242.npy +tests/data/ljspeech/wavs/LJ028-0051.wav|tests/data/ljspeech/wavs/LJ028-0051.npy +tests/data/ljspeech/wavs/LJ024-0106.wav|tests/data/ljspeech/wavs/LJ024-0106.npy +tests/data/ljspeech/wavs/LJ014-0172.wav|tests/data/ljspeech/wavs/LJ014-0172.npy +tests/data/ljspeech/wavs/LJ023-0092.wav|tests/data/ljspeech/wavs/LJ023-0092.npy +tests/data/ljspeech/wavs/LJ015-0083.wav|tests/data/ljspeech/wavs/LJ015-0083.npy +tests/data/ljspeech/wavs/LJ030-0253.wav|tests/data/ljspeech/wavs/LJ030-0253.npy +tests/data/ljspeech/wavs/LJ014-0236.wav|tests/data/ljspeech/wavs/LJ014-0236.npy +tests/data/ljspeech/wavs/LJ016-0245.wav|tests/data/ljspeech/wavs/LJ016-0245.npy +tests/data/ljspeech/wavs/LJ009-0222.wav|tests/data/ljspeech/wavs/LJ009-0222.npy +tests/data/ljspeech/wavs/LJ015-0024.wav|tests/data/ljspeech/wavs/LJ015-0024.npy +tests/data/ljspeech/wavs/LJ002-0075.wav|tests/data/ljspeech/wavs/LJ002-0075.npy +tests/data/ljspeech/wavs/LJ046-0224.wav|tests/data/ljspeech/wavs/LJ046-0224.npy +tests/data/ljspeech/wavs/LJ032-0030.wav|tests/data/ljspeech/wavs/LJ032-0030.npy +tests/data/ljspeech/wavs/LJ015-0075.wav|tests/data/ljspeech/wavs/LJ015-0075.npy +tests/data/ljspeech/wavs/LJ014-0221.wav|tests/data/ljspeech/wavs/LJ014-0221.npy +tests/data/ljspeech/wavs/LJ035-0036.wav|tests/data/ljspeech/wavs/LJ035-0036.npy +tests/data/ljspeech/wavs/LJ015-0256.wav|tests/data/ljspeech/wavs/LJ015-0256.npy +tests/data/ljspeech/wavs/LJ044-0081.wav|tests/data/ljspeech/wavs/LJ044-0081.npy +tests/data/ljspeech/wavs/LJ045-0011.wav|tests/data/ljspeech/wavs/LJ045-0011.npy +tests/data/ljspeech/wavs/LJ048-0128.wav|tests/data/ljspeech/wavs/LJ048-0128.npy +tests/data/ljspeech/wavs/LJ009-0198.wav|tests/data/ljspeech/wavs/LJ009-0198.npy +tests/data/ljspeech/wavs/LJ038-0147.wav|tests/data/ljspeech/wavs/LJ038-0147.npy +tests/data/ljspeech/wavs/LJ018-0249.wav|tests/data/ljspeech/wavs/LJ018-0249.npy +tests/data/ljspeech/wavs/LJ033-0072.wav|tests/data/ljspeech/wavs/LJ033-0072.npy +tests/data/ljspeech/wavs/LJ006-0304.wav|tests/data/ljspeech/wavs/LJ006-0304.npy +tests/data/ljspeech/wavs/LJ050-0056.wav|tests/data/ljspeech/wavs/LJ050-0056.npy +tests/data/ljspeech/wavs/LJ002-0022.wav|tests/data/ljspeech/wavs/LJ002-0022.npy +tests/data/ljspeech/wavs/LJ032-0028.wav|tests/data/ljspeech/wavs/LJ032-0028.npy +tests/data/ljspeech/wavs/LJ041-0081.wav|tests/data/ljspeech/wavs/LJ041-0081.npy +tests/data/ljspeech/wavs/LJ039-0071.wav|tests/data/ljspeech/wavs/LJ039-0071.npy +tests/data/ljspeech/wavs/LJ009-0189.wav|tests/data/ljspeech/wavs/LJ009-0189.npy +tests/data/ljspeech/wavs/LJ039-0050.wav|tests/data/ljspeech/wavs/LJ039-0050.npy +tests/data/ljspeech/wavs/LJ005-0072.wav|tests/data/ljspeech/wavs/LJ005-0072.npy +tests/data/ljspeech/wavs/LJ029-0143.wav|tests/data/ljspeech/wavs/LJ029-0143.npy +tests/data/ljspeech/wavs/LJ019-0173.wav|tests/data/ljspeech/wavs/LJ019-0173.npy +tests/data/ljspeech/wavs/LJ006-0262.wav|tests/data/ljspeech/wavs/LJ006-0262.npy +tests/data/ljspeech/wavs/LJ030-0207.wav|tests/data/ljspeech/wavs/LJ030-0207.npy +tests/data/ljspeech/wavs/LJ042-0093.wav|tests/data/ljspeech/wavs/LJ042-0093.npy +tests/data/ljspeech/wavs/LJ019-0182.wav|tests/data/ljspeech/wavs/LJ019-0182.npy 
+tests/data/ljspeech/wavs/LJ005-0196.wav|tests/data/ljspeech/wavs/LJ005-0196.npy +tests/data/ljspeech/wavs/LJ014-0225.wav|tests/data/ljspeech/wavs/LJ014-0225.npy +tests/data/ljspeech/wavs/LJ049-0112.wav|tests/data/ljspeech/wavs/LJ049-0112.npy +tests/data/ljspeech/wavs/LJ042-0215.wav|tests/data/ljspeech/wavs/LJ042-0215.npy +tests/data/ljspeech/wavs/LJ038-0185.wav|tests/data/ljspeech/wavs/LJ038-0185.npy +tests/data/ljspeech/wavs/LJ042-0229.wav|tests/data/ljspeech/wavs/LJ042-0229.npy +tests/data/ljspeech/wavs/LJ015-0128.wav|tests/data/ljspeech/wavs/LJ015-0128.npy +tests/data/ljspeech/wavs/LJ026-0042.wav|tests/data/ljspeech/wavs/LJ026-0042.npy +tests/data/ljspeech/wavs/LJ014-0310.wav|tests/data/ljspeech/wavs/LJ014-0310.npy +tests/data/ljspeech/wavs/LJ009-0200.wav|tests/data/ljspeech/wavs/LJ009-0200.npy +tests/data/ljspeech/wavs/LJ025-0021.wav|tests/data/ljspeech/wavs/LJ025-0021.npy +tests/data/ljspeech/wavs/LJ028-0456.wav|tests/data/ljspeech/wavs/LJ028-0456.npy +tests/data/ljspeech/wavs/LJ028-0117.wav|tests/data/ljspeech/wavs/LJ028-0117.npy +tests/data/ljspeech/wavs/LJ028-0163.wav|tests/data/ljspeech/wavs/LJ028-0163.npy +tests/data/ljspeech/wavs/LJ004-0213.wav|tests/data/ljspeech/wavs/LJ004-0213.npy +tests/data/ljspeech/wavs/LJ012-0126.wav|tests/data/ljspeech/wavs/LJ012-0126.npy +tests/data/ljspeech/wavs/LJ024-0095.wav|tests/data/ljspeech/wavs/LJ024-0095.npy +tests/data/ljspeech/wavs/LJ015-0048.wav|tests/data/ljspeech/wavs/LJ015-0048.npy +tests/data/ljspeech/wavs/LJ010-0153.wav|tests/data/ljspeech/wavs/LJ010-0153.npy +tests/data/ljspeech/wavs/LJ001-0031.wav|tests/data/ljspeech/wavs/LJ001-0031.npy +tests/data/ljspeech/wavs/LJ005-0191.wav|tests/data/ljspeech/wavs/LJ005-0191.npy +tests/data/ljspeech/wavs/LJ038-0042.wav|tests/data/ljspeech/wavs/LJ038-0042.npy +tests/data/ljspeech/wavs/LJ041-0176.wav|tests/data/ljspeech/wavs/LJ041-0176.npy +tests/data/ljspeech/wavs/LJ007-0164.wav|tests/data/ljspeech/wavs/LJ007-0164.npy +tests/data/ljspeech/wavs/LJ027-0030.wav|tests/data/ljspeech/wavs/LJ027-0030.npy +tests/data/ljspeech/wavs/LJ027-0164.wav|tests/data/ljspeech/wavs/LJ027-0164.npy +tests/data/ljspeech/wavs/LJ016-0346.wav|tests/data/ljspeech/wavs/LJ016-0346.npy +tests/data/ljspeech/wavs/LJ021-0157.wav|tests/data/ljspeech/wavs/LJ021-0157.npy +tests/data/ljspeech/wavs/LJ007-0159.wav|tests/data/ljspeech/wavs/LJ007-0159.npy +tests/data/ljspeech/wavs/LJ019-0296.wav|tests/data/ljspeech/wavs/LJ019-0296.npy +tests/data/ljspeech/wavs/LJ019-0220.wav|tests/data/ljspeech/wavs/LJ019-0220.npy +tests/data/ljspeech/wavs/LJ002-0324.wav|tests/data/ljspeech/wavs/LJ002-0324.npy +tests/data/ljspeech/wavs/LJ026-0156.wav|tests/data/ljspeech/wavs/LJ026-0156.npy +tests/data/ljspeech/wavs/LJ050-0130.wav|tests/data/ljspeech/wavs/LJ050-0130.npy +tests/data/ljspeech/wavs/LJ037-0047.wav|tests/data/ljspeech/wavs/LJ037-0047.npy +tests/data/ljspeech/wavs/LJ031-0138.wav|tests/data/ljspeech/wavs/LJ031-0138.npy +tests/data/ljspeech/wavs/LJ019-0252.wav|tests/data/ljspeech/wavs/LJ019-0252.npy +tests/data/ljspeech/wavs/LJ050-0117.wav|tests/data/ljspeech/wavs/LJ050-0117.npy +tests/data/ljspeech/wavs/LJ028-0172.wav|tests/data/ljspeech/wavs/LJ028-0172.npy +tests/data/ljspeech/wavs/LJ033-0211.wav|tests/data/ljspeech/wavs/LJ033-0211.npy +tests/data/ljspeech/wavs/LJ013-0200.wav|tests/data/ljspeech/wavs/LJ013-0200.npy +tests/data/ljspeech/wavs/LJ010-0092.wav|tests/data/ljspeech/wavs/LJ010-0092.npy +tests/data/ljspeech/wavs/LJ010-0105.wav|tests/data/ljspeech/wavs/LJ010-0105.npy 
+tests/data/ljspeech/wavs/LJ014-0223.wav|tests/data/ljspeech/wavs/LJ014-0223.npy +tests/data/ljspeech/wavs/LJ015-0016.wav|tests/data/ljspeech/wavs/LJ015-0016.npy +tests/data/ljspeech/wavs/LJ034-0130.wav|tests/data/ljspeech/wavs/LJ034-0130.npy +tests/data/ljspeech/wavs/LJ012-0176.wav|tests/data/ljspeech/wavs/LJ012-0176.npy +tests/data/ljspeech/wavs/LJ006-0059.wav|tests/data/ljspeech/wavs/LJ006-0059.npy +tests/data/ljspeech/wavs/LJ035-0142.wav|tests/data/ljspeech/wavs/LJ035-0142.npy +tests/data/ljspeech/wavs/LJ014-0264.wav|tests/data/ljspeech/wavs/LJ014-0264.npy +tests/data/ljspeech/wavs/LJ043-0036.wav|tests/data/ljspeech/wavs/LJ043-0036.npy +tests/data/ljspeech/wavs/LJ044-0120.wav|tests/data/ljspeech/wavs/LJ044-0120.npy +tests/data/ljspeech/wavs/LJ014-0301.wav|tests/data/ljspeech/wavs/LJ014-0301.npy +tests/data/ljspeech/wavs/LJ021-0001.wav|tests/data/ljspeech/wavs/LJ021-0001.npy +tests/data/ljspeech/wavs/LJ023-0001.wav|tests/data/ljspeech/wavs/LJ023-0001.npy +tests/data/ljspeech/wavs/LJ022-0145.wav|tests/data/ljspeech/wavs/LJ022-0145.npy +tests/data/ljspeech/wavs/LJ023-0115.wav|tests/data/ljspeech/wavs/LJ023-0115.npy +tests/data/ljspeech/wavs/LJ025-0048.wav|tests/data/ljspeech/wavs/LJ025-0048.npy +tests/data/ljspeech/wavs/LJ023-0042.wav|tests/data/ljspeech/wavs/LJ023-0042.npy +tests/data/ljspeech/wavs/LJ049-0046.wav|tests/data/ljspeech/wavs/LJ049-0046.npy +tests/data/ljspeech/wavs/LJ050-0112.wav|tests/data/ljspeech/wavs/LJ050-0112.npy +tests/data/ljspeech/wavs/LJ036-0016.wav|tests/data/ljspeech/wavs/LJ036-0016.npy +tests/data/ljspeech/wavs/LJ033-0209.wav|tests/data/ljspeech/wavs/LJ033-0209.npy +tests/data/ljspeech/wavs/LJ010-0155.wav|tests/data/ljspeech/wavs/LJ010-0155.npy +tests/data/ljspeech/wavs/LJ007-0218.wav|tests/data/ljspeech/wavs/LJ007-0218.npy +tests/data/ljspeech/wavs/LJ035-0197.wav|tests/data/ljspeech/wavs/LJ035-0197.npy +tests/data/ljspeech/wavs/LJ011-0175.wav|tests/data/ljspeech/wavs/LJ011-0175.npy +tests/data/ljspeech/wavs/LJ038-0123.wav|tests/data/ljspeech/wavs/LJ038-0123.npy +tests/data/ljspeech/wavs/LJ040-0079.wav|tests/data/ljspeech/wavs/LJ040-0079.npy +tests/data/ljspeech/wavs/LJ014-0322.wav|tests/data/ljspeech/wavs/LJ014-0322.npy +tests/data/ljspeech/wavs/LJ035-0027.wav|tests/data/ljspeech/wavs/LJ035-0027.npy +tests/data/ljspeech/wavs/LJ013-0132.wav|tests/data/ljspeech/wavs/LJ013-0132.npy +tests/data/ljspeech/wavs/LJ035-0181.wav|tests/data/ljspeech/wavs/LJ035-0181.npy +tests/data/ljspeech/wavs/LJ010-0221.wav|tests/data/ljspeech/wavs/LJ010-0221.npy +tests/data/ljspeech/wavs/LJ050-0133.wav|tests/data/ljspeech/wavs/LJ050-0133.npy +tests/data/ljspeech/wavs/LJ012-0105.wav|tests/data/ljspeech/wavs/LJ012-0105.npy +tests/data/ljspeech/wavs/LJ028-0204.wav|tests/data/ljspeech/wavs/LJ028-0204.npy +tests/data/ljspeech/wavs/LJ003-0147.wav|tests/data/ljspeech/wavs/LJ003-0147.npy +tests/data/ljspeech/wavs/LJ031-0081.wav|tests/data/ljspeech/wavs/LJ031-0081.npy +tests/data/ljspeech/wavs/LJ008-0147.wav|tests/data/ljspeech/wavs/LJ008-0147.npy +tests/data/ljspeech/wavs/LJ011-0273.wav|tests/data/ljspeech/wavs/LJ011-0273.npy +tests/data/ljspeech/wavs/LJ015-0163.wav|tests/data/ljspeech/wavs/LJ015-0163.npy +tests/data/ljspeech/wavs/LJ042-0073.wav|tests/data/ljspeech/wavs/LJ042-0073.npy +tests/data/ljspeech/wavs/LJ026-0145.wav|tests/data/ljspeech/wavs/LJ026-0145.npy +tests/data/ljspeech/wavs/LJ040-0030.wav|tests/data/ljspeech/wavs/LJ040-0030.npy +tests/data/ljspeech/wavs/LJ023-0043.wav|tests/data/ljspeech/wavs/LJ023-0043.npy 
+tests/data/ljspeech/wavs/LJ022-0069.wav|tests/data/ljspeech/wavs/LJ022-0069.npy +tests/data/ljspeech/wavs/LJ025-0040.wav|tests/data/ljspeech/wavs/LJ025-0040.npy +tests/data/ljspeech/wavs/LJ035-0050.wav|tests/data/ljspeech/wavs/LJ035-0050.npy +tests/data/ljspeech/wavs/LJ039-0161.wav|tests/data/ljspeech/wavs/LJ039-0161.npy +tests/data/ljspeech/wavs/LJ047-0119.wav|tests/data/ljspeech/wavs/LJ047-0119.npy +tests/data/ljspeech/wavs/LJ042-0167.wav|tests/data/ljspeech/wavs/LJ042-0167.npy +tests/data/ljspeech/wavs/LJ013-0089.wav|tests/data/ljspeech/wavs/LJ013-0089.npy +tests/data/ljspeech/wavs/LJ005-0151.wav|tests/data/ljspeech/wavs/LJ005-0151.npy +tests/data/ljspeech/wavs/LJ023-0056.wav|tests/data/ljspeech/wavs/LJ023-0056.npy +tests/data/ljspeech/wavs/LJ035-0095.wav|tests/data/ljspeech/wavs/LJ035-0095.npy +tests/data/ljspeech/wavs/LJ015-0144.wav|tests/data/ljspeech/wavs/LJ015-0144.npy +tests/data/ljspeech/wavs/LJ049-0157.wav|tests/data/ljspeech/wavs/LJ049-0157.npy +tests/data/ljspeech/wavs/LJ019-0032.wav|tests/data/ljspeech/wavs/LJ019-0032.npy +tests/data/ljspeech/wavs/LJ025-0141.wav|tests/data/ljspeech/wavs/LJ025-0141.npy +tests/data/ljspeech/wavs/LJ047-0033.wav|tests/data/ljspeech/wavs/LJ047-0033.npy +tests/data/ljspeech/wavs/LJ016-0236.wav|tests/data/ljspeech/wavs/LJ016-0236.npy +tests/data/ljspeech/wavs/LJ050-0080.wav|tests/data/ljspeech/wavs/LJ050-0080.npy +tests/data/ljspeech/wavs/LJ015-0169.wav|tests/data/ljspeech/wavs/LJ015-0169.npy +tests/data/ljspeech/wavs/LJ016-0219.wav|tests/data/ljspeech/wavs/LJ016-0219.npy +tests/data/ljspeech/wavs/LJ028-0429.wav|tests/data/ljspeech/wavs/LJ028-0429.npy +tests/data/ljspeech/wavs/LJ048-0203.wav|tests/data/ljspeech/wavs/LJ048-0203.npy +tests/data/ljspeech/wavs/LJ024-0124.wav|tests/data/ljspeech/wavs/LJ024-0124.npy +tests/data/ljspeech/wavs/LJ016-0166.wav|tests/data/ljspeech/wavs/LJ016-0166.npy +tests/data/ljspeech/wavs/LJ019-0175.wav|tests/data/ljspeech/wavs/LJ019-0175.npy +tests/data/ljspeech/wavs/LJ009-0146.wav|tests/data/ljspeech/wavs/LJ009-0146.npy +tests/data/ljspeech/wavs/LJ008-0007.wav|tests/data/ljspeech/wavs/LJ008-0007.npy +tests/data/ljspeech/wavs/LJ017-0020.wav|tests/data/ljspeech/wavs/LJ017-0020.npy +tests/data/ljspeech/wavs/LJ028-0241.wav|tests/data/ljspeech/wavs/LJ028-0241.npy +tests/data/ljspeech/wavs/LJ037-0204.wav|tests/data/ljspeech/wavs/LJ037-0204.npy +tests/data/ljspeech/wavs/LJ018-0315.wav|tests/data/ljspeech/wavs/LJ018-0315.npy +tests/data/ljspeech/wavs/LJ038-0305.wav|tests/data/ljspeech/wavs/LJ038-0305.npy +tests/data/ljspeech/wavs/LJ036-0098.wav|tests/data/ljspeech/wavs/LJ036-0098.npy +tests/data/ljspeech/wavs/LJ022-0001.wav|tests/data/ljspeech/wavs/LJ022-0001.npy +tests/data/ljspeech/wavs/LJ017-0083.wav|tests/data/ljspeech/wavs/LJ017-0083.npy +tests/data/ljspeech/wavs/LJ016-0254.wav|tests/data/ljspeech/wavs/LJ016-0254.npy +tests/data/ljspeech/wavs/LJ006-0213.wav|tests/data/ljspeech/wavs/LJ006-0213.npy +tests/data/ljspeech/wavs/LJ025-0086.wav|tests/data/ljspeech/wavs/LJ025-0086.npy +tests/data/ljspeech/wavs/LJ031-0087.wav|tests/data/ljspeech/wavs/LJ031-0087.npy +tests/data/ljspeech/wavs/LJ044-0178.wav|tests/data/ljspeech/wavs/LJ044-0178.npy +tests/data/ljspeech/wavs/LJ043-0083.wav|tests/data/ljspeech/wavs/LJ043-0083.npy +tests/data/ljspeech/wavs/LJ048-0024.wav|tests/data/ljspeech/wavs/LJ048-0024.npy +tests/data/ljspeech/wavs/LJ043-0148.wav|tests/data/ljspeech/wavs/LJ043-0148.npy +tests/data/ljspeech/wavs/LJ019-0161.wav|tests/data/ljspeech/wavs/LJ019-0161.npy 
+tests/data/ljspeech/wavs/LJ029-0131.wav|tests/data/ljspeech/wavs/LJ029-0131.npy +tests/data/ljspeech/wavs/LJ045-0152.wav|tests/data/ljspeech/wavs/LJ045-0152.npy +tests/data/ljspeech/wavs/LJ028-0007.wav|tests/data/ljspeech/wavs/LJ028-0007.npy +tests/data/ljspeech/wavs/LJ018-0006.wav|tests/data/ljspeech/wavs/LJ018-0006.npy +tests/data/ljspeech/wavs/LJ008-0065.wav|tests/data/ljspeech/wavs/LJ008-0065.npy +tests/data/ljspeech/wavs/LJ018-0136.wav|tests/data/ljspeech/wavs/LJ018-0136.npy +tests/data/ljspeech/wavs/LJ033-0133.wav|tests/data/ljspeech/wavs/LJ033-0133.npy +tests/data/ljspeech/wavs/LJ037-0117.wav|tests/data/ljspeech/wavs/LJ037-0117.npy +tests/data/ljspeech/wavs/LJ040-0214.wav|tests/data/ljspeech/wavs/LJ040-0214.npy +tests/data/ljspeech/wavs/LJ022-0067.wav|tests/data/ljspeech/wavs/LJ022-0067.npy +tests/data/ljspeech/wavs/LJ023-0124.wav|tests/data/ljspeech/wavs/LJ023-0124.npy +tests/data/ljspeech/wavs/LJ011-0196.wav|tests/data/ljspeech/wavs/LJ011-0196.npy +tests/data/ljspeech/wavs/LJ017-0136.wav|tests/data/ljspeech/wavs/LJ017-0136.npy +tests/data/ljspeech/wavs/LJ022-0010.wav|tests/data/ljspeech/wavs/LJ022-0010.npy +tests/data/ljspeech/wavs/LJ004-0210.wav|tests/data/ljspeech/wavs/LJ004-0210.npy +tests/data/ljspeech/wavs/LJ021-0027.wav|tests/data/ljspeech/wavs/LJ021-0027.npy +tests/data/ljspeech/wavs/LJ035-0166.wav|tests/data/ljspeech/wavs/LJ035-0166.npy +tests/data/ljspeech/wavs/LJ032-0089.wav|tests/data/ljspeech/wavs/LJ032-0089.npy +tests/data/ljspeech/wavs/LJ031-0023.wav|tests/data/ljspeech/wavs/LJ031-0023.npy +tests/data/ljspeech/wavs/LJ019-0307.wav|tests/data/ljspeech/wavs/LJ019-0307.npy +tests/data/ljspeech/wavs/LJ032-0086.wav|tests/data/ljspeech/wavs/LJ032-0086.npy +tests/data/ljspeech/wavs/LJ036-0160.wav|tests/data/ljspeech/wavs/LJ036-0160.npy +tests/data/ljspeech/wavs/LJ032-0087.wav|tests/data/ljspeech/wavs/LJ032-0087.npy +tests/data/ljspeech/wavs/LJ030-0063.wav|tests/data/ljspeech/wavs/LJ030-0063.npy +tests/data/ljspeech/wavs/LJ028-0273.wav|tests/data/ljspeech/wavs/LJ028-0273.npy +tests/data/ljspeech/wavs/LJ022-0061.wav|tests/data/ljspeech/wavs/LJ022-0061.npy +tests/data/ljspeech/wavs/LJ036-0075.wav|tests/data/ljspeech/wavs/LJ036-0075.npy +tests/data/ljspeech/wavs/LJ028-0034.wav|tests/data/ljspeech/wavs/LJ028-0034.npy +tests/data/ljspeech/wavs/LJ042-0082.wav|tests/data/ljspeech/wavs/LJ042-0082.npy +tests/data/ljspeech/wavs/LJ018-0295.wav|tests/data/ljspeech/wavs/LJ018-0295.npy +tests/data/ljspeech/wavs/LJ028-0371.wav|tests/data/ljspeech/wavs/LJ028-0371.npy +tests/data/ljspeech/wavs/LJ004-0176.wav|tests/data/ljspeech/wavs/LJ004-0176.npy +tests/data/ljspeech/wavs/LJ048-0282.wav|tests/data/ljspeech/wavs/LJ048-0282.npy +tests/data/ljspeech/wavs/LJ014-0262.wav|tests/data/ljspeech/wavs/LJ014-0262.npy +tests/data/ljspeech/wavs/LJ031-0083.wav|tests/data/ljspeech/wavs/LJ031-0083.npy +tests/data/ljspeech/wavs/LJ050-0014.wav|tests/data/ljspeech/wavs/LJ050-0014.npy +tests/data/ljspeech/wavs/LJ035-0112.wav|tests/data/ljspeech/wavs/LJ035-0112.npy +tests/data/ljspeech/wavs/LJ020-0014.wav|tests/data/ljspeech/wavs/LJ020-0014.npy +tests/data/ljspeech/wavs/LJ019-0330.wav|tests/data/ljspeech/wavs/LJ019-0330.npy +tests/data/ljspeech/wavs/LJ011-0179.wav|tests/data/ljspeech/wavs/LJ011-0179.npy +tests/data/ljspeech/wavs/LJ028-0468.wav|tests/data/ljspeech/wavs/LJ028-0468.npy +tests/data/ljspeech/wavs/LJ050-0007.wav|tests/data/ljspeech/wavs/LJ050-0007.npy +tests/data/ljspeech/wavs/LJ005-0183.wav|tests/data/ljspeech/wavs/LJ005-0183.npy 
+tests/data/ljspeech/wavs/LJ020-0051.wav|tests/data/ljspeech/wavs/LJ020-0051.npy +tests/data/ljspeech/wavs/LJ025-0116.wav|tests/data/ljspeech/wavs/LJ025-0116.npy +tests/data/ljspeech/wavs/LJ010-0163.wav|tests/data/ljspeech/wavs/LJ010-0163.npy +tests/data/ljspeech/wavs/LJ010-0309.wav|tests/data/ljspeech/wavs/LJ010-0309.npy +tests/data/ljspeech/wavs/LJ016-0201.wav|tests/data/ljspeech/wavs/LJ016-0201.npy +tests/data/ljspeech/wavs/LJ030-0181.wav|tests/data/ljspeech/wavs/LJ030-0181.npy +tests/data/ljspeech/wavs/LJ031-0009.wav|tests/data/ljspeech/wavs/LJ031-0009.npy +tests/data/ljspeech/wavs/LJ046-0183.wav|tests/data/ljspeech/wavs/LJ046-0183.npy +tests/data/ljspeech/wavs/LJ010-0047.wav|tests/data/ljspeech/wavs/LJ010-0047.npy +tests/data/ljspeech/wavs/LJ027-0071.wav|tests/data/ljspeech/wavs/LJ027-0071.npy +tests/data/ljspeech/wavs/LJ018-0051.wav|tests/data/ljspeech/wavs/LJ018-0051.npy +tests/data/ljspeech/wavs/LJ036-0050.wav|tests/data/ljspeech/wavs/LJ036-0050.npy +tests/data/ljspeech/wavs/LJ040-0207.wav|tests/data/ljspeech/wavs/LJ040-0207.npy +tests/data/ljspeech/wavs/LJ019-0006.wav|tests/data/ljspeech/wavs/LJ019-0006.npy +tests/data/ljspeech/wavs/LJ014-0176.wav|tests/data/ljspeech/wavs/LJ014-0176.npy +tests/data/ljspeech/wavs/LJ047-0235.wav|tests/data/ljspeech/wavs/LJ047-0235.npy +tests/data/ljspeech/wavs/LJ006-0187.wav|tests/data/ljspeech/wavs/LJ006-0187.npy +tests/data/ljspeech/wavs/LJ035-0009.wav|tests/data/ljspeech/wavs/LJ035-0009.npy +tests/data/ljspeech/wavs/LJ036-0213.wav|tests/data/ljspeech/wavs/LJ036-0213.npy +tests/data/ljspeech/wavs/LJ043-0114.wav|tests/data/ljspeech/wavs/LJ043-0114.npy +tests/data/ljspeech/wavs/LJ008-0080.wav|tests/data/ljspeech/wavs/LJ008-0080.npy +tests/data/ljspeech/wavs/LJ016-0383.wav|tests/data/ljspeech/wavs/LJ016-0383.npy +tests/data/ljspeech/wavs/LJ017-0214.wav|tests/data/ljspeech/wavs/LJ017-0214.npy +tests/data/ljspeech/wavs/LJ028-0317.wav|tests/data/ljspeech/wavs/LJ028-0317.npy +tests/data/ljspeech/wavs/LJ028-0297.wav|tests/data/ljspeech/wavs/LJ028-0297.npy +tests/data/ljspeech/wavs/LJ014-0107.wav|tests/data/ljspeech/wavs/LJ014-0107.npy +tests/data/ljspeech/wavs/LJ032-0010.wav|tests/data/ljspeech/wavs/LJ032-0010.npy +tests/data/ljspeech/wavs/LJ022-0125.wav|tests/data/ljspeech/wavs/LJ022-0125.npy +tests/data/ljspeech/wavs/LJ006-0078.wav|tests/data/ljspeech/wavs/LJ006-0078.npy +tests/data/ljspeech/wavs/LJ003-0216.wav|tests/data/ljspeech/wavs/LJ003-0216.npy +tests/data/ljspeech/wavs/LJ007-0127.wav|tests/data/ljspeech/wavs/LJ007-0127.npy +tests/data/ljspeech/wavs/LJ030-0224.wav|tests/data/ljspeech/wavs/LJ030-0224.npy +tests/data/ljspeech/wavs/LJ028-0142.wav|tests/data/ljspeech/wavs/LJ028-0142.npy +tests/data/ljspeech/wavs/LJ033-0048.wav|tests/data/ljspeech/wavs/LJ033-0048.npy +tests/data/ljspeech/wavs/LJ003-0345.wav|tests/data/ljspeech/wavs/LJ003-0345.npy +tests/data/ljspeech/wavs/LJ019-0100.wav|tests/data/ljspeech/wavs/LJ019-0100.npy +tests/data/ljspeech/wavs/LJ016-0400.wav|tests/data/ljspeech/wavs/LJ016-0400.npy +tests/data/ljspeech/wavs/LJ028-0004.wav|tests/data/ljspeech/wavs/LJ028-0004.npy +tests/data/ljspeech/wavs/LJ044-0175.wav|tests/data/ljspeech/wavs/LJ044-0175.npy +tests/data/ljspeech/wavs/LJ046-0021.wav|tests/data/ljspeech/wavs/LJ046-0021.npy +tests/data/ljspeech/wavs/LJ037-0185.wav|tests/data/ljspeech/wavs/LJ037-0185.npy +tests/data/ljspeech/wavs/LJ034-0055.wav|tests/data/ljspeech/wavs/LJ034-0055.npy +tests/data/ljspeech/wavs/LJ044-0073.wav|tests/data/ljspeech/wavs/LJ044-0073.npy 
+tests/data/ljspeech/wavs/LJ027-0075.wav|tests/data/ljspeech/wavs/LJ027-0075.npy +tests/data/ljspeech/wavs/LJ019-0071.wav|tests/data/ljspeech/wavs/LJ019-0071.npy +tests/data/ljspeech/wavs/LJ025-0173.wav|tests/data/ljspeech/wavs/LJ025-0173.npy +tests/data/ljspeech/wavs/LJ035-0156.wav|tests/data/ljspeech/wavs/LJ035-0156.npy +tests/data/ljspeech/wavs/LJ019-0115.wav|tests/data/ljspeech/wavs/LJ019-0115.npy +tests/data/ljspeech/wavs/LJ032-0237.wav|tests/data/ljspeech/wavs/LJ032-0237.npy +tests/data/ljspeech/wavs/LJ021-0084.wav|tests/data/ljspeech/wavs/LJ021-0084.npy +tests/data/ljspeech/wavs/LJ032-0020.wav|tests/data/ljspeech/wavs/LJ032-0020.npy +tests/data/ljspeech/wavs/LJ043-0129.wav|tests/data/ljspeech/wavs/LJ043-0129.npy +tests/data/ljspeech/wavs/LJ010-0014.wav|tests/data/ljspeech/wavs/LJ010-0014.npy +tests/data/ljspeech/wavs/LJ015-0137.wav|tests/data/ljspeech/wavs/LJ015-0137.npy +tests/data/ljspeech/wavs/LJ019-0286.wav|tests/data/ljspeech/wavs/LJ019-0286.npy +tests/data/ljspeech/wavs/LJ003-0324.wav|tests/data/ljspeech/wavs/LJ003-0324.npy +tests/data/ljspeech/wavs/LJ030-0237.wav|tests/data/ljspeech/wavs/LJ030-0237.npy +tests/data/ljspeech/wavs/LJ046-0010.wav|tests/data/ljspeech/wavs/LJ046-0010.npy +tests/data/ljspeech/wavs/LJ002-0300.wav|tests/data/ljspeech/wavs/LJ002-0300.npy +tests/data/ljspeech/wavs/LJ013-0182.wav|tests/data/ljspeech/wavs/LJ013-0182.npy +tests/data/ljspeech/wavs/LJ006-0055.wav|tests/data/ljspeech/wavs/LJ006-0055.npy +tests/data/ljspeech/wavs/LJ015-0188.wav|tests/data/ljspeech/wavs/LJ015-0188.npy +tests/data/ljspeech/wavs/LJ049-0161.wav|tests/data/ljspeech/wavs/LJ049-0161.npy +tests/data/ljspeech/wavs/LJ017-0276.wav|tests/data/ljspeech/wavs/LJ017-0276.npy +tests/data/ljspeech/wavs/LJ001-0113.wav|tests/data/ljspeech/wavs/LJ001-0113.npy +tests/data/ljspeech/wavs/LJ044-0150.wav|tests/data/ljspeech/wavs/LJ044-0150.npy +tests/data/ljspeech/wavs/LJ014-0099.wav|tests/data/ljspeech/wavs/LJ014-0099.npy +tests/data/ljspeech/wavs/LJ028-0514.wav|tests/data/ljspeech/wavs/LJ028-0514.npy +tests/data/ljspeech/wavs/LJ028-0104.wav|tests/data/ljspeech/wavs/LJ028-0104.npy +tests/data/ljspeech/wavs/LJ003-0064.wav|tests/data/ljspeech/wavs/LJ003-0064.npy +tests/data/ljspeech/wavs/LJ002-0013.wav|tests/data/ljspeech/wavs/LJ002-0013.npy +tests/data/ljspeech/wavs/LJ040-0235.wav|tests/data/ljspeech/wavs/LJ040-0235.npy +tests/data/ljspeech/wavs/LJ039-0135.wav|tests/data/ljspeech/wavs/LJ039-0135.npy +tests/data/ljspeech/wavs/LJ014-0233.wav|tests/data/ljspeech/wavs/LJ014-0233.npy +tests/data/ljspeech/wavs/LJ048-0073.wav|tests/data/ljspeech/wavs/LJ048-0073.npy +tests/data/ljspeech/wavs/LJ036-0196.wav|tests/data/ljspeech/wavs/LJ036-0196.npy +tests/data/ljspeech/wavs/LJ047-0028.wav|tests/data/ljspeech/wavs/LJ047-0028.npy +tests/data/ljspeech/wavs/LJ031-0035.wav|tests/data/ljspeech/wavs/LJ031-0035.npy +tests/data/ljspeech/wavs/LJ046-0126.wav|tests/data/ljspeech/wavs/LJ046-0126.npy +tests/data/ljspeech/wavs/LJ018-0125.wav|tests/data/ljspeech/wavs/LJ018-0125.npy +tests/data/ljspeech/wavs/LJ026-0083.wav|tests/data/ljspeech/wavs/LJ026-0083.npy +tests/data/ljspeech/wavs/LJ018-0349.wav|tests/data/ljspeech/wavs/LJ018-0349.npy +tests/data/ljspeech/wavs/LJ042-0240.wav|tests/data/ljspeech/wavs/LJ042-0240.npy +tests/data/ljspeech/wavs/LJ022-0120.wav|tests/data/ljspeech/wavs/LJ022-0120.npy +tests/data/ljspeech/wavs/LJ030-0088.wav|tests/data/ljspeech/wavs/LJ030-0088.npy +tests/data/ljspeech/wavs/LJ047-0042.wav|tests/data/ljspeech/wavs/LJ047-0042.npy 
+tests/data/ljspeech/wavs/LJ039-0039.wav|tests/data/ljspeech/wavs/LJ039-0039.npy +tests/data/ljspeech/wavs/LJ042-0121.wav|tests/data/ljspeech/wavs/LJ042-0121.npy +tests/data/ljspeech/wavs/LJ011-0168.wav|tests/data/ljspeech/wavs/LJ011-0168.npy +tests/data/ljspeech/wavs/LJ028-0356.wav|tests/data/ljspeech/wavs/LJ028-0356.npy +tests/data/ljspeech/wavs/LJ028-0178.wav|tests/data/ljspeech/wavs/LJ028-0178.npy +tests/data/ljspeech/wavs/LJ018-0200.wav|tests/data/ljspeech/wavs/LJ018-0200.npy +tests/data/ljspeech/wavs/LJ016-0331.wav|tests/data/ljspeech/wavs/LJ016-0331.npy +tests/data/ljspeech/wavs/LJ019-0227.wav|tests/data/ljspeech/wavs/LJ019-0227.npy +tests/data/ljspeech/wavs/LJ007-0181.wav|tests/data/ljspeech/wavs/LJ007-0181.npy +tests/data/ljspeech/wavs/LJ034-0193.wav|tests/data/ljspeech/wavs/LJ034-0193.npy +tests/data/ljspeech/wavs/LJ026-0030.wav|tests/data/ljspeech/wavs/LJ026-0030.npy +tests/data/ljspeech/wavs/LJ018-0187.wav|tests/data/ljspeech/wavs/LJ018-0187.npy +tests/data/ljspeech/wavs/LJ041-0172.wav|tests/data/ljspeech/wavs/LJ041-0172.npy +tests/data/ljspeech/wavs/LJ003-0343.wav|tests/data/ljspeech/wavs/LJ003-0343.npy +tests/data/ljspeech/wavs/LJ009-0228.wav|tests/data/ljspeech/wavs/LJ009-0228.npy +tests/data/ljspeech/wavs/LJ001-0046.wav|tests/data/ljspeech/wavs/LJ001-0046.npy +tests/data/ljspeech/wavs/LJ030-0196.wav|tests/data/ljspeech/wavs/LJ030-0196.npy +tests/data/ljspeech/wavs/LJ036-0017.wav|tests/data/ljspeech/wavs/LJ036-0017.npy +tests/data/ljspeech/wavs/LJ034-0196.wav|tests/data/ljspeech/wavs/LJ034-0196.npy +tests/data/ljspeech/wavs/LJ026-0120.wav|tests/data/ljspeech/wavs/LJ026-0120.npy +tests/data/ljspeech/wavs/LJ002-0081.wav|tests/data/ljspeech/wavs/LJ002-0081.npy +tests/data/ljspeech/wavs/LJ037-0009.wav|tests/data/ljspeech/wavs/LJ037-0009.npy +tests/data/ljspeech/wavs/LJ014-0078.wav|tests/data/ljspeech/wavs/LJ014-0078.npy +tests/data/ljspeech/wavs/LJ026-0020.wav|tests/data/ljspeech/wavs/LJ026-0020.npy +tests/data/ljspeech/wavs/LJ033-0154.wav|tests/data/ljspeech/wavs/LJ033-0154.npy +tests/data/ljspeech/wavs/LJ016-0403.wav|tests/data/ljspeech/wavs/LJ016-0403.npy +tests/data/ljspeech/wavs/LJ011-0141.wav|tests/data/ljspeech/wavs/LJ011-0141.npy +tests/data/ljspeech/wavs/LJ010-0184.wav|tests/data/ljspeech/wavs/LJ010-0184.npy +tests/data/ljspeech/wavs/LJ011-0104.wav|tests/data/ljspeech/wavs/LJ011-0104.npy +tests/data/ljspeech/wavs/LJ001-0146.wav|tests/data/ljspeech/wavs/LJ001-0146.npy +tests/data/ljspeech/wavs/LJ010-0204.wav|tests/data/ljspeech/wavs/LJ010-0204.npy +tests/data/ljspeech/wavs/LJ036-0073.wav|tests/data/ljspeech/wavs/LJ036-0073.npy +tests/data/ljspeech/wavs/LJ018-0162.wav|tests/data/ljspeech/wavs/LJ018-0162.npy +tests/data/ljspeech/wavs/LJ034-0107.wav|tests/data/ljspeech/wavs/LJ034-0107.npy +tests/data/ljspeech/wavs/LJ045-0217.wav|tests/data/ljspeech/wavs/LJ045-0217.npy +tests/data/ljspeech/wavs/LJ008-0311.wav|tests/data/ljspeech/wavs/LJ008-0311.npy +tests/data/ljspeech/wavs/LJ032-0085.wav|tests/data/ljspeech/wavs/LJ032-0085.npy +tests/data/ljspeech/wavs/LJ012-0192.wav|tests/data/ljspeech/wavs/LJ012-0192.npy +tests/data/ljspeech/wavs/LJ035-0052.wav|tests/data/ljspeech/wavs/LJ035-0052.npy +tests/data/ljspeech/wavs/LJ014-0074.wav|tests/data/ljspeech/wavs/LJ014-0074.npy +tests/data/ljspeech/wavs/LJ041-0028.wav|tests/data/ljspeech/wavs/LJ041-0028.npy +tests/data/ljspeech/wavs/LJ005-0069.wav|tests/data/ljspeech/wavs/LJ005-0069.npy +tests/data/ljspeech/wavs/LJ007-0068.wav|tests/data/ljspeech/wavs/LJ007-0068.npy 
+tests/data/ljspeech/wavs/LJ016-0385.wav|tests/data/ljspeech/wavs/LJ016-0385.npy +tests/data/ljspeech/wavs/LJ028-0285.wav|tests/data/ljspeech/wavs/LJ028-0285.npy +tests/data/ljspeech/wavs/LJ013-0025.wav|tests/data/ljspeech/wavs/LJ013-0025.npy +tests/data/ljspeech/wavs/LJ018-0075.wav|tests/data/ljspeech/wavs/LJ018-0075.npy +tests/data/ljspeech/wavs/LJ003-0009.wav|tests/data/ljspeech/wavs/LJ003-0009.npy +tests/data/ljspeech/wavs/LJ010-0307.wav|tests/data/ljspeech/wavs/LJ010-0307.npy +tests/data/ljspeech/wavs/LJ039-0204.wav|tests/data/ljspeech/wavs/LJ039-0204.npy +tests/data/ljspeech/wavs/LJ041-0150.wav|tests/data/ljspeech/wavs/LJ041-0150.npy +tests/data/ljspeech/wavs/LJ039-0206.wav|tests/data/ljspeech/wavs/LJ039-0206.npy +tests/data/ljspeech/wavs/LJ043-0186.wav|tests/data/ljspeech/wavs/LJ043-0186.npy +tests/data/ljspeech/wavs/LJ050-0038.wav|tests/data/ljspeech/wavs/LJ050-0038.npy +tests/data/ljspeech/wavs/LJ047-0221.wav|tests/data/ljspeech/wavs/LJ047-0221.npy +tests/data/ljspeech/wavs/LJ023-0099.wav|tests/data/ljspeech/wavs/LJ023-0099.npy +tests/data/ljspeech/wavs/LJ030-0252.wav|tests/data/ljspeech/wavs/LJ030-0252.npy +tests/data/ljspeech/wavs/LJ025-0122.wav|tests/data/ljspeech/wavs/LJ025-0122.npy +tests/data/ljspeech/wavs/LJ048-0285.wav|tests/data/ljspeech/wavs/LJ048-0285.npy +tests/data/ljspeech/wavs/LJ035-0189.wav|tests/data/ljspeech/wavs/LJ035-0189.npy +tests/data/ljspeech/wavs/LJ045-0032.wav|tests/data/ljspeech/wavs/LJ045-0032.npy +tests/data/ljspeech/wavs/LJ024-0013.wav|tests/data/ljspeech/wavs/LJ024-0013.npy +tests/data/ljspeech/wavs/LJ005-0188.wav|tests/data/ljspeech/wavs/LJ005-0188.npy +tests/data/ljspeech/wavs/LJ009-0283.wav|tests/data/ljspeech/wavs/LJ009-0283.npy +tests/data/ljspeech/wavs/LJ046-0133.wav|tests/data/ljspeech/wavs/LJ046-0133.npy +tests/data/ljspeech/wavs/LJ042-0028.wav|tests/data/ljspeech/wavs/LJ042-0028.npy +tests/data/ljspeech/wavs/LJ015-0040.wav|tests/data/ljspeech/wavs/LJ015-0040.npy +tests/data/ljspeech/wavs/LJ043-0013.wav|tests/data/ljspeech/wavs/LJ043-0013.npy +tests/data/ljspeech/wavs/LJ003-0098.wav|tests/data/ljspeech/wavs/LJ003-0098.npy +tests/data/ljspeech/wavs/LJ028-0518.wav|tests/data/ljspeech/wavs/LJ028-0518.npy +tests/data/ljspeech/wavs/LJ016-0020.wav|tests/data/ljspeech/wavs/LJ016-0020.npy +tests/data/ljspeech/wavs/LJ025-0144.wav|tests/data/ljspeech/wavs/LJ025-0144.npy +tests/data/ljspeech/wavs/LJ017-0115.wav|tests/data/ljspeech/wavs/LJ017-0115.npy +tests/data/ljspeech/wavs/LJ022-0036.wav|tests/data/ljspeech/wavs/LJ022-0036.npy +tests/data/ljspeech/wavs/LJ006-0056.wav|tests/data/ljspeech/wavs/LJ006-0056.npy +tests/data/ljspeech/wavs/LJ039-0173.wav|tests/data/ljspeech/wavs/LJ039-0173.npy +tests/data/ljspeech/wavs/LJ008-0028.wav|tests/data/ljspeech/wavs/LJ008-0028.npy +tests/data/ljspeech/wavs/LJ049-0008.wav|tests/data/ljspeech/wavs/LJ049-0008.npy +tests/data/ljspeech/wavs/LJ003-0118.wav|tests/data/ljspeech/wavs/LJ003-0118.npy +tests/data/ljspeech/wavs/LJ013-0053.wav|tests/data/ljspeech/wavs/LJ013-0053.npy +tests/data/ljspeech/wavs/LJ037-0196.wav|tests/data/ljspeech/wavs/LJ037-0196.npy +tests/data/ljspeech/wavs/LJ033-0037.wav|tests/data/ljspeech/wavs/LJ033-0037.npy +tests/data/ljspeech/wavs/LJ010-0302.wav|tests/data/ljspeech/wavs/LJ010-0302.npy +tests/data/ljspeech/wavs/LJ041-0149.wav|tests/data/ljspeech/wavs/LJ041-0149.npy +tests/data/ljspeech/wavs/LJ004-0045.wav|tests/data/ljspeech/wavs/LJ004-0045.npy +tests/data/ljspeech/wavs/LJ004-0021.wav|tests/data/ljspeech/wavs/LJ004-0021.npy 
+tests/data/ljspeech/wavs/LJ039-0148.wav|tests/data/ljspeech/wavs/LJ039-0148.npy +tests/data/ljspeech/wavs/LJ023-0019.wav|tests/data/ljspeech/wavs/LJ023-0019.npy +tests/data/ljspeech/wavs/LJ003-0247.wav|tests/data/ljspeech/wavs/LJ003-0247.npy +tests/data/ljspeech/wavs/LJ019-0164.wav|tests/data/ljspeech/wavs/LJ019-0164.npy +tests/data/ljspeech/wavs/LJ029-0204.wav|tests/data/ljspeech/wavs/LJ029-0204.npy +tests/data/ljspeech/wavs/LJ013-0171.wav|tests/data/ljspeech/wavs/LJ013-0171.npy +tests/data/ljspeech/wavs/LJ010-0259.wav|tests/data/ljspeech/wavs/LJ010-0259.npy +tests/data/ljspeech/wavs/LJ034-0149.wav|tests/data/ljspeech/wavs/LJ034-0149.npy +tests/data/ljspeech/wavs/LJ024-0114.wav|tests/data/ljspeech/wavs/LJ024-0114.npy +tests/data/ljspeech/wavs/LJ027-0067.wav|tests/data/ljspeech/wavs/LJ027-0067.npy +tests/data/ljspeech/wavs/LJ015-0203.wav|tests/data/ljspeech/wavs/LJ015-0203.npy +tests/data/ljspeech/wavs/LJ028-0156.wav|tests/data/ljspeech/wavs/LJ028-0156.npy +tests/data/ljspeech/wavs/LJ035-0104.wav|tests/data/ljspeech/wavs/LJ035-0104.npy +tests/data/ljspeech/wavs/LJ030-0241.wav|tests/data/ljspeech/wavs/LJ030-0241.npy +tests/data/ljspeech/wavs/LJ050-0134.wav|tests/data/ljspeech/wavs/LJ050-0134.npy +tests/data/ljspeech/wavs/LJ028-0228.wav|tests/data/ljspeech/wavs/LJ028-0228.npy +tests/data/ljspeech/wavs/LJ019-0111.wav|tests/data/ljspeech/wavs/LJ019-0111.npy +tests/data/ljspeech/wavs/LJ004-0057.wav|tests/data/ljspeech/wavs/LJ004-0057.npy +tests/data/ljspeech/wavs/LJ017-0148.wav|tests/data/ljspeech/wavs/LJ017-0148.npy +tests/data/ljspeech/wavs/LJ050-0033.wav|tests/data/ljspeech/wavs/LJ050-0033.npy +tests/data/ljspeech/wavs/LJ019-0063.wav|tests/data/ljspeech/wavs/LJ019-0063.npy +tests/data/ljspeech/wavs/LJ020-0017.wav|tests/data/ljspeech/wavs/LJ020-0017.npy +tests/data/ljspeech/wavs/LJ035-0182.wav|tests/data/ljspeech/wavs/LJ035-0182.npy +tests/data/ljspeech/wavs/LJ006-0121.wav|tests/data/ljspeech/wavs/LJ006-0121.npy +tests/data/ljspeech/wavs/LJ028-0154.wav|tests/data/ljspeech/wavs/LJ028-0154.npy +tests/data/ljspeech/wavs/LJ015-0291.wav|tests/data/ljspeech/wavs/LJ015-0291.npy +tests/data/ljspeech/wavs/LJ002-0160.wav|tests/data/ljspeech/wavs/LJ002-0160.npy +tests/data/ljspeech/wavs/LJ008-0025.wav|tests/data/ljspeech/wavs/LJ008-0025.npy +tests/data/ljspeech/wavs/LJ016-0202.wav|tests/data/ljspeech/wavs/LJ016-0202.npy +tests/data/ljspeech/wavs/LJ004-0134.wav|tests/data/ljspeech/wavs/LJ004-0134.npy +tests/data/ljspeech/wavs/LJ018-0391.wav|tests/data/ljspeech/wavs/LJ018-0391.npy +tests/data/ljspeech/wavs/LJ042-0173.wav|tests/data/ljspeech/wavs/LJ042-0173.npy +tests/data/ljspeech/wavs/LJ016-0002.wav|tests/data/ljspeech/wavs/LJ016-0002.npy +tests/data/ljspeech/wavs/LJ019-0174.wav|tests/data/ljspeech/wavs/LJ019-0174.npy +tests/data/ljspeech/wavs/LJ050-0207.wav|tests/data/ljspeech/wavs/LJ050-0207.npy +tests/data/ljspeech/wavs/LJ038-0067.wav|tests/data/ljspeech/wavs/LJ038-0067.npy +tests/data/ljspeech/wavs/LJ048-0007.wav|tests/data/ljspeech/wavs/LJ048-0007.npy +tests/data/ljspeech/wavs/LJ005-0060.wav|tests/data/ljspeech/wavs/LJ005-0060.npy +tests/data/ljspeech/wavs/LJ001-0140.wav|tests/data/ljspeech/wavs/LJ001-0140.npy +tests/data/ljspeech/wavs/LJ012-0059.wav|tests/data/ljspeech/wavs/LJ012-0059.npy +tests/data/ljspeech/wavs/LJ015-0191.wav|tests/data/ljspeech/wavs/LJ015-0191.npy +tests/data/ljspeech/wavs/LJ017-0030.wav|tests/data/ljspeech/wavs/LJ017-0030.npy +tests/data/ljspeech/wavs/LJ021-0103.wav|tests/data/ljspeech/wavs/LJ021-0103.npy 
+tests/data/ljspeech/wavs/LJ017-0141.wav|tests/data/ljspeech/wavs/LJ017-0141.npy +tests/data/ljspeech/wavs/LJ007-0124.wav|tests/data/ljspeech/wavs/LJ007-0124.npy +tests/data/ljspeech/wavs/LJ017-0119.wav|tests/data/ljspeech/wavs/LJ017-0119.npy +tests/data/ljspeech/wavs/LJ038-0252.wav|tests/data/ljspeech/wavs/LJ038-0252.npy +tests/data/ljspeech/wavs/LJ012-0134.wav|tests/data/ljspeech/wavs/LJ012-0134.npy +tests/data/ljspeech/wavs/LJ001-0026.wav|tests/data/ljspeech/wavs/LJ001-0026.npy +tests/data/ljspeech/wavs/LJ016-0213.wav|tests/data/ljspeech/wavs/LJ016-0213.npy +tests/data/ljspeech/wavs/LJ004-0094.wav|tests/data/ljspeech/wavs/LJ004-0094.npy +tests/data/ljspeech/wavs/LJ028-0039.wav|tests/data/ljspeech/wavs/LJ028-0039.npy +tests/data/ljspeech/wavs/LJ028-0042.wav|tests/data/ljspeech/wavs/LJ028-0042.npy +tests/data/ljspeech/wavs/LJ050-0058.wav|tests/data/ljspeech/wavs/LJ050-0058.npy +tests/data/ljspeech/wavs/LJ014-0108.wav|tests/data/ljspeech/wavs/LJ014-0108.npy +tests/data/ljspeech/wavs/LJ015-0164.wav|tests/data/ljspeech/wavs/LJ015-0164.npy +tests/data/ljspeech/wavs/LJ040-0101.wav|tests/data/ljspeech/wavs/LJ040-0101.npy +tests/data/ljspeech/wavs/LJ009-0080.wav|tests/data/ljspeech/wavs/LJ009-0080.npy +tests/data/ljspeech/wavs/LJ039-0158.wav|tests/data/ljspeech/wavs/LJ039-0158.npy +tests/data/ljspeech/wavs/LJ035-0162.wav|tests/data/ljspeech/wavs/LJ035-0162.npy +tests/data/ljspeech/wavs/LJ019-0001.wav|tests/data/ljspeech/wavs/LJ019-0001.npy +tests/data/ljspeech/wavs/LJ030-0205.wav|tests/data/ljspeech/wavs/LJ030-0205.npy +tests/data/ljspeech/wavs/LJ039-0069.wav|tests/data/ljspeech/wavs/LJ039-0069.npy +tests/data/ljspeech/wavs/LJ037-0190.wav|tests/data/ljspeech/wavs/LJ037-0190.npy +tests/data/ljspeech/wavs/LJ002-0206.wav|tests/data/ljspeech/wavs/LJ002-0206.npy +tests/data/ljspeech/wavs/LJ042-0041.wav|tests/data/ljspeech/wavs/LJ042-0041.npy +tests/data/ljspeech/wavs/LJ032-0161.wav|tests/data/ljspeech/wavs/LJ032-0161.npy +tests/data/ljspeech/wavs/LJ024-0052.wav|tests/data/ljspeech/wavs/LJ024-0052.npy +tests/data/ljspeech/wavs/LJ027-0077.wav|tests/data/ljspeech/wavs/LJ027-0077.npy +tests/data/ljspeech/wavs/LJ010-0013.wav|tests/data/ljspeech/wavs/LJ010-0013.npy +tests/data/ljspeech/wavs/LJ013-0219.wav|tests/data/ljspeech/wavs/LJ013-0219.npy +tests/data/ljspeech/wavs/LJ047-0229.wav|tests/data/ljspeech/wavs/LJ047-0229.npy +tests/data/ljspeech/wavs/LJ015-0158.wav|tests/data/ljspeech/wavs/LJ015-0158.npy +tests/data/ljspeech/wavs/LJ022-0096.wav|tests/data/ljspeech/wavs/LJ022-0096.npy +tests/data/ljspeech/wavs/LJ006-0188.wav|tests/data/ljspeech/wavs/LJ006-0188.npy +tests/data/ljspeech/wavs/LJ014-0139.wav|tests/data/ljspeech/wavs/LJ014-0139.npy +tests/data/ljspeech/wavs/LJ015-0140.wav|tests/data/ljspeech/wavs/LJ015-0140.npy +tests/data/ljspeech/wavs/LJ003-0077.wav|tests/data/ljspeech/wavs/LJ003-0077.npy +tests/data/ljspeech/wavs/LJ020-0036.wav|tests/data/ljspeech/wavs/LJ020-0036.npy +tests/data/ljspeech/wavs/LJ042-0182.wav|tests/data/ljspeech/wavs/LJ042-0182.npy +tests/data/ljspeech/wavs/LJ013-0178.wav|tests/data/ljspeech/wavs/LJ013-0178.npy +tests/data/ljspeech/wavs/LJ009-0109.wav|tests/data/ljspeech/wavs/LJ009-0109.npy +tests/data/ljspeech/wavs/LJ019-0390.wav|tests/data/ljspeech/wavs/LJ019-0390.npy +tests/data/ljspeech/wavs/LJ046-0034.wav|tests/data/ljspeech/wavs/LJ046-0034.npy +tests/data/ljspeech/wavs/LJ029-0213.wav|tests/data/ljspeech/wavs/LJ029-0213.npy +tests/data/ljspeech/wavs/LJ040-0020.wav|tests/data/ljspeech/wavs/LJ040-0020.npy 
+tests/data/ljspeech/wavs/LJ002-0247.wav|tests/data/ljspeech/wavs/LJ002-0247.npy +tests/data/ljspeech/wavs/LJ008-0255.wav|tests/data/ljspeech/wavs/LJ008-0255.npy +tests/data/ljspeech/wavs/LJ028-0316.wav|tests/data/ljspeech/wavs/LJ028-0316.npy +tests/data/ljspeech/wavs/LJ010-0270.wav|tests/data/ljspeech/wavs/LJ010-0270.npy +tests/data/ljspeech/wavs/LJ037-0188.wav|tests/data/ljspeech/wavs/LJ037-0188.npy +tests/data/ljspeech/wavs/LJ038-0300.wav|tests/data/ljspeech/wavs/LJ038-0300.npy +tests/data/ljspeech/wavs/LJ049-0216.wav|tests/data/ljspeech/wavs/LJ049-0216.npy +tests/data/ljspeech/wavs/LJ031-0127.wav|tests/data/ljspeech/wavs/LJ031-0127.npy +tests/data/ljspeech/wavs/LJ041-0029.wav|tests/data/ljspeech/wavs/LJ041-0029.npy +tests/data/ljspeech/wavs/LJ005-0049.wav|tests/data/ljspeech/wavs/LJ005-0049.npy +tests/data/ljspeech/wavs/LJ036-0084.wav|tests/data/ljspeech/wavs/LJ036-0084.npy +tests/data/ljspeech/wavs/LJ041-0067.wav|tests/data/ljspeech/wavs/LJ041-0067.npy +tests/data/ljspeech/wavs/LJ023-0114.wav|tests/data/ljspeech/wavs/LJ023-0114.npy +tests/data/ljspeech/wavs/LJ010-0095.wav|tests/data/ljspeech/wavs/LJ010-0095.npy +tests/data/ljspeech/wavs/LJ011-0027.wav|tests/data/ljspeech/wavs/LJ011-0027.npy +tests/data/ljspeech/wavs/LJ028-0328.wav|tests/data/ljspeech/wavs/LJ028-0328.npy +tests/data/ljspeech/wavs/LJ004-0158.wav|tests/data/ljspeech/wavs/LJ004-0158.npy +tests/data/ljspeech/wavs/LJ045-0108.wav|tests/data/ljspeech/wavs/LJ045-0108.npy +tests/data/ljspeech/wavs/LJ047-0112.wav|tests/data/ljspeech/wavs/LJ047-0112.npy +tests/data/ljspeech/wavs/LJ022-0187.wav|tests/data/ljspeech/wavs/LJ022-0187.npy +tests/data/ljspeech/wavs/LJ003-0087.wav|tests/data/ljspeech/wavs/LJ003-0087.npy +tests/data/ljspeech/wavs/LJ047-0099.wav|tests/data/ljspeech/wavs/LJ047-0099.npy +tests/data/ljspeech/wavs/LJ024-0010.wav|tests/data/ljspeech/wavs/LJ024-0010.npy +tests/data/ljspeech/wavs/LJ049-0081.wav|tests/data/ljspeech/wavs/LJ049-0081.npy +tests/data/ljspeech/wavs/LJ014-0082.wav|tests/data/ljspeech/wavs/LJ014-0082.npy +tests/data/ljspeech/wavs/LJ017-0165.wav|tests/data/ljspeech/wavs/LJ017-0165.npy +tests/data/ljspeech/wavs/LJ028-0469.wav|tests/data/ljspeech/wavs/LJ028-0469.npy +tests/data/ljspeech/wavs/LJ010-0079.wav|tests/data/ljspeech/wavs/LJ010-0079.npy +tests/data/ljspeech/wavs/LJ012-0202.wav|tests/data/ljspeech/wavs/LJ012-0202.npy +tests/data/ljspeech/wavs/LJ040-0085.wav|tests/data/ljspeech/wavs/LJ040-0085.npy +tests/data/ljspeech/wavs/LJ008-0223.wav|tests/data/ljspeech/wavs/LJ008-0223.npy +tests/data/ljspeech/wavs/LJ014-0083.wav|tests/data/ljspeech/wavs/LJ014-0083.npy +tests/data/ljspeech/wavs/LJ023-0058.wav|tests/data/ljspeech/wavs/LJ023-0058.npy +tests/data/ljspeech/wavs/LJ032-0171.wav|tests/data/ljspeech/wavs/LJ032-0171.npy +tests/data/ljspeech/wavs/LJ031-0142.wav|tests/data/ljspeech/wavs/LJ031-0142.npy +tests/data/ljspeech/wavs/LJ048-0170.wav|tests/data/ljspeech/wavs/LJ048-0170.npy +tests/data/ljspeech/wavs/LJ049-0047.wav|tests/data/ljspeech/wavs/LJ049-0047.npy +tests/data/ljspeech/wavs/LJ037-0050.wav|tests/data/ljspeech/wavs/LJ037-0050.npy +tests/data/ljspeech/wavs/LJ004-0011.wav|tests/data/ljspeech/wavs/LJ004-0011.npy +tests/data/ljspeech/wavs/LJ050-0034.wav|tests/data/ljspeech/wavs/LJ050-0034.npy +tests/data/ljspeech/wavs/LJ017-0200.wav|tests/data/ljspeech/wavs/LJ017-0200.npy +tests/data/ljspeech/wavs/LJ011-0243.wav|tests/data/ljspeech/wavs/LJ011-0243.npy +tests/data/ljspeech/wavs/LJ038-0084.wav|tests/data/ljspeech/wavs/LJ038-0084.npy 
+tests/data/ljspeech/wavs/LJ035-0140.wav|tests/data/ljspeech/wavs/LJ035-0140.npy +tests/data/ljspeech/wavs/LJ002-0215.wav|tests/data/ljspeech/wavs/LJ002-0215.npy +tests/data/ljspeech/wavs/LJ039-0061.wav|tests/data/ljspeech/wavs/LJ039-0061.npy +tests/data/ljspeech/wavs/LJ050-0241.wav|tests/data/ljspeech/wavs/LJ050-0241.npy +tests/data/ljspeech/wavs/LJ039-0047.wav|tests/data/ljspeech/wavs/LJ039-0047.npy +tests/data/ljspeech/wavs/LJ021-0127.wav|tests/data/ljspeech/wavs/LJ021-0127.npy +tests/data/ljspeech/wavs/LJ050-0114.wav|tests/data/ljspeech/wavs/LJ050-0114.npy +tests/data/ljspeech/wavs/LJ010-0158.wav|tests/data/ljspeech/wavs/LJ010-0158.npy +tests/data/ljspeech/wavs/LJ040-0181.wav|tests/data/ljspeech/wavs/LJ040-0181.npy +tests/data/ljspeech/wavs/LJ017-0109.wav|tests/data/ljspeech/wavs/LJ017-0109.npy +tests/data/ljspeech/wavs/LJ010-0222.wav|tests/data/ljspeech/wavs/LJ010-0222.npy +tests/data/ljspeech/wavs/LJ024-0007.wav|tests/data/ljspeech/wavs/LJ024-0007.npy +tests/data/ljspeech/wavs/LJ003-0334.wav|tests/data/ljspeech/wavs/LJ003-0334.npy +tests/data/ljspeech/wavs/LJ005-0160.wav|tests/data/ljspeech/wavs/LJ005-0160.npy +tests/data/ljspeech/wavs/LJ050-0116.wav|tests/data/ljspeech/wavs/LJ050-0116.npy +tests/data/ljspeech/wavs/LJ017-0197.wav|tests/data/ljspeech/wavs/LJ017-0197.npy +tests/data/ljspeech/wavs/LJ016-0046.wav|tests/data/ljspeech/wavs/LJ016-0046.npy +tests/data/ljspeech/wavs/LJ006-0036.wav|tests/data/ljspeech/wavs/LJ006-0036.npy +tests/data/ljspeech/wavs/LJ016-0159.wav|tests/data/ljspeech/wavs/LJ016-0159.npy +tests/data/ljspeech/wavs/LJ011-0191.wav|tests/data/ljspeech/wavs/LJ011-0191.npy +tests/data/ljspeech/wavs/LJ024-0065.wav|tests/data/ljspeech/wavs/LJ024-0065.npy +tests/data/ljspeech/wavs/LJ019-0196.wav|tests/data/ljspeech/wavs/LJ019-0196.npy +tests/data/ljspeech/wavs/LJ014-0061.wav|tests/data/ljspeech/wavs/LJ014-0061.npy +tests/data/ljspeech/wavs/LJ034-0017.wav|tests/data/ljspeech/wavs/LJ034-0017.npy +tests/data/ljspeech/wavs/LJ008-0125.wav|tests/data/ljspeech/wavs/LJ008-0125.npy +tests/data/ljspeech/wavs/LJ005-0085.wav|tests/data/ljspeech/wavs/LJ005-0085.npy +tests/data/ljspeech/wavs/LJ046-0195.wav|tests/data/ljspeech/wavs/LJ046-0195.npy +tests/data/ljspeech/wavs/LJ036-0134.wav|tests/data/ljspeech/wavs/LJ036-0134.npy +tests/data/ljspeech/wavs/LJ033-0015.wav|tests/data/ljspeech/wavs/LJ033-0015.npy +tests/data/ljspeech/wavs/LJ010-0294.wav|tests/data/ljspeech/wavs/LJ010-0294.npy +tests/data/ljspeech/wavs/LJ041-0103.wav|tests/data/ljspeech/wavs/LJ041-0103.npy +tests/data/ljspeech/wavs/LJ004-0177.wav|tests/data/ljspeech/wavs/LJ004-0177.npy +tests/data/ljspeech/wavs/LJ018-0024.wav|tests/data/ljspeech/wavs/LJ018-0024.npy +tests/data/ljspeech/wavs/LJ043-0077.wav|tests/data/ljspeech/wavs/LJ043-0077.npy +tests/data/ljspeech/wavs/LJ022-0095.wav|tests/data/ljspeech/wavs/LJ022-0095.npy +tests/data/ljspeech/wavs/LJ010-0231.wav|tests/data/ljspeech/wavs/LJ010-0231.npy +tests/data/ljspeech/wavs/LJ021-0121.wav|tests/data/ljspeech/wavs/LJ021-0121.npy +tests/data/ljspeech/wavs/LJ018-0086.wav|tests/data/ljspeech/wavs/LJ018-0086.npy +tests/data/ljspeech/wavs/LJ017-0186.wav|tests/data/ljspeech/wavs/LJ017-0186.npy +tests/data/ljspeech/wavs/LJ003-0184.wav|tests/data/ljspeech/wavs/LJ003-0184.npy +tests/data/ljspeech/wavs/LJ006-0012.wav|tests/data/ljspeech/wavs/LJ006-0012.npy +tests/data/ljspeech/wavs/LJ016-0086.wav|tests/data/ljspeech/wavs/LJ016-0086.npy +tests/data/ljspeech/wavs/LJ026-0085.wav|tests/data/ljspeech/wavs/LJ026-0085.npy 
+tests/data/ljspeech/wavs/LJ032-0157.wav|tests/data/ljspeech/wavs/LJ032-0157.npy +tests/data/ljspeech/wavs/LJ045-0021.wav|tests/data/ljspeech/wavs/LJ045-0021.npy +tests/data/ljspeech/wavs/LJ050-0152.wav|tests/data/ljspeech/wavs/LJ050-0152.npy +tests/data/ljspeech/wavs/LJ001-0130.wav|tests/data/ljspeech/wavs/LJ001-0130.npy +tests/data/ljspeech/wavs/LJ041-0102.wav|tests/data/ljspeech/wavs/LJ041-0102.npy +tests/data/ljspeech/wavs/LJ003-0326.wav|tests/data/ljspeech/wavs/LJ003-0326.npy +tests/data/ljspeech/wavs/LJ030-0223.wav|tests/data/ljspeech/wavs/LJ030-0223.npy +tests/data/ljspeech/wavs/LJ012-0248.wav|tests/data/ljspeech/wavs/LJ012-0248.npy +tests/data/ljspeech/wavs/LJ030-0009.wav|tests/data/ljspeech/wavs/LJ030-0009.npy +tests/data/ljspeech/wavs/LJ006-0211.wav|tests/data/ljspeech/wavs/LJ006-0211.npy +tests/data/ljspeech/wavs/LJ039-0191.wav|tests/data/ljspeech/wavs/LJ039-0191.npy +tests/data/ljspeech/wavs/LJ036-0090.wav|tests/data/ljspeech/wavs/LJ036-0090.npy +tests/data/ljspeech/wavs/LJ028-0502.wav|tests/data/ljspeech/wavs/LJ028-0502.npy +tests/data/ljspeech/wavs/LJ028-0081.wav|tests/data/ljspeech/wavs/LJ028-0081.npy +tests/data/ljspeech/wavs/LJ044-0060.wav|tests/data/ljspeech/wavs/LJ044-0060.npy +tests/data/ljspeech/wavs/LJ050-0067.wav|tests/data/ljspeech/wavs/LJ050-0067.npy +tests/data/ljspeech/wavs/LJ008-0151.wav|tests/data/ljspeech/wavs/LJ008-0151.npy +tests/data/ljspeech/wavs/LJ033-0182.wav|tests/data/ljspeech/wavs/LJ033-0182.npy +tests/data/ljspeech/wavs/LJ019-0131.wav|tests/data/ljspeech/wavs/LJ019-0131.npy +tests/data/ljspeech/wavs/LJ004-0112.wav|tests/data/ljspeech/wavs/LJ004-0112.npy +tests/data/ljspeech/wavs/LJ030-0248.wav|tests/data/ljspeech/wavs/LJ030-0248.npy +tests/data/ljspeech/wavs/LJ048-0141.wav|tests/data/ljspeech/wavs/LJ048-0141.npy +tests/data/ljspeech/wavs/LJ031-0183.wav|tests/data/ljspeech/wavs/LJ031-0183.npy +tests/data/ljspeech/wavs/LJ019-0314.wav|tests/data/ljspeech/wavs/LJ019-0314.npy +tests/data/ljspeech/wavs/LJ022-0097.wav|tests/data/ljspeech/wavs/LJ022-0097.npy +tests/data/ljspeech/wavs/LJ046-0103.wav|tests/data/ljspeech/wavs/LJ046-0103.npy +tests/data/ljspeech/wavs/LJ012-0246.wav|tests/data/ljspeech/wavs/LJ012-0246.npy +tests/data/ljspeech/wavs/LJ013-0156.wav|tests/data/ljspeech/wavs/LJ013-0156.npy +tests/data/ljspeech/wavs/LJ028-0440.wav|tests/data/ljspeech/wavs/LJ028-0440.npy +tests/data/ljspeech/wavs/LJ003-0037.wav|tests/data/ljspeech/wavs/LJ003-0037.npy +tests/data/ljspeech/wavs/LJ002-0241.wav|tests/data/ljspeech/wavs/LJ002-0241.npy +tests/data/ljspeech/wavs/LJ040-0040.wav|tests/data/ljspeech/wavs/LJ040-0040.npy +tests/data/ljspeech/wavs/LJ018-0268.wav|tests/data/ljspeech/wavs/LJ018-0268.npy +tests/data/ljspeech/wavs/LJ019-0344.wav|tests/data/ljspeech/wavs/LJ019-0344.npy +tests/data/ljspeech/wavs/LJ013-0042.wav|tests/data/ljspeech/wavs/LJ013-0042.npy +tests/data/ljspeech/wavs/LJ026-0095.wav|tests/data/ljspeech/wavs/LJ026-0095.npy +tests/data/ljspeech/wavs/LJ010-0303.wav|tests/data/ljspeech/wavs/LJ010-0303.npy +tests/data/ljspeech/wavs/LJ019-0160.wav|tests/data/ljspeech/wavs/LJ019-0160.npy +tests/data/ljspeech/wavs/LJ017-0089.wav|tests/data/ljspeech/wavs/LJ017-0089.npy +tests/data/ljspeech/wavs/LJ046-0060.wav|tests/data/ljspeech/wavs/LJ046-0060.npy +tests/data/ljspeech/wavs/LJ005-0184.wav|tests/data/ljspeech/wavs/LJ005-0184.npy +tests/data/ljspeech/wavs/LJ042-0114.wav|tests/data/ljspeech/wavs/LJ042-0114.npy +tests/data/ljspeech/wavs/LJ034-0173.wav|tests/data/ljspeech/wavs/LJ034-0173.npy 
+tests/data/ljspeech/wavs/LJ018-0004.wav|tests/data/ljspeech/wavs/LJ018-0004.npy +tests/data/ljspeech/wavs/LJ012-0281.wav|tests/data/ljspeech/wavs/LJ012-0281.npy +tests/data/ljspeech/wavs/LJ040-0175.wav|tests/data/ljspeech/wavs/LJ040-0175.npy +tests/data/ljspeech/wavs/LJ002-0122.wav|tests/data/ljspeech/wavs/LJ002-0122.npy +tests/data/ljspeech/wavs/LJ044-0152.wav|tests/data/ljspeech/wavs/LJ044-0152.npy +tests/data/ljspeech/wavs/LJ037-0251.wav|tests/data/ljspeech/wavs/LJ037-0251.npy +tests/data/ljspeech/wavs/LJ031-0222.wav|tests/data/ljspeech/wavs/LJ031-0222.npy +tests/data/ljspeech/wavs/LJ030-0227.wav|tests/data/ljspeech/wavs/LJ030-0227.npy +tests/data/ljspeech/wavs/LJ032-0186.wav|tests/data/ljspeech/wavs/LJ032-0186.npy +tests/data/ljspeech/wavs/LJ033-0210.wav|tests/data/ljspeech/wavs/LJ033-0210.npy +tests/data/ljspeech/wavs/LJ035-0167.wav|tests/data/ljspeech/wavs/LJ035-0167.npy +tests/data/ljspeech/wavs/LJ047-0142.wav|tests/data/ljspeech/wavs/LJ047-0142.npy +tests/data/ljspeech/wavs/LJ009-0124.wav|tests/data/ljspeech/wavs/LJ009-0124.npy +tests/data/ljspeech/wavs/LJ038-0266.wav|tests/data/ljspeech/wavs/LJ038-0266.npy +tests/data/ljspeech/wavs/LJ046-0063.wav|tests/data/ljspeech/wavs/LJ046-0063.npy +tests/data/ljspeech/wavs/LJ034-0131.wav|tests/data/ljspeech/wavs/LJ034-0131.npy +tests/data/ljspeech/wavs/LJ008-0139.wav|tests/data/ljspeech/wavs/LJ008-0139.npy +tests/data/ljspeech/wavs/LJ010-0016.wav|tests/data/ljspeech/wavs/LJ010-0016.npy +tests/data/ljspeech/wavs/LJ045-0209.wav|tests/data/ljspeech/wavs/LJ045-0209.npy +tests/data/ljspeech/wavs/LJ047-0236.wav|tests/data/ljspeech/wavs/LJ047-0236.npy +tests/data/ljspeech/wavs/LJ001-0074.wav|tests/data/ljspeech/wavs/LJ001-0074.npy +tests/data/ljspeech/wavs/LJ015-0200.wav|tests/data/ljspeech/wavs/LJ015-0200.npy +tests/data/ljspeech/wavs/LJ050-0200.wav|tests/data/ljspeech/wavs/LJ050-0200.npy +tests/data/ljspeech/wavs/LJ011-0289.wav|tests/data/ljspeech/wavs/LJ011-0289.npy +tests/data/ljspeech/wavs/LJ033-0031.wav|tests/data/ljspeech/wavs/LJ033-0031.npy +tests/data/ljspeech/wavs/LJ015-0258.wav|tests/data/ljspeech/wavs/LJ015-0258.npy +tests/data/ljspeech/wavs/LJ019-0091.wav|tests/data/ljspeech/wavs/LJ019-0091.npy +tests/data/ljspeech/wavs/LJ027-0113.wav|tests/data/ljspeech/wavs/LJ027-0113.npy +tests/data/ljspeech/wavs/LJ022-0160.wav|tests/data/ljspeech/wavs/LJ022-0160.npy +tests/data/ljspeech/wavs/LJ029-0190.wav|tests/data/ljspeech/wavs/LJ029-0190.npy +tests/data/ljspeech/wavs/LJ015-0133.wav|tests/data/ljspeech/wavs/LJ015-0133.npy +tests/data/ljspeech/wavs/LJ034-0206.wav|tests/data/ljspeech/wavs/LJ034-0206.npy +tests/data/ljspeech/wavs/LJ016-0257.wav|tests/data/ljspeech/wavs/LJ016-0257.npy +tests/data/ljspeech/wavs/LJ003-0012.wav|tests/data/ljspeech/wavs/LJ003-0012.npy +tests/data/ljspeech/wavs/LJ008-0162.wav|tests/data/ljspeech/wavs/LJ008-0162.npy +tests/data/ljspeech/wavs/LJ002-0199.wav|tests/data/ljspeech/wavs/LJ002-0199.npy +tests/data/ljspeech/wavs/LJ038-0165.wav|tests/data/ljspeech/wavs/LJ038-0165.npy +tests/data/ljspeech/wavs/LJ032-0029.wav|tests/data/ljspeech/wavs/LJ032-0029.npy +tests/data/ljspeech/wavs/LJ009-0217.wav|tests/data/ljspeech/wavs/LJ009-0217.npy +tests/data/ljspeech/wavs/LJ007-0182.wav|tests/data/ljspeech/wavs/LJ007-0182.npy +tests/data/ljspeech/wavs/LJ022-0134.wav|tests/data/ljspeech/wavs/LJ022-0134.npy +tests/data/ljspeech/wavs/LJ044-0202.wav|tests/data/ljspeech/wavs/LJ044-0202.npy +tests/data/ljspeech/wavs/LJ039-0118.wav|tests/data/ljspeech/wavs/LJ039-0118.npy 
+tests/data/ljspeech/wavs/LJ048-0048.wav|tests/data/ljspeech/wavs/LJ048-0048.npy +tests/data/ljspeech/wavs/LJ031-0200.wav|tests/data/ljspeech/wavs/LJ031-0200.npy +tests/data/ljspeech/wavs/LJ017-0009.wav|tests/data/ljspeech/wavs/LJ017-0009.npy +tests/data/ljspeech/wavs/LJ034-0052.wav|tests/data/ljspeech/wavs/LJ034-0052.npy +tests/data/ljspeech/wavs/LJ005-0232.wav|tests/data/ljspeech/wavs/LJ005-0232.npy +tests/data/ljspeech/wavs/LJ012-0295.wav|tests/data/ljspeech/wavs/LJ012-0295.npy +tests/data/ljspeech/wavs/LJ018-0374.wav|tests/data/ljspeech/wavs/LJ018-0374.npy +tests/data/ljspeech/wavs/LJ013-0027.wav|tests/data/ljspeech/wavs/LJ013-0027.npy +tests/data/ljspeech/wavs/LJ005-0115.wav|tests/data/ljspeech/wavs/LJ005-0115.npy +tests/data/ljspeech/wavs/LJ042-0186.wav|tests/data/ljspeech/wavs/LJ042-0186.npy +tests/data/ljspeech/wavs/LJ025-0064.wav|tests/data/ljspeech/wavs/LJ025-0064.npy +tests/data/ljspeech/wavs/LJ032-0179.wav|tests/data/ljspeech/wavs/LJ032-0179.npy +tests/data/ljspeech/wavs/LJ049-0178.wav|tests/data/ljspeech/wavs/LJ049-0178.npy +tests/data/ljspeech/wavs/LJ027-0087.wav|tests/data/ljspeech/wavs/LJ027-0087.npy +tests/data/ljspeech/wavs/LJ031-0232.wav|tests/data/ljspeech/wavs/LJ031-0232.npy +tests/data/ljspeech/wavs/LJ035-0021.wav|tests/data/ljspeech/wavs/LJ035-0021.npy +tests/data/ljspeech/wavs/LJ029-0026.wav|tests/data/ljspeech/wavs/LJ029-0026.npy +tests/data/ljspeech/wavs/LJ029-0004.wav|tests/data/ljspeech/wavs/LJ029-0004.npy +tests/data/ljspeech/wavs/LJ008-0206.wav|tests/data/ljspeech/wavs/LJ008-0206.npy +tests/data/ljspeech/wavs/LJ039-0242.wav|tests/data/ljspeech/wavs/LJ039-0242.npy +tests/data/ljspeech/wavs/LJ013-0137.wav|tests/data/ljspeech/wavs/LJ013-0137.npy +tests/data/ljspeech/wavs/LJ016-0318.wav|tests/data/ljspeech/wavs/LJ016-0318.npy +tests/data/ljspeech/wavs/LJ014-0134.wav|tests/data/ljspeech/wavs/LJ014-0134.npy +tests/data/ljspeech/wavs/LJ003-0194.wav|tests/data/ljspeech/wavs/LJ003-0194.npy +tests/data/ljspeech/wavs/LJ011-0267.wav|tests/data/ljspeech/wavs/LJ011-0267.npy +tests/data/ljspeech/wavs/LJ002-0156.wav|tests/data/ljspeech/wavs/LJ002-0156.npy +tests/data/ljspeech/wavs/LJ050-0155.wav|tests/data/ljspeech/wavs/LJ050-0155.npy +tests/data/ljspeech/wavs/LJ046-0164.wav|tests/data/ljspeech/wavs/LJ046-0164.npy +tests/data/ljspeech/wavs/LJ015-0111.wav|tests/data/ljspeech/wavs/LJ015-0111.npy +tests/data/ljspeech/wavs/LJ037-0213.wav|tests/data/ljspeech/wavs/LJ037-0213.npy +tests/data/ljspeech/wavs/LJ049-0172.wav|tests/data/ljspeech/wavs/LJ049-0172.npy +tests/data/ljspeech/wavs/LJ013-0044.wav|tests/data/ljspeech/wavs/LJ013-0044.npy +tests/data/ljspeech/wavs/LJ042-0074.wav|tests/data/ljspeech/wavs/LJ042-0074.npy +tests/data/ljspeech/wavs/LJ018-0110.wav|tests/data/ljspeech/wavs/LJ018-0110.npy +tests/data/ljspeech/wavs/LJ027-0032.wav|tests/data/ljspeech/wavs/LJ027-0032.npy +tests/data/ljspeech/wavs/LJ027-0158.wav|tests/data/ljspeech/wavs/LJ027-0158.npy +tests/data/ljspeech/wavs/LJ019-0258.wav|tests/data/ljspeech/wavs/LJ019-0258.npy +tests/data/ljspeech/wavs/LJ034-0019.wav|tests/data/ljspeech/wavs/LJ034-0019.npy +tests/data/ljspeech/wavs/LJ040-0215.wav|tests/data/ljspeech/wavs/LJ040-0215.npy +tests/data/ljspeech/wavs/LJ014-0038.wav|tests/data/ljspeech/wavs/LJ014-0038.npy +tests/data/ljspeech/wavs/LJ016-0063.wav|tests/data/ljspeech/wavs/LJ016-0063.npy +tests/data/ljspeech/wavs/LJ046-0108.wav|tests/data/ljspeech/wavs/LJ046-0108.npy +tests/data/ljspeech/wavs/LJ010-0039.wav|tests/data/ljspeech/wavs/LJ010-0039.npy 
+tests/data/ljspeech/wavs/LJ028-0021.wav|tests/data/ljspeech/wavs/LJ028-0021.npy +tests/data/ljspeech/wavs/LJ008-0135.wav|tests/data/ljspeech/wavs/LJ008-0135.npy +tests/data/ljspeech/wavs/LJ021-0062.wav|tests/data/ljspeech/wavs/LJ021-0062.npy +tests/data/ljspeech/wavs/LJ017-0053.wav|tests/data/ljspeech/wavs/LJ017-0053.npy +tests/data/ljspeech/wavs/LJ015-0068.wav|tests/data/ljspeech/wavs/LJ015-0068.npy +tests/data/ljspeech/wavs/LJ016-0064.wav|tests/data/ljspeech/wavs/LJ016-0064.npy +tests/data/ljspeech/wavs/LJ015-0243.wav|tests/data/ljspeech/wavs/LJ015-0243.npy +tests/data/ljspeech/wavs/LJ048-0146.wav|tests/data/ljspeech/wavs/LJ048-0146.npy +tests/data/ljspeech/wavs/LJ002-0043.wav|tests/data/ljspeech/wavs/LJ002-0043.npy +tests/data/ljspeech/wavs/LJ039-0241.wav|tests/data/ljspeech/wavs/LJ039-0241.npy +tests/data/ljspeech/wavs/LJ022-0037.wav|tests/data/ljspeech/wavs/LJ022-0037.npy +tests/data/ljspeech/wavs/LJ001-0004.wav|tests/data/ljspeech/wavs/LJ001-0004.npy +tests/data/ljspeech/wavs/LJ019-0039.wav|tests/data/ljspeech/wavs/LJ019-0039.npy +tests/data/ljspeech/wavs/LJ039-0029.wav|tests/data/ljspeech/wavs/LJ039-0029.npy +tests/data/ljspeech/wavs/LJ028-0053.wav|tests/data/ljspeech/wavs/LJ028-0053.npy +tests/data/ljspeech/wavs/LJ013-0006.wav|tests/data/ljspeech/wavs/LJ013-0006.npy +tests/data/ljspeech/wavs/LJ026-0021.wav|tests/data/ljspeech/wavs/LJ026-0021.npy +tests/data/ljspeech/wavs/LJ047-0052.wav|tests/data/ljspeech/wavs/LJ047-0052.npy +tests/data/ljspeech/wavs/LJ044-0031.wav|tests/data/ljspeech/wavs/LJ044-0031.npy +tests/data/ljspeech/wavs/LJ044-0051.wav|tests/data/ljspeech/wavs/LJ044-0051.npy +tests/data/ljspeech/wavs/LJ030-0210.wav|tests/data/ljspeech/wavs/LJ030-0210.npy +tests/data/ljspeech/wavs/LJ040-0083.wav|tests/data/ljspeech/wavs/LJ040-0083.npy +tests/data/ljspeech/wavs/LJ010-0027.wav|tests/data/ljspeech/wavs/LJ010-0027.npy +tests/data/ljspeech/wavs/LJ010-0278.wav|tests/data/ljspeech/wavs/LJ010-0278.npy +tests/data/ljspeech/wavs/LJ015-0307.wav|tests/data/ljspeech/wavs/LJ015-0307.npy +tests/data/ljspeech/wavs/LJ013-0005.wav|tests/data/ljspeech/wavs/LJ013-0005.npy +tests/data/ljspeech/wavs/LJ018-0108.wav|tests/data/ljspeech/wavs/LJ018-0108.npy +tests/data/ljspeech/wavs/LJ032-0172.wav|tests/data/ljspeech/wavs/LJ032-0172.npy +tests/data/ljspeech/wavs/LJ003-0305.wav|tests/data/ljspeech/wavs/LJ003-0305.npy +tests/data/ljspeech/wavs/LJ015-0190.wav|tests/data/ljspeech/wavs/LJ015-0190.npy +tests/data/ljspeech/wavs/LJ009-0128.wav|tests/data/ljspeech/wavs/LJ009-0128.npy +tests/data/ljspeech/wavs/LJ011-0227.wav|tests/data/ljspeech/wavs/LJ011-0227.npy +tests/data/ljspeech/wavs/LJ005-0028.wav|tests/data/ljspeech/wavs/LJ005-0028.npy +tests/data/ljspeech/wavs/LJ010-0268.wav|tests/data/ljspeech/wavs/LJ010-0268.npy +tests/data/ljspeech/wavs/LJ008-0260.wav|tests/data/ljspeech/wavs/LJ008-0260.npy +tests/data/ljspeech/wavs/LJ013-0134.wav|tests/data/ljspeech/wavs/LJ013-0134.npy +tests/data/ljspeech/wavs/LJ015-0294.wav|tests/data/ljspeech/wavs/LJ015-0294.npy +tests/data/ljspeech/wavs/LJ022-0110.wav|tests/data/ljspeech/wavs/LJ022-0110.npy +tests/data/ljspeech/wavs/LJ001-0102.wav|tests/data/ljspeech/wavs/LJ001-0102.npy +tests/data/ljspeech/wavs/LJ005-0148.wav|tests/data/ljspeech/wavs/LJ005-0148.npy +tests/data/ljspeech/wavs/LJ026-0148.wav|tests/data/ljspeech/wavs/LJ026-0148.npy +tests/data/ljspeech/wavs/LJ012-0178.wav|tests/data/ljspeech/wavs/LJ012-0178.npy +tests/data/ljspeech/wavs/LJ050-0214.wav|tests/data/ljspeech/wavs/LJ050-0214.npy 
+tests/data/ljspeech/wavs/LJ003-0205.wav|tests/data/ljspeech/wavs/LJ003-0205.npy +tests/data/ljspeech/wavs/LJ018-0386.wav|tests/data/ljspeech/wavs/LJ018-0386.npy +tests/data/ljspeech/wavs/LJ018-0290.wav|tests/data/ljspeech/wavs/LJ018-0290.npy +tests/data/ljspeech/wavs/LJ042-0020.wav|tests/data/ljspeech/wavs/LJ042-0020.npy +tests/data/ljspeech/wavs/LJ045-0196.wav|tests/data/ljspeech/wavs/LJ045-0196.npy +tests/data/ljspeech/wavs/LJ046-0166.wav|tests/data/ljspeech/wavs/LJ046-0166.npy +tests/data/ljspeech/wavs/LJ010-0167.wav|tests/data/ljspeech/wavs/LJ010-0167.npy +tests/data/ljspeech/wavs/LJ037-0065.wav|tests/data/ljspeech/wavs/LJ037-0065.npy +tests/data/ljspeech/wavs/LJ046-0190.wav|tests/data/ljspeech/wavs/LJ046-0190.npy +tests/data/ljspeech/wavs/LJ011-0205.wav|tests/data/ljspeech/wavs/LJ011-0205.npy +tests/data/ljspeech/wavs/LJ041-0044.wav|tests/data/ljspeech/wavs/LJ041-0044.npy +tests/data/ljspeech/wavs/LJ016-0334.wav|tests/data/ljspeech/wavs/LJ016-0334.npy +tests/data/ljspeech/wavs/LJ043-0082.wav|tests/data/ljspeech/wavs/LJ043-0082.npy +tests/data/ljspeech/wavs/LJ040-0142.wav|tests/data/ljspeech/wavs/LJ040-0142.npy +tests/data/ljspeech/wavs/LJ028-0503.wav|tests/data/ljspeech/wavs/LJ028-0503.npy +tests/data/ljspeech/wavs/LJ049-0028.wav|tests/data/ljspeech/wavs/LJ049-0028.npy +tests/data/ljspeech/wavs/LJ035-0098.wav|tests/data/ljspeech/wavs/LJ035-0098.npy +tests/data/ljspeech/wavs/LJ006-0163.wav|tests/data/ljspeech/wavs/LJ006-0163.npy +tests/data/ljspeech/wavs/LJ035-0102.wav|tests/data/ljspeech/wavs/LJ035-0102.npy +tests/data/ljspeech/wavs/LJ014-0297.wav|tests/data/ljspeech/wavs/LJ014-0297.npy +tests/data/ljspeech/wavs/LJ003-0268.wav|tests/data/ljspeech/wavs/LJ003-0268.npy +tests/data/ljspeech/wavs/LJ011-0014.wav|tests/data/ljspeech/wavs/LJ011-0014.npy +tests/data/ljspeech/wavs/LJ001-0119.wav|tests/data/ljspeech/wavs/LJ001-0119.npy +tests/data/ljspeech/wavs/LJ006-0111.wav|tests/data/ljspeech/wavs/LJ006-0111.npy +tests/data/ljspeech/wavs/LJ019-0231.wav|tests/data/ljspeech/wavs/LJ019-0231.npy +tests/data/ljspeech/wavs/LJ014-0332.wav|tests/data/ljspeech/wavs/LJ014-0332.npy +tests/data/ljspeech/wavs/LJ002-0161.wav|tests/data/ljspeech/wavs/LJ002-0161.npy +tests/data/ljspeech/wavs/LJ014-0299.wav|tests/data/ljspeech/wavs/LJ014-0299.npy +tests/data/ljspeech/wavs/LJ031-0213.wav|tests/data/ljspeech/wavs/LJ031-0213.npy +tests/data/ljspeech/wavs/LJ019-0317.wav|tests/data/ljspeech/wavs/LJ019-0317.npy +tests/data/ljspeech/wavs/LJ050-0215.wav|tests/data/ljspeech/wavs/LJ050-0215.npy +tests/data/ljspeech/wavs/LJ034-0200.wav|tests/data/ljspeech/wavs/LJ034-0200.npy +tests/data/ljspeech/wavs/LJ016-0293.wav|tests/data/ljspeech/wavs/LJ016-0293.npy +tests/data/ljspeech/wavs/LJ006-0034.wav|tests/data/ljspeech/wavs/LJ006-0034.npy +tests/data/ljspeech/wavs/LJ035-0087.wav|tests/data/ljspeech/wavs/LJ035-0087.npy +tests/data/ljspeech/wavs/LJ036-0167.wav|tests/data/ljspeech/wavs/LJ036-0167.npy +tests/data/ljspeech/wavs/LJ017-0159.wav|tests/data/ljspeech/wavs/LJ017-0159.npy +tests/data/ljspeech/wavs/LJ035-0010.wav|tests/data/ljspeech/wavs/LJ035-0010.npy +tests/data/ljspeech/wavs/LJ025-0140.wav|tests/data/ljspeech/wavs/LJ025-0140.npy +tests/data/ljspeech/wavs/LJ018-0228.wav|tests/data/ljspeech/wavs/LJ018-0228.npy +tests/data/ljspeech/wavs/LJ017-0145.wav|tests/data/ljspeech/wavs/LJ017-0145.npy +tests/data/ljspeech/wavs/LJ017-0284.wav|tests/data/ljspeech/wavs/LJ017-0284.npy +tests/data/ljspeech/wavs/LJ002-0184.wav|tests/data/ljspeech/wavs/LJ002-0184.npy 
+tests/data/ljspeech/wavs/LJ019-0064.wav|tests/data/ljspeech/wavs/LJ019-0064.npy +tests/data/ljspeech/wavs/LJ025-0132.wav|tests/data/ljspeech/wavs/LJ025-0132.npy +tests/data/ljspeech/wavs/LJ041-0064.wav|tests/data/ljspeech/wavs/LJ041-0064.npy +tests/data/ljspeech/wavs/LJ042-0226.wav|tests/data/ljspeech/wavs/LJ042-0226.npy +tests/data/ljspeech/wavs/LJ003-0222.wav|tests/data/ljspeech/wavs/LJ003-0222.npy +tests/data/ljspeech/wavs/LJ004-0189.wav|tests/data/ljspeech/wavs/LJ004-0189.npy +tests/data/ljspeech/wavs/LJ022-0186.wav|tests/data/ljspeech/wavs/LJ022-0186.npy +tests/data/ljspeech/wavs/LJ009-0278.wav|tests/data/ljspeech/wavs/LJ009-0278.npy +tests/data/ljspeech/wavs/LJ002-0087.wav|tests/data/ljspeech/wavs/LJ002-0087.npy +tests/data/ljspeech/wavs/LJ016-0153.wav|tests/data/ljspeech/wavs/LJ016-0153.npy +tests/data/ljspeech/wavs/LJ028-0415.wav|tests/data/ljspeech/wavs/LJ028-0415.npy +tests/data/ljspeech/wavs/LJ018-0213.wav|tests/data/ljspeech/wavs/LJ018-0213.npy +tests/data/ljspeech/wavs/LJ009-0235.wav|tests/data/ljspeech/wavs/LJ009-0235.npy +tests/data/ljspeech/wavs/LJ001-0136.wav|tests/data/ljspeech/wavs/LJ001-0136.npy +tests/data/ljspeech/wavs/LJ009-0204.wav|tests/data/ljspeech/wavs/LJ009-0204.npy +tests/data/ljspeech/wavs/LJ040-0148.wav|tests/data/ljspeech/wavs/LJ040-0148.npy +tests/data/ljspeech/wavs/LJ043-0157.wav|tests/data/ljspeech/wavs/LJ043-0157.npy +tests/data/ljspeech/wavs/LJ025-0145.wav|tests/data/ljspeech/wavs/LJ025-0145.npy +tests/data/ljspeech/wavs/LJ010-0196.wav|tests/data/ljspeech/wavs/LJ010-0196.npy +tests/data/ljspeech/wavs/LJ019-0308.wav|tests/data/ljspeech/wavs/LJ019-0308.npy +tests/data/ljspeech/wavs/LJ018-0153.wav|tests/data/ljspeech/wavs/LJ018-0153.npy +tests/data/ljspeech/wavs/LJ026-0072.wav|tests/data/ljspeech/wavs/LJ026-0072.npy +tests/data/ljspeech/wavs/LJ035-0121.wav|tests/data/ljspeech/wavs/LJ035-0121.npy +tests/data/ljspeech/wavs/LJ002-0001.wav|tests/data/ljspeech/wavs/LJ002-0001.npy +tests/data/ljspeech/wavs/LJ018-0173.wav|tests/data/ljspeech/wavs/LJ018-0173.npy +tests/data/ljspeech/wavs/LJ047-0038.wav|tests/data/ljspeech/wavs/LJ047-0038.npy +tests/data/ljspeech/wavs/LJ002-0113.wav|tests/data/ljspeech/wavs/LJ002-0113.npy +tests/data/ljspeech/wavs/LJ005-0202.wav|tests/data/ljspeech/wavs/LJ005-0202.npy +tests/data/ljspeech/wavs/LJ020-0013.wav|tests/data/ljspeech/wavs/LJ020-0013.npy +tests/data/ljspeech/wavs/LJ026-0140.wav|tests/data/ljspeech/wavs/LJ026-0140.npy +tests/data/ljspeech/wavs/LJ019-0145.wav|tests/data/ljspeech/wavs/LJ019-0145.npy +tests/data/ljspeech/wavs/LJ047-0082.wav|tests/data/ljspeech/wavs/LJ047-0082.npy +tests/data/ljspeech/wavs/LJ019-0135.wav|tests/data/ljspeech/wavs/LJ019-0135.npy +tests/data/ljspeech/wavs/LJ046-0122.wav|tests/data/ljspeech/wavs/LJ046-0122.npy +tests/data/ljspeech/wavs/LJ034-0153.wav|tests/data/ljspeech/wavs/LJ034-0153.npy +tests/data/ljspeech/wavs/LJ036-0082.wav|tests/data/ljspeech/wavs/LJ036-0082.npy +tests/data/ljspeech/wavs/LJ049-0021.wav|tests/data/ljspeech/wavs/LJ049-0021.npy +tests/data/ljspeech/wavs/LJ035-0058.wav|tests/data/ljspeech/wavs/LJ035-0058.npy +tests/data/ljspeech/wavs/LJ010-0089.wav|tests/data/ljspeech/wavs/LJ010-0089.npy +tests/data/ljspeech/wavs/LJ025-0148.wav|tests/data/ljspeech/wavs/LJ025-0148.npy +tests/data/ljspeech/wavs/LJ047-0216.wav|tests/data/ljspeech/wavs/LJ047-0216.npy +tests/data/ljspeech/wavs/LJ010-0130.wav|tests/data/ljspeech/wavs/LJ010-0130.npy +tests/data/ljspeech/wavs/LJ019-0331.wav|tests/data/ljspeech/wavs/LJ019-0331.npy 
+tests/data/ljspeech/wavs/LJ008-0278.wav|tests/data/ljspeech/wavs/LJ008-0278.npy +tests/data/ljspeech/wavs/LJ048-0096.wav|tests/data/ljspeech/wavs/LJ048-0096.npy +tests/data/ljspeech/wavs/LJ008-0307.wav|tests/data/ljspeech/wavs/LJ008-0307.npy +tests/data/ljspeech/wavs/LJ021-0097.wav|tests/data/ljspeech/wavs/LJ021-0097.npy +tests/data/ljspeech/wavs/LJ043-0096.wav|tests/data/ljspeech/wavs/LJ043-0096.npy +tests/data/ljspeech/wavs/LJ028-0343.wav|tests/data/ljspeech/wavs/LJ028-0343.npy +tests/data/ljspeech/wavs/LJ046-0099.wav|tests/data/ljspeech/wavs/LJ046-0099.npy +tests/data/ljspeech/wavs/LJ009-0017.wav|tests/data/ljspeech/wavs/LJ009-0017.npy +tests/data/ljspeech/wavs/LJ002-0061.wav|tests/data/ljspeech/wavs/LJ002-0061.npy +tests/data/ljspeech/wavs/LJ028-0476.wav|tests/data/ljspeech/wavs/LJ028-0476.npy +tests/data/ljspeech/wavs/LJ008-0283.wav|tests/data/ljspeech/wavs/LJ008-0283.npy +tests/data/ljspeech/wavs/LJ034-0080.wav|tests/data/ljspeech/wavs/LJ034-0080.npy +tests/data/ljspeech/wavs/LJ012-0089.wav|tests/data/ljspeech/wavs/LJ012-0089.npy +tests/data/ljspeech/wavs/LJ042-0250.wav|tests/data/ljspeech/wavs/LJ042-0250.npy +tests/data/ljspeech/wavs/LJ036-0166.wav|tests/data/ljspeech/wavs/LJ036-0166.npy +tests/data/ljspeech/wavs/LJ043-0010.wav|tests/data/ljspeech/wavs/LJ043-0010.npy +tests/data/ljspeech/wavs/LJ015-0065.wav|tests/data/ljspeech/wavs/LJ015-0065.npy +tests/data/ljspeech/wavs/LJ037-0026.wav|tests/data/ljspeech/wavs/LJ037-0026.npy +tests/data/ljspeech/wavs/LJ003-0176.wav|tests/data/ljspeech/wavs/LJ003-0176.npy +tests/data/ljspeech/wavs/LJ015-0167.wav|tests/data/ljspeech/wavs/LJ015-0167.npy +tests/data/ljspeech/wavs/LJ014-0281.wav|tests/data/ljspeech/wavs/LJ014-0281.npy +tests/data/ljspeech/wavs/LJ003-0109.wav|tests/data/ljspeech/wavs/LJ003-0109.npy +tests/data/ljspeech/wavs/LJ014-0279.wav|tests/data/ljspeech/wavs/LJ014-0279.npy +tests/data/ljspeech/wavs/LJ049-0171.wav|tests/data/ljspeech/wavs/LJ049-0171.npy +tests/data/ljspeech/wavs/LJ015-0131.wav|tests/data/ljspeech/wavs/LJ015-0131.npy +tests/data/ljspeech/wavs/LJ040-0013.wav|tests/data/ljspeech/wavs/LJ040-0013.npy +tests/data/ljspeech/wavs/LJ028-0091.wav|tests/data/ljspeech/wavs/LJ028-0091.npy +tests/data/ljspeech/wavs/LJ015-0211.wav|tests/data/ljspeech/wavs/LJ015-0211.npy +tests/data/ljspeech/wavs/LJ045-0245.wav|tests/data/ljspeech/wavs/LJ045-0245.npy +tests/data/ljspeech/wavs/LJ050-0213.wav|tests/data/ljspeech/wavs/LJ050-0213.npy +tests/data/ljspeech/wavs/LJ043-0012.wav|tests/data/ljspeech/wavs/LJ043-0012.npy +tests/data/ljspeech/wavs/LJ005-0275.wav|tests/data/ljspeech/wavs/LJ005-0275.npy +tests/data/ljspeech/wavs/LJ015-0202.wav|tests/data/ljspeech/wavs/LJ015-0202.npy +tests/data/ljspeech/wavs/LJ044-0026.wav|tests/data/ljspeech/wavs/LJ044-0026.npy +tests/data/ljspeech/wavs/LJ012-0131.wav|tests/data/ljspeech/wavs/LJ012-0131.npy +tests/data/ljspeech/wavs/LJ036-0165.wav|tests/data/ljspeech/wavs/LJ036-0165.npy +tests/data/ljspeech/wavs/LJ044-0006.wav|tests/data/ljspeech/wavs/LJ044-0006.npy +tests/data/ljspeech/wavs/LJ015-0026.wav|tests/data/ljspeech/wavs/LJ015-0026.npy +tests/data/ljspeech/wavs/LJ005-0149.wav|tests/data/ljspeech/wavs/LJ005-0149.npy +tests/data/ljspeech/wavs/LJ039-0149.wav|tests/data/ljspeech/wavs/LJ039-0149.npy +tests/data/ljspeech/wavs/LJ030-0012.wav|tests/data/ljspeech/wavs/LJ030-0012.npy +tests/data/ljspeech/wavs/LJ034-0054.wav|tests/data/ljspeech/wavs/LJ034-0054.npy +tests/data/ljspeech/wavs/LJ030-0069.wav|tests/data/ljspeech/wavs/LJ030-0069.npy 
+tests/data/ljspeech/wavs/LJ015-0044.wav|tests/data/ljspeech/wavs/LJ015-0044.npy +tests/data/ljspeech/wavs/LJ038-0129.wav|tests/data/ljspeech/wavs/LJ038-0129.npy +tests/data/ljspeech/wavs/LJ044-0050.wav|tests/data/ljspeech/wavs/LJ044-0050.npy +tests/data/ljspeech/wavs/LJ016-0024.wav|tests/data/ljspeech/wavs/LJ016-0024.npy +tests/data/ljspeech/wavs/LJ044-0094.wav|tests/data/ljspeech/wavs/LJ044-0094.npy +tests/data/ljspeech/wavs/LJ037-0149.wav|tests/data/ljspeech/wavs/LJ037-0149.npy +tests/data/ljspeech/wavs/LJ011-0137.wav|tests/data/ljspeech/wavs/LJ011-0137.npy +tests/data/ljspeech/wavs/LJ027-0093.wav|tests/data/ljspeech/wavs/LJ027-0093.npy +tests/data/ljspeech/wavs/LJ049-0210.wav|tests/data/ljspeech/wavs/LJ049-0210.npy +tests/data/ljspeech/wavs/LJ015-0261.wav|tests/data/ljspeech/wavs/LJ015-0261.npy +tests/data/ljspeech/wavs/LJ047-0250.wav|tests/data/ljspeech/wavs/LJ047-0250.npy +tests/data/ljspeech/wavs/LJ008-0067.wav|tests/data/ljspeech/wavs/LJ008-0067.npy +tests/data/ljspeech/wavs/LJ032-0199.wav|tests/data/ljspeech/wavs/LJ032-0199.npy +tests/data/ljspeech/wavs/LJ039-0174.wav|tests/data/ljspeech/wavs/LJ039-0174.npy +tests/data/ljspeech/wavs/LJ027-0179.wav|tests/data/ljspeech/wavs/LJ027-0179.npy +tests/data/ljspeech/wavs/LJ048-0094.wav|tests/data/ljspeech/wavs/LJ048-0094.npy +tests/data/ljspeech/wavs/LJ032-0207.wav|tests/data/ljspeech/wavs/LJ032-0207.npy +tests/data/ljspeech/wavs/LJ017-0068.wav|tests/data/ljspeech/wavs/LJ017-0068.npy +tests/data/ljspeech/wavs/LJ039-0187.wav|tests/data/ljspeech/wavs/LJ039-0187.npy +tests/data/ljspeech/wavs/LJ003-0075.wav|tests/data/ljspeech/wavs/LJ003-0075.npy +tests/data/ljspeech/wavs/LJ032-0115.wav|tests/data/ljspeech/wavs/LJ032-0115.npy +tests/data/ljspeech/wavs/LJ048-0054.wav|tests/data/ljspeech/wavs/LJ048-0054.npy +tests/data/ljspeech/wavs/LJ016-0297.wav|tests/data/ljspeech/wavs/LJ016-0297.npy +tests/data/ljspeech/wavs/LJ003-0002.wav|tests/data/ljspeech/wavs/LJ003-0002.npy +tests/data/ljspeech/wavs/LJ008-0188.wav|tests/data/ljspeech/wavs/LJ008-0188.npy +tests/data/ljspeech/wavs/LJ011-0113.wav|tests/data/ljspeech/wavs/LJ011-0113.npy +tests/data/ljspeech/wavs/LJ016-0229.wav|tests/data/ljspeech/wavs/LJ016-0229.npy +tests/data/ljspeech/wavs/LJ028-0493.wav|tests/data/ljspeech/wavs/LJ028-0493.npy +tests/data/ljspeech/wavs/LJ015-0297.wav|tests/data/ljspeech/wavs/LJ015-0297.npy +tests/data/ljspeech/wavs/LJ031-0229.wav|tests/data/ljspeech/wavs/LJ031-0229.npy +tests/data/ljspeech/wavs/LJ034-0043.wav|tests/data/ljspeech/wavs/LJ034-0043.npy +tests/data/ljspeech/wavs/LJ028-0291.wav|tests/data/ljspeech/wavs/LJ028-0291.npy +tests/data/ljspeech/wavs/LJ028-0127.wav|tests/data/ljspeech/wavs/LJ028-0127.npy +tests/data/ljspeech/wavs/LJ009-0001.wav|tests/data/ljspeech/wavs/LJ009-0001.npy +tests/data/ljspeech/wavs/LJ026-0116.wav|tests/data/ljspeech/wavs/LJ026-0116.npy +tests/data/ljspeech/wavs/LJ014-0181.wav|tests/data/ljspeech/wavs/LJ014-0181.npy +tests/data/ljspeech/wavs/LJ013-0112.wav|tests/data/ljspeech/wavs/LJ013-0112.npy +tests/data/ljspeech/wavs/LJ013-0007.wav|tests/data/ljspeech/wavs/LJ013-0007.npy +tests/data/ljspeech/wavs/LJ038-0269.wav|tests/data/ljspeech/wavs/LJ038-0269.npy +tests/data/ljspeech/wavs/LJ049-0078.wav|tests/data/ljspeech/wavs/LJ049-0078.npy +tests/data/ljspeech/wavs/LJ027-0026.wav|tests/data/ljspeech/wavs/LJ027-0026.npy +tests/data/ljspeech/wavs/LJ010-0316.wav|tests/data/ljspeech/wavs/LJ010-0316.npy +tests/data/ljspeech/wavs/LJ002-0249.wav|tests/data/ljspeech/wavs/LJ002-0249.npy 
+tests/data/ljspeech/wavs/LJ025-0006.wav|tests/data/ljspeech/wavs/LJ025-0006.npy +tests/data/ljspeech/wavs/LJ045-0035.wav|tests/data/ljspeech/wavs/LJ045-0035.npy +tests/data/ljspeech/wavs/LJ016-0133.wav|tests/data/ljspeech/wavs/LJ016-0133.npy +tests/data/ljspeech/wavs/LJ014-0159.wav|tests/data/ljspeech/wavs/LJ014-0159.npy +tests/data/ljspeech/wavs/LJ028-0190.wav|tests/data/ljspeech/wavs/LJ028-0190.npy +tests/data/ljspeech/wavs/LJ037-0028.wav|tests/data/ljspeech/wavs/LJ037-0028.npy +tests/data/ljspeech/wavs/LJ005-0292.wav|tests/data/ljspeech/wavs/LJ005-0292.npy +tests/data/ljspeech/wavs/LJ013-0198.wav|tests/data/ljspeech/wavs/LJ013-0198.npy +tests/data/ljspeech/wavs/LJ003-0254.wav|tests/data/ljspeech/wavs/LJ003-0254.npy +tests/data/ljspeech/wavs/LJ008-0046.wav|tests/data/ljspeech/wavs/LJ008-0046.npy +tests/data/ljspeech/wavs/LJ039-0088.wav|tests/data/ljspeech/wavs/LJ039-0088.npy +tests/data/ljspeech/wavs/LJ013-0224.wav|tests/data/ljspeech/wavs/LJ013-0224.npy +tests/data/ljspeech/wavs/LJ024-0121.wav|tests/data/ljspeech/wavs/LJ024-0121.npy +tests/data/ljspeech/wavs/LJ049-0139.wav|tests/data/ljspeech/wavs/LJ049-0139.npy +tests/data/ljspeech/wavs/LJ013-0241.wav|tests/data/ljspeech/wavs/LJ013-0241.npy +tests/data/ljspeech/wavs/LJ028-0162.wav|tests/data/ljspeech/wavs/LJ028-0162.npy +tests/data/ljspeech/wavs/LJ003-0242.wav|tests/data/ljspeech/wavs/LJ003-0242.npy +tests/data/ljspeech/wavs/LJ003-0198.wav|tests/data/ljspeech/wavs/LJ003-0198.npy +tests/data/ljspeech/wavs/LJ032-0145.wav|tests/data/ljspeech/wavs/LJ032-0145.npy +tests/data/ljspeech/wavs/LJ007-0095.wav|tests/data/ljspeech/wavs/LJ007-0095.npy +tests/data/ljspeech/wavs/LJ012-0289.wav|tests/data/ljspeech/wavs/LJ012-0289.npy +tests/data/ljspeech/wavs/LJ028-0167.wav|tests/data/ljspeech/wavs/LJ028-0167.npy +tests/data/ljspeech/wavs/LJ032-0045.wav|tests/data/ljspeech/wavs/LJ032-0045.npy +tests/data/ljspeech/wavs/LJ034-0112.wav|tests/data/ljspeech/wavs/LJ034-0112.npy +tests/data/ljspeech/wavs/LJ047-0127.wav|tests/data/ljspeech/wavs/LJ047-0127.npy +tests/data/ljspeech/wavs/LJ033-0030.wav|tests/data/ljspeech/wavs/LJ033-0030.npy +tests/data/ljspeech/wavs/LJ008-0001.wav|tests/data/ljspeech/wavs/LJ008-0001.npy +tests/data/ljspeech/wavs/LJ037-0051.wav|tests/data/ljspeech/wavs/LJ037-0051.npy +tests/data/ljspeech/wavs/LJ001-0153.wav|tests/data/ljspeech/wavs/LJ001-0153.npy +tests/data/ljspeech/wavs/LJ036-0030.wav|tests/data/ljspeech/wavs/LJ036-0030.npy +tests/data/ljspeech/wavs/LJ031-0082.wav|tests/data/ljspeech/wavs/LJ031-0082.npy +tests/data/ljspeech/wavs/LJ022-0140.wav|tests/data/ljspeech/wavs/LJ022-0140.npy +tests/data/ljspeech/wavs/LJ007-0064.wav|tests/data/ljspeech/wavs/LJ007-0064.npy +tests/data/ljspeech/wavs/LJ021-0140.wav|tests/data/ljspeech/wavs/LJ021-0140.npy +tests/data/ljspeech/wavs/LJ050-0072.wav|tests/data/ljspeech/wavs/LJ050-0072.npy +tests/data/ljspeech/wavs/LJ025-0096.wav|tests/data/ljspeech/wavs/LJ025-0096.npy +tests/data/ljspeech/wavs/LJ048-0159.wav|tests/data/ljspeech/wavs/LJ048-0159.npy +tests/data/ljspeech/wavs/LJ025-0056.wav|tests/data/ljspeech/wavs/LJ025-0056.npy +tests/data/ljspeech/wavs/LJ006-0161.wav|tests/data/ljspeech/wavs/LJ006-0161.npy +tests/data/ljspeech/wavs/LJ013-0046.wav|tests/data/ljspeech/wavs/LJ013-0046.npy +tests/data/ljspeech/wavs/LJ004-0067.wav|tests/data/ljspeech/wavs/LJ004-0067.npy +tests/data/ljspeech/wavs/LJ050-0159.wav|tests/data/ljspeech/wavs/LJ050-0159.npy +tests/data/ljspeech/wavs/LJ027-0129.wav|tests/data/ljspeech/wavs/LJ027-0129.npy 
+tests/data/ljspeech/wavs/LJ013-0245.wav|tests/data/ljspeech/wavs/LJ013-0245.npy +tests/data/ljspeech/wavs/LJ010-0134.wav|tests/data/ljspeech/wavs/LJ010-0134.npy +tests/data/ljspeech/wavs/LJ046-0097.wav|tests/data/ljspeech/wavs/LJ046-0097.npy +tests/data/ljspeech/wavs/LJ008-0003.wav|tests/data/ljspeech/wavs/LJ008-0003.npy +tests/data/ljspeech/wavs/LJ048-0053.wav|tests/data/ljspeech/wavs/LJ048-0053.npy +tests/data/ljspeech/wavs/LJ016-0071.wav|tests/data/ljspeech/wavs/LJ016-0071.npy +tests/data/ljspeech/wavs/LJ049-0133.wav|tests/data/ljspeech/wavs/LJ049-0133.npy +tests/data/ljspeech/wavs/LJ004-0075.wav|tests/data/ljspeech/wavs/LJ004-0075.npy +tests/data/ljspeech/wavs/LJ047-0226.wav|tests/data/ljspeech/wavs/LJ047-0226.npy +tests/data/ljspeech/wavs/LJ016-0044.wav|tests/data/ljspeech/wavs/LJ016-0044.npy +tests/data/ljspeech/wavs/LJ027-0117.wav|tests/data/ljspeech/wavs/LJ027-0117.npy +tests/data/ljspeech/wavs/LJ047-0040.wav|tests/data/ljspeech/wavs/LJ047-0040.npy +tests/data/ljspeech/wavs/LJ032-0149.wav|tests/data/ljspeech/wavs/LJ032-0149.npy +tests/data/ljspeech/wavs/LJ003-0035.wav|tests/data/ljspeech/wavs/LJ003-0035.npy +tests/data/ljspeech/wavs/LJ008-0192.wav|tests/data/ljspeech/wavs/LJ008-0192.npy +tests/data/ljspeech/wavs/LJ007-0242.wav|tests/data/ljspeech/wavs/LJ007-0242.npy +tests/data/ljspeech/wavs/LJ040-0172.wav|tests/data/ljspeech/wavs/LJ040-0172.npy +tests/data/ljspeech/wavs/LJ028-0001.wav|tests/data/ljspeech/wavs/LJ028-0001.npy +tests/data/ljspeech/wavs/LJ049-0120.wav|tests/data/ljspeech/wavs/LJ049-0120.npy +tests/data/ljspeech/wavs/LJ042-0239.wav|tests/data/ljspeech/wavs/LJ042-0239.npy +tests/data/ljspeech/wavs/LJ014-0174.wav|tests/data/ljspeech/wavs/LJ014-0174.npy +tests/data/ljspeech/wavs/LJ025-0031.wav|tests/data/ljspeech/wavs/LJ025-0031.npy +tests/data/ljspeech/wavs/LJ009-0287.wav|tests/data/ljspeech/wavs/LJ009-0287.npy +tests/data/ljspeech/wavs/LJ027-0136.wav|tests/data/ljspeech/wavs/LJ027-0136.npy +tests/data/ljspeech/wavs/LJ021-0025.wav|tests/data/ljspeech/wavs/LJ021-0025.npy +tests/data/ljspeech/wavs/LJ030-0118.wav|tests/data/ljspeech/wavs/LJ030-0118.npy +tests/data/ljspeech/wavs/LJ009-0302.wav|tests/data/ljspeech/wavs/LJ009-0302.npy +tests/data/ljspeech/wavs/LJ019-0310.wav|tests/data/ljspeech/wavs/LJ019-0310.npy +tests/data/ljspeech/wavs/LJ041-0138.wav|tests/data/ljspeech/wavs/LJ041-0138.npy +tests/data/ljspeech/wavs/LJ048-0281.wav|tests/data/ljspeech/wavs/LJ048-0281.npy +tests/data/ljspeech/wavs/LJ008-0115.wav|tests/data/ljspeech/wavs/LJ008-0115.npy +tests/data/ljspeech/wavs/LJ030-0235.wav|tests/data/ljspeech/wavs/LJ030-0235.npy +tests/data/ljspeech/wavs/LJ046-0216.wav|tests/data/ljspeech/wavs/LJ046-0216.npy +tests/data/ljspeech/wavs/LJ014-0295.wav|tests/data/ljspeech/wavs/LJ014-0295.npy +tests/data/ljspeech/wavs/LJ034-0029.wav|tests/data/ljspeech/wavs/LJ034-0029.npy +tests/data/ljspeech/wavs/LJ015-0034.wav|tests/data/ljspeech/wavs/LJ015-0034.npy +tests/data/ljspeech/wavs/LJ035-0044.wav|tests/data/ljspeech/wavs/LJ035-0044.npy +tests/data/ljspeech/wavs/LJ011-0221.wav|tests/data/ljspeech/wavs/LJ011-0221.npy +tests/data/ljspeech/wavs/LJ009-0268.wav|tests/data/ljspeech/wavs/LJ009-0268.npy +tests/data/ljspeech/wavs/LJ046-0186.wav|tests/data/ljspeech/wavs/LJ046-0186.npy +tests/data/ljspeech/wavs/LJ019-0294.wav|tests/data/ljspeech/wavs/LJ019-0294.npy +tests/data/ljspeech/wavs/LJ036-0007.wav|tests/data/ljspeech/wavs/LJ036-0007.npy +tests/data/ljspeech/wavs/LJ003-0141.wav|tests/data/ljspeech/wavs/LJ003-0141.npy 
+tests/data/ljspeech/wavs/LJ019-0038.wav|tests/data/ljspeech/wavs/LJ019-0038.npy +tests/data/ljspeech/wavs/LJ033-0184.wav|tests/data/ljspeech/wavs/LJ033-0184.npy +tests/data/ljspeech/wavs/LJ032-0132.wav|tests/data/ljspeech/wavs/LJ032-0132.npy +tests/data/ljspeech/wavs/LJ037-0209.wav|tests/data/ljspeech/wavs/LJ037-0209.npy +tests/data/ljspeech/wavs/LJ010-0057.wav|tests/data/ljspeech/wavs/LJ010-0057.npy +tests/data/ljspeech/wavs/LJ003-0083.wav|tests/data/ljspeech/wavs/LJ003-0083.npy +tests/data/ljspeech/wavs/LJ003-0183.wav|tests/data/ljspeech/wavs/LJ003-0183.npy +tests/data/ljspeech/wavs/LJ023-0054.wav|tests/data/ljspeech/wavs/LJ023-0054.npy +tests/data/ljspeech/wavs/LJ003-0114.wav|tests/data/ljspeech/wavs/LJ003-0114.npy +tests/data/ljspeech/wavs/LJ001-0080.wav|tests/data/ljspeech/wavs/LJ001-0080.npy +tests/data/ljspeech/wavs/LJ028-0030.wav|tests/data/ljspeech/wavs/LJ028-0030.npy +tests/data/ljspeech/wavs/LJ006-0303.wav|tests/data/ljspeech/wavs/LJ006-0303.npy +tests/data/ljspeech/wavs/LJ035-0071.wav|tests/data/ljspeech/wavs/LJ035-0071.npy +tests/data/ljspeech/wavs/LJ025-0163.wav|tests/data/ljspeech/wavs/LJ025-0163.npy +tests/data/ljspeech/wavs/LJ037-0214.wav|tests/data/ljspeech/wavs/LJ037-0214.npy +tests/data/ljspeech/wavs/LJ048-0225.wav|tests/data/ljspeech/wavs/LJ048-0225.npy +tests/data/ljspeech/wavs/LJ014-0247.wav|tests/data/ljspeech/wavs/LJ014-0247.npy +tests/data/ljspeech/wavs/LJ009-0032.wav|tests/data/ljspeech/wavs/LJ009-0032.npy +tests/data/ljspeech/wavs/LJ019-0245.wav|tests/data/ljspeech/wavs/LJ019-0245.npy +tests/data/ljspeech/wavs/LJ009-0182.wav|tests/data/ljspeech/wavs/LJ009-0182.npy +tests/data/ljspeech/wavs/LJ009-0085.wav|tests/data/ljspeech/wavs/LJ009-0085.npy +tests/data/ljspeech/wavs/LJ019-0067.wav|tests/data/ljspeech/wavs/LJ019-0067.npy +tests/data/ljspeech/wavs/LJ033-0039.wav|tests/data/ljspeech/wavs/LJ033-0039.npy +tests/data/ljspeech/wavs/LJ015-0215.wav|tests/data/ljspeech/wavs/LJ015-0215.npy +tests/data/ljspeech/wavs/LJ008-0061.wav|tests/data/ljspeech/wavs/LJ008-0061.npy +tests/data/ljspeech/wavs/LJ015-0151.wav|tests/data/ljspeech/wavs/LJ015-0151.npy +tests/data/ljspeech/wavs/LJ015-0231.wav|tests/data/ljspeech/wavs/LJ015-0231.npy +tests/data/ljspeech/wavs/LJ009-0191.wav|tests/data/ljspeech/wavs/LJ009-0191.npy +tests/data/ljspeech/wavs/LJ021-0034.wav|tests/data/ljspeech/wavs/LJ021-0034.npy +tests/data/ljspeech/wavs/LJ003-0092.wav|tests/data/ljspeech/wavs/LJ003-0092.npy +tests/data/ljspeech/wavs/LJ014-0063.wav|tests/data/ljspeech/wavs/LJ014-0063.npy +tests/data/ljspeech/wavs/LJ015-0183.wav|tests/data/ljspeech/wavs/LJ015-0183.npy +tests/data/ljspeech/wavs/LJ011-0022.wav|tests/data/ljspeech/wavs/LJ011-0022.npy +tests/data/ljspeech/wavs/LJ043-0070.wav|tests/data/ljspeech/wavs/LJ043-0070.npy +tests/data/ljspeech/wavs/LJ046-0056.wav|tests/data/ljspeech/wavs/LJ046-0056.npy +tests/data/ljspeech/wavs/LJ044-0188.wav|tests/data/ljspeech/wavs/LJ044-0188.npy +tests/data/ljspeech/wavs/LJ042-0005.wav|tests/data/ljspeech/wavs/LJ042-0005.npy +tests/data/ljspeech/wavs/LJ050-0059.wav|tests/data/ljspeech/wavs/LJ050-0059.npy +tests/data/ljspeech/wavs/LJ047-0104.wav|tests/data/ljspeech/wavs/LJ047-0104.npy +tests/data/ljspeech/wavs/LJ027-0063.wav|tests/data/ljspeech/wavs/LJ027-0063.npy +tests/data/ljspeech/wavs/LJ010-0292.wav|tests/data/ljspeech/wavs/LJ010-0292.npy +tests/data/ljspeech/wavs/LJ033-0018.wav|tests/data/ljspeech/wavs/LJ033-0018.npy +tests/data/ljspeech/wavs/LJ031-0060.wav|tests/data/ljspeech/wavs/LJ031-0060.npy 
+tests/data/ljspeech/wavs/LJ006-0214.wav|tests/data/ljspeech/wavs/LJ006-0214.npy +tests/data/ljspeech/wavs/LJ027-0085.wav|tests/data/ljspeech/wavs/LJ027-0085.npy +tests/data/ljspeech/wavs/LJ036-0052.wav|tests/data/ljspeech/wavs/LJ036-0052.npy +tests/data/ljspeech/wavs/LJ008-0234.wav|tests/data/ljspeech/wavs/LJ008-0234.npy +tests/data/ljspeech/wavs/LJ032-0263.wav|tests/data/ljspeech/wavs/LJ032-0263.npy +tests/data/ljspeech/wavs/LJ046-0157.wav|tests/data/ljspeech/wavs/LJ046-0157.npy +tests/data/ljspeech/wavs/LJ006-0241.wav|tests/data/ljspeech/wavs/LJ006-0241.npy +tests/data/ljspeech/wavs/LJ049-0050.wav|tests/data/ljspeech/wavs/LJ049-0050.npy +tests/data/ljspeech/wavs/LJ009-0039.wav|tests/data/ljspeech/wavs/LJ009-0039.npy +tests/data/ljspeech/wavs/LJ040-0136.wav|tests/data/ljspeech/wavs/LJ040-0136.npy +tests/data/ljspeech/wavs/LJ040-0045.wav|tests/data/ljspeech/wavs/LJ040-0045.npy +tests/data/ljspeech/wavs/LJ016-0369.wav|tests/data/ljspeech/wavs/LJ016-0369.npy +tests/data/ljspeech/wavs/LJ035-0039.wav|tests/data/ljspeech/wavs/LJ035-0039.npy +tests/data/ljspeech/wavs/LJ017-0061.wav|tests/data/ljspeech/wavs/LJ017-0061.npy +tests/data/ljspeech/wavs/LJ049-0039.wav|tests/data/ljspeech/wavs/LJ049-0039.npy +tests/data/ljspeech/wavs/LJ027-0044.wav|tests/data/ljspeech/wavs/LJ027-0044.npy +tests/data/ljspeech/wavs/LJ019-0349.wav|tests/data/ljspeech/wavs/LJ019-0349.npy +tests/data/ljspeech/wavs/LJ028-0338.wav|tests/data/ljspeech/wavs/LJ028-0338.npy +tests/data/ljspeech/wavs/LJ028-0430.wav|tests/data/ljspeech/wavs/LJ028-0430.npy +tests/data/ljspeech/wavs/LJ011-0220.wav|tests/data/ljspeech/wavs/LJ011-0220.npy +tests/data/ljspeech/wavs/LJ018-0167.wav|tests/data/ljspeech/wavs/LJ018-0167.npy +tests/data/ljspeech/wavs/LJ013-0012.wav|tests/data/ljspeech/wavs/LJ013-0012.npy +tests/data/ljspeech/wavs/LJ001-0091.wav|tests/data/ljspeech/wavs/LJ001-0091.npy +tests/data/ljspeech/wavs/LJ026-0019.wav|tests/data/ljspeech/wavs/LJ026-0019.npy +tests/data/ljspeech/wavs/LJ022-0184.wav|tests/data/ljspeech/wavs/LJ022-0184.npy +tests/data/ljspeech/wavs/LJ017-0153.wav|tests/data/ljspeech/wavs/LJ017-0153.npy +tests/data/ljspeech/wavs/LJ016-0068.wav|tests/data/ljspeech/wavs/LJ016-0068.npy +tests/data/ljspeech/wavs/LJ015-0295.wav|tests/data/ljspeech/wavs/LJ015-0295.npy +tests/data/ljspeech/wavs/LJ050-0154.wav|tests/data/ljspeech/wavs/LJ050-0154.npy +tests/data/ljspeech/wavs/LJ006-0112.wav|tests/data/ljspeech/wavs/LJ006-0112.npy +tests/data/ljspeech/wavs/LJ041-0022.wav|tests/data/ljspeech/wavs/LJ041-0022.npy +tests/data/ljspeech/wavs/LJ046-0082.wav|tests/data/ljspeech/wavs/LJ046-0082.npy +tests/data/ljspeech/wavs/LJ006-0259.wav|tests/data/ljspeech/wavs/LJ006-0259.npy +tests/data/ljspeech/wavs/LJ034-0076.wav|tests/data/ljspeech/wavs/LJ034-0076.npy +tests/data/ljspeech/wavs/LJ039-0214.wav|tests/data/ljspeech/wavs/LJ039-0214.npy +tests/data/ljspeech/wavs/LJ007-0091.wav|tests/data/ljspeech/wavs/LJ007-0091.npy +tests/data/ljspeech/wavs/LJ030-0066.wav|tests/data/ljspeech/wavs/LJ030-0066.npy +tests/data/ljspeech/wavs/LJ041-0056.wav|tests/data/ljspeech/wavs/LJ041-0056.npy +tests/data/ljspeech/wavs/LJ003-0042.wav|tests/data/ljspeech/wavs/LJ003-0042.npy +tests/data/ljspeech/wavs/LJ001-0138.wav|tests/data/ljspeech/wavs/LJ001-0138.npy +tests/data/ljspeech/wavs/LJ020-0097.wav|tests/data/ljspeech/wavs/LJ020-0097.npy +tests/data/ljspeech/wavs/LJ039-0217.wav|tests/data/ljspeech/wavs/LJ039-0217.npy +tests/data/ljspeech/wavs/LJ017-0218.wav|tests/data/ljspeech/wavs/LJ017-0218.npy 
+tests/data/ljspeech/wavs/LJ032-0042.wav|tests/data/ljspeech/wavs/LJ032-0042.npy +tests/data/ljspeech/wavs/LJ011-0284.wav|tests/data/ljspeech/wavs/LJ011-0284.npy +tests/data/ljspeech/wavs/LJ002-0315.wav|tests/data/ljspeech/wavs/LJ002-0315.npy +tests/data/ljspeech/wavs/LJ049-0132.wav|tests/data/ljspeech/wavs/LJ049-0132.npy +tests/data/ljspeech/wavs/LJ009-0038.wav|tests/data/ljspeech/wavs/LJ009-0038.npy +tests/data/ljspeech/wavs/LJ003-0049.wav|tests/data/ljspeech/wavs/LJ003-0049.npy +tests/data/ljspeech/wavs/LJ028-0146.wav|tests/data/ljspeech/wavs/LJ028-0146.npy +tests/data/ljspeech/wavs/LJ005-0044.wav|tests/data/ljspeech/wavs/LJ005-0044.npy +tests/data/ljspeech/wavs/LJ007-0025.wav|tests/data/ljspeech/wavs/LJ007-0025.npy +tests/data/ljspeech/wavs/LJ043-0180.wav|tests/data/ljspeech/wavs/LJ043-0180.npy +tests/data/ljspeech/wavs/LJ037-0144.wav|tests/data/ljspeech/wavs/LJ037-0144.npy +tests/data/ljspeech/wavs/LJ041-0203.wav|tests/data/ljspeech/wavs/LJ041-0203.npy +tests/data/ljspeech/wavs/LJ019-0270.wav|tests/data/ljspeech/wavs/LJ019-0270.npy +tests/data/ljspeech/wavs/LJ026-0165.wav|tests/data/ljspeech/wavs/LJ026-0165.npy +tests/data/ljspeech/wavs/LJ044-0023.wav|tests/data/ljspeech/wavs/LJ044-0023.npy +tests/data/ljspeech/wavs/LJ048-0075.wav|tests/data/ljspeech/wavs/LJ048-0075.npy +tests/data/ljspeech/wavs/LJ025-0026.wav|tests/data/ljspeech/wavs/LJ025-0026.npy +tests/data/ljspeech/wavs/LJ028-0483.wav|tests/data/ljspeech/wavs/LJ028-0483.npy +tests/data/ljspeech/wavs/LJ001-0047.wav|tests/data/ljspeech/wavs/LJ001-0047.npy +tests/data/ljspeech/wavs/LJ025-0025.wav|tests/data/ljspeech/wavs/LJ025-0025.npy +tests/data/ljspeech/wavs/LJ026-0057.wav|tests/data/ljspeech/wavs/LJ026-0057.npy +tests/data/ljspeech/wavs/LJ021-0098.wav|tests/data/ljspeech/wavs/LJ021-0098.npy +tests/data/ljspeech/wavs/LJ019-0343.wav|tests/data/ljspeech/wavs/LJ019-0343.npy +tests/data/ljspeech/wavs/LJ004-0097.wav|tests/data/ljspeech/wavs/LJ004-0097.npy +tests/data/ljspeech/wavs/LJ006-0263.wav|tests/data/ljspeech/wavs/LJ006-0263.npy +tests/data/ljspeech/wavs/LJ006-0039.wav|tests/data/ljspeech/wavs/LJ006-0039.npy +tests/data/ljspeech/wavs/LJ014-0229.wav|tests/data/ljspeech/wavs/LJ014-0229.npy +tests/data/ljspeech/wavs/LJ015-0259.wav|tests/data/ljspeech/wavs/LJ015-0259.npy +tests/data/ljspeech/wavs/LJ042-0152.wav|tests/data/ljspeech/wavs/LJ042-0152.npy +tests/data/ljspeech/wavs/LJ031-0043.wav|tests/data/ljspeech/wavs/LJ031-0043.npy +tests/data/ljspeech/wavs/LJ041-0154.wav|tests/data/ljspeech/wavs/LJ041-0154.npy +tests/data/ljspeech/wavs/LJ029-0051.wav|tests/data/ljspeech/wavs/LJ029-0051.npy +tests/data/ljspeech/wavs/LJ018-0126.wav|tests/data/ljspeech/wavs/LJ018-0126.npy +tests/data/ljspeech/wavs/LJ004-0148.wav|tests/data/ljspeech/wavs/LJ004-0148.npy +tests/data/ljspeech/wavs/LJ005-0084.wav|tests/data/ljspeech/wavs/LJ005-0084.npy +tests/data/ljspeech/wavs/LJ021-0128.wav|tests/data/ljspeech/wavs/LJ021-0128.npy +tests/data/ljspeech/wavs/LJ047-0075.wav|tests/data/ljspeech/wavs/LJ047-0075.npy +tests/data/ljspeech/wavs/LJ035-0078.wav|tests/data/ljspeech/wavs/LJ035-0078.npy +tests/data/ljspeech/wavs/LJ018-0334.wav|tests/data/ljspeech/wavs/LJ018-0334.npy +tests/data/ljspeech/wavs/LJ012-0212.wav|tests/data/ljspeech/wavs/LJ012-0212.npy +tests/data/ljspeech/wavs/LJ011-0256.wav|tests/data/ljspeech/wavs/LJ011-0256.npy +tests/data/ljspeech/wavs/LJ016-0212.wav|tests/data/ljspeech/wavs/LJ016-0212.npy +tests/data/ljspeech/wavs/LJ044-0222.wav|tests/data/ljspeech/wavs/LJ044-0222.npy 
+tests/data/ljspeech/wavs/LJ032-0027.wav|tests/data/ljspeech/wavs/LJ032-0027.npy +tests/data/ljspeech/wavs/LJ050-0177.wav|tests/data/ljspeech/wavs/LJ050-0177.npy +tests/data/ljspeech/wavs/LJ039-0137.wav|tests/data/ljspeech/wavs/LJ039-0137.npy +tests/data/ljspeech/wavs/LJ012-0092.wav|tests/data/ljspeech/wavs/LJ012-0092.npy +tests/data/ljspeech/wavs/LJ037-0012.wav|tests/data/ljspeech/wavs/LJ037-0012.npy +tests/data/ljspeech/wavs/LJ034-0188.wav|tests/data/ljspeech/wavs/LJ034-0188.npy +tests/data/ljspeech/wavs/LJ004-0111.wav|tests/data/ljspeech/wavs/LJ004-0111.npy +tests/data/ljspeech/wavs/LJ002-0331.wav|tests/data/ljspeech/wavs/LJ002-0331.npy +tests/data/ljspeech/wavs/LJ049-0052.wav|tests/data/ljspeech/wavs/LJ049-0052.npy +tests/data/ljspeech/wavs/LJ013-0126.wav|tests/data/ljspeech/wavs/LJ013-0126.npy +tests/data/ljspeech/wavs/LJ001-0118.wav|tests/data/ljspeech/wavs/LJ001-0118.npy +tests/data/ljspeech/wavs/LJ033-0168.wav|tests/data/ljspeech/wavs/LJ033-0168.npy +tests/data/ljspeech/wavs/LJ008-0273.wav|tests/data/ljspeech/wavs/LJ008-0273.npy +tests/data/ljspeech/wavs/LJ008-0138.wav|tests/data/ljspeech/wavs/LJ008-0138.npy +tests/data/ljspeech/wavs/LJ031-0130.wav|tests/data/ljspeech/wavs/LJ031-0130.npy +tests/data/ljspeech/wavs/LJ008-0017.wav|tests/data/ljspeech/wavs/LJ008-0017.npy +tests/data/ljspeech/wavs/LJ015-0107.wav|tests/data/ljspeech/wavs/LJ015-0107.npy +tests/data/ljspeech/wavs/LJ048-0082.wav|tests/data/ljspeech/wavs/LJ048-0082.npy +tests/data/ljspeech/wavs/LJ039-0019.wav|tests/data/ljspeech/wavs/LJ039-0019.npy +tests/data/ljspeech/wavs/LJ029-0100.wav|tests/data/ljspeech/wavs/LJ029-0100.npy +tests/data/ljspeech/wavs/LJ028-0359.wav|tests/data/ljspeech/wavs/LJ028-0359.npy +tests/data/ljspeech/wavs/LJ015-0021.wav|tests/data/ljspeech/wavs/LJ015-0021.npy +tests/data/ljspeech/wavs/LJ028-0067.wav|tests/data/ljspeech/wavs/LJ028-0067.npy +tests/data/ljspeech/wavs/LJ047-0054.wav|tests/data/ljspeech/wavs/LJ047-0054.npy +tests/data/ljspeech/wavs/LJ006-0029.wav|tests/data/ljspeech/wavs/LJ006-0029.npy +tests/data/ljspeech/wavs/LJ010-0178.wav|tests/data/ljspeech/wavs/LJ010-0178.npy +tests/data/ljspeech/wavs/LJ016-0290.wav|tests/data/ljspeech/wavs/LJ016-0290.npy +tests/data/ljspeech/wavs/LJ019-0108.wav|tests/data/ljspeech/wavs/LJ019-0108.npy +tests/data/ljspeech/wavs/LJ001-0108.wav|tests/data/ljspeech/wavs/LJ001-0108.npy +tests/data/ljspeech/wavs/LJ003-0311.wav|tests/data/ljspeech/wavs/LJ003-0311.npy +tests/data/ljspeech/wavs/LJ028-0478.wav|tests/data/ljspeech/wavs/LJ028-0478.npy +tests/data/ljspeech/wavs/LJ032-0035.wav|tests/data/ljspeech/wavs/LJ032-0035.npy +tests/data/ljspeech/wavs/LJ044-0010.wav|tests/data/ljspeech/wavs/LJ044-0010.npy +tests/data/ljspeech/wavs/LJ039-0105.wav|tests/data/ljspeech/wavs/LJ039-0105.npy +tests/data/ljspeech/wavs/LJ028-0425.wav|tests/data/ljspeech/wavs/LJ028-0425.npy +tests/data/ljspeech/wavs/LJ034-0041.wav|tests/data/ljspeech/wavs/LJ034-0041.npy +tests/data/ljspeech/wavs/LJ012-0069.wav|tests/data/ljspeech/wavs/LJ012-0069.npy +tests/data/ljspeech/wavs/LJ045-0242.wav|tests/data/ljspeech/wavs/LJ045-0242.npy +tests/data/ljspeech/wavs/LJ030-0039.wav|tests/data/ljspeech/wavs/LJ030-0039.npy +tests/data/ljspeech/wavs/LJ021-0204.wav|tests/data/ljspeech/wavs/LJ021-0204.npy +tests/data/ljspeech/wavs/LJ050-0123.wav|tests/data/ljspeech/wavs/LJ050-0123.npy +tests/data/ljspeech/wavs/LJ025-0087.wav|tests/data/ljspeech/wavs/LJ025-0087.npy +tests/data/ljspeech/wavs/LJ044-0134.wav|tests/data/ljspeech/wavs/LJ044-0134.npy 
+tests/data/ljspeech/wavs/LJ046-0016.wav|tests/data/ljspeech/wavs/LJ046-0016.npy +tests/data/ljspeech/wavs/LJ015-0301.wav|tests/data/ljspeech/wavs/LJ015-0301.npy +tests/data/ljspeech/wavs/LJ041-0018.wav|tests/data/ljspeech/wavs/LJ041-0018.npy +tests/data/ljspeech/wavs/LJ030-0070.wav|tests/data/ljspeech/wavs/LJ030-0070.npy +tests/data/ljspeech/wavs/LJ010-0267.wav|tests/data/ljspeech/wavs/LJ010-0267.npy +tests/data/ljspeech/wavs/LJ008-0227.wav|tests/data/ljspeech/wavs/LJ008-0227.npy +tests/data/ljspeech/wavs/LJ042-0032.wav|tests/data/ljspeech/wavs/LJ042-0032.npy +tests/data/ljspeech/wavs/LJ036-0015.wav|tests/data/ljspeech/wavs/LJ036-0015.npy +tests/data/ljspeech/wavs/LJ034-0082.wav|tests/data/ljspeech/wavs/LJ034-0082.npy +tests/data/ljspeech/wavs/LJ024-0113.wav|tests/data/ljspeech/wavs/LJ024-0113.npy +tests/data/ljspeech/wavs/LJ004-0063.wav|tests/data/ljspeech/wavs/LJ004-0063.npy +tests/data/ljspeech/wavs/LJ036-0100.wav|tests/data/ljspeech/wavs/LJ036-0100.npy +tests/data/ljspeech/wavs/LJ022-0035.wav|tests/data/ljspeech/wavs/LJ022-0035.npy +tests/data/ljspeech/wavs/LJ003-0014.wav|tests/data/ljspeech/wavs/LJ003-0014.npy +tests/data/ljspeech/wavs/LJ013-0232.wav|tests/data/ljspeech/wavs/LJ013-0232.npy +tests/data/ljspeech/wavs/LJ013-0195.wav|tests/data/ljspeech/wavs/LJ013-0195.npy +tests/data/ljspeech/wavs/LJ045-0206.wav|tests/data/ljspeech/wavs/LJ045-0206.npy +tests/data/ljspeech/wavs/LJ008-0102.wav|tests/data/ljspeech/wavs/LJ008-0102.npy +tests/data/ljspeech/wavs/LJ007-0123.wav|tests/data/ljspeech/wavs/LJ007-0123.npy +tests/data/ljspeech/wavs/LJ003-0165.wav|tests/data/ljspeech/wavs/LJ003-0165.npy +tests/data/ljspeech/wavs/LJ023-0023.wav|tests/data/ljspeech/wavs/LJ023-0023.npy +tests/data/ljspeech/wavs/LJ040-0066.wav|tests/data/ljspeech/wavs/LJ040-0066.npy +tests/data/ljspeech/wavs/LJ035-0161.wav|tests/data/ljspeech/wavs/LJ035-0161.npy +tests/data/ljspeech/wavs/LJ038-0010.wav|tests/data/ljspeech/wavs/LJ038-0010.npy +tests/data/ljspeech/wavs/LJ015-0311.wav|tests/data/ljspeech/wavs/LJ015-0311.npy +tests/data/ljspeech/wavs/LJ003-0093.wav|tests/data/ljspeech/wavs/LJ003-0093.npy +tests/data/ljspeech/wavs/LJ001-0048.wav|tests/data/ljspeech/wavs/LJ001-0048.npy +tests/data/ljspeech/wavs/LJ021-0051.wav|tests/data/ljspeech/wavs/LJ021-0051.npy +tests/data/ljspeech/wavs/LJ014-0261.wav|tests/data/ljspeech/wavs/LJ014-0261.npy +tests/data/ljspeech/wavs/LJ027-0069.wav|tests/data/ljspeech/wavs/LJ027-0069.npy +tests/data/ljspeech/wavs/LJ031-0048.wav|tests/data/ljspeech/wavs/LJ031-0048.npy +tests/data/ljspeech/wavs/LJ023-0049.wav|tests/data/ljspeech/wavs/LJ023-0049.npy +tests/data/ljspeech/wavs/LJ038-0009.wav|tests/data/ljspeech/wavs/LJ038-0009.npy +tests/data/ljspeech/wavs/LJ028-0240.wav|tests/data/ljspeech/wavs/LJ028-0240.npy +tests/data/ljspeech/wavs/LJ015-0305.wav|tests/data/ljspeech/wavs/LJ015-0305.npy +tests/data/ljspeech/wavs/LJ049-0169.wav|tests/data/ljspeech/wavs/LJ049-0169.npy +tests/data/ljspeech/wavs/LJ004-0214.wav|tests/data/ljspeech/wavs/LJ004-0214.npy +tests/data/ljspeech/wavs/LJ036-0189.wav|tests/data/ljspeech/wavs/LJ036-0189.npy +tests/data/ljspeech/wavs/LJ050-0110.wav|tests/data/ljspeech/wavs/LJ050-0110.npy +tests/data/ljspeech/wavs/LJ001-0064.wav|tests/data/ljspeech/wavs/LJ001-0064.npy +tests/data/ljspeech/wavs/LJ045-0158.wav|tests/data/ljspeech/wavs/LJ045-0158.npy +tests/data/ljspeech/wavs/LJ044-0159.wav|tests/data/ljspeech/wavs/LJ044-0159.npy +tests/data/ljspeech/wavs/LJ015-0003.wav|tests/data/ljspeech/wavs/LJ015-0003.npy 
+tests/data/ljspeech/wavs/LJ021-0106.wav|tests/data/ljspeech/wavs/LJ021-0106.npy +tests/data/ljspeech/wavs/LJ040-0069.wav|tests/data/ljspeech/wavs/LJ040-0069.npy +tests/data/ljspeech/wavs/LJ005-0198.wav|tests/data/ljspeech/wavs/LJ005-0198.npy +tests/data/ljspeech/wavs/LJ014-0286.wav|tests/data/ljspeech/wavs/LJ014-0286.npy +tests/data/ljspeech/wavs/LJ039-0178.wav|tests/data/ljspeech/wavs/LJ039-0178.npy +tests/data/ljspeech/wavs/LJ004-0212.wav|tests/data/ljspeech/wavs/LJ004-0212.npy +tests/data/ljspeech/wavs/LJ003-0157.wav|tests/data/ljspeech/wavs/LJ003-0157.npy +tests/data/ljspeech/wavs/LJ022-0011.wav|tests/data/ljspeech/wavs/LJ022-0011.npy +tests/data/ljspeech/wavs/LJ009-0069.wav|tests/data/ljspeech/wavs/LJ009-0069.npy +tests/data/ljspeech/wavs/LJ011-0040.wav|tests/data/ljspeech/wavs/LJ011-0040.npy +tests/data/ljspeech/wavs/LJ034-0022.wav|tests/data/ljspeech/wavs/LJ034-0022.npy +tests/data/ljspeech/wavs/LJ011-0063.wav|tests/data/ljspeech/wavs/LJ011-0063.npy +tests/data/ljspeech/wavs/LJ046-0067.wav|tests/data/ljspeech/wavs/LJ046-0067.npy +tests/data/ljspeech/wavs/LJ002-0177.wav|tests/data/ljspeech/wavs/LJ002-0177.npy +tests/data/ljspeech/wavs/LJ046-0198.wav|tests/data/ljspeech/wavs/LJ046-0198.npy +tests/data/ljspeech/wavs/LJ022-0082.wav|tests/data/ljspeech/wavs/LJ022-0082.npy +tests/data/ljspeech/wavs/LJ009-0184.wav|tests/data/ljspeech/wavs/LJ009-0184.npy +tests/data/ljspeech/wavs/LJ050-0147.wav|tests/data/ljspeech/wavs/LJ050-0147.npy +tests/data/ljspeech/wavs/LJ005-0144.wav|tests/data/ljspeech/wavs/LJ005-0144.npy +tests/data/ljspeech/wavs/LJ003-0166.wav|tests/data/ljspeech/wavs/LJ003-0166.npy +tests/data/ljspeech/wavs/LJ011-0102.wav|tests/data/ljspeech/wavs/LJ011-0102.npy +tests/data/ljspeech/wavs/LJ010-0046.wav|tests/data/ljspeech/wavs/LJ010-0046.npy +tests/data/ljspeech/wavs/LJ025-0023.wav|tests/data/ljspeech/wavs/LJ025-0023.npy +tests/data/ljspeech/wavs/LJ025-0044.wav|tests/data/ljspeech/wavs/LJ025-0044.npy +tests/data/ljspeech/wavs/LJ010-0257.wav|tests/data/ljspeech/wavs/LJ010-0257.npy +tests/data/ljspeech/wavs/LJ027-0054.wav|tests/data/ljspeech/wavs/LJ027-0054.npy +tests/data/ljspeech/wavs/LJ041-0052.wav|tests/data/ljspeech/wavs/LJ041-0052.npy +tests/data/ljspeech/wavs/LJ006-0250.wav|tests/data/ljspeech/wavs/LJ006-0250.npy +tests/data/ljspeech/wavs/LJ028-0488.wav|tests/data/ljspeech/wavs/LJ028-0488.npy +tests/data/ljspeech/wavs/LJ030-0064.wav|tests/data/ljspeech/wavs/LJ030-0064.npy +tests/data/ljspeech/wavs/LJ015-0141.wav|tests/data/ljspeech/wavs/LJ015-0141.npy +tests/data/ljspeech/wavs/LJ029-0118.wav|tests/data/ljspeech/wavs/LJ029-0118.npy +tests/data/ljspeech/wavs/LJ039-0051.wav|tests/data/ljspeech/wavs/LJ039-0051.npy +tests/data/ljspeech/wavs/LJ016-0116.wav|tests/data/ljspeech/wavs/LJ016-0116.npy +tests/data/ljspeech/wavs/LJ015-0079.wav|tests/data/ljspeech/wavs/LJ015-0079.npy +tests/data/ljspeech/wavs/LJ003-0089.wav|tests/data/ljspeech/wavs/LJ003-0089.npy +tests/data/ljspeech/wavs/LJ016-0413.wav|tests/data/ljspeech/wavs/LJ016-0413.npy +tests/data/ljspeech/wavs/LJ036-0096.wav|tests/data/ljspeech/wavs/LJ036-0096.npy +tests/data/ljspeech/wavs/LJ012-0172.wav|tests/data/ljspeech/wavs/LJ012-0172.npy +tests/data/ljspeech/wavs/LJ016-0078.wav|tests/data/ljspeech/wavs/LJ016-0078.npy +tests/data/ljspeech/wavs/LJ014-0040.wav|tests/data/ljspeech/wavs/LJ014-0040.npy +tests/data/ljspeech/wavs/LJ033-0139.wav|tests/data/ljspeech/wavs/LJ033-0139.npy +tests/data/ljspeech/wavs/LJ047-0009.wav|tests/data/ljspeech/wavs/LJ047-0009.npy 
+tests/data/ljspeech/wavs/LJ047-0116.wav|tests/data/ljspeech/wavs/LJ047-0116.npy +tests/data/ljspeech/wavs/LJ032-0217.wav|tests/data/ljspeech/wavs/LJ032-0217.npy +tests/data/ljspeech/wavs/LJ001-0093.wav|tests/data/ljspeech/wavs/LJ001-0093.npy +tests/data/ljspeech/wavs/LJ027-0155.wav|tests/data/ljspeech/wavs/LJ027-0155.npy +tests/data/ljspeech/wavs/LJ025-0143.wav|tests/data/ljspeech/wavs/LJ025-0143.npy +tests/data/ljspeech/wavs/LJ018-0258.wav|tests/data/ljspeech/wavs/LJ018-0258.npy +tests/data/ljspeech/wavs/LJ045-0193.wav|tests/data/ljspeech/wavs/LJ045-0193.npy +tests/data/ljspeech/wavs/LJ013-0032.wav|tests/data/ljspeech/wavs/LJ013-0032.npy +tests/data/ljspeech/wavs/LJ018-0248.wav|tests/data/ljspeech/wavs/LJ018-0248.npy +tests/data/ljspeech/wavs/LJ017-0172.wav|tests/data/ljspeech/wavs/LJ017-0172.npy +tests/data/ljspeech/wavs/LJ016-0209.wav|tests/data/ljspeech/wavs/LJ016-0209.npy +tests/data/ljspeech/wavs/LJ013-0034.wav|tests/data/ljspeech/wavs/LJ013-0034.npy +tests/data/ljspeech/wavs/LJ047-0244.wav|tests/data/ljspeech/wavs/LJ047-0244.npy +tests/data/ljspeech/wavs/LJ017-0243.wav|tests/data/ljspeech/wavs/LJ017-0243.npy +tests/data/ljspeech/wavs/LJ043-0035.wav|tests/data/ljspeech/wavs/LJ043-0035.npy +tests/data/ljspeech/wavs/LJ030-0004.wav|tests/data/ljspeech/wavs/LJ030-0004.npy +tests/data/ljspeech/wavs/LJ047-0098.wav|tests/data/ljspeech/wavs/LJ047-0098.npy +tests/data/ljspeech/wavs/LJ028-0197.wav|tests/data/ljspeech/wavs/LJ028-0197.npy +tests/data/ljspeech/wavs/LJ044-0226.wav|tests/data/ljspeech/wavs/LJ044-0226.npy +tests/data/ljspeech/wavs/LJ005-0123.wav|tests/data/ljspeech/wavs/LJ005-0123.npy +tests/data/ljspeech/wavs/LJ013-0015.wav|tests/data/ljspeech/wavs/LJ013-0015.npy +tests/data/ljspeech/wavs/LJ018-0293.wav|tests/data/ljspeech/wavs/LJ018-0293.npy +tests/data/ljspeech/wavs/LJ039-0233.wav|tests/data/ljspeech/wavs/LJ039-0233.npy +tests/data/ljspeech/wavs/LJ018-0368.wav|tests/data/ljspeech/wavs/LJ018-0368.npy +tests/data/ljspeech/wavs/LJ036-0217.wav|tests/data/ljspeech/wavs/LJ036-0217.npy +tests/data/ljspeech/wavs/LJ009-0165.wav|tests/data/ljspeech/wavs/LJ009-0165.npy +tests/data/ljspeech/wavs/LJ013-0237.wav|tests/data/ljspeech/wavs/LJ013-0237.npy +tests/data/ljspeech/wavs/LJ005-0209.wav|tests/data/ljspeech/wavs/LJ005-0209.npy +tests/data/ljspeech/wavs/LJ019-0363.wav|tests/data/ljspeech/wavs/LJ019-0363.npy +tests/data/ljspeech/wavs/LJ018-0216.wav|tests/data/ljspeech/wavs/LJ018-0216.npy +tests/data/ljspeech/wavs/LJ045-0179.wav|tests/data/ljspeech/wavs/LJ045-0179.npy +tests/data/ljspeech/wavs/LJ017-0211.wav|tests/data/ljspeech/wavs/LJ017-0211.npy +tests/data/ljspeech/wavs/LJ013-0078.wav|tests/data/ljspeech/wavs/LJ013-0078.npy +tests/data/ljspeech/wavs/LJ016-0326.wav|tests/data/ljspeech/wavs/LJ016-0326.npy +tests/data/ljspeech/wavs/LJ042-0095.wav|tests/data/ljspeech/wavs/LJ042-0095.npy +tests/data/ljspeech/wavs/LJ038-0302.wav|tests/data/ljspeech/wavs/LJ038-0302.npy +tests/data/ljspeech/wavs/LJ026-0004.wav|tests/data/ljspeech/wavs/LJ026-0004.npy +tests/data/ljspeech/wavs/LJ031-0044.wav|tests/data/ljspeech/wavs/LJ031-0044.npy +tests/data/ljspeech/wavs/LJ046-0202.wav|tests/data/ljspeech/wavs/LJ046-0202.npy +tests/data/ljspeech/wavs/LJ044-0102.wav|tests/data/ljspeech/wavs/LJ044-0102.npy +tests/data/ljspeech/wavs/LJ027-0023.wav|tests/data/ljspeech/wavs/LJ027-0023.npy +tests/data/ljspeech/wavs/LJ039-0062.wav|tests/data/ljspeech/wavs/LJ039-0062.npy +tests/data/ljspeech/wavs/LJ013-0160.wav|tests/data/ljspeech/wavs/LJ013-0160.npy 
+tests/data/ljspeech/wavs/LJ024-0135.wav|tests/data/ljspeech/wavs/LJ024-0135.npy +tests/data/ljspeech/wavs/LJ003-0154.wav|tests/data/ljspeech/wavs/LJ003-0154.npy +tests/data/ljspeech/wavs/LJ047-0155.wav|tests/data/ljspeech/wavs/LJ047-0155.npy +tests/data/ljspeech/wavs/LJ011-0264.wav|tests/data/ljspeech/wavs/LJ011-0264.npy +tests/data/ljspeech/wavs/LJ006-0234.wav|tests/data/ljspeech/wavs/LJ006-0234.npy +tests/data/ljspeech/wavs/LJ012-0271.wav|tests/data/ljspeech/wavs/LJ012-0271.npy +tests/data/ljspeech/wavs/LJ014-0065.wav|tests/data/ljspeech/wavs/LJ014-0065.npy +tests/data/ljspeech/wavs/LJ028-0082.wav|tests/data/ljspeech/wavs/LJ028-0082.npy +tests/data/ljspeech/wavs/LJ013-0180.wav|tests/data/ljspeech/wavs/LJ013-0180.npy +tests/data/ljspeech/wavs/LJ038-0039.wav|tests/data/ljspeech/wavs/LJ038-0039.npy +tests/data/ljspeech/wavs/LJ049-0037.wav|tests/data/ljspeech/wavs/LJ049-0037.npy +tests/data/ljspeech/wavs/LJ048-0061.wav|tests/data/ljspeech/wavs/LJ048-0061.npy +tests/data/ljspeech/wavs/LJ016-0367.wav|tests/data/ljspeech/wavs/LJ016-0367.npy +tests/data/ljspeech/wavs/LJ047-0064.wav|tests/data/ljspeech/wavs/LJ047-0064.npy +tests/data/ljspeech/wavs/LJ028-0263.wav|tests/data/ljspeech/wavs/LJ028-0263.npy +tests/data/ljspeech/wavs/LJ003-0208.wav|tests/data/ljspeech/wavs/LJ003-0208.npy +tests/data/ljspeech/wavs/LJ015-0049.wav|tests/data/ljspeech/wavs/LJ015-0049.npy +tests/data/ljspeech/wavs/LJ029-0007.wav|tests/data/ljspeech/wavs/LJ029-0007.npy +tests/data/ljspeech/wavs/LJ002-0073.wav|tests/data/ljspeech/wavs/LJ002-0073.npy +tests/data/ljspeech/wavs/LJ039-0028.wav|tests/data/ljspeech/wavs/LJ039-0028.npy +tests/data/ljspeech/wavs/LJ013-0051.wav|tests/data/ljspeech/wavs/LJ013-0051.npy +tests/data/ljspeech/wavs/LJ046-0197.wav|tests/data/ljspeech/wavs/LJ046-0197.npy +tests/data/ljspeech/wavs/LJ012-0264.wav|tests/data/ljspeech/wavs/LJ012-0264.npy +tests/data/ljspeech/wavs/LJ041-0045.wav|tests/data/ljspeech/wavs/LJ041-0045.npy +tests/data/ljspeech/wavs/LJ021-0056.wav|tests/data/ljspeech/wavs/LJ021-0056.npy +tests/data/ljspeech/wavs/LJ008-0132.wav|tests/data/ljspeech/wavs/LJ008-0132.npy +tests/data/ljspeech/wavs/LJ028-0225.wav|tests/data/ljspeech/wavs/LJ028-0225.npy +tests/data/ljspeech/wavs/LJ028-0094.wav|tests/data/ljspeech/wavs/LJ028-0094.npy +tests/data/ljspeech/wavs/LJ009-0245.wav|tests/data/ljspeech/wavs/LJ009-0245.npy +tests/data/ljspeech/wavs/LJ044-0126.wav|tests/data/ljspeech/wavs/LJ044-0126.npy +tests/data/ljspeech/wavs/LJ028-0337.wav|tests/data/ljspeech/wavs/LJ028-0337.npy +tests/data/ljspeech/wavs/LJ009-0134.wav|tests/data/ljspeech/wavs/LJ009-0134.npy +tests/data/ljspeech/wavs/LJ032-0119.wav|tests/data/ljspeech/wavs/LJ032-0119.npy +tests/data/ljspeech/wavs/LJ004-0116.wav|tests/data/ljspeech/wavs/LJ004-0116.npy +tests/data/ljspeech/wavs/LJ007-0112.wav|tests/data/ljspeech/wavs/LJ007-0112.npy +tests/data/ljspeech/wavs/LJ003-0152.wav|tests/data/ljspeech/wavs/LJ003-0152.npy +tests/data/ljspeech/wavs/LJ035-0100.wav|tests/data/ljspeech/wavs/LJ035-0100.npy +tests/data/ljspeech/wavs/LJ010-0223.wav|tests/data/ljspeech/wavs/LJ010-0223.npy +tests/data/ljspeech/wavs/LJ014-0135.wav|tests/data/ljspeech/wavs/LJ014-0135.npy +tests/data/ljspeech/wavs/LJ019-0157.wav|tests/data/ljspeech/wavs/LJ019-0157.npy +tests/data/ljspeech/wavs/LJ020-0055.wav|tests/data/ljspeech/wavs/LJ020-0055.npy +tests/data/ljspeech/wavs/LJ030-0199.wav|tests/data/ljspeech/wavs/LJ030-0199.npy +tests/data/ljspeech/wavs/LJ028-0327.wav|tests/data/ljspeech/wavs/LJ028-0327.npy 
+tests/data/ljspeech/wavs/LJ033-0067.wav|tests/data/ljspeech/wavs/LJ033-0067.npy +tests/data/ljspeech/wavs/LJ013-0185.wav|tests/data/ljspeech/wavs/LJ013-0185.npy +tests/data/ljspeech/wavs/LJ019-0318.wav|tests/data/ljspeech/wavs/LJ019-0318.npy +tests/data/ljspeech/wavs/LJ012-0130.wav|tests/data/ljspeech/wavs/LJ012-0130.npy +tests/data/ljspeech/wavs/LJ012-0219.wav|tests/data/ljspeech/wavs/LJ012-0219.npy +tests/data/ljspeech/wavs/LJ012-0236.wav|tests/data/ljspeech/wavs/LJ012-0236.npy +tests/data/ljspeech/wavs/LJ038-0178.wav|tests/data/ljspeech/wavs/LJ038-0178.npy +tests/data/ljspeech/wavs/LJ048-0242.wav|tests/data/ljspeech/wavs/LJ048-0242.npy +tests/data/ljspeech/wavs/LJ041-0133.wav|tests/data/ljspeech/wavs/LJ041-0133.npy +tests/data/ljspeech/wavs/LJ017-0125.wav|tests/data/ljspeech/wavs/LJ017-0125.npy +tests/data/ljspeech/wavs/LJ033-0080.wav|tests/data/ljspeech/wavs/LJ033-0080.npy +tests/data/ljspeech/wavs/LJ044-0062.wav|tests/data/ljspeech/wavs/LJ044-0062.npy +tests/data/ljspeech/wavs/LJ028-0088.wav|tests/data/ljspeech/wavs/LJ028-0088.npy +tests/data/ljspeech/wavs/LJ022-0041.wav|tests/data/ljspeech/wavs/LJ022-0041.npy +tests/data/ljspeech/wavs/LJ038-0218.wav|tests/data/ljspeech/wavs/LJ038-0218.npy +tests/data/ljspeech/wavs/LJ033-0162.wav|tests/data/ljspeech/wavs/LJ033-0162.npy +tests/data/ljspeech/wavs/LJ048-0097.wav|tests/data/ljspeech/wavs/LJ048-0097.npy +tests/data/ljspeech/wavs/LJ029-0207.wav|tests/data/ljspeech/wavs/LJ029-0207.npy +tests/data/ljspeech/wavs/LJ025-0123.wav|tests/data/ljspeech/wavs/LJ025-0123.npy +tests/data/ljspeech/wavs/LJ012-0221.wav|tests/data/ljspeech/wavs/LJ012-0221.npy +tests/data/ljspeech/wavs/LJ028-0340.wav|tests/data/ljspeech/wavs/LJ028-0340.npy +tests/data/ljspeech/wavs/LJ013-0017.wav|tests/data/ljspeech/wavs/LJ013-0017.npy +tests/data/ljspeech/wavs/LJ005-0102.wav|tests/data/ljspeech/wavs/LJ005-0102.npy +tests/data/ljspeech/wavs/LJ012-0218.wav|tests/data/ljspeech/wavs/LJ012-0218.npy +tests/data/ljspeech/wavs/LJ013-0266.wav|tests/data/ljspeech/wavs/LJ013-0266.npy +tests/data/ljspeech/wavs/LJ046-0068.wav|tests/data/ljspeech/wavs/LJ046-0068.npy +tests/data/ljspeech/wavs/LJ020-0102.wav|tests/data/ljspeech/wavs/LJ020-0102.npy +tests/data/ljspeech/wavs/LJ038-0241.wav|tests/data/ljspeech/wavs/LJ038-0241.npy +tests/data/ljspeech/wavs/LJ003-0209.wav|tests/data/ljspeech/wavs/LJ003-0209.npy +tests/data/ljspeech/wavs/LJ043-0139.wav|tests/data/ljspeech/wavs/LJ043-0139.npy +tests/data/ljspeech/wavs/LJ014-0031.wav|tests/data/ljspeech/wavs/LJ014-0031.npy +tests/data/ljspeech/wavs/LJ032-0111.wav|tests/data/ljspeech/wavs/LJ032-0111.npy +tests/data/ljspeech/wavs/LJ019-0288.wav|tests/data/ljspeech/wavs/LJ019-0288.npy +tests/data/ljspeech/wavs/LJ020-0108.wav|tests/data/ljspeech/wavs/LJ020-0108.npy +tests/data/ljspeech/wavs/LJ018-0037.wav|tests/data/ljspeech/wavs/LJ018-0037.npy +tests/data/ljspeech/wavs/LJ003-0248.wav|tests/data/ljspeech/wavs/LJ003-0248.npy +tests/data/ljspeech/wavs/LJ035-0089.wav|tests/data/ljspeech/wavs/LJ035-0089.npy +tests/data/ljspeech/wavs/LJ001-0131.wav|tests/data/ljspeech/wavs/LJ001-0131.npy +tests/data/ljspeech/wavs/LJ005-0068.wav|tests/data/ljspeech/wavs/LJ005-0068.npy +tests/data/ljspeech/wavs/LJ038-0212.wav|tests/data/ljspeech/wavs/LJ038-0212.npy +tests/data/ljspeech/wavs/LJ032-0043.wav|tests/data/ljspeech/wavs/LJ032-0043.npy +tests/data/ljspeech/wavs/LJ044-0172.wav|tests/data/ljspeech/wavs/LJ044-0172.npy +tests/data/ljspeech/wavs/LJ016-0097.wav|tests/data/ljspeech/wavs/LJ016-0097.npy 
+tests/data/ljspeech/wavs/LJ050-0118.wav|tests/data/ljspeech/wavs/LJ050-0118.npy +tests/data/ljspeech/wavs/LJ022-0098.wav|tests/data/ljspeech/wavs/LJ022-0098.npy +tests/data/ljspeech/wavs/LJ029-0005.wav|tests/data/ljspeech/wavs/LJ029-0005.npy +tests/data/ljspeech/wavs/LJ049-0065.wav|tests/data/ljspeech/wavs/LJ049-0065.npy +tests/data/ljspeech/wavs/LJ022-0099.wav|tests/data/ljspeech/wavs/LJ022-0099.npy +tests/data/ljspeech/wavs/LJ018-0366.wav|tests/data/ljspeech/wavs/LJ018-0366.npy +tests/data/ljspeech/wavs/LJ038-0032.wav|tests/data/ljspeech/wavs/LJ038-0032.npy +tests/data/ljspeech/wavs/LJ018-0365.wav|tests/data/ljspeech/wavs/LJ018-0365.npy +tests/data/ljspeech/wavs/LJ015-0210.wav|tests/data/ljspeech/wavs/LJ015-0210.npy +tests/data/ljspeech/wavs/LJ047-0010.wav|tests/data/ljspeech/wavs/LJ047-0010.npy +tests/data/ljspeech/wavs/LJ032-0097.wav|tests/data/ljspeech/wavs/LJ032-0097.npy +tests/data/ljspeech/wavs/LJ006-0053.wav|tests/data/ljspeech/wavs/LJ006-0053.npy +tests/data/ljspeech/wavs/LJ022-0149.wav|tests/data/ljspeech/wavs/LJ022-0149.npy +tests/data/ljspeech/wavs/LJ045-0010.wav|tests/data/ljspeech/wavs/LJ045-0010.npy +tests/data/ljspeech/wavs/LJ007-0205.wav|tests/data/ljspeech/wavs/LJ007-0205.npy +tests/data/ljspeech/wavs/LJ008-0228.wav|tests/data/ljspeech/wavs/LJ008-0228.npy +tests/data/ljspeech/wavs/LJ008-0306.wav|tests/data/ljspeech/wavs/LJ008-0306.npy +tests/data/ljspeech/wavs/LJ022-0168.wav|tests/data/ljspeech/wavs/LJ022-0168.npy +tests/data/ljspeech/wavs/LJ008-0304.wav|tests/data/ljspeech/wavs/LJ008-0304.npy +tests/data/ljspeech/wavs/LJ003-0073.wav|tests/data/ljspeech/wavs/LJ003-0073.npy +tests/data/ljspeech/wavs/LJ005-0107.wav|tests/data/ljspeech/wavs/LJ005-0107.npy +tests/data/ljspeech/wavs/LJ028-0494.wav|tests/data/ljspeech/wavs/LJ028-0494.npy +tests/data/ljspeech/wavs/LJ004-0165.wav|tests/data/ljspeech/wavs/LJ004-0165.npy +tests/data/ljspeech/wavs/LJ049-0088.wav|tests/data/ljspeech/wavs/LJ049-0088.npy +tests/data/ljspeech/wavs/LJ030-0071.wav|tests/data/ljspeech/wavs/LJ030-0071.npy +tests/data/ljspeech/wavs/LJ015-0275.wav|tests/data/ljspeech/wavs/LJ015-0275.npy +tests/data/ljspeech/wavs/LJ008-0203.wav|tests/data/ljspeech/wavs/LJ008-0203.npy +tests/data/ljspeech/wavs/LJ034-0127.wav|tests/data/ljspeech/wavs/LJ034-0127.npy +tests/data/ljspeech/wavs/LJ005-0221.wav|tests/data/ljspeech/wavs/LJ005-0221.npy +tests/data/ljspeech/wavs/LJ003-0195.wav|tests/data/ljspeech/wavs/LJ003-0195.npy +tests/data/ljspeech/wavs/LJ035-0198.wav|tests/data/ljspeech/wavs/LJ035-0198.npy +tests/data/ljspeech/wavs/LJ026-0125.wav|tests/data/ljspeech/wavs/LJ026-0125.npy +tests/data/ljspeech/wavs/LJ033-0151.wav|tests/data/ljspeech/wavs/LJ033-0151.npy +tests/data/ljspeech/wavs/LJ016-0155.wav|tests/data/ljspeech/wavs/LJ016-0155.npy +tests/data/ljspeech/wavs/LJ019-0273.wav|tests/data/ljspeech/wavs/LJ019-0273.npy +tests/data/ljspeech/wavs/LJ022-0112.wav|tests/data/ljspeech/wavs/LJ022-0112.npy +tests/data/ljspeech/wavs/LJ006-0153.wav|tests/data/ljspeech/wavs/LJ006-0153.npy +tests/data/ljspeech/wavs/LJ005-0200.wav|tests/data/ljspeech/wavs/LJ005-0200.npy +tests/data/ljspeech/wavs/LJ010-0120.wav|tests/data/ljspeech/wavs/LJ010-0120.npy +tests/data/ljspeech/wavs/LJ004-0023.wav|tests/data/ljspeech/wavs/LJ004-0023.npy +tests/data/ljspeech/wavs/LJ025-0067.wav|tests/data/ljspeech/wavs/LJ025-0067.npy +tests/data/ljspeech/wavs/LJ016-0327.wav|tests/data/ljspeech/wavs/LJ016-0327.npy +tests/data/ljspeech/wavs/LJ011-0197.wav|tests/data/ljspeech/wavs/LJ011-0197.npy 
+tests/data/ljspeech/wavs/LJ010-0064.wav|tests/data/ljspeech/wavs/LJ010-0064.npy +tests/data/ljspeech/wavs/LJ016-0336.wav|tests/data/ljspeech/wavs/LJ016-0336.npy +tests/data/ljspeech/wavs/LJ033-0023.wav|tests/data/ljspeech/wavs/LJ033-0023.npy +tests/data/ljspeech/wavs/LJ036-0049.wav|tests/data/ljspeech/wavs/LJ036-0049.npy +tests/data/ljspeech/wavs/LJ031-0170.wav|tests/data/ljspeech/wavs/LJ031-0170.npy +tests/data/ljspeech/wavs/LJ037-0108.wav|tests/data/ljspeech/wavs/LJ037-0108.npy +tests/data/ljspeech/wavs/LJ016-0161.wav|tests/data/ljspeech/wavs/LJ016-0161.npy +tests/data/ljspeech/wavs/LJ048-0288.wav|tests/data/ljspeech/wavs/LJ048-0288.npy +tests/data/ljspeech/wavs/LJ043-0149.wav|tests/data/ljspeech/wavs/LJ043-0149.npy +tests/data/ljspeech/wavs/LJ004-0113.wav|tests/data/ljspeech/wavs/LJ004-0113.npy +tests/data/ljspeech/wavs/LJ004-0044.wav|tests/data/ljspeech/wavs/LJ004-0044.npy +tests/data/ljspeech/wavs/LJ005-0071.wav|tests/data/ljspeech/wavs/LJ005-0071.npy +tests/data/ljspeech/wavs/LJ039-0182.wav|tests/data/ljspeech/wavs/LJ039-0182.npy +tests/data/ljspeech/wavs/LJ039-0075.wav|tests/data/ljspeech/wavs/LJ039-0075.npy +tests/data/ljspeech/wavs/LJ010-0116.wav|tests/data/ljspeech/wavs/LJ010-0116.npy +tests/data/ljspeech/wavs/LJ018-0116.wav|tests/data/ljspeech/wavs/LJ018-0116.npy +tests/data/ljspeech/wavs/LJ016-0005.wav|tests/data/ljspeech/wavs/LJ016-0005.npy +tests/data/ljspeech/wavs/LJ006-0133.wav|tests/data/ljspeech/wavs/LJ006-0133.npy +tests/data/ljspeech/wavs/LJ002-0025.wav|tests/data/ljspeech/wavs/LJ002-0025.npy +tests/data/ljspeech/wavs/LJ040-0103.wav|tests/data/ljspeech/wavs/LJ040-0103.npy +tests/data/ljspeech/wavs/LJ026-0104.wav|tests/data/ljspeech/wavs/LJ026-0104.npy +tests/data/ljspeech/wavs/LJ047-0078.wav|tests/data/ljspeech/wavs/LJ047-0078.npy +tests/data/ljspeech/wavs/LJ021-0187.wav|tests/data/ljspeech/wavs/LJ021-0187.npy +tests/data/ljspeech/wavs/LJ050-0202.wav|tests/data/ljspeech/wavs/LJ050-0202.npy +tests/data/ljspeech/wavs/LJ019-0271.wav|tests/data/ljspeech/wavs/LJ019-0271.npy +tests/data/ljspeech/wavs/LJ011-0123.wav|tests/data/ljspeech/wavs/LJ011-0123.npy +tests/data/ljspeech/wavs/LJ004-0091.wav|tests/data/ljspeech/wavs/LJ004-0091.npy +tests/data/ljspeech/wavs/LJ029-0080.wav|tests/data/ljspeech/wavs/LJ029-0080.npy +tests/data/ljspeech/wavs/LJ047-0089.wav|tests/data/ljspeech/wavs/LJ047-0089.npy +tests/data/ljspeech/wavs/LJ016-0039.wav|tests/data/ljspeech/wavs/LJ016-0039.npy +tests/data/ljspeech/wavs/LJ032-0267.wav|tests/data/ljspeech/wavs/LJ032-0267.npy +tests/data/ljspeech/wavs/LJ014-0166.wav|tests/data/ljspeech/wavs/LJ014-0166.npy +tests/data/ljspeech/wavs/LJ037-0094.wav|tests/data/ljspeech/wavs/LJ037-0094.npy +tests/data/ljspeech/wavs/LJ042-0086.wav|tests/data/ljspeech/wavs/LJ042-0086.npy +tests/data/ljspeech/wavs/LJ021-0010.wav|tests/data/ljspeech/wavs/LJ021-0010.npy +tests/data/ljspeech/wavs/LJ018-0144.wav|tests/data/ljspeech/wavs/LJ018-0144.npy +tests/data/ljspeech/wavs/LJ035-0177.wav|tests/data/ljspeech/wavs/LJ035-0177.npy +tests/data/ljspeech/wavs/LJ003-0246.wav|tests/data/ljspeech/wavs/LJ003-0246.npy +tests/data/ljspeech/wavs/LJ020-0106.wav|tests/data/ljspeech/wavs/LJ020-0106.npy +tests/data/ljspeech/wavs/LJ018-0015.wav|tests/data/ljspeech/wavs/LJ018-0015.npy +tests/data/ljspeech/wavs/LJ026-0102.wav|tests/data/ljspeech/wavs/LJ026-0102.npy +tests/data/ljspeech/wavs/LJ006-0260.wav|tests/data/ljspeech/wavs/LJ006-0260.npy +tests/data/ljspeech/wavs/LJ046-0040.wav|tests/data/ljspeech/wavs/LJ046-0040.npy 
+tests/data/ljspeech/wavs/LJ031-0006.wav|tests/data/ljspeech/wavs/LJ031-0006.npy +tests/data/ljspeech/wavs/LJ039-0184.wav|tests/data/ljspeech/wavs/LJ039-0184.npy +tests/data/ljspeech/wavs/LJ025-0049.wav|tests/data/ljspeech/wavs/LJ025-0049.npy +tests/data/ljspeech/wavs/LJ030-0180.wav|tests/data/ljspeech/wavs/LJ030-0180.npy +tests/data/ljspeech/wavs/LJ016-0186.wav|tests/data/ljspeech/wavs/LJ016-0186.npy +tests/data/ljspeech/wavs/LJ010-0084.wav|tests/data/ljspeech/wavs/LJ010-0084.npy +tests/data/ljspeech/wavs/LJ033-0161.wav|tests/data/ljspeech/wavs/LJ033-0161.npy +tests/data/ljspeech/wavs/LJ047-0058.wav|tests/data/ljspeech/wavs/LJ047-0058.npy +tests/data/ljspeech/wavs/LJ044-0217.wav|tests/data/ljspeech/wavs/LJ044-0217.npy +tests/data/ljspeech/wavs/LJ011-0265.wav|tests/data/ljspeech/wavs/LJ011-0265.npy +tests/data/ljspeech/wavs/LJ038-0181.wav|tests/data/ljspeech/wavs/LJ038-0181.npy +tests/data/ljspeech/wavs/LJ030-0077.wav|tests/data/ljspeech/wavs/LJ030-0077.npy +tests/data/ljspeech/wavs/LJ011-0271.wav|tests/data/ljspeech/wavs/LJ011-0271.npy +tests/data/ljspeech/wavs/LJ040-0067.wav|tests/data/ljspeech/wavs/LJ040-0067.npy +tests/data/ljspeech/wavs/LJ032-0011.wav|tests/data/ljspeech/wavs/LJ032-0011.npy +tests/data/ljspeech/wavs/LJ016-0087.wav|tests/data/ljspeech/wavs/LJ016-0087.npy +tests/data/ljspeech/wavs/LJ013-0263.wav|tests/data/ljspeech/wavs/LJ013-0263.npy +tests/data/ljspeech/wavs/LJ017-0187.wav|tests/data/ljspeech/wavs/LJ017-0187.npy +tests/data/ljspeech/wavs/LJ013-0170.wav|tests/data/ljspeech/wavs/LJ013-0170.npy +tests/data/ljspeech/wavs/LJ001-0030.wav|tests/data/ljspeech/wavs/LJ001-0030.npy +tests/data/ljspeech/wavs/LJ018-0269.wav|tests/data/ljspeech/wavs/LJ018-0269.npy +tests/data/ljspeech/wavs/LJ008-0005.wav|tests/data/ljspeech/wavs/LJ008-0005.npy +tests/data/ljspeech/wavs/LJ039-0084.wav|tests/data/ljspeech/wavs/LJ039-0084.npy +tests/data/ljspeech/wavs/LJ023-0079.wav|tests/data/ljspeech/wavs/LJ023-0079.npy +tests/data/ljspeech/wavs/LJ018-0128.wav|tests/data/ljspeech/wavs/LJ018-0128.npy +tests/data/ljspeech/wavs/LJ014-0110.wav|tests/data/ljspeech/wavs/LJ014-0110.npy +tests/data/ljspeech/wavs/LJ013-0206.wav|tests/data/ljspeech/wavs/LJ013-0206.npy +tests/data/ljspeech/wavs/LJ028-0046.wav|tests/data/ljspeech/wavs/LJ028-0046.npy +tests/data/ljspeech/wavs/LJ029-0141.wav|tests/data/ljspeech/wavs/LJ029-0141.npy +tests/data/ljspeech/wavs/LJ032-0099.wav|tests/data/ljspeech/wavs/LJ032-0099.npy +tests/data/ljspeech/wavs/LJ012-0057.wav|tests/data/ljspeech/wavs/LJ012-0057.npy +tests/data/ljspeech/wavs/LJ018-0151.wav|tests/data/ljspeech/wavs/LJ018-0151.npy +tests/data/ljspeech/wavs/LJ030-0080.wav|tests/data/ljspeech/wavs/LJ030-0080.npy +tests/data/ljspeech/wavs/LJ009-0081.wav|tests/data/ljspeech/wavs/LJ009-0081.npy +tests/data/ljspeech/wavs/LJ015-0142.wav|tests/data/ljspeech/wavs/LJ015-0142.npy +tests/data/ljspeech/wavs/LJ050-0199.wav|tests/data/ljspeech/wavs/LJ050-0199.npy +tests/data/ljspeech/wavs/LJ002-0323.wav|tests/data/ljspeech/wavs/LJ002-0323.npy +tests/data/ljspeech/wavs/LJ021-0003.wav|tests/data/ljspeech/wavs/LJ021-0003.npy +tests/data/ljspeech/wavs/LJ009-0201.wav|tests/data/ljspeech/wavs/LJ009-0201.npy +tests/data/ljspeech/wavs/LJ046-0009.wav|tests/data/ljspeech/wavs/LJ046-0009.npy +tests/data/ljspeech/wavs/LJ043-0143.wav|tests/data/ljspeech/wavs/LJ043-0143.npy +tests/data/ljspeech/wavs/LJ012-0162.wav|tests/data/ljspeech/wavs/LJ012-0162.npy +tests/data/ljspeech/wavs/LJ043-0054.wav|tests/data/ljspeech/wavs/LJ043-0054.npy 
+tests/data/ljspeech/wavs/LJ031-0121.wav|tests/data/ljspeech/wavs/LJ031-0121.npy +tests/data/ljspeech/wavs/LJ033-0054.wav|tests/data/ljspeech/wavs/LJ033-0054.npy +tests/data/ljspeech/wavs/LJ008-0144.wav|tests/data/ljspeech/wavs/LJ008-0144.npy +tests/data/ljspeech/wavs/LJ021-0064.wav|tests/data/ljspeech/wavs/LJ021-0064.npy +tests/data/ljspeech/wavs/LJ015-0198.wav|tests/data/ljspeech/wavs/LJ015-0198.npy +tests/data/ljspeech/wavs/LJ032-0273.wav|tests/data/ljspeech/wavs/LJ032-0273.npy +tests/data/ljspeech/wavs/LJ032-0224.wav|tests/data/ljspeech/wavs/LJ032-0224.npy +tests/data/ljspeech/wavs/LJ039-0145.wav|tests/data/ljspeech/wavs/LJ039-0145.npy +tests/data/ljspeech/wavs/LJ034-0108.wav|tests/data/ljspeech/wavs/LJ034-0108.npy +tests/data/ljspeech/wavs/LJ018-0011.wav|tests/data/ljspeech/wavs/LJ018-0011.npy +tests/data/ljspeech/wavs/LJ030-0116.wav|tests/data/ljspeech/wavs/LJ030-0116.npy +tests/data/ljspeech/wavs/LJ031-0186.wav|tests/data/ljspeech/wavs/LJ031-0186.npy +tests/data/ljspeech/wavs/LJ004-0237.wav|tests/data/ljspeech/wavs/LJ004-0237.npy +tests/data/ljspeech/wavs/LJ042-0174.wav|tests/data/ljspeech/wavs/LJ042-0174.npy +tests/data/ljspeech/wavs/LJ023-0086.wav|tests/data/ljspeech/wavs/LJ023-0086.npy +tests/data/ljspeech/wavs/LJ015-0273.wav|tests/data/ljspeech/wavs/LJ015-0273.npy +tests/data/ljspeech/wavs/LJ022-0026.wav|tests/data/ljspeech/wavs/LJ022-0026.npy +tests/data/ljspeech/wavs/LJ049-0099.wav|tests/data/ljspeech/wavs/LJ049-0099.npy +tests/data/ljspeech/wavs/LJ025-0089.wav|tests/data/ljspeech/wavs/LJ025-0089.npy +tests/data/ljspeech/wavs/LJ022-0071.wav|tests/data/ljspeech/wavs/LJ022-0071.npy +tests/data/ljspeech/wavs/LJ016-0339.wav|tests/data/ljspeech/wavs/LJ016-0339.npy +tests/data/ljspeech/wavs/LJ015-0212.wav|tests/data/ljspeech/wavs/LJ015-0212.npy +tests/data/ljspeech/wavs/LJ025-0133.wav|tests/data/ljspeech/wavs/LJ025-0133.npy +tests/data/ljspeech/wavs/LJ020-0087.wav|tests/data/ljspeech/wavs/LJ020-0087.npy +tests/data/ljspeech/wavs/LJ039-0248.wav|tests/data/ljspeech/wavs/LJ039-0248.npy +tests/data/ljspeech/wavs/LJ034-0159.wav|tests/data/ljspeech/wavs/LJ034-0159.npy +tests/data/ljspeech/wavs/LJ002-0231.wav|tests/data/ljspeech/wavs/LJ002-0231.npy +tests/data/ljspeech/wavs/LJ032-0226.wav|tests/data/ljspeech/wavs/LJ032-0226.npy +tests/data/ljspeech/wavs/LJ033-0007.wav|tests/data/ljspeech/wavs/LJ033-0007.npy +tests/data/ljspeech/wavs/LJ002-0264.wav|tests/data/ljspeech/wavs/LJ002-0264.npy +tests/data/ljspeech/wavs/LJ008-0019.wav|tests/data/ljspeech/wavs/LJ008-0019.npy +tests/data/ljspeech/wavs/LJ036-0114.wav|tests/data/ljspeech/wavs/LJ036-0114.npy +tests/data/ljspeech/wavs/LJ007-0057.wav|tests/data/ljspeech/wavs/LJ007-0057.npy +tests/data/ljspeech/wavs/LJ014-0241.wav|tests/data/ljspeech/wavs/LJ014-0241.npy +tests/data/ljspeech/wavs/LJ003-0084.wav|tests/data/ljspeech/wavs/LJ003-0084.npy +tests/data/ljspeech/wavs/LJ016-0342.wav|tests/data/ljspeech/wavs/LJ016-0342.npy +tests/data/ljspeech/wavs/LJ011-0226.wav|tests/data/ljspeech/wavs/LJ011-0226.npy +tests/data/ljspeech/wavs/LJ027-0102.wav|tests/data/ljspeech/wavs/LJ027-0102.npy +tests/data/ljspeech/wavs/LJ042-0006.wav|tests/data/ljspeech/wavs/LJ042-0006.npy +tests/data/ljspeech/wavs/LJ037-0114.wav|tests/data/ljspeech/wavs/LJ037-0114.npy +tests/data/ljspeech/wavs/LJ018-0174.wav|tests/data/ljspeech/wavs/LJ018-0174.npy +tests/data/ljspeech/wavs/LJ044-0076.wav|tests/data/ljspeech/wavs/LJ044-0076.npy +tests/data/ljspeech/wavs/LJ015-0298.wav|tests/data/ljspeech/wavs/LJ015-0298.npy 
+tests/data/ljspeech/wavs/LJ015-0262.wav|tests/data/ljspeech/wavs/LJ015-0262.npy +tests/data/ljspeech/wavs/LJ027-0109.wav|tests/data/ljspeech/wavs/LJ027-0109.npy +tests/data/ljspeech/wavs/LJ045-0120.wav|tests/data/ljspeech/wavs/LJ045-0120.npy +tests/data/ljspeech/wavs/LJ008-0201.wav|tests/data/ljspeech/wavs/LJ008-0201.npy +tests/data/ljspeech/wavs/LJ003-0090.wav|tests/data/ljspeech/wavs/LJ003-0090.npy +tests/data/ljspeech/wavs/LJ041-0007.wav|tests/data/ljspeech/wavs/LJ041-0007.npy +tests/data/ljspeech/wavs/LJ029-0046.wav|tests/data/ljspeech/wavs/LJ029-0046.npy +tests/data/ljspeech/wavs/LJ039-0243.wav|tests/data/ljspeech/wavs/LJ039-0243.npy +tests/data/ljspeech/wavs/LJ010-0281.wav|tests/data/ljspeech/wavs/LJ010-0281.npy +tests/data/ljspeech/wavs/LJ038-0277.wav|tests/data/ljspeech/wavs/LJ038-0277.npy +tests/data/ljspeech/wavs/LJ028-0019.wav|tests/data/ljspeech/wavs/LJ028-0019.npy +tests/data/ljspeech/wavs/LJ020-0009.wav|tests/data/ljspeech/wavs/LJ020-0009.npy +tests/data/ljspeech/wavs/LJ012-0175.wav|tests/data/ljspeech/wavs/LJ012-0175.npy +tests/data/ljspeech/wavs/LJ006-0238.wav|tests/data/ljspeech/wavs/LJ006-0238.npy +tests/data/ljspeech/wavs/LJ043-0176.wav|tests/data/ljspeech/wavs/LJ043-0176.npy +tests/data/ljspeech/wavs/LJ002-0047.wav|tests/data/ljspeech/wavs/LJ002-0047.npy +tests/data/ljspeech/wavs/LJ018-0240.wav|tests/data/ljspeech/wavs/LJ018-0240.npy +tests/data/ljspeech/wavs/LJ039-0236.wav|tests/data/ljspeech/wavs/LJ039-0236.npy +tests/data/ljspeech/wavs/LJ034-0071.wav|tests/data/ljspeech/wavs/LJ034-0071.npy +tests/data/ljspeech/wavs/LJ044-0058.wav|tests/data/ljspeech/wavs/LJ044-0058.npy +tests/data/ljspeech/wavs/LJ033-0086.wav|tests/data/ljspeech/wavs/LJ033-0086.npy +tests/data/ljspeech/wavs/LJ034-0205.wav|tests/data/ljspeech/wavs/LJ034-0205.npy +tests/data/ljspeech/wavs/LJ013-0268.wav|tests/data/ljspeech/wavs/LJ013-0268.npy +tests/data/ljspeech/wavs/LJ031-0215.wav|tests/data/ljspeech/wavs/LJ031-0215.npy +tests/data/ljspeech/wavs/LJ047-0117.wav|tests/data/ljspeech/wavs/LJ047-0117.npy +tests/data/ljspeech/wavs/LJ013-0069.wav|tests/data/ljspeech/wavs/LJ013-0069.npy +tests/data/ljspeech/wavs/LJ018-0233.wav|tests/data/ljspeech/wavs/LJ018-0233.npy +tests/data/ljspeech/wavs/LJ021-0179.wav|tests/data/ljspeech/wavs/LJ021-0179.npy +tests/data/ljspeech/wavs/LJ046-0092.wav|tests/data/ljspeech/wavs/LJ046-0092.npy +tests/data/ljspeech/wavs/LJ028-0138.wav|tests/data/ljspeech/wavs/LJ028-0138.npy +tests/data/ljspeech/wavs/LJ036-0010.wav|tests/data/ljspeech/wavs/LJ036-0010.npy +tests/data/ljspeech/wavs/LJ006-0189.wav|tests/data/ljspeech/wavs/LJ006-0189.npy +tests/data/ljspeech/wavs/LJ050-0262.wav|tests/data/ljspeech/wavs/LJ050-0262.npy +tests/data/ljspeech/wavs/LJ024-0130.wav|tests/data/ljspeech/wavs/LJ024-0130.npy +tests/data/ljspeech/wavs/LJ029-0066.wav|tests/data/ljspeech/wavs/LJ029-0066.npy +tests/data/ljspeech/wavs/LJ041-0085.wav|tests/data/ljspeech/wavs/LJ041-0085.npy +tests/data/ljspeech/wavs/LJ028-0152.wav|tests/data/ljspeech/wavs/LJ028-0152.npy +tests/data/ljspeech/wavs/LJ032-0120.wav|tests/data/ljspeech/wavs/LJ032-0120.npy +tests/data/ljspeech/wavs/LJ003-0261.wav|tests/data/ljspeech/wavs/LJ003-0261.npy +tests/data/ljspeech/wavs/LJ002-0319.wav|tests/data/ljspeech/wavs/LJ002-0319.npy +tests/data/ljspeech/wavs/LJ030-0226.wav|tests/data/ljspeech/wavs/LJ030-0226.npy +tests/data/ljspeech/wavs/LJ008-0038.wav|tests/data/ljspeech/wavs/LJ008-0038.npy +tests/data/ljspeech/wavs/LJ010-0140.wav|tests/data/ljspeech/wavs/LJ010-0140.npy 
+tests/data/ljspeech/wavs/LJ050-0220.wav|tests/data/ljspeech/wavs/LJ050-0220.npy +tests/data/ljspeech/wavs/LJ009-0106.wav|tests/data/ljspeech/wavs/LJ009-0106.npy +tests/data/ljspeech/wavs/LJ005-0086.wav|tests/data/ljspeech/wavs/LJ005-0086.npy +tests/data/ljspeech/wavs/LJ010-0124.wav|tests/data/ljspeech/wavs/LJ010-0124.npy +tests/data/ljspeech/wavs/LJ038-0289.wav|tests/data/ljspeech/wavs/LJ038-0289.npy +tests/data/ljspeech/wavs/LJ013-0181.wav|tests/data/ljspeech/wavs/LJ013-0181.npy +tests/data/ljspeech/wavs/LJ011-0005.wav|tests/data/ljspeech/wavs/LJ011-0005.npy +tests/data/ljspeech/wavs/LJ017-0111.wav|tests/data/ljspeech/wavs/LJ017-0111.npy +tests/data/ljspeech/wavs/LJ040-0049.wav|tests/data/ljspeech/wavs/LJ040-0049.npy +tests/data/ljspeech/wavs/LJ047-0094.wav|tests/data/ljspeech/wavs/LJ047-0094.npy +tests/data/ljspeech/wavs/LJ039-0097.wav|tests/data/ljspeech/wavs/LJ039-0097.npy +tests/data/ljspeech/wavs/LJ010-0038.wav|tests/data/ljspeech/wavs/LJ010-0038.npy +tests/data/ljspeech/wavs/LJ007-0176.wav|tests/data/ljspeech/wavs/LJ007-0176.npy +tests/data/ljspeech/wavs/LJ018-0103.wav|tests/data/ljspeech/wavs/LJ018-0103.npy +tests/data/ljspeech/wavs/LJ042-0062.wav|tests/data/ljspeech/wavs/LJ042-0062.npy +tests/data/ljspeech/wavs/LJ026-0058.wav|tests/data/ljspeech/wavs/LJ026-0058.npy +tests/data/ljspeech/wavs/LJ003-0174.wav|tests/data/ljspeech/wavs/LJ003-0174.npy +tests/data/ljspeech/wavs/LJ023-0060.wav|tests/data/ljspeech/wavs/LJ023-0060.npy +tests/data/ljspeech/wavs/LJ048-0078.wav|tests/data/ljspeech/wavs/LJ048-0078.npy +tests/data/ljspeech/wavs/LJ047-0166.wav|tests/data/ljspeech/wavs/LJ047-0166.npy +tests/data/ljspeech/wavs/LJ024-0026.wav|tests/data/ljspeech/wavs/LJ024-0026.npy +tests/data/ljspeech/wavs/LJ042-0120.wav|tests/data/ljspeech/wavs/LJ042-0120.npy +tests/data/ljspeech/wavs/LJ006-0174.wav|tests/data/ljspeech/wavs/LJ006-0174.npy +tests/data/ljspeech/wavs/LJ027-0066.wav|tests/data/ljspeech/wavs/LJ027-0066.npy +tests/data/ljspeech/wavs/LJ012-0036.wav|tests/data/ljspeech/wavs/LJ012-0036.npy +tests/data/ljspeech/wavs/LJ019-0233.wav|tests/data/ljspeech/wavs/LJ019-0233.npy +tests/data/ljspeech/wavs/LJ017-0113.wav|tests/data/ljspeech/wavs/LJ017-0113.npy +tests/data/ljspeech/wavs/LJ026-0046.wav|tests/data/ljspeech/wavs/LJ026-0046.npy +tests/data/ljspeech/wavs/LJ040-0033.wav|tests/data/ljspeech/wavs/LJ040-0033.npy +tests/data/ljspeech/wavs/LJ036-0186.wav|tests/data/ljspeech/wavs/LJ036-0186.npy +tests/data/ljspeech/wavs/LJ011-0157.wav|tests/data/ljspeech/wavs/LJ011-0157.npy +tests/data/ljspeech/wavs/LJ003-0282.wav|tests/data/ljspeech/wavs/LJ003-0282.npy +tests/data/ljspeech/wavs/LJ045-0249.wav|tests/data/ljspeech/wavs/LJ045-0249.npy +tests/data/ljspeech/wavs/LJ035-0173.wav|tests/data/ljspeech/wavs/LJ035-0173.npy +tests/data/ljspeech/wavs/LJ017-0006.wav|tests/data/ljspeech/wavs/LJ017-0006.npy +tests/data/ljspeech/wavs/LJ048-0093.wav|tests/data/ljspeech/wavs/LJ048-0093.npy +tests/data/ljspeech/wavs/LJ045-0073.wav|tests/data/ljspeech/wavs/LJ045-0073.npy +tests/data/ljspeech/wavs/LJ012-0166.wav|tests/data/ljspeech/wavs/LJ012-0166.npy +tests/data/ljspeech/wavs/LJ047-0139.wav|tests/data/ljspeech/wavs/LJ047-0139.npy +tests/data/ljspeech/wavs/LJ003-0121.wav|tests/data/ljspeech/wavs/LJ003-0121.npy +tests/data/ljspeech/wavs/LJ026-0034.wav|tests/data/ljspeech/wavs/LJ026-0034.npy +tests/data/ljspeech/wavs/LJ039-0142.wav|tests/data/ljspeech/wavs/LJ039-0142.npy +tests/data/ljspeech/wavs/LJ026-0153.wav|tests/data/ljspeech/wavs/LJ026-0153.npy 
+tests/data/ljspeech/wavs/LJ006-0295.wav|tests/data/ljspeech/wavs/LJ006-0295.npy +tests/data/ljspeech/wavs/LJ014-0193.wav|tests/data/ljspeech/wavs/LJ014-0193.npy +tests/data/ljspeech/wavs/LJ003-0162.wav|tests/data/ljspeech/wavs/LJ003-0162.npy +tests/data/ljspeech/wavs/LJ015-0022.wav|tests/data/ljspeech/wavs/LJ015-0022.npy +tests/data/ljspeech/wavs/LJ050-0106.wav|tests/data/ljspeech/wavs/LJ050-0106.npy +tests/data/ljspeech/wavs/LJ034-0077.wav|tests/data/ljspeech/wavs/LJ034-0077.npy +tests/data/ljspeech/wavs/LJ015-0150.wav|tests/data/ljspeech/wavs/LJ015-0150.npy +tests/data/ljspeech/wavs/LJ017-0062.wav|tests/data/ljspeech/wavs/LJ017-0062.npy +tests/data/ljspeech/wavs/LJ044-0086.wav|tests/data/ljspeech/wavs/LJ044-0086.npy +tests/data/ljspeech/wavs/LJ005-0150.wav|tests/data/ljspeech/wavs/LJ005-0150.npy +tests/data/ljspeech/wavs/LJ004-0025.wav|tests/data/ljspeech/wavs/LJ004-0025.npy +tests/data/ljspeech/wavs/LJ015-0069.wav|tests/data/ljspeech/wavs/LJ015-0069.npy +tests/data/ljspeech/wavs/LJ021-0060.wav|tests/data/ljspeech/wavs/LJ021-0060.npy +tests/data/ljspeech/wavs/LJ010-0008.wav|tests/data/ljspeech/wavs/LJ010-0008.npy +tests/data/ljspeech/wavs/LJ021-0070.wav|tests/data/ljspeech/wavs/LJ021-0070.npy +tests/data/ljspeech/wavs/LJ016-0072.wav|tests/data/ljspeech/wavs/LJ016-0072.npy +tests/data/ljspeech/wavs/LJ017-0190.wav|tests/data/ljspeech/wavs/LJ017-0190.npy +tests/data/ljspeech/wavs/LJ022-0135.wav|tests/data/ljspeech/wavs/LJ022-0135.npy +tests/data/ljspeech/wavs/LJ028-0059.wav|tests/data/ljspeech/wavs/LJ028-0059.npy +tests/data/ljspeech/wavs/LJ035-0129.wav|tests/data/ljspeech/wavs/LJ035-0129.npy +tests/data/ljspeech/wavs/LJ002-0105.wav|tests/data/ljspeech/wavs/LJ002-0105.npy +tests/data/ljspeech/wavs/LJ021-0210.wav|tests/data/ljspeech/wavs/LJ021-0210.npy +tests/data/ljspeech/wavs/LJ019-0303.wav|tests/data/ljspeech/wavs/LJ019-0303.npy +tests/data/ljspeech/wavs/LJ048-0098.wav|tests/data/ljspeech/wavs/LJ048-0098.npy +tests/data/ljspeech/wavs/LJ025-0108.wav|tests/data/ljspeech/wavs/LJ025-0108.npy +tests/data/ljspeech/wavs/LJ009-0285.wav|tests/data/ljspeech/wavs/LJ009-0285.npy +tests/data/ljspeech/wavs/LJ033-0201.wav|tests/data/ljspeech/wavs/LJ033-0201.npy +tests/data/ljspeech/wavs/LJ050-0224.wav|tests/data/ljspeech/wavs/LJ050-0224.npy +tests/data/ljspeech/wavs/LJ039-0199.wav|tests/data/ljspeech/wavs/LJ039-0199.npy +tests/data/ljspeech/wavs/LJ003-0079.wav|tests/data/ljspeech/wavs/LJ003-0079.npy +tests/data/ljspeech/wavs/LJ037-0141.wav|tests/data/ljspeech/wavs/LJ037-0141.npy +tests/data/ljspeech/wavs/LJ036-0197.wav|tests/data/ljspeech/wavs/LJ036-0197.npy +tests/data/ljspeech/wavs/LJ045-0115.wav|tests/data/ljspeech/wavs/LJ045-0115.npy +tests/data/ljspeech/wavs/LJ031-0218.wav|tests/data/ljspeech/wavs/LJ031-0218.npy +tests/data/ljspeech/wavs/LJ019-0309.wav|tests/data/ljspeech/wavs/LJ019-0309.npy +tests/data/ljspeech/wavs/LJ014-0122.wav|tests/data/ljspeech/wavs/LJ014-0122.npy +tests/data/ljspeech/wavs/LJ036-0132.wav|tests/data/ljspeech/wavs/LJ036-0132.npy +tests/data/ljspeech/wavs/LJ036-0203.wav|tests/data/ljspeech/wavs/LJ036-0203.npy +tests/data/ljspeech/wavs/LJ048-0076.wav|tests/data/ljspeech/wavs/LJ048-0076.npy +tests/data/ljspeech/wavs/LJ021-0111.wav|tests/data/ljspeech/wavs/LJ021-0111.npy +tests/data/ljspeech/wavs/LJ046-0028.wav|tests/data/ljspeech/wavs/LJ046-0028.npy +tests/data/ljspeech/wavs/LJ006-0268.wav|tests/data/ljspeech/wavs/LJ006-0268.npy +tests/data/ljspeech/wavs/LJ002-0306.wav|tests/data/ljspeech/wavs/LJ002-0306.npy 
+tests/data/ljspeech/wavs/LJ006-0206.wav|tests/data/ljspeech/wavs/LJ006-0206.npy +tests/data/ljspeech/wavs/LJ035-0028.wav|tests/data/ljspeech/wavs/LJ035-0028.npy +tests/data/ljspeech/wavs/LJ028-0131.wav|tests/data/ljspeech/wavs/LJ028-0131.npy +tests/data/ljspeech/wavs/LJ018-0323.wav|tests/data/ljspeech/wavs/LJ018-0323.npy +tests/data/ljspeech/wavs/LJ019-0320.wav|tests/data/ljspeech/wavs/LJ019-0320.npy +tests/data/ljspeech/wavs/LJ041-0043.wav|tests/data/ljspeech/wavs/LJ041-0043.npy +tests/data/ljspeech/wavs/LJ025-0121.wav|tests/data/ljspeech/wavs/LJ025-0121.npy +tests/data/ljspeech/wavs/LJ014-0071.wav|tests/data/ljspeech/wavs/LJ014-0071.npy +tests/data/ljspeech/wavs/LJ050-0257.wav|tests/data/ljspeech/wavs/LJ050-0257.npy +tests/data/ljspeech/wavs/LJ005-0249.wav|tests/data/ljspeech/wavs/LJ005-0249.npy +tests/data/ljspeech/wavs/LJ048-0258.wav|tests/data/ljspeech/wavs/LJ048-0258.npy +tests/data/ljspeech/wavs/LJ037-0132.wav|tests/data/ljspeech/wavs/LJ037-0132.npy +tests/data/ljspeech/wavs/LJ010-0063.wav|tests/data/ljspeech/wavs/LJ010-0063.npy +tests/data/ljspeech/wavs/LJ002-0263.wav|tests/data/ljspeech/wavs/LJ002-0263.npy +tests/data/ljspeech/wavs/LJ035-0205.wav|tests/data/ljspeech/wavs/LJ035-0205.npy +tests/data/ljspeech/wavs/LJ019-0277.wav|tests/data/ljspeech/wavs/LJ019-0277.npy +tests/data/ljspeech/wavs/LJ039-0186.wav|tests/data/ljspeech/wavs/LJ039-0186.npy +tests/data/ljspeech/wavs/LJ005-0250.wav|tests/data/ljspeech/wavs/LJ005-0250.npy +tests/data/ljspeech/wavs/LJ045-0014.wav|tests/data/ljspeech/wavs/LJ045-0014.npy +tests/data/ljspeech/wavs/LJ023-0007.wav|tests/data/ljspeech/wavs/LJ023-0007.npy +tests/data/ljspeech/wavs/LJ031-0055.wav|tests/data/ljspeech/wavs/LJ031-0055.npy +tests/data/ljspeech/wavs/LJ003-0302.wav|tests/data/ljspeech/wavs/LJ003-0302.npy +tests/data/ljspeech/wavs/LJ029-0019.wav|tests/data/ljspeech/wavs/LJ029-0019.npy +tests/data/ljspeech/wavs/LJ024-0084.wav|tests/data/ljspeech/wavs/LJ024-0084.npy +tests/data/ljspeech/wavs/LJ005-0226.wav|tests/data/ljspeech/wavs/LJ005-0226.npy +tests/data/ljspeech/wavs/LJ041-0132.wav|tests/data/ljspeech/wavs/LJ041-0132.npy +tests/data/ljspeech/wavs/LJ001-0036.wav|tests/data/ljspeech/wavs/LJ001-0036.npy +tests/data/ljspeech/wavs/LJ029-0148.wav|tests/data/ljspeech/wavs/LJ029-0148.npy +tests/data/ljspeech/wavs/LJ025-0014.wav|tests/data/ljspeech/wavs/LJ025-0014.npy +tests/data/ljspeech/wavs/LJ005-0258.wav|tests/data/ljspeech/wavs/LJ005-0258.npy +tests/data/ljspeech/wavs/LJ014-0142.wav|tests/data/ljspeech/wavs/LJ014-0142.npy +tests/data/ljspeech/wavs/LJ001-0163.wav|tests/data/ljspeech/wavs/LJ001-0163.npy +tests/data/ljspeech/wavs/LJ041-0184.wav|tests/data/ljspeech/wavs/LJ041-0184.npy +tests/data/ljspeech/wavs/LJ010-0052.wav|tests/data/ljspeech/wavs/LJ010-0052.npy +tests/data/ljspeech/wavs/LJ012-0122.wav|tests/data/ljspeech/wavs/LJ012-0122.npy +tests/data/ljspeech/wavs/LJ037-0111.wav|tests/data/ljspeech/wavs/LJ037-0111.npy +tests/data/ljspeech/wavs/LJ006-0075.wav|tests/data/ljspeech/wavs/LJ006-0075.npy +tests/data/ljspeech/wavs/LJ016-0025.wav|tests/data/ljspeech/wavs/LJ016-0025.npy +tests/data/ljspeech/wavs/LJ011-0288.wav|tests/data/ljspeech/wavs/LJ011-0288.npy +tests/data/ljspeech/wavs/LJ021-0139.wav|tests/data/ljspeech/wavs/LJ021-0139.npy +tests/data/ljspeech/wavs/LJ006-0014.wav|tests/data/ljspeech/wavs/LJ006-0014.npy +tests/data/ljspeech/wavs/LJ030-0036.wav|tests/data/ljspeech/wavs/LJ030-0036.npy +tests/data/ljspeech/wavs/LJ008-0254.wav|tests/data/ljspeech/wavs/LJ008-0254.npy 
+tests/data/ljspeech/wavs/LJ014-0136.wav|tests/data/ljspeech/wavs/LJ014-0136.npy +tests/data/ljspeech/wavs/LJ021-0071.wav|tests/data/ljspeech/wavs/LJ021-0071.npy +tests/data/ljspeech/wavs/LJ050-0026.wav|tests/data/ljspeech/wavs/LJ050-0026.npy +tests/data/ljspeech/wavs/LJ031-0126.wav|tests/data/ljspeech/wavs/LJ031-0126.npy +tests/data/ljspeech/wavs/LJ031-0046.wav|tests/data/ljspeech/wavs/LJ031-0046.npy +tests/data/ljspeech/wavs/LJ036-0076.wav|tests/data/ljspeech/wavs/LJ036-0076.npy +tests/data/ljspeech/wavs/LJ045-0131.wav|tests/data/ljspeech/wavs/LJ045-0131.npy +tests/data/ljspeech/wavs/LJ031-0210.wav|tests/data/ljspeech/wavs/LJ031-0210.npy +tests/data/ljspeech/wavs/LJ045-0181.wav|tests/data/ljspeech/wavs/LJ045-0181.npy +tests/data/ljspeech/wavs/LJ012-0001.wav|tests/data/ljspeech/wavs/LJ012-0001.npy +tests/data/ljspeech/wavs/LJ047-0154.wav|tests/data/ljspeech/wavs/LJ047-0154.npy +tests/data/ljspeech/wavs/LJ016-0207.wav|tests/data/ljspeech/wavs/LJ016-0207.npy +tests/data/ljspeech/wavs/LJ003-0201.wav|tests/data/ljspeech/wavs/LJ003-0201.npy +tests/data/ljspeech/wavs/LJ006-0162.wav|tests/data/ljspeech/wavs/LJ006-0162.npy +tests/data/ljspeech/wavs/LJ039-0067.wav|tests/data/ljspeech/wavs/LJ039-0067.npy +tests/data/ljspeech/wavs/LJ031-0059.wav|tests/data/ljspeech/wavs/LJ031-0059.npy +tests/data/ljspeech/wavs/LJ014-0215.wav|tests/data/ljspeech/wavs/LJ014-0215.npy +tests/data/ljspeech/wavs/LJ004-0032.wav|tests/data/ljspeech/wavs/LJ004-0032.npy +tests/data/ljspeech/wavs/LJ011-0049.wav|tests/data/ljspeech/wavs/LJ011-0049.npy +tests/data/ljspeech/wavs/LJ003-0196.wav|tests/data/ljspeech/wavs/LJ003-0196.npy +tests/data/ljspeech/wavs/LJ004-0195.wav|tests/data/ljspeech/wavs/LJ004-0195.npy +tests/data/ljspeech/wavs/LJ005-0162.wav|tests/data/ljspeech/wavs/LJ005-0162.npy +tests/data/ljspeech/wavs/LJ003-0024.wav|tests/data/ljspeech/wavs/LJ003-0024.npy +tests/data/ljspeech/wavs/LJ038-0094.wav|tests/data/ljspeech/wavs/LJ038-0094.npy +tests/data/ljspeech/wavs/LJ048-0047.wav|tests/data/ljspeech/wavs/LJ048-0047.npy +tests/data/ljspeech/wavs/LJ040-0164.wav|tests/data/ljspeech/wavs/LJ040-0164.npy +tests/data/ljspeech/wavs/LJ046-0017.wav|tests/data/ljspeech/wavs/LJ046-0017.npy +tests/data/ljspeech/wavs/LJ050-0108.wav|tests/data/ljspeech/wavs/LJ050-0108.npy +tests/data/ljspeech/wavs/LJ037-0154.wav|tests/data/ljspeech/wavs/LJ037-0154.npy +tests/data/ljspeech/wavs/LJ012-0118.wav|tests/data/ljspeech/wavs/LJ012-0118.npy +tests/data/ljspeech/wavs/LJ003-0344.wav|tests/data/ljspeech/wavs/LJ003-0344.npy +tests/data/ljspeech/wavs/LJ018-0149.wav|tests/data/ljspeech/wavs/LJ018-0149.npy +tests/data/ljspeech/wavs/LJ030-0037.wav|tests/data/ljspeech/wavs/LJ030-0037.npy +tests/data/ljspeech/wavs/LJ014-0274.wav|tests/data/ljspeech/wavs/LJ014-0274.npy +tests/data/ljspeech/wavs/LJ035-0115.wav|tests/data/ljspeech/wavs/LJ035-0115.npy +tests/data/ljspeech/wavs/LJ037-0143.wav|tests/data/ljspeech/wavs/LJ037-0143.npy +tests/data/ljspeech/wavs/LJ007-0021.wav|tests/data/ljspeech/wavs/LJ007-0021.npy +tests/data/ljspeech/wavs/LJ037-0255.wav|tests/data/ljspeech/wavs/LJ037-0255.npy +tests/data/ljspeech/wavs/LJ002-0147.wav|tests/data/ljspeech/wavs/LJ002-0147.npy +tests/data/ljspeech/wavs/LJ036-0020.wav|tests/data/ljspeech/wavs/LJ036-0020.npy +tests/data/ljspeech/wavs/LJ036-0036.wav|tests/data/ljspeech/wavs/LJ036-0036.npy +tests/data/ljspeech/wavs/LJ032-0098.wav|tests/data/ljspeech/wavs/LJ032-0098.npy +tests/data/ljspeech/wavs/LJ029-0039.wav|tests/data/ljspeech/wavs/LJ029-0039.npy 
+tests/data/ljspeech/wavs/LJ033-0024.wav|tests/data/ljspeech/wavs/LJ033-0024.npy +tests/data/ljspeech/wavs/LJ019-0188.wav|tests/data/ljspeech/wavs/LJ019-0188.npy +tests/data/ljspeech/wavs/LJ012-0024.wav|tests/data/ljspeech/wavs/LJ012-0024.npy +tests/data/ljspeech/wavs/LJ010-0170.wav|tests/data/ljspeech/wavs/LJ010-0170.npy +tests/data/ljspeech/wavs/LJ040-0206.wav|tests/data/ljspeech/wavs/LJ040-0206.npy +tests/data/ljspeech/wavs/LJ044-0170.wav|tests/data/ljspeech/wavs/LJ044-0170.npy +tests/data/ljspeech/wavs/LJ015-0105.wav|tests/data/ljspeech/wavs/LJ015-0105.npy +tests/data/ljspeech/wavs/LJ012-0252.wav|tests/data/ljspeech/wavs/LJ012-0252.npy +tests/data/ljspeech/wavs/LJ037-0220.wav|tests/data/ljspeech/wavs/LJ037-0220.npy +tests/data/ljspeech/wavs/LJ012-0053.wav|tests/data/ljspeech/wavs/LJ012-0053.npy +tests/data/ljspeech/wavs/LJ012-0075.wav|tests/data/ljspeech/wavs/LJ012-0075.npy +tests/data/ljspeech/wavs/LJ015-0119.wav|tests/data/ljspeech/wavs/LJ015-0119.npy +tests/data/ljspeech/wavs/LJ050-0181.wav|tests/data/ljspeech/wavs/LJ050-0181.npy +tests/data/ljspeech/wavs/LJ015-0100.wav|tests/data/ljspeech/wavs/LJ015-0100.npy +tests/data/ljspeech/wavs/LJ044-0236.wav|tests/data/ljspeech/wavs/LJ044-0236.npy +tests/data/ljspeech/wavs/LJ036-0042.wav|tests/data/ljspeech/wavs/LJ036-0042.npy +tests/data/ljspeech/wavs/LJ049-0184.wav|tests/data/ljspeech/wavs/LJ049-0184.npy +tests/data/ljspeech/wavs/LJ015-0114.wav|tests/data/ljspeech/wavs/LJ015-0114.npy +tests/data/ljspeech/wavs/LJ010-0164.wav|tests/data/ljspeech/wavs/LJ010-0164.npy +tests/data/ljspeech/wavs/LJ002-0327.wav|tests/data/ljspeech/wavs/LJ002-0327.npy +tests/data/ljspeech/wavs/LJ032-0214.wav|tests/data/ljspeech/wavs/LJ032-0214.npy +tests/data/ljspeech/wavs/LJ028-0025.wav|tests/data/ljspeech/wavs/LJ028-0025.npy +tests/data/ljspeech/wavs/LJ045-0175.wav|tests/data/ljspeech/wavs/LJ045-0175.npy +tests/data/ljspeech/wavs/LJ006-0305.wav|tests/data/ljspeech/wavs/LJ006-0305.npy +tests/data/ljspeech/wavs/LJ036-0033.wav|tests/data/ljspeech/wavs/LJ036-0033.npy +tests/data/ljspeech/wavs/LJ035-0030.wav|tests/data/ljspeech/wavs/LJ035-0030.npy +tests/data/ljspeech/wavs/LJ032-0213.wav|tests/data/ljspeech/wavs/LJ032-0213.npy +tests/data/ljspeech/wavs/LJ011-0001.wav|tests/data/ljspeech/wavs/LJ011-0001.npy +tests/data/ljspeech/wavs/LJ036-0202.wav|tests/data/ljspeech/wavs/LJ036-0202.npy +tests/data/ljspeech/wavs/LJ046-0088.wav|tests/data/ljspeech/wavs/LJ046-0088.npy +tests/data/ljspeech/wavs/LJ004-0115.wav|tests/data/ljspeech/wavs/LJ004-0115.npy +tests/data/ljspeech/wavs/LJ041-0047.wav|tests/data/ljspeech/wavs/LJ041-0047.npy +tests/data/ljspeech/wavs/LJ044-0176.wav|tests/data/ljspeech/wavs/LJ044-0176.npy +tests/data/ljspeech/wavs/LJ047-0217.wav|tests/data/ljspeech/wavs/LJ047-0217.npy +tests/data/ljspeech/wavs/LJ044-0187.wav|tests/data/ljspeech/wavs/LJ044-0187.npy +tests/data/ljspeech/wavs/LJ034-0201.wav|tests/data/ljspeech/wavs/LJ034-0201.npy +tests/data/ljspeech/wavs/LJ003-0306.wav|tests/data/ljspeech/wavs/LJ003-0306.npy +tests/data/ljspeech/wavs/LJ013-0253.wav|tests/data/ljspeech/wavs/LJ013-0253.npy +tests/data/ljspeech/wavs/LJ002-0279.wav|tests/data/ljspeech/wavs/LJ002-0279.npy +tests/data/ljspeech/wavs/LJ011-0110.wav|tests/data/ljspeech/wavs/LJ011-0110.npy +tests/data/ljspeech/wavs/LJ041-0063.wav|tests/data/ljspeech/wavs/LJ041-0063.npy +tests/data/ljspeech/wavs/LJ028-0310.wav|tests/data/ljspeech/wavs/LJ028-0310.npy +tests/data/ljspeech/wavs/LJ009-0185.wav|tests/data/ljspeech/wavs/LJ009-0185.npy 
+tests/data/ljspeech/wavs/LJ050-0122.wav|tests/data/ljspeech/wavs/LJ050-0122.npy +tests/data/ljspeech/wavs/LJ032-0260.wav|tests/data/ljspeech/wavs/LJ032-0260.npy +tests/data/ljspeech/wavs/LJ014-0062.wav|tests/data/ljspeech/wavs/LJ014-0062.npy +tests/data/ljspeech/wavs/LJ006-0230.wav|tests/data/ljspeech/wavs/LJ006-0230.npy +tests/data/ljspeech/wavs/LJ029-0115.wav|tests/data/ljspeech/wavs/LJ029-0115.npy +tests/data/ljspeech/wavs/LJ031-0185.wav|tests/data/ljspeech/wavs/LJ031-0185.npy +tests/data/ljspeech/wavs/LJ037-0066.wav|tests/data/ljspeech/wavs/LJ037-0066.npy +tests/data/ljspeech/wavs/LJ019-0366.wav|tests/data/ljspeech/wavs/LJ019-0366.npy +tests/data/ljspeech/wavs/LJ032-0069.wav|tests/data/ljspeech/wavs/LJ032-0069.npy +tests/data/ljspeech/wavs/LJ016-0428.wav|tests/data/ljspeech/wavs/LJ016-0428.npy +tests/data/ljspeech/wavs/LJ031-0065.wav|tests/data/ljspeech/wavs/LJ031-0065.npy +tests/data/ljspeech/wavs/LJ005-0175.wav|tests/data/ljspeech/wavs/LJ005-0175.npy +tests/data/ljspeech/wavs/LJ030-0032.wav|tests/data/ljspeech/wavs/LJ030-0032.npy +tests/data/ljspeech/wavs/LJ039-0034.wav|tests/data/ljspeech/wavs/LJ039-0034.npy +tests/data/ljspeech/wavs/LJ002-0238.wav|tests/data/ljspeech/wavs/LJ002-0238.npy +tests/data/ljspeech/wavs/LJ032-0044.wav|tests/data/ljspeech/wavs/LJ032-0044.npy +tests/data/ljspeech/wavs/LJ036-0024.wav|tests/data/ljspeech/wavs/LJ036-0024.npy +tests/data/ljspeech/wavs/LJ023-0026.wav|tests/data/ljspeech/wavs/LJ023-0026.npy +tests/data/ljspeech/wavs/LJ017-0001.wav|tests/data/ljspeech/wavs/LJ017-0001.npy +tests/data/ljspeech/wavs/LJ050-0069.wav|tests/data/ljspeech/wavs/LJ050-0069.npy +tests/data/ljspeech/wavs/LJ010-0148.wav|tests/data/ljspeech/wavs/LJ010-0148.npy +tests/data/ljspeech/wavs/LJ049-0187.wav|tests/data/ljspeech/wavs/LJ049-0187.npy +tests/data/ljspeech/wavs/LJ018-0063.wav|tests/data/ljspeech/wavs/LJ018-0063.npy +tests/data/ljspeech/wavs/LJ003-0223.wav|tests/data/ljspeech/wavs/LJ003-0223.npy +tests/data/ljspeech/wavs/LJ047-0095.wav|tests/data/ljspeech/wavs/LJ047-0095.npy +tests/data/ljspeech/wavs/LJ036-0146.wav|tests/data/ljspeech/wavs/LJ036-0146.npy +tests/data/ljspeech/wavs/LJ027-0052.wav|tests/data/ljspeech/wavs/LJ027-0052.npy +tests/data/ljspeech/wavs/LJ045-0159.wav|tests/data/ljspeech/wavs/LJ045-0159.npy +tests/data/ljspeech/wavs/LJ011-0075.wav|tests/data/ljspeech/wavs/LJ011-0075.npy +tests/data/ljspeech/wavs/LJ017-0124.wav|tests/data/ljspeech/wavs/LJ017-0124.npy +tests/data/ljspeech/wavs/LJ016-0392.wav|tests/data/ljspeech/wavs/LJ016-0392.npy +tests/data/ljspeech/wavs/LJ027-0048.wav|tests/data/ljspeech/wavs/LJ027-0048.npy +tests/data/ljspeech/wavs/LJ037-0202.wav|tests/data/ljspeech/wavs/LJ037-0202.npy +tests/data/ljspeech/wavs/LJ030-0126.wav|tests/data/ljspeech/wavs/LJ030-0126.npy +tests/data/ljspeech/wavs/LJ012-0027.wav|tests/data/ljspeech/wavs/LJ012-0027.npy +tests/data/ljspeech/wavs/LJ006-0280.wav|tests/data/ljspeech/wavs/LJ006-0280.npy +tests/data/ljspeech/wavs/LJ017-0130.wav|tests/data/ljspeech/wavs/LJ017-0130.npy +tests/data/ljspeech/wavs/LJ011-0038.wav|tests/data/ljspeech/wavs/LJ011-0038.npy +tests/data/ljspeech/wavs/LJ044-0225.wav|tests/data/ljspeech/wavs/LJ044-0225.npy +tests/data/ljspeech/wavs/LJ034-0051.wav|tests/data/ljspeech/wavs/LJ034-0051.npy +tests/data/ljspeech/wavs/LJ034-0008.wav|tests/data/ljspeech/wavs/LJ034-0008.npy +tests/data/ljspeech/wavs/LJ001-0111.wav|tests/data/ljspeech/wavs/LJ001-0111.npy +tests/data/ljspeech/wavs/LJ036-0155.wav|tests/data/ljspeech/wavs/LJ036-0155.npy 
+tests/data/ljspeech/wavs/LJ016-0256.wav|tests/data/ljspeech/wavs/LJ016-0256.npy +tests/data/ljspeech/wavs/LJ002-0171.wav|tests/data/ljspeech/wavs/LJ002-0171.npy +tests/data/ljspeech/wavs/LJ010-0219.wav|tests/data/ljspeech/wavs/LJ010-0219.npy +tests/data/ljspeech/wavs/LJ046-0035.wav|tests/data/ljspeech/wavs/LJ046-0035.npy +tests/data/ljspeech/wavs/LJ031-0025.wav|tests/data/ljspeech/wavs/LJ031-0025.npy +tests/data/ljspeech/wavs/LJ003-0001.wav|tests/data/ljspeech/wavs/LJ003-0001.npy +tests/data/ljspeech/wavs/LJ018-0001.wav|tests/data/ljspeech/wavs/LJ018-0001.npy +tests/data/ljspeech/wavs/LJ018-0138.wav|tests/data/ljspeech/wavs/LJ018-0138.npy +tests/data/ljspeech/wavs/LJ026-0133.wav|tests/data/ljspeech/wavs/LJ026-0133.npy +tests/data/ljspeech/wavs/LJ006-0021.wav|tests/data/ljspeech/wavs/LJ006-0021.npy +tests/data/ljspeech/wavs/LJ028-0480.wav|tests/data/ljspeech/wavs/LJ028-0480.npy +tests/data/ljspeech/wavs/LJ006-0173.wav|tests/data/ljspeech/wavs/LJ006-0173.npy +tests/data/ljspeech/wavs/LJ018-0023.wav|tests/data/ljspeech/wavs/LJ018-0023.npy +tests/data/ljspeech/wavs/LJ011-0176.wav|tests/data/ljspeech/wavs/LJ011-0176.npy +tests/data/ljspeech/wavs/LJ016-0150.wav|tests/data/ljspeech/wavs/LJ016-0150.npy +tests/data/ljspeech/wavs/LJ018-0113.wav|tests/data/ljspeech/wavs/LJ018-0113.npy +tests/data/ljspeech/wavs/LJ017-0253.wav|tests/data/ljspeech/wavs/LJ017-0253.npy +tests/data/ljspeech/wavs/LJ011-0261.wav|tests/data/ljspeech/wavs/LJ011-0261.npy +tests/data/ljspeech/wavs/LJ039-0230.wav|tests/data/ljspeech/wavs/LJ039-0230.npy +tests/data/ljspeech/wavs/LJ041-0110.wav|tests/data/ljspeech/wavs/LJ041-0110.npy +tests/data/ljspeech/wavs/LJ008-0193.wav|tests/data/ljspeech/wavs/LJ008-0193.npy +tests/data/ljspeech/wavs/LJ022-0030.wav|tests/data/ljspeech/wavs/LJ022-0030.npy +tests/data/ljspeech/wavs/LJ044-0229.wav|tests/data/ljspeech/wavs/LJ044-0229.npy +tests/data/ljspeech/wavs/LJ046-0148.wav|tests/data/ljspeech/wavs/LJ046-0148.npy +tests/data/ljspeech/wavs/LJ008-0247.wav|tests/data/ljspeech/wavs/LJ008-0247.npy +tests/data/ljspeech/wavs/LJ018-0109.wav|tests/data/ljspeech/wavs/LJ018-0109.npy +tests/data/ljspeech/wavs/LJ016-0274.wav|tests/data/ljspeech/wavs/LJ016-0274.npy +tests/data/ljspeech/wavs/LJ037-0071.wav|tests/data/ljspeech/wavs/LJ037-0071.npy +tests/data/ljspeech/wavs/LJ037-0106.wav|tests/data/ljspeech/wavs/LJ037-0106.npy +tests/data/ljspeech/wavs/LJ016-0267.wav|tests/data/ljspeech/wavs/LJ016-0267.npy +tests/data/ljspeech/wavs/LJ028-0259.wav|tests/data/ljspeech/wavs/LJ028-0259.npy +tests/data/ljspeech/wavs/LJ036-0079.wav|tests/data/ljspeech/wavs/LJ036-0079.npy +tests/data/ljspeech/wavs/LJ008-0189.wav|tests/data/ljspeech/wavs/LJ008-0189.npy +tests/data/ljspeech/wavs/LJ018-0042.wav|tests/data/ljspeech/wavs/LJ018-0042.npy +tests/data/ljspeech/wavs/LJ002-0253.wav|tests/data/ljspeech/wavs/LJ002-0253.npy +tests/data/ljspeech/wavs/LJ042-0134.wav|tests/data/ljspeech/wavs/LJ042-0134.npy +tests/data/ljspeech/wavs/LJ038-0198.wav|tests/data/ljspeech/wavs/LJ038-0198.npy +tests/data/ljspeech/wavs/LJ010-0001.wav|tests/data/ljspeech/wavs/LJ010-0001.npy +tests/data/ljspeech/wavs/LJ046-0155.wav|tests/data/ljspeech/wavs/LJ046-0155.npy +tests/data/ljspeech/wavs/LJ019-0118.wav|tests/data/ljspeech/wavs/LJ019-0118.npy +tests/data/ljspeech/wavs/LJ048-0166.wav|tests/data/ljspeech/wavs/LJ048-0166.npy +tests/data/ljspeech/wavs/LJ002-0089.wav|tests/data/ljspeech/wavs/LJ002-0089.npy +tests/data/ljspeech/wavs/LJ001-0044.wav|tests/data/ljspeech/wavs/LJ001-0044.npy 
+tests/data/ljspeech/wavs/LJ019-0198.wav|tests/data/ljspeech/wavs/LJ019-0198.npy +tests/data/ljspeech/wavs/LJ010-0199.wav|tests/data/ljspeech/wavs/LJ010-0199.npy +tests/data/ljspeech/wavs/LJ021-0141.wav|tests/data/ljspeech/wavs/LJ021-0141.npy +tests/data/ljspeech/wavs/LJ039-0099.wav|tests/data/ljspeech/wavs/LJ039-0099.npy +tests/data/ljspeech/wavs/LJ030-0102.wav|tests/data/ljspeech/wavs/LJ030-0102.npy +tests/data/ljspeech/wavs/LJ024-0056.wav|tests/data/ljspeech/wavs/LJ024-0056.npy +tests/data/ljspeech/wavs/LJ019-0103.wav|tests/data/ljspeech/wavs/LJ019-0103.npy +tests/data/ljspeech/wavs/LJ009-0040.wav|tests/data/ljspeech/wavs/LJ009-0040.npy +tests/data/ljspeech/wavs/LJ001-0182.wav|tests/data/ljspeech/wavs/LJ001-0182.npy +tests/data/ljspeech/wavs/LJ035-0119.wav|tests/data/ljspeech/wavs/LJ035-0119.npy +tests/data/ljspeech/wavs/LJ001-0033.wav|tests/data/ljspeech/wavs/LJ001-0033.npy +tests/data/ljspeech/wavs/LJ031-0136.wav|tests/data/ljspeech/wavs/LJ031-0136.npy +tests/data/ljspeech/wavs/LJ010-0207.wav|tests/data/ljspeech/wavs/LJ010-0207.npy +tests/data/ljspeech/wavs/LJ014-0251.wav|tests/data/ljspeech/wavs/LJ014-0251.npy +tests/data/ljspeech/wavs/LJ012-0168.wav|tests/data/ljspeech/wavs/LJ012-0168.npy +tests/data/ljspeech/wavs/LJ003-0207.wav|tests/data/ljspeech/wavs/LJ003-0207.npy +tests/data/ljspeech/wavs/LJ025-0082.wav|tests/data/ljspeech/wavs/LJ025-0082.npy +tests/data/ljspeech/wavs/LJ046-0104.wav|tests/data/ljspeech/wavs/LJ046-0104.npy +tests/data/ljspeech/wavs/LJ010-0205.wav|tests/data/ljspeech/wavs/LJ010-0205.npy +tests/data/ljspeech/wavs/LJ012-0190.wav|tests/data/ljspeech/wavs/LJ012-0190.npy +tests/data/ljspeech/wavs/LJ014-0214.wav|tests/data/ljspeech/wavs/LJ014-0214.npy +tests/data/ljspeech/wavs/LJ008-0008.wav|tests/data/ljspeech/wavs/LJ008-0008.npy +tests/data/ljspeech/wavs/LJ009-0254.wav|tests/data/ljspeech/wavs/LJ009-0254.npy +tests/data/ljspeech/wavs/LJ030-0148.wav|tests/data/ljspeech/wavs/LJ030-0148.npy +tests/data/ljspeech/wavs/LJ002-0102.wav|tests/data/ljspeech/wavs/LJ002-0102.npy +tests/data/ljspeech/wavs/LJ002-0011.wav|tests/data/ljspeech/wavs/LJ002-0011.npy +tests/data/ljspeech/wavs/LJ004-0223.wav|tests/data/ljspeech/wavs/LJ004-0223.npy +tests/data/ljspeech/wavs/LJ004-0228.wav|tests/data/ljspeech/wavs/LJ004-0228.npy +tests/data/ljspeech/wavs/LJ046-0073.wav|tests/data/ljspeech/wavs/LJ046-0073.npy +tests/data/ljspeech/wavs/LJ010-0078.wav|tests/data/ljspeech/wavs/LJ010-0078.npy +tests/data/ljspeech/wavs/LJ031-0051.wav|tests/data/ljspeech/wavs/LJ031-0051.npy +tests/data/ljspeech/wavs/LJ009-0224.wav|tests/data/ljspeech/wavs/LJ009-0224.npy +tests/data/ljspeech/wavs/LJ033-0097.wav|tests/data/ljspeech/wavs/LJ033-0097.npy +tests/data/ljspeech/wavs/LJ038-0283.wav|tests/data/ljspeech/wavs/LJ038-0283.npy +tests/data/ljspeech/wavs/LJ025-0175.wav|tests/data/ljspeech/wavs/LJ025-0175.npy +tests/data/ljspeech/wavs/LJ035-0149.wav|tests/data/ljspeech/wavs/LJ035-0149.npy +tests/data/ljspeech/wavs/LJ042-0115.wav|tests/data/ljspeech/wavs/LJ042-0115.npy +tests/data/ljspeech/wavs/LJ050-0047.wav|tests/data/ljspeech/wavs/LJ050-0047.npy +tests/data/ljspeech/wavs/LJ047-0222.wav|tests/data/ljspeech/wavs/LJ047-0222.npy +tests/data/ljspeech/wavs/LJ026-0009.wav|tests/data/ljspeech/wavs/LJ026-0009.npy +tests/data/ljspeech/wavs/LJ044-0129.wav|tests/data/ljspeech/wavs/LJ044-0129.npy +tests/data/ljspeech/wavs/LJ040-0200.wav|tests/data/ljspeech/wavs/LJ040-0200.npy +tests/data/ljspeech/wavs/LJ003-0342.wav|tests/data/ljspeech/wavs/LJ003-0342.npy 
+tests/data/ljspeech/wavs/LJ047-0115.wav|tests/data/ljspeech/wavs/LJ047-0115.npy +tests/data/ljspeech/wavs/LJ041-0065.wav|tests/data/ljspeech/wavs/LJ041-0065.npy +tests/data/ljspeech/wavs/LJ007-0053.wav|tests/data/ljspeech/wavs/LJ007-0053.npy +tests/data/ljspeech/wavs/LJ048-0161.wav|tests/data/ljspeech/wavs/LJ048-0161.npy +tests/data/ljspeech/wavs/LJ024-0099.wav|tests/data/ljspeech/wavs/LJ024-0099.npy +tests/data/ljspeech/wavs/LJ024-0085.wav|tests/data/ljspeech/wavs/LJ024-0085.npy +tests/data/ljspeech/wavs/LJ029-0025.wav|tests/data/ljspeech/wavs/LJ029-0025.npy +tests/data/ljspeech/wavs/LJ035-0003.wav|tests/data/ljspeech/wavs/LJ035-0003.npy +tests/data/ljspeech/wavs/LJ024-0108.wav|tests/data/ljspeech/wavs/LJ024-0108.npy +tests/data/ljspeech/wavs/LJ028-0170.wav|tests/data/ljspeech/wavs/LJ028-0170.npy +tests/data/ljspeech/wavs/LJ048-0049.wav|tests/data/ljspeech/wavs/LJ048-0049.npy +tests/data/ljspeech/wavs/LJ006-0042.wav|tests/data/ljspeech/wavs/LJ006-0042.npy +tests/data/ljspeech/wavs/LJ005-0208.wav|tests/data/ljspeech/wavs/LJ005-0208.npy +tests/data/ljspeech/wavs/LJ015-0154.wav|tests/data/ljspeech/wavs/LJ015-0154.npy +tests/data/ljspeech/wavs/LJ033-0020.wav|tests/data/ljspeech/wavs/LJ033-0020.npy +tests/data/ljspeech/wavs/LJ036-0208.wav|tests/data/ljspeech/wavs/LJ036-0208.npy +tests/data/ljspeech/wavs/LJ033-0177.wav|tests/data/ljspeech/wavs/LJ033-0177.npy +tests/data/ljspeech/wavs/LJ046-0137.wav|tests/data/ljspeech/wavs/LJ046-0137.npy +tests/data/ljspeech/wavs/LJ039-0141.wav|tests/data/ljspeech/wavs/LJ039-0141.npy +tests/data/ljspeech/wavs/LJ026-0070.wav|tests/data/ljspeech/wavs/LJ026-0070.npy +tests/data/ljspeech/wavs/LJ002-0157.wav|tests/data/ljspeech/wavs/LJ002-0157.npy +tests/data/ljspeech/wavs/LJ008-0216.wav|tests/data/ljspeech/wavs/LJ008-0216.npy +tests/data/ljspeech/wavs/LJ015-0233.wav|tests/data/ljspeech/wavs/LJ015-0233.npy +tests/data/ljspeech/wavs/LJ037-0015.wav|tests/data/ljspeech/wavs/LJ037-0015.npy +tests/data/ljspeech/wavs/LJ021-0118.wav|tests/data/ljspeech/wavs/LJ021-0118.npy +tests/data/ljspeech/wavs/LJ037-0265.wav|tests/data/ljspeech/wavs/LJ037-0265.npy +tests/data/ljspeech/wavs/LJ030-0216.wav|tests/data/ljspeech/wavs/LJ030-0216.npy +tests/data/ljspeech/wavs/LJ031-0156.wav|tests/data/ljspeech/wavs/LJ031-0156.npy +tests/data/ljspeech/wavs/LJ031-0190.wav|tests/data/ljspeech/wavs/LJ031-0190.npy +tests/data/ljspeech/wavs/LJ026-0050.wav|tests/data/ljspeech/wavs/LJ026-0050.npy +tests/data/ljspeech/wavs/LJ015-0136.wav|tests/data/ljspeech/wavs/LJ015-0136.npy +tests/data/ljspeech/wavs/LJ047-0126.wav|tests/data/ljspeech/wavs/LJ047-0126.npy +tests/data/ljspeech/wavs/LJ005-0016.wav|tests/data/ljspeech/wavs/LJ005-0016.npy +tests/data/ljspeech/wavs/LJ012-0090.wav|tests/data/ljspeech/wavs/LJ012-0090.npy +tests/data/ljspeech/wavs/LJ035-0174.wav|tests/data/ljspeech/wavs/LJ035-0174.npy +tests/data/ljspeech/wavs/LJ031-0193.wav|tests/data/ljspeech/wavs/LJ031-0193.npy +tests/data/ljspeech/wavs/LJ004-0080.wav|tests/data/ljspeech/wavs/LJ004-0080.npy +tests/data/ljspeech/wavs/LJ021-0088.wav|tests/data/ljspeech/wavs/LJ021-0088.npy +tests/data/ljspeech/wavs/LJ004-0186.wav|tests/data/ljspeech/wavs/LJ004-0186.npy +tests/data/ljspeech/wavs/LJ011-0158.wav|tests/data/ljspeech/wavs/LJ011-0158.npy +tests/data/ljspeech/wavs/LJ004-0033.wav|tests/data/ljspeech/wavs/LJ004-0033.npy +tests/data/ljspeech/wavs/LJ012-0096.wav|tests/data/ljspeech/wavs/LJ012-0096.npy +tests/data/ljspeech/wavs/LJ004-0031.wav|tests/data/ljspeech/wavs/LJ004-0031.npy 
+tests/data/ljspeech/wavs/LJ007-0115.wav|tests/data/ljspeech/wavs/LJ007-0115.npy +tests/data/ljspeech/wavs/LJ030-0229.wav|tests/data/ljspeech/wavs/LJ030-0229.npy +tests/data/ljspeech/wavs/LJ026-0066.wav|tests/data/ljspeech/wavs/LJ026-0066.npy +tests/data/ljspeech/wavs/LJ039-0092.wav|tests/data/ljspeech/wavs/LJ039-0092.npy +tests/data/ljspeech/wavs/LJ039-0086.wav|tests/data/ljspeech/wavs/LJ039-0086.npy +tests/data/ljspeech/wavs/LJ004-0103.wav|tests/data/ljspeech/wavs/LJ004-0103.npy +tests/data/ljspeech/wavs/LJ037-0226.wav|tests/data/ljspeech/wavs/LJ037-0226.npy +tests/data/ljspeech/wavs/LJ002-0338.wav|tests/data/ljspeech/wavs/LJ002-0338.npy +tests/data/ljspeech/wavs/LJ036-0184.wav|tests/data/ljspeech/wavs/LJ036-0184.npy +tests/data/ljspeech/wavs/LJ036-0195.wav|tests/data/ljspeech/wavs/LJ036-0195.npy +tests/data/ljspeech/wavs/LJ024-0098.wav|tests/data/ljspeech/wavs/LJ024-0098.npy +tests/data/ljspeech/wavs/LJ022-0115.wav|tests/data/ljspeech/wavs/LJ022-0115.npy +tests/data/ljspeech/wavs/LJ013-0140.wav|tests/data/ljspeech/wavs/LJ013-0140.npy +tests/data/ljspeech/wavs/LJ028-0185.wav|tests/data/ljspeech/wavs/LJ028-0185.npy +tests/data/ljspeech/wavs/LJ025-0022.wav|tests/data/ljspeech/wavs/LJ025-0022.npy +tests/data/ljspeech/wavs/LJ013-0205.wav|tests/data/ljspeech/wavs/LJ013-0205.npy +tests/data/ljspeech/wavs/LJ038-0085.wav|tests/data/ljspeech/wavs/LJ038-0085.npy +tests/data/ljspeech/wavs/LJ024-0141.wav|tests/data/ljspeech/wavs/LJ024-0141.npy +tests/data/ljspeech/wavs/LJ027-0076.wav|tests/data/ljspeech/wavs/LJ027-0076.npy +tests/data/ljspeech/wavs/LJ024-0122.wav|tests/data/ljspeech/wavs/LJ024-0122.npy +tests/data/ljspeech/wavs/LJ049-0057.wav|tests/data/ljspeech/wavs/LJ049-0057.npy +tests/data/ljspeech/wavs/LJ003-0107.wav|tests/data/ljspeech/wavs/LJ003-0107.npy +tests/data/ljspeech/wavs/LJ013-0035.wav|tests/data/ljspeech/wavs/LJ013-0035.npy +tests/data/ljspeech/wavs/LJ017-0033.wav|tests/data/ljspeech/wavs/LJ017-0033.npy +tests/data/ljspeech/wavs/LJ028-0177.wav|tests/data/ljspeech/wavs/LJ028-0177.npy +tests/data/ljspeech/wavs/LJ023-0084.wav|tests/data/ljspeech/wavs/LJ023-0084.npy +tests/data/ljspeech/wavs/LJ004-0035.wav|tests/data/ljspeech/wavs/LJ004-0035.npy +tests/data/ljspeech/wavs/LJ012-0111.wav|tests/data/ljspeech/wavs/LJ012-0111.npy +tests/data/ljspeech/wavs/LJ013-0102.wav|tests/data/ljspeech/wavs/LJ013-0102.npy +tests/data/ljspeech/wavs/LJ003-0280.wav|tests/data/ljspeech/wavs/LJ003-0280.npy +tests/data/ljspeech/wavs/LJ013-0130.wav|tests/data/ljspeech/wavs/LJ013-0130.npy +tests/data/ljspeech/wavs/LJ017-0018.wav|tests/data/ljspeech/wavs/LJ017-0018.npy +tests/data/ljspeech/wavs/LJ003-0032.wav|tests/data/ljspeech/wavs/LJ003-0032.npy +tests/data/ljspeech/wavs/LJ050-0273.wav|tests/data/ljspeech/wavs/LJ050-0273.npy +tests/data/ljspeech/wavs/LJ011-0223.wav|tests/data/ljspeech/wavs/LJ011-0223.npy +tests/data/ljspeech/wavs/LJ050-0211.wav|tests/data/ljspeech/wavs/LJ050-0211.npy +tests/data/ljspeech/wavs/LJ023-0016.wav|tests/data/ljspeech/wavs/LJ023-0016.npy +tests/data/ljspeech/wavs/LJ022-0194.wav|tests/data/ljspeech/wavs/LJ022-0194.npy +tests/data/ljspeech/wavs/LJ046-0158.wav|tests/data/ljspeech/wavs/LJ046-0158.npy +tests/data/ljspeech/wavs/LJ047-0129.wav|tests/data/ljspeech/wavs/LJ047-0129.npy +tests/data/ljspeech/wavs/LJ004-0020.wav|tests/data/ljspeech/wavs/LJ004-0020.npy +tests/data/ljspeech/wavs/LJ023-0125.wav|tests/data/ljspeech/wavs/LJ023-0125.npy +tests/data/ljspeech/wavs/LJ014-0228.wav|tests/data/ljspeech/wavs/LJ014-0228.npy 
+tests/data/ljspeech/wavs/LJ012-0251.wav|tests/data/ljspeech/wavs/LJ012-0251.npy +tests/data/ljspeech/wavs/LJ023-0101.wav|tests/data/ljspeech/wavs/LJ023-0101.npy +tests/data/ljspeech/wavs/LJ025-0047.wav|tests/data/ljspeech/wavs/LJ025-0047.npy +tests/data/ljspeech/wavs/LJ042-0208.wav|tests/data/ljspeech/wavs/LJ042-0208.npy +tests/data/ljspeech/wavs/LJ039-0058.wav|tests/data/ljspeech/wavs/LJ039-0058.npy +tests/data/ljspeech/wavs/LJ042-0037.wav|tests/data/ljspeech/wavs/LJ042-0037.npy +tests/data/ljspeech/wavs/LJ008-0060.wav|tests/data/ljspeech/wavs/LJ008-0060.npy +tests/data/ljspeech/wavs/LJ001-0082.wav|tests/data/ljspeech/wavs/LJ001-0082.npy +tests/data/ljspeech/wavs/LJ028-0075.wav|tests/data/ljspeech/wavs/LJ028-0075.npy +tests/data/ljspeech/wavs/LJ001-0073.wav|tests/data/ljspeech/wavs/LJ001-0073.npy +tests/data/ljspeech/wavs/LJ016-0110.wav|tests/data/ljspeech/wavs/LJ016-0110.npy +tests/data/ljspeech/wavs/LJ028-0509.wav|tests/data/ljspeech/wavs/LJ028-0509.npy +tests/data/ljspeech/wavs/LJ003-0256.wav|tests/data/ljspeech/wavs/LJ003-0256.npy +tests/data/ljspeech/wavs/LJ015-0192.wav|tests/data/ljspeech/wavs/LJ015-0192.npy +tests/data/ljspeech/wavs/LJ011-0183.wav|tests/data/ljspeech/wavs/LJ011-0183.npy +tests/data/ljspeech/wavs/LJ007-0139.wav|tests/data/ljspeech/wavs/LJ007-0139.npy +tests/data/ljspeech/wavs/LJ028-0213.wav|tests/data/ljspeech/wavs/LJ028-0213.npy +tests/data/ljspeech/wavs/LJ045-0189.wav|tests/data/ljspeech/wavs/LJ045-0189.npy +tests/data/ljspeech/wavs/LJ029-0074.wav|tests/data/ljspeech/wavs/LJ029-0074.npy +tests/data/ljspeech/wavs/LJ049-0162.wav|tests/data/ljspeech/wavs/LJ049-0162.npy +tests/data/ljspeech/wavs/LJ038-0203.wav|tests/data/ljspeech/wavs/LJ038-0203.npy +tests/data/ljspeech/wavs/LJ028-0256.wav|tests/data/ljspeech/wavs/LJ028-0256.npy +tests/data/ljspeech/wavs/LJ033-0205.wav|tests/data/ljspeech/wavs/LJ033-0205.npy +tests/data/ljspeech/wavs/LJ023-0106.wav|tests/data/ljspeech/wavs/LJ023-0106.npy +tests/data/ljspeech/wavs/LJ018-0115.wav|tests/data/ljspeech/wavs/LJ018-0115.npy +tests/data/ljspeech/wavs/LJ015-0084.wav|tests/data/ljspeech/wavs/LJ015-0084.npy +tests/data/ljspeech/wavs/LJ047-0046.wav|tests/data/ljspeech/wavs/LJ047-0046.npy +tests/data/ljspeech/wavs/LJ019-0098.wav|tests/data/ljspeech/wavs/LJ019-0098.npy +tests/data/ljspeech/wavs/LJ025-0059.wav|tests/data/ljspeech/wavs/LJ025-0059.npy +tests/data/ljspeech/wavs/LJ047-0081.wav|tests/data/ljspeech/wavs/LJ047-0081.npy +tests/data/ljspeech/wavs/LJ002-0240.wav|tests/data/ljspeech/wavs/LJ002-0240.npy +tests/data/ljspeech/wavs/LJ042-0117.wav|tests/data/ljspeech/wavs/LJ042-0117.npy +tests/data/ljspeech/wavs/LJ047-0061.wav|tests/data/ljspeech/wavs/LJ047-0061.npy +tests/data/ljspeech/wavs/LJ030-0046.wav|tests/data/ljspeech/wavs/LJ030-0046.npy +tests/data/ljspeech/wavs/LJ041-0198.wav|tests/data/ljspeech/wavs/LJ041-0198.npy +tests/data/ljspeech/wavs/LJ017-0259.wav|tests/data/ljspeech/wavs/LJ017-0259.npy +tests/data/ljspeech/wavs/LJ004-0096.wav|tests/data/ljspeech/wavs/LJ004-0096.npy +tests/data/ljspeech/wavs/LJ004-0202.wav|tests/data/ljspeech/wavs/LJ004-0202.npy +tests/data/ljspeech/wavs/LJ040-0094.wav|tests/data/ljspeech/wavs/LJ040-0094.npy +tests/data/ljspeech/wavs/LJ042-0141.wav|tests/data/ljspeech/wavs/LJ042-0141.npy +tests/data/ljspeech/wavs/LJ022-0090.wav|tests/data/ljspeech/wavs/LJ022-0090.npy +tests/data/ljspeech/wavs/LJ048-0173.wav|tests/data/ljspeech/wavs/LJ048-0173.npy +tests/data/ljspeech/wavs/LJ009-0289.wav|tests/data/ljspeech/wavs/LJ009-0289.npy 
+tests/data/ljspeech/wavs/LJ049-0156.wav|tests/data/ljspeech/wavs/LJ049-0156.npy +tests/data/ljspeech/wavs/LJ014-0098.wav|tests/data/ljspeech/wavs/LJ014-0098.npy +tests/data/ljspeech/wavs/LJ018-0040.wav|tests/data/ljspeech/wavs/LJ018-0040.npy +tests/data/ljspeech/wavs/LJ010-0208.wav|tests/data/ljspeech/wavs/LJ010-0208.npy +tests/data/ljspeech/wavs/LJ027-0124.wav|tests/data/ljspeech/wavs/LJ027-0124.npy +tests/data/ljspeech/wavs/LJ022-0016.wav|tests/data/ljspeech/wavs/LJ022-0016.npy +tests/data/ljspeech/wavs/LJ019-0081.wav|tests/data/ljspeech/wavs/LJ019-0081.npy +tests/data/ljspeech/wavs/LJ009-0065.wav|tests/data/ljspeech/wavs/LJ009-0065.npy +tests/data/ljspeech/wavs/LJ009-0261.wav|tests/data/ljspeech/wavs/LJ009-0261.npy +tests/data/ljspeech/wavs/LJ029-0135.wav|tests/data/ljspeech/wavs/LJ029-0135.npy +tests/data/ljspeech/wavs/LJ039-0153.wav|tests/data/ljspeech/wavs/LJ039-0153.npy +tests/data/ljspeech/wavs/LJ003-0340.wav|tests/data/ljspeech/wavs/LJ003-0340.npy +tests/data/ljspeech/wavs/LJ028-0401.wav|tests/data/ljspeech/wavs/LJ028-0401.npy +tests/data/ljspeech/wavs/LJ047-0190.wav|tests/data/ljspeech/wavs/LJ047-0190.npy +tests/data/ljspeech/wavs/LJ019-0015.wav|tests/data/ljspeech/wavs/LJ019-0015.npy +tests/data/ljspeech/wavs/LJ043-0085.wav|tests/data/ljspeech/wavs/LJ043-0085.npy +tests/data/ljspeech/wavs/LJ043-0100.wav|tests/data/ljspeech/wavs/LJ043-0100.npy +tests/data/ljspeech/wavs/LJ031-0166.wav|tests/data/ljspeech/wavs/LJ031-0166.npy +tests/data/ljspeech/wavs/LJ033-0040.wav|tests/data/ljspeech/wavs/LJ033-0040.npy +tests/data/ljspeech/wavs/LJ036-0144.wav|tests/data/ljspeech/wavs/LJ036-0144.npy +tests/data/ljspeech/wavs/LJ044-0163.wav|tests/data/ljspeech/wavs/LJ044-0163.npy +tests/data/ljspeech/wavs/LJ018-0384.wav|tests/data/ljspeech/wavs/LJ018-0384.npy +tests/data/ljspeech/wavs/LJ018-0271.wav|tests/data/ljspeech/wavs/LJ018-0271.npy +tests/data/ljspeech/wavs/LJ018-0263.wav|tests/data/ljspeech/wavs/LJ018-0263.npy +tests/data/ljspeech/wavs/LJ050-0258.wav|tests/data/ljspeech/wavs/LJ050-0258.npy +tests/data/ljspeech/wavs/LJ018-0359.wav|tests/data/ljspeech/wavs/LJ018-0359.npy +tests/data/ljspeech/wavs/LJ034-0219.wav|tests/data/ljspeech/wavs/LJ034-0219.npy +tests/data/ljspeech/wavs/LJ047-0247.wav|tests/data/ljspeech/wavs/LJ047-0247.npy +tests/data/ljspeech/wavs/LJ018-0195.wav|tests/data/ljspeech/wavs/LJ018-0195.npy +tests/data/ljspeech/wavs/LJ048-0253.wav|tests/data/ljspeech/wavs/LJ048-0253.npy +tests/data/ljspeech/wavs/LJ019-0012.wav|tests/data/ljspeech/wavs/LJ019-0012.npy +tests/data/ljspeech/wavs/LJ011-0057.wav|tests/data/ljspeech/wavs/LJ011-0057.npy +tests/data/ljspeech/wavs/LJ010-0162.wav|tests/data/ljspeech/wavs/LJ010-0162.npy +tests/data/ljspeech/wavs/LJ030-0053.wav|tests/data/ljspeech/wavs/LJ030-0053.npy +tests/data/ljspeech/wavs/LJ010-0191.wav|tests/data/ljspeech/wavs/LJ010-0191.npy +tests/data/ljspeech/wavs/LJ021-0181.wav|tests/data/ljspeech/wavs/LJ021-0181.npy +tests/data/ljspeech/wavs/LJ018-0351.wav|tests/data/ljspeech/wavs/LJ018-0351.npy +tests/data/ljspeech/wavs/LJ018-0189.wav|tests/data/ljspeech/wavs/LJ018-0189.npy +tests/data/ljspeech/wavs/LJ017-0066.wav|tests/data/ljspeech/wavs/LJ017-0066.npy +tests/data/ljspeech/wavs/LJ033-0099.wav|tests/data/ljspeech/wavs/LJ033-0099.npy +tests/data/ljspeech/wavs/LJ018-0314.wav|tests/data/ljspeech/wavs/LJ018-0314.npy +tests/data/ljspeech/wavs/LJ028-0467.wav|tests/data/ljspeech/wavs/LJ028-0467.npy +tests/data/ljspeech/wavs/LJ031-0124.wav|tests/data/ljspeech/wavs/LJ031-0124.npy 
+tests/data/ljspeech/wavs/LJ009-0105.wav|tests/data/ljspeech/wavs/LJ009-0105.npy +tests/data/ljspeech/wavs/LJ030-0187.wav|tests/data/ljspeech/wavs/LJ030-0187.npy +tests/data/ljspeech/wavs/LJ011-0003.wav|tests/data/ljspeech/wavs/LJ011-0003.npy +tests/data/ljspeech/wavs/LJ048-0248.wav|tests/data/ljspeech/wavs/LJ048-0248.npy +tests/data/ljspeech/wavs/LJ026-0001.wav|tests/data/ljspeech/wavs/LJ026-0001.npy +tests/data/ljspeech/wavs/LJ019-0014.wav|tests/data/ljspeech/wavs/LJ019-0014.npy +tests/data/ljspeech/wavs/LJ024-0112.wav|tests/data/ljspeech/wavs/LJ024-0112.npy +tests/data/ljspeech/wavs/LJ002-0266.wav|tests/data/ljspeech/wavs/LJ002-0266.npy +tests/data/ljspeech/wavs/LJ050-0142.wav|tests/data/ljspeech/wavs/LJ050-0142.npy +tests/data/ljspeech/wavs/LJ031-0010.wav|tests/data/ljspeech/wavs/LJ031-0010.npy +tests/data/ljspeech/wavs/LJ027-0049.wav|tests/data/ljspeech/wavs/LJ027-0049.npy +tests/data/ljspeech/wavs/LJ006-0047.wav|tests/data/ljspeech/wavs/LJ006-0047.npy +tests/data/ljspeech/wavs/LJ041-0119.wav|tests/data/ljspeech/wavs/LJ041-0119.npy +tests/data/ljspeech/wavs/LJ030-0166.wav|tests/data/ljspeech/wavs/LJ030-0166.npy +tests/data/ljspeech/wavs/LJ009-0177.wav|tests/data/ljspeech/wavs/LJ009-0177.npy +tests/data/ljspeech/wavs/LJ018-0395.wav|tests/data/ljspeech/wavs/LJ018-0395.npy +tests/data/ljspeech/wavs/LJ049-0101.wav|tests/data/ljspeech/wavs/LJ049-0101.npy +tests/data/ljspeech/wavs/LJ019-0002.wav|tests/data/ljspeech/wavs/LJ019-0002.npy +tests/data/ljspeech/wavs/LJ032-0106.wav|tests/data/ljspeech/wavs/LJ032-0106.npy +tests/data/ljspeech/wavs/LJ010-0172.wav|tests/data/ljspeech/wavs/LJ010-0172.npy +tests/data/ljspeech/wavs/LJ048-0218.wav|tests/data/ljspeech/wavs/LJ048-0218.npy +tests/data/ljspeech/wavs/LJ003-0300.wav|tests/data/ljspeech/wavs/LJ003-0300.npy +tests/data/ljspeech/wavs/LJ002-0165.wav|tests/data/ljspeech/wavs/LJ002-0165.npy +tests/data/ljspeech/wavs/LJ046-0101.wav|tests/data/ljspeech/wavs/LJ046-0101.npy +tests/data/ljspeech/wavs/LJ042-0147.wav|tests/data/ljspeech/wavs/LJ042-0147.npy +tests/data/ljspeech/wavs/LJ019-0394.wav|tests/data/ljspeech/wavs/LJ019-0394.npy +tests/data/ljspeech/wavs/LJ028-0449.wav|tests/data/ljspeech/wavs/LJ028-0449.npy +tests/data/ljspeech/wavs/LJ017-0116.wav|tests/data/ljspeech/wavs/LJ017-0116.npy +tests/data/ljspeech/wavs/LJ038-0065.wav|tests/data/ljspeech/wavs/LJ038-0065.npy +tests/data/ljspeech/wavs/LJ006-0207.wav|tests/data/ljspeech/wavs/LJ006-0207.npy +tests/data/ljspeech/wavs/LJ009-0123.wav|tests/data/ljspeech/wavs/LJ009-0123.npy +tests/data/ljspeech/wavs/LJ018-0203.wav|tests/data/ljspeech/wavs/LJ018-0203.npy +tests/data/ljspeech/wavs/LJ039-0125.wav|tests/data/ljspeech/wavs/LJ039-0125.npy +tests/data/ljspeech/wavs/LJ034-0133.wav|tests/data/ljspeech/wavs/LJ034-0133.npy +tests/data/ljspeech/wavs/LJ008-0074.wav|tests/data/ljspeech/wavs/LJ008-0074.npy +tests/data/ljspeech/wavs/LJ030-0246.wav|tests/data/ljspeech/wavs/LJ030-0246.npy +tests/data/ljspeech/wavs/LJ045-0019.wav|tests/data/ljspeech/wavs/LJ045-0019.npy +tests/data/ljspeech/wavs/LJ039-0016.wav|tests/data/ljspeech/wavs/LJ039-0016.npy +tests/data/ljspeech/wavs/LJ019-0341.wav|tests/data/ljspeech/wavs/LJ019-0341.npy +tests/data/ljspeech/wavs/LJ033-0102.wav|tests/data/ljspeech/wavs/LJ033-0102.npy +tests/data/ljspeech/wavs/LJ033-0090.wav|tests/data/ljspeech/wavs/LJ033-0090.npy +tests/data/ljspeech/wavs/LJ008-0142.wav|tests/data/ljspeech/wavs/LJ008-0142.npy +tests/data/ljspeech/wavs/LJ038-0184.wav|tests/data/ljspeech/wavs/LJ038-0184.npy 
+tests/data/ljspeech/wavs/LJ006-0080.wav|tests/data/ljspeech/wavs/LJ006-0080.npy +tests/data/ljspeech/wavs/LJ013-0239.wav|tests/data/ljspeech/wavs/LJ013-0239.npy +tests/data/ljspeech/wavs/LJ015-0149.wav|tests/data/ljspeech/wavs/LJ015-0149.npy +tests/data/ljspeech/wavs/LJ007-0047.wav|tests/data/ljspeech/wavs/LJ007-0047.npy +tests/data/ljspeech/wavs/LJ028-0457.wav|tests/data/ljspeech/wavs/LJ028-0457.npy +tests/data/ljspeech/wavs/LJ012-0079.wav|tests/data/ljspeech/wavs/LJ012-0079.npy +tests/data/ljspeech/wavs/LJ050-0052.wav|tests/data/ljspeech/wavs/LJ050-0052.npy +tests/data/ljspeech/wavs/LJ018-0360.wav|tests/data/ljspeech/wavs/LJ018-0360.npy +tests/data/ljspeech/wavs/LJ014-0111.wav|tests/data/ljspeech/wavs/LJ014-0111.npy +tests/data/ljspeech/wavs/LJ019-0210.wav|tests/data/ljspeech/wavs/LJ019-0210.npy +tests/data/ljspeech/wavs/LJ012-0081.wav|tests/data/ljspeech/wavs/LJ012-0081.npy +tests/data/ljspeech/wavs/LJ035-0159.wav|tests/data/ljspeech/wavs/LJ035-0159.npy +tests/data/ljspeech/wavs/LJ050-0109.wav|tests/data/ljspeech/wavs/LJ050-0109.npy +tests/data/ljspeech/wavs/LJ004-0182.wav|tests/data/ljspeech/wavs/LJ004-0182.npy +tests/data/ljspeech/wavs/LJ010-0085.wav|tests/data/ljspeech/wavs/LJ010-0085.npy +tests/data/ljspeech/wavs/LJ003-0276.wav|tests/data/ljspeech/wavs/LJ003-0276.npy +tests/data/ljspeech/wavs/LJ021-0086.wav|tests/data/ljspeech/wavs/LJ021-0086.npy +tests/data/ljspeech/wavs/LJ020-0083.wav|tests/data/ljspeech/wavs/LJ020-0083.npy +tests/data/ljspeech/wavs/LJ003-0332.wav|tests/data/ljspeech/wavs/LJ003-0332.npy +tests/data/ljspeech/wavs/LJ018-0340.wav|tests/data/ljspeech/wavs/LJ018-0340.npy +tests/data/ljspeech/wavs/LJ001-0067.wav|tests/data/ljspeech/wavs/LJ001-0067.npy +tests/data/ljspeech/wavs/LJ004-0181.wav|tests/data/ljspeech/wavs/LJ004-0181.npy +tests/data/ljspeech/wavs/LJ013-0247.wav|tests/data/ljspeech/wavs/LJ013-0247.npy +tests/data/ljspeech/wavs/LJ039-0073.wav|tests/data/ljspeech/wavs/LJ039-0073.npy +tests/data/ljspeech/wavs/LJ045-0029.wav|tests/data/ljspeech/wavs/LJ045-0029.npy +tests/data/ljspeech/wavs/LJ038-0136.wav|tests/data/ljspeech/wavs/LJ038-0136.npy +tests/data/ljspeech/wavs/LJ009-0197.wav|tests/data/ljspeech/wavs/LJ009-0197.npy +tests/data/ljspeech/wavs/LJ039-0103.wav|tests/data/ljspeech/wavs/LJ039-0103.npy +tests/data/ljspeech/wavs/LJ038-0201.wav|tests/data/ljspeech/wavs/LJ038-0201.npy +tests/data/ljspeech/wavs/LJ009-0272.wav|tests/data/ljspeech/wavs/LJ009-0272.npy +tests/data/ljspeech/wavs/LJ038-0134.wav|tests/data/ljspeech/wavs/LJ038-0134.npy +tests/data/ljspeech/wavs/LJ014-0234.wav|tests/data/ljspeech/wavs/LJ014-0234.npy +tests/data/ljspeech/wavs/LJ047-0074.wav|tests/data/ljspeech/wavs/LJ047-0074.npy +tests/data/ljspeech/wavs/LJ024-0005.wav|tests/data/ljspeech/wavs/LJ024-0005.npy +tests/data/ljspeech/wavs/LJ042-0242.wav|tests/data/ljspeech/wavs/LJ042-0242.npy +tests/data/ljspeech/wavs/LJ045-0034.wav|tests/data/ljspeech/wavs/LJ045-0034.npy +tests/data/ljspeech/wavs/LJ012-0193.wav|tests/data/ljspeech/wavs/LJ012-0193.npy +tests/data/ljspeech/wavs/LJ033-0156.wav|tests/data/ljspeech/wavs/LJ033-0156.npy +tests/data/ljspeech/wavs/LJ019-0141.wav|tests/data/ljspeech/wavs/LJ019-0141.npy +tests/data/ljspeech/wavs/LJ007-0024.wav|tests/data/ljspeech/wavs/LJ007-0024.npy +tests/data/ljspeech/wavs/LJ009-0192.wav|tests/data/ljspeech/wavs/LJ009-0192.npy +tests/data/ljspeech/wavs/LJ013-0210.wav|tests/data/ljspeech/wavs/LJ013-0210.npy +tests/data/ljspeech/wavs/LJ012-0163.wav|tests/data/ljspeech/wavs/LJ012-0163.npy 
+tests/data/ljspeech/wavs/LJ042-0075.wav|tests/data/ljspeech/wavs/LJ042-0075.npy +tests/data/ljspeech/wavs/LJ031-0096.wav|tests/data/ljspeech/wavs/LJ031-0096.npy +tests/data/ljspeech/wavs/LJ014-0068.wav|tests/data/ljspeech/wavs/LJ014-0068.npy +tests/data/ljspeech/wavs/LJ014-0263.wav|tests/data/ljspeech/wavs/LJ014-0263.npy +tests/data/ljspeech/wavs/LJ014-0144.wav|tests/data/ljspeech/wavs/LJ014-0144.npy +tests/data/ljspeech/wavs/LJ004-0218.wav|tests/data/ljspeech/wavs/LJ004-0218.npy +tests/data/ljspeech/wavs/LJ028-0095.wav|tests/data/ljspeech/wavs/LJ028-0095.npy +tests/data/ljspeech/wavs/LJ045-0015.wav|tests/data/ljspeech/wavs/LJ045-0015.npy +tests/data/ljspeech/wavs/LJ031-0153.wav|tests/data/ljspeech/wavs/LJ031-0153.npy +tests/data/ljspeech/wavs/LJ014-0177.wav|tests/data/ljspeech/wavs/LJ014-0177.npy +tests/data/ljspeech/wavs/LJ012-0269.wav|tests/data/ljspeech/wavs/LJ012-0269.npy +tests/data/ljspeech/wavs/LJ001-0050.wav|tests/data/ljspeech/wavs/LJ001-0050.npy +tests/data/ljspeech/wavs/LJ042-0033.wav|tests/data/ljspeech/wavs/LJ042-0033.npy +tests/data/ljspeech/wavs/LJ037-0022.wav|tests/data/ljspeech/wavs/LJ037-0022.npy +tests/data/ljspeech/wavs/LJ016-0325.wav|tests/data/ljspeech/wavs/LJ016-0325.npy +tests/data/ljspeech/wavs/LJ031-0206.wav|tests/data/ljspeech/wavs/LJ031-0206.npy +tests/data/ljspeech/wavs/LJ036-0067.wav|tests/data/ljspeech/wavs/LJ036-0067.npy +tests/data/ljspeech/wavs/LJ042-0132.wav|tests/data/ljspeech/wavs/LJ042-0132.npy +tests/data/ljspeech/wavs/LJ042-0101.wav|tests/data/ljspeech/wavs/LJ042-0101.npy +tests/data/ljspeech/wavs/LJ011-0253.wav|tests/data/ljspeech/wavs/LJ011-0253.npy +tests/data/ljspeech/wavs/LJ042-0036.wav|tests/data/ljspeech/wavs/LJ042-0036.npy +tests/data/ljspeech/wavs/LJ032-0144.wav|tests/data/ljspeech/wavs/LJ032-0144.npy +tests/data/ljspeech/wavs/LJ018-0134.wav|tests/data/ljspeech/wavs/LJ018-0134.npy +tests/data/ljspeech/wavs/LJ026-0144.wav|tests/data/ljspeech/wavs/LJ026-0144.npy +tests/data/ljspeech/wavs/LJ005-0035.wav|tests/data/ljspeech/wavs/LJ005-0035.npy +tests/data/ljspeech/wavs/LJ043-0081.wav|tests/data/ljspeech/wavs/LJ043-0081.npy +tests/data/ljspeech/wavs/LJ023-0050.wav|tests/data/ljspeech/wavs/LJ023-0050.npy +tests/data/ljspeech/wavs/LJ005-0179.wav|tests/data/ljspeech/wavs/LJ005-0179.npy +tests/data/ljspeech/wavs/LJ008-0084.wav|tests/data/ljspeech/wavs/LJ008-0084.npy +tests/data/ljspeech/wavs/LJ018-0355.wav|tests/data/ljspeech/wavs/LJ018-0355.npy +tests/data/ljspeech/wavs/LJ040-0131.wav|tests/data/ljspeech/wavs/LJ040-0131.npy +tests/data/ljspeech/wavs/LJ008-0175.wav|tests/data/ljspeech/wavs/LJ008-0175.npy +tests/data/ljspeech/wavs/LJ017-0215.wav|tests/data/ljspeech/wavs/LJ017-0215.npy +tests/data/ljspeech/wavs/LJ039-0179.wav|tests/data/ljspeech/wavs/LJ039-0179.npy +tests/data/ljspeech/wavs/LJ011-0148.wav|tests/data/ljspeech/wavs/LJ011-0148.npy +tests/data/ljspeech/wavs/LJ017-0177.wav|tests/data/ljspeech/wavs/LJ017-0177.npy +tests/data/ljspeech/wavs/LJ027-0090.wav|tests/data/ljspeech/wavs/LJ027-0090.npy +tests/data/ljspeech/wavs/LJ012-0237.wav|tests/data/ljspeech/wavs/LJ012-0237.npy +tests/data/ljspeech/wavs/LJ027-0036.wav|tests/data/ljspeech/wavs/LJ027-0036.npy +tests/data/ljspeech/wavs/LJ049-0226.wav|tests/data/ljspeech/wavs/LJ049-0226.npy +tests/data/ljspeech/wavs/LJ046-0062.wav|tests/data/ljspeech/wavs/LJ046-0062.npy +tests/data/ljspeech/wavs/LJ016-0358.wav|tests/data/ljspeech/wavs/LJ016-0358.npy +tests/data/ljspeech/wavs/LJ002-0228.wav|tests/data/ljspeech/wavs/LJ002-0228.npy 
+tests/data/ljspeech/wavs/LJ028-0299.wav|tests/data/ljspeech/wavs/LJ028-0299.npy +tests/data/ljspeech/wavs/LJ004-0107.wav|tests/data/ljspeech/wavs/LJ004-0107.npy +tests/data/ljspeech/wavs/LJ017-0073.wav|tests/data/ljspeech/wavs/LJ017-0073.npy +tests/data/ljspeech/wavs/LJ011-0140.wav|tests/data/ljspeech/wavs/LJ011-0140.npy +tests/data/ljspeech/wavs/LJ046-0128.wav|tests/data/ljspeech/wavs/LJ046-0128.npy +tests/data/ljspeech/wavs/LJ021-0168.wav|tests/data/ljspeech/wavs/LJ021-0168.npy +tests/data/ljspeech/wavs/LJ022-0086.wav|tests/data/ljspeech/wavs/LJ022-0086.npy +tests/data/ljspeech/wavs/LJ016-0129.wav|tests/data/ljspeech/wavs/LJ016-0129.npy +tests/data/ljspeech/wavs/LJ022-0073.wav|tests/data/ljspeech/wavs/LJ022-0073.npy +tests/data/ljspeech/wavs/LJ011-0190.wav|tests/data/ljspeech/wavs/LJ011-0190.npy +tests/data/ljspeech/wavs/LJ003-0063.wav|tests/data/ljspeech/wavs/LJ003-0063.npy +tests/data/ljspeech/wavs/LJ021-0167.wav|tests/data/ljspeech/wavs/LJ021-0167.npy +tests/data/ljspeech/wavs/LJ018-0188.wav|tests/data/ljspeech/wavs/LJ018-0188.npy +tests/data/ljspeech/wavs/LJ001-0143.wav|tests/data/ljspeech/wavs/LJ001-0143.npy +tests/data/ljspeech/wavs/LJ042-0133.wav|tests/data/ljspeech/wavs/LJ042-0133.npy +tests/data/ljspeech/wavs/LJ037-0089.wav|tests/data/ljspeech/wavs/LJ037-0089.npy +tests/data/ljspeech/wavs/LJ018-0175.wav|tests/data/ljspeech/wavs/LJ018-0175.npy +tests/data/ljspeech/wavs/LJ017-0239.wav|tests/data/ljspeech/wavs/LJ017-0239.npy +tests/data/ljspeech/wavs/LJ011-0259.wav|tests/data/ljspeech/wavs/LJ011-0259.npy +tests/data/ljspeech/wavs/LJ017-0017.wav|tests/data/ljspeech/wavs/LJ017-0017.npy +tests/data/ljspeech/wavs/LJ016-0222.wav|tests/data/ljspeech/wavs/LJ016-0222.npy +tests/data/ljspeech/wavs/LJ001-0072.wav|tests/data/ljspeech/wavs/LJ001-0072.npy +tests/data/ljspeech/wavs/LJ010-0224.wav|tests/data/ljspeech/wavs/LJ010-0224.npy +tests/data/ljspeech/wavs/LJ011-0214.wav|tests/data/ljspeech/wavs/LJ011-0214.npy +tests/data/ljspeech/wavs/LJ006-0272.wav|tests/data/ljspeech/wavs/LJ006-0272.npy +tests/data/ljspeech/wavs/LJ032-0167.wav|tests/data/ljspeech/wavs/LJ032-0167.npy +tests/data/ljspeech/wavs/LJ017-0281.wav|tests/data/ljspeech/wavs/LJ017-0281.npy +tests/data/ljspeech/wavs/LJ032-0233.wav|tests/data/ljspeech/wavs/LJ032-0233.npy +tests/data/ljspeech/wavs/LJ006-0222.wav|tests/data/ljspeech/wavs/LJ006-0222.npy +tests/data/ljspeech/wavs/LJ017-0254.wav|tests/data/ljspeech/wavs/LJ017-0254.npy +tests/data/ljspeech/wavs/LJ030-0173.wav|tests/data/ljspeech/wavs/LJ030-0173.npy +tests/data/ljspeech/wavs/LJ015-0285.wav|tests/data/ljspeech/wavs/LJ015-0285.npy +tests/data/ljspeech/wavs/LJ017-0251.wav|tests/data/ljspeech/wavs/LJ017-0251.npy +tests/data/ljspeech/wavs/LJ019-0184.wav|tests/data/ljspeech/wavs/LJ019-0184.npy +tests/data/ljspeech/wavs/LJ013-0048.wav|tests/data/ljspeech/wavs/LJ013-0048.npy +tests/data/ljspeech/wavs/LJ001-0007.wav|tests/data/ljspeech/wavs/LJ001-0007.npy +tests/data/ljspeech/wavs/LJ008-0036.wav|tests/data/ljspeech/wavs/LJ008-0036.npy +tests/data/ljspeech/wavs/LJ026-0023.wav|tests/data/ljspeech/wavs/LJ026-0023.npy +tests/data/ljspeech/wavs/LJ030-0020.wav|tests/data/ljspeech/wavs/LJ030-0020.npy +tests/data/ljspeech/wavs/LJ016-0203.wav|tests/data/ljspeech/wavs/LJ016-0203.npy +tests/data/ljspeech/wavs/LJ034-0160.wav|tests/data/ljspeech/wavs/LJ034-0160.npy +tests/data/ljspeech/wavs/LJ005-0001.wav|tests/data/ljspeech/wavs/LJ005-0001.npy +tests/data/ljspeech/wavs/LJ031-0042.wav|tests/data/ljspeech/wavs/LJ031-0042.npy 
+tests/data/ljspeech/wavs/LJ008-0014.wav|tests/data/ljspeech/wavs/LJ008-0014.npy +tests/data/ljspeech/wavs/LJ042-0012.wav|tests/data/ljspeech/wavs/LJ042-0012.npy +tests/data/ljspeech/wavs/LJ022-0156.wav|tests/data/ljspeech/wavs/LJ022-0156.npy +tests/data/ljspeech/wavs/LJ024-0063.wav|tests/data/ljspeech/wavs/LJ024-0063.npy +tests/data/ljspeech/wavs/LJ026-0166.wav|tests/data/ljspeech/wavs/LJ026-0166.npy +tests/data/ljspeech/wavs/LJ037-0221.wav|tests/data/ljspeech/wavs/LJ037-0221.npy +tests/data/ljspeech/wavs/LJ036-0080.wav|tests/data/ljspeech/wavs/LJ036-0080.npy +tests/data/ljspeech/wavs/LJ022-0006.wav|tests/data/ljspeech/wavs/LJ022-0006.npy +tests/data/ljspeech/wavs/LJ045-0111.wav|tests/data/ljspeech/wavs/LJ045-0111.npy +tests/data/ljspeech/wavs/LJ044-0227.wav|tests/data/ljspeech/wavs/LJ044-0227.npy +tests/data/ljspeech/wavs/LJ038-0170.wav|tests/data/ljspeech/wavs/LJ038-0170.npy +tests/data/ljspeech/wavs/LJ014-0153.wav|tests/data/ljspeech/wavs/LJ014-0153.npy +tests/data/ljspeech/wavs/LJ021-0044.wav|tests/data/ljspeech/wavs/LJ021-0044.npy +tests/data/ljspeech/wavs/LJ039-0078.wav|tests/data/ljspeech/wavs/LJ039-0078.npy +tests/data/ljspeech/wavs/LJ048-0193.wav|tests/data/ljspeech/wavs/LJ048-0193.npy +tests/data/ljspeech/wavs/LJ039-0245.wav|tests/data/ljspeech/wavs/LJ039-0245.npy +tests/data/ljspeech/wavs/LJ039-0085.wav|tests/data/ljspeech/wavs/LJ039-0085.npy +tests/data/ljspeech/wavs/LJ014-0131.wav|tests/data/ljspeech/wavs/LJ014-0131.npy +tests/data/ljspeech/wavs/LJ025-0019.wav|tests/data/ljspeech/wavs/LJ025-0019.npy +tests/data/ljspeech/wavs/LJ009-0275.wav|tests/data/ljspeech/wavs/LJ009-0275.npy +tests/data/ljspeech/wavs/LJ045-0060.wav|tests/data/ljspeech/wavs/LJ045-0060.npy +tests/data/ljspeech/wavs/LJ002-0289.wav|tests/data/ljspeech/wavs/LJ002-0289.npy +tests/data/ljspeech/wavs/LJ042-0064.wav|tests/data/ljspeech/wavs/LJ042-0064.npy +tests/data/ljspeech/wavs/LJ019-0218.wav|tests/data/ljspeech/wavs/LJ019-0218.npy +tests/data/ljspeech/wavs/LJ041-0041.wav|tests/data/ljspeech/wavs/LJ041-0041.npy +tests/data/ljspeech/wavs/LJ031-0216.wav|tests/data/ljspeech/wavs/LJ031-0216.npy +tests/data/ljspeech/wavs/LJ047-0096.wav|tests/data/ljspeech/wavs/LJ047-0096.npy +tests/data/ljspeech/wavs/LJ019-0149.wav|tests/data/ljspeech/wavs/LJ019-0149.npy +tests/data/ljspeech/wavs/LJ030-0225.wav|tests/data/ljspeech/wavs/LJ030-0225.npy +tests/data/ljspeech/wavs/LJ022-0054.wav|tests/data/ljspeech/wavs/LJ022-0054.npy +tests/data/ljspeech/wavs/LJ007-0215.wav|tests/data/ljspeech/wavs/LJ007-0215.npy +tests/data/ljspeech/wavs/LJ010-0293.wav|tests/data/ljspeech/wavs/LJ010-0293.npy +tests/data/ljspeech/wavs/LJ005-0120.wav|tests/data/ljspeech/wavs/LJ005-0120.npy +tests/data/ljspeech/wavs/LJ027-0121.wav|tests/data/ljspeech/wavs/LJ027-0121.npy +tests/data/ljspeech/wavs/LJ003-0236.wav|tests/data/ljspeech/wavs/LJ003-0236.npy +tests/data/ljspeech/wavs/LJ029-0103.wav|tests/data/ljspeech/wavs/LJ029-0103.npy +tests/data/ljspeech/wavs/LJ024-0128.wav|tests/data/ljspeech/wavs/LJ024-0128.npy +tests/data/ljspeech/wavs/LJ008-0055.wav|tests/data/ljspeech/wavs/LJ008-0055.npy +tests/data/ljspeech/wavs/LJ011-0069.wav|tests/data/ljspeech/wavs/LJ011-0069.npy +tests/data/ljspeech/wavs/LJ003-0299.wav|tests/data/ljspeech/wavs/LJ003-0299.npy +tests/data/ljspeech/wavs/LJ043-0128.wav|tests/data/ljspeech/wavs/LJ043-0128.npy +tests/data/ljspeech/wavs/LJ011-0068.wav|tests/data/ljspeech/wavs/LJ011-0068.npy +tests/data/ljspeech/wavs/LJ037-0120.wav|tests/data/ljspeech/wavs/LJ037-0120.npy 
+tests/data/ljspeech/wavs/LJ028-0434.wav|tests/data/ljspeech/wavs/LJ028-0434.npy +tests/data/ljspeech/wavs/LJ019-0311.wav|tests/data/ljspeech/wavs/LJ019-0311.npy +tests/data/ljspeech/wavs/LJ040-0061.wav|tests/data/ljspeech/wavs/LJ040-0061.npy +tests/data/ljspeech/wavs/LJ004-0131.wav|tests/data/ljspeech/wavs/LJ004-0131.npy +tests/data/ljspeech/wavs/LJ002-0303.wav|tests/data/ljspeech/wavs/LJ002-0303.npy +tests/data/ljspeech/wavs/LJ044-0138.wav|tests/data/ljspeech/wavs/LJ044-0138.npy +tests/data/ljspeech/wavs/LJ049-0071.wav|tests/data/ljspeech/wavs/LJ049-0071.npy +tests/data/ljspeech/wavs/LJ008-0207.wav|tests/data/ljspeech/wavs/LJ008-0207.npy +tests/data/ljspeech/wavs/LJ025-0161.wav|tests/data/ljspeech/wavs/LJ025-0161.npy +tests/data/ljspeech/wavs/LJ045-0232.wav|tests/data/ljspeech/wavs/LJ045-0232.npy +tests/data/ljspeech/wavs/LJ009-0211.wav|tests/data/ljspeech/wavs/LJ009-0211.npy +tests/data/ljspeech/wavs/LJ039-0091.wav|tests/data/ljspeech/wavs/LJ039-0091.npy +tests/data/ljspeech/wavs/LJ018-0253.wav|tests/data/ljspeech/wavs/LJ018-0253.npy +tests/data/ljspeech/wavs/LJ015-0253.wav|tests/data/ljspeech/wavs/LJ015-0253.npy +tests/data/ljspeech/wavs/LJ005-0220.wav|tests/data/ljspeech/wavs/LJ005-0220.npy +tests/data/ljspeech/wavs/LJ010-0147.wav|tests/data/ljspeech/wavs/LJ010-0147.npy +tests/data/ljspeech/wavs/LJ018-0122.wav|tests/data/ljspeech/wavs/LJ018-0122.npy +tests/data/ljspeech/wavs/LJ019-0005.wav|tests/data/ljspeech/wavs/LJ019-0005.npy +tests/data/ljspeech/wavs/LJ018-0154.wav|tests/data/ljspeech/wavs/LJ018-0154.npy +tests/data/ljspeech/wavs/LJ028-0234.wav|tests/data/ljspeech/wavs/LJ028-0234.npy +tests/data/ljspeech/wavs/LJ031-0131.wav|tests/data/ljspeech/wavs/LJ031-0131.npy +tests/data/ljspeech/wavs/LJ010-0166.wav|tests/data/ljspeech/wavs/LJ010-0166.npy +tests/data/ljspeech/wavs/LJ021-0095.wav|tests/data/ljspeech/wavs/LJ021-0095.npy +tests/data/ljspeech/wavs/LJ016-0009.wav|tests/data/ljspeech/wavs/LJ016-0009.npy +tests/data/ljspeech/wavs/LJ014-0205.wav|tests/data/ljspeech/wavs/LJ014-0205.npy +tests/data/ljspeech/wavs/LJ028-0020.wav|tests/data/ljspeech/wavs/LJ028-0020.npy +tests/data/ljspeech/wavs/LJ012-0073.wav|tests/data/ljspeech/wavs/LJ012-0073.npy +tests/data/ljspeech/wavs/LJ015-0228.wav|tests/data/ljspeech/wavs/LJ015-0228.npy +tests/data/ljspeech/wavs/LJ023-0029.wav|tests/data/ljspeech/wavs/LJ023-0029.npy +tests/data/ljspeech/wavs/LJ015-0303.wav|tests/data/ljspeech/wavs/LJ015-0303.npy +tests/data/ljspeech/wavs/LJ027-0176.wav|tests/data/ljspeech/wavs/LJ027-0176.npy +tests/data/ljspeech/wavs/LJ037-0178.wav|tests/data/ljspeech/wavs/LJ037-0178.npy +tests/data/ljspeech/wavs/LJ049-0183.wav|tests/data/ljspeech/wavs/LJ049-0183.npy +tests/data/ljspeech/wavs/LJ023-0053.wav|tests/data/ljspeech/wavs/LJ023-0053.npy +tests/data/ljspeech/wavs/LJ023-0097.wav|tests/data/ljspeech/wavs/LJ023-0097.npy +tests/data/ljspeech/wavs/LJ005-0155.wav|tests/data/ljspeech/wavs/LJ005-0155.npy +tests/data/ljspeech/wavs/LJ018-0327.wav|tests/data/ljspeech/wavs/LJ018-0327.npy +tests/data/ljspeech/wavs/LJ006-0286.wav|tests/data/ljspeech/wavs/LJ006-0286.npy +tests/data/ljspeech/wavs/LJ018-0329.wav|tests/data/ljspeech/wavs/LJ018-0329.npy +tests/data/ljspeech/wavs/LJ028-0464.wav|tests/data/ljspeech/wavs/LJ028-0464.npy +tests/data/ljspeech/wavs/LJ021-0108.wav|tests/data/ljspeech/wavs/LJ021-0108.npy +tests/data/ljspeech/wavs/LJ026-0075.wav|tests/data/ljspeech/wavs/LJ026-0075.npy +tests/data/ljspeech/wavs/LJ018-0129.wav|tests/data/ljspeech/wavs/LJ018-0129.npy 
+tests/data/ljspeech/wavs/LJ030-0005.wav|tests/data/ljspeech/wavs/LJ030-0005.npy +tests/data/ljspeech/wavs/LJ034-0011.wav|tests/data/ljspeech/wavs/LJ034-0011.npy +tests/data/ljspeech/wavs/LJ004-0001.wav|tests/data/ljspeech/wavs/LJ004-0001.npy +tests/data/ljspeech/wavs/LJ034-0116.wav|tests/data/ljspeech/wavs/LJ034-0116.npy +tests/data/ljspeech/wavs/LJ003-0170.wav|tests/data/ljspeech/wavs/LJ003-0170.npy +tests/data/ljspeech/wavs/LJ044-0112.wav|tests/data/ljspeech/wavs/LJ044-0112.npy +tests/data/ljspeech/wavs/LJ046-0038.wav|tests/data/ljspeech/wavs/LJ046-0038.npy +tests/data/ljspeech/wavs/LJ035-0157.wav|tests/data/ljspeech/wavs/LJ035-0157.npy +tests/data/ljspeech/wavs/LJ003-0091.wav|tests/data/ljspeech/wavs/LJ003-0091.npy +tests/data/ljspeech/wavs/LJ021-0134.wav|tests/data/ljspeech/wavs/LJ021-0134.npy +tests/data/ljspeech/wavs/LJ035-0143.wav|tests/data/ljspeech/wavs/LJ035-0143.npy +tests/data/ljspeech/wavs/LJ038-0031.wav|tests/data/ljspeech/wavs/LJ038-0031.npy +tests/data/ljspeech/wavs/LJ029-0008.wav|tests/data/ljspeech/wavs/LJ029-0008.npy +tests/data/ljspeech/wavs/LJ014-0224.wav|tests/data/ljspeech/wavs/LJ014-0224.npy +tests/data/ljspeech/wavs/LJ046-0114.wav|tests/data/ljspeech/wavs/LJ046-0114.npy +tests/data/ljspeech/wavs/LJ019-0095.wav|tests/data/ljspeech/wavs/LJ019-0095.npy +tests/data/ljspeech/wavs/LJ022-0197.wav|tests/data/ljspeech/wavs/LJ022-0197.npy +tests/data/ljspeech/wavs/LJ038-0045.wav|tests/data/ljspeech/wavs/LJ038-0045.npy +tests/data/ljspeech/wavs/LJ031-0105.wav|tests/data/ljspeech/wavs/LJ031-0105.npy +tests/data/ljspeech/wavs/LJ043-0187.wav|tests/data/ljspeech/wavs/LJ043-0187.npy +tests/data/ljspeech/wavs/LJ006-0127.wav|tests/data/ljspeech/wavs/LJ006-0127.npy +tests/data/ljspeech/wavs/LJ018-0318.wav|tests/data/ljspeech/wavs/LJ018-0318.npy +tests/data/ljspeech/wavs/LJ028-0044.wav|tests/data/ljspeech/wavs/LJ028-0044.npy +tests/data/ljspeech/wavs/LJ011-0251.wav|tests/data/ljspeech/wavs/LJ011-0251.npy +tests/data/ljspeech/wavs/LJ046-0152.wav|tests/data/ljspeech/wavs/LJ046-0152.npy +tests/data/ljspeech/wavs/LJ010-0004.wav|tests/data/ljspeech/wavs/LJ010-0004.npy +tests/data/ljspeech/wavs/LJ040-0234.wav|tests/data/ljspeech/wavs/LJ040-0234.npy +tests/data/ljspeech/wavs/LJ019-0080.wav|tests/data/ljspeech/wavs/LJ019-0080.npy +tests/data/ljspeech/wavs/LJ015-0177.wav|tests/data/ljspeech/wavs/LJ015-0177.npy +tests/data/ljspeech/wavs/LJ019-0124.wav|tests/data/ljspeech/wavs/LJ019-0124.npy +tests/data/ljspeech/wavs/LJ033-0196.wav|tests/data/ljspeech/wavs/LJ033-0196.npy +tests/data/ljspeech/wavs/LJ021-0171.wav|tests/data/ljspeech/wavs/LJ021-0171.npy +tests/data/ljspeech/wavs/LJ038-0069.wav|tests/data/ljspeech/wavs/LJ038-0069.npy +tests/data/ljspeech/wavs/LJ025-0101.wav|tests/data/ljspeech/wavs/LJ025-0101.npy +tests/data/ljspeech/wavs/LJ031-0209.wav|tests/data/ljspeech/wavs/LJ031-0209.npy +tests/data/ljspeech/wavs/LJ030-0074.wav|tests/data/ljspeech/wavs/LJ030-0074.npy +tests/data/ljspeech/wavs/LJ016-0149.wav|tests/data/ljspeech/wavs/LJ016-0149.npy +tests/data/ljspeech/wavs/LJ027-0029.wav|tests/data/ljspeech/wavs/LJ027-0029.npy +tests/data/ljspeech/wavs/LJ031-0196.wav|tests/data/ljspeech/wavs/LJ031-0196.npy +tests/data/ljspeech/wavs/LJ032-0090.wav|tests/data/ljspeech/wavs/LJ032-0090.npy +tests/data/ljspeech/wavs/LJ029-0163.wav|tests/data/ljspeech/wavs/LJ029-0163.npy +tests/data/ljspeech/wavs/LJ007-0209.wav|tests/data/ljspeech/wavs/LJ007-0209.npy +tests/data/ljspeech/wavs/LJ032-0268.wav|tests/data/ljspeech/wavs/LJ032-0268.npy 
+tests/data/ljspeech/wavs/LJ032-0269.wav|tests/data/ljspeech/wavs/LJ032-0269.npy +tests/data/ljspeech/wavs/LJ028-0118.wav|tests/data/ljspeech/wavs/LJ028-0118.npy +tests/data/ljspeech/wavs/LJ032-0195.wav|tests/data/ljspeech/wavs/LJ032-0195.npy +tests/data/ljspeech/wavs/LJ033-0065.wav|tests/data/ljspeech/wavs/LJ033-0065.npy +tests/data/ljspeech/wavs/LJ027-0166.wav|tests/data/ljspeech/wavs/LJ027-0166.npy +tests/data/ljspeech/wavs/LJ028-0438.wav|tests/data/ljspeech/wavs/LJ028-0438.npy +tests/data/ljspeech/wavs/LJ014-0316.wav|tests/data/ljspeech/wavs/LJ014-0316.npy +tests/data/ljspeech/wavs/LJ004-0149.wav|tests/data/ljspeech/wavs/LJ004-0149.npy +tests/data/ljspeech/wavs/LJ029-0132.wav|tests/data/ljspeech/wavs/LJ029-0132.npy +tests/data/ljspeech/wavs/LJ029-0053.wav|tests/data/ljspeech/wavs/LJ029-0053.npy +tests/data/ljspeech/wavs/LJ032-0270.wav|tests/data/ljspeech/wavs/LJ032-0270.npy +tests/data/ljspeech/wavs/LJ032-0194.wav|tests/data/ljspeech/wavs/LJ032-0194.npy +tests/data/ljspeech/wavs/LJ032-0096.wav|tests/data/ljspeech/wavs/LJ032-0096.npy +tests/data/ljspeech/wavs/LJ028-0078.wav|tests/data/ljspeech/wavs/LJ028-0078.npy +tests/data/ljspeech/wavs/LJ047-0234.wav|tests/data/ljspeech/wavs/LJ047-0234.npy +tests/data/ljspeech/wavs/LJ028-0176.wav|tests/data/ljspeech/wavs/LJ028-0176.npy +tests/data/ljspeech/wavs/LJ028-0205.wav|tests/data/ljspeech/wavs/LJ028-0205.npy +tests/data/ljspeech/wavs/LJ037-0151.wav|tests/data/ljspeech/wavs/LJ037-0151.npy +tests/data/ljspeech/wavs/LJ028-0512.wav|tests/data/ljspeech/wavs/LJ028-0512.npy +tests/data/ljspeech/wavs/LJ042-0100.wav|tests/data/ljspeech/wavs/LJ042-0100.npy +tests/data/ljspeech/wavs/LJ049-0111.wav|tests/data/ljspeech/wavs/LJ049-0111.npy +tests/data/ljspeech/wavs/LJ015-0019.wav|tests/data/ljspeech/wavs/LJ015-0019.npy +tests/data/ljspeech/wavs/LJ032-0057.wav|tests/data/ljspeech/wavs/LJ032-0057.npy +tests/data/ljspeech/wavs/LJ050-0078.wav|tests/data/ljspeech/wavs/LJ050-0078.npy +tests/data/ljspeech/wavs/LJ026-0122.wav|tests/data/ljspeech/wavs/LJ026-0122.npy +tests/data/ljspeech/wavs/LJ026-0164.wav|tests/data/ljspeech/wavs/LJ026-0164.npy +tests/data/ljspeech/wavs/LJ028-0054.wav|tests/data/ljspeech/wavs/LJ028-0054.npy +tests/data/ljspeech/wavs/LJ043-0020.wav|tests/data/ljspeech/wavs/LJ043-0020.npy +tests/data/ljspeech/wavs/LJ036-0212.wav|tests/data/ljspeech/wavs/LJ036-0212.npy +tests/data/ljspeech/wavs/LJ028-0221.wav|tests/data/ljspeech/wavs/LJ028-0221.npy +tests/data/ljspeech/wavs/LJ021-0036.wav|tests/data/ljspeech/wavs/LJ021-0036.npy +tests/data/ljspeech/wavs/LJ019-0378.wav|tests/data/ljspeech/wavs/LJ019-0378.npy +tests/data/ljspeech/wavs/LJ042-0198.wav|tests/data/ljspeech/wavs/LJ042-0198.npy +tests/data/ljspeech/wavs/LJ021-0016.wav|tests/data/ljspeech/wavs/LJ021-0016.npy +tests/data/ljspeech/wavs/LJ007-0122.wav|tests/data/ljspeech/wavs/LJ007-0122.npy +tests/data/ljspeech/wavs/LJ027-0003.wav|tests/data/ljspeech/wavs/LJ027-0003.npy +tests/data/ljspeech/wavs/LJ028-0472.wav|tests/data/ljspeech/wavs/LJ028-0472.npy +tests/data/ljspeech/wavs/LJ030-0150.wav|tests/data/ljspeech/wavs/LJ030-0150.npy +tests/data/ljspeech/wavs/LJ043-0043.wav|tests/data/ljspeech/wavs/LJ043-0043.npy +tests/data/ljspeech/wavs/LJ033-0166.wav|tests/data/ljspeech/wavs/LJ033-0166.npy +tests/data/ljspeech/wavs/LJ007-0156.wav|tests/data/ljspeech/wavs/LJ007-0156.npy +tests/data/ljspeech/wavs/LJ021-0019.wav|tests/data/ljspeech/wavs/LJ021-0019.npy +tests/data/ljspeech/wavs/LJ050-0097.wav|tests/data/ljspeech/wavs/LJ050-0097.npy 
+tests/data/ljspeech/wavs/LJ021-0030.wav|tests/data/ljspeech/wavs/LJ021-0030.npy +tests/data/ljspeech/wavs/LJ018-0171.wav|tests/data/ljspeech/wavs/LJ018-0171.npy +tests/data/ljspeech/wavs/LJ042-0165.wav|tests/data/ljspeech/wavs/LJ042-0165.npy +tests/data/ljspeech/wavs/LJ050-0131.wav|tests/data/ljspeech/wavs/LJ050-0131.npy +tests/data/ljspeech/wavs/LJ018-0058.wav|tests/data/ljspeech/wavs/LJ018-0058.npy +tests/data/ljspeech/wavs/LJ005-0290.wav|tests/data/ljspeech/wavs/LJ005-0290.npy +tests/data/ljspeech/wavs/LJ042-0124.wav|tests/data/ljspeech/wavs/LJ042-0124.npy +tests/data/ljspeech/wavs/LJ032-0190.wav|tests/data/ljspeech/wavs/LJ032-0190.npy +tests/data/ljspeech/wavs/LJ028-0495.wav|tests/data/ljspeech/wavs/LJ028-0495.npy +tests/data/ljspeech/wavs/LJ033-0036.wav|tests/data/ljspeech/wavs/LJ033-0036.npy +tests/data/ljspeech/wavs/LJ049-0059.wav|tests/data/ljspeech/wavs/LJ049-0059.npy +tests/data/ljspeech/wavs/LJ014-0324.wav|tests/data/ljspeech/wavs/LJ014-0324.npy +tests/data/ljspeech/wavs/LJ044-0015.wav|tests/data/ljspeech/wavs/LJ044-0015.npy +tests/data/ljspeech/wavs/LJ005-0217.wav|tests/data/ljspeech/wavs/LJ005-0217.npy +tests/data/ljspeech/wavs/LJ039-0159.wav|tests/data/ljspeech/wavs/LJ039-0159.npy +tests/data/ljspeech/wavs/LJ021-0114.wav|tests/data/ljspeech/wavs/LJ021-0114.npy +tests/data/ljspeech/wavs/LJ036-0048.wav|tests/data/ljspeech/wavs/LJ036-0048.npy +tests/data/ljspeech/wavs/LJ044-0053.wav|tests/data/ljspeech/wavs/LJ044-0053.npy +tests/data/ljspeech/wavs/LJ021-0184.wav|tests/data/ljspeech/wavs/LJ021-0184.npy +tests/data/ljspeech/wavs/LJ021-0154.wav|tests/data/ljspeech/wavs/LJ021-0154.npy +tests/data/ljspeech/wavs/LJ049-0012.wav|tests/data/ljspeech/wavs/LJ049-0012.npy +tests/data/ljspeech/wavs/LJ034-0104.wav|tests/data/ljspeech/wavs/LJ034-0104.npy +tests/data/ljspeech/wavs/LJ017-0038.wav|tests/data/ljspeech/wavs/LJ017-0038.npy +tests/data/ljspeech/wavs/LJ012-0266.wav|tests/data/ljspeech/wavs/LJ012-0266.npy +tests/data/ljspeech/wavs/LJ016-0262.wav|tests/data/ljspeech/wavs/LJ016-0262.npy +tests/data/ljspeech/wavs/LJ012-0068.wav|tests/data/ljspeech/wavs/LJ012-0068.npy +tests/data/ljspeech/wavs/LJ038-0250.wav|tests/data/ljspeech/wavs/LJ038-0250.npy +tests/data/ljspeech/wavs/LJ005-0077.wav|tests/data/ljspeech/wavs/LJ005-0077.npy +tests/data/ljspeech/wavs/LJ018-0148.wav|tests/data/ljspeech/wavs/LJ018-0148.npy +tests/data/ljspeech/wavs/LJ013-0164.wav|tests/data/ljspeech/wavs/LJ013-0164.npy +tests/data/ljspeech/wavs/LJ019-0339.wav|tests/data/ljspeech/wavs/LJ019-0339.npy +tests/data/ljspeech/wavs/LJ016-0422.wav|tests/data/ljspeech/wavs/LJ016-0422.npy +tests/data/ljspeech/wavs/LJ005-0147.wav|tests/data/ljspeech/wavs/LJ005-0147.npy +tests/data/ljspeech/wavs/LJ008-0217.wav|tests/data/ljspeech/wavs/LJ008-0217.npy +tests/data/ljspeech/wavs/LJ014-0036.wav|tests/data/ljspeech/wavs/LJ014-0036.npy +tests/data/ljspeech/wavs/LJ015-0067.wav|tests/data/ljspeech/wavs/LJ015-0067.npy +tests/data/ljspeech/wavs/LJ012-0087.wav|tests/data/ljspeech/wavs/LJ012-0087.npy +tests/data/ljspeech/wavs/LJ049-0002.wav|tests/data/ljspeech/wavs/LJ049-0002.npy +tests/data/ljspeech/wavs/LJ039-0003.wav|tests/data/ljspeech/wavs/LJ039-0003.npy +tests/data/ljspeech/wavs/LJ004-0173.wav|tests/data/ljspeech/wavs/LJ004-0173.npy +tests/data/ljspeech/wavs/LJ004-0168.wav|tests/data/ljspeech/wavs/LJ004-0168.npy +tests/data/ljspeech/wavs/LJ018-0377.wav|tests/data/ljspeech/wavs/LJ018-0377.npy +tests/data/ljspeech/wavs/LJ015-0134.wav|tests/data/ljspeech/wavs/LJ015-0134.npy 
+tests/data/ljspeech/wavs/LJ037-0252.wav|tests/data/ljspeech/wavs/LJ037-0252.npy +tests/data/ljspeech/wavs/LJ016-0180.wav|tests/data/ljspeech/wavs/LJ016-0180.npy +tests/data/ljspeech/wavs/LJ011-0124.wav|tests/data/ljspeech/wavs/LJ011-0124.npy +tests/data/ljspeech/wavs/LJ042-0007.wav|tests/data/ljspeech/wavs/LJ042-0007.npy +tests/data/ljspeech/wavs/LJ045-0107.wav|tests/data/ljspeech/wavs/LJ045-0107.npy +tests/data/ljspeech/wavs/LJ040-0052.wav|tests/data/ljspeech/wavs/LJ040-0052.npy +tests/data/ljspeech/wavs/LJ010-0235.wav|tests/data/ljspeech/wavs/LJ010-0235.npy +tests/data/ljspeech/wavs/LJ015-0282.wav|tests/data/ljspeech/wavs/LJ015-0282.npy +tests/data/ljspeech/wavs/LJ022-0200.wav|tests/data/ljspeech/wavs/LJ022-0200.npy +tests/data/ljspeech/wavs/LJ016-0018.wav|tests/data/ljspeech/wavs/LJ016-0018.npy +tests/data/ljspeech/wavs/LJ047-0248.wav|tests/data/ljspeech/wavs/LJ047-0248.npy +tests/data/ljspeech/wavs/LJ014-0213.wav|tests/data/ljspeech/wavs/LJ014-0213.npy +tests/data/ljspeech/wavs/LJ003-0187.wav|tests/data/ljspeech/wavs/LJ003-0187.npy +tests/data/ljspeech/wavs/LJ041-0089.wav|tests/data/ljspeech/wavs/LJ041-0089.npy +tests/data/ljspeech/wavs/LJ017-0056.wav|tests/data/ljspeech/wavs/LJ017-0056.npy +tests/data/ljspeech/wavs/LJ017-0149.wav|tests/data/ljspeech/wavs/LJ017-0149.npy +tests/data/ljspeech/wavs/LJ010-0143.wav|tests/data/ljspeech/wavs/LJ010-0143.npy +tests/data/ljspeech/wavs/LJ019-0306.wav|tests/data/ljspeech/wavs/LJ019-0306.npy +tests/data/ljspeech/wavs/LJ036-0043.wav|tests/data/ljspeech/wavs/LJ036-0043.npy +tests/data/ljspeech/wavs/LJ050-0006.wav|tests/data/ljspeech/wavs/LJ050-0006.npy +tests/data/ljspeech/wavs/LJ037-0253.wav|tests/data/ljspeech/wavs/LJ037-0253.npy +tests/data/ljspeech/wavs/LJ045-0186.wav|tests/data/ljspeech/wavs/LJ045-0186.npy +tests/data/ljspeech/wavs/LJ045-0188.wav|tests/data/ljspeech/wavs/LJ045-0188.npy +tests/data/ljspeech/wavs/LJ023-0107.wav|tests/data/ljspeech/wavs/LJ023-0107.npy +tests/data/ljspeech/wavs/LJ003-0292.wav|tests/data/ljspeech/wavs/LJ003-0292.npy +tests/data/ljspeech/wavs/LJ039-0219.wav|tests/data/ljspeech/wavs/LJ039-0219.npy +tests/data/ljspeech/wavs/LJ013-0150.wav|tests/data/ljspeech/wavs/LJ013-0150.npy +tests/data/ljspeech/wavs/LJ019-0129.wav|tests/data/ljspeech/wavs/LJ019-0129.npy +tests/data/ljspeech/wavs/LJ015-0308.wav|tests/data/ljspeech/wavs/LJ015-0308.npy +tests/data/ljspeech/wavs/LJ011-0211.wav|tests/data/ljspeech/wavs/LJ011-0211.npy +tests/data/ljspeech/wavs/LJ016-0238.wav|tests/data/ljspeech/wavs/LJ016-0238.npy +tests/data/ljspeech/wavs/LJ044-0233.wav|tests/data/ljspeech/wavs/LJ044-0233.npy +tests/data/ljspeech/wavs/LJ017-0174.wav|tests/data/ljspeech/wavs/LJ017-0174.npy +tests/data/ljspeech/wavs/LJ046-0121.wav|tests/data/ljspeech/wavs/LJ046-0121.npy +tests/data/ljspeech/wavs/LJ024-0030.wav|tests/data/ljspeech/wavs/LJ024-0030.npy +tests/data/ljspeech/wavs/LJ046-0071.wav|tests/data/ljspeech/wavs/LJ046-0071.npy +tests/data/ljspeech/wavs/LJ010-0159.wav|tests/data/ljspeech/wavs/LJ010-0159.npy +tests/data/ljspeech/wavs/LJ004-0065.wav|tests/data/ljspeech/wavs/LJ004-0065.npy +tests/data/ljspeech/wavs/LJ002-0314.wav|tests/data/ljspeech/wavs/LJ002-0314.npy +tests/data/ljspeech/wavs/LJ030-0026.wav|tests/data/ljspeech/wavs/LJ030-0026.npy +tests/data/ljspeech/wavs/LJ049-0044.wav|tests/data/ljspeech/wavs/LJ049-0044.npy +tests/data/ljspeech/wavs/LJ002-0227.wav|tests/data/ljspeech/wavs/LJ002-0227.npy +tests/data/ljspeech/wavs/LJ002-0167.wav|tests/data/ljspeech/wavs/LJ002-0167.npy 
+tests/data/ljspeech/wavs/LJ002-0316.wav|tests/data/ljspeech/wavs/LJ002-0316.npy +tests/data/ljspeech/wavs/LJ040-0034.wav|tests/data/ljspeech/wavs/LJ040-0034.npy +tests/data/ljspeech/wavs/LJ033-0193.wav|tests/data/ljspeech/wavs/LJ033-0193.npy +tests/data/ljspeech/wavs/LJ024-0070.wav|tests/data/ljspeech/wavs/LJ024-0070.npy +tests/data/ljspeech/wavs/LJ004-0064.wav|tests/data/ljspeech/wavs/LJ004-0064.npy +tests/data/ljspeech/wavs/LJ001-0120.wav|tests/data/ljspeech/wavs/LJ001-0120.npy +tests/data/ljspeech/wavs/LJ015-0246.wav|tests/data/ljspeech/wavs/LJ015-0246.npy +tests/data/ljspeech/wavs/LJ044-0128.wav|tests/data/ljspeech/wavs/LJ044-0128.npy +tests/data/ljspeech/wavs/LJ003-0335.wav|tests/data/ljspeech/wavs/LJ003-0335.npy +tests/data/ljspeech/wavs/LJ004-0133.wav|tests/data/ljspeech/wavs/LJ004-0133.npy +tests/data/ljspeech/wavs/LJ024-0036.wav|tests/data/ljspeech/wavs/LJ024-0036.npy +tests/data/ljspeech/wavs/LJ024-0035.wav|tests/data/ljspeech/wavs/LJ024-0035.npy +tests/data/ljspeech/wavs/LJ001-0058.wav|tests/data/ljspeech/wavs/LJ001-0058.npy +tests/data/ljspeech/wavs/LJ022-0136.wav|tests/data/ljspeech/wavs/LJ022-0136.npy +tests/data/ljspeech/wavs/LJ010-0271.wav|tests/data/ljspeech/wavs/LJ010-0271.npy +tests/data/ljspeech/wavs/LJ028-0341.wav|tests/data/ljspeech/wavs/LJ028-0341.npy +tests/data/ljspeech/wavs/LJ010-0168.wav|tests/data/ljspeech/wavs/LJ010-0168.npy +tests/data/ljspeech/wavs/LJ002-0106.wav|tests/data/ljspeech/wavs/LJ002-0106.npy +tests/data/ljspeech/wavs/LJ010-0154.wav|tests/data/ljspeech/wavs/LJ010-0154.npy +tests/data/ljspeech/wavs/LJ001-0147.wav|tests/data/ljspeech/wavs/LJ001-0147.npy +tests/data/ljspeech/wavs/LJ002-0176.wav|tests/data/ljspeech/wavs/LJ002-0176.npy +tests/data/ljspeech/wavs/LJ019-0279.wav|tests/data/ljspeech/wavs/LJ019-0279.npy +tests/data/ljspeech/wavs/LJ041-0040.wav|tests/data/ljspeech/wavs/LJ041-0040.npy +tests/data/ljspeech/wavs/LJ021-0163.wav|tests/data/ljspeech/wavs/LJ021-0163.npy +tests/data/ljspeech/wavs/LJ022-0066.wav|tests/data/ljspeech/wavs/LJ022-0066.npy +tests/data/ljspeech/wavs/LJ038-0291.wav|tests/data/ljspeech/wavs/LJ038-0291.npy +tests/data/ljspeech/wavs/LJ002-0146.wav|tests/data/ljspeech/wavs/LJ002-0146.npy +tests/data/ljspeech/wavs/LJ009-0252.wav|tests/data/ljspeech/wavs/LJ009-0252.npy +tests/data/ljspeech/wavs/LJ015-0127.wav|tests/data/ljspeech/wavs/LJ015-0127.npy +tests/data/ljspeech/wavs/LJ048-0195.wav|tests/data/ljspeech/wavs/LJ048-0195.npy +tests/data/ljspeech/wavs/LJ041-0082.wav|tests/data/ljspeech/wavs/LJ041-0082.npy +tests/data/ljspeech/wavs/LJ022-0059.wav|tests/data/ljspeech/wavs/LJ022-0059.npy +tests/data/ljspeech/wavs/LJ019-0004.wav|tests/data/ljspeech/wavs/LJ019-0004.npy +tests/data/ljspeech/wavs/LJ019-0272.wav|tests/data/ljspeech/wavs/LJ019-0272.npy +tests/data/ljspeech/wavs/LJ037-0163.wav|tests/data/ljspeech/wavs/LJ037-0163.npy +tests/data/ljspeech/wavs/LJ040-0169.wav|tests/data/ljspeech/wavs/LJ040-0169.npy +tests/data/ljspeech/wavs/LJ010-0139.wav|tests/data/ljspeech/wavs/LJ010-0139.npy +tests/data/ljspeech/wavs/LJ032-0129.wav|tests/data/ljspeech/wavs/LJ032-0129.npy +tests/data/ljspeech/wavs/LJ016-0026.wav|tests/data/ljspeech/wavs/LJ016-0026.npy +tests/data/ljspeech/wavs/LJ041-0020.wav|tests/data/ljspeech/wavs/LJ041-0020.npy +tests/data/ljspeech/wavs/LJ017-0029.wav|tests/data/ljspeech/wavs/LJ017-0029.npy +tests/data/ljspeech/wavs/LJ022-0046.wav|tests/data/ljspeech/wavs/LJ022-0046.npy +tests/data/ljspeech/wavs/LJ002-0152.wav|tests/data/ljspeech/wavs/LJ002-0152.npy 
+tests/data/ljspeech/wavs/LJ010-0036.wav|tests/data/ljspeech/wavs/LJ010-0036.npy +tests/data/ljspeech/wavs/LJ037-0105.wav|tests/data/ljspeech/wavs/LJ037-0105.npy +tests/data/ljspeech/wavs/LJ013-0251.wav|tests/data/ljspeech/wavs/LJ013-0251.npy +tests/data/ljspeech/wavs/LJ010-0096.wav|tests/data/ljspeech/wavs/LJ010-0096.npy +tests/data/ljspeech/wavs/LJ002-0175.wav|tests/data/ljspeech/wavs/LJ002-0175.npy +tests/data/ljspeech/wavs/LJ011-0244.wav|tests/data/ljspeech/wavs/LJ011-0244.npy +tests/data/ljspeech/wavs/LJ010-0098.wav|tests/data/ljspeech/wavs/LJ010-0098.npy +tests/data/ljspeech/wavs/LJ002-0242.wav|tests/data/ljspeech/wavs/LJ002-0242.npy +tests/data/ljspeech/wavs/LJ001-0086.wav|tests/data/ljspeech/wavs/LJ001-0086.npy +tests/data/ljspeech/wavs/LJ012-0085.wav|tests/data/ljspeech/wavs/LJ012-0085.npy +tests/data/ljspeech/wavs/LJ038-0190.wav|tests/data/ljspeech/wavs/LJ038-0190.npy +tests/data/ljspeech/wavs/LJ004-0215.wav|tests/data/ljspeech/wavs/LJ004-0215.npy +tests/data/ljspeech/wavs/LJ049-0019.wav|tests/data/ljspeech/wavs/LJ049-0019.npy +tests/data/ljspeech/wavs/LJ012-0041.wav|tests/data/ljspeech/wavs/LJ012-0041.npy +tests/data/ljspeech/wavs/LJ041-0054.wav|tests/data/ljspeech/wavs/LJ041-0054.npy +tests/data/ljspeech/wavs/LJ036-0087.wav|tests/data/ljspeech/wavs/LJ036-0087.npy +tests/data/ljspeech/wavs/LJ001-0148.wav|tests/data/ljspeech/wavs/LJ001-0148.npy +tests/data/ljspeech/wavs/LJ011-0285.wav|tests/data/ljspeech/wavs/LJ011-0285.npy +tests/data/ljspeech/wavs/LJ030-0028.wav|tests/data/ljspeech/wavs/LJ030-0028.npy +tests/data/ljspeech/wavs/LJ014-0146.wav|tests/data/ljspeech/wavs/LJ014-0146.npy +tests/data/ljspeech/wavs/LJ014-0190.wav|tests/data/ljspeech/wavs/LJ014-0190.npy +tests/data/ljspeech/wavs/LJ048-0199.wav|tests/data/ljspeech/wavs/LJ048-0199.npy +tests/data/ljspeech/wavs/LJ024-0088.wav|tests/data/ljspeech/wavs/LJ024-0088.npy +tests/data/ljspeech/wavs/LJ038-0017.wav|tests/data/ljspeech/wavs/LJ038-0017.npy +tests/data/ljspeech/wavs/LJ004-0180.wav|tests/data/ljspeech/wavs/LJ004-0180.npy +tests/data/ljspeech/wavs/LJ015-0123.wav|tests/data/ljspeech/wavs/LJ015-0123.npy +tests/data/ljspeech/wavs/LJ036-0066.wav|tests/data/ljspeech/wavs/LJ036-0066.npy +tests/data/ljspeech/wavs/LJ024-0093.wav|tests/data/ljspeech/wavs/LJ024-0093.npy +tests/data/ljspeech/wavs/LJ028-0049.wav|tests/data/ljspeech/wavs/LJ028-0049.npy +tests/data/ljspeech/wavs/LJ047-0128.wav|tests/data/ljspeech/wavs/LJ047-0128.npy +tests/data/ljspeech/wavs/LJ013-0110.wav|tests/data/ljspeech/wavs/LJ013-0110.npy +tests/data/ljspeech/wavs/LJ014-0154.wav|tests/data/ljspeech/wavs/LJ014-0154.npy +tests/data/ljspeech/wavs/LJ038-0027.wav|tests/data/ljspeech/wavs/LJ038-0027.npy +tests/data/ljspeech/wavs/LJ041-0128.wav|tests/data/ljspeech/wavs/LJ041-0128.npy +tests/data/ljspeech/wavs/LJ046-0096.wav|tests/data/ljspeech/wavs/LJ046-0096.npy +tests/data/ljspeech/wavs/LJ018-0098.wav|tests/data/ljspeech/wavs/LJ018-0098.npy +tests/data/ljspeech/wavs/LJ019-0037.wav|tests/data/ljspeech/wavs/LJ019-0037.npy +tests/data/ljspeech/wavs/LJ002-0042.wav|tests/data/ljspeech/wavs/LJ002-0042.npy +tests/data/ljspeech/wavs/LJ039-0065.wav|tests/data/ljspeech/wavs/LJ039-0065.npy +tests/data/ljspeech/wavs/LJ032-0139.wav|tests/data/ljspeech/wavs/LJ032-0139.npy +tests/data/ljspeech/wavs/LJ049-0015.wav|tests/data/ljspeech/wavs/LJ049-0015.npy +tests/data/ljspeech/wavs/LJ030-0112.wav|tests/data/ljspeech/wavs/LJ030-0112.npy +tests/data/ljspeech/wavs/LJ025-0058.wav|tests/data/ljspeech/wavs/LJ025-0058.npy 
+tests/data/ljspeech/wavs/LJ025-0057.wav|tests/data/ljspeech/wavs/LJ025-0057.npy +tests/data/ljspeech/wavs/LJ036-0211.wav|tests/data/ljspeech/wavs/LJ036-0211.npy +tests/data/ljspeech/wavs/LJ044-0035.wav|tests/data/ljspeech/wavs/LJ044-0035.npy +tests/data/ljspeech/wavs/LJ004-0056.wav|tests/data/ljspeech/wavs/LJ004-0056.npy +tests/data/ljspeech/wavs/LJ044-0019.wav|tests/data/ljspeech/wavs/LJ044-0019.npy +tests/data/ljspeech/wavs/LJ042-0232.wav|tests/data/ljspeech/wavs/LJ042-0232.npy +tests/data/ljspeech/wavs/LJ021-0146.wav|tests/data/ljspeech/wavs/LJ021-0146.npy +tests/data/ljspeech/wavs/LJ021-0152.wav|tests/data/ljspeech/wavs/LJ021-0152.npy +tests/data/ljspeech/wavs/LJ003-0349.wav|tests/data/ljspeech/wavs/LJ003-0349.npy +tests/data/ljspeech/wavs/LJ018-0336.wav|tests/data/ljspeech/wavs/LJ018-0336.npy +tests/data/ljspeech/wavs/LJ031-0172.wav|tests/data/ljspeech/wavs/LJ031-0172.npy +tests/data/ljspeech/wavs/LJ047-0030.wav|tests/data/ljspeech/wavs/LJ047-0030.npy +tests/data/ljspeech/wavs/LJ027-0046.wav|tests/data/ljspeech/wavs/LJ027-0046.npy +tests/data/ljspeech/wavs/LJ016-0314.wav|tests/data/ljspeech/wavs/LJ016-0314.npy +tests/data/ljspeech/wavs/LJ003-0275.wav|tests/data/ljspeech/wavs/LJ003-0275.npy +tests/data/ljspeech/wavs/LJ004-0004.wav|tests/data/ljspeech/wavs/LJ004-0004.npy +tests/data/ljspeech/wavs/LJ006-0142.wav|tests/data/ljspeech/wavs/LJ006-0142.npy +tests/data/ljspeech/wavs/LJ044-0232.wav|tests/data/ljspeech/wavs/LJ044-0232.npy +tests/data/ljspeech/wavs/LJ021-0112.wav|tests/data/ljspeech/wavs/LJ021-0112.npy +tests/data/ljspeech/wavs/LJ018-0111.wav|tests/data/ljspeech/wavs/LJ018-0111.npy +tests/data/ljspeech/wavs/LJ003-0160.wav|tests/data/ljspeech/wavs/LJ003-0160.npy +tests/data/ljspeech/wavs/LJ046-0215.wav|tests/data/ljspeech/wavs/LJ046-0215.npy +tests/data/ljspeech/wavs/LJ029-0088.wav|tests/data/ljspeech/wavs/LJ029-0088.npy +tests/data/ljspeech/wavs/LJ006-0242.wav|tests/data/ljspeech/wavs/LJ006-0242.npy +tests/data/ljspeech/wavs/LJ034-0006.wav|tests/data/ljspeech/wavs/LJ034-0006.npy +tests/data/ljspeech/wavs/LJ020-0103.wav|tests/data/ljspeech/wavs/LJ020-0103.npy +tests/data/ljspeech/wavs/LJ006-0273.wav|tests/data/ljspeech/wavs/LJ006-0273.npy +tests/data/ljspeech/wavs/LJ023-0013.wav|tests/data/ljspeech/wavs/LJ023-0013.npy +tests/data/ljspeech/wavs/LJ006-0114.wav|tests/data/ljspeech/wavs/LJ006-0114.npy +tests/data/ljspeech/wavs/LJ023-0044.wav|tests/data/ljspeech/wavs/LJ023-0044.npy +tests/data/ljspeech/wavs/LJ029-0018.wav|tests/data/ljspeech/wavs/LJ029-0018.npy +tests/data/ljspeech/wavs/LJ031-0230.wav|tests/data/ljspeech/wavs/LJ031-0230.npy +tests/data/ljspeech/wavs/LJ037-0069.wav|tests/data/ljspeech/wavs/LJ037-0069.npy +tests/data/ljspeech/wavs/LJ007-0165.wav|tests/data/ljspeech/wavs/LJ007-0165.npy +tests/data/ljspeech/wavs/LJ036-0078.wav|tests/data/ljspeech/wavs/LJ036-0078.npy +tests/data/ljspeech/wavs/LJ041-0155.wav|tests/data/ljspeech/wavs/LJ041-0155.npy +tests/data/ljspeech/wavs/LJ005-0038.wav|tests/data/ljspeech/wavs/LJ005-0038.npy +tests/data/ljspeech/wavs/LJ035-0038.wav|tests/data/ljspeech/wavs/LJ035-0038.npy +tests/data/ljspeech/wavs/LJ005-0040.wav|tests/data/ljspeech/wavs/LJ005-0040.npy +tests/data/ljspeech/wavs/LJ007-0144.wav|tests/data/ljspeech/wavs/LJ007-0144.npy +tests/data/ljspeech/wavs/LJ027-0114.wav|tests/data/ljspeech/wavs/LJ027-0114.npy +tests/data/ljspeech/wavs/LJ027-0042.wav|tests/data/ljspeech/wavs/LJ027-0042.npy +tests/data/ljspeech/wavs/LJ031-0150.wav|tests/data/ljspeech/wavs/LJ031-0150.npy 
+tests/data/ljspeech/wavs/LJ017-0117.wav|tests/data/ljspeech/wavs/LJ017-0117.npy +tests/data/ljspeech/wavs/LJ027-0107.wav|tests/data/ljspeech/wavs/LJ027-0107.npy +tests/data/ljspeech/wavs/LJ007-0075.wav|tests/data/ljspeech/wavs/LJ007-0075.npy +tests/data/ljspeech/wavs/LJ008-0103.wav|tests/data/ljspeech/wavs/LJ008-0103.npy +tests/data/ljspeech/wavs/LJ008-0292.wav|tests/data/ljspeech/wavs/LJ008-0292.npy +tests/data/ljspeech/wavs/LJ008-0053.wav|tests/data/ljspeech/wavs/LJ008-0053.npy +tests/data/ljspeech/wavs/LJ038-0115.wav|tests/data/ljspeech/wavs/LJ038-0115.npy +tests/data/ljspeech/wavs/LJ008-0250.wav|tests/data/ljspeech/wavs/LJ008-0250.npy +tests/data/ljspeech/wavs/LJ003-0081.wav|tests/data/ljspeech/wavs/LJ003-0081.npy +tests/data/ljspeech/wavs/LJ016-0264.wav|tests/data/ljspeech/wavs/LJ016-0264.npy +tests/data/ljspeech/wavs/LJ034-0122.wav|tests/data/ljspeech/wavs/LJ034-0122.npy +tests/data/ljspeech/wavs/LJ005-0043.wav|tests/data/ljspeech/wavs/LJ005-0043.npy +tests/data/ljspeech/wavs/LJ023-0061.wav|tests/data/ljspeech/wavs/LJ023-0061.npy +tests/data/ljspeech/wavs/LJ006-0110.wav|tests/data/ljspeech/wavs/LJ006-0110.npy +tests/data/ljspeech/wavs/LJ034-0213.wav|tests/data/ljspeech/wavs/LJ034-0213.npy +tests/data/ljspeech/wavs/LJ006-0020.wav|tests/data/ljspeech/wavs/LJ006-0020.npy +tests/data/ljspeech/wavs/LJ022-0024.wav|tests/data/ljspeech/wavs/LJ022-0024.npy +tests/data/ljspeech/wavs/LJ008-0275.wav|tests/data/ljspeech/wavs/LJ008-0275.npy +tests/data/ljspeech/wavs/LJ032-0008.wav|tests/data/ljspeech/wavs/LJ032-0008.npy +tests/data/ljspeech/wavs/LJ032-0203.wav|tests/data/ljspeech/wavs/LJ032-0203.npy +tests/data/ljspeech/wavs/LJ015-0006.wav|tests/data/ljspeech/wavs/LJ015-0006.npy +tests/data/ljspeech/wavs/LJ015-0001.wav|tests/data/ljspeech/wavs/LJ015-0001.npy +tests/data/ljspeech/wavs/LJ005-0203.wav|tests/data/ljspeech/wavs/LJ005-0203.npy +tests/data/ljspeech/wavs/LJ048-0035.wav|tests/data/ljspeech/wavs/LJ048-0035.npy +tests/data/ljspeech/wavs/LJ005-0187.wav|tests/data/ljspeech/wavs/LJ005-0187.npy +tests/data/ljspeech/wavs/LJ044-0043.wav|tests/data/ljspeech/wavs/LJ044-0043.npy +tests/data/ljspeech/wavs/LJ016-0251.wav|tests/data/ljspeech/wavs/LJ016-0251.npy +tests/data/ljspeech/wavs/LJ015-0255.wav|tests/data/ljspeech/wavs/LJ015-0255.npy +tests/data/ljspeech/wavs/LJ047-0004.wav|tests/data/ljspeech/wavs/LJ047-0004.npy +tests/data/ljspeech/wavs/LJ037-0056.wav|tests/data/ljspeech/wavs/LJ037-0056.npy +tests/data/ljspeech/wavs/LJ049-0089.wav|tests/data/ljspeech/wavs/LJ049-0089.npy +tests/data/ljspeech/wavs/LJ023-0076.wav|tests/data/ljspeech/wavs/LJ023-0076.npy +tests/data/ljspeech/wavs/LJ014-0105.wav|tests/data/ljspeech/wavs/LJ014-0105.npy +tests/data/ljspeech/wavs/LJ017-0223.wav|tests/data/ljspeech/wavs/LJ017-0223.npy +tests/data/ljspeech/wavs/LJ016-0250.wav|tests/data/ljspeech/wavs/LJ016-0250.npy +tests/data/ljspeech/wavs/LJ024-0137.wav|tests/data/ljspeech/wavs/LJ024-0137.npy +tests/data/ljspeech/wavs/LJ017-0274.wav|tests/data/ljspeech/wavs/LJ017-0274.npy +tests/data/ljspeech/wavs/LJ015-0013.wav|tests/data/ljspeech/wavs/LJ015-0013.npy +tests/data/ljspeech/wavs/LJ036-0093.wav|tests/data/ljspeech/wavs/LJ036-0093.npy +tests/data/ljspeech/wavs/LJ036-0215.wav|tests/data/ljspeech/wavs/LJ036-0215.npy +tests/data/ljspeech/wavs/LJ017-0135.wav|tests/data/ljspeech/wavs/LJ017-0135.npy +tests/data/ljspeech/wavs/LJ016-0164.wav|tests/data/ljspeech/wavs/LJ016-0164.npy +tests/data/ljspeech/wavs/LJ048-0213.wav|tests/data/ljspeech/wavs/LJ048-0213.npy 
+tests/data/ljspeech/wavs/LJ036-0183.wav|tests/data/ljspeech/wavs/LJ036-0183.npy +tests/data/ljspeech/wavs/LJ045-0187.wav|tests/data/ljspeech/wavs/LJ045-0187.npy +tests/data/ljspeech/wavs/LJ007-0240.wav|tests/data/ljspeech/wavs/LJ007-0240.npy +tests/data/ljspeech/wavs/LJ015-0289.wav|tests/data/ljspeech/wavs/LJ015-0289.npy +tests/data/ljspeech/wavs/LJ005-0117.wav|tests/data/ljspeech/wavs/LJ005-0117.npy +tests/data/ljspeech/wavs/LJ016-0131.wav|tests/data/ljspeech/wavs/LJ016-0131.npy +tests/data/ljspeech/wavs/LJ017-0043.wav|tests/data/ljspeech/wavs/LJ017-0043.npy +tests/data/ljspeech/wavs/LJ037-0044.wav|tests/data/ljspeech/wavs/LJ037-0044.npy +tests/data/ljspeech/wavs/LJ044-0018.wav|tests/data/ljspeech/wavs/LJ044-0018.npy +tests/data/ljspeech/wavs/LJ030-0027.wav|tests/data/ljspeech/wavs/LJ030-0027.npy +tests/data/ljspeech/wavs/LJ031-0022.wav|tests/data/ljspeech/wavs/LJ031-0022.npy +tests/data/ljspeech/wavs/LJ001-0040.wav|tests/data/ljspeech/wavs/LJ001-0040.npy +tests/data/ljspeech/wavs/LJ045-0247.wav|tests/data/ljspeech/wavs/LJ045-0247.npy +tests/data/ljspeech/wavs/LJ045-0205.wav|tests/data/ljspeech/wavs/LJ045-0205.npy +tests/data/ljspeech/wavs/LJ007-0174.wav|tests/data/ljspeech/wavs/LJ007-0174.npy +tests/data/ljspeech/wavs/LJ043-0015.wav|tests/data/ljspeech/wavs/LJ043-0015.npy +tests/data/ljspeech/wavs/LJ030-0068.wav|tests/data/ljspeech/wavs/LJ030-0068.npy +tests/data/ljspeech/wavs/LJ001-0009.wav|tests/data/ljspeech/wavs/LJ001-0009.npy +tests/data/ljspeech/wavs/LJ001-0117.wav|tests/data/ljspeech/wavs/LJ001-0117.npy +tests/data/ljspeech/wavs/LJ014-0220.wav|tests/data/ljspeech/wavs/LJ014-0220.npy +tests/data/ljspeech/wavs/LJ006-0120.wav|tests/data/ljspeech/wavs/LJ006-0120.npy +tests/data/ljspeech/wavs/LJ004-0141.wav|tests/data/ljspeech/wavs/LJ004-0141.npy +tests/data/ljspeech/wavs/LJ031-0007.wav|tests/data/ljspeech/wavs/LJ031-0007.npy +tests/data/ljspeech/wavs/LJ003-0175.wav|tests/data/ljspeech/wavs/LJ003-0175.npy +tests/data/ljspeech/wavs/LJ044-0228.wav|tests/data/ljspeech/wavs/LJ044-0228.npy +tests/data/ljspeech/wavs/LJ030-0233.wav|tests/data/ljspeech/wavs/LJ030-0233.npy +tests/data/ljspeech/wavs/LJ042-0099.wav|tests/data/ljspeech/wavs/LJ042-0099.npy +tests/data/ljspeech/wavs/LJ045-0233.wav|tests/data/ljspeech/wavs/LJ045-0233.npy +tests/data/ljspeech/wavs/LJ010-0305.wav|tests/data/ljspeech/wavs/LJ010-0305.npy +tests/data/ljspeech/wavs/LJ050-0039.wav|tests/data/ljspeech/wavs/LJ050-0039.npy +tests/data/ljspeech/wavs/LJ003-0238.wav|tests/data/ljspeech/wavs/LJ003-0238.npy +tests/data/ljspeech/wavs/LJ007-0039.wav|tests/data/ljspeech/wavs/LJ007-0039.npy +tests/data/ljspeech/wavs/LJ005-0257.wav|tests/data/ljspeech/wavs/LJ005-0257.npy +tests/data/ljspeech/wavs/LJ006-0160.wav|tests/data/ljspeech/wavs/LJ006-0160.npy +tests/data/ljspeech/wavs/LJ007-0200.wav|tests/data/ljspeech/wavs/LJ007-0200.npy +tests/data/ljspeech/wavs/LJ003-0029.wav|tests/data/ljspeech/wavs/LJ003-0029.npy +tests/data/ljspeech/wavs/LJ003-0346.wav|tests/data/ljspeech/wavs/LJ003-0346.npy +tests/data/ljspeech/wavs/LJ007-0121.wav|tests/data/ljspeech/wavs/LJ007-0121.npy +tests/data/ljspeech/wavs/LJ004-0060.wav|tests/data/ljspeech/wavs/LJ004-0060.npy +tests/data/ljspeech/wavs/LJ031-0223.wav|tests/data/ljspeech/wavs/LJ031-0223.npy +tests/data/ljspeech/wavs/LJ009-0300.wav|tests/data/ljspeech/wavs/LJ009-0300.npy +tests/data/ljspeech/wavs/LJ012-0078.wav|tests/data/ljspeech/wavs/LJ012-0078.npy +tests/data/ljspeech/wavs/LJ028-0424.wav|tests/data/ljspeech/wavs/LJ028-0424.npy 
+tests/data/ljspeech/wavs/LJ041-0008.wav|tests/data/ljspeech/wavs/LJ041-0008.npy +tests/data/ljspeech/wavs/LJ028-0417.wav|tests/data/ljspeech/wavs/LJ028-0417.npy +tests/data/ljspeech/wavs/LJ010-0287.wav|tests/data/ljspeech/wavs/LJ010-0287.npy +tests/data/ljspeech/wavs/LJ040-0123.wav|tests/data/ljspeech/wavs/LJ040-0123.npy +tests/data/ljspeech/wavs/LJ028-0303.wav|tests/data/ljspeech/wavs/LJ028-0303.npy +tests/data/ljspeech/wavs/LJ009-0119.wav|tests/data/ljspeech/wavs/LJ009-0119.npy +tests/data/ljspeech/wavs/LJ042-0025.wav|tests/data/ljspeech/wavs/LJ042-0025.npy +tests/data/ljspeech/wavs/LJ042-0097.wav|tests/data/ljspeech/wavs/LJ042-0097.npy +tests/data/ljspeech/wavs/LJ028-0143.wav|tests/data/ljspeech/wavs/LJ028-0143.npy +tests/data/ljspeech/wavs/LJ028-0288.wav|tests/data/ljspeech/wavs/LJ028-0288.npy +tests/data/ljspeech/wavs/LJ010-0058.wav|tests/data/ljspeech/wavs/LJ010-0058.npy +tests/data/ljspeech/wavs/LJ009-0037.wav|tests/data/ljspeech/wavs/LJ009-0037.npy +tests/data/ljspeech/wavs/LJ038-0254.wav|tests/data/ljspeech/wavs/LJ038-0254.npy +tests/data/ljspeech/wavs/LJ028-0189.wav|tests/data/ljspeech/wavs/LJ028-0189.npy +tests/data/ljspeech/wavs/LJ028-0306.wav|tests/data/ljspeech/wavs/LJ028-0306.npy +tests/data/ljspeech/wavs/LJ028-0471.wav|tests/data/ljspeech/wavs/LJ028-0471.npy +tests/data/ljspeech/wavs/LJ013-0004.wav|tests/data/ljspeech/wavs/LJ013-0004.npy +tests/data/ljspeech/wavs/LJ008-0248.wav|tests/data/ljspeech/wavs/LJ008-0248.npy +tests/data/ljspeech/wavs/LJ010-0086.wav|tests/data/ljspeech/wavs/LJ010-0086.npy +tests/data/ljspeech/wavs/LJ040-0240.wav|tests/data/ljspeech/wavs/LJ040-0240.npy +tests/data/ljspeech/wavs/LJ011-0145.wav|tests/data/ljspeech/wavs/LJ011-0145.npy +tests/data/ljspeech/wavs/LJ013-0010.wav|tests/data/ljspeech/wavs/LJ013-0010.npy +tests/data/ljspeech/wavs/LJ028-0237.wav|tests/data/ljspeech/wavs/LJ028-0237.npy +tests/data/ljspeech/wavs/LJ013-0114.wav|tests/data/ljspeech/wavs/LJ013-0114.npy +tests/data/ljspeech/wavs/LJ009-0043.wav|tests/data/ljspeech/wavs/LJ009-0043.npy +tests/data/ljspeech/wavs/LJ041-0121.wav|tests/data/ljspeech/wavs/LJ041-0121.npy +tests/data/ljspeech/wavs/LJ009-0280.wav|tests/data/ljspeech/wavs/LJ009-0280.npy +tests/data/ljspeech/wavs/LJ013-0075.wav|tests/data/ljspeech/wavs/LJ013-0075.npy +tests/data/ljspeech/wavs/LJ028-0451.wav|tests/data/ljspeech/wavs/LJ028-0451.npy +tests/data/ljspeech/wavs/LJ025-0042.wav|tests/data/ljspeech/wavs/LJ025-0042.npy +tests/data/ljspeech/wavs/LJ021-0007.wav|tests/data/ljspeech/wavs/LJ021-0007.npy +tests/data/ljspeech/wavs/LJ024-0067.wav|tests/data/ljspeech/wavs/LJ024-0067.npy +tests/data/ljspeech/wavs/LJ026-0123.wav|tests/data/ljspeech/wavs/LJ026-0123.npy +tests/data/ljspeech/wavs/LJ024-0107.wav|tests/data/ljspeech/wavs/LJ024-0107.npy +tests/data/ljspeech/wavs/LJ023-0003.wav|tests/data/ljspeech/wavs/LJ023-0003.npy +tests/data/ljspeech/wavs/LJ036-0060.wav|tests/data/ljspeech/wavs/LJ036-0060.npy +tests/data/ljspeech/wavs/LJ019-0088.wav|tests/data/ljspeech/wavs/LJ019-0088.npy +tests/data/ljspeech/wavs/LJ025-0154.wav|tests/data/ljspeech/wavs/LJ025-0154.npy +tests/data/ljspeech/wavs/LJ035-0045.wav|tests/data/ljspeech/wavs/LJ035-0045.npy +tests/data/ljspeech/wavs/LJ024-0092.wav|tests/data/ljspeech/wavs/LJ024-0092.npy +tests/data/ljspeech/wavs/LJ023-0091.wav|tests/data/ljspeech/wavs/LJ023-0091.npy +tests/data/ljspeech/wavs/LJ022-0167.wav|tests/data/ljspeech/wavs/LJ022-0167.npy +tests/data/ljspeech/wavs/LJ022-0025.wav|tests/data/ljspeech/wavs/LJ022-0025.npy 
+tests/data/ljspeech/wavs/LJ018-0230.wav|tests/data/ljspeech/wavs/LJ018-0230.npy +tests/data/ljspeech/wavs/LJ033-0046.wav|tests/data/ljspeech/wavs/LJ033-0046.npy +tests/data/ljspeech/wavs/LJ022-0153.wav|tests/data/ljspeech/wavs/LJ022-0153.npy +tests/data/ljspeech/wavs/LJ018-0076.wav|tests/data/ljspeech/wavs/LJ018-0076.npy +tests/data/ljspeech/wavs/LJ019-0291.wav|tests/data/ljspeech/wavs/LJ019-0291.npy +tests/data/ljspeech/wavs/LJ022-0129.wav|tests/data/ljspeech/wavs/LJ022-0129.npy +tests/data/ljspeech/wavs/LJ033-0041.wav|tests/data/ljspeech/wavs/LJ033-0041.npy +tests/data/ljspeech/wavs/LJ021-0038.wav|tests/data/ljspeech/wavs/LJ021-0038.npy +tests/data/ljspeech/wavs/LJ019-0202.wav|tests/data/ljspeech/wavs/LJ019-0202.npy +tests/data/ljspeech/wavs/LJ009-0014.wav|tests/data/ljspeech/wavs/LJ009-0014.npy +tests/data/ljspeech/wavs/LJ018-0147.wav|tests/data/ljspeech/wavs/LJ018-0147.npy +tests/data/ljspeech/wavs/LJ019-0033.wav|tests/data/ljspeech/wavs/LJ019-0033.npy +tests/data/ljspeech/wavs/LJ018-0135.wav|tests/data/ljspeech/wavs/LJ018-0135.npy +tests/data/ljspeech/wavs/LJ025-0036.wav|tests/data/ljspeech/wavs/LJ025-0036.npy +tests/data/ljspeech/wavs/LJ012-0109.wav|tests/data/ljspeech/wavs/LJ012-0109.npy +tests/data/ljspeech/wavs/LJ035-0169.wav|tests/data/ljspeech/wavs/LJ035-0169.npy +tests/data/ljspeech/wavs/LJ033-0120.wav|tests/data/ljspeech/wavs/LJ033-0120.npy +tests/data/ljspeech/wavs/LJ019-0357.wav|tests/data/ljspeech/wavs/LJ019-0357.npy +tests/data/ljspeech/wavs/LJ046-0008.wav|tests/data/ljspeech/wavs/LJ046-0008.npy +tests/data/ljspeech/wavs/LJ048-0275.wav|tests/data/ljspeech/wavs/LJ048-0275.npy +tests/data/ljspeech/wavs/LJ026-0117.wav|tests/data/ljspeech/wavs/LJ026-0117.npy +tests/data/ljspeech/wavs/LJ019-0195.wav|tests/data/ljspeech/wavs/LJ019-0195.npy +tests/data/ljspeech/wavs/LJ034-0137.wav|tests/data/ljspeech/wavs/LJ034-0137.npy +tests/data/ljspeech/wavs/LJ039-0006.wav|tests/data/ljspeech/wavs/LJ039-0006.npy +tests/data/ljspeech/wavs/LJ043-0055.wav|tests/data/ljspeech/wavs/LJ043-0055.npy +tests/data/ljspeech/wavs/LJ040-0116.wav|tests/data/ljspeech/wavs/LJ040-0116.npy +tests/data/ljspeech/wavs/LJ015-0103.wav|tests/data/ljspeech/wavs/LJ015-0103.npy +tests/data/ljspeech/wavs/LJ009-0290.wav|tests/data/ljspeech/wavs/LJ009-0290.npy +tests/data/ljspeech/wavs/LJ018-0286.wav|tests/data/ljspeech/wavs/LJ018-0286.npy +tests/data/ljspeech/wavs/LJ004-0161.wav|tests/data/ljspeech/wavs/LJ004-0161.npy +tests/data/ljspeech/wavs/LJ028-0041.wav|tests/data/ljspeech/wavs/LJ028-0041.npy +tests/data/ljspeech/wavs/LJ008-0176.wav|tests/data/ljspeech/wavs/LJ008-0176.npy +tests/data/ljspeech/wavs/LJ026-0154.wav|tests/data/ljspeech/wavs/LJ026-0154.npy +tests/data/ljspeech/wavs/LJ015-0089.wav|tests/data/ljspeech/wavs/LJ015-0089.npy +tests/data/ljspeech/wavs/LJ039-0010.wav|tests/data/ljspeech/wavs/LJ039-0010.npy +tests/data/ljspeech/wavs/LJ013-0228.wav|tests/data/ljspeech/wavs/LJ013-0228.npy +tests/data/ljspeech/wavs/LJ008-0202.wav|tests/data/ljspeech/wavs/LJ008-0202.npy +tests/data/ljspeech/wavs/LJ019-0093.wav|tests/data/ljspeech/wavs/LJ019-0093.npy +tests/data/ljspeech/wavs/LJ030-0048.wav|tests/data/ljspeech/wavs/LJ030-0048.npy +tests/data/ljspeech/wavs/LJ031-0047.wav|tests/data/ljspeech/wavs/LJ031-0047.npy +tests/data/ljspeech/wavs/LJ009-0142.wav|tests/data/ljspeech/wavs/LJ009-0142.npy +tests/data/ljspeech/wavs/LJ006-0215.wav|tests/data/ljspeech/wavs/LJ006-0215.npy +tests/data/ljspeech/wavs/LJ016-0227.wav|tests/data/ljspeech/wavs/LJ016-0227.npy 
+tests/data/ljspeech/wavs/LJ002-0233.wav|tests/data/ljspeech/wavs/LJ002-0233.npy +tests/data/ljspeech/wavs/LJ008-0205.wav|tests/data/ljspeech/wavs/LJ008-0205.npy +tests/data/ljspeech/wavs/LJ008-0037.wav|tests/data/ljspeech/wavs/LJ008-0037.npy +tests/data/ljspeech/wavs/LJ004-0138.wav|tests/data/ljspeech/wavs/LJ004-0138.npy +tests/data/ljspeech/wavs/LJ013-0234.wav|tests/data/ljspeech/wavs/LJ013-0234.npy +tests/data/ljspeech/wavs/LJ013-0227.wav|tests/data/ljspeech/wavs/LJ013-0227.npy +tests/data/ljspeech/wavs/LJ033-0058.wav|tests/data/ljspeech/wavs/LJ033-0058.npy +tests/data/ljspeech/wavs/LJ003-0074.wav|tests/data/ljspeech/wavs/LJ003-0074.npy +tests/data/ljspeech/wavs/LJ028-0357.wav|tests/data/ljspeech/wavs/LJ028-0357.npy +tests/data/ljspeech/wavs/LJ043-0038.wav|tests/data/ljspeech/wavs/LJ043-0038.npy +tests/data/ljspeech/wavs/LJ033-0038.wav|tests/data/ljspeech/wavs/LJ033-0038.npy +tests/data/ljspeech/wavs/LJ026-0018.wav|tests/data/ljspeech/wavs/LJ026-0018.npy +tests/data/ljspeech/wavs/LJ003-0018.wav|tests/data/ljspeech/wavs/LJ003-0018.npy +tests/data/ljspeech/wavs/LJ030-0106.wav|tests/data/ljspeech/wavs/LJ030-0106.npy +tests/data/ljspeech/wavs/LJ043-0051.wav|tests/data/ljspeech/wavs/LJ043-0051.npy +tests/data/ljspeech/wavs/LJ028-0169.wav|tests/data/ljspeech/wavs/LJ028-0169.npy +tests/data/ljspeech/wavs/LJ047-0005.wav|tests/data/ljspeech/wavs/LJ047-0005.npy +tests/data/ljspeech/wavs/LJ008-0091.wav|tests/data/ljspeech/wavs/LJ008-0091.npy +tests/data/ljspeech/wavs/LJ014-0157.wav|tests/data/ljspeech/wavs/LJ014-0157.npy +tests/data/ljspeech/wavs/LJ007-0201.wav|tests/data/ljspeech/wavs/LJ007-0201.npy +tests/data/ljspeech/wavs/LJ038-0278.wav|tests/data/ljspeech/wavs/LJ038-0278.npy +tests/data/ljspeech/wavs/LJ015-0156.wav|tests/data/ljspeech/wavs/LJ015-0156.npy +tests/data/ljspeech/wavs/LJ024-0025.wav|tests/data/ljspeech/wavs/LJ024-0025.npy +tests/data/ljspeech/wavs/LJ015-0284.wav|tests/data/ljspeech/wavs/LJ015-0284.npy +tests/data/ljspeech/wavs/LJ045-0118.wav|tests/data/ljspeech/wavs/LJ045-0118.npy +tests/data/ljspeech/wavs/LJ048-0111.wav|tests/data/ljspeech/wavs/LJ048-0111.npy +tests/data/ljspeech/wavs/LJ016-0128.wav|tests/data/ljspeech/wavs/LJ016-0128.npy +tests/data/ljspeech/wavs/LJ008-0105.wav|tests/data/ljspeech/wavs/LJ008-0105.npy +tests/data/ljspeech/wavs/LJ028-0022.wav|tests/data/ljspeech/wavs/LJ028-0022.npy +tests/data/ljspeech/wavs/LJ018-0298.wav|tests/data/ljspeech/wavs/LJ018-0298.npy +tests/data/ljspeech/wavs/LJ035-0185.wav|tests/data/ljspeech/wavs/LJ035-0185.npy +tests/data/ljspeech/wavs/LJ014-0015.wav|tests/data/ljspeech/wavs/LJ014-0015.npy +tests/data/ljspeech/wavs/LJ023-0087.wav|tests/data/ljspeech/wavs/LJ023-0087.npy +tests/data/ljspeech/wavs/LJ036-0013.wav|tests/data/ljspeech/wavs/LJ036-0013.npy +tests/data/ljspeech/wavs/LJ016-0108.wav|tests/data/ljspeech/wavs/LJ016-0108.npy +tests/data/ljspeech/wavs/LJ006-0308.wav|tests/data/ljspeech/wavs/LJ006-0308.npy +tests/data/ljspeech/wavs/LJ015-0041.wav|tests/data/ljspeech/wavs/LJ015-0041.npy +tests/data/ljspeech/wavs/LJ004-0015.wav|tests/data/ljspeech/wavs/LJ004-0015.npy +tests/data/ljspeech/wavs/LJ045-0100.wav|tests/data/ljspeech/wavs/LJ045-0100.npy +tests/data/ljspeech/wavs/LJ042-0246.wav|tests/data/ljspeech/wavs/LJ042-0246.npy +tests/data/ljspeech/wavs/LJ039-0232.wav|tests/data/ljspeech/wavs/LJ039-0232.npy +tests/data/ljspeech/wavs/LJ047-0149.wav|tests/data/ljspeech/wavs/LJ047-0149.npy +tests/data/ljspeech/wavs/LJ038-0186.wav|tests/data/ljspeech/wavs/LJ038-0186.npy 
+tests/data/ljspeech/wavs/LJ011-0204.wav|tests/data/ljspeech/wavs/LJ011-0204.npy +tests/data/ljspeech/wavs/LJ017-0064.wav|tests/data/ljspeech/wavs/LJ017-0064.npy +tests/data/ljspeech/wavs/LJ016-0070.wav|tests/data/ljspeech/wavs/LJ016-0070.npy +tests/data/ljspeech/wavs/LJ010-0195.wav|tests/data/ljspeech/wavs/LJ010-0195.npy +tests/data/ljspeech/wavs/LJ019-0122.wav|tests/data/ljspeech/wavs/LJ019-0122.npy +tests/data/ljspeech/wavs/LJ005-0088.wav|tests/data/ljspeech/wavs/LJ005-0088.npy +tests/data/ljspeech/wavs/LJ003-0347.wav|tests/data/ljspeech/wavs/LJ003-0347.npy +tests/data/ljspeech/wavs/LJ001-0032.wav|tests/data/ljspeech/wavs/LJ001-0032.npy +tests/data/ljspeech/wavs/LJ035-0057.wav|tests/data/ljspeech/wavs/LJ035-0057.npy +tests/data/ljspeech/wavs/LJ030-0044.wav|tests/data/ljspeech/wavs/LJ030-0044.npy +tests/data/ljspeech/wavs/LJ038-0019.wav|tests/data/ljspeech/wavs/LJ038-0019.npy +tests/data/ljspeech/wavs/LJ003-0214.wav|tests/data/ljspeech/wavs/LJ003-0214.npy +tests/data/ljspeech/wavs/LJ029-0003.wav|tests/data/ljspeech/wavs/LJ029-0003.npy +tests/data/ljspeech/wavs/LJ004-0247.wav|tests/data/ljspeech/wavs/LJ004-0247.npy +tests/data/ljspeech/wavs/LJ041-0021.wav|tests/data/ljspeech/wavs/LJ041-0021.npy +tests/data/ljspeech/wavs/LJ027-0057.wav|tests/data/ljspeech/wavs/LJ027-0057.npy +tests/data/ljspeech/wavs/LJ005-0002.wav|tests/data/ljspeech/wavs/LJ005-0002.npy +tests/data/ljspeech/wavs/LJ045-0146.wav|tests/data/ljspeech/wavs/LJ045-0146.npy +tests/data/ljspeech/wavs/LJ050-0012.wav|tests/data/ljspeech/wavs/LJ050-0012.npy +tests/data/ljspeech/wavs/LJ031-0202.wav|tests/data/ljspeech/wavs/LJ031-0202.npy +tests/data/ljspeech/wavs/LJ019-0092.wav|tests/data/ljspeech/wavs/LJ019-0092.npy +tests/data/ljspeech/wavs/LJ035-0092.wav|tests/data/ljspeech/wavs/LJ035-0092.npy +tests/data/ljspeech/wavs/LJ005-0010.wav|tests/data/ljspeech/wavs/LJ005-0010.npy +tests/data/ljspeech/wavs/LJ039-0157.wav|tests/data/ljspeech/wavs/LJ039-0157.npy +tests/data/ljspeech/wavs/LJ010-0290.wav|tests/data/ljspeech/wavs/LJ010-0290.npy +tests/data/ljspeech/wavs/LJ025-0162.wav|tests/data/ljspeech/wavs/LJ025-0162.npy +tests/data/ljspeech/wavs/LJ002-0330.wav|tests/data/ljspeech/wavs/LJ002-0330.npy +tests/data/ljspeech/wavs/LJ011-0184.wav|tests/data/ljspeech/wavs/LJ011-0184.npy +tests/data/ljspeech/wavs/LJ039-0180.wav|tests/data/ljspeech/wavs/LJ039-0180.npy +tests/data/ljspeech/wavs/LJ001-0024.wav|tests/data/ljspeech/wavs/LJ001-0024.npy +tests/data/ljspeech/wavs/LJ031-0014.wav|tests/data/ljspeech/wavs/LJ031-0014.npy +tests/data/ljspeech/wavs/LJ039-0196.wav|tests/data/ljspeech/wavs/LJ039-0196.npy +tests/data/ljspeech/wavs/LJ028-0216.wav|tests/data/ljspeech/wavs/LJ028-0216.npy +tests/data/ljspeech/wavs/LJ025-0092.wav|tests/data/ljspeech/wavs/LJ025-0092.npy +tests/data/ljspeech/wavs/LJ026-0128.wav|tests/data/ljspeech/wavs/LJ026-0128.npy +tests/data/ljspeech/wavs/LJ029-0210.wav|tests/data/ljspeech/wavs/LJ029-0210.npy +tests/data/ljspeech/wavs/LJ033-0074.wav|tests/data/ljspeech/wavs/LJ033-0074.npy +tests/data/ljspeech/wavs/LJ028-0278.wav|tests/data/ljspeech/wavs/LJ028-0278.npy +tests/data/ljspeech/wavs/LJ012-0283.wav|tests/data/ljspeech/wavs/LJ012-0283.npy +tests/data/ljspeech/wavs/LJ009-0052.wav|tests/data/ljspeech/wavs/LJ009-0052.npy +tests/data/ljspeech/wavs/LJ050-0036.wav|tests/data/ljspeech/wavs/LJ050-0036.npy +tests/data/ljspeech/wavs/LJ041-0011.wav|tests/data/ljspeech/wavs/LJ041-0011.npy +tests/data/ljspeech/wavs/LJ017-0238.wav|tests/data/ljspeech/wavs/LJ017-0238.npy 
+tests/data/ljspeech/wavs/LJ016-0335.wav|tests/data/ljspeech/wavs/LJ016-0335.npy +tests/data/ljspeech/wavs/LJ011-0255.wav|tests/data/ljspeech/wavs/LJ011-0255.npy +tests/data/ljspeech/wavs/LJ022-0009.wav|tests/data/ljspeech/wavs/LJ022-0009.npy +tests/data/ljspeech/wavs/LJ012-0217.wav|tests/data/ljspeech/wavs/LJ012-0217.npy +tests/data/ljspeech/wavs/LJ012-0165.wav|tests/data/ljspeech/wavs/LJ012-0165.npy +tests/data/ljspeech/wavs/LJ028-0485.wav|tests/data/ljspeech/wavs/LJ028-0485.npy +tests/data/ljspeech/wavs/LJ033-0108.wav|tests/data/ljspeech/wavs/LJ033-0108.npy +tests/data/ljspeech/wavs/LJ005-0029.wav|tests/data/ljspeech/wavs/LJ005-0029.npy +tests/data/ljspeech/wavs/LJ024-0136.wav|tests/data/ljspeech/wavs/LJ024-0136.npy +tests/data/ljspeech/wavs/LJ011-0013.wav|tests/data/ljspeech/wavs/LJ011-0013.npy +tests/data/ljspeech/wavs/LJ050-0074.wav|tests/data/ljspeech/wavs/LJ050-0074.npy +tests/data/ljspeech/wavs/LJ002-0077.wav|tests/data/ljspeech/wavs/LJ002-0077.npy +tests/data/ljspeech/wavs/LJ017-0121.wav|tests/data/ljspeech/wavs/LJ017-0121.npy +tests/data/ljspeech/wavs/LJ019-0102.wav|tests/data/ljspeech/wavs/LJ019-0102.npy +tests/data/ljspeech/wavs/LJ035-0141.wav|tests/data/ljspeech/wavs/LJ035-0141.npy +tests/data/ljspeech/wavs/LJ020-0057.wav|tests/data/ljspeech/wavs/LJ020-0057.npy +tests/data/ljspeech/wavs/LJ028-0196.wav|tests/data/ljspeech/wavs/LJ028-0196.npy +tests/data/ljspeech/wavs/LJ039-0015.wav|tests/data/ljspeech/wavs/LJ039-0015.npy +tests/data/ljspeech/wavs/LJ018-0158.wav|tests/data/ljspeech/wavs/LJ018-0158.npy +tests/data/ljspeech/wavs/LJ045-0069.wav|tests/data/ljspeech/wavs/LJ045-0069.npy +tests/data/ljspeech/wavs/LJ038-0106.wav|tests/data/ljspeech/wavs/LJ038-0106.npy +tests/data/ljspeech/wavs/LJ034-0012.wav|tests/data/ljspeech/wavs/LJ034-0012.npy +tests/data/ljspeech/wavs/LJ026-0084.wav|tests/data/ljspeech/wavs/LJ026-0084.npy +tests/data/ljspeech/wavs/LJ038-0104.wav|tests/data/ljspeech/wavs/LJ038-0104.npy +tests/data/ljspeech/wavs/LJ021-0148.wav|tests/data/ljspeech/wavs/LJ021-0148.npy +tests/data/ljspeech/wavs/LJ039-0123.wav|tests/data/ljspeech/wavs/LJ039-0123.npy +tests/data/ljspeech/wavs/LJ010-0272.wav|tests/data/ljspeech/wavs/LJ010-0272.npy +tests/data/ljspeech/wavs/LJ040-0019.wav|tests/data/ljspeech/wavs/LJ040-0019.npy +tests/data/ljspeech/wavs/LJ008-0082.wav|tests/data/ljspeech/wavs/LJ008-0082.npy +tests/data/ljspeech/wavs/LJ016-0415.wav|tests/data/ljspeech/wavs/LJ016-0415.npy +tests/data/ljspeech/wavs/LJ047-0100.wav|tests/data/ljspeech/wavs/LJ047-0100.npy +tests/data/ljspeech/wavs/LJ040-0041.wav|tests/data/ljspeech/wavs/LJ040-0041.npy +tests/data/ljspeech/wavs/LJ038-0062.wav|tests/data/ljspeech/wavs/LJ038-0062.npy +tests/data/ljspeech/wavs/LJ020-0026.wav|tests/data/ljspeech/wavs/LJ020-0026.npy +tests/data/ljspeech/wavs/LJ049-0208.wav|tests/data/ljspeech/wavs/LJ049-0208.npy +tests/data/ljspeech/wavs/LJ003-0285.wav|tests/data/ljspeech/wavs/LJ003-0285.npy +tests/data/ljspeech/wavs/LJ019-0369.wav|tests/data/ljspeech/wavs/LJ019-0369.npy +tests/data/ljspeech/wavs/LJ005-0236.wav|tests/data/ljspeech/wavs/LJ005-0236.npy +tests/data/ljspeech/wavs/LJ014-0210.wav|tests/data/ljspeech/wavs/LJ014-0210.npy +tests/data/ljspeech/wavs/LJ044-0056.wav|tests/data/ljspeech/wavs/LJ044-0056.npy +tests/data/ljspeech/wavs/LJ034-0058.wav|tests/data/ljspeech/wavs/LJ034-0058.npy +tests/data/ljspeech/wavs/LJ011-0114.wav|tests/data/ljspeech/wavs/LJ011-0114.npy +tests/data/ljspeech/wavs/LJ019-0185.wav|tests/data/ljspeech/wavs/LJ019-0185.npy 
+tests/data/ljspeech/wavs/LJ011-0193.wav|tests/data/ljspeech/wavs/LJ011-0193.npy +tests/data/ljspeech/wavs/LJ039-0240.wav|tests/data/ljspeech/wavs/LJ039-0240.npy +tests/data/ljspeech/wavs/LJ038-0029.wav|tests/data/ljspeech/wavs/LJ038-0029.npy +tests/data/ljspeech/wavs/LJ038-0091.wav|tests/data/ljspeech/wavs/LJ038-0091.npy +tests/data/ljspeech/wavs/LJ043-0094.wav|tests/data/ljspeech/wavs/LJ043-0094.npy +tests/data/ljspeech/wavs/LJ011-0085.wav|tests/data/ljspeech/wavs/LJ011-0085.npy +tests/data/ljspeech/wavs/LJ039-0185.wav|tests/data/ljspeech/wavs/LJ039-0185.npy +tests/data/ljspeech/wavs/LJ022-0081.wav|tests/data/ljspeech/wavs/LJ022-0081.npy +tests/data/ljspeech/wavs/LJ030-0010.wav|tests/data/ljspeech/wavs/LJ030-0010.npy +tests/data/ljspeech/wavs/LJ039-0023.wav|tests/data/ljspeech/wavs/LJ039-0023.npy +tests/data/ljspeech/wavs/LJ032-0124.wav|tests/data/ljspeech/wavs/LJ032-0124.npy +tests/data/ljspeech/wavs/LJ013-0261.wav|tests/data/ljspeech/wavs/LJ013-0261.npy +tests/data/ljspeech/wavs/LJ004-0073.wav|tests/data/ljspeech/wavs/LJ004-0073.npy +tests/data/ljspeech/wavs/LJ028-0323.wav|tests/data/ljspeech/wavs/LJ028-0323.npy +tests/data/ljspeech/wavs/LJ028-0153.wav|tests/data/ljspeech/wavs/LJ028-0153.npy +tests/data/ljspeech/wavs/LJ028-0473.wav|tests/data/ljspeech/wavs/LJ028-0473.npy +tests/data/ljspeech/wavs/LJ050-0171.wav|tests/data/ljspeech/wavs/LJ050-0171.npy +tests/data/ljspeech/wavs/LJ039-0131.wav|tests/data/ljspeech/wavs/LJ039-0131.npy +tests/data/ljspeech/wavs/LJ012-0031.wav|tests/data/ljspeech/wavs/LJ012-0031.npy +tests/data/ljspeech/wavs/LJ004-0216.wav|tests/data/ljspeech/wavs/LJ004-0216.npy +tests/data/ljspeech/wavs/LJ049-0013.wav|tests/data/ljspeech/wavs/LJ049-0013.npy +tests/data/ljspeech/wavs/LJ018-0367.wav|tests/data/ljspeech/wavs/LJ018-0367.npy +tests/data/ljspeech/wavs/LJ022-0055.wav|tests/data/ljspeech/wavs/LJ022-0055.npy +tests/data/ljspeech/wavs/LJ004-0135.wav|tests/data/ljspeech/wavs/LJ004-0135.npy +tests/data/ljspeech/wavs/LJ004-0074.wav|tests/data/ljspeech/wavs/LJ004-0074.npy +tests/data/ljspeech/wavs/LJ042-0200.wav|tests/data/ljspeech/wavs/LJ042-0200.npy +tests/data/ljspeech/wavs/LJ005-0170.wav|tests/data/ljspeech/wavs/LJ005-0170.npy +tests/data/ljspeech/wavs/LJ019-0046.wav|tests/data/ljspeech/wavs/LJ019-0046.npy +tests/data/ljspeech/wavs/LJ012-0158.wav|tests/data/ljspeech/wavs/LJ012-0158.npy +tests/data/ljspeech/wavs/LJ028-0334.wav|tests/data/ljspeech/wavs/LJ028-0334.npy +tests/data/ljspeech/wavs/LJ019-0089.wav|tests/data/ljspeech/wavs/LJ019-0089.npy +tests/data/ljspeech/wavs/LJ014-0204.wav|tests/data/ljspeech/wavs/LJ014-0204.npy +tests/data/ljspeech/wavs/LJ013-0104.wav|tests/data/ljspeech/wavs/LJ013-0104.npy +tests/data/ljspeech/wavs/LJ005-0157.wav|tests/data/ljspeech/wavs/LJ005-0157.npy +tests/data/ljspeech/wavs/LJ038-0239.wav|tests/data/ljspeech/wavs/LJ038-0239.npy +tests/data/ljspeech/wavs/LJ050-0172.wav|tests/data/ljspeech/wavs/LJ050-0172.npy +tests/data/ljspeech/wavs/LJ025-0153.wav|tests/data/ljspeech/wavs/LJ025-0153.npy +tests/data/ljspeech/wavs/LJ028-0491.wav|tests/data/ljspeech/wavs/LJ028-0491.npy +tests/data/ljspeech/wavs/LJ039-0160.wav|tests/data/ljspeech/wavs/LJ039-0160.npy +tests/data/ljspeech/wavs/LJ002-0016.wav|tests/data/ljspeech/wavs/LJ002-0016.npy +tests/data/ljspeech/wavs/LJ035-0179.wav|tests/data/ljspeech/wavs/LJ035-0179.npy +tests/data/ljspeech/wavs/LJ029-0160.wav|tests/data/ljspeech/wavs/LJ029-0160.npy +tests/data/ljspeech/wavs/LJ001-0186.wav|tests/data/ljspeech/wavs/LJ001-0186.npy 
+tests/data/ljspeech/wavs/LJ005-0018.wav|tests/data/ljspeech/wavs/LJ005-0018.npy +tests/data/ljspeech/wavs/LJ036-0051.wav|tests/data/ljspeech/wavs/LJ036-0051.npy +tests/data/ljspeech/wavs/LJ042-0156.wav|tests/data/ljspeech/wavs/LJ042-0156.npy +tests/data/ljspeech/wavs/LJ029-0030.wav|tests/data/ljspeech/wavs/LJ029-0030.npy +tests/data/ljspeech/wavs/LJ010-0028.wav|tests/data/ljspeech/wavs/LJ010-0028.npy +tests/data/ljspeech/wavs/LJ048-0120.wav|tests/data/ljspeech/wavs/LJ048-0120.npy +tests/data/ljspeech/wavs/LJ047-0249.wav|tests/data/ljspeech/wavs/LJ047-0249.npy +tests/data/ljspeech/wavs/LJ007-0087.wav|tests/data/ljspeech/wavs/LJ007-0087.npy +tests/data/ljspeech/wavs/LJ014-0054.wav|tests/data/ljspeech/wavs/LJ014-0054.npy +tests/data/ljspeech/wavs/LJ046-0201.wav|tests/data/ljspeech/wavs/LJ046-0201.npy +tests/data/ljspeech/wavs/LJ012-0103.wav|tests/data/ljspeech/wavs/LJ012-0103.npy +tests/data/ljspeech/wavs/LJ044-0057.wav|tests/data/ljspeech/wavs/LJ044-0057.npy +tests/data/ljspeech/wavs/LJ010-0049.wav|tests/data/ljspeech/wavs/LJ010-0049.npy +tests/data/ljspeech/wavs/LJ010-0048.wav|tests/data/ljspeech/wavs/LJ010-0048.npy +tests/data/ljspeech/wavs/LJ035-0077.wav|tests/data/ljspeech/wavs/LJ035-0077.npy +tests/data/ljspeech/wavs/LJ036-0062.wav|tests/data/ljspeech/wavs/LJ036-0062.npy +tests/data/ljspeech/wavs/LJ002-0297.wav|tests/data/ljspeech/wavs/LJ002-0297.npy +tests/data/ljspeech/wavs/LJ001-0176.wav|tests/data/ljspeech/wavs/LJ001-0176.npy +tests/data/ljspeech/wavs/LJ008-0119.wav|tests/data/ljspeech/wavs/LJ008-0119.npy +tests/data/ljspeech/wavs/LJ006-0072.wav|tests/data/ljspeech/wavs/LJ006-0072.npy +tests/data/ljspeech/wavs/LJ033-0143.wav|tests/data/ljspeech/wavs/LJ033-0143.npy +tests/data/ljspeech/wavs/LJ014-0075.wav|tests/data/ljspeech/wavs/LJ014-0075.npy +tests/data/ljspeech/wavs/LJ018-0243.wav|tests/data/ljspeech/wavs/LJ018-0243.npy +tests/data/ljspeech/wavs/LJ035-0210.wav|tests/data/ljspeech/wavs/LJ035-0210.npy +tests/data/ljspeech/wavs/LJ049-0087.wav|tests/data/ljspeech/wavs/LJ049-0087.npy +tests/data/ljspeech/wavs/LJ045-0219.wav|tests/data/ljspeech/wavs/LJ045-0219.npy +tests/data/ljspeech/wavs/LJ003-0006.wav|tests/data/ljspeech/wavs/LJ003-0006.npy +tests/data/ljspeech/wavs/LJ034-0004.wav|tests/data/ljspeech/wavs/LJ034-0004.npy +tests/data/ljspeech/wavs/LJ034-0181.wav|tests/data/ljspeech/wavs/LJ034-0181.npy +tests/data/ljspeech/wavs/LJ033-0009.wav|tests/data/ljspeech/wavs/LJ033-0009.npy +tests/data/ljspeech/wavs/LJ042-0131.wav|tests/data/ljspeech/wavs/LJ042-0131.npy +tests/data/ljspeech/wavs/LJ042-0130.wav|tests/data/ljspeech/wavs/LJ042-0130.npy +tests/data/ljspeech/wavs/LJ016-0185.wav|tests/data/ljspeech/wavs/LJ016-0185.npy +tests/data/ljspeech/wavs/LJ034-0152.wav|tests/data/ljspeech/wavs/LJ034-0152.npy +tests/data/ljspeech/wavs/LJ047-0167.wav|tests/data/ljspeech/wavs/LJ047-0167.npy +tests/data/ljspeech/wavs/LJ025-0111.wav|tests/data/ljspeech/wavs/LJ025-0111.npy +tests/data/ljspeech/wavs/LJ009-0120.wav|tests/data/ljspeech/wavs/LJ009-0120.npy +tests/data/ljspeech/wavs/LJ037-0072.wav|tests/data/ljspeech/wavs/LJ037-0072.npy +tests/data/ljspeech/wavs/LJ009-0276.wav|tests/data/ljspeech/wavs/LJ009-0276.npy +tests/data/ljspeech/wavs/LJ002-0269.wav|tests/data/ljspeech/wavs/LJ002-0269.npy +tests/data/ljspeech/wavs/LJ009-0266.wav|tests/data/ljspeech/wavs/LJ009-0266.npy +tests/data/ljspeech/wavs/LJ043-0153.wav|tests/data/ljspeech/wavs/LJ043-0153.npy +tests/data/ljspeech/wavs/LJ016-0411.wav|tests/data/ljspeech/wavs/LJ016-0411.npy 
+tests/data/ljspeech/wavs/LJ018-0229.wav|tests/data/ljspeech/wavs/LJ018-0229.npy +tests/data/ljspeech/wavs/LJ016-0171.wav|tests/data/ljspeech/wavs/LJ016-0171.npy +tests/data/ljspeech/wavs/LJ029-0035.wav|tests/data/ljspeech/wavs/LJ029-0035.npy +tests/data/ljspeech/wavs/LJ016-0054.wav|tests/data/ljspeech/wavs/LJ016-0054.npy +tests/data/ljspeech/wavs/LJ025-0003.wav|tests/data/ljspeech/wavs/LJ025-0003.npy +tests/data/ljspeech/wavs/LJ024-0046.wav|tests/data/ljspeech/wavs/LJ024-0046.npy +tests/data/ljspeech/wavs/LJ020-0084.wav|tests/data/ljspeech/wavs/LJ020-0084.npy +tests/data/ljspeech/wavs/LJ034-0211.wav|tests/data/ljspeech/wavs/LJ034-0211.npy +tests/data/ljspeech/wavs/LJ046-0049.wav|tests/data/ljspeech/wavs/LJ046-0049.npy +tests/data/ljspeech/wavs/LJ036-0143.wav|tests/data/ljspeech/wavs/LJ036-0143.npy +tests/data/ljspeech/wavs/LJ003-0027.wav|tests/data/ljspeech/wavs/LJ003-0027.npy +tests/data/ljspeech/wavs/LJ018-0161.wav|tests/data/ljspeech/wavs/LJ018-0161.npy +tests/data/ljspeech/wavs/LJ017-0010.wav|tests/data/ljspeech/wavs/LJ017-0010.npy +tests/data/ljspeech/wavs/LJ016-0430.wav|tests/data/ljspeech/wavs/LJ016-0430.npy +tests/data/ljspeech/wavs/LJ002-0134.wav|tests/data/ljspeech/wavs/LJ002-0134.npy +tests/data/ljspeech/wavs/LJ018-0194.wav|tests/data/ljspeech/wavs/LJ018-0194.npy +tests/data/ljspeech/wavs/LJ045-0197.wav|tests/data/ljspeech/wavs/LJ045-0197.npy +tests/data/ljspeech/wavs/LJ009-0172.wav|tests/data/ljspeech/wavs/LJ009-0172.npy +tests/data/ljspeech/wavs/LJ018-0170.wav|tests/data/ljspeech/wavs/LJ018-0170.npy +tests/data/ljspeech/wavs/LJ018-0085.wav|tests/data/ljspeech/wavs/LJ018-0085.npy +tests/data/ljspeech/wavs/LJ035-0019.wav|tests/data/ljspeech/wavs/LJ035-0019.npy +tests/data/ljspeech/wavs/LJ024-0115.wav|tests/data/ljspeech/wavs/LJ024-0115.npy +tests/data/ljspeech/wavs/LJ012-0277.wav|tests/data/ljspeech/wavs/LJ012-0277.npy +tests/data/ljspeech/wavs/LJ042-0205.wav|tests/data/ljspeech/wavs/LJ042-0205.npy +tests/data/ljspeech/wavs/LJ035-0128.wav|tests/data/ljspeech/wavs/LJ035-0128.npy +tests/data/ljspeech/wavs/LJ026-0099.wav|tests/data/ljspeech/wavs/LJ026-0099.npy +tests/data/ljspeech/wavs/LJ018-0041.wav|tests/data/ljspeech/wavs/LJ018-0041.npy +tests/data/ljspeech/wavs/LJ008-0245.wav|tests/data/ljspeech/wavs/LJ008-0245.npy +tests/data/ljspeech/wavs/LJ003-0130.wav|tests/data/ljspeech/wavs/LJ003-0130.npy +tests/data/ljspeech/wavs/LJ015-0171.wav|tests/data/ljspeech/wavs/LJ015-0171.npy +tests/data/ljspeech/wavs/LJ020-0047.wav|tests/data/ljspeech/wavs/LJ020-0047.npy +tests/data/ljspeech/wavs/LJ018-0078.wav|tests/data/ljspeech/wavs/LJ018-0078.npy +tests/data/ljspeech/wavs/LJ018-0266.wav|tests/data/ljspeech/wavs/LJ018-0266.npy +tests/data/ljspeech/wavs/LJ032-0165.wav|tests/data/ljspeech/wavs/LJ032-0165.npy +tests/data/ljspeech/wavs/LJ015-0272.wav|tests/data/ljspeech/wavs/LJ015-0272.npy +tests/data/ljspeech/wavs/LJ004-0238.wav|tests/data/ljspeech/wavs/LJ004-0238.npy +tests/data/ljspeech/wavs/LJ032-0004.wav|tests/data/ljspeech/wavs/LJ032-0004.npy +tests/data/ljspeech/wavs/LJ018-0038.wav|tests/data/ljspeech/wavs/LJ018-0038.npy +tests/data/ljspeech/wavs/LJ015-0160.wav|tests/data/ljspeech/wavs/LJ015-0160.npy +tests/data/ljspeech/wavs/LJ036-0091.wav|tests/data/ljspeech/wavs/LJ036-0091.npy +tests/data/ljspeech/wavs/LJ010-0093.wav|tests/data/ljspeech/wavs/LJ010-0093.npy +tests/data/ljspeech/wavs/LJ017-0221.wav|tests/data/ljspeech/wavs/LJ017-0221.npy +tests/data/ljspeech/wavs/LJ031-0217.wav|tests/data/ljspeech/wavs/LJ031-0217.npy 
+tests/data/ljspeech/wavs/LJ003-0150.wav|tests/data/ljspeech/wavs/LJ003-0150.npy +tests/data/ljspeech/wavs/LJ029-0068.wav|tests/data/ljspeech/wavs/LJ029-0068.npy +tests/data/ljspeech/wavs/LJ049-0094.wav|tests/data/ljspeech/wavs/LJ049-0094.npy +tests/data/ljspeech/wavs/LJ016-0282.wav|tests/data/ljspeech/wavs/LJ016-0282.npy +tests/data/ljspeech/wavs/LJ001-0075.wav|tests/data/ljspeech/wavs/LJ001-0075.npy +tests/data/ljspeech/wavs/LJ046-0058.wav|tests/data/ljspeech/wavs/LJ046-0058.npy +tests/data/ljspeech/wavs/LJ044-0080.wav|tests/data/ljspeech/wavs/LJ044-0080.npy +tests/data/ljspeech/wavs/LJ039-0021.wav|tests/data/ljspeech/wavs/LJ039-0021.npy +tests/data/ljspeech/wavs/LJ012-0065.wav|tests/data/ljspeech/wavs/LJ012-0065.npy +tests/data/ljspeech/wavs/LJ016-0443.wav|tests/data/ljspeech/wavs/LJ016-0443.npy +tests/data/ljspeech/wavs/LJ006-0118.wav|tests/data/ljspeech/wavs/LJ006-0118.npy +tests/data/ljspeech/wavs/LJ016-0316.wav|tests/data/ljspeech/wavs/LJ016-0316.npy +tests/data/ljspeech/wavs/LJ029-0144.wav|tests/data/ljspeech/wavs/LJ029-0144.npy +tests/data/ljspeech/wavs/LJ039-0218.wav|tests/data/ljspeech/wavs/LJ039-0218.npy +tests/data/ljspeech/wavs/LJ019-0097.wav|tests/data/ljspeech/wavs/LJ019-0097.npy +tests/data/ljspeech/wavs/LJ046-0248.wav|tests/data/ljspeech/wavs/LJ046-0248.npy +tests/data/ljspeech/wavs/LJ050-0194.wav|tests/data/ljspeech/wavs/LJ050-0194.npy +tests/data/ljspeech/wavs/LJ017-0059.wav|tests/data/ljspeech/wavs/LJ017-0059.npy +tests/data/ljspeech/wavs/LJ017-0166.wav|tests/data/ljspeech/wavs/LJ017-0166.npy +tests/data/ljspeech/wavs/LJ017-0270.wav|tests/data/ljspeech/wavs/LJ017-0270.npy +tests/data/ljspeech/wavs/LJ034-0053.wav|tests/data/ljspeech/wavs/LJ034-0053.npy +tests/data/ljspeech/wavs/LJ031-0161.wav|tests/data/ljspeech/wavs/LJ031-0161.npy +tests/data/ljspeech/wavs/LJ001-0168.wav|tests/data/ljspeech/wavs/LJ001-0168.npy +tests/data/ljspeech/wavs/LJ007-0166.wav|tests/data/ljspeech/wavs/LJ007-0166.npy +tests/data/ljspeech/wavs/LJ048-0214.wav|tests/data/ljspeech/wavs/LJ048-0214.npy +tests/data/ljspeech/wavs/LJ020-0052.wav|tests/data/ljspeech/wavs/LJ020-0052.npy +tests/data/ljspeech/wavs/LJ005-0095.wav|tests/data/ljspeech/wavs/LJ005-0095.npy +tests/data/ljspeech/wavs/LJ022-0007.wav|tests/data/ljspeech/wavs/LJ022-0007.npy +tests/data/ljspeech/wavs/LJ024-0049.wav|tests/data/ljspeech/wavs/LJ024-0049.npy +tests/data/ljspeech/wavs/LJ001-0121.wav|tests/data/ljspeech/wavs/LJ001-0121.npy +tests/data/ljspeech/wavs/LJ012-0044.wav|tests/data/ljspeech/wavs/LJ012-0044.npy +tests/data/ljspeech/wavs/LJ025-0158.wav|tests/data/ljspeech/wavs/LJ025-0158.npy +tests/data/ljspeech/wavs/LJ035-0146.wav|tests/data/ljspeech/wavs/LJ035-0146.npy +tests/data/ljspeech/wavs/LJ001-0065.wav|tests/data/ljspeech/wavs/LJ001-0065.npy +tests/data/ljspeech/wavs/LJ017-0075.wav|tests/data/ljspeech/wavs/LJ017-0075.npy +tests/data/ljspeech/wavs/LJ009-0023.wav|tests/data/ljspeech/wavs/LJ009-0023.npy +tests/data/ljspeech/wavs/LJ009-0195.wav|tests/data/ljspeech/wavs/LJ009-0195.npy +tests/data/ljspeech/wavs/LJ012-0043.wav|tests/data/ljspeech/wavs/LJ012-0043.npy +tests/data/ljspeech/wavs/LJ018-0143.wav|tests/data/ljspeech/wavs/LJ018-0143.npy +tests/data/ljspeech/wavs/LJ043-0022.wav|tests/data/ljspeech/wavs/LJ043-0022.npy +tests/data/ljspeech/wavs/LJ016-0008.wav|tests/data/ljspeech/wavs/LJ016-0008.npy +tests/data/ljspeech/wavs/LJ018-0141.wav|tests/data/ljspeech/wavs/LJ018-0141.npy +tests/data/ljspeech/wavs/LJ008-0010.wav|tests/data/ljspeech/wavs/LJ008-0010.npy 
+tests/data/ljspeech/wavs/LJ001-0049.wav|tests/data/ljspeech/wavs/LJ001-0049.npy +tests/data/ljspeech/wavs/LJ050-0260.wav|tests/data/ljspeech/wavs/LJ050-0260.npy +tests/data/ljspeech/wavs/LJ049-0054.wav|tests/data/ljspeech/wavs/LJ049-0054.npy +tests/data/ljspeech/wavs/LJ046-0169.wav|tests/data/ljspeech/wavs/LJ046-0169.npy +tests/data/ljspeech/wavs/LJ018-0179.wav|tests/data/ljspeech/wavs/LJ018-0179.npy +tests/data/ljspeech/wavs/LJ011-0224.wav|tests/data/ljspeech/wavs/LJ011-0224.npy +tests/data/ljspeech/wavs/LJ014-0252.wav|tests/data/ljspeech/wavs/LJ014-0252.npy +tests/data/ljspeech/wavs/LJ019-0052.wav|tests/data/ljspeech/wavs/LJ019-0052.npy +tests/data/ljspeech/wavs/LJ028-0287.wav|tests/data/ljspeech/wavs/LJ028-0287.npy +tests/data/ljspeech/wavs/LJ017-0231.wav|tests/data/ljspeech/wavs/LJ017-0231.npy +tests/data/ljspeech/wavs/LJ003-0051.wav|tests/data/ljspeech/wavs/LJ003-0051.npy +tests/data/ljspeech/wavs/LJ036-0158.wav|tests/data/ljspeech/wavs/LJ036-0158.npy +tests/data/ljspeech/wavs/LJ006-0180.wav|tests/data/ljspeech/wavs/LJ006-0180.npy +tests/data/ljspeech/wavs/LJ019-0287.wav|tests/data/ljspeech/wavs/LJ019-0287.npy +tests/data/ljspeech/wavs/LJ024-0105.wav|tests/data/ljspeech/wavs/LJ024-0105.npy +tests/data/ljspeech/wavs/LJ009-0157.wav|tests/data/ljspeech/wavs/LJ009-0157.npy +tests/data/ljspeech/wavs/LJ028-0409.wav|tests/data/ljspeech/wavs/LJ028-0409.npy +tests/data/ljspeech/wavs/LJ035-0132.wav|tests/data/ljspeech/wavs/LJ035-0132.npy +tests/data/ljspeech/wavs/LJ028-0435.wav|tests/data/ljspeech/wavs/LJ028-0435.npy +tests/data/ljspeech/wavs/LJ011-0032.wav|tests/data/ljspeech/wavs/LJ011-0032.npy +tests/data/ljspeech/wavs/LJ047-0215.wav|tests/data/ljspeech/wavs/LJ047-0215.npy +tests/data/ljspeech/wavs/LJ016-0016.wav|tests/data/ljspeech/wavs/LJ016-0016.npy +tests/data/ljspeech/wavs/LJ019-0060.wav|tests/data/ljspeech/wavs/LJ019-0060.npy +tests/data/ljspeech/wavs/LJ028-0293.wav|tests/data/ljspeech/wavs/LJ028-0293.npy +tests/data/ljspeech/wavs/LJ023-0105.wav|tests/data/ljspeech/wavs/LJ023-0105.npy +tests/data/ljspeech/wavs/LJ028-0513.wav|tests/data/ljspeech/wavs/LJ028-0513.npy +tests/data/ljspeech/wavs/LJ023-0072.wav|tests/data/ljspeech/wavs/LJ023-0072.npy +tests/data/ljspeech/wavs/LJ026-0003.wav|tests/data/ljspeech/wavs/LJ026-0003.npy +tests/data/ljspeech/wavs/LJ040-0189.wav|tests/data/ljspeech/wavs/LJ040-0189.npy +tests/data/ljspeech/wavs/LJ008-0101.wav|tests/data/ljspeech/wavs/LJ008-0101.npy +tests/data/ljspeech/wavs/LJ015-0147.wav|tests/data/ljspeech/wavs/LJ015-0147.npy +tests/data/ljspeech/wavs/LJ008-0032.wav|tests/data/ljspeech/wavs/LJ008-0032.npy +tests/data/ljspeech/wavs/LJ015-0033.wav|tests/data/ljspeech/wavs/LJ015-0033.npy +tests/data/ljspeech/wavs/LJ023-0117.wav|tests/data/ljspeech/wavs/LJ023-0117.npy +tests/data/ljspeech/wavs/LJ046-0210.wav|tests/data/ljspeech/wavs/LJ046-0210.npy +tests/data/ljspeech/wavs/LJ006-0136.wav|tests/data/ljspeech/wavs/LJ006-0136.npy +tests/data/ljspeech/wavs/LJ044-0167.wav|tests/data/ljspeech/wavs/LJ044-0167.npy +tests/data/ljspeech/wavs/LJ027-0154.wav|tests/data/ljspeech/wavs/LJ027-0154.npy +tests/data/ljspeech/wavs/LJ015-0025.wav|tests/data/ljspeech/wavs/LJ015-0025.npy +tests/data/ljspeech/wavs/LJ038-0052.wav|tests/data/ljspeech/wavs/LJ038-0052.npy +tests/data/ljspeech/wavs/LJ003-0199.wav|tests/data/ljspeech/wavs/LJ003-0199.npy +tests/data/ljspeech/wavs/LJ008-0027.wav|tests/data/ljspeech/wavs/LJ008-0027.npy +tests/data/ljspeech/wavs/LJ045-0222.wav|tests/data/ljspeech/wavs/LJ045-0222.npy 
+tests/data/ljspeech/wavs/LJ006-0255.wav|tests/data/ljspeech/wavs/LJ006-0255.npy +tests/data/ljspeech/wavs/LJ037-0217.wav|tests/data/ljspeech/wavs/LJ037-0217.npy +tests/data/ljspeech/wavs/LJ014-0076.wav|tests/data/ljspeech/wavs/LJ014-0076.npy +tests/data/ljspeech/wavs/LJ009-0125.wav|tests/data/ljspeech/wavs/LJ009-0125.npy +tests/data/ljspeech/wavs/LJ015-0187.wav|tests/data/ljspeech/wavs/LJ015-0187.npy +tests/data/ljspeech/wavs/LJ006-0239.wav|tests/data/ljspeech/wavs/LJ006-0239.npy +tests/data/ljspeech/wavs/LJ028-0068.wav|tests/data/ljspeech/wavs/LJ028-0068.npy +tests/data/ljspeech/wavs/LJ010-0180.wav|tests/data/ljspeech/wavs/LJ010-0180.npy +tests/data/ljspeech/wavs/LJ006-0003.wav|tests/data/ljspeech/wavs/LJ006-0003.npy +tests/data/ljspeech/wavs/LJ049-0109.wav|tests/data/ljspeech/wavs/LJ049-0109.npy +tests/data/ljspeech/wavs/LJ006-0283.wav|tests/data/ljspeech/wavs/LJ006-0283.npy +tests/data/ljspeech/wavs/LJ015-0237.wav|tests/data/ljspeech/wavs/LJ015-0237.npy +tests/data/ljspeech/wavs/LJ010-0100.wav|tests/data/ljspeech/wavs/LJ010-0100.npy +tests/data/ljspeech/wavs/LJ032-0180.wav|tests/data/ljspeech/wavs/LJ032-0180.npy +tests/data/ljspeech/wavs/LJ002-0320.wav|tests/data/ljspeech/wavs/LJ002-0320.npy +tests/data/ljspeech/wavs/LJ044-0171.wav|tests/data/ljspeech/wavs/LJ044-0171.npy +tests/data/ljspeech/wavs/LJ031-0154.wav|tests/data/ljspeech/wavs/LJ031-0154.npy +tests/data/ljspeech/wavs/LJ006-0008.wav|tests/data/ljspeech/wavs/LJ006-0008.npy +tests/data/ljspeech/wavs/LJ044-0207.wav|tests/data/ljspeech/wavs/LJ044-0207.npy +tests/data/ljspeech/wavs/LJ031-0180.wav|tests/data/ljspeech/wavs/LJ031-0180.npy +tests/data/ljspeech/wavs/LJ019-0003.wav|tests/data/ljspeech/wavs/LJ019-0003.npy +tests/data/ljspeech/wavs/LJ048-0176.wav|tests/data/ljspeech/wavs/LJ048-0176.npy +tests/data/ljspeech/wavs/LJ020-0020.wav|tests/data/ljspeech/wavs/LJ020-0020.npy +tests/data/ljspeech/wavs/LJ020-0011.wav|tests/data/ljspeech/wavs/LJ020-0011.npy +tests/data/ljspeech/wavs/LJ042-0129.wav|tests/data/ljspeech/wavs/LJ042-0129.npy +tests/data/ljspeech/wavs/LJ033-0022.wav|tests/data/ljspeech/wavs/LJ033-0022.npy +tests/data/ljspeech/wavs/LJ037-0058.wav|tests/data/ljspeech/wavs/LJ037-0058.npy +tests/data/ljspeech/wavs/LJ026-0064.wav|tests/data/ljspeech/wavs/LJ026-0064.npy +tests/data/ljspeech/wavs/LJ040-0238.wav|tests/data/ljspeech/wavs/LJ040-0238.npy +tests/data/ljspeech/wavs/LJ037-0073.wav|tests/data/ljspeech/wavs/LJ037-0073.npy +tests/data/ljspeech/wavs/LJ002-0154.wav|tests/data/ljspeech/wavs/LJ002-0154.npy +tests/data/ljspeech/wavs/LJ003-0171.wav|tests/data/ljspeech/wavs/LJ003-0171.npy +tests/data/ljspeech/wavs/LJ026-0112.wav|tests/data/ljspeech/wavs/LJ026-0112.npy +tests/data/ljspeech/wavs/LJ004-0140.wav|tests/data/ljspeech/wavs/LJ004-0140.npy +tests/data/ljspeech/wavs/LJ046-0226.wav|tests/data/ljspeech/wavs/LJ046-0226.npy +tests/data/ljspeech/wavs/LJ002-0219.wav|tests/data/ljspeech/wavs/LJ002-0219.npy +tests/data/ljspeech/wavs/LJ005-0194.wav|tests/data/ljspeech/wavs/LJ005-0194.npy +tests/data/ljspeech/wavs/LJ027-0105.wav|tests/data/ljspeech/wavs/LJ027-0105.npy +tests/data/ljspeech/wavs/LJ014-0329.wav|tests/data/ljspeech/wavs/LJ014-0329.npy +tests/data/ljspeech/wavs/LJ035-0110.wav|tests/data/ljspeech/wavs/LJ035-0110.npy +tests/data/ljspeech/wavs/LJ016-0258.wav|tests/data/ljspeech/wavs/LJ016-0258.npy +tests/data/ljspeech/wavs/LJ018-0060.wav|tests/data/ljspeech/wavs/LJ018-0060.npy +tests/data/ljspeech/wavs/LJ013-0107.wav|tests/data/ljspeech/wavs/LJ013-0107.npy 
+tests/data/ljspeech/wavs/LJ032-0211.wav|tests/data/ljspeech/wavs/LJ032-0211.npy +tests/data/ljspeech/wavs/LJ036-0009.wav|tests/data/ljspeech/wavs/LJ036-0009.npy +tests/data/ljspeech/wavs/LJ030-0054.wav|tests/data/ljspeech/wavs/LJ030-0054.npy +tests/data/ljspeech/wavs/LJ028-0159.wav|tests/data/ljspeech/wavs/LJ028-0159.npy +tests/data/ljspeech/wavs/LJ010-0313.wav|tests/data/ljspeech/wavs/LJ010-0313.npy +tests/data/ljspeech/wavs/LJ047-0063.wav|tests/data/ljspeech/wavs/LJ047-0063.npy +tests/data/ljspeech/wavs/LJ019-0281.wav|tests/data/ljspeech/wavs/LJ019-0281.npy +tests/data/ljspeech/wavs/LJ029-0171.wav|tests/data/ljspeech/wavs/LJ029-0171.npy +tests/data/ljspeech/wavs/LJ047-0233.wav|tests/data/ljspeech/wavs/LJ047-0233.npy +tests/data/ljspeech/wavs/LJ022-0158.wav|tests/data/ljspeech/wavs/LJ022-0158.npy +tests/data/ljspeech/wavs/LJ043-0127.wav|tests/data/ljspeech/wavs/LJ043-0127.npy +tests/data/ljspeech/wavs/LJ040-0046.wav|tests/data/ljspeech/wavs/LJ040-0046.npy +tests/data/ljspeech/wavs/LJ045-0244.wav|tests/data/ljspeech/wavs/LJ045-0244.npy +tests/data/ljspeech/wavs/LJ040-0137.wav|tests/data/ljspeech/wavs/LJ040-0137.npy +tests/data/ljspeech/wavs/LJ040-0218.wav|tests/data/ljspeech/wavs/LJ040-0218.npy +tests/data/ljspeech/wavs/LJ021-0205.wav|tests/data/ljspeech/wavs/LJ021-0205.npy +tests/data/ljspeech/wavs/LJ010-0102.wav|tests/data/ljspeech/wavs/LJ010-0102.npy +tests/data/ljspeech/wavs/LJ019-0313.wav|tests/data/ljspeech/wavs/LJ019-0313.npy +tests/data/ljspeech/wavs/LJ050-0129.wav|tests/data/ljspeech/wavs/LJ050-0129.npy +tests/data/ljspeech/wavs/LJ028-0140.wav|tests/data/ljspeech/wavs/LJ028-0140.npy +tests/data/ljspeech/wavs/LJ029-0146.wav|tests/data/ljspeech/wavs/LJ029-0146.npy +tests/data/ljspeech/wavs/LJ022-0141.wav|tests/data/ljspeech/wavs/LJ022-0141.npy +tests/data/ljspeech/wavs/LJ005-0163.wav|tests/data/ljspeech/wavs/LJ005-0163.npy +tests/data/ljspeech/wavs/LJ010-0197.wav|tests/data/ljspeech/wavs/LJ010-0197.npy +tests/data/ljspeech/wavs/LJ021-0125.wav|tests/data/ljspeech/wavs/LJ021-0125.npy +tests/data/ljspeech/wavs/LJ006-0218.wav|tests/data/ljspeech/wavs/LJ006-0218.npy +tests/data/ljspeech/wavs/LJ013-0252.wav|tests/data/ljspeech/wavs/LJ013-0252.npy +tests/data/ljspeech/wavs/LJ006-0052.wav|tests/data/ljspeech/wavs/LJ006-0052.npy +tests/data/ljspeech/wavs/LJ043-0181.wav|tests/data/ljspeech/wavs/LJ043-0181.npy +tests/data/ljspeech/wavs/LJ005-0180.wav|tests/data/ljspeech/wavs/LJ005-0180.npy +tests/data/ljspeech/wavs/LJ009-0163.wav|tests/data/ljspeech/wavs/LJ009-0163.npy +tests/data/ljspeech/wavs/LJ050-0020.wav|tests/data/ljspeech/wavs/LJ050-0020.npy +tests/data/ljspeech/wavs/LJ028-0150.wav|tests/data/ljspeech/wavs/LJ028-0150.npy +tests/data/ljspeech/wavs/LJ002-0145.wav|tests/data/ljspeech/wavs/LJ002-0145.npy +tests/data/ljspeech/wavs/LJ028-0421.wav|tests/data/ljspeech/wavs/LJ028-0421.npy +tests/data/ljspeech/wavs/LJ009-0277.wav|tests/data/ljspeech/wavs/LJ009-0277.npy +tests/data/ljspeech/wavs/LJ018-0236.wav|tests/data/ljspeech/wavs/LJ018-0236.npy +tests/data/ljspeech/wavs/LJ011-0019.wav|tests/data/ljspeech/wavs/LJ011-0019.npy +tests/data/ljspeech/wavs/LJ029-0195.wav|tests/data/ljspeech/wavs/LJ029-0195.npy +tests/data/ljspeech/wavs/LJ040-0192.wav|tests/data/ljspeech/wavs/LJ040-0192.npy +tests/data/ljspeech/wavs/LJ047-0014.wav|tests/data/ljspeech/wavs/LJ047-0014.npy +tests/data/ljspeech/wavs/LJ042-0054.wav|tests/data/ljspeech/wavs/LJ042-0054.npy +tests/data/ljspeech/wavs/LJ023-0138.wav|tests/data/ljspeech/wavs/LJ023-0138.npy 
+tests/data/ljspeech/wavs/LJ043-0160.wav|tests/data/ljspeech/wavs/LJ043-0160.npy +tests/data/ljspeech/wavs/LJ046-0048.wav|tests/data/ljspeech/wavs/LJ046-0048.npy +tests/data/ljspeech/wavs/LJ002-0265.wav|tests/data/ljspeech/wavs/LJ002-0265.npy +tests/data/ljspeech/wavs/LJ045-0208.wav|tests/data/ljspeech/wavs/LJ045-0208.npy +tests/data/ljspeech/wavs/LJ024-0009.wav|tests/data/ljspeech/wavs/LJ024-0009.npy +tests/data/ljspeech/wavs/LJ021-0150.wav|tests/data/ljspeech/wavs/LJ021-0150.npy +tests/data/ljspeech/wavs/LJ011-0117.wav|tests/data/ljspeech/wavs/LJ011-0117.npy +tests/data/ljspeech/wavs/LJ006-0124.wav|tests/data/ljspeech/wavs/LJ006-0124.npy +tests/data/ljspeech/wavs/LJ033-0194.wav|tests/data/ljspeech/wavs/LJ033-0194.npy +tests/data/ljspeech/wavs/LJ010-0022.wav|tests/data/ljspeech/wavs/LJ010-0022.npy +tests/data/ljspeech/wavs/LJ009-0093.wav|tests/data/ljspeech/wavs/LJ009-0093.npy +tests/data/ljspeech/wavs/LJ028-0379.wav|tests/data/ljspeech/wavs/LJ028-0379.npy +tests/data/ljspeech/wavs/LJ005-0270.wav|tests/data/ljspeech/wavs/LJ005-0270.npy +tests/data/ljspeech/wavs/LJ016-0084.wav|tests/data/ljspeech/wavs/LJ016-0084.npy +tests/data/ljspeech/wavs/LJ007-0211.wav|tests/data/ljspeech/wavs/LJ007-0211.npy +tests/data/ljspeech/wavs/LJ024-0014.wav|tests/data/ljspeech/wavs/LJ024-0014.npy +tests/data/ljspeech/wavs/LJ005-0238.wav|tests/data/ljspeech/wavs/LJ005-0238.npy +tests/data/ljspeech/wavs/LJ037-0128.wav|tests/data/ljspeech/wavs/LJ037-0128.npy +tests/data/ljspeech/wavs/LJ007-0023.wav|tests/data/ljspeech/wavs/LJ007-0023.npy +tests/data/ljspeech/wavs/LJ035-0120.wav|tests/data/ljspeech/wavs/LJ035-0120.npy +tests/data/ljspeech/wavs/LJ010-0308.wav|tests/data/ljspeech/wavs/LJ010-0308.npy +tests/data/ljspeech/wavs/LJ047-0207.wav|tests/data/ljspeech/wavs/LJ047-0207.npy +tests/data/ljspeech/wavs/LJ009-0094.wav|tests/data/ljspeech/wavs/LJ009-0094.npy +tests/data/ljspeech/wavs/LJ010-0189.wav|tests/data/ljspeech/wavs/LJ010-0189.npy +tests/data/ljspeech/wavs/LJ002-0326.wav|tests/data/ljspeech/wavs/LJ002-0326.npy +tests/data/ljspeech/wavs/LJ046-0187.wav|tests/data/ljspeech/wavs/LJ046-0187.npy +tests/data/ljspeech/wavs/LJ018-0087.wav|tests/data/ljspeech/wavs/LJ018-0087.npy +tests/data/ljspeech/wavs/LJ008-0016.wav|tests/data/ljspeech/wavs/LJ008-0016.npy +tests/data/ljspeech/wavs/LJ047-0031.wav|tests/data/ljspeech/wavs/LJ047-0031.npy +tests/data/ljspeech/wavs/LJ042-0159.wav|tests/data/ljspeech/wavs/LJ042-0159.npy +tests/data/ljspeech/wavs/LJ025-0011.wav|tests/data/ljspeech/wavs/LJ025-0011.npy +tests/data/ljspeech/wavs/LJ026-0139.wav|tests/data/ljspeech/wavs/LJ026-0139.npy +tests/data/ljspeech/wavs/LJ050-0070.wav|tests/data/ljspeech/wavs/LJ050-0070.npy +tests/data/ljspeech/wavs/LJ049-0042.wav|tests/data/ljspeech/wavs/LJ049-0042.npy +tests/data/ljspeech/wavs/LJ032-0072.wav|tests/data/ljspeech/wavs/LJ032-0072.npy +tests/data/ljspeech/wavs/LJ018-0118.wav|tests/data/ljspeech/wavs/LJ018-0118.npy +tests/data/ljspeech/wavs/LJ042-0187.wav|tests/data/ljspeech/wavs/LJ042-0187.npy +tests/data/ljspeech/wavs/LJ028-0057.wav|tests/data/ljspeech/wavs/LJ028-0057.npy +tests/data/ljspeech/wavs/LJ042-0181.wav|tests/data/ljspeech/wavs/LJ042-0181.npy +tests/data/ljspeech/wavs/LJ034-0184.wav|tests/data/ljspeech/wavs/LJ034-0184.npy +tests/data/ljspeech/wavs/LJ008-0043.wav|tests/data/ljspeech/wavs/LJ008-0043.npy +tests/data/ljspeech/wavs/LJ017-0176.wav|tests/data/ljspeech/wavs/LJ017-0176.npy +tests/data/ljspeech/wavs/LJ015-0121.wav|tests/data/ljspeech/wavs/LJ015-0121.npy 
+tests/data/ljspeech/wavs/LJ001-0012.wav|tests/data/ljspeech/wavs/LJ001-0012.npy +tests/data/ljspeech/wavs/LJ030-0209.wav|tests/data/ljspeech/wavs/LJ030-0209.npy +tests/data/ljspeech/wavs/LJ007-0143.wav|tests/data/ljspeech/wavs/LJ007-0143.npy +tests/data/ljspeech/wavs/LJ033-0103.wav|tests/data/ljspeech/wavs/LJ033-0103.npy +tests/data/ljspeech/wavs/LJ048-0239.wav|tests/data/ljspeech/wavs/LJ048-0239.npy +tests/data/ljspeech/wavs/LJ028-0036.wav|tests/data/ljspeech/wavs/LJ028-0036.npy +tests/data/ljspeech/wavs/LJ049-0034.wav|tests/data/ljspeech/wavs/LJ049-0034.npy +tests/data/ljspeech/wavs/LJ024-0041.wav|tests/data/ljspeech/wavs/LJ024-0041.npy +tests/data/ljspeech/wavs/LJ018-0035.wav|tests/data/ljspeech/wavs/LJ018-0035.npy +tests/data/ljspeech/wavs/LJ017-0026.wav|tests/data/ljspeech/wavs/LJ017-0026.npy +tests/data/ljspeech/wavs/LJ016-0266.wav|tests/data/ljspeech/wavs/LJ016-0266.npy +tests/data/ljspeech/wavs/LJ015-0244.wav|tests/data/ljspeech/wavs/LJ015-0244.npy +tests/data/ljspeech/wavs/LJ037-0225.wav|tests/data/ljspeech/wavs/LJ037-0225.npy +tests/data/ljspeech/wavs/LJ003-0099.wav|tests/data/ljspeech/wavs/LJ003-0099.npy +tests/data/ljspeech/wavs/LJ009-0173.wav|tests/data/ljspeech/wavs/LJ009-0173.npy +tests/data/ljspeech/wavs/LJ036-0201.wav|tests/data/ljspeech/wavs/LJ036-0201.npy +tests/data/ljspeech/wavs/LJ014-0001.wav|tests/data/ljspeech/wavs/LJ014-0001.npy +tests/data/ljspeech/wavs/LJ013-0001.wav|tests/data/ljspeech/wavs/LJ013-0001.npy +tests/data/ljspeech/wavs/LJ037-0242.wav|tests/data/ljspeech/wavs/LJ037-0242.npy +tests/data/ljspeech/wavs/LJ044-0185.wav|tests/data/ljspeech/wavs/LJ044-0185.npy +tests/data/ljspeech/wavs/LJ039-0112.wav|tests/data/ljspeech/wavs/LJ039-0112.npy +tests/data/ljspeech/wavs/LJ008-0149.wav|tests/data/ljspeech/wavs/LJ008-0149.npy +tests/data/ljspeech/wavs/LJ042-0045.wav|tests/data/ljspeech/wavs/LJ042-0045.npy +tests/data/ljspeech/wavs/LJ019-0333.wav|tests/data/ljspeech/wavs/LJ019-0333.npy +tests/data/ljspeech/wavs/LJ026-0093.wav|tests/data/ljspeech/wavs/LJ026-0093.npy +tests/data/ljspeech/wavs/LJ031-0011.wav|tests/data/ljspeech/wavs/LJ031-0011.npy +tests/data/ljspeech/wavs/LJ019-0007.wav|tests/data/ljspeech/wavs/LJ019-0007.npy +tests/data/ljspeech/wavs/LJ044-0090.wav|tests/data/ljspeech/wavs/LJ044-0090.npy +tests/data/ljspeech/wavs/LJ006-0270.wav|tests/data/ljspeech/wavs/LJ006-0270.npy +tests/data/ljspeech/wavs/LJ039-0114.wav|tests/data/ljspeech/wavs/LJ039-0114.npy +tests/data/ljspeech/wavs/LJ012-0262.wav|tests/data/ljspeech/wavs/LJ012-0262.npy +tests/data/ljspeech/wavs/LJ012-0061.wav|tests/data/ljspeech/wavs/LJ012-0061.npy +tests/data/ljspeech/wavs/LJ008-0267.wav|tests/data/ljspeech/wavs/LJ008-0267.npy +tests/data/ljspeech/wavs/LJ016-0382.wav|tests/data/ljspeech/wavs/LJ016-0382.npy +tests/data/ljspeech/wavs/LJ019-0367.wav|tests/data/ljspeech/wavs/LJ019-0367.npy +tests/data/ljspeech/wavs/LJ012-0004.wav|tests/data/ljspeech/wavs/LJ012-0004.npy +tests/data/ljspeech/wavs/LJ005-0064.wav|tests/data/ljspeech/wavs/LJ005-0064.npy +tests/data/ljspeech/wavs/LJ012-0180.wav|tests/data/ljspeech/wavs/LJ012-0180.npy +tests/data/ljspeech/wavs/LJ037-0017.wav|tests/data/ljspeech/wavs/LJ037-0017.npy +tests/data/ljspeech/wavs/LJ011-0198.wav|tests/data/ljspeech/wavs/LJ011-0198.npy +tests/data/ljspeech/wavs/LJ027-0086.wav|tests/data/ljspeech/wavs/LJ027-0086.npy +tests/data/ljspeech/wavs/LJ035-0155.wav|tests/data/ljspeech/wavs/LJ035-0155.npy +tests/data/ljspeech/wavs/LJ012-0102.wav|tests/data/ljspeech/wavs/LJ012-0102.npy 
+tests/data/ljspeech/wavs/LJ006-0155.wav|tests/data/ljspeech/wavs/LJ006-0155.npy +tests/data/ljspeech/wavs/LJ046-0095.wav|tests/data/ljspeech/wavs/LJ046-0095.npy +tests/data/ljspeech/wavs/LJ049-0138.wav|tests/data/ljspeech/wavs/LJ049-0138.npy +tests/data/ljspeech/wavs/LJ034-0210.wav|tests/data/ljspeech/wavs/LJ034-0210.npy +tests/data/ljspeech/wavs/LJ042-0089.wav|tests/data/ljspeech/wavs/LJ042-0089.npy +tests/data/ljspeech/wavs/LJ007-0092.wav|tests/data/ljspeech/wavs/LJ007-0092.npy +tests/data/ljspeech/wavs/LJ047-0134.wav|tests/data/ljspeech/wavs/LJ047-0134.npy +tests/data/ljspeech/wavs/LJ041-0105.wav|tests/data/ljspeech/wavs/LJ041-0105.npy +tests/data/ljspeech/wavs/LJ008-0106.wav|tests/data/ljspeech/wavs/LJ008-0106.npy +tests/data/ljspeech/wavs/LJ022-0172.wav|tests/data/ljspeech/wavs/LJ022-0172.npy +tests/data/ljspeech/wavs/LJ014-0150.wav|tests/data/ljspeech/wavs/LJ014-0150.npy +tests/data/ljspeech/wavs/LJ022-0188.wav|tests/data/ljspeech/wavs/LJ022-0188.npy +tests/data/ljspeech/wavs/LJ008-0083.wav|tests/data/ljspeech/wavs/LJ008-0083.npy +tests/data/ljspeech/wavs/LJ048-0144.wav|tests/data/ljspeech/wavs/LJ048-0144.npy +tests/data/ljspeech/wavs/LJ045-0040.wav|tests/data/ljspeech/wavs/LJ045-0040.npy +tests/data/ljspeech/wavs/LJ006-0289.wav|tests/data/ljspeech/wavs/LJ006-0289.npy +tests/data/ljspeech/wavs/LJ030-0087.wav|tests/data/ljspeech/wavs/LJ030-0087.npy +tests/data/ljspeech/wavs/LJ033-0089.wav|tests/data/ljspeech/wavs/LJ033-0089.npy +tests/data/ljspeech/wavs/LJ006-0258.wav|tests/data/ljspeech/wavs/LJ006-0258.npy +tests/data/ljspeech/wavs/LJ050-0091.wav|tests/data/ljspeech/wavs/LJ050-0091.npy +tests/data/ljspeech/wavs/LJ043-0099.wav|tests/data/ljspeech/wavs/LJ043-0099.npy +tests/data/ljspeech/wavs/LJ038-0171.wav|tests/data/ljspeech/wavs/LJ038-0171.npy +tests/data/ljspeech/wavs/LJ028-0289.wav|tests/data/ljspeech/wavs/LJ028-0289.npy +tests/data/ljspeech/wavs/LJ008-0300.wav|tests/data/ljspeech/wavs/LJ008-0300.npy +tests/data/ljspeech/wavs/LJ019-0399.wav|tests/data/ljspeech/wavs/LJ019-0399.npy +tests/data/ljspeech/wavs/LJ034-0166.wav|tests/data/ljspeech/wavs/LJ034-0166.npy +tests/data/ljspeech/wavs/LJ026-0040.wav|tests/data/ljspeech/wavs/LJ026-0040.npy +tests/data/ljspeech/wavs/LJ028-0111.wav|tests/data/ljspeech/wavs/LJ028-0111.npy +tests/data/ljspeech/wavs/LJ014-0268.wav|tests/data/ljspeech/wavs/LJ014-0268.npy +tests/data/ljspeech/wavs/LJ003-0260.wav|tests/data/ljspeech/wavs/LJ003-0260.npy +tests/data/ljspeech/wavs/LJ032-0133.wav|tests/data/ljspeech/wavs/LJ032-0133.npy +tests/data/ljspeech/wavs/LJ009-0063.wav|tests/data/ljspeech/wavs/LJ009-0063.npy +tests/data/ljspeech/wavs/LJ047-0212.wav|tests/data/ljspeech/wavs/LJ047-0212.npy +tests/data/ljspeech/wavs/LJ011-0180.wav|tests/data/ljspeech/wavs/LJ011-0180.npy +tests/data/ljspeech/wavs/LJ011-0142.wav|tests/data/ljspeech/wavs/LJ011-0142.npy +tests/data/ljspeech/wavs/LJ037-0205.wav|tests/data/ljspeech/wavs/LJ037-0205.npy +tests/data/ljspeech/wavs/LJ037-0201.wav|tests/data/ljspeech/wavs/LJ037-0201.npy +tests/data/ljspeech/wavs/LJ049-0113.wav|tests/data/ljspeech/wavs/LJ049-0113.npy +tests/data/ljspeech/wavs/LJ050-0055.wav|tests/data/ljspeech/wavs/LJ050-0055.npy +tests/data/ljspeech/wavs/LJ038-0139.wav|tests/data/ljspeech/wavs/LJ038-0139.npy +tests/data/ljspeech/wavs/LJ050-0028.wav|tests/data/ljspeech/wavs/LJ050-0028.npy +tests/data/ljspeech/wavs/LJ015-0241.wav|tests/data/ljspeech/wavs/LJ015-0241.npy +tests/data/ljspeech/wavs/LJ048-0208.wav|tests/data/ljspeech/wavs/LJ048-0208.npy 
+tests/data/ljspeech/wavs/LJ015-0059.wav|tests/data/ljspeech/wavs/LJ015-0059.npy +tests/data/ljspeech/wavs/LJ018-0356.wav|tests/data/ljspeech/wavs/LJ018-0356.npy +tests/data/ljspeech/wavs/LJ015-0112.wav|tests/data/ljspeech/wavs/LJ015-0112.npy +tests/data/ljspeech/wavs/LJ035-0202.wav|tests/data/ljspeech/wavs/LJ035-0202.npy +tests/data/ljspeech/wavs/LJ030-0108.wav|tests/data/ljspeech/wavs/LJ030-0108.npy +tests/data/ljspeech/wavs/LJ008-0240.wav|tests/data/ljspeech/wavs/LJ008-0240.npy +tests/data/ljspeech/wavs/LJ015-0118.wav|tests/data/ljspeech/wavs/LJ015-0118.npy +tests/data/ljspeech/wavs/LJ003-0105.wav|tests/data/ljspeech/wavs/LJ003-0105.npy +tests/data/ljspeech/wavs/LJ033-0098.wav|tests/data/ljspeech/wavs/LJ033-0098.npy +tests/data/ljspeech/wavs/LJ014-0328.wav|tests/data/ljspeech/wavs/LJ014-0328.npy +tests/data/ljspeech/wavs/LJ045-0051.wav|tests/data/ljspeech/wavs/LJ045-0051.npy +tests/data/ljspeech/wavs/LJ006-0190.wav|tests/data/ljspeech/wavs/LJ006-0190.npy +tests/data/ljspeech/wavs/LJ014-0170.wav|tests/data/ljspeech/wavs/LJ014-0170.npy +tests/data/ljspeech/wavs/LJ003-0179.wav|tests/data/ljspeech/wavs/LJ003-0179.npy +tests/data/ljspeech/wavs/LJ041-0083.wav|tests/data/ljspeech/wavs/LJ041-0083.npy +tests/data/ljspeech/wavs/LJ045-0101.wav|tests/data/ljspeech/wavs/LJ045-0101.npy +tests/data/ljspeech/wavs/LJ006-0217.wav|tests/data/ljspeech/wavs/LJ006-0217.npy +tests/data/ljspeech/wavs/LJ020-0019.wav|tests/data/ljspeech/wavs/LJ020-0019.npy +tests/data/ljspeech/wavs/LJ029-0027.wav|tests/data/ljspeech/wavs/LJ029-0027.npy +tests/data/ljspeech/wavs/LJ007-0219.wav|tests/data/ljspeech/wavs/LJ007-0219.npy +tests/data/ljspeech/wavs/LJ035-0184.wav|tests/data/ljspeech/wavs/LJ035-0184.npy +tests/data/ljspeech/wavs/LJ015-0207.wav|tests/data/ljspeech/wavs/LJ015-0207.npy +tests/data/ljspeech/wavs/LJ006-0172.wav|tests/data/ljspeech/wavs/LJ006-0172.npy +tests/data/ljspeech/wavs/LJ018-0054.wav|tests/data/ljspeech/wavs/LJ018-0054.npy +tests/data/ljspeech/wavs/LJ032-0245.wav|tests/data/ljspeech/wavs/LJ032-0245.npy +tests/data/ljspeech/wavs/LJ037-0187.wav|tests/data/ljspeech/wavs/LJ037-0187.npy +tests/data/ljspeech/wavs/LJ035-0183.wav|tests/data/ljspeech/wavs/LJ035-0183.npy +tests/data/ljspeech/wavs/LJ045-0016.wav|tests/data/ljspeech/wavs/LJ045-0016.npy +tests/data/ljspeech/wavs/LJ038-0180.wav|tests/data/ljspeech/wavs/LJ038-0180.npy +tests/data/ljspeech/wavs/LJ046-0173.wav|tests/data/ljspeech/wavs/LJ046-0173.npy +tests/data/ljspeech/wavs/LJ024-0054.wav|tests/data/ljspeech/wavs/LJ024-0054.npy +tests/data/ljspeech/wavs/LJ016-0351.wav|tests/data/ljspeech/wavs/LJ016-0351.npy +tests/data/ljspeech/wavs/LJ017-0184.wav|tests/data/ljspeech/wavs/LJ017-0184.npy +tests/data/ljspeech/wavs/LJ028-0116.wav|tests/data/ljspeech/wavs/LJ028-0116.npy +tests/data/ljspeech/wavs/LJ018-0137.wav|tests/data/ljspeech/wavs/LJ018-0137.npy +tests/data/ljspeech/wavs/LJ027-0115.wav|tests/data/ljspeech/wavs/LJ027-0115.npy +tests/data/ljspeech/wavs/LJ032-0176.wav|tests/data/ljspeech/wavs/LJ032-0176.npy +tests/data/ljspeech/wavs/LJ031-0036.wav|tests/data/ljspeech/wavs/LJ031-0036.npy +tests/data/ljspeech/wavs/LJ017-0041.wav|tests/data/ljspeech/wavs/LJ017-0041.npy +tests/data/ljspeech/wavs/LJ017-0188.wav|tests/data/ljspeech/wavs/LJ017-0188.npy +tests/data/ljspeech/wavs/LJ032-0215.wav|tests/data/ljspeech/wavs/LJ032-0215.npy +tests/data/ljspeech/wavs/LJ017-0047.wav|tests/data/ljspeech/wavs/LJ017-0047.npy +tests/data/ljspeech/wavs/LJ037-0102.wav|tests/data/ljspeech/wavs/LJ037-0102.npy 
+tests/data/ljspeech/wavs/LJ032-0208.wav|tests/data/ljspeech/wavs/LJ032-0208.npy +tests/data/ljspeech/wavs/LJ017-0196.wav|tests/data/ljspeech/wavs/LJ017-0196.npy +tests/data/ljspeech/wavs/LJ018-0050.wav|tests/data/ljspeech/wavs/LJ018-0050.npy +tests/data/ljspeech/wavs/LJ003-0023.wav|tests/data/ljspeech/wavs/LJ003-0023.npy +tests/data/ljspeech/wavs/LJ014-0091.wav|tests/data/ljspeech/wavs/LJ014-0091.npy +tests/data/ljspeech/wavs/LJ014-0152.wav|tests/data/ljspeech/wavs/LJ014-0152.npy +tests/data/ljspeech/wavs/LJ017-0162.wav|tests/data/ljspeech/wavs/LJ017-0162.npy +tests/data/ljspeech/wavs/LJ018-0096.wav|tests/data/ljspeech/wavs/LJ018-0096.npy +tests/data/ljspeech/wavs/LJ030-0200.wav|tests/data/ljspeech/wavs/LJ030-0200.npy +tests/data/ljspeech/wavs/LJ004-0029.wav|tests/data/ljspeech/wavs/LJ004-0029.npy +tests/data/ljspeech/wavs/LJ018-0185.wav|tests/data/ljspeech/wavs/LJ018-0185.npy +tests/data/ljspeech/wavs/LJ009-0213.wav|tests/data/ljspeech/wavs/LJ009-0213.npy +tests/data/ljspeech/wavs/LJ014-0023.wav|tests/data/ljspeech/wavs/LJ014-0023.npy +tests/data/ljspeech/wavs/LJ044-0022.wav|tests/data/ljspeech/wavs/LJ044-0022.npy +tests/data/ljspeech/wavs/LJ016-0206.wav|tests/data/ljspeech/wavs/LJ016-0206.npy +tests/data/ljspeech/wavs/LJ047-0006.wav|tests/data/ljspeech/wavs/LJ047-0006.npy +tests/data/ljspeech/wavs/LJ005-0287.wav|tests/data/ljspeech/wavs/LJ005-0287.npy +tests/data/ljspeech/wavs/LJ027-0135.wav|tests/data/ljspeech/wavs/LJ027-0135.npy +tests/data/ljspeech/wavs/LJ012-0046.wav|tests/data/ljspeech/wavs/LJ012-0046.npy +tests/data/ljspeech/wavs/LJ040-0232.wav|tests/data/ljspeech/wavs/LJ040-0232.npy +tests/data/ljspeech/wavs/LJ002-0307.wav|tests/data/ljspeech/wavs/LJ002-0307.npy +tests/data/ljspeech/wavs/LJ012-0070.wav|tests/data/ljspeech/wavs/LJ012-0070.npy +tests/data/ljspeech/wavs/LJ039-0045.wav|tests/data/ljspeech/wavs/LJ039-0045.npy +tests/data/ljspeech/wavs/LJ047-0007.wav|tests/data/ljspeech/wavs/LJ047-0007.npy +tests/data/ljspeech/wavs/LJ019-0077.wav|tests/data/ljspeech/wavs/LJ019-0077.npy +tests/data/ljspeech/wavs/LJ005-0021.wav|tests/data/ljspeech/wavs/LJ005-0021.npy +tests/data/ljspeech/wavs/LJ011-0268.wav|tests/data/ljspeech/wavs/LJ011-0268.npy +tests/data/ljspeech/wavs/LJ034-0075.wav|tests/data/ljspeech/wavs/LJ034-0075.npy +tests/data/ljspeech/wavs/LJ014-0013.wav|tests/data/ljspeech/wavs/LJ014-0013.npy +tests/data/ljspeech/wavs/LJ031-0112.wav|tests/data/ljspeech/wavs/LJ031-0112.npy +tests/data/ljspeech/wavs/LJ010-0029.wav|tests/data/ljspeech/wavs/LJ010-0029.npy +tests/data/ljspeech/wavs/LJ047-0171.wav|tests/data/ljspeech/wavs/LJ047-0171.npy +tests/data/ljspeech/wavs/LJ012-0029.wav|tests/data/ljspeech/wavs/LJ012-0029.npy +tests/data/ljspeech/wavs/LJ049-0197.wav|tests/data/ljspeech/wavs/LJ049-0197.npy +tests/data/ljspeech/wavs/LJ016-0061.wav|tests/data/ljspeech/wavs/LJ016-0061.npy +tests/data/ljspeech/wavs/LJ021-0104.wav|tests/data/ljspeech/wavs/LJ021-0104.npy +tests/data/ljspeech/wavs/LJ030-0167.wav|tests/data/ljspeech/wavs/LJ030-0167.npy +tests/data/ljspeech/wavs/LJ030-0015.wav|tests/data/ljspeech/wavs/LJ030-0015.npy +tests/data/ljspeech/wavs/LJ012-0080.wav|tests/data/ljspeech/wavs/LJ012-0080.npy +tests/data/ljspeech/wavs/LJ028-0383.wav|tests/data/ljspeech/wavs/LJ028-0383.npy +tests/data/ljspeech/wavs/LJ047-0159.wav|tests/data/ljspeech/wavs/LJ047-0159.npy +tests/data/ljspeech/wavs/LJ039-0211.wav|tests/data/ljspeech/wavs/LJ039-0211.npy +tests/data/ljspeech/wavs/LJ016-0124.wav|tests/data/ljspeech/wavs/LJ016-0124.npy 
+tests/data/ljspeech/wavs/LJ027-0130.wav|tests/data/ljspeech/wavs/LJ027-0130.npy +tests/data/ljspeech/wavs/LJ038-0229.wav|tests/data/ljspeech/wavs/LJ038-0229.npy +tests/data/ljspeech/wavs/LJ032-0021.wav|tests/data/ljspeech/wavs/LJ032-0021.npy +tests/data/ljspeech/wavs/LJ032-0049.wav|tests/data/ljspeech/wavs/LJ032-0049.npy +tests/data/ljspeech/wavs/LJ031-0137.wav|tests/data/ljspeech/wavs/LJ031-0137.npy +tests/data/ljspeech/wavs/LJ046-0117.wav|tests/data/ljspeech/wavs/LJ046-0117.npy +tests/data/ljspeech/wavs/LJ021-0175.wav|tests/data/ljspeech/wavs/LJ021-0175.npy +tests/data/ljspeech/wavs/LJ035-0160.wav|tests/data/ljspeech/wavs/LJ035-0160.npy +tests/data/ljspeech/wavs/LJ044-0165.wav|tests/data/ljspeech/wavs/LJ044-0165.npy +tests/data/ljspeech/wavs/LJ012-0150.wav|tests/data/ljspeech/wavs/LJ012-0150.npy +tests/data/ljspeech/wavs/LJ044-0182.wav|tests/data/ljspeech/wavs/LJ044-0182.npy +tests/data/ljspeech/wavs/LJ011-0067.wav|tests/data/ljspeech/wavs/LJ011-0067.npy +tests/data/ljspeech/wavs/LJ022-0079.wav|tests/data/ljspeech/wavs/LJ022-0079.npy +tests/data/ljspeech/wavs/LJ013-0155.wav|tests/data/ljspeech/wavs/LJ013-0155.npy +tests/data/ljspeech/wavs/LJ039-0090.wav|tests/data/ljspeech/wavs/LJ039-0090.npy +tests/data/ljspeech/wavs/LJ046-0172.wav|tests/data/ljspeech/wavs/LJ046-0172.npy +tests/data/ljspeech/wavs/LJ048-0181.wav|tests/data/ljspeech/wavs/LJ048-0181.npy +tests/data/ljspeech/wavs/LJ014-0004.wav|tests/data/ljspeech/wavs/LJ014-0004.npy +tests/data/ljspeech/wavs/LJ001-0025.wav|tests/data/ljspeech/wavs/LJ001-0025.npy +tests/data/ljspeech/wavs/LJ039-0037.wav|tests/data/ljspeech/wavs/LJ039-0037.npy +tests/data/ljspeech/wavs/LJ012-0169.wav|tests/data/ljspeech/wavs/LJ012-0169.npy +tests/data/ljspeech/wavs/LJ012-0206.wav|tests/data/ljspeech/wavs/LJ012-0206.npy +tests/data/ljspeech/wavs/LJ012-0257.wav|tests/data/ljspeech/wavs/LJ012-0257.npy +tests/data/ljspeech/wavs/LJ028-0226.wav|tests/data/ljspeech/wavs/LJ028-0226.npy +tests/data/ljspeech/wavs/LJ018-0394.wav|tests/data/ljspeech/wavs/LJ018-0394.npy +tests/data/ljspeech/wavs/LJ048-0115.wav|tests/data/ljspeech/wavs/LJ048-0115.npy +tests/data/ljspeech/wavs/LJ029-0150.wav|tests/data/ljspeech/wavs/LJ029-0150.npy +tests/data/ljspeech/wavs/LJ038-0258.wav|tests/data/ljspeech/wavs/LJ038-0258.npy +tests/data/ljspeech/wavs/LJ010-0314.wav|tests/data/ljspeech/wavs/LJ010-0314.npy +tests/data/ljspeech/wavs/LJ024-0138.wav|tests/data/ljspeech/wavs/LJ024-0138.npy +tests/data/ljspeech/wavs/LJ049-0222.wav|tests/data/ljspeech/wavs/LJ049-0222.npy +tests/data/ljspeech/wavs/LJ004-0230.wav|tests/data/ljspeech/wavs/LJ004-0230.npy +tests/data/ljspeech/wavs/LJ009-0244.wav|tests/data/ljspeech/wavs/LJ009-0244.npy +tests/data/ljspeech/wavs/LJ011-0084.wav|tests/data/ljspeech/wavs/LJ011-0084.npy +tests/data/ljspeech/wavs/LJ043-0124.wav|tests/data/ljspeech/wavs/LJ043-0124.npy +tests/data/ljspeech/wavs/LJ002-0270.wav|tests/data/ljspeech/wavs/LJ002-0270.npy +tests/data/ljspeech/wavs/LJ029-0089.wav|tests/data/ljspeech/wavs/LJ029-0089.npy +tests/data/ljspeech/wavs/LJ001-0092.wav|tests/data/ljspeech/wavs/LJ001-0092.npy +tests/data/ljspeech/wavs/LJ030-0154.wav|tests/data/ljspeech/wavs/LJ030-0154.npy +tests/data/ljspeech/wavs/LJ005-0039.wav|tests/data/ljspeech/wavs/LJ005-0039.npy +tests/data/ljspeech/wavs/LJ004-0030.wav|tests/data/ljspeech/wavs/LJ004-0030.npy +tests/data/ljspeech/wavs/LJ044-0079.wav|tests/data/ljspeech/wavs/LJ044-0079.npy +tests/data/ljspeech/wavs/LJ029-0093.wav|tests/data/ljspeech/wavs/LJ029-0093.npy 
+tests/data/ljspeech/wavs/LJ043-0045.wav|tests/data/ljspeech/wavs/LJ043-0045.npy +tests/data/ljspeech/wavs/LJ046-0055.wav|tests/data/ljspeech/wavs/LJ046-0055.npy +tests/data/ljspeech/wavs/LJ003-0319.wav|tests/data/ljspeech/wavs/LJ003-0319.npy +tests/data/ljspeech/wavs/LJ003-0143.wav|tests/data/ljspeech/wavs/LJ003-0143.npy +tests/data/ljspeech/wavs/LJ022-0143.wav|tests/data/ljspeech/wavs/LJ022-0143.npy +tests/data/ljspeech/wavs/LJ030-0079.wav|tests/data/ljspeech/wavs/LJ030-0079.npy +tests/data/ljspeech/wavs/LJ044-0059.wav|tests/data/ljspeech/wavs/LJ044-0059.npy +tests/data/ljspeech/wavs/LJ003-0226.wav|tests/data/ljspeech/wavs/LJ003-0226.npy +tests/data/ljspeech/wavs/LJ005-0161.wav|tests/data/ljspeech/wavs/LJ005-0161.npy +tests/data/ljspeech/wavs/LJ022-0173.wav|tests/data/ljspeech/wavs/LJ022-0173.npy +tests/data/ljspeech/wavs/LJ048-0008.wav|tests/data/ljspeech/wavs/LJ048-0008.npy +tests/data/ljspeech/wavs/LJ006-0049.wav|tests/data/ljspeech/wavs/LJ006-0049.npy +tests/data/ljspeech/wavs/LJ001-0055.wav|tests/data/ljspeech/wavs/LJ001-0055.npy +tests/data/ljspeech/wavs/LJ006-0275.wav|tests/data/ljspeech/wavs/LJ006-0275.npy +tests/data/ljspeech/wavs/LJ043-0025.wav|tests/data/ljspeech/wavs/LJ043-0025.npy +tests/data/ljspeech/wavs/LJ023-0038.wav|tests/data/ljspeech/wavs/LJ023-0038.npy +tests/data/ljspeech/wavs/LJ006-0131.wav|tests/data/ljspeech/wavs/LJ006-0131.npy +tests/data/ljspeech/wavs/LJ022-0027.wav|tests/data/ljspeech/wavs/LJ022-0027.npy +tests/data/ljspeech/wavs/LJ005-0222.wav|tests/data/ljspeech/wavs/LJ005-0222.npy +tests/data/ljspeech/wavs/LJ001-0060.wav|tests/data/ljspeech/wavs/LJ001-0060.npy +tests/data/ljspeech/wavs/LJ006-0196.wav|tests/data/ljspeech/wavs/LJ006-0196.npy +tests/data/ljspeech/wavs/LJ029-0184.wav|tests/data/ljspeech/wavs/LJ029-0184.npy +tests/data/ljspeech/wavs/LJ002-0310.wav|tests/data/ljspeech/wavs/LJ002-0310.npy +tests/data/ljspeech/wavs/LJ018-0224.wav|tests/data/ljspeech/wavs/LJ018-0224.npy +tests/data/ljspeech/wavs/LJ032-0025.wav|tests/data/ljspeech/wavs/LJ032-0025.npy +tests/data/ljspeech/wavs/LJ040-0171.wav|tests/data/ljspeech/wavs/LJ040-0171.npy +tests/data/ljspeech/wavs/LJ049-0058.wav|tests/data/ljspeech/wavs/LJ049-0058.npy +tests/data/ljspeech/wavs/LJ010-0185.wav|tests/data/ljspeech/wavs/LJ010-0185.npy +tests/data/ljspeech/wavs/LJ026-0131.wav|tests/data/ljspeech/wavs/LJ026-0131.npy +tests/data/ljspeech/wavs/LJ019-0215.wav|tests/data/ljspeech/wavs/LJ019-0215.npy +tests/data/ljspeech/wavs/LJ035-0091.wav|tests/data/ljspeech/wavs/LJ035-0091.npy +tests/data/ljspeech/wavs/LJ028-0402.wav|tests/data/ljspeech/wavs/LJ028-0402.npy +tests/data/ljspeech/wavs/LJ037-0074.wav|tests/data/ljspeech/wavs/LJ037-0074.npy +tests/data/ljspeech/wavs/LJ018-0190.wav|tests/data/ljspeech/wavs/LJ018-0190.npy +tests/data/ljspeech/wavs/LJ036-0028.wav|tests/data/ljspeech/wavs/LJ036-0028.npy +tests/data/ljspeech/wavs/LJ015-0180.wav|tests/data/ljspeech/wavs/LJ015-0180.npy +tests/data/ljspeech/wavs/LJ019-0228.wav|tests/data/ljspeech/wavs/LJ019-0228.npy +tests/data/ljspeech/wavs/LJ018-0183.wav|tests/data/ljspeech/wavs/LJ018-0183.npy +tests/data/ljspeech/wavs/LJ017-0050.wav|tests/data/ljspeech/wavs/LJ017-0050.npy +tests/data/ljspeech/wavs/LJ049-0073.wav|tests/data/ljspeech/wavs/LJ049-0073.npy +tests/data/ljspeech/wavs/LJ011-0133.wav|tests/data/ljspeech/wavs/LJ011-0133.npy +tests/data/ljspeech/wavs/LJ041-0010.wav|tests/data/ljspeech/wavs/LJ041-0010.npy +tests/data/ljspeech/wavs/LJ030-0250.wav|tests/data/ljspeech/wavs/LJ030-0250.npy 
+tests/data/ljspeech/wavs/LJ028-0377.wav|tests/data/ljspeech/wavs/LJ028-0377.npy +tests/data/ljspeech/wavs/LJ040-0051.wav|tests/data/ljspeech/wavs/LJ040-0051.npy +tests/data/ljspeech/wavs/LJ011-0169.wav|tests/data/ljspeech/wavs/LJ011-0169.npy +tests/data/ljspeech/wavs/LJ011-0203.wav|tests/data/ljspeech/wavs/LJ011-0203.npy +tests/data/ljspeech/wavs/LJ026-0005.wav|tests/data/ljspeech/wavs/LJ026-0005.npy +tests/data/ljspeech/wavs/LJ018-0172.wav|tests/data/ljspeech/wavs/LJ018-0172.npy +tests/data/ljspeech/wavs/LJ009-0263.wav|tests/data/ljspeech/wavs/LJ009-0263.npy +tests/data/ljspeech/wavs/LJ028-0411.wav|tests/data/ljspeech/wavs/LJ028-0411.npy +tests/data/ljspeech/wavs/LJ016-0285.wav|tests/data/ljspeech/wavs/LJ016-0285.npy +tests/data/ljspeech/wavs/LJ036-0174.wav|tests/data/ljspeech/wavs/LJ036-0174.npy +tests/data/ljspeech/wavs/LJ039-0181.wav|tests/data/ljspeech/wavs/LJ039-0181.npy +tests/data/ljspeech/wavs/LJ028-0382.wav|tests/data/ljspeech/wavs/LJ028-0382.npy +tests/data/ljspeech/wavs/LJ038-0120.wav|tests/data/ljspeech/wavs/LJ038-0120.npy +tests/data/ljspeech/wavs/LJ047-0153.wav|tests/data/ljspeech/wavs/LJ047-0153.npy +tests/data/ljspeech/wavs/LJ015-0223.wav|tests/data/ljspeech/wavs/LJ015-0223.npy +tests/data/ljspeech/wavs/LJ016-0079.wav|tests/data/ljspeech/wavs/LJ016-0079.npy +tests/data/ljspeech/wavs/LJ028-0124.wav|tests/data/ljspeech/wavs/LJ028-0124.npy +tests/data/ljspeech/wavs/LJ018-0104.wav|tests/data/ljspeech/wavs/LJ018-0104.npy +tests/data/ljspeech/wavs/LJ038-0246.wav|tests/data/ljspeech/wavs/LJ038-0246.npy +tests/data/ljspeech/wavs/LJ013-0030.wav|tests/data/ljspeech/wavs/LJ013-0030.npy +tests/data/ljspeech/wavs/LJ015-0206.wav|tests/data/ljspeech/wavs/LJ015-0206.npy +tests/data/ljspeech/wavs/LJ015-0078.wav|tests/data/ljspeech/wavs/LJ015-0078.npy +tests/data/ljspeech/wavs/LJ012-0279.wav|tests/data/ljspeech/wavs/LJ012-0279.npy +tests/data/ljspeech/wavs/LJ027-0163.wav|tests/data/ljspeech/wavs/LJ027-0163.npy +tests/data/ljspeech/wavs/LJ037-0192.wav|tests/data/ljspeech/wavs/LJ037-0192.npy +tests/data/ljspeech/wavs/LJ038-0210.wav|tests/data/ljspeech/wavs/LJ038-0210.npy +tests/data/ljspeech/wavs/LJ038-0087.wav|tests/data/ljspeech/wavs/LJ038-0087.npy +tests/data/ljspeech/wavs/LJ016-0106.wav|tests/data/ljspeech/wavs/LJ016-0106.npy +tests/data/ljspeech/wavs/LJ016-0036.wav|tests/data/ljspeech/wavs/LJ016-0036.npy +tests/data/ljspeech/wavs/LJ032-0126.wav|tests/data/ljspeech/wavs/LJ032-0126.npy +tests/data/ljspeech/wavs/LJ027-0147.wav|tests/data/ljspeech/wavs/LJ027-0147.npy +tests/data/ljspeech/wavs/LJ035-0191.wav|tests/data/ljspeech/wavs/LJ035-0191.npy +tests/data/ljspeech/wavs/LJ016-0170.wav|tests/data/ljspeech/wavs/LJ016-0170.npy +tests/data/ljspeech/wavs/LJ018-0347.wav|tests/data/ljspeech/wavs/LJ018-0347.npy +tests/data/ljspeech/wavs/LJ032-0173.wav|tests/data/ljspeech/wavs/LJ032-0173.npy +tests/data/ljspeech/wavs/LJ015-0072.wav|tests/data/ljspeech/wavs/LJ015-0072.npy +tests/data/ljspeech/wavs/LJ014-0090.wav|tests/data/ljspeech/wavs/LJ014-0090.npy +tests/data/ljspeech/wavs/LJ014-0092.wav|tests/data/ljspeech/wavs/LJ014-0092.npy +tests/data/ljspeech/wavs/LJ013-0165.wav|tests/data/ljspeech/wavs/LJ013-0165.npy +tests/data/ljspeech/wavs/LJ015-0250.wav|tests/data/ljspeech/wavs/LJ015-0250.npy +tests/data/ljspeech/wavs/LJ013-0179.wav|tests/data/ljspeech/wavs/LJ013-0179.npy +tests/data/ljspeech/wavs/LJ028-0017.wav|tests/data/ljspeech/wavs/LJ028-0017.npy +tests/data/ljspeech/wavs/LJ028-0093.wav|tests/data/ljspeech/wavs/LJ028-0093.npy 
+tests/data/ljspeech/wavs/LJ026-0063.wav|tests/data/ljspeech/wavs/LJ026-0063.npy +tests/data/ljspeech/wavs/LJ019-0021.wav|tests/data/ljspeech/wavs/LJ019-0021.npy +tests/data/ljspeech/wavs/LJ019-0388.wav|tests/data/ljspeech/wavs/LJ019-0388.npy +tests/data/ljspeech/wavs/LJ008-0020.wav|tests/data/ljspeech/wavs/LJ008-0020.npy +tests/data/ljspeech/wavs/LJ027-0013.wav|tests/data/ljspeech/wavs/LJ027-0013.npy +tests/data/ljspeech/wavs/LJ018-0276.wav|tests/data/ljspeech/wavs/LJ018-0276.npy +tests/data/ljspeech/wavs/LJ009-0140.wav|tests/data/ljspeech/wavs/LJ009-0140.npy +tests/data/ljspeech/wavs/LJ042-0192.wav|tests/data/ljspeech/wavs/LJ042-0192.npy +tests/data/ljspeech/wavs/LJ042-0128.wav|tests/data/ljspeech/wavs/LJ042-0128.npy +tests/data/ljspeech/wavs/LJ048-0034.wav|tests/data/ljspeech/wavs/LJ048-0034.npy +tests/data/ljspeech/wavs/LJ019-0329.wav|tests/data/ljspeech/wavs/LJ019-0329.npy +tests/data/ljspeech/wavs/LJ029-0033.wav|tests/data/ljspeech/wavs/LJ029-0033.npy +tests/data/ljspeech/wavs/LJ013-0127.wav|tests/data/ljspeech/wavs/LJ013-0127.npy +tests/data/ljspeech/wavs/LJ008-0047.wav|tests/data/ljspeech/wavs/LJ008-0047.npy +tests/data/ljspeech/wavs/LJ012-0290.wav|tests/data/ljspeech/wavs/LJ012-0290.npy +tests/data/ljspeech/wavs/LJ008-0021.wav|tests/data/ljspeech/wavs/LJ008-0021.npy +tests/data/ljspeech/wavs/LJ009-0066.wav|tests/data/ljspeech/wavs/LJ009-0066.npy +tests/data/ljspeech/wavs/LJ014-0070.wav|tests/data/ljspeech/wavs/LJ014-0070.npy +tests/data/ljspeech/wavs/LJ018-0007.wav|tests/data/ljspeech/wavs/LJ018-0007.npy +tests/data/ljspeech/wavs/LJ035-0193.wav|tests/data/ljspeech/wavs/LJ035-0193.npy +tests/data/ljspeech/wavs/LJ041-0192.wav|tests/data/ljspeech/wavs/LJ041-0192.npy +tests/data/ljspeech/wavs/LJ004-0136.wav|tests/data/ljspeech/wavs/LJ004-0136.npy +tests/data/ljspeech/wavs/LJ019-0128.wav|tests/data/ljspeech/wavs/LJ019-0128.npy +tests/data/ljspeech/wavs/LJ047-0059.wav|tests/data/ljspeech/wavs/LJ047-0059.npy +tests/data/ljspeech/wavs/LJ050-0060.wav|tests/data/ljspeech/wavs/LJ050-0060.npy +tests/data/ljspeech/wavs/LJ008-0214.wav|tests/data/ljspeech/wavs/LJ008-0214.npy +tests/data/ljspeech/wavs/LJ035-0172.wav|tests/data/ljspeech/wavs/LJ035-0172.npy +tests/data/ljspeech/wavs/LJ018-0289.wav|tests/data/ljspeech/wavs/LJ018-0289.npy +tests/data/ljspeech/wavs/LJ017-0242.wav|tests/data/ljspeech/wavs/LJ017-0242.npy +tests/data/ljspeech/wavs/LJ017-0257.wav|tests/data/ljspeech/wavs/LJ017-0257.npy +tests/data/ljspeech/wavs/LJ035-0034.wav|tests/data/ljspeech/wavs/LJ035-0034.npy +tests/data/ljspeech/wavs/LJ018-0296.wav|tests/data/ljspeech/wavs/LJ018-0296.npy +tests/data/ljspeech/wavs/LJ004-0163.wav|tests/data/ljspeech/wavs/LJ004-0163.npy +tests/data/ljspeech/wavs/LJ039-0012.wav|tests/data/ljspeech/wavs/LJ039-0012.npy +tests/data/ljspeech/wavs/LJ048-0079.wav|tests/data/ljspeech/wavs/LJ048-0079.npy +tests/data/ljspeech/wavs/LJ025-0065.wav|tests/data/ljspeech/wavs/LJ025-0065.npy +tests/data/ljspeech/wavs/LJ034-0170.wav|tests/data/ljspeech/wavs/LJ034-0170.npy +tests/data/ljspeech/wavs/LJ045-0157.wav|tests/data/ljspeech/wavs/LJ045-0157.npy +tests/data/ljspeech/wavs/LJ022-0094.wav|tests/data/ljspeech/wavs/LJ022-0094.npy +tests/data/ljspeech/wavs/LJ013-0049.wav|tests/data/ljspeech/wavs/LJ013-0049.npy +tests/data/ljspeech/wavs/LJ007-0150.wav|tests/data/ljspeech/wavs/LJ007-0150.npy +tests/data/ljspeech/wavs/LJ042-0112.wav|tests/data/ljspeech/wavs/LJ042-0112.npy +tests/data/ljspeech/wavs/LJ045-0062.wav|tests/data/ljspeech/wavs/LJ045-0062.npy 
+tests/data/ljspeech/wavs/LJ035-0152.wav|tests/data/ljspeech/wavs/LJ035-0152.npy +tests/data/ljspeech/wavs/LJ031-0225.wav|tests/data/ljspeech/wavs/LJ031-0225.npy +tests/data/ljspeech/wavs/LJ013-0246.wav|tests/data/ljspeech/wavs/LJ013-0246.npy +tests/data/ljspeech/wavs/LJ009-0078.wav|tests/data/ljspeech/wavs/LJ009-0078.npy +tests/data/ljspeech/wavs/LJ016-0075.wav|tests/data/ljspeech/wavs/LJ016-0075.npy +tests/data/ljspeech/wavs/LJ037-0121.wav|tests/data/ljspeech/wavs/LJ037-0121.npy +tests/data/ljspeech/wavs/LJ047-0228.wav|tests/data/ljspeech/wavs/LJ047-0228.npy +tests/data/ljspeech/wavs/LJ008-0057.wav|tests/data/ljspeech/wavs/LJ008-0057.npy +tests/data/ljspeech/wavs/LJ012-0017.wav|tests/data/ljspeech/wavs/LJ012-0017.npy +tests/data/ljspeech/wavs/LJ026-0056.wav|tests/data/ljspeech/wavs/LJ026-0056.npy +tests/data/ljspeech/wavs/LJ033-0083.wav|tests/data/ljspeech/wavs/LJ033-0083.npy +tests/data/ljspeech/wavs/LJ023-0132.wav|tests/data/ljspeech/wavs/LJ023-0132.npy +tests/data/ljspeech/wavs/LJ016-0136.wav|tests/data/ljspeech/wavs/LJ016-0136.npy +tests/data/ljspeech/wavs/LJ012-0035.wav|tests/data/ljspeech/wavs/LJ012-0035.npy +tests/data/ljspeech/wavs/LJ012-0267.wav|tests/data/ljspeech/wavs/LJ012-0267.npy +tests/data/ljspeech/wavs/LJ016-0038.wav|tests/data/ljspeech/wavs/LJ016-0038.npy +tests/data/ljspeech/wavs/LJ003-0270.wav|tests/data/ljspeech/wavs/LJ003-0270.npy +tests/data/ljspeech/wavs/LJ042-0214.wav|tests/data/ljspeech/wavs/LJ042-0214.npy +tests/data/ljspeech/wavs/LJ004-0240.wav|tests/data/ljspeech/wavs/LJ004-0240.npy +tests/data/ljspeech/wavs/LJ039-0162.wav|tests/data/ljspeech/wavs/LJ039-0162.npy +tests/data/ljspeech/wavs/LJ033-0043.wav|tests/data/ljspeech/wavs/LJ033-0043.npy +tests/data/ljspeech/wavs/LJ012-0181.wav|tests/data/ljspeech/wavs/LJ012-0181.npy +tests/data/ljspeech/wavs/LJ014-0315.wav|tests/data/ljspeech/wavs/LJ014-0315.npy +tests/data/ljspeech/wavs/LJ038-0078.wav|tests/data/ljspeech/wavs/LJ038-0078.npy +tests/data/ljspeech/wavs/LJ038-0222.wav|tests/data/ljspeech/wavs/LJ038-0222.npy +tests/data/ljspeech/wavs/LJ018-0002.wav|tests/data/ljspeech/wavs/LJ018-0002.npy +tests/data/ljspeech/wavs/LJ037-0216.wav|tests/data/ljspeech/wavs/LJ037-0216.npy +tests/data/ljspeech/wavs/LJ042-0162.wav|tests/data/ljspeech/wavs/LJ042-0162.npy +tests/data/ljspeech/wavs/LJ018-0014.wav|tests/data/ljspeech/wavs/LJ018-0014.npy +tests/data/ljspeech/wavs/LJ026-0074.wav|tests/data/ljspeech/wavs/LJ026-0074.npy +tests/data/ljspeech/wavs/LJ014-0203.wav|tests/data/ljspeech/wavs/LJ014-0203.npy +tests/data/ljspeech/wavs/LJ007-0225.wav|tests/data/ljspeech/wavs/LJ007-0225.npy +tests/data/ljspeech/wavs/LJ016-0037.wav|tests/data/ljspeech/wavs/LJ016-0037.npy +tests/data/ljspeech/wavs/LJ015-0199.wav|tests/data/ljspeech/wavs/LJ015-0199.npy +tests/data/ljspeech/wavs/LJ038-0053.wav|tests/data/ljspeech/wavs/LJ038-0053.npy +tests/data/ljspeech/wavs/LJ047-0037.wav|tests/data/ljspeech/wavs/LJ047-0037.npy +tests/data/ljspeech/wavs/LJ016-0178.wav|tests/data/ljspeech/wavs/LJ016-0178.npy +tests/data/ljspeech/wavs/LJ003-0253.wav|tests/data/ljspeech/wavs/LJ003-0253.npy +tests/data/ljspeech/wavs/LJ003-0258.wav|tests/data/ljspeech/wavs/LJ003-0258.npy +tests/data/ljspeech/wavs/LJ015-0314.wav|tests/data/ljspeech/wavs/LJ015-0314.npy +tests/data/ljspeech/wavs/LJ007-0043.wav|tests/data/ljspeech/wavs/LJ007-0043.npy +tests/data/ljspeech/wavs/LJ014-0161.wav|tests/data/ljspeech/wavs/LJ014-0161.npy +tests/data/ljspeech/wavs/LJ018-0259.wav|tests/data/ljspeech/wavs/LJ018-0259.npy 
+tests/data/ljspeech/wavs/LJ042-0169.wav|tests/data/ljspeech/wavs/LJ042-0169.npy +tests/data/ljspeech/wavs/LJ003-0257.wav|tests/data/ljspeech/wavs/LJ003-0257.npy +tests/data/ljspeech/wavs/LJ018-0257.wav|tests/data/ljspeech/wavs/LJ018-0257.npy +tests/data/ljspeech/wavs/LJ003-0206.wav|tests/data/ljspeech/wavs/LJ003-0206.npy +tests/data/ljspeech/wavs/LJ018-0036.wav|tests/data/ljspeech/wavs/LJ018-0036.npy +tests/data/ljspeech/wavs/LJ029-0059.wav|tests/data/ljspeech/wavs/LJ029-0059.npy +tests/data/ljspeech/wavs/LJ038-0038.wav|tests/data/ljspeech/wavs/LJ038-0038.npy +tests/data/ljspeech/wavs/LJ026-0073.wav|tests/data/ljspeech/wavs/LJ026-0073.npy +tests/data/ljspeech/wavs/LJ034-0187.wav|tests/data/ljspeech/wavs/LJ034-0187.npy +tests/data/ljspeech/wavs/LJ018-0270.wav|tests/data/ljspeech/wavs/LJ018-0270.npy +tests/data/ljspeech/wavs/LJ003-0304.wav|tests/data/ljspeech/wavs/LJ003-0304.npy +tests/data/ljspeech/wavs/LJ034-0111.wav|tests/data/ljspeech/wavs/LJ034-0111.npy +tests/data/ljspeech/wavs/LJ010-0068.wav|tests/data/ljspeech/wavs/LJ010-0068.npy +tests/data/ljspeech/wavs/LJ005-0201.wav|tests/data/ljspeech/wavs/LJ005-0201.npy +tests/data/ljspeech/wavs/LJ029-0181.wav|tests/data/ljspeech/wavs/LJ029-0181.npy +tests/data/ljspeech/wavs/LJ010-0173.wav|tests/data/ljspeech/wavs/LJ010-0173.npy +tests/data/ljspeech/wavs/LJ043-0117.wav|tests/data/ljspeech/wavs/LJ043-0117.npy +tests/data/ljspeech/wavs/LJ044-0037.wav|tests/data/ljspeech/wavs/LJ044-0037.npy +tests/data/ljspeech/wavs/LJ017-0216.wav|tests/data/ljspeech/wavs/LJ017-0216.npy +tests/data/ljspeech/wavs/LJ024-0080.wav|tests/data/ljspeech/wavs/LJ024-0080.npy +tests/data/ljspeech/wavs/LJ002-0112.wav|tests/data/ljspeech/wavs/LJ002-0112.npy +tests/data/ljspeech/wavs/LJ010-0220.wav|tests/data/ljspeech/wavs/LJ010-0220.npy +tests/data/ljspeech/wavs/LJ006-0184.wav|tests/data/ljspeech/wavs/LJ006-0184.npy +tests/data/ljspeech/wavs/LJ016-0440.wav|tests/data/ljspeech/wavs/LJ016-0440.npy +tests/data/ljspeech/wavs/LJ017-0024.wav|tests/data/ljspeech/wavs/LJ017-0024.npy +tests/data/ljspeech/wavs/LJ017-0219.wav|tests/data/ljspeech/wavs/LJ017-0219.npy +tests/data/ljspeech/wavs/LJ005-0230.wav|tests/data/ljspeech/wavs/LJ005-0230.npy +tests/data/ljspeech/wavs/LJ041-0033.wav|tests/data/ljspeech/wavs/LJ041-0033.npy +tests/data/ljspeech/wavs/LJ033-0071.wav|tests/data/ljspeech/wavs/LJ033-0071.npy +tests/data/ljspeech/wavs/LJ010-0077.wav|tests/data/ljspeech/wavs/LJ010-0077.npy +tests/data/ljspeech/wavs/LJ016-0332.wav|tests/data/ljspeech/wavs/LJ016-0332.npy +tests/data/ljspeech/wavs/LJ010-0209.wav|tests/data/ljspeech/wavs/LJ010-0209.npy +tests/data/ljspeech/wavs/LJ041-0193.wav|tests/data/ljspeech/wavs/LJ041-0193.npy +tests/data/ljspeech/wavs/LJ010-0103.wav|tests/data/ljspeech/wavs/LJ010-0103.npy +tests/data/ljspeech/wavs/LJ008-0293.wav|tests/data/ljspeech/wavs/LJ008-0293.npy +tests/data/ljspeech/wavs/LJ009-0082.wav|tests/data/ljspeech/wavs/LJ009-0082.npy +tests/data/ljspeech/wavs/LJ017-0106.wav|tests/data/ljspeech/wavs/LJ017-0106.npy +tests/data/ljspeech/wavs/LJ003-0020.wav|tests/data/ljspeech/wavs/LJ003-0020.npy +tests/data/ljspeech/wavs/LJ001-0145.wav|tests/data/ljspeech/wavs/LJ001-0145.npy +tests/data/ljspeech/wavs/LJ006-0205.wav|tests/data/ljspeech/wavs/LJ006-0205.npy +tests/data/ljspeech/wavs/LJ001-0149.wav|tests/data/ljspeech/wavs/LJ001-0149.npy +tests/data/ljspeech/wavs/LJ002-0067.wav|tests/data/ljspeech/wavs/LJ002-0067.npy +tests/data/ljspeech/wavs/LJ019-0178.wav|tests/data/ljspeech/wavs/LJ019-0178.npy 
+tests/data/ljspeech/wavs/LJ002-0120.wav|tests/data/ljspeech/wavs/LJ002-0120.npy +tests/data/ljspeech/wavs/LJ042-0050.wav|tests/data/ljspeech/wavs/LJ042-0050.npy +tests/data/ljspeech/wavs/LJ011-0099.wav|tests/data/ljspeech/wavs/LJ011-0099.npy +tests/data/ljspeech/wavs/LJ037-0045.wav|tests/data/ljspeech/wavs/LJ037-0045.npy +tests/data/ljspeech/wavs/LJ031-0116.wav|tests/data/ljspeech/wavs/LJ031-0116.npy +tests/data/ljspeech/wavs/LJ011-0081.wav|tests/data/ljspeech/wavs/LJ011-0081.npy +tests/data/ljspeech/wavs/LJ050-0040.wav|tests/data/ljspeech/wavs/LJ050-0040.npy +tests/data/ljspeech/wavs/LJ025-0095.wav|tests/data/ljspeech/wavs/LJ025-0095.npy +tests/data/ljspeech/wavs/LJ040-0212.wav|tests/data/ljspeech/wavs/LJ040-0212.npy +tests/data/ljspeech/wavs/LJ046-0165.wav|tests/data/ljspeech/wavs/LJ046-0165.npy +tests/data/ljspeech/wavs/LJ008-0309.wav|tests/data/ljspeech/wavs/LJ008-0309.npy +tests/data/ljspeech/wavs/LJ002-0262.wav|tests/data/ljspeech/wavs/LJ002-0262.npy +tests/data/ljspeech/wavs/LJ011-0144.wav|tests/data/ljspeech/wavs/LJ011-0144.npy +tests/data/ljspeech/wavs/LJ010-0274.wav|tests/data/ljspeech/wavs/LJ010-0274.npy +tests/data/ljspeech/wavs/LJ016-0260.wav|tests/data/ljspeech/wavs/LJ016-0260.npy +tests/data/ljspeech/wavs/LJ047-0164.wav|tests/data/ljspeech/wavs/LJ047-0164.npy +tests/data/ljspeech/wavs/LJ009-0270.wav|tests/data/ljspeech/wavs/LJ009-0270.npy +tests/data/ljspeech/wavs/LJ002-0224.wav|tests/data/ljspeech/wavs/LJ002-0224.npy +tests/data/ljspeech/wavs/LJ034-0168.wav|tests/data/ljspeech/wavs/LJ034-0168.npy +tests/data/ljspeech/wavs/LJ049-0191.wav|tests/data/ljspeech/wavs/LJ049-0191.npy +tests/data/ljspeech/wavs/LJ048-0251.wav|tests/data/ljspeech/wavs/LJ048-0251.npy +tests/data/ljspeech/wavs/LJ040-0223.wav|tests/data/ljspeech/wavs/LJ040-0223.npy +tests/data/ljspeech/wavs/LJ019-0134.wav|tests/data/ljspeech/wavs/LJ019-0134.npy +tests/data/ljspeech/wavs/LJ024-0037.wav|tests/data/ljspeech/wavs/LJ024-0037.npy +tests/data/ljspeech/wavs/LJ010-0239.wav|tests/data/ljspeech/wavs/LJ010-0239.npy +tests/data/ljspeech/wavs/LJ021-0012.wav|tests/data/ljspeech/wavs/LJ021-0012.npy +tests/data/ljspeech/wavs/LJ021-0009.wav|tests/data/ljspeech/wavs/LJ021-0009.npy +tests/data/ljspeech/wavs/LJ028-0268.wav|tests/data/ljspeech/wavs/LJ028-0268.npy +tests/data/ljspeech/wavs/LJ010-0033.wav|tests/data/ljspeech/wavs/LJ010-0033.npy +tests/data/ljspeech/wavs/LJ041-0166.wav|tests/data/ljspeech/wavs/LJ041-0166.npy +tests/data/ljspeech/wavs/LJ032-0274.wav|tests/data/ljspeech/wavs/LJ032-0274.npy +tests/data/ljspeech/wavs/LJ017-0035.wav|tests/data/ljspeech/wavs/LJ017-0035.npy +tests/data/ljspeech/wavs/LJ047-0179.wav|tests/data/ljspeech/wavs/LJ047-0179.npy +tests/data/ljspeech/wavs/LJ032-0241.wav|tests/data/ljspeech/wavs/LJ032-0241.npy +tests/data/ljspeech/wavs/LJ037-0125.wav|tests/data/ljspeech/wavs/LJ037-0125.npy +tests/data/ljspeech/wavs/LJ027-0175.wav|tests/data/ljspeech/wavs/LJ027-0175.npy +tests/data/ljspeech/wavs/LJ048-0036.wav|tests/data/ljspeech/wavs/LJ048-0036.npy +tests/data/ljspeech/wavs/LJ017-0112.wav|tests/data/ljspeech/wavs/LJ017-0112.npy +tests/data/ljspeech/wavs/LJ047-0182.wav|tests/data/ljspeech/wavs/LJ047-0182.npy +tests/data/ljspeech/wavs/LJ017-0181.wav|tests/data/ljspeech/wavs/LJ017-0181.npy +tests/data/ljspeech/wavs/LJ033-0173.wav|tests/data/ljspeech/wavs/LJ033-0173.npy +tests/data/ljspeech/wavs/LJ033-0172.wav|tests/data/ljspeech/wavs/LJ033-0172.npy +tests/data/ljspeech/wavs/LJ049-0043.wav|tests/data/ljspeech/wavs/LJ049-0043.npy 
+tests/data/ljspeech/wavs/LJ024-0018.wav|tests/data/ljspeech/wavs/LJ024-0018.npy +tests/data/ljspeech/wavs/LJ016-0217.wav|tests/data/ljspeech/wavs/LJ016-0217.npy +tests/data/ljspeech/wavs/LJ016-0139.wav|tests/data/ljspeech/wavs/LJ016-0139.npy +tests/data/ljspeech/wavs/LJ017-0204.wav|tests/data/ljspeech/wavs/LJ017-0204.npy +tests/data/ljspeech/wavs/LJ046-0051.wav|tests/data/ljspeech/wavs/LJ046-0051.npy +tests/data/ljspeech/wavs/LJ033-0187.wav|tests/data/ljspeech/wavs/LJ033-0187.npy +tests/data/ljspeech/wavs/LJ017-0157.wav|tests/data/ljspeech/wavs/LJ017-0157.npy +tests/data/ljspeech/wavs/LJ015-0280.wav|tests/data/ljspeech/wavs/LJ015-0280.npy +tests/data/ljspeech/wavs/LJ017-0207.wav|tests/data/ljspeech/wavs/LJ017-0207.npy +tests/data/ljspeech/wavs/LJ017-0205.wav|tests/data/ljspeech/wavs/LJ017-0205.npy +tests/data/ljspeech/wavs/LJ001-0178.wav|tests/data/ljspeech/wavs/LJ001-0178.npy +tests/data/ljspeech/wavs/LJ046-0171.wav|tests/data/ljspeech/wavs/LJ046-0171.npy +tests/data/ljspeech/wavs/LJ030-0214.wav|tests/data/ljspeech/wavs/LJ030-0214.npy +tests/data/ljspeech/wavs/LJ046-0001.wav|tests/data/ljspeech/wavs/LJ046-0001.npy +tests/data/ljspeech/wavs/LJ016-0096.wav|tests/data/ljspeech/wavs/LJ016-0096.npy +tests/data/ljspeech/wavs/LJ010-0304.wav|tests/data/ljspeech/wavs/LJ010-0304.npy +tests/data/ljspeech/wavs/LJ001-0022.wav|tests/data/ljspeech/wavs/LJ001-0022.npy +tests/data/ljspeech/wavs/LJ015-0221.wav|tests/data/ljspeech/wavs/LJ015-0221.npy +tests/data/ljspeech/wavs/LJ027-0152.wav|tests/data/ljspeech/wavs/LJ027-0152.npy +tests/data/ljspeech/wavs/LJ030-0142.wav|tests/data/ljspeech/wavs/LJ030-0142.npy +tests/data/ljspeech/wavs/LJ032-0160.wav|tests/data/ljspeech/wavs/LJ032-0160.npy +tests/data/ljspeech/wavs/LJ028-0370.wav|tests/data/ljspeech/wavs/LJ028-0370.npy +tests/data/ljspeech/wavs/LJ047-0032.wav|tests/data/ljspeech/wavs/LJ047-0032.npy +tests/data/ljspeech/wavs/LJ019-0240.wav|tests/data/ljspeech/wavs/LJ019-0240.npy +tests/data/ljspeech/wavs/LJ032-0185.wav|tests/data/ljspeech/wavs/LJ032-0185.npy +tests/data/ljspeech/wavs/LJ050-0167.wav|tests/data/ljspeech/wavs/LJ050-0167.npy +tests/data/ljspeech/wavs/LJ049-0063.wav|tests/data/ljspeech/wavs/LJ049-0063.npy +tests/data/ljspeech/wavs/LJ031-0040.wav|tests/data/ljspeech/wavs/LJ031-0040.npy +tests/data/ljspeech/wavs/LJ036-0026.wav|tests/data/ljspeech/wavs/LJ036-0026.npy +tests/data/ljspeech/wavs/LJ050-0271.wav|tests/data/ljspeech/wavs/LJ050-0271.npy +tests/data/ljspeech/wavs/LJ032-0174.wav|tests/data/ljspeech/wavs/LJ032-0174.npy +tests/data/ljspeech/wavs/LJ005-0206.wav|tests/data/ljspeech/wavs/LJ005-0206.npy +tests/data/ljspeech/wavs/LJ015-0283.wav|tests/data/ljspeech/wavs/LJ015-0283.npy +tests/data/ljspeech/wavs/LJ043-0166.wav|tests/data/ljspeech/wavs/LJ043-0166.npy +tests/data/ljspeech/wavs/LJ037-0016.wav|tests/data/ljspeech/wavs/LJ037-0016.npy +tests/data/ljspeech/wavs/LJ032-0236.wav|tests/data/ljspeech/wavs/LJ032-0236.npy +tests/data/ljspeech/wavs/LJ024-0039.wav|tests/data/ljspeech/wavs/LJ024-0039.npy +tests/data/ljspeech/wavs/LJ005-0259.wav|tests/data/ljspeech/wavs/LJ005-0259.npy +tests/data/ljspeech/wavs/LJ010-0248.wav|tests/data/ljspeech/wavs/LJ010-0248.npy +tests/data/ljspeech/wavs/LJ041-0006.wav|tests/data/ljspeech/wavs/LJ041-0006.npy +tests/data/ljspeech/wavs/LJ043-0165.wav|tests/data/ljspeech/wavs/LJ043-0165.npy +tests/data/ljspeech/wavs/LJ019-0192.wav|tests/data/ljspeech/wavs/LJ019-0192.npy +tests/data/ljspeech/wavs/LJ002-0284.wav|tests/data/ljspeech/wavs/LJ002-0284.npy 
+tests/data/ljspeech/wavs/LJ019-0152.wav|tests/data/ljspeech/wavs/LJ019-0152.npy +tests/data/ljspeech/wavs/LJ027-0171.wav|tests/data/ljspeech/wavs/LJ027-0171.npy +tests/data/ljspeech/wavs/LJ028-0396.wav|tests/data/ljspeech/wavs/LJ028-0396.npy +tests/data/ljspeech/wavs/LJ023-0047.wav|tests/data/ljspeech/wavs/LJ023-0047.npy +tests/data/ljspeech/wavs/LJ035-0107.wav|tests/data/ljspeech/wavs/LJ035-0107.npy +tests/data/ljspeech/wavs/LJ033-0118.wav|tests/data/ljspeech/wavs/LJ033-0118.npy +tests/data/ljspeech/wavs/LJ033-0005.wav|tests/data/ljspeech/wavs/LJ033-0005.npy +tests/data/ljspeech/wavs/LJ040-0110.wav|tests/data/ljspeech/wavs/LJ040-0110.npy +tests/data/ljspeech/wavs/LJ001-0019.wav|tests/data/ljspeech/wavs/LJ001-0019.npy +tests/data/ljspeech/wavs/LJ003-0046.wav|tests/data/ljspeech/wavs/LJ003-0046.npy +tests/data/ljspeech/wavs/LJ036-0006.wav|tests/data/ljspeech/wavs/LJ036-0006.npy +tests/data/ljspeech/wavs/LJ031-0109.wav|tests/data/ljspeech/wavs/LJ031-0109.npy +tests/data/ljspeech/wavs/LJ042-0083.wav|tests/data/ljspeech/wavs/LJ042-0083.npy +tests/data/ljspeech/wavs/LJ028-0423.wav|tests/data/ljspeech/wavs/LJ028-0423.npy +tests/data/ljspeech/wavs/LJ048-0140.wav|tests/data/ljspeech/wavs/LJ048-0140.npy +tests/data/ljspeech/wavs/LJ008-0029.wav|tests/data/ljspeech/wavs/LJ008-0029.npy +tests/data/ljspeech/wavs/LJ006-0244.wav|tests/data/ljspeech/wavs/LJ006-0244.npy +tests/data/ljspeech/wavs/LJ010-0273.wav|tests/data/ljspeech/wavs/LJ010-0273.npy +tests/data/ljspeech/wavs/LJ019-0382.wav|tests/data/ljspeech/wavs/LJ019-0382.npy +tests/data/ljspeech/wavs/LJ042-0042.wav|tests/data/ljspeech/wavs/LJ042-0042.npy +tests/data/ljspeech/wavs/LJ019-0262.wav|tests/data/ljspeech/wavs/LJ019-0262.npy +tests/data/ljspeech/wavs/LJ048-0171.wav|tests/data/ljspeech/wavs/LJ048-0171.npy +tests/data/ljspeech/wavs/LJ043-0116.wav|tests/data/ljspeech/wavs/LJ043-0116.npy +tests/data/ljspeech/wavs/LJ010-0312.wav|tests/data/ljspeech/wavs/LJ010-0312.npy +tests/data/ljspeech/wavs/LJ008-0129.wav|tests/data/ljspeech/wavs/LJ008-0129.npy +tests/data/ljspeech/wavs/LJ042-0067.wav|tests/data/ljspeech/wavs/LJ042-0067.npy +tests/data/ljspeech/wavs/LJ007-0206.wav|tests/data/ljspeech/wavs/LJ007-0206.npy +tests/data/ljspeech/wavs/LJ028-0307.wav|tests/data/ljspeech/wavs/LJ028-0307.npy +tests/data/ljspeech/wavs/LJ031-0219.wav|tests/data/ljspeech/wavs/LJ031-0219.npy +tests/data/ljspeech/wavs/LJ048-0252.wav|tests/data/ljspeech/wavs/LJ048-0252.npy +tests/data/ljspeech/wavs/LJ008-0109.wav|tests/data/ljspeech/wavs/LJ008-0109.npy +tests/data/ljspeech/wavs/LJ048-0138.wav|tests/data/ljspeech/wavs/LJ048-0138.npy +tests/data/ljspeech/wavs/LJ050-0168.wav|tests/data/ljspeech/wavs/LJ050-0168.npy +tests/data/ljspeech/wavs/LJ019-0251.wav|tests/data/ljspeech/wavs/LJ019-0251.npy +tests/data/ljspeech/wavs/LJ028-0431.wav|tests/data/ljspeech/wavs/LJ028-0431.npy +tests/data/ljspeech/wavs/LJ028-0308.wav|tests/data/ljspeech/wavs/LJ028-0308.npy +tests/data/ljspeech/wavs/LJ011-0017.wav|tests/data/ljspeech/wavs/LJ011-0017.npy +tests/data/ljspeech/wavs/LJ008-0164.wav|tests/data/ljspeech/wavs/LJ008-0164.npy +tests/data/ljspeech/wavs/LJ007-0041.wav|tests/data/ljspeech/wavs/LJ007-0041.npy +tests/data/ljspeech/wavs/LJ025-0112.wav|tests/data/ljspeech/wavs/LJ025-0112.npy +tests/data/ljspeech/wavs/LJ020-0076.wav|tests/data/ljspeech/wavs/LJ020-0076.npy +tests/data/ljspeech/wavs/LJ043-0101.wav|tests/data/ljspeech/wavs/LJ043-0101.npy +tests/data/ljspeech/wavs/LJ031-0061.wav|tests/data/ljspeech/wavs/LJ031-0061.npy 
+tests/data/ljspeech/wavs/LJ015-0073.wav|tests/data/ljspeech/wavs/LJ015-0073.npy +tests/data/ljspeech/wavs/LJ039-0203.wav|tests/data/ljspeech/wavs/LJ039-0203.npy +tests/data/ljspeech/wavs/LJ038-0225.wav|tests/data/ljspeech/wavs/LJ038-0225.npy +tests/data/ljspeech/wavs/LJ022-0150.wav|tests/data/ljspeech/wavs/LJ022-0150.npy +tests/data/ljspeech/wavs/LJ045-0089.wav|tests/data/ljspeech/wavs/LJ045-0089.npy +tests/data/ljspeech/wavs/LJ030-0095.wav|tests/data/ljspeech/wavs/LJ030-0095.npy +tests/data/ljspeech/wavs/LJ019-0087.wav|tests/data/ljspeech/wavs/LJ019-0087.npy +tests/data/ljspeech/wavs/LJ003-0235.wav|tests/data/ljspeech/wavs/LJ003-0235.npy +tests/data/ljspeech/wavs/LJ044-0198.wav|tests/data/ljspeech/wavs/LJ044-0198.npy +tests/data/ljspeech/wavs/LJ035-0194.wav|tests/data/ljspeech/wavs/LJ035-0194.npy +tests/data/ljspeech/wavs/LJ022-0166.wav|tests/data/ljspeech/wavs/LJ022-0166.npy +tests/data/ljspeech/wavs/LJ018-0382.wav|tests/data/ljspeech/wavs/LJ018-0382.npy +tests/data/ljspeech/wavs/LJ011-0219.wav|tests/data/ljspeech/wavs/LJ011-0219.npy +tests/data/ljspeech/wavs/LJ045-0116.wav|tests/data/ljspeech/wavs/LJ045-0116.npy +tests/data/ljspeech/wavs/LJ043-0120.wav|tests/data/ljspeech/wavs/LJ043-0120.npy +tests/data/ljspeech/wavs/LJ013-0157.wav|tests/data/ljspeech/wavs/LJ013-0157.npy +tests/data/ljspeech/wavs/LJ015-0095.wav|tests/data/ljspeech/wavs/LJ015-0095.npy +tests/data/ljspeech/wavs/LJ022-0133.wav|tests/data/ljspeech/wavs/LJ022-0133.npy +tests/data/ljspeech/wavs/LJ045-0053.wav|tests/data/ljspeech/wavs/LJ045-0053.npy +tests/data/ljspeech/wavs/LJ048-0183.wav|tests/data/ljspeech/wavs/LJ048-0183.npy +tests/data/ljspeech/wavs/LJ045-0054.wav|tests/data/ljspeech/wavs/LJ045-0054.npy +tests/data/ljspeech/wavs/LJ014-0269.wav|tests/data/ljspeech/wavs/LJ014-0269.npy +tests/data/ljspeech/wavs/LJ018-0397.wav|tests/data/ljspeech/wavs/LJ018-0397.npy +tests/data/ljspeech/wavs/LJ003-0245.wav|tests/data/ljspeech/wavs/LJ003-0245.npy +tests/data/ljspeech/wavs/LJ014-0273.wav|tests/data/ljspeech/wavs/LJ014-0273.npy +tests/data/ljspeech/wavs/LJ037-0269.wav|tests/data/ljspeech/wavs/LJ037-0269.npy +tests/data/ljspeech/wavs/LJ014-0126.wav|tests/data/ljspeech/wavs/LJ014-0126.npy +tests/data/ljspeech/wavs/LJ018-0387.wav|tests/data/ljspeech/wavs/LJ018-0387.npy +tests/data/ljspeech/wavs/LJ006-0088.wav|tests/data/ljspeech/wavs/LJ006-0088.npy +tests/data/ljspeech/wavs/LJ014-0042.wav|tests/data/ljspeech/wavs/LJ014-0042.npy +tests/data/ljspeech/wavs/LJ014-0007.wav|tests/data/ljspeech/wavs/LJ014-0007.npy +tests/data/ljspeech/wavs/LJ043-0072.wav|tests/data/ljspeech/wavs/LJ043-0072.npy +tests/data/ljspeech/wavs/LJ042-0247.wav|tests/data/ljspeech/wavs/LJ042-0247.npy +tests/data/ljspeech/wavs/LJ044-0145.wav|tests/data/ljspeech/wavs/LJ044-0145.npy +tests/data/ljspeech/wavs/LJ012-0151.wav|tests/data/ljspeech/wavs/LJ012-0151.npy +tests/data/ljspeech/wavs/LJ036-0124.wav|tests/data/ljspeech/wavs/LJ036-0124.npy +tests/data/ljspeech/wavs/LJ035-0008.wav|tests/data/ljspeech/wavs/LJ035-0008.npy +tests/data/ljspeech/wavs/LJ022-0043.wav|tests/data/ljspeech/wavs/LJ022-0043.npy +tests/data/ljspeech/wavs/LJ036-0119.wav|tests/data/ljspeech/wavs/LJ036-0119.npy +tests/data/ljspeech/wavs/LJ026-0051.wav|tests/data/ljspeech/wavs/LJ026-0051.npy +tests/data/ljspeech/wavs/LJ048-0065.wav|tests/data/ljspeech/wavs/LJ048-0065.npy +tests/data/ljspeech/wavs/LJ040-0072.wav|tests/data/ljspeech/wavs/LJ040-0072.npy +tests/data/ljspeech/wavs/LJ013-0123.wav|tests/data/ljspeech/wavs/LJ013-0123.npy 
+tests/data/ljspeech/wavs/LJ005-0032.wav|tests/data/ljspeech/wavs/LJ005-0032.npy +tests/data/ljspeech/wavs/LJ036-0019.wav|tests/data/ljspeech/wavs/LJ036-0019.npy +tests/data/ljspeech/wavs/LJ038-0073.wav|tests/data/ljspeech/wavs/LJ038-0073.npy +tests/data/ljspeech/wavs/LJ042-0188.wav|tests/data/ljspeech/wavs/LJ042-0188.npy +tests/data/ljspeech/wavs/LJ041-0004.wav|tests/data/ljspeech/wavs/LJ041-0004.npy +tests/data/ljspeech/wavs/LJ038-0270.wav|tests/data/ljspeech/wavs/LJ038-0270.npy +tests/data/ljspeech/wavs/LJ012-0226.wav|tests/data/ljspeech/wavs/LJ012-0226.npy +tests/data/ljspeech/wavs/LJ044-0002.wav|tests/data/ljspeech/wavs/LJ044-0002.npy +tests/data/ljspeech/wavs/LJ028-0242.wav|tests/data/ljspeech/wavs/LJ028-0242.npy +tests/data/ljspeech/wavs/LJ034-0013.wav|tests/data/ljspeech/wavs/LJ034-0013.npy +tests/data/ljspeech/wavs/LJ005-0022.wav|tests/data/ljspeech/wavs/LJ005-0022.npy +tests/data/ljspeech/wavs/LJ028-0245.wav|tests/data/ljspeech/wavs/LJ028-0245.npy +tests/data/ljspeech/wavs/LJ046-0105.wav|tests/data/ljspeech/wavs/LJ046-0105.npy +tests/data/ljspeech/wavs/LJ040-0021.wav|tests/data/ljspeech/wavs/LJ040-0021.npy +tests/data/ljspeech/wavs/LJ039-0221.wav|tests/data/ljspeech/wavs/LJ039-0221.npy +tests/data/ljspeech/wavs/LJ028-0247.wav|tests/data/ljspeech/wavs/LJ028-0247.npy +tests/data/ljspeech/wavs/LJ034-0180.wav|tests/data/ljspeech/wavs/LJ034-0180.npy +tests/data/ljspeech/wavs/LJ022-0124.wav|tests/data/ljspeech/wavs/LJ022-0124.npy +tests/data/ljspeech/wavs/LJ012-0108.wav|tests/data/ljspeech/wavs/LJ012-0108.npy +tests/data/ljspeech/wavs/LJ032-0196.wav|tests/data/ljspeech/wavs/LJ032-0196.npy +tests/data/ljspeech/wavs/LJ047-0016.wav|tests/data/ljspeech/wavs/LJ047-0016.npy +tests/data/ljspeech/wavs/LJ032-0123.wav|tests/data/ljspeech/wavs/LJ032-0123.npy +tests/data/ljspeech/wavs/LJ050-0094.wav|tests/data/ljspeech/wavs/LJ050-0094.npy +tests/data/ljspeech/wavs/LJ048-0057.wav|tests/data/ljspeech/wavs/LJ048-0057.npy +tests/data/ljspeech/wavs/LJ026-0028.wav|tests/data/ljspeech/wavs/LJ026-0028.npy +tests/data/ljspeech/wavs/LJ026-0081.wav|tests/data/ljspeech/wavs/LJ026-0081.npy +tests/data/ljspeech/wavs/LJ040-0180.wav|tests/data/ljspeech/wavs/LJ040-0180.npy +tests/data/ljspeech/wavs/LJ047-0245.wav|tests/data/ljspeech/wavs/LJ047-0245.npy +tests/data/ljspeech/wavs/LJ017-0191.wav|tests/data/ljspeech/wavs/LJ017-0191.npy +tests/data/ljspeech/wavs/LJ046-0087.wav|tests/data/ljspeech/wavs/LJ046-0087.npy +tests/data/ljspeech/wavs/LJ037-0046.wav|tests/data/ljspeech/wavs/LJ037-0046.npy +tests/data/ljspeech/wavs/LJ031-0004.wav|tests/data/ljspeech/wavs/LJ031-0004.npy +tests/data/ljspeech/wavs/LJ021-0169.wav|tests/data/ljspeech/wavs/LJ021-0169.npy +tests/data/ljspeech/wavs/LJ016-0414.wav|tests/data/ljspeech/wavs/LJ016-0414.npy +tests/data/ljspeech/wavs/LJ003-0341.wav|tests/data/ljspeech/wavs/LJ003-0341.npy +tests/data/ljspeech/wavs/LJ018-0059.wav|tests/data/ljspeech/wavs/LJ018-0059.npy +tests/data/ljspeech/wavs/LJ026-0107.wav|tests/data/ljspeech/wavs/LJ026-0107.npy +tests/data/ljspeech/wavs/LJ016-0040.wav|tests/data/ljspeech/wavs/LJ016-0040.npy +tests/data/ljspeech/wavs/LJ001-0164.wav|tests/data/ljspeech/wavs/LJ001-0164.npy +tests/data/ljspeech/wavs/LJ038-0249.wav|tests/data/ljspeech/wavs/LJ038-0249.npy +tests/data/ljspeech/wavs/LJ033-0141.wav|tests/data/ljspeech/wavs/LJ033-0141.npy +tests/data/ljspeech/wavs/LJ020-0059.wav|tests/data/ljspeech/wavs/LJ020-0059.npy +tests/data/ljspeech/wavs/LJ001-0071.wav|tests/data/ljspeech/wavs/LJ001-0071.npy 
+tests/data/ljspeech/wavs/LJ041-0140.wav|tests/data/ljspeech/wavs/LJ041-0140.npy +tests/data/ljspeech/wavs/LJ029-0097.wav|tests/data/ljspeech/wavs/LJ029-0097.npy +tests/data/ljspeech/wavs/LJ038-0227.wav|tests/data/ljspeech/wavs/LJ038-0227.npy +tests/data/ljspeech/wavs/LJ048-0245.wav|tests/data/ljspeech/wavs/LJ048-0245.npy +tests/data/ljspeech/wavs/LJ040-0100.wav|tests/data/ljspeech/wavs/LJ040-0100.npy +tests/data/ljspeech/wavs/LJ046-0239.wav|tests/data/ljspeech/wavs/LJ046-0239.npy +tests/data/ljspeech/wavs/LJ046-0119.wav|tests/data/ljspeech/wavs/LJ046-0119.npy +tests/data/ljspeech/wavs/LJ033-0127.wav|tests/data/ljspeech/wavs/LJ033-0127.npy +tests/data/ljspeech/wavs/LJ010-0111.wav|tests/data/ljspeech/wavs/LJ010-0111.npy +tests/data/ljspeech/wavs/LJ008-0187.wav|tests/data/ljspeech/wavs/LJ008-0187.npy +tests/data/ljspeech/wavs/LJ049-0174.wav|tests/data/ljspeech/wavs/LJ049-0174.npy +tests/data/ljspeech/wavs/LJ026-0008.wav|tests/data/ljspeech/wavs/LJ026-0008.npy +tests/data/ljspeech/wavs/LJ006-0144.wav|tests/data/ljspeech/wavs/LJ006-0144.npy +tests/data/ljspeech/wavs/LJ017-0262.wav|tests/data/ljspeech/wavs/LJ017-0262.npy +tests/data/ljspeech/wavs/LJ012-0296.wav|tests/data/ljspeech/wavs/LJ012-0296.npy +tests/data/ljspeech/wavs/LJ027-0021.wav|tests/data/ljspeech/wavs/LJ027-0021.npy +tests/data/ljspeech/wavs/LJ016-0103.wav|tests/data/ljspeech/wavs/LJ016-0103.npy +tests/data/ljspeech/wavs/LJ004-0083.wav|tests/data/ljspeech/wavs/LJ004-0083.npy +tests/data/ljspeech/wavs/LJ005-0091.wav|tests/data/ljspeech/wavs/LJ005-0091.npy +tests/data/ljspeech/wavs/LJ022-0040.wav|tests/data/ljspeech/wavs/LJ022-0040.npy +tests/data/ljspeech/wavs/LJ011-0206.wav|tests/data/ljspeech/wavs/LJ011-0206.npy +tests/data/ljspeech/wavs/LJ027-0033.wav|tests/data/ljspeech/wavs/LJ027-0033.npy +tests/data/ljspeech/wavs/LJ028-0266.wav|tests/data/ljspeech/wavs/LJ028-0266.npy +tests/data/ljspeech/wavs/LJ019-0248.wav|tests/data/ljspeech/wavs/LJ019-0248.npy +tests/data/ljspeech/wavs/LJ027-0045.wav|tests/data/ljspeech/wavs/LJ027-0045.npy +tests/data/ljspeech/wavs/LJ049-0017.wav|tests/data/ljspeech/wavs/LJ049-0017.npy +tests/data/ljspeech/wavs/LJ008-0163.wav|tests/data/ljspeech/wavs/LJ008-0163.npy +tests/data/ljspeech/wavs/LJ013-0065.wav|tests/data/ljspeech/wavs/LJ013-0065.npy +tests/data/ljspeech/wavs/LJ022-0013.wav|tests/data/ljspeech/wavs/LJ022-0013.npy +tests/data/ljspeech/wavs/LJ002-0169.wav|tests/data/ljspeech/wavs/LJ002-0169.npy +tests/data/ljspeech/wavs/LJ015-0009.wav|tests/data/ljspeech/wavs/LJ015-0009.npy +tests/data/ljspeech/wavs/LJ030-0078.wav|tests/data/ljspeech/wavs/LJ030-0078.npy +tests/data/ljspeech/wavs/LJ010-0006.wav|tests/data/ljspeech/wavs/LJ010-0006.npy +tests/data/ljspeech/wavs/LJ003-0224.wav|tests/data/ljspeech/wavs/LJ003-0224.npy +tests/data/ljspeech/wavs/LJ019-0168.wav|tests/data/ljspeech/wavs/LJ019-0168.npy +tests/data/ljspeech/wavs/LJ028-0276.wav|tests/data/ljspeech/wavs/LJ028-0276.npy +tests/data/ljspeech/wavs/LJ021-0203.wav|tests/data/ljspeech/wavs/LJ021-0203.npy +tests/data/ljspeech/wavs/LJ028-0279.wav|tests/data/ljspeech/wavs/LJ028-0279.npy +tests/data/ljspeech/wavs/LJ021-0073.wav|tests/data/ljspeech/wavs/LJ021-0073.npy +tests/data/ljspeech/wavs/LJ029-0206.wav|tests/data/ljspeech/wavs/LJ029-0206.npy +tests/data/ljspeech/wavs/LJ020-0060.wav|tests/data/ljspeech/wavs/LJ020-0060.npy +tests/data/ljspeech/wavs/LJ028-0064.wav|tests/data/ljspeech/wavs/LJ028-0064.npy +tests/data/ljspeech/wavs/LJ011-0216.wav|tests/data/ljspeech/wavs/LJ011-0216.npy 
+tests/data/ljspeech/wavs/LJ028-0037.wav|tests/data/ljspeech/wavs/LJ028-0037.npy +tests/data/ljspeech/wavs/LJ009-0031.wav|tests/data/ljspeech/wavs/LJ009-0031.npy +tests/data/ljspeech/wavs/LJ019-0362.wav|tests/data/ljspeech/wavs/LJ019-0362.npy +tests/data/ljspeech/wavs/LJ025-0090.wav|tests/data/ljspeech/wavs/LJ025-0090.npy +tests/data/ljspeech/wavs/LJ050-0064.wav|tests/data/ljspeech/wavs/LJ050-0064.npy +tests/data/ljspeech/wavs/LJ050-0083.wav|tests/data/ljspeech/wavs/LJ050-0083.npy +tests/data/ljspeech/wavs/LJ007-0163.wav|tests/data/ljspeech/wavs/LJ007-0163.npy +tests/data/ljspeech/wavs/LJ012-0084.wav|tests/data/ljspeech/wavs/LJ012-0084.npy +tests/data/ljspeech/wavs/LJ027-0025.wav|tests/data/ljspeech/wavs/LJ027-0025.npy +tests/data/ljspeech/wavs/LJ014-0305.wav|tests/data/ljspeech/wavs/LJ014-0305.npy +tests/data/ljspeech/wavs/LJ011-0147.wav|tests/data/ljspeech/wavs/LJ011-0147.npy +tests/data/ljspeech/wavs/LJ050-0090.wav|tests/data/ljspeech/wavs/LJ050-0090.npy +tests/data/ljspeech/wavs/LJ030-0213.wav|tests/data/ljspeech/wavs/LJ030-0213.npy +tests/data/ljspeech/wavs/LJ028-0347.wav|tests/data/ljspeech/wavs/LJ028-0347.npy +tests/data/ljspeech/wavs/LJ002-0098.wav|tests/data/ljspeech/wavs/LJ002-0098.npy +tests/data/ljspeech/wavs/LJ006-0064.wav|tests/data/ljspeech/wavs/LJ006-0064.npy +tests/data/ljspeech/wavs/LJ009-0091.wav|tests/data/ljspeech/wavs/LJ009-0091.npy +tests/data/ljspeech/wavs/LJ048-0172.wav|tests/data/ljspeech/wavs/LJ048-0172.npy +tests/data/ljspeech/wavs/LJ023-0129.wav|tests/data/ljspeech/wavs/LJ023-0129.npy +tests/data/ljspeech/wavs/LJ023-0082.wav|tests/data/ljspeech/wavs/LJ023-0082.npy +tests/data/ljspeech/wavs/LJ014-0182.wav|tests/data/ljspeech/wavs/LJ014-0182.npy +tests/data/ljspeech/wavs/LJ009-0046.wav|tests/data/ljspeech/wavs/LJ009-0046.npy +tests/data/ljspeech/wavs/LJ004-0088.wav|tests/data/ljspeech/wavs/LJ004-0088.npy +tests/data/ljspeech/wavs/LJ018-0297.wav|tests/data/ljspeech/wavs/LJ018-0297.npy +tests/data/ljspeech/wavs/LJ016-0265.wav|tests/data/ljspeech/wavs/LJ016-0265.npy +tests/data/ljspeech/wavs/LJ028-0335.wav|tests/data/ljspeech/wavs/LJ028-0335.npy +tests/data/ljspeech/wavs/LJ019-0381.wav|tests/data/ljspeech/wavs/LJ019-0381.npy +tests/data/ljspeech/wavs/LJ012-0076.wav|tests/data/ljspeech/wavs/LJ012-0076.npy +tests/data/ljspeech/wavs/LJ013-0242.wav|tests/data/ljspeech/wavs/LJ013-0242.npy +tests/data/ljspeech/wavs/LJ014-0245.wav|tests/data/ljspeech/wavs/LJ014-0245.npy +tests/data/ljspeech/wavs/LJ029-0067.wav|tests/data/ljspeech/wavs/LJ029-0067.npy +tests/data/ljspeech/wavs/LJ019-0345.wav|tests/data/ljspeech/wavs/LJ019-0345.npy +tests/data/ljspeech/wavs/LJ016-0241.wav|tests/data/ljspeech/wavs/LJ016-0241.npy +tests/data/ljspeech/wavs/LJ019-0278.wav|tests/data/ljspeech/wavs/LJ019-0278.npy +tests/data/ljspeech/wavs/LJ043-0047.wav|tests/data/ljspeech/wavs/LJ043-0047.npy +tests/data/ljspeech/wavs/LJ015-0120.wav|tests/data/ljspeech/wavs/LJ015-0120.npy +tests/data/ljspeech/wavs/LJ050-0272.wav|tests/data/ljspeech/wavs/LJ050-0272.npy +tests/data/ljspeech/wavs/LJ043-0109.wav|tests/data/ljspeech/wavs/LJ043-0109.npy +tests/data/ljspeech/wavs/LJ019-0297.wav|tests/data/ljspeech/wavs/LJ019-0297.npy +tests/data/ljspeech/wavs/LJ019-0266.wav|tests/data/ljspeech/wavs/LJ019-0266.npy +tests/data/ljspeech/wavs/LJ020-0005.wav|tests/data/ljspeech/wavs/LJ020-0005.npy +tests/data/ljspeech/wavs/LJ035-0037.wav|tests/data/ljspeech/wavs/LJ035-0037.npy +tests/data/ljspeech/wavs/LJ010-0190.wav|tests/data/ljspeech/wavs/LJ010-0190.npy 
+tests/data/ljspeech/wavs/LJ025-0128.wav|tests/data/ljspeech/wavs/LJ025-0128.npy +tests/data/ljspeech/wavs/LJ015-0247.wav|tests/data/ljspeech/wavs/LJ015-0247.npy +tests/data/ljspeech/wavs/LJ005-0181.wav|tests/data/ljspeech/wavs/LJ005-0181.npy +tests/data/ljspeech/wavs/LJ020-0021.wav|tests/data/ljspeech/wavs/LJ020-0021.npy +tests/data/ljspeech/wavs/LJ013-0028.wav|tests/data/ljspeech/wavs/LJ013-0028.npy +tests/data/ljspeech/wavs/LJ002-0221.wav|tests/data/ljspeech/wavs/LJ002-0221.npy +tests/data/ljspeech/wavs/LJ014-0291.wav|tests/data/ljspeech/wavs/LJ014-0291.npy +tests/data/ljspeech/wavs/LJ028-0136.wav|tests/data/ljspeech/wavs/LJ028-0136.npy +tests/data/ljspeech/wavs/LJ009-0133.wav|tests/data/ljspeech/wavs/LJ009-0133.npy +tests/data/ljspeech/wavs/LJ011-0276.wav|tests/data/ljspeech/wavs/LJ011-0276.npy +tests/data/ljspeech/wavs/LJ025-0131.wav|tests/data/ljspeech/wavs/LJ025-0131.npy +tests/data/ljspeech/wavs/LJ001-0042.wav|tests/data/ljspeech/wavs/LJ001-0042.npy +tests/data/ljspeech/wavs/LJ028-0186.wav|tests/data/ljspeech/wavs/LJ028-0186.npy +tests/data/ljspeech/wavs/LJ018-0389.wav|tests/data/ljspeech/wavs/LJ018-0389.npy +tests/data/ljspeech/wavs/LJ008-0140.wav|tests/data/ljspeech/wavs/LJ008-0140.npy +tests/data/ljspeech/wavs/LJ014-0167.wav|tests/data/ljspeech/wavs/LJ014-0167.npy +tests/data/ljspeech/wavs/LJ009-0215.wav|tests/data/ljspeech/wavs/LJ009-0215.npy +tests/data/ljspeech/wavs/LJ012-0232.wav|tests/data/ljspeech/wavs/LJ012-0232.npy +tests/data/ljspeech/wavs/LJ049-0228.wav|tests/data/ljspeech/wavs/LJ049-0228.npy +tests/data/ljspeech/wavs/LJ002-0024.wav|tests/data/ljspeech/wavs/LJ002-0024.npy +tests/data/ljspeech/wavs/LJ004-0090.wav|tests/data/ljspeech/wavs/LJ004-0090.npy +tests/data/ljspeech/wavs/LJ040-0104.wav|tests/data/ljspeech/wavs/LJ040-0104.npy +tests/data/ljspeech/wavs/LJ010-0174.wav|tests/data/ljspeech/wavs/LJ010-0174.npy +tests/data/ljspeech/wavs/LJ046-0086.wav|tests/data/ljspeech/wavs/LJ046-0086.npy +tests/data/ljspeech/wavs/LJ042-0149.wav|tests/data/ljspeech/wavs/LJ042-0149.npy +tests/data/ljspeech/wavs/LJ016-0235.wav|tests/data/ljspeech/wavs/LJ016-0235.npy +tests/data/ljspeech/wavs/LJ016-0224.wav|tests/data/ljspeech/wavs/LJ016-0224.npy +tests/data/ljspeech/wavs/LJ007-0157.wav|tests/data/ljspeech/wavs/LJ007-0157.npy +tests/data/ljspeech/wavs/LJ014-0266.wav|tests/data/ljspeech/wavs/LJ014-0266.npy +tests/data/ljspeech/wavs/LJ048-0270.wav|tests/data/ljspeech/wavs/LJ048-0270.npy +tests/data/ljspeech/wavs/LJ008-0045.wav|tests/data/ljspeech/wavs/LJ008-0045.npy +tests/data/ljspeech/wavs/LJ044-0200.wav|tests/data/ljspeech/wavs/LJ044-0200.npy +tests/data/ljspeech/wavs/LJ044-0103.wav|tests/data/ljspeech/wavs/LJ044-0103.npy +tests/data/ljspeech/wavs/LJ037-0064.wav|tests/data/ljspeech/wavs/LJ037-0064.npy +tests/data/ljspeech/wavs/LJ006-0140.wav|tests/data/ljspeech/wavs/LJ006-0140.npy +tests/data/ljspeech/wavs/LJ038-0101.wav|tests/data/ljspeech/wavs/LJ038-0101.npy +tests/data/ljspeech/wavs/LJ040-0160.wav|tests/data/ljspeech/wavs/LJ040-0160.npy +tests/data/ljspeech/wavs/LJ010-0177.wav|tests/data/ljspeech/wavs/LJ010-0177.npy +tests/data/ljspeech/wavs/LJ038-0137.wav|tests/data/ljspeech/wavs/LJ038-0137.npy +tests/data/ljspeech/wavs/LJ047-0138.wav|tests/data/ljspeech/wavs/LJ047-0138.npy +tests/data/ljspeech/wavs/LJ046-0080.wav|tests/data/ljspeech/wavs/LJ046-0080.npy +tests/data/ljspeech/wavs/LJ005-0168.wav|tests/data/ljspeech/wavs/LJ005-0168.npy +tests/data/ljspeech/wavs/LJ048-0277.wav|tests/data/ljspeech/wavs/LJ048-0277.npy 
+tests/data/ljspeech/wavs/LJ014-0192.wav|tests/data/ljspeech/wavs/LJ014-0192.npy +tests/data/ljspeech/wavs/LJ008-0076.wav|tests/data/ljspeech/wavs/LJ008-0076.npy +tests/data/ljspeech/wavs/LJ028-0399.wav|tests/data/ljspeech/wavs/LJ028-0399.npy +tests/data/ljspeech/wavs/LJ032-0121.wav|tests/data/ljspeech/wavs/LJ032-0121.npy +tests/data/ljspeech/wavs/LJ002-0071.wav|tests/data/ljspeech/wavs/LJ002-0071.npy +tests/data/ljspeech/wavs/LJ010-0133.wav|tests/data/ljspeech/wavs/LJ010-0133.npy +tests/data/ljspeech/wavs/LJ013-0070.wav|tests/data/ljspeech/wavs/LJ013-0070.npy +tests/data/ljspeech/wavs/LJ031-0221.wav|tests/data/ljspeech/wavs/LJ031-0221.npy +tests/data/ljspeech/wavs/LJ033-0077.wav|tests/data/ljspeech/wavs/LJ033-0077.npy +tests/data/ljspeech/wavs/LJ006-0001.wav|tests/data/ljspeech/wavs/LJ006-0001.npy +tests/data/ljspeech/wavs/LJ023-0078.wav|tests/data/ljspeech/wavs/LJ023-0078.npy +tests/data/ljspeech/wavs/LJ048-0028.wav|tests/data/ljspeech/wavs/LJ048-0028.npy +tests/data/ljspeech/wavs/LJ025-0104.wav|tests/data/ljspeech/wavs/LJ025-0104.npy +tests/data/ljspeech/wavs/LJ022-0198.wav|tests/data/ljspeech/wavs/LJ022-0198.npy +tests/data/ljspeech/wavs/LJ009-0202.wav|tests/data/ljspeech/wavs/LJ009-0202.npy +tests/data/ljspeech/wavs/LJ015-0092.wav|tests/data/ljspeech/wavs/LJ015-0092.npy +tests/data/ljspeech/wavs/LJ009-0136.wav|tests/data/ljspeech/wavs/LJ009-0136.npy +tests/data/ljspeech/wavs/LJ027-0134.wav|tests/data/ljspeech/wavs/LJ027-0134.npy +tests/data/ljspeech/wavs/LJ022-0088.wav|tests/data/ljspeech/wavs/LJ022-0088.npy +tests/data/ljspeech/wavs/LJ022-0177.wav|tests/data/ljspeech/wavs/LJ022-0177.npy +tests/data/ljspeech/wavs/LJ037-0268.wav|tests/data/ljspeech/wavs/LJ037-0268.npy +tests/data/ljspeech/wavs/LJ023-0126.wav|tests/data/ljspeech/wavs/LJ023-0126.npy +tests/data/ljspeech/wavs/LJ009-0101.wav|tests/data/ljspeech/wavs/LJ009-0101.npy +tests/data/ljspeech/wavs/LJ025-0172.wav|tests/data/ljspeech/wavs/LJ025-0172.npy +tests/data/ljspeech/wavs/LJ037-0258.wav|tests/data/ljspeech/wavs/LJ037-0258.npy +tests/data/ljspeech/wavs/LJ025-0073.wav|tests/data/ljspeech/wavs/LJ025-0073.npy +tests/data/ljspeech/wavs/LJ015-0239.wav|tests/data/ljspeech/wavs/LJ015-0239.npy +tests/data/ljspeech/wavs/LJ023-0064.wav|tests/data/ljspeech/wavs/LJ023-0064.npy +tests/data/ljspeech/wavs/LJ025-0142.wav|tests/data/ljspeech/wavs/LJ025-0142.npy +tests/data/ljspeech/wavs/LJ037-0234.wav|tests/data/ljspeech/wavs/LJ037-0234.npy +tests/data/ljspeech/wavs/LJ038-0005.wav|tests/data/ljspeech/wavs/LJ038-0005.npy +tests/data/ljspeech/wavs/LJ009-0072.wav|tests/data/ljspeech/wavs/LJ009-0072.npy +tests/data/ljspeech/wavs/LJ028-0101.wav|tests/data/ljspeech/wavs/LJ028-0101.npy +tests/data/ljspeech/wavs/LJ015-0197.wav|tests/data/ljspeech/wavs/LJ015-0197.npy +tests/data/ljspeech/wavs/LJ025-0149.wav|tests/data/ljspeech/wavs/LJ025-0149.npy +tests/data/ljspeech/wavs/LJ025-0029.wav|tests/data/ljspeech/wavs/LJ025-0029.npy +tests/data/ljspeech/wavs/LJ044-0235.wav|tests/data/ljspeech/wavs/LJ044-0235.npy +tests/data/ljspeech/wavs/LJ002-0278.wav|tests/data/ljspeech/wavs/LJ002-0278.npy +tests/data/ljspeech/wavs/LJ004-0043.wav|tests/data/ljspeech/wavs/LJ004-0043.npy +tests/data/ljspeech/wavs/LJ033-0109.wav|tests/data/ljspeech/wavs/LJ033-0109.npy +tests/data/ljspeech/wavs/LJ026-0114.wav|tests/data/ljspeech/wavs/LJ026-0114.npy +tests/data/ljspeech/wavs/LJ008-0196.wav|tests/data/ljspeech/wavs/LJ008-0196.npy +tests/data/ljspeech/wavs/LJ002-0137.wav|tests/data/ljspeech/wavs/LJ002-0137.npy 
+tests/data/ljspeech/wavs/LJ032-0192.wav|tests/data/ljspeech/wavs/LJ032-0192.npy +tests/data/ljspeech/wavs/LJ045-0017.wav|tests/data/ljspeech/wavs/LJ045-0017.npy +tests/data/ljspeech/wavs/LJ021-0190.wav|tests/data/ljspeech/wavs/LJ021-0190.npy +tests/data/ljspeech/wavs/LJ037-0189.wav|tests/data/ljspeech/wavs/LJ037-0189.npy +tests/data/ljspeech/wavs/LJ025-0080.wav|tests/data/ljspeech/wavs/LJ025-0080.npy +tests/data/ljspeech/wavs/LJ021-0137.wav|tests/data/ljspeech/wavs/LJ021-0137.npy +tests/data/ljspeech/wavs/LJ034-0214.wav|tests/data/ljspeech/wavs/LJ034-0214.npy +tests/data/ljspeech/wavs/LJ028-0110.wav|tests/data/ljspeech/wavs/LJ028-0110.npy +tests/data/ljspeech/wavs/LJ048-0210.wav|tests/data/ljspeech/wavs/LJ048-0210.npy +tests/data/ljspeech/wavs/LJ010-0050.wav|tests/data/ljspeech/wavs/LJ010-0050.npy +tests/data/ljspeech/wavs/LJ009-0087.wav|tests/data/ljspeech/wavs/LJ009-0087.npy +tests/data/ljspeech/wavs/LJ046-0029.wav|tests/data/ljspeech/wavs/LJ046-0029.npy +tests/data/ljspeech/wavs/LJ048-0020.wav|tests/data/ljspeech/wavs/LJ048-0020.npy +tests/data/ljspeech/wavs/LJ008-0305.wav|tests/data/ljspeech/wavs/LJ008-0305.npy +tests/data/ljspeech/wavs/LJ020-0045.wav|tests/data/ljspeech/wavs/LJ020-0045.npy +tests/data/ljspeech/wavs/LJ004-0003.wav|tests/data/ljspeech/wavs/LJ004-0003.npy +tests/data/ljspeech/wavs/LJ038-0057.wav|tests/data/ljspeech/wavs/LJ038-0057.npy +tests/data/ljspeech/wavs/LJ008-0145.wav|tests/data/ljspeech/wavs/LJ008-0145.npy +tests/data/ljspeech/wavs/LJ010-0066.wav|tests/data/ljspeech/wavs/LJ010-0066.npy +tests/data/ljspeech/wavs/LJ032-0152.wav|tests/data/ljspeech/wavs/LJ032-0152.npy +tests/data/ljspeech/wavs/LJ046-0203.wav|tests/data/ljspeech/wavs/LJ046-0203.npy +tests/data/ljspeech/wavs/LJ012-0088.wav|tests/data/ljspeech/wavs/LJ012-0088.npy +tests/data/ljspeech/wavs/LJ003-0060.wav|tests/data/ljspeech/wavs/LJ003-0060.npy +tests/data/ljspeech/wavs/LJ014-0339.wav|tests/data/ljspeech/wavs/LJ014-0339.npy +tests/data/ljspeech/wavs/LJ006-0062.wav|tests/data/ljspeech/wavs/LJ006-0062.npy +tests/data/ljspeech/wavs/LJ003-0059.wav|tests/data/ljspeech/wavs/LJ003-0059.npy +tests/data/ljspeech/wavs/LJ034-0218.wav|tests/data/ljspeech/wavs/LJ034-0218.npy +tests/data/ljspeech/wavs/LJ034-0190.wav|tests/data/ljspeech/wavs/LJ034-0190.npy +tests/data/ljspeech/wavs/LJ038-0030.wav|tests/data/ljspeech/wavs/LJ038-0030.npy +tests/data/ljspeech/wavs/LJ044-0140.wav|tests/data/ljspeech/wavs/LJ044-0140.npy +tests/data/ljspeech/wavs/LJ032-0246.wav|tests/data/ljspeech/wavs/LJ032-0246.npy +tests/data/ljspeech/wavs/LJ036-0154.wav|tests/data/ljspeech/wavs/LJ036-0154.npy +tests/data/ljspeech/wavs/LJ038-0281.wav|tests/data/ljspeech/wavs/LJ038-0281.npy +tests/data/ljspeech/wavs/LJ011-0245.wav|tests/data/ljspeech/wavs/LJ011-0245.npy +tests/data/ljspeech/wavs/LJ037-0029.wav|tests/data/ljspeech/wavs/LJ037-0029.npy +tests/data/ljspeech/wavs/LJ027-0016.wav|tests/data/ljspeech/wavs/LJ027-0016.npy +tests/data/ljspeech/wavs/LJ047-0029.wav|tests/data/ljspeech/wavs/LJ047-0029.npy +tests/data/ljspeech/wavs/LJ006-0193.wav|tests/data/ljspeech/wavs/LJ006-0193.npy +tests/data/ljspeech/wavs/LJ014-0080.wav|tests/data/ljspeech/wavs/LJ014-0080.npy +tests/data/ljspeech/wavs/LJ005-0263.wav|tests/data/ljspeech/wavs/LJ005-0263.npy +tests/data/ljspeech/wavs/LJ038-0037.wav|tests/data/ljspeech/wavs/LJ038-0037.npy +tests/data/ljspeech/wavs/LJ030-0033.wav|tests/data/ljspeech/wavs/LJ030-0033.npy +tests/data/ljspeech/wavs/LJ005-0109.wav|tests/data/ljspeech/wavs/LJ005-0109.npy 
+tests/data/ljspeech/wavs/LJ021-0078.wav|tests/data/ljspeech/wavs/LJ021-0078.npy +tests/data/ljspeech/wavs/LJ048-0162.wav|tests/data/ljspeech/wavs/LJ048-0162.npy +tests/data/ljspeech/wavs/LJ018-0057.wav|tests/data/ljspeech/wavs/LJ018-0057.npy +tests/data/ljspeech/wavs/LJ021-0087.wav|tests/data/ljspeech/wavs/LJ021-0087.npy +tests/data/ljspeech/wavs/LJ042-0221.wav|tests/data/ljspeech/wavs/LJ042-0221.npy +tests/data/ljspeech/wavs/LJ048-0121.wav|tests/data/ljspeech/wavs/LJ048-0121.npy +tests/data/ljspeech/wavs/LJ030-0128.wav|tests/data/ljspeech/wavs/LJ030-0128.npy +tests/data/ljspeech/wavs/LJ014-0121.wav|tests/data/ljspeech/wavs/LJ014-0121.npy +tests/data/ljspeech/wavs/LJ011-0051.wav|tests/data/ljspeech/wavs/LJ011-0051.npy +tests/data/ljspeech/wavs/LJ040-0219.wav|tests/data/ljspeech/wavs/LJ040-0219.npy +tests/data/ljspeech/wavs/LJ033-0053.wav|tests/data/ljspeech/wavs/LJ033-0053.npy +tests/data/ljspeech/wavs/LJ038-0272.wav|tests/data/ljspeech/wavs/LJ038-0272.npy +tests/data/ljspeech/wavs/LJ014-0128.wav|tests/data/ljspeech/wavs/LJ014-0128.npy +tests/data/ljspeech/wavs/LJ018-0204.wav|tests/data/ljspeech/wavs/LJ018-0204.npy +tests/data/ljspeech/wavs/LJ003-0158.wav|tests/data/ljspeech/wavs/LJ003-0158.npy +tests/data/ljspeech/wavs/LJ028-0230.wav|tests/data/ljspeech/wavs/LJ028-0230.npy +tests/data/ljspeech/wavs/LJ016-0320.wav|tests/data/ljspeech/wavs/LJ016-0320.npy +tests/data/ljspeech/wavs/LJ017-0147.wav|tests/data/ljspeech/wavs/LJ017-0147.npy +tests/data/ljspeech/wavs/LJ043-0079.wav|tests/data/ljspeech/wavs/LJ043-0079.npy +tests/data/ljspeech/wavs/LJ044-0066.wav|tests/data/ljspeech/wavs/LJ044-0066.npy +tests/data/ljspeech/wavs/LJ011-0241.wav|tests/data/ljspeech/wavs/LJ011-0241.npy +tests/data/ljspeech/wavs/LJ030-0206.wav|tests/data/ljspeech/wavs/LJ030-0206.npy +tests/data/ljspeech/wavs/LJ046-0147.wav|tests/data/ljspeech/wavs/LJ046-0147.npy +tests/data/ljspeech/wavs/LJ043-0039.wav|tests/data/ljspeech/wavs/LJ043-0039.npy +tests/data/ljspeech/wavs/LJ036-0089.wav|tests/data/ljspeech/wavs/LJ036-0089.npy +tests/data/ljspeech/wavs/LJ049-0060.wav|tests/data/ljspeech/wavs/LJ049-0060.npy +tests/data/ljspeech/wavs/LJ014-0337.wav|tests/data/ljspeech/wavs/LJ014-0337.npy +tests/data/ljspeech/wavs/LJ016-0077.wav|tests/data/ljspeech/wavs/LJ016-0077.npy +tests/data/ljspeech/wavs/LJ036-0085.wav|tests/data/ljspeech/wavs/LJ036-0085.npy +tests/data/ljspeech/wavs/LJ004-0153.wav|tests/data/ljspeech/wavs/LJ004-0153.npy +tests/data/ljspeech/wavs/LJ032-0222.wav|tests/data/ljspeech/wavs/LJ032-0222.npy +tests/data/ljspeech/wavs/LJ008-0212.wav|tests/data/ljspeech/wavs/LJ008-0212.npy +tests/data/ljspeech/wavs/LJ016-0442.wav|tests/data/ljspeech/wavs/LJ016-0442.npy +tests/data/ljspeech/wavs/LJ006-0158.wav|tests/data/ljspeech/wavs/LJ006-0158.npy +tests/data/ljspeech/wavs/LJ013-0257.wav|tests/data/ljspeech/wavs/LJ013-0257.npy +tests/data/ljspeech/wavs/LJ044-0077.wav|tests/data/ljspeech/wavs/LJ044-0077.npy +tests/data/ljspeech/wavs/LJ003-0314.wav|tests/data/ljspeech/wavs/LJ003-0314.npy +tests/data/ljspeech/wavs/LJ013-0066.wav|tests/data/ljspeech/wavs/LJ013-0066.npy +tests/data/ljspeech/wavs/LJ017-0108.wav|tests/data/ljspeech/wavs/LJ017-0108.npy +tests/data/ljspeech/wavs/LJ044-0092.wav|tests/data/ljspeech/wavs/LJ044-0092.npy +tests/data/ljspeech/wavs/LJ011-0037.wav|tests/data/ljspeech/wavs/LJ011-0037.npy +tests/data/ljspeech/wavs/LJ029-0045.wav|tests/data/ljspeech/wavs/LJ029-0045.npy +tests/data/ljspeech/wavs/LJ010-0054.wav|tests/data/ljspeech/wavs/LJ010-0054.npy 
+tests/data/ljspeech/wavs/LJ011-0009.wav|tests/data/ljspeech/wavs/LJ011-0009.npy +tests/data/ljspeech/wavs/LJ005-0079.wav|tests/data/ljspeech/wavs/LJ005-0079.npy +tests/data/ljspeech/wavs/LJ004-0170.wav|tests/data/ljspeech/wavs/LJ004-0170.npy +tests/data/ljspeech/wavs/LJ005-0108.wav|tests/data/ljspeech/wavs/LJ005-0108.npy +tests/data/ljspeech/wavs/LJ038-0234.wav|tests/data/ljspeech/wavs/LJ038-0234.npy +tests/data/ljspeech/wavs/LJ038-0299.wav|tests/data/ljspeech/wavs/LJ038-0299.npy +tests/data/ljspeech/wavs/LJ001-0180.wav|tests/data/ljspeech/wavs/LJ001-0180.npy +tests/data/ljspeech/wavs/LJ038-0166.wav|tests/data/ljspeech/wavs/LJ038-0166.npy +tests/data/ljspeech/wavs/LJ040-0193.wav|tests/data/ljspeech/wavs/LJ040-0193.npy +tests/data/ljspeech/wavs/LJ050-0227.wav|tests/data/ljspeech/wavs/LJ050-0227.npy +tests/data/ljspeech/wavs/LJ038-0082.wav|tests/data/ljspeech/wavs/LJ038-0082.npy +tests/data/ljspeech/wavs/LJ017-0122.wav|tests/data/ljspeech/wavs/LJ017-0122.npy +tests/data/ljspeech/wavs/LJ007-0001.wav|tests/data/ljspeech/wavs/LJ007-0001.npy +tests/data/ljspeech/wavs/LJ032-0200.wav|tests/data/ljspeech/wavs/LJ032-0200.npy +tests/data/ljspeech/wavs/LJ012-0143.wav|tests/data/ljspeech/wavs/LJ012-0143.npy +tests/data/ljspeech/wavs/LJ027-0169.wav|tests/data/ljspeech/wavs/LJ027-0169.npy +tests/data/ljspeech/wavs/LJ049-0227.wav|tests/data/ljspeech/wavs/LJ049-0227.npy +tests/data/ljspeech/wavs/LJ002-0229.wav|tests/data/ljspeech/wavs/LJ002-0229.npy +tests/data/ljspeech/wavs/LJ007-0077.wav|tests/data/ljspeech/wavs/LJ007-0077.npy +tests/data/ljspeech/wavs/LJ028-0056.wav|tests/data/ljspeech/wavs/LJ028-0056.npy +tests/data/ljspeech/wavs/LJ005-0130.wav|tests/data/ljspeech/wavs/LJ005-0130.npy +tests/data/ljspeech/wavs/LJ040-0036.wav|tests/data/ljspeech/wavs/LJ040-0036.npy +tests/data/ljspeech/wavs/LJ047-0008.wav|tests/data/ljspeech/wavs/LJ047-0008.npy +tests/data/ljspeech/wavs/LJ001-0101.wav|tests/data/ljspeech/wavs/LJ001-0101.npy +tests/data/ljspeech/wavs/LJ014-0336.wav|tests/data/ljspeech/wavs/LJ014-0336.npy +tests/data/ljspeech/wavs/LJ013-0057.wav|tests/data/ljspeech/wavs/LJ013-0057.npy +tests/data/ljspeech/wavs/LJ028-0015.wav|tests/data/ljspeech/wavs/LJ028-0015.npy +tests/data/ljspeech/wavs/LJ019-0351.wav|tests/data/ljspeech/wavs/LJ019-0351.npy +tests/data/ljspeech/wavs/LJ011-0072.wav|tests/data/ljspeech/wavs/LJ011-0072.npy +tests/data/ljspeech/wavs/LJ006-0185.wav|tests/data/ljspeech/wavs/LJ006-0185.npy +tests/data/ljspeech/wavs/LJ022-0068.wav|tests/data/ljspeech/wavs/LJ022-0068.npy +tests/data/ljspeech/wavs/LJ011-0010.wav|tests/data/ljspeech/wavs/LJ011-0010.npy +tests/data/ljspeech/wavs/LJ018-0071.wav|tests/data/ljspeech/wavs/LJ018-0071.npy +tests/data/ljspeech/wavs/LJ028-0194.wav|tests/data/ljspeech/wavs/LJ028-0194.npy +tests/data/ljspeech/wavs/LJ004-0108.wav|tests/data/ljspeech/wavs/LJ004-0108.npy +tests/data/ljspeech/wavs/LJ046-0111.wav|tests/data/ljspeech/wavs/LJ046-0111.npy +tests/data/ljspeech/wavs/LJ003-0255.wav|tests/data/ljspeech/wavs/LJ003-0255.npy +tests/data/ljspeech/wavs/LJ009-0214.wav|tests/data/ljspeech/wavs/LJ009-0214.npy +tests/data/ljspeech/wavs/LJ049-0230.wav|tests/data/ljspeech/wavs/LJ049-0230.npy +tests/data/ljspeech/wavs/LJ037-0005.wav|tests/data/ljspeech/wavs/LJ037-0005.npy +tests/data/ljspeech/wavs/LJ026-0143.wav|tests/data/ljspeech/wavs/LJ026-0143.npy +tests/data/ljspeech/wavs/LJ025-0066.wav|tests/data/ljspeech/wavs/LJ025-0066.npy +tests/data/ljspeech/wavs/LJ023-0135.wav|tests/data/ljspeech/wavs/LJ023-0135.npy 
+tests/data/ljspeech/wavs/LJ017-0019.wav|tests/data/ljspeech/wavs/LJ017-0019.npy +tests/data/ljspeech/wavs/LJ014-0055.wav|tests/data/ljspeech/wavs/LJ014-0055.npy +tests/data/ljspeech/wavs/LJ047-0083.wav|tests/data/ljspeech/wavs/LJ047-0083.npy +tests/data/ljspeech/wavs/LJ016-0157.wav|tests/data/ljspeech/wavs/LJ016-0157.npy +tests/data/ljspeech/wavs/LJ024-0043.wav|tests/data/ljspeech/wavs/LJ024-0043.npy +tests/data/ljspeech/wavs/LJ030-0247.wav|tests/data/ljspeech/wavs/LJ030-0247.npy +tests/data/ljspeech/wavs/LJ041-0191.wav|tests/data/ljspeech/wavs/LJ041-0191.npy +tests/data/ljspeech/wavs/LJ014-0064.wav|tests/data/ljspeech/wavs/LJ014-0064.npy +tests/data/ljspeech/wavs/LJ024-0079.wav|tests/data/ljspeech/wavs/LJ024-0079.npy +tests/data/ljspeech/wavs/LJ041-0062.wav|tests/data/ljspeech/wavs/LJ041-0062.npy +tests/data/ljspeech/wavs/LJ030-0060.wav|tests/data/ljspeech/wavs/LJ030-0060.npy +tests/data/ljspeech/wavs/LJ022-0032.wav|tests/data/ljspeech/wavs/LJ022-0032.npy +tests/data/ljspeech/wavs/LJ002-0214.wav|tests/data/ljspeech/wavs/LJ002-0214.npy +tests/data/ljspeech/wavs/LJ002-0258.wav|tests/data/ljspeech/wavs/LJ002-0258.npy +tests/data/ljspeech/wavs/LJ023-0100.wav|tests/data/ljspeech/wavs/LJ023-0100.npy +tests/data/ljspeech/wavs/LJ032-0271.wav|tests/data/ljspeech/wavs/LJ032-0271.npy +tests/data/ljspeech/wavs/LJ032-0272.wav|tests/data/ljspeech/wavs/LJ032-0272.npy +tests/data/ljspeech/wavs/LJ013-0229.wav|tests/data/ljspeech/wavs/LJ013-0229.npy +tests/data/ljspeech/wavs/LJ032-0242.wav|tests/data/ljspeech/wavs/LJ032-0242.npy +tests/data/ljspeech/wavs/LJ012-0215.wav|tests/data/ljspeech/wavs/LJ012-0215.npy +tests/data/ljspeech/wavs/LJ022-0015.wav|tests/data/ljspeech/wavs/LJ022-0015.npy +tests/data/ljspeech/wavs/LJ006-0237.wav|tests/data/ljspeech/wavs/LJ006-0237.npy +tests/data/ljspeech/wavs/LJ017-0240.wav|tests/data/ljspeech/wavs/LJ017-0240.npy +tests/data/ljspeech/wavs/LJ017-0055.wav|tests/data/ljspeech/wavs/LJ017-0055.npy +tests/data/ljspeech/wavs/LJ050-0010.wav|tests/data/ljspeech/wavs/LJ050-0010.npy +tests/data/ljspeech/wavs/LJ039-0055.wav|tests/data/ljspeech/wavs/LJ039-0055.npy +tests/data/ljspeech/wavs/LJ015-0227.wav|tests/data/ljspeech/wavs/LJ015-0227.npy +tests/data/ljspeech/wavs/LJ007-0031.wav|tests/data/ljspeech/wavs/LJ007-0031.npy +tests/data/ljspeech/wavs/LJ050-0141.wav|tests/data/ljspeech/wavs/LJ050-0141.npy +tests/data/ljspeech/wavs/LJ018-0317.wav|tests/data/ljspeech/wavs/LJ018-0317.npy +tests/data/ljspeech/wavs/LJ019-0230.wav|tests/data/ljspeech/wavs/LJ019-0230.npy +tests/data/ljspeech/wavs/LJ047-0192.wav|tests/data/ljspeech/wavs/LJ047-0192.npy +tests/data/ljspeech/wavs/LJ016-0313.wav|tests/data/ljspeech/wavs/LJ016-0313.npy +tests/data/ljspeech/wavs/LJ039-0155.wav|tests/data/ljspeech/wavs/LJ039-0155.npy +tests/data/ljspeech/wavs/LJ043-0132.wav|tests/data/ljspeech/wavs/LJ043-0132.npy +tests/data/ljspeech/wavs/LJ021-0143.wav|tests/data/ljspeech/wavs/LJ021-0143.npy +tests/data/ljspeech/wavs/LJ047-0090.wav|tests/data/ljspeech/wavs/LJ047-0090.npy +tests/data/ljspeech/wavs/LJ010-0215.wav|tests/data/ljspeech/wavs/LJ010-0215.npy +tests/data/ljspeech/wavs/LJ033-0027.wav|tests/data/ljspeech/wavs/LJ033-0027.npy +tests/data/ljspeech/wavs/LJ045-0064.wav|tests/data/ljspeech/wavs/LJ045-0064.npy +tests/data/ljspeech/wavs/LJ004-0069.wav|tests/data/ljspeech/wavs/LJ004-0069.npy +tests/data/ljspeech/wavs/LJ018-0246.wav|tests/data/ljspeech/wavs/LJ018-0246.npy +tests/data/ljspeech/wavs/LJ050-0105.wav|tests/data/ljspeech/wavs/LJ050-0105.npy 
+tests/data/ljspeech/wavs/LJ002-0018.wav|tests/data/ljspeech/wavs/LJ002-0018.npy +tests/data/ljspeech/wavs/LJ045-0138.wav|tests/data/ljspeech/wavs/LJ045-0138.npy +tests/data/ljspeech/wavs/LJ042-0113.wav|tests/data/ljspeech/wavs/LJ042-0113.npy +tests/data/ljspeech/wavs/LJ014-0206.wav|tests/data/ljspeech/wavs/LJ014-0206.npy +tests/data/ljspeech/wavs/LJ010-0194.wav|tests/data/ljspeech/wavs/LJ010-0194.npy +tests/data/ljspeech/wavs/LJ030-0117.wav|tests/data/ljspeech/wavs/LJ030-0117.npy +tests/data/ljspeech/wavs/LJ030-0092.wav|tests/data/ljspeech/wavs/LJ030-0092.npy +tests/data/ljspeech/wavs/LJ039-0057.wav|tests/data/ljspeech/wavs/LJ039-0057.npy +tests/data/ljspeech/wavs/LJ018-0305.wav|tests/data/ljspeech/wavs/LJ018-0305.npy +tests/data/ljspeech/wavs/LJ003-0125.wav|tests/data/ljspeech/wavs/LJ003-0125.npy +tests/data/ljspeech/wavs/LJ035-0126.wav|tests/data/ljspeech/wavs/LJ035-0126.npy +tests/data/ljspeech/wavs/LJ046-0100.wav|tests/data/ljspeech/wavs/LJ046-0100.npy +tests/data/ljspeech/wavs/LJ005-0090.wav|tests/data/ljspeech/wavs/LJ005-0090.npy +tests/data/ljspeech/wavs/LJ049-0023.wav|tests/data/ljspeech/wavs/LJ049-0023.npy +tests/data/ljspeech/wavs/LJ009-0238.wav|tests/data/ljspeech/wavs/LJ009-0238.npy +tests/data/ljspeech/wavs/LJ034-0136.wav|tests/data/ljspeech/wavs/LJ034-0136.npy +tests/data/ljspeech/wavs/LJ046-0229.wav|tests/data/ljspeech/wavs/LJ046-0229.npy +tests/data/ljspeech/wavs/LJ032-0073.wav|tests/data/ljspeech/wavs/LJ032-0073.npy +tests/data/ljspeech/wavs/LJ010-0296.wav|tests/data/ljspeech/wavs/LJ010-0296.npy +tests/data/ljspeech/wavs/LJ037-0246.wav|tests/data/ljspeech/wavs/LJ037-0246.npy +tests/data/ljspeech/wavs/LJ027-0050.wav|tests/data/ljspeech/wavs/LJ027-0050.npy +tests/data/ljspeech/wavs/LJ040-0222.wav|tests/data/ljspeech/wavs/LJ040-0222.npy +tests/data/ljspeech/wavs/LJ045-0156.wav|tests/data/ljspeech/wavs/LJ045-0156.npy +tests/data/ljspeech/wavs/LJ003-0148.wav|tests/data/ljspeech/wavs/LJ003-0148.npy +tests/data/ljspeech/wavs/LJ027-0035.wav|tests/data/ljspeech/wavs/LJ027-0035.npy +tests/data/ljspeech/wavs/LJ038-0119.wav|tests/data/ljspeech/wavs/LJ038-0119.npy +tests/data/ljspeech/wavs/LJ050-0018.wav|tests/data/ljspeech/wavs/LJ050-0018.npy +tests/data/ljspeech/wavs/LJ046-0120.wav|tests/data/ljspeech/wavs/LJ046-0120.npy +tests/data/ljspeech/wavs/LJ010-0245.wav|tests/data/ljspeech/wavs/LJ010-0245.npy +tests/data/ljspeech/wavs/LJ010-0025.wav|tests/data/ljspeech/wavs/LJ010-0025.npy +tests/data/ljspeech/wavs/LJ020-0094.wav|tests/data/ljspeech/wavs/LJ020-0094.npy +tests/data/ljspeech/wavs/LJ005-0177.wav|tests/data/ljspeech/wavs/LJ005-0177.npy +tests/data/ljspeech/wavs/LJ042-0164.wav|tests/data/ljspeech/wavs/LJ042-0164.npy +tests/data/ljspeech/wavs/LJ007-0175.wav|tests/data/ljspeech/wavs/LJ007-0175.npy +tests/data/ljspeech/wavs/LJ015-0018.wav|tests/data/ljspeech/wavs/LJ015-0018.npy +tests/data/ljspeech/wavs/LJ003-0303.wav|tests/data/ljspeech/wavs/LJ003-0303.npy +tests/data/ljspeech/wavs/LJ034-0176.wav|tests/data/ljspeech/wavs/LJ034-0176.npy +tests/data/ljspeech/wavs/LJ018-0264.wav|tests/data/ljspeech/wavs/LJ018-0264.npy +tests/data/ljspeech/wavs/LJ011-0258.wav|tests/data/ljspeech/wavs/LJ011-0258.npy +tests/data/ljspeech/wavs/LJ016-0417.wav|tests/data/ljspeech/wavs/LJ016-0417.npy +tests/data/ljspeech/wavs/LJ021-0066.wav|tests/data/ljspeech/wavs/LJ021-0066.npy +tests/data/ljspeech/wavs/LJ007-0138.wav|tests/data/ljspeech/wavs/LJ007-0138.npy +tests/data/ljspeech/wavs/LJ017-0093.wav|tests/data/ljspeech/wavs/LJ017-0093.npy 
+tests/data/ljspeech/wavs/LJ029-0060.wav|tests/data/ljspeech/wavs/LJ029-0060.npy +tests/data/ljspeech/wavs/LJ012-0287.wav|tests/data/ljspeech/wavs/LJ012-0287.npy +tests/data/ljspeech/wavs/LJ001-0097.wav|tests/data/ljspeech/wavs/LJ001-0097.npy +tests/data/ljspeech/wavs/LJ007-0187.wav|tests/data/ljspeech/wavs/LJ007-0187.npy +tests/data/ljspeech/wavs/LJ032-0003.wav|tests/data/ljspeech/wavs/LJ032-0003.npy +tests/data/ljspeech/wavs/LJ038-0153.wav|tests/data/ljspeech/wavs/LJ038-0153.npy +tests/data/ljspeech/wavs/LJ043-0005.wav|tests/data/ljspeech/wavs/LJ043-0005.npy +tests/data/ljspeech/wavs/LJ037-0184.wav|tests/data/ljspeech/wavs/LJ037-0184.npy +tests/data/ljspeech/wavs/LJ050-0065.wav|tests/data/ljspeech/wavs/LJ050-0065.npy +tests/data/ljspeech/wavs/LJ012-0247.wav|tests/data/ljspeech/wavs/LJ012-0247.npy +tests/data/ljspeech/wavs/LJ033-0029.wav|tests/data/ljspeech/wavs/LJ033-0029.npy +tests/data/ljspeech/wavs/LJ010-0232.wav|tests/data/ljspeech/wavs/LJ010-0232.npy +tests/data/ljspeech/wavs/LJ011-0115.wav|tests/data/ljspeech/wavs/LJ011-0115.npy +tests/data/ljspeech/wavs/LJ015-0007.wav|tests/data/ljspeech/wavs/LJ015-0007.npy +tests/data/ljspeech/wavs/LJ040-0009.wav|tests/data/ljspeech/wavs/LJ040-0009.npy +tests/data/ljspeech/wavs/LJ031-0201.wav|tests/data/ljspeech/wavs/LJ031-0201.npy +tests/data/ljspeech/wavs/LJ040-0140.wav|tests/data/ljspeech/wavs/LJ040-0140.npy +tests/data/ljspeech/wavs/LJ033-0035.wav|tests/data/ljspeech/wavs/LJ033-0035.npy +tests/data/ljspeech/wavs/LJ015-0011.wav|tests/data/ljspeech/wavs/LJ015-0011.npy +tests/data/ljspeech/wavs/LJ049-0091.wav|tests/data/ljspeech/wavs/LJ049-0091.npy +tests/data/ljspeech/wavs/LJ016-0041.wav|tests/data/ljspeech/wavs/LJ016-0041.npy +tests/data/ljspeech/wavs/LJ010-0002.wav|tests/data/ljspeech/wavs/LJ010-0002.npy +tests/data/ljspeech/wavs/LJ016-0379.wav|tests/data/ljspeech/wavs/LJ016-0379.npy +tests/data/ljspeech/wavs/LJ050-0138.wav|tests/data/ljspeech/wavs/LJ050-0138.npy +tests/data/ljspeech/wavs/LJ050-0022.wav|tests/data/ljspeech/wavs/LJ050-0022.npy +tests/data/ljspeech/wavs/LJ003-0336.wav|tests/data/ljspeech/wavs/LJ003-0336.npy +tests/data/ljspeech/wavs/LJ047-0055.wav|tests/data/ljspeech/wavs/LJ047-0055.npy +tests/data/ljspeech/wavs/LJ034-0024.wav|tests/data/ljspeech/wavs/LJ034-0024.npy +tests/data/ljspeech/wavs/LJ050-0191.wav|tests/data/ljspeech/wavs/LJ050-0191.npy +tests/data/ljspeech/wavs/LJ027-0082.wav|tests/data/ljspeech/wavs/LJ027-0082.npy +tests/data/ljspeech/wavs/LJ002-0054.wav|tests/data/ljspeech/wavs/LJ002-0054.npy +tests/data/ljspeech/wavs/LJ039-0190.wav|tests/data/ljspeech/wavs/LJ039-0190.npy +tests/data/ljspeech/wavs/LJ003-0136.wav|tests/data/ljspeech/wavs/LJ003-0136.npy +tests/data/ljspeech/wavs/LJ011-0185.wav|tests/data/ljspeech/wavs/LJ011-0185.npy +tests/data/ljspeech/wavs/LJ017-0237.wav|tests/data/ljspeech/wavs/LJ017-0237.npy +tests/data/ljspeech/wavs/LJ007-0177.wav|tests/data/ljspeech/wavs/LJ007-0177.npy +tests/data/ljspeech/wavs/LJ039-0053.wav|tests/data/ljspeech/wavs/LJ039-0053.npy +tests/data/ljspeech/wavs/LJ027-0097.wav|tests/data/ljspeech/wavs/LJ027-0097.npy +tests/data/ljspeech/wavs/LJ039-0107.wav|tests/data/ljspeech/wavs/LJ039-0107.npy +tests/data/ljspeech/wavs/LJ040-0091.wav|tests/data/ljspeech/wavs/LJ040-0091.npy +tests/data/ljspeech/wavs/LJ045-0130.wav|tests/data/ljspeech/wavs/LJ045-0130.npy +tests/data/ljspeech/wavs/LJ031-0157.wav|tests/data/ljspeech/wavs/LJ031-0157.npy +tests/data/ljspeech/wavs/LJ017-0070.wav|tests/data/ljspeech/wavs/LJ017-0070.npy 
+tests/data/ljspeech/wavs/LJ012-0034.wav|tests/data/ljspeech/wavs/LJ012-0034.npy +tests/data/ljspeech/wavs/LJ045-0082.wav|tests/data/ljspeech/wavs/LJ045-0082.npy +tests/data/ljspeech/wavs/LJ038-0036.wav|tests/data/ljspeech/wavs/LJ038-0036.npy +tests/data/ljspeech/wavs/LJ025-0037.wav|tests/data/ljspeech/wavs/LJ025-0037.npy +tests/data/ljspeech/wavs/LJ048-0188.wav|tests/data/ljspeech/wavs/LJ048-0188.npy +tests/data/ljspeech/wavs/LJ012-0189.wav|tests/data/ljspeech/wavs/LJ012-0189.npy +tests/data/ljspeech/wavs/LJ041-0177.wav|tests/data/ljspeech/wavs/LJ041-0177.npy +tests/data/ljspeech/wavs/LJ004-0246.wav|tests/data/ljspeech/wavs/LJ004-0246.npy +tests/data/ljspeech/wavs/LJ017-0131.wav|tests/data/ljspeech/wavs/LJ017-0131.npy +tests/data/ljspeech/wavs/LJ049-0186.wav|tests/data/ljspeech/wavs/LJ049-0186.npy +tests/data/ljspeech/wavs/LJ025-0169.wav|tests/data/ljspeech/wavs/LJ025-0169.npy +tests/data/ljspeech/wavs/LJ006-0181.wav|tests/data/ljspeech/wavs/LJ006-0181.npy +tests/data/ljspeech/wavs/LJ029-0172.wav|tests/data/ljspeech/wavs/LJ029-0172.npy +tests/data/ljspeech/wavs/LJ049-0199.wav|tests/data/ljspeech/wavs/LJ049-0199.npy +tests/data/ljspeech/wavs/LJ006-0200.wav|tests/data/ljspeech/wavs/LJ006-0200.npy +tests/data/ljspeech/wavs/LJ023-0093.wav|tests/data/ljspeech/wavs/LJ023-0093.npy +tests/data/ljspeech/wavs/LJ003-0025.wav|tests/data/ljspeech/wavs/LJ003-0025.npy +tests/data/ljspeech/wavs/LJ037-0171.wav|tests/data/ljspeech/wavs/LJ037-0171.npy +tests/data/ljspeech/wavs/LJ009-0147.wav|tests/data/ljspeech/wavs/LJ009-0147.npy +tests/data/ljspeech/wavs/LJ018-0192.wav|tests/data/ljspeech/wavs/LJ018-0192.npy +tests/data/ljspeech/wavs/LJ028-0387.wav|tests/data/ljspeech/wavs/LJ028-0387.npy +tests/data/ljspeech/wavs/LJ011-0161.wav|tests/data/ljspeech/wavs/LJ011-0161.npy +tests/data/ljspeech/wavs/LJ036-0111.wav|tests/data/ljspeech/wavs/LJ036-0111.npy +tests/data/ljspeech/wavs/LJ017-0044.wav|tests/data/ljspeech/wavs/LJ017-0044.npy +tests/data/ljspeech/wavs/LJ029-0020.wav|tests/data/ljspeech/wavs/LJ029-0020.npy +tests/data/ljspeech/wavs/LJ026-0108.wav|tests/data/ljspeech/wavs/LJ026-0108.npy +tests/data/ljspeech/wavs/LJ004-0098.wav|tests/data/ljspeech/wavs/LJ004-0098.npy +tests/data/ljspeech/wavs/LJ048-0099.wav|tests/data/ljspeech/wavs/LJ048-0099.npy +tests/data/ljspeech/wavs/LJ041-0113.wav|tests/data/ljspeech/wavs/LJ041-0113.npy +tests/data/ljspeech/wavs/LJ019-0275.wav|tests/data/ljspeech/wavs/LJ019-0275.npy +tests/data/ljspeech/wavs/LJ014-0271.wav|tests/data/ljspeech/wavs/LJ014-0271.npy +tests/data/ljspeech/wavs/LJ011-0041.wav|tests/data/ljspeech/wavs/LJ011-0041.npy +tests/data/ljspeech/wavs/LJ018-0068.wav|tests/data/ljspeech/wavs/LJ018-0068.npy +tests/data/ljspeech/wavs/LJ018-0164.wav|tests/data/ljspeech/wavs/LJ018-0164.npy +tests/data/ljspeech/wavs/LJ010-0317.wav|tests/data/ljspeech/wavs/LJ010-0317.npy +tests/data/ljspeech/wavs/LJ045-0033.wav|tests/data/ljspeech/wavs/LJ045-0033.npy +tests/data/ljspeech/wavs/LJ029-0140.wav|tests/data/ljspeech/wavs/LJ029-0140.npy +tests/data/ljspeech/wavs/LJ001-0010.wav|tests/data/ljspeech/wavs/LJ001-0010.npy +tests/data/ljspeech/wavs/LJ015-0178.wav|tests/data/ljspeech/wavs/LJ015-0178.npy +tests/data/ljspeech/wavs/LJ042-0207.wav|tests/data/ljspeech/wavs/LJ042-0207.npy +tests/data/ljspeech/wavs/LJ043-0105.wav|tests/data/ljspeech/wavs/LJ043-0105.npy +tests/data/ljspeech/wavs/LJ023-0057.wav|tests/data/ljspeech/wavs/LJ023-0057.npy +tests/data/ljspeech/wavs/LJ045-0039.wav|tests/data/ljspeech/wavs/LJ045-0039.npy 
+tests/data/ljspeech/wavs/LJ016-0093.wav|tests/data/ljspeech/wavs/LJ016-0093.npy +tests/data/ljspeech/wavs/LJ013-0254.wav|tests/data/ljspeech/wavs/LJ013-0254.npy +tests/data/ljspeech/wavs/LJ017-0039.wav|tests/data/ljspeech/wavs/LJ017-0039.npy +tests/data/ljspeech/wavs/LJ046-0078.wav|tests/data/ljspeech/wavs/LJ046-0078.npy +tests/data/ljspeech/wavs/LJ023-0111.wav|tests/data/ljspeech/wavs/LJ023-0111.npy +tests/data/ljspeech/wavs/LJ028-0006.wav|tests/data/ljspeech/wavs/LJ028-0006.npy +tests/data/ljspeech/wavs/LJ042-0202.wav|tests/data/ljspeech/wavs/LJ042-0202.npy +tests/data/ljspeech/wavs/LJ007-0237.wav|tests/data/ljspeech/wavs/LJ007-0237.npy +tests/data/ljspeech/wavs/LJ019-0374.wav|tests/data/ljspeech/wavs/LJ019-0374.npy +tests/data/ljspeech/wavs/LJ031-0139.wav|tests/data/ljspeech/wavs/LJ031-0139.npy +tests/data/ljspeech/wavs/LJ010-0299.wav|tests/data/ljspeech/wavs/LJ010-0299.npy +tests/data/ljspeech/wavs/LJ003-0062.wav|tests/data/ljspeech/wavs/LJ003-0062.npy +tests/data/ljspeech/wavs/LJ029-0121.wav|tests/data/ljspeech/wavs/LJ029-0121.npy +tests/data/ljspeech/wavs/LJ003-0328.wav|tests/data/ljspeech/wavs/LJ003-0328.npy +tests/data/ljspeech/wavs/LJ006-0117.wav|tests/data/ljspeech/wavs/LJ006-0117.npy +tests/data/ljspeech/wavs/LJ028-0244.wav|tests/data/ljspeech/wavs/LJ028-0244.npy +tests/data/ljspeech/wavs/LJ016-0188.wav|tests/data/ljspeech/wavs/LJ016-0188.npy +tests/data/ljspeech/wavs/LJ031-0195.wav|tests/data/ljspeech/wavs/LJ031-0195.npy +tests/data/ljspeech/wavs/LJ044-0132.wav|tests/data/ljspeech/wavs/LJ044-0132.npy +tests/data/ljspeech/wavs/LJ045-0087.wav|tests/data/ljspeech/wavs/LJ045-0087.npy +tests/data/ljspeech/wavs/LJ044-0199.wav|tests/data/ljspeech/wavs/LJ044-0199.npy +tests/data/ljspeech/wavs/LJ046-0162.wav|tests/data/ljspeech/wavs/LJ046-0162.npy +tests/data/ljspeech/wavs/LJ042-0035.wav|tests/data/ljspeech/wavs/LJ042-0035.npy +tests/data/ljspeech/wavs/LJ037-0101.wav|tests/data/ljspeech/wavs/LJ037-0101.npy +tests/data/ljspeech/wavs/LJ048-0244.wav|tests/data/ljspeech/wavs/LJ048-0244.npy +tests/data/ljspeech/wavs/LJ048-0010.wav|tests/data/ljspeech/wavs/LJ048-0010.npy +tests/data/ljspeech/wavs/LJ005-0033.wav|tests/data/ljspeech/wavs/LJ005-0033.npy +tests/data/ljspeech/wavs/LJ025-0078.wav|tests/data/ljspeech/wavs/LJ025-0078.npy +tests/data/ljspeech/wavs/LJ027-0123.wav|tests/data/ljspeech/wavs/LJ027-0123.npy +tests/data/ljspeech/wavs/LJ047-0224.wav|tests/data/ljspeech/wavs/LJ047-0224.npy +tests/data/ljspeech/wavs/LJ023-0075.wav|tests/data/ljspeech/wavs/LJ023-0075.npy +tests/data/ljspeech/wavs/LJ048-0268.wav|tests/data/ljspeech/wavs/LJ048-0268.npy +tests/data/ljspeech/wavs/LJ028-0309.wav|tests/data/ljspeech/wavs/LJ028-0309.npy +tests/data/ljspeech/wavs/LJ006-0084.wav|tests/data/ljspeech/wavs/LJ006-0084.npy +tests/data/ljspeech/wavs/LJ011-0201.wav|tests/data/ljspeech/wavs/LJ011-0201.npy +tests/data/ljspeech/wavs/LJ007-0212.wav|tests/data/ljspeech/wavs/LJ007-0212.npy +tests/data/ljspeech/wavs/LJ020-0031.wav|tests/data/ljspeech/wavs/LJ020-0031.npy +tests/data/ljspeech/wavs/LJ041-0015.wav|tests/data/ljspeech/wavs/LJ041-0015.npy +tests/data/ljspeech/wavs/LJ014-0014.wav|tests/data/ljspeech/wavs/LJ014-0014.npy +tests/data/ljspeech/wavs/LJ049-0160.wav|tests/data/ljspeech/wavs/LJ049-0160.npy +tests/data/ljspeech/wavs/LJ045-0078.wav|tests/data/ljspeech/wavs/LJ045-0078.npy +tests/data/ljspeech/wavs/LJ016-0277.wav|tests/data/ljspeech/wavs/LJ016-0277.npy +tests/data/ljspeech/wavs/LJ004-0225.wav|tests/data/ljspeech/wavs/LJ004-0225.npy 
+tests/data/ljspeech/wavs/LJ031-0159.wav|tests/data/ljspeech/wavs/LJ031-0159.npy +tests/data/ljspeech/wavs/LJ020-0043.wav|tests/data/ljspeech/wavs/LJ020-0043.npy +tests/data/ljspeech/wavs/LJ005-0152.wav|tests/data/ljspeech/wavs/LJ005-0152.npy +tests/data/ljspeech/wavs/LJ035-0139.wav|tests/data/ljspeech/wavs/LJ035-0139.npy +tests/data/ljspeech/wavs/LJ011-0293.wav|tests/data/ljspeech/wavs/LJ011-0293.npy +tests/data/ljspeech/wavs/LJ043-0089.wav|tests/data/ljspeech/wavs/LJ043-0089.npy +tests/data/ljspeech/wavs/LJ030-0156.wav|tests/data/ljspeech/wavs/LJ030-0156.npy +tests/data/ljspeech/wavs/LJ023-0119.wav|tests/data/ljspeech/wavs/LJ023-0119.npy +tests/data/ljspeech/wavs/LJ041-0061.wav|tests/data/ljspeech/wavs/LJ041-0061.npy +tests/data/ljspeech/wavs/LJ023-0120.wav|tests/data/ljspeech/wavs/LJ023-0120.npy +tests/data/ljspeech/wavs/LJ040-0008.wav|tests/data/ljspeech/wavs/LJ040-0008.npy +tests/data/ljspeech/wavs/LJ024-0131.wav|tests/data/ljspeech/wavs/LJ024-0131.npy +tests/data/ljspeech/wavs/LJ011-0128.wav|tests/data/ljspeech/wavs/LJ011-0128.npy +tests/data/ljspeech/wavs/LJ009-0059.wav|tests/data/ljspeech/wavs/LJ009-0059.npy +tests/data/ljspeech/wavs/LJ007-0134.wav|tests/data/ljspeech/wavs/LJ007-0134.npy +tests/data/ljspeech/wavs/LJ041-0035.wav|tests/data/ljspeech/wavs/LJ041-0035.npy +tests/data/ljspeech/wavs/LJ036-0198.wav|tests/data/ljspeech/wavs/LJ036-0198.npy +tests/data/ljspeech/wavs/LJ017-0067.wav|tests/data/ljspeech/wavs/LJ017-0067.npy +tests/data/ljspeech/wavs/LJ004-0174.wav|tests/data/ljspeech/wavs/LJ004-0174.npy +tests/data/ljspeech/wavs/LJ043-0090.wav|tests/data/ljspeech/wavs/LJ043-0090.npy +tests/data/ljspeech/wavs/LJ019-0013.wav|tests/data/ljspeech/wavs/LJ019-0013.npy +tests/data/ljspeech/wavs/LJ038-0240.wav|tests/data/ljspeech/wavs/LJ038-0240.npy +tests/data/ljspeech/wavs/LJ025-0043.wav|tests/data/ljspeech/wavs/LJ025-0043.npy +tests/data/ljspeech/wavs/LJ009-0068.wav|tests/data/ljspeech/wavs/LJ009-0068.npy +tests/data/ljspeech/wavs/LJ017-0161.wav|tests/data/ljspeech/wavs/LJ017-0161.npy +tests/data/ljspeech/wavs/LJ017-0023.wav|tests/data/ljspeech/wavs/LJ017-0023.npy +tests/data/ljspeech/wavs/LJ007-0119.wav|tests/data/ljspeech/wavs/LJ007-0119.npy +tests/data/ljspeech/wavs/LJ002-0271.wav|tests/data/ljspeech/wavs/LJ002-0271.npy +tests/data/ljspeech/wavs/LJ038-0251.wav|tests/data/ljspeech/wavs/LJ038-0251.npy +tests/data/ljspeech/wavs/LJ015-0139.wav|tests/data/ljspeech/wavs/LJ015-0139.npy +tests/data/ljspeech/wavs/LJ028-0516.wav|tests/data/ljspeech/wavs/LJ028-0516.npy +tests/data/ljspeech/wavs/LJ016-0300.wav|tests/data/ljspeech/wavs/LJ016-0300.npy +tests/data/ljspeech/wavs/LJ005-0159.wav|tests/data/ljspeech/wavs/LJ005-0159.npy +tests/data/ljspeech/wavs/LJ009-0212.wav|tests/data/ljspeech/wavs/LJ009-0212.npy +tests/data/ljspeech/wavs/LJ037-0207.wav|tests/data/ljspeech/wavs/LJ037-0207.npy +tests/data/ljspeech/wavs/LJ004-0162.wav|tests/data/ljspeech/wavs/LJ004-0162.npy +tests/data/ljspeech/wavs/LJ034-0044.wav|tests/data/ljspeech/wavs/LJ034-0044.npy +tests/data/ljspeech/wavs/LJ042-0077.wav|tests/data/ljspeech/wavs/LJ042-0077.npy +tests/data/ljspeech/wavs/LJ032-0163.wav|tests/data/ljspeech/wavs/LJ032-0163.npy +tests/data/ljspeech/wavs/LJ004-0110.wav|tests/data/ljspeech/wavs/LJ004-0110.npy +tests/data/ljspeech/wavs/LJ029-0188.wav|tests/data/ljspeech/wavs/LJ029-0188.npy +tests/data/ljspeech/wavs/LJ006-0167.wav|tests/data/ljspeech/wavs/LJ006-0167.npy +tests/data/ljspeech/wavs/LJ003-0052.wav|tests/data/ljspeech/wavs/LJ003-0052.npy 
+tests/data/ljspeech/wavs/LJ019-0358.wav|tests/data/ljspeech/wavs/LJ019-0358.npy +tests/data/ljspeech/wavs/LJ019-0221.wav|tests/data/ljspeech/wavs/LJ019-0221.npy +tests/data/ljspeech/wavs/LJ016-0401.wav|tests/data/ljspeech/wavs/LJ016-0401.npy +tests/data/ljspeech/wavs/LJ031-0168.wav|tests/data/ljspeech/wavs/LJ031-0168.npy +tests/data/ljspeech/wavs/LJ013-0084.wav|tests/data/ljspeech/wavs/LJ013-0084.npy +tests/data/ljspeech/wavs/LJ021-0018.wav|tests/data/ljspeech/wavs/LJ021-0018.npy +tests/data/ljspeech/wavs/LJ047-0140.wav|tests/data/ljspeech/wavs/LJ047-0140.npy +tests/data/ljspeech/wavs/LJ019-0254.wav|tests/data/ljspeech/wavs/LJ019-0254.npy +tests/data/ljspeech/wavs/LJ010-0211.wav|tests/data/ljspeech/wavs/LJ010-0211.npy +tests/data/ljspeech/wavs/LJ013-0085.wav|tests/data/ljspeech/wavs/LJ013-0085.npy +tests/data/ljspeech/wavs/LJ020-0100.wav|tests/data/ljspeech/wavs/LJ020-0100.npy +tests/data/ljspeech/wavs/LJ003-0094.wav|tests/data/ljspeech/wavs/LJ003-0094.npy +tests/data/ljspeech/wavs/LJ014-0222.wav|tests/data/ljspeech/wavs/LJ014-0222.npy +tests/data/ljspeech/wavs/LJ002-0334.wav|tests/data/ljspeech/wavs/LJ002-0334.npy +tests/data/ljspeech/wavs/LJ029-0079.wav|tests/data/ljspeech/wavs/LJ029-0079.npy +tests/data/ljspeech/wavs/LJ036-0063.wav|tests/data/ljspeech/wavs/LJ036-0063.npy +tests/data/ljspeech/wavs/LJ011-0054.wav|tests/data/ljspeech/wavs/LJ011-0054.npy +tests/data/ljspeech/wavs/LJ031-0227.wav|tests/data/ljspeech/wavs/LJ031-0227.npy +tests/data/ljspeech/wavs/LJ018-0033.wav|tests/data/ljspeech/wavs/LJ018-0033.npy +tests/data/ljspeech/wavs/LJ034-0174.wav|tests/data/ljspeech/wavs/LJ034-0174.npy +tests/data/ljspeech/wavs/LJ021-0107.wav|tests/data/ljspeech/wavs/LJ021-0107.npy +tests/data/ljspeech/wavs/LJ007-0049.wav|tests/data/ljspeech/wavs/LJ007-0049.npy +tests/data/ljspeech/wavs/LJ035-0096.wav|tests/data/ljspeech/wavs/LJ035-0096.npy +tests/data/ljspeech/wavs/LJ047-0151.wav|tests/data/ljspeech/wavs/LJ047-0151.npy +tests/data/ljspeech/wavs/LJ020-0079.wav|tests/data/ljspeech/wavs/LJ020-0079.npy +tests/data/ljspeech/wavs/LJ016-0019.wav|tests/data/ljspeech/wavs/LJ016-0019.npy +tests/data/ljspeech/wavs/LJ008-0050.wav|tests/data/ljspeech/wavs/LJ008-0050.npy +tests/data/ljspeech/wavs/LJ040-0071.wav|tests/data/ljspeech/wavs/LJ040-0071.npy +tests/data/ljspeech/wavs/LJ050-0093.wav|tests/data/ljspeech/wavs/LJ050-0093.npy +tests/data/ljspeech/wavs/LJ040-0075.wav|tests/data/ljspeech/wavs/LJ040-0075.npy +tests/data/ljspeech/wavs/LJ042-0235.wav|tests/data/ljspeech/wavs/LJ042-0235.npy +tests/data/ljspeech/wavs/LJ009-0304.wav|tests/data/ljspeech/wavs/LJ009-0304.npy +tests/data/ljspeech/wavs/LJ031-0069.wav|tests/data/ljspeech/wavs/LJ031-0069.npy +tests/data/ljspeech/wavs/LJ042-0195.wav|tests/data/ljspeech/wavs/LJ042-0195.npy +tests/data/ljspeech/wavs/LJ004-0105.wav|tests/data/ljspeech/wavs/LJ004-0105.npy +tests/data/ljspeech/wavs/LJ012-0273.wav|tests/data/ljspeech/wavs/LJ012-0273.npy +tests/data/ljspeech/wavs/LJ011-0023.wav|tests/data/ljspeech/wavs/LJ011-0023.npy +tests/data/ljspeech/wavs/LJ021-0188.wav|tests/data/ljspeech/wavs/LJ021-0188.npy +tests/data/ljspeech/wavs/LJ036-0125.wav|tests/data/ljspeech/wavs/LJ036-0125.npy +tests/data/ljspeech/wavs/LJ011-0172.wav|tests/data/ljspeech/wavs/LJ011-0172.npy +tests/data/ljspeech/wavs/LJ012-0119.wav|tests/data/ljspeech/wavs/LJ012-0119.npy +tests/data/ljspeech/wavs/LJ042-0023.wav|tests/data/ljspeech/wavs/LJ042-0023.npy +tests/data/ljspeech/wavs/LJ021-0132.wav|tests/data/ljspeech/wavs/LJ021-0132.npy 
+tests/data/ljspeech/wavs/LJ026-0091.wav|tests/data/ljspeech/wavs/LJ026-0091.npy +tests/data/ljspeech/wavs/LJ028-0217.wav|tests/data/ljspeech/wavs/LJ028-0217.npy +tests/data/ljspeech/wavs/LJ050-0103.wav|tests/data/ljspeech/wavs/LJ050-0103.npy +tests/data/ljspeech/wavs/LJ041-0158.wav|tests/data/ljspeech/wavs/LJ041-0158.npy +tests/data/ljspeech/wavs/LJ008-0049.wav|tests/data/ljspeech/wavs/LJ008-0049.npy +tests/data/ljspeech/wavs/LJ002-0058.wav|tests/data/ljspeech/wavs/LJ002-0058.npy +tests/data/ljspeech/wavs/LJ014-0289.wav|tests/data/ljspeech/wavs/LJ014-0289.npy +tests/data/ljspeech/wavs/LJ009-0007.wav|tests/data/ljspeech/wavs/LJ009-0007.npy +tests/data/ljspeech/wavs/LJ037-0183.wav|tests/data/ljspeech/wavs/LJ037-0183.npy +tests/data/ljspeech/wavs/LJ006-0126.wav|tests/data/ljspeech/wavs/LJ006-0126.npy +tests/data/ljspeech/wavs/LJ009-0019.wav|tests/data/ljspeech/wavs/LJ009-0019.npy +tests/data/ljspeech/wavs/LJ035-0064.wav|tests/data/ljspeech/wavs/LJ035-0064.npy +tests/data/ljspeech/wavs/LJ008-0023.wav|tests/data/ljspeech/wavs/LJ008-0023.npy +tests/data/ljspeech/wavs/LJ028-0165.wav|tests/data/ljspeech/wavs/LJ028-0165.npy +tests/data/ljspeech/wavs/LJ013-0009.wav|tests/data/ljspeech/wavs/LJ013-0009.npy +tests/data/ljspeech/wavs/LJ036-0200.wav|tests/data/ljspeech/wavs/LJ036-0200.npy +tests/data/ljspeech/wavs/LJ009-0167.wav|tests/data/ljspeech/wavs/LJ009-0167.npy +tests/data/ljspeech/wavs/LJ011-0064.wav|tests/data/ljspeech/wavs/LJ011-0064.npy +tests/data/ljspeech/wavs/LJ047-0237.wav|tests/data/ljspeech/wavs/LJ047-0237.npy +tests/data/ljspeech/wavs/LJ024-0081.wav|tests/data/ljspeech/wavs/LJ024-0081.npy +tests/data/ljspeech/wavs/LJ048-0254.wav|tests/data/ljspeech/wavs/LJ048-0254.npy +tests/data/ljspeech/wavs/LJ017-0235.wav|tests/data/ljspeech/wavs/LJ017-0235.npy +tests/data/ljspeech/wavs/LJ016-0107.wav|tests/data/ljspeech/wavs/LJ016-0107.npy +tests/data/ljspeech/wavs/LJ038-0034.wav|tests/data/ljspeech/wavs/LJ038-0034.npy +tests/data/ljspeech/wavs/LJ035-0153.wav|tests/data/ljspeech/wavs/LJ035-0153.npy +tests/data/ljspeech/wavs/LJ021-0126.wav|tests/data/ljspeech/wavs/LJ021-0126.npy +tests/data/ljspeech/wavs/LJ015-0219.wav|tests/data/ljspeech/wavs/LJ015-0219.npy +tests/data/ljspeech/wavs/LJ010-0242.wav|tests/data/ljspeech/wavs/LJ010-0242.npy +tests/data/ljspeech/wavs/LJ034-0124.wav|tests/data/ljspeech/wavs/LJ034-0124.npy +tests/data/ljspeech/wavs/LJ018-0219.wav|tests/data/ljspeech/wavs/LJ018-0219.npy +tests/data/ljspeech/wavs/LJ011-0287.wav|tests/data/ljspeech/wavs/LJ011-0287.npy +tests/data/ljspeech/wavs/LJ044-0064.wav|tests/data/ljspeech/wavs/LJ044-0064.npy +tests/data/ljspeech/wavs/LJ011-0045.wav|tests/data/ljspeech/wavs/LJ011-0045.npy +tests/data/ljspeech/wavs/LJ010-0087.wav|tests/data/ljspeech/wavs/LJ010-0087.npy +tests/data/ljspeech/wavs/LJ018-0241.wav|tests/data/ljspeech/wavs/LJ018-0241.npy +tests/data/ljspeech/wavs/LJ045-0199.wav|tests/data/ljspeech/wavs/LJ045-0199.npy +tests/data/ljspeech/wavs/LJ034-0126.wav|tests/data/ljspeech/wavs/LJ034-0126.npy +tests/data/ljspeech/wavs/LJ039-0216.wav|tests/data/ljspeech/wavs/LJ039-0216.npy +tests/data/ljspeech/wavs/LJ017-0151.wav|tests/data/ljspeech/wavs/LJ017-0151.npy +tests/data/ljspeech/wavs/LJ018-0177.wav|tests/data/ljspeech/wavs/LJ018-0177.npy +tests/data/ljspeech/wavs/LJ010-0083.wav|tests/data/ljspeech/wavs/LJ010-0083.npy +tests/data/ljspeech/wavs/LJ018-0208.wav|tests/data/ljspeech/wavs/LJ018-0208.npy +tests/data/ljspeech/wavs/LJ015-0226.wav|tests/data/ljspeech/wavs/LJ015-0226.npy 
+tests/data/ljspeech/wavs/LJ016-0165.wav|tests/data/ljspeech/wavs/LJ016-0165.npy +tests/data/ljspeech/wavs/LJ012-0113.wav|tests/data/ljspeech/wavs/LJ012-0113.npy +tests/data/ljspeech/wavs/LJ032-0103.wav|tests/data/ljspeech/wavs/LJ032-0103.npy +tests/data/ljspeech/wavs/LJ033-0206.wav|tests/data/ljspeech/wavs/LJ033-0206.npy +tests/data/ljspeech/wavs/LJ005-0256.wav|tests/data/ljspeech/wavs/LJ005-0256.npy +tests/data/ljspeech/wavs/LJ045-0022.wav|tests/data/ljspeech/wavs/LJ045-0022.npy +tests/data/ljspeech/wavs/LJ044-0108.wav|tests/data/ljspeech/wavs/LJ044-0108.npy +tests/data/ljspeech/wavs/LJ012-0040.wav|tests/data/ljspeech/wavs/LJ012-0040.npy +tests/data/ljspeech/wavs/LJ021-0144.wav|tests/data/ljspeech/wavs/LJ021-0144.npy +tests/data/ljspeech/wavs/LJ033-0175.wav|tests/data/ljspeech/wavs/LJ033-0175.npy +tests/data/ljspeech/wavs/LJ018-0308.wav|tests/data/ljspeech/wavs/LJ018-0308.npy +tests/data/ljspeech/wavs/LJ022-0161.wav|tests/data/ljspeech/wavs/LJ022-0161.npy +tests/data/ljspeech/wavs/LJ016-0221.wav|tests/data/ljspeech/wavs/LJ016-0221.npy +tests/data/ljspeech/wavs/LJ005-0005.wav|tests/data/ljspeech/wavs/LJ005-0005.npy +tests/data/ljspeech/wavs/LJ011-0077.wav|tests/data/ljspeech/wavs/LJ011-0077.npy +tests/data/ljspeech/wavs/LJ005-0278.wav|tests/data/ljspeech/wavs/LJ005-0278.npy +tests/data/ljspeech/wavs/LJ022-0003.wav|tests/data/ljspeech/wavs/LJ022-0003.npy +tests/data/ljspeech/wavs/LJ017-0063.wav|tests/data/ljspeech/wavs/LJ017-0063.npy +tests/data/ljspeech/wavs/LJ021-0110.wav|tests/data/ljspeech/wavs/LJ021-0110.npy +tests/data/ljspeech/wavs/LJ017-0264.wav|tests/data/ljspeech/wavs/LJ017-0264.npy +tests/data/ljspeech/wavs/LJ018-0277.wav|tests/data/ljspeech/wavs/LJ018-0277.npy +tests/data/ljspeech/wavs/LJ022-0130.wav|tests/data/ljspeech/wavs/LJ022-0130.npy +tests/data/ljspeech/wavs/LJ050-0221.wav|tests/data/ljspeech/wavs/LJ050-0221.npy +tests/data/ljspeech/wavs/LJ021-0075.wav|tests/data/ljspeech/wavs/LJ021-0075.npy +tests/data/ljspeech/wavs/LJ010-0218.wav|tests/data/ljspeech/wavs/LJ010-0218.npy +tests/data/ljspeech/wavs/LJ040-0146.wav|tests/data/ljspeech/wavs/LJ040-0146.npy +tests/data/ljspeech/wavs/LJ003-0155.wav|tests/data/ljspeech/wavs/LJ003-0155.npy +tests/data/ljspeech/wavs/LJ020-0048.wav|tests/data/ljspeech/wavs/LJ020-0048.npy +tests/data/ljspeech/wavs/LJ033-0159.wav|tests/data/ljspeech/wavs/LJ033-0159.npy +tests/data/ljspeech/wavs/LJ035-0109.wav|tests/data/ljspeech/wavs/LJ035-0109.npy +tests/data/ljspeech/wavs/LJ023-0027.wav|tests/data/ljspeech/wavs/LJ023-0027.npy +tests/data/ljspeech/wavs/LJ002-0333.wav|tests/data/ljspeech/wavs/LJ002-0333.npy +tests/data/ljspeech/wavs/LJ034-0135.wav|tests/data/ljspeech/wavs/LJ034-0135.npy +tests/data/ljspeech/wavs/LJ011-0111.wav|tests/data/ljspeech/wavs/LJ011-0111.npy +tests/data/ljspeech/wavs/LJ018-0020.wav|tests/data/ljspeech/wavs/LJ018-0020.npy +tests/data/ljspeech/wavs/LJ020-0101.wav|tests/data/ljspeech/wavs/LJ020-0101.npy +tests/data/ljspeech/wavs/LJ047-0057.wav|tests/data/ljspeech/wavs/LJ047-0057.npy +tests/data/ljspeech/wavs/LJ029-0011.wav|tests/data/ljspeech/wavs/LJ029-0011.npy +tests/data/ljspeech/wavs/LJ032-0189.wav|tests/data/ljspeech/wavs/LJ032-0189.npy +tests/data/ljspeech/wavs/LJ046-0110.wav|tests/data/ljspeech/wavs/LJ046-0110.npy +tests/data/ljspeech/wavs/LJ025-0109.wav|tests/data/ljspeech/wavs/LJ025-0109.npy +tests/data/ljspeech/wavs/LJ042-0172.wav|tests/data/ljspeech/wavs/LJ042-0172.npy +tests/data/ljspeech/wavs/LJ007-0126.wav|tests/data/ljspeech/wavs/LJ007-0126.npy 
+tests/data/ljspeech/wavs/LJ043-0095.wav|tests/data/ljspeech/wavs/LJ043-0095.npy +tests/data/ljspeech/wavs/LJ007-0197.wav|tests/data/ljspeech/wavs/LJ007-0197.npy +tests/data/ljspeech/wavs/LJ002-0197.wav|tests/data/ljspeech/wavs/LJ002-0197.npy +tests/data/ljspeech/wavs/LJ050-0125.wav|tests/data/ljspeech/wavs/LJ050-0125.npy +tests/data/ljspeech/wavs/LJ029-0104.wav|tests/data/ljspeech/wavs/LJ029-0104.npy +tests/data/ljspeech/wavs/LJ028-0352.wav|tests/data/ljspeech/wavs/LJ028-0352.npy +tests/data/ljspeech/wavs/LJ036-0187.wav|tests/data/ljspeech/wavs/LJ036-0187.npy +tests/data/ljspeech/wavs/LJ029-0152.wav|tests/data/ljspeech/wavs/LJ029-0152.npy +tests/data/ljspeech/wavs/LJ048-0042.wav|tests/data/ljspeech/wavs/LJ048-0042.npy +tests/data/ljspeech/wavs/LJ028-0442.wav|tests/data/ljspeech/wavs/LJ028-0442.npy +tests/data/ljspeech/wavs/LJ046-0019.wav|tests/data/ljspeech/wavs/LJ046-0019.npy +tests/data/ljspeech/wavs/LJ025-0156.wav|tests/data/ljspeech/wavs/LJ025-0156.npy +tests/data/ljspeech/wavs/LJ033-0100.wav|tests/data/ljspeech/wavs/LJ033-0100.npy +tests/data/ljspeech/wavs/LJ014-0164.wav|tests/data/ljspeech/wavs/LJ014-0164.npy +tests/data/ljspeech/wavs/LJ002-0170.wav|tests/data/ljspeech/wavs/LJ002-0170.npy +tests/data/ljspeech/wavs/LJ014-0321.wav|tests/data/ljspeech/wavs/LJ014-0321.npy +tests/data/ljspeech/wavs/LJ033-0010.wav|tests/data/ljspeech/wavs/LJ033-0010.npy +tests/data/ljspeech/wavs/LJ007-0222.wav|tests/data/ljspeech/wavs/LJ007-0222.npy +tests/data/ljspeech/wavs/LJ013-0091.wav|tests/data/ljspeech/wavs/LJ013-0091.npy +tests/data/ljspeech/wavs/LJ008-0270.wav|tests/data/ljspeech/wavs/LJ008-0270.npy +tests/data/ljspeech/wavs/LJ002-0255.wav|tests/data/ljspeech/wavs/LJ002-0255.npy +tests/data/ljspeech/wavs/LJ014-0249.wav|tests/data/ljspeech/wavs/LJ014-0249.npy +tests/data/ljspeech/wavs/LJ007-0098.wav|tests/data/ljspeech/wavs/LJ007-0098.npy +tests/data/ljspeech/wavs/LJ025-0139.wav|tests/data/ljspeech/wavs/LJ025-0139.npy +tests/data/ljspeech/wavs/LJ002-0149.wav|tests/data/ljspeech/wavs/LJ002-0149.npy +tests/data/ljspeech/wavs/LJ048-0077.wav|tests/data/ljspeech/wavs/LJ048-0077.npy +tests/data/ljspeech/wavs/LJ049-0153.wav|tests/data/ljspeech/wavs/LJ049-0153.npy +tests/data/ljspeech/wavs/LJ038-0071.wav|tests/data/ljspeech/wavs/LJ038-0071.npy +tests/data/ljspeech/wavs/LJ014-0312.wav|tests/data/ljspeech/wavs/LJ014-0312.npy +tests/data/ljspeech/wavs/LJ009-0021.wav|tests/data/ljspeech/wavs/LJ009-0021.npy +tests/data/ljspeech/wavs/LJ009-0097.wav|tests/data/ljspeech/wavs/LJ009-0097.npy +tests/data/ljspeech/wavs/LJ009-0006.wav|tests/data/ljspeech/wavs/LJ009-0006.npy +tests/data/ljspeech/wavs/LJ015-0193.wav|tests/data/ljspeech/wavs/LJ015-0193.npy +tests/data/ljspeech/wavs/LJ046-0154.wav|tests/data/ljspeech/wavs/LJ046-0154.npy +tests/data/ljspeech/wavs/LJ026-0052.wav|tests/data/ljspeech/wavs/LJ026-0052.npy +tests/data/ljspeech/wavs/LJ030-0146.wav|tests/data/ljspeech/wavs/LJ030-0146.npy +tests/data/ljspeech/wavs/LJ004-0124.wav|tests/data/ljspeech/wavs/LJ004-0124.npy +tests/data/ljspeech/wavs/LJ014-0283.wav|tests/data/ljspeech/wavs/LJ014-0283.npy +tests/data/ljspeech/wavs/LJ048-0083.wav|tests/data/ljspeech/wavs/LJ048-0083.npy +tests/data/ljspeech/wavs/LJ006-0087.wav|tests/data/ljspeech/wavs/LJ006-0087.npy +tests/data/ljspeech/wavs/LJ033-0137.wav|tests/data/ljspeech/wavs/LJ033-0137.npy +tests/data/ljspeech/wavs/LJ041-0141.wav|tests/data/ljspeech/wavs/LJ041-0141.npy +tests/data/ljspeech/wavs/LJ044-0180.wav|tests/data/ljspeech/wavs/LJ044-0180.npy 
+tests/data/ljspeech/wavs/LJ006-0099.wav|tests/data/ljspeech/wavs/LJ006-0099.npy +tests/data/ljspeech/wavs/LJ006-0246.wav|tests/data/ljspeech/wavs/LJ006-0246.npy +tests/data/ljspeech/wavs/LJ006-0264.wav|tests/data/ljspeech/wavs/LJ006-0264.npy +tests/data/ljspeech/wavs/LJ028-0092.wav|tests/data/ljspeech/wavs/LJ028-0092.npy +tests/data/ljspeech/wavs/LJ028-0009.wav|tests/data/ljspeech/wavs/LJ028-0009.npy +tests/data/ljspeech/wavs/LJ050-0270.wav|tests/data/ljspeech/wavs/LJ050-0270.npy +tests/data/ljspeech/wavs/LJ030-0043.wav|tests/data/ljspeech/wavs/LJ030-0043.npy +tests/data/ljspeech/wavs/LJ026-0087.wav|tests/data/ljspeech/wavs/LJ026-0087.npy +tests/data/ljspeech/wavs/LJ043-0152.wav|tests/data/ljspeech/wavs/LJ043-0152.npy +tests/data/ljspeech/wavs/LJ046-0075.wav|tests/data/ljspeech/wavs/LJ046-0075.npy +tests/data/ljspeech/wavs/LJ014-0300.wav|tests/data/ljspeech/wavs/LJ014-0300.npy +tests/data/ljspeech/wavs/LJ041-0106.wav|tests/data/ljspeech/wavs/LJ041-0106.npy +tests/data/ljspeech/wavs/LJ048-0101.wav|tests/data/ljspeech/wavs/LJ048-0101.npy +tests/data/ljspeech/wavs/LJ033-0170.wav|tests/data/ljspeech/wavs/LJ033-0170.npy +tests/data/ljspeech/wavs/LJ032-0253.wav|tests/data/ljspeech/wavs/LJ032-0253.npy +tests/data/ljspeech/wavs/LJ038-0102.wav|tests/data/ljspeech/wavs/LJ038-0102.npy +tests/data/ljspeech/wavs/LJ006-0113.wav|tests/data/ljspeech/wavs/LJ006-0113.npy +tests/data/ljspeech/wavs/LJ004-0026.wav|tests/data/ljspeech/wavs/LJ004-0026.npy +tests/data/ljspeech/wavs/LJ013-0047.wav|tests/data/ljspeech/wavs/LJ013-0047.npy +tests/data/ljspeech/wavs/LJ005-0041.wav|tests/data/ljspeech/wavs/LJ005-0041.npy +tests/data/ljspeech/wavs/LJ006-0065.wav|tests/data/ljspeech/wavs/LJ006-0065.npy +tests/data/ljspeech/wavs/LJ016-0142.wav|tests/data/ljspeech/wavs/LJ016-0142.npy +tests/data/ljspeech/wavs/LJ016-0163.wav|tests/data/ljspeech/wavs/LJ016-0163.npy +tests/data/ljspeech/wavs/LJ041-0197.wav|tests/data/ljspeech/wavs/LJ041-0197.npy +tests/data/ljspeech/wavs/LJ043-0062.wav|tests/data/ljspeech/wavs/LJ043-0062.npy +tests/data/ljspeech/wavs/LJ047-0132.wav|tests/data/ljspeech/wavs/LJ047-0132.npy +tests/data/ljspeech/wavs/LJ028-0096.wav|tests/data/ljspeech/wavs/LJ028-0096.npy +tests/data/ljspeech/wavs/LJ030-0193.wav|tests/data/ljspeech/wavs/LJ030-0193.npy +tests/data/ljspeech/wavs/LJ016-0111.wav|tests/data/ljspeech/wavs/LJ016-0111.npy +tests/data/ljspeech/wavs/LJ035-0043.wav|tests/data/ljspeech/wavs/LJ035-0043.npy +tests/data/ljspeech/wavs/LJ013-0230.wav|tests/data/ljspeech/wavs/LJ013-0230.npy +tests/data/ljspeech/wavs/LJ032-0134.wav|tests/data/ljspeech/wavs/LJ032-0134.npy +tests/data/ljspeech/wavs/LJ006-0301.wav|tests/data/ljspeech/wavs/LJ006-0301.npy +tests/data/ljspeech/wavs/LJ035-0055.wav|tests/data/ljspeech/wavs/LJ035-0055.npy +tests/data/ljspeech/wavs/LJ011-0164.wav|tests/data/ljspeech/wavs/LJ011-0164.npy +tests/data/ljspeech/wavs/LJ019-0365.wav|tests/data/ljspeech/wavs/LJ019-0365.npy +tests/data/ljspeech/wavs/LJ017-0164.wav|tests/data/ljspeech/wavs/LJ017-0164.npy +tests/data/ljspeech/wavs/LJ045-0094.wav|tests/data/ljspeech/wavs/LJ045-0094.npy +tests/data/ljspeech/wavs/LJ036-0148.wav|tests/data/ljspeech/wavs/LJ036-0148.npy +tests/data/ljspeech/wavs/LJ007-0026.wav|tests/data/ljspeech/wavs/LJ007-0026.npy +tests/data/ljspeech/wavs/LJ035-0041.wav|tests/data/ljspeech/wavs/LJ035-0041.npy +tests/data/ljspeech/wavs/LJ040-0162.wav|tests/data/ljspeech/wavs/LJ040-0162.npy +tests/data/ljspeech/wavs/LJ048-0103.wav|tests/data/ljspeech/wavs/LJ048-0103.npy 
+tests/data/ljspeech/wavs/LJ017-0118.wav|tests/data/ljspeech/wavs/LJ017-0118.npy +tests/data/ljspeech/wavs/LJ034-0216.wav|tests/data/ljspeech/wavs/LJ034-0216.npy +tests/data/ljspeech/wavs/LJ037-0122.wav|tests/data/ljspeech/wavs/LJ037-0122.npy +tests/data/ljspeech/wavs/LJ018-0279.wav|tests/data/ljspeech/wavs/LJ018-0279.npy +tests/data/ljspeech/wavs/LJ032-0206.wav|tests/data/ljspeech/wavs/LJ032-0206.npy +tests/data/ljspeech/wavs/LJ004-0187.wav|tests/data/ljspeech/wavs/LJ004-0187.npy +tests/data/ljspeech/wavs/LJ014-0048.wav|tests/data/ljspeech/wavs/LJ014-0048.npy +tests/data/ljspeech/wavs/LJ010-0146.wav|tests/data/ljspeech/wavs/LJ010-0146.npy +tests/data/ljspeech/wavs/LJ039-0042.wav|tests/data/ljspeech/wavs/LJ039-0042.npy +tests/data/ljspeech/wavs/LJ016-0168.wav|tests/data/ljspeech/wavs/LJ016-0168.npy +tests/data/ljspeech/wavs/LJ027-0099.wav|tests/data/ljspeech/wavs/LJ027-0099.npy +tests/data/ljspeech/wavs/LJ042-0057.wav|tests/data/ljspeech/wavs/LJ042-0057.npy +tests/data/ljspeech/wavs/LJ047-0024.wav|tests/data/ljspeech/wavs/LJ047-0024.npy +tests/data/ljspeech/wavs/LJ036-0099.wav|tests/data/ljspeech/wavs/LJ036-0099.npy +tests/data/ljspeech/wavs/LJ049-0200.wav|tests/data/ljspeech/wavs/LJ049-0200.npy +tests/data/ljspeech/wavs/LJ008-0200.wav|tests/data/ljspeech/wavs/LJ008-0200.npy +tests/data/ljspeech/wavs/LJ021-0020.wav|tests/data/ljspeech/wavs/LJ021-0020.npy +tests/data/ljspeech/wavs/LJ017-0080.wav|tests/data/ljspeech/wavs/LJ017-0080.npy +tests/data/ljspeech/wavs/LJ048-0189.wav|tests/data/ljspeech/wavs/LJ048-0189.npy +tests/data/ljspeech/wavs/LJ047-0143.wav|tests/data/ljspeech/wavs/LJ047-0143.npy +tests/data/ljspeech/wavs/LJ045-0031.wav|tests/data/ljspeech/wavs/LJ045-0031.npy +tests/data/ljspeech/wavs/LJ043-0049.wav|tests/data/ljspeech/wavs/LJ043-0049.npy +tests/data/ljspeech/wavs/LJ001-0172.wav|tests/data/ljspeech/wavs/LJ001-0172.npy +tests/data/ljspeech/wavs/LJ017-0127.wav|tests/data/ljspeech/wavs/LJ017-0127.npy +tests/data/ljspeech/wavs/LJ037-0165.wav|tests/data/ljspeech/wavs/LJ037-0165.npy +tests/data/ljspeech/wavs/LJ032-0080.wav|tests/data/ljspeech/wavs/LJ032-0080.npy +tests/data/ljspeech/wavs/LJ012-0106.wav|tests/data/ljspeech/wavs/LJ012-0106.npy +tests/data/ljspeech/wavs/LJ003-0329.wav|tests/data/ljspeech/wavs/LJ003-0329.npy +tests/data/ljspeech/wavs/LJ029-0071.wav|tests/data/ljspeech/wavs/LJ029-0071.npy +tests/data/ljspeech/wavs/LJ008-0194.wav|tests/data/ljspeech/wavs/LJ008-0194.npy +tests/data/ljspeech/wavs/LJ027-0167.wav|tests/data/ljspeech/wavs/LJ027-0167.npy +tests/data/ljspeech/wavs/LJ034-0167.wav|tests/data/ljspeech/wavs/LJ034-0167.npy +tests/data/ljspeech/wavs/LJ010-0032.wav|tests/data/ljspeech/wavs/LJ010-0032.npy +tests/data/ljspeech/wavs/LJ042-0019.wav|tests/data/ljspeech/wavs/LJ042-0019.npy +tests/data/ljspeech/wavs/LJ010-0070.wav|tests/data/ljspeech/wavs/LJ010-0070.npy +tests/data/ljspeech/wavs/LJ046-0146.wav|tests/data/ljspeech/wavs/LJ046-0146.npy +tests/data/ljspeech/wavs/LJ043-0103.wav|tests/data/ljspeech/wavs/LJ043-0103.npy +tests/data/ljspeech/wavs/LJ040-0057.wav|tests/data/ljspeech/wavs/LJ040-0057.npy +tests/data/ljspeech/wavs/LJ011-0249.wav|tests/data/ljspeech/wavs/LJ011-0249.npy +tests/data/ljspeech/wavs/LJ018-0221.wav|tests/data/ljspeech/wavs/LJ018-0221.npy +tests/data/ljspeech/wavs/LJ048-0160.wav|tests/data/ljspeech/wavs/LJ048-0160.npy +tests/data/ljspeech/wavs/LJ029-0133.wav|tests/data/ljspeech/wavs/LJ029-0133.npy +tests/data/ljspeech/wavs/LJ003-0193.wav|tests/data/ljspeech/wavs/LJ003-0193.npy 
+tests/data/ljspeech/wavs/LJ018-0131.wav|tests/data/ljspeech/wavs/LJ018-0131.npy +tests/data/ljspeech/wavs/LJ042-0223.wav|tests/data/ljspeech/wavs/LJ042-0223.npy +tests/data/ljspeech/wavs/LJ050-0096.wav|tests/data/ljspeech/wavs/LJ050-0096.npy +tests/data/ljspeech/wavs/LJ018-0146.wav|tests/data/ljspeech/wavs/LJ018-0146.npy +tests/data/ljspeech/wavs/LJ046-0209.wav|tests/data/ljspeech/wavs/LJ046-0209.npy +tests/data/ljspeech/wavs/LJ007-0003.wav|tests/data/ljspeech/wavs/LJ007-0003.npy +tests/data/ljspeech/wavs/LJ031-0140.wav|tests/data/ljspeech/wavs/LJ031-0140.npy +tests/data/ljspeech/wavs/LJ048-0122.wav|tests/data/ljspeech/wavs/LJ048-0122.npy +tests/data/ljspeech/wavs/LJ003-0096.wav|tests/data/ljspeech/wavs/LJ003-0096.npy +tests/data/ljspeech/wavs/LJ018-0029.wav|tests/data/ljspeech/wavs/LJ018-0029.npy +tests/data/ljspeech/wavs/LJ018-0152.wav|tests/data/ljspeech/wavs/LJ018-0152.npy +tests/data/ljspeech/wavs/LJ014-0138.wav|tests/data/ljspeech/wavs/LJ014-0138.npy +tests/data/ljspeech/wavs/LJ048-0070.wav|tests/data/ljspeech/wavs/LJ048-0070.npy +tests/data/ljspeech/wavs/LJ018-0345.wav|tests/data/ljspeech/wavs/LJ018-0345.npy +tests/data/ljspeech/wavs/LJ011-0071.wav|tests/data/ljspeech/wavs/LJ011-0071.npy +tests/data/ljspeech/wavs/LJ003-0185.wav|tests/data/ljspeech/wavs/LJ003-0185.npy +tests/data/ljspeech/wavs/LJ040-0043.wav|tests/data/ljspeech/wavs/LJ040-0043.npy +tests/data/ljspeech/wavs/LJ018-0026.wav|tests/data/ljspeech/wavs/LJ018-0026.npy +tests/data/ljspeech/wavs/LJ001-0115.wav|tests/data/ljspeech/wavs/LJ001-0115.npy +tests/data/ljspeech/wavs/LJ050-0189.wav|tests/data/ljspeech/wavs/LJ050-0189.npy +tests/data/ljspeech/wavs/LJ038-0173.wav|tests/data/ljspeech/wavs/LJ038-0173.npy +tests/data/ljspeech/wavs/LJ038-0172.wav|tests/data/ljspeech/wavs/LJ038-0172.npy +tests/data/ljspeech/wavs/LJ016-0194.wav|tests/data/ljspeech/wavs/LJ016-0194.npy +tests/data/ljspeech/wavs/LJ016-0324.wav|tests/data/ljspeech/wavs/LJ016-0324.npy +tests/data/ljspeech/wavs/LJ042-0157.wav|tests/data/ljspeech/wavs/LJ042-0157.npy +tests/data/ljspeech/wavs/LJ044-0160.wav|tests/data/ljspeech/wavs/LJ044-0160.npy +tests/data/ljspeech/wavs/LJ003-0293.wav|tests/data/ljspeech/wavs/LJ003-0293.npy +tests/data/ljspeech/wavs/LJ021-0156.wav|tests/data/ljspeech/wavs/LJ021-0156.npy +tests/data/ljspeech/wavs/LJ041-0129.wav|tests/data/ljspeech/wavs/LJ041-0129.npy +tests/data/ljspeech/wavs/LJ002-0189.wav|tests/data/ljspeech/wavs/LJ002-0189.npy +tests/data/ljspeech/wavs/LJ034-0045.wav|tests/data/ljspeech/wavs/LJ034-0045.npy +tests/data/ljspeech/wavs/LJ024-0110.wav|tests/data/ljspeech/wavs/LJ024-0110.npy +tests/data/ljspeech/wavs/LJ044-0044.wav|tests/data/ljspeech/wavs/LJ044-0044.npy +tests/data/ljspeech/wavs/LJ023-0015.wav|tests/data/ljspeech/wavs/LJ023-0015.npy +tests/data/ljspeech/wavs/LJ025-0027.wav|tests/data/ljspeech/wavs/LJ025-0027.npy +tests/data/ljspeech/wavs/LJ048-0174.wav|tests/data/ljspeech/wavs/LJ048-0174.npy +tests/data/ljspeech/wavs/LJ028-0358.wav|tests/data/ljspeech/wavs/LJ028-0358.npy +tests/data/ljspeech/wavs/LJ050-0261.wav|tests/data/ljspeech/wavs/LJ050-0261.npy +tests/data/ljspeech/wavs/LJ007-0180.wav|tests/data/ljspeech/wavs/LJ007-0180.npy +tests/data/ljspeech/wavs/LJ004-0200.wav|tests/data/ljspeech/wavs/LJ004-0200.npy +tests/data/ljspeech/wavs/LJ021-0120.wav|tests/data/ljspeech/wavs/LJ021-0120.npy +tests/data/ljspeech/wavs/LJ046-0160.wav|tests/data/ljspeech/wavs/LJ046-0160.npy +tests/data/ljspeech/wavs/LJ024-0117.wav|tests/data/ljspeech/wavs/LJ024-0117.npy 
+tests/data/ljspeech/wavs/LJ016-0231.wav|tests/data/ljspeech/wavs/LJ016-0231.npy +tests/data/ljspeech/wavs/LJ003-0251.wav|tests/data/ljspeech/wavs/LJ003-0251.npy +tests/data/ljspeech/wavs/LJ005-0266.wav|tests/data/ljspeech/wavs/LJ005-0266.npy +tests/data/ljspeech/wavs/LJ019-0018.wav|tests/data/ljspeech/wavs/LJ019-0018.npy +tests/data/ljspeech/wavs/LJ031-0233.wav|tests/data/ljspeech/wavs/LJ031-0233.npy +tests/data/ljspeech/wavs/LJ046-0094.wav|tests/data/ljspeech/wavs/LJ046-0094.npy +tests/data/ljspeech/wavs/LJ050-0066.wav|tests/data/ljspeech/wavs/LJ050-0066.npy +tests/data/ljspeech/wavs/LJ018-0362.wav|tests/data/ljspeech/wavs/LJ018-0362.npy +tests/data/ljspeech/wavs/LJ007-0013.wav|tests/data/ljspeech/wavs/LJ007-0013.npy +tests/data/ljspeech/wavs/LJ016-0237.wav|tests/data/ljspeech/wavs/LJ016-0237.npy +tests/data/ljspeech/wavs/LJ007-0028.wav|tests/data/ljspeech/wavs/LJ007-0028.npy +tests/data/ljspeech/wavs/LJ028-0351.wav|tests/data/ljspeech/wavs/LJ028-0351.npy +tests/data/ljspeech/wavs/LJ019-0150.wav|tests/data/ljspeech/wavs/LJ019-0150.npy +tests/data/ljspeech/wavs/LJ038-0025.wav|tests/data/ljspeech/wavs/LJ038-0025.npy +tests/data/ljspeech/wavs/LJ026-0121.wav|tests/data/ljspeech/wavs/LJ026-0121.npy +tests/data/ljspeech/wavs/LJ025-0081.wav|tests/data/ljspeech/wavs/LJ025-0081.npy +tests/data/ljspeech/wavs/LJ009-0178.wav|tests/data/ljspeech/wavs/LJ009-0178.npy +tests/data/ljspeech/wavs/LJ044-0212.wav|tests/data/ljspeech/wavs/LJ044-0212.npy +tests/data/ljspeech/wavs/LJ002-0296.wav|tests/data/ljspeech/wavs/LJ002-0296.npy +tests/data/ljspeech/wavs/LJ012-0211.wav|tests/data/ljspeech/wavs/LJ012-0211.npy +tests/data/ljspeech/wavs/LJ026-0015.wav|tests/data/ljspeech/wavs/LJ026-0015.npy +tests/data/ljspeech/wavs/LJ023-0006.wav|tests/data/ljspeech/wavs/LJ023-0006.npy +tests/data/ljspeech/wavs/LJ025-0076.wav|tests/data/ljspeech/wavs/LJ025-0076.npy +tests/data/ljspeech/wavs/LJ025-0150.wav|tests/data/ljspeech/wavs/LJ025-0150.npy +tests/data/ljspeech/wavs/LJ039-0066.wav|tests/data/ljspeech/wavs/LJ039-0066.npy +tests/data/ljspeech/wavs/LJ025-0075.wav|tests/data/ljspeech/wavs/LJ025-0075.npy +tests/data/ljspeech/wavs/LJ021-0191.wav|tests/data/ljspeech/wavs/LJ021-0191.npy +tests/data/ljspeech/wavs/LJ012-0230.wav|tests/data/ljspeech/wavs/LJ012-0230.npy +tests/data/ljspeech/wavs/LJ012-0260.wav|tests/data/ljspeech/wavs/LJ012-0260.npy +tests/data/ljspeech/wavs/LJ041-0009.wav|tests/data/ljspeech/wavs/LJ041-0009.npy +tests/data/ljspeech/wavs/LJ045-0230.wav|tests/data/ljspeech/wavs/LJ045-0230.npy +tests/data/ljspeech/wavs/LJ049-0168.wav|tests/data/ljspeech/wavs/LJ049-0168.npy +tests/data/ljspeech/wavs/LJ015-0277.wav|tests/data/ljspeech/wavs/LJ015-0277.npy +tests/data/ljspeech/wavs/LJ030-0100.wav|tests/data/ljspeech/wavs/LJ030-0100.npy +tests/data/ljspeech/wavs/LJ018-0274.wav|tests/data/ljspeech/wavs/LJ018-0274.npy +tests/data/ljspeech/wavs/LJ006-0035.wav|tests/data/ljspeech/wavs/LJ006-0035.npy +tests/data/ljspeech/wavs/LJ009-0084.wav|tests/data/ljspeech/wavs/LJ009-0084.npy +tests/data/ljspeech/wavs/LJ009-0297.wav|tests/data/ljspeech/wavs/LJ009-0297.npy +tests/data/ljspeech/wavs/LJ045-0240.wav|tests/data/ljspeech/wavs/LJ045-0240.npy +tests/data/ljspeech/wavs/LJ014-0260.wav|tests/data/ljspeech/wavs/LJ014-0260.npy +tests/data/ljspeech/wavs/LJ009-0118.wav|tests/data/ljspeech/wavs/LJ009-0118.npy +tests/data/ljspeech/wavs/LJ022-0012.wav|tests/data/ljspeech/wavs/LJ022-0012.npy +tests/data/ljspeech/wavs/LJ045-0220.wav|tests/data/ljspeech/wavs/LJ045-0220.npy 
+tests/data/ljspeech/wavs/LJ022-0047.wav|tests/data/ljspeech/wavs/LJ022-0047.npy +tests/data/ljspeech/wavs/LJ008-0279.wav|tests/data/ljspeech/wavs/LJ008-0279.npy +tests/data/ljspeech/wavs/LJ005-0141.wav|tests/data/ljspeech/wavs/LJ005-0141.npy +tests/data/ljspeech/wavs/LJ035-0163.wav|tests/data/ljspeech/wavs/LJ035-0163.npy +tests/data/ljspeech/wavs/LJ030-0110.wav|tests/data/ljspeech/wavs/LJ030-0110.npy +tests/data/ljspeech/wavs/LJ015-0094.wav|tests/data/ljspeech/wavs/LJ015-0094.npy +tests/data/ljspeech/wavs/LJ034-0087.wav|tests/data/ljspeech/wavs/LJ034-0087.npy +tests/data/ljspeech/wavs/LJ002-0063.wav|tests/data/ljspeech/wavs/LJ002-0063.npy +tests/data/ljspeech/wavs/LJ028-0281.wav|tests/data/ljspeech/wavs/LJ028-0281.npy +tests/data/ljspeech/wavs/LJ047-0187.wav|tests/data/ljspeech/wavs/LJ047-0187.npy +tests/data/ljspeech/wavs/LJ002-0230.wav|tests/data/ljspeech/wavs/LJ002-0230.npy +tests/data/ljspeech/wavs/LJ019-0219.wav|tests/data/ljspeech/wavs/LJ019-0219.npy +tests/data/ljspeech/wavs/LJ014-0168.wav|tests/data/ljspeech/wavs/LJ014-0168.npy +tests/data/ljspeech/wavs/LJ010-0142.wav|tests/data/ljspeech/wavs/LJ010-0142.npy +tests/data/ljspeech/wavs/LJ019-0181.wav|tests/data/ljspeech/wavs/LJ019-0181.npy +tests/data/ljspeech/wavs/LJ011-0122.wav|tests/data/ljspeech/wavs/LJ011-0122.npy +tests/data/ljspeech/wavs/LJ007-0239.wav|tests/data/ljspeech/wavs/LJ007-0239.npy +tests/data/ljspeech/wavs/LJ029-0086.wav|tests/data/ljspeech/wavs/LJ029-0086.npy +tests/data/ljspeech/wavs/LJ028-0262.wav|tests/data/ljspeech/wavs/LJ028-0262.npy +tests/data/ljspeech/wavs/LJ019-0246.wav|tests/data/ljspeech/wavs/LJ019-0246.npy +tests/data/ljspeech/wavs/LJ021-0200.wav|tests/data/ljspeech/wavs/LJ021-0200.npy +tests/data/ljspeech/wavs/LJ010-0156.wav|tests/data/ljspeech/wavs/LJ010-0156.npy +tests/data/ljspeech/wavs/LJ016-0184.wav|tests/data/ljspeech/wavs/LJ016-0184.npy +tests/data/ljspeech/wavs/LJ038-0021.wav|tests/data/ljspeech/wavs/LJ038-0021.npy +tests/data/ljspeech/wavs/LJ003-0211.wav|tests/data/ljspeech/wavs/LJ003-0211.npy +tests/data/ljspeech/wavs/LJ050-0243.wav|tests/data/ljspeech/wavs/LJ050-0243.npy +tests/data/ljspeech/wavs/LJ019-0222.wav|tests/data/ljspeech/wavs/LJ019-0222.npy +tests/data/ljspeech/wavs/LJ016-0190.wav|tests/data/ljspeech/wavs/LJ016-0190.npy +tests/data/ljspeech/wavs/LJ003-0101.wav|tests/data/ljspeech/wavs/LJ003-0101.npy +tests/data/ljspeech/wavs/LJ008-0011.wav|tests/data/ljspeech/wavs/LJ008-0011.npy +tests/data/ljspeech/wavs/LJ019-0208.wav|tests/data/ljspeech/wavs/LJ019-0208.npy +tests/data/ljspeech/wavs/LJ007-0232.wav|tests/data/ljspeech/wavs/LJ007-0232.npy +tests/data/ljspeech/wavs/LJ034-0092.wav|tests/data/ljspeech/wavs/LJ034-0092.npy +tests/data/ljspeech/wavs/LJ028-0295.wav|tests/data/ljspeech/wavs/LJ028-0295.npy +tests/data/ljspeech/wavs/LJ032-0022.wav|tests/data/ljspeech/wavs/LJ032-0022.npy +tests/data/ljspeech/wavs/LJ010-0284.wav|tests/data/ljspeech/wavs/LJ010-0284.npy +tests/data/ljspeech/wavs/LJ041-0032.wav|tests/data/ljspeech/wavs/LJ041-0032.npy +tests/data/ljspeech/wavs/LJ010-0241.wav|tests/data/ljspeech/wavs/LJ010-0241.npy +tests/data/ljspeech/wavs/LJ016-0397.wav|tests/data/ljspeech/wavs/LJ016-0397.npy +tests/data/ljspeech/wavs/LJ042-0139.wav|tests/data/ljspeech/wavs/LJ042-0139.npy +tests/data/ljspeech/wavs/LJ043-0175.wav|tests/data/ljspeech/wavs/LJ043-0175.npy +tests/data/ljspeech/wavs/LJ007-0189.wav|tests/data/ljspeech/wavs/LJ007-0189.npy +tests/data/ljspeech/wavs/LJ034-0046.wav|tests/data/ljspeech/wavs/LJ034-0046.npy 
+tests/data/ljspeech/wavs/LJ042-0146.wav|tests/data/ljspeech/wavs/LJ042-0146.npy +tests/data/ljspeech/wavs/LJ043-0092.wav|tests/data/ljspeech/wavs/LJ043-0092.npy +tests/data/ljspeech/wavs/LJ037-0036.wav|tests/data/ljspeech/wavs/LJ037-0036.npy +tests/data/ljspeech/wavs/LJ005-0140.wav|tests/data/ljspeech/wavs/LJ005-0140.npy +tests/data/ljspeech/wavs/LJ037-0060.wav|tests/data/ljspeech/wavs/LJ037-0060.npy +tests/data/ljspeech/wavs/LJ036-0108.wav|tests/data/ljspeech/wavs/LJ036-0108.npy +tests/data/ljspeech/wavs/LJ022-0080.wav|tests/data/ljspeech/wavs/LJ022-0080.npy +tests/data/ljspeech/wavs/LJ016-0011.wav|tests/data/ljspeech/wavs/LJ016-0011.npy +tests/data/ljspeech/wavs/LJ032-0256.wav|tests/data/ljspeech/wavs/LJ032-0256.npy +tests/data/ljspeech/wavs/LJ011-0076.wav|tests/data/ljspeech/wavs/LJ011-0076.npy +tests/data/ljspeech/wavs/LJ003-0278.wav|tests/data/ljspeech/wavs/LJ003-0278.npy +tests/data/ljspeech/wavs/LJ002-0041.wav|tests/data/ljspeech/wavs/LJ002-0041.npy +tests/data/ljspeech/wavs/LJ037-0092.wav|tests/data/ljspeech/wavs/LJ037-0092.npy +tests/data/ljspeech/wavs/LJ041-0108.wav|tests/data/ljspeech/wavs/LJ041-0108.npy +tests/data/ljspeech/wavs/LJ037-0042.wav|tests/data/ljspeech/wavs/LJ037-0042.npy +tests/data/ljspeech/wavs/LJ049-0093.wav|tests/data/ljspeech/wavs/LJ049-0093.npy +tests/data/ljspeech/wavs/LJ003-0144.wav|tests/data/ljspeech/wavs/LJ003-0144.npy +tests/data/ljspeech/wavs/LJ006-0154.wav|tests/data/ljspeech/wavs/LJ006-0154.npy +tests/data/ljspeech/wavs/LJ010-0176.wav|tests/data/ljspeech/wavs/LJ010-0176.npy +tests/data/ljspeech/wavs/LJ007-0162.wav|tests/data/ljspeech/wavs/LJ007-0162.npy +tests/data/ljspeech/wavs/LJ048-0169.wav|tests/data/ljspeech/wavs/LJ048-0169.npy +tests/data/ljspeech/wavs/LJ012-0241.wav|tests/data/ljspeech/wavs/LJ012-0241.npy +tests/data/ljspeech/wavs/LJ018-0371.wav|tests/data/ljspeech/wavs/LJ018-0371.npy +tests/data/ljspeech/wavs/LJ010-0280.wav|tests/data/ljspeech/wavs/LJ010-0280.npy +tests/data/ljspeech/wavs/LJ005-0073.wav|tests/data/ljspeech/wavs/LJ005-0073.npy +tests/data/ljspeech/wavs/LJ050-0222.wav|tests/data/ljspeech/wavs/LJ050-0222.npy +tests/data/ljspeech/wavs/LJ042-0224.wav|tests/data/ljspeech/wavs/LJ042-0224.npy +tests/data/ljspeech/wavs/LJ027-0004.wav|tests/data/ljspeech/wavs/LJ027-0004.npy +tests/data/ljspeech/wavs/LJ028-0390.wav|tests/data/ljspeech/wavs/LJ028-0390.npy +tests/data/ljspeech/wavs/LJ050-0063.wav|tests/data/ljspeech/wavs/LJ050-0063.npy +tests/data/ljspeech/wavs/LJ046-0176.wav|tests/data/ljspeech/wavs/LJ046-0176.npy +tests/data/ljspeech/wavs/LJ028-0365.wav|tests/data/ljspeech/wavs/LJ028-0365.npy +tests/data/ljspeech/wavs/LJ016-0010.wav|tests/data/ljspeech/wavs/LJ016-0010.npy +tests/data/ljspeech/wavs/LJ018-0398.wav|tests/data/ljspeech/wavs/LJ018-0398.npy +tests/data/ljspeech/wavs/LJ022-0049.wav|tests/data/ljspeech/wavs/LJ022-0049.npy +tests/data/ljspeech/wavs/LJ008-0295.wav|tests/data/ljspeech/wavs/LJ008-0295.npy +tests/data/ljspeech/wavs/LJ019-0268.wav|tests/data/ljspeech/wavs/LJ019-0268.npy +tests/data/ljspeech/wavs/LJ001-0087.wav|tests/data/ljspeech/wavs/LJ001-0087.npy +tests/data/ljspeech/wavs/LJ007-0083.wav|tests/data/ljspeech/wavs/LJ007-0083.npy +tests/data/ljspeech/wavs/LJ012-0007.wav|tests/data/ljspeech/wavs/LJ012-0007.npy +tests/data/ljspeech/wavs/LJ029-0094.wav|tests/data/ljspeech/wavs/LJ029-0094.npy +tests/data/ljspeech/wavs/LJ011-0166.wav|tests/data/ljspeech/wavs/LJ011-0166.npy +tests/data/ljspeech/wavs/LJ039-0177.wav|tests/data/ljspeech/wavs/LJ039-0177.npy 
+tests/data/ljspeech/wavs/LJ004-0084.wav|tests/data/ljspeech/wavs/LJ004-0084.npy +tests/data/ljspeech/wavs/LJ021-0031.wav|tests/data/ljspeech/wavs/LJ021-0031.npy +tests/data/ljspeech/wavs/LJ017-0232.wav|tests/data/ljspeech/wavs/LJ017-0232.npy +tests/data/ljspeech/wavs/LJ001-0105.wav|tests/data/ljspeech/wavs/LJ001-0105.npy +tests/data/ljspeech/wavs/LJ013-0022.wav|tests/data/ljspeech/wavs/LJ013-0022.npy +tests/data/ljspeech/wavs/LJ001-0183.wav|tests/data/ljspeech/wavs/LJ001-0183.npy +tests/data/ljspeech/wavs/LJ048-0132.wav|tests/data/ljspeech/wavs/LJ048-0132.npy +tests/data/ljspeech/wavs/LJ010-0040.wav|tests/data/ljspeech/wavs/LJ010-0040.npy +tests/data/ljspeech/wavs/LJ008-0155.wav|tests/data/ljspeech/wavs/LJ008-0155.npy +tests/data/ljspeech/wavs/LJ005-0281.wav|tests/data/ljspeech/wavs/LJ005-0281.npy +tests/data/ljspeech/wavs/LJ013-0143.wav|tests/data/ljspeech/wavs/LJ013-0143.npy +tests/data/ljspeech/wavs/LJ018-0311.wav|tests/data/ljspeech/wavs/LJ018-0311.npy +tests/data/ljspeech/wavs/LJ032-0234.wav|tests/data/ljspeech/wavs/LJ032-0234.npy +tests/data/ljspeech/wavs/LJ043-0034.wav|tests/data/ljspeech/wavs/LJ043-0034.npy +tests/data/ljspeech/wavs/LJ031-0038.wav|tests/data/ljspeech/wavs/LJ031-0038.npy +tests/data/ljspeech/wavs/LJ010-0045.wav|tests/data/ljspeech/wavs/LJ010-0045.npy +tests/data/ljspeech/wavs/LJ025-0005.wav|tests/data/ljspeech/wavs/LJ025-0005.npy +tests/data/ljspeech/wavs/LJ043-0140.wav|tests/data/ljspeech/wavs/LJ043-0140.npy +tests/data/ljspeech/wavs/LJ010-0003.wav|tests/data/ljspeech/wavs/LJ010-0003.npy +tests/data/ljspeech/wavs/LJ022-0178.wav|tests/data/ljspeech/wavs/LJ022-0178.npy +tests/data/ljspeech/wavs/LJ018-0197.wav|tests/data/ljspeech/wavs/LJ018-0197.npy +tests/data/ljspeech/wavs/LJ026-0129.wav|tests/data/ljspeech/wavs/LJ026-0129.npy +tests/data/ljspeech/wavs/LJ002-0138.wav|tests/data/ljspeech/wavs/LJ002-0138.npy +tests/data/ljspeech/wavs/LJ049-0105.wav|tests/data/ljspeech/wavs/LJ049-0105.npy +tests/data/ljspeech/wavs/LJ006-0249.wav|tests/data/ljspeech/wavs/LJ006-0249.npy +tests/data/ljspeech/wavs/LJ037-0161.wav|tests/data/ljspeech/wavs/LJ037-0161.npy +tests/data/ljspeech/wavs/LJ027-0101.wav|tests/data/ljspeech/wavs/LJ027-0101.npy +tests/data/ljspeech/wavs/LJ003-0267.wav|tests/data/ljspeech/wavs/LJ003-0267.npy +tests/data/ljspeech/wavs/LJ033-0016.wav|tests/data/ljspeech/wavs/LJ033-0016.npy +tests/data/ljspeech/wavs/LJ049-0214.wav|tests/data/ljspeech/wavs/LJ049-0214.npy +tests/data/ljspeech/wavs/LJ027-0122.wav|tests/data/ljspeech/wavs/LJ027-0122.npy +tests/data/ljspeech/wavs/LJ005-0262.wav|tests/data/ljspeech/wavs/LJ005-0262.npy +tests/data/ljspeech/wavs/LJ042-0024.wav|tests/data/ljspeech/wavs/LJ042-0024.npy +tests/data/ljspeech/wavs/LJ007-0085.wav|tests/data/ljspeech/wavs/LJ007-0085.npy +tests/data/ljspeech/wavs/LJ015-0031.wav|tests/data/ljspeech/wavs/LJ015-0031.npy +tests/data/ljspeech/wavs/LJ029-0137.wav|tests/data/ljspeech/wavs/LJ029-0137.npy +tests/data/ljspeech/wavs/LJ032-0193.wav|tests/data/ljspeech/wavs/LJ032-0193.npy +tests/data/ljspeech/wavs/LJ019-0340.wav|tests/data/ljspeech/wavs/LJ019-0340.npy +tests/data/ljspeech/wavs/LJ025-0099.wav|tests/data/ljspeech/wavs/LJ025-0099.npy +tests/data/ljspeech/wavs/LJ018-0095.wav|tests/data/ljspeech/wavs/LJ018-0095.npy +tests/data/ljspeech/wavs/LJ008-0319.wav|tests/data/ljspeech/wavs/LJ008-0319.npy +tests/data/ljspeech/wavs/LJ010-0060.wav|tests/data/ljspeech/wavs/LJ010-0060.npy +tests/data/ljspeech/wavs/LJ015-0238.wav|tests/data/ljspeech/wavs/LJ015-0238.npy 
+tests/data/ljspeech/wavs/LJ016-0112.wav|tests/data/ljspeech/wavs/LJ016-0112.npy +tests/data/ljspeech/wavs/LJ044-0139.wav|tests/data/ljspeech/wavs/LJ044-0139.npy +tests/data/ljspeech/wavs/LJ008-0093.wav|tests/data/ljspeech/wavs/LJ008-0093.npy +tests/data/ljspeech/wavs/LJ010-0020.wav|tests/data/ljspeech/wavs/LJ010-0020.npy +tests/data/ljspeech/wavs/LJ041-0181.wav|tests/data/ljspeech/wavs/LJ041-0181.npy +tests/data/ljspeech/wavs/LJ036-0032.wav|tests/data/ljspeech/wavs/LJ036-0032.npy +tests/data/ljspeech/wavs/LJ001-0099.wav|tests/data/ljspeech/wavs/LJ001-0099.npy +tests/data/ljspeech/wavs/LJ008-0268.wav|tests/data/ljspeech/wavs/LJ008-0268.npy +tests/data/ljspeech/wavs/LJ045-0126.wav|tests/data/ljspeech/wavs/LJ045-0126.npy +tests/data/ljspeech/wavs/LJ006-0293.wav|tests/data/ljspeech/wavs/LJ006-0293.npy +tests/data/ljspeech/wavs/LJ045-0123.wav|tests/data/ljspeech/wavs/LJ045-0123.npy +tests/data/ljspeech/wavs/LJ012-0278.wav|tests/data/ljspeech/wavs/LJ012-0278.npy +tests/data/ljspeech/wavs/LJ005-0274.wav|tests/data/ljspeech/wavs/LJ005-0274.npy +tests/data/ljspeech/wavs/LJ045-0127.wav|tests/data/ljspeech/wavs/LJ045-0127.npy +tests/data/ljspeech/wavs/LJ009-0137.wav|tests/data/ljspeech/wavs/LJ009-0137.npy +tests/data/ljspeech/wavs/LJ019-0324.wav|tests/data/ljspeech/wavs/LJ019-0324.npy +tests/data/ljspeech/wavs/LJ003-0230.wav|tests/data/ljspeech/wavs/LJ003-0230.npy +tests/data/ljspeech/wavs/LJ041-0073.wav|tests/data/ljspeech/wavs/LJ041-0073.npy +tests/data/ljspeech/wavs/LJ014-0156.wav|tests/data/ljspeech/wavs/LJ014-0156.npy +tests/data/ljspeech/wavs/LJ037-0218.wav|tests/data/ljspeech/wavs/LJ037-0218.npy +tests/data/ljspeech/wavs/LJ008-0081.wav|tests/data/ljspeech/wavs/LJ008-0081.npy +tests/data/ljspeech/wavs/LJ038-0008.wav|tests/data/ljspeech/wavs/LJ038-0008.npy +tests/data/ljspeech/wavs/LJ033-0165.wav|tests/data/ljspeech/wavs/LJ033-0165.npy +tests/data/ljspeech/wavs/LJ010-0104.wav|tests/data/ljspeech/wavs/LJ010-0104.npy +tests/data/ljspeech/wavs/LJ031-0158.wav|tests/data/ljspeech/wavs/LJ031-0158.npy +tests/data/ljspeech/wavs/LJ030-0131.wav|tests/data/ljspeech/wavs/LJ030-0131.npy +tests/data/ljspeech/wavs/LJ008-0113.wav|tests/data/ljspeech/wavs/LJ008-0113.npy +tests/data/ljspeech/wavs/LJ011-0060.wav|tests/data/ljspeech/wavs/LJ011-0060.npy +tests/data/ljspeech/wavs/LJ017-0155.wav|tests/data/ljspeech/wavs/LJ017-0155.npy +tests/data/ljspeech/wavs/LJ006-0054.wav|tests/data/ljspeech/wavs/LJ006-0054.npy +tests/data/ljspeech/wavs/LJ046-0020.wav|tests/data/ljspeech/wavs/LJ046-0020.npy +tests/data/ljspeech/wavs/LJ015-0109.wav|tests/data/ljspeech/wavs/LJ015-0109.npy +tests/data/ljspeech/wavs/LJ013-0166.wav|tests/data/ljspeech/wavs/LJ013-0166.npy +tests/data/ljspeech/wavs/LJ011-0238.wav|tests/data/ljspeech/wavs/LJ011-0238.npy +tests/data/ljspeech/wavs/LJ048-0123.wav|tests/data/ljspeech/wavs/LJ048-0123.npy +tests/data/ljspeech/wavs/LJ029-0085.wav|tests/data/ljspeech/wavs/LJ029-0085.npy +tests/data/ljspeech/wavs/LJ022-0107.wav|tests/data/ljspeech/wavs/LJ022-0107.npy +tests/data/ljspeech/wavs/LJ042-0177.wav|tests/data/ljspeech/wavs/LJ042-0177.npy +tests/data/ljspeech/wavs/LJ002-0235.wav|tests/data/ljspeech/wavs/LJ002-0235.npy +tests/data/ljspeech/wavs/LJ039-0106.wav|tests/data/ljspeech/wavs/LJ039-0106.npy +tests/data/ljspeech/wavs/LJ029-0177.wav|tests/data/ljspeech/wavs/LJ029-0177.npy +tests/data/ljspeech/wavs/LJ016-0359.wav|tests/data/ljspeech/wavs/LJ016-0359.npy +tests/data/ljspeech/wavs/LJ010-0311.wav|tests/data/ljspeech/wavs/LJ010-0311.npy 
+tests/data/ljspeech/wavs/LJ044-0189.wav|tests/data/ljspeech/wavs/LJ044-0189.npy +tests/data/ljspeech/wavs/LJ005-0164.wav|tests/data/ljspeech/wavs/LJ005-0164.npy +tests/data/ljspeech/wavs/LJ003-0202.wav|tests/data/ljspeech/wavs/LJ003-0202.npy +tests/data/ljspeech/wavs/LJ001-0159.wav|tests/data/ljspeech/wavs/LJ001-0159.npy +tests/data/ljspeech/wavs/LJ018-0396.wav|tests/data/ljspeech/wavs/LJ018-0396.npy +tests/data/ljspeech/wavs/LJ021-0094.wav|tests/data/ljspeech/wavs/LJ021-0094.npy +tests/data/ljspeech/wavs/LJ036-0023.wav|tests/data/ljspeech/wavs/LJ036-0023.npy +tests/data/ljspeech/wavs/LJ038-0167.wav|tests/data/ljspeech/wavs/LJ038-0167.npy +tests/data/ljspeech/wavs/LJ046-0022.wav|tests/data/ljspeech/wavs/LJ046-0022.npy +tests/data/ljspeech/wavs/LJ046-0002.wav|tests/data/ljspeech/wavs/LJ046-0002.npy +tests/data/ljspeech/wavs/LJ018-0247.wav|tests/data/ljspeech/wavs/LJ018-0247.npy +tests/data/ljspeech/wavs/LJ025-0088.wav|tests/data/ljspeech/wavs/LJ025-0088.npy +tests/data/ljspeech/wavs/LJ049-0217.wav|tests/data/ljspeech/wavs/LJ049-0217.npy +tests/data/ljspeech/wavs/LJ046-0243.wav|tests/data/ljspeech/wavs/LJ046-0243.npy +tests/data/ljspeech/wavs/LJ015-0116.wav|tests/data/ljspeech/wavs/LJ015-0116.npy +tests/data/ljspeech/wavs/LJ009-0045.wav|tests/data/ljspeech/wavs/LJ009-0045.npy +tests/data/ljspeech/wavs/LJ044-0085.wav|tests/data/ljspeech/wavs/LJ044-0085.npy +tests/data/ljspeech/wavs/LJ009-0209.wav|tests/data/ljspeech/wavs/LJ009-0209.npy +tests/data/ljspeech/wavs/LJ046-0232.wav|tests/data/ljspeech/wavs/LJ046-0232.npy +tests/data/ljspeech/wavs/LJ008-0079.wav|tests/data/ljspeech/wavs/LJ008-0079.npy +tests/data/ljspeech/wavs/LJ011-0050.wav|tests/data/ljspeech/wavs/LJ011-0050.npy +tests/data/ljspeech/wavs/LJ022-0163.wav|tests/data/ljspeech/wavs/LJ022-0163.npy +tests/data/ljspeech/wavs/LJ041-0163.wav|tests/data/ljspeech/wavs/LJ041-0163.npy +tests/data/ljspeech/wavs/LJ013-0088.wav|tests/data/ljspeech/wavs/LJ013-0088.npy +tests/data/ljspeech/wavs/LJ029-0062.wav|tests/data/ljspeech/wavs/LJ029-0062.npy +tests/data/ljspeech/wavs/LJ026-0067.wav|tests/data/ljspeech/wavs/LJ026-0067.npy +tests/data/ljspeech/wavs/LJ042-0225.wav|tests/data/ljspeech/wavs/LJ042-0225.npy +tests/data/ljspeech/wavs/LJ044-0049.wav|tests/data/ljspeech/wavs/LJ044-0049.npy +tests/data/ljspeech/wavs/LJ009-0274.wav|tests/data/ljspeech/wavs/LJ009-0274.npy +tests/data/ljspeech/wavs/LJ022-0189.wav|tests/data/ljspeech/wavs/LJ022-0189.npy +tests/data/ljspeech/wavs/LJ042-0060.wav|tests/data/ljspeech/wavs/LJ042-0060.npy +tests/data/ljspeech/wavs/LJ050-0253.wav|tests/data/ljspeech/wavs/LJ050-0253.npy +tests/data/ljspeech/wavs/LJ007-0052.wav|tests/data/ljspeech/wavs/LJ007-0052.npy +tests/data/ljspeech/wavs/LJ040-0176.wav|tests/data/ljspeech/wavs/LJ040-0176.npy +tests/data/ljspeech/wavs/LJ041-0048.wav|tests/data/ljspeech/wavs/LJ041-0048.npy +tests/data/ljspeech/wavs/LJ016-0349.wav|tests/data/ljspeech/wavs/LJ016-0349.npy +tests/data/ljspeech/wavs/LJ043-0061.wav|tests/data/ljspeech/wavs/LJ043-0061.npy +tests/data/ljspeech/wavs/LJ049-0032.wav|tests/data/ljspeech/wavs/LJ049-0032.npy +tests/data/ljspeech/wavs/LJ042-0055.wav|tests/data/ljspeech/wavs/LJ042-0055.npy +tests/data/ljspeech/wavs/LJ019-0356.wav|tests/data/ljspeech/wavs/LJ019-0356.npy +tests/data/ljspeech/wavs/LJ032-0168.wav|tests/data/ljspeech/wavs/LJ032-0168.npy +tests/data/ljspeech/wavs/LJ004-0198.wav|tests/data/ljspeech/wavs/LJ004-0198.npy +tests/data/ljspeech/wavs/LJ040-0063.wav|tests/data/ljspeech/wavs/LJ040-0063.npy 
+tests/data/ljspeech/wavs/LJ019-0353.wav|tests/data/ljspeech/wavs/LJ019-0353.npy +tests/data/ljspeech/wavs/LJ005-0294.wav|tests/data/ljspeech/wavs/LJ005-0294.npy +tests/data/ljspeech/wavs/LJ005-0234.wav|tests/data/ljspeech/wavs/LJ005-0234.npy +tests/data/ljspeech/wavs/LJ025-0061.wav|tests/data/ljspeech/wavs/LJ025-0061.npy +tests/data/ljspeech/wavs/LJ042-0171.wav|tests/data/ljspeech/wavs/LJ042-0171.npy +tests/data/ljspeech/wavs/LJ048-0207.wav|tests/data/ljspeech/wavs/LJ048-0207.npy +tests/data/ljspeech/wavs/LJ024-0032.wav|tests/data/ljspeech/wavs/LJ024-0032.npy +tests/data/ljspeech/wavs/LJ026-0069.wav|tests/data/ljspeech/wavs/LJ026-0069.npy +tests/data/ljspeech/wavs/LJ031-0207.wav|tests/data/ljspeech/wavs/LJ031-0207.npy +tests/data/ljspeech/wavs/LJ038-0168.wav|tests/data/ljspeech/wavs/LJ038-0168.npy +tests/data/ljspeech/wavs/LJ004-0190.wav|tests/data/ljspeech/wavs/LJ004-0190.npy +tests/data/ljspeech/wavs/LJ005-0216.wav|tests/data/ljspeech/wavs/LJ005-0216.npy +tests/data/ljspeech/wavs/LJ016-0017.wav|tests/data/ljspeech/wavs/LJ016-0017.npy +tests/data/ljspeech/wavs/LJ036-0162.wav|tests/data/ljspeech/wavs/LJ036-0162.npy +tests/data/ljspeech/wavs/LJ031-0021.wav|tests/data/ljspeech/wavs/LJ031-0021.npy +tests/data/ljspeech/wavs/LJ021-0116.wav|tests/data/ljspeech/wavs/LJ021-0116.npy +tests/data/ljspeech/wavs/LJ026-0039.wav|tests/data/ljspeech/wavs/LJ026-0039.npy +tests/data/ljspeech/wavs/LJ017-0071.wav|tests/data/ljspeech/wavs/LJ017-0071.npy +tests/data/ljspeech/wavs/LJ021-0035.wav|tests/data/ljspeech/wavs/LJ021-0035.npy +tests/data/ljspeech/wavs/LJ017-0103.wav|tests/data/ljspeech/wavs/LJ017-0103.npy +tests/data/ljspeech/wavs/LJ010-0119.wav|tests/data/ljspeech/wavs/LJ010-0119.npy +tests/data/ljspeech/wavs/LJ026-0094.wav|tests/data/ljspeech/wavs/LJ026-0094.npy +tests/data/ljspeech/wavs/LJ003-0056.wav|tests/data/ljspeech/wavs/LJ003-0056.npy +tests/data/ljspeech/wavs/LJ013-0016.wav|tests/data/ljspeech/wavs/LJ013-0016.npy +tests/data/ljspeech/wavs/LJ020-0054.wav|tests/data/ljspeech/wavs/LJ020-0054.npy +tests/data/ljspeech/wavs/LJ049-0056.wav|tests/data/ljspeech/wavs/LJ049-0056.npy +tests/data/ljspeech/wavs/LJ043-0028.wav|tests/data/ljspeech/wavs/LJ043-0028.npy +tests/data/ljspeech/wavs/LJ045-0091.wav|tests/data/ljspeech/wavs/LJ045-0091.npy +tests/data/ljspeech/wavs/LJ015-0020.wav|tests/data/ljspeech/wavs/LJ015-0020.npy +tests/data/ljspeech/wavs/LJ021-0069.wav|tests/data/ljspeech/wavs/LJ021-0069.npy +tests/data/ljspeech/wavs/LJ013-0068.wav|tests/data/ljspeech/wavs/LJ013-0068.npy +tests/data/ljspeech/wavs/LJ038-0096.wav|tests/data/ljspeech/wavs/LJ038-0096.npy +tests/data/ljspeech/wavs/LJ046-0245.wav|tests/data/ljspeech/wavs/LJ046-0245.npy +tests/data/ljspeech/wavs/LJ012-0071.wav|tests/data/ljspeech/wavs/LJ012-0071.npy +tests/data/ljspeech/wavs/LJ032-0181.wav|tests/data/ljspeech/wavs/LJ032-0181.npy +tests/data/ljspeech/wavs/LJ024-0125.wav|tests/data/ljspeech/wavs/LJ024-0125.npy +tests/data/ljspeech/wavs/LJ028-0003.wav|tests/data/ljspeech/wavs/LJ028-0003.npy +tests/data/ljspeech/wavs/LJ004-0164.wav|tests/data/ljspeech/wavs/LJ004-0164.npy +tests/data/ljspeech/wavs/LJ034-0208.wav|tests/data/ljspeech/wavs/LJ034-0208.npy +tests/data/ljspeech/wavs/LJ031-0031.wav|tests/data/ljspeech/wavs/LJ031-0031.npy +tests/data/ljspeech/wavs/LJ002-0294.wav|tests/data/ljspeech/wavs/LJ002-0294.npy +tests/data/ljspeech/wavs/LJ014-0294.wav|tests/data/ljspeech/wavs/LJ014-0294.npy +tests/data/ljspeech/wavs/LJ002-0108.wav|tests/data/ljspeech/wavs/LJ002-0108.npy 
+tests/data/ljspeech/wavs/LJ047-0150.wav|tests/data/ljspeech/wavs/LJ047-0150.npy +tests/data/ljspeech/wavs/LJ011-0278.wav|tests/data/ljspeech/wavs/LJ011-0278.npy +tests/data/ljspeech/wavs/LJ040-0154.wav|tests/data/ljspeech/wavs/LJ040-0154.npy +tests/data/ljspeech/wavs/LJ028-0392.wav|tests/data/ljspeech/wavs/LJ028-0392.npy +tests/data/ljspeech/wavs/LJ032-0108.wav|tests/data/ljspeech/wavs/LJ032-0108.npy +tests/data/ljspeech/wavs/LJ047-0186.wav|tests/data/ljspeech/wavs/LJ047-0186.npy +tests/data/ljspeech/wavs/LJ040-0031.wav|tests/data/ljspeech/wavs/LJ040-0031.npy +tests/data/ljspeech/wavs/LJ038-0112.wav|tests/data/ljspeech/wavs/LJ038-0112.npy +tests/data/ljspeech/wavs/LJ048-0092.wav|tests/data/ljspeech/wavs/LJ048-0092.npy +tests/data/ljspeech/wavs/LJ042-0092.wav|tests/data/ljspeech/wavs/LJ042-0092.npy +tests/data/ljspeech/wavs/LJ028-0395.wav|tests/data/ljspeech/wavs/LJ028-0395.npy +tests/data/ljspeech/wavs/LJ045-0154.wav|tests/data/ljspeech/wavs/LJ045-0154.npy +tests/data/ljspeech/wavs/LJ016-0247.wav|tests/data/ljspeech/wavs/LJ016-0247.npy +tests/data/ljspeech/wavs/LJ045-0049.wav|tests/data/ljspeech/wavs/LJ045-0049.npy +tests/data/ljspeech/wavs/LJ022-0045.wav|tests/data/ljspeech/wavs/LJ022-0045.npy +tests/data/ljspeech/wavs/LJ038-0267.wav|tests/data/ljspeech/wavs/LJ038-0267.npy +tests/data/ljspeech/wavs/LJ029-0191.wav|tests/data/ljspeech/wavs/LJ029-0191.npy +tests/data/ljspeech/wavs/LJ007-0161.wav|tests/data/ljspeech/wavs/LJ007-0161.npy +tests/data/ljspeech/wavs/LJ046-0206.wav|tests/data/ljspeech/wavs/LJ046-0206.npy +tests/data/ljspeech/wavs/LJ039-0094.wav|tests/data/ljspeech/wavs/LJ039-0094.npy +tests/data/ljspeech/wavs/LJ046-0070.wav|tests/data/ljspeech/wavs/LJ046-0070.npy +tests/data/ljspeech/wavs/LJ048-0179.wav|tests/data/ljspeech/wavs/LJ048-0179.npy +tests/data/ljspeech/wavs/LJ004-0051.wav|tests/data/ljspeech/wavs/LJ004-0051.npy +tests/data/ljspeech/wavs/LJ002-0283.wav|tests/data/ljspeech/wavs/LJ002-0283.npy +tests/data/ljspeech/wavs/LJ016-0239.wav|tests/data/ljspeech/wavs/LJ016-0239.npy +tests/data/ljspeech/wavs/LJ041-0013.wav|tests/data/ljspeech/wavs/LJ041-0013.npy +tests/data/ljspeech/wavs/LJ012-0137.wav|tests/data/ljspeech/wavs/LJ012-0137.npy +tests/data/ljspeech/wavs/LJ005-0197.wav|tests/data/ljspeech/wavs/LJ005-0197.npy +tests/data/ljspeech/wavs/LJ002-0220.wav|tests/data/ljspeech/wavs/LJ002-0220.npy +tests/data/ljspeech/wavs/LJ005-0186.wav|tests/data/ljspeech/wavs/LJ005-0186.npy +tests/data/ljspeech/wavs/LJ019-0147.wav|tests/data/ljspeech/wavs/LJ019-0147.npy +tests/data/ljspeech/wavs/LJ041-0101.wav|tests/data/ljspeech/wavs/LJ041-0101.npy +tests/data/ljspeech/wavs/LJ037-0244.wav|tests/data/ljspeech/wavs/LJ037-0244.npy +tests/data/ljspeech/wavs/LJ001-0034.wav|tests/data/ljspeech/wavs/LJ001-0034.npy +tests/data/ljspeech/wavs/LJ023-0020.wav|tests/data/ljspeech/wavs/LJ023-0020.npy +tests/data/ljspeech/wavs/LJ013-0184.wav|tests/data/ljspeech/wavs/LJ013-0184.npy +tests/data/ljspeech/wavs/LJ048-0112.wav|tests/data/ljspeech/wavs/LJ048-0112.npy +tests/data/ljspeech/wavs/LJ030-0049.wav|tests/data/ljspeech/wavs/LJ030-0049.npy +tests/data/ljspeech/wavs/LJ016-0172.wav|tests/data/ljspeech/wavs/LJ016-0172.npy +tests/data/ljspeech/wavs/LJ043-0053.wav|tests/data/ljspeech/wavs/LJ043-0053.npy +tests/data/ljspeech/wavs/LJ005-0070.wav|tests/data/ljspeech/wavs/LJ005-0070.npy +tests/data/ljspeech/wavs/LJ013-0152.wav|tests/data/ljspeech/wavs/LJ013-0152.npy +tests/data/ljspeech/wavs/LJ006-0022.wav|tests/data/ljspeech/wavs/LJ006-0022.npy 
+tests/data/ljspeech/wavs/LJ024-0059.wav|tests/data/ljspeech/wavs/LJ024-0059.npy +tests/data/ljspeech/wavs/LJ045-0041.wav|tests/data/ljspeech/wavs/LJ045-0041.npy +tests/data/ljspeech/wavs/LJ016-0396.wav|tests/data/ljspeech/wavs/LJ016-0396.npy +tests/data/ljspeech/wavs/LJ006-0010.wav|tests/data/ljspeech/wavs/LJ006-0010.npy +tests/data/ljspeech/wavs/LJ045-0005.wav|tests/data/ljspeech/wavs/LJ045-0005.npy +tests/data/ljspeech/wavs/LJ023-0113.wav|tests/data/ljspeech/wavs/LJ023-0113.npy +tests/data/ljspeech/wavs/LJ030-0084.wav|tests/data/ljspeech/wavs/LJ030-0084.npy +tests/data/ljspeech/wavs/LJ048-0124.wav|tests/data/ljspeech/wavs/LJ048-0124.npy +tests/data/ljspeech/wavs/LJ033-0062.wav|tests/data/ljspeech/wavs/LJ033-0062.npy +tests/data/ljspeech/wavs/LJ012-0198.wav|tests/data/ljspeech/wavs/LJ012-0198.npy +tests/data/ljspeech/wavs/LJ028-0296.wav|tests/data/ljspeech/wavs/LJ028-0296.npy +tests/data/ljspeech/wavs/LJ006-0292.wav|tests/data/ljspeech/wavs/LJ006-0292.npy +tests/data/ljspeech/wavs/LJ043-0067.wav|tests/data/ljspeech/wavs/LJ043-0067.npy +tests/data/ljspeech/wavs/LJ005-0065.wav|tests/data/ljspeech/wavs/LJ005-0065.npy +tests/data/ljspeech/wavs/LJ006-0025.wav|tests/data/ljspeech/wavs/LJ006-0025.npy +tests/data/ljspeech/wavs/LJ006-0038.wav|tests/data/ljspeech/wavs/LJ006-0038.npy +tests/data/ljspeech/wavs/LJ017-0037.wav|tests/data/ljspeech/wavs/LJ017-0037.npy +tests/data/ljspeech/wavs/LJ030-0059.wav|tests/data/ljspeech/wavs/LJ030-0059.npy +tests/data/ljspeech/wavs/LJ015-0205.wav|tests/data/ljspeech/wavs/LJ015-0205.npy +tests/data/ljspeech/wavs/LJ004-0147.wav|tests/data/ljspeech/wavs/LJ004-0147.npy +tests/data/ljspeech/wavs/LJ017-0230.wav|tests/data/ljspeech/wavs/LJ017-0230.npy +tests/data/ljspeech/wavs/LJ045-0178.wav|tests/data/ljspeech/wavs/LJ045-0178.npy +tests/data/ljspeech/wavs/LJ038-0086.wav|tests/data/ljspeech/wavs/LJ038-0086.npy +tests/data/ljspeech/wavs/LJ028-0355.wav|tests/data/ljspeech/wavs/LJ028-0355.npy +tests/data/ljspeech/wavs/LJ003-0048.wav|tests/data/ljspeech/wavs/LJ003-0048.npy +tests/data/ljspeech/wavs/LJ009-0002.wav|tests/data/ljspeech/wavs/LJ009-0002.npy +tests/data/ljspeech/wavs/LJ019-0189.wav|tests/data/ljspeech/wavs/LJ019-0189.npy +tests/data/ljspeech/wavs/LJ040-0183.wav|tests/data/ljspeech/wavs/LJ040-0183.npy +tests/data/ljspeech/wavs/LJ050-0206.wav|tests/data/ljspeech/wavs/LJ050-0206.npy +tests/data/ljspeech/wavs/LJ021-0209.wav|tests/data/ljspeech/wavs/LJ021-0209.npy +tests/data/ljspeech/wavs/LJ035-0072.wav|tests/data/ljspeech/wavs/LJ035-0072.npy +tests/data/ljspeech/wavs/LJ004-0059.wav|tests/data/ljspeech/wavs/LJ004-0059.npy +tests/data/ljspeech/wavs/LJ022-0038.wav|tests/data/ljspeech/wavs/LJ022-0038.npy +tests/data/ljspeech/wavs/LJ010-0056.wav|tests/data/ljspeech/wavs/LJ010-0056.npy +tests/data/ljspeech/wavs/LJ034-0078.wav|tests/data/ljspeech/wavs/LJ034-0078.npy +tests/data/ljspeech/wavs/LJ008-0153.wav|tests/data/ljspeech/wavs/LJ008-0153.npy +tests/data/ljspeech/wavs/LJ016-0220.wav|tests/data/ljspeech/wavs/LJ016-0220.npy +tests/data/ljspeech/wavs/LJ028-0061.wav|tests/data/ljspeech/wavs/LJ028-0061.npy +tests/data/ljspeech/wavs/LJ042-0088.wav|tests/data/ljspeech/wavs/LJ042-0088.npy +tests/data/ljspeech/wavs/LJ021-0151.wav|tests/data/ljspeech/wavs/LJ021-0151.npy +tests/data/ljspeech/wavs/LJ026-0062.wav|tests/data/ljspeech/wavs/LJ026-0062.npy +tests/data/ljspeech/wavs/LJ048-0055.wav|tests/data/ljspeech/wavs/LJ048-0055.npy +tests/data/ljspeech/wavs/LJ040-0120.wav|tests/data/ljspeech/wavs/LJ040-0120.npy 
+tests/data/ljspeech/wavs/LJ027-0177.wav|tests/data/ljspeech/wavs/LJ027-0177.npy +tests/data/ljspeech/wavs/LJ012-0258.wav|tests/data/ljspeech/wavs/LJ012-0258.npy +tests/data/ljspeech/wavs/LJ046-0054.wav|tests/data/ljspeech/wavs/LJ046-0054.npy +tests/data/ljspeech/wavs/LJ004-0072.wav|tests/data/ljspeech/wavs/LJ004-0072.npy +tests/data/ljspeech/wavs/LJ010-0175.wav|tests/data/ljspeech/wavs/LJ010-0175.npy +tests/data/ljspeech/wavs/LJ048-0192.wav|tests/data/ljspeech/wavs/LJ048-0192.npy +tests/data/ljspeech/wavs/LJ035-0023.wav|tests/data/ljspeech/wavs/LJ035-0023.npy +tests/data/ljspeech/wavs/LJ019-0370.wav|tests/data/ljspeech/wavs/LJ019-0370.npy +tests/data/ljspeech/wavs/LJ042-0123.wav|tests/data/ljspeech/wavs/LJ042-0123.npy +tests/data/ljspeech/wavs/LJ002-0325.wav|tests/data/ljspeech/wavs/LJ002-0325.npy +tests/data/ljspeech/wavs/LJ032-0015.wav|tests/data/ljspeech/wavs/LJ032-0015.npy +tests/data/ljspeech/wavs/LJ041-0202.wav|tests/data/ljspeech/wavs/LJ041-0202.npy +tests/data/ljspeech/wavs/LJ032-0221.wav|tests/data/ljspeech/wavs/LJ032-0221.npy +tests/data/ljspeech/wavs/LJ015-0002.wav|tests/data/ljspeech/wavs/LJ015-0002.npy +tests/data/ljspeech/wavs/LJ041-0130.wav|tests/data/ljspeech/wavs/LJ041-0130.npy +tests/data/ljspeech/wavs/LJ020-0085.wav|tests/data/ljspeech/wavs/LJ020-0085.npy +tests/data/ljspeech/wavs/LJ019-0074.wav|tests/data/ljspeech/wavs/LJ019-0074.npy +tests/data/ljspeech/wavs/LJ009-0012.wav|tests/data/ljspeech/wavs/LJ009-0012.npy +tests/data/ljspeech/wavs/LJ026-0132.wav|tests/data/ljspeech/wavs/LJ026-0132.npy +tests/data/ljspeech/wavs/LJ002-0222.wav|tests/data/ljspeech/wavs/LJ002-0222.npy +tests/data/ljspeech/wavs/LJ025-0115.wav|tests/data/ljspeech/wavs/LJ025-0115.npy +tests/data/ljspeech/wavs/LJ041-0023.wav|tests/data/ljspeech/wavs/LJ041-0023.npy +tests/data/ljspeech/wavs/LJ008-0219.wav|tests/data/ljspeech/wavs/LJ008-0219.npy +tests/data/ljspeech/wavs/LJ034-0157.wav|tests/data/ljspeech/wavs/LJ034-0157.npy +tests/data/ljspeech/wavs/LJ007-0096.wav|tests/data/ljspeech/wavs/LJ007-0096.npy +tests/data/ljspeech/wavs/LJ049-0119.wav|tests/data/ljspeech/wavs/LJ049-0119.npy +tests/data/ljspeech/wavs/LJ012-0173.wav|tests/data/ljspeech/wavs/LJ012-0173.npy +tests/data/ljspeech/wavs/LJ043-0076.wav|tests/data/ljspeech/wavs/LJ043-0076.npy +tests/data/ljspeech/wavs/LJ019-0040.wav|tests/data/ljspeech/wavs/LJ019-0040.npy +tests/data/ljspeech/wavs/LJ028-0452.wav|tests/data/ljspeech/wavs/LJ028-0452.npy +tests/data/ljspeech/wavs/LJ049-0026.wav|tests/data/ljspeech/wavs/LJ049-0026.npy +tests/data/ljspeech/wavs/LJ010-0279.wav|tests/data/ljspeech/wavs/LJ010-0279.npy +tests/data/ljspeech/wavs/LJ049-0092.wav|tests/data/ljspeech/wavs/LJ049-0092.npy +tests/data/ljspeech/wavs/LJ015-0042.wav|tests/data/ljspeech/wavs/LJ015-0042.npy +tests/data/ljspeech/wavs/LJ037-0166.wav|tests/data/ljspeech/wavs/LJ037-0166.npy +tests/data/ljspeech/wavs/LJ028-0445.wav|tests/data/ljspeech/wavs/LJ028-0445.npy +tests/data/ljspeech/wavs/LJ010-0024.wav|tests/data/ljspeech/wavs/LJ010-0024.npy +tests/data/ljspeech/wavs/LJ015-0043.wav|tests/data/ljspeech/wavs/LJ015-0043.npy +tests/data/ljspeech/wavs/LJ018-0081.wav|tests/data/ljspeech/wavs/LJ018-0081.npy +tests/data/ljspeech/wavs/LJ001-0132.wav|tests/data/ljspeech/wavs/LJ001-0132.npy +tests/data/ljspeech/wavs/LJ014-0199.wav|tests/data/ljspeech/wavs/LJ014-0199.npy +tests/data/ljspeech/wavs/LJ016-0014.wav|tests/data/ljspeech/wavs/LJ016-0014.npy +tests/data/ljspeech/wavs/LJ044-0220.wav|tests/data/ljspeech/wavs/LJ044-0220.npy 
+tests/data/ljspeech/wavs/LJ044-0106.wav|tests/data/ljspeech/wavs/LJ044-0106.npy +tests/data/ljspeech/wavs/LJ012-0197.wav|tests/data/ljspeech/wavs/LJ012-0197.npy +tests/data/ljspeech/wavs/LJ037-0230.wav|tests/data/ljspeech/wavs/LJ037-0230.npy +tests/data/ljspeech/wavs/LJ038-0156.wav|tests/data/ljspeech/wavs/LJ038-0156.npy +tests/data/ljspeech/wavs/LJ012-0239.wav|tests/data/ljspeech/wavs/LJ012-0239.npy +tests/data/ljspeech/wavs/LJ037-0070.wav|tests/data/ljspeech/wavs/LJ037-0070.npy +tests/data/ljspeech/wavs/LJ013-0115.wav|tests/data/ljspeech/wavs/LJ013-0115.npy +tests/data/ljspeech/wavs/LJ016-0305.wav|tests/data/ljspeech/wavs/LJ016-0305.npy +tests/data/ljspeech/wavs/LJ010-0253.wav|tests/data/ljspeech/wavs/LJ010-0253.npy +tests/data/ljspeech/wavs/LJ044-0109.wav|tests/data/ljspeech/wavs/LJ044-0109.npy +tests/data/ljspeech/wavs/LJ044-0121.wav|tests/data/ljspeech/wavs/LJ044-0121.npy +tests/data/ljspeech/wavs/LJ013-0135.wav|tests/data/ljspeech/wavs/LJ013-0135.npy +tests/data/ljspeech/wavs/LJ017-0014.wav|tests/data/ljspeech/wavs/LJ017-0014.npy +tests/data/ljspeech/wavs/LJ010-0265.wav|tests/data/ljspeech/wavs/LJ010-0265.npy +tests/data/ljspeech/wavs/LJ004-0009.wav|tests/data/ljspeech/wavs/LJ004-0009.npy +tests/data/ljspeech/wavs/LJ039-0212.wav|tests/data/ljspeech/wavs/LJ039-0212.npy +tests/data/ljspeech/wavs/LJ015-0047.wav|tests/data/ljspeech/wavs/LJ015-0047.npy +tests/data/ljspeech/wavs/LJ049-0143.wav|tests/data/ljspeech/wavs/LJ049-0143.npy +tests/data/ljspeech/wavs/LJ012-0204.wav|tests/data/ljspeech/wavs/LJ012-0204.npy +tests/data/ljspeech/wavs/LJ014-0024.wav|tests/data/ljspeech/wavs/LJ014-0024.npy +tests/data/ljspeech/wavs/LJ040-0185.wav|tests/data/ljspeech/wavs/LJ040-0185.npy +tests/data/ljspeech/wavs/LJ016-0386.wav|tests/data/ljspeech/wavs/LJ016-0386.npy +tests/data/ljspeech/wavs/LJ004-0197.wav|tests/data/ljspeech/wavs/LJ004-0197.npy +tests/data/ljspeech/wavs/LJ016-0113.wav|tests/data/ljspeech/wavs/LJ016-0113.npy +tests/data/ljspeech/wavs/LJ039-0197.wav|tests/data/ljspeech/wavs/LJ039-0197.npy +tests/data/ljspeech/wavs/LJ003-0330.wav|tests/data/ljspeech/wavs/LJ003-0330.npy +tests/data/ljspeech/wavs/LJ019-0034.wav|tests/data/ljspeech/wavs/LJ019-0034.npy +tests/data/ljspeech/wavs/LJ039-0220.wav|tests/data/ljspeech/wavs/LJ039-0220.npy +tests/data/ljspeech/wavs/LJ039-0195.wav|tests/data/ljspeech/wavs/LJ039-0195.npy +tests/data/ljspeech/wavs/LJ015-0229.wav|tests/data/ljspeech/wavs/LJ015-0229.npy +tests/data/ljspeech/wavs/LJ016-0361.wav|tests/data/ljspeech/wavs/LJ016-0361.npy +tests/data/ljspeech/wavs/LJ032-0212.wav|tests/data/ljspeech/wavs/LJ032-0212.npy +tests/data/ljspeech/wavs/LJ037-0134.wav|tests/data/ljspeech/wavs/LJ037-0134.npy +tests/data/ljspeech/wavs/LJ038-0206.wav|tests/data/ljspeech/wavs/LJ038-0206.npy +tests/data/ljspeech/wavs/LJ033-0093.wav|tests/data/ljspeech/wavs/LJ033-0093.npy +tests/data/ljspeech/wavs/LJ047-0026.wav|tests/data/ljspeech/wavs/LJ047-0026.npy +tests/data/ljspeech/wavs/LJ046-0253.wav|tests/data/ljspeech/wavs/LJ046-0253.npy +tests/data/ljspeech/wavs/LJ026-0035.wav|tests/data/ljspeech/wavs/LJ026-0035.npy +tests/data/ljspeech/wavs/LJ027-0011.wav|tests/data/ljspeech/wavs/LJ027-0011.npy +tests/data/ljspeech/wavs/LJ040-0191.wav|tests/data/ljspeech/wavs/LJ040-0191.npy +tests/data/ljspeech/wavs/LJ003-0133.wav|tests/data/ljspeech/wavs/LJ003-0133.npy +tests/data/ljspeech/wavs/LJ013-0098.wav|tests/data/ljspeech/wavs/LJ013-0098.npy +tests/data/ljspeech/wavs/LJ019-0140.wav|tests/data/ljspeech/wavs/LJ019-0140.npy 
+tests/data/ljspeech/wavs/LJ027-0019.wav|tests/data/ljspeech/wavs/LJ027-0019.npy +tests/data/ljspeech/wavs/LJ040-0114.wav|tests/data/ljspeech/wavs/LJ040-0114.npy +tests/data/ljspeech/wavs/LJ013-0103.wav|tests/data/ljspeech/wavs/LJ013-0103.npy +tests/data/ljspeech/wavs/LJ040-0202.wav|tests/data/ljspeech/wavs/LJ040-0202.npy +tests/data/ljspeech/wavs/LJ027-0078.wav|tests/data/ljspeech/wavs/LJ027-0078.npy +tests/data/ljspeech/wavs/LJ043-0136.wav|tests/data/ljspeech/wavs/LJ043-0136.npy +tests/data/ljspeech/wavs/LJ047-0048.wav|tests/data/ljspeech/wavs/LJ047-0048.npy +tests/data/ljspeech/wavs/LJ016-0143.wav|tests/data/ljspeech/wavs/LJ016-0143.npy +tests/data/ljspeech/wavs/LJ012-0063.wav|tests/data/ljspeech/wavs/LJ012-0063.npy +tests/data/ljspeech/wavs/LJ006-0050.wav|tests/data/ljspeech/wavs/LJ006-0050.npy +tests/data/ljspeech/wavs/LJ033-0014.wav|tests/data/ljspeech/wavs/LJ033-0014.npy +tests/data/ljspeech/wavs/LJ045-0235.wav|tests/data/ljspeech/wavs/LJ045-0235.npy +tests/data/ljspeech/wavs/LJ049-0148.wav|tests/data/ljspeech/wavs/LJ049-0148.npy +tests/data/ljspeech/wavs/LJ046-0036.wav|tests/data/ljspeech/wavs/LJ046-0036.npy +tests/data/ljspeech/wavs/LJ016-0370.wav|tests/data/ljspeech/wavs/LJ016-0370.npy +tests/data/ljspeech/wavs/LJ045-0080.wav|tests/data/ljspeech/wavs/LJ045-0080.npy +tests/data/ljspeech/wavs/LJ016-0419.wav|tests/data/ljspeech/wavs/LJ016-0419.npy +tests/data/ljspeech/wavs/LJ012-0016.wav|tests/data/ljspeech/wavs/LJ012-0016.npy +tests/data/ljspeech/wavs/LJ005-0015.wav|tests/data/ljspeech/wavs/LJ005-0015.npy +tests/data/ljspeech/wavs/LJ002-0195.wav|tests/data/ljspeech/wavs/LJ002-0195.npy +tests/data/ljspeech/wavs/LJ050-0237.wav|tests/data/ljspeech/wavs/LJ050-0237.npy +tests/data/ljspeech/wavs/LJ032-0159.wav|tests/data/ljspeech/wavs/LJ032-0159.npy +tests/data/ljspeech/wavs/LJ035-0168.wav|tests/data/ljspeech/wavs/LJ035-0168.npy +tests/data/ljspeech/wavs/LJ023-0010.wav|tests/data/ljspeech/wavs/LJ023-0010.npy +tests/data/ljspeech/wavs/LJ044-0034.wav|tests/data/ljspeech/wavs/LJ044-0034.npy +tests/data/ljspeech/wavs/LJ028-0239.wav|tests/data/ljspeech/wavs/LJ028-0239.npy +tests/data/ljspeech/wavs/LJ050-0233.wav|tests/data/ljspeech/wavs/LJ050-0233.npy +tests/data/ljspeech/wavs/LJ022-0056.wav|tests/data/ljspeech/wavs/LJ022-0056.npy +tests/data/ljspeech/wavs/LJ002-0097.wav|tests/data/ljspeech/wavs/LJ002-0097.npy +tests/data/ljspeech/wavs/LJ003-0112.wav|tests/data/ljspeech/wavs/LJ003-0112.npy +tests/data/ljspeech/wavs/LJ005-0283.wav|tests/data/ljspeech/wavs/LJ005-0283.npy +tests/data/ljspeech/wavs/LJ047-0243.wav|tests/data/ljspeech/wavs/LJ047-0243.npy +tests/data/ljspeech/wavs/LJ032-0127.wav|tests/data/ljspeech/wavs/LJ032-0127.npy +tests/data/ljspeech/wavs/LJ018-0343.wav|tests/data/ljspeech/wavs/LJ018-0343.npy +tests/data/ljspeech/wavs/LJ040-0174.wav|tests/data/ljspeech/wavs/LJ040-0174.npy +tests/data/ljspeech/wavs/LJ050-0136.wav|tests/data/ljspeech/wavs/LJ050-0136.npy +tests/data/ljspeech/wavs/LJ010-0261.wav|tests/data/ljspeech/wavs/LJ010-0261.npy +tests/data/ljspeech/wavs/LJ028-0349.wav|tests/data/ljspeech/wavs/LJ028-0349.npy +tests/data/ljspeech/wavs/LJ010-0030.wav|tests/data/ljspeech/wavs/LJ010-0030.npy +tests/data/ljspeech/wavs/LJ028-0102.wav|tests/data/ljspeech/wavs/LJ028-0102.npy +tests/data/ljspeech/wavs/LJ041-0036.wav|tests/data/ljspeech/wavs/LJ041-0036.npy +tests/data/ljspeech/wavs/LJ009-0050.wav|tests/data/ljspeech/wavs/LJ009-0050.npy +tests/data/ljspeech/wavs/LJ040-0182.wav|tests/data/ljspeech/wavs/LJ040-0182.npy 
+tests/data/ljspeech/wavs/LJ019-0153.wav|tests/data/ljspeech/wavs/LJ019-0153.npy +tests/data/ljspeech/wavs/LJ032-0060.wav|tests/data/ljspeech/wavs/LJ032-0060.npy +tests/data/ljspeech/wavs/LJ041-0014.wav|tests/data/ljspeech/wavs/LJ041-0014.npy +tests/data/ljspeech/wavs/LJ009-0206.wav|tests/data/ljspeech/wavs/LJ009-0206.npy +tests/data/ljspeech/wavs/LJ028-0282.wav|tests/data/ljspeech/wavs/LJ028-0282.npy +tests/data/ljspeech/wavs/LJ005-0273.wav|tests/data/ljspeech/wavs/LJ005-0273.npy +tests/data/ljspeech/wavs/LJ009-0239.wav|tests/data/ljspeech/wavs/LJ009-0239.npy +tests/data/ljspeech/wavs/LJ005-0286.wav|tests/data/ljspeech/wavs/LJ005-0286.npy +tests/data/ljspeech/wavs/LJ035-0105.wav|tests/data/ljspeech/wavs/LJ035-0105.npy +tests/data/ljspeech/wavs/LJ028-0360.wav|tests/data/ljspeech/wavs/LJ028-0360.npy +tests/data/ljspeech/wavs/LJ029-0057.wav|tests/data/ljspeech/wavs/LJ029-0057.npy +tests/data/ljspeech/wavs/LJ050-0149.wav|tests/data/ljspeech/wavs/LJ050-0149.npy +tests/data/ljspeech/wavs/LJ019-0179.wav|tests/data/ljspeech/wavs/LJ019-0179.npy +tests/data/ljspeech/wavs/LJ023-0059.wav|tests/data/ljspeech/wavs/LJ023-0059.npy +tests/data/ljspeech/wavs/LJ010-0136.wav|tests/data/ljspeech/wavs/LJ010-0136.npy +tests/data/ljspeech/wavs/LJ024-0011.wav|tests/data/ljspeech/wavs/LJ024-0011.npy +tests/data/ljspeech/wavs/LJ007-0065.wav|tests/data/ljspeech/wavs/LJ007-0065.npy +tests/data/ljspeech/wavs/LJ047-0225.wav|tests/data/ljspeech/wavs/LJ047-0225.npy +tests/data/ljspeech/wavs/LJ017-0265.wav|tests/data/ljspeech/wavs/LJ017-0265.npy +tests/data/ljspeech/wavs/LJ024-0096.wav|tests/data/ljspeech/wavs/LJ024-0096.npy +tests/data/ljspeech/wavs/LJ036-0150.wav|tests/data/ljspeech/wavs/LJ036-0150.npy +tests/data/ljspeech/wavs/LJ009-0171.wav|tests/data/ljspeech/wavs/LJ009-0171.npy +tests/data/ljspeech/wavs/LJ006-0171.wav|tests/data/ljspeech/wavs/LJ006-0171.npy +tests/data/ljspeech/wavs/LJ003-0204.wav|tests/data/ljspeech/wavs/LJ003-0204.npy +tests/data/ljspeech/wavs/LJ040-0028.wav|tests/data/ljspeech/wavs/LJ040-0028.npy +tests/data/ljspeech/wavs/LJ017-0247.wav|tests/data/ljspeech/wavs/LJ017-0247.npy +tests/data/ljspeech/wavs/LJ046-0140.wav|tests/data/ljspeech/wavs/LJ046-0140.npy +tests/data/ljspeech/wavs/LJ002-0259.wav|tests/data/ljspeech/wavs/LJ002-0259.npy +tests/data/ljspeech/wavs/LJ010-0151.wav|tests/data/ljspeech/wavs/LJ010-0151.npy +tests/data/ljspeech/wavs/LJ041-0076.wav|tests/data/ljspeech/wavs/LJ041-0076.npy +tests/data/ljspeech/wavs/LJ042-0176.wav|tests/data/ljspeech/wavs/LJ042-0176.npy +tests/data/ljspeech/wavs/LJ029-0159.wav|tests/data/ljspeech/wavs/LJ029-0159.npy +tests/data/ljspeech/wavs/LJ005-0083.wav|tests/data/ljspeech/wavs/LJ005-0083.npy +tests/data/ljspeech/wavs/LJ050-0180.wav|tests/data/ljspeech/wavs/LJ050-0180.npy +tests/data/ljspeech/wavs/LJ009-0122.wav|tests/data/ljspeech/wavs/LJ009-0122.npy +tests/data/ljspeech/wavs/LJ011-0058.wav|tests/data/ljspeech/wavs/LJ011-0058.npy +tests/data/ljspeech/wavs/LJ006-0277.wav|tests/data/ljspeech/wavs/LJ006-0277.npy +tests/data/ljspeech/wavs/LJ040-0016.wav|tests/data/ljspeech/wavs/LJ040-0016.npy +tests/data/ljspeech/wavs/LJ018-0046.wav|tests/data/ljspeech/wavs/LJ018-0046.npy +tests/data/ljspeech/wavs/LJ048-0278.wav|tests/data/ljspeech/wavs/LJ048-0278.npy +tests/data/ljspeech/wavs/LJ017-0248.wav|tests/data/ljspeech/wavs/LJ017-0248.npy +tests/data/ljspeech/wavs/LJ030-0144.wav|tests/data/ljspeech/wavs/LJ030-0144.npy +tests/data/ljspeech/wavs/LJ029-0038.wav|tests/data/ljspeech/wavs/LJ029-0038.npy 
+tests/data/ljspeech/wavs/LJ037-0228.wav|tests/data/ljspeech/wavs/LJ037-0228.npy +tests/data/ljspeech/wavs/LJ045-0201.wav|tests/data/ljspeech/wavs/LJ045-0201.npy +tests/data/ljspeech/wavs/LJ013-0217.wav|tests/data/ljspeech/wavs/LJ013-0217.npy +tests/data/ljspeech/wavs/LJ002-0062.wav|tests/data/ljspeech/wavs/LJ002-0062.npy +tests/data/ljspeech/wavs/LJ038-0248.wav|tests/data/ljspeech/wavs/LJ038-0248.npy +tests/data/ljspeech/wavs/LJ047-0068.wav|tests/data/ljspeech/wavs/LJ047-0068.npy +tests/data/ljspeech/wavs/LJ030-0238.wav|tests/data/ljspeech/wavs/LJ030-0238.npy +tests/data/ljspeech/wavs/LJ016-0080.wav|tests/data/ljspeech/wavs/LJ016-0080.npy +tests/data/ljspeech/wavs/LJ024-0061.wav|tests/data/ljspeech/wavs/LJ024-0061.npy +tests/data/ljspeech/wavs/LJ044-0177.wav|tests/data/ljspeech/wavs/LJ044-0177.npy +tests/data/ljspeech/wavs/LJ031-0008.wav|tests/data/ljspeech/wavs/LJ031-0008.npy +tests/data/ljspeech/wavs/LJ028-0470.wav|tests/data/ljspeech/wavs/LJ028-0470.npy +tests/data/ljspeech/wavs/LJ005-0285.wav|tests/data/ljspeech/wavs/LJ005-0285.npy +tests/data/ljspeech/wavs/LJ021-0004.wav|tests/data/ljspeech/wavs/LJ021-0004.npy +tests/data/ljspeech/wavs/LJ030-0008.wav|tests/data/ljspeech/wavs/LJ030-0008.npy +tests/data/ljspeech/wavs/LJ030-0121.wav|tests/data/ljspeech/wavs/LJ030-0121.npy +tests/data/ljspeech/wavs/LJ006-0006.wav|tests/data/ljspeech/wavs/LJ006-0006.npy +tests/data/ljspeech/wavs/LJ025-0009.wav|tests/data/ljspeech/wavs/LJ025-0009.npy +tests/data/ljspeech/wavs/LJ030-0219.wav|tests/data/ljspeech/wavs/LJ030-0219.npy +tests/data/ljspeech/wavs/LJ006-0235.wav|tests/data/ljspeech/wavs/LJ006-0235.npy +tests/data/ljspeech/wavs/LJ010-0055.wav|tests/data/ljspeech/wavs/LJ010-0055.npy +tests/data/ljspeech/wavs/LJ015-0196.wav|tests/data/ljspeech/wavs/LJ015-0196.npy +tests/data/ljspeech/wavs/LJ003-0234.wav|tests/data/ljspeech/wavs/LJ003-0234.npy +tests/data/ljspeech/wavs/LJ011-0156.wav|tests/data/ljspeech/wavs/LJ011-0156.npy +tests/data/ljspeech/wavs/LJ004-0233.wav|tests/data/ljspeech/wavs/LJ004-0233.npy +tests/data/ljspeech/wavs/LJ001-0018.wav|tests/data/ljspeech/wavs/LJ001-0018.npy +tests/data/ljspeech/wavs/LJ031-0077.wav|tests/data/ljspeech/wavs/LJ031-0077.npy +tests/data/ljspeech/wavs/LJ005-0093.wav|tests/data/ljspeech/wavs/LJ005-0093.npy +tests/data/ljspeech/wavs/LJ004-0139.wav|tests/data/ljspeech/wavs/LJ004-0139.npy +tests/data/ljspeech/wavs/LJ017-0129.wav|tests/data/ljspeech/wavs/LJ017-0129.npy +tests/data/ljspeech/wavs/LJ015-0292.wav|tests/data/ljspeech/wavs/LJ015-0292.npy +tests/data/ljspeech/wavs/LJ047-0076.wav|tests/data/ljspeech/wavs/LJ047-0076.npy +tests/data/ljspeech/wavs/LJ043-0057.wav|tests/data/ljspeech/wavs/LJ043-0057.npy +tests/data/ljspeech/wavs/LJ037-0224.wav|tests/data/ljspeech/wavs/LJ037-0224.npy +tests/data/ljspeech/wavs/LJ038-0149.wav|tests/data/ljspeech/wavs/LJ038-0149.npy +tests/data/ljspeech/wavs/LJ008-0156.wav|tests/data/ljspeech/wavs/LJ008-0156.npy +tests/data/ljspeech/wavs/LJ044-0168.wav|tests/data/ljspeech/wavs/LJ044-0168.npy +tests/data/ljspeech/wavs/LJ029-0037.wav|tests/data/ljspeech/wavs/LJ029-0037.npy +tests/data/ljspeech/wavs/LJ031-0212.wav|tests/data/ljspeech/wavs/LJ031-0212.npy +tests/data/ljspeech/wavs/LJ021-0072.wav|tests/data/ljspeech/wavs/LJ021-0072.npy +tests/data/ljspeech/wavs/LJ021-0207.wav|tests/data/ljspeech/wavs/LJ021-0207.npy +tests/data/ljspeech/wavs/LJ002-0095.wav|tests/data/ljspeech/wavs/LJ002-0095.npy +tests/data/ljspeech/wavs/LJ006-0086.wav|tests/data/ljspeech/wavs/LJ006-0086.npy 
+tests/data/ljspeech/wavs/LJ012-0164.wav|tests/data/ljspeech/wavs/LJ012-0164.npy +tests/data/ljspeech/wavs/LJ038-0264.wav|tests/data/ljspeech/wavs/LJ038-0264.npy +tests/data/ljspeech/wavs/LJ050-0003.wav|tests/data/ljspeech/wavs/LJ050-0003.npy +tests/data/ljspeech/wavs/LJ028-0368.wav|tests/data/ljspeech/wavs/LJ028-0368.npy +tests/data/ljspeech/wavs/LJ032-0175.wav|tests/data/ljspeech/wavs/LJ032-0175.npy +tests/data/ljspeech/wavs/LJ028-0519.wav|tests/data/ljspeech/wavs/LJ028-0519.npy +tests/data/ljspeech/wavs/LJ006-0191.wav|tests/data/ljspeech/wavs/LJ006-0191.npy +tests/data/ljspeech/wavs/LJ013-0262.wav|tests/data/ljspeech/wavs/LJ013-0262.npy +tests/data/ljspeech/wavs/LJ027-0104.wav|tests/data/ljspeech/wavs/LJ027-0104.npy +tests/data/ljspeech/wavs/LJ013-0037.wav|tests/data/ljspeech/wavs/LJ013-0037.npy +tests/data/ljspeech/wavs/LJ042-0076.wav|tests/data/ljspeech/wavs/LJ042-0076.npy +tests/data/ljspeech/wavs/LJ031-0226.wav|tests/data/ljspeech/wavs/LJ031-0226.npy +tests/data/ljspeech/wavs/LJ027-0142.wav|tests/data/ljspeech/wavs/LJ027-0142.npy +tests/data/ljspeech/wavs/LJ027-0178.wav|tests/data/ljspeech/wavs/LJ027-0178.npy +tests/data/ljspeech/wavs/LJ030-0195.wav|tests/data/ljspeech/wavs/LJ030-0195.npy +tests/data/ljspeech/wavs/LJ013-0248.wav|tests/data/ljspeech/wavs/LJ013-0248.npy +tests/data/ljspeech/wavs/LJ023-0004.wav|tests/data/ljspeech/wavs/LJ023-0004.npy +tests/data/ljspeech/wavs/LJ009-0218.wav|tests/data/ljspeech/wavs/LJ009-0218.npy +tests/data/ljspeech/wavs/LJ002-0335.wav|tests/data/ljspeech/wavs/LJ002-0335.npy +tests/data/ljspeech/wavs/LJ004-0049.wav|tests/data/ljspeech/wavs/LJ004-0049.npy +tests/data/ljspeech/wavs/LJ042-0190.wav|tests/data/ljspeech/wavs/LJ042-0190.npy +tests/data/ljspeech/wavs/LJ002-0186.wav|tests/data/ljspeech/wavs/LJ002-0186.npy +tests/data/ljspeech/wavs/LJ031-0134.wav|tests/data/ljspeech/wavs/LJ031-0134.npy +tests/data/ljspeech/wavs/LJ008-0041.wav|tests/data/ljspeech/wavs/LJ008-0041.npy +tests/data/ljspeech/wavs/LJ014-0243.wav|tests/data/ljspeech/wavs/LJ014-0243.npy +tests/data/ljspeech/wavs/LJ026-0017.wav|tests/data/ljspeech/wavs/LJ026-0017.npy +tests/data/ljspeech/wavs/LJ047-0120.wav|tests/data/ljspeech/wavs/LJ047-0120.npy +tests/data/ljspeech/wavs/LJ009-0048.wav|tests/data/ljspeech/wavs/LJ009-0048.npy +tests/data/ljspeech/wavs/LJ026-0142.wav|tests/data/ljspeech/wavs/LJ026-0142.npy +tests/data/ljspeech/wavs/LJ028-0362.wav|tests/data/ljspeech/wavs/LJ028-0362.npy +tests/data/ljspeech/wavs/LJ038-0133.wav|tests/data/ljspeech/wavs/LJ038-0133.npy +tests/data/ljspeech/wavs/LJ026-0078.wav|tests/data/ljspeech/wavs/LJ026-0078.npy +tests/data/ljspeech/wavs/LJ015-0093.wav|tests/data/ljspeech/wavs/LJ015-0093.npy +tests/data/ljspeech/wavs/LJ002-0056.wav|tests/data/ljspeech/wavs/LJ002-0056.npy +tests/data/ljspeech/wavs/LJ041-0131.wav|tests/data/ljspeech/wavs/LJ041-0131.npy +tests/data/ljspeech/wavs/LJ042-0138.wav|tests/data/ljspeech/wavs/LJ042-0138.npy +tests/data/ljspeech/wavs/LJ025-0155.wav|tests/data/ljspeech/wavs/LJ025-0155.npy +tests/data/ljspeech/wavs/LJ047-0065.wav|tests/data/ljspeech/wavs/LJ047-0065.npy +tests/data/ljspeech/wavs/LJ046-0123.wav|tests/data/ljspeech/wavs/LJ046-0123.npy +tests/data/ljspeech/wavs/LJ013-0105.wav|tests/data/ljspeech/wavs/LJ013-0105.npy +tests/data/ljspeech/wavs/LJ009-0024.wav|tests/data/ljspeech/wavs/LJ009-0024.npy +tests/data/ljspeech/wavs/LJ008-0211.wav|tests/data/ljspeech/wavs/LJ008-0211.npy +tests/data/ljspeech/wavs/LJ029-0198.wav|tests/data/ljspeech/wavs/LJ029-0198.npy 
+tests/data/ljspeech/wavs/LJ027-0094.wav|tests/data/ljspeech/wavs/LJ027-0094.npy +tests/data/ljspeech/wavs/LJ041-0143.wav|tests/data/ljspeech/wavs/LJ041-0143.npy +tests/data/ljspeech/wavs/LJ026-0079.wav|tests/data/ljspeech/wavs/LJ026-0079.npy +tests/data/ljspeech/wavs/LJ007-0089.wav|tests/data/ljspeech/wavs/LJ007-0089.npy +tests/data/ljspeech/wavs/LJ031-0152.wav|tests/data/ljspeech/wavs/LJ031-0152.npy +tests/data/ljspeech/wavs/LJ028-0320.wav|tests/data/ljspeech/wavs/LJ028-0320.npy +tests/data/ljspeech/wavs/LJ032-0265.wav|tests/data/ljspeech/wavs/LJ032-0265.npy +tests/data/ljspeech/wavs/LJ043-0154.wav|tests/data/ljspeech/wavs/LJ043-0154.npy +tests/data/ljspeech/wavs/LJ019-0016.wav|tests/data/ljspeech/wavs/LJ019-0016.npy +tests/data/ljspeech/wavs/LJ036-0022.wav|tests/data/ljspeech/wavs/LJ036-0022.npy +tests/data/ljspeech/wavs/LJ048-0287.wav|tests/data/ljspeech/wavs/LJ048-0287.npy +tests/data/ljspeech/wavs/LJ035-0204.wav|tests/data/ljspeech/wavs/LJ035-0204.npy +tests/data/ljspeech/wavs/LJ010-0114.wav|tests/data/ljspeech/wavs/LJ010-0114.npy +tests/data/ljspeech/wavs/LJ026-0024.wav|tests/data/ljspeech/wavs/LJ026-0024.npy +tests/data/ljspeech/wavs/LJ003-0055.wav|tests/data/ljspeech/wavs/LJ003-0055.npy +tests/data/ljspeech/wavs/LJ049-0189.wav|tests/data/ljspeech/wavs/LJ049-0189.npy +tests/data/ljspeech/wavs/LJ019-0051.wav|tests/data/ljspeech/wavs/LJ019-0051.npy +tests/data/ljspeech/wavs/LJ019-0289.wav|tests/data/ljspeech/wavs/LJ019-0289.npy +tests/data/ljspeech/wavs/LJ037-0109.wav|tests/data/ljspeech/wavs/LJ037-0109.npy +tests/data/ljspeech/wavs/LJ040-0073.wav|tests/data/ljspeech/wavs/LJ040-0073.npy +tests/data/ljspeech/wavs/LJ045-0067.wav|tests/data/ljspeech/wavs/LJ045-0067.npy +tests/data/ljspeech/wavs/LJ011-0061.wav|tests/data/ljspeech/wavs/LJ011-0061.npy +tests/data/ljspeech/wavs/LJ003-0078.wav|tests/data/ljspeech/wavs/LJ003-0078.npy +tests/data/ljspeech/wavs/LJ008-0269.wav|tests/data/ljspeech/wavs/LJ008-0269.npy +tests/data/ljspeech/wavs/LJ013-0265.wav|tests/data/ljspeech/wavs/LJ013-0265.npy +tests/data/ljspeech/wavs/LJ016-0208.wav|tests/data/ljspeech/wavs/LJ016-0208.npy +tests/data/ljspeech/wavs/LJ035-0060.wav|tests/data/ljspeech/wavs/LJ035-0060.npy +tests/data/ljspeech/wavs/LJ005-0058.wav|tests/data/ljspeech/wavs/LJ005-0058.npy +tests/data/ljspeech/wavs/LJ016-0099.wav|tests/data/ljspeech/wavs/LJ016-0099.npy +tests/data/ljspeech/wavs/LJ032-0218.wav|tests/data/ljspeech/wavs/LJ032-0218.npy +tests/data/ljspeech/wavs/LJ011-0274.wav|tests/data/ljspeech/wavs/LJ011-0274.npy +tests/data/ljspeech/wavs/LJ047-0163.wav|tests/data/ljspeech/wavs/LJ047-0163.npy +tests/data/ljspeech/wavs/LJ012-0054.wav|tests/data/ljspeech/wavs/LJ012-0054.npy +tests/data/ljspeech/wavs/LJ010-0310.wav|tests/data/ljspeech/wavs/LJ010-0310.npy +tests/data/ljspeech/wavs/LJ018-0025.wav|tests/data/ljspeech/wavs/LJ018-0025.npy +tests/data/ljspeech/wavs/LJ003-0180.wav|tests/data/ljspeech/wavs/LJ003-0180.npy +tests/data/ljspeech/wavs/LJ016-0283.wav|tests/data/ljspeech/wavs/LJ016-0283.npy +tests/data/ljspeech/wavs/LJ045-0168.wav|tests/data/ljspeech/wavs/LJ045-0168.npy +tests/data/ljspeech/wavs/LJ018-0181.wav|tests/data/ljspeech/wavs/LJ018-0181.npy +tests/data/ljspeech/wavs/LJ019-0323.wav|tests/data/ljspeech/wavs/LJ019-0323.npy +tests/data/ljspeech/wavs/LJ042-0079.wav|tests/data/ljspeech/wavs/LJ042-0079.npy +tests/data/ljspeech/wavs/LJ011-0095.wav|tests/data/ljspeech/wavs/LJ011-0095.npy +tests/data/ljspeech/wavs/LJ026-0055.wav|tests/data/ljspeech/wavs/LJ026-0055.npy 
+tests/data/ljspeech/wavs/LJ016-0375.wav|tests/data/ljspeech/wavs/LJ016-0375.npy +tests/data/ljspeech/wavs/LJ012-0022.wav|tests/data/ljspeech/wavs/LJ012-0022.npy +tests/data/ljspeech/wavs/LJ045-0050.wav|tests/data/ljspeech/wavs/LJ045-0050.npy +tests/data/ljspeech/wavs/LJ018-0381.wav|tests/data/ljspeech/wavs/LJ018-0381.npy +tests/data/ljspeech/wavs/LJ008-0117.wav|tests/data/ljspeech/wavs/LJ008-0117.npy +tests/data/ljspeech/wavs/LJ019-0142.wav|tests/data/ljspeech/wavs/LJ019-0142.npy +tests/data/ljspeech/wavs/LJ036-0137.wav|tests/data/ljspeech/wavs/LJ036-0137.npy +tests/data/ljspeech/wavs/LJ011-0290.wav|tests/data/ljspeech/wavs/LJ011-0290.npy +tests/data/ljspeech/wavs/LJ026-0071.wav|tests/data/ljspeech/wavs/LJ026-0071.npy +tests/data/ljspeech/wavs/LJ003-0243.wav|tests/data/ljspeech/wavs/LJ003-0243.npy +tests/data/ljspeech/wavs/LJ038-0090.wav|tests/data/ljspeech/wavs/LJ038-0090.npy +tests/data/ljspeech/wavs/LJ019-0116.wav|tests/data/ljspeech/wavs/LJ019-0116.npy +tests/data/ljspeech/wavs/LJ032-0135.wav|tests/data/ljspeech/wavs/LJ032-0135.npy +tests/data/ljspeech/wavs/LJ049-0223.wav|tests/data/ljspeech/wavs/LJ049-0223.npy +tests/data/ljspeech/wavs/LJ018-0352.wav|tests/data/ljspeech/wavs/LJ018-0352.npy +tests/data/ljspeech/wavs/LJ015-0063.wav|tests/data/ljspeech/wavs/LJ015-0063.npy +tests/data/ljspeech/wavs/LJ011-0121.wav|tests/data/ljspeech/wavs/LJ011-0121.npy +tests/data/ljspeech/wavs/LJ005-0092.wav|tests/data/ljspeech/wavs/LJ005-0092.npy +tests/data/ljspeech/wavs/LJ048-0283.wav|tests/data/ljspeech/wavs/LJ048-0283.npy +tests/data/ljspeech/wavs/LJ011-0181.wav|tests/data/ljspeech/wavs/LJ011-0181.npy +tests/data/ljspeech/wavs/LJ005-0254.wav|tests/data/ljspeech/wavs/LJ005-0254.npy +tests/data/ljspeech/wavs/LJ016-0418.wav|tests/data/ljspeech/wavs/LJ016-0418.npy +tests/data/ljspeech/wavs/LJ005-0189.wav|tests/data/ljspeech/wavs/LJ005-0189.npy +tests/data/ljspeech/wavs/LJ019-0107.wav|tests/data/ljspeech/wavs/LJ019-0107.npy +tests/data/ljspeech/wavs/LJ008-0116.wav|tests/data/ljspeech/wavs/LJ008-0116.npy +tests/data/ljspeech/wavs/LJ017-0110.wav|tests/data/ljspeech/wavs/LJ017-0110.npy +tests/data/ljspeech/wavs/LJ037-0081.wav|tests/data/ljspeech/wavs/LJ037-0081.npy +tests/data/ljspeech/wavs/LJ003-0227.wav|tests/data/ljspeech/wavs/LJ003-0227.npy +tests/data/ljspeech/wavs/LJ028-0028.wav|tests/data/ljspeech/wavs/LJ028-0028.npy +tests/data/ljspeech/wavs/LJ043-0122.wav|tests/data/ljspeech/wavs/LJ043-0122.npy +tests/data/ljspeech/wavs/LJ045-0102.wav|tests/data/ljspeech/wavs/LJ045-0102.npy +tests/data/ljspeech/wavs/LJ001-0077.wav|tests/data/ljspeech/wavs/LJ001-0077.npy +tests/data/ljspeech/wavs/LJ043-0063.wav|tests/data/ljspeech/wavs/LJ043-0063.npy +tests/data/ljspeech/wavs/LJ004-0220.wav|tests/data/ljspeech/wavs/LJ004-0220.npy +tests/data/ljspeech/wavs/LJ019-0058.wav|tests/data/ljspeech/wavs/LJ019-0058.npy +tests/data/ljspeech/wavs/LJ018-0283.wav|tests/data/ljspeech/wavs/LJ018-0283.npy +tests/data/ljspeech/wavs/LJ027-0119.wav|tests/data/ljspeech/wavs/LJ027-0119.npy +tests/data/ljspeech/wavs/LJ028-0157.wav|tests/data/ljspeech/wavs/LJ028-0157.npy +tests/data/ljspeech/wavs/LJ043-0017.wav|tests/data/ljspeech/wavs/LJ043-0017.npy +tests/data/ljspeech/wavs/LJ015-0037.wav|tests/data/ljspeech/wavs/LJ015-0037.npy +tests/data/ljspeech/wavs/LJ003-0145.wav|tests/data/ljspeech/wavs/LJ003-0145.npy +tests/data/ljspeech/wavs/LJ049-0061.wav|tests/data/ljspeech/wavs/LJ049-0061.npy +tests/data/ljspeech/wavs/LJ010-0236.wav|tests/data/ljspeech/wavs/LJ010-0236.npy 
+tests/data/ljspeech/wavs/LJ042-0103.wav|tests/data/ljspeech/wavs/LJ042-0103.npy +tests/data/ljspeech/wavs/LJ019-0096.wav|tests/data/ljspeech/wavs/LJ019-0096.npy +tests/data/ljspeech/wavs/LJ008-0235.wav|tests/data/ljspeech/wavs/LJ008-0235.npy +tests/data/ljspeech/wavs/LJ038-0202.wav|tests/data/ljspeech/wavs/LJ038-0202.npy +tests/data/ljspeech/wavs/LJ037-0075.wav|tests/data/ljspeech/wavs/LJ037-0075.npy +tests/data/ljspeech/wavs/LJ013-0218.wav|tests/data/ljspeech/wavs/LJ013-0218.npy +tests/data/ljspeech/wavs/LJ013-0194.wav|tests/data/ljspeech/wavs/LJ013-0194.npy +tests/data/ljspeech/wavs/LJ048-0216.wav|tests/data/ljspeech/wavs/LJ048-0216.npy +tests/data/ljspeech/wavs/LJ019-0106.wav|tests/data/ljspeech/wavs/LJ019-0106.npy +tests/data/ljspeech/wavs/LJ014-0118.wav|tests/data/ljspeech/wavs/LJ014-0118.npy +tests/data/ljspeech/wavs/LJ008-0289.wav|tests/data/ljspeech/wavs/LJ008-0289.npy +tests/data/ljspeech/wavs/LJ027-0055.wav|tests/data/ljspeech/wavs/LJ027-0055.npy +tests/data/ljspeech/wavs/LJ004-0070.wav|tests/data/ljspeech/wavs/LJ004-0070.npy +tests/data/ljspeech/wavs/LJ012-0275.wav|tests/data/ljspeech/wavs/LJ012-0275.npy +tests/data/ljspeech/wavs/LJ008-0166.wav|tests/data/ljspeech/wavs/LJ008-0166.npy +tests/data/ljspeech/wavs/LJ007-0005.wav|tests/data/ljspeech/wavs/LJ007-0005.npy +tests/data/ljspeech/wavs/LJ016-0253.wav|tests/data/ljspeech/wavs/LJ016-0253.npy +tests/data/ljspeech/wavs/LJ003-0151.wav|tests/data/ljspeech/wavs/LJ003-0151.npy +tests/data/ljspeech/wavs/LJ017-0076.wav|tests/data/ljspeech/wavs/LJ017-0076.npy +tests/data/ljspeech/wavs/LJ018-0267.wav|tests/data/ljspeech/wavs/LJ018-0267.npy +tests/data/ljspeech/wavs/LJ032-0062.wav|tests/data/ljspeech/wavs/LJ032-0062.npy +tests/data/ljspeech/wavs/LJ047-0136.wav|tests/data/ljspeech/wavs/LJ047-0136.npy +tests/data/ljspeech/wavs/LJ046-0217.wav|tests/data/ljspeech/wavs/LJ046-0217.npy +tests/data/ljspeech/wavs/LJ017-0011.wav|tests/data/ljspeech/wavs/LJ017-0011.npy +tests/data/ljspeech/wavs/LJ014-0049.wav|tests/data/ljspeech/wavs/LJ014-0049.npy +tests/data/ljspeech/wavs/LJ014-0180.wav|tests/data/ljspeech/wavs/LJ014-0180.npy +tests/data/ljspeech/wavs/LJ038-0114.wav|tests/data/ljspeech/wavs/LJ038-0114.npy +tests/data/ljspeech/wavs/LJ017-0052.wav|tests/data/ljspeech/wavs/LJ017-0052.npy +tests/data/ljspeech/wavs/LJ011-0093.wav|tests/data/ljspeech/wavs/LJ011-0093.npy +tests/data/ljspeech/wavs/LJ007-0238.wav|tests/data/ljspeech/wavs/LJ007-0238.npy +tests/data/ljspeech/wavs/LJ018-0299.wav|tests/data/ljspeech/wavs/LJ018-0299.npy +tests/data/ljspeech/wavs/LJ046-0098.wav|tests/data/ljspeech/wavs/LJ046-0098.npy +tests/data/ljspeech/wavs/LJ014-0200.wav|tests/data/ljspeech/wavs/LJ014-0200.npy +tests/data/ljspeech/wavs/LJ011-0135.wav|tests/data/ljspeech/wavs/LJ011-0135.npy +tests/data/ljspeech/wavs/LJ011-0240.wav|tests/data/ljspeech/wavs/LJ011-0240.npy +tests/data/ljspeech/wavs/LJ006-0233.wav|tests/data/ljspeech/wavs/LJ006-0233.npy +tests/data/ljspeech/wavs/LJ040-0092.wav|tests/data/ljspeech/wavs/LJ040-0092.npy +tests/data/ljspeech/wavs/LJ006-0226.wav|tests/data/ljspeech/wavs/LJ006-0226.npy +tests/data/ljspeech/wavs/LJ050-0057.wav|tests/data/ljspeech/wavs/LJ050-0057.npy +tests/data/ljspeech/wavs/LJ043-0024.wav|tests/data/ljspeech/wavs/LJ043-0024.npy +tests/data/ljspeech/wavs/LJ028-0366.wav|tests/data/ljspeech/wavs/LJ028-0366.npy +tests/data/ljspeech/wavs/LJ011-0139.wav|tests/data/ljspeech/wavs/LJ011-0139.npy +tests/data/ljspeech/wavs/LJ032-0117.wav|tests/data/ljspeech/wavs/LJ032-0117.npy 
+tests/data/ljspeech/wavs/LJ048-0066.wav|tests/data/ljspeech/wavs/LJ048-0066.npy +tests/data/ljspeech/wavs/LJ011-0119.wav|tests/data/ljspeech/wavs/LJ011-0119.npy +tests/data/ljspeech/wavs/LJ003-0212.wav|tests/data/ljspeech/wavs/LJ003-0212.npy +tests/data/ljspeech/wavs/LJ014-0012.wav|tests/data/ljspeech/wavs/LJ014-0012.npy +tests/data/ljspeech/wavs/LJ028-0264.wav|tests/data/ljspeech/wavs/LJ028-0264.npy +tests/data/ljspeech/wavs/LJ038-0224.wav|tests/data/ljspeech/wavs/LJ038-0224.npy +tests/data/ljspeech/wavs/LJ018-0022.wav|tests/data/ljspeech/wavs/LJ018-0022.npy +tests/data/ljspeech/wavs/LJ029-0056.wav|tests/data/ljspeech/wavs/LJ029-0056.npy +tests/data/ljspeech/wavs/LJ031-0169.wav|tests/data/ljspeech/wavs/LJ031-0169.npy +tests/data/ljspeech/wavs/LJ040-0163.wav|tests/data/ljspeech/wavs/LJ040-0163.npy +tests/data/ljspeech/wavs/LJ037-0137.wav|tests/data/ljspeech/wavs/LJ037-0137.npy +tests/data/ljspeech/wavs/LJ040-0107.wav|tests/data/ljspeech/wavs/LJ040-0107.npy +tests/data/ljspeech/wavs/LJ024-0060.wav|tests/data/ljspeech/wavs/LJ024-0060.npy +tests/data/ljspeech/wavs/LJ040-0127.wav|tests/data/ljspeech/wavs/LJ040-0127.npy +tests/data/ljspeech/wavs/LJ019-0235.wav|tests/data/ljspeech/wavs/LJ019-0235.npy +tests/data/ljspeech/wavs/LJ024-0103.wav|tests/data/ljspeech/wavs/LJ024-0103.npy +tests/data/ljspeech/wavs/LJ015-0267.wav|tests/data/ljspeech/wavs/LJ015-0267.npy +tests/data/ljspeech/wavs/LJ010-0150.wav|tests/data/ljspeech/wavs/LJ010-0150.npy +tests/data/ljspeech/wavs/LJ037-0235.wav|tests/data/ljspeech/wavs/LJ037-0235.npy +tests/data/ljspeech/wavs/LJ034-0070.wav|tests/data/ljspeech/wavs/LJ034-0070.npy +tests/data/ljspeech/wavs/LJ015-0260.wav|tests/data/ljspeech/wavs/LJ015-0260.npy +tests/data/ljspeech/wavs/LJ015-0251.wav|tests/data/ljspeech/wavs/LJ015-0251.npy +tests/data/ljspeech/wavs/LJ045-0224.wav|tests/data/ljspeech/wavs/LJ045-0224.npy +tests/data/ljspeech/wavs/LJ034-0065.wav|tests/data/ljspeech/wavs/LJ034-0065.npy +tests/data/ljspeech/wavs/LJ019-0376.wav|tests/data/ljspeech/wavs/LJ019-0376.npy +tests/data/ljspeech/wavs/LJ036-0069.wav|tests/data/ljspeech/wavs/LJ036-0069.npy +tests/data/ljspeech/wavs/LJ043-0179.wav|tests/data/ljspeech/wavs/LJ043-0179.npy +tests/data/ljspeech/wavs/LJ033-0180.wav|tests/data/ljspeech/wavs/LJ033-0180.npy +tests/data/ljspeech/wavs/LJ005-0007.wav|tests/data/ljspeech/wavs/LJ005-0007.npy +tests/data/ljspeech/wavs/LJ039-0089.wav|tests/data/ljspeech/wavs/LJ039-0089.npy +tests/data/ljspeech/wavs/LJ044-0196.wav|tests/data/ljspeech/wavs/LJ044-0196.npy +tests/data/ljspeech/wavs/LJ036-0058.wav|tests/data/ljspeech/wavs/LJ036-0058.npy +tests/data/ljspeech/wavs/LJ019-0301.wav|tests/data/ljspeech/wavs/LJ019-0301.npy +tests/data/ljspeech/wavs/LJ029-0064.wav|tests/data/ljspeech/wavs/LJ029-0064.npy +tests/data/ljspeech/wavs/LJ024-0139.wav|tests/data/ljspeech/wavs/LJ024-0139.npy +tests/data/ljspeech/wavs/LJ031-0197.wav|tests/data/ljspeech/wavs/LJ031-0197.npy +tests/data/ljspeech/wavs/LJ003-0100.wav|tests/data/ljspeech/wavs/LJ003-0100.npy +tests/data/ljspeech/wavs/LJ007-0107.wav|tests/data/ljspeech/wavs/LJ007-0107.npy +tests/data/ljspeech/wavs/LJ018-0013.wav|tests/data/ljspeech/wavs/LJ018-0013.npy +tests/data/ljspeech/wavs/LJ040-0210.wav|tests/data/ljspeech/wavs/LJ040-0210.npy +tests/data/ljspeech/wavs/LJ040-0216.wav|tests/data/ljspeech/wavs/LJ040-0216.npy +tests/data/ljspeech/wavs/LJ049-0098.wav|tests/data/ljspeech/wavs/LJ049-0098.npy +tests/data/ljspeech/wavs/LJ015-0088.wav|tests/data/ljspeech/wavs/LJ015-0088.npy 
+tests/data/ljspeech/wavs/LJ039-0076.wav|tests/data/ljspeech/wavs/LJ039-0076.npy +tests/data/ljspeech/wavs/LJ047-0158.wav|tests/data/ljspeech/wavs/LJ047-0158.npy +tests/data/ljspeech/wavs/LJ010-0097.wav|tests/data/ljspeech/wavs/LJ010-0097.npy +tests/data/ljspeech/wavs/LJ015-0125.wav|tests/data/ljspeech/wavs/LJ015-0125.npy +tests/data/ljspeech/wavs/LJ025-0168.wav|tests/data/ljspeech/wavs/LJ025-0168.npy +tests/data/ljspeech/wavs/LJ045-0020.wav|tests/data/ljspeech/wavs/LJ045-0020.npy +tests/data/ljspeech/wavs/LJ034-0164.wav|tests/data/ljspeech/wavs/LJ034-0164.npy +tests/data/ljspeech/wavs/LJ003-0111.wav|tests/data/ljspeech/wavs/LJ003-0111.npy +tests/data/ljspeech/wavs/LJ015-0153.wav|tests/data/ljspeech/wavs/LJ015-0153.npy +tests/data/ljspeech/wavs/LJ019-0159.wav|tests/data/ljspeech/wavs/LJ019-0159.npy +tests/data/ljspeech/wavs/LJ016-0089.wav|tests/data/ljspeech/wavs/LJ016-0089.npy +tests/data/ljspeech/wavs/LJ041-0096.wav|tests/data/ljspeech/wavs/LJ041-0096.npy +tests/data/ljspeech/wavs/LJ002-0291.wav|tests/data/ljspeech/wavs/LJ002-0291.npy +tests/data/ljspeech/wavs/LJ016-0074.wav|tests/data/ljspeech/wavs/LJ016-0074.npy +tests/data/ljspeech/wavs/LJ045-0004.wav|tests/data/ljspeech/wavs/LJ045-0004.npy +tests/data/ljspeech/wavs/LJ019-0385.wav|tests/data/ljspeech/wavs/LJ019-0385.npy +tests/data/ljspeech/wavs/LJ016-0225.wav|tests/data/ljspeech/wavs/LJ016-0225.npy +tests/data/ljspeech/wavs/LJ019-0177.wav|tests/data/ljspeech/wavs/LJ019-0177.npy +tests/data/ljspeech/wavs/LJ012-0012.wav|tests/data/ljspeech/wavs/LJ012-0012.npy +tests/data/ljspeech/wavs/LJ014-0008.wav|tests/data/ljspeech/wavs/LJ014-0008.npy +tests/data/ljspeech/wavs/LJ009-0292.wav|tests/data/ljspeech/wavs/LJ009-0292.npy +tests/data/ljspeech/wavs/LJ017-0016.wav|tests/data/ljspeech/wavs/LJ017-0016.npy +tests/data/ljspeech/wavs/LJ034-0128.wav|tests/data/ljspeech/wavs/LJ034-0128.npy +tests/data/ljspeech/wavs/LJ021-0080.wav|tests/data/ljspeech/wavs/LJ021-0080.npy +tests/data/ljspeech/wavs/LJ016-0118.wav|tests/data/ljspeech/wavs/LJ016-0118.npy +tests/data/ljspeech/wavs/LJ003-0015.wav|tests/data/ljspeech/wavs/LJ003-0015.npy +tests/data/ljspeech/wavs/LJ048-0246.wav|tests/data/ljspeech/wavs/LJ048-0246.npy +tests/data/ljspeech/wavs/LJ035-0004.wav|tests/data/ljspeech/wavs/LJ035-0004.npy +tests/data/ljspeech/wavs/LJ031-0189.wav|tests/data/ljspeech/wavs/LJ031-0189.npy +tests/data/ljspeech/wavs/LJ029-0124.wav|tests/data/ljspeech/wavs/LJ029-0124.npy +tests/data/ljspeech/wavs/LJ047-0080.wav|tests/data/ljspeech/wavs/LJ047-0080.npy +tests/data/ljspeech/wavs/LJ029-0083.wav|tests/data/ljspeech/wavs/LJ029-0083.npy +tests/data/ljspeech/wavs/LJ049-0122.wav|tests/data/ljspeech/wavs/LJ049-0122.npy +tests/data/ljspeech/wavs/LJ021-0196.wav|tests/data/ljspeech/wavs/LJ021-0196.npy +tests/data/ljspeech/wavs/LJ045-0112.wav|tests/data/ljspeech/wavs/LJ045-0112.npy +tests/data/ljspeech/wavs/LJ014-0173.wav|tests/data/ljspeech/wavs/LJ014-0173.npy +tests/data/ljspeech/wavs/LJ044-0151.wav|tests/data/ljspeech/wavs/LJ044-0151.npy +tests/data/ljspeech/wavs/LJ026-0141.wav|tests/data/ljspeech/wavs/LJ026-0141.npy +tests/data/ljspeech/wavs/LJ044-0007.wav|tests/data/ljspeech/wavs/LJ044-0007.npy +tests/data/ljspeech/wavs/LJ018-0045.wav|tests/data/ljspeech/wavs/LJ018-0045.npy +tests/data/ljspeech/wavs/LJ016-0132.wav|tests/data/ljspeech/wavs/LJ016-0132.npy +tests/data/ljspeech/wavs/LJ005-0012.wav|tests/data/ljspeech/wavs/LJ005-0012.npy +tests/data/ljspeech/wavs/LJ021-0194.wav|tests/data/ljspeech/wavs/LJ021-0194.npy 
+tests/data/ljspeech/wavs/LJ030-0045.wav|tests/data/ljspeech/wavs/LJ030-0045.npy +tests/data/ljspeech/wavs/LJ046-0143.wav|tests/data/ljspeech/wavs/LJ046-0143.npy +tests/data/ljspeech/wavs/LJ007-0080.wav|tests/data/ljspeech/wavs/LJ007-0080.npy +tests/data/ljspeech/wavs/LJ039-0124.wav|tests/data/ljspeech/wavs/LJ039-0124.npy +tests/data/ljspeech/wavs/LJ002-0257.wav|tests/data/ljspeech/wavs/LJ002-0257.npy +tests/data/ljspeech/wavs/LJ029-0209.wav|tests/data/ljspeech/wavs/LJ029-0209.npy +tests/data/ljspeech/wavs/LJ007-0059.wav|tests/data/ljspeech/wavs/LJ007-0059.npy +tests/data/ljspeech/wavs/LJ049-0170.wav|tests/data/ljspeech/wavs/LJ049-0170.npy +tests/data/ljspeech/wavs/LJ029-0145.wav|tests/data/ljspeech/wavs/LJ029-0145.npy +tests/data/ljspeech/wavs/LJ025-0118.wav|tests/data/ljspeech/wavs/LJ025-0118.npy +tests/data/ljspeech/wavs/LJ019-0186.wav|tests/data/ljspeech/wavs/LJ019-0186.npy +tests/data/ljspeech/wavs/LJ012-0083.wav|tests/data/ljspeech/wavs/LJ012-0083.npy +tests/data/ljspeech/wavs/LJ004-0193.wav|tests/data/ljspeech/wavs/LJ004-0193.npy +tests/data/ljspeech/wavs/LJ036-0107.wav|tests/data/ljspeech/wavs/LJ036-0107.npy +tests/data/ljspeech/wavs/LJ004-0231.wav|tests/data/ljspeech/wavs/LJ004-0231.npy +tests/data/ljspeech/wavs/LJ014-0123.wav|tests/data/ljspeech/wavs/LJ014-0123.npy +tests/data/ljspeech/wavs/LJ029-0211.wav|tests/data/ljspeech/wavs/LJ029-0211.npy +tests/data/ljspeech/wavs/LJ047-0079.wav|tests/data/ljspeech/wavs/LJ047-0079.npy +tests/data/ljspeech/wavs/LJ031-0013.wav|tests/data/ljspeech/wavs/LJ031-0013.npy +tests/data/ljspeech/wavs/LJ012-0018.wav|tests/data/ljspeech/wavs/LJ012-0018.npy +tests/data/ljspeech/wavs/LJ045-0026.wav|tests/data/ljspeech/wavs/LJ045-0026.npy +tests/data/ljspeech/wavs/LJ050-0250.wav|tests/data/ljspeech/wavs/LJ050-0250.npy +tests/data/ljspeech/wavs/LJ003-0005.wav|tests/data/ljspeech/wavs/LJ003-0005.npy +tests/data/ljspeech/wavs/LJ020-0003.wav|tests/data/ljspeech/wavs/LJ020-0003.npy +tests/data/ljspeech/wavs/LJ025-0159.wav|tests/data/ljspeech/wavs/LJ025-0159.npy +tests/data/ljspeech/wavs/LJ021-0081.wav|tests/data/ljspeech/wavs/LJ021-0081.npy +tests/data/ljspeech/wavs/LJ001-0021.wav|tests/data/ljspeech/wavs/LJ001-0021.npy +tests/data/ljspeech/wavs/LJ043-0030.wav|tests/data/ljspeech/wavs/LJ043-0030.npy +tests/data/ljspeech/wavs/LJ045-0202.wav|tests/data/ljspeech/wavs/LJ045-0202.npy +tests/data/ljspeech/wavs/LJ014-0045.wav|tests/data/ljspeech/wavs/LJ014-0045.npy +tests/data/ljspeech/wavs/LJ016-0076.wav|tests/data/ljspeech/wavs/LJ016-0076.npy +tests/data/ljspeech/wavs/LJ013-0256.wav|tests/data/ljspeech/wavs/LJ013-0256.npy +tests/data/ljspeech/wavs/LJ007-0030.wav|tests/data/ljspeech/wavs/LJ007-0030.npy +tests/data/ljspeech/wavs/LJ004-0130.wav|tests/data/ljspeech/wavs/LJ004-0130.npy +tests/data/ljspeech/wavs/LJ021-0082.wav|tests/data/ljspeech/wavs/LJ021-0082.npy +tests/data/ljspeech/wavs/LJ021-0160.wav|tests/data/ljspeech/wavs/LJ021-0160.npy +tests/data/ljspeech/wavs/LJ038-0022.wav|tests/data/ljspeech/wavs/LJ038-0022.npy +tests/data/ljspeech/wavs/LJ021-0155.wav|tests/data/ljspeech/wavs/LJ021-0155.npy +tests/data/ljspeech/wavs/LJ026-0080.wav|tests/data/ljspeech/wavs/LJ026-0080.npy +tests/data/ljspeech/wavs/LJ026-0025.wav|tests/data/ljspeech/wavs/LJ026-0025.npy +tests/data/ljspeech/wavs/LJ016-0117.wav|tests/data/ljspeech/wavs/LJ016-0117.npy +tests/data/ljspeech/wavs/LJ007-0032.wav|tests/data/ljspeech/wavs/LJ007-0032.npy +tests/data/ljspeech/wavs/LJ005-0013.wav|tests/data/ljspeech/wavs/LJ005-0013.npy 
+tests/data/ljspeech/wavs/LJ016-0243.wav|tests/data/ljspeech/wavs/LJ016-0243.npy +tests/data/ljspeech/wavs/LJ013-0244.wav|tests/data/ljspeech/wavs/LJ013-0244.npy +tests/data/ljspeech/wavs/LJ014-0084.wav|tests/data/ljspeech/wavs/LJ014-0084.npy +tests/data/ljspeech/wavs/LJ037-0169.wav|tests/data/ljspeech/wavs/LJ037-0169.npy +tests/data/ljspeech/wavs/LJ031-0088.wav|tests/data/ljspeech/wavs/LJ031-0088.npy +tests/data/ljspeech/wavs/LJ009-0286.wav|tests/data/ljspeech/wavs/LJ009-0286.npy +tests/data/ljspeech/wavs/LJ041-0135.wav|tests/data/ljspeech/wavs/LJ041-0135.npy +tests/data/ljspeech/wavs/LJ019-0229.wav|tests/data/ljspeech/wavs/LJ019-0229.npy +tests/data/ljspeech/wavs/LJ016-0306.wav|tests/data/ljspeech/wavs/LJ016-0306.npy +tests/data/ljspeech/wavs/LJ040-0093.wav|tests/data/ljspeech/wavs/LJ040-0093.npy +tests/data/ljspeech/wavs/LJ038-0100.wav|tests/data/ljspeech/wavs/LJ038-0100.npy +tests/data/ljspeech/wavs/LJ011-0262.wav|tests/data/ljspeech/wavs/LJ011-0262.npy +tests/data/ljspeech/wavs/LJ023-0081.wav|tests/data/ljspeech/wavs/LJ023-0081.npy +tests/data/ljspeech/wavs/LJ035-0190.wav|tests/data/ljspeech/wavs/LJ035-0190.npy +tests/data/ljspeech/wavs/LJ024-0087.wav|tests/data/ljspeech/wavs/LJ024-0087.npy +tests/data/ljspeech/wavs/LJ045-0043.wav|tests/data/ljspeech/wavs/LJ045-0043.npy +tests/data/ljspeech/wavs/LJ041-0200.wav|tests/data/ljspeech/wavs/LJ041-0200.npy +tests/data/ljspeech/wavs/LJ041-0087.wav|tests/data/ljspeech/wavs/LJ041-0087.npy +tests/data/ljspeech/wavs/LJ016-0303.wav|tests/data/ljspeech/wavs/LJ016-0303.npy +tests/data/ljspeech/wavs/LJ039-0163.wav|tests/data/ljspeech/wavs/LJ039-0163.npy +tests/data/ljspeech/wavs/LJ046-0149.wav|tests/data/ljspeech/wavs/LJ046-0149.npy +tests/data/ljspeech/wavs/LJ019-0011.wav|tests/data/ljspeech/wavs/LJ019-0011.npy +tests/data/ljspeech/wavs/LJ032-0105.wav|tests/data/ljspeech/wavs/LJ032-0105.npy +tests/data/ljspeech/wavs/LJ043-0087.wav|tests/data/ljspeech/wavs/LJ043-0087.npy +tests/data/ljspeech/wavs/LJ023-0068.wav|tests/data/ljspeech/wavs/LJ023-0068.npy +tests/data/ljspeech/wavs/LJ028-0195.wav|tests/data/ljspeech/wavs/LJ028-0195.npy +tests/data/ljspeech/wavs/LJ028-0304.wav|tests/data/ljspeech/wavs/LJ028-0304.npy +tests/data/ljspeech/wavs/LJ011-0291.wav|tests/data/ljspeech/wavs/LJ011-0291.npy +tests/data/ljspeech/wavs/LJ014-0257.wav|tests/data/ljspeech/wavs/LJ014-0257.npy +tests/data/ljspeech/wavs/LJ037-0262.wav|tests/data/ljspeech/wavs/LJ037-0262.npy +tests/data/ljspeech/wavs/LJ032-0084.wav|tests/data/ljspeech/wavs/LJ032-0084.npy +tests/data/ljspeech/wavs/LJ016-0302.wav|tests/data/ljspeech/wavs/LJ016-0302.npy +tests/data/ljspeech/wavs/LJ014-0318.wav|tests/data/ljspeech/wavs/LJ014-0318.npy +tests/data/ljspeech/wavs/LJ045-0121.wav|tests/data/ljspeech/wavs/LJ045-0121.npy +tests/data/ljspeech/wavs/LJ034-0007.wav|tests/data/ljspeech/wavs/LJ034-0007.npy +tests/data/ljspeech/wavs/LJ035-0127.wav|tests/data/ljspeech/wavs/LJ035-0127.npy +tests/data/ljspeech/wavs/LJ019-0293.wav|tests/data/ljspeech/wavs/LJ019-0293.npy +tests/data/ljspeech/wavs/LJ038-0054.wav|tests/data/ljspeech/wavs/LJ038-0054.npy +tests/data/ljspeech/wavs/LJ002-0158.wav|tests/data/ljspeech/wavs/LJ002-0158.npy +tests/data/ljspeech/wavs/LJ015-0181.wav|tests/data/ljspeech/wavs/LJ015-0181.npy +tests/data/ljspeech/wavs/LJ050-0235.wav|tests/data/ljspeech/wavs/LJ050-0235.npy +tests/data/ljspeech/wavs/LJ037-0053.wav|tests/data/ljspeech/wavs/LJ037-0053.npy +tests/data/ljspeech/wavs/LJ022-0157.wav|tests/data/ljspeech/wavs/LJ022-0157.npy 
+tests/data/ljspeech/wavs/LJ013-0111.wav|tests/data/ljspeech/wavs/LJ013-0111.npy +tests/data/ljspeech/wavs/LJ037-0260.wav|tests/data/ljspeech/wavs/LJ037-0260.npy +tests/data/ljspeech/wavs/LJ050-0231.wav|tests/data/ljspeech/wavs/LJ050-0231.npy +tests/data/ljspeech/wavs/LJ011-0232.wav|tests/data/ljspeech/wavs/LJ011-0232.npy +tests/data/ljspeech/wavs/LJ002-0103.wav|tests/data/ljspeech/wavs/LJ002-0103.npy +tests/data/ljspeech/wavs/LJ005-0171.wav|tests/data/ljspeech/wavs/LJ005-0171.npy +tests/data/ljspeech/wavs/LJ019-0234.wav|tests/data/ljspeech/wavs/LJ019-0234.npy +tests/data/ljspeech/wavs/LJ028-0353.wav|tests/data/ljspeech/wavs/LJ028-0353.npy +tests/data/ljspeech/wavs/LJ005-0289.wav|tests/data/ljspeech/wavs/LJ005-0289.npy +tests/data/ljspeech/wavs/LJ008-0177.wav|tests/data/ljspeech/wavs/LJ008-0177.npy +tests/data/ljspeech/wavs/LJ014-0303.wav|tests/data/ljspeech/wavs/LJ014-0303.npy +tests/data/ljspeech/wavs/LJ009-0193.wav|tests/data/ljspeech/wavs/LJ009-0193.npy +tests/data/ljspeech/wavs/LJ006-0096.wav|tests/data/ljspeech/wavs/LJ006-0096.npy +tests/data/ljspeech/wavs/LJ005-0054.wav|tests/data/ljspeech/wavs/LJ005-0054.npy +tests/data/ljspeech/wavs/LJ015-0185.wav|tests/data/ljspeech/wavs/LJ015-0185.npy +tests/data/ljspeech/wavs/LJ041-0098.wav|tests/data/ljspeech/wavs/LJ041-0098.npy +tests/data/ljspeech/wavs/LJ013-0019.wav|tests/data/ljspeech/wavs/LJ013-0019.npy +tests/data/ljspeech/wavs/LJ005-0042.wav|tests/data/ljspeech/wavs/LJ005-0042.npy +tests/data/ljspeech/wavs/LJ028-0463.wav|tests/data/ljspeech/wavs/LJ028-0463.npy +tests/data/ljspeech/wavs/LJ027-0118.wav|tests/data/ljspeech/wavs/LJ027-0118.npy +tests/data/ljspeech/wavs/LJ018-0331.wav|tests/data/ljspeech/wavs/LJ018-0331.npy +tests/data/ljspeech/wavs/LJ015-0263.wav|tests/data/ljspeech/wavs/LJ015-0263.npy +tests/data/ljspeech/wavs/LJ019-0104.wav|tests/data/ljspeech/wavs/LJ019-0104.npy +tests/data/ljspeech/wavs/LJ009-0112.wav|tests/data/ljspeech/wavs/LJ009-0112.npy +tests/data/ljspeech/wavs/LJ048-0233.wav|tests/data/ljspeech/wavs/LJ048-0233.npy +tests/data/ljspeech/wavs/LJ012-0242.wav|tests/data/ljspeech/wavs/LJ012-0242.npy +tests/data/ljspeech/wavs/LJ038-0238.wav|tests/data/ljspeech/wavs/LJ038-0238.npy +tests/data/ljspeech/wavs/LJ011-0177.wav|tests/data/ljspeech/wavs/LJ011-0177.npy +tests/data/ljspeech/wavs/LJ012-0008.wav|tests/data/ljspeech/wavs/LJ012-0008.npy +tests/data/ljspeech/wavs/LJ011-0283.wav|tests/data/ljspeech/wavs/LJ011-0283.npy +tests/data/ljspeech/wavs/LJ007-0210.wav|tests/data/ljspeech/wavs/LJ007-0210.npy +tests/data/ljspeech/wavs/LJ041-0148.wav|tests/data/ljspeech/wavs/LJ041-0148.npy +tests/data/ljspeech/wavs/LJ011-0230.wav|tests/data/ljspeech/wavs/LJ011-0230.npy +tests/data/ljspeech/wavs/LJ013-0094.wav|tests/data/ljspeech/wavs/LJ013-0094.npy +tests/data/ljspeech/wavs/LJ012-0058.wav|tests/data/ljspeech/wavs/LJ012-0058.npy +tests/data/ljspeech/wavs/LJ050-0185.wav|tests/data/ljspeech/wavs/LJ050-0185.npy +tests/data/ljspeech/wavs/LJ009-0208.wav|tests/data/ljspeech/wavs/LJ009-0208.npy +tests/data/ljspeech/wavs/LJ010-0160.wav|tests/data/ljspeech/wavs/LJ010-0160.npy +tests/data/ljspeech/wavs/LJ010-0127.wav|tests/data/ljspeech/wavs/LJ010-0127.npy +tests/data/ljspeech/wavs/LJ013-0264.wav|tests/data/ljspeech/wavs/LJ013-0264.npy +tests/data/ljspeech/wavs/LJ013-0080.wav|tests/data/ljspeech/wavs/LJ013-0080.npy +tests/data/ljspeech/wavs/LJ012-0157.wav|tests/data/ljspeech/wavs/LJ012-0157.npy +tests/data/ljspeech/wavs/LJ050-0128.wav|tests/data/ljspeech/wavs/LJ050-0128.npy 
+tests/data/ljspeech/wavs/LJ013-0054.wav|tests/data/ljspeech/wavs/LJ013-0054.npy +tests/data/ljspeech/wavs/LJ006-0248.wav|tests/data/ljspeech/wavs/LJ006-0248.npy +tests/data/ljspeech/wavs/LJ049-0179.wav|tests/data/ljspeech/wavs/LJ049-0179.npy +tests/data/ljspeech/wavs/LJ011-0048.wav|tests/data/ljspeech/wavs/LJ011-0048.npy +tests/data/ljspeech/wavs/LJ007-0167.wav|tests/data/ljspeech/wavs/LJ007-0167.npy +tests/data/ljspeech/wavs/LJ010-0225.wav|tests/data/ljspeech/wavs/LJ010-0225.npy +tests/data/ljspeech/wavs/LJ011-0159.wav|tests/data/ljspeech/wavs/LJ011-0159.npy +tests/data/ljspeech/wavs/LJ012-0276.wav|tests/data/ljspeech/wavs/LJ012-0276.npy +tests/data/ljspeech/wavs/LJ008-0066.wav|tests/data/ljspeech/wavs/LJ008-0066.npy +tests/data/ljspeech/wavs/LJ012-0156.wav|tests/data/ljspeech/wavs/LJ012-0156.npy +tests/data/ljspeech/wavs/LJ042-0180.wav|tests/data/ljspeech/wavs/LJ042-0180.npy +tests/data/ljspeech/wavs/LJ009-0190.wav|tests/data/ljspeech/wavs/LJ009-0190.npy +tests/data/ljspeech/wavs/LJ009-0064.wav|tests/data/ljspeech/wavs/LJ009-0064.npy +tests/data/ljspeech/wavs/LJ049-0182.wav|tests/data/ljspeech/wavs/LJ049-0182.npy +tests/data/ljspeech/wavs/LJ008-0054.wav|tests/data/ljspeech/wavs/LJ008-0054.npy +tests/data/ljspeech/wavs/LJ013-0116.wav|tests/data/ljspeech/wavs/LJ013-0116.npy +tests/data/ljspeech/wavs/LJ002-0014.wav|tests/data/ljspeech/wavs/LJ002-0014.npy +tests/data/ljspeech/wavs/LJ035-0192.wav|tests/data/ljspeech/wavs/LJ035-0192.npy +tests/data/ljspeech/wavs/LJ001-0038.wav|tests/data/ljspeech/wavs/LJ001-0038.npy +tests/data/ljspeech/wavs/LJ005-0146.wav|tests/data/ljspeech/wavs/LJ005-0146.npy +tests/data/ljspeech/wavs/LJ034-0088.wav|tests/data/ljspeech/wavs/LJ034-0088.npy +tests/data/ljspeech/wavs/LJ003-0054.wav|tests/data/ljspeech/wavs/LJ003-0054.npy +tests/data/ljspeech/wavs/LJ014-0196.wav|tests/data/ljspeech/wavs/LJ014-0196.npy +tests/data/ljspeech/wavs/LJ019-0199.wav|tests/data/ljspeech/wavs/LJ019-0199.npy +tests/data/ljspeech/wavs/LJ019-0138.wav|tests/data/ljspeech/wavs/LJ019-0138.npy +tests/data/ljspeech/wavs/LJ029-0111.wav|tests/data/ljspeech/wavs/LJ029-0111.npy +tests/data/ljspeech/wavs/LJ031-0041.wav|tests/data/ljspeech/wavs/LJ031-0041.npy +tests/data/ljspeech/wavs/LJ016-0223.wav|tests/data/ljspeech/wavs/LJ016-0223.npy +tests/data/ljspeech/wavs/LJ029-0050.wav|tests/data/ljspeech/wavs/LJ029-0050.npy +tests/data/ljspeech/wavs/LJ012-0056.wav|tests/data/ljspeech/wavs/LJ012-0056.npy +tests/data/ljspeech/wavs/LJ021-0021.wav|tests/data/ljspeech/wavs/LJ021-0021.npy +tests/data/ljspeech/wavs/LJ041-0169.wav|tests/data/ljspeech/wavs/LJ041-0169.npy +tests/data/ljspeech/wavs/LJ019-0029.wav|tests/data/ljspeech/wavs/LJ019-0029.npy +tests/data/ljspeech/wavs/LJ019-0285.wav|tests/data/ljspeech/wavs/LJ019-0285.npy +tests/data/ljspeech/wavs/LJ018-0370.wav|tests/data/ljspeech/wavs/LJ018-0370.npy +tests/data/ljspeech/wavs/LJ021-0093.wav|tests/data/ljspeech/wavs/LJ021-0093.npy +tests/data/ljspeech/wavs/LJ003-0146.wav|tests/data/ljspeech/wavs/LJ003-0146.npy +tests/data/ljspeech/wavs/LJ019-0386.wav|tests/data/ljspeech/wavs/LJ019-0386.npy +tests/data/ljspeech/wavs/LJ022-0105.wav|tests/data/ljspeech/wavs/LJ022-0105.npy +tests/data/ljspeech/wavs/LJ002-0006.wav|tests/data/ljspeech/wavs/LJ002-0006.npy +tests/data/ljspeech/wavs/LJ034-0189.wav|tests/data/ljspeech/wavs/LJ034-0189.npy +tests/data/ljspeech/wavs/LJ018-0342.wav|tests/data/ljspeech/wavs/LJ018-0342.npy +tests/data/ljspeech/wavs/LJ019-0127.wav|tests/data/ljspeech/wavs/LJ019-0127.npy 
+tests/data/ljspeech/wavs/LJ002-0017.wav|tests/data/ljspeech/wavs/LJ002-0017.npy +tests/data/ljspeech/wavs/LJ048-0137.wav|tests/data/ljspeech/wavs/LJ048-0137.npy +tests/data/ljspeech/wavs/LJ028-0453.wav|tests/data/ljspeech/wavs/LJ028-0453.npy +tests/data/ljspeech/wavs/LJ019-0260.wav|tests/data/ljspeech/wavs/LJ019-0260.npy +tests/data/ljspeech/wavs/LJ007-0046.wav|tests/data/ljspeech/wavs/LJ007-0046.npy +tests/data/ljspeech/wavs/LJ017-0201.wav|tests/data/ljspeech/wavs/LJ017-0201.npy +tests/data/ljspeech/wavs/LJ019-0335.wav|tests/data/ljspeech/wavs/LJ019-0335.npy +tests/data/ljspeech/wavs/LJ045-0007.wav|tests/data/ljspeech/wavs/LJ045-0007.npy +tests/data/ljspeech/wavs/LJ037-0162.wav|tests/data/ljspeech/wavs/LJ037-0162.npy +tests/data/ljspeech/wavs/LJ015-0023.wav|tests/data/ljspeech/wavs/LJ015-0023.npy +tests/data/ljspeech/wavs/LJ045-0074.wav|tests/data/ljspeech/wavs/LJ045-0074.npy +tests/data/ljspeech/wavs/LJ049-0053.wav|tests/data/ljspeech/wavs/LJ049-0053.npy +tests/data/ljspeech/wavs/LJ029-0165.wav|tests/data/ljspeech/wavs/LJ029-0165.npy +tests/data/ljspeech/wavs/LJ016-0156.wav|tests/data/ljspeech/wavs/LJ016-0156.npy +tests/data/ljspeech/wavs/LJ015-0248.wav|tests/data/ljspeech/wavs/LJ015-0248.npy +tests/data/ljspeech/wavs/LJ018-0211.wav|tests/data/ljspeech/wavs/LJ018-0211.npy +tests/data/ljspeech/wavs/LJ030-0042.wav|tests/data/ljspeech/wavs/LJ030-0042.npy +tests/data/ljspeech/wavs/LJ016-0147.wav|tests/data/ljspeech/wavs/LJ016-0147.npy +tests/data/ljspeech/wavs/LJ037-0035.wav|tests/data/ljspeech/wavs/LJ037-0035.npy +tests/data/ljspeech/wavs/LJ015-0195.wav|tests/data/ljspeech/wavs/LJ015-0195.npy +tests/data/ljspeech/wavs/LJ017-0267.wav|tests/data/ljspeech/wavs/LJ017-0267.npy +tests/data/ljspeech/wavs/LJ049-0035.wav|tests/data/ljspeech/wavs/LJ049-0035.npy +tests/data/ljspeech/wavs/LJ037-0136.wav|tests/data/ljspeech/wavs/LJ037-0136.npy +tests/data/ljspeech/wavs/LJ018-0034.wav|tests/data/ljspeech/wavs/LJ018-0034.npy +tests/data/ljspeech/wavs/LJ003-0218.wav|tests/data/ljspeech/wavs/LJ003-0218.npy +tests/data/ljspeech/wavs/LJ016-0210.wav|tests/data/ljspeech/wavs/LJ016-0210.npy +tests/data/ljspeech/wavs/LJ016-0427.wav|tests/data/ljspeech/wavs/LJ016-0427.npy +tests/data/ljspeech/wavs/LJ016-0340.wav|tests/data/ljspeech/wavs/LJ016-0340.npy +tests/data/ljspeech/wavs/LJ016-0121.wav|tests/data/ljspeech/wavs/LJ016-0121.npy +tests/data/ljspeech/wavs/LJ045-0077.wav|tests/data/ljspeech/wavs/LJ045-0077.npy +tests/data/ljspeech/wavs/LJ016-0053.wav|tests/data/ljspeech/wavs/LJ016-0053.npy +tests/data/ljspeech/wavs/LJ031-0143.wav|tests/data/ljspeech/wavs/LJ031-0143.npy +tests/data/ljspeech/wavs/LJ036-0054.wav|tests/data/ljspeech/wavs/LJ036-0054.npy +tests/data/ljspeech/wavs/LJ003-0210.wav|tests/data/ljspeech/wavs/LJ003-0210.npy +tests/data/ljspeech/wavs/LJ022-0122.wav|tests/data/ljspeech/wavs/LJ022-0122.npy +tests/data/ljspeech/wavs/LJ001-0106.wav|tests/data/ljspeech/wavs/LJ001-0106.npy +tests/data/ljspeech/wavs/LJ003-0244.wav|tests/data/ljspeech/wavs/LJ003-0244.npy +tests/data/ljspeech/wavs/LJ033-0119.wav|tests/data/ljspeech/wavs/LJ033-0119.npy +tests/data/ljspeech/wavs/LJ024-0053.wav|tests/data/ljspeech/wavs/LJ024-0053.npy +tests/data/ljspeech/wavs/LJ032-0033.wav|tests/data/ljspeech/wavs/LJ032-0033.npy +tests/data/ljspeech/wavs/LJ044-0195.wav|tests/data/ljspeech/wavs/LJ044-0195.npy +tests/data/ljspeech/wavs/LJ002-0201.wav|tests/data/ljspeech/wavs/LJ002-0201.npy +tests/data/ljspeech/wavs/LJ002-0188.wav|tests/data/ljspeech/wavs/LJ002-0188.npy 
+tests/data/ljspeech/wavs/LJ025-0054.wav|tests/data/ljspeech/wavs/LJ025-0054.npy +tests/data/ljspeech/wavs/LJ026-0163.wav|tests/data/ljspeech/wavs/LJ026-0163.npy +tests/data/ljspeech/wavs/LJ025-0126.wav|tests/data/ljspeech/wavs/LJ025-0126.npy +tests/data/ljspeech/wavs/LJ048-0231.wav|tests/data/ljspeech/wavs/LJ048-0231.npy +tests/data/ljspeech/wavs/LJ002-0304.wav|tests/data/ljspeech/wavs/LJ002-0304.npy +tests/data/ljspeech/wavs/LJ026-0146.wav|tests/data/ljspeech/wavs/LJ026-0146.npy +tests/data/ljspeech/wavs/LJ045-0237.wav|tests/data/ljspeech/wavs/LJ045-0237.npy +tests/data/ljspeech/wavs/LJ002-0256.wav|tests/data/ljspeech/wavs/LJ002-0256.npy +tests/data/ljspeech/wavs/LJ028-0210.wav|tests/data/ljspeech/wavs/LJ028-0210.npy +tests/data/ljspeech/wavs/LJ025-0069.wav|tests/data/ljspeech/wavs/LJ025-0069.npy +tests/data/ljspeech/wavs/LJ016-0021.wav|tests/data/ljspeech/wavs/LJ016-0021.npy +tests/data/ljspeech/wavs/LJ023-0109.wav|tests/data/ljspeech/wavs/LJ023-0109.npy +tests/data/ljspeech/wavs/LJ027-0015.wav|tests/data/ljspeech/wavs/LJ027-0015.npy +tests/data/ljspeech/wavs/LJ002-0144.wav|tests/data/ljspeech/wavs/LJ002-0144.npy +tests/data/ljspeech/wavs/LJ033-0207.wav|tests/data/ljspeech/wavs/LJ033-0207.npy +tests/data/ljspeech/wavs/LJ028-0083.wav|tests/data/ljspeech/wavs/LJ028-0083.npy +tests/data/ljspeech/wavs/LJ002-0121.wav|tests/data/ljspeech/wavs/LJ002-0121.npy +tests/data/ljspeech/wavs/LJ004-0118.wav|tests/data/ljspeech/wavs/LJ004-0118.npy +tests/data/ljspeech/wavs/LJ028-0391.wav|tests/data/ljspeech/wavs/LJ028-0391.npy +tests/data/ljspeech/wavs/LJ050-0254.wav|tests/data/ljspeech/wavs/LJ050-0254.npy +tests/data/ljspeech/wavs/LJ014-0067.wav|tests/data/ljspeech/wavs/LJ014-0067.npy +tests/data/ljspeech/wavs/LJ028-0248.wav|tests/data/ljspeech/wavs/LJ028-0248.npy +tests/data/ljspeech/wavs/LJ022-0193.wav|tests/data/ljspeech/wavs/LJ022-0193.npy +tests/data/ljspeech/wavs/LJ026-0027.wav|tests/data/ljspeech/wavs/LJ026-0027.npy +tests/data/ljspeech/wavs/LJ002-0099.wav|tests/data/ljspeech/wavs/LJ002-0099.npy +tests/data/ljspeech/wavs/LJ014-0034.wav|tests/data/ljspeech/wavs/LJ014-0034.npy +tests/data/ljspeech/wavs/LJ030-0006.wav|tests/data/ljspeech/wavs/LJ030-0006.npy +tests/data/ljspeech/wavs/LJ037-0085.wav|tests/data/ljspeech/wavs/LJ037-0085.npy +tests/data/ljspeech/wavs/LJ030-0062.wav|tests/data/ljspeech/wavs/LJ030-0062.npy +tests/data/ljspeech/wavs/LJ042-0063.wav|tests/data/ljspeech/wavs/LJ042-0063.npy +tests/data/ljspeech/wavs/LJ027-0173.wav|tests/data/ljspeech/wavs/LJ027-0173.npy +tests/data/ljspeech/wavs/LJ046-0144.wav|tests/data/ljspeech/wavs/LJ046-0144.npy +tests/data/ljspeech/wavs/LJ049-0038.wav|tests/data/ljspeech/wavs/LJ049-0038.npy +tests/data/ljspeech/wavs/LJ012-0048.wav|tests/data/ljspeech/wavs/LJ012-0048.npy +tests/data/ljspeech/wavs/LJ027-0156.wav|tests/data/ljspeech/wavs/LJ027-0156.npy +tests/data/ljspeech/wavs/LJ017-0082.wav|tests/data/ljspeech/wavs/LJ017-0082.npy +tests/data/ljspeech/wavs/LJ039-0139.wav|tests/data/ljspeech/wavs/LJ039-0139.npy +tests/data/ljspeech/wavs/LJ016-0073.wav|tests/data/ljspeech/wavs/LJ016-0073.npy +tests/data/ljspeech/wavs/LJ032-0077.wav|tests/data/ljspeech/wavs/LJ032-0077.npy +tests/data/ljspeech/wavs/LJ016-0189.wav|tests/data/ljspeech/wavs/LJ016-0189.npy +tests/data/ljspeech/wavs/LJ016-0261.wav|tests/data/ljspeech/wavs/LJ016-0261.npy +tests/data/ljspeech/wavs/LJ042-0108.wav|tests/data/ljspeech/wavs/LJ042-0108.npy +tests/data/ljspeech/wavs/LJ029-0203.wav|tests/data/ljspeech/wavs/LJ029-0203.npy 
+tests/data/ljspeech/wavs/LJ046-0077.wav|tests/data/ljspeech/wavs/LJ046-0077.npy +tests/data/ljspeech/wavs/LJ011-0153.wav|tests/data/ljspeech/wavs/LJ011-0153.npy +tests/data/ljspeech/wavs/LJ032-0252.wav|tests/data/ljspeech/wavs/LJ032-0252.npy +tests/data/ljspeech/wavs/LJ008-0133.wav|tests/data/ljspeech/wavs/LJ008-0133.npy +tests/data/ljspeech/wavs/LJ028-0149.wav|tests/data/ljspeech/wavs/LJ028-0149.npy +tests/data/ljspeech/wavs/LJ017-0241.wav|tests/data/ljspeech/wavs/LJ017-0241.npy +tests/data/ljspeech/wavs/LJ031-0073.wav|tests/data/ljspeech/wavs/LJ031-0073.npy +tests/data/ljspeech/wavs/LJ005-0097.wav|tests/data/ljspeech/wavs/LJ005-0097.npy +tests/data/ljspeech/wavs/LJ003-0229.wav|tests/data/ljspeech/wavs/LJ003-0229.npy +tests/data/ljspeech/wavs/LJ006-0094.wav|tests/data/ljspeech/wavs/LJ006-0094.npy +tests/data/ljspeech/wavs/LJ031-0110.wav|tests/data/ljspeech/wavs/LJ031-0110.npy +tests/data/ljspeech/wavs/LJ022-0183.wav|tests/data/ljspeech/wavs/LJ022-0183.npy +tests/data/ljspeech/wavs/LJ016-0405.wav|tests/data/ljspeech/wavs/LJ016-0405.npy +tests/data/ljspeech/wavs/LJ003-0308.wav|tests/data/ljspeech/wavs/LJ003-0308.npy +tests/data/ljspeech/wavs/LJ044-0095.wav|tests/data/ljspeech/wavs/LJ044-0095.npy +tests/data/ljspeech/wavs/LJ022-0083.wav|tests/data/ljspeech/wavs/LJ022-0083.npy +tests/data/ljspeech/wavs/LJ034-0038.wav|tests/data/ljspeech/wavs/LJ034-0038.npy +tests/data/ljspeech/wavs/LJ043-0138.wav|tests/data/ljspeech/wavs/LJ043-0138.npy +tests/data/ljspeech/wavs/LJ005-0243.wav|tests/data/ljspeech/wavs/LJ005-0243.npy +tests/data/ljspeech/wavs/LJ050-0179.wav|tests/data/ljspeech/wavs/LJ050-0179.npy +tests/data/ljspeech/wavs/LJ028-0321.wav|tests/data/ljspeech/wavs/LJ028-0321.npy +tests/data/ljspeech/wavs/LJ020-0016.wav|tests/data/ljspeech/wavs/LJ020-0016.npy +tests/data/ljspeech/wavs/LJ045-0065.wav|tests/data/ljspeech/wavs/LJ045-0065.npy +tests/data/ljspeech/wavs/LJ023-0033.wav|tests/data/ljspeech/wavs/LJ023-0033.npy +tests/data/ljspeech/wavs/LJ033-0163.wav|tests/data/ljspeech/wavs/LJ033-0163.npy +tests/data/ljspeech/wavs/LJ011-0210.wav|tests/data/ljspeech/wavs/LJ011-0210.npy +tests/data/ljspeech/wavs/LJ050-0263.wav|tests/data/ljspeech/wavs/LJ050-0263.npy +tests/data/ljspeech/wavs/LJ021-0099.wav|tests/data/ljspeech/wavs/LJ021-0099.npy +tests/data/ljspeech/wavs/LJ034-0096.wav|tests/data/ljspeech/wavs/LJ034-0096.npy +tests/data/ljspeech/wavs/LJ016-0218.wav|tests/data/ljspeech/wavs/LJ016-0218.npy +tests/data/ljspeech/wavs/LJ023-0036.wav|tests/data/ljspeech/wavs/LJ023-0036.npy +tests/data/ljspeech/wavs/LJ037-0176.wav|tests/data/ljspeech/wavs/LJ037-0176.npy +tests/data/ljspeech/wavs/LJ022-0138.wav|tests/data/ljspeech/wavs/LJ022-0138.npy +tests/data/ljspeech/wavs/LJ039-0223.wav|tests/data/ljspeech/wavs/LJ039-0223.npy +tests/data/ljspeech/wavs/LJ021-0055.wav|tests/data/ljspeech/wavs/LJ021-0055.npy +tests/data/ljspeech/wavs/LJ018-0239.wav|tests/data/ljspeech/wavs/LJ018-0239.npy +tests/data/ljspeech/wavs/LJ003-0076.wav|tests/data/ljspeech/wavs/LJ003-0076.npy +tests/data/ljspeech/wavs/LJ040-0228.wav|tests/data/ljspeech/wavs/LJ040-0228.npy +tests/data/ljspeech/wavs/LJ034-0066.wav|tests/data/ljspeech/wavs/LJ034-0066.npy +tests/data/ljspeech/wavs/LJ034-0037.wav|tests/data/ljspeech/wavs/LJ034-0037.npy +tests/data/ljspeech/wavs/LJ018-0074.wav|tests/data/ljspeech/wavs/LJ018-0074.npy +tests/data/ljspeech/wavs/LJ010-0099.wav|tests/data/ljspeech/wavs/LJ010-0099.npy +tests/data/ljspeech/wavs/LJ022-0052.wav|tests/data/ljspeech/wavs/LJ022-0052.npy 
+tests/data/ljspeech/wavs/LJ016-0374.wav|tests/data/ljspeech/wavs/LJ016-0374.npy +tests/data/ljspeech/wavs/LJ008-0040.wav|tests/data/ljspeech/wavs/LJ008-0040.npy +tests/data/ljspeech/wavs/LJ010-0061.wav|tests/data/ljspeech/wavs/LJ010-0061.npy +tests/data/ljspeech/wavs/LJ028-0198.wav|tests/data/ljspeech/wavs/LJ028-0198.npy +tests/data/ljspeech/wavs/LJ033-0004.wav|tests/data/ljspeech/wavs/LJ033-0004.npy +tests/data/ljspeech/wavs/LJ040-0112.wav|tests/data/ljspeech/wavs/LJ040-0112.npy +tests/data/ljspeech/wavs/LJ026-0088.wav|tests/data/ljspeech/wavs/LJ026-0088.npy +tests/data/ljspeech/wavs/LJ035-0069.wav|tests/data/ljspeech/wavs/LJ035-0069.npy +tests/data/ljspeech/wavs/LJ026-0053.wav|tests/data/ljspeech/wavs/LJ026-0053.npy +tests/data/ljspeech/wavs/LJ019-0236.wav|tests/data/ljspeech/wavs/LJ019-0236.npy +tests/data/ljspeech/wavs/LJ023-0012.wav|tests/data/ljspeech/wavs/LJ023-0012.npy +tests/data/ljspeech/wavs/LJ046-0196.wav|tests/data/ljspeech/wavs/LJ046-0196.npy +tests/data/ljspeech/wavs/LJ045-0114.wav|tests/data/ljspeech/wavs/LJ045-0114.npy +tests/data/ljspeech/wavs/LJ049-0146.wav|tests/data/ljspeech/wavs/LJ049-0146.npy +tests/data/ljspeech/wavs/LJ001-0166.wav|tests/data/ljspeech/wavs/LJ001-0166.npy +tests/data/ljspeech/wavs/LJ019-0332.wav|tests/data/ljspeech/wavs/LJ019-0332.npy +tests/data/ljspeech/wavs/LJ002-0210.wav|tests/data/ljspeech/wavs/LJ002-0210.npy +tests/data/ljspeech/wavs/LJ003-0102.wav|tests/data/ljspeech/wavs/LJ003-0102.npy +tests/data/ljspeech/wavs/LJ006-0060.wav|tests/data/ljspeech/wavs/LJ006-0060.npy +tests/data/ljspeech/wavs/LJ003-0013.wav|tests/data/ljspeech/wavs/LJ003-0013.npy +tests/data/ljspeech/wavs/LJ047-0202.wav|tests/data/ljspeech/wavs/LJ047-0202.npy +tests/data/ljspeech/wavs/LJ033-0019.wav|tests/data/ljspeech/wavs/LJ033-0019.npy +tests/data/ljspeech/wavs/LJ006-0046.wav|tests/data/ljspeech/wavs/LJ006-0046.npy +tests/data/ljspeech/wavs/LJ018-0244.wav|tests/data/ljspeech/wavs/LJ018-0244.npy +tests/data/ljspeech/wavs/LJ003-0103.wav|tests/data/ljspeech/wavs/LJ003-0103.npy +tests/data/ljspeech/wavs/LJ018-0123.wav|tests/data/ljspeech/wavs/LJ018-0123.npy +tests/data/ljspeech/wavs/LJ031-0151.wav|tests/data/ljspeech/wavs/LJ031-0151.npy +tests/data/ljspeech/wavs/LJ025-0017.wav|tests/data/ljspeech/wavs/LJ025-0017.npy +tests/data/ljspeech/wavs/LJ019-0094.wav|tests/data/ljspeech/wavs/LJ019-0094.npy +tests/data/ljspeech/wavs/LJ033-0116.wav|tests/data/ljspeech/wavs/LJ033-0116.npy +tests/data/ljspeech/wavs/LJ048-0014.wav|tests/data/ljspeech/wavs/LJ048-0014.npy +tests/data/ljspeech/wavs/LJ049-0029.wav|tests/data/ljspeech/wavs/LJ049-0029.npy +tests/data/ljspeech/wavs/LJ007-0129.wav|tests/data/ljspeech/wavs/LJ007-0129.npy +tests/data/ljspeech/wavs/LJ018-0155.wav|tests/data/ljspeech/wavs/LJ018-0155.npy +tests/data/ljspeech/wavs/LJ028-0129.wav|tests/data/ljspeech/wavs/LJ028-0129.npy +tests/data/ljspeech/wavs/LJ002-0217.wav|tests/data/ljspeech/wavs/LJ002-0217.npy +tests/data/ljspeech/wavs/LJ037-0247.wav|tests/data/ljspeech/wavs/LJ037-0247.npy +tests/data/ljspeech/wavs/LJ025-0106.wav|tests/data/ljspeech/wavs/LJ025-0106.npy +tests/data/ljspeech/wavs/LJ038-0152.wav|tests/data/ljspeech/wavs/LJ038-0152.npy +tests/data/ljspeech/wavs/LJ009-0061.wav|tests/data/ljspeech/wavs/LJ009-0061.npy +tests/data/ljspeech/wavs/LJ038-0276.wav|tests/data/ljspeech/wavs/LJ038-0276.npy +tests/data/ljspeech/wavs/LJ014-0086.wav|tests/data/ljspeech/wavs/LJ014-0086.npy +tests/data/ljspeech/wavs/LJ041-0100.wav|tests/data/ljspeech/wavs/LJ041-0100.npy 
+tests/data/ljspeech/wavs/LJ016-0404.wav|tests/data/ljspeech/wavs/LJ016-0404.npy +tests/data/ljspeech/wavs/LJ020-0023.wav|tests/data/ljspeech/wavs/LJ020-0023.npy +tests/data/ljspeech/wavs/LJ030-0123.wav|tests/data/ljspeech/wavs/LJ030-0123.npy +tests/data/ljspeech/wavs/LJ044-0201.wav|tests/data/ljspeech/wavs/LJ044-0201.npy +tests/data/ljspeech/wavs/LJ030-0155.wav|tests/data/ljspeech/wavs/LJ030-0155.npy +tests/data/ljspeech/wavs/LJ045-0063.wav|tests/data/ljspeech/wavs/LJ045-0063.npy +tests/data/ljspeech/wavs/LJ030-0215.wav|tests/data/ljspeech/wavs/LJ030-0215.npy +tests/data/ljspeech/wavs/LJ006-0221.wav|tests/data/ljspeech/wavs/LJ006-0221.npy +tests/data/ljspeech/wavs/LJ048-0114.wav|tests/data/ljspeech/wavs/LJ048-0114.npy +tests/data/ljspeech/wavs/LJ038-0020.wav|tests/data/ljspeech/wavs/LJ038-0020.npy +tests/data/ljspeech/wavs/LJ024-0094.wav|tests/data/ljspeech/wavs/LJ024-0094.npy +tests/data/ljspeech/wavs/LJ049-0127.wav|tests/data/ljspeech/wavs/LJ049-0127.npy +tests/data/ljspeech/wavs/LJ013-0144.wav|tests/data/ljspeech/wavs/LJ013-0144.npy +tests/data/ljspeech/wavs/LJ015-0276.wav|tests/data/ljspeech/wavs/LJ015-0276.npy +tests/data/ljspeech/wavs/LJ004-0007.wav|tests/data/ljspeech/wavs/LJ004-0007.npy +tests/data/ljspeech/wavs/LJ038-0064.wav|tests/data/ljspeech/wavs/LJ038-0064.npy +tests/data/ljspeech/wavs/LJ012-0188.wav|tests/data/ljspeech/wavs/LJ012-0188.npy +tests/data/ljspeech/wavs/LJ030-0124.wav|tests/data/ljspeech/wavs/LJ030-0124.npy +tests/data/ljspeech/wavs/LJ037-0004.wav|tests/data/ljspeech/wavs/LJ037-0004.npy +tests/data/ljspeech/wavs/LJ012-0293.wav|tests/data/ljspeech/wavs/LJ012-0293.npy +tests/data/ljspeech/wavs/LJ039-0108.wav|tests/data/ljspeech/wavs/LJ039-0108.npy +tests/data/ljspeech/wavs/LJ015-0028.wav|tests/data/ljspeech/wavs/LJ015-0028.npy +tests/data/ljspeech/wavs/LJ012-0135.wav|tests/data/ljspeech/wavs/LJ012-0135.npy +tests/data/ljspeech/wavs/LJ014-0021.wav|tests/data/ljspeech/wavs/LJ014-0021.npy +tests/data/ljspeech/wavs/LJ014-0185.wav|tests/data/ljspeech/wavs/LJ014-0185.npy +tests/data/ljspeech/wavs/LJ038-0126.wav|tests/data/ljspeech/wavs/LJ038-0126.npy +tests/data/ljspeech/wavs/LJ034-0114.wav|tests/data/ljspeech/wavs/LJ034-0114.npy +tests/data/ljspeech/wavs/LJ038-0303.wav|tests/data/ljspeech/wavs/LJ038-0303.npy +tests/data/ljspeech/wavs/LJ047-0218.wav|tests/data/ljspeech/wavs/LJ047-0218.npy +tests/data/ljspeech/wavs/LJ036-0126.wav|tests/data/ljspeech/wavs/LJ036-0126.npy +tests/data/ljspeech/wavs/LJ040-0032.wav|tests/data/ljspeech/wavs/LJ040-0032.npy +tests/data/ljspeech/wavs/LJ004-0055.wav|tests/data/ljspeech/wavs/LJ004-0055.npy +tests/data/ljspeech/wavs/LJ037-0021.wav|tests/data/ljspeech/wavs/LJ037-0021.npy +tests/data/ljspeech/wavs/LJ014-0187.wav|tests/data/ljspeech/wavs/LJ014-0187.npy +tests/data/ljspeech/wavs/LJ001-0068.wav|tests/data/ljspeech/wavs/LJ001-0068.npy +tests/data/ljspeech/wavs/LJ040-0024.wav|tests/data/ljspeech/wavs/LJ040-0024.npy +tests/data/ljspeech/wavs/LJ045-0083.wav|tests/data/ljspeech/wavs/LJ045-0083.npy +tests/data/ljspeech/wavs/LJ034-0049.wav|tests/data/ljspeech/wavs/LJ034-0049.npy +tests/data/ljspeech/wavs/LJ042-0213.wav|tests/data/ljspeech/wavs/LJ042-0213.npy +tests/data/ljspeech/wavs/LJ015-0162.wav|tests/data/ljspeech/wavs/LJ015-0162.npy +tests/data/ljspeech/wavs/LJ007-0158.wav|tests/data/ljspeech/wavs/LJ007-0158.npy +tests/data/ljspeech/wavs/LJ011-0105.wav|tests/data/ljspeech/wavs/LJ011-0105.npy +tests/data/ljspeech/wavs/LJ003-0068.wav|tests/data/ljspeech/wavs/LJ003-0068.npy 
+tests/data/ljspeech/wavs/LJ003-0057.wav|tests/data/ljspeech/wavs/LJ003-0057.npy +tests/data/ljspeech/wavs/LJ037-0031.wav|tests/data/ljspeech/wavs/LJ037-0031.npy +tests/data/ljspeech/wavs/LJ003-0016.wav|tests/data/ljspeech/wavs/LJ003-0016.npy +tests/data/ljspeech/wavs/LJ032-0067.wav|tests/data/ljspeech/wavs/LJ032-0067.npy +tests/data/ljspeech/wavs/LJ047-0211.wav|tests/data/ljspeech/wavs/LJ047-0211.npy +tests/data/ljspeech/wavs/LJ041-0025.wav|tests/data/ljspeech/wavs/LJ041-0025.npy +tests/data/ljspeech/wavs/LJ016-0343.wav|tests/data/ljspeech/wavs/LJ016-0343.npy +tests/data/ljspeech/wavs/LJ011-0235.wav|tests/data/ljspeech/wavs/LJ011-0235.npy +tests/data/ljspeech/wavs/LJ022-0111.wav|tests/data/ljspeech/wavs/LJ022-0111.npy +tests/data/ljspeech/wavs/LJ003-0269.wav|tests/data/ljspeech/wavs/LJ003-0269.npy +tests/data/ljspeech/wavs/LJ034-0091.wav|tests/data/ljspeech/wavs/LJ034-0091.npy +tests/data/ljspeech/wavs/LJ025-0055.wav|tests/data/ljspeech/wavs/LJ025-0055.npy +tests/data/ljspeech/wavs/LJ014-0278.wav|tests/data/ljspeech/wavs/LJ014-0278.npy +tests/data/ljspeech/wavs/LJ038-0282.wav|tests/data/ljspeech/wavs/LJ038-0282.npy +tests/data/ljspeech/wavs/LJ013-0083.wav|tests/data/ljspeech/wavs/LJ013-0083.npy +tests/data/ljspeech/wavs/LJ037-0261.wav|tests/data/ljspeech/wavs/LJ037-0261.npy +tests/data/ljspeech/wavs/LJ020-0041.wav|tests/data/ljspeech/wavs/LJ020-0041.npy +tests/data/ljspeech/wavs/LJ010-0200.wav|tests/data/ljspeech/wavs/LJ010-0200.npy +tests/data/ljspeech/wavs/LJ006-0269.wav|tests/data/ljspeech/wavs/LJ006-0269.npy +tests/data/ljspeech/wavs/LJ017-0154.wav|tests/data/ljspeech/wavs/LJ017-0154.npy +tests/data/ljspeech/wavs/LJ036-0164.wav|tests/data/ljspeech/wavs/LJ036-0164.npy +tests/data/ljspeech/wavs/LJ002-0140.wav|tests/data/ljspeech/wavs/LJ002-0140.npy +tests/data/ljspeech/wavs/LJ015-0264.wav|tests/data/ljspeech/wavs/LJ015-0264.npy +tests/data/ljspeech/wavs/LJ003-0313.wav|tests/data/ljspeech/wavs/LJ003-0313.npy +tests/data/ljspeech/wavs/LJ048-0039.wav|tests/data/ljspeech/wavs/LJ048-0039.npy +tests/data/ljspeech/wavs/LJ039-0008.wav|tests/data/ljspeech/wavs/LJ039-0008.npy +tests/data/ljspeech/wavs/LJ047-0232.wav|tests/data/ljspeech/wavs/LJ047-0232.npy +tests/data/ljspeech/wavs/LJ032-0244.wav|tests/data/ljspeech/wavs/LJ032-0244.npy +tests/data/ljspeech/wavs/LJ030-0098.wav|tests/data/ljspeech/wavs/LJ030-0098.npy +tests/data/ljspeech/wavs/LJ049-0159.wav|tests/data/ljspeech/wavs/LJ049-0159.npy +tests/data/ljspeech/wavs/LJ008-0031.wav|tests/data/ljspeech/wavs/LJ008-0031.npy +tests/data/ljspeech/wavs/LJ017-0091.wav|tests/data/ljspeech/wavs/LJ017-0091.npy +tests/data/ljspeech/wavs/LJ009-0181.wav|tests/data/ljspeech/wavs/LJ009-0181.npy +tests/data/ljspeech/wavs/LJ045-0191.wav|tests/data/ljspeech/wavs/LJ045-0191.npy +tests/data/ljspeech/wavs/LJ030-0139.wav|tests/data/ljspeech/wavs/LJ030-0139.npy +tests/data/ljspeech/wavs/LJ050-0071.wav|tests/data/ljspeech/wavs/LJ050-0071.npy +tests/data/ljspeech/wavs/LJ039-0238.wav|tests/data/ljspeech/wavs/LJ039-0238.npy +tests/data/ljspeech/wavs/LJ048-0265.wav|tests/data/ljspeech/wavs/LJ048-0265.npy +tests/data/ljspeech/wavs/LJ020-0078.wav|tests/data/ljspeech/wavs/LJ020-0078.npy +tests/data/ljspeech/wavs/LJ034-0035.wav|tests/data/ljspeech/wavs/LJ034-0035.npy +tests/data/ljspeech/wavs/LJ043-0019.wav|tests/data/ljspeech/wavs/LJ043-0019.npy +tests/data/ljspeech/wavs/LJ031-0029.wav|tests/data/ljspeech/wavs/LJ031-0029.npy +tests/data/ljspeech/wavs/LJ043-0171.wav|tests/data/ljspeech/wavs/LJ043-0171.npy 
+tests/data/ljspeech/wavs/LJ012-0123.wav|tests/data/ljspeech/wavs/LJ012-0123.npy +tests/data/ljspeech/wavs/LJ013-0121.wav|tests/data/ljspeech/wavs/LJ013-0121.npy +tests/data/ljspeech/wavs/LJ042-0015.wav|tests/data/ljspeech/wavs/LJ042-0015.npy +tests/data/ljspeech/wavs/LJ038-0219.wav|tests/data/ljspeech/wavs/LJ038-0219.npy +tests/data/ljspeech/wavs/LJ003-0277.wav|tests/data/ljspeech/wavs/LJ003-0277.npy +tests/data/ljspeech/wavs/LJ048-0031.wav|tests/data/ljspeech/wavs/LJ048-0031.npy +tests/data/ljspeech/wavs/LJ006-0203.wav|tests/data/ljspeech/wavs/LJ006-0203.npy +tests/data/ljspeech/wavs/LJ042-0047.wav|tests/data/ljspeech/wavs/LJ042-0047.npy +tests/data/ljspeech/wavs/LJ042-0061.wav|tests/data/ljspeech/wavs/LJ042-0061.npy +tests/data/ljspeech/wavs/LJ039-0143.wav|tests/data/ljspeech/wavs/LJ039-0143.npy +tests/data/ljspeech/wavs/LJ048-0209.wav|tests/data/ljspeech/wavs/LJ048-0209.npy +tests/data/ljspeech/wavs/LJ033-0094.wav|tests/data/ljspeech/wavs/LJ033-0094.npy +tests/data/ljspeech/wavs/LJ025-0157.wav|tests/data/ljspeech/wavs/LJ025-0157.npy +tests/data/ljspeech/wavs/LJ001-0116.wav|tests/data/ljspeech/wavs/LJ001-0116.npy +tests/data/ljspeech/wavs/LJ028-0179.wav|tests/data/ljspeech/wavs/LJ028-0179.npy +tests/data/ljspeech/wavs/LJ033-0073.wav|tests/data/ljspeech/wavs/LJ033-0073.npy +tests/data/ljspeech/wavs/LJ008-0126.wav|tests/data/ljspeech/wavs/LJ008-0126.npy +tests/data/ljspeech/wavs/LJ008-0174.wav|tests/data/ljspeech/wavs/LJ008-0174.npy +tests/data/ljspeech/wavs/LJ038-0060.wav|tests/data/ljspeech/wavs/LJ038-0060.npy +tests/data/ljspeech/wavs/LJ028-0011.wav|tests/data/ljspeech/wavs/LJ028-0011.npy +tests/data/ljspeech/wavs/LJ048-0085.wav|tests/data/ljspeech/wavs/LJ048-0085.npy +tests/data/ljspeech/wavs/LJ015-0287.wav|tests/data/ljspeech/wavs/LJ015-0287.npy +tests/data/ljspeech/wavs/LJ014-0010.wav|tests/data/ljspeech/wavs/LJ014-0010.npy +tests/data/ljspeech/wavs/LJ005-0009.wav|tests/data/ljspeech/wavs/LJ005-0009.npy +tests/data/ljspeech/wavs/LJ028-0120.wav|tests/data/ljspeech/wavs/LJ028-0120.npy +tests/data/ljspeech/wavs/LJ002-0211.wav|tests/data/ljspeech/wavs/LJ002-0211.npy +tests/data/ljspeech/wavs/LJ014-0026.wav|tests/data/ljspeech/wavs/LJ014-0026.npy +tests/data/ljspeech/wavs/LJ039-0119.wav|tests/data/ljspeech/wavs/LJ039-0119.npy +tests/data/ljspeech/wavs/LJ037-0159.wav|tests/data/ljspeech/wavs/LJ037-0159.npy +tests/data/ljspeech/wavs/LJ027-0018.wav|tests/data/ljspeech/wavs/LJ027-0018.npy +tests/data/ljspeech/wavs/LJ040-0102.wav|tests/data/ljspeech/wavs/LJ040-0102.npy +tests/data/ljspeech/wavs/LJ040-0124.wav|tests/data/ljspeech/wavs/LJ040-0124.npy +tests/data/ljspeech/wavs/LJ006-0300.wav|tests/data/ljspeech/wavs/LJ006-0300.npy +tests/data/ljspeech/wavs/LJ031-0188.wav|tests/data/ljspeech/wavs/LJ031-0188.npy +tests/data/ljspeech/wavs/LJ048-0143.wav|tests/data/ljspeech/wavs/LJ048-0143.npy +tests/data/ljspeech/wavs/LJ046-0178.wav|tests/data/ljspeech/wavs/LJ046-0178.npy +tests/data/ljspeech/wavs/LJ029-0112.wav|tests/data/ljspeech/wavs/LJ029-0112.npy +tests/data/ljspeech/wavs/LJ042-0161.wav|tests/data/ljspeech/wavs/LJ042-0161.npy +tests/data/ljspeech/wavs/LJ046-0083.wav|tests/data/ljspeech/wavs/LJ046-0083.npy +tests/data/ljspeech/wavs/LJ042-0230.wav|tests/data/ljspeech/wavs/LJ042-0230.npy +tests/data/ljspeech/wavs/LJ026-0089.wav|tests/data/ljspeech/wavs/LJ026-0089.npy +tests/data/ljspeech/wavs/LJ043-0075.wav|tests/data/ljspeech/wavs/LJ043-0075.npy +tests/data/ljspeech/wavs/LJ040-0165.wav|tests/data/ljspeech/wavs/LJ040-0165.npy 
+tests/data/ljspeech/wavs/LJ038-0117.wav|tests/data/ljspeech/wavs/LJ038-0117.npy +tests/data/ljspeech/wavs/LJ046-0174.wav|tests/data/ljspeech/wavs/LJ046-0174.npy +tests/data/ljspeech/wavs/LJ039-0033.wav|tests/data/ljspeech/wavs/LJ039-0033.npy +tests/data/ljspeech/wavs/LJ038-0191.wav|tests/data/ljspeech/wavs/LJ038-0191.npy +tests/data/ljspeech/wavs/LJ009-0291.wav|tests/data/ljspeech/wavs/LJ009-0291.npy +tests/data/ljspeech/wavs/LJ048-0142.wav|tests/data/ljspeech/wavs/LJ048-0142.npy +tests/data/ljspeech/wavs/LJ050-0156.wav|tests/data/ljspeech/wavs/LJ050-0156.npy +tests/data/ljspeech/wavs/LJ001-0158.wav|tests/data/ljspeech/wavs/LJ001-0158.npy +tests/data/ljspeech/wavs/LJ037-0087.wav|tests/data/ljspeech/wavs/LJ037-0087.npy +tests/data/ljspeech/wavs/LJ050-0100.wav|tests/data/ljspeech/wavs/LJ050-0100.npy +tests/data/ljspeech/wavs/LJ028-0254.wav|tests/data/ljspeech/wavs/LJ028-0254.npy +tests/data/ljspeech/wavs/LJ003-0117.wav|tests/data/ljspeech/wavs/LJ003-0117.npy +tests/data/ljspeech/wavs/LJ030-0164.wav|tests/data/ljspeech/wavs/LJ030-0164.npy +tests/data/ljspeech/wavs/LJ019-0151.wav|tests/data/ljspeech/wavs/LJ019-0151.npy +tests/data/ljspeech/wavs/LJ043-0060.wav|tests/data/ljspeech/wavs/LJ043-0060.npy +tests/data/ljspeech/wavs/LJ018-0214.wav|tests/data/ljspeech/wavs/LJ018-0214.npy +tests/data/ljspeech/wavs/LJ044-0221.wav|tests/data/ljspeech/wavs/LJ044-0221.npy +tests/data/ljspeech/wavs/LJ014-0306.wav|tests/data/ljspeech/wavs/LJ014-0306.npy +tests/data/ljspeech/wavs/LJ020-0098.wav|tests/data/ljspeech/wavs/LJ020-0098.npy +tests/data/ljspeech/wavs/LJ040-0166.wav|tests/data/ljspeech/wavs/LJ040-0166.npy +tests/data/ljspeech/wavs/LJ002-0192.wav|tests/data/ljspeech/wavs/LJ002-0192.npy +tests/data/ljspeech/wavs/LJ047-0053.wav|tests/data/ljspeech/wavs/LJ047-0053.npy +tests/data/ljspeech/wavs/LJ007-0082.wav|tests/data/ljspeech/wavs/LJ007-0082.npy +tests/data/ljspeech/wavs/LJ003-0053.wav|tests/data/ljspeech/wavs/LJ003-0053.npy +tests/data/ljspeech/wavs/LJ038-0262.wav|tests/data/ljspeech/wavs/LJ038-0262.npy +tests/data/ljspeech/wavs/LJ026-0082.wav|tests/data/ljspeech/wavs/LJ026-0082.npy +tests/data/ljspeech/wavs/LJ008-0182.wav|tests/data/ljspeech/wavs/LJ008-0182.npy +tests/data/ljspeech/wavs/LJ030-0243.wav|tests/data/ljspeech/wavs/LJ030-0243.npy +tests/data/ljspeech/wavs/LJ006-0077.wav|tests/data/ljspeech/wavs/LJ006-0077.npy +tests/data/ljspeech/wavs/LJ027-0074.wav|tests/data/ljspeech/wavs/LJ027-0074.npy +tests/data/ljspeech/wavs/LJ034-0156.wav|tests/data/ljspeech/wavs/LJ034-0156.npy +tests/data/ljspeech/wavs/LJ027-0053.wav|tests/data/ljspeech/wavs/LJ027-0053.npy +tests/data/ljspeech/wavs/LJ008-0087.wav|tests/data/ljspeech/wavs/LJ008-0087.npy +tests/data/ljspeech/wavs/LJ033-0066.wav|tests/data/ljspeech/wavs/LJ033-0066.npy +tests/data/ljspeech/wavs/LJ029-0130.wav|tests/data/ljspeech/wavs/LJ029-0130.npy +tests/data/ljspeech/wavs/LJ014-0020.wav|tests/data/ljspeech/wavs/LJ014-0020.npy +tests/data/ljspeech/wavs/LJ042-0022.wav|tests/data/ljspeech/wavs/LJ042-0022.npy +tests/data/ljspeech/wavs/LJ041-0157.wav|tests/data/ljspeech/wavs/LJ041-0157.npy +tests/data/ljspeech/wavs/LJ010-0026.wav|tests/data/ljspeech/wavs/LJ010-0026.npy +tests/data/ljspeech/wavs/LJ014-0029.wav|tests/data/ljspeech/wavs/LJ014-0029.npy +tests/data/ljspeech/wavs/LJ008-0239.wav|tests/data/ljspeech/wavs/LJ008-0239.npy +tests/data/ljspeech/wavs/LJ010-0076.wav|tests/data/ljspeech/wavs/LJ010-0076.npy +tests/data/ljspeech/wavs/LJ026-0032.wav|tests/data/ljspeech/wavs/LJ026-0032.npy 
+tests/data/ljspeech/wavs/LJ002-0135.wav|tests/data/ljspeech/wavs/LJ002-0135.npy +tests/data/ljspeech/wavs/LJ041-0012.wav|tests/data/ljspeech/wavs/LJ041-0012.npy +tests/data/ljspeech/wavs/LJ013-0207.wav|tests/data/ljspeech/wavs/LJ013-0207.npy +tests/data/ljspeech/wavs/LJ042-0048.wav|tests/data/ljspeech/wavs/LJ042-0048.npy +tests/data/ljspeech/wavs/LJ048-0227.wav|tests/data/ljspeech/wavs/LJ048-0227.npy +tests/data/ljspeech/wavs/LJ050-0032.wav|tests/data/ljspeech/wavs/LJ050-0032.npy +tests/data/ljspeech/wavs/LJ028-0218.wav|tests/data/ljspeech/wavs/LJ028-0218.npy +tests/data/ljspeech/wavs/LJ007-0194.wav|tests/data/ljspeech/wavs/LJ007-0194.npy +tests/data/ljspeech/wavs/LJ046-0181.wav|tests/data/ljspeech/wavs/LJ046-0181.npy +tests/data/ljspeech/wavs/LJ007-0214.wav|tests/data/ljspeech/wavs/LJ007-0214.npy +tests/data/ljspeech/wavs/LJ008-0154.wav|tests/data/ljspeech/wavs/LJ008-0154.npy +tests/data/ljspeech/wavs/LJ003-0128.wav|tests/data/ljspeech/wavs/LJ003-0128.npy +tests/data/ljspeech/wavs/LJ004-0185.wav|tests/data/ljspeech/wavs/LJ004-0185.npy +tests/data/ljspeech/wavs/LJ009-0169.wav|tests/data/ljspeech/wavs/LJ009-0169.npy +tests/data/ljspeech/wavs/LJ044-0192.wav|tests/data/ljspeech/wavs/LJ044-0192.npy +tests/data/ljspeech/wavs/LJ013-0188.wav|tests/data/ljspeech/wavs/LJ013-0188.npy +tests/data/ljspeech/wavs/LJ002-0313.wav|tests/data/ljspeech/wavs/LJ002-0313.npy +tests/data/ljspeech/wavs/LJ022-0092.wav|tests/data/ljspeech/wavs/LJ022-0092.npy +tests/data/ljspeech/wavs/LJ009-0089.wav|tests/data/ljspeech/wavs/LJ009-0089.npy +tests/data/ljspeech/wavs/LJ038-0295.wav|tests/data/ljspeech/wavs/LJ038-0295.npy +tests/data/ljspeech/wavs/LJ023-0018.wav|tests/data/ljspeech/wavs/LJ023-0018.npy +tests/data/ljspeech/wavs/LJ038-0143.wav|tests/data/ljspeech/wavs/LJ038-0143.npy +tests/data/ljspeech/wavs/LJ048-0004.wav|tests/data/ljspeech/wavs/LJ048-0004.npy +tests/data/ljspeech/wavs/LJ038-0182.wav|tests/data/ljspeech/wavs/LJ038-0182.npy +tests/data/ljspeech/wavs/LJ002-0276.wav|tests/data/ljspeech/wavs/LJ002-0276.npy +tests/data/ljspeech/wavs/LJ025-0024.wav|tests/data/ljspeech/wavs/LJ025-0024.npy +tests/data/ljspeech/wavs/LJ038-0169.wav|tests/data/ljspeech/wavs/LJ038-0169.npy +tests/data/ljspeech/wavs/LJ028-0354.wav|tests/data/ljspeech/wavs/LJ028-0354.npy +tests/data/ljspeech/wavs/LJ033-0106.wav|tests/data/ljspeech/wavs/LJ033-0106.npy +tests/data/ljspeech/wavs/LJ042-0125.wav|tests/data/ljspeech/wavs/LJ042-0125.npy +tests/data/ljspeech/wavs/LJ025-0135.wav|tests/data/ljspeech/wavs/LJ025-0135.npy +tests/data/ljspeech/wavs/LJ030-0190.wav|tests/data/ljspeech/wavs/LJ030-0190.npy +tests/data/ljspeech/wavs/LJ005-0291.wav|tests/data/ljspeech/wavs/LJ005-0291.npy +tests/data/ljspeech/wavs/LJ009-0158.wav|tests/data/ljspeech/wavs/LJ009-0158.npy +tests/data/ljspeech/wavs/LJ032-0110.wav|tests/data/ljspeech/wavs/LJ032-0110.npy +tests/data/ljspeech/wavs/LJ047-0071.wav|tests/data/ljspeech/wavs/LJ047-0071.npy +tests/data/ljspeech/wavs/LJ041-0093.wav|tests/data/ljspeech/wavs/LJ041-0093.npy +tests/data/ljspeech/wavs/LJ041-0095.wav|tests/data/ljspeech/wavs/LJ041-0095.npy +tests/data/ljspeech/wavs/LJ034-0027.wav|tests/data/ljspeech/wavs/LJ034-0027.npy +tests/data/ljspeech/wavs/LJ044-0197.wav|tests/data/ljspeech/wavs/LJ044-0197.npy +tests/data/ljspeech/wavs/LJ030-0186.wav|tests/data/ljspeech/wavs/LJ030-0186.npy +tests/data/ljspeech/wavs/LJ028-0148.wav|tests/data/ljspeech/wavs/LJ028-0148.npy +tests/data/ljspeech/wavs/LJ049-0118.wav|tests/data/ljspeech/wavs/LJ049-0118.npy 
+tests/data/ljspeech/wavs/LJ006-0033.wav|tests/data/ljspeech/wavs/LJ006-0033.npy +tests/data/ljspeech/wavs/LJ009-0111.wav|tests/data/ljspeech/wavs/LJ009-0111.npy +tests/data/ljspeech/wavs/LJ045-0012.wav|tests/data/ljspeech/wavs/LJ045-0012.npy +tests/data/ljspeech/wavs/LJ044-0130.wav|tests/data/ljspeech/wavs/LJ044-0130.npy +tests/data/ljspeech/wavs/LJ037-0104.wav|tests/data/ljspeech/wavs/LJ037-0104.npy +tests/data/ljspeech/wavs/LJ050-0217.wav|tests/data/ljspeech/wavs/LJ050-0217.npy +tests/data/ljspeech/wavs/LJ005-0138.wav|tests/data/ljspeech/wavs/LJ005-0138.npy +tests/data/ljspeech/wavs/LJ016-0249.wav|tests/data/ljspeech/wavs/LJ016-0249.npy +tests/data/ljspeech/wavs/LJ016-0052.wav|tests/data/ljspeech/wavs/LJ016-0052.npy +tests/data/ljspeech/wavs/LJ018-0127.wav|tests/data/ljspeech/wavs/LJ018-0127.npy +tests/data/ljspeech/wavs/LJ035-0170.wav|tests/data/ljspeech/wavs/LJ035-0170.npy +tests/data/ljspeech/wavs/LJ004-0014.wav|tests/data/ljspeech/wavs/LJ004-0014.npy +tests/data/ljspeech/wavs/LJ011-0281.wav|tests/data/ljspeech/wavs/LJ011-0281.npy +tests/data/ljspeech/wavs/LJ018-0120.wav|tests/data/ljspeech/wavs/LJ018-0120.npy +tests/data/ljspeech/wavs/LJ012-0003.wav|tests/data/ljspeech/wavs/LJ012-0003.npy +tests/data/ljspeech/wavs/LJ037-0256.wav|tests/data/ljspeech/wavs/LJ037-0256.npy +tests/data/ljspeech/wavs/LJ011-0026.wav|tests/data/ljspeech/wavs/LJ011-0026.npy +tests/data/ljspeech/wavs/LJ034-0095.wav|tests/data/ljspeech/wavs/LJ034-0095.npy +tests/data/ljspeech/wavs/LJ012-0265.wav|tests/data/ljspeech/wavs/LJ012-0265.npy +tests/data/ljspeech/wavs/LJ001-0109.wav|tests/data/ljspeech/wavs/LJ001-0109.npy +tests/data/ljspeech/wavs/LJ015-0054.wav|tests/data/ljspeech/wavs/LJ015-0054.npy +tests/data/ljspeech/wavs/LJ012-0229.wav|tests/data/ljspeech/wavs/LJ012-0229.npy +tests/data/ljspeech/wavs/LJ011-0270.wav|tests/data/ljspeech/wavs/LJ011-0270.npy +tests/data/ljspeech/wavs/LJ016-0380.wav|tests/data/ljspeech/wavs/LJ016-0380.npy +tests/data/ljspeech/wavs/LJ047-0189.wav|tests/data/ljspeech/wavs/LJ047-0189.npy +tests/data/ljspeech/wavs/LJ018-0265.wav|tests/data/ljspeech/wavs/LJ018-0265.npy +tests/data/ljspeech/wavs/LJ015-0218.wav|tests/data/ljspeech/wavs/LJ015-0218.npy +tests/data/ljspeech/wavs/LJ040-0011.wav|tests/data/ljspeech/wavs/LJ040-0011.npy +tests/data/ljspeech/wavs/LJ017-0189.wav|tests/data/ljspeech/wavs/LJ017-0189.npy +tests/data/ljspeech/wavs/LJ018-0288.wav|tests/data/ljspeech/wavs/LJ018-0288.npy +tests/data/ljspeech/wavs/LJ039-0209.wav|tests/data/ljspeech/wavs/LJ039-0209.npy +tests/data/ljspeech/wavs/LJ005-0082.wav|tests/data/ljspeech/wavs/LJ005-0082.npy +tests/data/ljspeech/wavs/LJ031-0107.wav|tests/data/ljspeech/wavs/LJ031-0107.npy +tests/data/ljspeech/wavs/LJ004-0166.wav|tests/data/ljspeech/wavs/LJ004-0166.npy +tests/data/ljspeech/wavs/LJ002-0055.wav|tests/data/ljspeech/wavs/LJ002-0055.npy +tests/data/ljspeech/wavs/LJ036-0094.wav|tests/data/ljspeech/wavs/LJ036-0094.npy +tests/data/ljspeech/wavs/LJ009-0161.wav|tests/data/ljspeech/wavs/LJ009-0161.npy +tests/data/ljspeech/wavs/LJ049-0067.wav|tests/data/ljspeech/wavs/LJ049-0067.npy +tests/data/ljspeech/wavs/LJ007-0199.wav|tests/data/ljspeech/wavs/LJ007-0199.npy +tests/data/ljspeech/wavs/LJ040-0050.wav|tests/data/ljspeech/wavs/LJ040-0050.npy +tests/data/ljspeech/wavs/LJ009-0150.wav|tests/data/ljspeech/wavs/LJ009-0150.npy +tests/data/ljspeech/wavs/LJ003-0156.wav|tests/data/ljspeech/wavs/LJ003-0156.npy +tests/data/ljspeech/wavs/LJ037-0155.wav|tests/data/ljspeech/wavs/LJ037-0155.npy 
+tests/data/ljspeech/wavs/LJ029-0199.wav|tests/data/ljspeech/wavs/LJ029-0199.npy +tests/data/ljspeech/wavs/LJ050-0121.wav|tests/data/ljspeech/wavs/LJ050-0121.npy +tests/data/ljspeech/wavs/LJ011-0087.wav|tests/data/ljspeech/wavs/LJ011-0087.npy +tests/data/ljspeech/wavs/LJ015-0145.wav|tests/data/ljspeech/wavs/LJ015-0145.npy +tests/data/ljspeech/wavs/LJ012-0052.wav|tests/data/ljspeech/wavs/LJ012-0052.npy +tests/data/ljspeech/wavs/LJ042-0212.wav|tests/data/ljspeech/wavs/LJ042-0212.npy +tests/data/ljspeech/wavs/LJ045-0103.wav|tests/data/ljspeech/wavs/LJ045-0103.npy +tests/data/ljspeech/wavs/LJ041-0070.wav|tests/data/ljspeech/wavs/LJ041-0070.npy +tests/data/ljspeech/wavs/LJ014-0201.wav|tests/data/ljspeech/wavs/LJ014-0201.npy +tests/data/ljspeech/wavs/LJ045-0068.wav|tests/data/ljspeech/wavs/LJ045-0068.npy +tests/data/ljspeech/wavs/LJ048-0236.wav|tests/data/ljspeech/wavs/LJ048-0236.npy +tests/data/ljspeech/wavs/LJ005-0264.wav|tests/data/ljspeech/wavs/LJ005-0264.npy +tests/data/ljspeech/wavs/LJ047-0011.wav|tests/data/ljspeech/wavs/LJ047-0011.npy +tests/data/ljspeech/wavs/LJ017-0202.wav|tests/data/ljspeech/wavs/LJ017-0202.npy +tests/data/ljspeech/wavs/LJ033-0125.wav|tests/data/ljspeech/wavs/LJ033-0125.npy +tests/data/ljspeech/wavs/LJ044-0047.wav|tests/data/ljspeech/wavs/LJ044-0047.npy +tests/data/ljspeech/wavs/LJ028-0330.wav|tests/data/ljspeech/wavs/LJ028-0330.npy +tests/data/ljspeech/wavs/LJ018-0031.wav|tests/data/ljspeech/wavs/LJ018-0031.npy +tests/data/ljspeech/wavs/LJ012-0142.wav|tests/data/ljspeech/wavs/LJ012-0142.npy +tests/data/ljspeech/wavs/LJ001-0070.wav|tests/data/ljspeech/wavs/LJ001-0070.npy +tests/data/ljspeech/wavs/LJ039-0070.wav|tests/data/ljspeech/wavs/LJ039-0070.npy +tests/data/ljspeech/wavs/LJ012-0233.wav|tests/data/ljspeech/wavs/LJ012-0233.npy +tests/data/ljspeech/wavs/LJ037-0110.wav|tests/data/ljspeech/wavs/LJ037-0110.npy +tests/data/ljspeech/wavs/LJ049-0158.wav|tests/data/ljspeech/wavs/LJ049-0158.npy +tests/data/ljspeech/wavs/LJ039-0079.wav|tests/data/ljspeech/wavs/LJ039-0079.npy +tests/data/ljspeech/wavs/LJ023-0045.wav|tests/data/ljspeech/wavs/LJ023-0045.npy +tests/data/ljspeech/wavs/LJ048-0234.wav|tests/data/ljspeech/wavs/LJ048-0234.npy +tests/data/ljspeech/wavs/LJ042-0085.wav|tests/data/ljspeech/wavs/LJ042-0085.npy +tests/data/ljspeech/wavs/LJ027-0089.wav|tests/data/ljspeech/wavs/LJ027-0089.npy +tests/data/ljspeech/wavs/LJ009-0079.wav|tests/data/ljspeech/wavs/LJ009-0079.npy +tests/data/ljspeech/wavs/LJ042-0142.wav|tests/data/ljspeech/wavs/LJ042-0142.npy +tests/data/ljspeech/wavs/LJ042-0058.wav|tests/data/ljspeech/wavs/LJ042-0058.npy +tests/data/ljspeech/wavs/LJ027-0065.wav|tests/data/ljspeech/wavs/LJ027-0065.npy +tests/data/ljspeech/wavs/LJ028-0012.wav|tests/data/ljspeech/wavs/LJ028-0012.npy +tests/data/ljspeech/wavs/LJ042-0021.wav|tests/data/ljspeech/wavs/LJ042-0021.npy +tests/data/ljspeech/wavs/LJ050-0212.wav|tests/data/ljspeech/wavs/LJ050-0212.npy +tests/data/ljspeech/wavs/LJ002-0104.wav|tests/data/ljspeech/wavs/LJ002-0104.npy +tests/data/ljspeech/wavs/LJ006-0085.wav|tests/data/ljspeech/wavs/LJ006-0085.npy +tests/data/ljspeech/wavs/LJ032-0164.wav|tests/data/ljspeech/wavs/LJ032-0164.npy +tests/data/ljspeech/wavs/LJ028-0070.wav|tests/data/ljspeech/wavs/LJ028-0070.npy +tests/data/ljspeech/wavs/LJ015-0126.wav|tests/data/ljspeech/wavs/LJ015-0126.npy +tests/data/ljspeech/wavs/LJ030-0090.wav|tests/data/ljspeech/wavs/LJ030-0090.npy +tests/data/ljspeech/wavs/LJ027-0108.wav|tests/data/ljspeech/wavs/LJ027-0108.npy 
+tests/data/ljspeech/wavs/LJ005-0295.wav|tests/data/ljspeech/wavs/LJ005-0295.npy +tests/data/ljspeech/wavs/LJ012-0082.wav|tests/data/ljspeech/wavs/LJ012-0082.npy +tests/data/ljspeech/wavs/LJ006-0070.wav|tests/data/ljspeech/wavs/LJ006-0070.npy +tests/data/ljspeech/wavs/LJ008-0128.wav|tests/data/ljspeech/wavs/LJ008-0128.npy +tests/data/ljspeech/wavs/LJ016-0029.wav|tests/data/ljspeech/wavs/LJ016-0029.npy +tests/data/ljspeech/wavs/LJ007-0022.wav|tests/data/ljspeech/wavs/LJ007-0022.npy +tests/data/ljspeech/wavs/LJ022-0126.wav|tests/data/ljspeech/wavs/LJ022-0126.npy +tests/data/ljspeech/wavs/LJ005-0298.wav|tests/data/ljspeech/wavs/LJ005-0298.npy +tests/data/ljspeech/wavs/LJ033-0212.wav|tests/data/ljspeech/wavs/LJ033-0212.npy +tests/data/ljspeech/wavs/LJ016-0101.wav|tests/data/ljspeech/wavs/LJ016-0101.npy +tests/data/ljspeech/wavs/LJ022-0023.wav|tests/data/ljspeech/wavs/LJ022-0023.npy +tests/data/ljspeech/wavs/LJ017-0032.wav|tests/data/ljspeech/wavs/LJ017-0032.npy +tests/data/ljspeech/wavs/LJ046-0107.wav|tests/data/ljspeech/wavs/LJ046-0107.npy +tests/data/ljspeech/wavs/LJ037-0077.wav|tests/data/ljspeech/wavs/LJ037-0077.npy +tests/data/ljspeech/wavs/LJ039-0172.wav|tests/data/ljspeech/wavs/LJ039-0172.npy +tests/data/ljspeech/wavs/LJ014-0219.wav|tests/data/ljspeech/wavs/LJ014-0219.npy +tests/data/ljspeech/wavs/LJ037-0039.wav|tests/data/ljspeech/wavs/LJ037-0039.npy +tests/data/ljspeech/wavs/LJ028-0114.wav|tests/data/ljspeech/wavs/LJ028-0114.npy +tests/data/ljspeech/wavs/LJ015-0309.wav|tests/data/ljspeech/wavs/LJ015-0309.npy +tests/data/ljspeech/wavs/LJ039-0167.wav|tests/data/ljspeech/wavs/LJ039-0167.npy +tests/data/ljspeech/wavs/LJ030-0236.wav|tests/data/ljspeech/wavs/LJ030-0236.npy +tests/data/ljspeech/wavs/LJ011-0239.wav|tests/data/ljspeech/wavs/LJ011-0239.npy +tests/data/ljspeech/wavs/LJ031-0066.wav|tests/data/ljspeech/wavs/LJ031-0066.npy +tests/data/ljspeech/wavs/LJ002-0072.wav|tests/data/ljspeech/wavs/LJ002-0072.npy +tests/data/ljspeech/wavs/LJ048-0023.wav|tests/data/ljspeech/wavs/LJ048-0023.npy +tests/data/ljspeech/wavs/LJ012-0013.wav|tests/data/ljspeech/wavs/LJ012-0013.npy +tests/data/ljspeech/wavs/LJ008-0265.wav|tests/data/ljspeech/wavs/LJ008-0265.npy +tests/data/ljspeech/wavs/LJ007-0014.wav|tests/data/ljspeech/wavs/LJ007-0014.npy +tests/data/ljspeech/wavs/LJ002-0190.wav|tests/data/ljspeech/wavs/LJ002-0190.npy +tests/data/ljspeech/wavs/LJ016-0294.wav|tests/data/ljspeech/wavs/LJ016-0294.npy +tests/data/ljspeech/wavs/LJ001-0089.wav|tests/data/ljspeech/wavs/LJ001-0089.npy +tests/data/ljspeech/wavs/LJ014-0073.wav|tests/data/ljspeech/wavs/LJ014-0073.npy +tests/data/ljspeech/wavs/LJ026-0026.wav|tests/data/ljspeech/wavs/LJ026-0026.npy +tests/data/ljspeech/wavs/LJ037-0040.wav|tests/data/ljspeech/wavs/LJ037-0040.npy +tests/data/ljspeech/wavs/LJ012-0010.wav|tests/data/ljspeech/wavs/LJ012-0010.npy +tests/data/ljspeech/wavs/LJ028-0238.wav|tests/data/ljspeech/wavs/LJ028-0238.npy +tests/data/ljspeech/wavs/LJ050-0192.wav|tests/data/ljspeech/wavs/LJ050-0192.npy +tests/data/ljspeech/wavs/LJ048-0022.wav|tests/data/ljspeech/wavs/LJ048-0022.npy +tests/data/ljspeech/wavs/LJ006-0138.wav|tests/data/ljspeech/wavs/LJ006-0138.npy +tests/data/ljspeech/wavs/LJ005-0199.wav|tests/data/ljspeech/wavs/LJ005-0199.npy +tests/data/ljspeech/wavs/LJ050-0218.wav|tests/data/ljspeech/wavs/LJ050-0218.npy +tests/data/ljspeech/wavs/LJ002-0064.wav|tests/data/ljspeech/wavs/LJ002-0064.npy +tests/data/ljspeech/wavs/LJ008-0249.wav|tests/data/ljspeech/wavs/LJ008-0249.npy 
+tests/data/ljspeech/wavs/LJ004-0184.wav|tests/data/ljspeech/wavs/LJ004-0184.npy +tests/data/ljspeech/wavs/LJ036-0004.wav|tests/data/ljspeech/wavs/LJ036-0004.npy +tests/data/ljspeech/wavs/LJ036-0044.wav|tests/data/ljspeech/wavs/LJ036-0044.npy +tests/data/ljspeech/wavs/LJ047-0144.wav|tests/data/ljspeech/wavs/LJ047-0144.npy +tests/data/ljspeech/wavs/LJ042-0197.wav|tests/data/ljspeech/wavs/LJ042-0197.npy +tests/data/ljspeech/wavs/LJ049-0225.wav|tests/data/ljspeech/wavs/LJ049-0225.npy +tests/data/ljspeech/wavs/LJ003-0159.wav|tests/data/ljspeech/wavs/LJ003-0159.npy +tests/data/ljspeech/wavs/LJ050-0119.wav|tests/data/ljspeech/wavs/LJ050-0119.npy +tests/data/ljspeech/wavs/LJ038-0108.wav|tests/data/ljspeech/wavs/LJ038-0108.npy +tests/data/ljspeech/wavs/LJ040-0139.wav|tests/data/ljspeech/wavs/LJ040-0139.npy +tests/data/ljspeech/wavs/LJ048-0157.wav|tests/data/ljspeech/wavs/LJ048-0157.npy +tests/data/ljspeech/wavs/LJ014-0275.wav|tests/data/ljspeech/wavs/LJ014-0275.npy +tests/data/ljspeech/wavs/LJ009-0018.wav|tests/data/ljspeech/wavs/LJ009-0018.npy +tests/data/ljspeech/wavs/LJ010-0137.wav|tests/data/ljspeech/wavs/LJ010-0137.npy +tests/data/ljspeech/wavs/LJ018-0099.wav|tests/data/ljspeech/wavs/LJ018-0099.npy +tests/data/ljspeech/wavs/LJ040-0119.wav|tests/data/ljspeech/wavs/LJ040-0119.npy +tests/data/ljspeech/wavs/LJ019-0322.wav|tests/data/ljspeech/wavs/LJ019-0322.npy +tests/data/ljspeech/wavs/LJ019-0065.wav|tests/data/ljspeech/wavs/LJ019-0065.npy +tests/data/ljspeech/wavs/LJ007-0113.wav|tests/data/ljspeech/wavs/LJ007-0113.npy +tests/data/ljspeech/wavs/LJ006-0044.wav|tests/data/ljspeech/wavs/LJ006-0044.npy +tests/data/ljspeech/wavs/LJ014-0307.wav|tests/data/ljspeech/wavs/LJ014-0307.npy +tests/data/ljspeech/wavs/LJ001-0150.wav|tests/data/ljspeech/wavs/LJ001-0150.npy +tests/data/ljspeech/wavs/LJ029-0047.wav|tests/data/ljspeech/wavs/LJ029-0047.npy +tests/data/ljspeech/wavs/LJ019-0397.wav|tests/data/ljspeech/wavs/LJ019-0397.npy +tests/data/ljspeech/wavs/LJ040-0054.wav|tests/data/ljspeech/wavs/LJ040-0054.npy +tests/data/ljspeech/wavs/LJ020-0088.wav|tests/data/ljspeech/wavs/LJ020-0088.npy +tests/data/ljspeech/wavs/LJ036-0056.wav|tests/data/ljspeech/wavs/LJ036-0056.npy +tests/data/ljspeech/wavs/LJ030-0178.wav|tests/data/ljspeech/wavs/LJ030-0178.npy +tests/data/ljspeech/wavs/LJ048-0264.wav|tests/data/ljspeech/wavs/LJ048-0264.npy +tests/data/ljspeech/wavs/LJ031-0182.wav|tests/data/ljspeech/wavs/LJ031-0182.npy +tests/data/ljspeech/wavs/LJ010-0249.wav|tests/data/ljspeech/wavs/LJ010-0249.npy +tests/data/ljspeech/wavs/LJ006-0183.wav|tests/data/ljspeech/wavs/LJ006-0183.npy +tests/data/ljspeech/wavs/LJ038-0237.wav|tests/data/ljspeech/wavs/LJ038-0237.npy +tests/data/ljspeech/wavs/LJ033-0042.wav|tests/data/ljspeech/wavs/LJ033-0042.npy +tests/data/ljspeech/wavs/LJ011-0035.wav|tests/data/ljspeech/wavs/LJ011-0035.npy +tests/data/ljspeech/wavs/LJ025-0098.wav|tests/data/ljspeech/wavs/LJ025-0098.npy +tests/data/ljspeech/wavs/LJ043-0151.wav|tests/data/ljspeech/wavs/LJ043-0151.npy +tests/data/ljspeech/wavs/LJ028-0311.wav|tests/data/ljspeech/wavs/LJ028-0311.npy +tests/data/ljspeech/wavs/LJ048-0224.wav|tests/data/ljspeech/wavs/LJ048-0224.npy +tests/data/ljspeech/wavs/LJ043-0006.wav|tests/data/ljspeech/wavs/LJ043-0006.npy +tests/data/ljspeech/wavs/LJ044-0181.wav|tests/data/ljspeech/wavs/LJ044-0181.npy +tests/data/ljspeech/wavs/LJ011-0034.wav|tests/data/ljspeech/wavs/LJ011-0034.npy +tests/data/ljspeech/wavs/LJ004-0122.wav|tests/data/ljspeech/wavs/LJ004-0122.npy 
+tests/data/ljspeech/wavs/LJ028-0314.wav|tests/data/ljspeech/wavs/LJ028-0314.npy +tests/data/ljspeech/wavs/LJ004-0018.wav|tests/data/ljspeech/wavs/LJ004-0018.npy +tests/data/ljspeech/wavs/LJ008-0297.wav|tests/data/ljspeech/wavs/LJ008-0297.npy +tests/data/ljspeech/wavs/LJ050-0127.wav|tests/data/ljspeech/wavs/LJ050-0127.npy +tests/data/ljspeech/wavs/LJ004-0076.wav|tests/data/ljspeech/wavs/LJ004-0076.npy +tests/data/ljspeech/wavs/LJ014-0239.wav|tests/data/ljspeech/wavs/LJ014-0239.npy +tests/data/ljspeech/wavs/LJ014-0292.wav|tests/data/ljspeech/wavs/LJ014-0292.npy +tests/data/ljspeech/wavs/LJ014-0046.wav|tests/data/ljspeech/wavs/LJ014-0046.npy +tests/data/ljspeech/wavs/LJ006-0197.wav|tests/data/ljspeech/wavs/LJ006-0197.npy +tests/data/ljspeech/wavs/LJ030-0134.wav|tests/data/ljspeech/wavs/LJ030-0134.npy +tests/data/ljspeech/wavs/LJ044-0157.wav|tests/data/ljspeech/wavs/LJ044-0157.npy +tests/data/ljspeech/wavs/LJ037-0062.wav|tests/data/ljspeech/wavs/LJ037-0062.npy +tests/data/ljspeech/wavs/LJ014-0094.wav|tests/data/ljspeech/wavs/LJ014-0094.npy +tests/data/ljspeech/wavs/LJ016-0319.wav|tests/data/ljspeech/wavs/LJ016-0319.npy +tests/data/ljspeech/wavs/LJ043-0098.wav|tests/data/ljspeech/wavs/LJ043-0098.npy +tests/data/ljspeech/wavs/LJ009-0116.wav|tests/data/ljspeech/wavs/LJ009-0116.npy +tests/data/ljspeech/wavs/LJ031-0084.wav|tests/data/ljspeech/wavs/LJ031-0084.npy +tests/data/ljspeech/wavs/LJ016-0338.wav|tests/data/ljspeech/wavs/LJ016-0338.npy +tests/data/ljspeech/wavs/LJ011-0218.wav|tests/data/ljspeech/wavs/LJ011-0218.npy +tests/data/ljspeech/wavs/LJ016-0263.wav|tests/data/ljspeech/wavs/LJ016-0263.npy +tests/data/ljspeech/wavs/LJ012-0196.wav|tests/data/ljspeech/wavs/LJ012-0196.npy +tests/data/ljspeech/wavs/LJ050-0145.wav|tests/data/ljspeech/wavs/LJ050-0145.npy +tests/data/ljspeech/wavs/LJ015-0051.wav|tests/data/ljspeech/wavs/LJ015-0051.npy +tests/data/ljspeech/wavs/LJ019-0133.wav|tests/data/ljspeech/wavs/LJ019-0133.npy +tests/data/ljspeech/wavs/LJ040-0145.wav|tests/data/ljspeech/wavs/LJ040-0145.npy +tests/data/ljspeech/wavs/LJ026-0098.wav|tests/data/ljspeech/wavs/LJ026-0098.npy +tests/data/ljspeech/wavs/LJ041-0183.wav|tests/data/ljspeech/wavs/LJ041-0183.npy +tests/data/ljspeech/wavs/LJ027-0092.wav|tests/data/ljspeech/wavs/LJ027-0092.npy +tests/data/ljspeech/wavs/LJ041-0174.wav|tests/data/ljspeech/wavs/LJ041-0174.npy +tests/data/ljspeech/wavs/LJ037-0091.wav|tests/data/ljspeech/wavs/LJ037-0091.npy +tests/data/ljspeech/wavs/LJ018-0326.wav|tests/data/ljspeech/wavs/LJ018-0326.npy +tests/data/ljspeech/wavs/LJ013-0041.wav|tests/data/ljspeech/wavs/LJ013-0041.npy +tests/data/ljspeech/wavs/LJ049-0176.wav|tests/data/ljspeech/wavs/LJ049-0176.npy +tests/data/ljspeech/wavs/LJ042-0038.wav|tests/data/ljspeech/wavs/LJ042-0038.npy +tests/data/ljspeech/wavs/LJ013-0260.wav|tests/data/ljspeech/wavs/LJ013-0260.npy +tests/data/ljspeech/wavs/LJ043-0002.wav|tests/data/ljspeech/wavs/LJ043-0002.npy +tests/data/ljspeech/wavs/LJ019-0112.wav|tests/data/ljspeech/wavs/LJ019-0112.npy +tests/data/ljspeech/wavs/LJ019-0031.wav|tests/data/ljspeech/wavs/LJ019-0031.npy +tests/data/ljspeech/wavs/LJ002-0086.wav|tests/data/ljspeech/wavs/LJ002-0086.npy +tests/data/ljspeech/wavs/LJ012-0060.wav|tests/data/ljspeech/wavs/LJ012-0060.npy +tests/data/ljspeech/wavs/LJ012-0146.wav|tests/data/ljspeech/wavs/LJ012-0146.npy +tests/data/ljspeech/wavs/LJ049-0134.wav|tests/data/ljspeech/wavs/LJ049-0134.npy +tests/data/ljspeech/wavs/LJ012-0104.wav|tests/data/ljspeech/wavs/LJ012-0104.npy 
+tests/data/ljspeech/wavs/LJ008-0064.wav|tests/data/ljspeech/wavs/LJ008-0064.npy +tests/data/ljspeech/wavs/LJ027-0160.wav|tests/data/ljspeech/wavs/LJ027-0160.npy +tests/data/ljspeech/wavs/LJ008-0072.wav|tests/data/ljspeech/wavs/LJ008-0072.npy +tests/data/ljspeech/wavs/LJ016-0240.wav|tests/data/ljspeech/wavs/LJ016-0240.npy +tests/data/ljspeech/wavs/LJ043-0163.wav|tests/data/ljspeech/wavs/LJ043-0163.npy +tests/data/ljspeech/wavs/LJ047-0197.wav|tests/data/ljspeech/wavs/LJ047-0197.npy +tests/data/ljspeech/wavs/LJ037-0145.wav|tests/data/ljspeech/wavs/LJ037-0145.npy +tests/data/ljspeech/wavs/LJ006-0128.wav|tests/data/ljspeech/wavs/LJ006-0128.npy +tests/data/ljspeech/wavs/LJ003-0312.wav|tests/data/ljspeech/wavs/LJ003-0312.npy +tests/data/ljspeech/wavs/LJ032-0162.wav|tests/data/ljspeech/wavs/LJ032-0162.npy +tests/data/ljspeech/wavs/LJ014-0334.wav|tests/data/ljspeech/wavs/LJ014-0334.npy +tests/data/ljspeech/wavs/LJ034-0106.wav|tests/data/ljspeech/wavs/LJ034-0106.npy +tests/data/ljspeech/wavs/LJ038-0158.wav|tests/data/ljspeech/wavs/LJ038-0158.npy +tests/data/ljspeech/wavs/LJ048-0131.wav|tests/data/ljspeech/wavs/LJ048-0131.npy +tests/data/ljspeech/wavs/LJ045-0214.wav|tests/data/ljspeech/wavs/LJ045-0214.npy +tests/data/ljspeech/wavs/LJ045-0095.wav|tests/data/ljspeech/wavs/LJ045-0095.npy +tests/data/ljspeech/wavs/LJ044-0223.wav|tests/data/ljspeech/wavs/LJ044-0223.npy +tests/data/ljspeech/wavs/LJ046-0141.wav|tests/data/ljspeech/wavs/LJ046-0141.npy +tests/data/ljspeech/wavs/LJ031-0103.wav|tests/data/ljspeech/wavs/LJ031-0103.npy +tests/data/ljspeech/wavs/LJ001-0023.wav|tests/data/ljspeech/wavs/LJ001-0023.npy +tests/data/ljspeech/wavs/LJ048-0102.wav|tests/data/ljspeech/wavs/LJ048-0102.npy +tests/data/ljspeech/wavs/LJ004-0244.wav|tests/data/ljspeech/wavs/LJ004-0244.npy +tests/data/ljspeech/wavs/LJ004-0209.wav|tests/data/ljspeech/wavs/LJ004-0209.npy +tests/data/ljspeech/wavs/LJ019-0377.wav|tests/data/ljspeech/wavs/LJ019-0377.npy +tests/data/ljspeech/wavs/LJ042-0002.wav|tests/data/ljspeech/wavs/LJ042-0002.npy +tests/data/ljspeech/wavs/LJ038-0095.wav|tests/data/ljspeech/wavs/LJ038-0095.npy +tests/data/ljspeech/wavs/LJ040-0134.wav|tests/data/ljspeech/wavs/LJ040-0134.npy +tests/data/ljspeech/wavs/LJ018-0028.wav|tests/data/ljspeech/wavs/LJ018-0028.npy +tests/data/ljspeech/wavs/LJ028-0404.wav|tests/data/ljspeech/wavs/LJ028-0404.npy +tests/data/ljspeech/wavs/LJ006-0212.wav|tests/data/ljspeech/wavs/LJ006-0212.npy +tests/data/ljspeech/wavs/LJ030-0163.wav|tests/data/ljspeech/wavs/LJ030-0163.npy +tests/data/ljspeech/wavs/LJ017-0031.wav|tests/data/ljspeech/wavs/LJ017-0031.npy +tests/data/ljspeech/wavs/LJ049-0103.wav|tests/data/ljspeech/wavs/LJ049-0103.npy +tests/data/ljspeech/wavs/LJ031-0049.wav|tests/data/ljspeech/wavs/LJ031-0049.npy +tests/data/ljspeech/wavs/LJ032-0258.wav|tests/data/ljspeech/wavs/LJ032-0258.npy +tests/data/ljspeech/wavs/LJ003-0215.wav|tests/data/ljspeech/wavs/LJ003-0215.npy +tests/data/ljspeech/wavs/LJ018-0017.wav|tests/data/ljspeech/wavs/LJ018-0017.npy +tests/data/ljspeech/wavs/LJ009-0241.wav|tests/data/ljspeech/wavs/LJ009-0241.npy +tests/data/ljspeech/wavs/LJ045-0106.wav|tests/data/ljspeech/wavs/LJ045-0106.npy +tests/data/ljspeech/wavs/LJ027-0041.wav|tests/data/ljspeech/wavs/LJ027-0041.npy +tests/data/ljspeech/wavs/LJ027-0083.wav|tests/data/ljspeech/wavs/LJ027-0083.npy +tests/data/ljspeech/wavs/LJ050-0198.wav|tests/data/ljspeech/wavs/LJ050-0198.npy +tests/data/ljspeech/wavs/LJ004-0087.wav|tests/data/ljspeech/wavs/LJ004-0087.npy 
+tests/data/ljspeech/wavs/LJ029-0157.wav|tests/data/ljspeech/wavs/LJ029-0157.npy +tests/data/ljspeech/wavs/LJ002-0107.wav|tests/data/ljspeech/wavs/LJ002-0107.npy +tests/data/ljspeech/wavs/LJ040-0205.wav|tests/data/ljspeech/wavs/LJ040-0205.npy +tests/data/ljspeech/wavs/LJ027-0072.wav|tests/data/ljspeech/wavs/LJ027-0072.npy +tests/data/ljspeech/wavs/LJ019-0361.wav|tests/data/ljspeech/wavs/LJ019-0361.npy +tests/data/ljspeech/wavs/LJ040-0126.wav|tests/data/ljspeech/wavs/LJ040-0126.npy +tests/data/ljspeech/wavs/LJ041-0017.wav|tests/data/ljspeech/wavs/LJ041-0017.npy +tests/data/ljspeech/wavs/LJ050-0120.wav|tests/data/ljspeech/wavs/LJ050-0120.npy +tests/data/ljspeech/wavs/LJ034-0198.wav|tests/data/ljspeech/wavs/LJ034-0198.npy +tests/data/ljspeech/wavs/LJ013-0092.wav|tests/data/ljspeech/wavs/LJ013-0092.npy +tests/data/ljspeech/wavs/LJ045-0203.wav|tests/data/ljspeech/wavs/LJ045-0203.npy +tests/data/ljspeech/wavs/LJ040-0010.wav|tests/data/ljspeech/wavs/LJ040-0010.npy +tests/data/ljspeech/wavs/LJ006-0019.wav|tests/data/ljspeech/wavs/LJ006-0019.npy +tests/data/ljspeech/wavs/LJ028-0466.wav|tests/data/ljspeech/wavs/LJ028-0466.npy +tests/data/ljspeech/wavs/LJ004-0227.wav|tests/data/ljspeech/wavs/LJ004-0227.npy +tests/data/ljspeech/wavs/LJ002-0085.wav|tests/data/ljspeech/wavs/LJ002-0085.npy +tests/data/ljspeech/wavs/LJ028-0426.wav|tests/data/ljspeech/wavs/LJ028-0426.npy +tests/data/ljspeech/wavs/LJ018-0260.wav|tests/data/ljspeech/wavs/LJ018-0260.npy +tests/data/ljspeech/wavs/LJ006-0204.wav|tests/data/ljspeech/wavs/LJ006-0204.npy +tests/data/ljspeech/wavs/LJ011-0170.wav|tests/data/ljspeech/wavs/LJ011-0170.npy +tests/data/ljspeech/wavs/LJ021-0138.wav|tests/data/ljspeech/wavs/LJ021-0138.npy +tests/data/ljspeech/wavs/LJ043-0172.wav|tests/data/ljspeech/wavs/LJ043-0172.npy +tests/data/ljspeech/wavs/LJ044-0136.wav|tests/data/ljspeech/wavs/LJ044-0136.npy +tests/data/ljspeech/wavs/LJ001-0100.wav|tests/data/ljspeech/wavs/LJ001-0100.npy +tests/data/ljspeech/wavs/LJ037-0173.wav|tests/data/ljspeech/wavs/LJ037-0173.npy +tests/data/ljspeech/wavs/LJ032-0007.wav|tests/data/ljspeech/wavs/LJ032-0007.npy +tests/data/ljspeech/wavs/LJ013-0119.wav|tests/data/ljspeech/wavs/LJ013-0119.npy +tests/data/ljspeech/wavs/LJ008-0238.wav|tests/data/ljspeech/wavs/LJ008-0238.npy +tests/data/ljspeech/wavs/LJ017-0206.wav|tests/data/ljspeech/wavs/LJ017-0206.npy +tests/data/ljspeech/wavs/LJ013-0117.wav|tests/data/ljspeech/wavs/LJ013-0117.npy +tests/data/ljspeech/wavs/LJ009-0237.wav|tests/data/ljspeech/wavs/LJ009-0237.npy +tests/data/ljspeech/wavs/LJ038-0012.wav|tests/data/ljspeech/wavs/LJ038-0012.npy +tests/data/ljspeech/wavs/LJ030-0138.wav|tests/data/ljspeech/wavs/LJ030-0138.npy +tests/data/ljspeech/wavs/LJ042-0150.wav|tests/data/ljspeech/wavs/LJ042-0150.npy +tests/data/ljspeech/wavs/LJ032-0141.wav|tests/data/ljspeech/wavs/LJ032-0141.npy +tests/data/ljspeech/wavs/LJ038-0215.wav|tests/data/ljspeech/wavs/LJ038-0215.npy +tests/data/ljspeech/wavs/LJ012-0127.wav|tests/data/ljspeech/wavs/LJ012-0127.npy +tests/data/ljspeech/wavs/LJ038-0244.wav|tests/data/ljspeech/wavs/LJ038-0244.npy +tests/data/ljspeech/wavs/LJ042-0084.wav|tests/data/ljspeech/wavs/LJ042-0084.npy +tests/data/ljspeech/wavs/LJ018-0039.wav|tests/data/ljspeech/wavs/LJ018-0039.npy +tests/data/ljspeech/wavs/LJ027-0149.wav|tests/data/ljspeech/wavs/LJ027-0149.npy +tests/data/ljspeech/wavs/LJ015-0269.wav|tests/data/ljspeech/wavs/LJ015-0269.npy +tests/data/ljspeech/wavs/LJ018-0338.wav|tests/data/ljspeech/wavs/LJ018-0338.npy 
+tests/data/ljspeech/wavs/LJ007-0155.wav|tests/data/ljspeech/wavs/LJ007-0155.npy +tests/data/ljspeech/wavs/LJ049-0086.wav|tests/data/ljspeech/wavs/LJ049-0086.npy +tests/data/ljspeech/wavs/LJ031-0163.wav|tests/data/ljspeech/wavs/LJ031-0163.npy +tests/data/ljspeech/wavs/LJ013-0096.wav|tests/data/ljspeech/wavs/LJ013-0096.npy +tests/data/ljspeech/wavs/LJ019-0072.wav|tests/data/ljspeech/wavs/LJ019-0072.npy +tests/data/ljspeech/wavs/LJ010-0059.wav|tests/data/ljspeech/wavs/LJ010-0059.npy +tests/data/ljspeech/wavs/LJ018-0089.wav|tests/data/ljspeech/wavs/LJ018-0089.npy +tests/data/ljspeech/wavs/LJ018-0333.wav|tests/data/ljspeech/wavs/LJ018-0333.npy +tests/data/ljspeech/wavs/LJ018-0372.wav|tests/data/ljspeech/wavs/LJ018-0372.npy +tests/data/ljspeech/wavs/LJ019-0156.wav|tests/data/ljspeech/wavs/LJ019-0156.npy +tests/data/ljspeech/wavs/LJ019-0114.wav|tests/data/ljspeech/wavs/LJ019-0114.npy +tests/data/ljspeech/wavs/LJ009-0232.wav|tests/data/ljspeech/wavs/LJ009-0232.npy +tests/data/ljspeech/wavs/LJ003-0315.wav|tests/data/ljspeech/wavs/LJ003-0315.npy +tests/data/ljspeech/wavs/LJ008-0282.wav|tests/data/ljspeech/wavs/LJ008-0282.npy +tests/data/ljspeech/wavs/LJ008-0124.wav|tests/data/ljspeech/wavs/LJ008-0124.npy +tests/data/ljspeech/wavs/LJ015-0080.wav|tests/data/ljspeech/wavs/LJ015-0080.npy +tests/data/ljspeech/wavs/LJ040-0113.wav|tests/data/ljspeech/wavs/LJ040-0113.npy +tests/data/ljspeech/wavs/LJ004-0171.wav|tests/data/ljspeech/wavs/LJ004-0171.npy +tests/data/ljspeech/wavs/LJ009-0230.wav|tests/data/ljspeech/wavs/LJ009-0230.npy +tests/data/ljspeech/wavs/LJ038-0306.wav|tests/data/ljspeech/wavs/LJ038-0306.npy +tests/data/ljspeech/wavs/LJ016-0226.wav|tests/data/ljspeech/wavs/LJ016-0226.npy +tests/data/ljspeech/wavs/LJ009-0179.wav|tests/data/ljspeech/wavs/LJ009-0179.npy +tests/data/ljspeech/wavs/LJ002-0268.wav|tests/data/ljspeech/wavs/LJ002-0268.npy +tests/data/ljspeech/wavs/LJ005-0225.wav|tests/data/ljspeech/wavs/LJ005-0225.npy +tests/data/ljspeech/wavs/LJ009-0176.wav|tests/data/ljspeech/wavs/LJ009-0176.npy +tests/data/ljspeech/wavs/LJ025-0166.wav|tests/data/ljspeech/wavs/LJ025-0166.npy +tests/data/ljspeech/wavs/LJ031-0018.wav|tests/data/ljspeech/wavs/LJ031-0018.npy +tests/data/ljspeech/wavs/LJ019-0121.wav|tests/data/ljspeech/wavs/LJ019-0121.npy +tests/data/ljspeech/wavs/LJ031-0017.wav|tests/data/ljspeech/wavs/LJ031-0017.npy +tests/data/ljspeech/wavs/LJ016-0445.wav|tests/data/ljspeech/wavs/LJ016-0445.npy +tests/data/ljspeech/wavs/LJ004-0155.wav|tests/data/ljspeech/wavs/LJ004-0155.npy +tests/data/ljspeech/wavs/LJ045-0185.wav|tests/data/ljspeech/wavs/LJ045-0185.npy +tests/data/ljspeech/wavs/LJ028-0507.wav|tests/data/ljspeech/wavs/LJ028-0507.npy +tests/data/ljspeech/wavs/LJ031-0145.wav|tests/data/ljspeech/wavs/LJ031-0145.npy +tests/data/ljspeech/wavs/LJ005-0113.wav|tests/data/ljspeech/wavs/LJ005-0113.npy +tests/data/ljspeech/wavs/LJ007-0054.wav|tests/data/ljspeech/wavs/LJ007-0054.npy +tests/data/ljspeech/wavs/LJ048-0229.wav|tests/data/ljspeech/wavs/LJ048-0229.npy +tests/data/ljspeech/wavs/LJ018-0090.wav|tests/data/ljspeech/wavs/LJ018-0090.npy +tests/data/ljspeech/wavs/LJ003-0274.wav|tests/data/ljspeech/wavs/LJ003-0274.npy +tests/data/ljspeech/wavs/LJ009-0257.wav|tests/data/ljspeech/wavs/LJ009-0257.npy +tests/data/ljspeech/wavs/LJ007-0116.wav|tests/data/ljspeech/wavs/LJ007-0116.npy +tests/data/ljspeech/wavs/LJ013-0061.wav|tests/data/ljspeech/wavs/LJ013-0061.npy +tests/data/ljspeech/wavs/LJ025-0015.wav|tests/data/ljspeech/wavs/LJ025-0015.npy 
+tests/data/ljspeech/wavs/LJ004-0102.wav|tests/data/ljspeech/wavs/LJ004-0102.npy +tests/data/ljspeech/wavs/LJ048-0130.wav|tests/data/ljspeech/wavs/LJ048-0130.npy +tests/data/ljspeech/wavs/LJ042-0206.wav|tests/data/ljspeech/wavs/LJ042-0206.npy +tests/data/ljspeech/wavs/LJ033-0114.wav|tests/data/ljspeech/wavs/LJ033-0114.npy +tests/data/ljspeech/wavs/LJ034-0099.wav|tests/data/ljspeech/wavs/LJ034-0099.npy +tests/data/ljspeech/wavs/LJ001-0066.wav|tests/data/ljspeech/wavs/LJ001-0066.npy +tests/data/ljspeech/wavs/LJ004-0183.wav|tests/data/ljspeech/wavs/LJ004-0183.npy +tests/data/ljspeech/wavs/LJ034-0177.wav|tests/data/ljspeech/wavs/LJ034-0177.npy +tests/data/ljspeech/wavs/LJ038-0293.wav|tests/data/ljspeech/wavs/LJ038-0293.npy +tests/data/ljspeech/wavs/LJ021-0123.wav|tests/data/ljspeech/wavs/LJ021-0123.npy +tests/data/ljspeech/wavs/LJ032-0064.wav|tests/data/ljspeech/wavs/LJ032-0064.npy +tests/data/ljspeech/wavs/LJ047-0184.wav|tests/data/ljspeech/wavs/LJ047-0184.npy +tests/data/ljspeech/wavs/LJ006-0165.wav|tests/data/ljspeech/wavs/LJ006-0165.npy +tests/data/ljspeech/wavs/LJ005-0051.wav|tests/data/ljspeech/wavs/LJ005-0051.npy +tests/data/ljspeech/wavs/LJ037-0245.wav|tests/data/ljspeech/wavs/LJ037-0245.npy +tests/data/ljspeech/wavs/LJ013-0153.wav|tests/data/ljspeech/wavs/LJ013-0153.npy +tests/data/ljspeech/wavs/LJ049-0066.wav|tests/data/ljspeech/wavs/LJ049-0066.npy +tests/data/ljspeech/wavs/LJ012-0005.wav|tests/data/ljspeech/wavs/LJ012-0005.npy +tests/data/ljspeech/wavs/LJ025-0032.wav|tests/data/ljspeech/wavs/LJ025-0032.npy +tests/data/ljspeech/wavs/LJ029-0015.wav|tests/data/ljspeech/wavs/LJ029-0015.npy +tests/data/ljspeech/wavs/LJ039-0017.wav|tests/data/ljspeech/wavs/LJ039-0017.npy +tests/data/ljspeech/wavs/LJ045-0164.wav|tests/data/ljspeech/wavs/LJ045-0164.npy +tests/data/ljspeech/wavs/LJ016-0402.wav|tests/data/ljspeech/wavs/LJ016-0402.npy +tests/data/ljspeech/wavs/LJ010-0112.wav|tests/data/ljspeech/wavs/LJ010-0112.npy +tests/data/ljspeech/wavs/LJ049-0125.wav|tests/data/ljspeech/wavs/LJ049-0125.npy +tests/data/ljspeech/wavs/LJ046-0220.wav|tests/data/ljspeech/wavs/LJ046-0220.npy +tests/data/ljspeech/wavs/LJ010-0145.wav|tests/data/ljspeech/wavs/LJ010-0145.npy +tests/data/ljspeech/wavs/LJ042-0217.wav|tests/data/ljspeech/wavs/LJ042-0217.npy +tests/data/ljspeech/wavs/LJ039-0059.wav|tests/data/ljspeech/wavs/LJ039-0059.npy +tests/data/ljspeech/wavs/LJ019-0348.wav|tests/data/ljspeech/wavs/LJ019-0348.npy +tests/data/ljspeech/wavs/LJ018-0380.wav|tests/data/ljspeech/wavs/LJ018-0380.npy +tests/data/ljspeech/wavs/LJ031-0093.wav|tests/data/ljspeech/wavs/LJ031-0093.npy +tests/data/ljspeech/wavs/LJ012-0182.wav|tests/data/ljspeech/wavs/LJ012-0182.npy +tests/data/ljspeech/wavs/LJ045-0246.wav|tests/data/ljspeech/wavs/LJ045-0246.npy +tests/data/ljspeech/wavs/LJ012-0183.wav|tests/data/ljspeech/wavs/LJ012-0183.npy +tests/data/ljspeech/wavs/LJ039-0234.wav|tests/data/ljspeech/wavs/LJ039-0234.npy +tests/data/ljspeech/wavs/LJ006-0032.wav|tests/data/ljspeech/wavs/LJ006-0032.npy +tests/data/ljspeech/wavs/LJ041-0161.wav|tests/data/ljspeech/wavs/LJ041-0161.npy +tests/data/ljspeech/wavs/LJ019-0346.wav|tests/data/ljspeech/wavs/LJ019-0346.npy +tests/data/ljspeech/wavs/LJ049-0096.wav|tests/data/ljspeech/wavs/LJ049-0096.npy +tests/data/ljspeech/wavs/LJ012-0209.wav|tests/data/ljspeech/wavs/LJ012-0209.npy +tests/data/ljspeech/wavs/LJ033-0169.wav|tests/data/ljspeech/wavs/LJ033-0169.npy +tests/data/ljspeech/wavs/LJ038-0023.wav|tests/data/ljspeech/wavs/LJ038-0023.npy 
+tests/data/ljspeech/wavs/LJ002-0030.wav|tests/data/ljspeech/wavs/LJ002-0030.npy +tests/data/ljspeech/wavs/LJ043-0026.wav|tests/data/ljspeech/wavs/LJ043-0026.npy +tests/data/ljspeech/wavs/LJ031-0123.wav|tests/data/ljspeech/wavs/LJ031-0123.npy +tests/data/ljspeech/wavs/LJ002-0031.wav|tests/data/ljspeech/wavs/LJ002-0031.npy +tests/data/ljspeech/wavs/LJ033-0045.wav|tests/data/ljspeech/wavs/LJ033-0045.npy +tests/data/ljspeech/wavs/LJ002-0028.wav|tests/data/ljspeech/wavs/LJ002-0028.npy +tests/data/ljspeech/wavs/LJ043-0011.wav|tests/data/ljspeech/wavs/LJ043-0011.npy +tests/data/ljspeech/wavs/LJ046-0069.wav|tests/data/ljspeech/wavs/LJ046-0069.npy +tests/data/ljspeech/wavs/LJ018-0176.wav|tests/data/ljspeech/wavs/LJ018-0176.npy +tests/data/ljspeech/wavs/LJ050-0099.wav|tests/data/ljspeech/wavs/LJ050-0099.npy +tests/data/ljspeech/wavs/LJ046-0081.wav|tests/data/ljspeech/wavs/LJ046-0081.npy +tests/data/ljspeech/wavs/LJ001-0162.wav|tests/data/ljspeech/wavs/LJ001-0162.npy +tests/data/ljspeech/wavs/LJ043-0008.wav|tests/data/ljspeech/wavs/LJ043-0008.npy +tests/data/ljspeech/wavs/LJ032-0137.wav|tests/data/ljspeech/wavs/LJ032-0137.npy +tests/data/ljspeech/wavs/LJ009-0022.wav|tests/data/ljspeech/wavs/LJ009-0022.npy +tests/data/ljspeech/wavs/LJ028-0192.wav|tests/data/ljspeech/wavs/LJ028-0192.npy +tests/data/ljspeech/wavs/LJ001-0184.wav|tests/data/ljspeech/wavs/LJ001-0184.npy +tests/data/ljspeech/wavs/LJ008-0044.wav|tests/data/ljspeech/wavs/LJ008-0044.npy +tests/data/ljspeech/wavs/LJ026-0036.wav|tests/data/ljspeech/wavs/LJ026-0036.npy +tests/data/ljspeech/wavs/LJ050-0104.wav|tests/data/ljspeech/wavs/LJ050-0104.npy +tests/data/ljspeech/wavs/LJ006-0148.wav|tests/data/ljspeech/wavs/LJ006-0148.npy +tests/data/ljspeech/wavs/LJ007-0019.wav|tests/data/ljspeech/wavs/LJ007-0019.npy +tests/data/ljspeech/wavs/LJ028-0419.wav|tests/data/ljspeech/wavs/LJ028-0419.npy +tests/data/ljspeech/wavs/LJ007-0135.wav|tests/data/ljspeech/wavs/LJ007-0135.npy +tests/data/ljspeech/wavs/LJ048-0163.wav|tests/data/ljspeech/wavs/LJ048-0163.npy +tests/data/ljspeech/wavs/LJ001-0133.wav|tests/data/ljspeech/wavs/LJ001-0133.npy +tests/data/ljspeech/wavs/LJ049-0121.wav|tests/data/ljspeech/wavs/LJ049-0121.npy +tests/data/ljspeech/wavs/LJ028-0302.wav|tests/data/ljspeech/wavs/LJ028-0302.npy +tests/data/ljspeech/wavs/LJ028-0312.wav|tests/data/ljspeech/wavs/LJ028-0312.npy +tests/data/ljspeech/wavs/LJ028-0437.wav|tests/data/ljspeech/wavs/LJ028-0437.npy +tests/data/ljspeech/wavs/LJ010-0067.wav|tests/data/ljspeech/wavs/LJ010-0067.npy +tests/data/ljspeech/wavs/LJ029-0012.wav|tests/data/ljspeech/wavs/LJ029-0012.npy +tests/data/ljspeech/wavs/LJ022-0170.wav|tests/data/ljspeech/wavs/LJ022-0170.npy +tests/data/ljspeech/wavs/LJ003-0031.wav|tests/data/ljspeech/wavs/LJ003-0031.npy +tests/data/ljspeech/wavs/LJ045-0211.wav|tests/data/ljspeech/wavs/LJ045-0211.npy +tests/data/ljspeech/wavs/LJ021-0061.wav|tests/data/ljspeech/wavs/LJ021-0061.npy +tests/data/ljspeech/wavs/LJ040-0221.wav|tests/data/ljspeech/wavs/LJ040-0221.npy +tests/data/ljspeech/wavs/LJ015-0302.wav|tests/data/ljspeech/wavs/LJ015-0302.npy +tests/data/ljspeech/wavs/LJ047-0238.wav|tests/data/ljspeech/wavs/LJ047-0238.npy +tests/data/ljspeech/wavs/LJ050-0042.wav|tests/data/ljspeech/wavs/LJ050-0042.npy +tests/data/ljspeech/wavs/LJ038-0007.wav|tests/data/ljspeech/wavs/LJ038-0007.npy +tests/data/ljspeech/wavs/LJ022-0190.wav|tests/data/ljspeech/wavs/LJ022-0190.npy +tests/data/ljspeech/wavs/LJ020-0081.wav|tests/data/ljspeech/wavs/LJ020-0081.npy 
+tests/data/ljspeech/wavs/LJ043-0182.wav|tests/data/ljspeech/wavs/LJ043-0182.npy +tests/data/ljspeech/wavs/LJ028-0235.wav|tests/data/ljspeech/wavs/LJ028-0235.npy +tests/data/ljspeech/wavs/LJ048-0151.wav|tests/data/ljspeech/wavs/LJ048-0151.npy +tests/data/ljspeech/wavs/LJ035-0013.wav|tests/data/ljspeech/wavs/LJ035-0013.npy +tests/data/ljspeech/wavs/LJ005-0237.wav|tests/data/ljspeech/wavs/LJ005-0237.npy +tests/data/ljspeech/wavs/LJ010-0062.wav|tests/data/ljspeech/wavs/LJ010-0062.npy +tests/data/ljspeech/wavs/LJ021-0206.wav|tests/data/ljspeech/wavs/LJ021-0206.npy +tests/data/ljspeech/wavs/LJ028-0173.wav|tests/data/ljspeech/wavs/LJ028-0173.npy +tests/data/ljspeech/wavs/LJ039-0126.wav|tests/data/ljspeech/wavs/LJ039-0126.npy +tests/data/ljspeech/wavs/LJ002-0305.wav|tests/data/ljspeech/wavs/LJ002-0305.npy +tests/data/ljspeech/wavs/LJ028-0329.wav|tests/data/ljspeech/wavs/LJ028-0329.npy +tests/data/ljspeech/wavs/LJ029-0044.wav|tests/data/ljspeech/wavs/LJ029-0044.npy +tests/data/ljspeech/wavs/LJ036-0109.wav|tests/data/ljspeech/wavs/LJ036-0109.npy +tests/data/ljspeech/wavs/LJ040-0184.wav|tests/data/ljspeech/wavs/LJ040-0184.npy +tests/data/ljspeech/wavs/LJ006-0219.wav|tests/data/ljspeech/wavs/LJ006-0219.npy +tests/data/ljspeech/wavs/LJ028-0482.wav|tests/data/ljspeech/wavs/LJ028-0482.npy +tests/data/ljspeech/wavs/LJ002-0329.wav|tests/data/ljspeech/wavs/LJ002-0329.npy +tests/data/ljspeech/wavs/LJ034-0186.wav|tests/data/ljspeech/wavs/LJ034-0186.npy +tests/data/ljspeech/wavs/LJ040-0195.wav|tests/data/ljspeech/wavs/LJ040-0195.npy +tests/data/ljspeech/wavs/LJ034-0209.wav|tests/data/ljspeech/wavs/LJ034-0209.npy +tests/data/ljspeech/wavs/LJ040-0042.wav|tests/data/ljspeech/wavs/LJ040-0042.npy +tests/data/ljspeech/wavs/LJ035-0186.wav|tests/data/ljspeech/wavs/LJ035-0186.npy +tests/data/ljspeech/wavs/LJ045-0128.wav|tests/data/ljspeech/wavs/LJ045-0128.npy +tests/data/ljspeech/wavs/LJ036-0040.wav|tests/data/ljspeech/wavs/LJ036-0040.npy +tests/data/ljspeech/wavs/LJ045-0046.wav|tests/data/ljspeech/wavs/LJ045-0046.npy +tests/data/ljspeech/wavs/LJ018-0169.wav|tests/data/ljspeech/wavs/LJ018-0169.npy +tests/data/ljspeech/wavs/LJ022-0058.wav|tests/data/ljspeech/wavs/LJ022-0058.npy +tests/data/ljspeech/wavs/LJ044-0141.wav|tests/data/ljspeech/wavs/LJ044-0141.npy +tests/data/ljspeech/wavs/LJ036-0037.wav|tests/data/ljspeech/wavs/LJ036-0037.npy +tests/data/ljspeech/wavs/LJ049-0203.wav|tests/data/ljspeech/wavs/LJ049-0203.npy +tests/data/ljspeech/wavs/LJ036-0055.wav|tests/data/ljspeech/wavs/LJ036-0055.npy +tests/data/ljspeech/wavs/LJ049-0005.wav|tests/data/ljspeech/wavs/LJ049-0005.npy +tests/data/ljspeech/wavs/LJ019-0187.wav|tests/data/ljspeech/wavs/LJ019-0187.npy +tests/data/ljspeech/wavs/LJ012-0284.wav|tests/data/ljspeech/wavs/LJ012-0284.npy +tests/data/ljspeech/wavs/LJ016-0234.wav|tests/data/ljspeech/wavs/LJ016-0234.npy +tests/data/ljspeech/wavs/LJ016-0216.wav|tests/data/ljspeech/wavs/LJ016-0216.npy +tests/data/ljspeech/wavs/LJ049-0070.wav|tests/data/ljspeech/wavs/LJ049-0070.npy +tests/data/ljspeech/wavs/LJ044-0116.wav|tests/data/ljspeech/wavs/LJ044-0116.npy +tests/data/ljspeech/wavs/LJ040-0004.wav|tests/data/ljspeech/wavs/LJ040-0004.npy +tests/data/ljspeech/wavs/LJ016-0373.wav|tests/data/ljspeech/wavs/LJ016-0373.npy +tests/data/ljspeech/wavs/LJ037-0131.wav|tests/data/ljspeech/wavs/LJ037-0131.npy +tests/data/ljspeech/wavs/LJ019-0226.wav|tests/data/ljspeech/wavs/LJ019-0226.npy +tests/data/ljspeech/wavs/LJ036-0128.wav|tests/data/ljspeech/wavs/LJ036-0128.npy 
+tests/data/ljspeech/wavs/LJ009-0166.wav|tests/data/ljspeech/wavs/LJ009-0166.npy +tests/data/ljspeech/wavs/LJ018-0341.wav|tests/data/ljspeech/wavs/LJ018-0341.npy +tests/data/ljspeech/wavs/LJ036-0101.wav|tests/data/ljspeech/wavs/LJ036-0101.npy +tests/data/ljspeech/wavs/LJ019-0211.wav|tests/data/ljspeech/wavs/LJ019-0211.npy +tests/data/ljspeech/wavs/LJ049-0020.wav|tests/data/ljspeech/wavs/LJ049-0020.npy +tests/data/ljspeech/wavs/LJ016-0311.wav|tests/data/ljspeech/wavs/LJ016-0311.npy +tests/data/ljspeech/wavs/LJ040-0141.wav|tests/data/ljspeech/wavs/LJ040-0141.npy +tests/data/ljspeech/wavs/LJ049-0082.wav|tests/data/ljspeech/wavs/LJ049-0082.npy +tests/data/ljspeech/wavs/LJ037-0167.wav|tests/data/ljspeech/wavs/LJ037-0167.npy +tests/data/ljspeech/wavs/LJ004-0152.wav|tests/data/ljspeech/wavs/LJ004-0152.npy +tests/data/ljspeech/wavs/LJ027-0027.wav|tests/data/ljspeech/wavs/LJ027-0027.npy +tests/data/ljspeech/wavs/LJ044-0214.wav|tests/data/ljspeech/wavs/LJ044-0214.npy +tests/data/ljspeech/wavs/LJ002-0285.wav|tests/data/ljspeech/wavs/LJ002-0285.npy +tests/data/ljspeech/wavs/LJ041-0180.wav|tests/data/ljspeech/wavs/LJ041-0180.npy +tests/data/ljspeech/wavs/LJ043-0137.wav|tests/data/ljspeech/wavs/LJ043-0137.npy +tests/data/ljspeech/wavs/LJ046-0240.wav|tests/data/ljspeech/wavs/LJ046-0240.npy +tests/data/ljspeech/wavs/LJ048-0043.wav|tests/data/ljspeech/wavs/LJ048-0043.npy +tests/data/ljspeech/wavs/LJ033-0140.wav|tests/data/ljspeech/wavs/LJ033-0140.npy +tests/data/ljspeech/wavs/LJ026-0157.wav|tests/data/ljspeech/wavs/LJ026-0157.npy +tests/data/ljspeech/wavs/LJ008-0280.wav|tests/data/ljspeech/wavs/LJ008-0280.npy +tests/data/ljspeech/wavs/LJ014-0323.wav|tests/data/ljspeech/wavs/LJ014-0323.npy +tests/data/ljspeech/wavs/LJ009-0042.wav|tests/data/ljspeech/wavs/LJ009-0042.npy +tests/data/ljspeech/wavs/LJ013-0131.wav|tests/data/ljspeech/wavs/LJ013-0131.npy +tests/data/ljspeech/wavs/LJ046-0013.wav|tests/data/ljspeech/wavs/LJ046-0013.npy +tests/data/ljspeech/wavs/LJ028-0035.wav|tests/data/ljspeech/wavs/LJ028-0035.npy +tests/data/ljspeech/wavs/LJ008-0181.wav|tests/data/ljspeech/wavs/LJ008-0181.npy +tests/data/ljspeech/wavs/LJ025-0125.wav|tests/data/ljspeech/wavs/LJ025-0125.npy +tests/data/ljspeech/wavs/LJ004-0106.wav|tests/data/ljspeech/wavs/LJ004-0106.npy +tests/data/ljspeech/wavs/LJ004-0086.wav|tests/data/ljspeech/wavs/LJ004-0086.npy +tests/data/ljspeech/wavs/LJ026-0086.wav|tests/data/ljspeech/wavs/LJ026-0086.npy +tests/data/ljspeech/wavs/LJ050-0016.wav|tests/data/ljspeech/wavs/LJ050-0016.npy +tests/data/ljspeech/wavs/LJ008-0090.wav|tests/data/ljspeech/wavs/LJ008-0090.npy +tests/data/ljspeech/wavs/LJ013-0058.wav|tests/data/ljspeech/wavs/LJ013-0058.npy +tests/data/ljspeech/wavs/LJ027-0174.wav|tests/data/ljspeech/wavs/LJ027-0174.npy +tests/data/ljspeech/wavs/LJ049-0145.wav|tests/data/ljspeech/wavs/LJ049-0145.npy +tests/data/ljspeech/wavs/LJ014-0018.wav|tests/data/ljspeech/wavs/LJ014-0018.npy +tests/data/ljspeech/wavs/LJ033-0152.wav|tests/data/ljspeech/wavs/LJ033-0152.npy +tests/data/ljspeech/wavs/LJ008-0165.wav|tests/data/ljspeech/wavs/LJ008-0165.npy +tests/data/ljspeech/wavs/LJ008-0225.wav|tests/data/ljspeech/wavs/LJ008-0225.npy +tests/data/ljspeech/wavs/LJ032-0262.wav|tests/data/ljspeech/wavs/LJ032-0262.npy +tests/data/ljspeech/wavs/LJ024-0031.wav|tests/data/ljspeech/wavs/LJ024-0031.npy +tests/data/ljspeech/wavs/LJ014-0313.wav|tests/data/ljspeech/wavs/LJ014-0313.npy +tests/data/ljspeech/wavs/LJ013-0147.wav|tests/data/ljspeech/wavs/LJ013-0147.npy 
+tests/data/ljspeech/wavs/LJ016-0278.wav|tests/data/ljspeech/wavs/LJ016-0278.npy +tests/data/ljspeech/wavs/LJ013-0056.wav|tests/data/ljspeech/wavs/LJ013-0056.npy +tests/data/ljspeech/wavs/LJ040-0014.wav|tests/data/ljspeech/wavs/LJ040-0014.npy +tests/data/ljspeech/wavs/LJ015-0055.wav|tests/data/ljspeech/wavs/LJ015-0055.npy +tests/data/ljspeech/wavs/LJ038-0070.wav|tests/data/ljspeech/wavs/LJ038-0070.npy +tests/data/ljspeech/wavs/LJ038-0274.wav|tests/data/ljspeech/wavs/LJ038-0274.npy +tests/data/ljspeech/wavs/LJ015-0090.wav|tests/data/ljspeech/wavs/LJ015-0090.npy +tests/data/ljspeech/wavs/LJ013-0223.wav|tests/data/ljspeech/wavs/LJ013-0223.npy +tests/data/ljspeech/wavs/LJ015-0074.wav|tests/data/ljspeech/wavs/LJ015-0074.npy +tests/data/ljspeech/wavs/LJ014-0309.wav|tests/data/ljspeech/wavs/LJ014-0309.npy +tests/data/ljspeech/wavs/LJ017-0180.wav|tests/data/ljspeech/wavs/LJ017-0180.npy +tests/data/ljspeech/wavs/LJ017-0049.wav|tests/data/ljspeech/wavs/LJ017-0049.npy +tests/data/ljspeech/wavs/LJ027-0106.wav|tests/data/ljspeech/wavs/LJ027-0106.npy +tests/data/ljspeech/wavs/LJ003-0298.wav|tests/data/ljspeech/wavs/LJ003-0298.npy +tests/data/ljspeech/wavs/LJ014-0109.wav|tests/data/ljspeech/wavs/LJ014-0109.npy +tests/data/ljspeech/wavs/LJ014-0057.wav|tests/data/ljspeech/wavs/LJ014-0057.npy +tests/data/ljspeech/wavs/LJ038-0059.wav|tests/data/ljspeech/wavs/LJ038-0059.npy +tests/data/ljspeech/wavs/LJ004-0071.wav|tests/data/ljspeech/wavs/LJ004-0071.npy +tests/data/ljspeech/wavs/LJ015-0017.wav|tests/data/ljspeech/wavs/LJ015-0017.npy +tests/data/ljspeech/wavs/LJ037-0079.wav|tests/data/ljspeech/wavs/LJ037-0079.npy +tests/data/ljspeech/wavs/LJ008-0243.wav|tests/data/ljspeech/wavs/LJ008-0243.npy +tests/data/ljspeech/wavs/LJ030-0189.wav|tests/data/ljspeech/wavs/LJ030-0189.npy +tests/data/ljspeech/wavs/LJ004-0159.wav|tests/data/ljspeech/wavs/LJ004-0159.npy +tests/data/ljspeech/wavs/LJ037-0099.wav|tests/data/ljspeech/wavs/LJ037-0099.npy +tests/data/ljspeech/wavs/LJ038-0265.wav|tests/data/ljspeech/wavs/LJ038-0265.npy +tests/data/ljspeech/wavs/LJ011-0187.wav|tests/data/ljspeech/wavs/LJ011-0187.npy +tests/data/ljspeech/wavs/LJ030-0076.wav|tests/data/ljspeech/wavs/LJ030-0076.npy +tests/data/ljspeech/wavs/LJ013-0039.wav|tests/data/ljspeech/wavs/LJ013-0039.npy +tests/data/ljspeech/wavs/LJ045-0239.wav|tests/data/ljspeech/wavs/LJ045-0239.npy +tests/data/ljspeech/wavs/LJ013-0233.wav|tests/data/ljspeech/wavs/LJ013-0233.npy +tests/data/ljspeech/wavs/LJ014-0282.wav|tests/data/ljspeech/wavs/LJ014-0282.npy +tests/data/ljspeech/wavs/LJ041-0079.wav|tests/data/ljspeech/wavs/LJ041-0079.npy +tests/data/ljspeech/wavs/LJ047-0021.wav|tests/data/ljspeech/wavs/LJ047-0021.npy +tests/data/ljspeech/wavs/LJ032-0019.wav|tests/data/ljspeech/wavs/LJ032-0019.npy +tests/data/ljspeech/wavs/LJ047-0073.wav|tests/data/ljspeech/wavs/LJ047-0073.npy +tests/data/ljspeech/wavs/LJ009-0149.wav|tests/data/ljspeech/wavs/LJ009-0149.npy +tests/data/ljspeech/wavs/LJ042-0248.wav|tests/data/ljspeech/wavs/LJ042-0248.npy +tests/data/ljspeech/wavs/LJ042-0236.wav|tests/data/ljspeech/wavs/LJ042-0236.npy +tests/data/ljspeech/wavs/LJ049-0173.wav|tests/data/ljspeech/wavs/LJ049-0173.npy +tests/data/ljspeech/wavs/LJ015-0208.wav|tests/data/ljspeech/wavs/LJ015-0208.npy +tests/data/ljspeech/wavs/LJ048-0088.wav|tests/data/ljspeech/wavs/LJ048-0088.npy +tests/data/ljspeech/wavs/LJ009-0159.wav|tests/data/ljspeech/wavs/LJ009-0159.npy +tests/data/ljspeech/wavs/LJ014-0267.wav|tests/data/ljspeech/wavs/LJ014-0267.npy 
+tests/data/ljspeech/wavs/LJ046-0234.wav|tests/data/ljspeech/wavs/LJ046-0234.npy +tests/data/ljspeech/wavs/LJ012-0205.wav|tests/data/ljspeech/wavs/LJ012-0205.npy +tests/data/ljspeech/wavs/LJ017-0249.wav|tests/data/ljspeech/wavs/LJ017-0249.npy +tests/data/ljspeech/wavs/LJ015-0304.wav|tests/data/ljspeech/wavs/LJ015-0304.npy +tests/data/ljspeech/wavs/LJ004-0019.wav|tests/data/ljspeech/wavs/LJ004-0019.npy +tests/data/ljspeech/wavs/LJ038-0002.wav|tests/data/ljspeech/wavs/LJ038-0002.npy +tests/data/ljspeech/wavs/LJ007-0018.wav|tests/data/ljspeech/wavs/LJ007-0018.npy +tests/data/ljspeech/wavs/LJ016-0031.wav|tests/data/ljspeech/wavs/LJ016-0031.npy +tests/data/ljspeech/wavs/LJ002-0299.wav|tests/data/ljspeech/wavs/LJ002-0299.npy +tests/data/ljspeech/wavs/LJ050-0247.wav|tests/data/ljspeech/wavs/LJ050-0247.npy +tests/data/ljspeech/wavs/LJ034-0002.wav|tests/data/ljspeech/wavs/LJ034-0002.npy +tests/data/ljspeech/wavs/LJ014-0276.wav|tests/data/ljspeech/wavs/LJ014-0276.npy +tests/data/ljspeech/wavs/LJ003-0021.wav|tests/data/ljspeech/wavs/LJ003-0021.npy +tests/data/ljspeech/wavs/LJ009-0229.wav|tests/data/ljspeech/wavs/LJ009-0229.npy +tests/data/ljspeech/wavs/LJ021-0198.wav|tests/data/ljspeech/wavs/LJ021-0198.npy +tests/data/ljspeech/wavs/LJ012-0086.wav|tests/data/ljspeech/wavs/LJ012-0086.npy +tests/data/ljspeech/wavs/LJ030-0120.wav|tests/data/ljspeech/wavs/LJ030-0120.npy +tests/data/ljspeech/wavs/LJ006-0192.wav|tests/data/ljspeech/wavs/LJ006-0192.npy +tests/data/ljspeech/wavs/LJ050-0008.wav|tests/data/ljspeech/wavs/LJ050-0008.npy +tests/data/ljspeech/wavs/LJ045-0150.wav|tests/data/ljspeech/wavs/LJ045-0150.npy +tests/data/ljspeech/wavs/LJ042-0049.wav|tests/data/ljspeech/wavs/LJ042-0049.npy +tests/data/ljspeech/wavs/LJ021-0166.wav|tests/data/ljspeech/wavs/LJ021-0166.npy +tests/data/ljspeech/wavs/LJ042-0013.wav|tests/data/ljspeech/wavs/LJ042-0013.npy +tests/data/ljspeech/wavs/LJ026-0061.wav|tests/data/ljspeech/wavs/LJ026-0061.npy +tests/data/ljspeech/wavs/LJ027-0170.wav|tests/data/ljspeech/wavs/LJ027-0170.npy +tests/data/ljspeech/wavs/LJ045-0110.wav|tests/data/ljspeech/wavs/LJ045-0110.npy +tests/data/ljspeech/wavs/LJ005-0126.wav|tests/data/ljspeech/wavs/LJ005-0126.npy +tests/data/ljspeech/wavs/LJ024-0101.wav|tests/data/ljspeech/wavs/LJ024-0101.npy +tests/data/ljspeech/wavs/LJ027-0095.wav|tests/data/ljspeech/wavs/LJ027-0095.npy +tests/data/ljspeech/wavs/LJ009-0026.wav|tests/data/ljspeech/wavs/LJ009-0026.npy +tests/data/ljspeech/wavs/LJ048-0182.wav|tests/data/ljspeech/wavs/LJ048-0182.npy +tests/data/ljspeech/wavs/LJ021-0006.wav|tests/data/ljspeech/wavs/LJ021-0006.npy +tests/data/ljspeech/wavs/LJ050-0256.wav|tests/data/ljspeech/wavs/LJ050-0256.npy +tests/data/ljspeech/wavs/LJ025-0039.wav|tests/data/ljspeech/wavs/LJ025-0039.npy +tests/data/ljspeech/wavs/LJ040-0117.wav|tests/data/ljspeech/wavs/LJ040-0117.npy +tests/data/ljspeech/wavs/LJ050-0013.wav|tests/data/ljspeech/wavs/LJ050-0013.npy +tests/data/ljspeech/wavs/LJ050-0175.wav|tests/data/ljspeech/wavs/LJ050-0175.npy +tests/data/ljspeech/wavs/LJ050-0043.wav|tests/data/ljspeech/wavs/LJ050-0043.npy +tests/data/ljspeech/wavs/LJ007-0133.wav|tests/data/ljspeech/wavs/LJ007-0133.npy +tests/data/ljspeech/wavs/LJ035-0138.wav|tests/data/ljspeech/wavs/LJ035-0138.npy +tests/data/ljspeech/wavs/LJ006-0068.wav|tests/data/ljspeech/wavs/LJ006-0068.npy +tests/data/ljspeech/wavs/LJ021-0024.wav|tests/data/ljspeech/wavs/LJ021-0024.npy +tests/data/ljspeech/wavs/LJ028-0223.wav|tests/data/ljspeech/wavs/LJ028-0223.npy 
+tests/data/ljspeech/wavs/LJ028-0100.wav|tests/data/ljspeech/wavs/LJ028-0100.npy +tests/data/ljspeech/wavs/LJ046-0065.wav|tests/data/ljspeech/wavs/LJ046-0065.npy +tests/data/ljspeech/wavs/LJ030-0249.wav|tests/data/ljspeech/wavs/LJ030-0249.npy +tests/data/ljspeech/wavs/LJ042-0109.wav|tests/data/ljspeech/wavs/LJ042-0109.npy +tests/data/ljspeech/wavs/LJ010-0243.wav|tests/data/ljspeech/wavs/LJ010-0243.npy +tests/data/ljspeech/wavs/LJ002-0312.wav|tests/data/ljspeech/wavs/LJ002-0312.npy +tests/data/ljspeech/wavs/LJ042-0098.wav|tests/data/ljspeech/wavs/LJ042-0098.npy +tests/data/ljspeech/wavs/LJ008-0185.wav|tests/data/ljspeech/wavs/LJ008-0185.npy +tests/data/ljspeech/wavs/LJ019-0383.wav|tests/data/ljspeech/wavs/LJ019-0383.npy +tests/data/ljspeech/wavs/LJ029-0052.wav|tests/data/ljspeech/wavs/LJ029-0052.npy +tests/data/ljspeech/wavs/LJ019-0244.wav|tests/data/ljspeech/wavs/LJ019-0244.npy +tests/data/ljspeech/wavs/LJ040-0161.wav|tests/data/ljspeech/wavs/LJ040-0161.npy +tests/data/ljspeech/wavs/LJ047-0019.wav|tests/data/ljspeech/wavs/LJ047-0019.npy +tests/data/ljspeech/wavs/LJ044-0024.wav|tests/data/ljspeech/wavs/LJ044-0024.npy +tests/data/ljspeech/wavs/LJ006-0073.wav|tests/data/ljspeech/wavs/LJ006-0073.npy +tests/data/ljspeech/wavs/LJ048-0286.wav|tests/data/ljspeech/wavs/LJ048-0286.npy +tests/data/ljspeech/wavs/LJ006-0082.wav|tests/data/ljspeech/wavs/LJ006-0082.npy +tests/data/ljspeech/wavs/LJ004-0016.wav|tests/data/ljspeech/wavs/LJ004-0016.npy +tests/data/ljspeech/wavs/LJ050-0132.wav|tests/data/ljspeech/wavs/LJ050-0132.npy +tests/data/ljspeech/wavs/LJ045-0139.wav|tests/data/ljspeech/wavs/LJ045-0139.npy +tests/data/ljspeech/wavs/LJ003-0097.wav|tests/data/ljspeech/wavs/LJ003-0097.npy +tests/data/ljspeech/wavs/LJ045-0142.wav|tests/data/ljspeech/wavs/LJ045-0142.npy +tests/data/ljspeech/wavs/LJ003-0186.wav|tests/data/ljspeech/wavs/LJ003-0186.npy +tests/data/ljspeech/wavs/LJ005-0211.wav|tests/data/ljspeech/wavs/LJ005-0211.npy +tests/data/ljspeech/wavs/LJ005-0100.wav|tests/data/ljspeech/wavs/LJ005-0100.npy +tests/data/ljspeech/wavs/LJ007-0079.wav|tests/data/ljspeech/wavs/LJ007-0079.npy +tests/data/ljspeech/wavs/LJ043-0065.wav|tests/data/ljspeech/wavs/LJ043-0065.npy +tests/data/ljspeech/wavs/LJ004-0222.wav|tests/data/ljspeech/wavs/LJ004-0222.npy +tests/data/ljspeech/wavs/LJ019-0144.wav|tests/data/ljspeech/wavs/LJ019-0144.npy +tests/data/ljspeech/wavs/LJ019-0190.wav|tests/data/ljspeech/wavs/LJ019-0190.npy +tests/data/ljspeech/wavs/LJ003-0283.wav|tests/data/ljspeech/wavs/LJ003-0283.npy +tests/data/ljspeech/wavs/LJ002-0132.wav|tests/data/ljspeech/wavs/LJ002-0132.npy +tests/data/ljspeech/wavs/LJ003-0132.wav|tests/data/ljspeech/wavs/LJ003-0132.npy +tests/data/ljspeech/wavs/LJ045-0055.wav|tests/data/ljspeech/wavs/LJ045-0055.npy +tests/data/ljspeech/wavs/LJ040-0157.wav|tests/data/ljspeech/wavs/LJ040-0157.npy +tests/data/ljspeech/wavs/LJ046-0033.wav|tests/data/ljspeech/wavs/LJ046-0033.npy +tests/data/ljspeech/wavs/LJ010-0214.wav|tests/data/ljspeech/wavs/LJ010-0214.npy +tests/data/ljspeech/wavs/LJ019-0120.wav|tests/data/ljspeech/wavs/LJ019-0120.npy +tests/data/ljspeech/wavs/LJ045-0075.wav|tests/data/ljspeech/wavs/LJ045-0075.npy +tests/data/ljspeech/wavs/LJ007-0114.wav|tests/data/ljspeech/wavs/LJ007-0114.npy +tests/data/ljspeech/wavs/LJ029-0164.wav|tests/data/ljspeech/wavs/LJ029-0164.npy +tests/data/ljspeech/wavs/LJ014-0308.wav|tests/data/ljspeech/wavs/LJ014-0308.npy +tests/data/ljspeech/wavs/LJ047-0072.wav|tests/data/ljspeech/wavs/LJ047-0072.npy 
+tests/data/ljspeech/wavs/LJ048-0133.wav|tests/data/ljspeech/wavs/LJ048-0133.npy +tests/data/ljspeech/wavs/LJ022-0057.wav|tests/data/ljspeech/wavs/LJ022-0057.npy +tests/data/ljspeech/wavs/LJ015-0266.wav|tests/data/ljspeech/wavs/LJ015-0266.npy +tests/data/ljspeech/wavs/LJ005-0067.wav|tests/data/ljspeech/wavs/LJ005-0067.npy +tests/data/ljspeech/wavs/LJ041-0086.wav|tests/data/ljspeech/wavs/LJ041-0086.npy +tests/data/ljspeech/wavs/LJ033-0131.wav|tests/data/ljspeech/wavs/LJ033-0131.npy +tests/data/ljspeech/wavs/LJ029-0042.wav|tests/data/ljspeech/wavs/LJ029-0042.npy +tests/data/ljspeech/wavs/LJ002-0060.wav|tests/data/ljspeech/wavs/LJ002-0060.npy +tests/data/ljspeech/wavs/LJ009-0259.wav|tests/data/ljspeech/wavs/LJ009-0259.npy +tests/data/ljspeech/wavs/LJ027-0145.wav|tests/data/ljspeech/wavs/LJ027-0145.npy +tests/data/ljspeech/wavs/LJ038-0233.wav|tests/data/ljspeech/wavs/LJ038-0233.npy +tests/data/ljspeech/wavs/LJ041-0046.wav|tests/data/ljspeech/wavs/LJ041-0046.npy +tests/data/ljspeech/wavs/LJ048-0167.wav|tests/data/ljspeech/wavs/LJ048-0167.npy +tests/data/ljspeech/wavs/LJ041-0168.wav|tests/data/ljspeech/wavs/LJ041-0168.npy +tests/data/ljspeech/wavs/LJ034-0072.wav|tests/data/ljspeech/wavs/LJ034-0072.npy +tests/data/ljspeech/wavs/LJ040-0198.wav|tests/data/ljspeech/wavs/LJ040-0198.npy +tests/data/ljspeech/wavs/LJ015-0115.wav|tests/data/ljspeech/wavs/LJ015-0115.npy +tests/data/ljspeech/wavs/LJ008-0258.wav|tests/data/ljspeech/wavs/LJ008-0258.npy +tests/data/ljspeech/wavs/LJ050-0163.wav|tests/data/ljspeech/wavs/LJ050-0163.npy +tests/data/ljspeech/wavs/LJ008-0257.wav|tests/data/ljspeech/wavs/LJ008-0257.npy +tests/data/ljspeech/wavs/LJ041-0016.wav|tests/data/ljspeech/wavs/LJ041-0016.npy +tests/data/ljspeech/wavs/LJ043-0044.wav|tests/data/ljspeech/wavs/LJ043-0044.npy +tests/data/ljspeech/wavs/LJ029-0139.wav|tests/data/ljspeech/wavs/LJ029-0139.npy +tests/data/ljspeech/wavs/LJ006-0040.wav|tests/data/ljspeech/wavs/LJ006-0040.npy +tests/data/ljspeech/wavs/LJ025-0035.wav|tests/data/ljspeech/wavs/LJ025-0035.npy +tests/data/ljspeech/wavs/LJ028-0490.wav|tests/data/ljspeech/wavs/LJ028-0490.npy +tests/data/ljspeech/wavs/LJ009-0187.wav|tests/data/ljspeech/wavs/LJ009-0187.npy +tests/data/ljspeech/wavs/LJ003-0119.wav|tests/data/ljspeech/wavs/LJ003-0119.npy +tests/data/ljspeech/wavs/LJ038-0048.wav|tests/data/ljspeech/wavs/LJ038-0048.npy +tests/data/ljspeech/wavs/LJ039-0146.wav|tests/data/ljspeech/wavs/LJ039-0146.npy +tests/data/ljspeech/wavs/LJ049-0163.wav|tests/data/ljspeech/wavs/LJ049-0163.npy +tests/data/ljspeech/wavs/LJ027-0110.wav|tests/data/ljspeech/wavs/LJ027-0110.npy +tests/data/ljspeech/wavs/LJ031-0133.wav|tests/data/ljspeech/wavs/LJ031-0133.npy +tests/data/ljspeech/wavs/LJ048-0211.wav|tests/data/ljspeech/wavs/LJ048-0211.npy +tests/data/ljspeech/wavs/LJ040-0178.wav|tests/data/ljspeech/wavs/LJ040-0178.npy +tests/data/ljspeech/wavs/LJ013-0149.wav|tests/data/ljspeech/wavs/LJ013-0149.npy +tests/data/ljspeech/wavs/LJ024-0016.wav|tests/data/ljspeech/wavs/LJ024-0016.npy +tests/data/ljspeech/wavs/LJ013-0136.wav|tests/data/ljspeech/wavs/LJ013-0136.npy +tests/data/ljspeech/wavs/LJ049-0144.wav|tests/data/ljspeech/wavs/LJ049-0144.npy +tests/data/ljspeech/wavs/LJ030-0007.wav|tests/data/ljspeech/wavs/LJ030-0007.npy +tests/data/ljspeech/wavs/LJ013-0192.wav|tests/data/ljspeech/wavs/LJ013-0192.npy +tests/data/ljspeech/wavs/LJ027-0088.wav|tests/data/ljspeech/wavs/LJ027-0088.npy +tests/data/ljspeech/wavs/LJ012-0030.wav|tests/data/ljspeech/wavs/LJ012-0030.npy 
+tests/data/ljspeech/wavs/LJ029-0153.wav|tests/data/ljspeech/wavs/LJ029-0153.npy +tests/data/ljspeech/wavs/LJ033-0059.wav|tests/data/ljspeech/wavs/LJ033-0059.npy +tests/data/ljspeech/wavs/LJ016-0248.wav|tests/data/ljspeech/wavs/LJ016-0248.npy +tests/data/ljspeech/wavs/LJ027-0128.wav|tests/data/ljspeech/wavs/LJ027-0128.npy +tests/data/ljspeech/wavs/LJ027-0024.wav|tests/data/ljspeech/wavs/LJ027-0024.npy +tests/data/ljspeech/wavs/LJ033-0061.wav|tests/data/ljspeech/wavs/LJ033-0061.npy +tests/data/ljspeech/wavs/LJ040-0005.wav|tests/data/ljspeech/wavs/LJ040-0005.npy +tests/data/ljspeech/wavs/LJ028-0450.wav|tests/data/ljspeech/wavs/LJ028-0450.npy +tests/data/ljspeech/wavs/LJ047-0107.wav|tests/data/ljspeech/wavs/LJ047-0107.npy +tests/data/ljspeech/wavs/LJ028-0233.wav|tests/data/ljspeech/wavs/LJ028-0233.npy +tests/data/ljspeech/wavs/LJ016-0246.wav|tests/data/ljspeech/wavs/LJ016-0246.npy +tests/data/ljspeech/wavs/LJ014-0060.wav|tests/data/ljspeech/wavs/LJ014-0060.npy +tests/data/ljspeech/wavs/LJ010-0125.wav|tests/data/ljspeech/wavs/LJ010-0125.npy +tests/data/ljspeech/wavs/LJ012-0132.wav|tests/data/ljspeech/wavs/LJ012-0132.npy +tests/data/ljspeech/wavs/LJ037-0098.wav|tests/data/ljspeech/wavs/LJ037-0098.npy +tests/data/ljspeech/wavs/LJ016-0288.wav|tests/data/ljspeech/wavs/LJ016-0288.npy +tests/data/ljspeech/wavs/LJ013-0036.wav|tests/data/ljspeech/wavs/LJ013-0036.npy +tests/data/ljspeech/wavs/LJ009-0075.wav|tests/data/ljspeech/wavs/LJ009-0075.npy +tests/data/ljspeech/wavs/LJ033-0052.wav|tests/data/ljspeech/wavs/LJ033-0052.npy +tests/data/ljspeech/wavs/LJ042-0184.wav|tests/data/ljspeech/wavs/LJ042-0184.npy +tests/data/ljspeech/wavs/LJ031-0080.wav|tests/data/ljspeech/wavs/LJ031-0080.npy +tests/data/ljspeech/wavs/LJ026-0162.wav|tests/data/ljspeech/wavs/LJ026-0162.npy +tests/data/ljspeech/wavs/LJ042-0087.wav|tests/data/ljspeech/wavs/LJ042-0087.npy +tests/data/ljspeech/wavs/LJ042-0245.wav|tests/data/ljspeech/wavs/LJ042-0245.npy +tests/data/ljspeech/wavs/LJ040-0089.wav|tests/data/ljspeech/wavs/LJ040-0089.npy +tests/data/ljspeech/wavs/LJ048-0116.wav|tests/data/ljspeech/wavs/LJ048-0116.npy +tests/data/ljspeech/wavs/LJ050-0098.wav|tests/data/ljspeech/wavs/LJ050-0098.npy +tests/data/ljspeech/wavs/LJ019-0158.wav|tests/data/ljspeech/wavs/LJ019-0158.npy +tests/data/ljspeech/wavs/LJ014-0208.wav|tests/data/ljspeech/wavs/LJ014-0208.npy +tests/data/ljspeech/wavs/LJ010-0229.wav|tests/data/ljspeech/wavs/LJ010-0229.npy +tests/data/ljspeech/wavs/LJ038-0150.wav|tests/data/ljspeech/wavs/LJ038-0150.npy +tests/data/ljspeech/wavs/LJ028-0394.wav|tests/data/ljspeech/wavs/LJ028-0394.npy +tests/data/ljspeech/wavs/LJ014-0195.wav|tests/data/ljspeech/wavs/LJ014-0195.npy +tests/data/ljspeech/wavs/LJ007-0204.wav|tests/data/ljspeech/wavs/LJ007-0204.npy +tests/data/ljspeech/wavs/LJ018-0100.wav|tests/data/ljspeech/wavs/LJ018-0100.npy +tests/data/ljspeech/wavs/LJ017-0213.wav|tests/data/ljspeech/wavs/LJ017-0213.npy +tests/data/ljspeech/wavs/LJ026-0138.wav|tests/data/ljspeech/wavs/LJ026-0138.npy +tests/data/ljspeech/wavs/LJ014-0114.wav|tests/data/ljspeech/wavs/LJ014-0114.npy +tests/data/ljspeech/wavs/LJ049-0221.wav|tests/data/ljspeech/wavs/LJ049-0221.npy +tests/data/ljspeech/wavs/LJ038-0160.wav|tests/data/ljspeech/wavs/LJ038-0160.npy +tests/data/ljspeech/wavs/LJ037-0180.wav|tests/data/ljspeech/wavs/LJ037-0180.npy +tests/data/ljspeech/wavs/LJ034-0197.wav|tests/data/ljspeech/wavs/LJ034-0197.npy +tests/data/ljspeech/wavs/LJ014-0085.wav|tests/data/ljspeech/wavs/LJ014-0085.npy 
+tests/data/ljspeech/wavs/LJ040-0087.wav|tests/data/ljspeech/wavs/LJ040-0087.npy +tests/data/ljspeech/wavs/LJ017-0169.wav|tests/data/ljspeech/wavs/LJ017-0169.npy +tests/data/ljspeech/wavs/LJ031-0079.wav|tests/data/ljspeech/wavs/LJ031-0079.npy +tests/data/ljspeech/wavs/LJ006-0306.wav|tests/data/ljspeech/wavs/LJ006-0306.npy +tests/data/ljspeech/wavs/LJ014-0212.wav|tests/data/ljspeech/wavs/LJ014-0212.npy +tests/data/ljspeech/wavs/LJ018-0009.wav|tests/data/ljspeech/wavs/LJ018-0009.npy +tests/data/ljspeech/wavs/LJ048-0289.wav|tests/data/ljspeech/wavs/LJ048-0289.npy +tests/data/ljspeech/wavs/LJ046-0204.wav|tests/data/ljspeech/wavs/LJ046-0204.npy +tests/data/ljspeech/wavs/LJ026-0059.wav|tests/data/ljspeech/wavs/LJ026-0059.npy +tests/data/ljspeech/wavs/LJ011-0002.wav|tests/data/ljspeech/wavs/LJ011-0002.npy +tests/data/ljspeech/wavs/LJ004-0082.wav|tests/data/ljspeech/wavs/LJ004-0082.npy +tests/data/ljspeech/wavs/LJ036-0153.wav|tests/data/ljspeech/wavs/LJ036-0153.npy +tests/data/ljspeech/wavs/LJ050-0050.wav|tests/data/ljspeech/wavs/LJ050-0050.npy +tests/data/ljspeech/wavs/LJ007-0147.wav|tests/data/ljspeech/wavs/LJ007-0147.npy +tests/data/ljspeech/wavs/LJ044-0209.wav|tests/data/ljspeech/wavs/LJ044-0209.npy +tests/data/ljspeech/wavs/LJ047-0227.wav|tests/data/ljspeech/wavs/LJ047-0227.npy +tests/data/ljspeech/wavs/LJ006-0296.wav|tests/data/ljspeech/wavs/LJ006-0296.npy +tests/data/ljspeech/wavs/LJ038-0142.wav|tests/data/ljspeech/wavs/LJ038-0142.npy +tests/data/ljspeech/wavs/LJ028-0397.wav|tests/data/ljspeech/wavs/LJ028-0397.npy +tests/data/ljspeech/wavs/LJ047-0191.wav|tests/data/ljspeech/wavs/LJ047-0191.npy +tests/data/ljspeech/wavs/LJ042-0140.wav|tests/data/ljspeech/wavs/LJ042-0140.npy +tests/data/ljspeech/wavs/LJ021-0053.wav|tests/data/ljspeech/wavs/LJ021-0053.npy +tests/data/ljspeech/wavs/LJ028-0414.wav|tests/data/ljspeech/wavs/LJ028-0414.npy +tests/data/ljspeech/wavs/LJ017-0069.wav|tests/data/ljspeech/wavs/LJ017-0069.npy +tests/data/ljspeech/wavs/LJ019-0049.wav|tests/data/ljspeech/wavs/LJ019-0049.npy +tests/data/ljspeech/wavs/LJ010-0198.wav|tests/data/ljspeech/wavs/LJ010-0198.npy +tests/data/ljspeech/wavs/LJ020-0053.wav|tests/data/ljspeech/wavs/LJ020-0053.npy +tests/data/ljspeech/wavs/LJ014-0179.wav|tests/data/ljspeech/wavs/LJ014-0179.npy +tests/data/ljspeech/wavs/LJ030-0114.wav|tests/data/ljspeech/wavs/LJ030-0114.npy +tests/data/ljspeech/wavs/LJ006-0276.wav|tests/data/ljspeech/wavs/LJ006-0276.npy +tests/data/ljspeech/wavs/LJ014-0217.wav|tests/data/ljspeech/wavs/LJ014-0217.npy +tests/data/ljspeech/wavs/LJ007-0106.wav|tests/data/ljspeech/wavs/LJ007-0106.npy +tests/data/ljspeech/wavs/LJ034-0175.wav|tests/data/ljspeech/wavs/LJ034-0175.npy +tests/data/ljspeech/wavs/LJ017-0096.wav|tests/data/ljspeech/wavs/LJ017-0096.npy +tests/data/ljspeech/wavs/LJ036-0018.wav|tests/data/ljspeech/wavs/LJ036-0018.npy +tests/data/ljspeech/wavs/LJ033-0033.wav|tests/data/ljspeech/wavs/LJ033-0033.npy +tests/data/ljspeech/wavs/LJ014-0113.wav|tests/data/ljspeech/wavs/LJ014-0113.npy +tests/data/ljspeech/wavs/LJ035-0134.wav|tests/data/ljspeech/wavs/LJ035-0134.npy +tests/data/ljspeech/wavs/LJ016-0432.wav|tests/data/ljspeech/wavs/LJ016-0432.npy +tests/data/ljspeech/wavs/LJ032-0216.wav|tests/data/ljspeech/wavs/LJ032-0216.npy +tests/data/ljspeech/wavs/LJ011-0130.wav|tests/data/ljspeech/wavs/LJ011-0130.npy +tests/data/ljspeech/wavs/LJ036-0205.wav|tests/data/ljspeech/wavs/LJ036-0205.npy +tests/data/ljspeech/wavs/LJ012-0032.wav|tests/data/ljspeech/wavs/LJ012-0032.npy 
+tests/data/ljspeech/wavs/LJ019-0137.wav|tests/data/ljspeech/wavs/LJ019-0137.npy +tests/data/ljspeech/wavs/LJ032-0140.wav|tests/data/ljspeech/wavs/LJ032-0140.npy +tests/data/ljspeech/wavs/LJ020-0037.wav|tests/data/ljspeech/wavs/LJ020-0037.npy +tests/data/ljspeech/wavs/LJ013-0238.wav|tests/data/ljspeech/wavs/LJ013-0238.npy +tests/data/ljspeech/wavs/LJ011-0125.wav|tests/data/ljspeech/wavs/LJ011-0125.npy +tests/data/ljspeech/wavs/LJ027-0060.wav|tests/data/ljspeech/wavs/LJ027-0060.npy +tests/data/ljspeech/wavs/LJ019-0217.wav|tests/data/ljspeech/wavs/LJ019-0217.npy +tests/data/ljspeech/wavs/LJ048-0051.wav|tests/data/ljspeech/wavs/LJ048-0051.npy +tests/data/ljspeech/wavs/LJ046-0052.wav|tests/data/ljspeech/wavs/LJ046-0052.npy +tests/data/ljspeech/wavs/LJ028-0161.wav|tests/data/ljspeech/wavs/LJ028-0161.npy +tests/data/ljspeech/wavs/LJ039-0121.wav|tests/data/ljspeech/wavs/LJ039-0121.npy +tests/data/ljspeech/wavs/LJ006-0287.wav|tests/data/ljspeech/wavs/LJ006-0287.npy +tests/data/ljspeech/wavs/LJ015-0081.wav|tests/data/ljspeech/wavs/LJ015-0081.npy +tests/data/ljspeech/wavs/LJ011-0209.wav|tests/data/ljspeech/wavs/LJ011-0209.npy +tests/data/ljspeech/wavs/LJ004-0144.wav|tests/data/ljspeech/wavs/LJ004-0144.npy +tests/data/ljspeech/wavs/LJ003-0072.wav|tests/data/ljspeech/wavs/LJ003-0072.npy +tests/data/ljspeech/wavs/LJ030-0201.wav|tests/data/ljspeech/wavs/LJ030-0201.npy +tests/data/ljspeech/wavs/LJ012-0179.wav|tests/data/ljspeech/wavs/LJ012-0179.npy +tests/data/ljspeech/wavs/LJ006-0209.wav|tests/data/ljspeech/wavs/LJ006-0209.npy +tests/data/ljspeech/wavs/LJ002-0082.wav|tests/data/ljspeech/wavs/LJ002-0082.npy +tests/data/ljspeech/wavs/LJ050-0113.wav|tests/data/ljspeech/wavs/LJ050-0113.npy +tests/data/ljspeech/wavs/LJ019-0263.wav|tests/data/ljspeech/wavs/LJ019-0263.npy +tests/data/ljspeech/wavs/LJ002-0084.wav|tests/data/ljspeech/wavs/LJ002-0084.npy +tests/data/ljspeech/wavs/LJ011-0062.wav|tests/data/ljspeech/wavs/LJ011-0062.npy +tests/data/ljspeech/wavs/LJ014-0052.wav|tests/data/ljspeech/wavs/LJ014-0052.npy +tests/data/ljspeech/wavs/LJ032-0254.wav|tests/data/ljspeech/wavs/LJ032-0254.npy +tests/data/ljspeech/wavs/LJ020-0049.wav|tests/data/ljspeech/wavs/LJ020-0049.npy +tests/data/ljspeech/wavs/LJ001-0017.wav|tests/data/ljspeech/wavs/LJ001-0017.npy +tests/data/ljspeech/wavs/LJ016-0090.wav|tests/data/ljspeech/wavs/LJ016-0090.npy +tests/data/ljspeech/wavs/LJ048-0109.wav|tests/data/ljspeech/wavs/LJ048-0109.npy +tests/data/ljspeech/wavs/LJ012-0124.wav|tests/data/ljspeech/wavs/LJ012-0124.npy +tests/data/ljspeech/wavs/LJ018-0084.wav|tests/data/ljspeech/wavs/LJ018-0084.npy +tests/data/ljspeech/wavs/LJ041-0145.wav|tests/data/ljspeech/wavs/LJ041-0145.npy +tests/data/ljspeech/wavs/LJ003-0237.wav|tests/data/ljspeech/wavs/LJ003-0237.npy +tests/data/ljspeech/wavs/LJ006-0125.wav|tests/data/ljspeech/wavs/LJ006-0125.npy +tests/data/ljspeech/wavs/LJ033-0204.wav|tests/data/ljspeech/wavs/LJ033-0204.npy +tests/data/ljspeech/wavs/LJ011-0083.wav|tests/data/ljspeech/wavs/LJ011-0083.npy +tests/data/ljspeech/wavs/LJ016-0114.wav|tests/data/ljspeech/wavs/LJ016-0114.npy +tests/data/ljspeech/wavs/LJ006-0116.wav|tests/data/ljspeech/wavs/LJ006-0116.npy +tests/data/ljspeech/wavs/LJ041-0120.wav|tests/data/ljspeech/wavs/LJ041-0120.npy +tests/data/ljspeech/wavs/LJ018-0027.wav|tests/data/ljspeech/wavs/LJ018-0027.npy +tests/data/ljspeech/wavs/LJ045-0097.wav|tests/data/ljspeech/wavs/LJ045-0097.npy +tests/data/ljspeech/wavs/LJ050-0140.wav|tests/data/ljspeech/wavs/LJ050-0140.npy 
+tests/data/ljspeech/wavs/LJ009-0183.wav|tests/data/ljspeech/wavs/LJ009-0183.npy +tests/data/ljspeech/wavs/LJ029-0123.wav|tests/data/ljspeech/wavs/LJ029-0123.npy +tests/data/ljspeech/wavs/LJ019-0162.wav|tests/data/ljspeech/wavs/LJ019-0162.npy +tests/data/ljspeech/wavs/LJ008-0232.wav|tests/data/ljspeech/wavs/LJ008-0232.npy +tests/data/ljspeech/wavs/LJ036-0163.wav|tests/data/ljspeech/wavs/LJ036-0163.npy +tests/data/ljspeech/wavs/LJ018-0093.wav|tests/data/ljspeech/wavs/LJ018-0093.npy +tests/data/ljspeech/wavs/LJ021-0202.wav|tests/data/ljspeech/wavs/LJ021-0202.npy +tests/data/ljspeech/wavs/LJ050-0021.wav|tests/data/ljspeech/wavs/LJ050-0021.npy +tests/data/ljspeech/wavs/LJ045-0231.wav|tests/data/ljspeech/wavs/LJ045-0231.npy +tests/data/ljspeech/wavs/LJ006-0104.wav|tests/data/ljspeech/wavs/LJ006-0104.npy +tests/data/ljspeech/wavs/LJ009-0104.wav|tests/data/ljspeech/wavs/LJ009-0104.npy +tests/data/ljspeech/wavs/LJ016-0035.wav|tests/data/ljspeech/wavs/LJ016-0035.npy +tests/data/ljspeech/wavs/LJ008-0097.wav|tests/data/ljspeech/wavs/LJ008-0097.npy +tests/data/ljspeech/wavs/LJ016-0045.wav|tests/data/ljspeech/wavs/LJ016-0045.npy +tests/data/ljspeech/wavs/LJ009-0196.wav|tests/data/ljspeech/wavs/LJ009-0196.npy +tests/data/ljspeech/wavs/LJ006-0228.wav|tests/data/ljspeech/wavs/LJ006-0228.npy +tests/data/ljspeech/wavs/LJ003-0265.wav|tests/data/ljspeech/wavs/LJ003-0265.npy +tests/data/ljspeech/wavs/LJ032-0205.wav|tests/data/ljspeech/wavs/LJ032-0205.npy +tests/data/ljspeech/wavs/LJ044-0124.wav|tests/data/ljspeech/wavs/LJ044-0124.npy +tests/data/ljspeech/wavs/LJ027-0133.wav|tests/data/ljspeech/wavs/LJ027-0133.npy +tests/data/ljspeech/wavs/LJ037-0019.wav|tests/data/ljspeech/wavs/LJ037-0019.npy +tests/data/ljspeech/wavs/LJ037-0198.wav|tests/data/ljspeech/wavs/LJ037-0198.npy +tests/data/ljspeech/wavs/LJ012-0140.wav|tests/data/ljspeech/wavs/LJ012-0140.npy +tests/data/ljspeech/wavs/LJ047-0170.wav|tests/data/ljspeech/wavs/LJ047-0170.npy +tests/data/ljspeech/wavs/LJ003-0082.wav|tests/data/ljspeech/wavs/LJ003-0082.npy +tests/data/ljspeech/wavs/LJ019-0372.wav|tests/data/ljspeech/wavs/LJ019-0372.npy +tests/data/ljspeech/wavs/LJ025-0084.wav|tests/data/ljspeech/wavs/LJ025-0084.npy +tests/data/ljspeech/wavs/LJ021-0185.wav|tests/data/ljspeech/wavs/LJ021-0185.npy +tests/data/ljspeech/wavs/LJ016-0410.wav|tests/data/ljspeech/wavs/LJ016-0410.npy +tests/data/ljspeech/wavs/LJ021-0197.wav|tests/data/ljspeech/wavs/LJ021-0197.npy +tests/data/ljspeech/wavs/LJ004-0204.wav|tests/data/ljspeech/wavs/LJ004-0204.npy +tests/data/ljspeech/wavs/LJ013-0021.wav|tests/data/ljspeech/wavs/LJ013-0021.npy +tests/data/ljspeech/wavs/LJ037-0212.wav|tests/data/ljspeech/wavs/LJ037-0212.npy +tests/data/ljspeech/wavs/LJ049-0074.wav|tests/data/ljspeech/wavs/LJ049-0074.npy +tests/data/ljspeech/wavs/LJ010-0009.wav|tests/data/ljspeech/wavs/LJ010-0009.npy +tests/data/ljspeech/wavs/LJ025-0062.wav|tests/data/ljspeech/wavs/LJ025-0062.npy +tests/data/ljspeech/wavs/LJ015-0216.wav|tests/data/ljspeech/wavs/LJ015-0216.npy +tests/data/ljspeech/wavs/LJ036-0039.wav|tests/data/ljspeech/wavs/LJ036-0039.npy +tests/data/ljspeech/wavs/LJ039-0100.wav|tests/data/ljspeech/wavs/LJ039-0100.npy +tests/data/ljspeech/wavs/LJ045-0207.wav|tests/data/ljspeech/wavs/LJ045-0207.npy +tests/data/ljspeech/wavs/LJ006-0146.wav|tests/data/ljspeech/wavs/LJ006-0146.npy +tests/data/ljspeech/wavs/LJ038-0016.wav|tests/data/ljspeech/wavs/LJ038-0016.npy +tests/data/ljspeech/wavs/LJ007-0168.wav|tests/data/ljspeech/wavs/LJ007-0168.npy 
+tests/data/ljspeech/wavs/LJ035-0082.wav|tests/data/ljspeech/wavs/LJ035-0082.npy +tests/data/ljspeech/wavs/LJ045-0009.wav|tests/data/ljspeech/wavs/LJ045-0009.npy +tests/data/ljspeech/wavs/LJ008-0173.wav|tests/data/ljspeech/wavs/LJ008-0173.npy +tests/data/ljspeech/wavs/LJ033-0087.wav|tests/data/ljspeech/wavs/LJ033-0087.npy +tests/data/ljspeech/wavs/LJ001-0173.wav|tests/data/ljspeech/wavs/LJ001-0173.npy +tests/data/ljspeech/wavs/LJ016-0433.wav|tests/data/ljspeech/wavs/LJ016-0433.npy +tests/data/ljspeech/wavs/LJ044-0230.wav|tests/data/ljspeech/wavs/LJ044-0230.npy +tests/data/ljspeech/wavs/LJ003-0301.wav|tests/data/ljspeech/wavs/LJ003-0301.npy +tests/data/ljspeech/wavs/LJ037-0116.wav|tests/data/ljspeech/wavs/LJ037-0116.npy +tests/data/ljspeech/wavs/LJ049-0165.wav|tests/data/ljspeech/wavs/LJ049-0165.npy +tests/data/ljspeech/wavs/LJ024-0127.wav|tests/data/ljspeech/wavs/LJ024-0127.npy +tests/data/ljspeech/wavs/LJ025-0170.wav|tests/data/ljspeech/wavs/LJ025-0170.npy +tests/data/ljspeech/wavs/LJ021-0090.wav|tests/data/ljspeech/wavs/LJ021-0090.npy +tests/data/ljspeech/wavs/LJ015-0130.wav|tests/data/ljspeech/wavs/LJ015-0130.npy +tests/data/ljspeech/wavs/LJ019-0068.wav|tests/data/ljspeech/wavs/LJ019-0068.npy +tests/data/ljspeech/wavs/LJ044-0231.wav|tests/data/ljspeech/wavs/LJ044-0231.npy +tests/data/ljspeech/wavs/LJ016-0198.wav|tests/data/ljspeech/wavs/LJ016-0198.npy +tests/data/ljspeech/wavs/LJ021-0130.wav|tests/data/ljspeech/wavs/LJ021-0130.npy +tests/data/ljspeech/wavs/LJ033-0130.wav|tests/data/ljspeech/wavs/LJ033-0130.npy +tests/data/ljspeech/wavs/LJ004-0006.wav|tests/data/ljspeech/wavs/LJ004-0006.npy +tests/data/ljspeech/wavs/LJ039-0087.wav|tests/data/ljspeech/wavs/LJ039-0087.npy +tests/data/ljspeech/wavs/LJ013-0204.wav|tests/data/ljspeech/wavs/LJ013-0204.npy +tests/data/ljspeech/wavs/LJ021-0043.wav|tests/data/ljspeech/wavs/LJ021-0043.npy +tests/data/ljspeech/wavs/LJ038-0116.wav|tests/data/ljspeech/wavs/LJ038-0116.npy +tests/data/ljspeech/wavs/LJ002-0277.wav|tests/data/ljspeech/wavs/LJ002-0277.npy +tests/data/ljspeech/wavs/LJ018-0199.wav|tests/data/ljspeech/wavs/LJ018-0199.npy +tests/data/ljspeech/wavs/LJ039-0074.wav|tests/data/ljspeech/wavs/LJ039-0074.npy +tests/data/ljspeech/wavs/LJ015-0230.wav|tests/data/ljspeech/wavs/LJ015-0230.npy +tests/data/ljspeech/wavs/LJ014-0141.wav|tests/data/ljspeech/wavs/LJ014-0141.npy +tests/data/ljspeech/wavs/LJ003-0325.wav|tests/data/ljspeech/wavs/LJ003-0325.npy +tests/data/ljspeech/wavs/LJ025-0136.wav|tests/data/ljspeech/wavs/LJ025-0136.npy +tests/data/ljspeech/wavs/LJ046-0194.wav|tests/data/ljspeech/wavs/LJ046-0194.npy +tests/data/ljspeech/wavs/LJ035-0206.wav|tests/data/ljspeech/wavs/LJ035-0206.npy +tests/data/ljspeech/wavs/LJ016-0215.wav|tests/data/ljspeech/wavs/LJ016-0215.npy +tests/data/ljspeech/wavs/LJ019-0056.wav|tests/data/ljspeech/wavs/LJ019-0056.npy +tests/data/ljspeech/wavs/LJ017-0144.wav|tests/data/ljspeech/wavs/LJ017-0144.npy +tests/data/ljspeech/wavs/LJ030-0251.wav|tests/data/ljspeech/wavs/LJ030-0251.npy +tests/data/ljspeech/wavs/LJ004-0142.wav|tests/data/ljspeech/wavs/LJ004-0142.npy +tests/data/ljspeech/wavs/LJ029-0175.wav|tests/data/ljspeech/wavs/LJ029-0175.npy +tests/data/ljspeech/wavs/LJ005-0167.wav|tests/data/ljspeech/wavs/LJ005-0167.npy +tests/data/ljspeech/wavs/LJ046-0167.wav|tests/data/ljspeech/wavs/LJ046-0167.npy +tests/data/ljspeech/wavs/LJ006-0139.wav|tests/data/ljspeech/wavs/LJ006-0139.npy +tests/data/ljspeech/wavs/LJ030-0016.wav|tests/data/ljspeech/wavs/LJ030-0016.npy 
+tests/data/ljspeech/wavs/LJ044-0028.wav|tests/data/ljspeech/wavs/LJ044-0028.npy +tests/data/ljspeech/wavs/LJ016-0255.wav|tests/data/ljspeech/wavs/LJ016-0255.npy +tests/data/ljspeech/wavs/LJ038-0093.wav|tests/data/ljspeech/wavs/LJ038-0093.npy +tests/data/ljspeech/wavs/LJ010-0106.wav|tests/data/ljspeech/wavs/LJ010-0106.npy +tests/data/ljspeech/wavs/LJ041-0109.wav|tests/data/ljspeech/wavs/LJ041-0109.npy +tests/data/ljspeech/wavs/LJ040-0097.wav|tests/data/ljspeech/wavs/LJ040-0097.npy +tests/data/ljspeech/wavs/LJ010-0246.wav|tests/data/ljspeech/wavs/LJ010-0246.npy +tests/data/ljspeech/wavs/LJ011-0053.wav|tests/data/ljspeech/wavs/LJ011-0053.npy +tests/data/ljspeech/wavs/LJ030-0081.wav|tests/data/ljspeech/wavs/LJ030-0081.npy +tests/data/ljspeech/wavs/LJ001-0128.wav|tests/data/ljspeech/wavs/LJ001-0128.npy +tests/data/ljspeech/wavs/LJ030-0135.wav|tests/data/ljspeech/wavs/LJ030-0135.npy +tests/data/ljspeech/wavs/LJ005-0235.wav|tests/data/ljspeech/wavs/LJ005-0235.npy +tests/data/ljspeech/wavs/LJ031-0075.wav|tests/data/ljspeech/wavs/LJ031-0075.npy +tests/data/ljspeech/wavs/LJ046-0043.wav|tests/data/ljspeech/wavs/LJ046-0043.npy +tests/data/ljspeech/wavs/LJ010-0282.wav|tests/data/ljspeech/wavs/LJ010-0282.npy +tests/data/ljspeech/wavs/LJ019-0259.wav|tests/data/ljspeech/wavs/LJ019-0259.npy +tests/data/ljspeech/wavs/LJ008-0169.wav|tests/data/ljspeech/wavs/LJ008-0169.npy +tests/data/ljspeech/wavs/LJ024-0047.wav|tests/data/ljspeech/wavs/LJ024-0047.npy +tests/data/ljspeech/wavs/LJ041-0075.wav|tests/data/ljspeech/wavs/LJ041-0075.npy +tests/data/ljspeech/wavs/LJ038-0253.wav|tests/data/ljspeech/wavs/LJ038-0253.npy +tests/data/ljspeech/wavs/LJ001-0124.wav|tests/data/ljspeech/wavs/LJ001-0124.npy +tests/data/ljspeech/wavs/LJ007-0220.wav|tests/data/ljspeech/wavs/LJ007-0220.npy +tests/data/ljspeech/wavs/LJ028-0271.wav|tests/data/ljspeech/wavs/LJ028-0271.npy +tests/data/ljspeech/wavs/LJ001-0085.wav|tests/data/ljspeech/wavs/LJ001-0085.npy +tests/data/ljspeech/wavs/LJ001-0088.wav|tests/data/ljspeech/wavs/LJ001-0088.npy +tests/data/ljspeech/wavs/LJ028-0376.wav|tests/data/ljspeech/wavs/LJ028-0376.npy +tests/data/ljspeech/wavs/LJ014-0124.wav|tests/data/ljspeech/wavs/LJ014-0124.npy +tests/data/ljspeech/wavs/LJ046-0180.wav|tests/data/ljspeech/wavs/LJ046-0180.npy +tests/data/ljspeech/wavs/LJ042-0081.wav|tests/data/ljspeech/wavs/LJ042-0081.npy +tests/data/ljspeech/wavs/LJ030-0153.wav|tests/data/ljspeech/wavs/LJ030-0153.npy +tests/data/ljspeech/wavs/LJ029-0049.wav|tests/data/ljspeech/wavs/LJ029-0049.npy +tests/data/ljspeech/wavs/LJ043-0108.wav|tests/data/ljspeech/wavs/LJ043-0108.npy +tests/data/ljspeech/wavs/LJ029-0076.wav|tests/data/ljspeech/wavs/LJ029-0076.npy +tests/data/ljspeech/wavs/LJ008-0095.wav|tests/data/ljspeech/wavs/LJ008-0095.npy +tests/data/ljspeech/wavs/LJ027-0153.wav|tests/data/ljspeech/wavs/LJ027-0153.npy +tests/data/ljspeech/wavs/LJ040-0081.wav|tests/data/ljspeech/wavs/LJ040-0081.npy +tests/data/ljspeech/wavs/LJ049-0188.wav|tests/data/ljspeech/wavs/LJ049-0188.npy +tests/data/ljspeech/wavs/LJ005-0248.wav|tests/data/ljspeech/wavs/LJ005-0248.npy +tests/data/ljspeech/wavs/LJ032-0151.wav|tests/data/ljspeech/wavs/LJ032-0151.npy +tests/data/ljspeech/wavs/LJ010-0075.wav|tests/data/ljspeech/wavs/LJ010-0075.npy +tests/data/ljspeech/wavs/LJ008-0089.wav|tests/data/ljspeech/wavs/LJ008-0089.npy +tests/data/ljspeech/wavs/LJ005-0056.wav|tests/data/ljspeech/wavs/LJ005-0056.npy +tests/data/ljspeech/wavs/LJ039-0213.wav|tests/data/ljspeech/wavs/LJ039-0213.npy 
+tests/data/ljspeech/wavs/LJ005-0245.wav|tests/data/ljspeech/wavs/LJ005-0245.npy +tests/data/ljspeech/wavs/LJ048-0165.wav|tests/data/ljspeech/wavs/LJ048-0165.npy +tests/data/ljspeech/wavs/LJ010-0289.wav|tests/data/ljspeech/wavs/LJ010-0289.npy +tests/data/ljspeech/wavs/LJ050-0084.wav|tests/data/ljspeech/wavs/LJ050-0084.npy +tests/data/ljspeech/wavs/LJ008-0075.wav|tests/data/ljspeech/wavs/LJ008-0075.npy +tests/data/ljspeech/wavs/LJ028-0458.wav|tests/data/ljspeech/wavs/LJ028-0458.npy +tests/data/ljspeech/wavs/LJ030-0176.wav|tests/data/ljspeech/wavs/LJ030-0176.npy +tests/data/ljspeech/wavs/LJ030-0204.wav|tests/data/ljspeech/wavs/LJ030-0204.npy +tests/data/ljspeech/wavs/LJ042-0222.wav|tests/data/ljspeech/wavs/LJ042-0222.npy +tests/data/ljspeech/wavs/LJ028-0227.wav|tests/data/ljspeech/wavs/LJ028-0227.npy +tests/data/ljspeech/wavs/LJ006-0216.wav|tests/data/ljspeech/wavs/LJ006-0216.npy +tests/data/ljspeech/wavs/LJ032-0113.wav|tests/data/ljspeech/wavs/LJ032-0113.npy +tests/data/ljspeech/wavs/LJ040-0122.wav|tests/data/ljspeech/wavs/LJ040-0122.npy +tests/data/ljspeech/wavs/LJ011-0215.wav|tests/data/ljspeech/wavs/LJ011-0215.npy +tests/data/ljspeech/wavs/LJ032-0153.wav|tests/data/ljspeech/wavs/LJ032-0153.npy +tests/data/ljspeech/wavs/LJ032-0177.wav|tests/data/ljspeech/wavs/LJ032-0177.npy +tests/data/ljspeech/wavs/LJ034-0056.wav|tests/data/ljspeech/wavs/LJ034-0056.npy +tests/data/ljspeech/wavs/LJ009-0011.wav|tests/data/ljspeech/wavs/LJ009-0011.npy +tests/data/ljspeech/wavs/LJ041-0084.wav|tests/data/ljspeech/wavs/LJ041-0084.npy +tests/data/ljspeech/wavs/LJ045-0042.wav|tests/data/ljspeech/wavs/LJ045-0042.npy +tests/data/ljspeech/wavs/LJ045-0140.wav|tests/data/ljspeech/wavs/LJ045-0140.npy +tests/data/ljspeech/wavs/LJ045-0028.wav|tests/data/ljspeech/wavs/LJ045-0028.npy +tests/data/ljspeech/wavs/LJ025-0052.wav|tests/data/ljspeech/wavs/LJ025-0052.npy +tests/data/ljspeech/wavs/LJ033-0155.wav|tests/data/ljspeech/wavs/LJ033-0155.npy +tests/data/ljspeech/wavs/LJ041-0160.wav|tests/data/ljspeech/wavs/LJ041-0160.npy +tests/data/ljspeech/wavs/LJ009-0180.wav|tests/data/ljspeech/wavs/LJ009-0180.npy +tests/data/ljspeech/wavs/LJ041-0125.wav|tests/data/ljspeech/wavs/LJ041-0125.npy +tests/data/ljspeech/wavs/LJ031-0149.wav|tests/data/ljspeech/wavs/LJ031-0149.npy +tests/data/ljspeech/wavs/LJ001-0027.wav|tests/data/ljspeech/wavs/LJ001-0027.npy +tests/data/ljspeech/wavs/LJ033-0144.wav|tests/data/ljspeech/wavs/LJ033-0144.npy +tests/data/ljspeech/wavs/LJ029-0105.wav|tests/data/ljspeech/wavs/LJ029-0105.npy +tests/data/ljspeech/wavs/LJ030-0038.wav|tests/data/ljspeech/wavs/LJ030-0038.npy +tests/data/ljspeech/wavs/LJ029-0149.wav|tests/data/ljspeech/wavs/LJ029-0149.npy +tests/data/ljspeech/wavs/LJ003-0294.wav|tests/data/ljspeech/wavs/LJ003-0294.npy +tests/data/ljspeech/wavs/LJ050-0024.wav|tests/data/ljspeech/wavs/LJ050-0024.npy +tests/data/ljspeech/wavs/LJ017-0143.wav|tests/data/ljspeech/wavs/LJ017-0143.npy +tests/data/ljspeech/wavs/LJ031-0118.wav|tests/data/ljspeech/wavs/LJ031-0118.npy +tests/data/ljspeech/wavs/LJ016-0043.wav|tests/data/ljspeech/wavs/LJ016-0043.npy +tests/data/ljspeech/wavs/LJ001-0142.wav|tests/data/ljspeech/wavs/LJ001-0142.npy +tests/data/ljspeech/wavs/LJ016-0425.wav|tests/data/ljspeech/wavs/LJ016-0425.npy +tests/data/ljspeech/wavs/LJ016-0047.wav|tests/data/ljspeech/wavs/LJ016-0047.npy +tests/data/ljspeech/wavs/LJ009-0130.wav|tests/data/ljspeech/wavs/LJ009-0130.npy +tests/data/ljspeech/wavs/LJ002-0292.wav|tests/data/ljspeech/wavs/LJ002-0292.npy 
+tests/data/ljspeech/wavs/LJ050-0009.wav|tests/data/ljspeech/wavs/LJ050-0009.npy +tests/data/ljspeech/wavs/LJ034-0192.wav|tests/data/ljspeech/wavs/LJ034-0192.npy +tests/data/ljspeech/wavs/LJ007-0090.wav|tests/data/ljspeech/wavs/LJ007-0090.npy +tests/data/ljspeech/wavs/LJ030-0175.wav|tests/data/ljspeech/wavs/LJ030-0175.npy +tests/data/ljspeech/wavs/LJ009-0227.wav|tests/data/ljspeech/wavs/LJ009-0227.npy +tests/data/ljspeech/wavs/LJ030-0145.wav|tests/data/ljspeech/wavs/LJ030-0145.npy +tests/data/ljspeech/wavs/LJ034-0150.wav|tests/data/ljspeech/wavs/LJ034-0150.npy +tests/data/ljspeech/wavs/LJ042-0238.wav|tests/data/ljspeech/wavs/LJ042-0238.npy +tests/data/ljspeech/wavs/LJ019-0205.wav|tests/data/ljspeech/wavs/LJ019-0205.npy +tests/data/ljspeech/wavs/LJ031-0039.wav|tests/data/ljspeech/wavs/LJ031-0039.npy +tests/data/ljspeech/wavs/LJ050-0087.wav|tests/data/ljspeech/wavs/LJ050-0087.npy +tests/data/ljspeech/wavs/LJ047-0214.wav|tests/data/ljspeech/wavs/LJ047-0214.npy +tests/data/ljspeech/wavs/LJ015-0235.wav|tests/data/ljspeech/wavs/LJ015-0235.npy +tests/data/ljspeech/wavs/LJ049-0045.wav|tests/data/ljspeech/wavs/LJ049-0045.npy +tests/data/ljspeech/wavs/LJ031-0015.wav|tests/data/ljspeech/wavs/LJ031-0015.npy +tests/data/ljspeech/wavs/LJ019-0075.wav|tests/data/ljspeech/wavs/LJ019-0075.npy +tests/data/ljspeech/wavs/LJ048-0259.wav|tests/data/ljspeech/wavs/LJ048-0259.npy +tests/data/ljspeech/wavs/LJ046-0150.wav|tests/data/ljspeech/wavs/LJ046-0150.npy +tests/data/ljspeech/wavs/LJ046-0211.wav|tests/data/ljspeech/wavs/LJ046-0211.npy +tests/data/ljspeech/wavs/LJ043-0009.wav|tests/data/ljspeech/wavs/LJ043-0009.npy +tests/data/ljspeech/wavs/LJ017-0140.wav|tests/data/ljspeech/wavs/LJ017-0140.npy +tests/data/ljspeech/wavs/LJ018-0047.wav|tests/data/ljspeech/wavs/LJ018-0047.npy +tests/data/ljspeech/wavs/LJ018-0065.wav|tests/data/ljspeech/wavs/LJ018-0065.npy +tests/data/ljspeech/wavs/LJ003-0116.wav|tests/data/ljspeech/wavs/LJ003-0116.npy +tests/data/ljspeech/wavs/LJ017-0004.wav|tests/data/ljspeech/wavs/LJ017-0004.npy +tests/data/ljspeech/wavs/LJ034-0120.wav|tests/data/ljspeech/wavs/LJ034-0120.npy +tests/data/ljspeech/wavs/LJ018-0102.wav|tests/data/ljspeech/wavs/LJ018-0102.npy +tests/data/ljspeech/wavs/LJ017-0269.wav|tests/data/ljspeech/wavs/LJ017-0269.npy +tests/data/ljspeech/wavs/LJ019-0223.wav|tests/data/ljspeech/wavs/LJ019-0223.npy +tests/data/ljspeech/wavs/LJ021-0173.wav|tests/data/ljspeech/wavs/LJ021-0173.npy +tests/data/ljspeech/wavs/LJ003-0250.wav|tests/data/ljspeech/wavs/LJ003-0250.npy +tests/data/ljspeech/wavs/LJ050-0242.wav|tests/data/ljspeech/wavs/LJ050-0242.npy +tests/data/ljspeech/wavs/LJ021-0113.wav|tests/data/ljspeech/wavs/LJ021-0113.npy +tests/data/ljspeech/wavs/LJ006-0101.wav|tests/data/ljspeech/wavs/LJ006-0101.npy +tests/data/ljspeech/wavs/LJ017-0268.wav|tests/data/ljspeech/wavs/LJ017-0268.npy +tests/data/ljspeech/wavs/LJ007-0038.wav|tests/data/ljspeech/wavs/LJ007-0038.npy +tests/data/ljspeech/wavs/LJ031-0090.wav|tests/data/ljspeech/wavs/LJ031-0090.npy +tests/data/ljspeech/wavs/LJ044-0183.wav|tests/data/ljspeech/wavs/LJ044-0183.npy +tests/data/ljspeech/wavs/LJ038-0211.wav|tests/data/ljspeech/wavs/LJ038-0211.npy +tests/data/ljspeech/wavs/LJ026-0090.wav|tests/data/ljspeech/wavs/LJ026-0090.npy +tests/data/ljspeech/wavs/LJ040-0144.wav|tests/data/ljspeech/wavs/LJ040-0144.npy +tests/data/ljspeech/wavs/LJ011-0070.wav|tests/data/ljspeech/wavs/LJ011-0070.npy +tests/data/ljspeech/wavs/LJ008-0048.wav|tests/data/ljspeech/wavs/LJ008-0048.npy 
+tests/data/ljspeech/wavs/LJ050-0169.wav|tests/data/ljspeech/wavs/LJ050-0169.npy +tests/data/ljspeech/wavs/LJ046-0102.wav|tests/data/ljspeech/wavs/LJ046-0102.npy +tests/data/ljspeech/wavs/LJ032-0078.wav|tests/data/ljspeech/wavs/LJ032-0078.npy +tests/data/ljspeech/wavs/LJ046-0089.wav|tests/data/ljspeech/wavs/LJ046-0089.npy +tests/data/ljspeech/wavs/LJ044-0137.wav|tests/data/ljspeech/wavs/LJ044-0137.npy +tests/data/ljspeech/wavs/LJ049-0155.wav|tests/data/ljspeech/wavs/LJ049-0155.npy +tests/data/ljspeech/wavs/LJ049-0104.wav|tests/data/ljspeech/wavs/LJ049-0104.npy +tests/data/ljspeech/wavs/LJ050-0259.wav|tests/data/ljspeech/wavs/LJ050-0259.npy +tests/data/ljspeech/wavs/LJ003-0181.wav|tests/data/ljspeech/wavs/LJ003-0181.npy +tests/data/ljspeech/wavs/LJ014-0162.wav|tests/data/ljspeech/wavs/LJ014-0162.npy +tests/data/ljspeech/wavs/LJ032-0071.wav|tests/data/ljspeech/wavs/LJ032-0071.npy +tests/data/ljspeech/wavs/LJ015-0057.wav|tests/data/ljspeech/wavs/LJ015-0057.npy +tests/data/ljspeech/wavs/LJ010-0244.wav|tests/data/ljspeech/wavs/LJ010-0244.npy +tests/data/ljspeech/wavs/LJ011-0199.wav|tests/data/ljspeech/wavs/LJ011-0199.npy +tests/data/ljspeech/wavs/LJ010-0082.wav|tests/data/ljspeech/wavs/LJ010-0082.npy +tests/data/ljspeech/wavs/LJ013-0071.wav|tests/data/ljspeech/wavs/LJ013-0071.npy +tests/data/ljspeech/wavs/LJ005-0219.wav|tests/data/ljspeech/wavs/LJ005-0219.npy +tests/data/ljspeech/wavs/LJ031-0228.wav|tests/data/ljspeech/wavs/LJ031-0228.npy +tests/data/ljspeech/wavs/LJ010-0212.wav|tests/data/ljspeech/wavs/LJ010-0212.npy +tests/data/ljspeech/wavs/LJ011-0195.wav|tests/data/ljspeech/wavs/LJ011-0195.npy +tests/data/ljspeech/wavs/LJ028-0459.wav|tests/data/ljspeech/wavs/LJ028-0459.npy +tests/data/ljspeech/wavs/LJ021-0124.wav|tests/data/ljspeech/wavs/LJ021-0124.npy +tests/data/ljspeech/wavs/LJ049-0195.wav|tests/data/ljspeech/wavs/LJ049-0195.npy +tests/data/ljspeech/wavs/LJ047-0200.wav|tests/data/ljspeech/wavs/LJ047-0200.npy +tests/data/ljspeech/wavs/LJ009-0255.wav|tests/data/ljspeech/wavs/LJ009-0255.npy +tests/data/ljspeech/wavs/LJ012-0256.wav|tests/data/ljspeech/wavs/LJ012-0256.npy +tests/data/ljspeech/wavs/LJ032-0063.wav|tests/data/ljspeech/wavs/LJ032-0063.npy +tests/data/ljspeech/wavs/LJ032-0238.wav|tests/data/ljspeech/wavs/LJ032-0238.npy +tests/data/ljspeech/wavs/LJ007-0231.wav|tests/data/ljspeech/wavs/LJ007-0231.npy +tests/data/ljspeech/wavs/LJ026-0149.wav|tests/data/ljspeech/wavs/LJ026-0149.npy +tests/data/ljspeech/wavs/LJ027-0139.wav|tests/data/ljspeech/wavs/LJ027-0139.npy +tests/data/ljspeech/wavs/LJ044-0238.wav|tests/data/ljspeech/wavs/LJ044-0238.npy +tests/data/ljspeech/wavs/LJ011-0078.wav|tests/data/ljspeech/wavs/LJ011-0078.npy +tests/data/ljspeech/wavs/LJ005-0003.wav|tests/data/ljspeech/wavs/LJ005-0003.npy +tests/data/ljspeech/wavs/LJ044-0110.wav|tests/data/ljspeech/wavs/LJ044-0110.npy +tests/data/ljspeech/wavs/LJ005-0048.wav|tests/data/ljspeech/wavs/LJ005-0048.npy +tests/data/ljspeech/wavs/LJ007-0195.wav|tests/data/ljspeech/wavs/LJ007-0195.npy +tests/data/ljspeech/wavs/LJ005-0030.wav|tests/data/ljspeech/wavs/LJ005-0030.npy +tests/data/ljspeech/wavs/LJ004-0250.wav|tests/data/ljspeech/wavs/LJ004-0250.npy +tests/data/ljspeech/wavs/LJ017-0208.wav|tests/data/ljspeech/wavs/LJ017-0208.npy +tests/data/ljspeech/wavs/LJ049-0166.wav|tests/data/ljspeech/wavs/LJ049-0166.npy +tests/data/ljspeech/wavs/LJ048-0059.wav|tests/data/ljspeech/wavs/LJ048-0059.npy +tests/data/ljspeech/wavs/LJ029-0034.wav|tests/data/ljspeech/wavs/LJ029-0034.npy 
+tests/data/ljspeech/wavs/LJ014-0059.wav|tests/data/ljspeech/wavs/LJ014-0059.npy +tests/data/ljspeech/wavs/LJ026-0147.wav|tests/data/ljspeech/wavs/LJ026-0147.npy +tests/data/ljspeech/wavs/LJ028-0130.wav|tests/data/ljspeech/wavs/LJ028-0130.npy +tests/data/ljspeech/wavs/LJ038-0176.wav|tests/data/ljspeech/wavs/LJ038-0176.npy +tests/data/ljspeech/wavs/LJ025-0151.wav|tests/data/ljspeech/wavs/LJ025-0151.npy +tests/data/ljspeech/wavs/LJ011-0106.wav|tests/data/ljspeech/wavs/LJ011-0106.npy +tests/data/ljspeech/wavs/LJ036-0141.wav|tests/data/ljspeech/wavs/LJ036-0141.npy +tests/data/ljspeech/wavs/LJ034-0123.wav|tests/data/ljspeech/wavs/LJ034-0123.npy +tests/data/ljspeech/wavs/LJ050-0158.wav|tests/data/ljspeech/wavs/LJ050-0158.npy +tests/data/ljspeech/wavs/LJ033-0085.wav|tests/data/ljspeech/wavs/LJ033-0085.npy +tests/data/ljspeech/wavs/LJ005-0272.wav|tests/data/ljspeech/wavs/LJ005-0272.npy +tests/data/ljspeech/wavs/LJ011-0046.wav|tests/data/ljspeech/wavs/LJ011-0046.npy +tests/data/ljspeech/wavs/LJ014-0120.wav|tests/data/ljspeech/wavs/LJ014-0120.npy +tests/data/ljspeech/wavs/LJ018-0030.wav|tests/data/ljspeech/wavs/LJ018-0030.npy +tests/data/ljspeech/wavs/LJ012-0099.wav|tests/data/ljspeech/wavs/LJ012-0099.npy +tests/data/ljspeech/wavs/LJ044-0147.wav|tests/data/ljspeech/wavs/LJ044-0147.npy +tests/data/ljspeech/wavs/LJ035-0158.wav|tests/data/ljspeech/wavs/LJ035-0158.npy +tests/data/ljspeech/wavs/LJ019-0026.wav|tests/data/ljspeech/wavs/LJ019-0026.npy +tests/data/ljspeech/wavs/LJ039-0083.wav|tests/data/ljspeech/wavs/LJ039-0083.npy +tests/data/ljspeech/wavs/LJ019-0053.wav|tests/data/ljspeech/wavs/LJ019-0053.npy +tests/data/ljspeech/wavs/LJ047-0123.wav|tests/data/ljspeech/wavs/LJ047-0123.npy +tests/data/ljspeech/wavs/LJ018-0325.wav|tests/data/ljspeech/wavs/LJ018-0325.npy +tests/data/ljspeech/wavs/LJ028-0292.wav|tests/data/ljspeech/wavs/LJ028-0292.npy +tests/data/ljspeech/wavs/LJ048-0084.wav|tests/data/ljspeech/wavs/LJ048-0084.npy +tests/data/ljspeech/wavs/LJ048-0198.wav|tests/data/ljspeech/wavs/LJ048-0198.npy +tests/data/ljspeech/wavs/LJ028-0408.wav|tests/data/ljspeech/wavs/LJ028-0408.npy +tests/data/ljspeech/wavs/LJ045-0144.wav|tests/data/ljspeech/wavs/LJ045-0144.npy +tests/data/ljspeech/wavs/LJ013-0221.wav|tests/data/ljspeech/wavs/LJ013-0221.npy +tests/data/ljspeech/wavs/LJ012-0286.wav|tests/data/ljspeech/wavs/LJ012-0286.npy +tests/data/ljspeech/wavs/LJ039-0043.wav|tests/data/ljspeech/wavs/LJ039-0043.npy +tests/data/ljspeech/wavs/LJ032-0170.wav|tests/data/ljspeech/wavs/LJ032-0170.npy +tests/data/ljspeech/wavs/LJ035-0130.wav|tests/data/ljspeech/wavs/LJ035-0130.npy +tests/data/ljspeech/wavs/LJ046-0047.wav|tests/data/ljspeech/wavs/LJ046-0047.npy +tests/data/ljspeech/wavs/LJ019-0224.wav|tests/data/ljspeech/wavs/LJ019-0224.npy +tests/data/ljspeech/wavs/LJ031-0002.wav|tests/data/ljspeech/wavs/LJ031-0002.npy +tests/data/ljspeech/wavs/LJ005-0080.wav|tests/data/ljspeech/wavs/LJ005-0080.npy +tests/data/ljspeech/wavs/LJ042-0211.wav|tests/data/ljspeech/wavs/LJ042-0211.npy +tests/data/ljspeech/wavs/LJ047-0122.wav|tests/data/ljspeech/wavs/LJ047-0122.npy +tests/data/ljspeech/wavs/LJ020-0082.wav|tests/data/ljspeech/wavs/LJ020-0082.npy +tests/data/ljspeech/wavs/LJ020-0099.wav|tests/data/ljspeech/wavs/LJ020-0099.npy +tests/data/ljspeech/wavs/LJ007-0132.wav|tests/data/ljspeech/wavs/LJ007-0132.npy +tests/data/ljspeech/wavs/LJ035-0113.wav|tests/data/ljspeech/wavs/LJ035-0113.npy +tests/data/ljspeech/wavs/LJ019-0393.wav|tests/data/ljspeech/wavs/LJ019-0393.npy 
+tests/data/ljspeech/wavs/LJ007-0056.wav|tests/data/ljspeech/wavs/LJ007-0056.npy +tests/data/ljspeech/wavs/LJ039-0210.wav|tests/data/ljspeech/wavs/LJ039-0210.npy +tests/data/ljspeech/wavs/LJ007-0137.wav|tests/data/ljspeech/wavs/LJ007-0137.npy +tests/data/ljspeech/wavs/LJ006-0223.wav|tests/data/ljspeech/wavs/LJ006-0223.npy +tests/data/ljspeech/wavs/LJ033-0096.wav|tests/data/ljspeech/wavs/LJ033-0096.npy +tests/data/ljspeech/wavs/LJ003-0337.wav|tests/data/ljspeech/wavs/LJ003-0337.npy +tests/data/ljspeech/wavs/LJ025-0016.wav|tests/data/ljspeech/wavs/LJ025-0016.npy +tests/data/ljspeech/wavs/LJ009-0117.wav|tests/data/ljspeech/wavs/LJ009-0117.npy +tests/data/ljspeech/wavs/LJ004-0128.wav|tests/data/ljspeech/wavs/LJ004-0128.npy +tests/data/ljspeech/wavs/LJ037-0233.wav|tests/data/ljspeech/wavs/LJ037-0233.npy +tests/data/ljspeech/wavs/LJ040-0187.wav|tests/data/ljspeech/wavs/LJ040-0187.npy +tests/data/ljspeech/wavs/LJ029-0101.wav|tests/data/ljspeech/wavs/LJ029-0101.npy +tests/data/ljspeech/wavs/LJ015-0268.wav|tests/data/ljspeech/wavs/LJ015-0268.npy +tests/data/ljspeech/wavs/LJ029-0055.wav|tests/data/ljspeech/wavs/LJ029-0055.npy +tests/data/ljspeech/wavs/LJ025-0102.wav|tests/data/ljspeech/wavs/LJ025-0102.npy +tests/data/ljspeech/wavs/LJ025-0060.wav|tests/data/ljspeech/wavs/LJ025-0060.npy +tests/data/ljspeech/wavs/LJ006-0028.wav|tests/data/ljspeech/wavs/LJ006-0028.npy +tests/data/ljspeech/wavs/LJ037-0067.wav|tests/data/ljspeech/wavs/LJ037-0067.npy +tests/data/ljspeech/wavs/LJ037-0223.wav|tests/data/ljspeech/wavs/LJ037-0223.npy +tests/data/ljspeech/wavs/LJ015-0045.wav|tests/data/ljspeech/wavs/LJ015-0045.npy +tests/data/ljspeech/wavs/LJ016-0013.wav|tests/data/ljspeech/wavs/LJ016-0013.npy +tests/data/ljspeech/wavs/LJ010-0012.wav|tests/data/ljspeech/wavs/LJ010-0012.npy +tests/data/ljspeech/wavs/LJ014-0296.wav|tests/data/ljspeech/wavs/LJ014-0296.npy +tests/data/ljspeech/wavs/LJ029-0161.wav|tests/data/ljspeech/wavs/LJ029-0161.npy +tests/data/ljspeech/wavs/LJ016-0175.wav|tests/data/ljspeech/wavs/LJ016-0175.npy +tests/data/ljspeech/wavs/LJ026-0012.wav|tests/data/ljspeech/wavs/LJ026-0012.npy +tests/data/ljspeech/wavs/LJ005-0239.wav|tests/data/ljspeech/wavs/LJ005-0239.npy +tests/data/ljspeech/wavs/LJ046-0026.wav|tests/data/ljspeech/wavs/LJ046-0026.npy +tests/data/ljspeech/wavs/LJ044-0218.wav|tests/data/ljspeech/wavs/LJ044-0218.npy +tests/data/ljspeech/wavs/LJ009-0233.wav|tests/data/ljspeech/wavs/LJ009-0233.npy +tests/data/ljspeech/wavs/LJ002-0133.wav|tests/data/ljspeech/wavs/LJ002-0133.npy +tests/data/ljspeech/wavs/LJ025-0020.wav|tests/data/ljspeech/wavs/LJ025-0020.npy +tests/data/ljspeech/wavs/LJ004-0058.wav|tests/data/ljspeech/wavs/LJ004-0058.npy +tests/data/ljspeech/wavs/LJ009-0253.wav|tests/data/ljspeech/wavs/LJ009-0253.npy +tests/data/ljspeech/wavs/LJ009-0143.wav|tests/data/ljspeech/wavs/LJ009-0143.npy +tests/data/ljspeech/wavs/LJ050-0015.wav|tests/data/ljspeech/wavs/LJ050-0015.npy +tests/data/ljspeech/wavs/LJ034-0103.wav|tests/data/ljspeech/wavs/LJ034-0103.npy +tests/data/ljspeech/wavs/LJ028-0412.wav|tests/data/ljspeech/wavs/LJ028-0412.npy +tests/data/ljspeech/wavs/LJ045-0088.wav|tests/data/ljspeech/wavs/LJ045-0088.npy +tests/data/ljspeech/wavs/LJ044-0204.wav|tests/data/ljspeech/wavs/LJ044-0204.npy +tests/data/ljspeech/wavs/LJ044-0119.wav|tests/data/ljspeech/wavs/LJ044-0119.npy +tests/data/ljspeech/wavs/LJ017-0013.wav|tests/data/ljspeech/wavs/LJ017-0013.npy +tests/data/ljspeech/wavs/LJ008-0098.wav|tests/data/ljspeech/wavs/LJ008-0098.npy 
+tests/data/ljspeech/wavs/LJ042-0044.wav|tests/data/ljspeech/wavs/LJ042-0044.npy +tests/data/ljspeech/wavs/LJ029-0016.wav|tests/data/ljspeech/wavs/LJ029-0016.npy +tests/data/ljspeech/wavs/LJ049-0116.wav|tests/data/ljspeech/wavs/LJ049-0116.npy +tests/data/ljspeech/wavs/LJ002-0046.wav|tests/data/ljspeech/wavs/LJ002-0046.npy +tests/data/ljspeech/wavs/LJ016-0421.wav|tests/data/ljspeech/wavs/LJ016-0421.npy +tests/data/ljspeech/wavs/LJ025-0129.wav|tests/data/ljspeech/wavs/LJ025-0129.npy +tests/data/ljspeech/wavs/LJ037-0011.wav|tests/data/ljspeech/wavs/LJ037-0011.npy +tests/data/ljspeech/wavs/LJ026-0044.wav|tests/data/ljspeech/wavs/LJ026-0044.npy +tests/data/ljspeech/wavs/LJ014-0232.wav|tests/data/ljspeech/wavs/LJ014-0232.npy +tests/data/ljspeech/wavs/LJ033-0190.wav|tests/data/ljspeech/wavs/LJ033-0190.npy +tests/data/ljspeech/wavs/LJ008-0316.wav|tests/data/ljspeech/wavs/LJ008-0316.npy +tests/data/ljspeech/wavs/LJ037-0025.wav|tests/data/ljspeech/wavs/LJ037-0025.npy +tests/data/ljspeech/wavs/LJ037-0059.wav|tests/data/ljspeech/wavs/LJ037-0059.npy +tests/data/ljspeech/wavs/LJ041-0170.wav|tests/data/ljspeech/wavs/LJ041-0170.npy +tests/data/ljspeech/wavs/LJ032-0034.wav|tests/data/ljspeech/wavs/LJ032-0034.npy +tests/data/ljspeech/wavs/LJ016-0259.wav|tests/data/ljspeech/wavs/LJ016-0259.npy +tests/data/ljspeech/wavs/LJ006-0071.wav|tests/data/ljspeech/wavs/LJ006-0071.npy +tests/data/ljspeech/wavs/LJ033-0195.wav|tests/data/ljspeech/wavs/LJ033-0195.npy +tests/data/ljspeech/wavs/LJ008-0183.wav|tests/data/ljspeech/wavs/LJ008-0183.npy +tests/data/ljspeech/wavs/LJ008-0160.wav|tests/data/ljspeech/wavs/LJ008-0160.npy +tests/data/ljspeech/wavs/LJ029-0212.wav|tests/data/ljspeech/wavs/LJ029-0212.npy +tests/data/ljspeech/wavs/LJ048-0062.wav|tests/data/ljspeech/wavs/LJ048-0062.npy +tests/data/ljspeech/wavs/LJ014-0169.wav|tests/data/ljspeech/wavs/LJ014-0169.npy +tests/data/ljspeech/wavs/LJ033-0078.wav|tests/data/ljspeech/wavs/LJ033-0078.npy +tests/data/ljspeech/wavs/LJ048-0222.wav|tests/data/ljspeech/wavs/LJ048-0222.npy +tests/data/ljspeech/wavs/LJ011-0094.wav|tests/data/ljspeech/wavs/LJ011-0094.npy +tests/data/ljspeech/wavs/LJ004-0038.wav|tests/data/ljspeech/wavs/LJ004-0038.npy +tests/data/ljspeech/wavs/LJ045-0052.wav|tests/data/ljspeech/wavs/LJ045-0052.npy +tests/data/ljspeech/wavs/LJ045-0057.wav|tests/data/ljspeech/wavs/LJ045-0057.npy +tests/data/ljspeech/wavs/LJ041-0114.wav|tests/data/ljspeech/wavs/LJ041-0114.npy +tests/data/ljspeech/wavs/LJ025-0152.wav|tests/data/ljspeech/wavs/LJ025-0152.npy +tests/data/ljspeech/wavs/LJ020-0061.wav|tests/data/ljspeech/wavs/LJ020-0061.npy +tests/data/ljspeech/wavs/LJ047-0110.wav|tests/data/ljspeech/wavs/LJ047-0110.npy +tests/data/ljspeech/wavs/LJ032-0076.wav|tests/data/ljspeech/wavs/LJ032-0076.npy +tests/data/ljspeech/wavs/LJ037-0174.wav|tests/data/ljspeech/wavs/LJ037-0174.npy +tests/data/ljspeech/wavs/LJ048-0256.wav|tests/data/ljspeech/wavs/LJ048-0256.npy +tests/data/ljspeech/wavs/LJ022-0104.wav|tests/data/ljspeech/wavs/LJ022-0104.npy +tests/data/ljspeech/wavs/LJ030-0198.wav|tests/data/ljspeech/wavs/LJ030-0198.npy +tests/data/ljspeech/wavs/LJ041-0078.wav|tests/data/ljspeech/wavs/LJ041-0078.npy +tests/data/ljspeech/wavs/LJ011-0272.wav|tests/data/ljspeech/wavs/LJ011-0272.npy +tests/data/ljspeech/wavs/LJ006-0004.wav|tests/data/ljspeech/wavs/LJ006-0004.npy +tests/data/ljspeech/wavs/LJ005-0293.wav|tests/data/ljspeech/wavs/LJ005-0293.npy +tests/data/ljspeech/wavs/LJ032-0101.wav|tests/data/ljspeech/wavs/LJ032-0101.npy 
+tests/data/ljspeech/wavs/LJ008-0303.wav|tests/data/ljspeech/wavs/LJ008-0303.npy +tests/data/ljspeech/wavs/LJ008-0302.wav|tests/data/ljspeech/wavs/LJ008-0302.npy +tests/data/ljspeech/wavs/LJ009-0226.wav|tests/data/ljspeech/wavs/LJ009-0226.npy +tests/data/ljspeech/wavs/LJ001-0127.wav|tests/data/ljspeech/wavs/LJ001-0127.npy +tests/data/ljspeech/wavs/LJ009-0220.wav|tests/data/ljspeech/wavs/LJ009-0220.npy +tests/data/ljspeech/wavs/LJ003-0262.wav|tests/data/ljspeech/wavs/LJ003-0262.npy +tests/data/ljspeech/wavs/LJ016-0299.wav|tests/data/ljspeech/wavs/LJ016-0299.npy +tests/data/ljspeech/wavs/LJ028-0145.wav|tests/data/ljspeech/wavs/LJ028-0145.npy +tests/data/ljspeech/wavs/LJ028-0332.wav|tests/data/ljspeech/wavs/LJ028-0332.npy +tests/data/ljspeech/wavs/LJ022-0162.wav|tests/data/ljspeech/wavs/LJ022-0162.npy +tests/data/ljspeech/wavs/LJ048-0164.wav|tests/data/ljspeech/wavs/LJ048-0164.npy +tests/data/ljspeech/wavs/LJ038-0140.wav|tests/data/ljspeech/wavs/LJ038-0140.npy +tests/data/ljspeech/wavs/LJ016-0295.wav|tests/data/ljspeech/wavs/LJ016-0295.npy +tests/data/ljspeech/wavs/LJ001-0076.wav|tests/data/ljspeech/wavs/LJ001-0076.npy +tests/data/ljspeech/wavs/LJ007-0243.wav|tests/data/ljspeech/wavs/LJ007-0243.npy +tests/data/ljspeech/wavs/LJ044-0029.wav|tests/data/ljspeech/wavs/LJ044-0029.npy +tests/data/ljspeech/wavs/LJ044-0054.wav|tests/data/ljspeech/wavs/LJ044-0054.npy +tests/data/ljspeech/wavs/LJ011-0006.wav|tests/data/ljspeech/wavs/LJ011-0006.npy +tests/data/ljspeech/wavs/LJ006-0299.wav|tests/data/ljspeech/wavs/LJ006-0299.npy +tests/data/ljspeech/wavs/LJ046-0214.wav|tests/data/ljspeech/wavs/LJ046-0214.npy +tests/data/ljspeech/wavs/LJ018-0005.wav|tests/data/ljspeech/wavs/LJ018-0005.npy +tests/data/ljspeech/wavs/LJ050-0188.wav|tests/data/ljspeech/wavs/LJ050-0188.npy +tests/data/ljspeech/wavs/LJ036-0110.wav|tests/data/ljspeech/wavs/LJ036-0110.npy +tests/data/ljspeech/wavs/LJ018-0275.wav|tests/data/ljspeech/wavs/LJ018-0275.npy +tests/data/ljspeech/wavs/LJ005-0124.wav|tests/data/ljspeech/wavs/LJ005-0124.npy +tests/data/ljspeech/wavs/LJ016-0119.wav|tests/data/ljspeech/wavs/LJ016-0119.npy +tests/data/ljspeech/wavs/LJ003-0168.wav|tests/data/ljspeech/wavs/LJ003-0168.npy +tests/data/ljspeech/wavs/LJ045-0036.wav|tests/data/ljspeech/wavs/LJ045-0036.npy +tests/data/ljspeech/wavs/LJ019-0024.wav|tests/data/ljspeech/wavs/LJ019-0024.npy +tests/data/ljspeech/wavs/LJ007-0011.wav|tests/data/ljspeech/wavs/LJ007-0011.npy +tests/data/ljspeech/wavs/LJ040-0095.wav|tests/data/ljspeech/wavs/LJ040-0095.npy +tests/data/ljspeech/wavs/LJ039-0136.wav|tests/data/ljspeech/wavs/LJ039-0136.npy +tests/data/ljspeech/wavs/LJ010-0122.wav|tests/data/ljspeech/wavs/LJ010-0122.npy +tests/data/ljspeech/wavs/LJ011-0088.wav|tests/data/ljspeech/wavs/LJ011-0088.npy +tests/data/ljspeech/wavs/LJ037-0263.wav|tests/data/ljspeech/wavs/LJ037-0263.npy +tests/data/ljspeech/wavs/LJ014-0019.wav|tests/data/ljspeech/wavs/LJ014-0019.npy +tests/data/ljspeech/wavs/LJ007-0184.wav|tests/data/ljspeech/wavs/LJ007-0184.npy +tests/data/ljspeech/wavs/LJ005-0255.wav|tests/data/ljspeech/wavs/LJ005-0255.npy +tests/data/ljspeech/wavs/LJ007-0093.wav|tests/data/ljspeech/wavs/LJ007-0093.npy +tests/data/ljspeech/wavs/LJ035-0201.wav|tests/data/ljspeech/wavs/LJ035-0201.npy +tests/data/ljspeech/wavs/LJ015-0082.wav|tests/data/ljspeech/wavs/LJ015-0082.npy +tests/data/ljspeech/wavs/LJ010-0126.wav|tests/data/ljspeech/wavs/LJ010-0126.npy +tests/data/ljspeech/wavs/LJ005-0246.wav|tests/data/ljspeech/wavs/LJ005-0246.npy 
+tests/data/ljspeech/wavs/LJ037-0243.wav|tests/data/ljspeech/wavs/LJ037-0243.npy +tests/data/ljspeech/wavs/LJ015-0168.wav|tests/data/ljspeech/wavs/LJ015-0168.npy +tests/data/ljspeech/wavs/LJ007-0017.wav|tests/data/ljspeech/wavs/LJ007-0017.npy +tests/data/ljspeech/wavs/LJ044-0068.wav|tests/data/ljspeech/wavs/LJ044-0068.npy +tests/data/ljspeech/wavs/LJ011-0080.wav|tests/data/ljspeech/wavs/LJ011-0080.npy +tests/data/ljspeech/wavs/LJ005-0027.wav|tests/data/ljspeech/wavs/LJ005-0027.npy +tests/data/ljspeech/wavs/LJ044-0100.wav|tests/data/ljspeech/wavs/LJ044-0100.npy +tests/data/ljspeech/wavs/LJ012-0051.wav|tests/data/ljspeech/wavs/LJ012-0051.npy +tests/data/ljspeech/wavs/LJ046-0250.wav|tests/data/ljspeech/wavs/LJ046-0250.npy +tests/data/ljspeech/wavs/LJ011-0066.wav|tests/data/ljspeech/wavs/LJ011-0066.npy +tests/data/ljspeech/wavs/LJ049-0181.wav|tests/data/ljspeech/wavs/LJ049-0181.npy +tests/data/ljspeech/wavs/LJ011-0248.wav|tests/data/ljspeech/wavs/LJ011-0248.npy +tests/data/ljspeech/wavs/LJ012-0050.wav|tests/data/ljspeech/wavs/LJ012-0050.npy +tests/data/ljspeech/wavs/LJ050-0183.wav|tests/data/ljspeech/wavs/LJ050-0183.npy +tests/data/ljspeech/wavs/LJ007-0101.wav|tests/data/ljspeech/wavs/LJ007-0101.npy +tests/data/ljspeech/wavs/LJ032-0095.wav|tests/data/ljspeech/wavs/LJ032-0095.npy +tests/data/ljspeech/wavs/LJ018-0139.wav|tests/data/ljspeech/wavs/LJ018-0139.npy +tests/data/ljspeech/wavs/LJ046-0072.wav|tests/data/ljspeech/wavs/LJ046-0072.npy +tests/data/ljspeech/wavs/LJ019-0242.wav|tests/data/ljspeech/wavs/LJ019-0242.npy +tests/data/ljspeech/wavs/LJ005-0023.wav|tests/data/ljspeech/wavs/LJ005-0023.npy +tests/data/ljspeech/wavs/LJ049-0215.wav|tests/data/ljspeech/wavs/LJ049-0215.npy +tests/data/ljspeech/wavs/LJ004-0236.wav|tests/data/ljspeech/wavs/LJ004-0236.npy +tests/data/ljspeech/wavs/LJ040-0003.wav|tests/data/ljspeech/wavs/LJ040-0003.npy +tests/data/ljspeech/wavs/LJ014-0044.wav|tests/data/ljspeech/wavs/LJ014-0044.npy +tests/data/ljspeech/wavs/LJ042-0078.wav|tests/data/ljspeech/wavs/LJ042-0078.npy +tests/data/ljspeech/wavs/LJ039-0132.wav|tests/data/ljspeech/wavs/LJ039-0132.npy +tests/data/ljspeech/wavs/LJ039-0101.wav|tests/data/ljspeech/wavs/LJ039-0101.npy +tests/data/ljspeech/wavs/LJ011-0151.wav|tests/data/ljspeech/wavs/LJ011-0151.npy +tests/data/ljspeech/wavs/LJ035-0090.wav|tests/data/ljspeech/wavs/LJ035-0090.npy +tests/data/ljspeech/wavs/LJ012-0244.wav|tests/data/ljspeech/wavs/LJ012-0244.npy +tests/data/ljspeech/wavs/LJ028-0236.wav|tests/data/ljspeech/wavs/LJ028-0236.npy +tests/data/ljspeech/wavs/LJ006-0115.wav|tests/data/ljspeech/wavs/LJ006-0115.npy +tests/data/ljspeech/wavs/LJ032-0178.wav|tests/data/ljspeech/wavs/LJ032-0178.npy +tests/data/ljspeech/wavs/LJ002-0059.wav|tests/data/ljspeech/wavs/LJ002-0059.npy +tests/data/ljspeech/wavs/LJ013-0196.wav|tests/data/ljspeech/wavs/LJ013-0196.npy +tests/data/ljspeech/wavs/LJ005-0251.wav|tests/data/ljspeech/wavs/LJ005-0251.npy +tests/data/ljspeech/wavs/LJ031-0167.wav|tests/data/ljspeech/wavs/LJ031-0167.npy +tests/data/ljspeech/wavs/LJ006-0157.wav|tests/data/ljspeech/wavs/LJ006-0157.npy +tests/data/ljspeech/wavs/LJ029-0023.wav|tests/data/ljspeech/wavs/LJ029-0023.npy +tests/data/ljspeech/wavs/LJ047-0012.wav|tests/data/ljspeech/wavs/LJ047-0012.npy +tests/data/ljspeech/wavs/LJ047-0088.wav|tests/data/ljspeech/wavs/LJ047-0088.npy +tests/data/ljspeech/wavs/LJ043-0042.wav|tests/data/ljspeech/wavs/LJ043-0042.npy +tests/data/ljspeech/wavs/LJ011-0031.wav|tests/data/ljspeech/wavs/LJ011-0031.npy 
+tests/data/ljspeech/wavs/LJ007-0117.wav|tests/data/ljspeech/wavs/LJ007-0117.npy +tests/data/ljspeech/wavs/LJ007-0109.wav|tests/data/ljspeech/wavs/LJ007-0109.npy +tests/data/ljspeech/wavs/LJ040-0204.wav|tests/data/ljspeech/wavs/LJ040-0204.npy +tests/data/ljspeech/wavs/LJ050-0176.wav|tests/data/ljspeech/wavs/LJ050-0176.npy +tests/data/ljspeech/wavs/LJ031-0032.wav|tests/data/ljspeech/wavs/LJ031-0032.npy +tests/data/ljspeech/wavs/LJ013-0100.wav|tests/data/ljspeech/wavs/LJ013-0100.npy +tests/data/ljspeech/wavs/LJ028-0444.wav|tests/data/ljspeech/wavs/LJ028-0444.npy +tests/data/ljspeech/wavs/LJ043-0033.wav|tests/data/ljspeech/wavs/LJ043-0033.npy +tests/data/ljspeech/wavs/LJ048-0081.wav|tests/data/ljspeech/wavs/LJ048-0081.npy +tests/data/ljspeech/wavs/LJ008-0284.wav|tests/data/ljspeech/wavs/LJ008-0284.npy +tests/data/ljspeech/wavs/LJ006-0149.wav|tests/data/ljspeech/wavs/LJ006-0149.npy +tests/data/ljspeech/wavs/LJ040-0168.wav|tests/data/ljspeech/wavs/LJ040-0168.npy +tests/data/ljspeech/wavs/LJ006-0279.wav|tests/data/ljspeech/wavs/LJ006-0279.npy +tests/data/ljspeech/wavs/LJ042-0153.wav|tests/data/ljspeech/wavs/LJ042-0153.npy +tests/data/ljspeech/wavs/LJ008-0171.wav|tests/data/ljspeech/wavs/LJ008-0171.npy +tests/data/ljspeech/wavs/LJ010-0010.wav|tests/data/ljspeech/wavs/LJ010-0010.npy +tests/data/ljspeech/wavs/LJ030-0125.wav|tests/data/ljspeech/wavs/LJ030-0125.npy +tests/data/ljspeech/wavs/LJ030-0013.wav|tests/data/ljspeech/wavs/LJ030-0013.npy +tests/data/ljspeech/wavs/LJ008-0121.wav|tests/data/ljspeech/wavs/LJ008-0121.npy +tests/data/ljspeech/wavs/LJ008-0056.wav|tests/data/ljspeech/wavs/LJ008-0056.npy +tests/data/ljspeech/wavs/LJ007-0234.wav|tests/data/ljspeech/wavs/LJ007-0234.npy +tests/data/ljspeech/wavs/LJ050-0276.wav|tests/data/ljspeech/wavs/LJ050-0276.npy +tests/data/ljspeech/wavs/LJ043-0027.wav|tests/data/ljspeech/wavs/LJ043-0027.npy +tests/data/ljspeech/wavs/LJ010-0254.wav|tests/data/ljspeech/wavs/LJ010-0254.npy +tests/data/ljspeech/wavs/LJ014-0320.wav|tests/data/ljspeech/wavs/LJ014-0320.npy +tests/data/ljspeech/wavs/LJ043-0145.wav|tests/data/ljspeech/wavs/LJ043-0145.npy +tests/data/ljspeech/wavs/LJ045-0122.wav|tests/data/ljspeech/wavs/LJ045-0122.npy +tests/data/ljspeech/wavs/LJ016-0244.wav|tests/data/ljspeech/wavs/LJ016-0244.npy +tests/data/ljspeech/wavs/LJ033-0179.wav|tests/data/ljspeech/wavs/LJ033-0179.npy +tests/data/ljspeech/wavs/LJ004-0022.wav|tests/data/ljspeech/wavs/LJ004-0022.npy +tests/data/ljspeech/wavs/LJ041-0092.wav|tests/data/ljspeech/wavs/LJ041-0092.npy +tests/data/ljspeech/wavs/LJ041-0107.wav|tests/data/ljspeech/wavs/LJ041-0107.npy +tests/data/ljspeech/wavs/LJ004-0048.wav|tests/data/ljspeech/wavs/LJ004-0048.npy +tests/data/ljspeech/wavs/LJ041-0179.wav|tests/data/ljspeech/wavs/LJ041-0179.npy +tests/data/ljspeech/wavs/LJ018-0324.wav|tests/data/ljspeech/wavs/LJ018-0324.npy +tests/data/ljspeech/wavs/LJ025-0147.wav|tests/data/ljspeech/wavs/LJ025-0147.npy +tests/data/ljspeech/wavs/LJ004-0041.wav|tests/data/ljspeech/wavs/LJ004-0041.npy +tests/data/ljspeech/wavs/LJ046-0184.wav|tests/data/ljspeech/wavs/LJ046-0184.npy +tests/data/ljspeech/wavs/LJ016-0309.wav|tests/data/ljspeech/wavs/LJ016-0309.npy +tests/data/ljspeech/wavs/LJ027-0116.wav|tests/data/ljspeech/wavs/LJ027-0116.npy +tests/data/ljspeech/wavs/LJ031-0144.wav|tests/data/ljspeech/wavs/LJ031-0144.npy +tests/data/ljspeech/wavs/LJ014-0255.wav|tests/data/ljspeech/wavs/LJ014-0255.npy +tests/data/ljspeech/wavs/LJ016-0196.wav|tests/data/ljspeech/wavs/LJ016-0196.npy 
+tests/data/ljspeech/wavs/LJ036-0041.wav|tests/data/ljspeech/wavs/LJ036-0041.npy +tests/data/ljspeech/wavs/LJ016-0271.wav|tests/data/ljspeech/wavs/LJ016-0271.npy +tests/data/ljspeech/wavs/LJ038-0004.wav|tests/data/ljspeech/wavs/LJ038-0004.npy +tests/data/ljspeech/wavs/LJ015-0249.wav|tests/data/ljspeech/wavs/LJ015-0249.npy +tests/data/ljspeech/wavs/LJ003-0338.wav|tests/data/ljspeech/wavs/LJ003-0338.npy +tests/data/ljspeech/wavs/LJ041-0146.wav|tests/data/ljspeech/wavs/LJ041-0146.npy +tests/data/ljspeech/wavs/LJ002-0218.wav|tests/data/ljspeech/wavs/LJ002-0218.npy +tests/data/ljspeech/wavs/LJ003-0163.wav|tests/data/ljspeech/wavs/LJ003-0163.npy +tests/data/ljspeech/wavs/LJ003-0333.wav|tests/data/ljspeech/wavs/LJ003-0333.npy +tests/data/ljspeech/wavs/LJ045-0070.wav|tests/data/ljspeech/wavs/LJ045-0070.npy +tests/data/ljspeech/wavs/LJ047-0067.wav|tests/data/ljspeech/wavs/LJ047-0067.npy +tests/data/ljspeech/wavs/LJ016-0123.wav|tests/data/ljspeech/wavs/LJ016-0123.npy +tests/data/ljspeech/wavs/LJ016-0322.wav|tests/data/ljspeech/wavs/LJ016-0322.npy +tests/data/ljspeech/wavs/LJ035-0084.wav|tests/data/ljspeech/wavs/LJ035-0084.npy +tests/data/ljspeech/wavs/LJ026-0077.wav|tests/data/ljspeech/wavs/LJ026-0077.npy +tests/data/ljspeech/wavs/LJ002-0308.wav|tests/data/ljspeech/wavs/LJ002-0308.npy +tests/data/ljspeech/wavs/LJ035-0145.wav|tests/data/ljspeech/wavs/LJ035-0145.npy +tests/data/ljspeech/wavs/LJ044-0193.wav|tests/data/ljspeech/wavs/LJ044-0193.npy +tests/data/ljspeech/wavs/LJ014-0211.wav|tests/data/ljspeech/wavs/LJ014-0211.npy +tests/data/ljspeech/wavs/LJ003-0026.wav|tests/data/ljspeech/wavs/LJ003-0026.npy +tests/data/ljspeech/wavs/LJ046-0045.wav|tests/data/ljspeech/wavs/LJ046-0045.npy +tests/data/ljspeech/wavs/LJ019-0391.wav|tests/data/ljspeech/wavs/LJ019-0391.npy +tests/data/ljspeech/wavs/LJ027-0008.wav|tests/data/ljspeech/wavs/LJ027-0008.npy +tests/data/ljspeech/wavs/LJ042-0018.wav|tests/data/ljspeech/wavs/LJ042-0018.npy +tests/data/ljspeech/wavs/LJ027-0070.wav|tests/data/ljspeech/wavs/LJ027-0070.npy +tests/data/ljspeech/wavs/LJ016-0391.wav|tests/data/ljspeech/wavs/LJ016-0391.npy +tests/data/ljspeech/wavs/LJ034-0069.wav|tests/data/ljspeech/wavs/LJ034-0069.npy +tests/data/ljspeech/wavs/LJ019-0398.wav|tests/data/ljspeech/wavs/LJ019-0398.npy +tests/data/ljspeech/wavs/LJ002-0168.wav|tests/data/ljspeech/wavs/LJ002-0168.npy +tests/data/ljspeech/wavs/LJ016-0344.wav|tests/data/ljspeech/wavs/LJ016-0344.npy +tests/data/ljspeech/wavs/LJ049-0140.wav|tests/data/ljspeech/wavs/LJ049-0140.npy +tests/data/ljspeech/wavs/LJ003-0239.wav|tests/data/ljspeech/wavs/LJ003-0239.npy +tests/data/ljspeech/wavs/LJ014-0171.wav|tests/data/ljspeech/wavs/LJ014-0171.npy +tests/data/ljspeech/wavs/LJ035-0122.wav|tests/data/ljspeech/wavs/LJ035-0122.npy +tests/data/ljspeech/wavs/LJ038-0242.wav|tests/data/ljspeech/wavs/LJ038-0242.npy +tests/data/ljspeech/wavs/LJ035-0111.wav|tests/data/ljspeech/wavs/LJ035-0111.npy +tests/data/ljspeech/wavs/LJ014-0016.wav|tests/data/ljspeech/wavs/LJ014-0016.npy +tests/data/ljspeech/wavs/LJ016-0408.wav|tests/data/ljspeech/wavs/LJ016-0408.npy +tests/data/ljspeech/wavs/LJ019-0163.wav|tests/data/ljspeech/wavs/LJ019-0163.npy +tests/data/ljspeech/wavs/LJ013-0214.wav|tests/data/ljspeech/wavs/LJ013-0214.npy +tests/data/ljspeech/wavs/LJ014-0246.wav|tests/data/ljspeech/wavs/LJ014-0246.npy +tests/data/ljspeech/wavs/LJ014-0106.wav|tests/data/ljspeech/wavs/LJ014-0106.npy +tests/data/ljspeech/wavs/LJ002-0185.wav|tests/data/ljspeech/wavs/LJ002-0185.npy 
+tests/data/ljspeech/wavs/LJ017-0085.wav|tests/data/ljspeech/wavs/LJ017-0085.npy +tests/data/ljspeech/wavs/LJ035-0123.wav|tests/data/ljspeech/wavs/LJ035-0123.npy +tests/data/ljspeech/wavs/LJ042-0135.wav|tests/data/ljspeech/wavs/LJ042-0135.npy +tests/data/ljspeech/wavs/LJ035-0086.wav|tests/data/ljspeech/wavs/LJ035-0086.npy +tests/data/ljspeech/wavs/LJ031-0078.wav|tests/data/ljspeech/wavs/LJ031-0078.npy +tests/data/ljspeech/wavs/LJ045-0183.wav|tests/data/ljspeech/wavs/LJ045-0183.npy +tests/data/ljspeech/wavs/LJ015-0132.wav|tests/data/ljspeech/wavs/LJ015-0132.npy +tests/data/ljspeech/wavs/LJ035-0207.wav|tests/data/ljspeech/wavs/LJ035-0207.npy +tests/data/ljspeech/wavs/LJ003-0069.wav|tests/data/ljspeech/wavs/LJ003-0069.npy +tests/data/ljspeech/wavs/LJ047-0145.wav|tests/data/ljspeech/wavs/LJ047-0145.npy +tests/data/ljspeech/wavs/LJ019-0170.wav|tests/data/ljspeech/wavs/LJ019-0170.npy +tests/data/ljspeech/wavs/LJ034-0162.wav|tests/data/ljspeech/wavs/LJ034-0162.npy +tests/data/ljspeech/wavs/LJ047-0242.wav|tests/data/ljspeech/wavs/LJ047-0242.npy +tests/data/ljspeech/wavs/LJ018-0235.wav|tests/data/ljspeech/wavs/LJ018-0235.npy +tests/data/ljspeech/wavs/LJ006-0130.wav|tests/data/ljspeech/wavs/LJ006-0130.npy +tests/data/ljspeech/wavs/LJ041-0088.wav|tests/data/ljspeech/wavs/LJ041-0088.npy +tests/data/ljspeech/wavs/LJ048-0118.wav|tests/data/ljspeech/wavs/LJ048-0118.npy +tests/data/ljspeech/wavs/LJ008-0184.wav|tests/data/ljspeech/wavs/LJ008-0184.npy +tests/data/ljspeech/wavs/LJ019-0086.wav|tests/data/ljspeech/wavs/LJ019-0086.npy +tests/data/ljspeech/wavs/LJ048-0126.wav|tests/data/ljspeech/wavs/LJ048-0126.npy +tests/data/ljspeech/wavs/LJ041-0124.wav|tests/data/ljspeech/wavs/LJ041-0124.npy +tests/data/ljspeech/wavs/LJ020-0077.wav|tests/data/ljspeech/wavs/LJ020-0077.npy +tests/data/ljspeech/wavs/LJ047-0034.wav|tests/data/ljspeech/wavs/LJ047-0034.npy +tests/data/ljspeech/wavs/LJ003-0169.wav|tests/data/ljspeech/wavs/LJ003-0169.npy +tests/data/ljspeech/wavs/LJ013-0139.wav|tests/data/ljspeech/wavs/LJ013-0139.npy +tests/data/ljspeech/wavs/LJ007-0084.wav|tests/data/ljspeech/wavs/LJ007-0084.npy +tests/data/ljspeech/wavs/LJ030-0096.wav|tests/data/ljspeech/wavs/LJ030-0096.npy +tests/data/ljspeech/wavs/LJ018-0234.wav|tests/data/ljspeech/wavs/LJ018-0234.npy +tests/data/ljspeech/wavs/LJ001-0005.wav|tests/data/ljspeech/wavs/LJ001-0005.npy +tests/data/ljspeech/wavs/LJ030-0217.wav|tests/data/ljspeech/wavs/LJ030-0217.npy +tests/data/ljspeech/wavs/LJ048-0153.wav|tests/data/ljspeech/wavs/LJ048-0153.npy +tests/data/ljspeech/wavs/LJ016-0371.wav|tests/data/ljspeech/wavs/LJ016-0371.npy +tests/data/ljspeech/wavs/LJ022-0020.wav|tests/data/ljspeech/wavs/LJ022-0020.npy +tests/data/ljspeech/wavs/LJ006-0274.wav|tests/data/ljspeech/wavs/LJ006-0274.npy +tests/data/ljspeech/wavs/LJ045-0227.wav|tests/data/ljspeech/wavs/LJ045-0227.npy +tests/data/ljspeech/wavs/LJ040-0053.wav|tests/data/ljspeech/wavs/LJ040-0053.npy +tests/data/ljspeech/wavs/LJ016-0329.wav|tests/data/ljspeech/wavs/LJ016-0329.npy +tests/data/ljspeech/wavs/LJ044-0162.wav|tests/data/ljspeech/wavs/LJ044-0162.npy +tests/data/ljspeech/wavs/LJ044-0088.wav|tests/data/ljspeech/wavs/LJ044-0088.npy +tests/data/ljspeech/wavs/LJ011-0096.wav|tests/data/ljspeech/wavs/LJ011-0096.npy +tests/data/ljspeech/wavs/LJ039-0226.wav|tests/data/ljspeech/wavs/LJ039-0226.npy +tests/data/ljspeech/wavs/LJ001-0171.wav|tests/data/ljspeech/wavs/LJ001-0171.npy +tests/data/ljspeech/wavs/LJ002-0181.wav|tests/data/ljspeech/wavs/LJ002-0181.npy 
+tests/data/ljspeech/wavs/LJ012-0115.wav|tests/data/ljspeech/wavs/LJ012-0115.npy +tests/data/ljspeech/wavs/LJ005-0046.wav|tests/data/ljspeech/wavs/LJ005-0046.npy +tests/data/ljspeech/wavs/LJ004-0085.wav|tests/data/ljspeech/wavs/LJ004-0085.npy +tests/data/ljspeech/wavs/LJ004-0093.wav|tests/data/ljspeech/wavs/LJ004-0093.npy +tests/data/ljspeech/wavs/LJ022-0127.wav|tests/data/ljspeech/wavs/LJ022-0127.npy +tests/data/ljspeech/wavs/LJ009-0155.wav|tests/data/ljspeech/wavs/LJ009-0155.npy +tests/data/ljspeech/wavs/LJ032-0184.wav|tests/data/ljspeech/wavs/LJ032-0184.npy +tests/data/ljspeech/wavs/LJ038-0214.wav|tests/data/ljspeech/wavs/LJ038-0214.npy +tests/data/ljspeech/wavs/LJ049-0147.wav|tests/data/ljspeech/wavs/LJ049-0147.npy +tests/data/ljspeech/wavs/LJ048-0018.wav|tests/data/ljspeech/wavs/LJ048-0018.npy +tests/data/ljspeech/wavs/LJ006-0015.wav|tests/data/ljspeech/wavs/LJ006-0015.npy +tests/data/ljspeech/wavs/LJ004-0037.wav|tests/data/ljspeech/wavs/LJ004-0037.npy +tests/data/ljspeech/wavs/LJ012-0066.wav|tests/data/ljspeech/wavs/LJ012-0066.npy +tests/data/ljspeech/wavs/LJ025-0119.wav|tests/data/ljspeech/wavs/LJ025-0119.npy +tests/data/ljspeech/wavs/LJ031-0178.wav|tests/data/ljspeech/wavs/LJ031-0178.npy +tests/data/ljspeech/wavs/LJ013-0145.wav|tests/data/ljspeech/wavs/LJ013-0145.npy +tests/data/ljspeech/wavs/LJ014-0103.wav|tests/data/ljspeech/wavs/LJ014-0103.npy +tests/data/ljspeech/wavs/LJ014-0326.wav|tests/data/ljspeech/wavs/LJ014-0326.npy +tests/data/ljspeech/wavs/LJ026-0100.wav|tests/data/ljspeech/wavs/LJ026-0100.npy +tests/data/ljspeech/wavs/LJ014-0149.wav|tests/data/ljspeech/wavs/LJ014-0149.npy +tests/data/ljspeech/wavs/LJ016-0356.wav|tests/data/ljspeech/wavs/LJ016-0356.npy +tests/data/ljspeech/wavs/LJ025-0071.wav|tests/data/ljspeech/wavs/LJ025-0071.npy +tests/data/ljspeech/wavs/LJ002-0318.wav|tests/data/ljspeech/wavs/LJ002-0318.npy +tests/data/ljspeech/wavs/LJ049-0129.wav|tests/data/ljspeech/wavs/LJ049-0129.npy +tests/data/ljspeech/wavs/LJ041-0019.wav|tests/data/ljspeech/wavs/LJ041-0019.npy +tests/data/ljspeech/wavs/LJ044-0005.wav|tests/data/ljspeech/wavs/LJ044-0005.npy +tests/data/ljspeech/wavs/LJ040-0056.wav|tests/data/ljspeech/wavs/LJ040-0056.npy +tests/data/ljspeech/wavs/LJ046-0207.wav|tests/data/ljspeech/wavs/LJ046-0207.npy +tests/data/ljspeech/wavs/LJ047-0044.wav|tests/data/ljspeech/wavs/LJ047-0044.npy +tests/data/ljspeech/wavs/LJ017-0078.wav|tests/data/ljspeech/wavs/LJ017-0078.npy +tests/data/ljspeech/wavs/LJ050-0082.wav|tests/data/ljspeech/wavs/LJ050-0082.npy +tests/data/ljspeech/wavs/LJ019-0207.wav|tests/data/ljspeech/wavs/LJ019-0207.npy +tests/data/ljspeech/wavs/LJ016-0137.wav|tests/data/ljspeech/wavs/LJ016-0137.npy +tests/data/ljspeech/wavs/LJ007-0183.wav|tests/data/ljspeech/wavs/LJ007-0183.npy +tests/data/ljspeech/wavs/LJ016-0094.wav|tests/data/ljspeech/wavs/LJ016-0094.npy +tests/data/ljspeech/wavs/LJ009-0298.wav|tests/data/ljspeech/wavs/LJ009-0298.npy +tests/data/ljspeech/wavs/LJ049-0123.wav|tests/data/ljspeech/wavs/LJ049-0123.npy +tests/data/ljspeech/wavs/LJ016-0199.wav|tests/data/ljspeech/wavs/LJ016-0199.npy +tests/data/ljspeech/wavs/LJ009-0186.wav|tests/data/ljspeech/wavs/LJ009-0186.npy +tests/data/ljspeech/wavs/LJ030-0018.wav|tests/data/ljspeech/wavs/LJ030-0018.npy +tests/data/ljspeech/wavs/LJ041-0059.wav|tests/data/ljspeech/wavs/LJ041-0059.npy +tests/data/ljspeech/wavs/LJ047-0013.wav|tests/data/ljspeech/wavs/LJ047-0013.npy +tests/data/ljspeech/wavs/LJ025-0103.wav|tests/data/ljspeech/wavs/LJ025-0103.npy 
+tests/data/ljspeech/wavs/LJ016-0360.wav|tests/data/ljspeech/wavs/LJ016-0360.npy +tests/data/ljspeech/wavs/LJ016-0057.wav|tests/data/ljspeech/wavs/LJ016-0057.npy +tests/data/ljspeech/wavs/LJ010-0043.wav|tests/data/ljspeech/wavs/LJ010-0043.npy +tests/data/ljspeech/wavs/LJ040-0055.wav|tests/data/ljspeech/wavs/LJ040-0055.npy +tests/data/ljspeech/wavs/LJ028-0448.wav|tests/data/ljspeech/wavs/LJ028-0448.npy +tests/data/ljspeech/wavs/LJ007-0074.wav|tests/data/ljspeech/wavs/LJ007-0074.npy +tests/data/ljspeech/wavs/LJ003-0095.wav|tests/data/ljspeech/wavs/LJ003-0095.npy +tests/data/ljspeech/wavs/LJ050-0278.wav|tests/data/ljspeech/wavs/LJ050-0278.npy +tests/data/ljspeech/wavs/LJ028-0505.wav|tests/data/ljspeech/wavs/LJ028-0505.npy +tests/data/ljspeech/wavs/LJ032-0228.wav|tests/data/ljspeech/wavs/LJ032-0228.npy +tests/data/ljspeech/wavs/LJ022-0174.wav|tests/data/ljspeech/wavs/LJ022-0174.npy +tests/data/ljspeech/wavs/LJ049-0030.wav|tests/data/ljspeech/wavs/LJ049-0030.npy +tests/data/ljspeech/wavs/LJ042-0166.wav|tests/data/ljspeech/wavs/LJ042-0166.npy +tests/data/ljspeech/wavs/LJ044-0025.wav|tests/data/ljspeech/wavs/LJ044-0025.npy +tests/data/ljspeech/wavs/LJ034-0098.wav|tests/data/ljspeech/wavs/LJ034-0098.npy +tests/data/ljspeech/wavs/LJ035-0147.wav|tests/data/ljspeech/wavs/LJ035-0147.npy +tests/data/ljspeech/wavs/LJ018-0251.wav|tests/data/ljspeech/wavs/LJ018-0251.npy +tests/data/ljspeech/wavs/LJ028-0326.wav|tests/data/ljspeech/wavs/LJ028-0326.npy +tests/data/ljspeech/wavs/LJ043-0123.wav|tests/data/ljspeech/wavs/LJ043-0123.npy +tests/data/ljspeech/wavs/LJ035-0046.wav|tests/data/ljspeech/wavs/LJ035-0046.npy +tests/data/ljspeech/wavs/LJ030-0072.wav|tests/data/ljspeech/wavs/LJ030-0072.npy +tests/data/ljspeech/wavs/LJ014-0066.wav|tests/data/ljspeech/wavs/LJ014-0066.npy +tests/data/ljspeech/wavs/LJ004-0226.wav|tests/data/ljspeech/wavs/LJ004-0226.npy +tests/data/ljspeech/wavs/LJ043-0059.wav|tests/data/ljspeech/wavs/LJ043-0059.npy +tests/data/ljspeech/wavs/LJ026-0060.wav|tests/data/ljspeech/wavs/LJ026-0060.npy +tests/data/ljspeech/wavs/LJ030-0024.wav|tests/data/ljspeech/wavs/LJ030-0024.npy +tests/data/ljspeech/wavs/LJ035-0195.wav|tests/data/ljspeech/wavs/LJ035-0195.npy +tests/data/ljspeech/wavs/LJ012-0028.wav|tests/data/ljspeech/wavs/LJ012-0028.npy +tests/data/ljspeech/wavs/LJ033-0202.wav|tests/data/ljspeech/wavs/LJ033-0202.npy +tests/data/ljspeech/wavs/LJ028-0427.wav|tests/data/ljspeech/wavs/LJ028-0427.npy +tests/data/ljspeech/wavs/LJ007-0190.wav|tests/data/ljspeech/wavs/LJ007-0190.npy +tests/data/ljspeech/wavs/LJ041-0171.wav|tests/data/ljspeech/wavs/LJ041-0171.npy +tests/data/ljspeech/wavs/LJ042-0216.wav|tests/data/ljspeech/wavs/LJ042-0216.npy +tests/data/ljspeech/wavs/LJ017-0134.wav|tests/data/ljspeech/wavs/LJ017-0134.npy +tests/data/ljspeech/wavs/LJ012-0107.wav|tests/data/ljspeech/wavs/LJ012-0107.npy +tests/data/ljspeech/wavs/LJ007-0216.wav|tests/data/ljspeech/wavs/LJ007-0216.npy +tests/data/ljspeech/wavs/LJ013-0151.wav|tests/data/ljspeech/wavs/LJ013-0151.npy +tests/data/ljspeech/wavs/LJ034-0064.wav|tests/data/ljspeech/wavs/LJ034-0064.npy +tests/data/ljspeech/wavs/LJ020-0035.wav|tests/data/ljspeech/wavs/LJ020-0035.npy +tests/data/ljspeech/wavs/LJ006-0013.wav|tests/data/ljspeech/wavs/LJ006-0013.npy +tests/data/ljspeech/wavs/LJ011-0277.wav|tests/data/ljspeech/wavs/LJ011-0277.npy +tests/data/ljspeech/wavs/LJ020-0022.wav|tests/data/ljspeech/wavs/LJ020-0022.npy +tests/data/ljspeech/wavs/LJ013-0176.wav|tests/data/ljspeech/wavs/LJ013-0176.npy 
+tests/data/ljspeech/wavs/LJ039-0038.wav|tests/data/ljspeech/wavs/LJ039-0038.npy +tests/data/ljspeech/wavs/LJ050-0223.wav|tests/data/ljspeech/wavs/LJ050-0223.npy +tests/data/ljspeech/wavs/LJ019-0284.wav|tests/data/ljspeech/wavs/LJ019-0284.npy +tests/data/ljspeech/wavs/LJ044-0135.wav|tests/data/ljspeech/wavs/LJ044-0135.npy +tests/data/ljspeech/wavs/LJ019-0099.wav|tests/data/ljspeech/wavs/LJ019-0099.npy +tests/data/ljspeech/wavs/LJ038-0075.wav|tests/data/ljspeech/wavs/LJ038-0075.npy +tests/data/ljspeech/wavs/LJ028-0269.wav|tests/data/ljspeech/wavs/LJ028-0269.npy +tests/data/ljspeech/wavs/LJ044-0133.wav|tests/data/ljspeech/wavs/LJ044-0133.npy +tests/data/ljspeech/wavs/LJ003-0173.wav|tests/data/ljspeech/wavs/LJ003-0173.npy +tests/data/ljspeech/wavs/LJ008-0178.wav|tests/data/ljspeech/wavs/LJ008-0178.npy +tests/data/ljspeech/wavs/LJ048-0030.wav|tests/data/ljspeech/wavs/LJ048-0030.npy +tests/data/ljspeech/wavs/LJ033-0070.wav|tests/data/ljspeech/wavs/LJ033-0070.npy +tests/data/ljspeech/wavs/LJ010-0187.wav|tests/data/ljspeech/wavs/LJ010-0187.npy +tests/data/ljspeech/wavs/LJ025-0176.wav|tests/data/ljspeech/wavs/LJ025-0176.npy +tests/data/ljspeech/wavs/LJ032-0055.wav|tests/data/ljspeech/wavs/LJ032-0055.npy +tests/data/ljspeech/wavs/LJ033-0056.wav|tests/data/ljspeech/wavs/LJ033-0056.npy +tests/data/ljspeech/wavs/LJ028-0079.wav|tests/data/ljspeech/wavs/LJ028-0079.npy +tests/data/ljspeech/wavs/LJ045-0099.wav|tests/data/ljspeech/wavs/LJ045-0099.npy +tests/data/ljspeech/wavs/LJ003-0045.wav|tests/data/ljspeech/wavs/LJ003-0045.npy +tests/data/ljspeech/wavs/LJ010-0181.wav|tests/data/ljspeech/wavs/LJ010-0181.npy +tests/data/ljspeech/wavs/LJ001-0057.wav|tests/data/ljspeech/wavs/LJ001-0057.npy +tests/data/ljspeech/wavs/LJ003-0331.wav|tests/data/ljspeech/wavs/LJ003-0331.npy +tests/data/ljspeech/wavs/LJ028-0232.wav|tests/data/ljspeech/wavs/LJ028-0232.npy +tests/data/ljspeech/wavs/LJ029-0197.wav|tests/data/ljspeech/wavs/LJ029-0197.npy +tests/data/ljspeech/wavs/LJ003-0088.wav|tests/data/ljspeech/wavs/LJ003-0088.npy +tests/data/ljspeech/wavs/LJ038-0256.wav|tests/data/ljspeech/wavs/LJ038-0256.npy +tests/data/ljspeech/wavs/LJ008-0229.wav|tests/data/ljspeech/wavs/LJ008-0229.npy +tests/data/ljspeech/wavs/LJ010-0090.wav|tests/data/ljspeech/wavs/LJ010-0090.npy +tests/data/ljspeech/wavs/LJ029-0120.wav|tests/data/ljspeech/wavs/LJ029-0120.npy +tests/data/ljspeech/wavs/LJ041-0123.wav|tests/data/ljspeech/wavs/LJ041-0123.npy +tests/data/ljspeech/wavs/LJ045-0228.wav|tests/data/ljspeech/wavs/LJ045-0228.npy +tests/data/ljspeech/wavs/LJ037-0266.wav|tests/data/ljspeech/wavs/LJ037-0266.npy +tests/data/ljspeech/wavs/LJ009-0203.wav|tests/data/ljspeech/wavs/LJ009-0203.npy +tests/data/ljspeech/wavs/LJ007-0078.wav|tests/data/ljspeech/wavs/LJ007-0078.npy +tests/data/ljspeech/wavs/LJ036-0159.wav|tests/data/ljspeech/wavs/LJ036-0159.npy +tests/data/ljspeech/wavs/LJ014-0132.wav|tests/data/ljspeech/wavs/LJ014-0132.npy +tests/data/ljspeech/wavs/LJ028-0416.wav|tests/data/ljspeech/wavs/LJ028-0416.npy +tests/data/ljspeech/wavs/LJ025-0127.wav|tests/data/ljspeech/wavs/LJ025-0127.npy +tests/data/ljspeech/wavs/LJ005-0240.wav|tests/data/ljspeech/wavs/LJ005-0240.npy +tests/data/ljspeech/wavs/LJ012-0133.wav|tests/data/ljspeech/wavs/LJ012-0133.npy +tests/data/ljspeech/wavs/LJ049-0079.wav|tests/data/ljspeech/wavs/LJ049-0079.npy +tests/data/ljspeech/wavs/LJ029-0205.wav|tests/data/ljspeech/wavs/LJ029-0205.npy +tests/data/ljspeech/wavs/LJ005-0253.wav|tests/data/ljspeech/wavs/LJ005-0253.npy 
+tests/data/ljspeech/wavs/LJ022-0063.wav|tests/data/ljspeech/wavs/LJ022-0063.npy +tests/data/ljspeech/wavs/LJ035-0131.wav|tests/data/ljspeech/wavs/LJ035-0131.npy +tests/data/ljspeech/wavs/LJ002-0321.wav|tests/data/ljspeech/wavs/LJ002-0321.npy +tests/data/ljspeech/wavs/LJ014-0218.wav|tests/data/ljspeech/wavs/LJ014-0218.npy +tests/data/ljspeech/wavs/LJ019-0154.wav|tests/data/ljspeech/wavs/LJ019-0154.npy +tests/data/ljspeech/wavs/LJ049-0193.wav|tests/data/ljspeech/wavs/LJ049-0193.npy +tests/data/ljspeech/wavs/LJ028-0122.wav|tests/data/ljspeech/wavs/LJ028-0122.npy +tests/data/ljspeech/wavs/LJ014-0175.wav|tests/data/ljspeech/wavs/LJ014-0175.npy +tests/data/ljspeech/wavs/LJ002-0301.wav|tests/data/ljspeech/wavs/LJ002-0301.npy +tests/data/ljspeech/wavs/LJ002-0003.wav|tests/data/ljspeech/wavs/LJ002-0003.npy +tests/data/ljspeech/wavs/LJ003-0217.wav|tests/data/ljspeech/wavs/LJ003-0217.npy +tests/data/ljspeech/wavs/LJ006-0134.wav|tests/data/ljspeech/wavs/LJ006-0134.npy +tests/data/ljspeech/wavs/LJ029-0200.wav|tests/data/ljspeech/wavs/LJ029-0200.npy +tests/data/ljspeech/wavs/LJ032-0187.wav|tests/data/ljspeech/wavs/LJ032-0187.npy +tests/data/ljspeech/wavs/LJ040-0143.wav|tests/data/ljspeech/wavs/LJ040-0143.npy +tests/data/ljspeech/wavs/LJ019-0069.wav|tests/data/ljspeech/wavs/LJ019-0069.npy +tests/data/ljspeech/wavs/LJ038-0285.wav|tests/data/ljspeech/wavs/LJ038-0285.npy +tests/data/ljspeech/wavs/LJ028-0487.wav|tests/data/ljspeech/wavs/LJ028-0487.npy +tests/data/ljspeech/wavs/LJ029-0082.wav|tests/data/ljspeech/wavs/LJ029-0082.npy +tests/data/ljspeech/wavs/LJ014-0327.wav|tests/data/ljspeech/wavs/LJ014-0327.npy +tests/data/ljspeech/wavs/LJ028-0405.wav|tests/data/ljspeech/wavs/LJ028-0405.npy +tests/data/ljspeech/wavs/LJ019-0043.wav|tests/data/ljspeech/wavs/LJ019-0043.npy +tests/data/ljspeech/wavs/LJ002-0243.wav|tests/data/ljspeech/wavs/LJ002-0243.npy +tests/data/ljspeech/wavs/LJ026-0158.wav|tests/data/ljspeech/wavs/LJ026-0158.npy +tests/data/ljspeech/wavs/LJ043-0078.wav|tests/data/ljspeech/wavs/LJ043-0078.npy +tests/data/ljspeech/wavs/LJ026-0135.wav|tests/data/ljspeech/wavs/LJ026-0135.npy +tests/data/ljspeech/wavs/LJ048-0003.wav|tests/data/ljspeech/wavs/LJ048-0003.npy +tests/data/ljspeech/wavs/LJ018-0302.wav|tests/data/ljspeech/wavs/LJ018-0302.npy +tests/data/ljspeech/wavs/LJ018-0376.wav|tests/data/ljspeech/wavs/LJ018-0376.npy +tests/data/ljspeech/wavs/LJ005-0061.wav|tests/data/ljspeech/wavs/LJ005-0061.npy +tests/data/ljspeech/wavs/LJ040-0018.wav|tests/data/ljspeech/wavs/LJ040-0018.npy +tests/data/ljspeech/wavs/LJ019-0264.wav|tests/data/ljspeech/wavs/LJ019-0264.npy +tests/data/ljspeech/wavs/LJ048-0148.wav|tests/data/ljspeech/wavs/LJ048-0148.npy +tests/data/ljspeech/wavs/LJ030-0254.wav|tests/data/ljspeech/wavs/LJ030-0254.npy +tests/data/ljspeech/wavs/LJ042-0106.wav|tests/data/ljspeech/wavs/LJ042-0106.npy +tests/data/ljspeech/wavs/LJ043-0126.wav|tests/data/ljspeech/wavs/LJ043-0126.npy +tests/data/ljspeech/wavs/LJ017-0244.wav|tests/data/ljspeech/wavs/LJ017-0244.npy +tests/data/ljspeech/wavs/LJ004-0100.wav|tests/data/ljspeech/wavs/LJ004-0100.npy +tests/data/ljspeech/wavs/LJ013-0199.wav|tests/data/ljspeech/wavs/LJ013-0199.npy +tests/data/ljspeech/wavs/LJ044-0087.wav|tests/data/ljspeech/wavs/LJ044-0087.npy +tests/data/ljspeech/wavs/LJ010-0300.wav|tests/data/ljspeech/wavs/LJ010-0300.npy +tests/data/ljspeech/wavs/LJ021-0115.wav|tests/data/ljspeech/wavs/LJ021-0115.npy +tests/data/ljspeech/wavs/LJ005-0143.wav|tests/data/ljspeech/wavs/LJ005-0143.npy 
+tests/data/ljspeech/wavs/LJ030-0239.wav|tests/data/ljspeech/wavs/LJ030-0239.npy +tests/data/ljspeech/wavs/LJ005-0125.wav|tests/data/ljspeech/wavs/LJ005-0125.npy +tests/data/ljspeech/wavs/LJ008-0272.wav|tests/data/ljspeech/wavs/LJ008-0272.npy +tests/data/ljspeech/wavs/LJ011-0178.wav|tests/data/ljspeech/wavs/LJ011-0178.npy +tests/data/ljspeech/wavs/LJ018-0320.wav|tests/data/ljspeech/wavs/LJ018-0320.npy +tests/data/ljspeech/wavs/LJ045-0038.wav|tests/data/ljspeech/wavs/LJ045-0038.npy +tests/data/ljspeech/wavs/LJ011-0143.wav|tests/data/ljspeech/wavs/LJ011-0143.npy +tests/data/ljspeech/wavs/LJ018-0348.wav|tests/data/ljspeech/wavs/LJ018-0348.npy +tests/data/ljspeech/wavs/LJ050-0187.wav|tests/data/ljspeech/wavs/LJ050-0187.npy +tests/data/ljspeech/wavs/LJ018-0215.wav|tests/data/ljspeech/wavs/LJ018-0215.npy +tests/data/ljspeech/wavs/LJ009-0153.wav|tests/data/ljspeech/wavs/LJ009-0153.npy +tests/data/ljspeech/wavs/LJ038-0208.wav|tests/data/ljspeech/wavs/LJ038-0208.npy +tests/data/ljspeech/wavs/LJ041-0199.wav|tests/data/ljspeech/wavs/LJ041-0199.npy +tests/data/ljspeech/wavs/LJ002-0021.wav|tests/data/ljspeech/wavs/LJ002-0021.npy +tests/data/ljspeech/wavs/LJ029-0186.wav|tests/data/ljspeech/wavs/LJ029-0186.npy +tests/data/ljspeech/wavs/LJ010-0073.wav|tests/data/ljspeech/wavs/LJ010-0073.npy +tests/data/ljspeech/wavs/LJ046-0042.wav|tests/data/ljspeech/wavs/LJ046-0042.npy +tests/data/ljspeech/wavs/LJ007-0226.wav|tests/data/ljspeech/wavs/LJ007-0226.npy +tests/data/ljspeech/wavs/LJ010-0227.wav|tests/data/ljspeech/wavs/LJ010-0227.npy +tests/data/ljspeech/wavs/LJ045-0090.wav|tests/data/ljspeech/wavs/LJ045-0090.npy +tests/data/ljspeech/wavs/LJ048-0266.wav|tests/data/ljspeech/wavs/LJ048-0266.npy +tests/data/ljspeech/wavs/LJ016-0307.wav|tests/data/ljspeech/wavs/LJ016-0307.npy +tests/data/ljspeech/wavs/LJ042-0204.wav|tests/data/ljspeech/wavs/LJ042-0204.npy +tests/data/ljspeech/wavs/LJ033-0181.wav|tests/data/ljspeech/wavs/LJ033-0181.npy +tests/data/ljspeech/wavs/LJ047-0165.wav|tests/data/ljspeech/wavs/LJ047-0165.npy +tests/data/ljspeech/wavs/LJ039-0122.wav|tests/data/ljspeech/wavs/LJ039-0122.npy +tests/data/ljspeech/wavs/LJ044-0210.wav|tests/data/ljspeech/wavs/LJ044-0210.npy +tests/data/ljspeech/wavs/LJ016-0145.wav|tests/data/ljspeech/wavs/LJ016-0145.npy +tests/data/ljspeech/wavs/LJ046-0213.wav|tests/data/ljspeech/wavs/LJ046-0213.npy +tests/data/ljspeech/wavs/LJ008-0299.wav|tests/data/ljspeech/wavs/LJ008-0299.npy +tests/data/ljspeech/wavs/LJ049-0110.wav|tests/data/ljspeech/wavs/LJ049-0110.npy +tests/data/ljspeech/wavs/LJ011-0163.wav|tests/data/ljspeech/wavs/LJ011-0163.npy +tests/data/ljspeech/wavs/LJ042-0194.wav|tests/data/ljspeech/wavs/LJ042-0194.npy +tests/data/ljspeech/wavs/LJ048-0106.wav|tests/data/ljspeech/wavs/LJ048-0106.npy +tests/data/ljspeech/wavs/LJ035-0049.wav|tests/data/ljspeech/wavs/LJ035-0049.npy +tests/data/ljspeech/wavs/LJ008-0204.wav|tests/data/ljspeech/wavs/LJ008-0204.npy +tests/data/ljspeech/wavs/LJ005-0118.wav|tests/data/ljspeech/wavs/LJ005-0118.npy +tests/data/ljspeech/wavs/LJ014-0340.wav|tests/data/ljspeech/wavs/LJ014-0340.npy +tests/data/ljspeech/wavs/LJ015-0236.wav|tests/data/ljspeech/wavs/LJ015-0236.npy +tests/data/ljspeech/wavs/LJ049-0201.wav|tests/data/ljspeech/wavs/LJ049-0201.npy +tests/data/ljspeech/wavs/LJ048-0071.wav|tests/data/ljspeech/wavs/LJ048-0071.npy +tests/data/ljspeech/wavs/LJ028-0105.wav|tests/data/ljspeech/wavs/LJ028-0105.npy +tests/data/ljspeech/wavs/LJ033-0128.wav|tests/data/ljspeech/wavs/LJ033-0128.npy 
+tests/data/ljspeech/wavs/LJ029-0014.wav|tests/data/ljspeech/wavs/LJ029-0014.npy +tests/data/ljspeech/wavs/LJ044-0127.wav|tests/data/ljspeech/wavs/LJ044-0127.npy +tests/data/ljspeech/wavs/LJ046-0236.wav|tests/data/ljspeech/wavs/LJ046-0236.npy +tests/data/ljspeech/wavs/LJ012-0072.wav|tests/data/ljspeech/wavs/LJ012-0072.npy +tests/data/ljspeech/wavs/LJ029-0166.wav|tests/data/ljspeech/wavs/LJ029-0166.npy +tests/data/ljspeech/wavs/LJ034-0142.wav|tests/data/ljspeech/wavs/LJ034-0142.npy +tests/data/ljspeech/wavs/LJ019-0379.wav|tests/data/ljspeech/wavs/LJ019-0379.npy +tests/data/ljspeech/wavs/LJ027-0009.wav|tests/data/ljspeech/wavs/LJ027-0009.npy +tests/data/ljspeech/wavs/LJ040-0152.wav|tests/data/ljspeech/wavs/LJ040-0152.npy +tests/data/ljspeech/wavs/LJ040-0188.wav|tests/data/ljspeech/wavs/LJ040-0188.npy +tests/data/ljspeech/wavs/LJ047-0241.wav|tests/data/ljspeech/wavs/LJ047-0241.npy +tests/data/ljspeech/wavs/LJ029-0108.wav|tests/data/ljspeech/wavs/LJ029-0108.npy +tests/data/ljspeech/wavs/LJ050-0190.wav|tests/data/ljspeech/wavs/LJ050-0190.npy +tests/data/ljspeech/wavs/LJ012-0067.wav|tests/data/ljspeech/wavs/LJ012-0067.npy +tests/data/ljspeech/wavs/LJ016-0365.wav|tests/data/ljspeech/wavs/LJ016-0365.npy +tests/data/ljspeech/wavs/LJ040-0059.wav|tests/data/ljspeech/wavs/LJ040-0059.npy +tests/data/ljspeech/wavs/LJ014-0198.wav|tests/data/ljspeech/wavs/LJ014-0198.npy +tests/data/ljspeech/wavs/LJ020-0058.wav|tests/data/ljspeech/wavs/LJ020-0058.npy +tests/data/ljspeech/wavs/LJ003-0291.wav|tests/data/ljspeech/wavs/LJ003-0291.npy +tests/data/ljspeech/wavs/LJ031-0129.wav|tests/data/ljspeech/wavs/LJ031-0129.npy +tests/data/ljspeech/wavs/LJ012-0145.wav|tests/data/ljspeech/wavs/LJ012-0145.npy +tests/data/ljspeech/wavs/LJ046-0044.wav|tests/data/ljspeech/wavs/LJ046-0044.npy +tests/data/ljspeech/wavs/LJ045-0059.wav|tests/data/ljspeech/wavs/LJ045-0059.npy +tests/data/ljspeech/wavs/LJ043-0058.wav|tests/data/ljspeech/wavs/LJ043-0058.npy +tests/data/ljspeech/wavs/LJ028-0108.wav|tests/data/ljspeech/wavs/LJ028-0108.npy +tests/data/ljspeech/wavs/LJ047-0204.wav|tests/data/ljspeech/wavs/LJ047-0204.npy +tests/data/ljspeech/wavs/LJ044-0219.wav|tests/data/ljspeech/wavs/LJ044-0219.npy +tests/data/ljspeech/wavs/LJ042-0183.wav|tests/data/ljspeech/wavs/LJ042-0183.npy +tests/data/ljspeech/wavs/LJ019-0375.wav|tests/data/ljspeech/wavs/LJ019-0375.npy +tests/data/ljspeech/wavs/LJ004-0046.wav|tests/data/ljspeech/wavs/LJ004-0046.npy +tests/data/ljspeech/wavs/LJ013-0029.wav|tests/data/ljspeech/wavs/LJ013-0029.npy +tests/data/ljspeech/wavs/LJ013-0063.wav|tests/data/ljspeech/wavs/LJ013-0063.npy +tests/data/ljspeech/wavs/LJ006-0063.wav|tests/data/ljspeech/wavs/LJ006-0063.npy +tests/data/ljspeech/wavs/LJ025-0146.wav|tests/data/ljspeech/wavs/LJ025-0146.npy +tests/data/ljspeech/wavs/LJ045-0248.wav|tests/data/ljspeech/wavs/LJ045-0248.npy +tests/data/ljspeech/wavs/LJ017-0057.wav|tests/data/ljspeech/wavs/LJ017-0057.npy +tests/data/ljspeech/wavs/LJ031-0053.wav|tests/data/ljspeech/wavs/LJ031-0053.npy +tests/data/ljspeech/wavs/LJ003-0317.wav|tests/data/ljspeech/wavs/LJ003-0317.npy +tests/data/ljspeech/wavs/LJ049-0152.wav|tests/data/ljspeech/wavs/LJ049-0152.npy +tests/data/ljspeech/wavs/LJ019-0085.wav|tests/data/ljspeech/wavs/LJ019-0085.npy +tests/data/ljspeech/wavs/LJ014-0027.wav|tests/data/ljspeech/wavs/LJ014-0027.npy +tests/data/ljspeech/wavs/LJ025-0165.wav|tests/data/ljspeech/wavs/LJ025-0165.npy +tests/data/ljspeech/wavs/LJ019-0070.wav|tests/data/ljspeech/wavs/LJ019-0070.npy 
+tests/data/ljspeech/wavs/LJ002-0066.wav|tests/data/ljspeech/wavs/LJ002-0066.npy +tests/data/ljspeech/wavs/LJ041-0049.wav|tests/data/ljspeech/wavs/LJ041-0049.npy +tests/data/ljspeech/wavs/LJ015-0064.wav|tests/data/ljspeech/wavs/LJ015-0064.npy +tests/data/ljspeech/wavs/LJ006-0294.wav|tests/data/ljspeech/wavs/LJ006-0294.npy +tests/data/ljspeech/wavs/LJ046-0228.wav|tests/data/ljspeech/wavs/LJ046-0228.npy +tests/data/ljspeech/wavs/LJ005-0268.wav|tests/data/ljspeech/wavs/LJ005-0268.npy +tests/data/ljspeech/wavs/LJ030-0030.wav|tests/data/ljspeech/wavs/LJ030-0030.npy +tests/data/ljspeech/wavs/LJ006-0254.wav|tests/data/ljspeech/wavs/LJ006-0254.npy +tests/data/ljspeech/wavs/LJ011-0200.wav|tests/data/ljspeech/wavs/LJ011-0200.npy +tests/data/ljspeech/wavs/LJ029-0114.wav|tests/data/ljspeech/wavs/LJ029-0114.npy +tests/data/ljspeech/wavs/LJ010-0291.wav|tests/data/ljspeech/wavs/LJ010-0291.npy +tests/data/ljspeech/wavs/LJ041-0152.wav|tests/data/ljspeech/wavs/LJ041-0152.npy +tests/data/ljspeech/wavs/LJ035-0026.wav|tests/data/ljspeech/wavs/LJ035-0026.npy +tests/data/ljspeech/wavs/LJ012-0170.wav|tests/data/ljspeech/wavs/LJ012-0170.npy +tests/data/ljspeech/wavs/LJ011-0222.wav|tests/data/ljspeech/wavs/LJ011-0222.npy +tests/data/ljspeech/wavs/LJ034-0182.wav|tests/data/ljspeech/wavs/LJ034-0182.npy +tests/data/ljspeech/wavs/LJ003-0219.wav|tests/data/ljspeech/wavs/LJ003-0219.npy +tests/data/ljspeech/wavs/LJ006-0090.wav|tests/data/ljspeech/wavs/LJ006-0090.npy +tests/data/ljspeech/wavs/LJ035-0117.wav|tests/data/ljspeech/wavs/LJ035-0117.npy +tests/data/ljspeech/wavs/LJ013-0167.wav|tests/data/ljspeech/wavs/LJ013-0167.npy +tests/data/ljspeech/wavs/LJ033-0208.wav|tests/data/ljspeech/wavs/LJ033-0208.npy +tests/data/ljspeech/wavs/LJ026-0160.wav|tests/data/ljspeech/wavs/LJ026-0160.npy +tests/data/ljspeech/wavs/LJ045-0135.wav|tests/data/ljspeech/wavs/LJ045-0135.npy +tests/data/ljspeech/wavs/LJ044-0173.wav|tests/data/ljspeech/wavs/LJ044-0173.npy +tests/data/ljspeech/wavs/LJ038-0161.wav|tests/data/ljspeech/wavs/LJ038-0161.npy +tests/data/ljspeech/wavs/LJ048-0269.wav|tests/data/ljspeech/wavs/LJ048-0269.npy +tests/data/ljspeech/wavs/LJ047-0043.wav|tests/data/ljspeech/wavs/LJ047-0043.npy +tests/data/ljspeech/wavs/LJ030-0055.wav|tests/data/ljspeech/wavs/LJ030-0055.npy +tests/data/ljspeech/wavs/LJ043-0048.wav|tests/data/ljspeech/wavs/LJ043-0048.npy +tests/data/ljspeech/wavs/LJ008-0209.wav|tests/data/ljspeech/wavs/LJ008-0209.npy +tests/data/ljspeech/wavs/LJ031-0220.wav|tests/data/ljspeech/wavs/LJ031-0220.npy +tests/data/ljspeech/wavs/LJ016-0023.wav|tests/data/ljspeech/wavs/LJ016-0023.npy +tests/data/ljspeech/wavs/LJ003-0290.wav|tests/data/ljspeech/wavs/LJ003-0290.npy +tests/data/ljspeech/wavs/LJ018-0303.wav|tests/data/ljspeech/wavs/LJ018-0303.npy +tests/data/ljspeech/wavs/LJ042-0026.wav|tests/data/ljspeech/wavs/LJ042-0026.npy +tests/data/ljspeech/wavs/LJ042-0105.wav|tests/data/ljspeech/wavs/LJ042-0105.npy +tests/data/ljspeech/wavs/LJ009-0247.wav|tests/data/ljspeech/wavs/LJ009-0247.npy +tests/data/ljspeech/wavs/LJ017-0137.wav|tests/data/ljspeech/wavs/LJ017-0137.npy +tests/data/ljspeech/wavs/LJ015-0299.wav|tests/data/ljspeech/wavs/LJ015-0299.npy +tests/data/ljspeech/wavs/LJ030-0104.wav|tests/data/ljspeech/wavs/LJ030-0104.npy +tests/data/ljspeech/wavs/LJ048-0250.wav|tests/data/ljspeech/wavs/LJ048-0250.npy +tests/data/ljspeech/wavs/LJ022-0175.wav|tests/data/ljspeech/wavs/LJ022-0175.npy +tests/data/ljspeech/wavs/LJ009-0016.wav|tests/data/ljspeech/wavs/LJ009-0016.npy 
+tests/data/ljspeech/wavs/LJ004-0245.wav|tests/data/ljspeech/wavs/LJ004-0245.npy +tests/data/ljspeech/wavs/LJ017-0098.wav|tests/data/ljspeech/wavs/LJ017-0098.npy +tests/data/ljspeech/wavs/LJ050-0245.wav|tests/data/ljspeech/wavs/LJ050-0245.npy +tests/data/ljspeech/wavs/LJ002-0207.wav|tests/data/ljspeech/wavs/LJ002-0207.npy +tests/data/ljspeech/wavs/LJ043-0066.wav|tests/data/ljspeech/wavs/LJ043-0066.npy +tests/data/ljspeech/wavs/LJ018-0186.wav|tests/data/ljspeech/wavs/LJ018-0186.npy +tests/data/ljspeech/wavs/LJ015-0245.wav|tests/data/ljspeech/wavs/LJ015-0245.npy +tests/data/ljspeech/wavs/LJ019-0041.wav|tests/data/ljspeech/wavs/LJ019-0041.npy +tests/data/ljspeech/wavs/LJ018-0307.wav|tests/data/ljspeech/wavs/LJ018-0307.npy +tests/data/ljspeech/wavs/LJ021-0101.wav|tests/data/ljspeech/wavs/LJ021-0101.npy +tests/data/ljspeech/wavs/LJ031-0204.wav|tests/data/ljspeech/wavs/LJ031-0204.npy +tests/data/ljspeech/wavs/LJ031-0057.wav|tests/data/ljspeech/wavs/LJ031-0057.npy +tests/data/ljspeech/wavs/LJ032-0083.wav|tests/data/ljspeech/wavs/LJ032-0083.npy +tests/data/ljspeech/wavs/LJ028-0174.wav|tests/data/ljspeech/wavs/LJ028-0174.npy +tests/data/ljspeech/wavs/LJ019-0167.wav|tests/data/ljspeech/wavs/LJ019-0167.npy +tests/data/ljspeech/wavs/LJ019-0019.wav|tests/data/ljspeech/wavs/LJ019-0019.npy +tests/data/ljspeech/wavs/LJ034-0100.wav|tests/data/ljspeech/wavs/LJ034-0100.npy +tests/data/ljspeech/wavs/LJ019-0025.wav|tests/data/ljspeech/wavs/LJ019-0025.npy +tests/data/ljspeech/wavs/LJ030-0034.wav|tests/data/ljspeech/wavs/LJ030-0034.npy +tests/data/ljspeech/wavs/LJ034-0085.wav|tests/data/ljspeech/wavs/LJ034-0085.npy +tests/data/ljspeech/wavs/LJ050-0061.wav|tests/data/ljspeech/wavs/LJ050-0061.npy +tests/data/ljspeech/wavs/LJ019-0201.wav|tests/data/ljspeech/wavs/LJ019-0201.npy +tests/data/ljspeech/wavs/LJ014-0331.wav|tests/data/ljspeech/wavs/LJ014-0331.npy +tests/data/ljspeech/wavs/LJ017-0079.wav|tests/data/ljspeech/wavs/LJ017-0079.npy +tests/data/ljspeech/wavs/LJ014-0006.wav|tests/data/ljspeech/wavs/LJ014-0006.npy +tests/data/ljspeech/wavs/LJ019-0027.wav|tests/data/ljspeech/wavs/LJ019-0027.npy +tests/data/ljspeech/wavs/LJ046-0218.wav|tests/data/ljspeech/wavs/LJ046-0218.npy +tests/data/ljspeech/wavs/LJ030-0021.wav|tests/data/ljspeech/wavs/LJ030-0021.npy +tests/data/ljspeech/wavs/LJ040-0026.wav|tests/data/ljspeech/wavs/LJ040-0026.npy +tests/data/ljspeech/wavs/LJ033-0136.wav|tests/data/ljspeech/wavs/LJ033-0136.npy +tests/data/ljspeech/wavs/LJ032-0249.wav|tests/data/ljspeech/wavs/LJ032-0249.npy +tests/data/ljspeech/wavs/LJ015-0087.wav|tests/data/ljspeech/wavs/LJ015-0087.npy +tests/data/ljspeech/wavs/LJ038-0204.wav|tests/data/ljspeech/wavs/LJ038-0204.npy +tests/data/ljspeech/wavs/LJ016-0406.wav|tests/data/ljspeech/wavs/LJ016-0406.npy +tests/data/ljspeech/wavs/LJ019-0350.wav|tests/data/ljspeech/wavs/LJ019-0350.npy +tests/data/ljspeech/wavs/LJ009-0145.wav|tests/data/ljspeech/wavs/LJ009-0145.npy +tests/data/ljspeech/wavs/LJ022-0119.wav|tests/data/ljspeech/wavs/LJ022-0119.npy +tests/data/ljspeech/wavs/LJ019-0125.wav|tests/data/ljspeech/wavs/LJ019-0125.npy +tests/data/ljspeech/wavs/LJ007-0118.wav|tests/data/ljspeech/wavs/LJ007-0118.npy +tests/data/ljspeech/wavs/LJ048-0105.wav|tests/data/ljspeech/wavs/LJ048-0105.npy +tests/data/ljspeech/wavs/LJ015-0096.wav|tests/data/ljspeech/wavs/LJ015-0096.npy +tests/data/ljspeech/wavs/LJ034-0032.wav|tests/data/ljspeech/wavs/LJ034-0032.npy +tests/data/ljspeech/wavs/LJ005-0011.wav|tests/data/ljspeech/wavs/LJ005-0011.npy 
+tests/data/ljspeech/wavs/LJ041-0031.wav|tests/data/ljspeech/wavs/LJ041-0031.npy +tests/data/ljspeech/wavs/LJ046-0090.wav|tests/data/ljspeech/wavs/LJ046-0090.npy +tests/data/ljspeech/wavs/LJ026-0014.wav|tests/data/ljspeech/wavs/LJ026-0014.npy +tests/data/ljspeech/wavs/LJ012-0023.wav|tests/data/ljspeech/wavs/LJ012-0023.npy +tests/data/ljspeech/wavs/LJ007-0034.wav|tests/data/ljspeech/wavs/LJ007-0034.npy +tests/data/ljspeech/wavs/LJ044-0009.wav|tests/data/ljspeech/wavs/LJ044-0009.npy +tests/data/ljspeech/wavs/LJ022-0004.wav|tests/data/ljspeech/wavs/LJ022-0004.npy +tests/data/ljspeech/wavs/LJ049-0136.wav|tests/data/ljspeech/wavs/LJ049-0136.npy +tests/data/ljspeech/wavs/LJ050-0151.wav|tests/data/ljspeech/wavs/LJ050-0151.npy +tests/data/ljspeech/wavs/LJ003-0316.wav|tests/data/ljspeech/wavs/LJ003-0316.npy +tests/data/ljspeech/wavs/LJ042-0227.wav|tests/data/ljspeech/wavs/LJ042-0227.npy +tests/data/ljspeech/wavs/LJ050-0139.wav|tests/data/ljspeech/wavs/LJ050-0139.npy +tests/data/ljspeech/wavs/LJ006-0057.wav|tests/data/ljspeech/wavs/LJ006-0057.npy +tests/data/ljspeech/wavs/LJ042-0046.wav|tests/data/ljspeech/wavs/LJ042-0046.npy +tests/data/ljspeech/wavs/LJ004-0013.wav|tests/data/ljspeech/wavs/LJ004-0013.npy +tests/data/ljspeech/wavs/LJ007-0050.wav|tests/data/ljspeech/wavs/LJ007-0050.npy +tests/data/ljspeech/wavs/LJ007-0094.wav|tests/data/ljspeech/wavs/LJ007-0094.npy +tests/data/ljspeech/wavs/LJ039-0077.wav|tests/data/ljspeech/wavs/LJ039-0077.npy +tests/data/ljspeech/wavs/LJ009-0225.wav|tests/data/ljspeech/wavs/LJ009-0225.npy +tests/data/ljspeech/wavs/LJ042-0122.wav|tests/data/ljspeech/wavs/LJ042-0122.npy +tests/data/ljspeech/wavs/LJ048-0175.wav|tests/data/ljspeech/wavs/LJ048-0175.npy +tests/data/ljspeech/wavs/LJ006-0252.wav|tests/data/ljspeech/wavs/LJ006-0252.npy +tests/data/ljspeech/wavs/LJ006-0224.wav|tests/data/ljspeech/wavs/LJ006-0224.npy +tests/data/ljspeech/wavs/LJ039-0235.wav|tests/data/ljspeech/wavs/LJ039-0235.npy +tests/data/ljspeech/wavs/LJ028-0388.wav|tests/data/ljspeech/wavs/LJ028-0388.npy +tests/data/ljspeech/wavs/LJ020-0074.wav|tests/data/ljspeech/wavs/LJ020-0074.npy +tests/data/ljspeech/wavs/LJ002-0209.wav|tests/data/ljspeech/wavs/LJ002-0209.npy +tests/data/ljspeech/wavs/LJ007-0029.wav|tests/data/ljspeech/wavs/LJ007-0029.npy +tests/data/ljspeech/wavs/LJ047-0108.wav|tests/data/ljspeech/wavs/LJ047-0108.npy +tests/data/ljspeech/wavs/LJ008-0051.wav|tests/data/ljspeech/wavs/LJ008-0051.npy +tests/data/ljspeech/wavs/LJ029-0028.wav|tests/data/ljspeech/wavs/LJ029-0028.npy +tests/data/ljspeech/wavs/LJ046-0199.wav|tests/data/ljspeech/wavs/LJ046-0199.npy +tests/data/ljspeech/wavs/LJ041-0196.wav|tests/data/ljspeech/wavs/LJ041-0196.npy +tests/data/ljspeech/wavs/LJ044-0149.wav|tests/data/ljspeech/wavs/LJ044-0149.npy +tests/data/ljspeech/wavs/LJ035-0047.wav|tests/data/ljspeech/wavs/LJ035-0047.npy +tests/data/ljspeech/wavs/LJ012-0074.wav|tests/data/ljspeech/wavs/LJ012-0074.npy +tests/data/ljspeech/wavs/LJ002-0074.wav|tests/data/ljspeech/wavs/LJ002-0074.npy +tests/data/ljspeech/wavs/LJ045-0023.wav|tests/data/ljspeech/wavs/LJ045-0023.npy +tests/data/ljspeech/wavs/LJ002-0079.wav|tests/data/ljspeech/wavs/LJ002-0079.npy +tests/data/ljspeech/wavs/LJ011-0131.wav|tests/data/ljspeech/wavs/LJ011-0131.npy +tests/data/ljspeech/wavs/LJ020-0024.wav|tests/data/ljspeech/wavs/LJ020-0024.npy +tests/data/ljspeech/wavs/LJ036-0131.wav|tests/data/ljspeech/wavs/LJ036-0131.npy +tests/data/ljspeech/wavs/LJ046-0039.wav|tests/data/ljspeech/wavs/LJ046-0039.npy 
+tests/data/ljspeech/wavs/LJ001-0169.wav|tests/data/ljspeech/wavs/LJ001-0169.npy +tests/data/ljspeech/wavs/LJ003-0167.wav|tests/data/ljspeech/wavs/LJ003-0167.npy +tests/data/ljspeech/wavs/LJ028-0219.wav|tests/data/ljspeech/wavs/LJ028-0219.npy +tests/data/ljspeech/wavs/LJ050-0252.wav|tests/data/ljspeech/wavs/LJ050-0252.npy +tests/data/ljspeech/wavs/LJ044-0097.wav|tests/data/ljspeech/wavs/LJ044-0097.npy +tests/data/ljspeech/wavs/LJ049-0033.wav|tests/data/ljspeech/wavs/LJ049-0033.npy +tests/data/ljspeech/wavs/LJ044-0206.wav|tests/data/ljspeech/wavs/LJ044-0206.npy +tests/data/ljspeech/wavs/LJ035-0016.wav|tests/data/ljspeech/wavs/LJ035-0016.npy +tests/data/ljspeech/wavs/LJ017-0246.wav|tests/data/ljspeech/wavs/LJ017-0246.npy +tests/data/ljspeech/wavs/LJ034-0207.wav|tests/data/ljspeech/wavs/LJ034-0207.npy +tests/data/ljspeech/wavs/LJ027-0034.wav|tests/data/ljspeech/wavs/LJ027-0034.npy +tests/data/ljspeech/wavs/LJ047-0178.wav|tests/data/ljspeech/wavs/LJ047-0178.npy +tests/data/ljspeech/wavs/LJ044-0104.wav|tests/data/ljspeech/wavs/LJ044-0104.npy +tests/data/ljspeech/wavs/LJ010-0109.wav|tests/data/ljspeech/wavs/LJ010-0109.npy +tests/data/ljspeech/wavs/LJ012-0200.wav|tests/data/ljspeech/wavs/LJ012-0200.npy +tests/data/ljspeech/wavs/LJ048-0184.wav|tests/data/ljspeech/wavs/LJ048-0184.npy +tests/data/ljspeech/wavs/LJ001-0154.wav|tests/data/ljspeech/wavs/LJ001-0154.npy +tests/data/ljspeech/wavs/LJ011-0212.wav|tests/data/ljspeech/wavs/LJ011-0212.npy +tests/data/ljspeech/wavs/LJ019-0139.wav|tests/data/ljspeech/wavs/LJ019-0139.npy +tests/data/ljspeech/wavs/LJ017-0099.wav|tests/data/ljspeech/wavs/LJ017-0099.npy +tests/data/ljspeech/wavs/LJ037-0084.wav|tests/data/ljspeech/wavs/LJ037-0084.npy +tests/data/ljspeech/wavs/LJ048-0017.wav|tests/data/ljspeech/wavs/LJ048-0017.npy +tests/data/ljspeech/wavs/LJ004-0066.wav|tests/data/ljspeech/wavs/LJ004-0066.npy +tests/data/ljspeech/wavs/LJ034-0023.wav|tests/data/ljspeech/wavs/LJ034-0023.npy +tests/data/ljspeech/wavs/LJ027-0143.wav|tests/data/ljspeech/wavs/LJ027-0143.npy +tests/data/ljspeech/wavs/LJ050-0001.wav|tests/data/ljspeech/wavs/LJ050-0001.npy +tests/data/ljspeech/wavs/LJ005-0098.wav|tests/data/ljspeech/wavs/LJ005-0098.npy +tests/data/ljspeech/wavs/LJ009-0295.wav|tests/data/ljspeech/wavs/LJ009-0295.npy +tests/data/ljspeech/wavs/LJ013-0133.wav|tests/data/ljspeech/wavs/LJ013-0133.npy +tests/data/ljspeech/wavs/LJ037-0147.wav|tests/data/ljspeech/wavs/LJ037-0147.npy +tests/data/ljspeech/wavs/LJ028-0324.wav|tests/data/ljspeech/wavs/LJ028-0324.npy +tests/data/ljspeech/wavs/LJ047-0152.wav|tests/data/ljspeech/wavs/LJ047-0152.npy +tests/data/ljspeech/wavs/LJ048-0186.wav|tests/data/ljspeech/wavs/LJ048-0186.npy +tests/data/ljspeech/wavs/LJ049-0022.wav|tests/data/ljspeech/wavs/LJ049-0022.npy +tests/data/ljspeech/wavs/LJ005-0006.wav|tests/data/ljspeech/wavs/LJ005-0006.npy +tests/data/ljspeech/wavs/LJ012-0100.wav|tests/data/ljspeech/wavs/LJ012-0100.npy +tests/data/ljspeech/wavs/LJ014-0129.wav|tests/data/ljspeech/wavs/LJ014-0129.npy +tests/data/ljspeech/wavs/LJ012-0270.wav|tests/data/ljspeech/wavs/LJ012-0270.npy +tests/data/ljspeech/wavs/LJ018-0112.wav|tests/data/ljspeech/wavs/LJ018-0112.npy +tests/data/ljspeech/wavs/LJ012-0238.wav|tests/data/ljspeech/wavs/LJ012-0238.npy +tests/data/ljspeech/wavs/LJ018-0107.wav|tests/data/ljspeech/wavs/LJ018-0107.npy +tests/data/ljspeech/wavs/LJ005-0052.wav|tests/data/ljspeech/wavs/LJ005-0052.npy +tests/data/ljspeech/wavs/LJ013-0086.wav|tests/data/ljspeech/wavs/LJ013-0086.npy 
+tests/data/ljspeech/wavs/LJ015-0039.wav|tests/data/ljspeech/wavs/LJ015-0039.npy +tests/data/ljspeech/wavs/LJ003-0085.wav|tests/data/ljspeech/wavs/LJ003-0085.npy +tests/data/ljspeech/wavs/LJ020-0046.wav|tests/data/ljspeech/wavs/LJ020-0046.npy +tests/data/ljspeech/wavs/LJ037-0107.wav|tests/data/ljspeech/wavs/LJ037-0107.npy +tests/data/ljspeech/wavs/LJ006-0093.wav|tests/data/ljspeech/wavs/LJ006-0093.npy +tests/data/ljspeech/wavs/LJ049-0108.wav|tests/data/ljspeech/wavs/LJ049-0108.npy +tests/data/ljspeech/wavs/LJ010-0247.wav|tests/data/ljspeech/wavs/LJ010-0247.npy +tests/data/ljspeech/wavs/LJ049-0150.wav|tests/data/ljspeech/wavs/LJ049-0150.npy +tests/data/ljspeech/wavs/LJ043-0168.wav|tests/data/ljspeech/wavs/LJ043-0168.npy +tests/data/ljspeech/wavs/LJ033-0111.wav|tests/data/ljspeech/wavs/LJ033-0111.npy +tests/data/ljspeech/wavs/LJ029-0126.wav|tests/data/ljspeech/wavs/LJ029-0126.npy +tests/data/ljspeech/wavs/LJ040-0150.wav|tests/data/ljspeech/wavs/LJ040-0150.npy +tests/data/ljspeech/wavs/LJ011-0108.wav|tests/data/ljspeech/wavs/LJ011-0108.npy +tests/data/ljspeech/wavs/LJ029-0022.wav|tests/data/ljspeech/wavs/LJ029-0022.npy +tests/data/ljspeech/wavs/LJ038-0081.wav|tests/data/ljspeech/wavs/LJ038-0081.npy +tests/data/ljspeech/wavs/LJ038-0288.wav|tests/data/ljspeech/wavs/LJ038-0288.npy +tests/data/ljspeech/wavs/LJ029-0069.wav|tests/data/ljspeech/wavs/LJ029-0069.npy +tests/data/ljspeech/wavs/LJ019-0241.wav|tests/data/ljspeech/wavs/LJ019-0241.npy +tests/data/ljspeech/wavs/LJ047-0181.wav|tests/data/ljspeech/wavs/LJ047-0181.npy +tests/data/ljspeech/wavs/LJ047-0050.wav|tests/data/ljspeech/wavs/LJ047-0050.npy +tests/data/ljspeech/wavs/LJ012-0064.wav|tests/data/ljspeech/wavs/LJ012-0064.npy +tests/data/ljspeech/wavs/LJ016-0353.wav|tests/data/ljspeech/wavs/LJ016-0353.npy +tests/data/ljspeech/wavs/LJ048-0155.wav|tests/data/ljspeech/wavs/LJ048-0155.npy +tests/data/ljspeech/wavs/LJ007-0104.wav|tests/data/ljspeech/wavs/LJ007-0104.npy +tests/data/ljspeech/wavs/LJ015-0104.wav|tests/data/ljspeech/wavs/LJ015-0104.npy +tests/data/ljspeech/wavs/LJ040-0035.wav|tests/data/ljspeech/wavs/LJ040-0035.npy +tests/data/ljspeech/wavs/LJ008-0094.wav|tests/data/ljspeech/wavs/LJ008-0094.npy +tests/data/ljspeech/wavs/LJ006-0095.wav|tests/data/ljspeech/wavs/LJ006-0095.npy +tests/data/ljspeech/wavs/LJ015-0166.wav|tests/data/ljspeech/wavs/LJ015-0166.npy +tests/data/ljspeech/wavs/LJ007-0072.wav|tests/data/ljspeech/wavs/LJ007-0072.npy +tests/data/ljspeech/wavs/LJ013-0118.wav|tests/data/ljspeech/wavs/LJ013-0118.npy +tests/data/ljspeech/wavs/LJ030-0031.wav|tests/data/ljspeech/wavs/LJ030-0031.npy +tests/data/ljspeech/wavs/LJ016-0357.wav|tests/data/ljspeech/wavs/LJ016-0357.npy +tests/data/ljspeech/wavs/LJ030-0234.wav|tests/data/ljspeech/wavs/LJ030-0234.npy +tests/data/ljspeech/wavs/LJ050-0086.wav|tests/data/ljspeech/wavs/LJ050-0086.npy +tests/data/ljspeech/wavs/LJ008-0286.wav|tests/data/ljspeech/wavs/LJ008-0286.npy +tests/data/ljspeech/wavs/LJ008-0158.wav|tests/data/ljspeech/wavs/LJ008-0158.npy +tests/data/ljspeech/wavs/LJ016-0214.wav|tests/data/ljspeech/wavs/LJ016-0214.npy +tests/data/ljspeech/wavs/LJ007-0010.wav|tests/data/ljspeech/wavs/LJ007-0010.npy +tests/data/ljspeech/wavs/LJ006-0282.wav|tests/data/ljspeech/wavs/LJ006-0282.npy +tests/data/ljspeech/wavs/LJ047-0156.wav|tests/data/ljspeech/wavs/LJ047-0156.npy +tests/data/ljspeech/wavs/LJ030-0157.wav|tests/data/ljspeech/wavs/LJ030-0157.npy +tests/data/ljspeech/wavs/LJ044-0211.wav|tests/data/ljspeech/wavs/LJ044-0211.npy 
+tests/data/ljspeech/wavs/LJ041-0051.wav|tests/data/ljspeech/wavs/LJ041-0051.npy +tests/data/ljspeech/wavs/LJ007-0140.wav|tests/data/ljspeech/wavs/LJ007-0140.npy +tests/data/ljspeech/wavs/LJ042-0233.wav|tests/data/ljspeech/wavs/LJ042-0233.npy +tests/data/ljspeech/wavs/LJ042-0136.wav|tests/data/ljspeech/wavs/LJ042-0136.npy +tests/data/ljspeech/wavs/LJ041-0115.wav|tests/data/ljspeech/wavs/LJ041-0115.npy +tests/data/ljspeech/wavs/LJ009-0114.wav|tests/data/ljspeech/wavs/LJ009-0114.npy +tests/data/ljspeech/wavs/LJ007-0185.wav|tests/data/ljspeech/wavs/LJ007-0185.npy +tests/data/ljspeech/wavs/LJ005-0242.wav|tests/data/ljspeech/wavs/LJ005-0242.npy +tests/data/ljspeech/wavs/LJ005-0214.wav|tests/data/ljspeech/wavs/LJ005-0214.npy +tests/data/ljspeech/wavs/LJ004-0235.wav|tests/data/ljspeech/wavs/LJ004-0235.npy +tests/data/ljspeech/wavs/LJ008-0039.wav|tests/data/ljspeech/wavs/LJ008-0039.npy +tests/data/ljspeech/wavs/LJ047-0018.wav|tests/data/ljspeech/wavs/LJ047-0018.npy +tests/data/ljspeech/wavs/LJ003-0040.wav|tests/data/ljspeech/wavs/LJ003-0040.npy +tests/data/ljspeech/wavs/LJ046-0219.wav|tests/data/ljspeech/wavs/LJ046-0219.npy +tests/data/ljspeech/wavs/LJ050-0275.wav|tests/data/ljspeech/wavs/LJ050-0275.npy +tests/data/ljspeech/wavs/LJ006-0017.wav|tests/data/ljspeech/wavs/LJ006-0017.npy +tests/data/ljspeech/wavs/LJ006-0027.wav|tests/data/ljspeech/wavs/LJ006-0027.npy +tests/data/ljspeech/wavs/LJ007-0235.wav|tests/data/ljspeech/wavs/LJ007-0235.npy +tests/data/ljspeech/wavs/LJ005-0233.wav|tests/data/ljspeech/wavs/LJ005-0233.npy +tests/data/ljspeech/wavs/LJ004-0121.wav|tests/data/ljspeech/wavs/LJ004-0121.npy +tests/data/ljspeech/wavs/LJ005-0063.wav|tests/data/ljspeech/wavs/LJ005-0063.npy +tests/data/ljspeech/wavs/LJ035-0067.wav|tests/data/ljspeech/wavs/LJ035-0067.npy +tests/data/ljspeech/wavs/LJ007-0045.wav|tests/data/ljspeech/wavs/LJ007-0045.npy +tests/data/ljspeech/wavs/LJ012-0117.wav|tests/data/ljspeech/wavs/LJ012-0117.npy +tests/data/ljspeech/wavs/LJ042-0178.wav|tests/data/ljspeech/wavs/LJ042-0178.npy +tests/data/ljspeech/wavs/LJ005-0192.wav|tests/data/ljspeech/wavs/LJ005-0192.npy +tests/data/ljspeech/wavs/LJ008-0264.wav|tests/data/ljspeech/wavs/LJ008-0264.npy +tests/data/ljspeech/wavs/LJ003-0233.wav|tests/data/ljspeech/wavs/LJ003-0233.npy +tests/data/ljspeech/wavs/LJ004-0036.wav|tests/data/ljspeech/wavs/LJ004-0036.npy +tests/data/ljspeech/wavs/LJ009-0131.wav|tests/data/ljspeech/wavs/LJ009-0131.npy +tests/data/ljspeech/wavs/LJ050-0226.wav|tests/data/ljspeech/wavs/LJ050-0226.npy +tests/data/ljspeech/wavs/LJ002-0196.wav|tests/data/ljspeech/wavs/LJ002-0196.npy +tests/data/ljspeech/wavs/LJ001-0096.wav|tests/data/ljspeech/wavs/LJ001-0096.npy +tests/data/ljspeech/wavs/LJ016-0436.wav|tests/data/ljspeech/wavs/LJ016-0436.npy +tests/data/ljspeech/wavs/LJ004-0005.wav|tests/data/ljspeech/wavs/LJ004-0005.npy +tests/data/ljspeech/wavs/LJ016-0228.wav|tests/data/ljspeech/wavs/LJ016-0228.npy +tests/data/ljspeech/wavs/LJ049-0011.wav|tests/data/ljspeech/wavs/LJ049-0011.npy +tests/data/ljspeech/wavs/LJ031-0016.wav|tests/data/ljspeech/wavs/LJ031-0016.npy +tests/data/ljspeech/wavs/LJ018-0032.wav|tests/data/ljspeech/wavs/LJ018-0032.npy +tests/data/ljspeech/wavs/LJ031-0063.wav|tests/data/ljspeech/wavs/LJ031-0063.npy +tests/data/ljspeech/wavs/LJ016-0377.wav|tests/data/ljspeech/wavs/LJ016-0377.npy +tests/data/ljspeech/wavs/LJ016-0134.wav|tests/data/ljspeech/wavs/LJ016-0134.npy +tests/data/ljspeech/wavs/LJ014-0058.wav|tests/data/ljspeech/wavs/LJ014-0058.npy 
+tests/data/ljspeech/wavs/LJ001-0059.wav|tests/data/ljspeech/wavs/LJ001-0059.npy +tests/data/ljspeech/wavs/LJ016-0399.wav|tests/data/ljspeech/wavs/LJ016-0399.npy +tests/data/ljspeech/wavs/LJ032-0125.wav|tests/data/ljspeech/wavs/LJ032-0125.npy +tests/data/ljspeech/wavs/LJ032-0065.wav|tests/data/ljspeech/wavs/LJ032-0065.npy +tests/data/ljspeech/wavs/LJ013-0038.wav|tests/data/ljspeech/wavs/LJ013-0038.npy +tests/data/ljspeech/wavs/LJ002-0328.wav|tests/data/ljspeech/wavs/LJ002-0328.npy +tests/data/ljspeech/wavs/LJ017-0084.wav|tests/data/ljspeech/wavs/LJ017-0084.npy +tests/data/ljspeech/wavs/LJ016-0272.wav|tests/data/ljspeech/wavs/LJ016-0272.npy +tests/data/ljspeech/wavs/LJ047-0231.wav|tests/data/ljspeech/wavs/LJ047-0231.npy +tests/data/ljspeech/wavs/LJ014-0287.wav|tests/data/ljspeech/wavs/LJ014-0287.npy +tests/data/ljspeech/wavs/LJ049-0149.wav|tests/data/ljspeech/wavs/LJ049-0149.npy +tests/data/ljspeech/wavs/LJ016-0384.wav|tests/data/ljspeech/wavs/LJ016-0384.npy +tests/data/ljspeech/wavs/LJ012-0195.wav|tests/data/ljspeech/wavs/LJ012-0195.npy +tests/data/ljspeech/wavs/LJ014-0089.wav|tests/data/ljspeech/wavs/LJ014-0089.npy +tests/data/ljspeech/wavs/LJ016-0028.wav|tests/data/ljspeech/wavs/LJ016-0028.npy +tests/data/ljspeech/wavs/LJ031-0108.wav|tests/data/ljspeech/wavs/LJ031-0108.npy +tests/data/ljspeech/wavs/LJ017-0233.wav|tests/data/ljspeech/wavs/LJ017-0233.npy +tests/data/ljspeech/wavs/LJ013-0067.wav|tests/data/ljspeech/wavs/LJ013-0067.npy +tests/data/ljspeech/wavs/LJ014-0100.wav|tests/data/ljspeech/wavs/LJ014-0100.npy +tests/data/ljspeech/wavs/LJ042-0154.wav|tests/data/ljspeech/wavs/LJ042-0154.npy +tests/data/ljspeech/wavs/LJ011-0260.wav|tests/data/ljspeech/wavs/LJ011-0260.npy +tests/data/ljspeech/wavs/LJ011-0065.wav|tests/data/ljspeech/wavs/LJ011-0065.npy +tests/data/ljspeech/wavs/LJ045-0166.wav|tests/data/ljspeech/wavs/LJ045-0166.npy +tests/data/ljspeech/wavs/LJ006-0284.wav|tests/data/ljspeech/wavs/LJ006-0284.npy +tests/data/ljspeech/wavs/LJ037-0080.wav|tests/data/ljspeech/wavs/LJ037-0080.npy +tests/data/ljspeech/wavs/LJ019-0354.wav|tests/data/ljspeech/wavs/LJ019-0354.npy +tests/data/ljspeech/wavs/LJ007-0145.wav|tests/data/ljspeech/wavs/LJ007-0145.npy +tests/data/ljspeech/wavs/LJ034-0199.wav|tests/data/ljspeech/wavs/LJ034-0199.npy +tests/data/ljspeech/wavs/LJ038-0294.wav|tests/data/ljspeech/wavs/LJ038-0294.npy +tests/data/ljspeech/wavs/LJ015-0170.wav|tests/data/ljspeech/wavs/LJ015-0170.npy +tests/data/ljspeech/wavs/LJ001-0001.wav|tests/data/ljspeech/wavs/LJ001-0001.npy +tests/data/ljspeech/wavs/LJ041-0194.wav|tests/data/ljspeech/wavs/LJ041-0194.npy +tests/data/ljspeech/wavs/LJ007-0062.wav|tests/data/ljspeech/wavs/LJ007-0062.npy +tests/data/ljspeech/wavs/LJ029-0043.wav|tests/data/ljspeech/wavs/LJ029-0043.npy +tests/data/ljspeech/wavs/LJ043-0102.wav|tests/data/ljspeech/wavs/LJ043-0102.npy +tests/data/ljspeech/wavs/LJ033-0198.wav|tests/data/ljspeech/wavs/LJ033-0198.npy +tests/data/ljspeech/wavs/LJ006-0271.wav|tests/data/ljspeech/wavs/LJ006-0271.npy +tests/data/ljspeech/wavs/LJ046-0074.wav|tests/data/ljspeech/wavs/LJ046-0074.npy +tests/data/ljspeech/wavs/LJ019-0256.wav|tests/data/ljspeech/wavs/LJ019-0256.npy +tests/data/ljspeech/wavs/LJ019-0364.wav|tests/data/ljspeech/wavs/LJ019-0364.npy +tests/data/ljspeech/wavs/LJ014-0079.wav|tests/data/ljspeech/wavs/LJ014-0079.npy +tests/data/ljspeech/wavs/LJ029-0189.wav|tests/data/ljspeech/wavs/LJ029-0189.npy +tests/data/ljspeech/wavs/LJ034-0140.wav|tests/data/ljspeech/wavs/LJ034-0140.npy 
+tests/data/ljspeech/wavs/LJ009-0055.wav|tests/data/ljspeech/wavs/LJ009-0055.npy +tests/data/ljspeech/wavs/LJ008-0012.wav|tests/data/ljspeech/wavs/LJ008-0012.npy +tests/data/ljspeech/wavs/LJ016-0015.wav|tests/data/ljspeech/wavs/LJ016-0015.npy +tests/data/ljspeech/wavs/LJ014-0002.wav|tests/data/ljspeech/wavs/LJ014-0002.npy +tests/data/ljspeech/wavs/LJ009-0240.wav|tests/data/ljspeech/wavs/LJ009-0240.npy +tests/data/ljspeech/wavs/LJ010-0183.wav|tests/data/ljspeech/wavs/LJ010-0183.npy +tests/data/ljspeech/wavs/LJ020-0107.wav|tests/data/ljspeech/wavs/LJ020-0107.npy +tests/data/ljspeech/wavs/LJ007-0016.wav|tests/data/ljspeech/wavs/LJ007-0016.npy +tests/data/ljspeech/wavs/LJ045-0086.wav|tests/data/ljspeech/wavs/LJ045-0086.npy +tests/data/ljspeech/wavs/LJ031-0199.wav|tests/data/ljspeech/wavs/LJ031-0199.npy +tests/data/ljspeech/wavs/LJ041-0189.wav|tests/data/ljspeech/wavs/LJ041-0189.npy +tests/data/ljspeech/wavs/LJ046-0246.wav|tests/data/ljspeech/wavs/LJ046-0246.npy +tests/data/ljspeech/wavs/LJ018-0091.wav|tests/data/ljspeech/wavs/LJ018-0091.npy +tests/data/ljspeech/wavs/LJ017-0002.wav|tests/data/ljspeech/wavs/LJ017-0002.npy +tests/data/ljspeech/wavs/LJ035-0176.wav|tests/data/ljspeech/wavs/LJ035-0176.npy +tests/data/ljspeech/wavs/LJ044-0155.wav|tests/data/ljspeech/wavs/LJ044-0155.npy +tests/data/ljspeech/wavs/LJ046-0252.wav|tests/data/ljspeech/wavs/LJ046-0252.npy +tests/data/ljspeech/wavs/LJ016-0065.wav|tests/data/ljspeech/wavs/LJ016-0065.npy +tests/data/ljspeech/wavs/LJ016-0067.wav|tests/data/ljspeech/wavs/LJ016-0067.npy +tests/data/ljspeech/wavs/LJ041-0165.wav|tests/data/ljspeech/wavs/LJ041-0165.npy +tests/data/ljspeech/wavs/LJ038-0232.wav|tests/data/ljspeech/wavs/LJ038-0232.npy +tests/data/ljspeech/wavs/LJ006-0151.wav|tests/data/ljspeech/wavs/LJ006-0151.npy +tests/data/ljspeech/wavs/LJ017-0139.wav|tests/data/ljspeech/wavs/LJ017-0139.npy +tests/data/ljspeech/wavs/LJ008-0130.wav|tests/data/ljspeech/wavs/LJ008-0130.npy +tests/data/ljspeech/wavs/LJ029-0147.wav|tests/data/ljspeech/wavs/LJ029-0147.npy +tests/data/ljspeech/wavs/LJ014-0097.wav|tests/data/ljspeech/wavs/LJ014-0097.npy +tests/data/ljspeech/wavs/LJ032-0131.wav|tests/data/ljspeech/wavs/LJ032-0131.npy +tests/data/ljspeech/wavs/LJ038-0080.wav|tests/data/ljspeech/wavs/LJ038-0080.npy +tests/data/ljspeech/wavs/LJ041-0144.wav|tests/data/ljspeech/wavs/LJ041-0144.npy +tests/data/ljspeech/wavs/LJ040-0133.wav|tests/data/ljspeech/wavs/LJ040-0133.npy +tests/data/ljspeech/wavs/LJ035-0094.wav|tests/data/ljspeech/wavs/LJ035-0094.npy +tests/data/ljspeech/wavs/LJ019-0237.wav|tests/data/ljspeech/wavs/LJ019-0237.npy +tests/data/ljspeech/wavs/LJ032-0275.wav|tests/data/ljspeech/wavs/LJ032-0275.npy +tests/data/ljspeech/wavs/LJ048-0255.wav|tests/data/ljspeech/wavs/LJ048-0255.npy +tests/data/ljspeech/wavs/LJ006-0256.wav|tests/data/ljspeech/wavs/LJ006-0256.npy +tests/data/ljspeech/wavs/LJ040-0170.wav|tests/data/ljspeech/wavs/LJ040-0170.npy +tests/data/ljspeech/wavs/LJ029-0106.wav|tests/data/ljspeech/wavs/LJ029-0106.npy +tests/data/ljspeech/wavs/LJ016-0423.wav|tests/data/ljspeech/wavs/LJ016-0423.npy +tests/data/ljspeech/wavs/LJ005-0227.wav|tests/data/ljspeech/wavs/LJ005-0227.npy +tests/data/ljspeech/wavs/LJ038-0018.wav|tests/data/ljspeech/wavs/LJ038-0018.npy +tests/data/ljspeech/wavs/LJ035-0040.wav|tests/data/ljspeech/wavs/LJ035-0040.npy +tests/data/ljspeech/wavs/LJ028-0133.wav|tests/data/ljspeech/wavs/LJ028-0133.npy +tests/data/ljspeech/wavs/LJ029-0040.wav|tests/data/ljspeech/wavs/LJ029-0040.npy 
+tests/data/ljspeech/wavs/LJ028-0214.wav|tests/data/ljspeech/wavs/LJ028-0214.npy +tests/data/ljspeech/wavs/LJ007-0172.wav|tests/data/ljspeech/wavs/LJ007-0172.npy +tests/data/ljspeech/wavs/LJ012-0116.wav|tests/data/ljspeech/wavs/LJ012-0116.npy +tests/data/ljspeech/wavs/LJ035-0075.wav|tests/data/ljspeech/wavs/LJ035-0075.npy +tests/data/ljspeech/wavs/LJ047-0173.wav|tests/data/ljspeech/wavs/LJ047-0173.npy +tests/data/ljspeech/wavs/LJ041-0126.wav|tests/data/ljspeech/wavs/LJ041-0126.npy +tests/data/ljspeech/wavs/LJ019-0044.wav|tests/data/ljspeech/wavs/LJ019-0044.npy +tests/data/ljspeech/wavs/LJ050-0111.wav|tests/data/ljspeech/wavs/LJ050-0111.npy +tests/data/ljspeech/wavs/LJ050-0267.wav|tests/data/ljspeech/wavs/LJ050-0267.npy +tests/data/ljspeech/wavs/LJ005-0127.wav|tests/data/ljspeech/wavs/LJ005-0127.npy +tests/data/ljspeech/wavs/LJ011-0007.wav|tests/data/ljspeech/wavs/LJ011-0007.npy +tests/data/ljspeech/wavs/LJ016-0394.wav|tests/data/ljspeech/wavs/LJ016-0394.npy +tests/data/ljspeech/wavs/LJ033-0081.wav|tests/data/ljspeech/wavs/LJ033-0081.npy +tests/data/ljspeech/wavs/LJ011-0025.wav|tests/data/ljspeech/wavs/LJ011-0025.npy +tests/data/ljspeech/wavs/LJ049-0207.wav|tests/data/ljspeech/wavs/LJ049-0207.npy +tests/data/ljspeech/wavs/LJ031-0101.wav|tests/data/ljspeech/wavs/LJ031-0101.npy +tests/data/ljspeech/wavs/LJ002-0295.wav|tests/data/ljspeech/wavs/LJ002-0295.npy +tests/data/ljspeech/wavs/LJ009-0056.wav|tests/data/ljspeech/wavs/LJ009-0056.npy +tests/data/ljspeech/wavs/LJ045-0243.wav|tests/data/ljspeech/wavs/LJ045-0243.npy +tests/data/ljspeech/wavs/LJ005-0165.wav|tests/data/ljspeech/wavs/LJ005-0165.npy +tests/data/ljspeech/wavs/LJ012-0225.wav|tests/data/ljspeech/wavs/LJ012-0225.npy +tests/data/ljspeech/wavs/LJ028-0345.wav|tests/data/ljspeech/wavs/LJ028-0345.npy +tests/data/ljspeech/wavs/LJ003-0221.wav|tests/data/ljspeech/wavs/LJ003-0221.npy +tests/data/ljspeech/wavs/LJ015-0201.wav|tests/data/ljspeech/wavs/LJ015-0201.npy +tests/data/ljspeech/wavs/LJ029-0119.wav|tests/data/ljspeech/wavs/LJ029-0119.npy +tests/data/ljspeech/wavs/LJ012-0094.wav|tests/data/ljspeech/wavs/LJ012-0094.npy +tests/data/ljspeech/wavs/LJ008-0034.wav|tests/data/ljspeech/wavs/LJ008-0034.npy +tests/data/ljspeech/wavs/LJ011-0292.wav|tests/data/ljspeech/wavs/LJ011-0292.npy +tests/data/ljspeech/wavs/LJ041-0134.wav|tests/data/ljspeech/wavs/LJ041-0134.npy +tests/data/ljspeech/wavs/LJ041-0185.wav|tests/data/ljspeech/wavs/LJ041-0185.npy +tests/data/ljspeech/wavs/LJ041-0182.wav|tests/data/ljspeech/wavs/LJ041-0182.npy +tests/data/ljspeech/wavs/LJ006-0177.wav|tests/data/ljspeech/wavs/LJ006-0177.npy +tests/data/ljspeech/wavs/LJ018-0272.wav|tests/data/ljspeech/wavs/LJ018-0272.npy +tests/data/ljspeech/wavs/LJ043-0118.wav|tests/data/ljspeech/wavs/LJ043-0118.npy +tests/data/ljspeech/wavs/LJ045-0226.wav|tests/data/ljspeech/wavs/LJ045-0226.npy +tests/data/ljspeech/wavs/LJ036-0071.wav|tests/data/ljspeech/wavs/LJ036-0071.npy +tests/data/ljspeech/wavs/LJ046-0064.wav|tests/data/ljspeech/wavs/LJ046-0064.npy +tests/data/ljspeech/wavs/LJ029-0081.wav|tests/data/ljspeech/wavs/LJ029-0081.npy +tests/data/ljspeech/wavs/LJ045-0048.wav|tests/data/ljspeech/wavs/LJ045-0048.npy +tests/data/ljspeech/wavs/LJ028-0203.wav|tests/data/ljspeech/wavs/LJ028-0203.npy +tests/data/ljspeech/wavs/LJ007-0153.wav|tests/data/ljspeech/wavs/LJ007-0153.npy +tests/data/ljspeech/wavs/LJ036-0135.wav|tests/data/ljspeech/wavs/LJ036-0135.npy +tests/data/ljspeech/wavs/LJ009-0029.wav|tests/data/ljspeech/wavs/LJ009-0029.npy 
+tests/data/ljspeech/wavs/LJ028-0253.wav|tests/data/ljspeech/wavs/LJ028-0253.npy +tests/data/ljspeech/wavs/LJ031-0165.wav|tests/data/ljspeech/wavs/LJ031-0165.npy +tests/data/ljspeech/wavs/LJ032-0070.wav|tests/data/ljspeech/wavs/LJ032-0070.npy +tests/data/ljspeech/wavs/LJ049-0220.wav|tests/data/ljspeech/wavs/LJ049-0220.npy +tests/data/ljspeech/wavs/LJ038-0040.wav|tests/data/ljspeech/wavs/LJ038-0040.npy +tests/data/ljspeech/wavs/LJ049-0072.wav|tests/data/ljspeech/wavs/LJ049-0072.npy +tests/data/ljspeech/wavs/LJ006-0011.wav|tests/data/ljspeech/wavs/LJ006-0011.npy +tests/data/ljspeech/wavs/LJ038-0107.wav|tests/data/ljspeech/wavs/LJ038-0107.npy +tests/data/ljspeech/wavs/LJ048-0201.wav|tests/data/ljspeech/wavs/LJ048-0201.npy +tests/data/ljspeech/wavs/LJ028-0033.wav|tests/data/ljspeech/wavs/LJ028-0033.npy +tests/data/ljspeech/wavs/LJ003-0124.wav|tests/data/ljspeech/wavs/LJ003-0124.npy +tests/data/ljspeech/wavs/LJ032-0227.wav|tests/data/ljspeech/wavs/LJ032-0227.npy +tests/data/ljspeech/wavs/LJ049-0196.wav|tests/data/ljspeech/wavs/LJ049-0196.npy +tests/data/ljspeech/wavs/LJ006-0135.wav|tests/data/ljspeech/wavs/LJ006-0135.npy +tests/data/ljspeech/wavs/LJ012-0121.wav|tests/data/ljspeech/wavs/LJ012-0121.npy +tests/data/ljspeech/wavs/LJ028-0073.wav|tests/data/ljspeech/wavs/LJ028-0073.npy +tests/data/ljspeech/wavs/LJ017-0028.wav|tests/data/ljspeech/wavs/LJ017-0028.npy +tests/data/ljspeech/wavs/LJ048-0090.wav|tests/data/ljspeech/wavs/LJ048-0090.npy +tests/data/ljspeech/wavs/LJ026-0038.wav|tests/data/ljspeech/wavs/LJ026-0038.npy +tests/data/ljspeech/wavs/LJ032-0251.wav|tests/data/ljspeech/wavs/LJ032-0251.npy +tests/data/ljspeech/wavs/LJ034-0172.wav|tests/data/ljspeech/wavs/LJ034-0172.npy +tests/data/ljspeech/wavs/LJ007-0067.wav|tests/data/ljspeech/wavs/LJ007-0067.npy +tests/data/ljspeech/wavs/LJ002-0337.wav|tests/data/ljspeech/wavs/LJ002-0337.npy +tests/data/ljspeech/wavs/LJ011-0089.wav|tests/data/ljspeech/wavs/LJ011-0089.npy +tests/data/ljspeech/wavs/LJ012-0160.wav|tests/data/ljspeech/wavs/LJ012-0160.npy +tests/data/ljspeech/wavs/LJ037-0013.wav|tests/data/ljspeech/wavs/LJ037-0013.npy +tests/data/ljspeech/wavs/LJ048-0037.wav|tests/data/ljspeech/wavs/LJ048-0037.npy +tests/data/ljspeech/wavs/LJ029-0095.wav|tests/data/ljspeech/wavs/LJ029-0095.npy +tests/data/ljspeech/wavs/LJ019-0169.wav|tests/data/ljspeech/wavs/LJ019-0169.npy +tests/data/ljspeech/wavs/LJ008-0078.wav|tests/data/ljspeech/wavs/LJ008-0078.npy +tests/data/ljspeech/wavs/LJ047-0102.wav|tests/data/ljspeech/wavs/LJ047-0102.npy +tests/data/ljspeech/wavs/LJ037-0249.wav|tests/data/ljspeech/wavs/LJ037-0249.npy +tests/data/ljspeech/wavs/LJ040-0230.wav|tests/data/ljspeech/wavs/LJ040-0230.npy +tests/data/ljspeech/wavs/LJ008-0063.wav|tests/data/ljspeech/wavs/LJ008-0063.npy +tests/data/ljspeech/wavs/LJ007-0227.wav|tests/data/ljspeech/wavs/LJ007-0227.npy +tests/data/ljspeech/wavs/LJ014-0258.wav|tests/data/ljspeech/wavs/LJ014-0258.npy +tests/data/ljspeech/wavs/LJ034-0034.wav|tests/data/ljspeech/wavs/LJ034-0034.npy +tests/data/ljspeech/wavs/LJ020-0040.wav|tests/data/ljspeech/wavs/LJ020-0040.npy +tests/data/ljspeech/wavs/LJ047-0175.wav|tests/data/ljspeech/wavs/LJ047-0175.npy +tests/data/ljspeech/wavs/LJ046-0225.wav|tests/data/ljspeech/wavs/LJ046-0225.npy +tests/data/ljspeech/wavs/LJ038-0066.wav|tests/data/ljspeech/wavs/LJ038-0066.npy +tests/data/ljspeech/wavs/LJ038-0047.wav|tests/data/ljspeech/wavs/LJ038-0047.npy +tests/data/ljspeech/wavs/LJ037-0170.wav|tests/data/ljspeech/wavs/LJ037-0170.npy 
+tests/data/ljspeech/wavs/LJ048-0005.wav|tests/data/ljspeech/wavs/LJ048-0005.npy +tests/data/ljspeech/wavs/LJ038-0298.wav|tests/data/ljspeech/wavs/LJ038-0298.npy +tests/data/ljspeech/wavs/LJ008-0242.wav|tests/data/ljspeech/wavs/LJ008-0242.npy +tests/data/ljspeech/wavs/LJ029-0087.wav|tests/data/ljspeech/wavs/LJ029-0087.npy +tests/data/ljspeech/wavs/LJ034-0194.wav|tests/data/ljspeech/wavs/LJ034-0194.npy +tests/data/ljspeech/wavs/LJ008-0277.wav|tests/data/ljspeech/wavs/LJ008-0277.npy +tests/data/ljspeech/wavs/LJ012-0110.wav|tests/data/ljspeech/wavs/LJ012-0110.npy +tests/data/ljspeech/wavs/LJ030-0014.wav|tests/data/ljspeech/wavs/LJ030-0014.npy +tests/data/ljspeech/wavs/LJ048-0178.wav|tests/data/ljspeech/wavs/LJ048-0178.npy +tests/data/ljspeech/wavs/LJ041-0151.wav|tests/data/ljspeech/wavs/LJ041-0151.npy +tests/data/ljspeech/wavs/LJ045-0104.wav|tests/data/ljspeech/wavs/LJ045-0104.npy +tests/data/ljspeech/wavs/LJ036-0046.wav|tests/data/ljspeech/wavs/LJ036-0046.npy +tests/data/ljspeech/wavs/LJ044-0153.wav|tests/data/ljspeech/wavs/LJ044-0153.npy +tests/data/ljspeech/wavs/LJ043-0147.wav|tests/data/ljspeech/wavs/LJ043-0147.npy +tests/data/ljspeech/wavs/LJ043-0177.wav|tests/data/ljspeech/wavs/LJ043-0177.npy +tests/data/ljspeech/wavs/LJ004-0242.wav|tests/data/ljspeech/wavs/LJ004-0242.npy +tests/data/ljspeech/wavs/LJ029-0142.wav|tests/data/ljspeech/wavs/LJ029-0142.npy +tests/data/ljspeech/wavs/LJ003-0011.wav|tests/data/ljspeech/wavs/LJ003-0011.npy +tests/data/ljspeech/wavs/LJ005-0047.wav|tests/data/ljspeech/wavs/LJ005-0047.npy +tests/data/ljspeech/wavs/LJ048-0168.wav|tests/data/ljspeech/wavs/LJ048-0168.npy +tests/data/ljspeech/wavs/LJ044-0041.wav|tests/data/ljspeech/wavs/LJ044-0041.npy +tests/data/ljspeech/wavs/LJ048-0273.wav|tests/data/ljspeech/wavs/LJ048-0273.npy +tests/data/ljspeech/wavs/LJ048-0187.wav|tests/data/ljspeech/wavs/LJ048-0187.npy +tests/data/ljspeech/wavs/LJ005-0128.wav|tests/data/ljspeech/wavs/LJ005-0128.npy +tests/data/ljspeech/wavs/LJ048-0063.wav|tests/data/ljspeech/wavs/LJ048-0063.npy +tests/data/ljspeech/wavs/LJ019-0373.wav|tests/data/ljspeech/wavs/LJ019-0373.npy +tests/data/ljspeech/wavs/LJ037-0139.wav|tests/data/ljspeech/wavs/LJ037-0139.npy +tests/data/ljspeech/wavs/LJ005-0055.wav|tests/data/ljspeech/wavs/LJ005-0055.npy +tests/data/ljspeech/wavs/LJ031-0019.wav|tests/data/ljspeech/wavs/LJ031-0019.npy +tests/data/ljspeech/wavs/LJ050-0229.wav|tests/data/ljspeech/wavs/LJ050-0229.npy +tests/data/ljspeech/wavs/LJ048-0110.wav|tests/data/ljspeech/wavs/LJ048-0110.npy +tests/data/ljspeech/wavs/LJ042-0030.wav|tests/data/ljspeech/wavs/LJ042-0030.npy +tests/data/ljspeech/wavs/LJ002-0322.wav|tests/data/ljspeech/wavs/LJ002-0322.npy +tests/data/ljspeech/wavs/LJ019-0255.wav|tests/data/ljspeech/wavs/LJ019-0255.npy +tests/data/ljspeech/wavs/LJ046-0066.wav|tests/data/ljspeech/wavs/LJ046-0066.npy +tests/data/ljspeech/wavs/LJ018-0313.wav|tests/data/ljspeech/wavs/LJ018-0313.npy +tests/data/ljspeech/wavs/LJ041-0058.wav|tests/data/ljspeech/wavs/LJ041-0058.npy +tests/data/ljspeech/wavs/LJ028-0243.wav|tests/data/ljspeech/wavs/LJ028-0243.npy +tests/data/ljspeech/wavs/LJ028-0489.wav|tests/data/ljspeech/wavs/LJ028-0489.npy +tests/data/ljspeech/wavs/LJ029-0061.wav|tests/data/ljspeech/wavs/LJ029-0061.npy +tests/data/ljspeech/wavs/LJ003-0008.wav|tests/data/ljspeech/wavs/LJ003-0008.npy +tests/data/ljspeech/wavs/LJ050-0234.wav|tests/data/ljspeech/wavs/LJ050-0234.npy +tests/data/ljspeech/wavs/LJ026-0043.wav|tests/data/ljspeech/wavs/LJ026-0043.npy 
+tests/data/ljspeech/wavs/LJ016-0390.wav|tests/data/ljspeech/wavs/LJ016-0390.npy +tests/data/ljspeech/wavs/LJ034-0074.wav|tests/data/ljspeech/wavs/LJ034-0074.npy +tests/data/ljspeech/wavs/LJ031-0205.wav|tests/data/ljspeech/wavs/LJ031-0205.npy +tests/data/ljspeech/wavs/LJ046-0222.wav|tests/data/ljspeech/wavs/LJ046-0222.npy +tests/data/ljspeech/wavs/LJ044-0118.wav|tests/data/ljspeech/wavs/LJ044-0118.npy +tests/data/ljspeech/wavs/LJ016-0435.wav|tests/data/ljspeech/wavs/LJ016-0435.npy +tests/data/ljspeech/wavs/LJ041-0072.wav|tests/data/ljspeech/wavs/LJ041-0072.npy +tests/data/ljspeech/wavs/LJ035-0076.wav|tests/data/ljspeech/wavs/LJ035-0076.npy +tests/data/ljspeech/wavs/LJ006-0048.wav|tests/data/ljspeech/wavs/LJ006-0048.npy +tests/data/ljspeech/wavs/LJ014-0096.wav|tests/data/ljspeech/wavs/LJ014-0096.npy +tests/data/ljspeech/wavs/LJ012-0101.wav|tests/data/ljspeech/wavs/LJ012-0101.npy +tests/data/ljspeech/wavs/LJ028-0027.wav|tests/data/ljspeech/wavs/LJ028-0027.npy +tests/data/ljspeech/wavs/LJ006-0058.wav|tests/data/ljspeech/wavs/LJ006-0058.npy +tests/data/ljspeech/wavs/LJ035-0020.wav|tests/data/ljspeech/wavs/LJ035-0020.npy +tests/data/ljspeech/wavs/LJ034-0059.wav|tests/data/ljspeech/wavs/LJ034-0059.npy +tests/data/ljspeech/wavs/LJ001-0003.wav|tests/data/ljspeech/wavs/LJ001-0003.npy +tests/data/ljspeech/wavs/LJ040-0199.wav|tests/data/ljspeech/wavs/LJ040-0199.npy +tests/data/ljspeech/wavs/LJ011-0043.wav|tests/data/ljspeech/wavs/LJ011-0043.npy +tests/data/ljspeech/wavs/LJ004-0224.wav|tests/data/ljspeech/wavs/LJ004-0224.npy +tests/data/ljspeech/wavs/LJ049-0205.wav|tests/data/ljspeech/wavs/LJ049-0205.npy +tests/data/ljspeech/wavs/LJ006-0231.wav|tests/data/ljspeech/wavs/LJ006-0231.npy +tests/data/ljspeech/wavs/LJ045-0148.wav|tests/data/ljspeech/wavs/LJ045-0148.npy +tests/data/ljspeech/wavs/LJ012-0091.wav|tests/data/ljspeech/wavs/LJ012-0091.npy +tests/data/ljspeech/wavs/LJ002-0027.wav|tests/data/ljspeech/wavs/LJ002-0027.npy +tests/data/ljspeech/wavs/LJ048-0072.wav|tests/data/ljspeech/wavs/LJ048-0072.npy +tests/data/ljspeech/wavs/LJ006-0199.wav|tests/data/ljspeech/wavs/LJ006-0199.npy +tests/data/ljspeech/wavs/LJ019-0300.wav|tests/data/ljspeech/wavs/LJ019-0300.npy +tests/data/ljspeech/wavs/LJ018-0044.wav|tests/data/ljspeech/wavs/LJ018-0044.npy +tests/data/ljspeech/wavs/LJ047-0106.wav|tests/data/ljspeech/wavs/LJ047-0106.npy +tests/data/ljspeech/wavs/LJ045-0182.wav|tests/data/ljspeech/wavs/LJ045-0182.npy +tests/data/ljspeech/wavs/LJ012-0095.wav|tests/data/ljspeech/wavs/LJ012-0095.npy +tests/data/ljspeech/wavs/LJ031-0056.wav|tests/data/ljspeech/wavs/LJ031-0056.npy +tests/data/ljspeech/wavs/LJ007-0063.wav|tests/data/ljspeech/wavs/LJ007-0063.npy +tests/data/ljspeech/wavs/LJ048-0011.wav|tests/data/ljspeech/wavs/LJ048-0011.npy +tests/data/ljspeech/wavs/LJ028-0284.wav|tests/data/ljspeech/wavs/LJ028-0284.npy +tests/data/ljspeech/wavs/LJ004-0234.wav|tests/data/ljspeech/wavs/LJ004-0234.npy +tests/data/ljspeech/wavs/LJ041-0030.wav|tests/data/ljspeech/wavs/LJ041-0030.npy +tests/data/ljspeech/wavs/LJ039-0127.wav|tests/data/ljspeech/wavs/LJ039-0127.npy +tests/data/ljspeech/wavs/LJ039-0096.wav|tests/data/ljspeech/wavs/LJ039-0096.npy +tests/data/ljspeech/wavs/LJ032-0032.wav|tests/data/ljspeech/wavs/LJ032-0032.npy +tests/data/ljspeech/wavs/LJ012-0234.wav|tests/data/ljspeech/wavs/LJ012-0234.npy +tests/data/ljspeech/wavs/LJ016-0105.wav|tests/data/ljspeech/wavs/LJ016-0105.npy +tests/data/ljspeech/wavs/LJ031-0071.wav|tests/data/ljspeech/wavs/LJ031-0071.npy 
+tests/data/ljspeech/wavs/LJ007-0125.wav|tests/data/ljspeech/wavs/LJ007-0125.npy +tests/data/ljspeech/wavs/LJ017-0120.wav|tests/data/ljspeech/wavs/LJ017-0120.npy +tests/data/ljspeech/wavs/LJ050-0277.wav|tests/data/ljspeech/wavs/LJ050-0277.npy +tests/data/ljspeech/wavs/LJ012-0187.wav|tests/data/ljspeech/wavs/LJ012-0187.npy +tests/data/ljspeech/wavs/LJ038-0275.wav|tests/data/ljspeech/wavs/LJ038-0275.npy +tests/data/ljspeech/wavs/LJ018-0292.wav|tests/data/ljspeech/wavs/LJ018-0292.npy +tests/data/ljspeech/wavs/LJ016-0042.wav|tests/data/ljspeech/wavs/LJ016-0042.npy +tests/data/ljspeech/wavs/LJ008-0042.wav|tests/data/ljspeech/wavs/LJ008-0042.npy +tests/data/ljspeech/wavs/LJ040-0023.wav|tests/data/ljspeech/wavs/LJ040-0023.npy +tests/data/ljspeech/wavs/LJ033-0134.wav|tests/data/ljspeech/wavs/LJ033-0134.npy +tests/data/ljspeech/wavs/LJ033-0044.wav|tests/data/ljspeech/wavs/LJ033-0044.npy +tests/data/ljspeech/wavs/LJ019-0172.wav|tests/data/ljspeech/wavs/LJ019-0172.npy +tests/data/ljspeech/wavs/LJ047-0125.wav|tests/data/ljspeech/wavs/LJ047-0125.npy +tests/data/ljspeech/wavs/LJ003-0286.wav|tests/data/ljspeech/wavs/LJ003-0286.npy +tests/data/ljspeech/wavs/LJ038-0109.wav|tests/data/ljspeech/wavs/LJ038-0109.npy +tests/data/ljspeech/wavs/LJ003-0033.wav|tests/data/ljspeech/wavs/LJ003-0033.npy +tests/data/ljspeech/wavs/LJ012-0272.wav|tests/data/ljspeech/wavs/LJ012-0272.npy +tests/data/ljspeech/wavs/LJ029-0058.wav|tests/data/ljspeech/wavs/LJ029-0058.npy +tests/data/ljspeech/wavs/LJ028-0283.wav|tests/data/ljspeech/wavs/LJ028-0283.npy +tests/data/ljspeech/wavs/LJ041-0037.wav|tests/data/ljspeech/wavs/LJ041-0037.npy +tests/data/ljspeech/wavs/LJ039-0024.wav|tests/data/ljspeech/wavs/LJ039-0024.npy +tests/data/ljspeech/wavs/LJ038-0125.wav|tests/data/ljspeech/wavs/LJ038-0125.npy +tests/data/ljspeech/wavs/LJ033-0157.wav|tests/data/ljspeech/wavs/LJ033-0157.npy +tests/data/ljspeech/wavs/LJ043-0188.wav|tests/data/ljspeech/wavs/LJ043-0188.npy +tests/data/ljspeech/wavs/LJ043-0185.wav|tests/data/ljspeech/wavs/LJ043-0185.npy +tests/data/ljspeech/wavs/LJ040-0125.wav|tests/data/ljspeech/wavs/LJ040-0125.npy +tests/data/ljspeech/wavs/LJ037-0119.wav|tests/data/ljspeech/wavs/LJ037-0119.npy +tests/data/ljspeech/wavs/LJ041-0090.wav|tests/data/ljspeech/wavs/LJ041-0090.npy +tests/data/ljspeech/wavs/LJ036-0133.wav|tests/data/ljspeech/wavs/LJ036-0133.npy +tests/data/ljspeech/wavs/LJ007-0152.wav|tests/data/ljspeech/wavs/LJ007-0152.npy +tests/data/ljspeech/wavs/LJ037-0123.wav|tests/data/ljspeech/wavs/LJ037-0123.npy +tests/data/ljspeech/wavs/LJ044-0191.wav|tests/data/ljspeech/wavs/LJ044-0191.npy +tests/data/ljspeech/wavs/LJ009-0009.wav|tests/data/ljspeech/wavs/LJ009-0009.npy +tests/data/ljspeech/wavs/LJ044-0093.wav|tests/data/ljspeech/wavs/LJ044-0093.npy +tests/data/ljspeech/wavs/LJ007-0058.wav|tests/data/ljspeech/wavs/LJ007-0058.npy +tests/data/ljspeech/wavs/LJ011-0189.wav|tests/data/ljspeech/wavs/LJ011-0189.npy +tests/data/ljspeech/wavs/LJ004-0062.wav|tests/data/ljspeech/wavs/LJ004-0062.npy +tests/data/ljspeech/wavs/LJ032-0225.wav|tests/data/ljspeech/wavs/LJ032-0225.npy +tests/data/ljspeech/wavs/LJ001-0152.wav|tests/data/ljspeech/wavs/LJ001-0152.npy +tests/data/ljspeech/wavs/LJ006-0243.wav|tests/data/ljspeech/wavs/LJ006-0243.npy +tests/data/ljspeech/wavs/LJ013-0077.wav|tests/data/ljspeech/wavs/LJ013-0077.npy +tests/data/ljspeech/wavs/LJ041-0139.wav|tests/data/ljspeech/wavs/LJ041-0139.npy +tests/data/ljspeech/wavs/LJ010-0053.wav|tests/data/ljspeech/wavs/LJ010-0053.npy 
+tests/data/ljspeech/wavs/LJ005-0205.wav|tests/data/ljspeech/wavs/LJ005-0205.npy +tests/data/ljspeech/wavs/LJ014-0333.wav|tests/data/ljspeech/wavs/LJ014-0333.npy +tests/data/ljspeech/wavs/LJ038-0230.wav|tests/data/ljspeech/wavs/LJ038-0230.npy +tests/data/ljspeech/wavs/LJ003-0263.wav|tests/data/ljspeech/wavs/LJ003-0263.npy +tests/data/ljspeech/wavs/LJ011-0149.wav|tests/data/ljspeech/wavs/LJ011-0149.npy +tests/data/ljspeech/wavs/LJ009-0030.wav|tests/data/ljspeech/wavs/LJ009-0030.npy +tests/data/ljspeech/wavs/LJ019-0183.wav|tests/data/ljspeech/wavs/LJ019-0183.npy +tests/data/ljspeech/wavs/LJ031-0054.wav|tests/data/ljspeech/wavs/LJ031-0054.npy +tests/data/ljspeech/wavs/LJ019-0136.wav|tests/data/ljspeech/wavs/LJ019-0136.npy +tests/data/ljspeech/wavs/LJ025-0114.wav|tests/data/ljspeech/wavs/LJ025-0114.npy +tests/data/ljspeech/wavs/LJ005-0122.wav|tests/data/ljspeech/wavs/LJ005-0122.npy +tests/data/ljspeech/wavs/LJ033-0123.wav|tests/data/ljspeech/wavs/LJ033-0123.npy +tests/data/ljspeech/wavs/LJ029-0009.wav|tests/data/ljspeech/wavs/LJ029-0009.npy +tests/data/ljspeech/wavs/LJ029-0109.wav|tests/data/ljspeech/wavs/LJ029-0109.npy +tests/data/ljspeech/wavs/LJ037-0041.wav|tests/data/ljspeech/wavs/LJ037-0041.npy +tests/data/ljspeech/wavs/LJ031-0148.wav|tests/data/ljspeech/wavs/LJ031-0148.npy +tests/data/ljspeech/wavs/LJ049-0177.wav|tests/data/ljspeech/wavs/LJ049-0177.npy +tests/data/ljspeech/wavs/LJ040-0006.wav|tests/data/ljspeech/wavs/LJ040-0006.npy +tests/data/ljspeech/wavs/LJ049-0041.wav|tests/data/ljspeech/wavs/LJ049-0041.npy +tests/data/ljspeech/wavs/LJ049-0102.wav|tests/data/ljspeech/wavs/LJ049-0102.npy +tests/data/ljspeech/wavs/LJ012-0125.wav|tests/data/ljspeech/wavs/LJ012-0125.npy +tests/data/ljspeech/wavs/LJ047-0162.wav|tests/data/ljspeech/wavs/LJ047-0162.npy +tests/data/ljspeech/wavs/LJ007-0207.wav|tests/data/ljspeech/wavs/LJ007-0207.npy +tests/data/ljspeech/wavs/LJ033-0115.wav|tests/data/ljspeech/wavs/LJ033-0115.npy +tests/data/ljspeech/wavs/LJ041-0187.wav|tests/data/ljspeech/wavs/LJ041-0187.npy +tests/data/ljspeech/wavs/LJ011-0092.wav|tests/data/ljspeech/wavs/LJ011-0092.npy +tests/data/ljspeech/wavs/LJ034-0145.wav|tests/data/ljspeech/wavs/LJ034-0145.npy +tests/data/ljspeech/wavs/LJ031-0028.wav|tests/data/ljspeech/wavs/LJ031-0028.npy +tests/data/ljspeech/wavs/LJ030-0089.wav|tests/data/ljspeech/wavs/LJ030-0089.npy +tests/data/ljspeech/wavs/LJ019-0047.wav|tests/data/ljspeech/wavs/LJ019-0047.npy +tests/data/ljspeech/wavs/LJ019-0023.wav|tests/data/ljspeech/wavs/LJ019-0023.npy +tests/data/ljspeech/wavs/LJ028-0106.wav|tests/data/ljspeech/wavs/LJ028-0106.npy +tests/data/ljspeech/wavs/LJ028-0511.wav|tests/data/ljspeech/wavs/LJ028-0511.npy +tests/data/ljspeech/wavs/LJ035-0015.wav|tests/data/ljspeech/wavs/LJ035-0015.npy +tests/data/ljspeech/wavs/LJ017-0077.wav|tests/data/ljspeech/wavs/LJ017-0077.npy +tests/data/ljspeech/wavs/LJ032-0182.wav|tests/data/ljspeech/wavs/LJ032-0182.npy +tests/data/ljspeech/wavs/LJ031-0091.wav|tests/data/ljspeech/wavs/LJ031-0091.npy +tests/data/ljspeech/wavs/LJ049-0211.wav|tests/data/ljspeech/wavs/LJ049-0211.npy +tests/data/ljspeech/wavs/LJ029-0173.wav|tests/data/ljspeech/wavs/LJ029-0173.npy +tests/data/ljspeech/wavs/LJ045-0162.wav|tests/data/ljspeech/wavs/LJ045-0162.npy +tests/data/ljspeech/wavs/LJ043-0162.wav|tests/data/ljspeech/wavs/LJ043-0162.npy +tests/data/ljspeech/wavs/LJ048-0219.wav|tests/data/ljspeech/wavs/LJ048-0219.npy +tests/data/ljspeech/wavs/LJ017-0128.wav|tests/data/ljspeech/wavs/LJ017-0128.npy 
+tests/data/ljspeech/wavs/LJ031-0076.wav|tests/data/ljspeech/wavs/LJ031-0076.npy +tests/data/ljspeech/wavs/LJ009-0008.wav|tests/data/ljspeech/wavs/LJ009-0008.npy +tests/data/ljspeech/wavs/LJ045-0200.wav|tests/data/ljspeech/wavs/LJ045-0200.npy +tests/data/ljspeech/wavs/LJ006-0091.wav|tests/data/ljspeech/wavs/LJ006-0091.npy +tests/data/ljspeech/wavs/LJ037-0088.wav|tests/data/ljspeech/wavs/LJ037-0088.npy +tests/data/ljspeech/wavs/LJ045-0229.wav|tests/data/ljspeech/wavs/LJ045-0229.npy +tests/data/ljspeech/wavs/LJ007-0037.wav|tests/data/ljspeech/wavs/LJ007-0037.npy +tests/data/ljspeech/wavs/LJ009-0152.wav|tests/data/ljspeech/wavs/LJ009-0152.npy +tests/data/ljspeech/wavs/LJ037-0199.wav|tests/data/ljspeech/wavs/LJ037-0199.npy +tests/data/ljspeech/wavs/LJ049-0106.wav|tests/data/ljspeech/wavs/LJ049-0106.npy +tests/data/ljspeech/wavs/LJ041-0068.wav|tests/data/ljspeech/wavs/LJ041-0068.npy +tests/data/ljspeech/wavs/LJ034-0203.wav|tests/data/ljspeech/wavs/LJ034-0203.npy +tests/data/ljspeech/wavs/LJ002-0068.wav|tests/data/ljspeech/wavs/LJ002-0068.npy +tests/data/ljspeech/wavs/LJ005-0271.wav|tests/data/ljspeech/wavs/LJ005-0271.npy +tests/data/ljspeech/wavs/LJ010-0005.wav|tests/data/ljspeech/wavs/LJ010-0005.npy +tests/data/ljspeech/wavs/LJ005-0131.wav|tests/data/ljspeech/wavs/LJ005-0131.npy +tests/data/ljspeech/wavs/LJ036-0140.wav|tests/data/ljspeech/wavs/LJ036-0140.npy +tests/data/ljspeech/wavs/LJ035-0022.wav|tests/data/ljspeech/wavs/LJ035-0022.npy +tests/data/ljspeech/wavs/LJ014-0077.wav|tests/data/ljspeech/wavs/LJ014-0077.npy +tests/data/ljspeech/wavs/LJ050-0031.wav|tests/data/ljspeech/wavs/LJ050-0031.npy +tests/data/ljspeech/wavs/LJ013-0236.wav|tests/data/ljspeech/wavs/LJ013-0236.npy +tests/data/ljspeech/wavs/LJ034-0094.wav|tests/data/ljspeech/wavs/LJ034-0094.npy +tests/data/ljspeech/wavs/LJ002-0251.wav|tests/data/ljspeech/wavs/LJ002-0251.npy +tests/data/ljspeech/wavs/LJ005-0229.wav|tests/data/ljspeech/wavs/LJ005-0229.npy +tests/data/ljspeech/wavs/LJ005-0066.wav|tests/data/ljspeech/wavs/LJ005-0066.npy +tests/data/ljspeech/wavs/LJ005-0062.wav|tests/data/ljspeech/wavs/LJ005-0062.npy +tests/data/ljspeech/wavs/LJ049-0137.wav|tests/data/ljspeech/wavs/LJ049-0137.npy +tests/data/ljspeech/wavs/LJ007-0131.wav|tests/data/ljspeech/wavs/LJ007-0131.npy +tests/data/ljspeech/wavs/LJ039-0049.wav|tests/data/ljspeech/wavs/LJ039-0049.npy +tests/data/ljspeech/wavs/LJ037-0033.wav|tests/data/ljspeech/wavs/LJ037-0033.npy +tests/data/ljspeech/wavs/LJ004-0047.wav|tests/data/ljspeech/wavs/LJ004-0047.npy +tests/data/ljspeech/wavs/LJ007-0007.wav|tests/data/ljspeech/wavs/LJ007-0007.npy +tests/data/ljspeech/wavs/LJ043-0069.wav|tests/data/ljspeech/wavs/LJ043-0069.npy +tests/data/ljspeech/wavs/LJ005-0296.wav|tests/data/ljspeech/wavs/LJ005-0296.npy +tests/data/ljspeech/wavs/LJ016-0056.wav|tests/data/ljspeech/wavs/LJ016-0056.npy +tests/data/ljspeech/wavs/LJ019-0165.wav|tests/data/ljspeech/wavs/LJ019-0165.npy +tests/data/ljspeech/wavs/LJ016-0439.wav|tests/data/ljspeech/wavs/LJ016-0439.npy +tests/data/ljspeech/wavs/LJ045-0218.wav|tests/data/ljspeech/wavs/LJ045-0218.npy +tests/data/ljspeech/wavs/LJ032-0114.wav|tests/data/ljspeech/wavs/LJ032-0114.npy +tests/data/ljspeech/wavs/LJ048-0087.wav|tests/data/ljspeech/wavs/LJ048-0087.npy +tests/data/ljspeech/wavs/LJ041-0042.wav|tests/data/ljspeech/wavs/LJ041-0042.npy +tests/data/ljspeech/wavs/LJ032-0150.wav|tests/data/ljspeech/wavs/LJ032-0150.npy +tests/data/ljspeech/wavs/LJ048-0204.wav|tests/data/ljspeech/wavs/LJ048-0204.npy 
+tests/data/ljspeech/wavs/LJ049-0164.wav|tests/data/ljspeech/wavs/LJ049-0164.npy +tests/data/ljspeech/wavs/LJ006-0076.wav|tests/data/ljspeech/wavs/LJ006-0076.npy +tests/data/ljspeech/wavs/LJ050-0268.wav|tests/data/ljspeech/wavs/LJ050-0268.npy +tests/data/ljspeech/wavs/LJ048-0276.wav|tests/data/ljspeech/wavs/LJ048-0276.npy +tests/data/ljspeech/wavs/LJ019-0359.wav|tests/data/ljspeech/wavs/LJ019-0359.npy +tests/data/ljspeech/wavs/LJ035-0099.wav|tests/data/ljspeech/wavs/LJ035-0099.npy +tests/data/ljspeech/wavs/LJ006-0297.wav|tests/data/ljspeech/wavs/LJ006-0297.npy +tests/data/ljspeech/wavs/LJ013-0209.wav|tests/data/ljspeech/wavs/LJ013-0209.npy +tests/data/ljspeech/wavs/LJ045-0013.wav|tests/data/ljspeech/wavs/LJ045-0013.npy +tests/data/ljspeech/wavs/LJ009-0188.wav|tests/data/ljspeech/wavs/LJ009-0188.npy +tests/data/ljspeech/wavs/LJ045-0174.wav|tests/data/ljspeech/wavs/LJ045-0174.npy +tests/data/ljspeech/wavs/LJ003-0189.wav|tests/data/ljspeech/wavs/LJ003-0189.npy +tests/data/ljspeech/wavs/LJ031-0085.wav|tests/data/ljspeech/wavs/LJ031-0085.npy +tests/data/ljspeech/wavs/LJ031-0030.wav|tests/data/ljspeech/wavs/LJ031-0030.npy +tests/data/ljspeech/wavs/LJ032-0183.wav|tests/data/ljspeech/wavs/LJ032-0183.npy +tests/data/ljspeech/wavs/LJ034-0118.wav|tests/data/ljspeech/wavs/LJ034-0118.npy +tests/data/ljspeech/wavs/LJ006-0119.wav|tests/data/ljspeech/wavs/LJ006-0119.npy +tests/data/ljspeech/wavs/LJ031-0179.wav|tests/data/ljspeech/wavs/LJ031-0179.npy +tests/data/ljspeech/wavs/LJ004-0050.wav|tests/data/ljspeech/wavs/LJ004-0050.npy +tests/data/ljspeech/wavs/LJ011-0127.wav|tests/data/ljspeech/wavs/LJ011-0127.npy +tests/data/ljspeech/wavs/LJ047-0180.wav|tests/data/ljspeech/wavs/LJ047-0180.npy +tests/data/ljspeech/wavs/LJ005-0050.wav|tests/data/ljspeech/wavs/LJ005-0050.npy +tests/data/ljspeech/wavs/LJ019-0212.wav|tests/data/ljspeech/wavs/LJ019-0212.npy +tests/data/ljspeech/wavs/LJ018-0364.wav|tests/data/ljspeech/wavs/LJ018-0364.npy +tests/data/ljspeech/wavs/LJ047-0124.wav|tests/data/ljspeech/wavs/LJ047-0124.npy +tests/data/ljspeech/wavs/LJ049-0114.wav|tests/data/ljspeech/wavs/LJ049-0114.npy +tests/data/ljspeech/wavs/LJ013-0174.wav|tests/data/ljspeech/wavs/LJ013-0174.npy +tests/data/ljspeech/wavs/LJ048-0197.wav|tests/data/ljspeech/wavs/LJ048-0197.npy +tests/data/ljspeech/wavs/LJ039-0183.wav|tests/data/ljspeech/wavs/LJ039-0183.npy +tests/data/ljspeech/wavs/LJ005-0026.wav|tests/data/ljspeech/wavs/LJ005-0026.npy +tests/data/ljspeech/wavs/LJ030-0115.wav|tests/data/ljspeech/wavs/LJ030-0115.npy +tests/data/ljspeech/wavs/LJ032-0068.wav|tests/data/ljspeech/wavs/LJ032-0068.npy +tests/data/ljspeech/wavs/LJ032-0048.wav|tests/data/ljspeech/wavs/LJ032-0048.npy +tests/data/ljspeech/wavs/LJ006-0245.wav|tests/data/ljspeech/wavs/LJ006-0245.npy +tests/data/ljspeech/wavs/LJ019-0042.wav|tests/data/ljspeech/wavs/LJ019-0042.npy +tests/data/ljspeech/wavs/LJ006-0137.wav|tests/data/ljspeech/wavs/LJ006-0137.npy +tests/data/ljspeech/wavs/LJ034-0060.wav|tests/data/ljspeech/wavs/LJ034-0060.npy +tests/data/ljspeech/wavs/LJ016-0364.wav|tests/data/ljspeech/wavs/LJ016-0364.npy +tests/data/ljspeech/wavs/LJ041-0175.wav|tests/data/ljspeech/wavs/LJ041-0175.npy +tests/data/ljspeech/wavs/LJ019-0238.wav|tests/data/ljspeech/wavs/LJ019-0238.npy +tests/data/ljspeech/wavs/LJ049-0027.wav|tests/data/ljspeech/wavs/LJ049-0027.npy +tests/data/ljspeech/wavs/LJ019-0082.wav|tests/data/ljspeech/wavs/LJ019-0082.npy +tests/data/ljspeech/wavs/LJ049-0084.wav|tests/data/ljspeech/wavs/LJ049-0084.npy 
+tests/data/ljspeech/wavs/LJ043-0184.wav|tests/data/ljspeech/wavs/LJ043-0184.npy +tests/data/ljspeech/wavs/LJ019-0299.wav|tests/data/ljspeech/wavs/LJ019-0299.npy +tests/data/ljspeech/wavs/LJ043-0174.wav|tests/data/ljspeech/wavs/LJ043-0174.npy +tests/data/ljspeech/wavs/LJ035-0208.wav|tests/data/ljspeech/wavs/LJ035-0208.npy +tests/data/ljspeech/wavs/LJ006-0098.wav|tests/data/ljspeech/wavs/LJ006-0098.npy +tests/data/ljspeech/wavs/LJ026-0010.wav|tests/data/ljspeech/wavs/LJ026-0010.npy +tests/data/ljspeech/wavs/LJ050-0173.wav|tests/data/ljspeech/wavs/LJ050-0173.npy +tests/data/ljspeech/wavs/LJ050-0153.wav|tests/data/ljspeech/wavs/LJ050-0153.npy +tests/data/ljspeech/wavs/LJ031-0050.wav|tests/data/ljspeech/wavs/LJ031-0050.npy +tests/data/ljspeech/wavs/LJ048-0125.wav|tests/data/ljspeech/wavs/LJ048-0125.npy +tests/data/ljspeech/wavs/LJ017-0007.wav|tests/data/ljspeech/wavs/LJ017-0007.npy +tests/data/ljspeech/wavs/LJ037-0083.wav|tests/data/ljspeech/wavs/LJ037-0083.npy +tests/data/ljspeech/wavs/LJ031-0095.wav|tests/data/ljspeech/wavs/LJ031-0095.npy +tests/data/ljspeech/wavs/LJ037-0156.wav|tests/data/ljspeech/wavs/LJ037-0156.npy +tests/data/ljspeech/wavs/LJ047-0213.wav|tests/data/ljspeech/wavs/LJ047-0213.npy +tests/data/ljspeech/wavs/LJ043-0170.wav|tests/data/ljspeech/wavs/LJ043-0170.npy +tests/data/ljspeech/wavs/LJ048-0185.wav|tests/data/ljspeech/wavs/LJ048-0185.npy +tests/data/ljspeech/wavs/LJ049-0192.wav|tests/data/ljspeech/wavs/LJ049-0192.npy +tests/data/ljspeech/wavs/LJ009-0251.wav|tests/data/ljspeech/wavs/LJ009-0251.npy +tests/data/ljspeech/wavs/LJ006-0195.wav|tests/data/ljspeech/wavs/LJ006-0195.npy +tests/data/ljspeech/wavs/LJ006-0067.wav|tests/data/ljspeech/wavs/LJ006-0067.npy +tests/data/ljspeech/wavs/LJ048-0262.wav|tests/data/ljspeech/wavs/LJ048-0262.npy +tests/data/ljspeech/wavs/LJ034-0040.wav|tests/data/ljspeech/wavs/LJ034-0040.npy +tests/data/ljspeech/wavs/LJ019-0250.wav|tests/data/ljspeech/wavs/LJ019-0250.npy +tests/data/ljspeech/wavs/LJ014-0133.wav|tests/data/ljspeech/wavs/LJ014-0133.npy +tests/data/ljspeech/wavs/LJ006-0043.wav|tests/data/ljspeech/wavs/LJ006-0043.npy +tests/data/ljspeech/wavs/LJ029-0168.wav|tests/data/ljspeech/wavs/LJ029-0168.npy +tests/data/ljspeech/wavs/LJ039-0026.wav|tests/data/ljspeech/wavs/LJ039-0026.npy +tests/data/ljspeech/wavs/LJ045-0194.wav|tests/data/ljspeech/wavs/LJ045-0194.npy +tests/data/ljspeech/wavs/LJ038-0263.wav|tests/data/ljspeech/wavs/LJ038-0263.npy +tests/data/ljspeech/wavs/LJ034-0005.wav|tests/data/ljspeech/wavs/LJ034-0005.npy +tests/data/ljspeech/wavs/LJ030-0221.wav|tests/data/ljspeech/wavs/LJ030-0221.npy +tests/data/ljspeech/wavs/LJ032-0102.wav|tests/data/ljspeech/wavs/LJ032-0102.npy +tests/data/ljspeech/wavs/LJ033-0167.wav|tests/data/ljspeech/wavs/LJ033-0167.npy +tests/data/ljspeech/wavs/LJ031-0111.wav|tests/data/ljspeech/wavs/LJ031-0111.npy +tests/data/ljspeech/wavs/LJ029-0073.wav|tests/data/ljspeech/wavs/LJ029-0073.npy +tests/data/ljspeech/wavs/LJ008-0301.wav|tests/data/ljspeech/wavs/LJ008-0301.npy +tests/data/ljspeech/wavs/LJ041-0034.wav|tests/data/ljspeech/wavs/LJ041-0034.npy +tests/data/ljspeech/wavs/LJ045-0165.wav|tests/data/ljspeech/wavs/LJ045-0165.npy +tests/data/ljspeech/wavs/LJ032-0148.wav|tests/data/ljspeech/wavs/LJ032-0148.npy +tests/data/ljspeech/wavs/LJ029-0098.wav|tests/data/ljspeech/wavs/LJ029-0098.npy +tests/data/ljspeech/wavs/LJ050-0265.wav|tests/data/ljspeech/wavs/LJ050-0265.npy +tests/data/ljspeech/wavs/LJ048-0149.wav|tests/data/ljspeech/wavs/LJ048-0149.npy 
+tests/data/ljspeech/wavs/LJ005-0111.wav|tests/data/ljspeech/wavs/LJ005-0111.npy +tests/data/ljspeech/wavs/LJ007-0192.wav|tests/data/ljspeech/wavs/LJ007-0192.npy +tests/data/ljspeech/wavs/LJ006-0290.wav|tests/data/ljspeech/wavs/LJ006-0290.npy +tests/data/ljspeech/wavs/LJ039-0208.wav|tests/data/ljspeech/wavs/LJ039-0208.npy +tests/data/ljspeech/wavs/LJ037-0024.wav|tests/data/ljspeech/wavs/LJ037-0024.npy +tests/data/ljspeech/wavs/LJ006-0170.wav|tests/data/ljspeech/wavs/LJ006-0170.npy +tests/data/ljspeech/wavs/LJ012-0155.wav|tests/data/ljspeech/wavs/LJ012-0155.npy +tests/data/ljspeech/wavs/LJ030-0132.wav|tests/data/ljspeech/wavs/LJ030-0132.npy +tests/data/ljspeech/wavs/LJ040-0225.wav|tests/data/ljspeech/wavs/LJ040-0225.npy +tests/data/ljspeech/wavs/LJ011-0101.wav|tests/data/ljspeech/wavs/LJ011-0101.npy +tests/data/ljspeech/wavs/LJ047-0169.wav|tests/data/ljspeech/wavs/LJ047-0169.npy +tests/data/ljspeech/wavs/LJ007-0102.wav|tests/data/ljspeech/wavs/LJ007-0102.npy +tests/data/ljspeech/wavs/LJ048-0202.wav|tests/data/ljspeech/wavs/LJ048-0202.npy +tests/data/ljspeech/wavs/LJ009-0053.wav|tests/data/ljspeech/wavs/LJ009-0053.npy +tests/data/ljspeech/wavs/LJ016-0130.wav|tests/data/ljspeech/wavs/LJ016-0130.npy +tests/data/ljspeech/wavs/LJ046-0031.wav|tests/data/ljspeech/wavs/LJ046-0031.npy +tests/data/ljspeech/wavs/LJ035-0032.wav|tests/data/ljspeech/wavs/LJ035-0032.npy +tests/data/ljspeech/wavs/LJ048-0177.wav|tests/data/ljspeech/wavs/LJ048-0177.npy +tests/data/ljspeech/wavs/LJ029-0029.wav|tests/data/ljspeech/wavs/LJ029-0029.npy +tests/data/ljspeech/wavs/LJ005-0265.wav|tests/data/ljspeech/wavs/LJ005-0265.npy +tests/data/ljspeech/wavs/LJ046-0025.wav|tests/data/ljspeech/wavs/LJ046-0025.npy +tests/data/ljspeech/wavs/LJ007-0036.wav|tests/data/ljspeech/wavs/LJ007-0036.npy +tests/data/ljspeech/wavs/LJ050-0196.wav|tests/data/ljspeech/wavs/LJ050-0196.npy +tests/data/ljspeech/wavs/LJ012-0224.wav|tests/data/ljspeech/wavs/LJ012-0224.npy +tests/data/ljspeech/wavs/LJ035-0101.wav|tests/data/ljspeech/wavs/LJ035-0101.npy +tests/data/ljspeech/wavs/LJ039-0189.wav|tests/data/ljspeech/wavs/LJ039-0189.npy +tests/data/ljspeech/wavs/LJ036-0138.wav|tests/data/ljspeech/wavs/LJ036-0138.npy +tests/data/ljspeech/wavs/LJ034-0191.wav|tests/data/ljspeech/wavs/LJ034-0191.npy +tests/data/ljspeech/wavs/LJ048-0019.wav|tests/data/ljspeech/wavs/LJ048-0019.npy +tests/data/ljspeech/wavs/LJ011-0042.wav|tests/data/ljspeech/wavs/LJ011-0042.npy +tests/data/ljspeech/wavs/LJ034-0154.wav|tests/data/ljspeech/wavs/LJ034-0154.npy +tests/data/ljspeech/wavs/LJ007-0160.wav|tests/data/ljspeech/wavs/LJ007-0160.npy +tests/data/ljspeech/wavs/LJ047-0093.wav|tests/data/ljspeech/wavs/LJ047-0093.npy +tests/data/ljspeech/wavs/LJ045-0093.wav|tests/data/ljspeech/wavs/LJ045-0093.npy +tests/data/ljspeech/wavs/LJ027-0138.wav|tests/data/ljspeech/wavs/LJ027-0138.npy +tests/data/ljspeech/wavs/LJ037-0140.wav|tests/data/ljspeech/wavs/LJ037-0140.npy +tests/data/ljspeech/wavs/LJ046-0015.wav|tests/data/ljspeech/wavs/LJ046-0015.npy +tests/data/ljspeech/wavs/LJ045-0085.wav|tests/data/ljspeech/wavs/LJ045-0085.npy +tests/data/ljspeech/wavs/LJ050-0165.wav|tests/data/ljspeech/wavs/LJ050-0165.npy +tests/data/ljspeech/wavs/LJ019-0337.wav|tests/data/ljspeech/wavs/LJ019-0337.npy +tests/data/ljspeech/wavs/LJ050-0161.wav|tests/data/ljspeech/wavs/LJ050-0161.npy +tests/data/ljspeech/wavs/LJ006-0030.wav|tests/data/ljspeech/wavs/LJ006-0030.npy +tests/data/ljspeech/wavs/LJ050-0076.wav|tests/data/ljspeech/wavs/LJ050-0076.npy 
+tests/data/ljspeech/wavs/LJ011-0029.wav|tests/data/ljspeech/wavs/LJ011-0029.npy +tests/data/ljspeech/wavs/LJ007-0061.wav|tests/data/ljspeech/wavs/LJ007-0061.npy +tests/data/ljspeech/wavs/LJ041-0027.wav|tests/data/ljspeech/wavs/LJ041-0027.npy +tests/data/ljspeech/wavs/LJ030-0130.wav|tests/data/ljspeech/wavs/LJ030-0130.npy +tests/data/ljspeech/wavs/LJ029-0202.wav|tests/data/ljspeech/wavs/LJ029-0202.npy +tests/data/ljspeech/wavs/LJ050-0044.wav|tests/data/ljspeech/wavs/LJ050-0044.npy +tests/data/ljspeech/wavs/LJ032-0012.wav|tests/data/ljspeech/wavs/LJ032-0012.npy +tests/data/ljspeech/wavs/LJ036-0157.wav|tests/data/ljspeech/wavs/LJ036-0157.npy +tests/data/ljspeech/wavs/LJ008-0263.wav|tests/data/ljspeech/wavs/LJ008-0263.npy +tests/data/ljspeech/wavs/LJ009-0083.wav|tests/data/ljspeech/wavs/LJ009-0083.npy +tests/data/ljspeech/wavs/LJ019-0203.wav|tests/data/ljspeech/wavs/LJ019-0203.npy +tests/data/ljspeech/wavs/LJ028-0318.wav|tests/data/ljspeech/wavs/LJ028-0318.npy +tests/data/ljspeech/wavs/LJ005-0223.wav|tests/data/ljspeech/wavs/LJ005-0223.npy +tests/data/ljspeech/wavs/LJ004-0232.wav|tests/data/ljspeech/wavs/LJ004-0232.npy +tests/data/ljspeech/wavs/LJ012-0147.wav|tests/data/ljspeech/wavs/LJ012-0147.npy +tests/data/ljspeech/wavs/LJ006-0026.wav|tests/data/ljspeech/wavs/LJ006-0026.npy +tests/data/ljspeech/wavs/LJ049-0083.wav|tests/data/ljspeech/wavs/LJ049-0083.npy +tests/data/ljspeech/wavs/LJ042-0219.wav|tests/data/ljspeech/wavs/LJ042-0219.npy +tests/data/ljspeech/wavs/LJ044-0123.wav|tests/data/ljspeech/wavs/LJ044-0123.npy +tests/data/ljspeech/wavs/LJ006-0247.wav|tests/data/ljspeech/wavs/LJ006-0247.npy +tests/data/ljspeech/wavs/LJ047-0209.wav|tests/data/ljspeech/wavs/LJ047-0209.npy +tests/data/ljspeech/wavs/LJ037-0037.wav|tests/data/ljspeech/wavs/LJ037-0037.npy +tests/data/ljspeech/wavs/LJ020-0002.wav|tests/data/ljspeech/wavs/LJ020-0002.npy +tests/data/ljspeech/wavs/LJ048-0027.wav|tests/data/ljspeech/wavs/LJ048-0027.npy +tests/data/ljspeech/wavs/LJ007-0151.wav|tests/data/ljspeech/wavs/LJ007-0151.npy +tests/data/ljspeech/wavs/LJ044-0098.wav|tests/data/ljspeech/wavs/LJ044-0098.npy +tests/data/ljspeech/wavs/LJ047-0230.wav|tests/data/ljspeech/wavs/LJ047-0230.npy +tests/data/ljspeech/wavs/LJ029-0075.wav|tests/data/ljspeech/wavs/LJ029-0075.npy +tests/data/ljspeech/wavs/LJ039-0128.wav|tests/data/ljspeech/wavs/LJ039-0128.npy +tests/data/ljspeech/wavs/LJ047-0114.wav|tests/data/ljspeech/wavs/LJ047-0114.npy +tests/data/ljspeech/wavs/LJ031-0114.wav|tests/data/ljspeech/wavs/LJ031-0114.npy +tests/data/ljspeech/wavs/LJ027-0127.wav|tests/data/ljspeech/wavs/LJ027-0127.npy +tests/data/ljspeech/wavs/LJ011-0154.wav|tests/data/ljspeech/wavs/LJ011-0154.npy +tests/data/ljspeech/wavs/LJ005-0299.wav|tests/data/ljspeech/wavs/LJ005-0299.npy +tests/data/ljspeech/wavs/LJ031-0099.wav|tests/data/ljspeech/wavs/LJ031-0099.npy +tests/data/ljspeech/wavs/LJ002-0110.wav|tests/data/ljspeech/wavs/LJ002-0110.npy +tests/data/ljspeech/wavs/LJ007-0060.wav|tests/data/ljspeech/wavs/LJ007-0060.npy +tests/data/ljspeech/wavs/LJ031-0141.wav|tests/data/ljspeech/wavs/LJ031-0141.npy +tests/data/ljspeech/wavs/LJ001-0014.wav|tests/data/ljspeech/wavs/LJ001-0014.npy +tests/data/ljspeech/wavs/LJ035-0035.wav|tests/data/ljspeech/wavs/LJ035-0035.npy +tests/data/ljspeech/wavs/LJ034-0125.wav|tests/data/ljspeech/wavs/LJ034-0125.npy +tests/data/ljspeech/wavs/LJ032-0235.wav|tests/data/ljspeech/wavs/LJ032-0235.npy +tests/data/ljspeech/wavs/LJ018-0306.wav|tests/data/ljspeech/wavs/LJ018-0306.npy 
+tests/data/ljspeech/wavs/LJ009-0129.wav|tests/data/ljspeech/wavs/LJ009-0129.npy +tests/data/ljspeech/wavs/LJ001-0015.wav|tests/data/ljspeech/wavs/LJ001-0015.npy +tests/data/ljspeech/wavs/LJ007-0128.wav|tests/data/ljspeech/wavs/LJ007-0128.npy +tests/data/ljspeech/wavs/LJ038-0200.wav|tests/data/ljspeech/wavs/LJ038-0200.npy +tests/data/ljspeech/wavs/LJ032-0209.wav|tests/data/ljspeech/wavs/LJ032-0209.npy +tests/data/ljspeech/wavs/LJ041-0038.wav|tests/data/ljspeech/wavs/LJ041-0038.npy +tests/data/ljspeech/wavs/LJ046-0241.wav|tests/data/ljspeech/wavs/LJ046-0241.npy +tests/data/ljspeech/wavs/LJ047-0220.wav|tests/data/ljspeech/wavs/LJ047-0220.npy +tests/data/ljspeech/wavs/LJ034-0158.wav|tests/data/ljspeech/wavs/LJ034-0158.npy +tests/data/ljspeech/wavs/LJ045-0044.wav|tests/data/ljspeech/wavs/LJ045-0044.npy +tests/data/ljspeech/wavs/LJ045-0169.wav|tests/data/ljspeech/wavs/LJ045-0169.npy +tests/data/ljspeech/wavs/LJ007-0154.wav|tests/data/ljspeech/wavs/LJ007-0154.npy +tests/data/ljspeech/wavs/LJ044-0114.wav|tests/data/ljspeech/wavs/LJ044-0114.npy +tests/data/ljspeech/wavs/LJ030-0085.wav|tests/data/ljspeech/wavs/LJ030-0085.npy +tests/data/ljspeech/wavs/LJ048-0129.wav|tests/data/ljspeech/wavs/LJ048-0129.npy +tests/data/ljspeech/wavs/LJ041-0077.wav|tests/data/ljspeech/wavs/LJ041-0077.npy +tests/data/ljspeech/wavs/LJ045-0113.wav|tests/data/ljspeech/wavs/LJ045-0113.npy +tests/data/ljspeech/wavs/LJ049-0009.wav|tests/data/ljspeech/wavs/LJ049-0009.npy +tests/data/ljspeech/wavs/LJ007-0148.wav|tests/data/ljspeech/wavs/LJ007-0148.npy +tests/data/ljspeech/wavs/LJ033-0132.wav|tests/data/ljspeech/wavs/LJ033-0132.npy +tests/data/ljspeech/wavs/LJ049-0076.wav|tests/data/ljspeech/wavs/LJ049-0076.npy +tests/data/ljspeech/wavs/LJ041-0127.wav|tests/data/ljspeech/wavs/LJ041-0127.npy +tests/data/ljspeech/wavs/LJ019-0193.wav|tests/data/ljspeech/wavs/LJ019-0193.npy +tests/data/ljspeech/wavs/LJ007-0173.wav|tests/data/ljspeech/wavs/LJ007-0173.npy +tests/data/ljspeech/wavs/LJ038-0014.wav|tests/data/ljspeech/wavs/LJ038-0014.npy +tests/data/ljspeech/wavs/LJ049-0141.wav|tests/data/ljspeech/wavs/LJ049-0141.npy +tests/data/ljspeech/wavs/LJ003-0007.wav|tests/data/ljspeech/wavs/LJ003-0007.npy +tests/data/ljspeech/wavs/LJ002-0280.wav|tests/data/ljspeech/wavs/LJ002-0280.npy +tests/data/ljspeech/wavs/LJ032-0230.wav|tests/data/ljspeech/wavs/LJ032-0230.npy +tests/data/ljspeech/wavs/LJ007-0110.wav|tests/data/ljspeech/wavs/LJ007-0110.npy +tests/data/ljspeech/wavs/LJ046-0027.wav|tests/data/ljspeech/wavs/LJ046-0027.npy +tests/data/ljspeech/wavs/LJ007-0020.wav|tests/data/ljspeech/wavs/LJ007-0020.npy +tests/data/ljspeech/wavs/LJ048-0205.wav|tests/data/ljspeech/wavs/LJ048-0205.npy +tests/data/ljspeech/wavs/LJ007-0044.wav|tests/data/ljspeech/wavs/LJ007-0044.npy +tests/data/ljspeech/wavs/LJ010-0117.wav|tests/data/ljspeech/wavs/LJ010-0117.npy +tests/data/ljspeech/wavs/LJ038-0217.wav|tests/data/ljspeech/wavs/LJ038-0217.npy +tests/data/ljspeech/wavs/LJ031-0135.wav|tests/data/ljspeech/wavs/LJ031-0135.npy +tests/data/ljspeech/wavs/LJ007-0178.wav|tests/data/ljspeech/wavs/LJ007-0178.npy +tests/data/ljspeech/wavs/LJ035-0042.wav|tests/data/ljspeech/wavs/LJ035-0042.npy +tests/data/ljspeech/wavs/LJ033-0092.wav|tests/data/ljspeech/wavs/LJ033-0092.npy +tests/data/ljspeech/wavs/LJ041-0159.wav|tests/data/ljspeech/wavs/LJ041-0159.npy +tests/data/ljspeech/wavs/LJ035-0062.wav|tests/data/ljspeech/wavs/LJ035-0062.npy +tests/data/ljspeech/wavs/LJ034-0028.wav|tests/data/ljspeech/wavs/LJ034-0028.npy 
+tests/data/ljspeech/wavs/LJ034-0178.wav|tests/data/ljspeech/wavs/LJ034-0178.npy +tests/data/ljspeech/wavs/LJ029-0017.wav|tests/data/ljspeech/wavs/LJ029-0017.npy +tests/data/ljspeech/wavs/LJ005-0173.wav|tests/data/ljspeech/wavs/LJ005-0173.npy +tests/data/ljspeech/wavs/LJ007-0229.wav|tests/data/ljspeech/wavs/LJ007-0229.npy +tests/data/ljspeech/wavs/LJ020-0062.wav|tests/data/ljspeech/wavs/LJ020-0062.npy +tests/data/ljspeech/wavs/LJ030-0082.wav|tests/data/ljspeech/wavs/LJ030-0082.npy +tests/data/ljspeech/wavs/LJ036-0001.wav|tests/data/ljspeech/wavs/LJ036-0001.npy +tests/data/ljspeech/wavs/LJ045-0001.wav|tests/data/ljspeech/wavs/LJ045-0001.npy +tests/data/ljspeech/wavs/LJ006-0002.wav|tests/data/ljspeech/wavs/LJ006-0002.npy +tests/data/ljspeech/wavs/LJ048-0001.wav|tests/data/ljspeech/wavs/LJ048-0001.npy +tests/data/ljspeech/wavs/LJ034-0212.wav|tests/data/ljspeech/wavs/LJ034-0212.npy +tests/data/ljspeech/wavs/LJ029-0179.wav|tests/data/ljspeech/wavs/LJ029-0179.npy +tests/data/ljspeech/wavs/LJ034-0026.wav|tests/data/ljspeech/wavs/LJ034-0026.npy +tests/data/ljspeech/wavs/LJ007-0097.wav|tests/data/ljspeech/wavs/LJ007-0097.npy +tests/data/ljspeech/wavs/LJ025-0167.wav|tests/data/ljspeech/wavs/LJ025-0167.npy +tests/data/ljspeech/wavs/LJ007-0076.wav|tests/data/ljspeech/wavs/LJ007-0076.npy +tests/data/ljspeech/wavs/LJ018-0052.wav|tests/data/ljspeech/wavs/LJ018-0052.npy +tests/data/ljspeech/wavs/LJ032-0202.wav|tests/data/ljspeech/wavs/LJ032-0202.npy +tests/data/ljspeech/wavs/LJ050-0160.wav|tests/data/ljspeech/wavs/LJ050-0160.npy +tests/data/ljspeech/wavs/LJ037-0150.wav|tests/data/ljspeech/wavs/LJ037-0150.npy +tests/data/ljspeech/wavs/LJ007-0223.wav|tests/data/ljspeech/wavs/LJ007-0223.npy +tests/data/ljspeech/wavs/LJ007-0051.wav|tests/data/ljspeech/wavs/LJ007-0051.npy +tests/data/ljspeech/wavs/LJ050-0228.wav|tests/data/ljspeech/wavs/LJ050-0228.npy +tests/data/ljspeech/wavs/LJ038-0189.wav|tests/data/ljspeech/wavs/LJ038-0189.npy +tests/data/ljspeech/wavs/LJ037-0160.wav|tests/data/ljspeech/wavs/LJ037-0160.npy +tests/data/ljspeech/wavs/LJ048-0025.wav|tests/data/ljspeech/wavs/LJ048-0025.npy +tests/data/ljspeech/wavs/LJ007-0070.wav|tests/data/ljspeech/wavs/LJ007-0070.npy +tests/data/ljspeech/wavs/LJ038-0050.wav|tests/data/ljspeech/wavs/LJ038-0050.npy +tests/data/ljspeech/wavs/LJ032-0001.wav|tests/data/ljspeech/wavs/LJ032-0001.npy +tests/data/ljspeech/wavs/LJ037-0001.wav|tests/data/ljspeech/wavs/LJ037-0001.npy +tests/data/ljspeech/wavs/LJ041-0001.wav|tests/data/ljspeech/wavs/LJ041-0001.npy +tests/data/ljspeech/wavs/LJ030-0001.wav|tests/data/ljspeech/wavs/LJ030-0001.npy +tests/data/ljspeech/wavs/LJ029-0001.wav|tests/data/ljspeech/wavs/LJ029-0001.npy +tests/data/ljspeech/wavs/LJ047-0001.wav|tests/data/ljspeech/wavs/LJ047-0001.npy +tests/data/ljspeech/wavs/LJ033-0001.wav|tests/data/ljspeech/wavs/LJ033-0001.npy +tests/data/ljspeech/wavs/LJ035-0001.wav|tests/data/ljspeech/wavs/LJ035-0001.npy +tests/data/ljspeech/wavs/LJ040-0001.wav|tests/data/ljspeech/wavs/LJ040-0001.npy diff --git a/tests/data/ljspeech/wavs/LJ001-0001.npy b/tests/data/ljspeech/wavs/LJ001-0001.npy new file mode 100644 index 0000000000000000000000000000000000000000..e86cb27855486e9467134f05ea21efb427ad222d --- /dev/null +++ b/tests/data/ljspeech/wavs/LJ001-0001.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:070a2e370e4338b331fffee561cc236adecf077869c6bde9acd69ef8bfef7986 +size 474888 diff --git a/tests/data/ljspeech/wavs/LJ001-0001.wav b/tests/data/ljspeech/wavs/LJ001-0001.wav new file mode 100644 index 
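Editor's note: the long `wav|npy` listing above pairs each test wav with a precomputed `.npy` array (presumably the attention-mask metadata referenced later as `meta_file_attn_mask`; that mapping is an assumption, not stated in the diff). A minimal sketch of reading such a pipe-delimited file:

```python
# Hypothetical reader for a pipe-delimited "wav|npy" metadata file.
# The file name and the meaning of the .npy column are assumptions.
from pathlib import Path


def load_wav_npy_map(meta_path):
    """Return {wav_path: npy_path} from lines like 'a.wav|a.npy'."""
    mapping = {}
    for line in Path(meta_path).read_text().splitlines():
        line = line.strip()
        if not line:
            continue
        wav_path, npy_path = line.split("|", maxsplit=1)
        mapping[wav_path] = npy_path
    return mapping


if __name__ == "__main__":
    pairs = load_wav_npy_map("tests/data/ljspeech/metadata_attn_mask.txt")
    print(len(pairs), "entries")
```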
0000000000000000000000000000000000000000..a274be89422809113adc336e624afeb255cdc67a Binary files /dev/null and b/tests/data/ljspeech/wavs/LJ001-0001.wav differ diff --git a/tests/data/ljspeech/wavs/LJ001-0002.npy b/tests/data/ljspeech/wavs/LJ001-0002.npy new file mode 100644 index 0000000000000000000000000000000000000000..8fd8829d3d7894e5b1a529364bcf3c87295c3611 --- /dev/null +++ b/tests/data/ljspeech/wavs/LJ001-0002.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42c36e568a8b57b77289cd157a9e7ae2c27cf955dd6a7da64ba3478c9c0d2334 +size 18920 diff --git a/tests/data/ljspeech/wavs/LJ001-0002.wav b/tests/data/ljspeech/wavs/LJ001-0002.wav new file mode 100644 index 0000000000000000000000000000000000000000..b1a0ed110ab9763dab7428f6273d696fecb4205d Binary files /dev/null and b/tests/data/ljspeech/wavs/LJ001-0002.wav differ diff --git a/tests/data/ljspeech/wavs/LJ001-0003.npy b/tests/data/ljspeech/wavs/LJ001-0003.npy new file mode 100644 index 0000000000000000000000000000000000000000..52dc61f37288196aad89f404017d2b2827ffe961 --- /dev/null +++ b/tests/data/ljspeech/wavs/LJ001-0003.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3629fc7aa5e0933858240fde841d61874cb601f5c2a6e756eaad03d5ded44083 +size 475460 diff --git a/tests/data/ljspeech/wavs/LJ001-0003.wav b/tests/data/ljspeech/wavs/LJ001-0003.wav new file mode 100644 index 0000000000000000000000000000000000000000..3329ddb448ed3bfff911bb90110defcc72e14bc2 Binary files /dev/null and b/tests/data/ljspeech/wavs/LJ001-0003.wav differ diff --git a/tests/data/ljspeech/wavs/LJ001-0004.npy b/tests/data/ljspeech/wavs/LJ001-0004.npy new file mode 100644 index 0000000000000000000000000000000000000000..e96bc5d66be61a705d28aafd3a587c91f8e004f2 --- /dev/null +++ b/tests/data/ljspeech/wavs/LJ001-0004.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b59336c125906c331b7a319ee5b57f95f0bfefe13aed330b1d399766b3927f2 +size 137720 diff --git a/tests/data/ljspeech/wavs/LJ001-0004.wav b/tests/data/ljspeech/wavs/LJ001-0004.wav new file mode 100644 index 0000000000000000000000000000000000000000..ead8a0e3a6e7b05c116d910e5875b900a2050f9f Binary files /dev/null and b/tests/data/ljspeech/wavs/LJ001-0004.wav differ diff --git a/tests/data/ljspeech/wavs/LJ001-0005.npy b/tests/data/ljspeech/wavs/LJ001-0005.npy new file mode 100644 index 0000000000000000000000000000000000000000..3a3244a3bfa0f20d2be183aed5bc656be8fc7ad1 --- /dev/null +++ b/tests/data/ljspeech/wavs/LJ001-0005.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0dc904c41f7e9a357d538d8d3a453d8890255f37c4c433828e57439cc7130f6 +size 365356 diff --git a/tests/data/ljspeech/wavs/LJ001-0005.wav b/tests/data/ljspeech/wavs/LJ001-0005.wav new file mode 100644 index 0000000000000000000000000000000000000000..640f708c13ffd653794455aa0730ed6c143f2fc9 Binary files /dev/null and b/tests/data/ljspeech/wavs/LJ001-0005.wav differ diff --git a/tests/data/ljspeech/wavs/LJ001-0006.npy b/tests/data/ljspeech/wavs/LJ001-0006.npy new file mode 100644 index 0000000000000000000000000000000000000000..d8066965966018d3678cb1ec47d584cc5cfb9ee3 --- /dev/null +++ b/tests/data/ljspeech/wavs/LJ001-0006.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9877b39550773704dcbe20ba0b3c3b227ec0cbbfdcd66202d1821cace4ac2d30 +size 138720 diff --git a/tests/data/ljspeech/wavs/LJ001-0006.wav b/tests/data/ljspeech/wavs/LJ001-0006.wav new file mode 100644 index 
0000000000000000000000000000000000000000..15cffd544f2203ba85040fa21710f42d33187547 Binary files /dev/null and b/tests/data/ljspeech/wavs/LJ001-0006.wav differ diff --git a/tests/data/ljspeech/wavs/LJ001-0007.npy b/tests/data/ljspeech/wavs/LJ001-0007.npy new file mode 100644 index 0000000000000000000000000000000000000000..256c011e7b5706f9f9cd08acfcd7f4b1545891f8 --- /dev/null +++ b/tests/data/ljspeech/wavs/LJ001-0007.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61511787cf80a867fca160cb26b1ed604d7a49f19e9d0242e896d1794fe2d7e5 +size 331788 diff --git a/tests/data/ljspeech/wavs/LJ001-0007.wav b/tests/data/ljspeech/wavs/LJ001-0007.wav new file mode 100644 index 0000000000000000000000000000000000000000..0d33e4501e5e8d3479c4900f7fddae2ceacebb45 Binary files /dev/null and b/tests/data/ljspeech/wavs/LJ001-0007.wav differ diff --git a/tests/data/ljspeech/wavs/LJ001-0008.npy b/tests/data/ljspeech/wavs/LJ001-0008.npy new file mode 100644 index 0000000000000000000000000000000000000000..ce66ce70d757498ec0509bb008e87cb361c6bfce --- /dev/null +++ b/tests/data/ljspeech/wavs/LJ001-0008.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da63312469c7aee26a2f30bf6d51e55091d4291954ef63a0bef1c800dfd1aee0 +size 12288 diff --git a/tests/data/ljspeech/wavs/LJ001-0008.wav b/tests/data/ljspeech/wavs/LJ001-0008.wav new file mode 100644 index 0000000000000000000000000000000000000000..a1871dd8f907a04939949573d79a8312639f942c Binary files /dev/null and b/tests/data/ljspeech/wavs/LJ001-0008.wav differ diff --git a/tests/data/ljspeech/wavs/LJ001-0009.npy b/tests/data/ljspeech/wavs/LJ001-0009.npy new file mode 100644 index 0000000000000000000000000000000000000000..f03e9f46e1a1b2b789bd45e3a8f6e345ca7be054 --- /dev/null +++ b/tests/data/ljspeech/wavs/LJ001-0009.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c642cc9208a5491ac34fd1a35fcc5ab8acccfe264667ee2e265b87420701ec65 +size 264920 diff --git a/tests/data/ljspeech/wavs/LJ001-0009.wav b/tests/data/ljspeech/wavs/LJ001-0009.wav new file mode 100644 index 0000000000000000000000000000000000000000..b534f1b9db8b3baa4958ee39e445a7a1ed24f008 Binary files /dev/null and b/tests/data/ljspeech/wavs/LJ001-0009.wav differ diff --git a/tests/data/ljspeech/wavs/LJ001-0010.npy b/tests/data/ljspeech/wavs/LJ001-0010.npy new file mode 100644 index 0000000000000000000000000000000000000000..914a8eef226b5b6ba32237fc27a3a7d1865d7da9 --- /dev/null +++ b/tests/data/ljspeech/wavs/LJ001-0010.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:960003ef32931ea6f6d854a9a0ef7c7f3feae8676f1125cb7c2f820283e4cadd +size 339712 diff --git a/tests/data/ljspeech/wavs/LJ001-0010.wav b/tests/data/ljspeech/wavs/LJ001-0010.wav new file mode 100644 index 0000000000000000000000000000000000000000..01a2e68829a506063f8ed8b090a4516a02107a62 Binary files /dev/null and b/tests/data/ljspeech/wavs/LJ001-0010.wav differ diff --git a/tests/data/ljspeech/wavs/LJ001-0011.npy b/tests/data/ljspeech/wavs/LJ001-0011.npy new file mode 100644 index 0000000000000000000000000000000000000000..9d7f6accf4f577192f1bc2f9acba4097e23335c0 --- /dev/null +++ b/tests/data/ljspeech/wavs/LJ001-0011.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d42639bca71945249b78b9a8c90803c52f764205b2495214f8f6d725e1cc5f0 +size 103844 diff --git a/tests/data/ljspeech/wavs/LJ001-0011.wav b/tests/data/ljspeech/wavs/LJ001-0011.wav new file mode 100644 index 
0000000000000000000000000000000000000000..5ec8ce7e59694563b85fa34c590acd421008cff0 Binary files /dev/null and b/tests/data/ljspeech/wavs/LJ001-0011.wav differ diff --git a/tests/data/ljspeech/wavs/LJ001-0012.npy b/tests/data/ljspeech/wavs/LJ001-0012.npy new file mode 100644 index 0000000000000000000000000000000000000000..23951b397e170fa7d9d90edbaf3f2d4b941b2787 --- /dev/null +++ b/tests/data/ljspeech/wavs/LJ001-0012.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e6f6630d850f0a249f345f628649f0b0226becbe3cfa5f76fa67b531a628840 +size 286160 diff --git a/tests/data/ljspeech/wavs/LJ001-0012.wav b/tests/data/ljspeech/wavs/LJ001-0012.wav new file mode 100644 index 0000000000000000000000000000000000000000..6262db4bbfeb15ba298184ab0b7c7bf323f472df Binary files /dev/null and b/tests/data/ljspeech/wavs/LJ001-0012.wav differ diff --git a/tests/data/ljspeech/wavs/LJ001-0013.npy b/tests/data/ljspeech/wavs/LJ001-0013.npy new file mode 100644 index 0000000000000000000000000000000000000000..9942e121f74ec271b79a5a52a2edb7e8ae9647bf --- /dev/null +++ b/tests/data/ljspeech/wavs/LJ001-0013.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5665c5702bf7df27fb5030310d3cf13461fc62d1affc0c08950bc7198975e204 +size 35488 diff --git a/tests/data/ljspeech/wavs/LJ001-0013.wav b/tests/data/ljspeech/wavs/LJ001-0013.wav new file mode 100644 index 0000000000000000000000000000000000000000..72eca1af1a396821c0814e3ff39e9d5752ba5b59 Binary files /dev/null and b/tests/data/ljspeech/wavs/LJ001-0013.wav differ diff --git a/tests/data/ljspeech/wavs/LJ001-0014.npy b/tests/data/ljspeech/wavs/LJ001-0014.npy new file mode 100644 index 0000000000000000000000000000000000000000..74611b8f947b694f740cfe6de2dc33ec043cd77d --- /dev/null +++ b/tests/data/ljspeech/wavs/LJ001-0014.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcec0d3679393290fc1a350a96e4876840fe6be81784c60b5a488447e0dbb67c +size 519968 diff --git a/tests/data/ljspeech/wavs/LJ001-0014.wav b/tests/data/ljspeech/wavs/LJ001-0014.wav new file mode 100644 index 0000000000000000000000000000000000000000..997d31d58c24547de8b78efdfdf503cae0a7e6b7 Binary files /dev/null and b/tests/data/ljspeech/wavs/LJ001-0014.wav differ diff --git a/tests/data/ljspeech/wavs/LJ001-0015.npy b/tests/data/ljspeech/wavs/LJ001-0015.npy new file mode 100644 index 0000000000000000000000000000000000000000..5b0583ab0be3ab4acd3c875d75af025bdb369ef6 --- /dev/null +++ b/tests/data/ljspeech/wavs/LJ001-0015.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd175d2fd5b75c5982ab2302fe936746660be9eb0634e08444fe166b345a8182 +size 470176 diff --git a/tests/data/ljspeech/wavs/LJ001-0015.wav b/tests/data/ljspeech/wavs/LJ001-0015.wav new file mode 100644 index 0000000000000000000000000000000000000000..c63eb5347a06ef49c39b42911d38bec5cfd58d57 Binary files /dev/null and b/tests/data/ljspeech/wavs/LJ001-0015.wav differ diff --git a/tests/data/ljspeech/wavs/LJ001-0016.npy b/tests/data/ljspeech/wavs/LJ001-0016.npy new file mode 100644 index 0000000000000000000000000000000000000000..b1e6cb23c766bae5fe72f8c1c0e0c0019c6255d4 --- /dev/null +++ b/tests/data/ljspeech/wavs/LJ001-0016.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7846187988bcdd4802df2f41a86f5fd9b4db7aa5f72c6728856afb930813640 +size 130304 diff --git a/tests/data/ljspeech/wavs/LJ001-0016.wav b/tests/data/ljspeech/wavs/LJ001-0016.wav new file mode 100644 index 
0000000000000000000000000000000000000000..639b70c1bae56e32cb06db7c196af533108ffa39 Binary files /dev/null and b/tests/data/ljspeech/wavs/LJ001-0016.wav differ diff --git a/tests/data/ljspeech/wavs/LJ001-0017.npy b/tests/data/ljspeech/wavs/LJ001-0017.npy new file mode 100644 index 0000000000000000000000000000000000000000..0a65672ef7f448dcf8ebb55b9155356700f9dc29 --- /dev/null +++ b/tests/data/ljspeech/wavs/LJ001-0017.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26601b716c26c2e10762cfd5354bfa6e105bece2db368ac811bcc2b2f3d730a7 +size 287156 diff --git a/tests/data/ljspeech/wavs/LJ001-0017.wav b/tests/data/ljspeech/wavs/LJ001-0017.wav new file mode 100644 index 0000000000000000000000000000000000000000..3a347aa4af624fb942e8ce1a438c76b278604b08 Binary files /dev/null and b/tests/data/ljspeech/wavs/LJ001-0017.wav differ diff --git a/tests/data/ljspeech/wavs/LJ001-0018.npy b/tests/data/ljspeech/wavs/LJ001-0018.npy new file mode 100644 index 0000000000000000000000000000000000000000..25d103f9396a63d5d50be59c1b29a8c37aa23cb1 --- /dev/null +++ b/tests/data/ljspeech/wavs/LJ001-0018.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:322ef39a44dd8b4a1e4cf817862b46741b3eca20d0122966648a395c5a02fb8d +size 290764 diff --git a/tests/data/ljspeech/wavs/LJ001-0018.wav b/tests/data/ljspeech/wavs/LJ001-0018.wav new file mode 100644 index 0000000000000000000000000000000000000000..911158a08c2b1a38142fe3f8c4b7b75fec2ec726 Binary files /dev/null and b/tests/data/ljspeech/wavs/LJ001-0018.wav differ diff --git a/tests/data/ljspeech/wavs/LJ001-0019.npy b/tests/data/ljspeech/wavs/LJ001-0019.npy new file mode 100644 index 0000000000000000000000000000000000000000..54cae852154923d3c368c48b88dcea0cb0965cce --- /dev/null +++ b/tests/data/ljspeech/wavs/LJ001-0019.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51b631a97817dc8b2381eecd7b1e3d576b810013e6d13c1b483f4e987fff33de +size 222732 diff --git a/tests/data/ljspeech/wavs/LJ001-0019.wav b/tests/data/ljspeech/wavs/LJ001-0019.wav new file mode 100644 index 0000000000000000000000000000000000000000..cfd8c7e2337acd245168161b846f62a515bfd023 Binary files /dev/null and b/tests/data/ljspeech/wavs/LJ001-0019.wav differ diff --git a/tests/data/ljspeech/wavs/LJ001-0020.npy b/tests/data/ljspeech/wavs/LJ001-0020.npy new file mode 100644 index 0000000000000000000000000000000000000000..7297d03f85aa3f2d09beaca84824b617de924c02 --- /dev/null +++ b/tests/data/ljspeech/wavs/LJ001-0020.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c0f2b346e4f5f638f67f412ee749aa7251b97037f7b994cce9a28f36ccab987 +size 94764 diff --git a/tests/data/ljspeech/wavs/LJ001-0020.wav b/tests/data/ljspeech/wavs/LJ001-0020.wav new file mode 100644 index 0000000000000000000000000000000000000000..f342d46ba30826f43c02c1b9e25d57950446a970 Binary files /dev/null and b/tests/data/ljspeech/wavs/LJ001-0020.wav differ diff --git a/tests/data/ljspeech/wavs/LJ001-0021.npy b/tests/data/ljspeech/wavs/LJ001-0021.npy new file mode 100644 index 0000000000000000000000000000000000000000..db08db9353011f12291eabae31f80420bff14157 --- /dev/null +++ b/tests/data/ljspeech/wavs/LJ001-0021.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:751163d61e2760691c516f3e6ccb403d93113090f26300b3bcceb37bb0cc4cd3 +size 361248 diff --git a/tests/data/ljspeech/wavs/LJ001-0021.wav b/tests/data/ljspeech/wavs/LJ001-0021.wav new file mode 100644 index 
0000000000000000000000000000000000000000..066b71c6bfd30186c1aabe5561f0d4b7c4a8b648 Binary files /dev/null and b/tests/data/ljspeech/wavs/LJ001-0021.wav differ diff --git a/tests/data/ljspeech/wavs/LJ001-0022.npy b/tests/data/ljspeech/wavs/LJ001-0022.npy new file mode 100644 index 0000000000000000000000000000000000000000..0a4bbb0b6bfa6b62d44062ea6f478c588c4095c2 --- /dev/null +++ b/tests/data/ljspeech/wavs/LJ001-0022.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f0ab17a10dddf31f65aa1ee32295aa24ba5ed5a84846876caab05fd355bf2a3 +size 242528 diff --git a/tests/data/ljspeech/wavs/LJ001-0022.wav b/tests/data/ljspeech/wavs/LJ001-0022.wav new file mode 100644 index 0000000000000000000000000000000000000000..c00a587af8a1073c19de3c1a4d6f7b8bd6fde74a Binary files /dev/null and b/tests/data/ljspeech/wavs/LJ001-0022.wav differ diff --git a/tests/data/ljspeech/wavs/LJ001-0023.npy b/tests/data/ljspeech/wavs/LJ001-0023.npy new file mode 100644 index 0000000000000000000000000000000000000000..93b443ad19f429e4e2e643098a2b46783f8644e1 --- /dev/null +++ b/tests/data/ljspeech/wavs/LJ001-0023.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bee5c39b0a7b4bdeb92dd66966f412e5f0a74c1b61e7aed26c1338d50eccc61 +size 374744 diff --git a/tests/data/ljspeech/wavs/LJ001-0023.wav b/tests/data/ljspeech/wavs/LJ001-0023.wav new file mode 100644 index 0000000000000000000000000000000000000000..aaa274d0ef33befe007ace594e080ea7c02b3da2 Binary files /dev/null and b/tests/data/ljspeech/wavs/LJ001-0023.wav differ diff --git a/tests/data/ljspeech/wavs/LJ001-0024.npy b/tests/data/ljspeech/wavs/LJ001-0024.npy new file mode 100644 index 0000000000000000000000000000000000000000..6cb7c8236573e7dcf677ae94fd2f33735e54c03b --- /dev/null +++ b/tests/data/ljspeech/wavs/LJ001-0024.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:560f6e537f98a3693aaea3130fe9b7cba4d7f838784b4a23863872dd959cf02c +size 318728 diff --git a/tests/data/ljspeech/wavs/LJ001-0024.wav b/tests/data/ljspeech/wavs/LJ001-0024.wav new file mode 100644 index 0000000000000000000000000000000000000000..14e7a3c137dd4e5b778ac45fd897f29b3adc73fa Binary files /dev/null and b/tests/data/ljspeech/wavs/LJ001-0024.wav differ diff --git a/tests/data/ljspeech/wavs/LJ001-0025.npy b/tests/data/ljspeech/wavs/LJ001-0025.npy new file mode 100644 index 0000000000000000000000000000000000000000..7c47b76ccc00371f832d3c05d8700aa0e47e2da3 --- /dev/null +++ b/tests/data/ljspeech/wavs/LJ001-0025.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddc56be2af40ee99df08060acc9389bdce6c18aa9dd510676a858ee7b7865236 +size 317120 diff --git a/tests/data/ljspeech/wavs/LJ001-0025.wav b/tests/data/ljspeech/wavs/LJ001-0025.wav new file mode 100644 index 0000000000000000000000000000000000000000..6e11513ab18d8909ac12ede4006af32765321fe3 Binary files /dev/null and b/tests/data/ljspeech/wavs/LJ001-0025.wav differ diff --git a/tests/data/ljspeech/wavs/LJ001-0026.npy b/tests/data/ljspeech/wavs/LJ001-0026.npy new file mode 100644 index 0000000000000000000000000000000000000000..33e1e4cf0e0961d3fd91a4d56d8d28b56c9a3139 --- /dev/null +++ b/tests/data/ljspeech/wavs/LJ001-0026.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac908660dbc8bd14af7072a7e6d3461427b130651e8676ffd930533f99d7d204 +size 167488 diff --git a/tests/data/ljspeech/wavs/LJ001-0026.wav b/tests/data/ljspeech/wavs/LJ001-0026.wav new file mode 100644 index 
0000000000000000000000000000000000000000..7efbb2988af2e1af142e0dbb98dda68851acb96c Binary files /dev/null and b/tests/data/ljspeech/wavs/LJ001-0026.wav differ diff --git a/tests/data/ljspeech/wavs/LJ001-0027.npy b/tests/data/ljspeech/wavs/LJ001-0027.npy new file mode 100644 index 0000000000000000000000000000000000000000..9a166a83c897e72375a437dfb613144c0d325a71 --- /dev/null +++ b/tests/data/ljspeech/wavs/LJ001-0027.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4c6efe1cbac8a01c903cb714729f3f84e211fdc4d7933b67bc37e27b890e72a +size 441156 diff --git a/tests/data/ljspeech/wavs/LJ001-0027.wav b/tests/data/ljspeech/wavs/LJ001-0027.wav new file mode 100644 index 0000000000000000000000000000000000000000..5d86776a4dd406fee2cfb07f87ddf09431f075a0 Binary files /dev/null and b/tests/data/ljspeech/wavs/LJ001-0027.wav differ diff --git a/tests/data/ljspeech/wavs/LJ001-0028.npy b/tests/data/ljspeech/wavs/LJ001-0028.npy new file mode 100644 index 0000000000000000000000000000000000000000..29c4fd32f1dbd37f1f7f260e281307cdfc9ad80a --- /dev/null +++ b/tests/data/ljspeech/wavs/LJ001-0028.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:594d24a53ccbeb23e17c094b5014b6071662bb5e9b686a8dfb30e907615f0d30 +size 134504 diff --git a/tests/data/ljspeech/wavs/LJ001-0028.wav b/tests/data/ljspeech/wavs/LJ001-0028.wav new file mode 100644 index 0000000000000000000000000000000000000000..fbd0d7783ca74b384c0ee06cbd2c28a5c8e0e34d Binary files /dev/null and b/tests/data/ljspeech/wavs/LJ001-0028.wav differ diff --git a/tests/data/ljspeech/wavs/LJ001-0029.npy b/tests/data/ljspeech/wavs/LJ001-0029.npy new file mode 100644 index 0000000000000000000000000000000000000000..1914016e6028d46dc43c66b081cacf4dc77a78a8 --- /dev/null +++ b/tests/data/ljspeech/wavs/LJ001-0029.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1ac5a26fb9f8fbebc4d5e1e74f06180ce38df0945705bfc1f0d907fdef9c621 +size 126260 diff --git a/tests/data/ljspeech/wavs/LJ001-0029.wav b/tests/data/ljspeech/wavs/LJ001-0029.wav new file mode 100644 index 0000000000000000000000000000000000000000..d23c35c918aba1d0c9b59d837edb6168a8550706 Binary files /dev/null and b/tests/data/ljspeech/wavs/LJ001-0029.wav differ diff --git a/tests/data/ljspeech/wavs/LJ001-0030.npy b/tests/data/ljspeech/wavs/LJ001-0030.npy new file mode 100644 index 0000000000000000000000000000000000000000..a5ce595e205827d678cda715502a6e44545be871 --- /dev/null +++ b/tests/data/ljspeech/wavs/LJ001-0030.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80a0175cb17055f79df005cf5cb019c96697d861aebe33982ebac5942b5aa909 +size 223472 diff --git a/tests/data/ljspeech/wavs/LJ001-0030.wav b/tests/data/ljspeech/wavs/LJ001-0030.wav new file mode 100644 index 0000000000000000000000000000000000000000..44b15c5ad554fabfa240f74557f4064b998e6840 Binary files /dev/null and b/tests/data/ljspeech/wavs/LJ001-0030.wav differ diff --git a/tests/data/ljspeech/wavs/LJ001-0031.npy b/tests/data/ljspeech/wavs/LJ001-0031.npy new file mode 100644 index 0000000000000000000000000000000000000000..6bdfd096f6a3ea24c9dce52958c00539c8b39349 --- /dev/null +++ b/tests/data/ljspeech/wavs/LJ001-0031.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7980886675c20b776c5032b7c482def93da5c07cf5c5e0159fd4ccc72aebfcba +size 267428 diff --git a/tests/data/ljspeech/wavs/LJ001-0031.wav b/tests/data/ljspeech/wavs/LJ001-0031.wav new file mode 100644 index 
0000000000000000000000000000000000000000..c342b1a5259fe0e2a03dda763df7855d5b1ce86b Binary files /dev/null and b/tests/data/ljspeech/wavs/LJ001-0031.wav differ diff --git a/tests/data/ljspeech/wavs/LJ001-0032.npy b/tests/data/ljspeech/wavs/LJ001-0032.npy new file mode 100644 index 0000000000000000000000000000000000000000..6038ab27d26075d875ff675c7afaa0224fbd41f7 --- /dev/null +++ b/tests/data/ljspeech/wavs/LJ001-0032.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acfb037e68528ad69761f00a0a199830a54b4b96b156b7e6b86fdda0ee14a748 +size 248192 diff --git a/tests/data/ljspeech/wavs/LJ001-0032.wav b/tests/data/ljspeech/wavs/LJ001-0032.wav new file mode 100644 index 0000000000000000000000000000000000000000..41dfbe14e96347f90b942a4d2612e199a8ae8467 Binary files /dev/null and b/tests/data/ljspeech/wavs/LJ001-0032.wav differ diff --git a/tests/inputs/common_voice.tsv b/tests/inputs/common_voice.tsv new file mode 100644 index 0000000000000000000000000000000000000000..39fc4190acff0267c220895db29c49eb2a2903a3 --- /dev/null +++ b/tests/inputs/common_voice.tsv @@ -0,0 +1,6 @@ +client_id path sentence up_votes down_votes age gender accent locale segment +95324d489b122a800b840e0b0d068f7363a1a6c2cd2e7365672cc7033e38deaa794bd59edcf8196aa35c9791652b9085ac3839a98bb50ebab4a1e8538a94846b common_voice_en_20005954.mp3 The applicants are invited for coffee and visa is given immediately. 3 0 en +95324d489b122a800b840e0b0d068f7363a1a6c2cd2e7365672cc7033e38deaa794bd59edcf8196aa35c9791652b9085ac3839a98bb50ebab4a1e8538a94846b common_voice_en_20005955.mp3 Developmental robotics is related to, but differs from, evolutionary robotics. 2 0 en +95324d489b122a800b840e0b0d068f7363a1a6c2cd2e7365672cc7033e38deaa794bd59edcf8196aa35c9791652b9085ac3839a98bb50ebab4a1e8538a94846b common_voice_en_20005956.mp3 The musical was originally directed and choreographed by Alan Lund. 2 0 en +954a4181ae9fba89d1b1570f2ae148b3ee18ee2311de978e698f598db859f830d93d35574596d713518e8c96cdae01fce7a08c60c2e0a22bcf01e020924440a6 common_voice_en_19737073.mp3 He graduated from Columbia High School, in Brown County, South Dakota. 2 0 en +954a4181ae9fba89d1b1570f2ae148b3ee18ee2311de978e698f598db859f830d93d35574596d713518e8c96cdae01fce7a08c60c2e0a22bcf01e020924440a6 common_voice_en_19737074.mp3 Competition for limited resources has also resulted in some local conflicts. 2 0 en diff --git a/tests/inputs/example_1.wav b/tests/inputs/example_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..b1a0ed110ab9763dab7428f6273d696fecb4205d Binary files /dev/null and b/tests/inputs/example_1.wav differ diff --git a/tests/inputs/scale_stats.npy b/tests/inputs/scale_stats.npy new file mode 100644 index 0000000000000000000000000000000000000000..74be37553ee6204095a6f791ebe10f8f10140fba --- /dev/null +++ b/tests/inputs/scale_stats.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66e84c8c947d3cdead90cc37710c7b426562e2520e59500bc8e53c435152506c +size 10479 diff --git a/tests/inputs/server_config.json b/tests/inputs/server_config.json new file mode 100644 index 0000000000000000000000000000000000000000..0cb9b948b8853357b75656ca62bfa1b4c18f80c8 --- /dev/null +++ b/tests/inputs/server_config.json @@ -0,0 +1,14 @@ +{ + "tts_checkpoint":"checkpoint_10.pth.tar", // tts checkpoint file + "tts_config":"dummy_model_config.json", // tts config.json file + "tts_speakers": null, // json file listing speaker ids. null if no speaker embedding. 
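Editor's note: `tests/inputs/common_voice.tsv` above follows the standard Common Voice column layout (`client_id`, `path`, `sentence`, `up_votes`, `down_votes`, ...). A small, repo-independent sketch of pulling out the clip path and transcript:

```python
import csv

# Read the Common Voice TSV shown above and keep (clip, sentence) pairs.
with open("tests/inputs/common_voice.tsv", newline="", encoding="utf-8") as f:
    reader = csv.DictReader(f, delimiter="\t")
    samples = [(row["path"], row["sentence"]) for row in reader]

print(samples[0])  # ('common_voice_en_20005954.mp3', 'The applicants are invited ...')
```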
+ "wavernn_lib_path": null, // Rootpath to wavernn project folder to be imported. If this is null, model uses GL for speech synthesis. + "wavernn_file": null, // wavernn checkpoint file name + "wavernn_config": null, // wavernn config file + "vocoder_config":null, + "vocoder_checkpoint": null, + "is_wavernn_batched":true, + "port": 5002, + "use_cuda": false, + "debug": true +} diff --git a/tests/inputs/test_config.json b/tests/inputs/test_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ca4eef03fce3041275f5a90baaf093f93e8e0277 --- /dev/null +++ b/tests/inputs/test_config.json @@ -0,0 +1,69 @@ + { + "audio":{ + "audio_processor": "audio", // to use dictate different audio processors, if available. + "num_mels": 80, // size of the mel spec frame. + "fft_size": 1024, // number of stft frequency levels. Size of the linear spectogram frame. + "sample_rate": 22050, // wav sample-rate. If different than the original data, it is resampled. + "frame_length_ms": null, // stft window length in ms. + "frame_shift_ms": null, // stft window hop-lengh in ms. + "hop_length": 256, + "win_length": 1024, + "preemphasis": 0.97, // pre-emphasis to reduce spec noise and make it more structured. If 0.0, no -pre-emphasis. + "min_level_db": -100, // normalization range + "ref_level_db": 20, // reference level db, theoretically 20db is the sound of air. + "power": 1.5, // value to sharpen wav signals after GL algorithm. + "griffin_lim_iters": 30,// #griffin-lim iterations. 30-60 is a good range. Larger the value, slower the generation. + "signal_norm": true, // normalize the spec values in range [0, 1] + "symmetric_norm": true, // move normalization to range [-1, 1] + "clip_norm": true, // clip normalized values into the range. + "max_norm": 4, // scale normalization to range [-max_norm, max_norm] or [0, max_norm] + "mel_fmin": 0, // minimum freq level for mel-spec. ~50 for male and ~95 for female voices. Tune for dataset!! + "mel_fmax": 8000, // maximum freq level for mel-spec. Tune for dataset!! + "do_trim_silence": false, + "spec_gain": 20 + }, + + "characters":{ + "pad": "_", + "eos": "~", + "bos": "^", + "characters": "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!'(),-.:;? ", + "punctuations":"!'(),-.:;? ", + "phonemes":"iyɨʉɯuɪʏʊeøɘəɵɤoɛœɜɞʌɔæɐaɶɑɒᵻʘɓǀɗǃʄǂɠǁʛpbtdʈɖcɟkɡqɢʔɴŋɲɳnɱmʙrʀⱱɾɽɸβfvθðszʃʒʂʐçʝxɣχʁħʕhɦɬɮʋɹɻjɰlɭʎʟˈˌːˑʍwɥʜʢʡɕʑɺɧɚ˞ɫ" + }, + + "hidden_size": 128, + "embedding_size": 256, + "text_cleaner": "english_cleaners", + + "epochs": 2000, + "lr": 0.003, + "lr_patience": 5, + "lr_decay": 0.5, + "batch_size": 2, + "r": 5, + "mk": 1.0, + "num_loader_workers": 4, + "memory_size": 5, + + "save_step": 200, + "data_path": "tests/data/ljspeech/", + "output_path": "result", + "min_seq_len": 0, + "max_seq_len": 300, + "log_dir": "tests/outputs/", + + // MULTI-SPEAKER and GST + "use_speaker_embedding": false, // use speaker embedding to enable multi-speaker learning. + "use_gst": true, // use global style tokens + "gst": { // gst parameter if gst is enabled + "gst_style_input": null, // Condition the style input either on a + // -> wave file [path to wave] or + // -> dictionary using the style tokens {'token1': 'value', 'token2': 'value'} example {"0": 0.15, "1": 0.15, "5": -0.15} + // with the dictionary being len(dict) <= len(gst_style_tokens). + "gst_use_speaker_embedding": true, // if true pass speaker embedding in attention input GST. 
+ "gst_embedding_dim": 512, + "gst_num_heads": 4, + "gst_style_tokens": 10 + } +} diff --git a/tests/inputs/test_glow_tts.json b/tests/inputs/test_glow_tts.json new file mode 100644 index 0000000000000000000000000000000000000000..ff8a81ea9c706226cee3e5f7356be368c60d6492 --- /dev/null +++ b/tests/inputs/test_glow_tts.json @@ -0,0 +1,149 @@ +{ + "model": "glow_tts", + "run_name": "glow-tts-gatedconv", + "run_description": "glow-tts model training with gated conv.", + + // AUDIO PARAMETERS + "audio":{ + "fft_size": 1024, // number of stft frequency levels. Size of the linear spectogram frame. + "win_length": 1024, // stft window length in ms. + "hop_length": 256, // stft window hop-lengh in ms. + "frame_length_ms": null, // stft window length in ms.If null, 'win_length' is used. + "frame_shift_ms": null, // stft window hop-lengh in ms. If null, 'hop_length' is used. + + // Audio processing parameters + "sample_rate": 22050, // DATASET-RELATED: wav sample-rate. If different than the original data, it is resampled. + "preemphasis": 0.0, // pre-emphasis to reduce spec noise and make it more structured. If 0.0, no -pre-emphasis. + "ref_level_db": 0, // reference level db, theoretically 20db is the sound of air. + + // Griffin-Lim + "power": 1.1, // value to sharpen wav signals after GL algorithm. + "griffin_lim_iters": 60,// #griffin-lim iterations. 30-60 is a good range. Larger the value, slower the generation. + + // Silence trimming + "do_trim_silence": true,// enable trimming of slience of audio as you load it. LJspeech (false), TWEB (false), Nancy (true) + "trim_db": 60, // threshold for timming silence. Set this according to your dataset. + + // MelSpectrogram parameters + "num_mels": 80, // size of the mel spec frame. + "mel_fmin": 50.0, // minimum freq level for mel-spec. ~50 for male and ~95 for female voices. Tune for dataset!! + "mel_fmax": 7600.0, // maximum freq level for mel-spec. Tune for dataset!! + "spec_gain": 1.0, // scaler value appplied after log transform of spectrogram. + + // Normalization parameters + "signal_norm": true, // normalize spec values. Mean-Var normalization if 'stats_path' is defined otherwise range normalization defined by the other params. + "min_level_db": -100, // lower bound for normalization + "symmetric_norm": true, // move normalization to range [-1, 1] + "max_norm": 1.0, // scale normalization to range [-max_norm, max_norm] or [0, max_norm] + "clip_norm": true, // clip normalized values into the range. + "stats_path": null // DO NOT USE WITH MULTI_SPEAKER MODEL. scaler stats file computed by 'compute_statistics.py'. If it is defined, mean-std based notmalization is used and other normalization params are ignored + }, + + // VOCABULARY PARAMETERS + // if custom character set is not defined, + // default set in symbols.py is used + // "characters":{ + // "pad": "_", + // "eos": "~", + // "bos": "^", + // "characters": "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!'(),-.:;? ", + // "punctuations":"!'(),-.:;? ", + // "phonemes":"iyɨʉɯuɪʏʊeøɘəɵɤoɛœɜɞʌɔæɐaɶɑɒᵻʘɓǀɗǃʄǂɠǁʛpbtdʈɖcɟkɡqɢʔɴŋɲɳnɱmʙrʀⱱɾɽɸβfvθðszʃʒʂʐçʝxɣχʁħʕhɦɬɮʋɹɻjɰlɭʎʟˈˌːˑʍwɥʜʢʡɕʑɺɧɚ˞ɫ" + // }, + + "add_blank": false, // if true add a new token after each token of the sentence. This increases the size of the input sequence, but has considerably improved the prosody of the GlowTTS model. 
+ + // DISTRIBUTED TRAINING + "mixed_precision": false, + "distributed":{ + "backend": "nccl", + "url": "tcp:\/\/localhost:54323" + }, + + "reinit_layers": [], // give a list of layer names to restore from the given checkpoint. If not defined, it reloads all heuristically matching layers. + + // MODEL PARAMETERS + "use_mas": false, // use Monotonic Alignment Search if true. Otherwise use pre-computed attention alignments. + + // TRAINING + "batch_size": 2, // Batch size for training. Lower values than 32 might cause hard to learn attention. It is overwritten by 'gradual_training'. + "eval_batch_size":1, + "r": 1, // Number of decoder frames to predict per iteration. Set the initial values if gradual training is enabled. + "loss_masking": true, // enable / disable loss masking against the sequence padding. + "data_dep_init_iter": 1, + + // VALIDATION + "run_eval": true, + "test_delay_epochs": 0, //Until attention is aligned, testing only wastes computation time. + "test_sentences_file": null, // set a file to load sentences to be used for testing. If it is null then we use default english sentences. + + // OPTIMIZER + "noam_schedule": true, // use noam warmup and lr schedule. + "grad_clip": 5.0, // upper limit for gradients for clipping. + "epochs": 1, // total number of epochs to train. + "lr": 1e-3, // Initial learning rate. If Noam decay is active, maximum learning rate. + "wd": 0.000001, // Weight decay weight. + "warmup_steps": 4000, // Noam decay steps to increase the learning rate from 0 to "lr" + "seq_len_norm": false, // Normalize eash sample loss with its length to alleviate imbalanced datasets. Use it if your dataset is small or has skewed distribution of sequence lengths. + + "hidden_channels_encoder": 192, + "hidden_channels_decoder": 192, + "hidden_channels_duration_predictor": 256, + "use_encoder_prenet": true, + "encoder_type": "rel_pos_transformer", + "encoder_params": { + "kernel_size":3, + "dropout_p": 0.1, + "num_layers": 6, + "num_heads": 2, + "hidden_channels_ffn": 768, + "input_length": null + }, + + // TENSORBOARD and LOGGING + "print_step": 25, // Number of steps to log training on console. + "tb_plot_step": 100, // Number of steps to plot TB training figures. + "print_eval": false, // If True, it prints intermediate loss values in evalulation. + "save_step": 5000, // Number of training steps expected to save traninpg stats and checkpoints. + "checkpoint": true, // If true, it saves checkpoints per "save_step" + "tb_model_param_stats": false, // true, plots param stats per layer on tensorboard. Might be memory consuming, but good for debugging. + "apex_amp_level": null, + + // DATA LOADING + "text_cleaner": "phoneme_cleaners", + "enable_eos_bos_chars": false, // enable/disable beginning of sentence and end of sentence chars. + "num_loader_workers": 4, // number of training data loader processes. Don't set it too big. 4-8 are good values. + "num_val_loader_workers": 4, // number of evaluation data loader processes. + "batch_group_size": 0, //Number of batches to shuffle after bucketing. + "min_seq_len": 3, // DATASET-RELATED: minimum text length to use in training + "max_seq_len": 500, // DATASET-RELATED: maximum text length + "compute_f0": false, // compute f0 values in data-loader + "compute_input_seq_cache": true, + "use_noise_augment": true, + + // PATHS + "output_path": "tests/train_outputs/", + + // PHONEMES + "phoneme_cache_path": "tests/outputs/phoneme_cache/", // phoneme computation is slow, therefore, it caches results in the given folder. 
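Editor's note: `noam_schedule`, `lr`, and `warmup_steps` in the optimizer block above refer to the Noam warmup schedule from "Attention Is All You Need". A common formulation, scaled so that the configured `lr` is the peak learning rate (the repo's exact scaling is not verified here):

```python
def noam_lr(step, base_lr=1e-3, warmup_steps=4000):
    """Noam warmup: linear ramp for `warmup_steps`, then ~1/sqrt(step) decay.

    Scaled so the peak equals `base_lr`; that scaling is an assumption about
    how the repo applies the schedule.
    """
    step = max(step, 1)
    return base_lr * warmup_steps**0.5 * min(step * warmup_steps**-1.5, step**-0.5)


# The peak is reached at step == warmup_steps:
assert abs(noam_lr(4000) - 1e-3) < 1e-9
```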
+ "use_phonemes": true, // use phonemes instead of raw characters. It is suggested for better pronounciation. + "phoneme_language": "en-us", // depending on your target language, pick one from https://github.com/bootphon/phonemizer#languages + + // MULTI-SPEAKER and GST + "use_external_speaker_embedding_file": false, + "external_speaker_embedding_file": null, + "use_speaker_embedding": false, // use speaker embedding to enable multi-speaker learning. + + // DATASETS + "datasets": // List of datasets. They all merged and they get different speaker_ids. + [ + { + "name": "ljspeech", + "path": "tests/data/ljspeech/", + "meta_file_train": "metadata.csv", + "meta_file_val": "metadata.csv" + } + ] +} + + diff --git a/tests/inputs/test_speedy_speech.json b/tests/inputs/test_speedy_speech.json new file mode 100644 index 0000000000000000000000000000000000000000..ae4b8b2d0a2f4dc8cdff8320dd235f236865a59f --- /dev/null +++ b/tests/inputs/test_speedy_speech.json @@ -0,0 +1,153 @@ +{ + "model": "speedy_speech", + "run_name": "test_sample_dataset_run", + "run_description": "sample dataset test run", + + // AUDIO PARAMETERS + "audio":{ + // stft parameters + "fft_size": 1024, // number of stft frequency levels. Size of the linear spectogram frame. + "win_length": 1024, // stft window length in ms. + "hop_length": 256, // stft window hop-lengh in ms. + "frame_length_ms": null, // stft window length in ms.If null, 'win_length' is used. + "frame_shift_ms": null, // stft window hop-lengh in ms. If null, 'hop_length' is used. + + // Audio processing parameters + "sample_rate": 22050, // DATASET-RELATED: wav sample-rate. + "preemphasis": 0.0, // pre-emphasis to reduce spec noise and make it more structured. If 0.0, no -pre-emphasis. + "ref_level_db": 20, // reference level db, theoretically 20db is the sound of air. + + // Silence trimming + "do_trim_silence": true,// enable trimming of slience of audio as you load it. LJspeech (true), TWEB (false), Nancy (true) + "trim_db": 60, // threshold for timming silence. Set this according to your dataset. + + // Griffin-Lim + "power": 1.5, // value to sharpen wav signals after GL algorithm. + "griffin_lim_iters": 60,// #griffin-lim iterations. 30-60 is a good range. Larger the value, slower the generation. + + // MelSpectrogram parameters + "num_mels": 80, // size of the mel spec frame. + "mel_fmin": 50.0, // minimum freq level for mel-spec. ~50 for male and ~95 for female voices. Tune for dataset!! + "mel_fmax": 7600.0, // maximum freq level for mel-spec. Tune for dataset!! + "spec_gain": 1, + + // Normalization parameters + "signal_norm": true, // normalize spec values. Mean-Var normalization if 'stats_path' is defined otherwise range normalization defined by the other params. + "min_level_db": -100, // lower bound for normalization + "symmetric_norm": true, // move normalization to range [-1, 1] + "max_norm": 4.0, // scale normalization to range [-max_norm, max_norm] or [0, max_norm] + "clip_norm": true, // clip normalized values into the range. + "stats_path": null // DO NOT USE WITH MULTI_SPEAKER MODEL. scaler stats file computed by 'compute_statistics.py'. If it is defined, mean-std based notmalization is used and other normalization params are ignored + }, + + // VOCABULARY PARAMETERS + // if custom character set is not defined, + // default set in symbols.py is used + // "characters":{ + // "pad": "_", + // "eos": "&", + // "bos": "*", + // "characters": "ABCDEFGHIJKLMNOPQRSTUVWXYZÇÃÀÁÂÊÉÍÓÔÕÚÛabcdefghijklmnopqrstuvwxyzçãàáâêéíóôõúû!(),-.:;? 
", + // "punctuations":"!'(),-.:;? ", + // "phonemes":"iyɨʉɯuɪʏʊeøɘəɵɤoɛœɜɞʌɔæɐaɶɑɒᵻʘɓǀɗǃʄǂɠǁʛpbtdʈɖcɟkɡqɢʔɴŋɲɳnɱmʙrʀⱱɾɽɸβfvθðszʃʒʂʐçʝxɣχʁħʕhɦɬɮʋɹɻjɰlɭʎʟˈˌːˑʍwɥʜʢʡɕʑɺɧɚ˞ɫ'̃' " + // }, + + "add_blank": false, // if true add a new token after each token of the sentence. This increases the size of the input sequence, but has considerably improved the prosody of the GlowTTS model. + + // DISTRIBUTED TRAINING + "distributed":{ + "backend": "nccl", + "url": "tcp:\/\/localhost:54321" + }, + + "reinit_layers": [], // give a list of layer names to restore from the given checkpoint. If not defined, it reloads all heuristically matching layers. + + // MODEL PARAMETERS + "positional_encoding": true, + "hidden_channels": 128, + "encoder_type": "residual_conv_bn", + "encoder_type": "residual_conv_bn", + "encoder_params":{ + "kernel_size": 4, + "dilations": [1, 2, 4, 1, 2, 4, 1, 2, 4, 1, 2, 4, 1], + "num_conv_blocks": 2, + "num_res_blocks": 13 + }, + "decoder_type": "residual_conv_bn", + "decoder_params":{ + "kernel_size": 4, + "dilations": [1, 2, 4, 8, 1, 2, 4, 8, 1, 2, 4, 8, 1, 2, 4, 8, 1], + "num_conv_blocks": 2, + "num_res_blocks": 17 + }, + + + // TRAINING + "batch_size":64, // Batch size for training. Lower values than 32 might cause hard to learn attention. It is overwritten by 'gradual_training'. + "eval_batch_size":32, + "r": 1, // Number of decoder frames to predict per iteration. Set the initial values if gradual training is enabled. + "loss_masking": true, // enable / disable loss masking against the sequence padding. + + // LOSS PARAMETERS + "ssim_alpha": 1, + "l1_alpha": 1, + "huber_alpha": 1, + + // VALIDATION + "run_eval": true, + "test_delay_epochs": -1, //Until attention is aligned, testing only wastes computation time. + "test_sentences_file": null, // set a file to load sentences to be used for testing. If it is null then we use default english sentences. + + // OPTIMIZER + "noam_schedule": true, // use noam warmup and lr schedule. + "grad_clip": 1.0, // upper limit for gradients for clipping. + "epochs": 1, // total number of epochs to train. + "lr": 0.002, // Initial learning rate. If Noam decay is active, maximum learning rate. + "warmup_steps": 4000, // Noam decay steps to increase the learning rate from 0 to "lr" + + // TENSORBOARD and LOGGING + "print_step": 1, // Number of steps to log training on console. + "tb_plot_step": 100, // Number of steps to plot TB training figures. + "print_eval": false, // If True, it prints intermediate loss values in evalulation. + "save_step": 5000, // Number of training steps expected to save traninpg stats and checkpoints. + "checkpoint": true, // If true, it saves checkpoints per "save_step" + "tb_model_param_stats": false, // true, plots param stats per layer on tensorboard. Might be memory consuming, but good for debugging.:set n + "mixed_precision": false, + + // DATA LOADING + "text_cleaner": "english_cleaners", + "enable_eos_bos_chars": false, // enable/disable beginning of sentence and end of sentence chars. + "num_loader_workers": 0, // number of training data loader processes. Don't set it too big. 4-8 are good values. + "num_val_loader_workers": 0, // number of evaluation data loader processes. + "batch_group_size": 0, //Number of batches to shuffle after bucketing. 
+ "min_seq_len": 2, // DATASET-RELATED: minimum text length to use in training + "max_seq_len": 300, // DATASET-RELATED: maximum text length + "compute_f0": false, // compute f0 values in data-loader + "compute_input_seq_cache": false, // if true, text sequences are computed before starting training. If phonemes are enabled, they are also computed at this stage. + + // PATHS + "output_path": "tests/train_outputs/", + + // PHONEMES + "phoneme_cache_path": "tests/train_outputs/phoneme_cache/", // phoneme computation is slow, therefore, it caches results in the given folder. + "use_phonemes": true, // use phonemes instead of raw characters. It is suggested for better pronoun[ciation. + "phoneme_language": "en-us", // depending on your target language, pick one from https://github.com/bootphon/phonemizer#languages + + // MULTI-SPEAKER and GST + "use_speaker_embedding": false, // use speaker embedding to enable multi-speaker learning. + "use_external_speaker_embedding_file": false, // if true, forces the model to use external embedding per sample instead of nn.embeddings, that is, it supports external embeddings such as those used at: https://arxiv.org/abs /1806.04558 + "external_speaker_embedding_file": "/home/erogol/Data/libritts/speakers.json", // if not null and use_external_speaker_embedding_file is true, it is used to load a specific embedding file and thus uses these embeddings instead of nn.embeddings, that is, it supports external embeddings such as those used at: https://arxiv.org/abs /1806.04558 + + + // DATASETS + "datasets": // List of datasets. They all merged and they get different speaker_ids. + [ + { + "name": "ljspeech", + "path": "tests/data/ljspeech/", + "meta_file_train": "metadata.csv", + "meta_file_val": "metadata.csv", + "meta_file_attn_mask": "tests/data/ljspeech/metadata_attn_mask.txt" + } + ] +} \ No newline at end of file diff --git a/tests/inputs/test_train_config.json b/tests/inputs/test_train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ee0680e375576a3d9aef1a4c2c5344b24e67f43e --- /dev/null +++ b/tests/inputs/test_train_config.json @@ -0,0 +1,175 @@ +{ + "model": "Tacotron2", + "run_name": "test_sample_dataset_run", + "run_description": "sample dataset test run", + + // AUDIO PARAMETERS + "audio":{ + // stft parameters + "fft_size": 1024, // number of stft frequency levels. Size of the linear spectogram frame. + "win_length": 1024, // stft window length in ms. + "hop_length": 256, // stft window hop-lengh in ms. + "frame_length_ms": null, // stft window length in ms.If null, 'win_length' is used. + "frame_shift_ms": null, // stft window hop-lengh in ms. If null, 'hop_length' is used. + + // Audio processing parameters + "sample_rate": 22050, // DATASET-RELATED: wav sample-rate. + "preemphasis": 0.0, // pre-emphasis to reduce spec noise and make it more structured. If 0.0, no -pre-emphasis. + "ref_level_db": 20, // reference level db, theoretically 20db is the sound of air. + + // Silence trimming + "do_trim_silence": true,// enable trimming of slience of audio as you load it. LJspeech (true), TWEB (false), Nancy (true) + "trim_db": 60, // threshold for timming silence. Set this according to your dataset. + + // Griffin-Lim + "power": 1.5, // value to sharpen wav signals after GL algorithm. + "griffin_lim_iters": 60,// #griffin-lim iterations. 30-60 is a good range. Larger the value, slower the generation. + + // MelSpectrogram parameters + "num_mels": 80, // size of the mel spec frame. 
+ "mel_fmin": 0.0, // minimum freq level for mel-spec. ~50 for male and ~95 for female voices. Tune for dataset!! + "mel_fmax": 8000.0, // maximum freq level for mel-spec. Tune for dataset!! + "spec_gain": 20.0, + + // Normalization parameters + "signal_norm": true, // normalize spec values. Mean-Var normalization if 'stats_path' is defined otherwise range normalization defined by the other params. + "min_level_db": -100, // lower bound for normalization + "symmetric_norm": true, // move normalization to range [-1, 1] + "max_norm": 4.0, // scale normalization to range [-max_norm, max_norm] or [0, max_norm] + "clip_norm": true, // clip normalized values into the range. + "stats_path": null // DO NOT USE WITH MULTI_SPEAKER MODEL. scaler stats file computed by 'compute_statistics.py'. If it is defined, mean-std based notmalization is used and other normalization params are ignored + }, + + // VOCABULARY PARAMETERS + // if custom character set is not defined, + // default set in symbols.py is used + // "characters":{ + // "pad": "_", + // "eos": "~", + // "bos": "^", + // "characters": "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!'(),-.:;? ", + // "punctuations":"!'(),-.:;? ", + // "phonemes":"iyɨʉɯuɪʏʊeøɘəɵɤoɛœɜɞʌɔæɐaɶɑɒᵻʘɓǀɗǃʄǂɠǁʛpbtdʈɖcɟkɡqɢʔɴŋɲɳnɱmʙrʀⱱɾɽɸβfvθðszʃʒʂʐçʝxɣχʁħʕhɦɬɮʋɹɻjɰlɭʎʟˈˌːˑʍwɥʜʢʡɕʑɺɧɚ˞ɫ" + // }, + + // DISTRIBUTED TRAINING + "distributed":{ + "backend": "nccl", + "url": "tcp:\/\/localhost:54321" + }, + + "reinit_layers": [], // give a list of layer names to restore from the given checkpoint. If not defined, it reloads all heuristically matching layers. + + // TRAINING + "batch_size": 1, // Batch size for training. Lower values than 32 might cause hard to learn attention. It is overwritten by 'gradual_training'. + "eval_batch_size":1, + "r": 7, // Number of decoder frames to predict per iteration. Set the initial values if gradual training is enabled. + "gradual_training": [[0, 7, 4]], //set gradual training steps [first_step, r, batch_size]. If it is null, gradual training is disabled. For Tacotron, you might need to reduce the 'batch_size' as you proceeed. + "loss_masking": true, // enable / disable loss masking against the sequence padding. + "ga_alpha": 10.0, // weight for guided attention loss. If > 0, guided attention is enabled. + "mixed_precision": false, + + // VALIDATION + "run_eval": true, + "test_delay_epochs": 0, //Until attention is aligned, testing only wastes computation time. + "test_sentences_file": null, // set a file to load sentences to be used for testing. If it is null then we use default english sentences. + + // LOSS SETTINGS + "loss_masking": true, // enable / disable loss masking against the sequence padding. + "decoder_loss_alpha": 0.5, // original decoder loss weight. If > 0, it is enabled + "postnet_loss_alpha": 0.25, // original postnet loss weight. If > 0, it is enabled + "postnet_diff_spec_alpha": 0.25, // differential spectral loss weight. If > 0, it is enabled + "decoder_diff_spec_alpha": 0.25, // differential spectral loss weight. If > 0, it is enabled + "decoder_ssim_alpha": 0.5, // decoder ssim loss weight. If > 0, it is enabled + "postnet_ssim_alpha": 0.25, // postnet ssim loss weight. If > 0, it is enabled + "ga_alpha": 5.0, // weight for guided attention loss. If > 0, guided attention is enabled. + "stopnet_pos_weight": 15.0, // pos class weight for stopnet loss since there are way more negative samples than positive samples. + + // OPTIMIZER + "noam_schedule": false, // use noam warmup and lr schedule. 
+ "grad_clip": 1.0, // upper limit for gradients for clipping. + "epochs": 1, // total number of epochs to train. + "lr": 0.0001, // Initial learning rate. If Noam decay is active, maximum learning rate. + "wd": 0.000001, // Weight decay weight. + "warmup_steps": 4000, // Noam decay steps to increase the learning rate from 0 to "lr" + "seq_len_norm": false, // Normalize eash sample loss with its length to alleviate imbalanced datasets. Use it if your dataset is small or has skewed distribution of sequence lengths. + + // TACOTRON PRENET + "memory_size": -1, // ONLY TACOTRON - size of the memory queue used fro storing last decoder predictions for auto-regression. If < 0, memory queue is disabled and decoder only uses the last prediction frame. + "prenet_type": "bn", // "original" or "bn". + "prenet_dropout": false, // enable/disable dropout at prenet. + + // TACOTRON ATTENTION + "attention_type": "original", // 'original' , 'graves', 'dynamic_convolution' + "attention_heads": 4, // number of attention heads (only for 'graves') + "attention_norm": "sigmoid", // softmax or sigmoid. + "windowing": false, // Enables attention windowing. Used only in eval mode. + "use_forward_attn": false, // if it uses forward attention. In general, it aligns faster. + "forward_attn_mask": false, // Additional masking forcing monotonicity only in eval mode. + "transition_agent": false, // enable/disable transition agent of forward attention. + "location_attn": true, // enable_disable location sensitive attention. It is enabled for TACOTRON by default. + "bidirectional_decoder": false, // use https://arxiv.org/abs/1907.09006. Use it, if attention does not work well with your dataset. + "double_decoder_consistency": true, // use DDC explained here https://erogol.com/solving-attention-problems-of-tts-models-with-double-decoder-consistency-draft/ + "ddc_r": 7, // reduction rate for coarse decoder. + + // STOPNET + "stopnet": true, // Train stopnet predicting the end of synthesis. + "separate_stopnet": true, // Train stopnet seperately if 'stopnet==true'. It prevents stopnet loss to influence the rest of the model. It causes a better model, but it trains SLOWER. + + // TENSORBOARD and LOGGING + "print_step": 1, // Number of steps to log training on console. + "tb_plot_step": 100, // Number of steps to plot TB training figures. + "print_eval": false, // If True, it prints intermediate loss values in evalulation. + "save_step": 10000, // Number of training steps expected to save traninpg stats and checkpoints. + "checkpoint": true, // If true, it saves checkpoints per "save_step" + "tb_model_param_stats": false, // true, plots param stats per layer on tensorboard. Might be memory consuming, but good for debugging. + + // DATA LOADING + "text_cleaner": "phoneme_cleaners", + "enable_eos_bos_chars": false, // enable/disable beginning of sentence and end of sentence chars. + "num_loader_workers": 4, // number of training data loader processes. Don't set it too big. 4-8 are good values. + "num_val_loader_workers": 4, // number of evaluation data loader processes. + "batch_group_size": 0, //Number of batches to shuffle after bucketing. + "min_seq_len": 6, // DATASET-RELATED: minimum text length to use in training + "max_seq_len": 153, // DATASET-RELATED: maximum text length + "compute_input_seq_cache": true, + + // PATHS + "output_path": "tests/train_outputs/", + + // PHONEMES + "phoneme_cache_path": "tests/train_outputs/phoneme_cache/", // phoneme computation is slow, therefore, it caches results in the given folder. 
+ "use_phonemes": true, // use phonemes instead of raw characters. It is suggested for better pronounciation. + "phoneme_language": "en-us", // depending on your target language, pick one from https://github.com/bootphon/phonemizer#languages + + // MULTI-SPEAKER and GST + "use_external_speaker_embedding_file": false, + "external_speaker_embedding_file": null, + "use_speaker_embedding": false, // use speaker embedding to enable multi-speaker learning. + "use_gst": true, // use global style tokens + "gst": { // gst parameter if gst is enabled + "gst_style_input": null, // Condition the style input either on a + // -> wave file [path to wave] or + // -> dictionary using the style tokens {'token1': 'value', 'token2': 'value'} example {"0": 0.15, "1": 0.15, "5": -0.15} + // with the dictionary being len(dict) == len(gst_style_tokens). + "gst_use_speaker_embedding": true, // if true pass speaker embedding in attention input GST. + "gst_embedding_dim": 512, + "gst_num_heads": 4, + "gst_style_tokens": 10 + }, + + // DATASETS + "train_portion": 0.1, // dataset portion used for training. It is mainly for internal experiments. + "eval_portion": 0.1, // dataset portion used for training. It is mainly for internal experiments. + "datasets": // List of datasets. They all merged and they get different speaker_ids. + [ + { + "name": "ljspeech", + "path": "tests/data/ljspeech/", + "meta_file_train": "metadata.csv", + "meta_file_val": "metadata.csv" + } + ] + +} + diff --git a/tests/inputs/test_vocoder_audio_config.json b/tests/inputs/test_vocoder_audio_config.json new file mode 100644 index 0000000000000000000000000000000000000000..08acc48cd34296c4549931ce440fda8d1882ba66 --- /dev/null +++ b/tests/inputs/test_vocoder_audio_config.json @@ -0,0 +1,24 @@ +{ + "audio":{ + "num_mels": 80, // size of the mel spec frame. + "num_freq": 513, // number of stft frequency levels. Size of the linear spectogram frame. + "sample_rate": 22050, // wav sample-rate. If different than the original data, it is resampled. + "frame_length_ms": null, // stft window length in ms. + "frame_shift_ms": null, // stft window hop-lengh in ms. + "hop_length": 256, + "win_length": 1024, + "preemphasis": 0.97, // pre-emphasis to reduce spec noise and make it more structured. If 0.0, no -pre-emphasis. + "min_level_db": -100, // normalization range + "ref_level_db": 20, // reference level db, theoretically 20db is the sound of air. + "power": 1.5, // value to sharpen wav signals after GL algorithm. + "griffin_lim_iters": 30,// #griffin-lim iterations. 30-60 is a good range. Larger the value, slower the generation. + "signal_norm": true, // normalize the spec values in range [0, 1] + "symmetric_norm": true, // move normalization to range [-1, 1] + "clip_norm": true, // clip normalized values into the range. + "max_norm": 4, // scale normalization to range [-max_norm, max_norm] or [0, max_norm] + "mel_fmin": 0, // minimum freq level for mel-spec. ~50 for male and ~95 for female voices. Tune for dataset!! + "mel_fmax": 8000, // maximum freq level for mel-spec. Tune for dataset!! 
+ "do_trim_silence": false + } +} + diff --git a/tests/inputs/test_vocoder_multiband_melgan_config.json b/tests/inputs/test_vocoder_multiband_melgan_config.json new file mode 100644 index 0000000000000000000000000000000000000000..442550c6cb3a6aa8941f7f0027e8ade120e2044a --- /dev/null +++ b/tests/inputs/test_vocoder_multiband_melgan_config.json @@ -0,0 +1,144 @@ +{ + "run_name": "multiband-melgan", + "run_description": "multiband melgan mean-var scaling", + + // AUDIO PARAMETERS + "audio":{ + "fft_size": 1024, // number of stft frequency levels. Size of the linear spectogram frame. + "win_length": 1024, // stft window length in ms. + "hop_length": 256, // stft window hop-lengh in ms. + "frame_length_ms": null, // stft window length in ms.If null, 'win_length' is used. + "frame_shift_ms": null, // stft window hop-lengh in ms. If null, 'hop_length' is used. + + // Audio processing parameters + "sample_rate": 22050, // DATASET-RELATED: wav sample-rate. If different than the original data, it is resampled. + "preemphasis": 0.0, // pre-emphasis to reduce spec noise and make it more structured. If 0.0, no -pre-emphasis. + "ref_level_db": 0, // reference level db, theoretically 20db is the sound of air. + + // Silence trimming + "do_trim_silence": true,// enable trimming of slience of audio as you load it. LJspeech (false), TWEB (false), Nancy (true) + "trim_db": 60, // threshold for timming silence. Set this according to your dataset. + + // MelSpectrogram parameters + "num_mels": 80, // size of the mel spec frame. + "mel_fmin": 50.0, // minimum freq level for mel-spec. ~50 for male and ~95 for female voices. Tune for dataset!! + "mel_fmax": 7600.0, // maximum freq level for mel-spec. Tune for dataset!! + "spec_gain": 1.0, // scaler value appplied after log transform of spectrogram. + + // Normalization parameters + "signal_norm": true, // normalize spec values. Mean-Var normalization if 'stats_path' is defined otherwise range normalization defined by the other params. + "min_level_db": -100, // lower bound for normalization + "symmetric_norm": true, // move normalization to range [-1, 1] + "max_norm": 4.0, // scale normalization to range [-max_norm, max_norm] or [0, max_norm] + "clip_norm": true, // clip normalized values into the range. + "stats_path": null // DO NOT USE WITH MULTI_SPEAKER MODEL. scaler stats file computed by 'compute_statistics.py'. 
If it is defined, mean-std based notmalization is used and other normalization params are ignored + }, + + // DISTRIBUTED TRAINING + // "distributed":{ + // "backend": "nccl", + // "url": "tcp:\/\/localhost:54321" + // }, + + // MODEL PARAMETERS + "use_pqmf": true, + + // LOSS PARAMETERS + "use_stft_loss": true, + "use_subband_stft_loss": true, + "use_mse_gan_loss": true, + "use_hinge_gan_loss": false, + "use_feat_match_loss": false, // use only with melgan discriminators + + // loss weights + "stft_loss_weight": 0.5, + "subband_stft_loss_weight": 0.5, + "mse_G_loss_weight": 2.5, + "hinge_G_loss_weight": 2.5, + "feat_match_loss_weight": 25, + + // multiscale stft loss parameters + "stft_loss_params": { + "n_ffts": [1024, 2048, 512], + "hop_lengths": [120, 240, 50], + "win_lengths": [600, 1200, 240] + }, + + // subband multiscale stft loss parameters + "subband_stft_loss_params":{ + "n_ffts": [384, 683, 171], + "hop_lengths": [30, 60, 10], + "win_lengths": [150, 300, 60] + }, + + "target_loss": "avg_G_loss", // loss value to pick the best model to save after each epoch + + // DISCRIMINATOR + "discriminator_model": "melgan_multiscale_discriminator", + "discriminator_model_params":{ + "base_channels": 16, + "max_channels":512, + "downsample_factors":[4, 4, 4] + }, + "steps_to_start_discriminator": 200000, // steps required to start GAN trainining.1 + + // GENERATOR + "generator_model": "multiband_melgan_generator", + "generator_model_params": { + "upsample_factors":[8, 4, 2], + "num_res_blocks": 4 + }, + + // DATASET + "data_path": "tests/data/ljspeech/wavs/", + "feature_path": null, + "seq_len": 16384, + "pad_short": 2000, + "conv_pad": 0, + "use_noise_augment": false, + "use_cache": true, + + "reinit_layers": [], // give a list of layer names to restore from the given checkpoint. If not defined, it reloads all heuristically matching layers. + + // TRAINING + "batch_size": 4, // Batch size for training. Lower values than 32 might cause hard to learn attention. It is overwritten by 'gradual_training'. + + // VALIDATION + "run_eval": true, + "test_delay_epochs": 10, //Until attention is aligned, testing only wastes computation time. + "test_sentences_file": null, // set a file to load sentences to be used for testing. If it is null then we use default english sentences. + + // OPTIMIZER + "epochs": 1, // total number of epochs to train. + "wd": 0.0, // Weight decay weight. + "gen_clip_grad": -1, // Generator gradient clipping threshold. Apply gradient clipping if > 0 + "disc_clip_grad": -1, // Discriminator gradient clipping threshold. + "lr_scheduler_gen": "MultiStepLR", // one of the schedulers from https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate + "lr_scheduler_gen_params": { + "gamma": 0.5, + "milestones": [100000, 200000, 300000, 400000, 500000, 600000] + }, + "lr_scheduler_disc": "MultiStepLR", // one of the schedulers from https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate + "lr_scheduler_disc_params": { + "gamma": 0.5, + "milestones": [100000, 200000, 300000, 400000, 500000, 600000] + }, + "lr_gen": 1e-4, // Initial learning rate. If Noam decay is active, maximum learning rate. + "lr_disc": 1e-4, + + // TENSORBOARD and LOGGING + "print_step": 1, // Number of steps to log traning on console. + "print_eval": false, // If True, it prints loss values for each step in eval run. + "save_step": 25000, // Number of training steps expected to plot training stats on TB and save model checkpoints. 
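One relationship worth spelling out for the multi-band setup above: the generator only has to upsample each mel frame to hop_length / num_subbands samples, because the PQMF synthesis filter restores the full audio rate. A small consistency check, assuming the usual four PQMF sub-bands (the band count itself is not spelled out in this config):

```python
# Consistency sketch, not repo code: per-band generator upsampling times the
# assumed number of PQMF sub-bands should equal one hop of audio samples.
import numpy as np

hop_length = 256
upsample_factors = [8, 4, 2]   # from "generator_model_params" above
num_pqmf_bands = 4             # assumption: the usual multi-band MelGAN setting

assert np.prod(upsample_factors) * num_pqmf_bands == hop_length, \
    "generator upsampling x sub-bands must reproduce hop_length"
```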
+ "checkpoint": true, // If true, it saves checkpoints per "save_step" + "tb_model_param_stats": false, // true, plots param stats per layer on tensorboard. Might be memory consuming, but good for debugging. + + // DATA LOADING + "num_loader_workers": 4, // number of training data loader processes. Don't set it too big. 4-8 are good values. + "num_val_loader_workers": 4, // number of evaluation data loader processes. + "eval_split_size": 10, + + // PATHS + "output_path": "tests/train_outputs/" +} + diff --git a/tests/inputs/test_vocoder_wavegrad.json b/tests/inputs/test_vocoder_wavegrad.json new file mode 100644 index 0000000000000000000000000000000000000000..8fa0bbe12d1a10da2911e0196757c21cd01eeb47 --- /dev/null +++ b/tests/inputs/test_vocoder_wavegrad.json @@ -0,0 +1,114 @@ +{ + "run_name": "wavegrad-ljspeech", + "run_description": "wavegrad ljspeech", + + "audio":{ + "fft_size": 1024, // number of stft frequency levels. Size of the linear spectogram frame. + "win_length": 1024, // stft window length in ms. + "hop_length": 256, // stft window hop-lengh in ms. + "frame_length_ms": null, // stft window length in ms.If null, 'win_length' is used. + "frame_shift_ms": null, // stft window hop-lengh in ms. If null, 'hop_length' is used. + + // Audio processing parameters + "sample_rate": 22050, // DATASET-RELATED: wav sample-rate. If different than the original data, it is resampled. + "preemphasis": 0.0, // pre-emphasis to reduce spec noise and make it more structured. If 0.0, no -pre-emphasis. + "ref_level_db": 0, // reference level db, theoretically 20db is the sound of air. + + // Silence trimming + "do_trim_silence": true,// enable trimming of slience of audio as you load it. LJspeech (false), TWEB (false), Nancy (true) + "trim_db": 60, // threshold for timming silence. Set this according to your dataset. + + // MelSpectrogram parameters + "num_mels": 80, // size of the mel spec frame. + "mel_fmin": 50.0, // minimum freq level for mel-spec. ~50 for male and ~95 for female voices. Tune for dataset!! + "mel_fmax": 7600.0, // maximum freq level for mel-spec. Tune for dataset!! + "spec_gain": 1.0, // scaler value appplied after log transform of spectrogram. + + // Normalization parameters + "signal_norm": true, // normalize spec values. Mean-Var normalization if 'stats_path' is defined otherwise range normalization defined by the other params. + "min_level_db": -100, // lower bound for normalization + "symmetric_norm": true, // move normalization to range [-1, 1] + "max_norm": 4.0, // scale normalization to range [-max_norm, max_norm] or [0, max_norm] + "clip_norm": true, // clip normalized values into the range. + "stats_path": null // DO NOT USE WITH MULTI_SPEAKER MODEL. scaler stats file computed by 'compute_statistics.py'. 
If it is defined, mean-std based notmalization is used and other normalization params are ignored + }, + + // DISTRIBUTED TRAINING + "mixed_precision": false, + "distributed":{ + "backend": "nccl", + "url": "tcp:\/\/localhost:54322" + }, + + "target_loss": "avg_wavegrad_loss", // loss value to pick the best model to save after each epoch + + // MODEL PARAMETERS + "generator_model": "wavegrad", + "model_params":{ + "y_conv_channels":32, + "x_conv_channels":768, + "ublock_out_channels": [512, 512, 256, 128, 128], + "dblock_out_channels": [128, 128, 256, 512], + "upsample_factors": [4, 4, 4, 2, 2], + "upsample_dilations": [ + [1, 2, 1, 2], + [1, 2, 1, 2], + [1, 2, 4, 8], + [1, 2, 4, 8], + [1, 2, 4, 8]], + "use_weight_norm": true + }, + + // DATASET + "data_path": "tests/data/ljspeech/wavs/", // root data path. It finds all wav files recursively from there. + "feature_path": null, // if you use precomputed features + "seq_len": 6144, // 24 * hop_length + "pad_short": 0, // additional padding for short wavs + "conv_pad": 0, // additional padding against convolutions applied to spectrograms + "use_noise_augment": false, // add noise to the audio signal for augmentation + "use_cache": true, // use in memory cache to keep the computed features. This might cause OOM. + + "reinit_layers": [], // give a list of layer names to restore from the given checkpoint. If not defined, it reloads all heuristically matching layers. + + // TRAINING + "batch_size": 1, // Batch size for training. + "train_noise_schedule":{ + "min_val": 1e-6, + "max_val": 1e-2, + "num_steps": 1000 + }, + "test_noise_schedule":{ + "min_val": 1e-6, + "max_val": 1e-2, + "num_steps": 2 + }, + + // VALIDATION + "run_eval": true, // enable/disable evaluation run + + // OPTIMIZER + "epochs": 1, // total number of epochs to train. + "clip_grad": 1.0, // Generator gradient clipping threshold. Apply gradient clipping if > 0 + "lr_scheduler": "MultiStepLR", // one of the schedulers from https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate + "lr_scheduler_params": { + "gamma": 0.5, + "milestones": [100000, 200000, 300000, 400000, 500000, 600000] + }, + "lr": 1e-4, // Initial learning rate. If Noam decay is active, maximum learning rate. + + // TENSORBOARD and LOGGING + "print_step": 250, // Number of steps to log traning on console. + "print_eval": false, // If True, it prints loss values for each step in eval run. + "save_step": 10000, // Number of training steps expected to plot training stats on TB and save model checkpoints. + "checkpoint": true, // If true, it saves checkpoints per "save_step" + "tb_model_param_stats": true, // true, plots param stats per layer on tensorboard. Might be memory consuming, but good for debugging. + + // DATA LOADING + "num_loader_workers": 4, // number of training data loader processes. Don't set it too big. 4-8 are good values. + "num_val_loader_workers": 4, // number of evaluation data loader processes. + "eval_split_size": 4, + + // PATHS + "output_path": "tests/train_outputs/" +} + diff --git a/tests/inputs/test_vocoder_wavernn_config.json b/tests/inputs/test_vocoder_wavernn_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9df32fefdbb4a7c64e46869a415e48201aa5c2ce --- /dev/null +++ b/tests/inputs/test_vocoder_wavernn_config.json @@ -0,0 +1,107 @@ +{ + "run_name": "wavernn_test", + "run_description": "wavernn_test training", + + // AUDIO PARAMETERS + "audio":{ + "fft_size": 1024, // number of stft frequency levels. Size of the linear spectogram frame. 
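Two sketches tied to the wavegrad section a few lines above: the conditioning `upsample_factors` must multiply out to `hop_length` so each mel frame expands to one hop of samples, and `train_noise_schedule` / `test_noise_schedule` describe a range of noise levels. The linear beta interpretation below is an assumption for illustration; the exact schedule construction lives in the model code, not in this config.

```python
# Sketch only; the beta schedule here is a common linear reading of
# min_val/max_val/num_steps and may differ from the repo's implementation.
import numpy as np

hop_length = 256
upsample_factors = [4, 4, 4, 2, 2]     # from "model_params" above
assert np.prod(upsample_factors) == hop_length

train_schedule = {"min_val": 1e-6, "max_val": 1e-2, "num_steps": 1000}
betas = np.linspace(train_schedule["min_val"],
                    train_schedule["max_val"],
                    train_schedule["num_steps"])
noise_levels = np.cumprod(1.0 - betas)  # monotonically decreasing signal scale
print(betas.shape, noise_levels[-1])
```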
+ "win_length": 1024, // stft window length in ms. + "hop_length": 256, // stft window hop-lengh in ms. + "frame_length_ms": null, // stft window length in ms.If null, 'win_length' is used. + "frame_shift_ms": null, // stft window hop-lengh in ms. If null, 'hop_length' is used. + + // Audio processing parameters + "sample_rate": 22050, // DATASET-RELATED: wav sample-rate. If different than the original data, it is resampled. + "preemphasis": 0.0, // pre-emphasis to reduce spec noise and make it more structured. If 0.0, no -pre-emphasis. + "ref_level_db": 0, // reference level db, theoretically 20db is the sound of air. + + // Silence trimming + "do_trim_silence": true,// enable trimming of slience of audio as you load it. LJspeech (false), TWEB (false), Nancy (true) + "trim_db": 60, // threshold for timming silence. Set this according to your dataset. + + // MelSpectrogram parameters + "num_mels": 80, // size of the mel spec frame. + "mel_fmin": 0.0, // minimum freq level for mel-spec. ~50 for male and ~95 for female voices. Tune for dataset!! + "mel_fmax": 8000.0, // maximum freq level for mel-spec. Tune for dataset!! + "spec_gain": 20.0, // scaler value appplied after log transform of spectrogram. + + // Normalization parameters + "signal_norm": true, // normalize spec values. Mean-Var normalization if 'stats_path' is defined otherwise range normalization defined by the other params. + "min_level_db": -100, // lower bound for normalization + "symmetric_norm": true, // move normalization to range [-1, 1] + "max_norm": 4.0, // scale normalization to range [-max_norm, max_norm] or [0, max_norm] + "clip_norm": true, // clip normalized values into the range. + "stats_path": null // DO NOT USE WITH MULTI_SPEAKER MODEL. scaler stats file computed by 'compute_statistics.py'. If it is defined, mean-std based notmalization is used and other normalization params are ignored + }, + + // Generating / Synthesizing + "batched": true, + "target_samples": 11000, // target number of samples to be generated in each batch entry + "overlap_samples": 550, // number of samples for crossfading between batches + + // DISTRIBUTED TRAINING + // "distributed":{ + // "backend": "nccl", + // "url": "tcp:\/\/localhost:54321" + // }, + + // MODEL PARAMETERS + "use_aux_net": true, + "use_upsample_net": true, + "upsample_factors": [4, 8, 8], // this needs to correctly factorise hop_length + "seq_len": 1280, // has to be devideable by hop_length + "mode": "mold", // mold [string], gauss [string], bits [int] + "mulaw": false, // apply mulaw if mode is bits + "padding": 2, // pad the input for resnet to see wider input length + + // DATASET + //"use_gta": true, // use computed gta features from the tts model + "data_path": "tests/data/ljspeech/wavs/", // path containing training wav files + "feature_path": null, // path containing computed features from wav files if null compute them + + // MODEL PARAMETERS + "wavernn_model_params": { + "rnn_dims": 512, + "fc_dims": 512, + "compute_dims": 128, + "res_out_dims": 128, + "num_res_blocks": 10, + "use_aux_net": true, + "use_upsample_net": true, + "upsample_factors": [4, 8, 8] // this needs to correctly factorise hop_length + }, + "mixed_precision": false, + + // TRAINING + "batch_size": 4, // Batch size for training. Lower values than 32 might cause hard to learn attention. + "epochs": 1, // total number of epochs to train. 
+ + // VALIDATION + "run_eval": true, + "test_every_epochs": 10, // Test after set number of epochs (Test every 20 epochs for example) + + // OPTIMIZER + "grad_clip": 4, // apply gradient clipping if > 0 + "lr_scheduler": "MultiStepLR", // one of the schedulers from https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate + "lr_scheduler_params": { + "gamma": 0.5, + "milestones": [200000, 400000, 600000] + }, + "lr": 1e-4, // initial learning rate + + // TENSORBOARD and LOGGING + "print_step": 25, // Number of steps to log traning on console. + "print_eval": false, // If True, it prints loss values for each step in eval run. + "save_step": 25000, // Number of training steps expected to plot training stats on TB and save model checkpoints. + "checkpoint": true, // If true, it saves checkpoints per "save_step" + "tb_model_param_stats": false, // true, plots param stats per layer on tensorboard. Might be memory consuming, but good for debugging. + + // DATA LOADING + "num_loader_workers": 4, // number of training data loader processes. Don't set it too big. 4-8 are good values. + "num_val_loader_workers": 4, // number of evaluation data loader processes. + "eval_split_size": 10, // number of samples for testing + + // PATHS + "output_path": "tests/train_outputs/" +} + diff --git a/tests/outputs/dummy_model_config.json b/tests/outputs/dummy_model_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3996e09ae1c035ef9b0c616a6fd72ca3f89e1d94 --- /dev/null +++ b/tests/outputs/dummy_model_config.json @@ -0,0 +1,103 @@ +{ + "run_name": "mozilla-no-loc-fattn-stopnet-sigmoid-loss_masking", + "run_description": "using forward attention, with original prenet, loss masking,separate stopnet, sigmoid. Compare this with 4817. Pytorch DPP", + + "audio":{ + // Audio processing parameters + "num_mels": 80, // size of the mel spec frame. + "fft_size": 1024, // number of stft frequency levels. Size of the linear spectogram frame. + "sample_rate": 22050, // DATASET-RELATED: wav sample-rate. If different than the original data, it is resampled. + "hop_length": 256, + "win_length": 1024, + "preemphasis": 0.98, // pre-emphasis to reduce spec noise and make it more structured. If 0.0, no -pre-emphasis. + "min_level_db": -100, // normalization range + "ref_level_db": 20, // reference level db, theoretically 20db is the sound of air. + "power": 1.5, // value to sharpen wav signals after GL algorithm. + "griffin_lim_iters": 60,// #griffin-lim iterations. 30-60 is a good range. Larger the value, slower the generation. + // Normalization parameters + "signal_norm": true, // normalize the spec values in range [0, 1] + "symmetric_norm": false, // move normalization to range [-1, 1] + "max_norm": 1, // scale normalization to range [-max_norm, max_norm] or [0, max_norm] + "clip_norm": true, // clip normalized values into the range. + "mel_fmin": 0.0, // minimum freq level for mel-spec. ~50 for male and ~95 for female voices. Tune for dataset!! + "mel_fmax": 8000.0, // maximum freq level for mel-spec. Tune for dataset!! + "do_trim_silence": true // enable trimming of slience of audio as you load it. LJspeech (false), TWEB (false), Nancy (true) + }, + + "distributed":{ + "backend": "nccl", + "url": "tcp:\/\/localhost:54321" + }, + + "reinit_layers": [], + + "model": "Tacotron2", // one of the model in models/ + "grad_clip": 1, // upper limit for gradients for clipping. + "epochs": 1000, // total number of epochs to train. + "lr": 0.0001, // Initial learning rate. 
If Noam decay is active, maximum learning rate. + "lr_decay": false, // if true, Noam learning rate decaying is applied through training. + "warmup_steps": 4000, // Noam decay steps to increase the learning rate from 0 to "lr" + "windowing": false, // Enables attention windowing. Used only in eval mode. + "memory_size": 5, // ONLY TACOTRON - memory queue size used to queue network predictions to feed autoregressive connection. Useful if r < 5. + "attention_norm": "sigmoid", // softmax or sigmoid. Suggested to use softmax for Tacotron2 and sigmoid for Tacotron. + "prenet_type": "original", // ONLY TACOTRON2 - "original" or "bn". + "prenet_dropout": true, // ONLY TACOTRON2 - enable/disable dropout at prenet. + "use_forward_attn": true, // ONLY TACOTRON2 - if it uses forward attention. In general, it aligns faster. + "forward_attn_mask": false, + "attention_type": "original", + "attention_heads": 5, + "bidirectional_decoder": false, + "transition_agent": false, // ONLY TACOTRON2 - enable/disable transition agent of forward attention. + "location_attn": false, // ONLY TACOTRON2 - enable_disable location sensitive attention. It is enabled for TACOTRON by default. + "loss_masking": true, // enable / disable loss masking against the sequence padding. + "enable_eos_bos_chars": false, // enable/disable beginning of sentence and end of sentence chars. + "stopnet": true, // Train stopnet predicting the end of synthesis. + "separate_stopnet": true, // Train stopnet seperately if 'stopnet==true'. It prevents stopnet loss to influence the rest of the model. It causes a better model, but it trains SLOWER. + "tb_model_param_stats": false, // true, plots param stats per layer on tensorboard. Might be memory consuming, but good for debugging. + "use_gst": false, + "double_decoder_consistency": true, // use DDC explained here https://erogol.com/solving-attention-problems-of-tts-models-with-double-decoder-consistency-draft/ + "ddc_r": 7, // reduction rate for coarse decoder. + + "batch_size": 32, // Batch size for training. Lower values than 32 might cause hard to learn attention. + "eval_batch_size":16, + "r": 1, // Number of frames to predict for step. + "wd": 0.000001, // Weight decay weight. + "checkpoint": true, // If true, it saves checkpoints per "save_step" + "save_step": 1000, // Number of training steps expected to save traning stats and checkpoints. + "print_step": 10, // Number of steps to log traning on console. + "batch_group_size": 0, //Number of batches to shuffle after bucketing. + + "run_eval": true, + "test_delay_epochs": 5, //Until attention is aligned, testing only wastes computation time. + "test_sentences_file": null, // set a file to load sentences to be used for testing. If it is null then we use default english sentences. + "data_path": "/media/erogol/data_ssd/Data/Mozilla/", // DATASET-RELATED: can overwritten from command argument + "meta_file_train": "metadata_train.txt", // DATASET-RELATED: metafile for training dataloader. + "meta_file_val": "metadata_val.txt", // DATASET-RELATED: metafile for evaluation dataloader. + "dataset": "mozilla", // DATASET-RELATED: one of mozilla_voice_tts.dataset.preprocessors depending on your target dataset. Use "tts_cache" for pre-computed dataset by extract_features.py + "min_seq_len": 0, // DATASET-RELATED: minimum text length to use in training + "max_seq_len": 150, // DATASET-RELATED: maximum text length + "output_path": "../keep/", // DATASET-RELATED: output path for all training outputs. 
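Since the config above references Noam decay (`warmup_steps`, `lr_decay`), here is a hedged sketch of the standard Noam warm-up schedule for intuition; the repo's actual scheduler may differ in its constants:

```python
# Standard Noam formulation: linear-ish warm-up to base_lr, then 1/sqrt(step) decay.
def noam_lr(step: int, base_lr: float = 1e-4, warmup_steps: int = 4000) -> float:
    step = max(step, 1)
    return base_lr * warmup_steps**0.5 * min(step * warmup_steps**-1.5, step**-0.5)

# peaks at roughly base_lr when step == warmup_steps
print(noam_lr(1), noam_lr(4000), noam_lr(40000))
```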
+ "num_loader_workers": 4, // number of training data loader processes. Don't set it too big. 4-8 are good values. + "num_val_loader_workers": 4, // number of evaluation data loader processes. + "phoneme_cache_path": "mozilla_us_phonemes", // phoneme computation is slow, therefore, it caches results in the given folder. + "use_phonemes": false, // use phonemes instead of raw characters. It is suggested for better pronounciation. + "phoneme_language": "en-us", // depending on your target language, pick one from https://github.com/bootphon/phonemizer#languages + "text_cleaner": "phoneme_cleaners", + "use_speaker_embedding": false, // whether to use additional embeddings for separate speakers + + // MULTI-SPEAKER and GST + "use_speaker_embedding": false, // use speaker embedding to enable multi-speaker learning. + "use_gst": true, // use global style tokens + "gst": { // gst parameter if gst is enabled + "gst_style_input": null, // Condition the style input either on a + // -> wave file [path to wave] or + // -> dictionary using the style tokens {'token1': 'value', 'token2': 'value'} example {"0": 0.15, "1": 0.15, "5": -0.15} + // with the dictionary being len(dict) <= len(gst_style_tokens). + "gst_use_speaker_embedding": true, // if true pass speaker embedding in attention input GST. + "gst_embedding_dim": 512, + "gst_num_heads": 4, + "gst_style_tokens": 10 + } +} + + diff --git a/tests/symbols_tests.py b/tests/symbols_tests.py new file mode 100644 index 0000000000000000000000000000000000000000..4e70b9d55004a8459a015ec969e8de220010b95e --- /dev/null +++ b/tests/symbols_tests.py @@ -0,0 +1,7 @@ +import unittest + +from TTS.tts.utils.text import phonemes + +class SymbolsTest(unittest.TestCase): + def test_uniqueness(self): #pylint: disable=no-self-use + assert sorted(phonemes) == sorted(list(set(phonemes))), " {} vs {} ".format(len(phonemes), len(set(phonemes))) diff --git a/tests/test_audio.py b/tests/test_audio.py new file mode 100644 index 0000000000000000000000000000000000000000..c00cd8f8fe27c18a5a880ff7b4818e72743d1940 --- /dev/null +++ b/tests/test_audio.py @@ -0,0 +1,176 @@ +import os +import unittest + +from tests import get_tests_input_path, get_tests_output_path, get_tests_path + +from TTS.utils.audio import AudioProcessor +from TTS.utils.io import load_config + +TESTS_PATH = get_tests_path() +OUT_PATH = os.path.join(get_tests_output_path(), "audio_tests") +WAV_FILE = os.path.join(get_tests_input_path(), "example_1.wav") + +os.makedirs(OUT_PATH, exist_ok=True) +conf = load_config(os.path.join(get_tests_input_path(), 'test_config.json')) + + +# pylint: disable=protected-access +class TestAudio(unittest.TestCase): + def __init__(self, *args, **kwargs): + super(TestAudio, self).__init__(*args, **kwargs) + self.ap = AudioProcessor(**conf.audio) + + def test_audio_synthesis(self): + """ 1. load wav + 2. set normalization parameters + 3. extract mel-spec + 4. 
invert to wav and save the output + """ + print(" > Sanity check for the process wav -> mel -> wav") + + def _test(max_norm, signal_norm, symmetric_norm, clip_norm): + self.ap.max_norm = max_norm + self.ap.signal_norm = signal_norm + self.ap.symmetric_norm = symmetric_norm + self.ap.clip_norm = clip_norm + wav = self.ap.load_wav(WAV_FILE) + mel = self.ap.melspectrogram(wav) + wav_ = self.ap.inv_melspectrogram(mel) + file_name = "/audio_test-melspec_max_norm_{}-signal_norm_{}-symmetric_{}-clip_norm_{}.wav"\ + .format(max_norm, signal_norm, symmetric_norm, clip_norm) + print(" | > Creating wav file at : ", file_name) + self.ap.save_wav(wav_, OUT_PATH + file_name) + + # maxnorm = 1.0 + _test(1., False, False, False) + _test(1., True, False, False) + _test(1., True, True, False) + _test(1., True, False, True) + _test(1., True, True, True) + # maxnorm = 4.0 + _test(4., False, False, False) + _test(4., True, False, False) + _test(4., True, True, False) + _test(4., True, False, True) + _test(4., True, True, True) + + def test_normalize(self): + """Check normalization and denormalization for range values and consistency """ + print(" > Testing normalization and denormalization.") + wav = self.ap.load_wav(WAV_FILE) + wav = self.ap.sound_norm(wav) # normalize audio to get abetter normalization range below. + self.ap.signal_norm = False + x = self.ap.melspectrogram(wav) + x_old = x + + self.ap.signal_norm = True + self.ap.symmetric_norm = False + self.ap.clip_norm = False + self.ap.max_norm = 4.0 + x_norm = self.ap.normalize(x) + print(f" > MaxNorm: {self.ap.max_norm}, ClipNorm:{self.ap.clip_norm}, SymmetricNorm:{self.ap.symmetric_norm}, SignalNorm:{self.ap.signal_norm} Range-> {x_norm.max()} -- {x_norm.min()}") + assert (x_old - x).sum() == 0 + # check value range + assert x_norm.max() <= self.ap.max_norm + 1, x_norm.max() + assert x_norm.min() >= 0 - 1, x_norm.min() + # check denorm. + x_ = self.ap.denormalize(x_norm) + assert (x - x_).sum() < 1e-3, (x - x_).mean() + + self.ap.signal_norm = True + self.ap.symmetric_norm = False + self.ap.clip_norm = True + self.ap.max_norm = 4.0 + x_norm = self.ap.normalize(x) + print(f" > MaxNorm: {self.ap.max_norm}, ClipNorm:{self.ap.clip_norm}, SymmetricNorm:{self.ap.symmetric_norm}, SignalNorm:{self.ap.signal_norm} Range-> {x_norm.max()} -- {x_norm.min()}") + + + assert (x_old - x).sum() == 0 + # check value range + assert x_norm.max() <= self.ap.max_norm, x_norm.max() + assert x_norm.min() >= 0, x_norm.min() + # check denorm. + x_ = self.ap.denormalize(x_norm) + assert (x - x_).sum() < 1e-3, (x - x_).mean() + + self.ap.signal_norm = True + self.ap.symmetric_norm = True + self.ap.clip_norm = False + self.ap.max_norm = 4.0 + x_norm = self.ap.normalize(x) + print(f" > MaxNorm: {self.ap.max_norm}, ClipNorm:{self.ap.clip_norm}, SymmetricNorm:{self.ap.symmetric_norm}, SignalNorm:{self.ap.signal_norm} Range-> {x_norm.max()} -- {x_norm.min()}") + + + assert (x_old - x).sum() == 0 + # check value range + assert x_norm.max() <= self.ap.max_norm + 1, x_norm.max() + assert x_norm.min() >= -self.ap.max_norm - 2, x_norm.min() #pylint: disable=invalid-unary-operand-type + assert x_norm.min() <= 0, x_norm.min() + # check denorm. 
+ x_ = self.ap.denormalize(x_norm) + assert (x - x_).sum() < 1e-3, (x - x_).mean() + + self.ap.signal_norm = True + self.ap.symmetric_norm = True + self.ap.clip_norm = True + self.ap.max_norm = 4.0 + x_norm = self.ap.normalize(x) + print(f" > MaxNorm: {self.ap.max_norm}, ClipNorm:{self.ap.clip_norm}, SymmetricNorm:{self.ap.symmetric_norm}, SignalNorm:{self.ap.signal_norm} Range-> {x_norm.max()} -- {x_norm.min()}") + + + assert (x_old - x).sum() == 0 + # check value range + assert x_norm.max() <= self.ap.max_norm, x_norm.max() + assert x_norm.min() >= -self.ap.max_norm, x_norm.min() #pylint: disable=invalid-unary-operand-type + assert x_norm.min() <= 0, x_norm.min() + # check denorm. + x_ = self.ap.denormalize(x_norm) + assert (x - x_).sum() < 1e-3, (x - x_).mean() + + self.ap.signal_norm = True + self.ap.symmetric_norm = False + self.ap.max_norm = 1.0 + x_norm = self.ap.normalize(x) + print(f" > MaxNorm: {self.ap.max_norm}, ClipNorm:{self.ap.clip_norm}, SymmetricNorm:{self.ap.symmetric_norm}, SignalNorm:{self.ap.signal_norm} Range-> {x_norm.max()} -- {x_norm.min()}") + + + assert (x_old - x).sum() == 0 + assert x_norm.max() <= self.ap.max_norm, x_norm.max() + assert x_norm.min() >= 0, x_norm.min() + x_ = self.ap.denormalize(x_norm) + assert (x - x_).sum() < 1e-3 + + self.ap.signal_norm = True + self.ap.symmetric_norm = True + self.ap.max_norm = 1.0 + x_norm = self.ap.normalize(x) + print(f" > MaxNorm: {self.ap.max_norm}, ClipNorm:{self.ap.clip_norm}, SymmetricNorm:{self.ap.symmetric_norm}, SignalNorm:{self.ap.signal_norm} Range-> {x_norm.max()} -- {x_norm.min()}") + + + assert (x_old - x).sum() == 0 + assert x_norm.max() <= self.ap.max_norm, x_norm.max() + assert x_norm.min() >= -self.ap.max_norm, x_norm.min() #pylint: disable=invalid-unary-operand-type + assert x_norm.min() < 0, x_norm.min() + x_ = self.ap.denormalize(x_norm) + assert (x - x_).sum() < 1e-3 + + def test_scaler(self): + scaler_stats_path = os.path.join(get_tests_input_path(), 'scale_stats.npy') + conf.audio['stats_path'] = scaler_stats_path + conf.audio['preemphasis'] = 0.0 + conf.audio['do_trim_silence'] = True + conf.audio['signal_norm'] = True + + ap = AudioProcessor(**conf.audio) + mel_mean, mel_std, linear_mean, linear_std, _ = ap.load_stats(scaler_stats_path) + ap.setup_scaler(mel_mean, mel_std, linear_mean, linear_std) + + self.ap.signal_norm = False + self.ap.preemphasis = 0.0 + + # test scaler forward and backward transforms + wav = self.ap.load_wav(WAV_FILE) + mel_reference = self.ap.melspectrogram(wav) + mel_norm = ap.melspectrogram(wav) + mel_denorm = ap.denormalize(mel_norm) + assert abs(mel_reference - mel_denorm).max() < 1e-4 diff --git a/tests/test_demo_server.py b/tests/test_demo_server.py new file mode 100644 index 0000000000000000000000000000000000000000..bccff55df52e1088f92d855f646ddb2accd8d415 --- /dev/null +++ b/tests/test_demo_server.py @@ -0,0 +1,57 @@ +import os +import unittest + +from tests import get_tests_input_path, get_tests_output_path +from TTS.utils.synthesizer import Synthesizer +from TTS.tts.utils.generic_utils import setup_model +from TTS.tts.utils.io import save_checkpoint +from TTS.tts.utils.text.symbols import make_symbols, phonemes, symbols +from TTS.utils.io import load_config + + +class DemoServerTest(unittest.TestCase): + # pylint: disable=R0201 + def _create_random_model(self): + # pylint: disable=global-statement + global symbols, phonemes + config = load_config(os.path.join(get_tests_output_path(), 'dummy_model_config.json')) + if 'characters' in config.keys(): + symbols, 
phonemes = make_symbols(**config.characters) + + num_chars = len(phonemes) if config.use_phonemes else len(symbols) + model = setup_model(num_chars, 0, config) + output_path = os.path.join(get_tests_output_path()) + save_checkpoint(model, None, 10, 10, 1, output_path) + + def test_in_out(self): + self._create_random_model() + config = load_config(os.path.join(get_tests_input_path(), 'server_config.json')) + tts_root_path = get_tests_output_path() + config['tts_checkpoint'] = os.path.join(tts_root_path, config['tts_checkpoint']) + config['tts_config'] = os.path.join(tts_root_path, config['tts_config']) + synthesizer = Synthesizer(config['tts_checkpoint'], config['tts_config'], None, None) + synthesizer.tts("Better this test works!!") + + def test_split_into_sentences(self): + """Check demo server sentences split as expected""" + print("\n > Testing demo server sentence splitting") + # pylint: disable=attribute-defined-outside-init + self.seg = Synthesizer.get_segmenter("en") + sis = Synthesizer.split_into_sentences + assert sis(self, 'Hello. Two sentences') == ['Hello.', 'Two sentences'] + assert sis(self, 'He went to meet the adviser from Scott, Waltman & Co. next morning.') == ['He went to meet the adviser from Scott, Waltman & Co. next morning.'] + assert sis(self, 'Let\'s run it past Sarah and co. They\'ll want to see this.') == ['Let\'s run it past Sarah and co.', 'They\'ll want to see this.'] + assert sis(self, 'Where is Bobby Jr.\'s rabbit?') == ['Where is Bobby Jr.\'s rabbit?'] + assert sis(self, 'Please inform the U.K. authorities right away.') == ['Please inform the U.K. authorities right away.'] + assert sis(self, 'Were David and co. at the event?') == ['Were David and co. at the event?'] + assert sis(self, 'paging dr. green, please come to theatre four immediately.') == ['paging dr. green, please come to theatre four immediately.'] + assert sis(self, 'The email format is Firstname.Lastname@example.com. I think you reversed them.') == ['The email format is Firstname.Lastname@example.com.', 'I think you reversed them.'] + assert sis(self, 'The demo site is: https://top100.example.com/subsection/latestnews.html. Please send us your feedback.') == ['The demo site is: https://top100.example.com/subsection/latestnews.html.', 'Please send us your feedback.'] + assert sis(self, 'Scowling at him, \'You are not done yet!\' she yelled.') == ['Scowling at him, \'You are not done yet!\' she yelled.'] # with the final lowercase "she" we see it's all one sentence + assert sis(self, 'Hey!! So good to see you.') == ['Hey!!', 'So good to see you.'] + assert sis(self, 'He went to Yahoo! but I don\'t know the division.') == ['He went to Yahoo! but I don\'t know the division.'] + assert sis(self, 'If you can\'t remember a quote, “at least make up a memorable one that\'s plausible..."') == ['If you can\'t remember a quote, “at least make up a memorable one that\'s plausible..."'] + assert sis(self, 'The address is not google.com.') == ['The address is not google.com.'] + assert sis(self, '1.) The first item 2.) The second item') == ['1.) The first item', '2.) The second item'] + assert sis(self, '1) The first item 2) The second item') == ['1) The first item', '2) The second item'] + assert sis(self, 'a. The first item b. The second item c. The third list item') == ['a. The first item', 'b. The second item', 'c. 
The third list item'] diff --git a/tests/test_encoder.py b/tests/test_encoder.py new file mode 100644 index 0000000000000000000000000000000000000000..4d4dbba116fd8e1660da898a946e26a4609ad065 --- /dev/null +++ b/tests/test_encoder.py @@ -0,0 +1,117 @@ +import os +import unittest + +import torch as T +from tests import get_tests_input_path + +from TTS.speaker_encoder.losses import GE2ELoss, AngleProtoLoss +from TTS.speaker_encoder.model import SpeakerEncoder +from TTS.utils.io import load_config + +file_path = get_tests_input_path() +c = load_config(os.path.join(file_path, "test_config.json")) + + +class SpeakerEncoderTests(unittest.TestCase): + # pylint: disable=R0201 + def test_in_out(self): + dummy_input = T.rand(4, 20, 80) # B x T x D + dummy_hidden = [T.rand(2, 4, 128), T.rand(2, 4, 128)] + model = SpeakerEncoder( + input_dim=80, proj_dim=256, lstm_dim=768, num_lstm_layers=3 + ) + # computing d vectors + output = model.forward(dummy_input) + assert output.shape[0] == 4 + assert output.shape[1] == 256 + output = model.inference(dummy_input) + assert output.shape[0] == 4 + assert output.shape[1] == 256 + # compute d vectors by passing LSTM hidden + # output = model.forward(dummy_input, dummy_hidden) + # assert output.shape[0] == 4 + # assert output.shape[1] == 20 + # assert output.shape[2] == 256 + # check normalization + output_norm = T.nn.functional.normalize(output, dim=1, p=2) + assert_diff = (output_norm - output).sum().item() + assert output.type() == "torch.FloatTensor" + assert ( + abs(assert_diff) < 1e-4 + ), f" [!] output_norm has wrong values - {assert_diff}" + # compute d for a given batch + dummy_input = T.rand(1, 240, 80) # B x T x D + output = model.compute_embedding(dummy_input, num_frames=160, overlap=0.5) + assert output.shape[0] == 1 + assert output.shape[1] == 256 + assert len(output.shape) == 2 + + +class GE2ELossTests(unittest.TestCase): + # pylint: disable=R0201 + def test_in_out(self): + # check random input + dummy_input = T.rand(4, 5, 64) # num_speaker x num_utterance x dim + loss = GE2ELoss(loss_method="softmax") + output = loss.forward(dummy_input) + assert output.item() >= 0.0 + # check all zeros + dummy_input = T.ones(4, 5, 64) # num_speaker x num_utterance x dim + loss = GE2ELoss(loss_method="softmax") + output = loss.forward(dummy_input) + assert output.item() >= 0.0 + # check speaker loss with orthogonal d-vectors + dummy_input = T.empty(3, 64) + dummy_input = T.nn.init.orthogonal_(dummy_input) + dummy_input = T.cat( + [ + dummy_input[0].repeat(5, 1, 1).transpose(0, 1), + dummy_input[1].repeat(5, 1, 1).transpose(0, 1), + dummy_input[2].repeat(5, 1, 1).transpose(0, 1), + ] + ) # num_speaker x num_utterance x dim + loss = GE2ELoss(loss_method="softmax") + output = loss.forward(dummy_input) + assert output.item() < 0.005 + +class AngleProtoLossTests(unittest.TestCase): + # pylint: disable=R0201 + def test_in_out(self): + # check random input + dummy_input = T.rand(4, 5, 64) # num_speaker x num_utterance x dim + loss = AngleProtoLoss() + output = loss.forward(dummy_input) + assert output.item() >= 0.0 + + # check all zeros + dummy_input = T.ones(4, 5, 64) # num_speaker x num_utterance x dim + loss = AngleProtoLoss() + output = loss.forward(dummy_input) + assert output.item() >= 0.0 + + # check speaker loss with orthogonal d-vectors + dummy_input = T.empty(3, 64) + dummy_input = T.nn.init.orthogonal_(dummy_input) + dummy_input = T.cat( + [ + dummy_input[0].repeat(5, 1, 1).transpose(0, 1), + dummy_input[1].repeat(5, 1, 1).transpose(0, 1), + 
dummy_input[2].repeat(5, 1, 1).transpose(0, 1), + ] + ) # num_speaker x num_utterance x dim + loss = AngleProtoLoss() + output = loss.forward(dummy_input) + assert output.item() < 0.005 + +# class LoaderTest(unittest.TestCase): +# def test_output(self): +# items = libri_tts("/home/erogol/Data/Libri-TTS/train-clean-360/") +# ap = AudioProcessor(**c['audio']) +# dataset = MyDataset(ap, items, 1.6, 64, 10) +# loader = DataLoader(dataset, batch_size=32, shuffle=False, num_workers=0, collate_fn=dataset.collate_fn) +# count = 0 +# for mel, spk in loader: +# print(mel.shape) +# if count == 4: +# break +# count += 1 diff --git a/tests/test_glow-tts_train.sh b/tests/test_glow-tts_train.sh new file mode 100644 index 0000000000000000000000000000000000000000..add7292dec6f6250116fc453e2da4f7a5d6c70f3 --- /dev/null +++ b/tests/test_glow-tts_train.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash +set -xe +BASEDIR=$(dirname "$0") +echo "$BASEDIR" +# run training +CUDA_VISIBLE_DEVICES="" python TTS/bin/train_glow_tts.py --config_path $BASEDIR/inputs/test_glow_tts.json +# find the training folder +LATEST_FOLDER=$(ls $BASEDIR/train_outputs/| sort | tail -1) +echo $LATEST_FOLDER +# continue the previous training +CUDA_VISIBLE_DEVICES="" python TTS/bin/train_glow_tts.py --continue_path $BASEDIR/train_outputs/$LATEST_FOLDER +# remove all the outputs +rm -rf $BASEDIR/train_outputs/ diff --git a/tests/test_glow_tts.py b/tests/test_glow_tts.py new file mode 100644 index 0000000000000000000000000000000000000000..2d375031dba3d11c517df1e764f60a87d23c49e1 --- /dev/null +++ b/tests/test_glow_tts.py @@ -0,0 +1,133 @@ +import copy +import os +import unittest + +import torch +from tests import get_tests_input_path +from torch import optim + +from TTS.tts.layers.losses import GlowTTSLoss +from TTS.tts.models.glow_tts import GlowTts +from TTS.utils.io import load_config +from TTS.utils.audio import AudioProcessor + +#pylint: disable=unused-variable + +torch.manual_seed(1) +use_cuda = torch.cuda.is_available() +device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + +c = load_config(os.path.join(get_tests_input_path(), 'test_config.json')) + +ap = AudioProcessor(**c.audio) +WAV_FILE = os.path.join(get_tests_input_path(), "example_1.wav") + + +def count_parameters(model): + r"""Count number of trainable parameters in a network""" + return sum(p.numel() for p in model.parameters() if p.requires_grad) + + +class GlowTTSTrainTest(unittest.TestCase): + @staticmethod + def test_train_step(): + input_dummy = torch.randint(0, 24, (8, 128)).long().to(device) + input_lengths = torch.randint(100, 129, (8, )).long().to(device) + input_lengths[-1] = 128 + mel_spec = torch.rand(8, c.audio['num_mels'], 30).to(device) + linear_spec = torch.rand(8, 30, c.audio['fft_size']).to(device) + mel_lengths = torch.randint(20, 30, (8, )).long().to(device) + speaker_ids = torch.randint(0, 5, (8, )).long().to(device) + + criterion = criterion = GlowTTSLoss() + + # model to train + model = GlowTts( + num_chars=32, + hidden_channels_enc=128, + hidden_channels_dec=128, + hidden_channels_dp=32, + out_channels=80, + encoder_type='rel_pos_transformer', + encoder_params={ + 'kernel_size': 3, + 'dropout_p': 0.1, + 'num_layers': 6, + 'num_heads': 2, + 'hidden_channels_ffn': 768, # 4 times the hidden_channels + 'input_length': None + }, + use_encoder_prenet=True, + num_flow_blocks_dec=12, + kernel_size_dec=5, + dilation_rate=5, + num_block_layers=4, + dropout_p_dec=0., + num_speakers=0, + c_in_channels=0, + num_splits=4, + num_squeeze=1, + 
sigmoid_scale=False, + mean_only=False).to(device) + + # reference model to compare model weights + model_ref = GlowTts( + num_chars=32, + hidden_channels_enc=128, + hidden_channels_dec=128, + hidden_channels_dp=32, + out_channels=80, + encoder_type='rel_pos_transformer', + encoder_params={ + 'kernel_size': 3, + 'dropout_p': 0.1, + 'num_layers': 6, + 'num_heads': 2, + 'hidden_channels_ffn': 768, # 4 times the hidden_channels + 'input_length': None + }, + use_encoder_prenet=True, + num_flow_blocks_dec=12, + kernel_size_dec=5, + dilation_rate=5, + num_block_layers=4, + dropout_p_dec=0., + num_speakers=0, + c_in_channels=0, + num_splits=4, + num_squeeze=1, + sigmoid_scale=False, + mean_only=False).to(device) + + model.train() + print(" > Num parameters for GlowTTS model:%s" % + (count_parameters(model))) + + # pass the state to ref model + model_ref.load_state_dict(copy.deepcopy(model.state_dict())) + + count = 0 + for param, param_ref in zip(model.parameters(), + model_ref.parameters()): + assert (param - param_ref).sum() == 0, param + count += 1 + + optimizer = optim.Adam(model.parameters(), lr=c.lr) + for _ in range(5): + z, logdet, y_mean, y_log_scale, alignments, o_dur_log, o_total_dur = model.forward( + input_dummy, input_lengths, mel_spec, mel_lengths, None) + optimizer.zero_grad() + loss_dict = criterion(z, y_mean, y_log_scale, logdet, mel_lengths, + o_dur_log, o_total_dur, input_lengths) + loss = loss_dict['loss'] + loss.backward() + optimizer.step() + + # check parameter changes + count = 0 + for param, param_ref in zip(model.parameters(), + model_ref.parameters()): + assert (param != param_ref).any( + ), "param {} with shape {} not updated!! \n{}\n{}".format( + count, param.shape, param, param_ref) + count += 1 \ No newline at end of file diff --git a/tests/test_layers.py b/tests/test_layers.py new file mode 100644 index 0000000000000000000000000000000000000000..5426e1957ac498597a4a629666f54d86a23f568c --- /dev/null +++ b/tests/test_layers.py @@ -0,0 +1,220 @@ +import unittest +import torch as T + +from TTS.tts.layers.tacotron import Prenet, CBHG, Decoder, Encoder +from TTS.tts.layers.losses import L1LossMasked, SSIMLoss +from TTS.tts.utils.generic_utils import sequence_mask + +# pylint: disable=unused-variable + + +class PrenetTests(unittest.TestCase): + def test_in_out(self): #pylint: disable=no-self-use + layer = Prenet(128, out_features=[256, 128]) + dummy_input = T.rand(4, 128) + + print(layer) + output = layer(dummy_input) + assert output.shape[0] == 4 + assert output.shape[1] == 128 + + +class CBHGTests(unittest.TestCase): + def test_in_out(self): + #pylint: disable=attribute-defined-outside-init + layer = self.cbhg = CBHG( + 128, + K=8, + conv_bank_features=80, + conv_projections=[160, 128], + highway_features=80, + gru_features=80, + num_highways=4) + # B x D x T + dummy_input = T.rand(4, 128, 8) + + print(layer) + output = layer(dummy_input) + assert output.shape[0] == 4 + assert output.shape[1] == 8 + assert output.shape[2] == 160 + + +class DecoderTests(unittest.TestCase): + @staticmethod + def test_in_out(): + layer = Decoder( + in_channels=256, + frame_channels=80, + r=2, + memory_size=4, + attn_windowing=False, + attn_norm="sigmoid", + attn_K=5, + attn_type="original", + prenet_type='original', + prenet_dropout=True, + forward_attn=True, + trans_agent=True, + forward_attn_mask=True, + location_attn=True, + separate_stopnet=True) + dummy_input = T.rand(4, 8, 256) + dummy_memory = T.rand(4, 2, 80) + + output, alignment, stop_tokens = layer( + dummy_input, dummy_memory, 
mask=None) + + assert output.shape[0] == 4 + assert output.shape[1] == 80, "size not {}".format(output.shape[1]) + assert output.shape[2] == 2, "size not {}".format(output.shape[2]) + assert stop_tokens.shape[0] == 4 + +class EncoderTests(unittest.TestCase): + def test_in_out(self): #pylint: disable=no-self-use + layer = Encoder(128) + dummy_input = T.rand(4, 8, 128) + + print(layer) + output = layer(dummy_input) + print(output.shape) + assert output.shape[0] == 4 + assert output.shape[1] == 8 + assert output.shape[2] == 256 # 128 * 2 BiRNN + + +class L1LossMaskedTests(unittest.TestCase): + def test_in_out(self): #pylint: disable=no-self-use + # test input == target + layer = L1LossMasked(seq_len_norm=False) + dummy_input = T.ones(4, 8, 128).float() + dummy_target = T.ones(4, 8, 128).float() + dummy_length = (T.ones(4) * 8).long() + output = layer(dummy_input, dummy_target, dummy_length) + assert output.item() == 0.0 + + # test input != target + dummy_input = T.ones(4, 8, 128).float() + dummy_target = T.zeros(4, 8, 128).float() + dummy_length = (T.ones(4) * 8).long() + output = layer(dummy_input, dummy_target, dummy_length) + assert output.item() == 1.0, "1.0 vs {}".format(output.item()) + + # test if padded values of input makes any difference + dummy_input = T.ones(4, 8, 128).float() + dummy_target = T.zeros(4, 8, 128).float() + dummy_length = (T.arange(5, 9)).long() + mask = ( + (sequence_mask(dummy_length).float() - 1.0) * 100.0).unsqueeze(2) + output = layer(dummy_input + mask, dummy_target, dummy_length) + assert output.item() == 1.0, "1.0 vs {}".format(output.item()) + + dummy_input = T.rand(4, 8, 128).float() + dummy_target = dummy_input.detach() + dummy_length = (T.arange(5, 9)).long() + mask = ( + (sequence_mask(dummy_length).float() - 1.0) * 100.0).unsqueeze(2) + output = layer(dummy_input + mask, dummy_target, dummy_length) + assert output.item() == 0, "0 vs {}".format(output.item()) + + # seq_len_norm = True + # test input == target + layer = L1LossMasked(seq_len_norm=True) + dummy_input = T.ones(4, 8, 128).float() + dummy_target = T.ones(4, 8, 128).float() + dummy_length = (T.ones(4) * 8).long() + output = layer(dummy_input, dummy_target, dummy_length) + assert output.item() == 0.0 + + # test input != target + dummy_input = T.ones(4, 8, 128).float() + dummy_target = T.zeros(4, 8, 128).float() + dummy_length = (T.ones(4) * 8).long() + output = layer(dummy_input, dummy_target, dummy_length) + assert output.item() == 1.0, "1.0 vs {}".format(output.item()) + + # test if padded values of input makes any difference + dummy_input = T.ones(4, 8, 128).float() + dummy_target = T.zeros(4, 8, 128).float() + dummy_length = (T.arange(5, 9)).long() + mask = ( + (sequence_mask(dummy_length).float() - 1.0) * 100.0).unsqueeze(2) + output = layer(dummy_input + mask, dummy_target, dummy_length) + assert abs(output.item() - 1.0) < 1e-5, "1.0 vs {}".format(output.item()) + + dummy_input = T.rand(4, 8, 128).float() + dummy_target = dummy_input.detach() + dummy_length = (T.arange(5, 9)).long() + mask = ( + (sequence_mask(dummy_length).float() - 1.0) * 100.0).unsqueeze(2) + output = layer(dummy_input + mask, dummy_target, dummy_length) + assert output.item() == 0, "0 vs {}".format(output.item()) + + +class SSIMLossTests(unittest.TestCase): + def test_in_out(self): #pylint: disable=no-self-use + # test input == target + layer = SSIMLoss() + dummy_input = T.ones(4, 8, 128).float() + dummy_target = T.ones(4, 8, 128).float() + dummy_length = (T.ones(4) * 8).long() + output = layer(dummy_input, 
dummy_target, dummy_length) + assert output.item() == 0.0 + + # test input != target + dummy_input = T.ones(4, 8, 128).float() + dummy_target = T.zeros(4, 8, 128).float() + dummy_length = (T.ones(4) * 8).long() + output = layer(dummy_input, dummy_target, dummy_length) + assert abs(output.item() - 1.0) < 1e-4 , "1.0 vs {}".format(output.item()) + + # test if padded values of input makes any difference + dummy_input = T.ones(4, 8, 128).float() + dummy_target = T.zeros(4, 8, 128).float() + dummy_length = (T.arange(5, 9)).long() + mask = ( + (sequence_mask(dummy_length).float() - 1.0) * 100.0).unsqueeze(2) + output = layer(dummy_input + mask, dummy_target, dummy_length) + assert abs(output.item() - 1.0) < 1e-4, "1.0 vs {}".format(output.item()) + + dummy_input = T.rand(4, 8, 128).float() + dummy_target = dummy_input.detach() + dummy_length = (T.arange(5, 9)).long() + mask = ( + (sequence_mask(dummy_length).float() - 1.0) * 100.0).unsqueeze(2) + output = layer(dummy_input + mask, dummy_target, dummy_length) + assert output.item() == 0, "0 vs {}".format(output.item()) + + # seq_len_norm = True + # test input == target + layer = L1LossMasked(seq_len_norm=True) + dummy_input = T.ones(4, 8, 128).float() + dummy_target = T.ones(4, 8, 128).float() + dummy_length = (T.ones(4) * 8).long() + output = layer(dummy_input, dummy_target, dummy_length) + assert output.item() == 0.0 + + # test input != target + dummy_input = T.ones(4, 8, 128).float() + dummy_target = T.zeros(4, 8, 128).float() + dummy_length = (T.ones(4) * 8).long() + output = layer(dummy_input, dummy_target, dummy_length) + assert output.item() == 1.0, "1.0 vs {}".format(output.item()) + + # test if padded values of input makes any difference + dummy_input = T.ones(4, 8, 128).float() + dummy_target = T.zeros(4, 8, 128).float() + dummy_length = (T.arange(5, 9)).long() + mask = ( + (sequence_mask(dummy_length).float() - 1.0) * 100.0).unsqueeze(2) + output = layer(dummy_input + mask, dummy_target, dummy_length) + assert abs(output.item() - 1.0) < 1e-5, "1.0 vs {}".format(output.item()) + + dummy_input = T.rand(4, 8, 128).float() + dummy_target = dummy_input.detach() + dummy_length = (T.arange(5, 9)).long() + mask = ( + (sequence_mask(dummy_length).float() - 1.0) * 100.0).unsqueeze(2) + output = layer(dummy_input + mask, dummy_target, dummy_length) + assert output.item() == 0, "0 vs {}".format(output.item()) + diff --git a/tests/test_loader.py b/tests/test_loader.py new file mode 100644 index 0000000000000000000000000000000000000000..b79aad191de9eedd5dcd9f344f9bd9ff53e6853f --- /dev/null +++ b/tests/test_loader.py @@ -0,0 +1,211 @@ +import os +import shutil +import unittest + +import numpy as np +import torch +from tests import get_tests_input_path, get_tests_output_path +from torch.utils.data import DataLoader + +from TTS.tts.datasets import TTSDataset +from TTS.tts.datasets.preprocess import ljspeech +from TTS.utils.audio import AudioProcessor +from TTS.utils.io import load_config + +#pylint: disable=unused-variable + +OUTPATH = os.path.join(get_tests_output_path(), "loader_tests/") +os.makedirs(OUTPATH, exist_ok=True) +c = load_config(os.path.join(get_tests_input_path(), 'test_config.json')) +ok_ljspeech = os.path.exists(c.data_path) + +DATA_EXIST = True +if not os.path.exists(c.data_path): + DATA_EXIST = False + +print(" > Dynamic data loader test: {}".format(DATA_EXIST)) + + +class TestTTSDataset(unittest.TestCase): + def __init__(self, *args, **kwargs): + super(TestTTSDataset, self).__init__(*args, **kwargs) + self.max_loader_iter = 4 + 
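+ # max_loader_iter caps how many batches each test pulls from the DataLoader, keeping these dataset tests fast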
self.ap = AudioProcessor(**c.audio) + + def _create_dataloader(self, batch_size, r, bgs): + items = ljspeech(c.data_path, 'metadata.csv') + dataset = TTSDataset.MyDataset( + r, + c.text_cleaner, + compute_linear_spec=True, + ap=self.ap, + meta_data=items, + tp=c.characters if 'characters' in c.keys() else None, + batch_group_size=bgs, + min_seq_len=c.min_seq_len, + max_seq_len=float("inf"), + use_phonemes=False) + dataloader = DataLoader( + dataset, + batch_size=batch_size, + shuffle=False, + collate_fn=dataset.collate_fn, + drop_last=True, + num_workers=c.num_loader_workers) + return dataloader, dataset + + def test_loader(self): + if ok_ljspeech: + dataloader, dataset = self._create_dataloader(2, c.r, 0) + + for i, data in enumerate(dataloader): + if i == self.max_loader_iter: + break + text_input = data[0] + text_lengths = data[1] + speaker_name = data[2] + linear_input = data[3] + mel_input = data[4] + mel_lengths = data[5] + stop_target = data[6] + item_idx = data[7] + + neg_values = text_input[text_input < 0] + check_count = len(neg_values) + assert check_count == 0, \ + " !! Negative values in text_input: {}".format(check_count) + # TODO: more assertion here + assert isinstance(speaker_name[0], str) + assert linear_input.shape[0] == c.batch_size + assert linear_input.shape[2] == self.ap.fft_size // 2 + 1 + assert mel_input.shape[0] == c.batch_size + assert mel_input.shape[2] == c.audio['num_mels'] + # check normalization ranges + if self.ap.symmetric_norm: + assert mel_input.max() <= self.ap.max_norm + assert mel_input.min() >= -self.ap.max_norm #pylint: disable=invalid-unary-operand-type + assert mel_input.min() < 0 + else: + assert mel_input.max() <= self.ap.max_norm + assert mel_input.min() >= 0 + + def test_batch_group_shuffle(self): + if ok_ljspeech: + dataloader, dataset = self._create_dataloader(2, c.r, 16) + last_length = 0 + frames = dataset.items + for i, data in enumerate(dataloader): + if i == self.max_loader_iter: + break + text_input = data[0] + text_lengths = data[1] + speaker_name = data[2] + linear_input = data[3] + mel_input = data[4] + mel_lengths = data[5] + stop_target = data[6] + item_idx = data[7] + + avg_length = mel_lengths.numpy().mean() + assert avg_length >= last_length + dataloader.dataset.sort_items() + is_items_reordered = False + for idx, item in enumerate(dataloader.dataset.items): + if item != frames[idx]: + is_items_reordered = True + break + assert is_items_reordered + + def test_padding_and_spec(self): + if ok_ljspeech: + dataloader, dataset = self._create_dataloader(1, 1, 0) + + for i, data in enumerate(dataloader): + if i == self.max_loader_iter: + break + text_input = data[0] + text_lengths = data[1] + speaker_name = data[2] + linear_input = data[3] + mel_input = data[4] + mel_lengths = data[5] + stop_target = data[6] + item_idx = data[7] + + # check mel_spec consistency + wav = np.asarray(self.ap.load_wav(item_idx[0]), dtype=np.float32) + mel = self.ap.melspectrogram(wav).astype('float32') + mel = torch.FloatTensor(mel).contiguous() + mel_dl = mel_input[0] + # NOTE: Below needs to check == 0 but due to an unknown reason + # there is a slight difference between two matrices. + # TODO: Check this assert cond more in detail. 
+ assert abs(mel.T - mel_dl).max() < 1e-5, abs(mel.T - mel_dl).max() + + # check mel-spec correctness + mel_spec = mel_input[0].cpu().numpy() + wav = self.ap.inv_melspectrogram(mel_spec.T) + self.ap.save_wav(wav, OUTPATH + '/mel_inv_dataloader.wav') + shutil.copy(item_idx[0], OUTPATH + '/mel_target_dataloader.wav') + + # check linear-spec + linear_spec = linear_input[0].cpu().numpy() + wav = self.ap.inv_spectrogram(linear_spec.T) + self.ap.save_wav(wav, OUTPATH + '/linear_inv_dataloader.wav') + shutil.copy(item_idx[0], + OUTPATH + '/linear_target_dataloader.wav') + + # check the last time step to be zero padded + assert linear_input[0, -1].sum() != 0 + assert linear_input[0, -2].sum() != 0 + assert mel_input[0, -1].sum() != 0 + assert mel_input[0, -2].sum() != 0 + assert stop_target[0, -1] == 1 + assert stop_target[0, -2] == 0 + assert stop_target.sum() == 1 + assert len(mel_lengths.shape) == 1 + assert mel_lengths[0] == linear_input[0].shape[0] + assert mel_lengths[0] == mel_input[0].shape[0] + + # Test for batch size 2 + dataloader, dataset = self._create_dataloader(2, 1, 0) + + for i, data in enumerate(dataloader): + if i == self.max_loader_iter: + break + text_input = data[0] + text_lengths = data[1] + speaker_name = data[2] + linear_input = data[3] + mel_input = data[4] + mel_lengths = data[5] + stop_target = data[6] + item_idx = data[7] + + if mel_lengths[0] > mel_lengths[1]: + idx = 0 + else: + idx = 1 + + # check the first item in the batch + assert linear_input[idx, -1].sum() != 0 + assert linear_input[idx, -2].sum() != 0, linear_input + assert mel_input[idx, -1].sum() != 0 + assert mel_input[idx, -2].sum() != 0, mel_input + assert stop_target[idx, -1] == 1 + assert stop_target[idx, -2] == 0 + assert stop_target[idx].sum() == 1 + assert len(mel_lengths.shape) == 1 + assert mel_lengths[idx] == mel_input[idx].shape[0] + assert mel_lengths[idx] == linear_input[idx].shape[0] + + # check the second itme in the batch + assert linear_input[1 - idx, -1].sum() == 0 + assert mel_input[1 - idx, -1].sum() == 0 + assert stop_target[1, mel_lengths[1]-1] == 1 + assert stop_target[1, mel_lengths[1]:].sum() == 0 + assert len(mel_lengths.shape) == 1 + + # check batch zero-frame conditions (zero-frame disabled) + # assert (linear_input * stop_target.unsqueeze(2)).sum() == 0 + # assert (mel_input * stop_target.unsqueeze(2)).sum() == 0 diff --git a/tests/test_preprocessors.py b/tests/test_preprocessors.py new file mode 100644 index 0000000000000000000000000000000000000000..8c7b16b01c8f9175f458aec1b590c32976e4925d --- /dev/null +++ b/tests/test_preprocessors.py @@ -0,0 +1,18 @@ +import unittest +import os +from tests import get_tests_input_path + +from TTS.tts.datasets.preprocess import common_voice + + +class TestPreprocessors(unittest.TestCase): + + def test_common_voice_preprocessor(self): #pylint: disable=no-self-use + root_path = get_tests_input_path() + meta_file = "common_voice.tsv" + items = common_voice(root_path, meta_file) + assert items[0][0] == 'The applicants are invited for coffee and visa is given immediately.' + assert items[0][1] == os.path.join(get_tests_input_path(), "clips", "common_voice_en_20005954.wav") + + assert items[-1][0] == "Competition for limited resources has also resulted in some local conflicts." 
+ assert items[-1][1] == os.path.join(get_tests_input_path(), "clips", "common_voice_en_19737074.wav") diff --git a/tests/test_server_package.sh b/tests/test_server_package.sh new file mode 100644 index 0000000000000000000000000000000000000000..7e75415a706bbeae803170e1ae0d4458d47e6450 --- /dev/null +++ b/tests/test_server_package.sh @@ -0,0 +1,35 @@ +#!/bin/bash +set -xe + +if [[ ! -f tests/outputs/checkpoint_10.pth.tar ]]; then + echo "Missing dummy model in tests/outputs. This test needs to run after the Python unittests have been run." + exit 1 +fi + +rm -f dist/*.whl +python setup.py --quiet bdist_wheel --checkpoint tests/outputs/checkpoint_10.pth.tar --model_config tests/outputs/dummy_model_config.json + +python -m venv /tmp/venv +source /tmp/venv/bin/activate +pip install --quiet --upgrade pip setuptools wheel +pip install --quiet dist/TTS*.whl + +# this is related to https://github.com/librosa/librosa/issues/1160 +pip install numba==0.48 + +python -m TTS.server.server & +SERVER_PID=$! + +echo 'Waiting for server...' +sleep 30 + +curl -o /tmp/audio.wav "http://localhost:5002/api/tts?text=synthesis%20schmynthesis" +python -c 'import sys; import wave; print(wave.open(sys.argv[1]).getnframes())' /tmp/audio.wav + +kill $SERVER_PID + +deactivate +rm -rf /tmp/venv + +rm /tmp/audio.wav +rm dist/*.whl diff --git a/tests/test_speedy_speech_layers.py b/tests/test_speedy_speech_layers.py new file mode 100644 index 0000000000000000000000000000000000000000..a5567ac31283fc7dba191c1b089dc4fa9aef079b --- /dev/null +++ b/tests/test_speedy_speech_layers.py @@ -0,0 +1,168 @@ +import torch + +from TTS.tts.layers.speedy_speech.encoder import Encoder +from TTS.tts.layers.speedy_speech.decoder import Decoder +from TTS.tts.layers.speedy_speech.duration_predictor import DurationPredictor +from TTS.tts.utils.generic_utils import sequence_mask +from TTS.tts.models.speedy_speech import SpeedySpeech + + +use_cuda = torch.cuda.is_available() +device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + + +def test_encoder(): + input_dummy = torch.rand(8, 14, 37).to(device) + input_lengths = torch.randint(31, 37, (8, )).long().to(device) + input_lengths[-1] = 37 + input_mask = torch.unsqueeze( + sequence_mask(input_lengths, input_dummy.size(2)), 1).to(device) + + # residual bn conv encoder + layer = Encoder(out_channels=11, + in_hidden_channels=14, + encoder_type='residual_conv_bn').to(device) + output = layer(input_dummy, input_mask) + assert list(output.shape) == [8, 11, 37] + + # transformer encoder + layer = Encoder(out_channels=11, + in_hidden_channels=14, + encoder_type='transformer', + encoder_params={ + 'hidden_channels_ffn': 768, + 'num_heads': 2, + "kernel_size": 3, + "dropout_p": 0.1, + "num_layers": 6, + "rel_attn_window_size": 4, + "input_length": None + }).to(device) + output = layer(input_dummy, input_mask) + assert list(output.shape) == [8, 11, 37] + + +def test_decoder(): + input_dummy = torch.rand(8, 128, 37).to(device) + input_lengths = torch.randint(31, 37, (8, )).long().to(device) + input_lengths[-1] = 37 + + input_mask = torch.unsqueeze( + sequence_mask(input_lengths, input_dummy.size(2)), 1).to(device) + + # residual bn conv decoder + layer = Decoder(out_channels=11, in_hidden_channels=128).to(device) + output = layer(input_dummy, input_mask) + assert list(output.shape) == [8, 11, 37] + + # transformer decoder + layer = Decoder(out_channels=11, + in_hidden_channels=128, + decoder_type='transformer', + decoder_params={ + 'hidden_channels_ffn': 128, + 'num_heads': 2, + 
"kernel_size": 3, + "dropout_p": 0.1, + "num_layers": 8, + "rel_attn_window_size": 4, + "input_length": None + }).to(device) + output = layer(input_dummy, input_mask) + assert list(output.shape) == [8, 11, 37] + + + # wavenet decoder + layer = Decoder(out_channels=11, + in_hidden_channels=128, + decoder_type='wavenet', + decoder_params={ + "num_blocks": 12, + "hidden_channels": 192, + "kernel_size": 5, + "dilation_rate": 1, + "num_layers": 4, + "dropout_p": 0.05 + }).to(device) + output = layer(input_dummy, input_mask) + assert list(output.shape) == [8, 11, 37] + + + +def test_duration_predictor(): + input_dummy = torch.rand(8, 128, 27).to(device) + input_lengths = torch.randint(20, 27, (8, )).long().to(device) + input_lengths[-1] = 27 + + x_mask = torch.unsqueeze(sequence_mask(input_lengths, input_dummy.size(2)), + 1).to(device) + + layer = DurationPredictor(hidden_channels=128).to(device) + + output = layer(input_dummy, x_mask) + assert list(output.shape) == [8, 1, 27] + + +def test_speedy_speech(): + num_chars = 7 + B = 8 + T_en = 37 + T_de = 74 + + x_dummy = torch.randint(0, 7, (B, T_en)).long().to(device) + x_lengths = torch.randint(31, T_en, (B, )).long().to(device) + x_lengths[-1] = T_en + + # set durations. max total duration should be equal to T_de + durations = torch.randint(1, 4, (B, T_en)) + durations = durations * (T_de / durations.sum(1)).unsqueeze(1) + durations = durations.to(torch.long).to(device) + max_dur = durations.sum(1).max() + durations[:, 0] += T_de - max_dur if T_de > max_dur else 0 + + y_lengths = durations.sum(1) + + model = SpeedySpeech(num_chars, out_channels=80, hidden_channels=128) + if use_cuda: + model.cuda() + + # forward pass + o_de, o_dr, attn = model(x_dummy, x_lengths, y_lengths, durations) + + assert list(o_de.shape) == [B, 80, T_de], f"{list(o_de.shape)}" + assert list(attn.shape) == [B, T_de, T_en] + assert list(o_dr.shape) == [B, T_en] + + # with speaker embedding + model = SpeedySpeech(num_chars, + out_channels=80, + hidden_channels=128, + num_speakers=10, + c_in_channels=256).to(device) + model.forward(x_dummy, + x_lengths, + y_lengths, + durations, + g=torch.randint(0, 10, (B,)).to(device)) + + assert list(o_de.shape) == [B, 80, T_de], f"{list(o_de.shape)}" + assert list(attn.shape) == [B, T_de, T_en] + assert list(o_dr.shape) == [B, T_en] + + + # with speaker external embedding + model = SpeedySpeech(num_chars, + out_channels=80, + hidden_channels=128, + num_speakers=10, + external_c=True, + c_in_channels=256).to(device) + model.forward(x_dummy, + x_lengths, + y_lengths, + durations, + g=torch.rand((B,256)).to(device)) + + assert list(o_de.shape) == [B, 80, T_de], f"{list(o_de.shape)}" + assert list(attn.shape) == [B, T_de, T_en] + assert list(o_dr.shape) == [B, T_en] \ No newline at end of file diff --git a/tests/test_speedy_speech_train.sh b/tests/test_speedy_speech_train.sh new file mode 100644 index 0000000000000000000000000000000000000000..e0c850000b3cd8f190331af2fc84e35c35b90bb4 --- /dev/null +++ b/tests/test_speedy_speech_train.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash +set -xe +BASEDIR=$(dirname "$0") +echo "$BASEDIR" +# run training +CUDA_VISIBLE_DEVICES="" python TTS/bin/train_speedy_speech.py --config_path $BASEDIR/inputs/test_speedy_speech.json +# find the training folder +LATEST_FOLDER=$(ls $BASEDIR/train_outputs/| sort | tail -1) +echo $LATEST_FOLDER +# continue the previous training +CUDA_VISIBLE_DEVICES="" python TTS/bin/train_speedy_speech.py --continue_path $BASEDIR/train_outputs/$LATEST_FOLDER +# remove all the outputs +rm 
-rf $BASEDIR/train_outputs/ diff --git a/tests/test_stft_torch.py b/tests/test_stft_torch.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/tests/test_tacotron2_model.py b/tests/test_tacotron2_model.py new file mode 100644 index 0000000000000000000000000000000000000000..38f4c737ca2c533594f16fa88768cdba7da6bb3a --- /dev/null +++ b/tests/test_tacotron2_model.py @@ -0,0 +1,295 @@ +import copy +import os +import unittest + +import torch +from tests import get_tests_input_path +from torch import nn, optim + +from TTS.tts.layers.losses import MSELossMasked +from TTS.tts.models.tacotron2 import Tacotron2 +from TTS.utils.io import load_config +from TTS.utils.audio import AudioProcessor + +#pylint: disable=unused-variable + +torch.manual_seed(1) +use_cuda = torch.cuda.is_available() +device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + +c = load_config(os.path.join(get_tests_input_path(), 'test_config.json')) + +ap = AudioProcessor(**c.audio) +WAV_FILE = os.path.join(get_tests_input_path(), "example_1.wav") + + +class TacotronTrainTest(unittest.TestCase): + def test_train_step(self): # pylint: disable=no-self-use + input_dummy = torch.randint(0, 24, (8, 128)).long().to(device) + input_lengths = torch.randint(100, 128, (8, )).long().to(device) + input_lengths = torch.sort(input_lengths, descending=True)[0] + mel_spec = torch.rand(8, 30, c.audio['num_mels']).to(device) + mel_postnet_spec = torch.rand(8, 30, c.audio['num_mels']).to(device) + mel_lengths = torch.randint(20, 30, (8, )).long().to(device) + mel_lengths[0] = 30 + stop_targets = torch.zeros(8, 30, 1).float().to(device) + speaker_ids = torch.randint(0, 5, (8, )).long().to(device) + + for idx in mel_lengths: + stop_targets[:, int(idx.item()):, 0] = 1.0 + + stop_targets = stop_targets.view(input_dummy.shape[0], + stop_targets.size(1) // c.r, -1) + stop_targets = (stop_targets.sum(2) > 0.0).unsqueeze(2).float().squeeze() + + criterion = MSELossMasked(seq_len_norm=False).to(device) + criterion_st = nn.BCEWithLogitsLoss().to(device) + model = Tacotron2(num_chars=24, r=c.r, num_speakers=5).to(device) + model.train() + model_ref = copy.deepcopy(model) + count = 0 + for param, param_ref in zip(model.parameters(), + model_ref.parameters()): + assert (param - param_ref).sum() == 0, param + count += 1 + optimizer = optim.Adam(model.parameters(), lr=c.lr) + for i in range(5): + mel_out, mel_postnet_out, align, stop_tokens = model.forward( + input_dummy, input_lengths, mel_spec, mel_lengths, speaker_ids) + assert torch.sigmoid(stop_tokens).data.max() <= 1.0 + assert torch.sigmoid(stop_tokens).data.min() >= 0.0 + optimizer.zero_grad() + loss = criterion(mel_out, mel_spec, mel_lengths) + stop_loss = criterion_st(stop_tokens, stop_targets) + loss = loss + criterion(mel_postnet_out, mel_postnet_spec, mel_lengths) + stop_loss + loss.backward() + optimizer.step() + # check parameter changes + count = 0 + for param, param_ref in zip(model.parameters(), + model_ref.parameters()): + # ignore pre-higway layer since it works conditional + # if count not in [145, 59]: + assert (param != param_ref).any( + ), "param {} with shape {} not updated!! 
\n{}\n{}".format( + count, param.shape, param, param_ref) + count += 1 + + +class MultiSpeakeTacotronTrainTest(unittest.TestCase): + @staticmethod + def test_train_step(): + input_dummy = torch.randint(0, 24, (8, 128)).long().to(device) + input_lengths = torch.randint(100, 128, (8, )).long().to(device) + input_lengths = torch.sort(input_lengths, descending=True)[0] + mel_spec = torch.rand(8, 30, c.audio['num_mels']).to(device) + mel_postnet_spec = torch.rand(8, 30, c.audio['num_mels']).to(device) + mel_lengths = torch.randint(20, 30, (8, )).long().to(device) + mel_lengths[0] = 30 + stop_targets = torch.zeros(8, 30, 1).float().to(device) + speaker_embeddings = torch.rand(8, 55).to(device) + + for idx in mel_lengths: + stop_targets[:, int(idx.item()):, 0] = 1.0 + + stop_targets = stop_targets.view(input_dummy.shape[0], + stop_targets.size(1) // c.r, -1) + stop_targets = (stop_targets.sum(2) > 0.0).unsqueeze(2).float().squeeze() + + criterion = MSELossMasked(seq_len_norm=False).to(device) + criterion_st = nn.BCEWithLogitsLoss().to(device) + model = Tacotron2(num_chars=24, r=c.r, num_speakers=5, speaker_embedding_dim=55).to(device) + model.train() + model_ref = copy.deepcopy(model) + count = 0 + for param, param_ref in zip(model.parameters(), + model_ref.parameters()): + assert (param - param_ref).sum() == 0, param + count += 1 + optimizer = optim.Adam(model.parameters(), lr=c.lr) + for i in range(5): + mel_out, mel_postnet_out, align, stop_tokens = model.forward( + input_dummy, input_lengths, mel_spec, mel_lengths, speaker_embeddings=speaker_embeddings) + assert torch.sigmoid(stop_tokens).data.max() <= 1.0 + assert torch.sigmoid(stop_tokens).data.min() >= 0.0 + optimizer.zero_grad() + loss = criterion(mel_out, mel_spec, mel_lengths) + stop_loss = criterion_st(stop_tokens, stop_targets) + loss = loss + criterion(mel_postnet_out, mel_postnet_spec, mel_lengths) + stop_loss + loss.backward() + optimizer.step() + # check parameter changes + count = 0 + for param, param_ref in zip(model.parameters(), + model_ref.parameters()): + # ignore pre-higway layer since it works conditional + # if count not in [145, 59]: + assert (param != param_ref).any( + ), "param {} with shape {} not updated!! 
\n{}\n{}".format( + count, param.shape, param, param_ref) + count += 1 + +class TacotronGSTTrainTest(unittest.TestCase): + #pylint: disable=no-self-use + def test_train_step(self): + # with random gst mel style + input_dummy = torch.randint(0, 24, (8, 128)).long().to(device) + input_lengths = torch.randint(100, 128, (8, )).long().to(device) + input_lengths = torch.sort(input_lengths, descending=True)[0] + mel_spec = torch.rand(8, 30, c.audio['num_mels']).to(device) + mel_postnet_spec = torch.rand(8, 30, c.audio['num_mels']).to(device) + mel_lengths = torch.randint(20, 30, (8, )).long().to(device) + mel_lengths[0] = 30 + stop_targets = torch.zeros(8, 30, 1).float().to(device) + speaker_ids = torch.randint(0, 5, (8, )).long().to(device) + + for idx in mel_lengths: + stop_targets[:, int(idx.item()):, 0] = 1.0 + + stop_targets = stop_targets.view(input_dummy.shape[0], + stop_targets.size(1) // c.r, -1) + stop_targets = (stop_targets.sum(2) > 0.0).unsqueeze(2).float().squeeze() + + criterion = MSELossMasked(seq_len_norm=False).to(device) + criterion_st = nn.BCEWithLogitsLoss().to(device) + model = Tacotron2(num_chars=24, r=c.r, num_speakers=5, gst=True, gst_embedding_dim=c.gst['gst_embedding_dim'], gst_num_heads=c.gst['gst_num_heads'], gst_style_tokens=c.gst['gst_style_tokens']).to(device) + model.train() + model_ref = copy.deepcopy(model) + count = 0 + for param, param_ref in zip(model.parameters(), model_ref.parameters()): + assert (param - param_ref).sum() == 0, param + count += 1 + optimizer = optim.Adam(model.parameters(), lr=c.lr) + for i in range(10): + mel_out, mel_postnet_out, align, stop_tokens = model.forward( + input_dummy, input_lengths, mel_spec, mel_lengths, speaker_ids) + assert torch.sigmoid(stop_tokens).data.max() <= 1.0 + assert torch.sigmoid(stop_tokens).data.min() >= 0.0 + optimizer.zero_grad() + loss = criterion(mel_out, mel_spec, mel_lengths) + stop_loss = criterion_st(stop_tokens, stop_targets) + loss = loss + criterion(mel_postnet_out, mel_postnet_spec, mel_lengths) + stop_loss + loss.backward() + optimizer.step() + # check parameter changes + count = 0 + for name_param, param_ref in zip(model.named_parameters(), model_ref.parameters()): + # ignore pre-higway layer since it works conditional + # if count not in [145, 59]: + name, param = name_param + if name == 'gst_layer.encoder.recurrence.weight_hh_l0': + #print(param.grad) + continue + assert (param != param_ref).any( + ), "param {} {} with shape {} not updated!! 
\n{}\n{}".format( + name, count, param.shape, param, param_ref) + count += 1 + + # with file gst style + mel_spec = torch.FloatTensor(ap.melspectrogram(ap.load_wav(WAV_FILE)))[:, :30].unsqueeze(0).transpose(1, 2).to(device) + mel_spec = mel_spec.repeat(8, 1, 1) + input_dummy = torch.randint(0, 24, (8, 128)).long().to(device) + input_lengths = torch.randint(100, 128, (8, )).long().to(device) + input_lengths = torch.sort(input_lengths, descending=True)[0] + mel_postnet_spec = torch.rand(8, 30, c.audio['num_mels']).to(device) + mel_lengths = torch.randint(20, 30, (8, )).long().to(device) + mel_lengths[0] = 30 + stop_targets = torch.zeros(8, 30, 1).float().to(device) + speaker_ids = torch.randint(0, 5, (8, )).long().to(device) + + for idx in mel_lengths: + stop_targets[:, int(idx.item()):, 0] = 1.0 + + stop_targets = stop_targets.view(input_dummy.shape[0], + stop_targets.size(1) // c.r, -1) + stop_targets = (stop_targets.sum(2) > 0.0).unsqueeze(2).float().squeeze() + + criterion = MSELossMasked(seq_len_norm=False).to(device) + criterion_st = nn.BCEWithLogitsLoss().to(device) + model = Tacotron2(num_chars=24, r=c.r, num_speakers=5, gst=True, gst_embedding_dim=c.gst['gst_embedding_dim'], gst_num_heads=c.gst['gst_num_heads'], gst_style_tokens=c.gst['gst_style_tokens']).to(device) + model.train() + model_ref = copy.deepcopy(model) + count = 0 + for param, param_ref in zip(model.parameters(), model_ref.parameters()): + assert (param - param_ref).sum() == 0, param + count += 1 + optimizer = optim.Adam(model.parameters(), lr=c.lr) + for i in range(10): + mel_out, mel_postnet_out, align, stop_tokens = model.forward( + input_dummy, input_lengths, mel_spec, mel_lengths, speaker_ids) + assert torch.sigmoid(stop_tokens).data.max() <= 1.0 + assert torch.sigmoid(stop_tokens).data.min() >= 0.0 + optimizer.zero_grad() + loss = criterion(mel_out, mel_spec, mel_lengths) + stop_loss = criterion_st(stop_tokens, stop_targets) + loss = loss + criterion(mel_postnet_out, mel_postnet_spec, mel_lengths) + stop_loss + loss.backward() + optimizer.step() + # check parameter changes + count = 0 + for name_param, param_ref in zip(model.named_parameters(), model_ref.parameters()): + # ignore pre-higway layer since it works conditional + # if count not in [145, 59]: + name, param = name_param + if name == 'gst_layer.encoder.recurrence.weight_hh_l0': + #print(param.grad) + continue + assert (param != param_ref).any( + ), "param {} {} with shape {} not updated!! 
\n{}\n{}".format( + name, count, param.shape, param, param_ref) + count += 1 + +class SCGSTMultiSpeakeTacotronTrainTest(unittest.TestCase): + @staticmethod + def test_train_step(): + input_dummy = torch.randint(0, 24, (8, 128)).long().to(device) + input_lengths = torch.randint(100, 128, (8, )).long().to(device) + input_lengths = torch.sort(input_lengths, descending=True)[0] + mel_spec = torch.rand(8, 30, c.audio['num_mels']).to(device) + mel_postnet_spec = torch.rand(8, 30, c.audio['num_mels']).to(device) + mel_lengths = torch.randint(20, 30, (8, )).long().to(device) + mel_lengths[0] = 30 + stop_targets = torch.zeros(8, 30, 1).float().to(device) + speaker_embeddings = torch.rand(8, 55).to(device) + + for idx in mel_lengths: + stop_targets[:, int(idx.item()):, 0] = 1.0 + + stop_targets = stop_targets.view(input_dummy.shape[0], + stop_targets.size(1) // c.r, -1) + stop_targets = (stop_targets.sum(2) > 0.0).unsqueeze(2).float().squeeze() + criterion = MSELossMasked(seq_len_norm=False).to(device) + criterion_st = nn.BCEWithLogitsLoss().to(device) + model = Tacotron2(num_chars=24, r=c.r, num_speakers=5, speaker_embedding_dim=55, gst=True, gst_embedding_dim=c.gst['gst_embedding_dim'], gst_num_heads=c.gst['gst_num_heads'], gst_style_tokens=c.gst['gst_style_tokens'], gst_use_speaker_embedding=c.gst['gst_use_speaker_embedding']).to(device) + model.train() + model_ref = copy.deepcopy(model) + count = 0 + for param, param_ref in zip(model.parameters(), + model_ref.parameters()): + assert (param - param_ref).sum() == 0, param + count += 1 + optimizer = optim.Adam(model.parameters(), lr=c.lr) + for i in range(5): + mel_out, mel_postnet_out, align, stop_tokens = model.forward( + input_dummy, input_lengths, mel_spec, mel_lengths, speaker_embeddings=speaker_embeddings) + assert torch.sigmoid(stop_tokens).data.max() <= 1.0 + assert torch.sigmoid(stop_tokens).data.min() >= 0.0 + optimizer.zero_grad() + loss = criterion(mel_out, mel_spec, mel_lengths) + stop_loss = criterion_st(stop_tokens, stop_targets) + loss = loss + criterion(mel_postnet_out, mel_postnet_spec, mel_lengths) + stop_loss + loss.backward() + optimizer.step() + # check parameter changes + count = 0 + for name_param, param_ref in zip(model.named_parameters(), + model_ref.parameters()): + # ignore pre-higway layer since it works conditional + # if count not in [145, 59]: + name, param = name_param + if name == 'gst_layer.encoder.recurrence.weight_hh_l0': + continue + assert (param != param_ref).any( + ), "param {} with shape {} not updated!! 
\n{}\n{}".format( + count, param.shape, param, param_ref) + count += 1 \ No newline at end of file diff --git a/tests/test_tacotron2_tf_model.py b/tests/test_tacotron2_tf_model.py new file mode 100644 index 0000000000000000000000000000000000000000..b792cfa7e9032f8e4d70fa55ad6fda5436af6a5e --- /dev/null +++ b/tests/test_tacotron2_tf_model.py @@ -0,0 +1,137 @@ +import os +import unittest + +import numpy as np +import tensorflow as tf +import torch +from tests import get_tests_input_path +from TTS.tts.tf.models.tacotron2 import Tacotron2 +from TTS.tts.tf.utils.tflite import (convert_tacotron2_to_tflite, + load_tflite_model) +from TTS.utils.io import load_config + +tf.get_logger().setLevel('INFO') + + + +#pylint: disable=unused-variable + +torch.manual_seed(1) +use_cuda = torch.cuda.is_available() +device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + +c = load_config(os.path.join(get_tests_input_path(), 'test_config.json')) + + +class TacotronTFTrainTest(unittest.TestCase): + + @staticmethod + def generate_dummy_inputs(): + chars_seq = torch.randint(0, 24, (8, 128)).long().to(device) + chars_seq_lengths = torch.randint(100, 128, (8, )).long().to(device) + chars_seq_lengths = torch.sort(chars_seq_lengths, descending=True)[0] + mel_spec = torch.rand(8, 30, c.audio['num_mels']).to(device) + mel_postnet_spec = torch.rand(8, 30, c.audio['num_mels']).to(device) + mel_lengths = torch.randint(20, 30, (8, )).long().to(device) + stop_targets = torch.zeros(8, 30, 1).float().to(device) + speaker_ids = torch.randint(0, 5, (8, )).long().to(device) + + chars_seq = tf.convert_to_tensor(chars_seq.cpu().numpy()) + chars_seq_lengths = tf.convert_to_tensor(chars_seq_lengths.cpu().numpy()) + mel_spec = tf.convert_to_tensor(mel_spec.cpu().numpy()) + return chars_seq, chars_seq_lengths, mel_spec, mel_postnet_spec, mel_lengths,\ + stop_targets, speaker_ids + + def test_train_step(self): + ''' test forward pass ''' + chars_seq, chars_seq_lengths, mel_spec, mel_postnet_spec, mel_lengths,\ + stop_targets, speaker_ids = self.generate_dummy_inputs() + + for idx in mel_lengths: + stop_targets[:, int(idx.item()):, 0] = 1.0 + + stop_targets = stop_targets.view(chars_seq.shape[0], + stop_targets.size(1) // c.r, -1) + stop_targets = (stop_targets.sum(2) > 0.0).unsqueeze(2).float().squeeze() + + model = Tacotron2(num_chars=24, r=c.r, num_speakers=5) + # training pass + output = model(chars_seq, chars_seq_lengths, mel_spec, training=True) + + # check model output shapes + assert np.all(output[0].shape == mel_spec.shape) + assert np.all(output[1].shape == mel_spec.shape) + assert output[2].shape[2] == chars_seq.shape[1] + assert output[2].shape[1] == (mel_spec.shape[1] // model.decoder.r) + assert output[3].shape[1] == (mel_spec.shape[1] // model.decoder.r) + + # inference pass + output = model(chars_seq, training=False) + + def test_forward_attention(self,): + chars_seq, chars_seq_lengths, mel_spec, mel_postnet_spec, mel_lengths,\ + stop_targets, speaker_ids = self.generate_dummy_inputs() + + for idx in mel_lengths: + stop_targets[:, int(idx.item()):, 0] = 1.0 + + stop_targets = stop_targets.view(chars_seq.shape[0], + stop_targets.size(1) // c.r, -1) + stop_targets = (stop_targets.sum(2) > 0.0).unsqueeze(2).float().squeeze() + + model = Tacotron2(num_chars=24, r=c.r, num_speakers=5, forward_attn=True) + # training pass + output = model(chars_seq, chars_seq_lengths, mel_spec, training=True) + + # check model output shapes + assert np.all(output[0].shape == mel_spec.shape) + assert np.all(output[1].shape == 
mel_spec.shape) + assert output[2].shape[2] == chars_seq.shape[1] + assert output[2].shape[1] == (mel_spec.shape[1] // model.decoder.r) + assert output[3].shape[1] == (mel_spec.shape[1] // model.decoder.r) + + # inference pass + output = model(chars_seq, training=False) + + def test_tflite_conversion(self, ): #pylint:disable=no-self-use + model = Tacotron2(num_chars=24, + num_speakers=0, + r=3, + postnet_output_dim=80, + decoder_output_dim=80, + attn_type='original', + attn_win=False, + attn_norm='sigmoid', + prenet_type='original', + prenet_dropout=True, + forward_attn=False, + trans_agent=False, + forward_attn_mask=False, + location_attn=True, + attn_K=0, + separate_stopnet=True, + bidirectional_decoder=False, + enable_tflite=True) + model.build_inference() + convert_tacotron2_to_tflite(model, output_path='test_tacotron2.tflite', experimental_converter=True) + # init tflite model + tflite_model = load_tflite_model('test_tacotron2.tflite') + # fake input + inputs = tf.random.uniform([1, 4], maxval=10, dtype=tf.int32) #pylint:disable=unexpected-keyword-arg + # run inference + # get input and output details + input_details = tflite_model.get_input_details() + output_details = tflite_model.get_output_details() + # reshape input tensor for the new input shape + tflite_model.resize_tensor_input(input_details[0]['index'], inputs.shape) #pylint:disable=unexpected-keyword-arg + tflite_model.allocate_tensors() + detail = input_details[0] + input_shape = detail['shape'] + tflite_model.set_tensor(detail['index'], inputs) + # run the tflite_model + tflite_model.invoke() + # collect outputs + decoder_output = tflite_model.get_tensor(output_details[0]['index']) + postnet_output = tflite_model.get_tensor(output_details[1]['index']) + # remove tflite binary + os.remove('test_tacotron2.tflite') diff --git a/tests/test_tacotron_model.py b/tests/test_tacotron_model.py new file mode 100644 index 0000000000000000000000000000000000000000..c56a65658ca89bcc01495d301097ea27e0426fa2 --- /dev/null +++ b/tests/test_tacotron_model.py @@ -0,0 +1,359 @@ +import copy +import os +import unittest + +import torch +from tests import get_tests_input_path +from torch import nn, optim + +from TTS.tts.layers.losses import L1LossMasked +from TTS.tts.models.tacotron import Tacotron +from TTS.utils.io import load_config +from TTS.utils.audio import AudioProcessor + +#pylint: disable=unused-variable + +torch.manual_seed(1) +use_cuda = torch.cuda.is_available() +device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + +c = load_config(os.path.join(get_tests_input_path(), 'test_config.json')) + +ap = AudioProcessor(**c.audio) +WAV_FILE = os.path.join(get_tests_input_path(), "example_1.wav") + + +def count_parameters(model): + r"""Count number of trainable parameters in a network""" + return sum(p.numel() for p in model.parameters() if p.requires_grad) + + +class TacotronTrainTest(unittest.TestCase): + @staticmethod + def test_train_step(): + input_dummy = torch.randint(0, 24, (8, 128)).long().to(device) + input_lengths = torch.randint(100, 129, (8, )).long().to(device) + input_lengths[-1] = 128 + mel_spec = torch.rand(8, 30, c.audio['num_mels']).to(device) + linear_spec = torch.rand(8, 30, c.audio['fft_size']).to(device) + mel_lengths = torch.randint(20, 30, (8, )).long().to(device) + stop_targets = torch.zeros(8, 30, 1).float().to(device) + speaker_ids = torch.randint(0, 5, (8, )).long().to(device) + + for idx in mel_lengths: + stop_targets[:, int(idx.item()):, 0] = 1.0 + + stop_targets = 
stop_targets.view(input_dummy.shape[0], + stop_targets.size(1) // c.r, -1) + stop_targets = (stop_targets.sum(2) > + 0.0).unsqueeze(2).float().squeeze() + + criterion = L1LossMasked(seq_len_norm=False).to(device) + criterion_st = nn.BCEWithLogitsLoss().to(device) + model = Tacotron( + num_chars=32, + num_speakers=5, + postnet_output_dim=c.audio['fft_size'], + decoder_output_dim=c.audio['num_mels'], + r=c.r, + memory_size=c.memory_size + ).to(device) #FIXME: missing num_speakers parameter to Tacotron ctor + model.train() + print(" > Num parameters for Tacotron model:%s" % + (count_parameters(model))) + model_ref = copy.deepcopy(model) + count = 0 + for param, param_ref in zip(model.parameters(), + model_ref.parameters()): + assert (param - param_ref).sum() == 0, param + count += 1 + optimizer = optim.Adam(model.parameters(), lr=c.lr) + for _ in range(5): + mel_out, linear_out, align, stop_tokens = model.forward( + input_dummy, input_lengths, mel_spec, mel_lengths, speaker_ids) + optimizer.zero_grad() + loss = criterion(mel_out, mel_spec, mel_lengths) + stop_loss = criterion_st(stop_tokens, stop_targets) + loss = loss + criterion(linear_out, linear_spec, + mel_lengths) + stop_loss + loss.backward() + optimizer.step() + # check parameter changes + count = 0 + for param, param_ref in zip(model.parameters(), + model_ref.parameters()): + # ignore pre-higway layer since it works conditional + # if count not in [145, 59]: + assert (param != param_ref).any( + ), "param {} with shape {} not updated!! \n{}\n{}".format( + count, param.shape, param, param_ref) + count += 1 + +class MultiSpeakeTacotronTrainTest(unittest.TestCase): + @staticmethod + def test_train_step(): + input_dummy = torch.randint(0, 24, (8, 128)).long().to(device) + input_lengths = torch.randint(100, 129, (8, )).long().to(device) + input_lengths[-1] = 128 + mel_spec = torch.rand(8, 30, c.audio['num_mels']).to(device) + linear_spec = torch.rand(8, 30, c.audio['fft_size']).to(device) + mel_lengths = torch.randint(20, 30, (8, )).long().to(device) + stop_targets = torch.zeros(8, 30, 1).float().to(device) + speaker_embeddings = torch.rand(8, 55).to(device) + + for idx in mel_lengths: + stop_targets[:, int(idx.item()):, 0] = 1.0 + + stop_targets = stop_targets.view(input_dummy.shape[0], + stop_targets.size(1) // c.r, -1) + stop_targets = (stop_targets.sum(2) > + 0.0).unsqueeze(2).float().squeeze() + + criterion = L1LossMasked(seq_len_norm=False).to(device) + criterion_st = nn.BCEWithLogitsLoss().to(device) + model = Tacotron( + num_chars=32, + num_speakers=5, + postnet_output_dim=c.audio['fft_size'], + decoder_output_dim=c.audio['num_mels'], + r=c.r, + memory_size=c.memory_size, + speaker_embedding_dim=55, + ).to(device) #FIXME: missing num_speakers parameter to Tacotron ctor + model.train() + print(" > Num parameters for Tacotron model:%s" % + (count_parameters(model))) + model_ref = copy.deepcopy(model) + count = 0 + for param, param_ref in zip(model.parameters(), + model_ref.parameters()): + assert (param - param_ref).sum() == 0, param + count += 1 + optimizer = optim.Adam(model.parameters(), lr=c.lr) + for _ in range(5): + mel_out, linear_out, align, stop_tokens = model.forward( + input_dummy, input_lengths, mel_spec, mel_lengths, + speaker_embeddings=speaker_embeddings) + optimizer.zero_grad() + loss = criterion(mel_out, mel_spec, mel_lengths) + stop_loss = criterion_st(stop_tokens, stop_targets) + loss = loss + criterion(linear_out, linear_spec, + mel_lengths) + stop_loss + loss.backward() + optimizer.step() + # check parameter 
changes + count = 0 + for param, param_ref in zip(model.parameters(), + model_ref.parameters()): + # ignore pre-higway layer since it works conditional + # if count not in [145, 59]: + assert (param != param_ref).any( + ), "param {} with shape {} not updated!! \n{}\n{}".format( + count, param.shape, param, param_ref) + count += 1 + +class TacotronGSTTrainTest(unittest.TestCase): + @staticmethod + def test_train_step(): + # with random gst mel style + input_dummy = torch.randint(0, 24, (8, 128)).long().to(device) + input_lengths = torch.randint(100, 129, (8, )).long().to(device) + input_lengths[-1] = 128 + mel_spec = torch.rand(8, 120, c.audio['num_mels']).to(device) + linear_spec = torch.rand(8, 120, c.audio['fft_size']).to(device) + mel_lengths = torch.randint(20, 120, (8, )).long().to(device) + mel_lengths[-1] = 120 + stop_targets = torch.zeros(8, 120, 1).float().to(device) + speaker_ids = torch.randint(0, 5, (8, )).long().to(device) + + for idx in mel_lengths: + stop_targets[:, int(idx.item()):, 0] = 1.0 + + stop_targets = stop_targets.view(input_dummy.shape[0], + stop_targets.size(1) // c.r, -1) + stop_targets = (stop_targets.sum(2) > + 0.0).unsqueeze(2).float().squeeze() + + criterion = L1LossMasked(seq_len_norm=False).to(device) + criterion_st = nn.BCEWithLogitsLoss().to(device) + model = Tacotron( + num_chars=32, + num_speakers=5, + gst=True, + gst_embedding_dim=c.gst['gst_embedding_dim'], + gst_num_heads=c.gst['gst_num_heads'], + gst_style_tokens=c.gst['gst_style_tokens'], + postnet_output_dim=c.audio['fft_size'], + decoder_output_dim=c.audio['num_mels'], + r=c.r, + memory_size=c.memory_size + ).to(device) #FIXME: missing num_speakers parameter to Tacotron ctor + model.train() + # print(model) + print(" > Num parameters for Tacotron GST model:%s" % + (count_parameters(model))) + model_ref = copy.deepcopy(model) + count = 0 + for param, param_ref in zip(model.parameters(), + model_ref.parameters()): + assert (param - param_ref).sum() == 0, param + count += 1 + optimizer = optim.Adam(model.parameters(), lr=c.lr) + for _ in range(10): + mel_out, linear_out, align, stop_tokens = model.forward( + input_dummy, input_lengths, mel_spec, mel_lengths, speaker_ids) + optimizer.zero_grad() + loss = criterion(mel_out, mel_spec, mel_lengths) + stop_loss = criterion_st(stop_tokens, stop_targets) + loss = loss + criterion(linear_out, linear_spec, + mel_lengths) + stop_loss + loss.backward() + optimizer.step() + # check parameter changes + count = 0 + for param, param_ref in zip(model.parameters(), + model_ref.parameters()): + # ignore pre-higway layer since it works conditional + assert (param != param_ref).any( + ), "param {} with shape {} not updated!! 
\n{}\n{}".format( + count, param.shape, param, param_ref) + count += 1 + + # with file gst style + mel_spec = torch.FloatTensor(ap.melspectrogram(ap.load_wav(WAV_FILE)))[:, :120].unsqueeze(0).transpose(1, 2).to(device) + mel_spec = mel_spec.repeat(8, 1, 1) + + input_dummy = torch.randint(0, 24, (8, 128)).long().to(device) + input_lengths = torch.randint(100, 129, (8, )).long().to(device) + input_lengths[-1] = 128 + linear_spec = torch.rand(8, mel_spec.size(1), c.audio['fft_size']).to(device) + mel_lengths = torch.randint(20, mel_spec.size(1), (8, )).long().to(device) + mel_lengths[-1] = mel_spec.size(1) + stop_targets = torch.zeros(8, mel_spec.size(1), 1).float().to(device) + speaker_ids = torch.randint(0, 5, (8, )).long().to(device) + + for idx in mel_lengths: + stop_targets[:, int(idx.item()):, 0] = 1.0 + + stop_targets = stop_targets.view(input_dummy.shape[0], + stop_targets.size(1) // c.r, -1) + stop_targets = (stop_targets.sum(2) > + 0.0).unsqueeze(2).float().squeeze() + + criterion = L1LossMasked(seq_len_norm=False).to(device) + criterion_st = nn.BCEWithLogitsLoss().to(device) + model = Tacotron( + num_chars=32, + num_speakers=5, + gst=True, + gst_embedding_dim=c.gst['gst_embedding_dim'], + gst_num_heads=c.gst['gst_num_heads'], + gst_style_tokens=c.gst['gst_style_tokens'], + postnet_output_dim=c.audio['fft_size'], + decoder_output_dim=c.audio['num_mels'], + r=c.r, + memory_size=c.memory_size + ).to(device) #FIXME: missing num_speakers parameter to Tacotron ctor + model.train() + # print(model) + print(" > Num parameters for Tacotron GST model:%s" % + (count_parameters(model))) + model_ref = copy.deepcopy(model) + count = 0 + for param, param_ref in zip(model.parameters(), + model_ref.parameters()): + assert (param - param_ref).sum() == 0, param + count += 1 + optimizer = optim.Adam(model.parameters(), lr=c.lr) + for _ in range(10): + mel_out, linear_out, align, stop_tokens = model.forward( + input_dummy, input_lengths, mel_spec, mel_lengths, speaker_ids) + optimizer.zero_grad() + loss = criterion(mel_out, mel_spec, mel_lengths) + stop_loss = criterion_st(stop_tokens, stop_targets) + loss = loss + criterion(linear_out, linear_spec, + mel_lengths) + stop_loss + loss.backward() + optimizer.step() + # check parameter changes + count = 0 + for param, param_ref in zip(model.parameters(), + model_ref.parameters()): + # ignore pre-higway layer since it works conditional + assert (param != param_ref).any( + ), "param {} with shape {} not updated!! 
\n{}\n{}".format( + count, param.shape, param, param_ref) + count += 1 + +class SCGSTMultiSpeakeTacotronTrainTest(unittest.TestCase): + @staticmethod + def test_train_step(): + input_dummy = torch.randint(0, 24, (8, 128)).long().to(device) + input_lengths = torch.randint(100, 129, (8, )).long().to(device) + input_lengths[-1] = 128 + mel_spec = torch.rand(8, 30, c.audio['num_mels']).to(device) + linear_spec = torch.rand(8, 30, c.audio['fft_size']).to(device) + mel_lengths = torch.randint(20, 30, (8, )).long().to(device) + mel_lengths[-1] = mel_spec.size(1) + stop_targets = torch.zeros(8, 30, 1).float().to(device) + speaker_embeddings = torch.rand(8, 55).to(device) + + for idx in mel_lengths: + stop_targets[:, int(idx.item()):, 0] = 1.0 + + stop_targets = stop_targets.view(input_dummy.shape[0], + stop_targets.size(1) // c.r, -1) + stop_targets = (stop_targets.sum(2) > + 0.0).unsqueeze(2).float().squeeze() + + criterion = L1LossMasked(seq_len_norm=False).to(device) + criterion_st = nn.BCEWithLogitsLoss().to(device) + model = Tacotron( + num_chars=32, + num_speakers=5, + postnet_output_dim=c.audio['fft_size'], + decoder_output_dim=c.audio['num_mels'], + gst=True, + gst_embedding_dim=c.gst['gst_embedding_dim'], + gst_num_heads=c.gst['gst_num_heads'], + gst_style_tokens=c.gst['gst_style_tokens'], + gst_use_speaker_embedding=c.gst['gst_use_speaker_embedding'], + r=c.r, + memory_size=c.memory_size, + speaker_embedding_dim=55, + ).to(device) #FIXME: missing num_speakers parameter to Tacotron ctor + model.train() + print(" > Num parameters for Tacotron model:%s" % + (count_parameters(model))) + model_ref = copy.deepcopy(model) + count = 0 + for param, param_ref in zip(model.parameters(), + model_ref.parameters()): + assert (param - param_ref).sum() == 0, param + count += 1 + optimizer = optim.Adam(model.parameters(), lr=c.lr) + for _ in range(5): + mel_out, linear_out, align, stop_tokens = model.forward( + input_dummy, input_lengths, mel_spec, mel_lengths, + speaker_embeddings=speaker_embeddings) + optimizer.zero_grad() + loss = criterion(mel_out, mel_spec, mel_lengths) + stop_loss = criterion_st(stop_tokens, stop_targets) + loss = loss + criterion(linear_out, linear_spec, + mel_lengths) + stop_loss + loss.backward() + optimizer.step() + # check parameter changes + count = 0 + for name_param, param_ref in zip(model.named_parameters(), + model_ref.parameters()): + # ignore pre-higway layer since it works conditional + # if count not in [145, 59]: + name, param = name_param + if name == 'gst_layer.encoder.recurrence.weight_hh_l0': + continue + assert (param != param_ref).any( + ), "param {} with shape {} not updated!! 
\n{}\n{}".format( + count, param.shape, param, param_ref) + count += 1 + diff --git a/tests/test_tacotron_train.sh b/tests/test_tacotron_train.sh new file mode 100644 index 0000000000000000000000000000000000000000..9268ea96df27ba78bfd1cbc932554616b11cf71f --- /dev/null +++ b/tests/test_tacotron_train.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash +set -xe +BASEDIR=$(dirname "$0") +echo "$BASEDIR" +# run training +CUDA_VISIBLE_DEVICES="" python TTS/bin/train_tacotron.py --config_path $BASEDIR/inputs/test_train_config.json +# find the training folder +LATEST_FOLDER=$(ls $BASEDIR/train_outputs/| sort | tail -1) +echo $LATEST_FOLDER +# continue the previous training +CUDA_VISIBLE_DEVICES="" python TTS/bin/train_tacotron.py --continue_path $BASEDIR/train_outputs/$LATEST_FOLDER +# remove all the outputs +rm -rf $BASEDIR/train_outputs/ diff --git a/tests/test_text_cleaners.py b/tests/test_text_cleaners.py new file mode 100644 index 0000000000000000000000000000000000000000..7a2abe72f3368fc863871d37be5cfe9817206b3b --- /dev/null +++ b/tests/test_text_cleaners.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python3 + +from TTS.tts.utils.text.cleaners import english_cleaners, phoneme_cleaners + + +def test_time() -> None: + assert english_cleaners("It's 11:00") == "it's eleven a m" + assert english_cleaners("It's 9:01") == "it's nine oh one a m" + assert english_cleaners("It's 16:00") == "it's four p m" + assert english_cleaners("It's 00:00 am") == "it's twelve a m" + + +def test_currency() -> None: + assert phoneme_cleaners("It's $10.50") == "It's ten dollars fifty cents" + assert phoneme_cleaners("£1.1") == "one pound sterling one penny" + assert phoneme_cleaners("¥1") == "one yen" + + +def test_expand_numbers() -> None: + assert "minus one" == phoneme_cleaners("-1") + assert "one" == phoneme_cleaners("1") diff --git a/tests/test_text_processing.py b/tests/test_text_processing.py new file mode 100644 index 0000000000000000000000000000000000000000..2f68c6e7ac63fc8a656e85bbf059585323ecd168 --- /dev/null +++ b/tests/test_text_processing.py @@ -0,0 +1,174 @@ +import os +# pylint: disable=unused-wildcard-import +# pylint: disable=wildcard-import +# pylint: disable=unused-import +import unittest +from tests import get_tests_input_path +from TTS.tts.utils.text import * +from tests import get_tests_path +from TTS.utils.io import load_config + +conf = load_config(os.path.join(get_tests_input_path(), 'test_config.json')) + +def test_phoneme_to_sequence(): + + text = "Recent research at Harvard has shown meditating for as little as 8 weeks can actually increase, the grey matter in the parts of the brain responsible for emotional regulation and learning!" + text_cleaner = ["phoneme_cleaners"] + lang = "en-us" + sequence = phoneme_to_sequence(text, text_cleaner, lang) + text_hat = sequence_to_phoneme(sequence) + _ = phoneme_to_sequence(text, text_cleaner, lang, tp=conf.characters) + text_hat_with_params = sequence_to_phoneme(sequence, tp=conf.characters) + gt = "ɹiːsənt ɹɪsɜːtʃ æt hɑːɹvɚd hɐz ʃoʊn mɛdᵻteɪɾɪŋ fɔːɹ æz lɪɾəl æz eɪt wiːks kæn æktʃuːəli ɪnkɹiːs, ðə ɡɹeɪ mæɾɚɹ ɪnðə pɑːɹts ʌvðə bɹeɪn ɹɪspɑːnsəbəl fɔːɹ ɪmoʊʃənəl ɹɛɡjuːleɪʃən ænd lɜːnɪŋ!" + assert text_hat == text_hat_with_params == gt + + # multiple punctuations + text = "Be a voice, not an! echo?" 
+ sequence = phoneme_to_sequence(text, text_cleaner, lang) + text_hat = sequence_to_phoneme(sequence) + _ = phoneme_to_sequence(text, text_cleaner, lang, tp=conf.characters) + text_hat_with_params = sequence_to_phoneme(sequence, tp=conf.characters) + gt = "biː ɐ vɔɪs, nɑːt ɐn! ɛkoʊ?" + print(text_hat) + print(len(sequence)) + assert text_hat == text_hat_with_params == gt + + # not ending with punctuation + text = "Be a voice, not an! echo" + sequence = phoneme_to_sequence(text, text_cleaner, lang) + text_hat = sequence_to_phoneme(sequence) + _ = phoneme_to_sequence(text, text_cleaner, lang, tp=conf.characters) + text_hat_with_params = sequence_to_phoneme(sequence, tp=conf.characters) + gt = "biː ɐ vɔɪs, nɑːt ɐn! ɛkoʊ" + print(text_hat) + print(len(sequence)) + assert text_hat == text_hat_with_params == gt + + # original + text = "Be a voice, not an echo!" + sequence = phoneme_to_sequence(text, text_cleaner, lang) + text_hat = sequence_to_phoneme(sequence) + _ = phoneme_to_sequence(text, text_cleaner, lang, tp=conf.characters) + text_hat_with_params = sequence_to_phoneme(sequence, tp=conf.characters) + gt = "biː ɐ vɔɪs, nɑːt ɐn ɛkoʊ!" + print(text_hat) + print(len(sequence)) + assert text_hat == text_hat_with_params == gt + + # extra space after the sentence + text = "Be a voice, not an! echo. " + sequence = phoneme_to_sequence(text, text_cleaner, lang) + text_hat = sequence_to_phoneme(sequence) + _ = phoneme_to_sequence(text, text_cleaner, lang, tp=conf.characters) + text_hat_with_params = sequence_to_phoneme(sequence, tp=conf.characters) + gt = "biː ɐ vɔɪs, nɑːt ɐn! ɛkoʊ." + print(text_hat) + print(len(sequence)) + assert text_hat == text_hat_with_params == gt + + # extra space after the sentence + text = "Be a voice, not an! echo. " + sequence = phoneme_to_sequence(text, text_cleaner, lang, True) + text_hat = sequence_to_phoneme(sequence) + _ = phoneme_to_sequence(text, text_cleaner, lang, tp=conf.characters) + text_hat_with_params = sequence_to_phoneme(sequence, tp=conf.characters) + gt = "^biː ɐ vɔɪs, nɑːt ɐn! ɛkoʊ.~" + print(text_hat) + print(len(sequence)) + assert text_hat == text_hat_with_params == gt + + # padding char + text = "_Be a _voice, not an! echo_" + sequence = phoneme_to_sequence(text, text_cleaner, lang) + text_hat = sequence_to_phoneme(sequence) + _ = phoneme_to_sequence(text, text_cleaner, lang, tp=conf.characters) + text_hat_with_params = sequence_to_phoneme(sequence, tp=conf.characters) + gt = "biː ɐ vɔɪs, nɑːt ɐn! ɛkoʊ" + print(text_hat) + print(len(sequence)) + assert text_hat == text_hat_with_params == gt + +def test_phoneme_to_sequence_with_blank_token(): + + text = "Recent research at Harvard has shown meditating for as little as 8 weeks can actually increase, the grey matter in the parts of the brain responsible for emotional regulation and learning!" + text_cleaner = ["phoneme_cleaners"] + lang = "en-us" + sequence = phoneme_to_sequence(text, text_cleaner, lang) + text_hat = sequence_to_phoneme(sequence) + _ = phoneme_to_sequence(text, text_cleaner, lang, tp=conf.characters, add_blank=True) + text_hat_with_params = sequence_to_phoneme(sequence, tp=conf.characters, add_blank=True) + gt = "ɹiːsənt ɹɪsɜːtʃ æt hɑːɹvɚd hɐz ʃoʊn mɛdᵻteɪɾɪŋ fɔːɹ æz lɪɾəl æz eɪt wiːks kæn æktʃuːəli ɪnkɹiːs, ðə ɡɹeɪ mæɾɚɹ ɪnðə pɑːɹts ʌvðə bɹeɪn ɹɪspɑːnsəbəl fɔːɹ ɪmoʊʃənəl ɹɛɡjuːleɪʃən ænd lɜːnɪŋ!" + assert text_hat == text_hat_with_params == gt + + # multiple punctuations + text = "Be a voice, not an! echo?" 
+ sequence = phoneme_to_sequence(text, text_cleaner, lang) + text_hat = sequence_to_phoneme(sequence) + _ = phoneme_to_sequence(text, text_cleaner, lang, tp=conf.characters, add_blank=True) + text_hat_with_params = sequence_to_phoneme(sequence, tp=conf.characters, add_blank=True) + gt = "biː ɐ vɔɪs, nɑːt ɐn! ɛkoʊ?" + print(text_hat) + print(len(sequence)) + assert text_hat == text_hat_with_params == gt + + # not ending with punctuation + text = "Be a voice, not an! echo" + sequence = phoneme_to_sequence(text, text_cleaner, lang) + text_hat = sequence_to_phoneme(sequence) + _ = phoneme_to_sequence(text, text_cleaner, lang, tp=conf.characters, add_blank=True) + text_hat_with_params = sequence_to_phoneme(sequence, tp=conf.characters, add_blank=True) + gt = "biː ɐ vɔɪs, nɑːt ɐn! ɛkoʊ" + print(text_hat) + print(len(sequence)) + assert text_hat == text_hat_with_params == gt + + # original + text = "Be a voice, not an echo!" + sequence = phoneme_to_sequence(text, text_cleaner, lang) + text_hat = sequence_to_phoneme(sequence) + _ = phoneme_to_sequence(text, text_cleaner, lang, tp=conf.characters, add_blank=True) + text_hat_with_params = sequence_to_phoneme(sequence, tp=conf.characters, add_blank=True) + gt = "biː ɐ vɔɪs, nɑːt ɐn ɛkoʊ!" + print(text_hat) + print(len(sequence)) + assert text_hat == text_hat_with_params == gt + + # extra space after the sentence + text = "Be a voice, not an! echo. " + sequence = phoneme_to_sequence(text, text_cleaner, lang) + text_hat = sequence_to_phoneme(sequence) + _ = phoneme_to_sequence(text, text_cleaner, lang, tp=conf.characters, add_blank=True) + text_hat_with_params = sequence_to_phoneme(sequence, tp=conf.characters, add_blank=True) + gt = "biː ɐ vɔɪs, nɑːt ɐn! ɛkoʊ." + print(text_hat) + print(len(sequence)) + assert text_hat == text_hat_with_params == gt + + # extra space after the sentence + text = "Be a voice, not an! echo. " + sequence = phoneme_to_sequence(text, text_cleaner, lang, True) + text_hat = sequence_to_phoneme(sequence) + _ = phoneme_to_sequence(text, text_cleaner, lang, tp=conf.characters, add_blank=True) + text_hat_with_params = sequence_to_phoneme(sequence, tp=conf.characters, add_blank=True) + gt = "^biː ɐ vɔɪs, nɑːt ɐn! ɛkoʊ.~" + print(text_hat) + print(len(sequence)) + assert text_hat == text_hat_with_params == gt + + # padding char + text = "_Be a _voice, not an! echo_" + sequence = phoneme_to_sequence(text, text_cleaner, lang) + text_hat = sequence_to_phoneme(sequence) + _ = phoneme_to_sequence(text, text_cleaner, lang, tp=conf.characters, add_blank=True) + text_hat_with_params = sequence_to_phoneme(sequence, tp=conf.characters, add_blank=True) + gt = "biː ɐ vɔɪs, nɑːt ɐn! ɛkoʊ" + print(text_hat) + print(len(sequence)) + assert text_hat == text_hat_with_params == gt + +def test_text2phone(): + text = "Recent research at Harvard has shown meditating for as little as 8 weeks can actually increase, the grey matter in the parts of the brain responsible for emotional regulation and learning!" + gt = "ɹ|iː|s|ə|n|t| |ɹ|ɪ|s|ɜː|tʃ| |æ|t| |h|ɑːɹ|v|ɚ|d| |h|ɐ|z| |ʃ|oʊ|n| |m|ɛ|d|ᵻ|t|eɪ|ɾ|ɪ|ŋ| |f|ɔː|ɹ| |æ|z| |l|ɪ|ɾ|əl| |æ|z| |eɪ|t| |w|iː|k|s| |k|æ|n| |æ|k|tʃ|uː|əl|i| |ɪ|n|k|ɹ|iː|s|,| |ð|ə| |ɡ|ɹ|eɪ| |m|æ|ɾ|ɚ|ɹ| |ɪ|n|ð|ə| |p|ɑːɹ|t|s| |ʌ|v|ð|ə| |b|ɹ|eɪ|n| |ɹ|ɪ|s|p|ɑː|n|s|ə|b|əl| |f|ɔː|ɹ| |ɪ|m|oʊ|ʃ|ə|n|əl| |ɹ|ɛ|ɡ|j|uː|l|eɪ|ʃ|ə|n| |æ|n|d| |l|ɜː|n|ɪ|ŋ|!" 
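+ # text2phone is expected to return phonemes joined by "|", with word boundaries
+ # kept as space tokens, as in the ground truth string above.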
+ lang = "en-us" + ph = text2phone(text, lang) + assert gt == ph diff --git a/tests/test_train_tts.py b/tests/test_train_tts.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/tests/test_vocoder_gan_datasets.py b/tests/test_vocoder_gan_datasets.py new file mode 100644 index 0000000000000000000000000000000000000000..2a487d9a560dc87c329d15afe722fc54dde8922c --- /dev/null +++ b/tests/test_vocoder_gan_datasets.py @@ -0,0 +1,95 @@ +import os + +import numpy as np +from tests import get_tests_path, get_tests_input_path, get_tests_output_path +from torch.utils.data import DataLoader + +from TTS.utils.audio import AudioProcessor +from TTS.utils.io import load_config +from TTS.vocoder.datasets.gan_dataset import GANDataset +from TTS.vocoder.datasets.preprocess import load_wav_data + +file_path = os.path.dirname(os.path.realpath(__file__)) +OUTPATH = os.path.join(get_tests_output_path(), "loader_tests/") +os.makedirs(OUTPATH, exist_ok=True) + +C = load_config(os.path.join(get_tests_input_path(), 'test_config.json')) + +test_data_path = os.path.join(get_tests_path(), "data/ljspeech/") +ok_ljspeech = os.path.exists(test_data_path) + + +def gan_dataset_case(batch_size, seq_len, hop_len, conv_pad, return_segments, use_noise_augment, use_cache, num_workers): + ''' run dataloader with given parameters and check conditions ''' + ap = AudioProcessor(**C.audio) + _, train_items = load_wav_data(test_data_path, 10) + dataset = GANDataset(ap, + train_items, + seq_len=seq_len, + hop_len=hop_len, + pad_short=2000, + conv_pad=conv_pad, + return_segments=return_segments, + use_noise_augment=use_noise_augment, + use_cache=use_cache) + loader = DataLoader(dataset=dataset, + batch_size=batch_size, + shuffle=True, + num_workers=num_workers, + pin_memory=True, + drop_last=True) + + max_iter = 10 + count_iter = 0 + + # return random segments or return the whole audio + if return_segments: + for item1, _ in loader: + feat1, wav1 = item1 + # feat2, wav2 = item2 + expected_feat_shape = (batch_size, ap.num_mels, seq_len // hop_len + conv_pad * 2) + + # check shapes + assert np.all(feat1.shape == expected_feat_shape), f" [!] {feat1.shape} vs {expected_feat_shape}" + assert (feat1.shape[2] - conv_pad * 2) * hop_len == wav1.shape[2] + + # check feature vs audio match + if not use_noise_augment: + for idx in range(batch_size): + audio = wav1[idx].squeeze() + feat = feat1[idx] + mel = ap.melspectrogram(audio) + # the first 2 and the last 2 frames are skipped due to the padding + # differences in stft + assert (feat - mel[:, :feat1.shape[-1]])[:, 2:-2].sum() <= 0, f' [!] {(feat - mel[:, :feat1.shape[-1]])[:, 2:-2].sum()}' + + count_iter += 1 + # if count_iter == max_iter: + # break + else: + for item in loader: + feat, wav = item + expected_feat_shape = (batch_size, ap.num_mels, (wav.shape[-1] // hop_len) + (conv_pad * 2)) + assert np.all(feat.shape == expected_feat_shape), f" [!] 
{feat.shape} vs {expected_feat_shape}" + assert (feat.shape[2] - conv_pad * 2) * hop_len == wav.shape[2] + count_iter += 1 + if count_iter == max_iter: + break + + +def test_parametrized_gan_dataset(): + ''' test dataloader with different parameters ''' + params = [ + [32, C.audio['hop_length'] * 10, C.audio['hop_length'], 0, True, False, True, 0], + [32, C.audio['hop_length'] * 10, C.audio['hop_length'], 0, True, False, True, 4], + [1, C.audio['hop_length'] * 10, C.audio['hop_length'], 0, True, True, True, 0], + [1, C.audio['hop_length'], C.audio['hop_length'], 0, True, True, True, 0], + [1, C.audio['hop_length'] * 10, C.audio['hop_length'], 2, True, True, True, 0], + [1, C.audio['hop_length'] * 10, C.audio['hop_length'], 0, False, True, True, 0], + [1, C.audio['hop_length'] * 10, C.audio['hop_length'], 0, True, False, True, 0], + [1, C.audio['hop_length'] * 10, C.audio['hop_length'], 0, True, True, False, 0], + [1, C.audio['hop_length'] * 10, C.audio['hop_length'], 0, False, False, False, 0], + ] + for param in params: + print(param) + gan_dataset_case(*param) diff --git a/tests/test_vocoder_gan_train.sh b/tests/test_vocoder_gan_train.sh new file mode 100644 index 0000000000000000000000000000000000000000..0ed2b599b3e63ce6dd4e7cb96a05bf353cf909b5 --- /dev/null +++ b/tests/test_vocoder_gan_train.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash +set -xe +BASEDIR=$(dirname "$0") +echo "$BASEDIR" +# create run dir +mkdir $BASEDIR/train_outputs +# run training +CUDA_VISIBLE_DEVICES="" python TTS/bin/train_vocoder_gan.py --config_path $BASEDIR/inputs/test_vocoder_multiband_melgan_config.json +# find the training folder +LATEST_FOLDER=$(ls $BASEDIR/train_outputs/| sort | tail -1) +echo $LATEST_FOLDER +# continue the previous training +CUDA_VISIBLE_DEVICES="" python TTS/bin/train_vocoder_gan.py --continue_path $BASEDIR/train_outputs/$LATEST_FOLDER +# remove all the outputs +rm -rf $BASEDIR/train_outputs/$LATEST_FOLDER diff --git a/tests/test_vocoder_losses.py b/tests/test_vocoder_losses.py new file mode 100644 index 0000000000000000000000000000000000000000..965e68ad1273f8ebb4840274b0a66329614987fe --- /dev/null +++ b/tests/test_vocoder_losses.py @@ -0,0 +1,54 @@ +import os + +import torch +from tests import get_tests_input_path, get_tests_output_path, get_tests_path + +from TTS.utils.audio import AudioProcessor +from TTS.utils.io import load_config +from TTS.vocoder.layers.losses import MultiScaleSTFTLoss, STFTLoss, TorchSTFT + +TESTS_PATH = get_tests_path() + +OUT_PATH = os.path.join(get_tests_output_path(), "audio_tests") +os.makedirs(OUT_PATH, exist_ok=True) + +WAV_FILE = os.path.join(get_tests_input_path(), "example_1.wav") + +C = load_config(os.path.join(get_tests_input_path(), 'test_config.json')) +ap = AudioProcessor(**C.audio) + + +def test_torch_stft(): + torch_stft = TorchSTFT(ap.fft_size, ap.hop_length, ap.win_length) + # librosa stft + wav = ap.load_wav(WAV_FILE) + M_librosa = abs(ap._stft(wav)) # pylint: disable=protected-access + # torch stft + wav = torch.from_numpy(wav[None, :]).float() + M_torch = torch_stft(wav) + # check the difference b/w librosa and torch outputs + assert (M_librosa - M_torch[0].data.numpy()).max() < 1e-5 + + +def test_stft_loss(): + stft_loss = STFTLoss(ap.fft_size, ap.hop_length, ap.win_length) + wav = ap.load_wav(WAV_FILE) + wav = torch.from_numpy(wav[None, :]).float() + loss_m, loss_sc = stft_loss(wav, wav) + assert loss_m + loss_sc == 0 + loss_m, loss_sc = stft_loss(wav, torch.rand_like(wav)) + assert loss_sc < 1.0 + assert loss_m + loss_sc > 0 + + +def 
test_multiscale_stft_loss(): + stft_loss = MultiScaleSTFTLoss([ap.fft_size//2, ap.fft_size, ap.fft_size*2], + [ap.hop_length // 2, ap.hop_length, ap.hop_length * 2], + [ap.win_length // 2, ap.win_length, ap.win_length * 2]) + wav = ap.load_wav(WAV_FILE) + wav = torch.from_numpy(wav[None, :]).float() + loss_m, loss_sc = stft_loss(wav, wav) + assert loss_m + loss_sc == 0 + loss_m, loss_sc = stft_loss(wav, torch.rand_like(wav)) + assert loss_sc < 1.0 + assert loss_m + loss_sc > 0 diff --git a/tests/test_vocoder_melgan_discriminator.py b/tests/test_vocoder_melgan_discriminator.py new file mode 100644 index 0000000000000000000000000000000000000000..a4564b5654255ff9cab6ee082b9c74e38d20b2c3 --- /dev/null +++ b/tests/test_vocoder_melgan_discriminator.py @@ -0,0 +1,26 @@ +import numpy as np +import torch + +from TTS.vocoder.models.melgan_discriminator import MelganDiscriminator +from TTS.vocoder.models.melgan_multiscale_discriminator import MelganMultiscaleDiscriminator + + +def test_melgan_discriminator(): + model = MelganDiscriminator() + print(model) + dummy_input = torch.rand((4, 1, 256 * 10)) + output, _ = model(dummy_input) + assert np.all(output.shape == (4, 1, 10)) + + +def test_melgan_multi_scale_discriminator(): + model = MelganMultiscaleDiscriminator() + print(model) + dummy_input = torch.rand((4, 1, 256 * 16)) + scores, feats = model(dummy_input) + assert len(scores) == 3 + assert len(scores) == len(feats) + assert np.all(scores[0].shape == (4, 1, 64)) + assert np.all(feats[0][0].shape == (4, 16, 4096)) + assert np.all(feats[0][1].shape == (4, 64, 1024)) + assert np.all(feats[0][2].shape == (4, 256, 256)) diff --git a/tests/test_vocoder_melgan_generator.py b/tests/test_vocoder_melgan_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..fedf630184805628368ca93e5223a1c10e9e0c5e --- /dev/null +++ b/tests/test_vocoder_melgan_generator.py @@ -0,0 +1,13 @@ +import numpy as np +import torch + +from TTS.vocoder.models.melgan_generator import MelganGenerator + +def test_melgan_generator(): + model = MelganGenerator() + print(model) + dummy_input = torch.rand((4, 80, 64)) + output = model(dummy_input) + assert np.all(output.shape == (4, 1, 64 * 256)) + output = model.inference(dummy_input) + assert np.all(output.shape == (4, 1, (64 + 4) * 256)) diff --git a/tests/test_vocoder_parallel_wavegan_discriminator.py b/tests/test_vocoder_parallel_wavegan_discriminator.py new file mode 100644 index 0000000000000000000000000000000000000000..b496e216ba8820113c5c30fffe3e307d5fb00463 --- /dev/null +++ b/tests/test_vocoder_parallel_wavegan_discriminator.py @@ -0,0 +1,41 @@ +import numpy as np +import torch + +from TTS.vocoder.models.parallel_wavegan_discriminator import ParallelWaveganDiscriminator, ResidualParallelWaveganDiscriminator + + +def test_pwgan_disciminator(): + model = ParallelWaveganDiscriminator( + in_channels=1, + out_channels=1, + kernel_size=3, + num_layers=10, + conv_channels=64, + dilation_factor=1, + nonlinear_activation="LeakyReLU", + nonlinear_activation_params={"negative_slope": 0.2}, + bias=True) + dummy_x = torch.rand((4, 1, 64 * 256)) + output = model(dummy_x) + assert np.all(output.shape == (4, 1, 64 * 256)) + model.remove_weight_norm() + + +def test_redisual_pwgan_disciminator(): + model = ResidualParallelWaveganDiscriminator( + in_channels=1, + out_channels=1, + kernel_size=3, + num_layers=30, + stacks=3, + res_channels=64, + gate_channels=128, + skip_channels=64, + dropout=0.0, + bias=True, + nonlinear_activation="LeakyReLU", + 
nonlinear_activation_params={"negative_slope": 0.2}) + dummy_x = torch.rand((4, 1, 64 * 256)) + output = model(dummy_x) + assert np.all(output.shape == (4, 1, 64 * 256)) + model.remove_weight_norm() diff --git a/tests/test_vocoder_parallel_wavegan_generator.py b/tests/test_vocoder_parallel_wavegan_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..9eed0eeef4bff78676073bc3948d33386ca7bd8e --- /dev/null +++ b/tests/test_vocoder_parallel_wavegan_generator.py @@ -0,0 +1,27 @@ +import numpy as np +import torch + +from TTS.vocoder.models.parallel_wavegan_generator import ParallelWaveganGenerator + + +def test_pwgan_generator(): + model = ParallelWaveganGenerator( + in_channels=1, + out_channels=1, + kernel_size=3, + num_res_blocks=30, + stacks=3, + res_channels=64, + gate_channels=128, + skip_channels=64, + aux_channels=80, + dropout=0.0, + bias=True, + use_weight_norm=True, + upsample_factors=[4, 4, 4, 4]) + dummy_c = torch.rand((2, 80, 5)) + output = model(dummy_c) + assert np.all(output.shape == (2, 1, 5 * 256)), output.shape + model.remove_weight_norm() + output = model.inference(dummy_c) + assert np.all(output.shape == (2, 1, (5 + 4) * 256)) diff --git a/tests/test_vocoder_pqmf.py b/tests/test_vocoder_pqmf.py new file mode 100644 index 0000000000000000000000000000000000000000..1f141dd23bb6e28123c134a44db1f70c5dcfdb62 --- /dev/null +++ b/tests/test_vocoder_pqmf.py @@ -0,0 +1,27 @@ +import os +import torch + +import soundfile as sf +from librosa.core import load + +from tests import get_tests_path, get_tests_input_path +from TTS.vocoder.layers.pqmf import PQMF + + +TESTS_PATH = get_tests_path() +WAV_FILE = os.path.join(get_tests_input_path(), "example_1.wav") + + +def test_pqmf(): + w, sr = load(WAV_FILE) + + layer = PQMF(N=4, taps=62, cutoff=0.15, beta=9.0) + w, sr = load(WAV_FILE) + w2 = torch.from_numpy(w[None, None, :]) + b2 = layer.analysis(w2) + w2_ = layer.synthesis(b2) + + print(w2_.max()) + print(w2_.min()) + print(w2_.mean()) + sf.write('pqmf_output.wav', w2_.flatten().detach(), sr) diff --git a/tests/test_vocoder_rwd.py b/tests/test_vocoder_rwd.py new file mode 100644 index 0000000000000000000000000000000000000000..424d3b498c76442cd48e1b40df0945cb357ce642 --- /dev/null +++ b/tests/test_vocoder_rwd.py @@ -0,0 +1,21 @@ +import torch +import numpy as np + +from TTS.vocoder.models.random_window_discriminator import RandomWindowDiscriminator + + +def test_rwd(): + layer = RandomWindowDiscriminator(cond_channels=80, + window_sizes=(512, 1024, 2048, 4096, + 8192), + cond_disc_downsample_factors=[ + (8, 4, 2, 2, 2), (8, 4, 2, 2), + (8, 4, 2), (8, 4), (4, 2, 2) + ], + hop_length=256) + x = torch.rand([4, 1, 22050]) + c = torch.rand([4, 80, 22050 // 256]) + + scores, _ = layer(x, c) + assert len(scores) == 10 + assert np.all(scores[0].shape == (4, 1, 1)) diff --git a/tests/test_vocoder_tf_melgan_generator.py b/tests/test_vocoder_tf_melgan_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..6796822535af39899d65b724ab5a380789ffb188 --- /dev/null +++ b/tests/test_vocoder_tf_melgan_generator.py @@ -0,0 +1,13 @@ +import numpy as np +import tensorflow as tf + +from TTS.vocoder.tf.models.melgan_generator import MelganGenerator + + +def test_melgan_generator(): + hop_length = 256 + model = MelganGenerator() + # pylint: disable=no-value-for-parameter + dummy_input = tf.random.uniform((4, 80, 64)) + output = model(dummy_input, training=False) + assert np.all(output.shape == (4, 1, 64 * hop_length)), output.shape diff --git 
a/tests/test_vocoder_tf_pqmf.py b/tests/test_vocoder_tf_pqmf.py new file mode 100644 index 0000000000000000000000000000000000000000..a1c4f692b8f5d7dabf553b174e9ceb9f58a4974d --- /dev/null +++ b/tests/test_vocoder_tf_pqmf.py @@ -0,0 +1,28 @@ +import os +import tensorflow as tf + +import soundfile as sf +from librosa.core import load + +from tests import get_tests_path, get_tests_input_path +from TTS.vocoder.tf.layers.pqmf import PQMF + + +TESTS_PATH = get_tests_path() +WAV_FILE = os.path.join(get_tests_input_path(), "example_1.wav") + + +def test_pqmf(): + w, sr = load(WAV_FILE) + + layer = PQMF(N=4, taps=62, cutoff=0.15, beta=9.0) + w, sr = load(WAV_FILE) + w2 = tf.convert_to_tensor(w[None, None, :]) + b2 = layer.analysis(w2) + w2_ = layer.synthesis(b2) + w2_ = w2.numpy() + + print(w2_.max()) + print(w2_.min()) + print(w2_.mean()) + sf.write('tf_pqmf_output.wav', w2_.flatten(), sr) diff --git a/tests/test_vocoder_wavegrad_train.sh b/tests/test_vocoder_wavegrad_train.sh new file mode 100644 index 0000000000000000000000000000000000000000..33ffe865c2ec93b856f8854ca62a626e17507fdd --- /dev/null +++ b/tests/test_vocoder_wavegrad_train.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash +set -xe +BASEDIR=$(dirname "$0") +echo "$BASEDIR" +# create run dir +mkdir -p $BASEDIR/train_outputs +# run training +CUDA_VISIBLE_DEVICES="" python TTS/bin/train_vocoder_wavegrad.py --config_path $BASEDIR/inputs/test_vocoder_wavegrad.json +# find the training folder +LATEST_FOLDER=$(ls $BASEDIR/train_outputs/| sort | tail -1) +echo $LATEST_FOLDER +# continue the previous training +CUDA_VISIBLE_DEVICES="" python TTS/bin/train_vocoder_wavegrad.py --continue_path $BASEDIR/train_outputs/$LATEST_FOLDER +# remove all the outputs +rm -rf $BASEDIR/train_outputs/$LATEST_FOLDER \ No newline at end of file diff --git a/tests/test_vocoder_wavernn.py b/tests/test_vocoder_wavernn.py new file mode 100644 index 0000000000000000000000000000000000000000..2464cfa3de70c2bcbc1b1bde2b2aae23eea0fc6d --- /dev/null +++ b/tests/test_vocoder_wavernn.py @@ -0,0 +1,31 @@ +import numpy as np +import torch +import random +from TTS.vocoder.models.wavernn import WaveRNN + + +def test_wavernn(): + model = WaveRNN( + rnn_dims=512, + fc_dims=512, + mode=10, + mulaw=False, + pad=2, + use_aux_net=True, + use_upsample_net=True, + upsample_factors=[4, 8, 8], + feat_dims=80, + compute_dims=128, + res_out_dims=128, + num_res_blocks=10, + hop_length=256, + sample_rate=22050, + ) + dummy_x = torch.rand((2, 1280)) + dummy_m = torch.rand((2, 80, 9)) + y_size = random.randrange(20, 60) + dummy_y = torch.rand((80, y_size)) + output = model(dummy_x, dummy_m) + assert np.all(output.shape == (2, 1280, 4 * 256)), output.shape + output = model.inference(dummy_y, True, 5500, 550) + assert np.all(output.shape == (256 * (y_size - 1),)) diff --git a/tests/test_vocoder_wavernn_datasets.py b/tests/test_vocoder_wavernn_datasets.py new file mode 100644 index 0000000000000000000000000000000000000000..a95e247ab13a69338e78cbc419d0639084820f15 --- /dev/null +++ b/tests/test_vocoder_wavernn_datasets.py @@ -0,0 +1,92 @@ +import os +import shutil + +import numpy as np +from tests import get_tests_path, get_tests_input_path, get_tests_output_path +from torch.utils.data import DataLoader + +from TTS.utils.audio import AudioProcessor +from TTS.utils.io import load_config +from TTS.vocoder.datasets.wavernn_dataset import WaveRNNDataset +from TTS.vocoder.datasets.preprocess import load_wav_feat_data, preprocess_wav_files + +file_path = os.path.dirname(os.path.realpath(__file__)) +OUTPATH = 
os.path.join(get_tests_output_path(), "loader_tests/") +os.makedirs(OUTPATH, exist_ok=True) + +C = load_config(os.path.join(get_tests_input_path(), + "test_vocoder_wavernn_config.json")) + +test_data_path = os.path.join(get_tests_path(), "data/ljspeech/") +test_mel_feat_path = os.path.join(test_data_path, "mel") +test_quant_feat_path = os.path.join(test_data_path, "quant") +ok_ljspeech = os.path.exists(test_data_path) + + +def wavernn_dataset_case(batch_size, seq_len, hop_len, pad, mode, mulaw, num_workers): + """ run dataloader with given parameters and check conditions """ + ap = AudioProcessor(**C.audio) + + C.batch_size = batch_size + C.mode = mode + C.seq_len = seq_len + C.data_path = test_data_path + + preprocess_wav_files(test_data_path, C, ap) + _, train_items = load_wav_feat_data( + test_data_path, test_mel_feat_path, 5) + + dataset = WaveRNNDataset(ap=ap, + items=train_items, + seq_len=seq_len, + hop_len=hop_len, + pad=pad, + mode=mode, + mulaw=mulaw + ) + # sampler = DistributedSampler(dataset) if num_gpus > 1 else None + loader = DataLoader(dataset, + shuffle=True, + collate_fn=dataset.collate, + batch_size=batch_size, + num_workers=num_workers, + pin_memory=True, + ) + + max_iter = 10 + count_iter = 0 + + try: + for data in loader: + x_input, mels, _ = data + expected_feat_shape = (ap.num_mels, + (x_input.shape[-1] // hop_len) + (pad * 2)) + assert np.all( + mels.shape[1:] == expected_feat_shape), f" [!] {mels.shape} vs {expected_feat_shape}" + + assert (mels.shape[2] - pad * 2) * hop_len == x_input.shape[1] + count_iter += 1 + if count_iter == max_iter: + break + # except AssertionError: + # shutil.rmtree(test_mel_feat_path) + # shutil.rmtree(test_quant_feat_path) + finally: + shutil.rmtree(test_mel_feat_path) + shutil.rmtree(test_quant_feat_path) + + +def test_parametrized_wavernn_dataset(): + ''' test dataloader with different parameters ''' + params = [ + [16, C.audio['hop_length'] * 10, C.audio['hop_length'], 2, 10, True, 0], + [16, C.audio['hop_length'] * 10, C.audio['hop_length'], 2, "mold", False, 4], + [1, C.audio['hop_length'] * 10, C.audio['hop_length'], 2, 9, False, 0], + [1, C.audio['hop_length'], C.audio['hop_length'], 2, 10, True, 0], + [1, C.audio['hop_length'], C.audio['hop_length'], 2, "mold", False, 0], + [1, C.audio['hop_length'] * 5, C.audio['hop_length'], 4, 10, False, 2], + [1, C.audio['hop_length'] * 5, C.audio['hop_length'], 2, "mold", False, 0], + ] + for param in params: + print(param) + wavernn_dataset_case(*param) diff --git a/tests/test_vocoder_wavernn_train.sh b/tests/test_vocoder_wavernn_train.sh new file mode 100644 index 0000000000000000000000000000000000000000..40e860127b0166c80205003e2408cfdf660903bc --- /dev/null +++ b/tests/test_vocoder_wavernn_train.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash +set -xe +BASEDIR=$(dirname "$0") +echo "$BASEDIR" +# create run dir +mkdir -p $BASEDIR/train_outputs +# run training +CUDA_VISIBLE_DEVICES="" python TTS/bin/train_vocoder_wavernn.py --config_path $BASEDIR/inputs/test_vocoder_wavernn_config.json +# find the training folder +LATEST_FOLDER=$(ls $BASEDIR/train_outputs/| sort | tail -1) +echo $LATEST_FOLDER +# continue the previous training +CUDA_VISIBLE_DEVICES="" python TTS/bin/train_vocoder_wavernn.py --continue_path $BASEDIR/train_outputs/$LATEST_FOLDER +# remove all the outputs +rm -rf $BASEDIR/train_outputs/$LATEST_FOLDER \ No newline at end of file diff --git a/tests/test_wavegrad_layers.py b/tests/test_wavegrad_layers.py new file mode 100644 index 
0000000000000000000000000000000000000000..d81ae47d6c4c8cfe0565afd1585908f2afba7568 --- /dev/null +++ b/tests/test_wavegrad_layers.py @@ -0,0 +1,92 @@ +import torch + +from TTS.vocoder.layers.wavegrad import PositionalEncoding, FiLM, UBlock, DBlock +from TTS.vocoder.models.wavegrad import Wavegrad + + +def test_positional_encoding(): + layer = PositionalEncoding(50) + inp = torch.rand(32, 50, 100) + nl = torch.rand(32) + o = layer(inp, nl) + + assert o.shape[0] == 32 + assert o.shape[1] == 50 + assert o.shape[2] == 100 + assert isinstance(o, torch.FloatTensor) + + +def test_film(): + layer = FiLM(50, 76) + inp = torch.rand(32, 50, 100) + nl = torch.rand(32) + shift, scale = layer(inp, nl) + + assert shift.shape[0] == 32 + assert shift.shape[1] == 76 + assert shift.shape[2] == 100 + assert isinstance(shift, torch.FloatTensor) + + assert scale.shape[0] == 32 + assert scale.shape[1] == 76 + assert scale.shape[2] == 100 + assert isinstance(scale, torch.FloatTensor) + + layer.apply_weight_norm() + layer.remove_weight_norm() + + +def test_ublock(): + inp1 = torch.rand(32, 50, 100) + inp2 = torch.rand(32, 50, 50) + nl = torch.rand(32) + + layer_film = FiLM(50, 100) + layer = UBlock(50, 100, 2, [1, 2, 4, 8]) + + scale, shift = layer_film(inp1, nl) + o = layer(inp2, shift, scale) + + assert o.shape[0] == 32 + assert o.shape[1] == 100 + assert o.shape[2] == 100 + assert isinstance(o, torch.FloatTensor) + + layer.apply_weight_norm() + layer.remove_weight_norm() + + +def test_dblock(): + inp = torch.rand(32, 50, 130) + layer = DBlock(50, 100, 2) + o = layer(inp) + + assert o.shape[0] == 32 + assert o.shape[1] == 100 + assert o.shape[2] == 65 + assert isinstance(o, torch.FloatTensor) + + layer.apply_weight_norm() + layer.remove_weight_norm() + + +def test_wavegrad_forward(): + x = torch.rand(32, 1, 20 * 300) + c = torch.rand(32, 80, 20) + noise_scale = torch.rand(32) + + model = Wavegrad(in_channels=80, + out_channels=1, + upsample_factors=[5, 5, 3, 2, 2], + upsample_dilations=[[1, 2, 1, 2], [1, 2, 1, 2], + [1, 2, 4, 8], [1, 2, 4, 8], + [1, 2, 4, 8]]) + o = model.forward(x, c, noise_scale) + + assert o.shape[0] == 32 + assert o.shape[1] == 1 + assert o.shape[2] == 20 * 300 + assert isinstance(o, torch.FloatTensor) + + model.apply_weight_norm() + model.remove_weight_norm() diff --git a/tests/test_wavegrad_train.py b/tests/test_wavegrad_train.py new file mode 100644 index 0000000000000000000000000000000000000000..700e94d1b0b0da4f0a94fd5728a43a8333c0d7d3 --- /dev/null +++ b/tests/test_wavegrad_train.py @@ -0,0 +1,62 @@ +import unittest + +import numpy as np +import torch +from torch import optim +from TTS.vocoder.models.wavegrad import Wavegrad + +#pylint: disable=unused-variable + +torch.manual_seed(1) +use_cuda = torch.cuda.is_available() +device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + + +class WavegradTrainTest(unittest.TestCase): + def test_train_step(self): # pylint: disable=no-self-use + """Test if all layers are updated in a basic training cycle""" + input_dummy = torch.rand(8, 1, 20 * 300).to(device) + mel_spec = torch.rand(8, 80, 20).to(device) + + criterion = torch.nn.L1Loss().to(device) + model = Wavegrad(in_channels=80, + out_channels=1, + upsample_factors=[5, 5, 3, 2, 2], + upsample_dilations=[[1, 2, 1, 2], [1, 2, 1, 2], + [1, 2, 4, 8], [1, 2, 4, 8], + [1, 2, 4, 8]]) + + model_ref = Wavegrad(in_channels=80, + out_channels=1, + upsample_factors=[5, 5, 3, 2, 2], + upsample_dilations=[[1, 2, 1, 2], [1, 2, 1, 2], + [1, 2, 4, 8], [1, 2, 4, 8], + [1, 2, 4, 8]]) + 
model.train()
+        model.to(device)
+        betas = np.linspace(1e-6, 1e-2, 1000)
+        model.compute_noise_level(betas)
+        model_ref.load_state_dict(model.state_dict())
+        model_ref.to(device)
+        count = 0
+        for param, param_ref in zip(model.parameters(),
+                                    model_ref.parameters()):
+            assert (param - param_ref).sum() == 0, param
+            count += 1
+        optimizer = optim.Adam(model.parameters(), lr=0.001)
+        for i in range(5):
+            y_hat = model.forward(input_dummy, mel_spec, torch.rand(8).to(device))
+            optimizer.zero_grad()
+            loss = criterion(y_hat, input_dummy)
+            loss.backward()
+            optimizer.step()
+        # check that the parameters have been updated
+        count = 0
+        for param, param_ref in zip(model.parameters(),
+                                    model_ref.parameters()):
+            # the filter below is inherited from other model tests and is not
+            # needed for Wavegrad, where every parameter should be updated
+            # if count not in [145, 59]:
+            assert (param != param_ref).any(
+            ), "param {} with shape {} not updated!! \n{}\n{}".format(
+                count, param.shape, param, param_ref)
+            count += 1
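
Note on `model.compute_noise_level(betas)` used in the training test above: WaveGrad-style diffusion vocoders precompute a noise-level schedule from the linear beta schedule and then draw a continuous noise level between adjacent schedule entries for each training example. The snippet below is a minimal NumPy sketch of that standard DDPM-style computation; it is illustrative only, and the helper names (`compute_noise_level_schedule`, `sample_noise_level`) are not the repo's API.

```python
import numpy as np

def compute_noise_level_schedule(betas):
    """sqrt of the cumulative product of (1 - beta), i.e. sqrt(alpha_bar_t)."""
    alphas = 1.0 - betas
    alphas_cum = np.cumprod(alphas)
    # prepend 1.0 so that index 0 corresponds to "no noise"
    return np.concatenate([[1.0], np.sqrt(alphas_cum)])

def sample_noise_level(schedule, batch_size, rng=None):
    """Draw a continuous noise level uniformly between two adjacent schedule steps."""
    rng = rng or np.random.default_rng()
    t = rng.integers(1, len(schedule), size=batch_size)
    low, high = schedule[t], schedule[t - 1]   # the schedule is decreasing
    return rng.uniform(low, high).astype(np.float32)

if __name__ == "__main__":
    betas = np.linspace(1e-6, 1e-2, 1000)      # same schedule as the test above
    schedule = compute_noise_level_schedule(betas)
    noise_scale = sample_noise_level(schedule, batch_size=8)
    print(schedule.shape, noise_scale.shape)   # (1001,) (8,)
```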
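
For the STFT-loss tests earlier in this diff (`tests/test_vocoder_losses.py`), the assertions — zero loss when comparing a signal with itself, positive but bounded loss against random noise — follow from the usual spectral-convergence plus log-magnitude formulation popularised by Parallel WaveGAN. Below is a minimal PyTorch sketch of that formulation, not the repo's exact `STFTLoss`/`MultiScaleSTFTLoss` implementation; the FFT/hop/window sizes are placeholders.

```python
import torch
import torch.nn.functional as F

def stft_magnitude(x, fft_size, hop_length, win_length):
    """|STFT(x)| for a batch of 1-D signals: (B, T) -> (B, fft_size//2 + 1, frames)."""
    window = torch.hann_window(win_length, device=x.device)
    spec = torch.stft(x, fft_size, hop_length, win_length, window,
                      return_complex=True)
    return spec.abs().clamp(min=1e-7)

def stft_loss(y_hat, y, fft_size=1024, hop_length=256, win_length=1024):
    """Single-resolution loss: (log-magnitude L1, spectral convergence)."""
    m_hat = stft_magnitude(y_hat, fft_size, hop_length, win_length)
    m = stft_magnitude(y, fft_size, hop_length, win_length)
    loss_sc = torch.norm(m - m_hat, p="fro") / torch.norm(m, p="fro")
    loss_mag = F.l1_loss(torch.log(m_hat), torch.log(m))
    return loss_mag, loss_sc

def multi_scale_stft_loss(y_hat, y,
                          fft_sizes=(512, 1024, 2048),
                          hop_lengths=(128, 256, 512),
                          win_lengths=(512, 1024, 2048)):
    """Average the single-resolution losses over several STFT resolutions."""
    losses = [stft_loss(y_hat, y, n, h, w)
              for n, h, w in zip(fft_sizes, hop_lengths, win_lengths)]
    loss_mag = sum(l[0] for l in losses) / len(losses)
    loss_sc = sum(l[1] for l in losses) / len(losses)
    return loss_mag, loss_sc

if __name__ == "__main__":
    wav = torch.rand(1, 22050)
    loss_m, loss_sc = multi_scale_stft_loss(wav, wav)
    assert loss_m + loss_sc == 0          # identical signals -> zero loss
    loss_m, loss_sc = multi_scale_stft_loss(wav, torch.rand_like(wav))
    assert loss_m + loss_sc > 0           # different signals -> positive loss
```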