{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "KX0IBmbjeydD", "outputId": "da61afd9-8a4e-478a-fcbc-4c7d0466ced9" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m510.5/510.5 kB\u001b[0m \u001b[31m6.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m9.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m9.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m12.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h" ] } ], "source": [ "!pip --q install datasets" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "id": "PbGUt9dbnriq" }, "outputs": [], "source": [ "import warnings\n", "warnings.filterwarnings(\"ignore\")\n", "from IPython.display import Audio\n", "import torch\n", "import numpy as np\n", "import pandas as pd\n", "import soundfile as sf\n", "import matplotlib.pyplot as plt\n", "plt.style.use(\"seaborn-whitegrid\")\n" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Zl13ArjoGBq9", "outputId": "eb811f29-0434-4e0e-a043-017b25d42c3d" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Mounted at /content/drive\n" ] } ], "source": [ "from google.colab import drive\n", "drive.mount('/content/drive')" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "wUSLY8BmnrfA", "outputId": "04ce4306-3c03-4c36-e9c5-4f01ffe9d908" }, "outputs": [ { "name": "stdout", 
"output_type": "stream", "text": [ "Downloading cleansada-version-01.zip to /content\n", " 98% 1.01G/1.03G [00:11<00:00, 109MB/s]\n", "100% 1.03G/1.03G [00:11<00:00, 96.0MB/s]\n" ] } ], "source": [ "def DownloadDataset(username,key):\n", " import json\n", " keys={\"username\":username,\"key\":key}\n", " ! mkdir ~/.kaggle\n", " json_object = json.dumps(keys, indent=4)\n", " with open(r\"/root/.kaggle/kaggle.json\", \"w\") as outfile:\n", " outfile.write(json_object)\n", " ! chmod 600 ~/.kaggle/kaggle.json\n", " ! kaggle datasets download --unzip engmahmoodanaam/cleansada-version-01\n", "\n", "#.............................................\n", "\n", "DownloadDataset( username = \"engmahmoodanaam\",\n", " key = \"4a457b4fc3516f9d9b913d770ea64884\"\n", " )" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "id": "l74W1-6ZnrbX" }, "outputs": [], "source": [ "def GetDataset(path_csv,path_audio):\n", " df = pd.read_csv(path_csv)\n", " audios_data = []\n", " audios_samplerate = []\n", " for idx, row in df.iterrows():\n", " filename = f\"{path_audio}/{row['SegmentID']}.wav\"\n", " audiodata, samplerate = sf.read(filename)\n", " audios_data.append(audiodata)\n", " audios_samplerate.append(samplerate)\n", "\n", " df['audio'] = audios_data\n", " df['samplerate'] = audios_samplerate\n", " df['text'] = df['ProcessedText']\n", " df = df[['text','audio','samplerate']]\n", "\n", "\n", " return df\n", "\n", "#.............................................\n" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 206 }, "id": "mfMp94fvnrXw", "outputId": "26e03dbe-2713-4651-ad7a-6973ec6ce9cd" }, "outputs": [ { "data": { "application/vnd.google.colaboratory.intrinsic+json": { "summary": "{\n \"name\": \"df\",\n \"rows\": 70,\n \"fields\": [\n {\n \"column\": \"text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 69,\n \"samples\": [\n \"\\u0648\\u0634 
\\u0633\\u0648\\u064a\\u062a \\u062d\\u062a\\u0649 \\u062a\\u0635\\u064a\\u0631\\u0648\\u0646 \\u0628\\u0647\\u0627\\u0644\\u062c\\u062d\\u0648\\u062f \\u0627\\u064a\\u0647\",\n \"\\u064a\\u0627 \\u0633\\u0644\\u0627\\u0645 \\u0627\\u0646\\u062a\\u0647\\u0649 \\u0627\\u0644\\u0645\\u0648\\u0636\\u0648\\u0639\",\n \"\\u0645\\u0627 \\u0631\\u0627\\u062d \\u0645\\u0631\\u0631 \\u0644\\u0647 \\u0628\\u0633\\u0647\\u0648\\u0644\\u0629\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"audio\",\n \"properties\": {\n \"dtype\": \"object\",\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"samplerate\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 16000,\n \"max\": 16000,\n \"num_unique_values\": 1,\n \"samples\": [\n 16000\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", "type": "dataframe", "variable_name": "df" }, "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
textaudiosamplerate
0يا سلام انتهى الموضوع[3.0517578125e-05, 0.000823974609375, 0.001464...16000
1يعني يا ابو مسامح[-0.0538330078125, -0.0531005859375, -0.046112...16000
2حصة موافقة[0.001007080078125, -0.004058837890625, -0.003...16000
3والله يا هي فكرة[-0.02789306640625, -0.03045654296875, -0.0335...16000
4فكرة تبي تخليك تطير من الفرح[0.05316162109375, 0.031829833984375, 0.021728...16000
\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "\n", "
\n", "
\n" ], "text/plain": [ " text \\\n", "0 يا سلام انتهى الموضوع \n", "1 يعني يا ابو مسامح \n", "2 حصة موافقة \n", "3 والله يا هي فكرة \n", "4 فكرة تبي تخليك تطير من الفرح \n", "\n", " audio samplerate \n", "0 [3.0517578125e-05, 0.000823974609375, 0.001464... 16000 \n", "1 [-0.0538330078125, -0.0531005859375, -0.046112... 16000 \n", "2 [0.001007080078125, -0.004058837890625, -0.003... 16000 \n", "3 [-0.02789306640625, -0.03045654296875, -0.0335... 16000 \n", "4 [0.05316162109375, 0.031829833984375, 0.021728... 16000 " ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = GetDataset(\n", " path_csv = r'/content/cleanSada-01/data.csv',\n", " path_audio = r'/content/cleanSada-01/Audios'\n", " ).iloc[:70]\n", "\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "dlgCJUn1n4SX", "outputId": "b773b0f0-6c12-480a-c05e-25317fadfabc" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "df shape: (70, 3)\n" ] } ], "source": [ "print('df shape: ',df.shape)" ] }, { "cell_type": "markdown", "metadata": { "id": "T1pF6RY1PKrM" }, "source": [ "\n", "\n", "---\n", "\n" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "ZFG8CCvdM4hW", "outputId": "a7e8f7d1-29c4-47d1-ab77-bf2b36b7b393" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "train_df shape: (56, 3)\n", "eval_df shape: (14, 3)\n", "full_generation_df shape: (5, 3)\n" ] } ], "source": [ "full_generation_index = 0\n", "full_generation_df = df.iloc[full_generation_index:full_generation_index+5].reset_index(drop=True)\n", "\n", "\n", "train_df = df.sample(frac=0.8,random_state=42)\n", "eval_df = df.drop(train_df.index)\n", "\n", "\n", "\n", "print('train_df shape: ',train_df.shape)\n", "print('eval_df shape: ',eval_df.shape)\n", "print('full_generation_df shape: ',full_generation_df.shape)" 
] }, { "cell_type": "code", "execution_count": 27, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 93 }, "id": "gbGcXR1qOIcG", "outputId": "8b57ba29-f453-4d5d-e09a-c580b0f6f82a" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "فكرة تبي تخليك تطير من الفرح\n" ] }, { "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "index = 4\n", "print(full_generation_df['text'][index])\n", "Audio(full_generation_df['audio'][index], rate=full_generation_df['samplerate'][index])" ] }, { "cell_type": "code", "execution_count": 28, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "hYYJxFCYn5Hv", "outputId": "fdac8ead-eacc-4b86-f0d7-bb5691973dcb" }, "outputs": [ { "data": { "text/plain": [ "DatasetDict({\n", " train: Dataset({\n", " features: ['text', 'audio', 'samplerate', '__index_level_0__'],\n", " num_rows: 56\n", " })\n", " eval: Dataset({\n", " features: ['text', 'audio', 'samplerate', '__index_level_0__'],\n", " num_rows: 14\n", " })\n", " full_generation: Dataset({\n", " features: ['text', 'audio', 'samplerate'],\n", " num_rows: 5\n", " })\n", "})" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from datasets import DatasetDict,Dataset\n", "\n", "ds = {\n", " \"train\": Dataset.from_pandas(train_df),\n", " \"eval\": Dataset.from_pandas(eval_df),\n", " \"full_generation\": Dataset.from_pandas(full_generation_df),\n", " }\n", "\n", "dataset = DatasetDict(ds)\n", "\n", "dataset" ] }, { "cell_type": "code", "execution_count": 29, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 113, "referenced_widgets": [ "998749ebfcae4b8a87904df802acf7a9", "2e5c554ee6114f229c5690cad6bf7060", "ce22b3e69ecf4e21bfe5e22e16341195", "bcefb41a71474150b6d52ba641bbf5c7", "5560fbcf95184ec6a45756562eacf4a0", "0c658108d0414932afd140a1430d2a0d", 
"3b70e66d78a74386bdc8d854f237e0ad", "152a67c06b674f17995767e44f64a679", "3de5299f28754fbb906272165c95ea57", "3acb7316c01b4e0fb986bdf739c8b025", "b6fa7dd5ebba4e6788b0c242530659fb", "f24acb6723dc47fe9b1f114a95c7ac40", "0f54060ed4814eb2ad5cd35a92b6c3d4", "c286b7a0030c4fd1aa570461c099970a", "65e0bf2886ba420fa28f2511f4c7d824", "e4aca46b0cb74622a52c42c12de0b6c2", "a192ba8570464b8bbc6dda1345a2a84f", "9b0aae7e4ede47459cdaae19571c6c57", "35479653780e4d1591e4ce1b72b88229", "a51be96ada234a8b8b46c1de7e90ed92", "bc77557196504559a8964f37ef89dd9f", "9c47eb81217f4c248d9984748669a322", "efa47ac3f3314000b04b3bd2c3ea704a", "5f81921b0d114db5b860aedcc8074844", "7851afc2f61a463e858bbe8bf74109b0", "14b6310fc034427da62a7a7a0614242e", "a1927a9e8ec1418e9767fa934e4c2023", "6a52c70f0c744e7a85528f1b3deec4d9", "56987c5fdd284298b3c66672575a4482", "64a6d77a5e994c83948a45e491de3c42", "393232064a2a4af8a1866c6296a610ed", "7d8f7f5854224704846c343b7d06548a", "bfa7f944c9d447408803db3ab01692bd" ] }, "id": "JKlxoOjtn5EO", "outputId": "db5291b1-9727-4e68-bf47-fe550e8a0926" }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "998749ebfcae4b8a87904df802acf7a9", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Saving the dataset (0/1 shards): 0%| | 0/56 [00:00\u001b[0m \u001b[31m[54 lines of output]\u001b[0m\n", " \u001b[31m \u001b[0m Running from numpy source directory.\n", " \u001b[31m \u001b[0m :461: UserWarning: Unrecognized setuptools command, proceeding with generating Cython sources and expanding templates\n", " \u001b[31m \u001b[0m Cythonizing sources\n", " \u001b[31m \u001b[0m Processing numpy/random/_bounded_integers.pxd.in\n", " \u001b[31m \u001b[0m Processing numpy/random/_mt19937.pyx\n", " \u001b[31m \u001b[0m /tmp/pip-install-occ1zsm1/numpy_2a2efe8a0c3644c9b9b2c5f8237d8328/tools/cythonize.py:75: DeprecationWarning: distutils Version classes are deprecated. 
Use packaging.version instead.\n", " \u001b[31m \u001b[0m required_version = LooseVersion('0.29.14')\n", " \u001b[31m \u001b[0m /tmp/pip-install-occ1zsm1/numpy_2a2efe8a0c3644c9b9b2c5f8237d8328/tools/cythonize.py:77: DeprecationWarning: distutils Version classes are deprecated. Use packaging.version instead.\n", " \u001b[31m \u001b[0m if LooseVersion(cython_version) < required_version:\n", " \u001b[31m \u001b[0m \n", " \u001b[31m \u001b[0m Error compiling Cython file:\n", " \u001b[31m \u001b[0m ------------------------------------------------------------\n", " \u001b[31m \u001b[0m ...\n", " \u001b[31m \u001b[0m for i in range(1, RK_STATE_LEN):\n", " \u001b[31m \u001b[0m self.rng_state.key[i] = val[i]\n", " \u001b[31m \u001b[0m self.rng_state.pos = i\n", " \u001b[31m \u001b[0m \n", " \u001b[31m \u001b[0m self._bitgen.state = &self.rng_state\n", " \u001b[31m \u001b[0m self._bitgen.next_uint64 = &mt19937_uint64\n", " \u001b[31m \u001b[0m ^\n", " \u001b[31m \u001b[0m ------------------------------------------------------------\n", " \u001b[31m \u001b[0m \n", " \u001b[31m \u001b[0m _mt19937.pyx:138:35: Cannot assign type 'uint64_t (*)(void *) except? -1 nogil' to 'uint64_t (*)(void *) noexcept nogil'. Exception values are incompatible. 
Suggest adding 'noexcept' to the type of the value being assigned.\n", " \u001b[31m \u001b[0m Traceback (most recent call last):\n", " \u001b[31m \u001b[0m File \"/tmp/pip-install-occ1zsm1/numpy_2a2efe8a0c3644c9b9b2c5f8237d8328/tools/cythonize.py\", line 238, in \n", " \u001b[31m \u001b[0m main()\n", " \u001b[31m \u001b[0m File \"/tmp/pip-install-occ1zsm1/numpy_2a2efe8a0c3644c9b9b2c5f8237d8328/tools/cythonize.py\", line 234, in main\n", " \u001b[31m \u001b[0m find_process_files(root_dir)\n", " \u001b[31m \u001b[0m File \"/tmp/pip-install-occ1zsm1/numpy_2a2efe8a0c3644c9b9b2c5f8237d8328/tools/cythonize.py\", line 225, in find_process_files\n", " \u001b[31m \u001b[0m process(root_dir, fromfile, tofile, function, hash_db)\n", " \u001b[31m \u001b[0m File \"/tmp/pip-install-occ1zsm1/numpy_2a2efe8a0c3644c9b9b2c5f8237d8328/tools/cythonize.py\", line 191, in process\n", " \u001b[31m \u001b[0m processor_function(fromfile, tofile)\n", " \u001b[31m \u001b[0m File \"/tmp/pip-install-occ1zsm1/numpy_2a2efe8a0c3644c9b9b2c5f8237d8328/tools/cythonize.py\", line 80, in process_pyx\n", " \u001b[31m \u001b[0m subprocess.check_call(\n", " \u001b[31m \u001b[0m File \"/usr/local/lib/python3.10/subprocess.py\", line 369, in check_call\n", " \u001b[31m \u001b[0m raise CalledProcessError(retcode, cmd)\n", " \u001b[31m \u001b[0m subprocess.CalledProcessError: Command '['/usr/local/bin/python', '-m', 'cython', '-3', '--fast-fail', '-o', '_mt19937.c', '_mt19937.pyx']' returned non-zero exit status 1.\n", " \u001b[31m \u001b[0m Traceback (most recent call last):\n", " \u001b[31m \u001b[0m File \"/usr/local/lib/python3.10/site-packages/pip/_vendor/pep517/in_process/_in_process.py\", line 351, in \n", " \u001b[31m \u001b[0m main()\n", " \u001b[31m \u001b[0m File \"/usr/local/lib/python3.10/site-packages/pip/_vendor/pep517/in_process/_in_process.py\", line 333, in main\n", " \u001b[31m \u001b[0m json_out['return_val'] = hook(**hook_input['kwargs'])\n", " \u001b[31m \u001b[0m File 
\"/usr/local/lib/python3.10/site-packages/pip/_vendor/pep517/in_process/_in_process.py\", line 152, in prepare_metadata_for_build_wheel\n", " \u001b[31m \u001b[0m return hook(metadata_directory, config_settings)\n", " \u001b[31m \u001b[0m File \"/tmp/pip-build-env-ljsx66qd/overlay/lib/python3.10/site-packages/setuptools/build_meta.py\", line 373, in prepare_metadata_for_build_wheel\n", " \u001b[31m \u001b[0m self.run_setup()\n", " \u001b[31m \u001b[0m File \"/tmp/pip-build-env-ljsx66qd/overlay/lib/python3.10/site-packages/setuptools/build_meta.py\", line 502, in run_setup\n", " \u001b[31m \u001b[0m super().run_setup(setup_script=setup_script)\n", " \u001b[31m \u001b[0m File \"/tmp/pip-build-env-ljsx66qd/overlay/lib/python3.10/site-packages/setuptools/build_meta.py\", line 318, in run_setup\n", " \u001b[31m \u001b[0m exec(code, locals())\n", " \u001b[31m \u001b[0m File \"\", line 488, in \n", " \u001b[31m \u001b[0m File \"\", line 469, in setup_package\n", " \u001b[31m \u001b[0m File \"\", line 275, in generate_cython\n", " \u001b[31m \u001b[0m RuntimeError: Running cythonize failed!\n", " \u001b[31m \u001b[0m \u001b[31m[end of output]\u001b[0m\n", " \n", " \u001b[1;35mnote\u001b[0m: This error originates from a subprocess, and is likely not a problem with pip.\n", "\u001b[?25h\u001b[1;31merror\u001b[0m: \u001b[1mmetadata-generation-failed\u001b[0m\n", "\n", "\u001b[31m×\u001b[0m Encountered error while generating package metadata.\n", "\u001b[31m╰─>\u001b[0m See above for output.\n", "\n", "\u001b[1;35mnote\u001b[0m: This is an issue with the package mentioned above, not pip.\n", "\u001b[1;36mhint\u001b[0m: See above for details.\n", "\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.2\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: 
\u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", "[Errno 2] No such file or directory: 'vits # If you restart runtime'\n", "/home/user/app/VitsModelSplit/vits\n", "/home/user/app/VitsModelSplit/vits/monotonic_align\n", "mkdir: cannot create directory ‘monotonic_align’: File exists\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/usr/local/lib/python3.10/site-packages/IPython/core/magics/osm.py:417: UserWarning: This is now an optional IPython functionality, setting dhist requires you to install the `pickleshare` library.\n", " self.shell.db['dhist'] = compress_dhist(dhist)[-100:]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "running build_ext\n", "building 'monotonic_align.core' extension\n", "gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -fPIC -I/usr/local/lib/python3.10/site-packages/numpy/core/include -I/usr/local/include/python3.10 -c core.c -o build/temp.linux-x86_64-cpython-310/core.o\n", "\u001b[01m\u001b[Kcore.c:\u001b[m\u001b[K In function ‘\u001b[01m\u001b[K__Pyx_InitGlobals\u001b[m\u001b[K’:\n", "\u001b[01m\u001b[Kcore.c:16766:1:\u001b[m\u001b[K \u001b[01;35m\u001b[Kwarning: \u001b[m\u001b[K‘\u001b[01m\u001b[KPyEval_InitThreads\u001b[m\u001b[K’ is deprecated [\u001b[01;35m\u001b[K\u001b]8;;https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html#index-Wdeprecated-declarations\u0007-Wdeprecated-declarations\u001b]8;;\u0007\u001b[m\u001b[K]\n", "16766 | \u001b[01;35m\u001b[KPyEval_InitThreads\u001b[m\u001b[K();\n", " | \u001b[01;35m\u001b[K^~~~~~~~~~~~~~~~~~\u001b[m\u001b[K\n", "In file included from \u001b[01m\u001b[K/usr/local/include/python3.10/Python.h:130\u001b[m\u001b[K,\n", " from \u001b[01m\u001b[Kcore.c:16\u001b[m\u001b[K:\n", "\u001b[01m\u001b[K/usr/local/include/python3.10/ceval.h:122:37:\u001b[m\u001b[K \u001b[01;36m\u001b[Knote: \u001b[m\u001b[Kdeclared here\n", " 122 | Py_DEPRECATED(3.9) PyAPI_FUNC(void) \u001b[01;36m\u001b[KPyEval_InitThreads\u001b[m\u001b[K(void);\n", " | 
\u001b[01;36m\u001b[K^~~~~~~~~~~~~~~~~~\u001b[m\u001b[K\n", "gcc -shared build/temp.linux-x86_64-cpython-310/core.o -L/usr/local/lib -o build/lib.linux-x86_64-cpython-310/monotonic_align/core.cpython-310-x86_64-linux-gnu.so\n", "copying build/lib.linux-x86_64-cpython-310/monotonic_align/core.cpython-310-x86_64-linux-gnu.so -> monotonic_align\n", "/home/user/app/VitsModelSplit/vits\n" ] } ], "source": [ "%cd vits\n", "!pip install -r requirements.txt\n", "\n", "%cd vits # If you restart runtime\n", "%cd monotonic_align\n", "%mkdir monotonic_align\n", "!python setup.py build_ext --inplace\n", "%cd .." ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/home/user/app/VitsModelSplit/vits\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/usr/local/lib/python3.10/site-packages/IPython/core/magics/osm.py:417: UserWarning: This is now an optional IPython functionality, setting dhist requires you to install the `pickleshare` library.\n", " self.shell.db['dhist'] = compress_dhist(dhist)[-100:]\n" ] } ], "source": [ "%cd VitsModelSplit/vits" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "running build_ext\n", "copying build/lib.linux-x86_64-cpython-310/VitsModelSplit/monotonic_align/core.cpython-310-x86_64-linux-gnu.so -> VitsModelSplit/monotonic_align\n", "error: could not create 'VitsModelSplit/monotonic_align/core.cpython-310-x86_64-linux-gnu.so': No such file or directory\n" ] } ], "source": [ "\n", "!python setup.py build_ext --inplace\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "cd" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [], "source": [ "import vits.monotonic_align as monotonic_align" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/plain": [ 
# %%
# Sanity check: the compiled extension exposes `maximum_path`.
monotonic_align.maximum_path

# %%
# Inspect the module through its alias. The notebook imports it with
# `import vits.monotonic_align as monotonic_align`, which binds only the alias,
# so the bare name `vits` is undefined unless it is imported separately.
monotonic_align

# %%
import torch

# %%
# True when a CUDA device is usable from this runtime.
torch.cuda.is_available()
"84f7b3d0681946dc8973b02b6b895cae", "430deedff8c2485c9555d17a09f1423e", "a48d9f97129047c28b050be572e74362", "c0fe61fa22f640b89551ffcf4b4760b2", "dfd9f39b7361440bba04466a7347b2ea", "abb4c4940c664c80bd84f71099f0ac6f", "d3ab433c06684e129be478bfe557cd97", "7f715538e29b47e0a11e2b19a2001466", "d8a608d149b14a1887eaf59390477371", "8285ecf950364d5db6ab30da5e4bf0f2", "cb1c3fd26b78419bbae1879ab1a13917", "5657da9a99d6443a83bd8e43419b6f4f", "447331f21a89479491620f0ca29bef24", "f9e2a30b7838405b967dcea4adeaca17", "4095f81dbe3242cba316f3dcd9bf7e0a", "d7c8934bf00a430fa4abd49807d8ba02", "64403b30855e46878cedc5039f5b8c2e", "b8cac84385674af485ab72bb3af43e0f", "dbdef225b00746c290947de559043d6e", "6f80be9ccbd04c6aaa4ba6294bd419d7", "c7c5935ced304a44b971fc6b1120b6c7", "cf89a91dc6644e518b43799abfc0aa91", "0a0751554fb4455d88c2f561b0f0a24a", "9255782df8d74156aacaea6b4202fbd6", "020c2499c1664282aa97cc630bc851a3", "d06d2b0f0887462583f6ba08ffdcc1bd", "c3ff5b9880fb47ecb710754a1b4fbe42", "8422dfc2296c47fc904c2f01973a595e", "56da9e4fa333494e89a9b62b0fd87295" ] }, "id": "gjQ-uoS1xSKt", "outputId": "eea81d60-c380-40ca-dc50-2ed0399cd646" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Some weights of the model checkpoint at facebook/mms-tts-ara were not used when initializing VitsModel: ['flow.flows.0.wavenet.in_layers.0.weight_g', 'flow.flows.0.wavenet.in_layers.0.weight_v', 'flow.flows.0.wavenet.in_layers.1.weight_g', 'flow.flows.0.wavenet.in_layers.1.weight_v', 'flow.flows.0.wavenet.in_layers.2.weight_g', 'flow.flows.0.wavenet.in_layers.2.weight_v', 'flow.flows.0.wavenet.in_layers.3.weight_g', 'flow.flows.0.wavenet.in_layers.3.weight_v', 'flow.flows.0.wavenet.res_skip_layers.0.weight_g', 'flow.flows.0.wavenet.res_skip_layers.0.weight_v', 'flow.flows.0.wavenet.res_skip_layers.1.weight_g', 'flow.flows.0.wavenet.res_skip_layers.1.weight_v', 'flow.flows.0.wavenet.res_skip_layers.2.weight_g', 'flow.flows.0.wavenet.res_skip_layers.2.weight_v', 
'flow.flows.0.wavenet.res_skip_layers.3.weight_g', 'flow.flows.0.wavenet.res_skip_layers.3.weight_v', 'flow.flows.1.wavenet.in_layers.0.weight_g', 'flow.flows.1.wavenet.in_layers.0.weight_v', 'flow.flows.1.wavenet.in_layers.1.weight_g', 'flow.flows.1.wavenet.in_layers.1.weight_v', 'flow.flows.1.wavenet.in_layers.2.weight_g', 'flow.flows.1.wavenet.in_layers.2.weight_v', 'flow.flows.1.wavenet.in_layers.3.weight_g', 'flow.flows.1.wavenet.in_layers.3.weight_v', 'flow.flows.1.wavenet.res_skip_layers.0.weight_g', 'flow.flows.1.wavenet.res_skip_layers.0.weight_v', 'flow.flows.1.wavenet.res_skip_layers.1.weight_g', 'flow.flows.1.wavenet.res_skip_layers.1.weight_v', 'flow.flows.1.wavenet.res_skip_layers.2.weight_g', 'flow.flows.1.wavenet.res_skip_layers.2.weight_v', 'flow.flows.1.wavenet.res_skip_layers.3.weight_g', 'flow.flows.1.wavenet.res_skip_layers.3.weight_v', 'flow.flows.2.wavenet.in_layers.0.weight_g', 'flow.flows.2.wavenet.in_layers.0.weight_v', 'flow.flows.2.wavenet.in_layers.1.weight_g', 'flow.flows.2.wavenet.in_layers.1.weight_v', 'flow.flows.2.wavenet.in_layers.2.weight_g', 'flow.flows.2.wavenet.in_layers.2.weight_v', 'flow.flows.2.wavenet.in_layers.3.weight_g', 'flow.flows.2.wavenet.in_layers.3.weight_v', 'flow.flows.2.wavenet.res_skip_layers.0.weight_g', 'flow.flows.2.wavenet.res_skip_layers.0.weight_v', 'flow.flows.2.wavenet.res_skip_layers.1.weight_g', 'flow.flows.2.wavenet.res_skip_layers.1.weight_v', 'flow.flows.2.wavenet.res_skip_layers.2.weight_g', 'flow.flows.2.wavenet.res_skip_layers.2.weight_v', 'flow.flows.2.wavenet.res_skip_layers.3.weight_g', 'flow.flows.2.wavenet.res_skip_layers.3.weight_v', 'flow.flows.3.wavenet.in_layers.0.weight_g', 'flow.flows.3.wavenet.in_layers.0.weight_v', 'flow.flows.3.wavenet.in_layers.1.weight_g', 'flow.flows.3.wavenet.in_layers.1.weight_v', 'flow.flows.3.wavenet.in_layers.2.weight_g', 'flow.flows.3.wavenet.in_layers.2.weight_v', 'flow.flows.3.wavenet.in_layers.3.weight_g', 'flow.flows.3.wavenet.in_layers.3.weight_v', 
'flow.flows.3.wavenet.res_skip_layers.0.weight_g', 'flow.flows.3.wavenet.res_skip_layers.0.weight_v', 'flow.flows.3.wavenet.res_skip_layers.1.weight_g', 'flow.flows.3.wavenet.res_skip_layers.1.weight_v', 'flow.flows.3.wavenet.res_skip_layers.2.weight_g', 'flow.flows.3.wavenet.res_skip_layers.2.weight_v', 'flow.flows.3.wavenet.res_skip_layers.3.weight_g', 'flow.flows.3.wavenet.res_skip_layers.3.weight_v', 'posterior_encoder.wavenet.in_layers.0.weight_g', 'posterior_encoder.wavenet.in_layers.0.weight_v', 'posterior_encoder.wavenet.in_layers.1.weight_g', 'posterior_encoder.wavenet.in_layers.1.weight_v', 'posterior_encoder.wavenet.in_layers.10.weight_g', 'posterior_encoder.wavenet.in_layers.10.weight_v', 'posterior_encoder.wavenet.in_layers.11.weight_g', 'posterior_encoder.wavenet.in_layers.11.weight_v', 'posterior_encoder.wavenet.in_layers.12.weight_g', 'posterior_encoder.wavenet.in_layers.12.weight_v', 'posterior_encoder.wavenet.in_layers.13.weight_g', 'posterior_encoder.wavenet.in_layers.13.weight_v', 'posterior_encoder.wavenet.in_layers.14.weight_g', 'posterior_encoder.wavenet.in_layers.14.weight_v', 'posterior_encoder.wavenet.in_layers.15.weight_g', 'posterior_encoder.wavenet.in_layers.15.weight_v', 'posterior_encoder.wavenet.in_layers.2.weight_g', 'posterior_encoder.wavenet.in_layers.2.weight_v', 'posterior_encoder.wavenet.in_layers.3.weight_g', 'posterior_encoder.wavenet.in_layers.3.weight_v', 'posterior_encoder.wavenet.in_layers.4.weight_g', 'posterior_encoder.wavenet.in_layers.4.weight_v', 'posterior_encoder.wavenet.in_layers.5.weight_g', 'posterior_encoder.wavenet.in_layers.5.weight_v', 'posterior_encoder.wavenet.in_layers.6.weight_g', 'posterior_encoder.wavenet.in_layers.6.weight_v', 'posterior_encoder.wavenet.in_layers.7.weight_g', 'posterior_encoder.wavenet.in_layers.7.weight_v', 'posterior_encoder.wavenet.in_layers.8.weight_g', 'posterior_encoder.wavenet.in_layers.8.weight_v', 'posterior_encoder.wavenet.in_layers.9.weight_g', 
'posterior_encoder.wavenet.in_layers.9.weight_v', 'posterior_encoder.wavenet.res_skip_layers.0.weight_g', 'posterior_encoder.wavenet.res_skip_layers.0.weight_v', 'posterior_encoder.wavenet.res_skip_layers.1.weight_g', 'posterior_encoder.wavenet.res_skip_layers.1.weight_v', 'posterior_encoder.wavenet.res_skip_layers.10.weight_g', 'posterior_encoder.wavenet.res_skip_layers.10.weight_v', 'posterior_encoder.wavenet.res_skip_layers.11.weight_g', 'posterior_encoder.wavenet.res_skip_layers.11.weight_v', 'posterior_encoder.wavenet.res_skip_layers.12.weight_g', 'posterior_encoder.wavenet.res_skip_layers.12.weight_v', 'posterior_encoder.wavenet.res_skip_layers.13.weight_g', 'posterior_encoder.wavenet.res_skip_layers.13.weight_v', 'posterior_encoder.wavenet.res_skip_layers.14.weight_g', 'posterior_encoder.wavenet.res_skip_layers.14.weight_v', 'posterior_encoder.wavenet.res_skip_layers.15.weight_g', 'posterior_encoder.wavenet.res_skip_layers.15.weight_v', 'posterior_encoder.wavenet.res_skip_layers.2.weight_g', 'posterior_encoder.wavenet.res_skip_layers.2.weight_v', 'posterior_encoder.wavenet.res_skip_layers.3.weight_g', 'posterior_encoder.wavenet.res_skip_layers.3.weight_v', 'posterior_encoder.wavenet.res_skip_layers.4.weight_g', 'posterior_encoder.wavenet.res_skip_layers.4.weight_v', 'posterior_encoder.wavenet.res_skip_layers.5.weight_g', 'posterior_encoder.wavenet.res_skip_layers.5.weight_v', 'posterior_encoder.wavenet.res_skip_layers.6.weight_g', 'posterior_encoder.wavenet.res_skip_layers.6.weight_v', 'posterior_encoder.wavenet.res_skip_layers.7.weight_g', 'posterior_encoder.wavenet.res_skip_layers.7.weight_v', 'posterior_encoder.wavenet.res_skip_layers.8.weight_g', 'posterior_encoder.wavenet.res_skip_layers.8.weight_v', 'posterior_encoder.wavenet.res_skip_layers.9.weight_g', 'posterior_encoder.wavenet.res_skip_layers.9.weight_v']\n", "- This IS expected if you are initializing VitsModel from the checkpoint of a model trained on another task or with another architecture 
(e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", "- This IS NOT expected if you are initializing VitsModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", "Some weights of VitsModel were not initialized from the model checkpoint at facebook/mms-tts-ara and are newly initialized: ['discriminator.discriminators.0.convs.0.bias', 'discriminator.discriminators.0.convs.0.weight', 'discriminator.discriminators.0.convs.1.bias', 'discriminator.discriminators.0.convs.1.weight', 'discriminator.discriminators.0.convs.2.bias', 'discriminator.discriminators.0.convs.2.weight', 'discriminator.discriminators.0.convs.3.bias', 'discriminator.discriminators.0.convs.3.weight', 'discriminator.discriminators.0.convs.4.bias', 'discriminator.discriminators.0.convs.4.weight', 'discriminator.discriminators.0.convs.5.bias', 'discriminator.discriminators.0.convs.5.weight', 'discriminator.discriminators.0.final_conv.bias', 'discriminator.discriminators.0.final_conv.weight', 'discriminator.discriminators.1.convs.0.bias', 'discriminator.discriminators.1.convs.0.weight', 'discriminator.discriminators.1.convs.1.bias', 'discriminator.discriminators.1.convs.1.weight', 'discriminator.discriminators.1.convs.2.bias', 'discriminator.discriminators.1.convs.2.weight', 'discriminator.discriminators.1.convs.3.bias', 'discriminator.discriminators.1.convs.3.weight', 'discriminator.discriminators.1.convs.4.bias', 'discriminator.discriminators.1.convs.4.weight', 'discriminator.discriminators.1.final_conv.bias', 'discriminator.discriminators.1.final_conv.weight', 'discriminator.discriminators.2.convs.0.bias', 'discriminator.discriminators.2.convs.0.weight', 'discriminator.discriminators.2.convs.1.bias', 'discriminator.discriminators.2.convs.1.weight', 'discriminator.discriminators.2.convs.2.bias', 'discriminator.discriminators.2.convs.2.weight', 
'discriminator.discriminators.2.convs.3.bias', 'discriminator.discriminators.2.convs.3.weight', 'discriminator.discriminators.2.convs.4.bias', 'discriminator.discriminators.2.convs.4.weight', 'discriminator.discriminators.2.final_conv.bias', 'discriminator.discriminators.2.final_conv.weight', 'discriminator.discriminators.3.convs.0.bias', 'discriminator.discriminators.3.convs.0.weight', 'discriminator.discriminators.3.convs.1.bias', 'discriminator.discriminators.3.convs.1.weight', 'discriminator.discriminators.3.convs.2.bias', 'discriminator.discriminators.3.convs.2.weight', 'discriminator.discriminators.3.convs.3.bias', 'discriminator.discriminators.3.convs.3.weight', 'discriminator.discriminators.3.convs.4.bias', 'discriminator.discriminators.3.convs.4.weight', 'discriminator.discriminators.3.final_conv.bias', 'discriminator.discriminators.3.final_conv.weight', 'discriminator.discriminators.4.convs.0.bias', 'discriminator.discriminators.4.convs.0.weight', 'discriminator.discriminators.4.convs.1.bias', 'discriminator.discriminators.4.convs.1.weight', 'discriminator.discriminators.4.convs.2.bias', 'discriminator.discriminators.4.convs.2.weight', 'discriminator.discriminators.4.convs.3.bias', 'discriminator.discriminators.4.convs.3.weight', 'discriminator.discriminators.4.convs.4.bias', 'discriminator.discriminators.4.convs.4.weight', 'discriminator.discriminators.4.final_conv.bias', 'discriminator.discriminators.4.final_conv.weight', 'discriminator.discriminators.5.convs.0.bias', 'discriminator.discriminators.5.convs.0.weight', 'discriminator.discriminators.5.convs.1.bias', 'discriminator.discriminators.5.convs.1.weight', 'discriminator.discriminators.5.convs.2.bias', 'discriminator.discriminators.5.convs.2.weight', 'discriminator.discriminators.5.convs.3.bias', 'discriminator.discriminators.5.convs.3.weight', 'discriminator.discriminators.5.convs.4.bias', 'discriminator.discriminators.5.convs.4.weight', 'discriminator.discriminators.5.final_conv.bias', 
'discriminator.discriminators.5.final_conv.weight', 'flow.flows.0.wavenet.in_layers.0.parametrizations.weight.original0', 'flow.flows.0.wavenet.in_layers.0.parametrizations.weight.original1', 'flow.flows.0.wavenet.in_layers.1.parametrizations.weight.original0', 'flow.flows.0.wavenet.in_layers.1.parametrizations.weight.original1', 'flow.flows.0.wavenet.in_layers.2.parametrizations.weight.original0', 'flow.flows.0.wavenet.in_layers.2.parametrizations.weight.original1', 'flow.flows.0.wavenet.in_layers.3.parametrizations.weight.original0', 'flow.flows.0.wavenet.in_layers.3.parametrizations.weight.original1', 'flow.flows.0.wavenet.res_skip_layers.0.parametrizations.weight.original0', 'flow.flows.0.wavenet.res_skip_layers.0.parametrizations.weight.original1', 'flow.flows.0.wavenet.res_skip_layers.1.parametrizations.weight.original0', 'flow.flows.0.wavenet.res_skip_layers.1.parametrizations.weight.original1', 'flow.flows.0.wavenet.res_skip_layers.2.parametrizations.weight.original0', 'flow.flows.0.wavenet.res_skip_layers.2.parametrizations.weight.original1', 'flow.flows.0.wavenet.res_skip_layers.3.parametrizations.weight.original0', 'flow.flows.0.wavenet.res_skip_layers.3.parametrizations.weight.original1', 'flow.flows.1.wavenet.in_layers.0.parametrizations.weight.original0', 'flow.flows.1.wavenet.in_layers.0.parametrizations.weight.original1', 'flow.flows.1.wavenet.in_layers.1.parametrizations.weight.original0', 'flow.flows.1.wavenet.in_layers.1.parametrizations.weight.original1', 'flow.flows.1.wavenet.in_layers.2.parametrizations.weight.original0', 'flow.flows.1.wavenet.in_layers.2.parametrizations.weight.original1', 'flow.flows.1.wavenet.in_layers.3.parametrizations.weight.original0', 'flow.flows.1.wavenet.in_layers.3.parametrizations.weight.original1', 'flow.flows.1.wavenet.res_skip_layers.0.parametrizations.weight.original0', 'flow.flows.1.wavenet.res_skip_layers.0.parametrizations.weight.original1', 
'flow.flows.1.wavenet.res_skip_layers.1.parametrizations.weight.original0', 'flow.flows.1.wavenet.res_skip_layers.1.parametrizations.weight.original1', 'flow.flows.1.wavenet.res_skip_layers.2.parametrizations.weight.original0', 'flow.flows.1.wavenet.res_skip_layers.2.parametrizations.weight.original1', 'flow.flows.1.wavenet.res_skip_layers.3.parametrizations.weight.original0', 'flow.flows.1.wavenet.res_skip_layers.3.parametrizations.weight.original1', 'flow.flows.2.wavenet.in_layers.0.parametrizations.weight.original0', 'flow.flows.2.wavenet.in_layers.0.parametrizations.weight.original1', 'flow.flows.2.wavenet.in_layers.1.parametrizations.weight.original0', 'flow.flows.2.wavenet.in_layers.1.parametrizations.weight.original1', 'flow.flows.2.wavenet.in_layers.2.parametrizations.weight.original0', 'flow.flows.2.wavenet.in_layers.2.parametrizations.weight.original1', 'flow.flows.2.wavenet.in_layers.3.parametrizations.weight.original0', 'flow.flows.2.wavenet.in_layers.3.parametrizations.weight.original1', 'flow.flows.2.wavenet.res_skip_layers.0.parametrizations.weight.original0', 'flow.flows.2.wavenet.res_skip_layers.0.parametrizations.weight.original1', 'flow.flows.2.wavenet.res_skip_layers.1.parametrizations.weight.original0', 'flow.flows.2.wavenet.res_skip_layers.1.parametrizations.weight.original1', 'flow.flows.2.wavenet.res_skip_layers.2.parametrizations.weight.original0', 'flow.flows.2.wavenet.res_skip_layers.2.parametrizations.weight.original1', 'flow.flows.2.wavenet.res_skip_layers.3.parametrizations.weight.original0', 'flow.flows.2.wavenet.res_skip_layers.3.parametrizations.weight.original1', 'flow.flows.3.wavenet.in_layers.0.parametrizations.weight.original0', 'flow.flows.3.wavenet.in_layers.0.parametrizations.weight.original1', 'flow.flows.3.wavenet.in_layers.1.parametrizations.weight.original0', 'flow.flows.3.wavenet.in_layers.1.parametrizations.weight.original1', 'flow.flows.3.wavenet.in_layers.2.parametrizations.weight.original0', 
'flow.flows.3.wavenet.in_layers.2.parametrizations.weight.original1', 'flow.flows.3.wavenet.in_layers.3.parametrizations.weight.original0', 'flow.flows.3.wavenet.in_layers.3.parametrizations.weight.original1', 'flow.flows.3.wavenet.res_skip_layers.0.parametrizations.weight.original0', 'flow.flows.3.wavenet.res_skip_layers.0.parametrizations.weight.original1', 'flow.flows.3.wavenet.res_skip_layers.1.parametrizations.weight.original0', 'flow.flows.3.wavenet.res_skip_layers.1.parametrizations.weight.original1', 'flow.flows.3.wavenet.res_skip_layers.2.parametrizations.weight.original0', 'flow.flows.3.wavenet.res_skip_layers.2.parametrizations.weight.original1', 'flow.flows.3.wavenet.res_skip_layers.3.parametrizations.weight.original0', 'flow.flows.3.wavenet.res_skip_layers.3.parametrizations.weight.original1', 'posterior_encoder.wavenet.in_layers.0.parametrizations.weight.original0', 'posterior_encoder.wavenet.in_layers.0.parametrizations.weight.original1', 'posterior_encoder.wavenet.in_layers.1.parametrizations.weight.original0', 'posterior_encoder.wavenet.in_layers.1.parametrizations.weight.original1', 'posterior_encoder.wavenet.in_layers.10.parametrizations.weight.original0', 'posterior_encoder.wavenet.in_layers.10.parametrizations.weight.original1', 'posterior_encoder.wavenet.in_layers.11.parametrizations.weight.original0', 'posterior_encoder.wavenet.in_layers.11.parametrizations.weight.original1', 'posterior_encoder.wavenet.in_layers.12.parametrizations.weight.original0', 'posterior_encoder.wavenet.in_layers.12.parametrizations.weight.original1', 'posterior_encoder.wavenet.in_layers.13.parametrizations.weight.original0', 'posterior_encoder.wavenet.in_layers.13.parametrizations.weight.original1', 'posterior_encoder.wavenet.in_layers.14.parametrizations.weight.original0', 'posterior_encoder.wavenet.in_layers.14.parametrizations.weight.original1', 'posterior_encoder.wavenet.in_layers.15.parametrizations.weight.original0', 
'posterior_encoder.wavenet.in_layers.15.parametrizations.weight.original1', 'posterior_encoder.wavenet.in_layers.2.parametrizations.weight.original0', 'posterior_encoder.wavenet.in_layers.2.parametrizations.weight.original1', 'posterior_encoder.wavenet.in_layers.3.parametrizations.weight.original0', 'posterior_encoder.wavenet.in_layers.3.parametrizations.weight.original1', 'posterior_encoder.wavenet.in_layers.4.parametrizations.weight.original0', 'posterior_encoder.wavenet.in_layers.4.parametrizations.weight.original1', 'posterior_encoder.wavenet.in_layers.5.parametrizations.weight.original0', 'posterior_encoder.wavenet.in_layers.5.parametrizations.weight.original1', 'posterior_encoder.wavenet.in_layers.6.parametrizations.weight.original0', 'posterior_encoder.wavenet.in_layers.6.parametrizations.weight.original1', 'posterior_encoder.wavenet.in_layers.7.parametrizations.weight.original0', 'posterior_encoder.wavenet.in_layers.7.parametrizations.weight.original1', 'posterior_encoder.wavenet.in_layers.8.parametrizations.weight.original0', 'posterior_encoder.wavenet.in_layers.8.parametrizations.weight.original1', 'posterior_encoder.wavenet.in_layers.9.parametrizations.weight.original0', 'posterior_encoder.wavenet.in_layers.9.parametrizations.weight.original1', 'posterior_encoder.wavenet.res_skip_layers.0.parametrizations.weight.original0', 'posterior_encoder.wavenet.res_skip_layers.0.parametrizations.weight.original1', 'posterior_encoder.wavenet.res_skip_layers.1.parametrizations.weight.original0', 'posterior_encoder.wavenet.res_skip_layers.1.parametrizations.weight.original1', 'posterior_encoder.wavenet.res_skip_layers.10.parametrizations.weight.original0', 'posterior_encoder.wavenet.res_skip_layers.10.parametrizations.weight.original1', 'posterior_encoder.wavenet.res_skip_layers.11.parametrizations.weight.original0', 'posterior_encoder.wavenet.res_skip_layers.11.parametrizations.weight.original1', 
'posterior_encoder.wavenet.res_skip_layers.12.parametrizations.weight.original0', 'posterior_encoder.wavenet.res_skip_layers.12.parametrizations.weight.original1', 'posterior_encoder.wavenet.res_skip_layers.13.parametrizations.weight.original0', 'posterior_encoder.wavenet.res_skip_layers.13.parametrizations.weight.original1', 'posterior_encoder.wavenet.res_skip_layers.14.parametrizations.weight.original0', 'posterior_encoder.wavenet.res_skip_layers.14.parametrizations.weight.original1', 'posterior_encoder.wavenet.res_skip_layers.15.parametrizations.weight.original0', 'posterior_encoder.wavenet.res_skip_layers.15.parametrizations.weight.original1', 'posterior_encoder.wavenet.res_skip_layers.2.parametrizations.weight.original0', 'posterior_encoder.wavenet.res_skip_layers.2.parametrizations.weight.original1', 'posterior_encoder.wavenet.res_skip_layers.3.parametrizations.weight.original0', 'posterior_encoder.wavenet.res_skip_layers.3.parametrizations.weight.original1', 'posterior_encoder.wavenet.res_skip_layers.4.parametrizations.weight.original0', 'posterior_encoder.wavenet.res_skip_layers.4.parametrizations.weight.original1', 'posterior_encoder.wavenet.res_skip_layers.5.parametrizations.weight.original0', 'posterior_encoder.wavenet.res_skip_layers.5.parametrizations.weight.original1', 'posterior_encoder.wavenet.res_skip_layers.6.parametrizations.weight.original0', 'posterior_encoder.wavenet.res_skip_layers.6.parametrizations.weight.original1', 'posterior_encoder.wavenet.res_skip_layers.7.parametrizations.weight.original0', 'posterior_encoder.wavenet.res_skip_layers.7.parametrizations.weight.original1', 'posterior_encoder.wavenet.res_skip_layers.8.parametrizations.weight.original0', 'posterior_encoder.wavenet.res_skip_layers.8.parametrizations.weight.original1', 'posterior_encoder.wavenet.res_skip_layers.9.parametrizations.weight.original0', 'posterior_encoder.wavenet.res_skip_layers.9.parametrizations.weight.original1']\n", "You should probably TRAIN this model on a 
down-stream task to be able to use it for predictions and inference.\n" ] } ], "source": [ "model = VitsModel.from_pretrained(\"facebook/mms-tts-ara\",cache_dir=\"./\")\n", "tokenizer = AutoTokenizer.from_pretrained(\"facebook/mms-tts-ara\",cache_dir=\"./\")\n", "feature_extractor = VitsFeatureExtractor()" ] }, { "cell_type": "code", "execution_count": 33, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 713, "referenced_widgets": [ "5fe8c49187324ff5a9e7a1902a56e6c1", "c75e944aeb794d5fb3b17d6db308d5c9", "5c2193563191495ba8b767e3fe53ad34", "f3e903c659be4a5cb5b5c861d4bc2cb6", "eaf8644ba06746008ebc0bbf1f6d1f58", "ed2b79e52630424782ba93485693ea8e", "312aff17ac5745a78ac0a0e5114d2e66", "8354096236c84017bba7441e2442fa42", "a93fd7b8c44a4b41b175db4d7dd3d321", "ad11453c933147cd88e46285c9c44d1e", "51f7dae0eed843ffbe3681681adfe2b4", "0c95cda53fc64f11ab33b3b7eba32752", "c3d16dd56b4e42f1bc90ec39b05dd30e", "f5072addecb0409fb88432072cb1b389", "d62c0f26002241a3b46e5315870fa6f1", "85cdb8ca0dba4279b6716e53950d1143", "347caf53164840b3a5cfa7d82f49a01a", "22056322f3984123a377201d9d429146", "c847db691b394603a73ae87915d3ed04", "a31dec7811dd4d45aeedfc5840228943", "a84c1ab2d30e448b9aeb8c27efe04d7f", "3d4326b689a647c295d9a798224f37dd", "819790829e504c78a3645180cb802589", "270c2a900df74d9d8b687a8ad2b90053", "339c8eed6a5c41c9aebcdb823cfcbb3e", "6e2304b4162f4757be1e663612c6b263", "bb4ffb1d46d84c24b26c5e58369359b6", "20860987ba3341aa999727de64e97099", "f83498582e404ffeb934d7480342cf5a", "9aea6400a4d04c748fa5a2a7ad90e155", "2196612c515e49ad9ef9576c277dc39a", "eaaf2cb486ce4e7e90049f6ff1c201a6", "b1b627ee58eb4b9297d9ca93301a3576", "beaba909476d4d1299b862d396b7b24f", "90452370798e435f8ec1568e3faca4b2", "9cad3b9f73f4418e8e257f708e4d2f78", "9afb1f8f10014dda847a587d935ee552", "9f437c4ec8bb436abdb364e9dba256e5", "366fe10d4c1e4a5c9dba52a9d5955ab2", "9c6b341f1c82422aa30c9e9a4a737bd0", "790fa683f6d54cf6a7fab8a838e5b996", "f71d71156f3948bd8217d36f00f65e32", 
"017db64686184080b1a5cb9c9b3236d0", "f693e52fd9ea4c1986957104f7415a0c", "8d5dcab702944737879e96f849401339", "f0b172a83f434f2c81734e1c8216677d", "6b14177921df4db6a1d725fe8b6db251", "b883163a1ee4439a97c210cf759ca01f", "a4a3a38618ff47ddb8826edee43de913", "81f20981fbed444997db3b3872be22a3", "67799d1b16f24379a92bffdfeb19914c", "169c9039501145828fae8c3d0d6979de", "75a93cefcdde4078ab270864899c6b8f", "902d41e40b3f420db0871365c8bd9665", "d9bb28ace6de471c9f9307c64e8c1f38", "a7c3e7f16ea84282a87c261b672b67df", "8b5ce15ee86d4db6bb9f495423511205", "40bcba1944594b3898458a33c2b116ad", "0d9c4b5f9aea438da6ca5bda7b8c8fde", "07dcb7da40e24821bc02a7ab08c23e74", "e9ba94addb0e4da585a0a1c1a049dacc", "d15b00851c0c44a09d54d524256655a4", "531fc44a16e343b2adf5cbf562aa6ada", "63cb8174b6a648488a4a4c9b7f2fcb63", "ff8e8ae333814136b615e33b734f650d", "4057e65d28674c119c96c9292804d07c", "db06e8aed1474220b32ae3248d39748b", "654e88d4747540ce8ed0170609c520b5", "403b1089c57a4fea91b8fa2c4957a91a", "c7a19333c43047458b6b754af7203f1d", "0c38053a5e40419bbfbf9777d1947f62", "3b745e441db3401c95f231078fca4cf4", "4302a7d26f174e20aa5391a97a895860", "2442a8cc799c4b5ca593aeb47bbc47b1", "ed5037f18ee8489abb802b4f2d36b540", "91d8558d8e0042baac37e3e83199ddf1", "b7b817a1400840ec80819d232df37e5c", "b91ceb88d032476aa4de3dca3efc676d", "5962e7de7fc846e180edc8eba0a083cb", "3b8671570fcd4e83978a4ff3435550b8", "0b8c274470a04cfb8e18cc6980d43419", "1484a6cea27f4084b4df138165c909be", "2c7acd7bd0aa4e4ca3f879ea5c47e0d1", "de14de5e58304e658ffca6225c724998", "91aeefae393647f09face5eea070d0c2", "e5dce5ae71054c1ead47f42b178d4d71", "41b907d0ab7f4d58a356d72594f17baa", "eac84897079a452dbb0e22b3081b87a9", "c194eb49e9d84dca99b054845f1ceaa4", "defddf038323411aa81b5fa7b64a201e", "acbf0c6175ad4483b8baa67060c52c14", "70b07c878bea4b26a71135dda9e47749", "e5976c493938462eacf1e939d442f03d", "26b2eb080c7c4e958edca4959f0c8951", "4198ee7869ec4ef3b4c56748d4e9752b", "a219e27d8fe44af1b15092bfad83f43f", "65f8f4edc481444f9bcadbed1647a9e5", 
"a4acbb2bbaea404e8e91a945e32dca5d", "498f0edc46eb4e3790e2e7f7f2eaed40", "bda5fdc8c3f74530b98ae28769b49ab6", "f50b92560b8c4b86b505d18bd72a4aeb", "e8cb34f0f8054a1b97146d134bdf38ae", "6062b8e2d4e6466eb8995a899918cbdc", "fb600097eb4a4bb7a68b6e8832860b94", "929a10e1233a4dd793f1205e5e53409a", "4386c14a16ce40cda419c9030345581d", "039b5b6c083848ffa087cbe0c463da5a", "f71575651feb4e9ebecf9624cb922d91", "7ef42115ee38460c82a198a4d90ac269", "76f899e8b9a043caa3bc8d164279952c", "e03cbc92104d4a5f9a584225c111574c", "5aa238fa9b5e4430a4238406456caa77", "8e48fd70bdba4fc5b515d0bbf994a770", "3ffa7d6309dd4bd994ba710b72d68299", "9cd405360ec840dea97d00d4b46bc367", "2459318b633e4b12ab400341bb3b43cb", "20f71610e3284e0cba496430e75c5eee", "5c9ef3ba1ac146f7aec7905b0978ecae", "aee5d7b9d2994c419bd614810fa6e6b2", "d40b97c2c7414be58e15401464352a05", "66bea563a202481e85cf916b60711fa3", "69661dc87b7348d9a69dd1efe2363214", "0edcbaccd4074f1f9b0965d907d4e35e", "db6a12d0be6f48dfae06d360af20a4cd", "24f4431025a64f979059affd5eea0a50", "b32f78bbae0249239682a88174051195", "ed3df636482043029fe38ebcfb8f1f2f", "a11a6872aba44566a039af46fac49615", "838395a6a04346afbfd930ca9bb86082", "3e8f5fadfd3c43d2809aa89975f7348b", "57c711c754dc43879942f645c4fe68d5", "a51bc4a7234e454993d09e2d126defa7", "5cc078eff4734f5f810999fc4abbaec3", "fefdf0d8046c41788ecaad39a573f9e3", "cd833887722b40b4b0178c816d578fc7", "1abe8d9bc1144fd69e7a6b1e12a267f4", "2139e7a11a564465b6d3dab42d7979c3", "dac8c067b12a483cb2cacf5984ab49e6", "39d8a1b664ff4a03b015107cf55f53bf", "dd482220d6b448c0b9da1fdce0269cf7", "d617604ab51645e5b2140e608f15056e", "850e12143fa3428492f1c642137ed68c", "ce6d4e3919d248c8b4eef6090d9e68a6" ] }, "id": "e5OwBRCzwU1X", "outputId": "d7e962da-0a89-4796-dddc-31fb306054d9" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Parameter 'function'= of the transform datasets.arrow_dataset.Dataset._map_single couldn't be hashed properly, a random hash was used instead. 
Make sure your transforms and parameters are serializable with pickle or dill for the dataset fingerprinting and caching to work. If you reuse this transform, the caching mechanism will consider it to be different from the previous calls and recompute everything. This warning is only showed once. Subsequent hashing failures won't be showed.\n", "WARNING:datasets.fingerprint:Parameter 'function'= of the transform datasets.arrow_dataset.Dataset._map_single couldn't be hashed properly, a random hash was used instead. Make sure your transforms and parameters are serializable with pickle or dill for the dataset fingerprinting and caching to work. If you reuse this transform, the caching mechanism will consider it to be different from the previous calls and recompute everything. This warning is only showed once. Subsequent hashing failures won't be showed.\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "5fe8c49187324ff5a9e7a1902a56e6c1", "version_major": 2, "version_minor": 0 }, "text/plain": [ "preprocess: 0%| | 0/10 [00:00