diff --git "a/SpeechT5_finetune_technicalTerm.ipynb" "b/SpeechT5_finetune_technicalTerm.ipynb" new file mode 100644--- /dev/null +++ "b/SpeechT5_finetune_technicalTerm.ipynb" @@ -0,0 +1,20646 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "gpuType": "T4" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + }, + "accelerator": "GPU", + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "6c3dd62b8cd14e91b8d47d734dbbaced": { + "model_module": "@jupyter-widgets/controls", + "model_name": "VBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "VBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "VBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_9d8f8793e20c4fa2a3c371f3b3812611", + "IPY_MODEL_41a1a574b4b249d89f5f8760f6050a2d", + "IPY_MODEL_ed8cedae4db6442f9937d2ffb5233ffa", + "IPY_MODEL_8bccf75943dc43f985572f454ee7b09c" + ], + "layout": "IPY_MODEL_a13585e1bcd84f5a8786d34882e3c762" + } + }, + "34d191d760cc4c0888e08bab1a99e281": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_2cef5d7da100426e8dfd33dbffa658fa", + "placeholder": "", + "style": "IPY_MODEL_d9225211b21744ffb45276704bbe809c", + "value": "
Step | \n", + "Training Loss | \n", + "Validation Loss | \n", + "
---|---|---|
100 | \n", + "3.772000 | \n", + "0.420103 | \n", + "
200 | \n", + "3.591200 | \n", + "0.405249 | \n", + "
300 | \n", + "3.460400 | \n", + "0.394766 | \n", + "
400 | \n", + "3.389400 | \n", + "0.390611 | \n", + "
500 | \n", + "3.373700 | \n", + "0.386506 | \n", + "
600 | \n", + "3.362800 | \n", + "0.385102 | \n", + "
700 | \n", + "3.323600 | \n", + "0.382134 | \n", + "
800 | \n", + "3.306000 | \n", + "0.381117 | \n", + "
900 | \n", + "3.285900 | \n", + "0.379681 | \n", + "
1000 | \n", + "3.266300 | \n", + "0.376319 | \n", + "
1100 | \n", + "3.236800 | \n", + "0.375682 | \n", + "
1200 | \n", + "3.210700 | \n", + "0.374939 | \n", + "
1300 | \n", + "3.203500 | \n", + "0.372964 | \n", + "
1400 | \n", + "3.196900 | \n", + "0.372788 | \n", + "
1500 | \n", + "3.210700 | \n", + "0.371661 | \n", + "
"
+ ]
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "/usr/local/lib/python3.10/dist-packages/transformers/modeling_utils.py:2816: UserWarning: Moving the following attributes in the config to the generation config: {'max_length': 1876}. You are seeing this warning because you've set generation parameters in the model config, as opposed to in the generation config.\n",
+ " warnings.warn(\n",
+ "/usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:295: FutureWarning: `torch.cpu.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cpu', args...)` instead.\n",
+ " with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs): # type: ignore[attr-defined]\n",
+ "/usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:295: FutureWarning: `torch.cpu.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cpu', args...)` instead.\n",
+ " with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs): # type: ignore[attr-defined]\n",
+ "/usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:295: FutureWarning: `torch.cpu.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cpu', args...)` instead.\n",
+ " with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs): # type: ignore[attr-defined]\n",
+ "/usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:295: FutureWarning: `torch.cpu.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cpu', args...)` instead.\n",
+ " with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs): # type: ignore[attr-defined]\n",
+ "/usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:295: FutureWarning: `torch.cpu.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cpu', args...)` instead.\n",
+ " with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs): # type: ignore[attr-defined]\n",
+ "/usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:295: FutureWarning: `torch.cpu.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cpu', args...)` instead.\n",
+ " with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs): # type: ignore[attr-defined]\n",
+ "/usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:295: FutureWarning: `torch.cpu.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cpu', args...)` instead.\n",
+ " with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs): # type: ignore[attr-defined]\n",
+ "/usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:295: FutureWarning: `torch.cpu.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cpu', args...)` instead.\n",
+ " with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs): # type: ignore[attr-defined]\n",
+ "/usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:295: FutureWarning: `torch.cpu.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cpu', args...)` instead.\n",
+ " with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs): # type: ignore[attr-defined]\n",
+ "/usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:295: FutureWarning: `torch.cpu.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cpu', args...)` instead.\n",
+ " with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs): # type: ignore[attr-defined]\n",
+ "/usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:295: FutureWarning: `torch.cpu.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cpu', args...)` instead.\n",
+ " with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs): # type: ignore[attr-defined]\n",
+ "/usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:295: FutureWarning: `torch.cpu.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cpu', args...)` instead.\n",
+ " with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs): # type: ignore[attr-defined]\n",
+ "/usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:295: FutureWarning: `torch.cpu.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cpu', args...)` instead.\n",
+ " with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs): # type: ignore[attr-defined]\n",
+ "/usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:295: FutureWarning: `torch.cpu.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cpu', args...)` instead.\n",
+ " with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs): # type: ignore[attr-defined]\n"
+ ]
+ },
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "TrainOutput(global_step=1500, training_loss=3.39983762105306, metrics={'train_runtime': 4837.6694, 'train_samples_per_second': 9.922, 'train_steps_per_second': 0.31, 'total_flos': 5483826441583776.0, 'train_loss': 3.39983762105306, 'epoch': 4.580152671755725})"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 34
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "trainer.push_to_hub()"
+ ],
+ "metadata": {
+ "id": "T3aPr-chnqM_",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 52
+ },
+ "outputId": "288fef0f-ba56-478a-8dcc-264dd6ddd90a"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "CommitInfo(commit_url='https://huggingface.co/DeepDiveDev/speecht5_finetuned_English/commit/8bbc92b5968125e1ff51b8fea16e90aa40c5f267', commit_message='End of training', commit_description='', oid='8bbc92b5968125e1ff51b8fea16e90aa40c5f267', pr_url=None, pr_revision=None, pr_num=None)"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "string"
+ }
+ },
+ "metadata": {},
+ "execution_count": 35
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+        "# Dataset on Technical Terms"
+ ],
+ "metadata": {
+ "id": "yoOXf1BXOdg4"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "!pip install datasets\n",
+ "import pandas as pd\n",
+ "from datasets import Dataset, load_dataset\n",
+ "\n",
+ "# Load your dataset from Excel file using pd.read_excel\n",
+ "df = pd.read_excel('/content/drive/MyDrive/TTS_Eng/TTS-English.xlsx')\n",
+ "# Convert the pandas DataFrame to a Hugging Face Dataset\n",
+ "dataset = Dataset.from_pandas(df)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "K8YzZRrPf6mz",
+ "outputId": "109e7328-faa4-4274-fef3-ae2c1e628106"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Collecting datasets\n",
+ " Downloading datasets-3.0.2-py3-none-any.whl.metadata (20 kB)\n",
+ "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from datasets) (3.16.1)\n",
+ "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from datasets) (1.26.4)\n",
+ "Requirement already satisfied: pyarrow>=15.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (16.1.0)\n",
+ "Collecting dill<0.3.9,>=0.3.0 (from datasets)\n",
+ " Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)\n",
+ "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from datasets) (2.2.2)\n",
+ "Requirement already satisfied: requests>=2.32.2 in /usr/local/lib/python3.10/dist-packages (from datasets) (2.32.3)\n",
+ "Requirement already satisfied: tqdm>=4.66.3 in /usr/local/lib/python3.10/dist-packages (from datasets) (4.66.5)\n",
+ "Collecting xxhash (from datasets)\n",
+ " Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)\n",
+ "Collecting multiprocess<0.70.17 (from datasets)\n",
+ " Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)\n",
+ "Requirement already satisfied: fsspec<=2024.9.0,>=2023.1.0 in /usr/local/lib/python3.10/dist-packages (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets) (2024.6.1)\n",
+ "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets) (3.10.10)\n",
+ "Requirement already satisfied: huggingface-hub>=0.23.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (0.24.7)\n",
+ "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from datasets) (24.1)\n",
+ "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from datasets) (6.0.2)\n",
+ "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (2.4.3)\n",
+ "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.3.1)\n",
+ "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (24.2.0)\n",
+ "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.4.1)\n",
+ "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (6.1.0)\n",
+ "Requirement already satisfied: yarl<2.0,>=1.12.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.15.4)\n",
+ "Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (4.0.3)\n",
+ "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.23.0->datasets) (4.12.2)\n",
+ "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.32.2->datasets) (3.4.0)\n",
+ "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.32.2->datasets) (3.10)\n",
+ "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.32.2->datasets) (2.2.3)\n",
+ "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.32.2->datasets) (2024.8.30)\n",
+ "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2.8.2)\n",
+ "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2024.2)\n",
+ "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2024.2)\n",
+ "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.2->pandas->datasets) (1.16.0)\n",
+ "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.10/dist-packages (from yarl<2.0,>=1.12.0->aiohttp->datasets) (0.2.0)\n",
+ "Downloading datasets-3.0.2-py3-none-any.whl (472 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m472.7/472.7 kB\u001b[0m \u001b[31m12.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m7.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hDownloading multiprocess-0.70.16-py310-none-any.whl (134 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m7.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hDownloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m10.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hInstalling collected packages: xxhash, dill, multiprocess, datasets\n",
+ "Successfully installed datasets-3.0.2 dill-0.3.8 multiprocess-0.70.16 xxhash-3.5.0\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "len(dataset)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "k7SgmYMUUSSj",
+ "outputId": "1a9f1ee6-58d5-406a-a921-a39647c5e0c3"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "133"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 11
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "!pip install transformers\n",
+ "from transformers import AutoProcessor\n",
+ "\n",
+        "# NOTE(review): for SpeechT5 fine-tuning the matching processor is \"microsoft/speecht5_tts\"; wav2vec2's CTC vocab is uppercase-only, so the vocab diff below flags every lowercase letter — confirm this is intended\n",
+ "processor = AutoProcessor.from_pretrained(\"facebook/wav2vec2-base-960h\")\n",
+ "\n",
+ "tokenizer = processor.tokenizer"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 527,
+ "referenced_widgets": [
+ "bf0d33ef88d6454a9a4ea5a78883d9e9",
+ "b0700ca20203401fa7e2eb9a2b60aa0f",
+ "4225f3ecaa9046519340846aaba8616a",
+ "09b941636ca64b238d3e0a761573f45e",
+ "f2c3e3edbf5040be97bc87c1711d31bc",
+ "9b6cbb6586b24fa095476410bc1d80b9",
+ "cd3f004541194be29b29c99864fcf146",
+ "d4f6744cd7ce4a57aa784fbbee4c0621",
+ "7aaba4f57f7d4effbbf66933c1ebc61b",
+ "9f8c7e1a0b5e4942855b6a2143e1eed7",
+ "3805153ecfb743ffbb0e337a77004560",
+ "36f8c5fce0fb4b93be7295042913e208",
+ "7f3217a296c04423804d8290a17e946a",
+ "0429e54da0404f4298c47efe1c6b66e2",
+ "50b8cc6f892b4a149494951839bd3993",
+ "42e0f0238e9149468a716b765dc8baff",
+ "a81d5a2e37f945fca8b0cda4bdab0b91",
+ "22c4ce4eac7542409c62d058825e761c",
+ "2294bcb50eef48e698d6723958af9c94",
+ "56dc4123de544d50a9d5dde76876e00b",
+ "151b4d2db3bc4797ad977f722806ed64",
+ "692a45c766aa43acb1b7f7ad7ee1772d",
+ "4f20b5912c994e3986d3c4d30eb99143",
+ "a1c4cf9ad32b49a4996fbb3f321745a9",
+ "60183325ec9c4bb1967e83fe3be5dd32",
+ "369ff789de224c6f910cf8758420f0c8",
+ "f42a309ff1994451b50cf9d49d1f7a6d",
+ "f115fc561c044c8c9d79f1a189766fcf",
+ "5f5a995405d84c1b80ee0b22ad955696",
+ "79c5b6542ba34375a8f44d54709bab45",
+ "95ce8dec3e7f49b4b3311e7739da70d5",
+ "0d5030b6ed24403da75989467066f260",
+ "ef78e68136b448d3b8b603040e2de130",
+ "520715b2218b45068326603a6639746c",
+ "d30d15c3d0b745eda19f46e4c9c1d6c6",
+ "9f452748eb134bc0ac00f4038fd45a88",
+ "a3d6d1ae1a764fa782c5e42bc07823ba",
+ "db4f5325442b47f68cc0dfe18a4113b1",
+ "37dc19476c7c466482cac9c4509b3ad5",
+ "d01f1c3db49845a8bdb69a2cd8613018",
+ "48b70ae7b3a346ceac5a48b36fbddffc",
+ "e7716f8922284f30bd33b605ba90940e",
+ "38c3bbe6735e4bdf99263c6251a79eeb",
+ "6e90888770ce4e919567ec299fe5c28e",
+ "ef48464237604789b200054eaa4a23d9",
+ "5bc9d68ab2a04312a5a9d4b5977fd691",
+ "7586d1a50f924338887b3d66b6a7947a",
+ "c6cbd5b7493746299e01d7b7a3a1e3be",
+ "ab76bf76b190487b96317ce4f4583ae6",
+ "b37be5cbddd94530bac98f7f2301ac6d",
+ "8f51b4c1f2b043d4b640baf4a8cec808",
+ "c62b866ecf204cb3803f0226d166d213",
+ "f0aa4b397e3c402cbfb4d1c3af8aacf6",
+ "d05fe02294354059a6ce512f94ff1b17",
+ "1397290d20c44f67b51176023c0090bb"
+ ]
+ },
+ "id": "ZyUozQcJV0w2",
+ "outputId": "44da5f87-59e3-4bc1-a64f-3582c58602af"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Requirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (4.44.2)\n",
+ "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers) (3.16.1)\n",
+ "Requirement already satisfied: huggingface-hub<1.0,>=0.23.2 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.24.7)\n",
+ "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (1.26.4)\n",
+ "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers) (24.1)\n",
+ "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (6.0.2)\n",
+ "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (2024.9.11)\n",
+ "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers) (2.32.3)\n",
+ "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.4.5)\n",
+ "Requirement already satisfied: tokenizers<0.20,>=0.19 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.19.1)\n",
+ "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers) (4.66.5)\n",
+ "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.23.2->transformers) (2024.6.1)\n",
+ "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.23.2->transformers) (4.12.2)\n",
+ "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.4.0)\n",
+ "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.10)\n",
+ "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2.2.3)\n",
+ "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2024.8.30)\n"
+ ]
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "preprocessor_config.json: 0%| | 0.00/159 [00:00, ?B/s]"
+ ],
+ "application/vnd.jupyter.widget-view+json": {
+ "version_major": 2,
+ "version_minor": 0,
+ "model_id": "bf0d33ef88d6454a9a4ea5a78883d9e9"
+ }
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "tokenizer_config.json: 0%| | 0.00/163 [00:00, ?B/s]"
+ ],
+ "application/vnd.jupyter.widget-view+json": {
+ "version_major": 2,
+ "version_minor": 0,
+ "model_id": "36f8c5fce0fb4b93be7295042913e208"
+ }
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "config.json: 0%| | 0.00/1.60k [00:00, ?B/s]"
+ ],
+ "application/vnd.jupyter.widget-view+json": {
+ "version_major": 2,
+ "version_minor": 0,
+ "model_id": "4f20b5912c994e3986d3c4d30eb99143"
+ }
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "vocab.json: 0%| | 0.00/291 [00:00, ?B/s]"
+ ],
+ "application/vnd.jupyter.widget-view+json": {
+ "version_major": 2,
+ "version_minor": 0,
+ "model_id": "520715b2218b45068326603a6639746c"
+ }
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "special_tokens_map.json: 0%| | 0.00/85.0 [00:00, ?B/s]"
+ ],
+ "application/vnd.jupyter.widget-view+json": {
+ "version_major": 2,
+ "version_minor": 0,
+ "model_id": "ef48464237604789b200054eaa4a23d9"
+ }
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "/usr/local/lib/python3.10/dist-packages/transformers/tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884\n",
+ " warnings.warn(\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "import pandas as pd\n",
+ "from datasets import Dataset, load_dataset\n",
+ "\n",
+ "# Load your dataset from Excel file using pd.read_excel\n",
+ "df = pd.read_excel('/content/drive/MyDrive/TTS_Eng/TTS-English.xlsx')\n",
+ "# Convert the pandas DataFrame to a Hugging Face Dataset\n",
+ "dataset = Dataset.from_pandas(df)\n",
+ "\n",
+ "tokenizer = processor.tokenizer\n",
+ "\n",
+ "def extract_all_chars(batch):\n",
+ " text_examples = [text for text in batch[\"Text Example\"] if text is not None]\n",
+ " all_text = \" \".join(text_examples)\n",
+ " vocab = list(set(all_text))\n",
+ " return {\"vocab\": [vocab], \"all_text\": [all_text]}\n",
+ "\n",
+ "# Only remove unnecessary columns, but keep the text column\n",
+ "vocabs = dataset.map(\n",
+ " extract_all_chars,\n",
+ " batched=True,\n",
+ " batch_size=-1,\n",
+ " keep_in_memory=True,\n",
+        "    # NOTE: no column is named \"transcript\", so this list contains every original column\n",
+        "    # and they are all removed; only the new \"vocab\"/\"all_text\" columns from extract_all_chars remain.\n",
+ " remove_columns=[col for col in dataset.column_names if col != \"transcript\"],\n",
+ ")\n",
+ "\n",
+ "dataset_vocab = set(vocabs[\"vocab\"][0])\n",
+ "tokenizer_vocab = {k for k,_ in tokenizer.get_vocab().items()}"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 49,
+ "referenced_widgets": [
+ "3659e5c8996d42ccbd6b165960b6ebcb",
+ "221fa7adbd3046fa813a272a333f0a58",
+ "1c65128e06bd45ddafbb996947f4746d",
+ "11faa4b79c774470a6256e5bed9fc606",
+ "038fbc256aa943818c51112507e502f7",
+ "f3ba4f7daed0414b8c28cc7354af8d99",
+ "d17f3eb739dd479b80a7ef85471b7937",
+ "ed20aaccf62041a6a3fdaef43fc0b9f7",
+ "ae882f381a714169a48f4d34124c6d02",
+ "c1f6fb263ee04e1482b3d95414abb0a1",
+ "c607e763b8b940e6afbe677799d79fa6"
+ ]
+ },
+ "id": "_7K8SGgLgohy",
+ "outputId": "b1e65d74-316e-4fda-eeb5-b461dca82aa5"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "Map: 0%| | 0/133 [00:00, ? examples/s]"
+ ],
+ "application/vnd.jupyter.widget-view+json": {
+ "version_major": 2,
+ "version_minor": 0,
+ "model_id": "3659e5c8996d42ccbd6b165960b6ebcb"
+ }
+ },
+ "metadata": {}
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "dataset_vocab - tokenizer_vocab"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "KuF4QM_DV8RK",
+ "outputId": "17ef0ff3-3914-4a50-dd66-475b943df7a9"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "{' ',\n",
+ " '\"',\n",
+ " '%',\n",
+ " ',',\n",
+ " '-',\n",
+ " '.',\n",
+ " '0',\n",
+ " '2',\n",
+ " '4',\n",
+ " '5',\n",
+ " 'a',\n",
+ " 'b',\n",
+ " 'c',\n",
+ " 'd',\n",
+ " 'e',\n",
+ " 'f',\n",
+ " 'g',\n",
+ " 'h',\n",
+ " 'i',\n",
+ " 'j',\n",
+ " 'k',\n",
+ " 'l',\n",
+ " 'm',\n",
+ " 'n',\n",
+ " 'o',\n",
+ " 'p',\n",
+ " 'q',\n",
+ " 'r',\n",
+ " 's',\n",
+ " 't',\n",
+ " 'u',\n",
+ " 'v',\n",
+ " 'w',\n",
+ " 'x',\n",
+ " 'y',\n",
+ " 'z',\n",
+ " '’'}"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 16
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "replacements = [\n",
+ " ('à', 'a'),\n",
+ " ('ç', 'c'),\n",
+ " ('è', 'e'),\n",
+ " ('ë', 'e'),\n",
+ " ('í', 'i'),\n",
+ " ('ï', 'i'),\n",
+ " ('ö', 'o'),\n",
+ " ('ü', 'u'),\n",
+ " ('’', \"'\"), # Replacing curly apostrophe with a standard one\n",
+ " ('%', ''), # Option to remove the percentage symbol\n",
+ " ('0', '0'), # Keep 0 as it is (no change)\n",
+ " ('2', '2'), # Keep 2 as it is (no change)\n",
+ " ('4', '4'), # Keep 4 as it is (no change)\n",
+ " ('5', '5'), # Keep 5 as it is (no change)\n",
+ " (' ', ' ') # Ensure spaces remain unchanged\n",
+ "]\n",
+ "\n",
+ "def cleanup_text(inputs):\n",
+ " text_column_name = \"Text Example\" # Update with the correct column name from your dataset\n",
+ " # Check if the value is not None using the correct column name\n",
+ " if inputs[text_column_name] is not None:\n",
+ " for src, dst in replacements:\n",
+ " # Update this line to use the correct column name\n",
+ " inputs[text_column_name] = inputs[text_column_name].replace(src, dst)\n",
+ " return inputs\n",
+ "\n",
+ "# Apply the function to the dataset\n",
+ "dataset = dataset.map(cleanup_text)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 49,
+ "referenced_widgets": [
+ "25e3bbaabea54530b09017d1b89614c2",
+ "bc2c8c6b5a63418cbf967553012c6fb2",
+ "1838da789aae4c07874c9d3b777dde5a",
+ "a6b1e677a10b4d2ca91dd2e9da1d7aea",
+ "cceb7ceda8f94f9d87c62562f6803efe",
+ "35d690584d9942c5922d17b3d6efda15",
+ "ec18c6121c734b7788387cb7eb903657",
+ "370e5ff52db4411d914bbaa5a2ccc5d3",
+ "6a467e0fbed14e63b7884a8abd3c802b",
+ "b0f3fa5644784a39b02491fd820d4868",
+ "d413f0963826479cabada7a15c680ce5"
+ ]
+ },
+ "id": "0snxxfPdje6w",
+ "outputId": "46aed7b0-d27b-42f2-fd33-d45547d40a50"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "Map: 0%| | 0/133 [00:00, ? examples/s]"
+ ],
+ "application/vnd.jupyter.widget-view+json": {
+ "version_major": 2,
+ "version_minor": 0,
+ "model_id": "25e3bbaabea54530b09017d1b89614c2"
+ }
+ },
+ "metadata": {}
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "!pip install speechbrain\n",
+ "import os\n",
+ "import torch\n",
+        "from speechbrain.inference.classifiers import EncoderClassifier  # speechbrain.pretrained is deprecated since speechbrain 1.0\n",
+ "\n",
+ "spk_model_name = \"speechbrain/spkrec-xvect-voxceleb\"\n",
+ "\n",
+ "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
+ "speaker_model = EncoderClassifier.from_hparams(\n",
+ " source=spk_model_name,\n",
+ " run_opts={\"device\": device},\n",
+ " savedir=os.path.join(\"/tmp\", spk_model_name),\n",
+ ")\n",
+ "\n",
+ "\n",
+ "def create_speaker_embedding(waveform):\n",
+ " with torch.no_grad():\n",
+ " speaker_embeddings = speaker_model.encode_batch(torch.tensor(waveform))\n",
+ " speaker_embeddings = torch.nn.functional.normalize(speaker_embeddings, dim=2)\n",
+ " speaker_embeddings = speaker_embeddings.squeeze().cpu().numpy()\n",
+ " return speaker_embeddings"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 1000,
+ "referenced_widgets": [
+ "fa8757aaa0a741cf839b9241c827d971",
+ "b13821240ecc45a385b2a2b1effa86f7",
+ "e68376643bcf4350b60e91f57784a5bd",
+ "c43dde973e8e437185ae3fc4a70223e7",
+ "408e99ba3be2433eaed5461ad9f09867",
+ "e62bf5257aec4e688b52a275fd157da3",
+ "cab5270d6de94e1592d9bda2e2cdfe1a",
+ "624ab1702ede421682fa47530ed99913",
+ "e58492565fa045fbaf0d9052e8574922",
+ "6dafbc1cdbbd4a099ded9ed44e88fc50",
+ "ea9dc779cb664fb7835a0d5a132f82f1",
+ "1289e1adffb040c79930ab4709dc7224",
+ "bbf4f9046aa24a69b65d2361081f0288",
+ "50dae551f435430f9e95905c36a5d9e3",
+ "c33e6a32b0634c7ea1b1131127475fdf",
+ "92e09ba91998416a9797b0f1def75add",
+ "ff4914ff0f924cb18d743ecb8f10fec4",
+ "74d02c59834942b69fa4c9b80b09f48a",
+ "50d58beaee4442b4a69197fea3f1d442",
+ "942355e5534f4404899e18436e07fa30",
+ "d158d0fac7674b45a58d83924abbc07c",
+ "fc0e64fad426421896620ac66dcafa63",
+ "2d381129b2474b9ca0227cd790c4439b",
+ "14bbec4c5e7f4f11b4bf1c6387991a7d",
+ "a85c5408987049d893af6c46b40b4ee6",
+ "b962c1297c6540659780411a92632a5f",
+ "737785d6a113473c8e7aec888505713b",
+ "630d28962ac542659f4c424f385a65f4",
+ "d39893e895c54010bf6f2aa00ec1d8d2",
+ "2263fcc46a3346c499619903ef1309c8",
+ "8c4a3e2f5e0241b5bb88c2871d2bfb10",
+ "1de033bae98c4f718c133b34c74f07a6",
+ "32804329bfe34cc0adb781561a371878",
+ "30b68f4079514c8db53790ed98a57a0f",
+ "fd93f9dfc78e495f8d8c1de4ad4399fe",
+ "73576e72b9164e7f87ff44b0200e4b7e",
+ "b4e4186c328147d3b71ef5444f59f78f",
+ "72e6d7b5d2b0416a8014b15f2c0f131d",
+ "d8e396928f8e4cd49bb56fab121e0195",
+ "13de4de37676426b95639c1b1eb6cc90",
+ "e5f0d98066fa4f28851baa54938c861a",
+ "9bcba59f919341f7973cc6e842016a6d",
+ "09cba933c19345538cfe6985485f3f28",
+ "1980e08341ad43f4aab42dd43f033d09",
+ "8c4a1685d4af4fd2bd4e403a7c333744",
+ "d03b3cb3e8444b4f83ce92c4c8a12195",
+ "97c5de7959924eb39c57ec305cb7043d",
+ "28dc36b8a8b341e2adedefd37b394d00",
+ "e5dc20b4193d4bc2afe967914d46a7dd",
+ "ad2247b2855b4660a2e08ebb1f005182",
+ "331fb19489804a7097effd9eca84a310",
+ "b9c1f6499f214190b4b9f90fa32c9049",
+ "95f57a74b13b4d0ab1102be0f45e3c3d",
+ "27a28aff2f464af58bf82bb65f4662ab",
+ "d610309145cf47fcbf587ba4ea6f6c10"
+ ]
+ },
+ "id": "j-izN78sjlR9",
+ "outputId": "0518403a-2cbc-4d96-e7fc-e8611c1e5263"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Collecting speechbrain\n",
+ " Downloading speechbrain-1.0.1-py3-none-any.whl.metadata (24 kB)\n",
+ "Collecting hyperpyyaml (from speechbrain)\n",
+ " Downloading HyperPyYAML-1.2.2-py3-none-any.whl.metadata (7.6 kB)\n",
+ "Requirement already satisfied: joblib in /usr/local/lib/python3.10/dist-packages (from speechbrain) (1.4.2)\n",
+ "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from speechbrain) (1.26.4)\n",
+ "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from speechbrain) (24.1)\n",
+ "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from speechbrain) (1.13.1)\n",
+ "Requirement already satisfied: sentencepiece in /usr/local/lib/python3.10/dist-packages (from speechbrain) (0.2.0)\n",
+ "Requirement already satisfied: torch>=1.9 in /usr/local/lib/python3.10/dist-packages (from speechbrain) (2.4.1+cu121)\n",
+ "Requirement already satisfied: torchaudio in /usr/local/lib/python3.10/dist-packages (from speechbrain) (2.4.1+cu121)\n",
+ "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from speechbrain) (4.66.5)\n",
+ "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.10/dist-packages (from speechbrain) (0.24.7)\n",
+ "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch>=1.9->speechbrain) (3.16.1)\n",
+ "Requirement already satisfied: typing-extensions>=4.8.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.9->speechbrain) (4.12.2)\n",
+ "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.9->speechbrain) (1.13.3)\n",
+ "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.9->speechbrain) (3.4.1)\n",
+ "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.9->speechbrain) (3.1.4)\n",
+ "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch>=1.9->speechbrain) (2024.6.1)\n",
+ "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub->speechbrain) (6.0.2)\n",
+ "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from huggingface-hub->speechbrain) (2.32.3)\n",
+ "Collecting ruamel.yaml>=0.17.28 (from hyperpyyaml->speechbrain)\n",
+ " Downloading ruamel.yaml-0.18.6-py3-none-any.whl.metadata (23 kB)\n",
+ "Collecting ruamel.yaml.clib>=0.2.7 (from ruamel.yaml>=0.17.28->hyperpyyaml->speechbrain)\n",
+ " Downloading ruamel.yaml.clib-0.2.12-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.7 kB)\n",
+ "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.9->speechbrain) (3.0.2)\n",
+ "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub->speechbrain) (3.4.0)\n",
+ "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub->speechbrain) (3.10)\n",
+ "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub->speechbrain) (2.2.3)\n",
+ "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub->speechbrain) (2024.8.30)\n",
+ "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.9->speechbrain) (1.3.0)\n",
+ "Downloading speechbrain-1.0.1-py3-none-any.whl (807 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m807.2/807.2 kB\u001b[0m \u001b[31m16.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hDownloading HyperPyYAML-1.2.2-py3-none-any.whl (16 kB)\n",
+ "Downloading ruamel.yaml-0.18.6-py3-none-any.whl (117 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m117.8/117.8 kB\u001b[0m \u001b[31m9.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hDownloading ruamel.yaml.clib-0.2.12-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (722 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m722.2/722.2 kB\u001b[0m \u001b[31m30.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hInstalling collected packages: ruamel.yaml.clib, ruamel.yaml, hyperpyyaml, speechbrain\n",
+ "Successfully installed hyperpyyaml-1.2.2 ruamel.yaml-0.18.6 ruamel.yaml.clib-0.2.12 speechbrain-1.0.1\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "=1.17 in /usr/local/lib/python3.10/dist-packages (from datasets) (1.26.4)\n",
+ "Requirement already satisfied: pyarrow>=15.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (16.1.0)\n",
+ "Requirement already satisfied: dill<0.3.9,>=0.3.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (0.3.8)\n",
+ "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from datasets) (2.2.2)\n",
+ "Requirement already satisfied: requests>=2.32.2 in /usr/local/lib/python3.10/dist-packages (from datasets) (2.32.3)\n",
+ "Requirement already satisfied: tqdm>=4.66.3 in /usr/local/lib/python3.10/dist-packages (from datasets) (4.66.5)\n",
+ "Requirement already satisfied: xxhash in /usr/local/lib/python3.10/dist-packages (from datasets) (3.5.0)\n",
+ "Requirement already satisfied: multiprocess<0.70.17 in /usr/local/lib/python3.10/dist-packages (from datasets) (0.70.16)\n",
+ "Requirement already satisfied: fsspec<=2024.9.0,>=2023.1.0 in /usr/local/lib/python3.10/dist-packages (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets) (2024.6.1)\n",
+ "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets) (3.10.10)\n",
+ "Requirement already satisfied: huggingface-hub>=0.23.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (0.24.7)\n",
+ "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from datasets) (24.1)\n",
+ "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from datasets) (6.0.2)\n",
+ "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (2024.9.11)\n",
+ "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (0.4.5)\n",
+ "Requirement already satisfied: tokenizers<0.20,>=0.19 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (0.19.1)\n",
+ "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (2.4.1+cu121)\n",
+ "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from accelerate) (5.9.5)\n",
+ "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (2.4.3)\n",
+ "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.3.1)\n",
+ "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (24.2.0)\n",
+ "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.4.1)\n",
+ "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (6.1.0)\n",
+ "Requirement already satisfied: yarl<2.0,>=1.12.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.15.4)\n",
+ "Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (4.0.3)\n",
+ "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.23.0->datasets) (4.12.2)\n",
+ "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.32.2->datasets) (3.4.0)\n",
+ "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.32.2->datasets) (3.10)\n",
+ "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.32.2->datasets) (2.2.3)\n",
+ "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.32.2->datasets) (2024.8.30)\n",
+ "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch->transformers[torch]) (1.13.3)\n",
+ "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch->transformers[torch]) (3.4.1)\n",
+ "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch->transformers[torch]) (3.1.4)\n",
+ "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2.8.2)\n",
+ "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2024.2)\n",
+ "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2024.2)\n",
+ "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.2->pandas->datasets) (1.16.0)\n",
+ "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.10/dist-packages (from yarl<2.0,>=1.12.0->aiohttp->datasets) (0.2.0)\n",
+ "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch->transformers[torch]) (3.0.2)\n",
+ "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy->torch->transformers[torch]) (1.3.0)\n",
+ "Collecting sklearn\n",
+ " Using cached sklearn-0.0.post12.tar.gz (2.6 kB)\n",
+ " \u001b[1;31merror\u001b[0m: \u001b[1msubprocess-exited-with-error\u001b[0m\n",
+ " \n",
+ " \u001b[31m×\u001b[0m \u001b[32mpython setup.py egg_info\u001b[0m did not run successfully.\n",
+ " \u001b[31m│\u001b[0m exit code: \u001b[1;36m1\u001b[0m\n",
+ " \u001b[31m╰─>\u001b[0m See above for output.\n",
+ " \n",
+ " \u001b[1;35mnote\u001b[0m: This error originates from a subprocess, and is likely not a problem with pip.\n",
+ " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25herror\n",
+ "\u001b[1;31merror\u001b[0m: \u001b[1mmetadata-generation-failed\u001b[0m\n",
+ "\n",
+ "\u001b[31m×\u001b[0m Encountered error while generating package metadata.\n",
+ "\u001b[31m╰─>\u001b[0m See above for output.\n",
+ "\n",
+ "\u001b[1;35mnote\u001b[0m: This is an issue with the package mentioned above, not pip.\n",
+ "\u001b[1;36mhint\u001b[0m: See above for details.\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "max_steps is given, it will override any value given in num_train_epochs\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "!pip install datasets transformers[torch] accelerate\n",
+        "!pip install scikit-learn\n",
+ "\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "import datasets\n",
+ "import numpy as np\n",
+ "\n",
+ "# Assuming 'dataset' is your Hugging Face Dataset\n",
+ "# Rename or add the required columns to match the model's expected input.\n",
+ "# For example, if your dataset has a column named 'text' containing the target text,\n",
+ "# rename it to 'decoder_input_values':\n",
+ "dataset = dataset.rename_column('Text Example', 'decoder_input_values')\n",
+ "# Similarly, rename or add other necessary columns like 'input_values', 'attention_mask', and 'decoder_attention_mask'.\n",
+ "\n",
+ "# Convert the Hugging Face Dataset to a NumPy array or list\n",
+ "# before splitting. Assuming 'dataset' is a Hugging Face Dataset\n",
+ "dataset_np = dataset[:] # If dataset is a DatasetDict, select the desired split, e.g., dataset[\"train\"][:]\n",
+ "#dataset_np = np.array(dataset) #If you need a NumPy array specifically\n",
+ "\n",
+ "# Split the NumPy array using train_test_split\n",
+ "train_indices, test_indices = train_test_split(np.arange(len(dataset_np)), test_size=0.2, random_state=42) # Adjust test_size and random_state as needed\n",
+ "\n",
+ "# Create new Hugging Face Datasets using the split indices\n",
+ "train_dataset = dataset.select(train_indices)\n",
+ "test_dataset = dataset.select(test_indices)\n",
+ "\n",
+ "# You might need to define a data collator to prepare the data for the model:\n",
+ "def data_collator(features):\n",
+ " # Implement logic to collate data based on the columns present in your dataset\n",
+ " # and the format required by the SpeechT5ForTextToSpeech model.\n",
+ " # This might involve padding, creating attention masks, etc.\n",
+ " # You can refer to the documentation for examples of data collators:\n",
+ " # https://huggingface.co/docs/transformers/main_classes/data_collator\n",
+ " pass # Replace with your data collation logic\n",
+ "\n",
+ "# Now you can pass these datasets to the trainer\n",
+ "from transformers import Seq2SeqTrainer\n",
+ "trainer = Seq2SeqTrainer(\n",
+ " args=training_args,\n",
+ " model=model,\n",
+ " train_dataset=train_dataset, # Use train_dataset instead of dataset[\"train\"]\n",
+ " eval_dataset=test_dataset, # Use test_dataset instead of dataset[\"test\"]\n",
+ " data_collator=data_collator,\n",
+ " tokenizer=processor, # Assuming 'processor' is your SpeechT5Processor\n",
+ ")"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "f5200UGhmm1y",
+ "outputId": "589f5ef9-f923-4e43-db28-a2cb3187fae3"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Requirement already satisfied: datasets in /usr/local/lib/python3.10/dist-packages (3.0.2)\n",
+ "Requirement already satisfied: accelerate in /usr/local/lib/python3.10/dist-packages (0.34.2)\n",
+ "Requirement already satisfied: transformers[torch] in /usr/local/lib/python3.10/dist-packages (4.44.2)\n",
+ "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from datasets) (3.16.1)\n",
+ "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from datasets) (1.26.4)\n",
+ "Requirement already satisfied: pyarrow>=15.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (16.1.0)\n",
+ "Requirement already satisfied: dill<0.3.9,>=0.3.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (0.3.8)\n",
+ "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from datasets) (2.2.2)\n",
+ "Requirement already satisfied: requests>=2.32.2 in /usr/local/lib/python3.10/dist-packages (from datasets) (2.32.3)\n",
+ "Requirement already satisfied: tqdm>=4.66.3 in /usr/local/lib/python3.10/dist-packages (from datasets) (4.66.5)\n",
+ "Requirement already satisfied: xxhash in /usr/local/lib/python3.10/dist-packages (from datasets) (3.5.0)\n",
+ "Requirement already satisfied: multiprocess<0.70.17 in /usr/local/lib/python3.10/dist-packages (from datasets) (0.70.16)\n",
+ "Requirement already satisfied: fsspec<=2024.9.0,>=2023.1.0 in /usr/local/lib/python3.10/dist-packages (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets) (2024.6.1)\n",
+ "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets) (3.10.10)\n",
+ "Requirement already satisfied: huggingface-hub>=0.23.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (0.24.7)\n",
+ "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from datasets) (24.1)\n",
+ "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from datasets) (6.0.2)\n",
+ "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (2024.9.11)\n",
+ "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (0.4.5)\n",
+ "Requirement already satisfied: tokenizers<0.20,>=0.19 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (0.19.1)\n",
+ "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (2.4.1+cu121)\n",
+ "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from accelerate) (5.9.5)\n",
+ "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (2.4.3)\n",
+ "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.3.1)\n",
+ "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (24.2.0)\n",
+ "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.4.1)\n",
+ "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (6.1.0)\n",
+ "Requirement already satisfied: yarl<2.0,>=1.12.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.15.4)\n",
+ "Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (4.0.3)\n",
+ "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.23.0->datasets) (4.12.2)\n",
+ "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.32.2->datasets) (3.4.0)\n",
+ "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.32.2->datasets) (3.10)\n",
+ "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.32.2->datasets) (2.2.3)\n",
+ "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.32.2->datasets) (2024.8.30)\n",
+ "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch->transformers[torch]) (1.13.3)\n",
+ "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch->transformers[torch]) (3.4.1)\n",
+ "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch->transformers[torch]) (3.1.4)\n",
+ "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2.8.2)\n",
+ "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2024.2)\n",
+ "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2024.2)\n",
+ "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.2->pandas->datasets) (1.16.0)\n",
+ "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.10/dist-packages (from yarl<2.0,>=1.12.0->aiohttp->datasets) (0.2.0)\n",
+ "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch->transformers[torch]) (3.0.2)\n",
+ "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy->torch->transformers[torch]) (1.3.0)\n",
+ "Collecting sklearn\n",
+ " Using cached sklearn-0.0.post12.tar.gz (2.6 kB)\n",
+ " \u001b[1;31merror\u001b[0m: \u001b[1msubprocess-exited-with-error\u001b[0m\n",
+ " \n",
+ " \u001b[31m×\u001b[0m \u001b[32mpython setup.py egg_info\u001b[0m did not run successfully.\n",
+ " \u001b[31m│\u001b[0m exit code: \u001b[1;36m1\u001b[0m\n",
+ " \u001b[31m╰─>\u001b[0m See above for output.\n",
+ " \n",
+ " \u001b[1;35mnote\u001b[0m: This error originates from a subprocess, and is likely not a problem with pip.\n",
+ " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25herror\n",
+ "\u001b[1;31merror\u001b[0m: \u001b[1mmetadata-generation-failed\u001b[0m\n",
+ "\n",
+ "\u001b[31m×\u001b[0m Encountered error while generating package metadata.\n",
+ "\u001b[31m╰─>\u001b[0m See above for output.\n",
+ "\n",
+ "\u001b[1;35mnote\u001b[0m: This is an issue with the package mentioned above, not pip.\n",
+ "\u001b[1;36mhint\u001b[0m: See above for details.\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "max_steps is given, it will override any value given in num_train_epochs\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "from transformers import SpeechT5HifiGan\n",
+ "vocoder = SpeechT5HifiGan.from_pretrained(\"microsoft/speecht5_hifigan\")"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 81,
+ "referenced_widgets": [
+ "f7f64603670540cca1cfa1a6bb95d8d4",
+ "eecd754bff9d41f5803b1d543d8eb859",
+ "af79f746f2d94ac9802b1f0e3bac9ef5",
+ "b15adb94a0544448aa19d7847e0bc3d2",
+ "2d6eef2155c94cd0aa87769b945f77cb",
+ "b57381a0a4b140118f4acb711ae0b930",
+ "39bab19c0cf64667b772f2292c5f70b7",
+ "54e297f441114a2eb00a418eee763bde",
+ "24a3f21743fe43e49b5eac5b13070564",
+ "b4316acb39974b45beae8e5adadfdbeb",
+ "ddbdb7af43a147eb9a4b74a6677fe0aa",
+ "7e85f12904de48beabb658b439d35fc4",
+ "211f9427940545128436e2472903176f",
+ "8cf800c4dfce4ea1b513881fd9e0a101",
+ "f887a58fdd7a495f8420760c9566deb1",
+ "b5991c8c911b46b3a4d7889b51df7759",
+ "442e7734e9874c72b085c11a49d1e710",
+ "f1858842eec545a9bfa48a8b49db3966",
+ "5787962f856f46e9b4d15b9b8f009a15",
+ "102b5eb12c8344f587cfeba6f70f78f8",
+ "20d218dcba4b48b6afc9ccbf43904cac",
+ "4452e9c0da20434b976c3719764ea341"
+ ]
+ },
+ "id": "2VZIjVFjnCzW",
+ "outputId": "c6a477c3-5761-4606-ef64-ab907b1b1532"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "config.json: 0%| | 0.00/636 [00:00, ?B/s]"
+ ],
+ "application/vnd.jupyter.widget-view+json": {
+ "version_major": 2,
+ "version_minor": 0,
+ "model_id": "f7f64603670540cca1cfa1a6bb95d8d4"
+ }
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "pytorch_model.bin: 0%| | 0.00/50.7M [00:00, ?B/s]"
+ ],
+ "application/vnd.jupyter.widget-view+json": {
+ "version_major": 2,
+ "version_minor": 0,
+ "model_id": "7e85f12904de48beabb658b439d35fc4"
+ }
+ },
+ "metadata": {}
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "print(example[\"decoder_input_values\"]) # Check if the values exist and are valid"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "amRLGntkzAZM",
+ "outputId": "332bce82-6c89-4963-fc23-efcd67645d4a"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "When you connect to an external service, make sure the API returns the correct status codes.\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "print(processed_example[\"input_ids\"]) # Ensure input_ids are not None\n",
+ "print(processed_example[\"attention_mask\"]) # Ensure attention_mask is not None"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "010uMrqWzHkU",
+ "outputId": "562a7f45-4beb-4583-9fff-809338c88499"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "tensor([[ 4, 38, 11, 5, 9, 4, 22, 8, 16, 4, 17, 8, 9, 9, 5, 17, 6, 4,\n",
+ " 6, 8, 4, 7, 9, 4, 5, 37, 6, 5, 13, 9, 7, 15, 4, 12, 5, 13,\n",
+ " 27, 10, 17, 5, 23, 4, 18, 7, 28, 5, 4, 12, 16, 13, 5, 4, 6, 11,\n",
+ " 5, 4, 33, 49, 30, 4, 13, 5, 6, 16, 13, 9, 12, 4, 6, 11, 5, 4,\n",
+ " 17, 8, 13, 13, 5, 17, 6, 2]])\n",
+ "tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
+ " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
+ " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
+ " 1, 1, 1, 1, 1, 1, 1, 1]])\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+        "# Inference"
+ ],
+ "metadata": {
+ "id": "mlFzR1_9pNgV"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "from transformers import SpeechT5ForTextToSpeech\n",
+ "\n",
+ "model = SpeechT5ForTextToSpeech.from_pretrained(\"DeepDiveDev/speecht5_finetuned_English\")"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 237,
+ "referenced_widgets": [
+ "9371b25e7a2744e49a56e1aac8741469",
+ "a6fa0e1363884ef3b6ee59f4daf0dc88",
+ "4e4030b299e848daa74d5675b1fc67f9",
+ "700c531271eb42348cc80187b5d98a5f",
+ "0df8bc3570e8417bbb72c8e186e9194d",
+ "dd184a876f784f408a5e1e2cd23cf1f6",
+ "5f5db4187db647598bc479557e509fd4",
+ "7f9773fbb2964a87b750e47bd28c9b44",
+ "459fe1e5144d4844baafccfae7e4bbd4",
+ "c0cae8dde43c40f8a4b8afe007443662",
+ "f434847f1a814f00ab26b49d0f83d100",
+ "4eeb7fb7774b4a3a8c033a5106976dfc",
+ "505cdbd6d5ce42048e7a2b19dab3cb15",
+ "3fee0d226f1c41be9fd34f4d81d6c198",
+ "22cb9edee06d4b9d910e6ed0510faef3",
+ "5d915c0eac7841a1b6fc639a38514fee",
+ "1f3922bab8a34c37a3ccac4438b65de0",
+ "eef5ca64f98f43778470edef77154ca0",
+ "c8764666c2cf48fbb2f35f8171d38eb8",
+ "90d69e36ac8f4f24bcf6aefc9eccc894",
+ "07af8551040b407abb8fbef1d1671205",
+ "75452d64c2b942fa80103b60f20793e6",
+ "a91ef2fb784f4e299f72478283098550",
+ "dc28dfbb43dd4216a4f57b25bdacb85a",
+ "199eede9e0ac456e8fb86ac751278e67",
+ "377081804d714eb59394da49bf06704d",
+ "e2f2fb59c3454794a4959eef3b4d3c11",
+ "dac011b1128649c9b1f2f75f4d822b91",
+ "a90c9a9f1a9748ac82ef3aad7c0566dd",
+ "52231342af134f188e345da31791a488",
+ "d88147051650489ebaa9b8daf2b5bbec",
+ "4530108e89904ba3ad13707a4782a720",
+ "ff322dafaa184d45a5971966cb194955"
+ ]
+ },
+ "id": "n0cNBOWQq_Ae",
+ "outputId": "85f16e3b-2d43-4f58-9dd0-5002704ac2db"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:89: UserWarning: \n",
+ "The secret `HF_TOKEN` does not exist in your Colab secrets.\n",
+ "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n",
+ "You will be able to reuse this secret in all of your notebooks.\n",
+ "Please note that authentication is recommended but still optional to access public models or datasets.\n",
+ " warnings.warn(\n"
+ ]
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "config.json: 0%| | 0.00/2.11k [00:00, ?B/s]"
+ ],
+ "application/vnd.jupyter.widget-view+json": {
+ "version_major": 2,
+ "version_minor": 0,
+ "model_id": "9371b25e7a2744e49a56e1aac8741469"
+ }
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "model.safetensors: 0%| | 0.00/578M [00:00, ?B/s]"
+ ],
+ "application/vnd.jupyter.widget-view+json": {
+ "version_major": 2,
+ "version_minor": 0,
+ "model_id": "4eeb7fb7774b4a3a8c033a5106976dfc"
+ }
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "generation_config.json: 0%| | 0.00/190 [00:00, ?B/s]"
+ ],
+ "application/vnd.jupyter.widget-view+json": {
+ "version_major": 2,
+ "version_minor": 0,
+ "model_id": "a91ef2fb784f4e299f72478283098550"
+ }
+ },
+ "metadata": {}
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Example MOS collection\n",
+ "mos_scores = {\n",
+ " 'sentence_1.wav': [4, 5, 4],\n",
+ " 'sentence_2.wav': [3, 4, 4],\n",
+ " # Add more scores for each audio sample\n",
+ "}\n",
+ "\n",
+ "# Calculate the average MOS score for each audio\n",
+ "average_mos = {file: sum(scores)/len(scores) for file, scores in mos_scores.items()}\n",
+ "print(average_mos)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "HJ8GNhpM3awJ",
+ "outputId": "c6c5c3d1-7214-456e-cd73-e9abeb175eeb"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "{'sentence_1.wav': 4.333333333333333, 'sentence_2.wav': 3.6666666666666665}\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "import re\n",
+ "\n",
+ "number_words = {\n",
+ " 0: \"zero\", 1: \"one\", 2: \"two\", 3: \"three\", 4: \"four\", 5: \"five\", 6: \"six\", 7: \"seven\", 8: \"eight\", 9: \"nine\",\n",
+ " 10: \"ten\", 11: \"eleven\", 12: \"twelve\", 13: \"thirteen\", 14: \"fourteen\", 15: \"fifteen\", 16: \"sixteen\", 17: \"seventeen\",\n",
+ " 18: \"eighteen\", 19: \"nineteen\", 20: \"twenty\", 30: \"thirty\", 40: \"forty\", 50: \"fifty\", 60: \"sixty\", 70: \"seventy\",\n",
+ " 80: \"eighty\", 90: \"ninety\", 100: \"hundred\", 1000: \"thousand\"\n",
+ "}\n",
+ "\n",
+ "def number_to_words(number):\n",
+ " if number < 20:\n",
+ " return number_words[number]\n",
+ " elif number < 100:\n",
+ " tens, unit = divmod(number, 10)\n",
+ " return number_words[tens * 10] + (\"-\" + number_words[unit] if unit else \"\")\n",
+ " elif number < 1000:\n",
+ " hundreds, remainder = divmod(number, 100)\n",
+ " return (number_words[hundreds] + \" hundred\" if hundreds > 1 else \"hundred\") + (\" and \" + number_to_words(remainder) if remainder else \"\")\n",
+ " elif number < 1000000:\n",
+ " thousands, remainder = divmod(number, 1000)\n",
+ " return number_to_words(thousands) + \" thousand\" + (\" \" + number_to_words(remainder) if remainder else \"\")\n",
+ " elif number < 1000000000:\n",
+ " millions, remainder = divmod(number, 1000000)\n",
+ " return number_to_words(millions) + \" million\" + (\" \" + number_to_words(remainder) if remainder else \"\")\n",
+ " elif number < 1000000000000:\n",
+ " billions, remainder = divmod(number, 1000000000)\n",
+ " return number_to_words(billions) + \" billion\" + (\" \" + number_to_words(remainder) if remainder else \"\")\n",
+ " else:\n",
+ " return str(number)\n",
+ "\n",
+ "def replace_numbers_with_words(text):\n",
+ " def replace(match):\n",
+ " number = int(match.group())\n",
+ " return number_to_words(number)\n",
+ "\n",
+        "    # Find the numbers and replace them with words.\n",
+ " result = re.sub(r'\\b\\d+\\b', replace, text)\n",
+ "\n",
+ " return result"
+ ],
+ "metadata": {
+ "id": "WcG5qSO89itj"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "import re\n",
+ "\n",
+ "# Dictionary of technical words and their spoken equivalents (each letter separated)\n",
+ "technical_words = {\n",
+ " \"API\": \"A P I\",\n",
+ " \"CUDA\": \"C U D A\",\n",
+ " \"OAuth\": \"O Auth\",\n",
+ " \"LLM\": \"L L M\",\n",
+ " \"HTTP\": \"H T T P\",\n",
+ " \"HTTPS\": \"H T T P S\",\n",
+ " \"URL\": \"U R L\",\n",
+ " \"SQL\": \"S Q L\",\n",
+ " \"JSON\": \"J S O N\",\n",
+ " \"XML\": \"X M L\",\n",
+ " \"REST\": \"R E S T\",\n",
+ " \"JWT\": \"J W T\",\n",
+ " \"FTP\": \"F T P\",\n",
+ " \"SSH\": \"S S H\",\n",
+ " \"GPU\": \"G P U\",\n",
+ " \"CPU\": \"C P U\",\n",
+ " \"IP\": \"I P\",\n",
+ " \"RAM\": \"R A M\",\n",
+ " \"ROM\": \"R O M\",\n",
+ " \"ID\": \"I D\",\n",
+ " \"UID\": \"U I D\",\n",
+ " \"UUID\": \"U U I D\",\n",
+ " \"NLP\": \"N L P\",\n",
+ " \"ML\": \"M L\",\n",
+ " \"AI\": \"A I\",\n",
+ " \"IoT\": \"I O T\",\n",
+ " \"VPN\": \"V P N\",\n",
+ " \"DNS\": \"D N S\",\n",
+ " \"SMTP\": \"S M T P\",\n",
+ " \"KNN\": \"K N N\",\n",
+ " \"CNN\": \"C N N\",\n",
+ " \"LSTM\": \"L S T M\",\n",
+ " \"GRU\": \"G R U\",\n",
+ " # Add more technical terms with letters separated\n",
+ "}\n",
+ "\n",
+ "def technical_words_to_speech(word):\n",
+ " # Convert technical words to their spoken form, with each letter separated by spaces\n",
+ " return technical_words.get(word, word) # If not in the dictionary, return the original word\n",
+ "\n",
+ "def replace_technical_words_with_speech(text):\n",
+ " # Split the text into words and check for technical terms\n",
+ " words = re.findall(r'\\b\\w+\\b', text)\n",
+ "\n",
+ " result = []\n",
+ " for word in words:\n",
+ " # Convert each word to its spoken form if it's a technical term\n",
+ " spoken_form = technical_words_to_speech(word)\n",
+ " result.append(spoken_form)\n",
+ "\n",
+ " return \" \".join(result)\n",
+ "\n",
+ "# Example usage\n",
+ "text = \"I will use an API with OAuth and CUDA to train the LLM model on a GPU.\"\n",
+ "spoken_text = replace_technical_words_with_speech(text)\n",
+ "print(spoken_text)"
+ ],
+ "metadata": {
+ "id": "WaHEQ1eX9nHx"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "inputs = processor(text=final_text, return_tensors=\"pt\")"
+ ],
+ "metadata": {
+ "id": "pGmsxMrG-I_9"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "from transformers import SpeechT5HifiGan\n",
+ "\n",
+ "vocoder = SpeechT5HifiGan.from_pretrained(\"microsoft/speecht5_hifigan\")\n",
+ "speech = model.generate_speech(inputs[\"input_ids\"], speaker_embeddings, vocoder=vocoder)"
+ ],
+ "metadata": {
+ "id": "ayrv9MJu-N-c"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "from IPython.display import Audio\n",
+ "import soundfile as sf\n",
+ "\n",
+ "Audio(speech.numpy(), rate=16000)\n",
+ "# Save the audio to a file (e.g., 'output.wav')\n",
+ "sf.write('output.wav', speech.numpy(), 16000)"
+ ],
+ "metadata": {
+ "id": "9znt0YGZ-UXw"
+ },
+ "execution_count": null,
+ "outputs": []
+ }
+ ]
+}
\ No newline at end of file