Alyosha11 commited on
Commit
3e961d9
·
verified ·
1 Parent(s): 7ef2538

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .dockerignore +8 -0
  2. Dockerfile +42 -0
  3. GPT_SoVITS_Inference.ipynb +152 -0
  4. LICENSE +21 -0
  5. README.md +348 -0
  6. YouTubeAudioText/.gitattributes +59 -0
  7. YouTubeAudioText/data4.zip +0 -0
  8. YouTubeAudioText/data7.zip +0 -0
  9. api.py +940 -0
  10. colab_webui.ipynb +97 -0
  11. go-webui.bat +2 -0
  12. go-webui.ps1 +4 -0
  13. install.sh +6 -0
  14. pretrained_models/chinese-hubert-base/config.json +71 -0
  15. pretrained_models/chinese-hubert-base/preprocessor_config.json +9 -0
  16. requirements.txt +36 -0
  17. wandb/debug-internal.log +16 -0
  18. wandb/debug.log +29 -0
  19. wandb/run-20241203_183434-y6gzynz8/files/output.log +0 -0
  20. wandb/run-20241203_184628-qm0jlwqu/files/config.yaml +103 -0
  21. wandb/run-20241203_184628-qm0jlwqu/files/output.log +0 -0
  22. wandb/run-20241203_184628-qm0jlwqu/files/wandb-summary.json +1 -0
  23. wandb/run-20241203_185057-b425fq2v/files/wandb-metadata.json +47 -0
  24. wandb/run-20241203_185057-b425fq2v/logs/debug-core.log +14 -0
  25. wandb/run-20241203_185057-b425fq2v/logs/debug.log +27 -0
  26. wandb/run-20241203_185057-b425fq2v/run-b425fq2v.wandb +0 -0
  27. wandb/run-20241203_185203-najxbup6/files/output.log +82 -0
  28. wandb/run-20241203_185203-najxbup6/files/wandb-metadata.json +47 -0
  29. wandb/run-20241203_185203-najxbup6/logs/debug-core.log +14 -0
  30. wandb/run-20241203_185203-najxbup6/logs/debug-internal.log +16 -0
  31. wandb/run-20241203_185203-najxbup6/run-najxbup6.wandb +0 -0
  32. wandb/run-20241203_185257-7zp6kxhx/files/output.log +0 -0
  33. wandb/run-20241203_185257-7zp6kxhx/files/wandb-metadata.json +47 -0
  34. wandb/run-20241203_185257-7zp6kxhx/logs/debug-core.log +12 -0
  35. wandb/run-20241203_185257-7zp6kxhx/logs/debug-internal.log +11 -0
  36. wandb/run-20241203_185453-2eog1nt2/files/config.yaml +103 -0
  37. wandb/run-20241203_185453-2eog1nt2/files/output.log +1140 -0
  38. wandb/run-20241203_185453-2eog1nt2/files/wandb-metadata.json +47 -0
  39. wandb/run-20241203_185453-2eog1nt2/files/wandb-summary.json +1 -0
  40. wandb/run-20241203_185453-2eog1nt2/logs/debug-core.log +14 -0
  41. wandb/run-20241203_185453-2eog1nt2/logs/debug-internal.log +16 -0
  42. wandb/run-20241203_185453-2eog1nt2/run-2eog1nt2.wandb +0 -0
  43. wandb/run-20241203_185605-tfl0uvkj/files/config.yaml +103 -0
  44. wandb/run-20241203_185605-tfl0uvkj/files/output.log +81 -0
  45. wandb/run-20241203_185605-tfl0uvkj/files/wandb-metadata.json +47 -0
  46. wandb/run-20241203_185605-tfl0uvkj/files/wandb-summary.json +1 -0
  47. wandb/run-20241203_185605-tfl0uvkj/logs/debug-core.log +14 -0
  48. wandb/run-20241203_185605-tfl0uvkj/logs/debug-internal.log +16 -0
  49. wandb/run-20241203_185605-tfl0uvkj/logs/debug.log +27 -0
  50. wandb/run-20241203_185605-tfl0uvkj/run-tfl0uvkj.wandb +0 -0
.dockerignore ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ docs
2
+ logs
3
+ output
4
+ reference
5
+ SoVITS_weights
6
+ GPT_weights
7
+ TEMP
8
+ .git
Dockerfile ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Base CUDA image
2
+ FROM cnstark/pytorch:2.0.1-py3.9.17-cuda11.8.0-ubuntu20.04
3
+
4
+ LABEL maintainer="[email protected]"
5
+ LABEL version="dev-20240209"
6
+ LABEL description="Docker image for GPT-SoVITS"
7
+
8
+
9
+ # Install 3rd party apps
10
+ ENV DEBIAN_FRONTEND=noninteractive
11
+ ENV TZ=Etc/UTC
12
+ RUN apt-get update && \
13
+ apt-get install -y --no-install-recommends tzdata ffmpeg libsox-dev parallel aria2 git git-lfs && \
14
+ git lfs install && \
15
+ rm -rf /var/lib/apt/lists/*
16
+
17
+ # Copy only requirements.txt initially to leverage Docker cache
18
+ WORKDIR /workspace
19
+ COPY requirements.txt /workspace/
20
+ RUN pip install --no-cache-dir -r requirements.txt
21
+
22
+ # Define a build-time argument for image type
23
+ ARG IMAGE_TYPE=full
24
+
25
+ # Conditional logic based on the IMAGE_TYPE argument
26
+ # Always copy the Docker directory, but only use it if IMAGE_TYPE is not "elite"
27
+ COPY ./Docker /workspace/Docker
28
+ # elite 类型的镜像里面不包含额外的模型
29
+ RUN if [ "$IMAGE_TYPE" != "elite" ]; then \
30
+ chmod +x /workspace/Docker/download.sh && \
31
+ /workspace/Docker/download.sh && \
32
+ python /workspace/Docker/download.py && \
33
+ python -m nltk.downloader averaged_perceptron_tagger cmudict; \
34
+ fi
35
+
36
+
37
+ # Copy the rest of the application
38
+ COPY . /workspace
39
+
40
+ EXPOSE 9871 9872 9873 9874 9880
41
+
42
+ CMD ["python", "webui.py"]
GPT_SoVITS_Inference.ipynb ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nbformat": 4,
3
+ "nbformat_minor": 0,
4
+ "metadata": {
5
+ "colab": {
6
+ "provenance": []
7
+ },
8
+ "kernelspec": {
9
+ "name": "python3",
10
+ "display_name": "Python 3"
11
+ },
12
+ "accelerator": "GPU"
13
+ },
14
+ "cells": [
15
+ {
16
+ "cell_type": "markdown",
17
+ "source": [
18
+ "# Credits for bubarino giving me the huggingface import code (感谢 bubarino 给了我 huggingface 导入代码)"
19
+ ],
20
+ "metadata": {
21
+ "id": "himHYZmra7ix"
22
+ }
23
+ },
24
+ {
25
+ "cell_type": "code",
26
+ "metadata": {
27
+ "id": "e9b7iFV3dm1f"
28
+ },
29
+ "source": [
30
+ "!git clone https://github.com/RVC-Boss/GPT-SoVITS.git\n",
31
+ "%cd GPT-SoVITS\n",
32
+ "!apt-get update && apt-get install -y --no-install-recommends tzdata ffmpeg libsox-dev parallel aria2 git git-lfs && git lfs install\n",
33
+ "!pip install -r requirements.txt"
34
+ ],
35
+ "execution_count": null,
36
+ "outputs": []
37
+ },
38
+ {
39
+ "cell_type": "code",
40
+ "source": [
41
+ "# @title Download pretrained models 下载预训练模型\n",
42
+ "!mkdir -p /content/GPT-SoVITS/GPT_SoVITS/pretrained_models\n",
43
+ "!mkdir -p /content/GPT-SoVITS/tools/damo_asr/models\n",
44
+ "!mkdir -p /content/GPT-SoVITS/tools/uvr5\n",
45
+ "%cd /content/GPT-SoVITS/GPT_SoVITS/pretrained_models\n",
46
+ "!git clone https://huggingface.co/lj1995/GPT-SoVITS\n",
47
+ "%cd /content/GPT-SoVITS/tools/damo_asr/models\n",
48
+ "!git clone https://www.modelscope.cn/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch.git\n",
49
+ "!git clone https://www.modelscope.cn/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch.git\n",
50
+ "!git clone https://www.modelscope.cn/damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch.git\n",
51
+ "# @title UVR5 pretrains 安装uvr5模型\n",
52
+ "%cd /content/GPT-SoVITS/tools/uvr5\n",
53
+ "!git clone https://huggingface.co/Delik/uvr5_weights\n",
54
+ "!git config core.sparseCheckout true\n",
55
+ "!mv /content/GPT-SoVITS/GPT_SoVITS/pretrained_models/GPT-SoVITS/* /content/GPT-SoVITS/GPT_SoVITS/pretrained_models/"
56
+ ],
57
+ "metadata": {
58
+ "id": "0NgxXg5sjv7z",
59
+ "cellView": "form"
60
+ },
61
+ "execution_count": null,
62
+ "outputs": []
63
+ },
64
+ {
65
+ "cell_type": "code",
66
+ "source": [
67
+ "#@title Create folder models 创建文件夹模型\n",
68
+ "import os\n",
69
+ "base_directory = \"/content/GPT-SoVITS\"\n",
70
+ "folder_names = [\"SoVITS_weights\", \"GPT_weights\"]\n",
71
+ "\n",
72
+ "for folder_name in folder_names:\n",
73
+ " if os.path.exists(os.path.join(base_directory, folder_name)):\n",
74
+ " print(f\"The folder '{folder_name}' already exists. (文件夹'{folder_name}'已经存在。)\")\n",
75
+ " else:\n",
76
+ " os.makedirs(os.path.join(base_directory, folder_name))\n",
77
+ " print(f\"The folder '{folder_name}' was created successfully! (文件夹'{folder_name}'已成功创建!)\")\n",
78
+ "\n",
79
+ "print(\"All folders have been created. (所有文件夹均已创建。)\")"
80
+ ],
81
+ "metadata": {
82
+ "cellView": "form",
83
+ "id": "cPDEH-9czOJF"
84
+ },
85
+ "execution_count": null,
86
+ "outputs": []
87
+ },
88
+ {
89
+ "cell_type": "code",
90
+ "source": [
91
+ "import requests\n",
92
+ "import zipfile\n",
93
+ "import shutil\n",
94
+ "import os\n",
95
+ "\n",
96
+ "#@title Import model 导入模型 (HuggingFace)\n",
97
+ "hf_link = 'https://huggingface.co/modelloosrvcc/Nagisa_Shingetsu_GPT-SoVITS/resolve/main/Nagisa.zip' #@param {type: \"string\"}\n",
98
+ "\n",
99
+ "output_path = '/content/'\n",
100
+ "\n",
101
+ "response = requests.get(hf_link)\n",
102
+ "with open(output_path + 'file.zip', 'wb') as file:\n",
103
+ " file.write(response.content)\n",
104
+ "\n",
105
+ "with zipfile.ZipFile(output_path + 'file.zip', 'r') as zip_ref:\n",
106
+ " zip_ref.extractall(output_path)\n",
107
+ "\n",
108
+ "os.remove(output_path + \"file.zip\")\n",
109
+ "\n",
110
+ "source_directory = output_path\n",
111
+ "SoVITS_destination_directory = '/content/GPT-SoVITS/SoVITS_weights'\n",
112
+ "GPT_destination_directory = '/content/GPT-SoVITS/GPT_weights'\n",
113
+ "\n",
114
+ "for filename in os.listdir(source_directory):\n",
115
+ " if filename.endswith(\".pth\"):\n",
116
+ " source_path = os.path.join(source_directory, filename)\n",
117
+ " destination_path = os.path.join(SoVITS_destination_directory, filename)\n",
118
+ " shutil.move(source_path, destination_path)\n",
119
+ "\n",
120
+ "for filename in os.listdir(source_directory):\n",
121
+ " if filename.endswith(\".ckpt\"):\n",
122
+ " source_path = os.path.join(source_directory, filename)\n",
123
+ " destination_path = os.path.join(GPT_destination_directory, filename)\n",
124
+ " shutil.move(source_path, destination_path)\n",
125
+ "\n",
126
+ "print(f'Model downloaded. (模型已下载。)')"
127
+ ],
128
+ "metadata": {
129
+ "cellView": "form",
130
+ "id": "vbZY-LnM0tzq"
131
+ },
132
+ "execution_count": null,
133
+ "outputs": []
134
+ },
135
+ {
136
+ "cell_type": "code",
137
+ "source": [
138
+ "# @title launch WebUI 启动WebUI\n",
139
+ "!/usr/local/bin/pip install ipykernel\n",
140
+ "!sed -i '10s/False/True/' /content/GPT-SoVITS/config.py\n",
141
+ "%cd /content/GPT-SoVITS/\n",
142
+ "!/usr/local/bin/python webui.py"
143
+ ],
144
+ "metadata": {
145
+ "id": "4oRGUzkrk8C7",
146
+ "cellView": "form"
147
+ },
148
+ "execution_count": null,
149
+ "outputs": []
150
+ }
151
+ ]
152
+ }
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2024 RVC-Boss
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md ADDED
@@ -0,0 +1,348 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <div align="center">
2
+
3
+
4
+ <h1>GPT-SoVITS-WebUI</h1>
5
+ A Powerful Few-shot Voice Conversion and Text-to-Speech WebUI.<br><br>
6
+
7
+ [![madewithlove](https://img.shields.io/badge/made_with-%E2%9D%A4-red?style=for-the-badge&labelColor=orange)](https://github.com/RVC-Boss/GPT-SoVITS)
8
+
9
+ <a href="https://trendshift.io/repositories/7033" target="_blank"><img src="https://trendshift.io/api/badge/repositories/7033" alt="RVC-Boss%2FGPT-SoVITS | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
10
+
11
+ <!-- img src="https://counter.seku.su/cmoe?name=gptsovits&theme=r34" /><br> -->
12
+
13
+ [![Open In Colab](https://img.shields.io/badge/Colab-F9AB00?style=for-the-badge&logo=googlecolab&color=525252)](https://colab.research.google.com/github/RVC-Boss/GPT-SoVITS/blob/main/colab_webui.ipynb)
14
+ [![License](https://img.shields.io/badge/LICENSE-MIT-green.svg?style=for-the-badge)](https://github.com/RVC-Boss/GPT-SoVITS/blob/main/LICENSE)
15
+ [![Huggingface](https://img.shields.io/badge/🤗%20-online%20demo-yellow.svg?style=for-the-badge)](https://huggingface.co/spaces/lj1995/GPT-SoVITS-v2)
16
+ [![Discord](https://img.shields.io/discord/1198701940511617164?color=%23738ADB&label=Discord&style=for-the-badge)](https://discord.gg/dnrgs5GHfG)
17
+
18
+ **English** | [**中文简体**](./docs/cn/README.md) | [**日本語**](./docs/ja/README.md) | [**한국어**](./docs/ko/README.md) | [**Türkçe**](./docs/tr/README.md)
19
+
20
+ </div>
21
+
22
+ ---
23
+
24
+ ## Features:
25
+
26
+ 1. **Zero-shot TTS:** Input a 5-second vocal sample and experience instant text-to-speech conversion.
27
+
28
+ 2. **Few-shot TTS:** Fine-tune the model with just 1 minute of training data for improved voice similarity and realism.
29
+
30
+ 3. **Cross-lingual Support:** Inference in languages different from the training dataset, currently supporting English, Japanese, Korean, Cantonese and Chinese.
31
+
32
+ 4. **WebUI Tools:** Integrated tools include voice accompaniment separation, automatic training set segmentation, Chinese ASR, and text labeling, assisting beginners in creating training datasets and GPT/SoVITS models.
33
+
34
+ **Check out our [demo video](https://www.bilibili.com/video/BV12g4y1m7Uw) here!**
35
+
36
+ Unseen speakers few-shot fine-tuning demo:
37
+
38
+ https://github.com/RVC-Boss/GPT-SoVITS/assets/129054828/05bee1fa-bdd8-4d85-9350-80c060ab47fb
39
+
40
+ **User guide: [简体中文](https://www.yuque.com/baicaigongchang1145haoyuangong/ib3g1e) | [English](https://rentry.co/GPT-SoVITS-guide#/)**
41
+
42
+ ## Installation
43
+
44
+ For users in China, you can [click here](https://www.codewithgpu.com/i/RVC-Boss/GPT-SoVITS/GPT-SoVITS-Official) to use AutoDL Cloud Docker to experience the full functionality online.
45
+
46
+ ### Tested Environments
47
+
48
+ - Python 3.9, PyTorch 2.0.1, CUDA 11
49
+ - Python 3.10.13, PyTorch 2.1.2, CUDA 12.3
50
+ - Python 3.9, PyTorch 2.2.2, macOS 14.4.1 (Apple silicon)
51
+ - Python 3.9, PyTorch 2.2.2, CPU devices
52
+
53
+ _Note: numba==0.56.4 requires py<3.11_
54
+
55
+ ### Windows
56
+
57
+ If you are a Windows user (tested with win>=10), you can [download the integrated package](https://huggingface.co/lj1995/GPT-SoVITS-windows-package/resolve/main/GPT-SoVITS-beta.7z?download=true) and double-click on _go-webui.bat_ to start GPT-SoVITS-WebUI.
58
+
59
+ **Users in China can [download the package here](https://www.yuque.com/baicaigongchang1145haoyuangong/ib3g1e/dkxgpiy9zb96hob4#KTvnO).**
60
+
61
+ ### Linux
62
+
63
+ ```bash
64
+ conda create -n GPTSoVits python=3.9
65
+ conda activate GPTSoVits
66
+ bash install.sh
67
+ ```
68
+
69
+ ### macOS
70
+
71
+ **Note: The models trained with GPUs on Macs result in significantly lower quality compared to those trained on other devices, so we are temporarily using CPUs instead.**
72
+
73
+ 1. Install Xcode command-line tools by running `xcode-select --install`.
74
+ 2. Install FFmpeg by running `brew install ffmpeg`.
75
+ 3. Install the program by running the following commands:
76
+
77
+ ```bash
78
+ conda create -n GPTSoVits python=3.9
79
+ conda activate GPTSoVits
80
+ pip install -r requirements.txt
81
+ ```
82
+
83
+ ### Install Manually
84
+
85
+ #### Install FFmpeg
86
+
87
+ ##### Conda Users
88
+
89
+ ```bash
90
+ conda install ffmpeg
91
+ ```
92
+
93
+ ##### Ubuntu/Debian Users
94
+
95
+ ```bash
96
+ sudo apt install ffmpeg
97
+ sudo apt install libsox-dev
98
+ conda install -c conda-forge 'ffmpeg<7'
99
+ ```
100
+
101
+ ##### Windows Users
102
+
103
+ Download and place [ffmpeg.exe](https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/ffmpeg.exe) and [ffprobe.exe](https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/ffprobe.exe) in the GPT-SoVITS root.
104
+
105
+ Install [Visual Studio 2017](https://aka.ms/vs/17/release/vc_redist.x86.exe) (Korean TTS Only)
106
+
107
+ ##### MacOS Users
108
+ ```bash
109
+ brew install ffmpeg
110
+ ```
111
+
112
+ #### Install Dependences
113
+
114
+ ```bash
115
+ pip install -r requirements.txt
116
+ ```
117
+
118
+ ### Using Docker
119
+
120
+ #### docker-compose.yaml configuration
121
+
122
+ 0. Regarding image tags: Due to rapid updates in the codebase and the slow process of packaging and testing images, please check [Docker Hub](https://hub.docker.com/r/breakstring/gpt-sovits) for the currently packaged latest images and select as per your situation, or alternatively, build locally using a Dockerfile according to your own needs.
123
+ 1. Environment Variables:
124
+
125
+ - is_half: Controls half-precision/double-precision. This is typically the cause if the content under the directories 4-cnhubert/5-wav32k is not generated correctly during the "SSL extracting" step. Adjust to True or False based on your actual situation.
126
+
127
+ 2. Volumes Configuration,The application's root directory inside the container is set to /workspace. The default docker-compose.yaml lists some practical examples for uploading/downloading content.
128
+ 3. shm_size: The default available memory for Docker Desktop on Windows is too small, which can cause abnormal operations. Adjust according to your own situation.
129
+ 4. Under the deploy section, GPU-related settings should be adjusted cautiously according to your system and actual circumstances.
130
+
131
+ #### Running with docker compose
132
+
133
+ ```
134
+ docker compose -f "docker-compose.yaml" up -d
135
+ ```
136
+
137
+ #### Running with docker command
138
+
139
+ As above, modify the corresponding parameters based on your actual situation, then run the following command:
140
+
141
+ ```
142
+ docker run --rm -it --gpus=all --env=is_half=False --volume=G:\GPT-SoVITS-DockerTest\output:/workspace/output --volume=G:\GPT-SoVITS-DockerTest\logs:/workspace/logs --volume=G:\GPT-SoVITS-DockerTest\SoVITS_weights:/workspace/SoVITS_weights --workdir=/workspace -p 9880:9880 -p 9871:9871 -p 9872:9872 -p 9873:9873 -p 9874:9874 --shm-size="16G" -d breakstring/gpt-sovits:xxxxx
143
+ ```
144
+
145
+ ## Pretrained Models
146
+
147
+ **Users in China can [download all these models here](https://www.yuque.com/baicaigongchang1145haoyuangong/ib3g1e/dkxgpiy9zb96hob4#nVNhX).**
148
+
149
+ 1. Download pretrained models from [GPT-SoVITS Models](https://huggingface.co/lj1995/GPT-SoVITS) and place them in `GPT_SoVITS/pretrained_models`.
150
+
151
+ 2. Download G2PW models from [G2PWModel_1.1.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/g2p/G2PWModel_1.1.zip), unzip and rename to `G2PWModel`, and then place them in `GPT_SoVITS/text`.(Chinese TTS Only)
152
+
153
+ 3. For UVR5 (Vocals/Accompaniment Separation & Reverberation Removal, additionally), download models from [UVR5 Weights](https://huggingface.co/lj1995/VoiceConversionWebUI/tree/main/uvr5_weights) and place them in `tools/uvr5/uvr5_weights`.
154
+
155
+ 4. For Chinese ASR (additionally), download models from [Damo ASR Model](https://modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/files), [Damo VAD Model](https://modelscope.cn/models/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch/files), and [Damo Punc Model](https://modelscope.cn/models/damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch/files) and place them in `tools/asr/models`.
156
+
157
+ 5. For English or Japanese ASR (additionally), download models from [Faster Whisper Large V3](https://huggingface.co/Systran/faster-whisper-large-v3) and place them in `tools/asr/models`. Also, [other models](https://huggingface.co/Systran) may have the similar effect with smaller disk footprint.
158
+
159
+ ## Dataset Format
160
+
161
+ The TTS annotation .list file format:
162
+
163
+ ```
164
+ vocal_path|speaker_name|language|text
165
+ ```
166
+
167
+ Language dictionary:
168
+
169
+ - 'zh': Chinese
170
+ - 'ja': Japanese
171
+ - 'en': English
172
+ - 'ko': Korean
173
+ - 'yue': Cantonese
174
+
175
+ Example:
176
+
177
+ ```
178
+ D:\GPT-SoVITS\xxx/xxx.wav|xxx|en|I like playing Genshin.
179
+ ```
180
+
181
+ ## Finetune and inference
182
+
183
+ ### Open WebUI
184
+
185
+ #### Integrated Package Users
186
+
187
+ Double-click `go-webui.bat`or use `go-webui.ps1`
188
+ if you want to switch to V1,then double-click`go-webui-v1.bat` or use `go-webui-v1.ps1`
189
+
190
+ #### Others
191
+
192
+ ```bash
193
+ python webui.py <language(optional)>
194
+ ```
195
+
196
+ if you want to switch to V1,then
197
+
198
+ ```bash
199
+ python webui.py v1 <language(optional)>
200
+ ```
201
+ Or maunally switch version in WebUI
202
+
203
+ ### Finetune
204
+
205
+ #### Path Auto-filling is now supported
206
+
207
+ 1.Fill in the audio path
208
+
209
+ 2.Slice the audio into small chunks
210
+
211
+ 3.Denoise(optinal)
212
+
213
+ 4.ASR
214
+
215
+ 5.Proofreading ASR transcriptions
216
+
217
+ 6.Go to the next Tab, then finetune the model
218
+
219
+ ### Open Inference WebUI
220
+
221
+ #### Integrated Package Users
222
+
223
+ Double-click `go-webui-v2.bat` or use `go-webui-v2.ps1` ,then open the inference webui at `1-GPT-SoVITS-TTS/1C-inference`
224
+
225
+ #### Others
226
+
227
+ ```bash
228
+ python GPT_SoVITS/inference_webui.py <language(optional)>
229
+ ```
230
+ OR
231
+
232
+ ```bash
233
+ python webui.py
234
+ ```
235
+ then open the inference webui at `1-GPT-SoVITS-TTS/1C-inference`
236
+
237
+ ## V2 Release Notes
238
+
239
+ New Features:
240
+
241
+ 1. Support Korean and Cantonese
242
+
243
+ 2. An optimized text frontend
244
+
245
+ 3. Pre-trained model extended from 2k hours to 5k hours
246
+
247
+ 4. Improved synthesis quality for low-quality reference audio
248
+
249
+ [more details](https://github.com/RVC-Boss/GPT-SoVITS/wiki/GPT%E2%80%90SoVITS%E2%80%90v2%E2%80%90features-(%E6%96%B0%E7%89%B9%E6%80%A7) )
250
+
251
+ Use v2 from v1 environment:
252
+
253
+ 1. `pip install -r requirements.txt` to update some packages
254
+
255
+ 2. Clone the latest codes from github.
256
+
257
+ 3. Download v2 pretrained models from [huggingface](https://huggingface.co/lj1995/GPT-SoVITS/tree/main/gsv-v2final-pretrained) and put them into `GPT_SoVITS\pretrained_models\gsv-v2final-pretrained`.
258
+
259
+ Chinese v2 additional: [G2PWModel_1.1.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/g2p/G2PWModel_1.1.zip)(Download G2PW models, unzip and rename to `G2PWModel`, and then place them in `GPT_SoVITS/text`.
260
+
261
+ ## Todo List
262
+
263
+ - [x] **High Priority:**
264
+
265
+ - [x] Localization in Japanese and English.
266
+ - [x] User guide.
267
+ - [x] Japanese and English dataset fine tune training.
268
+
269
+ - [ ] **Features:**
270
+ - [x] Zero-shot voice conversion (5s) / few-shot voice conversion (1min).
271
+ - [x] TTS speaking speed control.
272
+ - [ ] ~~Enhanced TTS emotion control.~~
273
+ - [ ] Experiment with changing SoVITS token inputs to probability distribution of GPT vocabs (transformer latent).
274
+ - [x] Improve English and Japanese text frontend.
275
+ - [ ] Develop tiny and larger-sized TTS models.
276
+ - [x] Colab scripts.
277
+ - [ ] Try expand training dataset (2k hours -> 10k hours).
278
+ - [x] better sovits base model (enhanced audio quality)
279
+ - [ ] model mix
280
+
281
+ ## (Additional) Method for running from the command line
282
+ Use the command line to open the WebUI for UVR5
283
+ ```
284
+ python tools/uvr5/webui.py "<infer_device>" <is_half> <webui_port_uvr5>
285
+ ```
286
+ <!-- If you can't open a browser, follow the format below for UVR processing,This is using mdxnet for audio processing
287
+ ```
288
+ python mdxnet.py --model --input_root --output_vocal --output_ins --agg_level --format --device --is_half_precision
289
+ ``` -->
290
+ This is how the audio segmentation of the dataset is done using the command line
291
+ ```
292
+ python audio_slicer.py \
293
+ --input_path "<path_to_original_audio_file_or_directory>" \
294
+ --output_root "<directory_where_subdivided_audio_clips_will_be_saved>" \
295
+ --threshold <volume_threshold> \
296
+ --min_length <minimum_duration_of_each_subclip> \
297
+ --min_interval <shortest_time_gap_between_adjacent_subclips>
298
+ --hop_size <step_size_for_computing_volume_curve>
299
+ ```
300
+ This is how dataset ASR processing is done using the command line(Only Chinese)
301
+ ```
302
+ python tools/asr/funasr_asr.py -i <input> -o <output>
303
+ ```
304
+ ASR processing is performed through Faster_Whisper(ASR marking except Chinese)
305
+
306
+ (No progress bars, GPU performance may cause time delays)
307
+ ```
308
+ python ./tools/asr/fasterwhisper_asr.py -i <input> -o <output> -l <language> -p <precision>
309
+ ```
310
+ A custom list save path is enabled
311
+
312
+ ## Credits
313
+
314
+ Special thanks to the following projects and contributors:
315
+
316
+ ### Theoretical Research
317
+ - [ar-vits](https://github.com/innnky/ar-vits)
318
+ - [SoundStorm](https://github.com/yangdongchao/SoundStorm/tree/master/soundstorm/s1/AR)
319
+ - [vits](https://github.com/jaywalnut310/vits)
320
+ - [TransferTTS](https://github.com/hcy71o/TransferTTS/blob/master/models.py#L556)
321
+ - [contentvec](https://github.com/auspicious3000/contentvec/)
322
+ - [hifi-gan](https://github.com/jik876/hifi-gan)
323
+ - [fish-speech](https://github.com/fishaudio/fish-speech/blob/main/tools/llama/generate.py#L41)
324
+ ### Pretrained Models
325
+ - [Chinese Speech Pretrain](https://github.com/TencentGameMate/chinese_speech_pretrain)
326
+ - [Chinese-Roberta-WWM-Ext-Large](https://huggingface.co/hfl/chinese-roberta-wwm-ext-large)
327
+ ### Text Frontend for Inference
328
+ - [paddlespeech zh_normalization](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/paddlespeech/t2s/frontend/zh_normalization)
329
+ - [LangSegment](https://github.com/juntaosun/LangSegment)
330
+ - [g2pW](https://github.com/GitYCC/g2pW)
331
+ - [pypinyin-g2pW](https://github.com/mozillazg/pypinyin-g2pW)
332
+ - [paddlespeech g2pw](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/paddlespeech/t2s/frontend/g2pw)
333
+ ### WebUI Tools
334
+ - [ultimatevocalremovergui](https://github.com/Anjok07/ultimatevocalremovergui)
335
+ - [audio-slicer](https://github.com/openvpi/audio-slicer)
336
+ - [SubFix](https://github.com/cronrpc/SubFix)
337
+ - [FFmpeg](https://github.com/FFmpeg/FFmpeg)
338
+ - [gradio](https://github.com/gradio-app/gradio)
339
+ - [faster-whisper](https://github.com/SYSTRAN/faster-whisper)
340
+ - [FunASR](https://github.com/alibaba-damo-academy/FunASR)
341
+
342
+ Thankful to @Naozumi520 for providing the Cantonese training set and for the guidance on Cantonese-related knowledge.
343
+
344
+ ## Thanks to all contributors for their efforts
345
+
346
+ <a href="https://github.com/RVC-Boss/GPT-SoVITS/graphs/contributors" target="_blank">
347
+ <img src="https://contrib.rocks/image?repo=RVC-Boss/GPT-SoVITS" />
348
+ </a>
YouTubeAudioText/.gitattributes ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.lz4 filter=lfs diff=lfs merge=lfs -text
12
+ *.mds filter=lfs diff=lfs merge=lfs -text
13
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
14
+ *.model filter=lfs diff=lfs merge=lfs -text
15
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
16
+ *.npy filter=lfs diff=lfs merge=lfs -text
17
+ *.npz filter=lfs diff=lfs merge=lfs -text
18
+ *.onnx filter=lfs diff=lfs merge=lfs -text
19
+ *.ot filter=lfs diff=lfs merge=lfs -text
20
+ *.parquet filter=lfs diff=lfs merge=lfs -text
21
+ *.pb filter=lfs diff=lfs merge=lfs -text
22
+ *.pickle filter=lfs diff=lfs merge=lfs -text
23
+ *.pkl filter=lfs diff=lfs merge=lfs -text
24
+ *.pt filter=lfs diff=lfs merge=lfs -text
25
+ *.pth filter=lfs diff=lfs merge=lfs -text
26
+ *.rar filter=lfs diff=lfs merge=lfs -text
27
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
28
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
29
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
30
+ *.tar filter=lfs diff=lfs merge=lfs -text
31
+ *.tflite filter=lfs diff=lfs merge=lfs -text
32
+ *.tgz filter=lfs diff=lfs merge=lfs -text
33
+ *.wasm filter=lfs diff=lfs merge=lfs -text
34
+ *.xz filter=lfs diff=lfs merge=lfs -text
35
+ *.zip filter=lfs diff=lfs merge=lfs -text
36
+ *.zst filter=lfs diff=lfs merge=lfs -text
37
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
38
+ # Audio files - uncompressed
39
+ *.pcm filter=lfs diff=lfs merge=lfs -text
40
+ *.sam filter=lfs diff=lfs merge=lfs -text
41
+ *.raw filter=lfs diff=lfs merge=lfs -text
42
+ # Audio files - compressed
43
+ *.aac filter=lfs diff=lfs merge=lfs -text
44
+ *.flac filter=lfs diff=lfs merge=lfs -text
45
+ *.mp3 filter=lfs diff=lfs merge=lfs -text
46
+ *.ogg filter=lfs diff=lfs merge=lfs -text
47
+ *.wav filter=lfs diff=lfs merge=lfs -text
48
+ # Image files - uncompressed
49
+ *.bmp filter=lfs diff=lfs merge=lfs -text
50
+ *.gif filter=lfs diff=lfs merge=lfs -text
51
+ *.png filter=lfs diff=lfs merge=lfs -text
52
+ *.tiff filter=lfs diff=lfs merge=lfs -text
53
+ # Image files - compressed
54
+ *.jpg filter=lfs diff=lfs merge=lfs -text
55
+ *.jpeg filter=lfs diff=lfs merge=lfs -text
56
+ *.webp filter=lfs diff=lfs merge=lfs -text
57
+ # Video files - compressed
58
+ *.mp4 filter=lfs diff=lfs merge=lfs -text
59
+ *.webm filter=lfs diff=lfs merge=lfs -text
YouTubeAudioText/data4.zip ADDED
File without changes
YouTubeAudioText/data7.zip ADDED
File without changes
api.py ADDED
@@ -0,0 +1,940 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ # api.py usage
3
+
4
+ ` python api.py -dr "123.wav" -dt "一二三。" -dl "zh" `
5
+
6
+ ## 执行参数:
7
+
8
+ `-s` - `SoVITS模型路径, 可在 config.py 中指定`
9
+ `-g` - `GPT模型路径, 可在 config.py 中指定`
10
+
11
+ 调用请求缺少参考音频时使用
12
+ `-dr` - `默认参考音频路径`
13
+ `-dt` - `默认参考音频文本`
14
+ `-dl` - `默认参考音频语种, "中文","英文","日文","韩文","粤语","zh","en","ja","ko","yue"`
15
+
16
+ `-d` - `推理设备, "cuda","cpu"`
17
+ `-a` - `绑定地址, 默认"127.0.0.1"`
18
+ `-p` - `绑定端口, 默认9880, 可在 config.py 中指定`
19
+ `-fp` - `覆盖 config.py 使用全精度`
20
+ `-hp` - `覆盖 config.py 使用半精度`
21
+ `-sm` - `流式返回模式, 默认不启用, "close","c", "normal","n", "keepalive","k"`
22
+ `-mt` - `返回的音频编码格式, 流式默认ogg, 非流式默认wav, "wav", "ogg", "aac"`
23
+ `-st` - `返回的音频数据类型, 默认int16, "int16", "int32"`
24
+ `-cp` - `文本切分符号设定, 默认为空, 以",.,。"字符串的方式传入`
25
+
26
+ `-hb` - `cnhubert路径`
27
+ `-b` - `bert路径`
28
+
29
+ ## 调用:
30
+
31
+ ### 推理
32
+
33
+ endpoint: `/`
34
+
35
+ 使用执行参数指定的参考音频:
36
+ GET:
37
+ `http://127.0.0.1:9880?text=先帝创业未半而中道崩殂,今天下三分,益州疲弊,此诚危急存亡之秋也。&text_language=zh`
38
+ POST:
39
+ ```json
40
+ {
41
+ "text": "先帝创业未半而中道崩殂,今天下三分,益州疲弊,此诚危急存亡之秋也。",
42
+ "text_language": "zh"
43
+ }
44
+ ```
45
+
46
+ 使用执行参数指定的参考音频并设定分割符号:
47
+ GET:
48
+ `http://127.0.0.1:9880?text=先帝创业未半而中道崩殂,今天下三分,益州疲弊,此诚危急存亡之秋也。&text_language=zh&cut_punc=,。`
49
+ POST:
50
+ ```json
51
+ {
52
+ "text": "先帝创业未半而中道崩殂,今天下三分,益州疲弊,此诚危急存亡之秋也。",
53
+ "text_language": "zh",
54
+ "cut_punc": ",。",
55
+ }
56
+ ```
57
+
58
+ 手动指定当次推理所使用的参考音频:
59
+ GET:
60
+ `http://127.0.0.1:9880?refer_wav_path=123.wav&prompt_text=一二三。&prompt_language=zh&text=先帝创业未半而中道崩殂,今天下三分,益州疲弊,此诚危急存亡之秋也。&text_language=zh`
61
+ POST:
62
+ ```json
63
+ {
64
+ "refer_wav_path": "123.wav",
65
+ "prompt_text": "一二三。",
66
+ "prompt_language": "zh",
67
+ "text": "先帝创业未半而中道崩殂,今天下三分,益州疲弊,此诚危急存亡之秋也。",
68
+ "text_language": "zh"
69
+ }
70
+ ```
71
+
72
+ RESP:
73
+ 成功: 直接返回 wav 音频流, http code 200
74
+ 失败: 返回包含错误信息的 json, http code 400
75
+
76
+ 手动指定当次推理所使用的参考音频,并提供参数:
77
+ GET:
78
+ `http://127.0.0.1:9880?refer_wav_path=123.wav&prompt_text=一二三。&prompt_language=zh&text=先帝创业未半而中道崩殂,今天下三分,益州疲弊,此诚危急存亡之秋也。&text_language=zh&top_k=20&top_p=0.6&temperature=0.6&speed=1&inp_refs="456.wav"&inp_refs="789.wav"`
79
+ POST:
80
+ ```json
81
+ {
82
+ "refer_wav_path": "123.wav",
83
+ "prompt_text": "一二三。",
84
+ "prompt_language": "zh",
85
+ "text": "先帝创业未半而中道崩殂,今天下三分,益州疲弊,此诚危急存亡之秋也。",
86
+ "text_language": "zh",
87
+ "top_k": 20,
88
+ "top_p": 0.6,
89
+ "temperature": 0.6,
90
+ "speed": 1,
91
+ "inp_refs": ["456.wav","789.wav"]
92
+ }
93
+ ```
94
+
95
+ RESP:
96
+ 成功: 直接返回 wav 音频流, http code 200
97
+ 失败: 返回包含错误信息的 json, http code 400
98
+
99
+
100
+ ### 更换默认参考音频
101
+
102
+ endpoint: `/change_refer`
103
+
104
+ key与推理端一样
105
+
106
+ GET:
107
+ `http://127.0.0.1:9880/change_refer?refer_wav_path=123.wav&prompt_text=一二三。&prompt_language=zh`
108
+ POST:
109
+ ```json
110
+ {
111
+ "refer_wav_path": "123.wav",
112
+ "prompt_text": "一二三。",
113
+ "prompt_language": "zh"
114
+ }
115
+ ```
116
+
117
+ RESP:
118
+ 成功: json, http code 200
119
+ 失败: json, 400
120
+
121
+
122
+ ### 命令控制
123
+
124
+ endpoint: `/control`
125
+
126
+ command:
127
+ "restart": 重新运行
128
+ "exit": 结束运行
129
+
130
+ GET:
131
+ `http://127.0.0.1:9880/control?command=restart`
132
+ POST:
133
+ ```json
134
+ {
135
+ "command": "restart"
136
+ }
137
+ ```
138
+
139
+ RESP: 无
140
+
141
+ """
142
+
143
+
144
+ import argparse
145
+ import os,re
146
+ import sys
147
+
148
+ now_dir = os.getcwd()
149
+ sys.path.append(now_dir)
150
+ sys.path.append("%s/GPT_SoVITS" % (now_dir))
151
+
152
+ import signal
153
+ import LangSegment
154
+ from time import time as ttime
155
+ import torch
156
+ import librosa
157
+ import soundfile as sf
158
+ from fastapi import FastAPI, Request, Query, HTTPException
159
+ from fastapi.responses import StreamingResponse, JSONResponse
160
+ import uvicorn
161
+ from transformers import AutoModelForMaskedLM, AutoTokenizer
162
+ import numpy as np
163
+ from feature_extractor import cnhubert
164
+ from io import BytesIO
165
+ from module.models import SynthesizerTrn
166
+ from AR.models.t2s_lightning_module import Text2SemanticLightningModule
167
+ from text import cleaned_text_to_sequence
168
+ from text.cleaner import clean_text
169
+ from module.mel_processing import spectrogram_torch
170
+ from tools.my_utils import load_audio
171
+ import config as global_config
172
+ import logging
173
+ import subprocess
174
+
175
+
176
class DefaultRefer:
    """Holds the default reference audio used when a TTS request omits
    reference parameters.

    Attributes:
        path: filesystem path of the default reference wav.
        text: transcript of the reference audio.
        language: language code of the transcript.
    """

    def __init__(self, path, text, language):
        # Bug fix: the original ignored its constructor arguments and read
        # the module-level `args` directly, making the parameters dead.
        self.path = path
        self.text = text
        self.language = language

    def is_ready(self) -> bool:
        # Ready only when all three fields are non-empty.
        return is_full(self.path, self.text, self.language)
184
+
185
+
186
def is_empty(*items):
    """Return True only when every item is None or the empty string.

    Any non-empty item makes the whole call False.
    """
    return all(item is None or item == "" for item in items)
191
+
192
+
193
def is_full(*items):
    """Return True only when every item is neither None nor the empty string.

    A single missing item makes the whole call False.
    """
    return all(item is not None and item != "" for item in items)
198
+
199
+
200
class Speaker:
    """Bundles a speaker's GPT and SoVITS models plus optional cached
    prompt features (phones / bert / prompt semantics)."""

    def __init__(self, name, gpt, sovits, phones=None, bert=None, prompt=None):
        self.name, self.gpt, self.sovits = name, gpt, sovits
        self.phones, self.bert, self.prompt = phones, bert, prompt
208
+
209
+ speaker_list = {}
210
+
211
+
212
class Sovits:
    """Pairs a loaded SoVITS synthesizer (`vq_model`) with its
    hyper-parameter config (`hps`)."""

    def __init__(self, vq_model, hps):
        self.vq_model, self.hps = vq_model, hps
216
+
217
def get_sovits_weights(sovits_path):
    """Load a SoVITS checkpoint from disk and wrap it in a Sovits object.

    Reads the config embedded in the checkpoint, detects the model version
    from the text-embedding row count, builds a SynthesizerTrn and loads the
    weights. Relies on module-level globals `is_half`, `device`, `logger`.
    """
    dict_s2 = torch.load(sovits_path, map_location="cpu")
    hps = dict_s2["config"]
    hps = DictToAttrRecursive(hps)
    hps.model.semantic_frame_rate = "25hz"
    # v1 checkpoints carry a 322-row text embedding; anything else is treated as v2.
    if dict_s2['weight']['enc_p.text_embedding.weight'].shape[0] == 322:
        hps.model.version = "v1"
    else:
        hps.model.version = "v2"
    logger.info(f"模型版本: {hps.model.version}")
    model_params_dict = vars(hps.model)
    vq_model = SynthesizerTrn(
        hps.data.filter_length // 2 + 1,
        hps.train.segment_size // hps.data.hop_length,
        n_speakers=hps.data.n_speakers,
        **model_params_dict
    )
    # enc_q is only used during training; fine-tuned inference checkpoints drop it.
    if ("pretrained" not in sovits_path):
        del vq_model.enc_q
    if is_half == True:
        vq_model = vq_model.half().to(device)
    else:
        vq_model = vq_model.to(device)
    vq_model.eval()
    # strict=False: tolerate the missing enc_q keys deleted above.
    vq_model.load_state_dict(dict_s2["weight"], strict=False)

    sovits = Sovits(vq_model, hps)
    return sovits
245
+
246
class Gpt:
    """Pairs a loaded text-to-semantic (GPT) model with `max_sec`, the
    maximum generated duration in seconds used to cap decoding."""

    def __init__(self, max_sec, t2s_model):
        self.max_sec, self.t2s_model = max_sec, t2s_model
250
+
251
# NOTE(review): `global` at module scope is a no-op; `hz` is simply a module
# global. Kept for byte-compatibility.
global hz
hz = 50  # semantic token rate used with max_sec to bound generation length (hz * max_sec)
def get_gpt_weights(gpt_path):
    """Load a GPT (text-to-semantic) checkpoint and wrap it in a Gpt object.

    Uses module-level globals `is_half`, `device`, `logger`.
    """
    dict_s1 = torch.load(gpt_path, map_location="cpu")
    config = dict_s1["config"]
    max_sec = config["data"]["max_sec"]
    t2s_model = Text2SemanticLightningModule(config, "****", is_train=False)
    t2s_model.load_state_dict(dict_s1["weight"])
    if is_half == True:
        t2s_model = t2s_model.half()
    t2s_model = t2s_model.to(device)
    t2s_model.eval()
    # Report parameter count in millions for the startup log.
    total = sum([param.nelement() for param in t2s_model.parameters()])
    logger.info("Number of parameter: %.2fM" % (total / 1e6))

    gpt = Gpt(max_sec, t2s_model)
    return gpt
268
+
269
def change_gpt_sovits_weights(gpt_path,sovits_path):
    """(Re)load both models and register them as the "default" speaker.

    Returns a JSONResponse: code 0 on success, code 400 with the exception
    message when either checkpoint fails to load.
    """
    try:
        gpt = get_gpt_weights(gpt_path)
        sovits = get_sovits_weights(sovits_path)
    except Exception as e:
        return JSONResponse({"code": 400, "message": str(e)}, status_code=400)

    speaker_list["default"] = Speaker(name="default", gpt=gpt, sovits=sovits)
    return JSONResponse({"code": 0, "message": "Success"}, status_code=200)
278
+
279
+
280
def get_bert_feature(text, word2ph):
    """Compute phone-level BERT features for Chinese text.

    Each character's hidden state is repeated `word2ph[i]` times so the
    output aligns with the phoneme sequence. Returns a transposed tensor of
    shape (hidden_dim, n_phones) — presumably hidden_dim is 1024 to match
    get_bert_inf's zero tensor; confirm against the BERT checkpoint.
    """
    with torch.no_grad():
        inputs = tokenizer(text, return_tensors="pt")
        for i in inputs:
            inputs[i] = inputs[i].to(device)  # integer token ids; precision follows bert_model
        res = bert_model(**inputs, output_hidden_states=True)
        # Take the third-from-last hidden layer and trim the [CLS]/[SEP] positions.
        res = torch.cat(res["hidden_states"][-3:-2], -1)[0].cpu()[1:-1]
    # One hidden state per input character is required for the repeat below.
    assert len(word2ph) == len(text)
    phone_level_feature = []
    for i in range(len(word2ph)):
        repeat_feature = res[i].repeat(word2ph[i], 1)
        phone_level_feature.append(repeat_feature)
    phone_level_feature = torch.cat(phone_level_feature, dim=0)
    # if(is_half==True):phone_level_feature=phone_level_feature.half()
    return phone_level_feature.T
295
+
296
+
297
def clean_text_inf(text, language, version):
    """Normalize `text` for the given language/model version and convert the
    resulting phonemes into id sequences.

    Returns (phone_ids, word2ph, norm_text) where word2ph maps each
    normalized character to its phoneme count.
    """
    phones, word2ph, norm_text = clean_text(text, language, version)
    phones = cleaned_text_to_sequence(phones, version)
    return phones, word2ph, norm_text
301
+
302
+
303
def get_bert_inf(phones, word2ph, norm_text, language):
    """Return BERT features for one text segment.

    Only Chinese ("zh") gets real BERT features; every other language gets a
    zero (1024, n_phones) tensor of the precision implied by `is_half`.
    """
    language=language.replace("all_","")
    if language == "zh":
        bert = get_bert_feature(norm_text, word2ph).to(device)#.to(dtype)
    else:
        bert = torch.zeros(
            (1024, len(phones)),
            dtype=torch.float16 if is_half == True else torch.float32,
        ).to(device)

    return bert
314
+
315
from text import chinese
def get_phones_and_bert(text,language,version,final=False):
    """Convert text into (phone_ids, bert_features, normalized_text).

    Single-language modes ("en", "all_zh", "all_ja", "all_ko", "all_yue")
    are processed whole; mixed/auto modes ("zh", "ja", "ko", "yue", "auto",
    "auto_yue") are segmented with LangSegment and each run is processed per
    its detected language. If the result has fewer than 6 phones the text is
    retried once with a leading "." (presumably to stabilize synthesis of
    very short inputs).
    """
    if language in {"en", "all_zh", "all_ja", "all_ko", "all_yue"}:
        language = language.replace("all_","")
        if language == "en":
            LangSegment.setfilters(["en"])
            formattext = " ".join(tmp["text"] for tmp in LangSegment.getTexts(text))
        else:
            # CJK ideographs are ambiguous across languages; trust the caller's choice.
            formattext = text
        while "  " in formattext:
            formattext = formattext.replace("  ", " ")
        if language == "zh":
            if re.search(r'[A-Za-z]', formattext):
                # Latin letters inside Chinese text: uppercase them and
                # normalize the mixed text, then reprocess in mixed "zh" mode.
                formattext = re.sub(r'[a-z]', lambda x: x.group(0).upper(), formattext)
                formattext = chinese.mix_text_normalize(formattext)
                return get_phones_and_bert(formattext,"zh",version)
            else:
                phones, word2ph, norm_text = clean_text_inf(formattext, language, version)
                bert = get_bert_feature(norm_text, word2ph).to(device)
        elif language == "yue" and re.search(r'[A-Za-z]', formattext):
            formattext = re.sub(r'[a-z]', lambda x: x.group(0).upper(), formattext)
            formattext = chinese.mix_text_normalize(formattext)
            return get_phones_and_bert(formattext,"yue",version)
        else:
            phones, word2ph, norm_text = clean_text_inf(formattext, language, version)
            # Non-Chinese: zero BERT features sized to the phone sequence.
            bert = torch.zeros(
                (1024, len(phones)),
                dtype=torch.float16 if is_half == True else torch.float32,
            ).to(device)
    elif language in {"zh", "ja", "ko", "yue", "auto", "auto_yue"}:
        textlist=[]
        langlist=[]
        LangSegment.setfilters(["zh","ja","en","ko"])
        if language == "auto":
            for tmp in LangSegment.getTexts(text):
                langlist.append(tmp["lang"])
                textlist.append(tmp["text"])
        elif language == "auto_yue":
            for tmp in LangSegment.getTexts(text):
                # auto_yue treats detected Chinese as Cantonese.
                if tmp["lang"] == "zh":
                    tmp["lang"] = "yue"
                langlist.append(tmp["lang"])
                textlist.append(tmp["text"])
        else:
            for tmp in LangSegment.getTexts(text):
                if tmp["lang"] == "en":
                    langlist.append(tmp["lang"])
                else:
                    # CJK ideographs are ambiguous; trust the caller's language.
                    langlist.append(language)
                textlist.append(tmp["text"])
        phones_list = []
        bert_list = []
        norm_text_list = []
        # Process each detected-language run independently, then concatenate.
        for i in range(len(textlist)):
            lang = langlist[i]
            phones, word2ph, norm_text = clean_text_inf(textlist[i], lang, version)
            bert = get_bert_inf(phones, word2ph, norm_text, lang)
            phones_list.append(phones)
            norm_text_list.append(norm_text)
            bert_list.append(bert)
        bert = torch.cat(bert_list, dim=1)
        phones = sum(phones_list, [])
        norm_text = ''.join(norm_text_list)

    if not final and len(phones) < 6:
        return get_phones_and_bert("." + text,language,version,final=True)

    return phones,bert.to(torch.float16 if is_half == True else torch.float32),norm_text
385
+
386
+
387
class DictToAttrRecursive(dict):
    """A dict whose entries are also reachable as attributes, recursively.

    Nested dict values are wrapped in DictToAttrRecursive, so config trees
    can be navigated as `cfg.data.sampling_rate` or `cfg["data"]["sampling_rate"]`.
    """

    def __init__(self, source):
        super().__init__(source)
        for key, value in source.items():
            if isinstance(value, dict):
                value = DictToAttrRecursive(value)
                self[key] = value
            # Route through __setattr__ so item and attribute stay in sync.
            setattr(self, key, value)

    def __getattr__(self, name):
        # Only called when normal attribute lookup fails; fall back to the dict.
        if name in self:
            return self[name]
        raise AttributeError(f"Attribute {name} not found")

    def __setattr__(self, name, value):
        if isinstance(value, dict):
            value = DictToAttrRecursive(value)
        # Mirror the assignment into both the dict storage and the instance dict.
        super(DictToAttrRecursive, self).__setitem__(name, value)
        super().__setattr__(name, value)

    def __delattr__(self, name):
        # Deleting the attribute removes the dict entry (mirrors __getattr__).
        if name not in self:
            raise AttributeError(f"Attribute {name} not found")
        del self[name]
413
+
414
+
415
def get_spepc(hps, filename):
    """Load a reference audio file and return its linear spectrogram.

    Args:
        hps: hyper-parameter namespace providing `data.sampling_rate`,
            `data.filter_length`, `data.hop_length`, `data.win_length`.
        filename: path of the audio file to load.

    Returns:
        Spectrogram tensor produced by `spectrogram_torch` (center=False).
    """
    # Fix: pass the target sample rate as a keyword. `sr` is keyword-only in
    # librosa >= 0.10, so the old positional call raises TypeError. This also
    # matches the `librosa.load(..., sr=16000)` usage in get_tts_wav.
    audio, _ = librosa.load(filename, sr=int(hps.data.sampling_rate))
    audio = torch.FloatTensor(audio)
    maxx = audio.abs().max()
    if maxx > 1:
        # Attenuate clipped audio; cap the divisor at 2 to avoid over-quieting.
        audio /= min(2, maxx)
    audio_norm = audio.unsqueeze(0)  # add channel dimension
    spec = spectrogram_torch(audio_norm, hps.data.filter_length, hps.data.sampling_rate,
                             hps.data.hop_length, hps.data.win_length, center=False)
    return spec
426
+
427
+
428
def pack_audio(audio_bytes, data, rate):
    """Append one PCM chunk to `audio_bytes`, encoded per the global media_type.

    Args:
        audio_bytes: BytesIO accumulator.
        data: numpy int16/int32 PCM samples.
        rate: sample rate in Hz.
    """
    if media_type == "ogg":
        audio_bytes = pack_ogg(audio_bytes, data, rate)
    elif media_type == "aac":
        audio_bytes = pack_aac(audio_bytes, data, rate)
    else:
        # WAV cannot be streamed incrementally; buffer raw PCM and wrap it
        # with pack_wav once generation finishes.
        audio_bytes = pack_raw(audio_bytes, data, rate)

    return audio_bytes
438
+
439
+
440
def pack_ogg(audio_bytes, data, rate):
    """Encode one PCM chunk as OGG and append it to the buffer.

    The soundfile write runs on a dedicated thread with an enlarged stack to
    work around a libsndfile stack-overflow bug (links below).
    """
    # Author: AkagawaTsurunaki
    # Issue:
    #   Stack overflow probabilistically occurs
    #   when the function `sf_writef_short` of `libsndfile_64bit.dll` is called
    #   using the Python library `soundfile`
    # Note:
    #   This is an issue related to `libsndfile`, not this project itself.
    #   It happens when you generate a large audio tensor (about 499804 frames in my PC)
    #   and try to convert it to an ogg file.
    # Related:
    #   https://github.com/RVC-Boss/GPT-SoVITS/issues/1199
    #   https://github.com/libsndfile/libsndfile/issues/1023
    #   https://github.com/bastibe/python-soundfile/issues/396
    # Suggestion:
    #   Or split the whole audio data into smaller audio segment to avoid stack overflow?

    def handle_pack_ogg():
        # Mono OGG stream written directly into the shared BytesIO buffer.
        with sf.SoundFile(audio_bytes, mode='w', samplerate=rate, channels=1, format='ogg') as audio_file:
            audio_file.write(data)

    import threading
    # See: https://docs.python.org/3/library/threading.html
    # The stack size of this thread is at least 32768
    # If stack overflow error still occurs, just modify the `stack_size`.
    # stack_size = n * 4096, where n should be a positive integer.
    # Here we chose n = 4096.
    stack_size = 4096 * 4096
    try:
        threading.stack_size(stack_size)
        pack_ogg_thread = threading.Thread(target=handle_pack_ogg)
        pack_ogg_thread.start()
        pack_ogg_thread.join()
    except RuntimeError as e:
        # If changing the thread stack size is unsupported, a RuntimeError is raised.
        print("RuntimeError: {}".format(e))
        print("Changing the thread stack size is unsupported.")
    except ValueError as e:
        # If the specified stack size is invalid, a ValueError is raised and the stack size is unmodified.
        print("ValueError: {}".format(e))
        print("The specified stack size is invalid.")

    return audio_bytes
483
+
484
+
485
def pack_raw(audio_bytes, data, rate):
    """Append raw PCM bytes to the buffer.

    `rate` is unused but kept so all pack_* helpers share one signature.
    """
    audio_bytes.write(data.tobytes())
    return audio_bytes
489
+
490
+
491
def pack_wav(audio_bytes, rate):
    """Wrap buffered raw PCM in a WAV container.

    Interprets the buffer as int32 or int16 samples depending on the global
    `is_int32` flag and returns a fresh BytesIO holding the WAV file.
    """
    if is_int32:
        data = np.frombuffer(audio_bytes.getvalue(),dtype=np.int32)
        wav_bytes = BytesIO()
        sf.write(wav_bytes, data, rate, format='WAV', subtype='PCM_32')
    else:
        data = np.frombuffer(audio_bytes.getvalue(),dtype=np.int16)
        wav_bytes = BytesIO()
        sf.write(wav_bytes, data, rate, format='WAV')
    return wav_bytes
501
+
502
+
503
def pack_aac(audio_bytes, data, rate):
    """Encode one PCM chunk to AAC (ADTS stream) via an ffmpeg subprocess and
    append the encoded bytes to the buffer.

    Requires the `ffmpeg` binary on PATH. Sample format and bitrate follow
    the global `is_int32` flag.
    """
    if is_int32:
        pcm = 's32le'
        bit_rate = '256k'
    else:
        pcm = 's16le'
        bit_rate = '128k'
    process = subprocess.Popen([
        'ffmpeg',
        '-f', pcm,           # input: signed little-endian PCM
        '-ar', str(rate),    # input sample rate
        '-ac', '1',          # mono
        '-i', 'pipe:0',      # read input from stdin
        '-c:a', 'aac',       # AAC encoder
        '-b:a', bit_rate,    # output bitrate
        '-vn',               # no video
        '-f', 'adts',        # raw AAC (ADTS) stream container
        'pipe:1'             # write output to stdout
    ], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, _ = process.communicate(input=data.tobytes())
    audio_bytes.write(out)

    return audio_bytes
526
+
527
+
528
def read_clean_buffer(audio_bytes):
    """Drain the buffer: return (emptied buffer, bytes accumulated so far)."""
    audio_chunk = audio_bytes.getvalue()
    # Reset the buffer in place so it can keep accumulating the next chunk.
    audio_bytes.seek(0)
    audio_bytes.truncate(0)
    return audio_bytes, audio_chunk
534
+
535
+
536
def cut_text(text, punc):
    """Insert newlines after the requested punctuation marks.

    Only characters from a fixed whitelist of sentence punctuation are
    honored; everything else in `punc` is ignored. Consecutive newlines are
    collapsed so downstream splitting yields no empty segments.
    """
    allowed = {",", ".", ";", "?", "!", "、", ",", "。", "?", "!", ";", ":", "…"}
    seps = [ch for ch in punc if ch in allowed]
    if len(seps) > 0:
        pattern = r"[" + "".join(seps) + r"]"
        text = text.strip("\n")
        pieces = re.split(f"({pattern})", text)
        # Re-attach each separator to the sentence that precedes it.
        merged = [segment + sep for segment, sep in zip(pieces[::2], pieces[1::2])]
        # Keep a trailing sentence that has no closing punctuation.
        if len(pieces) % 2 == 1:
            merged.append(pieces[-1])
        text = "\n".join(merged)

    while "\n\n" in text:
        text = text.replace("\n\n", "\n")

    return text
552
+
553
+
554
def only_punc(text):
    """True when the text contains no alphanumeric character at all
    (i.e. it is punctuation/whitespace only, or empty)."""
    return all(not ch.isalnum() for ch in text)
556
+
557
+
558
# Sentence-final punctuation; text lacking one gets a period appended so the
# model sees a terminated sentence.
splits = {",", "。", "?", "!", ",", ".", "?", "!", "~", ":", ":", "—", "…", }
def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language, top_k= 15, top_p = 0.6, temperature = 0.6, speed = 1, inp_refs = None, spk = "default"):
    """Generator yielding encoded audio for `text`, conditioned on a reference clip.

    Pipeline per newline-separated segment: text -> phones/BERT -> GPT semantic
    tokens -> SoVITS vocoder -> PCM -> pack_audio. In stream_mode "normal" it
    yields one encoded chunk per segment; otherwise it yields a single final
    buffer (wrapped as WAV when media_type == "wav").
    """
    infer_sovits = speaker_list[spk].sovits
    vq_model = infer_sovits.vq_model
    hps = infer_sovits.hps

    infer_gpt = speaker_list[spk].gpt
    t2s_model = infer_gpt.t2s_model
    max_sec = infer_gpt.max_sec

    t0 = ttime()
    prompt_text = prompt_text.strip("\n")
    # Ensure the prompt ends with sentence punctuation.
    if (prompt_text[-1] not in splits): prompt_text += "。" if prompt_language != "en" else "."
    prompt_language, text = prompt_language, text.strip("\n")
    dtype = torch.float16 if is_half == True else torch.float32
    # 0.3 s of silence appended after the reference and between segments.
    zero_wav = np.zeros(int(hps.data.sampling_rate * 0.3), dtype=np.float16 if is_half == True else np.float32)
    with torch.no_grad():
        wav16k, sr = librosa.load(ref_wav_path, sr=16000)
        wav16k = torch.from_numpy(wav16k)
        zero_wav_torch = torch.from_numpy(zero_wav)
        if (is_half == True):
            wav16k = wav16k.half().to(device)
            zero_wav_torch = zero_wav_torch.half().to(device)
        else:
            wav16k = wav16k.to(device)
            zero_wav_torch = zero_wav_torch.to(device)
        wav16k = torch.cat([wav16k, zero_wav_torch])
        # SSL features of the reference -> quantized semantic prompt tokens.
        ssl_content = ssl_model.model(wav16k.unsqueeze(0))["last_hidden_state"].transpose(1, 2)  # .float()
        codes = vq_model.extract_latent(ssl_content)
        prompt_semantic = codes[0, 0]
        prompt = prompt_semantic.unsqueeze(0).to(device)

        # Spectrograms of the timbre reference clips; fall back to the main
        # reference when none of the extra refs load.
        refers=[]
        if(inp_refs):
            for path in inp_refs:
                try:
                    refer = get_spepc(hps, path).to(dtype).to(device)
                    refers.append(refer)
                except Exception as e:
                    logger.error(e)
        if(len(refers)==0):
            refers = [get_spepc(hps, ref_wav_path).to(dtype).to(device)]

    t1 = ttime()
    version = vq_model.version
    os.environ['version'] = version
    prompt_language = dict_language[prompt_language.lower()]
    text_language = dict_language[text_language.lower()]
    phones1, bert1, norm_text1 = get_phones_and_bert(prompt_text, prompt_language, version)
    texts = text.split("\n")
    audio_bytes = BytesIO()

    for text in texts:
        # Skip pure-punctuation lines to avoid leaking the reference audio.
        if only_punc(text):
            continue

        audio_opt = []
        if (text[-1] not in splits): text += "。" if text_language != "en" else "."
        phones2, bert2, norm_text2 = get_phones_and_bert(text, text_language, version)
        # Prompt and target features are concatenated for the GPT stage.
        bert = torch.cat([bert1, bert2], 1)

        all_phoneme_ids = torch.LongTensor(phones1 + phones2).to(device).unsqueeze(0)
        bert = bert.to(device).unsqueeze(0)
        all_phoneme_len = torch.tensor([all_phoneme_ids.shape[-1]]).to(device)
        t2 = ttime()
        with torch.no_grad():
            pred_semantic, idx = t2s_model.model.infer_panel(
                all_phoneme_ids,
                all_phoneme_len,
                prompt,
                bert,
                # prompt_phone_len=ph_offset,
                top_k = top_k,
                top_p = top_p,
                temperature = temperature,
                early_stop_num=hz * max_sec)  # cap generation at max_sec seconds
            # Keep only the newly generated tokens (drop the prompt part).
            pred_semantic = pred_semantic[:, -idx:].unsqueeze(0)
        t3 = ttime()
        audio = \
            vq_model.decode(pred_semantic, torch.LongTensor(phones2).to(device).unsqueeze(0),
                            refers,speed=speed).detach().cpu().numpy()[
                0, 0]  # decode without the prompt portion
        # Normalize clipped output back into [-1, 1].
        max_audio=np.abs(audio).max()
        if max_audio>1:
            audio/=max_audio
        audio_opt.append(audio)
        audio_opt.append(zero_wav)
        t4 = ttime()
        # Scale float audio to the requested integer PCM width.
        if is_int32:
            audio_bytes = pack_audio(audio_bytes,(np.concatenate(audio_opt, 0) * 2147483647).astype(np.int32),hps.data.sampling_rate)
        else:
            audio_bytes = pack_audio(audio_bytes,(np.concatenate(audio_opt, 0) * 32768).astype(np.int16),hps.data.sampling_rate)
        # logger.info("%.3f\t%.3f\t%.3f\t%.3f" % (t1 - t0, t2 - t1, t3 - t2, t4 - t3))
        if stream_mode == "normal":
            audio_bytes, audio_chunk = read_clean_buffer(audio_bytes)
            yield audio_chunk

    if not stream_mode == "normal":
        if media_type == "wav":
            audio_bytes = pack_wav(audio_bytes,hps.data.sampling_rate)
        yield audio_bytes.getvalue()
660
+
661
+
662
+
663
def handle_control(command):
    """Process a /control command.

    "restart" re-execs the current process with the same argv; "exit" sends
    SIGTERM to this process and exits. Unknown commands are ignored.
    """
    if command == "restart":
        os.execl(g_config.python_exec, g_config.python_exec, *sys.argv)
    elif command == "exit":
        os.kill(os.getpid(), signal.SIGTERM)
        exit(0)
669
+
670
+
671
def handle_change(path, text, language):
    """Update the default reference audio settings.

    Only non-empty arguments overwrite the current defaults; all-empty input
    yields a 400 response. Returns a JSONResponse in both cases.
    """
    if is_empty(path, text, language):
        return JSONResponse({"code": 400, "message": '缺少任意一项以下参数: "path", "text", "language"'}, status_code=400)

    # Bug fix: the original used `or` (e.g. `path != "" or path is not None`),
    # a condition that is always true, so None/"" clobbered the defaults.
    if path is not None and path != "":
        default_refer.path = path
    if text is not None and text != "":
        default_refer.text = text
    if language is not None and language != "":
        default_refer.language = language

    logger.info(f"当前默认参考音频路径: {default_refer.path}")
    logger.info(f"当前默认参考音频文本: {default_refer.text}")
    logger.info(f"当前默认参考音频语种: {default_refer.language}")
    logger.info(f"is_ready: {default_refer.is_ready()}")

    return JSONResponse({"code": 0, "message": "Success"}, status_code=200)
689
+
690
+
691
def handle(refer_wav_path, prompt_text, prompt_language, text, text_language, cut_punc, top_k, top_p, temperature, speed, inp_refs):
    """Main TTS request handler.

    Falls back to the preset default reference when any reference field is
    missing (400 if no preset exists), applies text cutting, and streams the
    synthesized audio with the configured media type.
    """
    if (
        refer_wav_path == "" or refer_wav_path is None
        or prompt_text == "" or prompt_text is None
        or prompt_language == "" or prompt_language is None
    ):
        refer_wav_path, prompt_text, prompt_language = (
            default_refer.path,
            default_refer.text,
            default_refer.language,
        )
        if not default_refer.is_ready():
            return JSONResponse({"code": 400, "message": "未指定参考音频且接口无预设"}, status_code=400)

    # Idiom fix: compare against None with `is`, not `==`.
    if cut_punc is None:
        text = cut_text(text, default_cut_punc)
    else:
        text = cut_text(text, cut_punc)

    return StreamingResponse(
        get_tts_wav(refer_wav_path, prompt_text, prompt_language, text, text_language, top_k, top_p, temperature, speed, inp_refs),
        media_type="audio/" + media_type,
    )
711
+
712
+
713
+
714
+
715
# --------------------------------
# Initialization (module-level script: config, CLI args, model loading)
# --------------------------------
# Maps human-readable language labels and short codes to internal language modes.
dict_language = {
    "中文": "all_zh",
    "粤语": "all_yue",
    "英文": "en",
    "日文": "all_ja",
    "韩文": "all_ko",
    "中英混合": "zh",
    "粤英混合": "yue",
    "日英混合": "ja",
    "韩英混合": "ko",
    "多语种混合": "auto",  # multi-language input: detect the language per segment
    "多语种混合(粤语)": "auto_yue",
    "all_zh": "all_zh",
    "all_yue": "all_yue",
    "en": "en",
    "all_ja": "all_ja",
    "all_ko": "all_ko",
    "zh": "zh",
    "yue": "yue",
    "ja": "ja",
    "ko": "ko",
    "auto": "auto",
    "auto_yue": "auto_yue",
}

# Logger: reuse uvicorn's logging configuration.
# NOTE(review): this relies on `logging.config` being importable even though
# only `import logging` appears above — presumably uvicorn imports it as a
# side effect; confirm, or add `import logging.config` explicitly.
logging.config.dictConfig(uvicorn.config.LOGGING_CONFIG)
logger = logging.getLogger('uvicorn')

# Project configuration
g_config = global_config.Config()

# CLI arguments
parser = argparse.ArgumentParser(description="GPT-SoVITS api")

parser.add_argument("-s", "--sovits_path", type=str, default=g_config.sovits_path, help="SoVITS模型路径")
parser.add_argument("-g", "--gpt_path", type=str, default=g_config.gpt_path, help="GPT模型路径")
parser.add_argument("-dr", "--default_refer_path", type=str, default="", help="默认参考音频路径")
parser.add_argument("-dt", "--default_refer_text", type=str, default="", help="默认参考音频文本")
parser.add_argument("-dl", "--default_refer_language", type=str, default="", help="默认参考音频语种")
parser.add_argument("-d", "--device", type=str, default=g_config.infer_device, help="cuda / cpu")
parser.add_argument("-a", "--bind_addr", type=str, default="0.0.0.0", help="default: 0.0.0.0")
parser.add_argument("-p", "--port", type=int, default=g_config.api_port, help="default: 9880")
parser.add_argument("-fp", "--full_precision", action="store_true", default=False, help="覆盖config.is_half为False, 使用全精度")
parser.add_argument("-hp", "--half_precision", action="store_true", default=False, help="覆盖config.is_half为True, 使用半精度")
# Boolean flags are used like `python ./api.py -fp ...`
# which yields full_precision==True, half_precision==False
parser.add_argument("-sm", "--stream_mode", type=str, default="close", help="流式返回模式, close / normal / keepalive")
parser.add_argument("-mt", "--media_type", type=str, default="wav", help="音频编码格式, wav / ogg / aac")
parser.add_argument("-st", "--sub_type", type=str, default="int16", help="音频数据类型, int16 / int32")
parser.add_argument("-cp", "--cut_punc", type=str, default="", help="文本切分符号设定, 符号范围,.;?!、,。?!;:…")
# Common sentence cutting example: `python ./api.py -cp ".?!。?!"`
parser.add_argument("-hb", "--hubert_path", type=str, default=g_config.cnhubert_path, help="覆盖config.cnhubert_path")
parser.add_argument("-b", "--bert_path", type=str, default=g_config.bert_path, help="覆盖config.bert_path")

args = parser.parse_args()
sovits_path = args.sovits_path
gpt_path = args.gpt_path
device = args.device
port = args.port
host = args.bind_addr
cnhubert_base_path = args.hubert_path
bert_path = args.bert_path
default_cut_punc = args.cut_punc

# Default reference audio (used when a request omits reference parameters)
default_refer = DefaultRefer(args.default_refer_path, args.default_refer_text, args.default_refer_language)

# Model path checks: fall back to the pretrained checkpoints when unset.
if sovits_path == "":
    sovits_path = g_config.pretrained_sovits_path
    logger.warn(f"未指定SoVITS模型路径, fallback后当前值: {sovits_path}")
if gpt_path == "":
    gpt_path = g_config.pretrained_gpt_path
    logger.warn(f"未指定GPT模型路径, fallback后当前值: {gpt_path}")

# The default reference is all-or-nothing: if any field is missing, clear all.
if default_refer.path == "" or default_refer.text == "" or default_refer.language == "":
    default_refer.path, default_refer.text, default_refer.language = "", "", ""
    logger.info("未指定默认参考音频")
else:
    logger.info(f"默认参考音频路径: {default_refer.path}")
    logger.info(f"默认参考音频文本: {default_refer.text}")
    logger.info(f"默认参考音频语种: {default_refer.language}")

# Precision: CLI flags override config; contradictory flags fall back to config.
is_half = g_config.is_half
if args.full_precision:
    is_half = False
if args.half_precision:
    is_half = True
if args.full_precision and args.half_precision:
    is_half = g_config.is_half  # both flags given: fall back to config
logger.info(f"半精: {is_half}")

# Streaming mode
if args.stream_mode.lower() in ["normal","n"]:
    stream_mode = "normal"
    logger.info("流式返回已开启")
else:
    stream_mode = "close"

# Audio encoding format: non-streaming defaults to wav, streaming to ogg.
if args.media_type.lower() in ["aac","ogg"]:
    media_type = args.media_type.lower()
elif stream_mode == "close":
    media_type = "wav"
else:
    media_type = "ogg"
logger.info(f"编码格式: {media_type}")

# PCM sample type
if args.sub_type.lower() == 'int32':
    is_int32 = True
    logger.info(f"数据类型: int32")
else:
    is_int32 = False
    logger.info(f"数据类型: int16")

# Initialize models: tokenizer, BERT, HuBERT SSL model, then GPT + SoVITS.
cnhubert.cnhubert_base_path = cnhubert_base_path
tokenizer = AutoTokenizer.from_pretrained(bert_path)
bert_model = AutoModelForMaskedLM.from_pretrained(bert_path)
ssl_model = cnhubert.get_model()
if is_half:
    bert_model = bert_model.half().to(device)
    ssl_model = ssl_model.half().to(device)
else:
    bert_model = bert_model.to(device)
    ssl_model = ssl_model.to(device)
change_gpt_sovits_weights(gpt_path = gpt_path, sovits_path = sovits_path)
849
+
850
+
851
+
852
# --------------------------------
# API endpoints
# --------------------------------
app = FastAPI()

# NOTE(review): each GET/POST pair below reuses the same function name.
# FastAPI registers every route at decoration time, so the later def
# shadowing the earlier module-level name appears harmless — confirm intended.

@app.post("/set_model")
async def set_model(request: Request):
    """POST /set_model: hot-swap GPT/SoVITS weights from a JSON body."""
    json_post_raw = await request.json()
    return change_gpt_sovits_weights(
        gpt_path = json_post_raw.get("gpt_model_path"),
        sovits_path = json_post_raw.get("sovits_model_path")
    )


@app.get("/set_model")
async def set_model(
    gpt_model_path: str = None,
    sovits_model_path: str = None,
):
    """GET /set_model: hot-swap GPT/SoVITS weights via query parameters."""
    return change_gpt_sovits_weights(gpt_path = gpt_model_path, sovits_path = sovits_model_path)


@app.post("/control")
async def control(request: Request):
    """POST /control: run a server command ("restart" / "exit")."""
    json_post_raw = await request.json()
    return handle_control(json_post_raw.get("command"))


@app.get("/control")
async def control(command: str = None):
    """GET /control: run a server command ("restart" / "exit")."""
    return handle_control(command)


@app.post("/change_refer")
async def change_refer(request: Request):
    """POST /change_refer: replace the default reference audio settings."""
    json_post_raw = await request.json()
    return handle_change(
        json_post_raw.get("refer_wav_path"),
        json_post_raw.get("prompt_text"),
        json_post_raw.get("prompt_language")
    )


@app.get("/change_refer")
async def change_refer(
    refer_wav_path: str = None,
    prompt_text: str = None,
    prompt_language: str = None
):
    """GET /change_refer: replace the default reference audio settings."""
    return handle_change(refer_wav_path, prompt_text, prompt_language)


@app.post("/")
async def tts_endpoint(request: Request):
    """POST /: synthesize speech from a JSON body; streams encoded audio."""
    json_post_raw = await request.json()
    return handle(
        json_post_raw.get("refer_wav_path"),
        json_post_raw.get("prompt_text"),
        json_post_raw.get("prompt_language"),
        json_post_raw.get("text"),
        json_post_raw.get("text_language"),
        json_post_raw.get("cut_punc"),
        json_post_raw.get("top_k", 15),
        json_post_raw.get("top_p", 1.0),
        json_post_raw.get("temperature", 1.0),
        json_post_raw.get("speed", 1.0),
        json_post_raw.get("inp_refs", [])
    )


@app.get("/")
async def tts_endpoint(
    refer_wav_path: str = None,
    prompt_text: str = None,
    prompt_language: str = None,
    text: str = None,
    text_language: str = None,
    cut_punc: str = None,
    top_k: int = 15,
    top_p: float = 1.0,
    temperature: float = 1.0,
    speed: float = 1.0,
    inp_refs: list = Query(default=[])
):
    """GET /: synthesize speech via query parameters; streams encoded audio."""
    return handle(refer_wav_path, prompt_text, prompt_language, text, text_language, cut_punc, top_k, top_p, temperature, speed, inp_refs)


if __name__ == "__main__":
    # Single worker: the loaded models live in this process's memory.
    uvicorn.run(app, host=host, port=port, workers=1)
colab_webui.ipynb ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nbformat": 4,
3
+ "nbformat_minor": 0,
4
+ "metadata": {
5
+ "colab": {
6
+ "provenance": [],
7
+ "include_colab_link": true
8
+ },
9
+ "kernelspec": {
10
+ "name": "python3",
11
+ "display_name": "Python 3"
12
+ },
13
+ "accelerator": "GPU"
14
+ },
15
+ "cells": [
16
+ {
17
+ "cell_type": "markdown",
18
+ "metadata": {
19
+ "id": "view-in-github",
20
+ "colab_type": "text"
21
+ },
22
+ "source": [
23
+ "<a href=\"https://colab.research.google.com/github/RVC-Boss/GPT-SoVITS/blob/main/colab_webui.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
24
+ ]
25
+ },
26
+ {
27
+ "cell_type": "markdown",
28
+ "source": [
29
+ "环境配置 environment"
30
+ ],
31
+ "metadata": {
32
+ "id": "_o6a8GS2lWQM"
33
+ }
34
+ },
35
+ {
36
+ "cell_type": "code",
37
+ "metadata": {
38
+ "id": "e9b7iFV3dm1f"
39
+ },
40
+ "source": [
41
+ "!pip install -q condacolab\n",
42
+ "# Setting up condacolab and installing packages\n",
43
+ "import condacolab\n",
44
+ "condacolab.install_from_url(\"https://repo.anaconda.com/miniconda/Miniconda3-py39_23.11.0-2-Linux-x86_64.sh\")\n",
45
+ "%cd -q /content\n",
46
+ "!git clone https://github.com/RVC-Boss/GPT-SoVITS\n",
47
+ "!conda install -y -q -c pytorch -c nvidia cudatoolkit\n",
48
+ "%cd -q /content/GPT-SoVITS\n",
49
+ "!conda install -y -q -c conda-forge gcc gxx ffmpeg cmake -c pytorch -c nvidia\n",
50
+ "!/usr/local/bin/pip install -r requirements.txt"
51
+ ],
52
+ "execution_count": null,
53
+ "outputs": []
54
+ },
55
+ {
56
+ "cell_type": "code",
57
+ "source": [
58
+ "# @title Download pretrained models 下载预训练模型\n",
59
+ "!mkdir -p /content/GPT-SoVITS/GPT_SoVITS/pretrained_models\n",
60
+ "!mkdir -p /content/GPT-SoVITS/tools/damo_asr/models\n",
61
+ "!mkdir -p /content/GPT-SoVITS/tools/uvr5\n",
62
+ "%cd /content/GPT-SoVITS/GPT_SoVITS/pretrained_models\n",
63
+ "!git clone https://huggingface.co/lj1995/GPT-SoVITS\n",
64
+ "%cd /content/GPT-SoVITS/tools/damo_asr/models\n",
65
+ "!git clone https://www.modelscope.cn/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch.git\n",
66
+ "!git clone https://www.modelscope.cn/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch.git\n",
67
+ "!git clone https://www.modelscope.cn/damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch.git\n",
68
+ "# @title UVR5 pretrains 安装uvr5模型\n",
69
+ "%cd /content/GPT-SoVITS/tools/uvr5\n",
70
+ "%rm -r uvr5_weights\n",
71
+ "!git clone https://huggingface.co/Delik/uvr5_weights\n",
72
+ "!git config core.sparseCheckout true\n",
73
+ "!mv /content/GPT-SoVITS/GPT_SoVITS/pretrained_models/GPT-SoVITS/* /content/GPT-SoVITS/GPT_SoVITS/pretrained_models/"
74
+ ],
75
+ "metadata": {
76
+ "id": "0NgxXg5sjv7z"
77
+ },
78
+ "execution_count": null,
79
+ "outputs": []
80
+ },
81
+ {
82
+ "cell_type": "code",
83
+ "source": [
84
+ "# @title launch WebUI 启动WebUI\n",
85
+ "!/usr/local/bin/pip install ipykernel\n",
86
+ "!sed -i '10s/False/True/' /content/GPT-SoVITS/config.py\n",
87
+ "%cd /content/GPT-SoVITS/\n",
88
+ "!/usr/local/bin/python webui.py"
89
+ ],
90
+ "metadata": {
91
+ "id": "4oRGUzkrk8C7"
92
+ },
93
+ "execution_count": null,
94
+ "outputs": []
95
+ }
96
+ ]
97
+ }
go-webui.bat ADDED
@@ -0,0 +1,2 @@
 
 
 
1
@rem Launch the WebUI with the bundled Python runtime, passing "zh_CN" as its argument.
runtime\python.exe webui.py zh_CN
@rem Keep the console window open after the WebUI exits.
pause
go-webui.ps1 ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
# Keep the window open even if a command reports an error.
$ErrorActionPreference = "SilentlyContinue"
# Switch the console code page to UTF-8 so non-ASCII output renders correctly.
chcp 65001
# BUG FIX: "zh_CN" was previously inside the quoted script path, so the call
# operator passed a single (nonexistent) path "webui.py zh_CN" to Python.
# The language code must be a separate argument after the quoted path.
& "$PSScriptRoot\runtime\python.exe" "$PSScriptRoot\webui.py" zh_CN
pause
install.sh ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
#!/bin/bash
# Install C/C++ toolchain and media/build dependencies via conda.
conda install -c conda-forge gcc
conda install -c conda-forge gxx
conda install ffmpeg cmake
# Pinned PyTorch stack built against CUDA 11.8.
conda install pytorch==2.1.1 torchvision==0.16.1 torchaudio==2.1.1 pytorch-cuda=11.8 -c pytorch -c nvidia
# Remaining Python dependencies.
pip install -r requirements.txt
pretrained_models/chinese-hubert-base/config.json ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation_dropout": 0.1,
3
+ "apply_spec_augment": true,
4
+ "architectures": [
5
+ "HubertModel"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "bos_token_id": 1,
9
+ "classifier_proj_size": 256,
10
+ "conv_bias": false,
11
+ "conv_dim": [
12
+ 512,
13
+ 512,
14
+ 512,
15
+ 512,
16
+ 512,
17
+ 512,
18
+ 512
19
+ ],
20
+ "conv_kernel": [
21
+ 10,
22
+ 3,
23
+ 3,
24
+ 3,
25
+ 3,
26
+ 2,
27
+ 2
28
+ ],
29
+ "conv_stride": [
30
+ 5,
31
+ 2,
32
+ 2,
33
+ 2,
34
+ 2,
35
+ 2,
36
+ 2
37
+ ],
38
+ "ctc_loss_reduction": "sum",
39
+ "ctc_zero_infinity": false,
40
+ "do_stable_layer_norm": false,
41
+ "eos_token_id": 2,
42
+ "feat_extract_activation": "gelu",
43
+ "feat_extract_norm": "group",
44
+ "feat_proj_dropout": 0.0,
45
+ "feat_proj_layer_norm": true,
46
+ "final_dropout": 0.1,
47
+ "hidden_act": "gelu",
48
+ "hidden_dropout": 0.1,
49
+ "hidden_size": 768,
50
+ "initializer_range": 0.02,
51
+ "intermediate_size": 3072,
52
+ "layer_norm_eps": 1e-05,
53
+ "layerdrop": 0.1,
54
+ "mask_feature_length": 10,
55
+ "mask_feature_min_masks": 0,
56
+ "mask_feature_prob": 0.0,
57
+ "mask_time_length": 10,
58
+ "mask_time_min_masks": 2,
59
+ "mask_time_prob": 0.05,
60
+ "model_type": "hubert",
61
+ "num_attention_heads": 12,
62
+ "num_conv_pos_embedding_groups": 16,
63
+ "num_conv_pos_embeddings": 128,
64
+ "num_feat_extract_layers": 7,
65
+ "num_hidden_layers": 12,
66
+ "pad_token_id": 0,
67
+ "torch_dtype": "float32",
68
+ "transformers_version": "4.20.0.dev0",
69
+ "use_weighted_layer_sum": false,
70
+ "vocab_size": 32
71
+ }
pretrained_models/chinese-hubert-base/preprocessor_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0,
7
+ "return_attention_mask": false,
8
+ "sampling_rate": 16000
9
+ }
requirements.txt ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ numpy==1.23.4
2
+ scipy
3
+ tensorboard
4
+ librosa==0.9.2
5
+ numba==0.56.4
6
+ pytorch-lightning
7
+ gradio>=4.0,<=4.24.0
8
+ ffmpeg-python
9
+ onnxruntime; sys_platform == 'darwin'
10
+ onnxruntime-gpu; sys_platform != 'darwin'
11
+ tqdm
12
+ funasr==1.0.27
13
+ cn2an
14
+ pypinyin
15
+ pyopenjtalk>=0.3.4
16
+ g2p_en
17
+ torchaudio
18
+ modelscope==1.10.0
19
+ sentencepiece
20
+ transformers
21
+ chardet
22
+ PyYAML
23
+ psutil
24
+ jieba_fast
25
+ jieba
26
+ LangSegment>=0.2.0
27
+ Faster_Whisper
28
+ wordsegment
29
+ rotary_embedding_torch
30
+ pyjyutping
31
+ g2pk2
32
+ ko_pron
33
+ opencc; sys_platform != 'linux'
34
+ opencc==1.1.1; sys_platform == 'linux'
35
+ python_mecab_ko; sys_platform != 'win32'
36
+ fastapi<0.112.2
wandb/debug-internal.log ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2024-12-04T06:07:02.658364831Z","level":"INFO","msg":"using version","core version":"0.18.7"}
2
+ {"time":"2024-12-04T06:07:02.658507656Z","level":"INFO","msg":"created symlink","path":"/workspace/GPT-SoVITS/wandb/run-20241204_060702-yfryieml/logs/debug-core.log"}
3
+ {"time":"2024-12-04T06:07:02.774333381Z","level":"INFO","msg":"created new stream","id":"yfryieml"}
4
+ {"time":"2024-12-04T06:07:02.774640768Z","level":"INFO","msg":"stream: started","id":"yfryieml"}
5
+ {"time":"2024-12-04T06:07:02.7747967Z","level":"INFO","msg":"writer: Do: started","stream_id":"yfryieml"}
6
+ {"time":"2024-12-04T06:07:02.774821631Z","level":"INFO","msg":"handler: started","stream_id":"yfryieml"}
7
+ {"time":"2024-12-04T06:07:02.774802368Z","level":"INFO","msg":"sender: started","stream_id":"yfryieml"}
8
+ {"time":"2024-12-04T06:07:03.049190221Z","level":"INFO","msg":"Starting system monitor"}
9
+ {"time":"2024-12-04T07:59:52.403867882Z","level":"INFO","msg":"stream: closing","id":"yfryieml"}
10
+ {"time":"2024-12-04T07:59:52.404822987Z","level":"INFO","msg":"Stopping system monitor"}
11
+ {"time":"2024-12-04T07:59:52.415019985Z","level":"INFO","msg":"Stopped system monitor"}
12
+ {"time":"2024-12-04T07:59:53.952775387Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
13
+ {"time":"2024-12-04T07:59:54.233678016Z","level":"INFO","msg":"handler: closed","stream_id":"yfryieml"}
14
+ {"time":"2024-12-04T07:59:54.233767202Z","level":"INFO","msg":"writer: Close: closed","stream_id":"yfryieml"}
15
+ {"time":"2024-12-04T07:59:54.233797446Z","level":"INFO","msg":"sender: closed","stream_id":"yfryieml"}
16
+ {"time":"2024-12-04T07:59:54.235023872Z","level":"INFO","msg":"stream: closed","id":"yfryieml"}
wandb/debug.log ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2024-12-04 06:07:02,640 INFO MainThread:47802 [wandb_setup.py:_flush():79] Current SDK version is 0.18.7
2
+ 2024-12-04 06:07:02,640 INFO MainThread:47802 [wandb_setup.py:_flush():79] Configure stats pid to 47802
3
+ 2024-12-04 06:07:02,641 INFO MainThread:47802 [wandb_setup.py:_flush():79] Loading settings from /root/.config/wandb/settings
4
+ 2024-12-04 06:07:02,641 INFO MainThread:47802 [wandb_setup.py:_flush():79] Loading settings from /workspace/GPT-SoVITS/wandb/settings
5
+ 2024-12-04 06:07:02,642 INFO MainThread:47802 [wandb_setup.py:_flush():79] Loading settings from environment variables: {}
6
+ 2024-12-04 06:07:02,642 INFO MainThread:47802 [wandb_setup.py:_flush():79] Applying setup settings: {'mode': None, '_disable_service': None}
7
+ 2024-12-04 06:07:02,643 INFO MainThread:47802 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'GPT_SoVITS/s1_train.py', 'program_abspath': '/workspace/GPT-SoVITS/GPT_SoVITS/s1_train.py', 'program': '/workspace/GPT-SoVITS/GPT_SoVITS/s1_train.py'}
8
+ 2024-12-04 06:07:02,643 INFO MainThread:47802 [wandb_setup.py:_flush():79] Applying login settings: {}
9
+ 2024-12-04 06:07:02,644 INFO MainThread:47802 [wandb_init.py:_log_setup():533] Logging user logs to /workspace/GPT-SoVITS/wandb/run-20241204_060702-yfryieml/logs/debug.log
10
+ 2024-12-04 06:07:02,645 INFO MainThread:47802 [wandb_init.py:_log_setup():534] Logging internal logs to /workspace/GPT-SoVITS/wandb/run-20241204_060702-yfryieml/logs/debug-internal.log
11
+ 2024-12-04 06:07:02,645 INFO MainThread:47802 [wandb_init.py:init():619] calling init triggers
12
+ 2024-12-04 06:07:02,646 INFO MainThread:47802 [wandb_init.py:init():626] wandb.init called with sweep_config: {}
13
+ config: {'output_dir': 'logs/s1', 'train': {'seed': 1234, 'epochs': 15, 'batch_size': 8, 'save_every_n_epoch': 5, 'precision': 32, 'if_save_latest': True, 'if_save_every_weights': True, 'exp_name': 'gpt_training', 'half_weights_save_dir': 'weights/s1', 'wandb': {'project': 'gpt-sovits-hindi', 'name': 'stage1_training', 'entity': None, 'log_interval': 100}}, 'optimizer': {'lr_init': 0.0001, 'lr': 0.0004, 'lr_end': 1e-05, 'warmup_steps': 4000, 'decay_steps': 50000}, 'data': {'training_files': 'data8', 'max_sec': 60, 'max_frames': 60, 'filter_length': 2048, 'hop_length': 640, 'win_length': 2048, 'mel_channels': 128, 'mel_fmin': 0.0, 'mel_fmax': None, 'cleaned_text': True, 'num_workers': 4, 'batch_size': 8, 'pad_val': 1024}, 'train_semantic_path': 'data8/semantic.tsv', 'train_phoneme_path': 'data8/phoneme.txt', 'model': {'hidden_dim': 768, 'embedding_dim': 768, 'n_layer': 12, 'head': 12, 'n_embd': 768, 'vocab_size': 2048, 'block_size': 1000, 'embd_pdrop': 0.1, 'resid_pdrop': 0.1, 'attn_pdrop': 0.1, 'semantic_dim': 1024, 'num_layers': 6, 'ffn_hidden': 3072, 'dropout': 0.1, 'attention_dropout': 0.1, 'hidden_dropout': 0.1, 'max_text_positions': 2048, 'max_mel_positions': 8000, 'prenet_dim': 384, 'postnet_dim': 384, 'prenet_layers': 3, 'postnet_layers': 3, 'phoneme_vocab_size': 2048, 'EOS': 2047, 'pad_val': 1024}}
14
+ 2024-12-04 06:07:02,646 INFO MainThread:47802 [wandb_init.py:init():669] starting backend
15
+ 2024-12-04 06:07:02,646 INFO MainThread:47802 [wandb_init.py:init():673] sending inform_init request
16
+ 2024-12-04 06:07:02,654 INFO MainThread:47802 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
17
+ 2024-12-04 06:07:02,655 INFO MainThread:47802 [wandb_init.py:init():686] backend started and connected
18
+ 2024-12-04 06:07:02,671 INFO MainThread:47802 [wandb_init.py:init():781] updated telemetry
19
+ 2024-12-04 06:07:02,711 INFO MainThread:47802 [wandb_init.py:init():814] communicating run to backend with 90.0 second timeout
20
+ 2024-12-04 06:07:03,035 INFO MainThread:47802 [wandb_init.py:init():867] starting run threads in backend
21
+ 2024-12-04 06:07:03,310 INFO MainThread:47802 [wandb_run.py:_console_start():2456] atexit reg
22
+ 2024-12-04 06:07:03,310 INFO MainThread:47802 [wandb_run.py:_redirect():2305] redirect: wrap_raw
23
+ 2024-12-04 06:07:03,311 INFO MainThread:47802 [wandb_run.py:_redirect():2370] Wrapping output streams.
24
+ 2024-12-04 06:07:03,311 INFO MainThread:47802 [wandb_run.py:_redirect():2395] Redirects installed.
25
+ 2024-12-04 06:07:03,315 INFO MainThread:47802 [wandb_init.py:init():911] run started, returning control to user process
26
+ 2024-12-04 06:07:05,437 INFO MainThread:47802 [wandb_watch.py:_watch():71] Watching
27
+ 2024-12-04 06:07:15,972 INFO MainThread:47802 [wandb_run.py:_config_callback():1387] config_cb None None {'config': {'output_dir': 'logs/s1', 'train': {'seed': 1234, 'epochs': 15, 'batch_size': 8, 'save_every_n_epoch': 5, 'precision': 32, 'if_save_latest': True, 'if_save_every_weights': True, 'exp_name': 'gpt_training', 'half_weights_save_dir': 'weights/s1', 'wandb': {'project': 'gpt-sovits-hindi', 'name': 'stage1_training', 'entity': None, 'log_interval': 100}}, 'optimizer': {'lr_init': 0.0001, 'lr': 0.0004, 'lr_end': 1e-05, 'warmup_steps': 4000, 'decay_steps': 50000}, 'data': {'training_files': 'data8', 'max_sec': 60, 'max_frames': 60, 'filter_length': 2048, 'hop_length': 640, 'win_length': 2048, 'mel_channels': 128, 'mel_fmin': 0.0, 'mel_fmax': None, 'cleaned_text': True, 'num_workers': 4, 'batch_size': 8, 'pad_val': 1024}, 'train_semantic_path': 'data8/semantic.tsv', 'train_phoneme_path': 'data8/phoneme.txt', 'model': {'hidden_dim': 768, 'embedding_dim': 768, 'n_layer': 12, 'head': 12, 'n_embd': 768, 'vocab_size': 2048, 'block_size': 1000, 'embd_pdrop': 0.1, 'resid_pdrop': 0.1, 'attn_pdrop': 0.1, 'semantic_dim': 1024, 'num_layers': 6, 'ffn_hidden': 3072, 'dropout': 0.1, 'attention_dropout': 0.1, 'hidden_dropout': 0.1, 'max_text_positions': 2048, 'max_mel_positions': 8000, 'prenet_dim': 384, 'postnet_dim': 384, 'prenet_layers': 3, 'postnet_layers': 3, 'phoneme_vocab_size': 2048, 'EOS': 2047, 'pad_val': 1024}}, 'output_dir': 'logs/s1', 'is_train': True}
28
+ 2024-12-04 06:07:15,973 INFO MainThread:47802 [wandb_run.py:_config_callback():1387] config_cb None None {'output_dir': 'logs/s1', 'train': {'seed': 1234, 'epochs': 15, 'batch_size': 8, 'save_every_n_epoch': 5, 'precision': 32, 'if_save_latest': True, 'if_save_every_weights': True, 'exp_name': 'gpt_training', 'half_weights_save_dir': 'weights/s1', 'wandb': {'project': 'gpt-sovits-hindi', 'name': 'stage1_training', 'entity': None, 'log_interval': 100}}, 'optimizer': {'lr_init': 0.0001, 'lr': 0.0004, 'lr_end': 1e-05, 'warmup_steps': 4000, 'decay_steps': 50000}, 'data': {'training_files': 'data8', 'max_sec': 60, 'max_frames': 60, 'filter_length': 2048, 'hop_length': 640, 'win_length': 2048, 'mel_channels': 128, 'mel_fmin': 0.0, 'mel_fmax': None, 'cleaned_text': True, 'num_workers': 4, 'batch_size': 8, 'pad_val': 1024}, 'train_semantic_path': 'data8/semantic.tsv', 'train_phoneme_path': 'data8/phoneme.txt', 'model': {'hidden_dim': 768, 'embedding_dim': 768, 'n_layer': 12, 'head': 12, 'n_embd': 768, 'vocab_size': 2048, 'block_size': 1000, 'embd_pdrop': 0.1, 'resid_pdrop': 0.1, 'attn_pdrop': 0.1, 'semantic_dim': 1024, 'num_layers': 6, 'ffn_hidden': 3072, 'dropout': 0.1, 'attention_dropout': 0.1, 'hidden_dropout': 0.1, 'max_text_positions': 2048, 'max_mel_positions': 8000, 'prenet_dim': 384, 'postnet_dim': 384, 'prenet_layers': 3, 'postnet_layers': 3, 'phoneme_vocab_size': 2048, 'EOS': 2047, 'pad_val': 1024}}
29
+ 2024-12-04 07:59:52,403 WARNING MsgRouterThr:47802 [router.py:message_loop():75] message_loop has been closed
wandb/run-20241203_183434-y6gzynz8/files/output.log ADDED
The diff for this file is too large to render. See raw diff
 
wandb/run-20241203_184628-qm0jlwqu/files/config.yaml ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _wandb:
2
+ value:
3
+ cli_version: 0.18.7
4
+ m: []
5
+ python_version: 3.10.12
6
+ t:
7
+ "1":
8
+ - 1
9
+ - 5
10
+ - 9
11
+ - 11
12
+ - 49
13
+ - 53
14
+ - 55
15
+ - 103
16
+ - 105
17
+ "2":
18
+ - 1
19
+ - 5
20
+ - 9
21
+ - 11
22
+ - 49
23
+ - 53
24
+ - 55
25
+ - 103
26
+ - 105
27
+ "3":
28
+ - 1
29
+ - 13
30
+ - 16
31
+ - 23
32
+ - 55
33
+ "4": 3.10.12
34
+ "5": 0.18.7
35
+ "6": 4.46.3
36
+ "8":
37
+ - 5
38
+ "12": 0.18.7
39
+ "13": linux-x86_64
40
+ data:
41
+ value:
42
+ batch_size: 8
43
+ cleaned_text: true
44
+ filter_length: 2048
45
+ hop_length: 640
46
+ max_frames: 60
47
+ max_sec: 60
48
+ mel_channels: 128
49
+ mel_fmax: null
50
+ mel_fmin: 0
51
+ num_workers: 4
52
+ pad_val: 1024
53
+ training_files: data8
54
+ win_length: 2048
55
+ model:
56
+ value:
57
+ EOS: 2047
58
+ attention_dropout: 0.1
59
+ attn_pdrop: 0.1
60
+ block_size: 1000
61
+ dropout: 0.1
62
+ embd_pdrop: 0.1
63
+ embedding_dim: 768
64
+ ffn_hidden: 3072
65
+ head: 12
66
+ hidden_dim: 768
67
+ hidden_dropout: 0.1
68
+ max_mel_positions: 8000
69
+ max_text_positions: 2048
70
+ n_embd: 768
71
+ n_layer: 12
72
+ num_layers: 6
73
+ pad_val: 1024
74
+ phoneme_vocab_size: 2048
75
+ postnet_dim: 384
76
+ postnet_layers: 3
77
+ prenet_dim: 384
78
+ prenet_layers: 3
79
+ resid_pdrop: 0.1
80
+ semantic_dim: 1024
81
+ vocab_size: 2048
82
+ output_dir:
83
+ value: logs/s1
84
+ train:
85
+ value:
86
+ batch_size: 8
87
+ epochs: 15
88
+ exp_name: gpt_training
89
+ half_weights_save_dir: weights/s1
90
+ if_save_every_weights: true
91
+ if_save_latest: true
92
+ precision: 32
93
+ save_every_n_epoch: 5
94
+ seed: 1234
95
+ wandb:
96
+ entity: null
97
+ log_interval: 100
98
+ name: stage1_training
99
+ project: gpt-sovits-hindi
100
+ train_phoneme_path:
101
+ value: data8/phoneme.txt
102
+ train_semantic_path:
103
+ value: data8/semantic.tsv
wandb/run-20241203_184628-qm0jlwqu/files/output.log ADDED
The diff for this file is too large to render. See raw diff
 
wandb/run-20241203_184628-qm0jlwqu/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"_wandb":{"runtime":4}}
wandb/run-20241203_185057-b425fq2v/files/wandb-metadata.json ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.4.0-196-generic-x86_64-with-glibc2.35",
3
+ "python": "3.10.12",
4
+ "startedAt": "2024-12-03T18:50:57.819397Z",
5
+ "args": [
6
+ "-c",
7
+ "configs/s1.yaml"
8
+ ],
9
+ "program": "/workspace/GPT-SoVITS/GPT_SoVITS/s1_train.py",
10
+ "codePath": "GPT_SoVITS/s1_train.py",
11
+ "git": {
12
+ "remote": "https://github.com/RVC-Boss/GPT-SoVITS.git",
13
+ "commit": "a70e1ad30c072cdbcfb716962abdc8008fa41cc2"
14
+ },
15
+ "email": "[email protected]",
16
+ "root": "/workspace/GPT-SoVITS",
17
+ "host": "7a6bba088bf1",
18
+ "username": "root",
19
+ "executable": "/usr/bin/python",
20
+ "codePathLocal": "GPT_SoVITS/s1_train.py",
21
+ "cpu_count": 48,
22
+ "cpu_count_logical": 96,
23
+ "gpu": "NVIDIA A40",
24
+ "gpu_count": 1,
25
+ "disk": {
26
+ "/": {
27
+ "total": "42949672960",
28
+ "used": "3612397568"
29
+ }
30
+ },
31
+ "memory": {
32
+ "total": "540662632448"
33
+ },
34
+ "cpu": {
35
+ "count": 48,
36
+ "countLogical": 96
37
+ },
38
+ "gpu_nvidia": [
39
+ {
40
+ "name": "NVIDIA A40",
41
+ "memoryTotal": "48305799168",
42
+ "cudaCores": 10752,
43
+ "architecture": "Ampere"
44
+ }
45
+ ],
46
+ "cudaVersion": "12.7"
47
+ }
wandb/run-20241203_185057-b425fq2v/logs/debug-core.log ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2024-12-03T18:50:57.298838127Z","level":"INFO","msg":"started logging, with flags","port-filename":"/tmp/tmp3grczmmy/port-20897.txt","pid":20897,"debug":false,"disable-analytics":false}
2
+ {"time":"2024-12-03T18:50:57.298925737Z","level":"INFO","msg":"FeatureState","shutdownOnParentExitEnabled":false}
3
+ {"time":"2024-12-03T18:50:57.300062798Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":20897}
4
+ {"time":"2024-12-03T18:50:57.300116485Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":36211,"Zone":""}}
5
+ {"time":"2024-12-03T18:50:57.480327477Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:51888"}
6
+ {"time":"2024-12-03T18:50:57.822370995Z","level":"INFO","msg":"handleInformInit: received","streamId":"b425fq2v","id":"127.0.0.1:51888"}
7
+ {"time":"2024-12-03T18:50:57.948271348Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"b425fq2v","id":"127.0.0.1:51888"}
8
+ {"time":"2024-12-03T18:51:01.009900112Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:51888"}
9
+ {"time":"2024-12-03T18:51:01.010177746Z","level":"INFO","msg":"connection: Close: initiating connection closure","id":"127.0.0.1:51888"}
10
+ {"time":"2024-12-03T18:51:01.010280082Z","level":"INFO","msg":"server is shutting down"}
11
+ {"time":"2024-12-03T18:51:01.010428582Z","level":"INFO","msg":"connection: Close: connection successfully closed","id":"127.0.0.1:51888"}
12
+ {"time":"2024-12-03T18:51:01.720218142Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:51888"}
13
+ {"time":"2024-12-03T18:51:01.720272183Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:51888"}
14
+ {"time":"2024-12-03T18:51:01.720296805Z","level":"INFO","msg":"server is closed"}
wandb/run-20241203_185057-b425fq2v/logs/debug.log ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2024-12-03 18:50:57,786 INFO MainThread:20897 [wandb_setup.py:_flush():79] Current SDK version is 0.18.7
2
+ 2024-12-03 18:50:57,787 INFO MainThread:20897 [wandb_setup.py:_flush():79] Configure stats pid to 20897
3
+ 2024-12-03 18:50:57,787 INFO MainThread:20897 [wandb_setup.py:_flush():79] Loading settings from /root/.config/wandb/settings
4
+ 2024-12-03 18:50:57,788 INFO MainThread:20897 [wandb_setup.py:_flush():79] Loading settings from /workspace/GPT-SoVITS/wandb/settings
5
+ 2024-12-03 18:50:57,788 INFO MainThread:20897 [wandb_setup.py:_flush():79] Loading settings from environment variables: {}
6
+ 2024-12-03 18:50:57,788 INFO MainThread:20897 [wandb_setup.py:_flush():79] Applying setup settings: {'mode': None, '_disable_service': None}
7
+ 2024-12-03 18:50:57,789 INFO MainThread:20897 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'GPT_SoVITS/s1_train.py', 'program_abspath': '/workspace/GPT-SoVITS/GPT_SoVITS/s1_train.py', 'program': '/workspace/GPT-SoVITS/GPT_SoVITS/s1_train.py'}
8
+ 2024-12-03 18:50:57,789 INFO MainThread:20897 [wandb_setup.py:_flush():79] Applying login settings: {}
9
+ 2024-12-03 18:50:57,789 INFO MainThread:20897 [wandb_init.py:_log_setup():533] Logging user logs to /workspace/GPT-SoVITS/wandb/run-20241203_185057-b425fq2v/logs/debug.log
10
+ 2024-12-03 18:50:57,790 INFO MainThread:20897 [wandb_init.py:_log_setup():534] Logging internal logs to /workspace/GPT-SoVITS/wandb/run-20241203_185057-b425fq2v/logs/debug-internal.log
11
+ 2024-12-03 18:50:57,791 INFO MainThread:20897 [wandb_init.py:init():619] calling init triggers
12
+ 2024-12-03 18:50:57,791 INFO MainThread:20897 [wandb_init.py:init():626] wandb.init called with sweep_config: {}
13
+ config: {'output_dir': 'logs/s1', 'train': {'seed': 1234, 'epochs': 15, 'batch_size': 8, 'save_every_n_epoch': 5, 'precision': 32, 'if_save_latest': True, 'if_save_every_weights': True, 'exp_name': 'gpt_training', 'half_weights_save_dir': 'weights/s1', 'wandb': {'project': 'gpt-sovits-hindi', 'name': 'stage1_training', 'entity': None, 'log_interval': 100}}, 'data': {'training_files': 'data8', 'max_sec': 60, 'max_frames': 60, 'filter_length': 2048, 'hop_length': 640, 'win_length': 2048, 'mel_channels': 128, 'mel_fmin': 0.0, 'mel_fmax': None, 'cleaned_text': True, 'num_workers': 4, 'batch_size': 8, 'pad_val': 1024}, 'train_semantic_path': 'data8/semantic.tsv', 'train_phoneme_path': 'data8/phoneme.txt', 'model': {'hidden_dim': 768, 'embedding_dim': 768, 'n_layer': 12, 'head': 12, 'n_embd': 768, 'vocab_size': 2048, 'block_size': 1000, 'embd_pdrop': 0.1, 'resid_pdrop': 0.1, 'attn_pdrop': 0.1, 'semantic_dim': 1024, 'num_layers': 6, 'ffn_hidden': 3072, 'dropout': 0.1, 'attention_dropout': 0.1, 'hidden_dropout': 0.1, 'max_text_positions': 2048, 'max_mel_positions': 8000, 'prenet_dim': 384, 'postnet_dim': 384, 'prenet_layers': 3, 'postnet_layers': 3, 'phoneme_vocab_size': 2048, 'EOS': 2047, 'pad_val': 1024}}
14
+ 2024-12-03 18:50:57,791 INFO MainThread:20897 [wandb_init.py:init():669] starting backend
15
+ 2024-12-03 18:50:57,792 INFO MainThread:20897 [wandb_init.py:init():673] sending inform_init request
16
+ 2024-12-03 18:50:57,815 INFO MainThread:20897 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
17
+ 2024-12-03 18:50:57,817 INFO MainThread:20897 [wandb_init.py:init():686] backend started and connected
18
+ 2024-12-03 18:50:57,852 INFO MainThread:20897 [wandb_init.py:init():781] updated telemetry
19
+ 2024-12-03 18:50:57,966 INFO MainThread:20897 [wandb_init.py:init():814] communicating run to backend with 90.0 second timeout
20
+ 2024-12-03 18:50:58,251 INFO MainThread:20897 [wandb_init.py:init():867] starting run threads in backend
21
+ 2024-12-03 18:50:58,566 INFO MainThread:20897 [wandb_run.py:_console_start():2456] atexit reg
22
+ 2024-12-03 18:50:58,566 INFO MainThread:20897 [wandb_run.py:_redirect():2305] redirect: wrap_raw
23
+ 2024-12-03 18:50:58,566 INFO MainThread:20897 [wandb_run.py:_redirect():2370] Wrapping output streams.
24
+ 2024-12-03 18:50:58,567 INFO MainThread:20897 [wandb_run.py:_redirect():2395] Redirects installed.
25
+ 2024-12-03 18:50:58,569 INFO MainThread:20897 [wandb_init.py:init():911] run started, returning control to user process
26
+ 2024-12-03 18:50:59,137 INFO MainThread:20897 [wandb_watch.py:_watch():71] Watching
27
+ 2024-12-03 18:51:01,010 WARNING MsgRouterThr:20897 [router.py:message_loop():75] message_loop has been closed
wandb/run-20241203_185057-b425fq2v/run-b425fq2v.wandb ADDED
Binary file (16.1 kB). View file
 
wandb/run-20241203_185203-najxbup6/files/output.log ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ GPU available: True (cuda), used: True
2
+ TPU available: False, using: 0 TPU cores
3
+ HPU available: False, using: 0 HPUs
4
+ Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/1
5
+ ----------------------------------------------------------------------------------------------------
6
+ distributed_backend=nccl
7
+ All distributed processes registered. Starting with 1 processes
8
+ ----------------------------------------------------------------------------------------------------
9
+
10
+ /usr/local/lib/python3.10/dist-packages/pytorch_lightning/loggers/wandb.py:396: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.
11
+ semantic_data_len: 726
12
+ phoneme_data_len: 727
13
+ data8/wavs/0.wav अखरोट साइज में कैसी होगी ताकि फिर खोल सिविल डिफेंस पास तो इसमें क्या इशू जाते हैं ऐसी क्या प्रॉब्लम आती है 5 से रिलेटेड जॉब्स अपडेटेड साइट सौगंध पीपल फ्रॉम ऑल ग्रेंस बैकग्राउंड तो क्या ऐसे इशूज है जो पास से रिलेटेड आते हैं दायित्व प्रॉब्लम्स से मेरे पास आते हैं वह रिलेशनशिप को लेकर कि आते हैं इस पेस्ट को अनम्यूट रिलेशनशिप को लेकर किया पास भी किसी ने मेरे साथ कुछ किया मैं वह लेट को
14
+ 0 data8/wavs/1.wav साथ मुझे बहुत टाइम पहले कैंची याद आती है ऑफ मु...
15
+ 1 data8/wavs/2.wav कि वन भूल जाओ अब आगे देखो फीचर को देखो लेकिन प...
16
+ 2 data8/wavs/3.wav हैं बिकॉज़ क्या होता है कि किसी को भी उस पेन अ...
17
+ 3 data8/wavs/4.wav नेक्स्ट टो इंपासिबल जब तक कि हम यह न समझ लें क...
18
+ 4 data8/wavs/5.wav में रखी है तू जान उसको जिसमें वांट बट अगर उसको...
19
+ .. ... ...
20
+ 721 data8/wavs/722.wav होने लगेंगी तो इस सबको सलूशन किया है या जो भी ...
21
+ 722 data8/wavs/723.wav की फैट की सब कुछ इंपोर्टेंट है हो सकता पतले हो...
22
+ 723 data8/wavs/724.wav उस समय हर चीज द प्रॉब्लम है फ्रेगनेट चाहती है ...
23
+ 724 data8/wavs/725.wav है तो इसको थोड़ी सी देर के लिए बैटर फील होता ह...
24
+ 725 data8/wavs/726.wav हो यानी बेसिक्स के एकॉर्डिंग हो हैं तो इन जस्ट...
25
+
26
+ [726 rows x 2 columns]
27
+ Traceback (most recent call last):
28
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/s1_train.py", line 113, in <module>
29
+ main(args)
30
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/s1_train.py", line 100, in main
31
+ trainer.fit(model, data_module)
32
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/trainer.py", line 538, in fit
33
+ call._call_and_handle_interrupt(
34
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/call.py", line 46, in _call_and_handle_interrupt
35
+ return trainer.strategy.launcher.launch(trainer_fn, *args, trainer=trainer, **kwargs)
36
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/strategies/launchers/subprocess_script.py", line 105, in launch
37
+ return function(*args, **kwargs)
38
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/trainer.py", line 574, in _fit_impl
39
+ self._run(model, ckpt_path=ckpt_path)
40
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/trainer.py", line 943, in _run
41
+ call._call_setup_hook(self) # allow user to set up LightningModule in accelerator environment
42
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/call.py", line 102, in _call_setup_hook
43
+ _call_lightning_datamodule_hook(trainer, "setup", stage=fn)
44
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/call.py", line 189, in _call_lightning_datamodule_hook
45
+ return fn(*args, **kwargs)
46
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/data_module.py", line 30, in setup
47
+ self._train_dataset = Text2SemanticDataset(
48
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 117, in __init__
49
+ self.init_batch()
50
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 147, in init_batch
51
+ semantic_ids = [ord(c) if isinstance(c, str) else c for c in semantic_ids] # Convert to character codes if needed
52
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 147, in <listcomp>
53
+ semantic_ids = [ord(c) if isinstance(c, str) else c for c in semantic_ids] # Convert to character codes if needed
54
+ TypeError: ord() expected a character, but string of length 2 found
55
+ Traceback (most recent call last):
56
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/s1_train.py", line 113, in <module>
57
+ main(args)
58
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/s1_train.py", line 100, in main
59
+ trainer.fit(model, data_module)
60
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/trainer.py", line 538, in fit
61
+ call._call_and_handle_interrupt(
62
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/call.py", line 46, in _call_and_handle_interrupt
63
+ return trainer.strategy.launcher.launch(trainer_fn, *args, trainer=trainer, **kwargs)
64
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/strategies/launchers/subprocess_script.py", line 105, in launch
65
+ return function(*args, **kwargs)
66
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/trainer.py", line 574, in _fit_impl
67
+ self._run(model, ckpt_path=ckpt_path)
68
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/trainer.py", line 943, in _run
69
+ call._call_setup_hook(self) # allow user to set up LightningModule in accelerator environment
70
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/call.py", line 102, in _call_setup_hook
71
+ _call_lightning_datamodule_hook(trainer, "setup", stage=fn)
72
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/call.py", line 189, in _call_lightning_datamodule_hook
73
+ return fn(*args, **kwargs)
74
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/data_module.py", line 30, in setup
75
+ self._train_dataset = Text2SemanticDataset(
76
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 117, in __init__
77
+ self.init_batch()
78
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 147, in init_batch
79
+ semantic_ids = [ord(c) if isinstance(c, str) else c for c in semantic_ids] # Convert to character codes if needed
80
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 147, in <listcomp>
81
+ semantic_ids = [ord(c) if isinstance(c, str) else c for c in semantic_ids] # Convert to character codes if needed
82
+ TypeError: ord() expected a character, but string of length 2 found
wandb/run-20241203_185203-najxbup6/files/wandb-metadata.json ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.4.0-196-generic-x86_64-with-glibc2.35",
3
+ "python": "3.10.12",
4
+ "startedAt": "2024-12-03T18:52:04.054803Z",
5
+ "args": [
6
+ "-c",
7
+ "configs/s1.yaml"
8
+ ],
9
+ "program": "/workspace/GPT-SoVITS/GPT_SoVITS/s1_train.py",
10
+ "codePath": "GPT_SoVITS/s1_train.py",
11
+ "git": {
12
+ "remote": "https://github.com/RVC-Boss/GPT-SoVITS.git",
13
+ "commit": "a70e1ad30c072cdbcfb716962abdc8008fa41cc2"
14
+ },
15
+ "email": "[email protected]",
16
+ "root": "/workspace/GPT-SoVITS",
17
+ "host": "7a6bba088bf1",
18
+ "username": "root",
19
+ "executable": "/usr/bin/python",
20
+ "codePathLocal": "GPT_SoVITS/s1_train.py",
21
+ "cpu_count": 48,
22
+ "cpu_count_logical": 96,
23
+ "gpu": "NVIDIA A40",
24
+ "gpu_count": 1,
25
+ "disk": {
26
+ "/": {
27
+ "total": "42949672960",
28
+ "used": "3612413952"
29
+ }
30
+ },
31
+ "memory": {
32
+ "total": "540662632448"
33
+ },
34
+ "cpu": {
35
+ "count": 48,
36
+ "countLogical": 96
37
+ },
38
+ "gpu_nvidia": [
39
+ {
40
+ "name": "NVIDIA A40",
41
+ "memoryTotal": "48305799168",
42
+ "cudaCores": 10752,
43
+ "architecture": "Ampere"
44
+ }
45
+ ],
46
+ "cudaVersion": "12.7"
47
+ }
wandb/run-20241203_185203-najxbup6/logs/debug-core.log ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2024-12-03T18:52:03.57165883Z","level":"INFO","msg":"started logging, with flags","port-filename":"/tmp/tmp6yh_tti_/port-21367.txt","pid":21367,"debug":false,"disable-analytics":false}
2
+ {"time":"2024-12-03T18:52:03.571726063Z","level":"INFO","msg":"FeatureState","shutdownOnParentExitEnabled":false}
3
+ {"time":"2024-12-03T18:52:03.572934846Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":21367}
4
+ {"time":"2024-12-03T18:52:03.57285693Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":46379,"Zone":""}}
5
+ {"time":"2024-12-03T18:52:03.737251507Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:36504"}
6
+ {"time":"2024-12-03T18:52:04.058031183Z","level":"INFO","msg":"handleInformInit: received","streamId":"najxbup6","id":"127.0.0.1:36504"}
7
+ {"time":"2024-12-03T18:52:04.183470455Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"najxbup6","id":"127.0.0.1:36504"}
8
+ {"time":"2024-12-03T18:52:08.130482168Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:36504"}
9
+ {"time":"2024-12-03T18:52:08.130659224Z","level":"INFO","msg":"connection: Close: initiating connection closure","id":"127.0.0.1:36504"}
10
+ {"time":"2024-12-03T18:52:08.130755688Z","level":"INFO","msg":"server is shutting down"}
11
+ {"time":"2024-12-03T18:52:08.130860268Z","level":"INFO","msg":"connection: Close: connection successfully closed","id":"127.0.0.1:36504"}
12
+ {"time":"2024-12-03T18:52:08.858356392Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:36504"}
13
+ {"time":"2024-12-03T18:52:08.858424292Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:36504"}
14
+ {"time":"2024-12-03T18:52:08.85844791Z","level":"INFO","msg":"server is closed"}
wandb/run-20241203_185203-najxbup6/logs/debug-internal.log ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2024-12-03T18:52:04.061594638Z","level":"INFO","msg":"using version","core version":"0.18.7"}
2
+ {"time":"2024-12-03T18:52:04.062683181Z","level":"INFO","msg":"created symlink","path":"/workspace/GPT-SoVITS/wandb/run-20241203_185203-najxbup6/logs/debug-core.log"}
3
+ {"time":"2024-12-03T18:52:04.181494866Z","level":"INFO","msg":"created new stream","id":"najxbup6"}
4
+ {"time":"2024-12-03T18:52:04.182422476Z","level":"INFO","msg":"stream: started","id":"najxbup6"}
5
+ {"time":"2024-12-03T18:52:04.182576293Z","level":"INFO","msg":"writer: Do: started","stream_id":"najxbup6"}
6
+ {"time":"2024-12-03T18:52:04.182633903Z","level":"INFO","msg":"handler: started","stream_id":"najxbup6"}
7
+ {"time":"2024-12-03T18:52:04.182665857Z","level":"INFO","msg":"sender: started","stream_id":"najxbup6"}
8
+ {"time":"2024-12-03T18:52:04.568505243Z","level":"INFO","msg":"Starting system monitor"}
9
+ {"time":"2024-12-03T18:52:08.130657144Z","level":"INFO","msg":"stream: closing","id":"najxbup6"}
10
+ {"time":"2024-12-03T18:52:08.130915038Z","level":"INFO","msg":"Stopping system monitor"}
11
+ {"time":"2024-12-03T18:52:08.13386141Z","level":"INFO","msg":"Stopped system monitor"}
12
+ {"time":"2024-12-03T18:52:08.667615718Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
13
+ {"time":"2024-12-03T18:52:08.85195332Z","level":"INFO","msg":"handler: closed","stream_id":"najxbup6"}
14
+ {"time":"2024-12-03T18:52:08.852082887Z","level":"INFO","msg":"sender: closed","stream_id":"najxbup6"}
15
+ {"time":"2024-12-03T18:52:08.852047068Z","level":"INFO","msg":"writer: Close: closed","stream_id":"najxbup6"}
16
+ {"time":"2024-12-03T18:52:08.856221306Z","level":"INFO","msg":"stream: closed","id":"najxbup6"}
wandb/run-20241203_185203-najxbup6/run-najxbup6.wandb ADDED
Binary file (16.3 kB). View file
 
wandb/run-20241203_185257-7zp6kxhx/files/output.log ADDED
The diff for this file is too large to render. See raw diff
 
wandb/run-20241203_185257-7zp6kxhx/files/wandb-metadata.json ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.4.0-196-generic-x86_64-with-glibc2.35",
3
+ "python": "3.10.12",
4
+ "startedAt": "2024-12-03T18:52:57.631714Z",
5
+ "args": [
6
+ "-c",
7
+ "configs/s1.yaml"
8
+ ],
9
+ "program": "/workspace/GPT-SoVITS/GPT_SoVITS/s1_train.py",
10
+ "codePath": "GPT_SoVITS/s1_train.py",
11
+ "git": {
12
+ "remote": "https://github.com/RVC-Boss/GPT-SoVITS.git",
13
+ "commit": "a70e1ad30c072cdbcfb716962abdc8008fa41cc2"
14
+ },
15
+ "email": "[email protected]",
16
+ "root": "/workspace/GPT-SoVITS",
17
+ "host": "7a6bba088bf1",
18
+ "username": "root",
19
+ "executable": "/usr/bin/python",
20
+ "codePathLocal": "GPT_SoVITS/s1_train.py",
21
+ "cpu_count": 48,
22
+ "cpu_count_logical": 96,
23
+ "gpu": "NVIDIA A40",
24
+ "gpu_count": 1,
25
+ "disk": {
26
+ "/": {
27
+ "total": "42949672960",
28
+ "used": "3612430336"
29
+ }
30
+ },
31
+ "memory": {
32
+ "total": "540662632448"
33
+ },
34
+ "cpu": {
35
+ "count": 48,
36
+ "countLogical": 96
37
+ },
38
+ "gpu_nvidia": [
39
+ {
40
+ "name": "NVIDIA A40",
41
+ "memoryTotal": "48305799168",
42
+ "cudaCores": 10752,
43
+ "architecture": "Ampere"
44
+ }
45
+ ],
46
+ "cudaVersion": "12.7"
47
+ }
wandb/run-20241203_185257-7zp6kxhx/logs/debug-core.log ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2024-12-03T18:52:57.125257617Z","level":"INFO","msg":"started logging, with flags","port-filename":"/tmp/tmpg94bhy5s/port-21847.txt","pid":21847,"debug":false,"disable-analytics":false}
2
+ {"time":"2024-12-03T18:52:57.125312331Z","level":"INFO","msg":"FeatureState","shutdownOnParentExitEnabled":false}
3
+ {"time":"2024-12-03T18:52:57.126540671Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":21847}
4
+ {"time":"2024-12-03T18:52:57.126474677Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":44225,"Zone":""}}
5
+ {"time":"2024-12-03T18:52:57.297685759Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:35782"}
6
+ {"time":"2024-12-03T18:52:57.634807084Z","level":"INFO","msg":"handleInformInit: received","streamId":"7zp6kxhx","id":"127.0.0.1:35782"}
7
+ {"time":"2024-12-03T18:52:57.766148785Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"7zp6kxhx","id":"127.0.0.1:35782"}
8
+ {"time":"2024-12-03T18:53:07.832970963Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:35782"}
9
+ {"time":"2024-12-03T18:53:07.833273182Z","level":"INFO","msg":"server is shutting down"}
10
+ {"time":"2024-12-03T18:53:07.833246872Z","level":"INFO","msg":"connection: Close: initiating connection closure","id":"127.0.0.1:35782"}
11
+ {"time":"2024-12-03T18:53:07.833479032Z","level":"INFO","msg":"connection: Close: connection successfully closed","id":"127.0.0.1:35782"}
12
+ {"time":"2024-12-03T18:53:08.261131593Z","level":"INFO","msg":"Parent process exited, terminating service process."}
wandb/run-20241203_185257-7zp6kxhx/logs/debug-internal.log ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2024-12-03T18:52:57.637678482Z","level":"INFO","msg":"using version","core version":"0.18.7"}
2
+ {"time":"2024-12-03T18:52:57.638824343Z","level":"INFO","msg":"created symlink","path":"/workspace/GPT-SoVITS/wandb/run-20241203_185257-7zp6kxhx/logs/debug-core.log"}
3
+ {"time":"2024-12-03T18:52:57.7637763Z","level":"INFO","msg":"created new stream","id":"7zp6kxhx"}
4
+ {"time":"2024-12-03T18:52:57.764894735Z","level":"INFO","msg":"stream: started","id":"7zp6kxhx"}
5
+ {"time":"2024-12-03T18:52:57.765059685Z","level":"INFO","msg":"writer: Do: started","stream_id":"7zp6kxhx"}
6
+ {"time":"2024-12-03T18:52:57.76510391Z","level":"INFO","msg":"handler: started","stream_id":"7zp6kxhx"}
7
+ {"time":"2024-12-03T18:52:57.765125664Z","level":"INFO","msg":"sender: started","stream_id":"7zp6kxhx"}
8
+ {"time":"2024-12-03T18:52:58.098522624Z","level":"INFO","msg":"Starting system monitor"}
9
+ {"time":"2024-12-03T18:53:07.833139123Z","level":"INFO","msg":"stream: closing","id":"7zp6kxhx"}
10
+ {"time":"2024-12-03T18:53:07.833395573Z","level":"INFO","msg":"Stopping system monitor"}
11
+ {"time":"2024-12-03T18:53:07.835643464Z","level":"INFO","msg":"Stopped system monitor"}
wandb/run-20241203_185453-2eog1nt2/files/config.yaml ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _wandb:
2
+ value:
3
+ cli_version: 0.18.7
4
+ m: []
5
+ python_version: 3.10.12
6
+ t:
7
+ "1":
8
+ - 1
9
+ - 5
10
+ - 9
11
+ - 11
12
+ - 49
13
+ - 53
14
+ - 55
15
+ - 103
16
+ - 105
17
+ "2":
18
+ - 1
19
+ - 5
20
+ - 9
21
+ - 11
22
+ - 49
23
+ - 53
24
+ - 55
25
+ - 103
26
+ - 105
27
+ "3":
28
+ - 1
29
+ - 13
30
+ - 16
31
+ - 23
32
+ - 55
33
+ "4": 3.10.12
34
+ "5": 0.18.7
35
+ "6": 4.46.3
36
+ "8":
37
+ - 5
38
+ "12": 0.18.7
39
+ "13": linux-x86_64
40
+ data:
41
+ value:
42
+ batch_size: 8
43
+ cleaned_text: true
44
+ filter_length: 2048
45
+ hop_length: 640
46
+ max_frames: 60
47
+ max_sec: 60
48
+ mel_channels: 128
49
+ mel_fmax: null
50
+ mel_fmin: 0
51
+ num_workers: 4
52
+ pad_val: 1024
53
+ training_files: data8
54
+ win_length: 2048
55
+ model:
56
+ value:
57
+ EOS: 2047
58
+ attention_dropout: 0.1
59
+ attn_pdrop: 0.1
60
+ block_size: 1000
61
+ dropout: 0.1
62
+ embd_pdrop: 0.1
63
+ embedding_dim: 768
64
+ ffn_hidden: 3072
65
+ head: 12
66
+ hidden_dim: 768
67
+ hidden_dropout: 0.1
68
+ max_mel_positions: 8000
69
+ max_text_positions: 2048
70
+ n_embd: 768
71
+ n_layer: 12
72
+ num_layers: 6
73
+ pad_val: 1024
74
+ phoneme_vocab_size: 2048
75
+ postnet_dim: 384
76
+ postnet_layers: 3
77
+ prenet_dim: 384
78
+ prenet_layers: 3
79
+ resid_pdrop: 0.1
80
+ semantic_dim: 1024
81
+ vocab_size: 2048
82
+ output_dir:
83
+ value: logs/s1
84
+ train:
85
+ value:
86
+ batch_size: 8
87
+ epochs: 15
88
+ exp_name: gpt_training
89
+ half_weights_save_dir: weights/s1
90
+ if_save_every_weights: true
91
+ if_save_latest: true
92
+ precision: 32
93
+ save_every_n_epoch: 5
94
+ seed: 1234
95
+ wandb:
96
+ entity: null
97
+ log_interval: 100
98
+ name: stage1_training
99
+ project: gpt-sovits-hindi
100
+ train_phoneme_path:
101
+ value: data8/phoneme.txt
102
+ train_semantic_path:
103
+ value: data8/semantic.tsv
wandb/run-20241203_185453-2eog1nt2/files/output.log ADDED
@@ -0,0 +1,1140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ GPU available: True (cuda), used: True
2
+ TPU available: False, using: 0 TPU cores
3
+ HPU available: False, using: 0 HPUs
4
+ Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/1
5
+ ----------------------------------------------------------------------------------------------------
6
+ distributed_backend=nccl
7
+ All distributed processes registered. Starting with 1 processes
8
+ ----------------------------------------------------------------------------------------------------
9
+
10
+ /usr/local/lib/python3.10/dist-packages/pytorch_lightning/loggers/wandb.py:396: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.
11
+ semantic_data_len: 726
12
+ phoneme_data_len: 727
13
+ data8/wavs/0.wav अखरोट साइज में कैसी होगी ताकि फिर खोल सिविल डिफेंस पास तो इसमें क्या इशू जाते हैं ऐसी क्या प्रॉब्लम आती है 5 से रिलेटेड जॉब्स अपडेटेड साइट सौगंध पीपल फ्रॉम ऑल ग्रेंस बैकग्राउंड तो क्या ऐसे इशूज है जो पास से रिलेटेड आते हैं दायित्व प्रॉब्लम्स से मेरे पास आते हैं वह रिलेशनशिप को लेकर कि आते हैं इस पेस्ट को अनम्यूट रिलेशनशिप को लेकर किया पास भी किसी ने मेरे साथ कुछ किया मैं वह लेट को
14
+ 0 data8/wavs/1.wav साथ मुझे बहुत टाइम पहले कैंची याद आती है ऑफ मु...
15
+ 1 data8/wavs/2.wav कि वन भूल जाओ अब आगे देखो फीचर को देखो लेकिन प...
16
+ 2 data8/wavs/3.wav हैं बिकॉज़ क्या होता है कि किसी को भी उस पेन अ...
17
+ 3 data8/wavs/4.wav नेक्स्ट टो इंपासिबल जब तक कि हम यह न समझ लें क...
18
+ 4 data8/wavs/5.wav में रखी है तू जान उसको जिसमें वांट बट अगर उसको...
19
+ .. ... ...
20
+ 721 data8/wavs/722.wav होने लगेंगी तो इस सबको सलूशन किया है या जो भी ...
21
+ 722 data8/wavs/723.wav की फैट की सब कुछ इंपोर्टेंट है हो सकता पतले हो...
22
+ 723 data8/wavs/724.wav उस समय हर चीज द प्रॉब्लम है फ्रेगनेट चाहती है ...
23
+ 724 data8/wavs/725.wav है तो इसको थोड़ी सी देर के लिए बैटर फील होता ह...
24
+ 725 data8/wavs/726.wav हो यानी बेसिक्स के एकॉर्डिंग हो हैं तो इन जस्ट...
25
+
26
+ [726 rows x 2 columns]
27
+ Traceback (most recent call last):
28
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
29
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
30
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
31
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
32
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
33
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
34
+ KeyError: 'ऑ'
35
+ Traceback (most recent call last):
36
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
37
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
38
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
39
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
40
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
41
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
42
+ KeyError: 'ॉ'
43
+ Traceback (most recent call last):
44
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
45
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
46
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
47
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
48
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
49
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
50
+ KeyError: 'ॉ'
51
+ Traceback (most recent call last):
52
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
53
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
54
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
55
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
56
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
57
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
58
+ KeyError: 'ॉ'
59
+ Traceback (most recent call last):
60
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
61
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
62
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
63
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
64
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
65
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
66
+ KeyError: 'ॉ'
67
+ Traceback (most recent call last):
68
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
69
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
70
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
71
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
72
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
73
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
74
+ KeyError: 'ॉ'
75
+ Traceback (most recent call last):
76
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
77
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
78
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
79
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
80
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
81
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
82
+ KeyError: 'ॉ'
83
+ Traceback (most recent call last):
84
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
85
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
86
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
87
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
88
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
89
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
90
+ KeyError: 'ॉ'
91
+ Traceback (most recent call last):
92
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
93
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
94
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
95
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
96
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
97
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
98
+ KeyError: 'ॉ'
99
+ Traceback (most recent call last):
100
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
101
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
102
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
103
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
104
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
105
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
106
+ KeyError: 'ऑ'
107
+ Traceback (most recent call last):
108
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
109
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
110
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
111
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
112
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
113
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
114
+ KeyError: 'ॉ'
115
+ Traceback (most recent call last):
116
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
117
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
118
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
119
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
120
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
121
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
122
+ KeyError: 'ॉ'
123
+ Traceback (most recent call last):
124
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
125
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
126
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
127
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
128
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
129
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
130
+ KeyError: 'ॉ'
131
+ Traceback (most recent call last):
132
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
133
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
134
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
135
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
136
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
137
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
138
+ KeyError: 'ॉ'
139
+ Traceback (most recent call last):
140
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
141
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
142
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
143
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
144
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
145
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
146
+ KeyError: 'ॉ'
147
+ Traceback (most recent call last):
148
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
149
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
150
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
151
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
152
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
153
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
154
+ KeyError: 'ॉ'
155
+ Traceback (most recent call last):
156
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
157
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
158
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
159
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
160
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
161
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
162
+ KeyError: 'ॉ'
163
+ Traceback (most recent call last):
164
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
165
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
166
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
167
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
168
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
169
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
170
+ KeyError: 'ऑ'
171
+ Traceback (most recent call last):
172
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
173
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
174
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
175
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
176
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
177
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
178
+ KeyError: 'ॉ'
179
+ Traceback (most recent call last):
180
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
181
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
182
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
183
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
184
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
185
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
186
+ KeyError: 'ॉ'
187
+ Traceback (most recent call last):
188
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
189
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
190
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
191
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
192
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
193
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
194
+ KeyError: 'ऑ'
195
+ Traceback (most recent call last):
196
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
197
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
198
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
199
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
200
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
201
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
202
+ KeyError: 'ॉ'
203
+ Traceback (most recent call last):
204
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
205
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
206
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
207
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
208
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
209
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
210
+ KeyError: 'ॉ'
211
+ Traceback (most recent call last):
212
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
213
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
214
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
215
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
216
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
217
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
218
+ KeyError: 'ॉ'
219
+ Traceback (most recent call last):
220
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
221
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
222
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
223
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
224
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
225
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
226
+ KeyError: 'ॉ'
227
+ Traceback (most recent call last):
228
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
229
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
230
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
231
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
232
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
233
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
234
+ KeyError: 'ॉ'
235
+ Traceback (most recent call last):
236
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
237
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
238
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
239
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
240
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
241
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
242
+ KeyError: 'ॉ'
243
+ Traceback (most recent call last):
244
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
245
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
246
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
247
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
248
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
249
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
250
+ KeyError: 'ऑ'
251
+ Traceback (most recent call last):
252
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
253
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
254
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
255
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
256
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
257
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
258
+ KeyError: 'ॉ'
259
+ Traceback (most recent call last):
260
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
261
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
262
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
263
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
264
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
265
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
266
+ KeyError: 'ऑ'
267
+ Traceback (most recent call last):
268
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
269
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
270
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
271
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
272
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
273
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
274
+ KeyError: 'ॉ'
275
+ Traceback (most recent call last):
276
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
277
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
278
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
279
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
280
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
281
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
282
+ KeyError: 'ऑ'
283
+ Traceback (most recent call last):
284
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
285
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
286
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
287
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
288
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
289
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
290
+ KeyError: 'ऑ'
291
+ Traceback (most recent call last):
292
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
293
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
294
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
295
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
296
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
297
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
298
+ KeyError: 'ॉ'
299
+ Traceback (most recent call last):
300
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
301
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
302
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
303
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
304
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
305
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
306
+ KeyError: 'ॉ'
307
+ Traceback (most recent call last):
308
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
309
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
310
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
311
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
312
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
313
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
314
+ KeyError: 'ॉ'
315
+ Traceback (most recent call last):
316
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
317
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
318
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
319
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
320
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
321
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
322
+ KeyError: 'ऑ'
323
+ Traceback (most recent call last):
324
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
325
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
326
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
327
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
328
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
329
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
330
+ KeyError: 'ॉ'
331
+ Traceback (most recent call last):
332
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
333
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
334
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
335
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
336
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
337
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
338
+ KeyError: 'ॉ'
339
+ Traceback (most recent call last):
340
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
341
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
342
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
343
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
344
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
345
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
346
+ KeyError: 'ॉ'
347
+ Traceback (most recent call last):
348
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
349
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
350
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
351
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
352
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
353
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
354
+ KeyError: 'ॉ'
355
+ Traceback (most recent call last):
356
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
357
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
358
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
359
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
360
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
361
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
362
+ KeyError: 'ॉ'
363
+ Traceback (most recent call last):
364
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
365
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
366
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
367
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
368
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
369
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
370
+ KeyError: 'ॉ'
371
+ Traceback (most recent call last):
372
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
373
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
374
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
375
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
376
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
377
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
378
+ KeyError: 'ॉ'
379
+ Traceback (most recent call last):
380
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
381
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
382
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
383
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
384
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
385
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
386
+ KeyError: 'ॉ'
387
+ Traceback (most recent call last):
388
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
389
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
390
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
391
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
392
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
393
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
394
+ KeyError: 'ॉ'
395
+ Traceback (most recent call last):
396
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
397
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
398
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
399
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
400
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
401
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
402
+ KeyError: 'ॉ'
403
+ Traceback (most recent call last):
404
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
405
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
406
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
407
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
408
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
409
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
410
+ KeyError: 'ॉ'
411
+ Traceback (most recent call last):
412
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
413
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
414
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
415
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
416
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
417
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
418
+ KeyError: 'ॉ'
419
+ Traceback (most recent call last):
420
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
421
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
422
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
423
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
424
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
425
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
426
+ KeyError: 'ऑ'
427
+ Traceback (most recent call last):
428
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
429
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
430
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
431
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
432
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
433
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
434
+ KeyError: 'ऑ'
435
+ Traceback (most recent call last):
436
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
437
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
438
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
439
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
440
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
441
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
442
+ KeyError: 'ॉ'
443
+ Traceback (most recent call last):
444
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
445
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
446
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
447
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
448
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
449
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
450
+ KeyError: 'ॉ'
451
+ Traceback (most recent call last):
452
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
453
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
454
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
455
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
456
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
457
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
458
+ KeyError: 'ॉ'
459
+ Traceback (most recent call last):
460
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
461
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
462
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
463
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
464
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
465
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
466
+ KeyError: 'ॉ'
467
+ Traceback (most recent call last):
468
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
469
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
470
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
471
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
472
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
473
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
474
+ KeyError: 'ॉ'
475
+ Traceback (most recent call last):
476
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
477
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
478
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
479
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
480
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
481
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
482
+ KeyError: 'ॉ'
483
+ Traceback (most recent call last):
484
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
485
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
486
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
487
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
488
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
489
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
490
+ KeyError: 'ॉ'
491
+ Traceback (most recent call last):
492
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
493
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
494
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
495
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
496
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
497
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
498
+ KeyError: 'ॉ'
499
+ Traceback (most recent call last):
500
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
501
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
502
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
503
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
504
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
505
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
506
+ KeyError: 'ॉ'
507
+ Traceback (most recent call last):
508
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
509
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
510
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
511
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
512
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
513
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
514
+ KeyError: 'ॉ'
515
+ Traceback (most recent call last):
516
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
517
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
518
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
519
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
520
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
521
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
522
+ KeyError: 'ॉ'
523
+ Traceback (most recent call last):
524
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
525
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
526
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
527
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
528
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
529
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
530
+ KeyError: 'ॉ'
531
+ Traceback (most recent call last):
532
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
533
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
534
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
535
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
536
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
537
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
538
+ KeyError: 'ॉ'
539
+ Traceback (most recent call last):
540
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
541
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
542
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
543
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
544
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
545
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
546
+ KeyError: 'ॉ'
547
+ Traceback (most recent call last):
548
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
549
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
550
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
551
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
552
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
553
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
554
+ KeyError: 'ॉ'
555
+ Traceback (most recent call last):
556
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
557
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
558
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
559
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
560
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
561
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
562
+ KeyError: 'ॉ'
563
+ Traceback (most recent call last):
564
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
565
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
566
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
567
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
568
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
569
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
570
+ KeyError: 'ॉ'
571
+ Traceback (most recent call last):
572
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
573
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
574
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
575
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
576
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
577
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
578
+ KeyError: 'ॉ'
579
+ Traceback (most recent call last):
580
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
581
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
582
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
583
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
584
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
585
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
586
+ KeyError: 'ॉ'
587
+ Traceback (most recent call last):
588
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
589
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
590
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
591
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
592
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
593
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
594
+ KeyError: 'ॉ'
595
+ Traceback (most recent call last):
596
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
597
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
598
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
599
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
600
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
601
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
602
+ KeyError: 'ऑ'
603
+ Traceback (most recent call last):
604
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
605
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
606
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
607
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
608
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
609
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
610
+ KeyError: 'ॉ'
611
+ Traceback (most recent call last):
612
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
613
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
614
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
615
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
616
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
617
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
618
+ KeyError: 'ऑ'
619
+ Traceback (most recent call last):
620
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
621
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
622
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
623
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
624
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
625
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
626
+ KeyError: 'ॉ'
627
+ Traceback (most recent call last):
628
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
629
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
630
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
631
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
632
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
633
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
634
+ KeyError: 'ॉ'
635
+ Traceback (most recent call last):
636
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
637
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
638
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
639
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
640
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
641
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
642
+ KeyError: 'ॉ'
643
+ Traceback (most recent call last):
644
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
645
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
646
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
647
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
648
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
649
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
650
+ KeyError: 'ॉ'
651
+ Traceback (most recent call last):
652
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
653
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
654
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
655
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
656
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
657
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
658
+ KeyError: 'ॉ'
659
+ Traceback (most recent call last):
660
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
661
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
662
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
663
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
664
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
665
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
666
+ KeyError: 'ॉ'
667
+ Traceback (most recent call last):
668
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
669
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
670
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
671
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
672
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
673
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
674
+ KeyError: 'ॉ'
675
+ Traceback (most recent call last):
676
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
677
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
678
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
679
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
680
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
681
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
682
+ KeyError: 'ॉ'
683
+ Traceback (most recent call last):
684
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
685
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
686
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
687
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
688
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
689
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
690
+ KeyError: 'ॉ'
691
+ Traceback (most recent call last):
692
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
693
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
694
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
695
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
696
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
697
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
698
+ KeyError: 'ॉ'
699
+ Traceback (most recent call last):
700
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
701
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
702
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
703
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
704
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
705
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
706
+ KeyError: 'ऑ'
707
+ Traceback (most recent call last):
708
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
709
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
710
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
711
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
712
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
713
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
714
+ KeyError: 'ॉ'
715
+ Traceback (most recent call last):
716
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
717
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
718
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
719
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
720
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
721
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
722
+ KeyError: 'ॉ'
723
+ Traceback (most recent call last):
724
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
725
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
726
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
727
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
728
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
729
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
730
+ KeyError: 'ॉ'
731
+ Traceback (most recent call last):
732
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
733
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
734
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
735
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
736
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
737
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
738
+ KeyError: 'ॉ'
739
+ Traceback (most recent call last):
740
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
741
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
742
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
743
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
744
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
745
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
746
+ KeyError: 'ॉ'
747
+ Traceback (most recent call last):
748
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
749
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
750
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
751
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
752
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
753
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
754
+ KeyError: 'ॉ'
755
+ Traceback (most recent call last):
756
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
757
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
758
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
759
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
760
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
761
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
762
+ KeyError: 'ॉ'
763
+ Traceback (most recent call last):
764
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
765
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
766
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
767
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
768
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
769
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
770
+ KeyError: 'ऑ'
771
+ Traceback (most recent call last):
772
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
773
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
774
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
775
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
776
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
777
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
778
+ KeyError: 'ॉ'
779
+ Traceback (most recent call last):
780
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
781
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
782
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
783
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
784
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
785
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
786
+ KeyError: 'ॉ'
787
+ Traceback (most recent call last):
788
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
789
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
790
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
791
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
792
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
793
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
794
+ KeyError: 'ॉ'
795
+ Traceback (most recent call last):
796
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
797
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
798
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
799
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
800
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
801
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
802
+ KeyError: 'ॉ'
803
+ Traceback (most recent call last):
804
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
805
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
806
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
807
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
808
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
809
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
810
+ KeyError: 'ॉ'
811
+ Traceback (most recent call last):
812
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
813
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
814
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
815
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
816
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
817
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
818
+ KeyError: 'ॉ'
819
+ Traceback (most recent call last):
820
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
821
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
822
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
823
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
824
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
825
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
826
+ KeyError: 'ॉ'
827
+ Traceback (most recent call last):
828
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
829
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
830
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
831
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
832
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
833
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
834
+ KeyError: 'ऑ'
835
+ Traceback (most recent call last):
836
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
837
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
838
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
839
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
840
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
841
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
842
+ KeyError: 'ॉ'
843
+ Traceback (most recent call last):
844
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
845
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
846
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
847
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
848
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
849
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
850
+ KeyError: 'ऑ'
851
+ Traceback (most recent call last):
852
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
853
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
854
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
855
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
856
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
857
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
858
+ KeyError: 'ॉ'
859
+ Traceback (most recent call last):
860
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
861
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
862
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
863
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
864
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
865
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
866
+ KeyError: 'ऑ'
867
+ Traceback (most recent call last):
868
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
869
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
870
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
871
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
872
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
873
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
874
+ KeyError: 'ॉ'
875
+ Traceback (most recent call last):
876
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
877
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
878
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
879
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
880
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
881
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
882
+ KeyError: 'ॉ'
883
+ Traceback (most recent call last):
884
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
885
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
886
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
887
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
888
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
889
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
890
+ KeyError: 'ॉ'
891
+ Traceback (most recent call last):
892
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
893
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
894
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
895
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
896
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
897
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
898
+ KeyError: 'ॉ'
899
+ Traceback (most recent call last):
900
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
901
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
902
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
903
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
904
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
905
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
906
+ KeyError: 'ॉ'
907
+ Traceback (most recent call last):
908
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
909
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
910
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
911
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
912
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
913
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
914
+ KeyError: 'ॉ'
915
+ Traceback (most recent call last):
916
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
917
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
918
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
919
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
920
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
921
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
922
+ KeyError: 'ॉ'
923
+ Traceback (most recent call last):
924
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
925
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
926
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
927
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
928
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
929
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
930
+ KeyError: 'ॉ'
931
+ Traceback (most recent call last):
932
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
933
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
934
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
935
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
936
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
937
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
938
+ KeyError: 'ॉ'
939
+ Traceback (most recent call last):
940
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
941
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
942
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
943
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
944
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
945
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
946
+ KeyError: 'ॉ'
947
+ Traceback (most recent call last):
948
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
949
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
950
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
951
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
952
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
953
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
954
+ KeyError: 'ॉ'
955
+ Traceback (most recent call last):
956
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
957
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
958
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
959
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
960
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
961
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
962
+ KeyError: 'ऑ'
963
+ Traceback (most recent call last):
964
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
965
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
966
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
967
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
968
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
969
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
970
+ KeyError: 'ॉ'
971
+ Traceback (most recent call last):
972
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
973
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
974
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
975
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
976
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
977
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
978
+ KeyError: 'ऑ'
979
+ Traceback (most recent call last):
980
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
981
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
982
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
983
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
984
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
985
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
986
+ KeyError: 'ॉ'
987
+ Traceback (most recent call last):
988
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
989
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
990
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
991
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
992
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
993
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
994
+ KeyError: 'ॉ'
995
+ Traceback (most recent call last):
996
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
997
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
998
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
999
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1000
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
1001
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1002
+ KeyError: 'ऑ'
1003
+ Traceback (most recent call last):
1004
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
1005
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
1006
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
1007
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1008
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
1009
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1010
+ KeyError: 'ऑ'
1011
+ Traceback (most recent call last):
1012
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
1013
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
1014
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
1015
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1016
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
1017
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1018
+ KeyError: 'ॉ'
1019
+ Traceback (most recent call last):
1020
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
1021
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
1022
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
1023
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1024
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
1025
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1026
+ KeyError: 'ॉ'
1027
+ Traceback (most recent call last):
1028
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
1029
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
1030
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
1031
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1032
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
1033
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1034
+ KeyError: 'ऑ'
1035
+ Traceback (most recent call last):
1036
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
1037
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
1038
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
1039
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1040
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
1041
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1042
+ KeyError: 'ॉ'
1043
+ Traceback (most recent call last):
1044
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
1045
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
1046
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
1047
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1048
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
1049
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1050
+ KeyError: 'ॉ'
1051
+ Traceback (most recent call last):
1052
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
1053
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
1054
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
1055
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1056
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
1057
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1058
+ KeyError: 'ॉ'
1059
+ Traceback (most recent call last):
1060
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
1061
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
1062
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
1063
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1064
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
1065
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1066
+ KeyError: 'ॉ'
1067
+ Traceback (most recent call last):
1068
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
1069
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
1070
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
1071
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1072
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
1073
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1074
+ KeyError: 'ॉ'
1075
+ Traceback (most recent call last):
1076
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
1077
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
1078
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
1079
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1080
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
1081
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1082
+ KeyError: 'ॉ'
1083
+ Traceback (most recent call last):
1084
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
1085
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
1086
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
1087
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1088
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
1089
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1090
+ KeyError: 'ॉ'
1091
+ Traceback (most recent call last):
1092
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
1093
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
1094
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
1095
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1096
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
1097
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1098
+ KeyError: 'ॉ'
1099
+ Traceback (most recent call last):
1100
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
1101
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
1102
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
1103
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1104
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
1105
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1106
+ KeyError: 'ॉ'
1107
+ Traceback (most recent call last):
1108
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
1109
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
1110
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
1111
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1112
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
1113
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1114
+ KeyError: 'ॉ'
1115
+ Traceback (most recent call last):
1116
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
1117
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
1118
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
1119
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1120
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
1121
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1122
+ KeyError: 'ॉ'
1123
+ Traceback (most recent call last):
1124
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
1125
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
1126
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
1127
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1128
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
1129
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1130
+ KeyError: 'ॉ'
1131
+ Traceback (most recent call last):
1132
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
1133
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
1134
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
1135
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1136
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
1137
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1138
+ KeyError: 'ऑ'
1139
+
1140
+ Detected KeyboardInterrupt, attempting graceful shutdown ...
wandb/run-20241203_185453-2eog1nt2/files/wandb-metadata.json ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.4.0-196-generic-x86_64-with-glibc2.35",
3
+ "python": "3.10.12",
4
+ "startedAt": "2024-12-03T18:54:53.423670Z",
5
+ "args": [
6
+ "-c",
7
+ "configs/s1.yaml"
8
+ ],
9
+ "program": "/workspace/GPT-SoVITS/GPT_SoVITS/s1_train.py",
10
+ "codePath": "GPT_SoVITS/s1_train.py",
11
+ "git": {
12
+ "remote": "https://github.com/RVC-Boss/GPT-SoVITS.git",
13
+ "commit": "a70e1ad30c072cdbcfb716962abdc8008fa41cc2"
14
+ },
15
+ "email": "[email protected]",
16
+ "root": "/workspace/GPT-SoVITS",
17
+ "host": "7a6bba088bf1",
18
+ "username": "root",
19
+ "executable": "/usr/bin/python",
20
+ "codePathLocal": "GPT_SoVITS/s1_train.py",
21
+ "cpu_count": 48,
22
+ "cpu_count_logical": 96,
23
+ "gpu": "NVIDIA A40",
24
+ "gpu_count": 1,
25
+ "disk": {
26
+ "/": {
27
+ "total": "42949672960",
28
+ "used": "3612602368"
29
+ }
30
+ },
31
+ "memory": {
32
+ "total": "540662632448"
33
+ },
34
+ "cpu": {
35
+ "count": 48,
36
+ "countLogical": 96
37
+ },
38
+ "gpu_nvidia": [
39
+ {
40
+ "name": "NVIDIA A40",
41
+ "memoryTotal": "48305799168",
42
+ "cudaCores": 10752,
43
+ "architecture": "Ampere"
44
+ }
45
+ ],
46
+ "cudaVersion": "12.7"
47
+ }
wandb/run-20241203_185453-2eog1nt2/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"_wandb":{"runtime":5}}
wandb/run-20241203_185453-2eog1nt2/logs/debug-core.log ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2024-12-03T18:54:52.901098169Z","level":"INFO","msg":"started logging, with flags","port-filename":"/tmp/tmp1th9fdp8/port-22344.txt","pid":22344,"debug":false,"disable-analytics":false}
2
+ {"time":"2024-12-03T18:54:52.901139573Z","level":"INFO","msg":"FeatureState","shutdownOnParentExitEnabled":false}
3
+ {"time":"2024-12-03T18:54:52.901707465Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":22344}
4
+ {"time":"2024-12-03T18:54:52.901751292Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":37953,"Zone":""}}
5
+ {"time":"2024-12-03T18:54:53.085077672Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:53598"}
6
+ {"time":"2024-12-03T18:54:53.427167893Z","level":"INFO","msg":"handleInformInit: received","streamId":"2eog1nt2","id":"127.0.0.1:53598"}
7
+ {"time":"2024-12-03T18:54:53.557164059Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"2eog1nt2","id":"127.0.0.1:53598"}
8
+ {"time":"2024-12-03T18:54:59.344386691Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:53598"}
9
+ {"time":"2024-12-03T18:54:59.344540917Z","level":"INFO","msg":"connection: Close: initiating connection closure","id":"127.0.0.1:53598"}
10
+ {"time":"2024-12-03T18:54:59.34469848Z","level":"INFO","msg":"connection: Close: connection successfully closed","id":"127.0.0.1:53598"}
11
+ {"time":"2024-12-03T18:54:59.344689478Z","level":"INFO","msg":"server is shutting down"}
12
+ {"time":"2024-12-03T18:55:00.055284536Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:53598"}
13
+ {"time":"2024-12-03T18:55:00.055376635Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:53598"}
14
+ {"time":"2024-12-03T18:55:00.055426719Z","level":"INFO","msg":"server is closed"}
wandb/run-20241203_185453-2eog1nt2/logs/debug-internal.log ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2024-12-03T18:54:53.430425698Z","level":"INFO","msg":"using version","core version":"0.18.7"}
2
+ {"time":"2024-12-03T18:54:53.431636023Z","level":"INFO","msg":"created symlink","path":"/workspace/GPT-SoVITS/wandb/run-20241203_185453-2eog1nt2/logs/debug-core.log"}
3
+ {"time":"2024-12-03T18:54:53.555785799Z","level":"INFO","msg":"created new stream","id":"2eog1nt2"}
4
+ {"time":"2024-12-03T18:54:53.556507344Z","level":"INFO","msg":"stream: started","id":"2eog1nt2"}
5
+ {"time":"2024-12-03T18:54:53.55667089Z","level":"INFO","msg":"writer: Do: started","stream_id":"2eog1nt2"}
6
+ {"time":"2024-12-03T18:54:53.556777182Z","level":"INFO","msg":"handler: started","stream_id":"2eog1nt2"}
7
+ {"time":"2024-12-03T18:54:53.556777662Z","level":"INFO","msg":"sender: started","stream_id":"2eog1nt2"}
8
+ {"time":"2024-12-03T18:54:53.924884022Z","level":"INFO","msg":"Starting system monitor"}
9
+ {"time":"2024-12-03T18:54:59.344524138Z","level":"INFO","msg":"stream: closing","id":"2eog1nt2"}
10
+ {"time":"2024-12-03T18:54:59.344765045Z","level":"INFO","msg":"Stopping system monitor"}
11
+ {"time":"2024-12-03T18:54:59.346382242Z","level":"INFO","msg":"Stopped system monitor"}
12
+ {"time":"2024-12-03T18:54:59.826491777Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
13
+ {"time":"2024-12-03T18:55:00.049648629Z","level":"INFO","msg":"handler: closed","stream_id":"2eog1nt2"}
14
+ {"time":"2024-12-03T18:55:00.049753825Z","level":"INFO","msg":"writer: Close: closed","stream_id":"2eog1nt2"}
15
+ {"time":"2024-12-03T18:55:00.049814222Z","level":"INFO","msg":"sender: closed","stream_id":"2eog1nt2"}
16
+ {"time":"2024-12-03T18:55:00.052569227Z","level":"INFO","msg":"stream: closed","id":"2eog1nt2"}
wandb/run-20241203_185453-2eog1nt2/run-2eog1nt2.wandb ADDED
Binary file (165 kB). View file
 
wandb/run-20241203_185605-tfl0uvkj/files/config.yaml ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _wandb:
2
+ value:
3
+ cli_version: 0.18.7
4
+ m: []
5
+ python_version: 3.10.12
6
+ t:
7
+ "1":
8
+ - 1
9
+ - 5
10
+ - 9
11
+ - 11
12
+ - 49
13
+ - 53
14
+ - 55
15
+ - 103
16
+ - 105
17
+ "2":
18
+ - 1
19
+ - 5
20
+ - 9
21
+ - 11
22
+ - 49
23
+ - 53
24
+ - 55
25
+ - 103
26
+ - 105
27
+ "3":
28
+ - 1
29
+ - 13
30
+ - 16
31
+ - 23
32
+ - 55
33
+ "4": 3.10.12
34
+ "5": 0.18.7
35
+ "6": 4.46.3
36
+ "8":
37
+ - 5
38
+ "12": 0.18.7
39
+ "13": linux-x86_64
40
+ data:
41
+ value:
42
+ batch_size: 8
43
+ cleaned_text: true
44
+ filter_length: 2048
45
+ hop_length: 640
46
+ max_frames: 60
47
+ max_sec: 60
48
+ mel_channels: 128
49
+ mel_fmax: null
50
+ mel_fmin: 0
51
+ num_workers: 4
52
+ pad_val: 1024
53
+ training_files: data8
54
+ win_length: 2048
55
+ model:
56
+ value:
57
+ EOS: 2047
58
+ attention_dropout: 0.1
59
+ attn_pdrop: 0.1
60
+ block_size: 1000
61
+ dropout: 0.1
62
+ embd_pdrop: 0.1
63
+ embedding_dim: 768
64
+ ffn_hidden: 3072
65
+ head: 12
66
+ hidden_dim: 768
67
+ hidden_dropout: 0.1
68
+ max_mel_positions: 8000
69
+ max_text_positions: 2048
70
+ n_embd: 768
71
+ n_layer: 12
72
+ num_layers: 6
73
+ pad_val: 1024
74
+ phoneme_vocab_size: 2048
75
+ postnet_dim: 384
76
+ postnet_layers: 3
77
+ prenet_dim: 384
78
+ prenet_layers: 3
79
+ resid_pdrop: 0.1
80
+ semantic_dim: 1024
81
+ vocab_size: 2048
82
+ output_dir:
83
+ value: logs/s1
84
+ train:
85
+ value:
86
+ batch_size: 8
87
+ epochs: 15
88
+ exp_name: gpt_training
89
+ half_weights_save_dir: weights/s1
90
+ if_save_every_weights: true
91
+ if_save_latest: true
92
+ precision: 32
93
+ save_every_n_epoch: 5
94
+ seed: 1234
95
+ wandb:
96
+ entity: null
97
+ log_interval: 100
98
+ name: stage1_training
99
+ project: gpt-sovits-hindi
100
+ train_phoneme_path:
101
+ value: data8/phoneme.txt
102
+ train_semantic_path:
103
+ value: data8/semantic.tsv
wandb/run-20241203_185605-tfl0uvkj/files/output.log ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ GPU available: True (cuda), used: True
2
+ TPU available: False, using: 0 TPU cores
3
+ HPU available: False, using: 0 HPUs
4
+ Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/1
5
+ ----------------------------------------------------------------------------------------------------
6
+ distributed_backend=nccl
7
+ All distributed processes registered. Starting with 1 processes
8
+ ----------------------------------------------------------------------------------------------------
9
+
10
+ /usr/local/lib/python3.10/dist-packages/pytorch_lightning/loggers/wandb.py:396: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.
11
+ semantic_data_len: 726
12
+ phoneme_data_len: 727
13
+ data8/wavs/0.wav अखरोट साइज में कैसी होगी ताकि फिर खोल सिविल डिफेंस पास तो इसमें क्या इशू जाते हैं ऐसी क्या प्रॉब्लम आती है 5 से रिलेटेड जॉब्स अपडेटेड साइट सौगंध पीपल फ्रॉम ऑल ग्रेंस बैकग्राउंड तो क्या ऐसे इशूज है जो पास से रिलेटेड आते हैं दायित्व प्रॉब्लम्स से मेरे पास आते हैं वह रिलेशनशिप को लेकर कि आते हैं इस पेस्ट को अनम्यूट रिलेशनशिप को लेकर किया पास भी किसी ने मेरे साथ कुछ किया मैं वह लेट को
14
+ 0 data8/wavs/1.wav साथ मुझे बहुत टाइम पहले कैंची याद आती है ऑफ मु...
15
+ 1 data8/wavs/2.wav कि वन भूल जाओ अब आगे देखो फीचर को देखो लेकिन प...
16
+ 2 data8/wavs/3.wav हैं बिकॉज़ क्या होता है कि किसी को भी उस पेन अ...
17
+ 3 data8/wavs/4.wav नेक्स्ट टो इंपासिबल जब तक कि हम यह न समझ लें क...
18
+ 4 data8/wavs/5.wav में रखी है तू जान उसको जिसमें वांट बट अगर उसको...
19
+ .. ... ...
20
+ 721 data8/wavs/722.wav होने लगेंगी तो इस सबको सलूशन किया है या जो भी ...
21
+ 722 data8/wavs/723.wav की फैट की सब कुछ इंपोर्टेंट है हो सकता पतले हो...
22
+ 723 data8/wavs/724.wav उस समय हर चीज द प्रॉब्लम है फ्रेगनेट चाहती है ...
23
+ 724 data8/wavs/725.wav है तो इसको थोड़ी सी देर के लिए बैटर फील होता ह...
24
+ 725 data8/wavs/726.wav हो यानी बेसिक्स के एकॉर्डिंग हो हैं तो इन जस्ट...
25
+
26
+ [726 rows x 2 columns]
27
+ deleted 48 audios who's phoneme/sec are bigger than 25 or smaller than 3
28
+ dataset.__len__(): 678
29
+ LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
30
+ Traceback (most recent call last):
31
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/s1_train.py", line 113, in <module>
32
+ main(args)
33
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/s1_train.py", line 100, in main
34
+ trainer.fit(model, data_module)
35
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/trainer.py", line 538, in fit
36
+ call._call_and_handle_interrupt(
37
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/call.py", line 46, in _call_and_handle_interrupt
38
+ return trainer.strategy.launcher.launch(trainer_fn, *args, trainer=trainer, **kwargs)
39
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/strategies/launchers/subprocess_script.py", line 105, in launch
40
+ return function(*args, **kwargs)
41
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/trainer.py", line 574, in _fit_impl
42
+ self._run(model, ckpt_path=ckpt_path)
43
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/trainer.py", line 957, in _run
44
+ self.strategy.setup(self)
45
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/strategies/ddp.py", line 174, in setup
46
+ self.setup_optimizers(trainer)
47
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/strategies/strategy.py", line 138, in setup_optimizers
48
+ self.optimizers, self.lr_scheduler_configs = _init_optimizers_and_lr_schedulers(self.lightning_module)
49
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/core/optimizer.py", line 179, in _init_optimizers_and_lr_schedulers
50
+ optim_conf = call._call_lightning_module_hook(model.trainer, "configure_optimizers", pl_module=model)
51
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/call.py", line 167, in _call_lightning_module_hook
52
+ output = fn(*args, **kwargs)
53
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/models/t2s_lightning_module.py", line 134, in configure_optimizers
54
+ init_lr=self.config["optimizer"]["lr_init"],
55
+ KeyError: 'optimizer'
56
+ Traceback (most recent call last):
57
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/s1_train.py", line 113, in <module>
58
+ main(args)
59
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/s1_train.py", line 100, in main
60
+ trainer.fit(model, data_module)
61
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/trainer.py", line 538, in fit
62
+ call._call_and_handle_interrupt(
63
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/call.py", line 46, in _call_and_handle_interrupt
64
+ return trainer.strategy.launcher.launch(trainer_fn, *args, trainer=trainer, **kwargs)
65
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/strategies/launchers/subprocess_script.py", line 105, in launch
66
+ return function(*args, **kwargs)
67
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/trainer.py", line 574, in _fit_impl
68
+ self._run(model, ckpt_path=ckpt_path)
69
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/trainer.py", line 957, in _run
70
+ self.strategy.setup(self)
71
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/strategies/ddp.py", line 174, in setup
72
+ self.setup_optimizers(trainer)
73
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/strategies/strategy.py", line 138, in setup_optimizers
74
+ self.optimizers, self.lr_scheduler_configs = _init_optimizers_and_lr_schedulers(self.lightning_module)
75
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/core/optimizer.py", line 179, in _init_optimizers_and_lr_schedulers
76
+ optim_conf = call._call_lightning_module_hook(model.trainer, "configure_optimizers", pl_module=model)
77
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/call.py", line 167, in _call_lightning_module_hook
78
+ output = fn(*args, **kwargs)
79
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/models/t2s_lightning_module.py", line 134, in configure_optimizers
80
+ init_lr=self.config["optimizer"]["lr_init"],
81
+ KeyError: 'optimizer'
wandb/run-20241203_185605-tfl0uvkj/files/wandb-metadata.json ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.4.0-196-generic-x86_64-with-glibc2.35",
3
+ "python": "3.10.12",
4
+ "startedAt": "2024-12-03T18:56:05.716152Z",
5
+ "args": [
6
+ "-c",
7
+ "configs/s1.yaml"
8
+ ],
9
+ "program": "/workspace/GPT-SoVITS/GPT_SoVITS/s1_train.py",
10
+ "codePath": "GPT_SoVITS/s1_train.py",
11
+ "git": {
12
+ "remote": "https://github.com/RVC-Boss/GPT-SoVITS.git",
13
+ "commit": "a70e1ad30c072cdbcfb716962abdc8008fa41cc2"
14
+ },
15
+ "email": "[email protected]",
16
+ "root": "/workspace/GPT-SoVITS",
17
+ "host": "7a6bba088bf1",
18
+ "username": "root",
19
+ "executable": "/usr/bin/python",
20
+ "codePathLocal": "GPT_SoVITS/s1_train.py",
21
+ "cpu_count": 48,
22
+ "cpu_count_logical": 96,
23
+ "gpu": "NVIDIA A40",
24
+ "gpu_count": 1,
25
+ "disk": {
26
+ "/": {
27
+ "total": "42949672960",
28
+ "used": "3612618752"
29
+ }
30
+ },
31
+ "memory": {
32
+ "total": "540662632448"
33
+ },
34
+ "cpu": {
35
+ "count": 48,
36
+ "countLogical": 96
37
+ },
38
+ "gpu_nvidia": [
39
+ {
40
+ "name": "NVIDIA A40",
41
+ "memoryTotal": "48305799168",
42
+ "cudaCores": 10752,
43
+ "architecture": "Ampere"
44
+ }
45
+ ],
46
+ "cudaVersion": "12.7"
47
+ }
wandb/run-20241203_185605-tfl0uvkj/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"_wandb":{"runtime":12}}
wandb/run-20241203_185605-tfl0uvkj/logs/debug-core.log ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2024-12-03T18:56:05.255120107Z","level":"INFO","msg":"started logging, with flags","port-filename":"/tmp/tmpjj94wwee/port-22844.txt","pid":22844,"debug":false,"disable-analytics":false}
2
+ {"time":"2024-12-03T18:56:05.255154724Z","level":"INFO","msg":"FeatureState","shutdownOnParentExitEnabled":false}
3
+ {"time":"2024-12-03T18:56:05.255597668Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":22844}
4
+ {"time":"2024-12-03T18:56:05.255639452Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":43237,"Zone":""}}
5
+ {"time":"2024-12-03T18:56:05.437957868Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:57582"}
6
+ {"time":"2024-12-03T18:56:05.720550265Z","level":"INFO","msg":"handleInformInit: received","streamId":"tfl0uvkj","id":"127.0.0.1:57582"}
7
+ {"time":"2024-12-03T18:56:05.846473014Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"tfl0uvkj","id":"127.0.0.1:57582"}
8
+ {"time":"2024-12-03T18:56:17.728386531Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:57582"}
9
+ {"time":"2024-12-03T18:56:17.728524639Z","level":"INFO","msg":"connection: Close: initiating connection closure","id":"127.0.0.1:57582"}
10
+ {"time":"2024-12-03T18:56:17.728611687Z","level":"INFO","msg":"server is shutting down"}
11
+ {"time":"2024-12-03T18:56:17.728766722Z","level":"INFO","msg":"connection: Close: connection successfully closed","id":"127.0.0.1:57582"}
12
+ {"time":"2024-12-03T18:56:18.402657245Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:57582"}
13
+ {"time":"2024-12-03T18:56:18.402725927Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:57582"}
14
+ {"time":"2024-12-03T18:56:18.402744573Z","level":"INFO","msg":"server is closed"}
wandb/run-20241203_185605-tfl0uvkj/logs/debug-internal.log ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2024-12-03T18:56:05.722088979Z","level":"INFO","msg":"using version","core version":"0.18.7"}
2
+ {"time":"2024-12-03T18:56:05.723035183Z","level":"INFO","msg":"created symlink","path":"/workspace/GPT-SoVITS/wandb/run-20241203_185605-tfl0uvkj/logs/debug-core.log"}
3
+ {"time":"2024-12-03T18:56:05.845849169Z","level":"INFO","msg":"created new stream","id":"tfl0uvkj"}
4
+ {"time":"2024-12-03T18:56:05.846223002Z","level":"INFO","msg":"stream: started","id":"tfl0uvkj"}
5
+ {"time":"2024-12-03T18:56:05.846266483Z","level":"INFO","msg":"writer: Do: started","stream_id":"tfl0uvkj"}
6
+ {"time":"2024-12-03T18:56:05.846270711Z","level":"INFO","msg":"handler: started","stream_id":"tfl0uvkj"}
7
+ {"time":"2024-12-03T18:56:05.846290518Z","level":"INFO","msg":"sender: started","stream_id":"tfl0uvkj"}
8
+ {"time":"2024-12-03T18:56:06.133251456Z","level":"INFO","msg":"Starting system monitor"}
9
+ {"time":"2024-12-03T18:56:17.728529149Z","level":"INFO","msg":"stream: closing","id":"tfl0uvkj"}
10
+ {"time":"2024-12-03T18:56:17.728772449Z","level":"INFO","msg":"Stopping system monitor"}
11
+ {"time":"2024-12-03T18:56:17.731915231Z","level":"INFO","msg":"Stopped system monitor"}
12
+ {"time":"2024-12-03T18:56:18.14960212Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
13
+ {"time":"2024-12-03T18:56:18.392894708Z","level":"INFO","msg":"handler: closed","stream_id":"tfl0uvkj"}
14
+ {"time":"2024-12-03T18:56:18.392995863Z","level":"INFO","msg":"writer: Close: closed","stream_id":"tfl0uvkj"}
15
+ {"time":"2024-12-03T18:56:18.393014563Z","level":"INFO","msg":"sender: closed","stream_id":"tfl0uvkj"}
16
+ {"time":"2024-12-03T18:56:18.400170737Z","level":"INFO","msg":"stream: closed","id":"tfl0uvkj"}
wandb/run-20241203_185605-tfl0uvkj/logs/debug.log ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2024-12-03 18:56:05,696 INFO MainThread:22844 [wandb_setup.py:_flush():79] Current SDK version is 0.18.7
2
+ 2024-12-03 18:56:05,697 INFO MainThread:22844 [wandb_setup.py:_flush():79] Configure stats pid to 22844
3
+ 2024-12-03 18:56:05,698 INFO MainThread:22844 [wandb_setup.py:_flush():79] Loading settings from /root/.config/wandb/settings
4
+ 2024-12-03 18:56:05,698 INFO MainThread:22844 [wandb_setup.py:_flush():79] Loading settings from /workspace/GPT-SoVITS/wandb/settings
5
+ 2024-12-03 18:56:05,698 INFO MainThread:22844 [wandb_setup.py:_flush():79] Loading settings from environment variables: {}
6
+ 2024-12-03 18:56:05,698 INFO MainThread:22844 [wandb_setup.py:_flush():79] Applying setup settings: {'mode': None, '_disable_service': None}
7
+ 2024-12-03 18:56:05,698 INFO MainThread:22844 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'GPT_SoVITS/s1_train.py', 'program_abspath': '/workspace/GPT-SoVITS/GPT_SoVITS/s1_train.py', 'program': '/workspace/GPT-SoVITS/GPT_SoVITS/s1_train.py'}
8
+ 2024-12-03 18:56:05,699 INFO MainThread:22844 [wandb_setup.py:_flush():79] Applying login settings: {}
9
+ 2024-12-03 18:56:05,699 INFO MainThread:22844 [wandb_init.py:_log_setup():533] Logging user logs to /workspace/GPT-SoVITS/wandb/run-20241203_185605-tfl0uvkj/logs/debug.log
10
+ 2024-12-03 18:56:05,700 INFO MainThread:22844 [wandb_init.py:_log_setup():534] Logging internal logs to /workspace/GPT-SoVITS/wandb/run-20241203_185605-tfl0uvkj/logs/debug-internal.log
11
+ 2024-12-03 18:56:05,700 INFO MainThread:22844 [wandb_init.py:init():619] calling init triggers
12
+ 2024-12-03 18:56:05,700 INFO MainThread:22844 [wandb_init.py:init():626] wandb.init called with sweep_config: {}
13
+ config: {'output_dir': 'logs/s1', 'train': {'seed': 1234, 'epochs': 15, 'batch_size': 8, 'save_every_n_epoch': 5, 'precision': 32, 'if_save_latest': True, 'if_save_every_weights': True, 'exp_name': 'gpt_training', 'half_weights_save_dir': 'weights/s1', 'wandb': {'project': 'gpt-sovits-hindi', 'name': 'stage1_training', 'entity': None, 'log_interval': 100}}, 'data': {'training_files': 'data8', 'max_sec': 60, 'max_frames': 60, 'filter_length': 2048, 'hop_length': 640, 'win_length': 2048, 'mel_channels': 128, 'mel_fmin': 0.0, 'mel_fmax': None, 'cleaned_text': True, 'num_workers': 4, 'batch_size': 8, 'pad_val': 1024}, 'train_semantic_path': 'data8/semantic.tsv', 'train_phoneme_path': 'data8/phoneme.txt', 'model': {'hidden_dim': 768, 'embedding_dim': 768, 'n_layer': 12, 'head': 12, 'n_embd': 768, 'vocab_size': 2048, 'block_size': 1000, 'embd_pdrop': 0.1, 'resid_pdrop': 0.1, 'attn_pdrop': 0.1, 'semantic_dim': 1024, 'num_layers': 6, 'ffn_hidden': 3072, 'dropout': 0.1, 'attention_dropout': 0.1, 'hidden_dropout': 0.1, 'max_text_positions': 2048, 'max_mel_positions': 8000, 'prenet_dim': 384, 'postnet_dim': 384, 'prenet_layers': 3, 'postnet_layers': 3, 'phoneme_vocab_size': 2048, 'EOS': 2047, 'pad_val': 1024}}
14
+ 2024-12-03 18:56:05,701 INFO MainThread:22844 [wandb_init.py:init():669] starting backend
15
+ 2024-12-03 18:56:05,701 INFO MainThread:22844 [wandb_init.py:init():673] sending inform_init request
16
+ 2024-12-03 18:56:05,714 INFO MainThread:22844 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
17
+ 2024-12-03 18:56:05,715 INFO MainThread:22844 [wandb_init.py:init():686] backend started and connected
18
+ 2024-12-03 18:56:05,728 INFO MainThread:22844 [wandb_init.py:init():781] updated telemetry
19
+ 2024-12-03 18:56:05,760 INFO MainThread:22844 [wandb_init.py:init():814] communicating run to backend with 90.0 second timeout
20
+ 2024-12-03 18:56:06,101 INFO MainThread:22844 [wandb_init.py:init():867] starting run threads in backend
21
+ 2024-12-03 18:56:06,391 INFO MainThread:22844 [wandb_run.py:_console_start():2456] atexit reg
22
+ 2024-12-03 18:56:06,391 INFO MainThread:22844 [wandb_run.py:_redirect():2305] redirect: wrap_raw
23
+ 2024-12-03 18:56:06,393 INFO MainThread:22844 [wandb_run.py:_redirect():2370] Wrapping output streams.
24
+ 2024-12-03 18:56:06,393 INFO MainThread:22844 [wandb_run.py:_redirect():2395] Redirects installed.
25
+ 2024-12-03 18:56:06,395 INFO MainThread:22844 [wandb_init.py:init():911] run started, returning control to user process
26
+ 2024-12-03 18:56:06,941 INFO MainThread:22844 [wandb_watch.py:_watch():71] Watching
27
+ 2024-12-03 18:56:17,728 WARNING MsgRouterThr:22844 [router.py:message_loop():75] message_loop has been closed
wandb/run-20241203_185605-tfl0uvkj/run-tfl0uvkj.wandb ADDED
Binary file (21.4 kB). View file