Alyosha11 commited on
Commit
3e961d9
·
verified ·
1 Parent(s): 7ef2538

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .dockerignore +8 -0
  2. Dockerfile +42 -0
  3. GPT_SoVITS_Inference.ipynb +152 -0
  4. LICENSE +21 -0
  5. README.md +348 -0
  6. YouTubeAudioText/.gitattributes +59 -0
  7. YouTubeAudioText/data4.zip +0 -0
  8. YouTubeAudioText/data7.zip +0 -0
  9. api.py +940 -0
  10. colab_webui.ipynb +97 -0
  11. go-webui.bat +2 -0
  12. go-webui.ps1 +4 -0
  13. install.sh +6 -0
  14. pretrained_models/chinese-hubert-base/config.json +71 -0
  15. pretrained_models/chinese-hubert-base/preprocessor_config.json +9 -0
  16. requirements.txt +36 -0
  17. wandb/debug-internal.log +16 -0
  18. wandb/debug.log +29 -0
  19. wandb/run-20241203_183434-y6gzynz8/files/output.log +0 -0
  20. wandb/run-20241203_184628-qm0jlwqu/files/config.yaml +103 -0
  21. wandb/run-20241203_184628-qm0jlwqu/files/output.log +0 -0
  22. wandb/run-20241203_184628-qm0jlwqu/files/wandb-summary.json +1 -0
  23. wandb/run-20241203_185057-b425fq2v/files/wandb-metadata.json +47 -0
  24. wandb/run-20241203_185057-b425fq2v/logs/debug-core.log +14 -0
  25. wandb/run-20241203_185057-b425fq2v/logs/debug.log +27 -0
  26. wandb/run-20241203_185057-b425fq2v/run-b425fq2v.wandb +0 -0
  27. wandb/run-20241203_185203-najxbup6/files/output.log +82 -0
  28. wandb/run-20241203_185203-najxbup6/files/wandb-metadata.json +47 -0
  29. wandb/run-20241203_185203-najxbup6/logs/debug-core.log +14 -0
  30. wandb/run-20241203_185203-najxbup6/logs/debug-internal.log +16 -0
  31. wandb/run-20241203_185203-najxbup6/run-najxbup6.wandb +0 -0
  32. wandb/run-20241203_185257-7zp6kxhx/files/output.log +0 -0
  33. wandb/run-20241203_185257-7zp6kxhx/files/wandb-metadata.json +47 -0
  34. wandb/run-20241203_185257-7zp6kxhx/logs/debug-core.log +12 -0
  35. wandb/run-20241203_185257-7zp6kxhx/logs/debug-internal.log +11 -0
  36. wandb/run-20241203_185453-2eog1nt2/files/config.yaml +103 -0
  37. wandb/run-20241203_185453-2eog1nt2/files/output.log +1140 -0
  38. wandb/run-20241203_185453-2eog1nt2/files/wandb-metadata.json +47 -0
  39. wandb/run-20241203_185453-2eog1nt2/files/wandb-summary.json +1 -0
  40. wandb/run-20241203_185453-2eog1nt2/logs/debug-core.log +14 -0
  41. wandb/run-20241203_185453-2eog1nt2/logs/debug-internal.log +16 -0
  42. wandb/run-20241203_185453-2eog1nt2/run-2eog1nt2.wandb +0 -0
  43. wandb/run-20241203_185605-tfl0uvkj/files/config.yaml +103 -0
  44. wandb/run-20241203_185605-tfl0uvkj/files/output.log +81 -0
  45. wandb/run-20241203_185605-tfl0uvkj/files/wandb-metadata.json +47 -0
  46. wandb/run-20241203_185605-tfl0uvkj/files/wandb-summary.json +1 -0
  47. wandb/run-20241203_185605-tfl0uvkj/logs/debug-core.log +14 -0
  48. wandb/run-20241203_185605-tfl0uvkj/logs/debug-internal.log +16 -0
  49. wandb/run-20241203_185605-tfl0uvkj/logs/debug.log +27 -0
  50. wandb/run-20241203_185605-tfl0uvkj/run-tfl0uvkj.wandb +0 -0
.dockerignore ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ docs
2
+ logs
3
+ output
4
+ reference
5
+ SoVITS_weights
6
+ GPT_weights
7
+ TEMP
8
+ .git
Dockerfile ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Base CUDA image
2
+ FROM cnstark/pytorch:2.0.1-py3.9.17-cuda11.8.0-ubuntu20.04
3
+
4
+ LABEL maintainer="[email protected]"
5
+ LABEL version="dev-20240209"
6
+ LABEL description="Docker image for GPT-SoVITS"
7
+
8
+
9
+ # Install 3rd party apps
10
+ ENV DEBIAN_FRONTEND=noninteractive
11
+ ENV TZ=Etc/UTC
12
+ RUN apt-get update && \
13
+ apt-get install -y --no-install-recommends tzdata ffmpeg libsox-dev parallel aria2 git git-lfs && \
14
+ git lfs install && \
15
+ rm -rf /var/lib/apt/lists/*
16
+
17
+ # Copy only requirements.txt initially to leverage Docker cache
18
+ WORKDIR /workspace
19
+ COPY requirements.txt /workspace/
20
+ RUN pip install --no-cache-dir -r requirements.txt
21
+
22
+ # Define a build-time argument for image type
23
+ ARG IMAGE_TYPE=full
24
+
25
+ # Conditional logic based on the IMAGE_TYPE argument
26
+ # Always copy the Docker directory, but only use it if IMAGE_TYPE is not "elite"
27
+ COPY ./Docker /workspace/Docker
28
+ # elite 类型的镜像里面不包含额外的模型
29
+ RUN if [ "$IMAGE_TYPE" != "elite" ]; then \
30
+ chmod +x /workspace/Docker/download.sh && \
31
+ /workspace/Docker/download.sh && \
32
+ python /workspace/Docker/download.py && \
33
+ python -m nltk.downloader averaged_perceptron_tagger cmudict; \
34
+ fi
35
+
36
+
37
+ # Copy the rest of the application
38
+ COPY . /workspace
39
+
40
+ EXPOSE 9871 9872 9873 9874 9880
41
+
42
+ CMD ["python", "webui.py"]
GPT_SoVITS_Inference.ipynb ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nbformat": 4,
3
+ "nbformat_minor": 0,
4
+ "metadata": {
5
+ "colab": {
6
+ "provenance": []
7
+ },
8
+ "kernelspec": {
9
+ "name": "python3",
10
+ "display_name": "Python 3"
11
+ },
12
+ "accelerator": "GPU"
13
+ },
14
+ "cells": [
15
+ {
16
+ "cell_type": "markdown",
17
+ "source": [
18
+ "# Credits for bubarino giving me the huggingface import code (感谢 bubarino 给了我 huggingface 导入代码)"
19
+ ],
20
+ "metadata": {
21
+ "id": "himHYZmra7ix"
22
+ }
23
+ },
24
+ {
25
+ "cell_type": "code",
26
+ "metadata": {
27
+ "id": "e9b7iFV3dm1f"
28
+ },
29
+ "source": [
30
+ "!git clone https://github.com/RVC-Boss/GPT-SoVITS.git\n",
31
+ "%cd GPT-SoVITS\n",
32
+ "!apt-get update && apt-get install -y --no-install-recommends tzdata ffmpeg libsox-dev parallel aria2 git git-lfs && git lfs install\n",
33
+ "!pip install -r requirements.txt"
34
+ ],
35
+ "execution_count": null,
36
+ "outputs": []
37
+ },
38
+ {
39
+ "cell_type": "code",
40
+ "source": [
41
+ "# @title Download pretrained models 下载预训练模型\n",
42
+ "!mkdir -p /content/GPT-SoVITS/GPT_SoVITS/pretrained_models\n",
43
+ "!mkdir -p /content/GPT-SoVITS/tools/damo_asr/models\n",
44
+ "!mkdir -p /content/GPT-SoVITS/tools/uvr5\n",
45
+ "%cd /content/GPT-SoVITS/GPT_SoVITS/pretrained_models\n",
46
+ "!git clone https://huggingface.co/lj1995/GPT-SoVITS\n",
47
+ "%cd /content/GPT-SoVITS/tools/damo_asr/models\n",
48
+ "!git clone https://www.modelscope.cn/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch.git\n",
49
+ "!git clone https://www.modelscope.cn/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch.git\n",
50
+ "!git clone https://www.modelscope.cn/damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch.git\n",
51
+ "# @title UVR5 pretrains 安装uvr5模型\n",
52
+ "%cd /content/GPT-SoVITS/tools/uvr5\n",
53
+ "!git clone https://huggingface.co/Delik/uvr5_weights\n",
54
+ "!git config core.sparseCheckout true\n",
55
+ "!mv /content/GPT-SoVITS/GPT_SoVITS/pretrained_models/GPT-SoVITS/* /content/GPT-SoVITS/GPT_SoVITS/pretrained_models/"
56
+ ],
57
+ "metadata": {
58
+ "id": "0NgxXg5sjv7z",
59
+ "cellView": "form"
60
+ },
61
+ "execution_count": null,
62
+ "outputs": []
63
+ },
64
+ {
65
+ "cell_type": "code",
66
+ "source": [
67
+ "#@title Create folder models 创建文件夹模型\n",
68
+ "import os\n",
69
+ "base_directory = \"/content/GPT-SoVITS\"\n",
70
+ "folder_names = [\"SoVITS_weights\", \"GPT_weights\"]\n",
71
+ "\n",
72
+ "for folder_name in folder_names:\n",
73
+ " if os.path.exists(os.path.join(base_directory, folder_name)):\n",
74
+ " print(f\"The folder '{folder_name}' already exists. (文件夹'{folder_name}'已经存在。)\")\n",
75
+ " else:\n",
76
+ " os.makedirs(os.path.join(base_directory, folder_name))\n",
77
+ " print(f\"The folder '{folder_name}' was created successfully! (文件夹'{folder_name}'已成功创建!)\")\n",
78
+ "\n",
79
+ "print(\"All folders have been created. (所有文件夹均已创建。)\")"
80
+ ],
81
+ "metadata": {
82
+ "cellView": "form",
83
+ "id": "cPDEH-9czOJF"
84
+ },
85
+ "execution_count": null,
86
+ "outputs": []
87
+ },
88
+ {
89
+ "cell_type": "code",
90
+ "source": [
91
+ "import requests\n",
92
+ "import zipfile\n",
93
+ "import shutil\n",
94
+ "import os\n",
95
+ "\n",
96
+ "#@title Import model 导入模型 (HuggingFace)\n",
97
+ "hf_link = 'https://huggingface.co/modelloosrvcc/Nagisa_Shingetsu_GPT-SoVITS/resolve/main/Nagisa.zip' #@param {type: \"string\"}\n",
98
+ "\n",
99
+ "output_path = '/content/'\n",
100
+ "\n",
101
+ "response = requests.get(hf_link)\n",
102
+ "with open(output_path + 'file.zip', 'wb') as file:\n",
103
+ " file.write(response.content)\n",
104
+ "\n",
105
+ "with zipfile.ZipFile(output_path + 'file.zip', 'r') as zip_ref:\n",
106
+ " zip_ref.extractall(output_path)\n",
107
+ "\n",
108
+ "os.remove(output_path + \"file.zip\")\n",
109
+ "\n",
110
+ "source_directory = output_path\n",
111
+ "SoVITS_destination_directory = '/content/GPT-SoVITS/SoVITS_weights'\n",
112
+ "GPT_destination_directory = '/content/GPT-SoVITS/GPT_weights'\n",
113
+ "\n",
114
+ "for filename in os.listdir(source_directory):\n",
115
+ " if filename.endswith(\".pth\"):\n",
116
+ " source_path = os.path.join(source_directory, filename)\n",
117
+ " destination_path = os.path.join(SoVITS_destination_directory, filename)\n",
118
+ " shutil.move(source_path, destination_path)\n",
119
+ "\n",
120
+ "for filename in os.listdir(source_directory):\n",
121
+ " if filename.endswith(\".ckpt\"):\n",
122
+ " source_path = os.path.join(source_directory, filename)\n",
123
+ " destination_path = os.path.join(GPT_destination_directory, filename)\n",
124
+ " shutil.move(source_path, destination_path)\n",
125
+ "\n",
126
+ "print(f'Model downloaded. (模型已下载。)')"
127
+ ],
128
+ "metadata": {
129
+ "cellView": "form",
130
+ "id": "vbZY-LnM0tzq"
131
+ },
132
+ "execution_count": null,
133
+ "outputs": []
134
+ },
135
+ {
136
+ "cell_type": "code",
137
+ "source": [
138
+ "# @title launch WebUI 启动WebUI\n",
139
+ "!/usr/local/bin/pip install ipykernel\n",
140
+ "!sed -i '10s/False/True/' /content/GPT-SoVITS/config.py\n",
141
+ "%cd /content/GPT-SoVITS/\n",
142
+ "!/usr/local/bin/python webui.py"
143
+ ],
144
+ "metadata": {
145
+ "id": "4oRGUzkrk8C7",
146
+ "cellView": "form"
147
+ },
148
+ "execution_count": null,
149
+ "outputs": []
150
+ }
151
+ ]
152
+ }
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2024 RVC-Boss
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md ADDED
@@ -0,0 +1,348 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <div align="center">
2
+
3
+
4
+ <h1>GPT-SoVITS-WebUI</h1>
5
+ A Powerful Few-shot Voice Conversion and Text-to-Speech WebUI.<br><br>
6
+
7
+ [![madewithlove](https://img.shields.io/badge/made_with-%E2%9D%A4-red?style=for-the-badge&labelColor=orange)](https://github.com/RVC-Boss/GPT-SoVITS)
8
+
9
+ <a href="https://trendshift.io/repositories/7033" target="_blank"><img src="https://trendshift.io/api/badge/repositories/7033" alt="RVC-Boss%2FGPT-SoVITS | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
10
+
11
+ <!-- img src="https://counter.seku.su/cmoe?name=gptsovits&theme=r34" /><br> -->
12
+
13
+ [![Open In Colab](https://img.shields.io/badge/Colab-F9AB00?style=for-the-badge&logo=googlecolab&color=525252)](https://colab.research.google.com/github/RVC-Boss/GPT-SoVITS/blob/main/colab_webui.ipynb)
14
+ [![License](https://img.shields.io/badge/LICENSE-MIT-green.svg?style=for-the-badge)](https://github.com/RVC-Boss/GPT-SoVITS/blob/main/LICENSE)
15
+ [![Huggingface](https://img.shields.io/badge/🤗%20-online%20demo-yellow.svg?style=for-the-badge)](https://huggingface.co/spaces/lj1995/GPT-SoVITS-v2)
16
+ [![Discord](https://img.shields.io/discord/1198701940511617164?color=%23738ADB&label=Discord&style=for-the-badge)](https://discord.gg/dnrgs5GHfG)
17
+
18
+ **English** | [**中文简体**](./docs/cn/README.md) | [**日本語**](./docs/ja/README.md) | [**한국어**](./docs/ko/README.md) | [**Türkçe**](./docs/tr/README.md)
19
+
20
+ </div>
21
+
22
+ ---
23
+
24
+ ## Features:
25
+
26
+ 1. **Zero-shot TTS:** Input a 5-second vocal sample and experience instant text-to-speech conversion.
27
+
28
+ 2. **Few-shot TTS:** Fine-tune the model with just 1 minute of training data for improved voice similarity and realism.
29
+
30
+ 3. **Cross-lingual Support:** Inference in languages different from the training dataset, currently supporting English, Japanese, Korean, Cantonese and Chinese.
31
+
32
+ 4. **WebUI Tools:** Integrated tools include voice accompaniment separation, automatic training set segmentation, Chinese ASR, and text labeling, assisting beginners in creating training datasets and GPT/SoVITS models.
33
+
34
+ **Check out our [demo video](https://www.bilibili.com/video/BV12g4y1m7Uw) here!**
35
+
36
+ Unseen speakers few-shot fine-tuning demo:
37
+
38
+ https://github.com/RVC-Boss/GPT-SoVITS/assets/129054828/05bee1fa-bdd8-4d85-9350-80c060ab47fb
39
+
40
+ **User guide: [简体中文](https://www.yuque.com/baicaigongchang1145haoyuangong/ib3g1e) | [English](https://rentry.co/GPT-SoVITS-guide#/)**
41
+
42
+ ## Installation
43
+
44
+ For users in China, you can [click here](https://www.codewithgpu.com/i/RVC-Boss/GPT-SoVITS/GPT-SoVITS-Official) to use AutoDL Cloud Docker to experience the full functionality online.
45
+
46
+ ### Tested Environments
47
+
48
+ - Python 3.9, PyTorch 2.0.1, CUDA 11
49
+ - Python 3.10.13, PyTorch 2.1.2, CUDA 12.3
50
+ - Python 3.9, PyTorch 2.2.2, macOS 14.4.1 (Apple silicon)
51
+ - Python 3.9, PyTorch 2.2.2, CPU devices
52
+
53
+ _Note: numba==0.56.4 requires py<3.11_
54
+
55
+ ### Windows
56
+
57
+ If you are a Windows user (tested with win>=10), you can [download the integrated package](https://huggingface.co/lj1995/GPT-SoVITS-windows-package/resolve/main/GPT-SoVITS-beta.7z?download=true) and double-click on _go-webui.bat_ to start GPT-SoVITS-WebUI.
58
+
59
+ **Users in China can [download the package here](https://www.yuque.com/baicaigongchang1145haoyuangong/ib3g1e/dkxgpiy9zb96hob4#KTvnO).**
60
+
61
+ ### Linux
62
+
63
+ ```bash
64
+ conda create -n GPTSoVits python=3.9
65
+ conda activate GPTSoVits
66
+ bash install.sh
67
+ ```
68
+
69
+ ### macOS
70
+
71
+ **Note: The models trained with GPUs on Macs result in significantly lower quality compared to those trained on other devices, so we are temporarily using CPUs instead.**
72
+
73
+ 1. Install Xcode command-line tools by running `xcode-select --install`.
74
+ 2. Install FFmpeg by running `brew install ffmpeg`.
75
+ 3. Install the program by running the following commands:
76
+
77
+ ```bash
78
+ conda create -n GPTSoVits python=3.9
79
+ conda activate GPTSoVits
80
+ pip install -r requirements.txt
81
+ ```
82
+
83
+ ### Install Manually
84
+
85
+ #### Install FFmpeg
86
+
87
+ ##### Conda Users
88
+
89
+ ```bash
90
+ conda install ffmpeg
91
+ ```
92
+
93
+ ##### Ubuntu/Debian Users
94
+
95
+ ```bash
96
+ sudo apt install ffmpeg
97
+ sudo apt install libsox-dev
98
+ conda install -c conda-forge 'ffmpeg<7'
99
+ ```
100
+
101
+ ##### Windows Users
102
+
103
+ Download and place [ffmpeg.exe](https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/ffmpeg.exe) and [ffprobe.exe](https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/ffprobe.exe) in the GPT-SoVITS root.
104
+
105
+ Install [Visual Studio 2017](https://aka.ms/vs/17/release/vc_redist.x86.exe) (Korean TTS Only)
106
+
107
+ ##### MacOS Users
108
+ ```bash
109
+ brew install ffmpeg
110
+ ```
111
+
112
+ #### Install Dependences
113
+
114
+ ```bash
115
+ pip install -r requirements.txt
116
+ ```
117
+
118
+ ### Using Docker
119
+
120
+ #### docker-compose.yaml configuration
121
+
122
+ 0. Regarding image tags: Due to rapid updates in the codebase and the slow process of packaging and testing images, please check [Docker Hub](https://hub.docker.com/r/breakstring/gpt-sovits) for the currently packaged latest images and select as per your situation, or alternatively, build locally using a Dockerfile according to your own needs.
123
+ 1. Environment Variables:
124
+
125
+ - is_half: Controls half-precision/double-precision. This is typically the cause if the content under the directories 4-cnhubert/5-wav32k is not generated correctly during the "SSL extracting" step. Adjust to True or False based on your actual situation.
126
+
127
+ 2. Volumes Configuration,The application's root directory inside the container is set to /workspace. The default docker-compose.yaml lists some practical examples for uploading/downloading content.
128
+ 3. shm_size: The default available memory for Docker Desktop on Windows is too small, which can cause abnormal operations. Adjust according to your own situation.
129
+ 4. Under the deploy section, GPU-related settings should be adjusted cautiously according to your system and actual circumstances.
130
+
131
+ #### Running with docker compose
132
+
133
+ ```
134
+ docker compose -f "docker-compose.yaml" up -d
135
+ ```
136
+
137
+ #### Running with docker command
138
+
139
+ As above, modify the corresponding parameters based on your actual situation, then run the following command:
140
+
141
+ ```
142
+ docker run --rm -it --gpus=all --env=is_half=False --volume=G:\GPT-SoVITS-DockerTest\output:/workspace/output --volume=G:\GPT-SoVITS-DockerTest\logs:/workspace/logs --volume=G:\GPT-SoVITS-DockerTest\SoVITS_weights:/workspace/SoVITS_weights --workdir=/workspace -p 9880:9880 -p 9871:9871 -p 9872:9872 -p 9873:9873 -p 9874:9874 --shm-size="16G" -d breakstring/gpt-sovits:xxxxx
143
+ ```
144
+
145
+ ## Pretrained Models
146
+
147
+ **Users in China can [download all these models here](https://www.yuque.com/baicaigongchang1145haoyuangong/ib3g1e/dkxgpiy9zb96hob4#nVNhX).**
148
+
149
+ 1. Download pretrained models from [GPT-SoVITS Models](https://huggingface.co/lj1995/GPT-SoVITS) and place them in `GPT_SoVITS/pretrained_models`.
150
+
151
+ 2. Download G2PW models from [G2PWModel_1.1.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/g2p/G2PWModel_1.1.zip), unzip and rename to `G2PWModel`, and then place them in `GPT_SoVITS/text`.(Chinese TTS Only)
152
+
153
+ 3. For UVR5 (Vocals/Accompaniment Separation & Reverberation Removal, additionally), download models from [UVR5 Weights](https://huggingface.co/lj1995/VoiceConversionWebUI/tree/main/uvr5_weights) and place them in `tools/uvr5/uvr5_weights`.
154
+
155
+ 4. For Chinese ASR (additionally), download models from [Damo ASR Model](https://modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/files), [Damo VAD Model](https://modelscope.cn/models/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch/files), and [Damo Punc Model](https://modelscope.cn/models/damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch/files) and place them in `tools/asr/models`.
156
+
157
+ 5. For English or Japanese ASR (additionally), download models from [Faster Whisper Large V3](https://huggingface.co/Systran/faster-whisper-large-v3) and place them in `tools/asr/models`. Also, [other models](https://huggingface.co/Systran) may have the similar effect with smaller disk footprint.
158
+
159
+ ## Dataset Format
160
+
161
+ The TTS annotation .list file format:
162
+
163
+ ```
164
+ vocal_path|speaker_name|language|text
165
+ ```
166
+
167
+ Language dictionary:
168
+
169
+ - 'zh': Chinese
170
+ - 'ja': Japanese
171
+ - 'en': English
172
+ - 'ko': Korean
173
+ - 'yue': Cantonese
174
+
175
+ Example:
176
+
177
+ ```
178
+ D:\GPT-SoVITS\xxx/xxx.wav|xxx|en|I like playing Genshin.
179
+ ```
180
+
181
+ ## Finetune and inference
182
+
183
+ ### Open WebUI
184
+
185
+ #### Integrated Package Users
186
+
187
+ Double-click `go-webui.bat`or use `go-webui.ps1`
188
+ if you want to switch to V1,then double-click`go-webui-v1.bat` or use `go-webui-v1.ps1`
189
+
190
+ #### Others
191
+
192
+ ```bash
193
+ python webui.py <language(optional)>
194
+ ```
195
+
196
+ if you want to switch to V1,then
197
+
198
+ ```bash
199
+ python webui.py v1 <language(optional)>
200
+ ```
201
+ Or maunally switch version in WebUI
202
+
203
+ ### Finetune
204
+
205
+ #### Path Auto-filling is now supported
206
+
207
+ 1.Fill in the audio path
208
+
209
+ 2.Slice the audio into small chunks
210
+
211
+ 3.Denoise(optinal)
212
+
213
+ 4.ASR
214
+
215
+ 5.Proofreading ASR transcriptions
216
+
217
+ 6.Go to the next Tab, then finetune the model
218
+
219
+ ### Open Inference WebUI
220
+
221
+ #### Integrated Package Users
222
+
223
+ Double-click `go-webui-v2.bat` or use `go-webui-v2.ps1` ,then open the inference webui at `1-GPT-SoVITS-TTS/1C-inference`
224
+
225
+ #### Others
226
+
227
+ ```bash
228
+ python GPT_SoVITS/inference_webui.py <language(optional)>
229
+ ```
230
+ OR
231
+
232
+ ```bash
233
+ python webui.py
234
+ ```
235
+ then open the inference webui at `1-GPT-SoVITS-TTS/1C-inference`
236
+
237
+ ## V2 Release Notes
238
+
239
+ New Features:
240
+
241
+ 1. Support Korean and Cantonese
242
+
243
+ 2. An optimized text frontend
244
+
245
+ 3. Pre-trained model extended from 2k hours to 5k hours
246
+
247
+ 4. Improved synthesis quality for low-quality reference audio
248
+
249
+ [more details](https://github.com/RVC-Boss/GPT-SoVITS/wiki/GPT%E2%80%90SoVITS%E2%80%90v2%E2%80%90features-(%E6%96%B0%E7%89%B9%E6%80%A7) )
250
+
251
+ Use v2 from v1 environment:
252
+
253
+ 1. `pip install -r requirements.txt` to update some packages
254
+
255
+ 2. Clone the latest codes from github.
256
+
257
+ 3. Download v2 pretrained models from [huggingface](https://huggingface.co/lj1995/GPT-SoVITS/tree/main/gsv-v2final-pretrained) and put them into `GPT_SoVITS\pretrained_models\gsv-v2final-pretrained`.
258
+
259
+ Chinese v2 additional: [G2PWModel_1.1.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/g2p/G2PWModel_1.1.zip)(Download G2PW models, unzip and rename to `G2PWModel`, and then place them in `GPT_SoVITS/text`.
260
+
261
+ ## Todo List
262
+
263
+ - [x] **High Priority:**
264
+
265
+ - [x] Localization in Japanese and English.
266
+ - [x] User guide.
267
+ - [x] Japanese and English dataset fine tune training.
268
+
269
+ - [ ] **Features:**
270
+ - [x] Zero-shot voice conversion (5s) / few-shot voice conversion (1min).
271
+ - [x] TTS speaking speed control.
272
+ - [ ] ~~Enhanced TTS emotion control.~~
273
+ - [ ] Experiment with changing SoVITS token inputs to probability distribution of GPT vocabs (transformer latent).
274
+ - [x] Improve English and Japanese text frontend.
275
+ - [ ] Develop tiny and larger-sized TTS models.
276
+ - [x] Colab scripts.
277
+ - [ ] Try expand training dataset (2k hours -> 10k hours).
278
+ - [x] better sovits base model (enhanced audio quality)
279
+ - [ ] model mix
280
+
281
+ ## (Additional) Method for running from the command line
282
+ Use the command line to open the WebUI for UVR5
283
+ ```
284
+ python tools/uvr5/webui.py "<infer_device>" <is_half> <webui_port_uvr5>
285
+ ```
286
+ <!-- If you can't open a browser, follow the format below for UVR processing,This is using mdxnet for audio processing
287
+ ```
288
+ python mdxnet.py --model --input_root --output_vocal --output_ins --agg_level --format --device --is_half_precision
289
+ ``` -->
290
+ This is how the audio segmentation of the dataset is done using the command line
291
+ ```
292
+ python audio_slicer.py \
293
+ --input_path "<path_to_original_audio_file_or_directory>" \
294
+ --output_root "<directory_where_subdivided_audio_clips_will_be_saved>" \
295
+ --threshold <volume_threshold> \
296
+ --min_length <minimum_duration_of_each_subclip> \
297
+ --min_interval <shortest_time_gap_between_adjacent_subclips>
298
+ --hop_size <step_size_for_computing_volume_curve>
299
+ ```
300
+ This is how dataset ASR processing is done using the command line(Only Chinese)
301
+ ```
302
+ python tools/asr/funasr_asr.py -i <input> -o <output>
303
+ ```
304
+ ASR processing is performed through Faster_Whisper(ASR marking except Chinese)
305
+
306
+ (No progress bars, GPU performance may cause time delays)
307
+ ```
308
+ python ./tools/asr/fasterwhisper_asr.py -i <input> -o <output> -l <language> -p <precision>
309
+ ```
310
+ A custom list save path is enabled
311
+
312
+ ## Credits
313
+
314
+ Special thanks to the following projects and contributors:
315
+
316
+ ### Theoretical Research
317
+ - [ar-vits](https://github.com/innnky/ar-vits)
318
+ - [SoundStorm](https://github.com/yangdongchao/SoundStorm/tree/master/soundstorm/s1/AR)
319
+ - [vits](https://github.com/jaywalnut310/vits)
320
+ - [TransferTTS](https://github.com/hcy71o/TransferTTS/blob/master/models.py#L556)
321
+ - [contentvec](https://github.com/auspicious3000/contentvec/)
322
+ - [hifi-gan](https://github.com/jik876/hifi-gan)
323
+ - [fish-speech](https://github.com/fishaudio/fish-speech/blob/main/tools/llama/generate.py#L41)
324
+ ### Pretrained Models
325
+ - [Chinese Speech Pretrain](https://github.com/TencentGameMate/chinese_speech_pretrain)
326
+ - [Chinese-Roberta-WWM-Ext-Large](https://huggingface.co/hfl/chinese-roberta-wwm-ext-large)
327
+ ### Text Frontend for Inference
328
+ - [paddlespeech zh_normalization](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/paddlespeech/t2s/frontend/zh_normalization)
329
+ - [LangSegment](https://github.com/juntaosun/LangSegment)
330
+ - [g2pW](https://github.com/GitYCC/g2pW)
331
+ - [pypinyin-g2pW](https://github.com/mozillazg/pypinyin-g2pW)
332
+ - [paddlespeech g2pw](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/paddlespeech/t2s/frontend/g2pw)
333
+ ### WebUI Tools
334
+ - [ultimatevocalremovergui](https://github.com/Anjok07/ultimatevocalremovergui)
335
+ - [audio-slicer](https://github.com/openvpi/audio-slicer)
336
+ - [SubFix](https://github.com/cronrpc/SubFix)
337
+ - [FFmpeg](https://github.com/FFmpeg/FFmpeg)
338
+ - [gradio](https://github.com/gradio-app/gradio)
339
+ - [faster-whisper](https://github.com/SYSTRAN/faster-whisper)
340
+ - [FunASR](https://github.com/alibaba-damo-academy/FunASR)
341
+
342
+ Thankful to @Naozumi520 for providing the Cantonese training set and for the guidance on Cantonese-related knowledge.
343
+
344
+ ## Thanks to all contributors for their efforts
345
+
346
+ <a href="https://github.com/RVC-Boss/GPT-SoVITS/graphs/contributors" target="_blank">
347
+ <img src="https://contrib.rocks/image?repo=RVC-Boss/GPT-SoVITS" />
348
+ </a>
YouTubeAudioText/.gitattributes ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.lz4 filter=lfs diff=lfs merge=lfs -text
12
+ *.mds filter=lfs diff=lfs merge=lfs -text
13
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
14
+ *.model filter=lfs diff=lfs merge=lfs -text
15
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
16
+ *.npy filter=lfs diff=lfs merge=lfs -text
17
+ *.npz filter=lfs diff=lfs merge=lfs -text
18
+ *.onnx filter=lfs diff=lfs merge=lfs -text
19
+ *.ot filter=lfs diff=lfs merge=lfs -text
20
+ *.parquet filter=lfs diff=lfs merge=lfs -text
21
+ *.pb filter=lfs diff=lfs merge=lfs -text
22
+ *.pickle filter=lfs diff=lfs merge=lfs -text
23
+ *.pkl filter=lfs diff=lfs merge=lfs -text
24
+ *.pt filter=lfs diff=lfs merge=lfs -text
25
+ *.pth filter=lfs diff=lfs merge=lfs -text
26
+ *.rar filter=lfs diff=lfs merge=lfs -text
27
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
28
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
29
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
30
+ *.tar filter=lfs diff=lfs merge=lfs -text
31
+ *.tflite filter=lfs diff=lfs merge=lfs -text
32
+ *.tgz filter=lfs diff=lfs merge=lfs -text
33
+ *.wasm filter=lfs diff=lfs merge=lfs -text
34
+ *.xz filter=lfs diff=lfs merge=lfs -text
35
+ *.zip filter=lfs diff=lfs merge=lfs -text
36
+ *.zst filter=lfs diff=lfs merge=lfs -text
37
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
38
+ # Audio files - uncompressed
39
+ *.pcm filter=lfs diff=lfs merge=lfs -text
40
+ *.sam filter=lfs diff=lfs merge=lfs -text
41
+ *.raw filter=lfs diff=lfs merge=lfs -text
42
+ # Audio files - compressed
43
+ *.aac filter=lfs diff=lfs merge=lfs -text
44
+ *.flac filter=lfs diff=lfs merge=lfs -text
45
+ *.mp3 filter=lfs diff=lfs merge=lfs -text
46
+ *.ogg filter=lfs diff=lfs merge=lfs -text
47
+ *.wav filter=lfs diff=lfs merge=lfs -text
48
+ # Image files - uncompressed
49
+ *.bmp filter=lfs diff=lfs merge=lfs -text
50
+ *.gif filter=lfs diff=lfs merge=lfs -text
51
+ *.png filter=lfs diff=lfs merge=lfs -text
52
+ *.tiff filter=lfs diff=lfs merge=lfs -text
53
+ # Image files - compressed
54
+ *.jpg filter=lfs diff=lfs merge=lfs -text
55
+ *.jpeg filter=lfs diff=lfs merge=lfs -text
56
+ *.webp filter=lfs diff=lfs merge=lfs -text
57
+ # Video files - compressed
58
+ *.mp4 filter=lfs diff=lfs merge=lfs -text
59
+ *.webm filter=lfs diff=lfs merge=lfs -text
YouTubeAudioText/data4.zip ADDED
File without changes
YouTubeAudioText/data7.zip ADDED
File without changes
api.py ADDED
@@ -0,0 +1,940 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ # api.py usage
3
+
4
+ ` python api.py -dr "123.wav" -dt "一二三。" -dl "zh" `
5
+
6
+ ## 执行参数:
7
+
8
+ `-s` - `SoVITS模型路径, 可在 config.py 中指定`
9
+ `-g` - `GPT模型路径, 可在 config.py 中指定`
10
+
11
+ 调用请求缺少参考音频时使用
12
+ `-dr` - `默认参考音频路径`
13
+ `-dt` - `默认参考音频文本`
14
+ `-dl` - `默认参考音频语种, "中文","英文","日文","韩文","粤语","zh","en","ja","ko","yue"`
15
+
16
+ `-d` - `推理设备, "cuda","cpu"`
17
+ `-a` - `绑定地址, 默认"127.0.0.1"`
18
+ `-p` - `绑定端口, 默认9880, 可在 config.py 中指定`
19
+ `-fp` - `覆盖 config.py 使用全精度`
20
+ `-hp` - `覆盖 config.py 使用半精度`
21
+ `-sm` - `流式返回模式, 默认不启用, "close","c", "normal","n", "keepalive","k"`
22
+ `-mt` - `返回的音频编码格式, 流式默认ogg, 非流式默认wav, "wav", "ogg", "aac"`
23
+ `-st` - `返回的音频数据类型, 默认int16, "int16", "int32"`
24
+ `-cp` - `文本切分符号设定, 默认为空, 以",.,。"字符串的方式传入`
25
+
26
+ `-hb` - `cnhubert路径`
27
+ `-b` - `bert路径`
28
+
29
+ ## 调用:
30
+
31
+ ### 推理
32
+
33
+ endpoint: `/`
34
+
35
+ 使用执行参数指定的参考音频:
36
+ GET:
37
+ `http://127.0.0.1:9880?text=先帝创业未半而中道崩殂,今天下三分,益州疲弊,此诚危急存亡之秋也。&text_language=zh`
38
+ POST:
39
+ ```json
40
+ {
41
+ "text": "先帝创业未半而中道崩殂,今天下三分,益州疲弊,此诚危急存亡之秋也。",
42
+ "text_language": "zh"
43
+ }
44
+ ```
45
+
46
+ 使用执行参数指定的参考音频并设定分割符号:
47
+ GET:
48
+ `http://127.0.0.1:9880?text=先帝创业未半而中道崩殂,今天下三分,益州疲弊,此诚危急存亡之秋也。&text_language=zh&cut_punc=,。`
49
+ POST:
50
+ ```json
51
+ {
52
+ "text": "先帝创业未半而中道崩殂,今天下三分,益州疲弊,此诚危急存亡之秋也。",
53
+ "text_language": "zh",
54
+ "cut_punc": ",。",
55
+ }
56
+ ```
57
+
58
+ 手动指定当次推理所使用的参考音频:
59
+ GET:
60
+ `http://127.0.0.1:9880?refer_wav_path=123.wav&prompt_text=一二三。&prompt_language=zh&text=先帝创业未半而中道崩殂,今天下三分,益州疲弊,此诚危急存亡之秋也。&text_language=zh`
61
+ POST:
62
+ ```json
63
+ {
64
+ "refer_wav_path": "123.wav",
65
+ "prompt_text": "一二三。",
66
+ "prompt_language": "zh",
67
+ "text": "先帝创业未半而中道崩殂,今天下三分,益州疲弊,此诚危急存亡之秋也。",
68
+ "text_language": "zh"
69
+ }
70
+ ```
71
+
72
+ RESP:
73
+ 成功: 直接返回 wav 音频流, http code 200
74
+ 失败: 返回包含错误信息的 json, http code 400
75
+
76
+ 手动指定当次推理所使用的参考音频,并提供参数:
77
+ GET:
78
+ `http://127.0.0.1:9880?refer_wav_path=123.wav&prompt_text=一二三。&prompt_language=zh&text=先帝创业未半而中道崩殂,今天下三分,益州疲弊,此诚危急存亡之秋也。&text_language=zh&top_k=20&top_p=0.6&temperature=0.6&speed=1&inp_refs="456.wav"&inp_refs="789.wav"`
79
+ POST:
80
+ ```json
81
+ {
82
+ "refer_wav_path": "123.wav",
83
+ "prompt_text": "一二三。",
84
+ "prompt_language": "zh",
85
+ "text": "先帝创业未半而中道崩殂,今天下三分,益州疲弊,此诚危急存亡之秋也。",
86
+ "text_language": "zh",
87
+ "top_k": 20,
88
+ "top_p": 0.6,
89
+ "temperature": 0.6,
90
+ "speed": 1,
91
+ "inp_refs": ["456.wav","789.wav"]
92
+ }
93
+ ```
94
+
95
+ RESP:
96
+ 成功: 直接返回 wav 音频流, http code 200
97
+ 失败: 返回包含错误信息的 json, http code 400
98
+
99
+
100
+ ### 更换默认参考音频
101
+
102
+ endpoint: `/change_refer`
103
+
104
+ key与推理端一样
105
+
106
+ GET:
107
+ `http://127.0.0.1:9880/change_refer?refer_wav_path=123.wav&prompt_text=一二三。&prompt_language=zh`
108
+ POST:
109
+ ```json
110
+ {
111
+ "refer_wav_path": "123.wav",
112
+ "prompt_text": "一二三。",
113
+ "prompt_language": "zh"
114
+ }
115
+ ```
116
+
117
+ RESP:
118
+ 成功: json, http code 200
119
+ 失败: json, 400
120
+
121
+
122
+ ### 命令控制
123
+
124
+ endpoint: `/control`
125
+
126
+ command:
127
+ "restart": 重新运行
128
+ "exit": 结束运行
129
+
130
+ GET:
131
+ `http://127.0.0.1:9880/control?command=restart`
132
+ POST:
133
+ ```json
134
+ {
135
+ "command": "restart"
136
+ }
137
+ ```
138
+
139
+ RESP: 无
140
+
141
+ """
142
+
143
+
144
+ import argparse
145
+ import os,re
146
+ import sys
147
+
148
+ now_dir = os.getcwd()
149
+ sys.path.append(now_dir)
150
+ sys.path.append("%s/GPT_SoVITS" % (now_dir))
151
+
152
+ import signal
153
+ import LangSegment
154
+ from time import time as ttime
155
+ import torch
156
+ import librosa
157
+ import soundfile as sf
158
+ from fastapi import FastAPI, Request, Query, HTTPException
159
+ from fastapi.responses import StreamingResponse, JSONResponse
160
+ import uvicorn
161
+ from transformers import AutoModelForMaskedLM, AutoTokenizer
162
+ import numpy as np
163
+ from feature_extractor import cnhubert
164
+ from io import BytesIO
165
+ from module.models import SynthesizerTrn
166
+ from AR.models.t2s_lightning_module import Text2SemanticLightningModule
167
+ from text import cleaned_text_to_sequence
168
+ from text.cleaner import clean_text
169
+ from module.mel_processing import spectrogram_torch
170
+ from tools.my_utils import load_audio
171
+ import config as global_config
172
+ import logging
173
+ import subprocess
174
+
175
+
176
class DefaultRefer:
    """Holds the default reference audio used when a TTS request omits
    reference parameters.

    Attributes:
        path: filesystem path of the default reference wav.
        text: transcript of the reference audio.
        language: language code of the transcript.
    """

    def __init__(self, path, text, language):
        # Bug fix: the original ignored its constructor arguments and read
        # the module-level `args` directly, making the parameters dead.
        self.path = path
        self.text = text
        self.language = language

    def is_ready(self) -> bool:
        # Ready only when all three fields are non-empty.
        return is_full(self.path, self.text, self.language)
184
+
185
+
186
def is_empty(*items):
    """Return True only when every item is None or the empty string.

    Any non-empty item makes the whole call False.
    """
    return all(item is None or item == "" for item in items)
191
+
192
+
193
def is_full(*items):
    """Return True only when every item is neither None nor the empty string.

    A single missing item makes the whole call False.
    """
    return all(item is not None and item != "" for item in items)
198
+
199
+
200
class Speaker:
    """Bundles a speaker's GPT and SoVITS models plus optional cached
    prompt features (phones / bert / prompt semantics)."""

    def __init__(self, name, gpt, sovits, phones=None, bert=None, prompt=None):
        self.name, self.gpt, self.sovits = name, gpt, sovits
        self.phones, self.bert, self.prompt = phones, bert, prompt
208
+
209
+ speaker_list = {}
210
+
211
+
212
class Sovits:
    """Pairs a loaded SoVITS synthesizer (`vq_model`) with its
    hyper-parameter config (`hps`)."""

    def __init__(self, vq_model, hps):
        self.vq_model, self.hps = vq_model, hps
216
+
217
def get_sovits_weights(sovits_path):
    """Load a SoVITS checkpoint from disk and wrap it in a Sovits object.

    Reads the config embedded in the checkpoint, detects the model version
    from the text-embedding row count, builds a SynthesizerTrn and loads the
    weights. Relies on module-level globals `is_half`, `device`, `logger`.
    """
    dict_s2 = torch.load(sovits_path, map_location="cpu")
    hps = dict_s2["config"]
    hps = DictToAttrRecursive(hps)
    hps.model.semantic_frame_rate = "25hz"
    # v1 checkpoints carry a 322-row text embedding; anything else is treated as v2.
    if dict_s2['weight']['enc_p.text_embedding.weight'].shape[0] == 322:
        hps.model.version = "v1"
    else:
        hps.model.version = "v2"
    logger.info(f"模型版本: {hps.model.version}")
    model_params_dict = vars(hps.model)
    vq_model = SynthesizerTrn(
        hps.data.filter_length // 2 + 1,
        hps.train.segment_size // hps.data.hop_length,
        n_speakers=hps.data.n_speakers,
        **model_params_dict
    )
    # enc_q is only used during training; fine-tuned inference checkpoints drop it.
    if ("pretrained" not in sovits_path):
        del vq_model.enc_q
    if is_half == True:
        vq_model = vq_model.half().to(device)
    else:
        vq_model = vq_model.to(device)
    vq_model.eval()
    # strict=False: tolerate the missing enc_q keys deleted above.
    vq_model.load_state_dict(dict_s2["weight"], strict=False)

    sovits = Sovits(vq_model, hps)
    return sovits
245
+
246
class Gpt:
    """Pairs a loaded text-to-semantic (GPT) model with `max_sec`, the
    maximum generated duration in seconds used to cap decoding."""

    def __init__(self, max_sec, t2s_model):
        self.max_sec, self.t2s_model = max_sec, t2s_model
250
+
251
# NOTE(review): `global` at module scope is a no-op; `hz` is simply a module
# global. Kept for byte-compatibility.
global hz
hz = 50  # semantic token rate used with max_sec to bound generation length (hz * max_sec)
def get_gpt_weights(gpt_path):
    """Load a GPT (text-to-semantic) checkpoint and wrap it in a Gpt object.

    Uses module-level globals `is_half`, `device`, `logger`.
    """
    dict_s1 = torch.load(gpt_path, map_location="cpu")
    config = dict_s1["config"]
    max_sec = config["data"]["max_sec"]
    t2s_model = Text2SemanticLightningModule(config, "****", is_train=False)
    t2s_model.load_state_dict(dict_s1["weight"])
    if is_half == True:
        t2s_model = t2s_model.half()
    t2s_model = t2s_model.to(device)
    t2s_model.eval()
    # Report parameter count in millions for the startup log.
    total = sum([param.nelement() for param in t2s_model.parameters()])
    logger.info("Number of parameter: %.2fM" % (total / 1e6))

    gpt = Gpt(max_sec, t2s_model)
    return gpt
268
+
269
def change_gpt_sovits_weights(gpt_path,sovits_path):
    """(Re)load both models and register them as the "default" speaker.

    Returns a JSONResponse: code 0 on success, code 400 with the exception
    message when either checkpoint fails to load.
    """
    try:
        gpt = get_gpt_weights(gpt_path)
        sovits = get_sovits_weights(sovits_path)
    except Exception as e:
        return JSONResponse({"code": 400, "message": str(e)}, status_code=400)

    speaker_list["default"] = Speaker(name="default", gpt=gpt, sovits=sovits)
    return JSONResponse({"code": 0, "message": "Success"}, status_code=200)
278
+
279
+
280
def get_bert_feature(text, word2ph):
    """Compute phone-level BERT features for Chinese text.

    Each character's hidden state is repeated `word2ph[i]` times so the
    output aligns with the phoneme sequence. Returns a transposed tensor of
    shape (hidden_dim, n_phones) — presumably hidden_dim is 1024 to match
    get_bert_inf's zero tensor; confirm against the BERT checkpoint.
    """
    with torch.no_grad():
        inputs = tokenizer(text, return_tensors="pt")
        for i in inputs:
            inputs[i] = inputs[i].to(device)  # integer token ids; precision follows bert_model
        res = bert_model(**inputs, output_hidden_states=True)
        # Take the third-from-last hidden layer and trim the [CLS]/[SEP] positions.
        res = torch.cat(res["hidden_states"][-3:-2], -1)[0].cpu()[1:-1]
    # One hidden state per input character is required for the repeat below.
    assert len(word2ph) == len(text)
    phone_level_feature = []
    for i in range(len(word2ph)):
        repeat_feature = res[i].repeat(word2ph[i], 1)
        phone_level_feature.append(repeat_feature)
    phone_level_feature = torch.cat(phone_level_feature, dim=0)
    # if(is_half==True):phone_level_feature=phone_level_feature.half()
    return phone_level_feature.T
295
+
296
+
297
def clean_text_inf(text, language, version):
    """Normalize `text` for the given language/model version and convert the
    resulting phonemes into id sequences.

    Returns (phone_ids, word2ph, norm_text) where word2ph maps each
    normalized character to its phoneme count.
    """
    phones, word2ph, norm_text = clean_text(text, language, version)
    phones = cleaned_text_to_sequence(phones, version)
    return phones, word2ph, norm_text
301
+
302
+
303
def get_bert_inf(phones, word2ph, norm_text, language):
    """Return BERT features for one text segment.

    Only Chinese ("zh") gets real BERT features; every other language gets a
    zero (1024, n_phones) tensor of the precision implied by `is_half`.
    """
    language=language.replace("all_","")
    if language == "zh":
        bert = get_bert_feature(norm_text, word2ph).to(device)#.to(dtype)
    else:
        bert = torch.zeros(
            (1024, len(phones)),
            dtype=torch.float16 if is_half == True else torch.float32,
        ).to(device)

    return bert
314
+
315
from text import chinese
def get_phones_and_bert(text,language,version,final=False):
    """Convert text into (phone_ids, bert_features, normalized_text).

    Single-language modes ("en", "all_zh", "all_ja", "all_ko", "all_yue")
    are processed whole; mixed/auto modes ("zh", "ja", "ko", "yue", "auto",
    "auto_yue") are segmented with LangSegment and each run is processed per
    its detected language. If the result has fewer than 6 phones the text is
    retried once with a leading "." (presumably to stabilize synthesis of
    very short inputs).
    """
    if language in {"en", "all_zh", "all_ja", "all_ko", "all_yue"}:
        language = language.replace("all_","")
        if language == "en":
            LangSegment.setfilters(["en"])
            formattext = " ".join(tmp["text"] for tmp in LangSegment.getTexts(text))
        else:
            # CJK ideographs are ambiguous across languages; trust the caller's choice.
            formattext = text
        while "  " in formattext:
            formattext = formattext.replace("  ", " ")
        if language == "zh":
            if re.search(r'[A-Za-z]', formattext):
                # Latin letters inside Chinese text: uppercase them and
                # normalize the mixed text, then reprocess in mixed "zh" mode.
                formattext = re.sub(r'[a-z]', lambda x: x.group(0).upper(), formattext)
                formattext = chinese.mix_text_normalize(formattext)
                return get_phones_and_bert(formattext,"zh",version)
            else:
                phones, word2ph, norm_text = clean_text_inf(formattext, language, version)
                bert = get_bert_feature(norm_text, word2ph).to(device)
        elif language == "yue" and re.search(r'[A-Za-z]', formattext):
            formattext = re.sub(r'[a-z]', lambda x: x.group(0).upper(), formattext)
            formattext = chinese.mix_text_normalize(formattext)
            return get_phones_and_bert(formattext,"yue",version)
        else:
            phones, word2ph, norm_text = clean_text_inf(formattext, language, version)
            # Non-Chinese: zero BERT features sized to the phone sequence.
            bert = torch.zeros(
                (1024, len(phones)),
                dtype=torch.float16 if is_half == True else torch.float32,
            ).to(device)
    elif language in {"zh", "ja", "ko", "yue", "auto", "auto_yue"}:
        textlist=[]
        langlist=[]
        LangSegment.setfilters(["zh","ja","en","ko"])
        if language == "auto":
            for tmp in LangSegment.getTexts(text):
                langlist.append(tmp["lang"])
                textlist.append(tmp["text"])
        elif language == "auto_yue":
            for tmp in LangSegment.getTexts(text):
                # auto_yue treats detected Chinese as Cantonese.
                if tmp["lang"] == "zh":
                    tmp["lang"] = "yue"
                langlist.append(tmp["lang"])
                textlist.append(tmp["text"])
        else:
            for tmp in LangSegment.getTexts(text):
                if tmp["lang"] == "en":
                    langlist.append(tmp["lang"])
                else:
                    # CJK ideographs are ambiguous; trust the caller's language.
                    langlist.append(language)
                textlist.append(tmp["text"])
        phones_list = []
        bert_list = []
        norm_text_list = []
        # Process each detected-language run independently, then concatenate.
        for i in range(len(textlist)):
            lang = langlist[i]
            phones, word2ph, norm_text = clean_text_inf(textlist[i], lang, version)
            bert = get_bert_inf(phones, word2ph, norm_text, lang)
            phones_list.append(phones)
            norm_text_list.append(norm_text)
            bert_list.append(bert)
        bert = torch.cat(bert_list, dim=1)
        phones = sum(phones_list, [])
        norm_text = ''.join(norm_text_list)

    if not final and len(phones) < 6:
        return get_phones_and_bert("." + text,language,version,final=True)

    return phones,bert.to(torch.float16 if is_half == True else torch.float32),norm_text
385
+
386
+
387
class DictToAttrRecursive(dict):
    """A dict whose entries are also reachable as attributes, recursively.

    Nested dict values are wrapped in DictToAttrRecursive, so config trees
    can be navigated as `cfg.data.sampling_rate` or `cfg["data"]["sampling_rate"]`.
    """

    def __init__(self, source):
        super().__init__(source)
        for key, value in source.items():
            if isinstance(value, dict):
                value = DictToAttrRecursive(value)
                self[key] = value
            # Route through __setattr__ so item and attribute stay in sync.
            setattr(self, key, value)

    def __getattr__(self, name):
        # Only called when normal attribute lookup fails; fall back to the dict.
        if name in self:
            return self[name]
        raise AttributeError(f"Attribute {name} not found")

    def __setattr__(self, name, value):
        if isinstance(value, dict):
            value = DictToAttrRecursive(value)
        # Mirror the assignment into both the dict storage and the instance dict.
        super(DictToAttrRecursive, self).__setitem__(name, value)
        super().__setattr__(name, value)

    def __delattr__(self, name):
        # Deleting the attribute removes the dict entry (mirrors __getattr__).
        if name not in self:
            raise AttributeError(f"Attribute {name} not found")
        del self[name]
413
+
414
+
415
def get_spepc(hps, filename):
    """Load a reference audio file and return its linear spectrogram.

    Args:
        hps: hyper-parameter namespace providing `data.sampling_rate`,
            `data.filter_length`, `data.hop_length`, `data.win_length`.
        filename: path of the audio file to load.

    Returns:
        Spectrogram tensor produced by `spectrogram_torch` (center=False).
    """
    # Fix: pass the target sample rate as a keyword. `sr` is keyword-only in
    # librosa >= 0.10, so the old positional call raises TypeError. This also
    # matches the `librosa.load(..., sr=16000)` usage in get_tts_wav.
    audio, _ = librosa.load(filename, sr=int(hps.data.sampling_rate))
    audio = torch.FloatTensor(audio)
    maxx = audio.abs().max()
    if maxx > 1:
        # Attenuate clipped audio; cap the divisor at 2 to avoid over-quieting.
        audio /= min(2, maxx)
    audio_norm = audio.unsqueeze(0)  # add channel dimension
    spec = spectrogram_torch(audio_norm, hps.data.filter_length, hps.data.sampling_rate,
                             hps.data.hop_length, hps.data.win_length, center=False)
    return spec
426
+
427
+
428
def pack_audio(audio_bytes, data, rate):
    """Append one PCM chunk to `audio_bytes`, encoded per the global media_type.

    Args:
        audio_bytes: BytesIO accumulator.
        data: numpy int16/int32 PCM samples.
        rate: sample rate in Hz.
    """
    if media_type == "ogg":
        audio_bytes = pack_ogg(audio_bytes, data, rate)
    elif media_type == "aac":
        audio_bytes = pack_aac(audio_bytes, data, rate)
    else:
        # WAV cannot be streamed incrementally; buffer raw PCM and wrap it
        # with pack_wav once generation finishes.
        audio_bytes = pack_raw(audio_bytes, data, rate)

    return audio_bytes
438
+
439
+
440
def pack_ogg(audio_bytes, data, rate):
    """Encode one PCM chunk as OGG and append it to the buffer.

    The soundfile write runs on a dedicated thread with an enlarged stack to
    work around a libsndfile stack-overflow bug (links below).
    """
    # Author: AkagawaTsurunaki
    # Issue:
    #   Stack overflow probabilistically occurs
    #   when the function `sf_writef_short` of `libsndfile_64bit.dll` is called
    #   using the Python library `soundfile`
    # Note:
    #   This is an issue related to `libsndfile`, not this project itself.
    #   It happens when you generate a large audio tensor (about 499804 frames in my PC)
    #   and try to convert it to an ogg file.
    # Related:
    #   https://github.com/RVC-Boss/GPT-SoVITS/issues/1199
    #   https://github.com/libsndfile/libsndfile/issues/1023
    #   https://github.com/bastibe/python-soundfile/issues/396
    # Suggestion:
    #   Or split the whole audio data into smaller audio segment to avoid stack overflow?

    def handle_pack_ogg():
        # Mono OGG stream written directly into the shared BytesIO buffer.
        with sf.SoundFile(audio_bytes, mode='w', samplerate=rate, channels=1, format='ogg') as audio_file:
            audio_file.write(data)

    import threading
    # See: https://docs.python.org/3/library/threading.html
    # The stack size of this thread is at least 32768
    # If stack overflow error still occurs, just modify the `stack_size`.
    # stack_size = n * 4096, where n should be a positive integer.
    # Here we chose n = 4096.
    stack_size = 4096 * 4096
    try:
        threading.stack_size(stack_size)
        pack_ogg_thread = threading.Thread(target=handle_pack_ogg)
        pack_ogg_thread.start()
        pack_ogg_thread.join()
    except RuntimeError as e:
        # If changing the thread stack size is unsupported, a RuntimeError is raised.
        print("RuntimeError: {}".format(e))
        print("Changing the thread stack size is unsupported.")
    except ValueError as e:
        # If the specified stack size is invalid, a ValueError is raised and the stack size is unmodified.
        print("ValueError: {}".format(e))
        print("The specified stack size is invalid.")

    return audio_bytes
483
+
484
+
485
def pack_raw(audio_bytes, data, rate):
    """Append raw PCM bytes to the buffer.

    `rate` is unused but kept so all pack_* helpers share one signature.
    """
    audio_bytes.write(data.tobytes())
    return audio_bytes
489
+
490
+
491
def pack_wav(audio_bytes, rate):
    """Wrap buffered raw PCM in a WAV container.

    Interprets the buffer as int32 or int16 samples depending on the global
    `is_int32` flag and returns a fresh BytesIO holding the WAV file.
    """
    if is_int32:
        data = np.frombuffer(audio_bytes.getvalue(),dtype=np.int32)
        wav_bytes = BytesIO()
        sf.write(wav_bytes, data, rate, format='WAV', subtype='PCM_32')
    else:
        data = np.frombuffer(audio_bytes.getvalue(),dtype=np.int16)
        wav_bytes = BytesIO()
        sf.write(wav_bytes, data, rate, format='WAV')
    return wav_bytes
501
+
502
+
503
def pack_aac(audio_bytes, data, rate):
    """Encode one PCM chunk to AAC (ADTS stream) via an ffmpeg subprocess and
    append the encoded bytes to the buffer.

    Requires the `ffmpeg` binary on PATH. Sample format and bitrate follow
    the global `is_int32` flag.
    """
    if is_int32:
        pcm = 's32le'
        bit_rate = '256k'
    else:
        pcm = 's16le'
        bit_rate = '128k'
    process = subprocess.Popen([
        'ffmpeg',
        '-f', pcm,           # input: signed little-endian PCM
        '-ar', str(rate),    # input sample rate
        '-ac', '1',          # mono
        '-i', 'pipe:0',      # read input from stdin
        '-c:a', 'aac',       # AAC encoder
        '-b:a', bit_rate,    # output bitrate
        '-vn',               # no video
        '-f', 'adts',        # raw AAC (ADTS) stream container
        'pipe:1'             # write output to stdout
    ], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, _ = process.communicate(input=data.tobytes())
    audio_bytes.write(out)

    return audio_bytes
526
+
527
+
528
def read_clean_buffer(audio_bytes):
    """Drain the buffer: return (emptied buffer, bytes accumulated so far)."""
    audio_chunk = audio_bytes.getvalue()
    # Reset the buffer in place so it can keep accumulating the next chunk.
    audio_bytes.seek(0)
    audio_bytes.truncate(0)
    return audio_bytes, audio_chunk
534
+
535
+
536
def cut_text(text, punc):
    """Insert newlines after the requested punctuation marks.

    Only characters from a fixed whitelist of sentence punctuation are
    honored; everything else in `punc` is ignored. Consecutive newlines are
    collapsed so downstream splitting yields no empty segments.
    """
    allowed = {",", ".", ";", "?", "!", "、", ",", "。", "?", "!", ";", ":", "…"}
    seps = [ch for ch in punc if ch in allowed]
    if len(seps) > 0:
        pattern = r"[" + "".join(seps) + r"]"
        text = text.strip("\n")
        pieces = re.split(f"({pattern})", text)
        # Re-attach each separator to the sentence that precedes it.
        merged = [segment + sep for segment, sep in zip(pieces[::2], pieces[1::2])]
        # Keep a trailing sentence that has no closing punctuation.
        if len(pieces) % 2 == 1:
            merged.append(pieces[-1])
        text = "\n".join(merged)

    while "\n\n" in text:
        text = text.replace("\n\n", "\n")

    return text
552
+
553
+
554
def only_punc(text):
    """True when the text contains no alphanumeric character at all
    (i.e. it is punctuation/whitespace only, or empty)."""
    return all(not ch.isalnum() for ch in text)
556
+
557
+
558
# Sentence-final punctuation; text lacking one gets a period appended so the
# model sees a terminated sentence.
splits = {",", "。", "?", "!", ",", ".", "?", "!", "~", ":", ":", "—", "…", }
def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language, top_k= 15, top_p = 0.6, temperature = 0.6, speed = 1, inp_refs = None, spk = "default"):
    """Generator yielding encoded audio for `text`, conditioned on a reference clip.

    Pipeline per newline-separated segment: text -> phones/BERT -> GPT semantic
    tokens -> SoVITS vocoder -> PCM -> pack_audio. In stream_mode "normal" it
    yields one encoded chunk per segment; otherwise it yields a single final
    buffer (wrapped as WAV when media_type == "wav").
    """
    infer_sovits = speaker_list[spk].sovits
    vq_model = infer_sovits.vq_model
    hps = infer_sovits.hps

    infer_gpt = speaker_list[spk].gpt
    t2s_model = infer_gpt.t2s_model
    max_sec = infer_gpt.max_sec

    t0 = ttime()
    prompt_text = prompt_text.strip("\n")
    # Ensure the prompt ends with sentence punctuation.
    if (prompt_text[-1] not in splits): prompt_text += "。" if prompt_language != "en" else "."
    prompt_language, text = prompt_language, text.strip("\n")
    dtype = torch.float16 if is_half == True else torch.float32
    # 0.3 s of silence appended after the reference and between segments.
    zero_wav = np.zeros(int(hps.data.sampling_rate * 0.3), dtype=np.float16 if is_half == True else np.float32)
    with torch.no_grad():
        wav16k, sr = librosa.load(ref_wav_path, sr=16000)
        wav16k = torch.from_numpy(wav16k)
        zero_wav_torch = torch.from_numpy(zero_wav)
        if (is_half == True):
            wav16k = wav16k.half().to(device)
            zero_wav_torch = zero_wav_torch.half().to(device)
        else:
            wav16k = wav16k.to(device)
            zero_wav_torch = zero_wav_torch.to(device)
        wav16k = torch.cat([wav16k, zero_wav_torch])
        # SSL features of the reference -> quantized semantic prompt tokens.
        ssl_content = ssl_model.model(wav16k.unsqueeze(0))["last_hidden_state"].transpose(1, 2)  # .float()
        codes = vq_model.extract_latent(ssl_content)
        prompt_semantic = codes[0, 0]
        prompt = prompt_semantic.unsqueeze(0).to(device)

        # Spectrograms of the timbre reference clips; fall back to the main
        # reference when none of the extra refs load.
        refers=[]
        if(inp_refs):
            for path in inp_refs:
                try:
                    refer = get_spepc(hps, path).to(dtype).to(device)
                    refers.append(refer)
                except Exception as e:
                    logger.error(e)
        if(len(refers)==0):
            refers = [get_spepc(hps, ref_wav_path).to(dtype).to(device)]

    t1 = ttime()
    version = vq_model.version
    os.environ['version'] = version
    prompt_language = dict_language[prompt_language.lower()]
    text_language = dict_language[text_language.lower()]
    phones1, bert1, norm_text1 = get_phones_and_bert(prompt_text, prompt_language, version)
    texts = text.split("\n")
    audio_bytes = BytesIO()

    for text in texts:
        # Skip pure-punctuation lines to avoid leaking the reference audio.
        if only_punc(text):
            continue

        audio_opt = []
        if (text[-1] not in splits): text += "。" if text_language != "en" else "."
        phones2, bert2, norm_text2 = get_phones_and_bert(text, text_language, version)
        # Prompt and target features are concatenated for the GPT stage.
        bert = torch.cat([bert1, bert2], 1)

        all_phoneme_ids = torch.LongTensor(phones1 + phones2).to(device).unsqueeze(0)
        bert = bert.to(device).unsqueeze(0)
        all_phoneme_len = torch.tensor([all_phoneme_ids.shape[-1]]).to(device)
        t2 = ttime()
        with torch.no_grad():
            pred_semantic, idx = t2s_model.model.infer_panel(
                all_phoneme_ids,
                all_phoneme_len,
                prompt,
                bert,
                # prompt_phone_len=ph_offset,
                top_k = top_k,
                top_p = top_p,
                temperature = temperature,
                early_stop_num=hz * max_sec)  # cap generation at max_sec seconds
            # Keep only the newly generated tokens (drop the prompt part).
            pred_semantic = pred_semantic[:, -idx:].unsqueeze(0)
        t3 = ttime()
        audio = \
            vq_model.decode(pred_semantic, torch.LongTensor(phones2).to(device).unsqueeze(0),
                            refers,speed=speed).detach().cpu().numpy()[
                0, 0]  # decode without the prompt portion
        # Normalize clipped output back into [-1, 1].
        max_audio=np.abs(audio).max()
        if max_audio>1:
            audio/=max_audio
        audio_opt.append(audio)
        audio_opt.append(zero_wav)
        t4 = ttime()
        # Scale float audio to the requested integer PCM width.
        if is_int32:
            audio_bytes = pack_audio(audio_bytes,(np.concatenate(audio_opt, 0) * 2147483647).astype(np.int32),hps.data.sampling_rate)
        else:
            audio_bytes = pack_audio(audio_bytes,(np.concatenate(audio_opt, 0) * 32768).astype(np.int16),hps.data.sampling_rate)
        # logger.info("%.3f\t%.3f\t%.3f\t%.3f" % (t1 - t0, t2 - t1, t3 - t2, t4 - t3))
        if stream_mode == "normal":
            audio_bytes, audio_chunk = read_clean_buffer(audio_bytes)
            yield audio_chunk

    if not stream_mode == "normal":
        if media_type == "wav":
            audio_bytes = pack_wav(audio_bytes,hps.data.sampling_rate)
        yield audio_bytes.getvalue()
660
+
661
+
662
+
663
def handle_control(command):
    """Process a /control command.

    "restart" re-execs the current process with the same argv; "exit" sends
    SIGTERM to this process and exits. Unknown commands are ignored.
    """
    if command == "restart":
        os.execl(g_config.python_exec, g_config.python_exec, *sys.argv)
    elif command == "exit":
        os.kill(os.getpid(), signal.SIGTERM)
        exit(0)
669
+
670
+
671
def handle_change(path, text, language):
    """Update the default reference audio settings.

    Only non-empty arguments overwrite the current defaults; all-empty input
    yields a 400 response. Returns a JSONResponse in both cases.
    """
    if is_empty(path, text, language):
        return JSONResponse({"code": 400, "message": '缺少任意一项以下参数: "path", "text", "language"'}, status_code=400)

    # Bug fix: the original used `or` (e.g. `path != "" or path is not None`),
    # a condition that is always true, so None/"" clobbered the defaults.
    if path is not None and path != "":
        default_refer.path = path
    if text is not None and text != "":
        default_refer.text = text
    if language is not None and language != "":
        default_refer.language = language

    logger.info(f"当前默认参考音频路径: {default_refer.path}")
    logger.info(f"当前默认参考音频文本: {default_refer.text}")
    logger.info(f"当前默认参考音频语种: {default_refer.language}")
    logger.info(f"is_ready: {default_refer.is_ready()}")

    return JSONResponse({"code": 0, "message": "Success"}, status_code=200)
689
+
690
+
691
def handle(refer_wav_path, prompt_text, prompt_language, text, text_language, cut_punc, top_k, top_p, temperature, speed, inp_refs):
    """Main TTS request handler.

    Falls back to the preset default reference when any reference field is
    missing (400 if no preset exists), applies text cutting, and streams the
    synthesized audio with the configured media type.
    """
    if (
        refer_wav_path == "" or refer_wav_path is None
        or prompt_text == "" or prompt_text is None
        or prompt_language == "" or prompt_language is None
    ):
        refer_wav_path, prompt_text, prompt_language = (
            default_refer.path,
            default_refer.text,
            default_refer.language,
        )
        if not default_refer.is_ready():
            return JSONResponse({"code": 400, "message": "未指定参考音频且接口无预设"}, status_code=400)

    # Idiom fix: compare against None with `is`, not `==`.
    if cut_punc is None:
        text = cut_text(text, default_cut_punc)
    else:
        text = cut_text(text, cut_punc)

    return StreamingResponse(
        get_tts_wav(refer_wav_path, prompt_text, prompt_language, text, text_language, top_k, top_p, temperature, speed, inp_refs),
        media_type="audio/" + media_type,
    )
711
+
712
+
713
+
714
+
715
# --------------------------------
# Initialization (module-level script: config, CLI args, model loading)
# --------------------------------
# Maps human-readable language labels and short codes to internal language modes.
dict_language = {
    "中文": "all_zh",
    "粤语": "all_yue",
    "英文": "en",
    "日文": "all_ja",
    "韩文": "all_ko",
    "中英混合": "zh",
    "粤英混合": "yue",
    "日英混合": "ja",
    "韩英混合": "ko",
    "多语种混合": "auto",  # multi-language input: detect the language per segment
    "多语种混合(粤语)": "auto_yue",
    "all_zh": "all_zh",
    "all_yue": "all_yue",
    "en": "en",
    "all_ja": "all_ja",
    "all_ko": "all_ko",
    "zh": "zh",
    "yue": "yue",
    "ja": "ja",
    "ko": "ko",
    "auto": "auto",
    "auto_yue": "auto_yue",
}

# Logger: reuse uvicorn's logging configuration.
# NOTE(review): this relies on `logging.config` being importable even though
# only `import logging` appears above — presumably uvicorn imports it as a
# side effect; confirm, or add `import logging.config` explicitly.
logging.config.dictConfig(uvicorn.config.LOGGING_CONFIG)
logger = logging.getLogger('uvicorn')

# Project configuration
g_config = global_config.Config()

# CLI arguments
parser = argparse.ArgumentParser(description="GPT-SoVITS api")

parser.add_argument("-s", "--sovits_path", type=str, default=g_config.sovits_path, help="SoVITS模型路径")
parser.add_argument("-g", "--gpt_path", type=str, default=g_config.gpt_path, help="GPT模型路径")
parser.add_argument("-dr", "--default_refer_path", type=str, default="", help="默认参考音频路径")
parser.add_argument("-dt", "--default_refer_text", type=str, default="", help="默认参考音频文本")
parser.add_argument("-dl", "--default_refer_language", type=str, default="", help="默认参考音频语种")
parser.add_argument("-d", "--device", type=str, default=g_config.infer_device, help="cuda / cpu")
parser.add_argument("-a", "--bind_addr", type=str, default="0.0.0.0", help="default: 0.0.0.0")
parser.add_argument("-p", "--port", type=int, default=g_config.api_port, help="default: 9880")
parser.add_argument("-fp", "--full_precision", action="store_true", default=False, help="覆盖config.is_half为False, 使用全精度")
parser.add_argument("-hp", "--half_precision", action="store_true", default=False, help="覆盖config.is_half为True, 使用半精度")
# Boolean flags are used like `python ./api.py -fp ...`
# which yields full_precision==True, half_precision==False
parser.add_argument("-sm", "--stream_mode", type=str, default="close", help="流式返回模式, close / normal / keepalive")
parser.add_argument("-mt", "--media_type", type=str, default="wav", help="音频编码格式, wav / ogg / aac")
parser.add_argument("-st", "--sub_type", type=str, default="int16", help="音频数据类型, int16 / int32")
parser.add_argument("-cp", "--cut_punc", type=str, default="", help="文本切分符号设定, 符号范围,.;?!、,。?!;:…")
# Common sentence cutting example: `python ./api.py -cp ".?!。?!"`
parser.add_argument("-hb", "--hubert_path", type=str, default=g_config.cnhubert_path, help="覆盖config.cnhubert_path")
parser.add_argument("-b", "--bert_path", type=str, default=g_config.bert_path, help="覆盖config.bert_path")

args = parser.parse_args()
sovits_path = args.sovits_path
gpt_path = args.gpt_path
device = args.device
port = args.port
host = args.bind_addr
cnhubert_base_path = args.hubert_path
bert_path = args.bert_path
default_cut_punc = args.cut_punc

# Default reference audio (used when a request omits reference parameters)
default_refer = DefaultRefer(args.default_refer_path, args.default_refer_text, args.default_refer_language)

# Model path checks: fall back to the pretrained checkpoints when unset.
if sovits_path == "":
    sovits_path = g_config.pretrained_sovits_path
    logger.warn(f"未指定SoVITS模型路径, fallback后当前值: {sovits_path}")
if gpt_path == "":
    gpt_path = g_config.pretrained_gpt_path
    logger.warn(f"未指定GPT模型路径, fallback后当前值: {gpt_path}")

# The default reference is all-or-nothing: if any field is missing, clear all.
if default_refer.path == "" or default_refer.text == "" or default_refer.language == "":
    default_refer.path, default_refer.text, default_refer.language = "", "", ""
    logger.info("未指定默认参考音频")
else:
    logger.info(f"默认参考音频路径: {default_refer.path}")
    logger.info(f"默认参考音频文本: {default_refer.text}")
    logger.info(f"默认参考音频语种: {default_refer.language}")

# Precision: CLI flags override config; contradictory flags fall back to config.
is_half = g_config.is_half
if args.full_precision:
    is_half = False
if args.half_precision:
    is_half = True
if args.full_precision and args.half_precision:
    is_half = g_config.is_half  # both flags given: fall back to config
logger.info(f"半精: {is_half}")

# Streaming mode
if args.stream_mode.lower() in ["normal","n"]:
    stream_mode = "normal"
    logger.info("流式返回已开启")
else:
    stream_mode = "close"

# Audio encoding format: non-streaming defaults to wav, streaming to ogg.
if args.media_type.lower() in ["aac","ogg"]:
    media_type = args.media_type.lower()
elif stream_mode == "close":
    media_type = "wav"
else:
    media_type = "ogg"
logger.info(f"编码格式: {media_type}")

# PCM sample type
if args.sub_type.lower() == 'int32':
    is_int32 = True
    logger.info(f"数据类型: int32")
else:
    is_int32 = False
    logger.info(f"数据类型: int16")

# Initialize models: tokenizer, BERT, HuBERT SSL model, then GPT + SoVITS.
cnhubert.cnhubert_base_path = cnhubert_base_path
tokenizer = AutoTokenizer.from_pretrained(bert_path)
bert_model = AutoModelForMaskedLM.from_pretrained(bert_path)
ssl_model = cnhubert.get_model()
if is_half:
    bert_model = bert_model.half().to(device)
    ssl_model = ssl_model.half().to(device)
else:
    bert_model = bert_model.to(device)
    ssl_model = ssl_model.to(device)
change_gpt_sovits_weights(gpt_path = gpt_path, sovits_path = sovits_path)
849
+
850
+
851
+
852
# --------------------------------
# API endpoints
# --------------------------------
app = FastAPI()

# NOTE(review): each GET/POST pair below reuses the same function name.
# FastAPI registers every route at decoration time, so the later def
# shadowing the earlier module-level name appears harmless — confirm intended.

@app.post("/set_model")
async def set_model(request: Request):
    """POST /set_model: hot-swap GPT/SoVITS weights from a JSON body."""
    json_post_raw = await request.json()
    return change_gpt_sovits_weights(
        gpt_path = json_post_raw.get("gpt_model_path"),
        sovits_path = json_post_raw.get("sovits_model_path")
    )


@app.get("/set_model")
async def set_model(
    gpt_model_path: str = None,
    sovits_model_path: str = None,
):
    """GET /set_model: hot-swap GPT/SoVITS weights via query parameters."""
    return change_gpt_sovits_weights(gpt_path = gpt_model_path, sovits_path = sovits_model_path)


@app.post("/control")
async def control(request: Request):
    """POST /control: run a server command ("restart" / "exit")."""
    json_post_raw = await request.json()
    return handle_control(json_post_raw.get("command"))


@app.get("/control")
async def control(command: str = None):
    """GET /control: run a server command ("restart" / "exit")."""
    return handle_control(command)


@app.post("/change_refer")
async def change_refer(request: Request):
    """POST /change_refer: replace the default reference audio settings."""
    json_post_raw = await request.json()
    return handle_change(
        json_post_raw.get("refer_wav_path"),
        json_post_raw.get("prompt_text"),
        json_post_raw.get("prompt_language")
    )


@app.get("/change_refer")
async def change_refer(
    refer_wav_path: str = None,
    prompt_text: str = None,
    prompt_language: str = None
):
    """GET /change_refer: replace the default reference audio settings."""
    return handle_change(refer_wav_path, prompt_text, prompt_language)


@app.post("/")
async def tts_endpoint(request: Request):
    """POST /: synthesize speech from a JSON body; streams encoded audio."""
    json_post_raw = await request.json()
    return handle(
        json_post_raw.get("refer_wav_path"),
        json_post_raw.get("prompt_text"),
        json_post_raw.get("prompt_language"),
        json_post_raw.get("text"),
        json_post_raw.get("text_language"),
        json_post_raw.get("cut_punc"),
        json_post_raw.get("top_k", 15),
        json_post_raw.get("top_p", 1.0),
        json_post_raw.get("temperature", 1.0),
        json_post_raw.get("speed", 1.0),
        json_post_raw.get("inp_refs", [])
    )


@app.get("/")
async def tts_endpoint(
    refer_wav_path: str = None,
    prompt_text: str = None,
    prompt_language: str = None,
    text: str = None,
    text_language: str = None,
    cut_punc: str = None,
    top_k: int = 15,
    top_p: float = 1.0,
    temperature: float = 1.0,
    speed: float = 1.0,
    inp_refs: list = Query(default=[])
):
    """GET /: synthesize speech via query parameters; streams encoded audio."""
    return handle(refer_wav_path, prompt_text, prompt_language, text, text_language, cut_punc, top_k, top_p, temperature, speed, inp_refs)


if __name__ == "__main__":
    # Single worker: the loaded models live in this process's memory.
    uvicorn.run(app, host=host, port=port, workers=1)
colab_webui.ipynb ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nbformat": 4,
3
+ "nbformat_minor": 0,
4
+ "metadata": {
5
+ "colab": {
6
+ "provenance": [],
7
+ "include_colab_link": true
8
+ },
9
+ "kernelspec": {
10
+ "name": "python3",
11
+ "display_name": "Python 3"
12
+ },
13
+ "accelerator": "GPU"
14
+ },
15
+ "cells": [
16
+ {
17
+ "cell_type": "markdown",
18
+ "metadata": {
19
+ "id": "view-in-github",
20
+ "colab_type": "text"
21
+ },
22
+ "source": [
23
+ "<a href=\"https://colab.research.google.com/github/RVC-Boss/GPT-SoVITS/blob/main/colab_webui.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
24
+ ]
25
+ },
26
+ {
27
+ "cell_type": "markdown",
28
+ "source": [
29
+ "环境配置 environment"
30
+ ],
31
+ "metadata": {
32
+ "id": "_o6a8GS2lWQM"
33
+ }
34
+ },
35
+ {
36
+ "cell_type": "code",
37
+ "metadata": {
38
+ "id": "e9b7iFV3dm1f"
39
+ },
40
+ "source": [
41
+ "!pip install -q condacolab\n",
42
+ "# Setting up condacolab and installing packages\n",
43
+ "import condacolab\n",
44
+ "condacolab.install_from_url(\"https://repo.anaconda.com/miniconda/Miniconda3-py39_23.11.0-2-Linux-x86_64.sh\")\n",
45
+ "%cd -q /content\n",
46
+ "!git clone https://github.com/RVC-Boss/GPT-SoVITS\n",
47
+ "!conda install -y -q -c pytorch -c nvidia cudatoolkit\n",
48
+ "%cd -q /content/GPT-SoVITS\n",
49
+ "!conda install -y -q -c conda-forge gcc gxx ffmpeg cmake -c pytorch -c nvidia\n",
50
+ "!/usr/local/bin/pip install -r requirements.txt"
51
+ ],
52
+ "execution_count": null,
53
+ "outputs": []
54
+ },
55
+ {
56
+ "cell_type": "code",
57
+ "source": [
58
+ "# @title Download pretrained models 下载预训练模型\n",
59
+ "!mkdir -p /content/GPT-SoVITS/GPT_SoVITS/pretrained_models\n",
60
+ "!mkdir -p /content/GPT-SoVITS/tools/damo_asr/models\n",
61
+ "!mkdir -p /content/GPT-SoVITS/tools/uvr5\n",
62
+ "%cd /content/GPT-SoVITS/GPT_SoVITS/pretrained_models\n",
63
+ "!git clone https://huggingface.co/lj1995/GPT-SoVITS\n",
64
+ "%cd /content/GPT-SoVITS/tools/damo_asr/models\n",
65
+ "!git clone https://www.modelscope.cn/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch.git\n",
66
+ "!git clone https://www.modelscope.cn/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch.git\n",
67
+ "!git clone https://www.modelscope.cn/damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch.git\n",
68
+ "# @title UVR5 pretrains 安装uvr5模型\n",
69
+ "%cd /content/GPT-SoVITS/tools/uvr5\n",
70
+ "%rm -r uvr5_weights\n",
71
+ "!git clone https://huggingface.co/Delik/uvr5_weights\n",
72
+ "!git config core.sparseCheckout true\n",
73
+ "!mv /content/GPT-SoVITS/GPT_SoVITS/pretrained_models/GPT-SoVITS/* /content/GPT-SoVITS/GPT_SoVITS/pretrained_models/"
74
+ ],
75
+ "metadata": {
76
+ "id": "0NgxXg5sjv7z"
77
+ },
78
+ "execution_count": null,
79
+ "outputs": []
80
+ },
81
+ {
82
+ "cell_type": "code",
83
+ "source": [
84
+ "# @title launch WebUI 启动WebUI\n",
85
+ "!/usr/local/bin/pip install ipykernel\n",
86
+ "!sed -i '10s/False/True/' /content/GPT-SoVITS/config.py\n",
87
+ "%cd /content/GPT-SoVITS/\n",
88
+ "!/usr/local/bin/python webui.py"
89
+ ],
90
+ "metadata": {
91
+ "id": "4oRGUzkrk8C7"
92
+ },
93
+ "execution_count": null,
94
+ "outputs": []
95
+ }
96
+ ]
97
+ }
go-webui.bat ADDED
@@ -0,0 +1,2 @@
 
 
 
1
@rem Launch the WebUI with the bundled Python runtime, passing "zh_CN" as its argument.
runtime\python.exe webui.py zh_CN
@rem Keep the console window open after the WebUI exits.
pause
go-webui.ps1 ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
# Keep the window open even if a command reports an error.
$ErrorActionPreference = "SilentlyContinue"
# Switch the console code page to UTF-8 so non-ASCII output renders correctly.
chcp 65001
# BUG FIX: "zh_CN" was previously inside the quoted script path, so the call
# operator passed a single (nonexistent) path "webui.py zh_CN" to Python.
# The language code must be a separate argument after the quoted path.
& "$PSScriptRoot\runtime\python.exe" "$PSScriptRoot\webui.py" zh_CN
pause
install.sh ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
#!/bin/bash
# Install C/C++ toolchain and media/build dependencies via conda.
conda install -c conda-forge gcc
conda install -c conda-forge gxx
conda install ffmpeg cmake
# Pinned PyTorch stack built against CUDA 11.8.
conda install pytorch==2.1.1 torchvision==0.16.1 torchaudio==2.1.1 pytorch-cuda=11.8 -c pytorch -c nvidia
# Remaining Python dependencies.
pip install -r requirements.txt
pretrained_models/chinese-hubert-base/config.json ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation_dropout": 0.1,
3
+ "apply_spec_augment": true,
4
+ "architectures": [
5
+ "HubertModel"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "bos_token_id": 1,
9
+ "classifier_proj_size": 256,
10
+ "conv_bias": false,
11
+ "conv_dim": [
12
+ 512,
13
+ 512,
14
+ 512,
15
+ 512,
16
+ 512,
17
+ 512,
18
+ 512
19
+ ],
20
+ "conv_kernel": [
21
+ 10,
22
+ 3,
23
+ 3,
24
+ 3,
25
+ 3,
26
+ 2,
27
+ 2
28
+ ],
29
+ "conv_stride": [
30
+ 5,
31
+ 2,
32
+ 2,
33
+ 2,
34
+ 2,
35
+ 2,
36
+ 2
37
+ ],
38
+ "ctc_loss_reduction": "sum",
39
+ "ctc_zero_infinity": false,
40
+ "do_stable_layer_norm": false,
41
+ "eos_token_id": 2,
42
+ "feat_extract_activation": "gelu",
43
+ "feat_extract_norm": "group",
44
+ "feat_proj_dropout": 0.0,
45
+ "feat_proj_layer_norm": true,
46
+ "final_dropout": 0.1,
47
+ "hidden_act": "gelu",
48
+ "hidden_dropout": 0.1,
49
+ "hidden_size": 768,
50
+ "initializer_range": 0.02,
51
+ "intermediate_size": 3072,
52
+ "layer_norm_eps": 1e-05,
53
+ "layerdrop": 0.1,
54
+ "mask_feature_length": 10,
55
+ "mask_feature_min_masks": 0,
56
+ "mask_feature_prob": 0.0,
57
+ "mask_time_length": 10,
58
+ "mask_time_min_masks": 2,
59
+ "mask_time_prob": 0.05,
60
+ "model_type": "hubert",
61
+ "num_attention_heads": 12,
62
+ "num_conv_pos_embedding_groups": 16,
63
+ "num_conv_pos_embeddings": 128,
64
+ "num_feat_extract_layers": 7,
65
+ "num_hidden_layers": 12,
66
+ "pad_token_id": 0,
67
+ "torch_dtype": "float32",
68
+ "transformers_version": "4.20.0.dev0",
69
+ "use_weighted_layer_sum": false,
70
+ "vocab_size": 32
71
+ }
pretrained_models/chinese-hubert-base/preprocessor_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0,
7
+ "return_attention_mask": false,
8
+ "sampling_rate": 16000
9
+ }
requirements.txt ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ numpy==1.23.4
2
+ scipy
3
+ tensorboard
4
+ librosa==0.9.2
5
+ numba==0.56.4
6
+ pytorch-lightning
7
+ gradio>=4.0,<=4.24.0
8
+ ffmpeg-python
9
+ onnxruntime; sys_platform == 'darwin'
10
+ onnxruntime-gpu; sys_platform != 'darwin'
11
+ tqdm
12
+ funasr==1.0.27
13
+ cn2an
14
+ pypinyin
15
+ pyopenjtalk>=0.3.4
16
+ g2p_en
17
+ torchaudio
18
+ modelscope==1.10.0
19
+ sentencepiece
20
+ transformers
21
+ chardet
22
+ PyYAML
23
+ psutil
24
+ jieba_fast
25
+ jieba
26
+ LangSegment>=0.2.0
27
+ Faster_Whisper
28
+ wordsegment
29
+ rotary_embedding_torch
30
+ pyjyutping
31
+ g2pk2
32
+ ko_pron
33
+ opencc; sys_platform != 'linux'
34
+ opencc==1.1.1; sys_platform == 'linux'
35
+ python_mecab_ko; sys_platform != 'win32'
36
+ fastapi<0.112.2
wandb/debug-internal.log ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2024-12-04T06:07:02.658364831Z","level":"INFO","msg":"using version","core version":"0.18.7"}
2
+ {"time":"2024-12-04T06:07:02.658507656Z","level":"INFO","msg":"created symlink","path":"/workspace/GPT-SoVITS/wandb/run-20241204_060702-yfryieml/logs/debug-core.log"}
3
+ {"time":"2024-12-04T06:07:02.774333381Z","level":"INFO","msg":"created new stream","id":"yfryieml"}
4
+ {"time":"2024-12-04T06:07:02.774640768Z","level":"INFO","msg":"stream: started","id":"yfryieml"}
5
+ {"time":"2024-12-04T06:07:02.7747967Z","level":"INFO","msg":"writer: Do: started","stream_id":"yfryieml"}
6
+ {"time":"2024-12-04T06:07:02.774821631Z","level":"INFO","msg":"handler: started","stream_id":"yfryieml"}
7
+ {"time":"2024-12-04T06:07:02.774802368Z","level":"INFO","msg":"sender: started","stream_id":"yfryieml"}
8
+ {"time":"2024-12-04T06:07:03.049190221Z","level":"INFO","msg":"Starting system monitor"}
9
+ {"time":"2024-12-04T07:59:52.403867882Z","level":"INFO","msg":"stream: closing","id":"yfryieml"}
10
+ {"time":"2024-12-04T07:59:52.404822987Z","level":"INFO","msg":"Stopping system monitor"}
11
+ {"time":"2024-12-04T07:59:52.415019985Z","level":"INFO","msg":"Stopped system monitor"}
12
+ {"time":"2024-12-04T07:59:53.952775387Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
13
+ {"time":"2024-12-04T07:59:54.233678016Z","level":"INFO","msg":"handler: closed","stream_id":"yfryieml"}
14
+ {"time":"2024-12-04T07:59:54.233767202Z","level":"INFO","msg":"writer: Close: closed","stream_id":"yfryieml"}
15
+ {"time":"2024-12-04T07:59:54.233797446Z","level":"INFO","msg":"sender: closed","stream_id":"yfryieml"}
16
+ {"time":"2024-12-04T07:59:54.235023872Z","level":"INFO","msg":"stream: closed","id":"yfryieml"}
wandb/debug.log ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2024-12-04 06:07:02,640 INFO MainThread:47802 [wandb_setup.py:_flush():79] Current SDK version is 0.18.7
2
+ 2024-12-04 06:07:02,640 INFO MainThread:47802 [wandb_setup.py:_flush():79] Configure stats pid to 47802
3
+ 2024-12-04 06:07:02,641 INFO MainThread:47802 [wandb_setup.py:_flush():79] Loading settings from /root/.config/wandb/settings
4
+ 2024-12-04 06:07:02,641 INFO MainThread:47802 [wandb_setup.py:_flush():79] Loading settings from /workspace/GPT-SoVITS/wandb/settings
5
+ 2024-12-04 06:07:02,642 INFO MainThread:47802 [wandb_setup.py:_flush():79] Loading settings from environment variables: {}
6
+ 2024-12-04 06:07:02,642 INFO MainThread:47802 [wandb_setup.py:_flush():79] Applying setup settings: {'mode': None, '_disable_service': None}
7
+ 2024-12-04 06:07:02,643 INFO MainThread:47802 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'GPT_SoVITS/s1_train.py', 'program_abspath': '/workspace/GPT-SoVITS/GPT_SoVITS/s1_train.py', 'program': '/workspace/GPT-SoVITS/GPT_SoVITS/s1_train.py'}
8
+ 2024-12-04 06:07:02,643 INFO MainThread:47802 [wandb_setup.py:_flush():79] Applying login settings: {}
9
+ 2024-12-04 06:07:02,644 INFO MainThread:47802 [wandb_init.py:_log_setup():533] Logging user logs to /workspace/GPT-SoVITS/wandb/run-20241204_060702-yfryieml/logs/debug.log
10
+ 2024-12-04 06:07:02,645 INFO MainThread:47802 [wandb_init.py:_log_setup():534] Logging internal logs to /workspace/GPT-SoVITS/wandb/run-20241204_060702-yfryieml/logs/debug-internal.log
11
+ 2024-12-04 06:07:02,645 INFO MainThread:47802 [wandb_init.py:init():619] calling init triggers
12
+ 2024-12-04 06:07:02,646 INFO MainThread:47802 [wandb_init.py:init():626] wandb.init called with sweep_config: {}
13
+ config: {'output_dir': 'logs/s1', 'train': {'seed': 1234, 'epochs': 15, 'batch_size': 8, 'save_every_n_epoch': 5, 'precision': 32, 'if_save_latest': True, 'if_save_every_weights': True, 'exp_name': 'gpt_training', 'half_weights_save_dir': 'weights/s1', 'wandb': {'project': 'gpt-sovits-hindi', 'name': 'stage1_training', 'entity': None, 'log_interval': 100}}, 'optimizer': {'lr_init': 0.0001, 'lr': 0.0004, 'lr_end': 1e-05, 'warmup_steps': 4000, 'decay_steps': 50000}, 'data': {'training_files': 'data8', 'max_sec': 60, 'max_frames': 60, 'filter_length': 2048, 'hop_length': 640, 'win_length': 2048, 'mel_channels': 128, 'mel_fmin': 0.0, 'mel_fmax': None, 'cleaned_text': True, 'num_workers': 4, 'batch_size': 8, 'pad_val': 1024}, 'train_semantic_path': 'data8/semantic.tsv', 'train_phoneme_path': 'data8/phoneme.txt', 'model': {'hidden_dim': 768, 'embedding_dim': 768, 'n_layer': 12, 'head': 12, 'n_embd': 768, 'vocab_size': 2048, 'block_size': 1000, 'embd_pdrop': 0.1, 'resid_pdrop': 0.1, 'attn_pdrop': 0.1, 'semantic_dim': 1024, 'num_layers': 6, 'ffn_hidden': 3072, 'dropout': 0.1, 'attention_dropout': 0.1, 'hidden_dropout': 0.1, 'max_text_positions': 2048, 'max_mel_positions': 8000, 'prenet_dim': 384, 'postnet_dim': 384, 'prenet_layers': 3, 'postnet_layers': 3, 'phoneme_vocab_size': 2048, 'EOS': 2047, 'pad_val': 1024}}
14
+ 2024-12-04 06:07:02,646 INFO MainThread:47802 [wandb_init.py:init():669] starting backend
15
+ 2024-12-04 06:07:02,646 INFO MainThread:47802 [wandb_init.py:init():673] sending inform_init request
16
+ 2024-12-04 06:07:02,654 INFO MainThread:47802 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
17
+ 2024-12-04 06:07:02,655 INFO MainThread:47802 [wandb_init.py:init():686] backend started and connected
18
+ 2024-12-04 06:07:02,671 INFO MainThread:47802 [wandb_init.py:init():781] updated telemetry
19
+ 2024-12-04 06:07:02,711 INFO MainThread:47802 [wandb_init.py:init():814] communicating run to backend with 90.0 second timeout
20
+ 2024-12-04 06:07:03,035 INFO MainThread:47802 [wandb_init.py:init():867] starting run threads in backend
21
+ 2024-12-04 06:07:03,310 INFO MainThread:47802 [wandb_run.py:_console_start():2456] atexit reg
22
+ 2024-12-04 06:07:03,310 INFO MainThread:47802 [wandb_run.py:_redirect():2305] redirect: wrap_raw
23
+ 2024-12-04 06:07:03,311 INFO MainThread:47802 [wandb_run.py:_redirect():2370] Wrapping output streams.
24
+ 2024-12-04 06:07:03,311 INFO MainThread:47802 [wandb_run.py:_redirect():2395] Redirects installed.
25
+ 2024-12-04 06:07:03,315 INFO MainThread:47802 [wandb_init.py:init():911] run started, returning control to user process
26
+ 2024-12-04 06:07:05,437 INFO MainThread:47802 [wandb_watch.py:_watch():71] Watching
27
+ 2024-12-04 06:07:15,972 INFO MainThread:47802 [wandb_run.py:_config_callback():1387] config_cb None None {'config': {'output_dir': 'logs/s1', 'train': {'seed': 1234, 'epochs': 15, 'batch_size': 8, 'save_every_n_epoch': 5, 'precision': 32, 'if_save_latest': True, 'if_save_every_weights': True, 'exp_name': 'gpt_training', 'half_weights_save_dir': 'weights/s1', 'wandb': {'project': 'gpt-sovits-hindi', 'name': 'stage1_training', 'entity': None, 'log_interval': 100}}, 'optimizer': {'lr_init': 0.0001, 'lr': 0.0004, 'lr_end': 1e-05, 'warmup_steps': 4000, 'decay_steps': 50000}, 'data': {'training_files': 'data8', 'max_sec': 60, 'max_frames': 60, 'filter_length': 2048, 'hop_length': 640, 'win_length': 2048, 'mel_channels': 128, 'mel_fmin': 0.0, 'mel_fmax': None, 'cleaned_text': True, 'num_workers': 4, 'batch_size': 8, 'pad_val': 1024}, 'train_semantic_path': 'data8/semantic.tsv', 'train_phoneme_path': 'data8/phoneme.txt', 'model': {'hidden_dim': 768, 'embedding_dim': 768, 'n_layer': 12, 'head': 12, 'n_embd': 768, 'vocab_size': 2048, 'block_size': 1000, 'embd_pdrop': 0.1, 'resid_pdrop': 0.1, 'attn_pdrop': 0.1, 'semantic_dim': 1024, 'num_layers': 6, 'ffn_hidden': 3072, 'dropout': 0.1, 'attention_dropout': 0.1, 'hidden_dropout': 0.1, 'max_text_positions': 2048, 'max_mel_positions': 8000, 'prenet_dim': 384, 'postnet_dim': 384, 'prenet_layers': 3, 'postnet_layers': 3, 'phoneme_vocab_size': 2048, 'EOS': 2047, 'pad_val': 1024}}, 'output_dir': 'logs/s1', 'is_train': True}
28
+ 2024-12-04 06:07:15,973 INFO MainThread:47802 [wandb_run.py:_config_callback():1387] config_cb None None {'output_dir': 'logs/s1', 'train': {'seed': 1234, 'epochs': 15, 'batch_size': 8, 'save_every_n_epoch': 5, 'precision': 32, 'if_save_latest': True, 'if_save_every_weights': True, 'exp_name': 'gpt_training', 'half_weights_save_dir': 'weights/s1', 'wandb': {'project': 'gpt-sovits-hindi', 'name': 'stage1_training', 'entity': None, 'log_interval': 100}}, 'optimizer': {'lr_init': 0.0001, 'lr': 0.0004, 'lr_end': 1e-05, 'warmup_steps': 4000, 'decay_steps': 50000}, 'data': {'training_files': 'data8', 'max_sec': 60, 'max_frames': 60, 'filter_length': 2048, 'hop_length': 640, 'win_length': 2048, 'mel_channels': 128, 'mel_fmin': 0.0, 'mel_fmax': None, 'cleaned_text': True, 'num_workers': 4, 'batch_size': 8, 'pad_val': 1024}, 'train_semantic_path': 'data8/semantic.tsv', 'train_phoneme_path': 'data8/phoneme.txt', 'model': {'hidden_dim': 768, 'embedding_dim': 768, 'n_layer': 12, 'head': 12, 'n_embd': 768, 'vocab_size': 2048, 'block_size': 1000, 'embd_pdrop': 0.1, 'resid_pdrop': 0.1, 'attn_pdrop': 0.1, 'semantic_dim': 1024, 'num_layers': 6, 'ffn_hidden': 3072, 'dropout': 0.1, 'attention_dropout': 0.1, 'hidden_dropout': 0.1, 'max_text_positions': 2048, 'max_mel_positions': 8000, 'prenet_dim': 384, 'postnet_dim': 384, 'prenet_layers': 3, 'postnet_layers': 3, 'phoneme_vocab_size': 2048, 'EOS': 2047, 'pad_val': 1024}}
29
+ 2024-12-04 07:59:52,403 WARNING MsgRouterThr:47802 [router.py:message_loop():75] message_loop has been closed
wandb/run-20241203_183434-y6gzynz8/files/output.log ADDED
The diff for this file is too large to render. See raw diff
 
wandb/run-20241203_184628-qm0jlwqu/files/config.yaml ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _wandb:
2
+ value:
3
+ cli_version: 0.18.7
4
+ m: []
5
+ python_version: 3.10.12
6
+ t:
7
+ "1":
8
+ - 1
9
+ - 5
10
+ - 9
11
+ - 11
12
+ - 49
13
+ - 53
14
+ - 55
15
+ - 103
16
+ - 105
17
+ "2":
18
+ - 1
19
+ - 5
20
+ - 9
21
+ - 11
22
+ - 49
23
+ - 53
24
+ - 55
25
+ - 103
26
+ - 105
27
+ "3":
28
+ - 1
29
+ - 13
30
+ - 16
31
+ - 23
32
+ - 55
33
+ "4": 3.10.12
34
+ "5": 0.18.7
35
+ "6": 4.46.3
36
+ "8":
37
+ - 5
38
+ "12": 0.18.7
39
+ "13": linux-x86_64
40
+ data:
41
+ value:
42
+ batch_size: 8
43
+ cleaned_text: true
44
+ filter_length: 2048
45
+ hop_length: 640
46
+ max_frames: 60
47
+ max_sec: 60
48
+ mel_channels: 128
49
+ mel_fmax: null
50
+ mel_fmin: 0
51
+ num_workers: 4
52
+ pad_val: 1024
53
+ training_files: data8
54
+ win_length: 2048
55
+ model:
56
+ value:
57
+ EOS: 2047
58
+ attention_dropout: 0.1
59
+ attn_pdrop: 0.1
60
+ block_size: 1000
61
+ dropout: 0.1
62
+ embd_pdrop: 0.1
63
+ embedding_dim: 768
64
+ ffn_hidden: 3072
65
+ head: 12
66
+ hidden_dim: 768
67
+ hidden_dropout: 0.1
68
+ max_mel_positions: 8000
69
+ max_text_positions: 2048
70
+ n_embd: 768
71
+ n_layer: 12
72
+ num_layers: 6
73
+ pad_val: 1024
74
+ phoneme_vocab_size: 2048
75
+ postnet_dim: 384
76
+ postnet_layers: 3
77
+ prenet_dim: 384
78
+ prenet_layers: 3
79
+ resid_pdrop: 0.1
80
+ semantic_dim: 1024
81
+ vocab_size: 2048
82
+ output_dir:
83
+ value: logs/s1
84
+ train:
85
+ value:
86
+ batch_size: 8
87
+ epochs: 15
88
+ exp_name: gpt_training
89
+ half_weights_save_dir: weights/s1
90
+ if_save_every_weights: true
91
+ if_save_latest: true
92
+ precision: 32
93
+ save_every_n_epoch: 5
94
+ seed: 1234
95
+ wandb:
96
+ entity: null
97
+ log_interval: 100
98
+ name: stage1_training
99
+ project: gpt-sovits-hindi
100
+ train_phoneme_path:
101
+ value: data8/phoneme.txt
102
+ train_semantic_path:
103
+ value: data8/semantic.tsv
wandb/run-20241203_184628-qm0jlwqu/files/output.log ADDED
The diff for this file is too large to render. See raw diff
 
wandb/run-20241203_184628-qm0jlwqu/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"_wandb":{"runtime":4}}
wandb/run-20241203_185057-b425fq2v/files/wandb-metadata.json ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.4.0-196-generic-x86_64-with-glibc2.35",
3
+ "python": "3.10.12",
4
+ "startedAt": "2024-12-03T18:50:57.819397Z",
5
+ "args": [
6
+ "-c",
7
+ "configs/s1.yaml"
8
+ ],
9
+ "program": "/workspace/GPT-SoVITS/GPT_SoVITS/s1_train.py",
10
+ "codePath": "GPT_SoVITS/s1_train.py",
11
+ "git": {
12
+ "remote": "https://github.com/RVC-Boss/GPT-SoVITS.git",
13
+ "commit": "a70e1ad30c072cdbcfb716962abdc8008fa41cc2"
14
+ },
15
+ "email": "[email protected]",
16
+ "root": "/workspace/GPT-SoVITS",
17
+ "host": "7a6bba088bf1",
18
+ "username": "root",
19
+ "executable": "/usr/bin/python",
20
+ "codePathLocal": "GPT_SoVITS/s1_train.py",
21
+ "cpu_count": 48,
22
+ "cpu_count_logical": 96,
23
+ "gpu": "NVIDIA A40",
24
+ "gpu_count": 1,
25
+ "disk": {
26
+ "/": {
27
+ "total": "42949672960",
28
+ "used": "3612397568"
29
+ }
30
+ },
31
+ "memory": {
32
+ "total": "540662632448"
33
+ },
34
+ "cpu": {
35
+ "count": 48,
36
+ "countLogical": 96
37
+ },
38
+ "gpu_nvidia": [
39
+ {
40
+ "name": "NVIDIA A40",
41
+ "memoryTotal": "48305799168",
42
+ "cudaCores": 10752,
43
+ "architecture": "Ampere"
44
+ }
45
+ ],
46
+ "cudaVersion": "12.7"
47
+ }
wandb/run-20241203_185057-b425fq2v/logs/debug-core.log ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2024-12-03T18:50:57.298838127Z","level":"INFO","msg":"started logging, with flags","port-filename":"/tmp/tmp3grczmmy/port-20897.txt","pid":20897,"debug":false,"disable-analytics":false}
2
+ {"time":"2024-12-03T18:50:57.298925737Z","level":"INFO","msg":"FeatureState","shutdownOnParentExitEnabled":false}
3
+ {"time":"2024-12-03T18:50:57.300062798Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":20897}
4
+ {"time":"2024-12-03T18:50:57.300116485Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":36211,"Zone":""}}
5
+ {"time":"2024-12-03T18:50:57.480327477Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:51888"}
6
+ {"time":"2024-12-03T18:50:57.822370995Z","level":"INFO","msg":"handleInformInit: received","streamId":"b425fq2v","id":"127.0.0.1:51888"}
7
+ {"time":"2024-12-03T18:50:57.948271348Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"b425fq2v","id":"127.0.0.1:51888"}
8
+ {"time":"2024-12-03T18:51:01.009900112Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:51888"}
9
+ {"time":"2024-12-03T18:51:01.010177746Z","level":"INFO","msg":"connection: Close: initiating connection closure","id":"127.0.0.1:51888"}
10
+ {"time":"2024-12-03T18:51:01.010280082Z","level":"INFO","msg":"server is shutting down"}
11
+ {"time":"2024-12-03T18:51:01.010428582Z","level":"INFO","msg":"connection: Close: connection successfully closed","id":"127.0.0.1:51888"}
12
+ {"time":"2024-12-03T18:51:01.720218142Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:51888"}
13
+ {"time":"2024-12-03T18:51:01.720272183Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:51888"}
14
+ {"time":"2024-12-03T18:51:01.720296805Z","level":"INFO","msg":"server is closed"}
wandb/run-20241203_185057-b425fq2v/logs/debug.log ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2024-12-03 18:50:57,786 INFO MainThread:20897 [wandb_setup.py:_flush():79] Current SDK version is 0.18.7
2
+ 2024-12-03 18:50:57,787 INFO MainThread:20897 [wandb_setup.py:_flush():79] Configure stats pid to 20897
3
+ 2024-12-03 18:50:57,787 INFO MainThread:20897 [wandb_setup.py:_flush():79] Loading settings from /root/.config/wandb/settings
4
+ 2024-12-03 18:50:57,788 INFO MainThread:20897 [wandb_setup.py:_flush():79] Loading settings from /workspace/GPT-SoVITS/wandb/settings
5
+ 2024-12-03 18:50:57,788 INFO MainThread:20897 [wandb_setup.py:_flush():79] Loading settings from environment variables: {}
6
+ 2024-12-03 18:50:57,788 INFO MainThread:20897 [wandb_setup.py:_flush():79] Applying setup settings: {'mode': None, '_disable_service': None}
7
+ 2024-12-03 18:50:57,789 INFO MainThread:20897 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'GPT_SoVITS/s1_train.py', 'program_abspath': '/workspace/GPT-SoVITS/GPT_SoVITS/s1_train.py', 'program': '/workspace/GPT-SoVITS/GPT_SoVITS/s1_train.py'}
8
+ 2024-12-03 18:50:57,789 INFO MainThread:20897 [wandb_setup.py:_flush():79] Applying login settings: {}
9
+ 2024-12-03 18:50:57,789 INFO MainThread:20897 [wandb_init.py:_log_setup():533] Logging user logs to /workspace/GPT-SoVITS/wandb/run-20241203_185057-b425fq2v/logs/debug.log
10
+ 2024-12-03 18:50:57,790 INFO MainThread:20897 [wandb_init.py:_log_setup():534] Logging internal logs to /workspace/GPT-SoVITS/wandb/run-20241203_185057-b425fq2v/logs/debug-internal.log
11
+ 2024-12-03 18:50:57,791 INFO MainThread:20897 [wandb_init.py:init():619] calling init triggers
12
+ 2024-12-03 18:50:57,791 INFO MainThread:20897 [wandb_init.py:init():626] wandb.init called with sweep_config: {}
13
+ config: {'output_dir': 'logs/s1', 'train': {'seed': 1234, 'epochs': 15, 'batch_size': 8, 'save_every_n_epoch': 5, 'precision': 32, 'if_save_latest': True, 'if_save_every_weights': True, 'exp_name': 'gpt_training', 'half_weights_save_dir': 'weights/s1', 'wandb': {'project': 'gpt-sovits-hindi', 'name': 'stage1_training', 'entity': None, 'log_interval': 100}}, 'data': {'training_files': 'data8', 'max_sec': 60, 'max_frames': 60, 'filter_length': 2048, 'hop_length': 640, 'win_length': 2048, 'mel_channels': 128, 'mel_fmin': 0.0, 'mel_fmax': None, 'cleaned_text': True, 'num_workers': 4, 'batch_size': 8, 'pad_val': 1024}, 'train_semantic_path': 'data8/semantic.tsv', 'train_phoneme_path': 'data8/phoneme.txt', 'model': {'hidden_dim': 768, 'embedding_dim': 768, 'n_layer': 12, 'head': 12, 'n_embd': 768, 'vocab_size': 2048, 'block_size': 1000, 'embd_pdrop': 0.1, 'resid_pdrop': 0.1, 'attn_pdrop': 0.1, 'semantic_dim': 1024, 'num_layers': 6, 'ffn_hidden': 3072, 'dropout': 0.1, 'attention_dropout': 0.1, 'hidden_dropout': 0.1, 'max_text_positions': 2048, 'max_mel_positions': 8000, 'prenet_dim': 384, 'postnet_dim': 384, 'prenet_layers': 3, 'postnet_layers': 3, 'phoneme_vocab_size': 2048, 'EOS': 2047, 'pad_val': 1024}}
14
+ 2024-12-03 18:50:57,791 INFO MainThread:20897 [wandb_init.py:init():669] starting backend
15
+ 2024-12-03 18:50:57,792 INFO MainThread:20897 [wandb_init.py:init():673] sending inform_init request
16
+ 2024-12-03 18:50:57,815 INFO MainThread:20897 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
17
+ 2024-12-03 18:50:57,817 INFO MainThread:20897 [wandb_init.py:init():686] backend started and connected
18
+ 2024-12-03 18:50:57,852 INFO MainThread:20897 [wandb_init.py:init():781] updated telemetry
19
+ 2024-12-03 18:50:57,966 INFO MainThread:20897 [wandb_init.py:init():814] communicating run to backend with 90.0 second timeout
20
+ 2024-12-03 18:50:58,251 INFO MainThread:20897 [wandb_init.py:init():867] starting run threads in backend
21
+ 2024-12-03 18:50:58,566 INFO MainThread:20897 [wandb_run.py:_console_start():2456] atexit reg
22
+ 2024-12-03 18:50:58,566 INFO MainThread:20897 [wandb_run.py:_redirect():2305] redirect: wrap_raw
23
+ 2024-12-03 18:50:58,566 INFO MainThread:20897 [wandb_run.py:_redirect():2370] Wrapping output streams.
24
+ 2024-12-03 18:50:58,567 INFO MainThread:20897 [wandb_run.py:_redirect():2395] Redirects installed.
25
+ 2024-12-03 18:50:58,569 INFO MainThread:20897 [wandb_init.py:init():911] run started, returning control to user process
26
+ 2024-12-03 18:50:59,137 INFO MainThread:20897 [wandb_watch.py:_watch():71] Watching
27
+ 2024-12-03 18:51:01,010 WARNING MsgRouterThr:20897 [router.py:message_loop():75] message_loop has been closed
wandb/run-20241203_185057-b425fq2v/run-b425fq2v.wandb ADDED
Binary file (16.1 kB). View file
 
wandb/run-20241203_185203-najxbup6/files/output.log ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ GPU available: True (cuda), used: True
2
+ TPU available: False, using: 0 TPU cores
3
+ HPU available: False, using: 0 HPUs
4
+ Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/1
5
+ ----------------------------------------------------------------------------------------------------
6
+ distributed_backend=nccl
7
+ All distributed processes registered. Starting with 1 processes
8
+ ----------------------------------------------------------------------------------------------------
9
+
10
+ /usr/local/lib/python3.10/dist-packages/pytorch_lightning/loggers/wandb.py:396: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.
11
+ semantic_data_len: 726
12
+ phoneme_data_len: 727
13
+ data8/wavs/0.wav अखरोट साइज में कैसी होगी ताकि फिर खोल सिविल डिफेंस पास तो इसमें क्या इशू जाते हैं ऐसी क्या प्रॉब्लम आती है 5 से रिलेटेड जॉब्स अपडेटेड साइट सौगंध पीपल फ्रॉम ऑल ग्रेंस बैकग्राउंड तो क्या ऐसे इशूज है जो पास से रिलेटेड आते हैं दायित्व प्रॉब्लम्स से मेरे पास आते हैं वह रिलेशनशिप को लेकर कि आते हैं इस पेस्ट को अनम्यूट रिलेशनशिप को लेकर किया पास भी किसी ने मेरे साथ कुछ किया मैं वह लेट को
14
+ 0 data8/wavs/1.wav साथ मुझे बहुत टाइम पहले कैंची याद आती है ऑफ मु...
15
+ 1 data8/wavs/2.wav कि वन भूल जाओ अब आगे देखो फीचर को देखो लेकिन प...
16
+ 2 data8/wavs/3.wav हैं बिकॉज़ क्या होता है कि किसी को भी उस पेन अ...
17
+ 3 data8/wavs/4.wav नेक्स्ट टो इंपासिबल जब तक कि हम यह न समझ लें क...
18
+ 4 data8/wavs/5.wav में रखी है तू जान उसको जिसमें वांट बट अगर उसको...
19
+ .. ... ...
20
+ 721 data8/wavs/722.wav होने लगेंगी तो इस सबको सलूशन किया है या जो भी ...
21
+ 722 data8/wavs/723.wav की फैट की सब कुछ इंपोर्टेंट है हो सकता पतले हो...
22
+ 723 data8/wavs/724.wav उस समय हर चीज द प्रॉब्लम है फ्रेगनेट चाहती है ...
23
+ 724 data8/wavs/725.wav है तो इसको थोड़ी सी देर के लिए बैटर फील होता ह...
24
+ 725 data8/wavs/726.wav हो यानी बेसिक्स के एकॉर्डिंग हो हैं तो इन जस्ट...
25
+
26
+ [726 rows x 2 columns]
27
+ Traceback (most recent call last):
28
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/s1_train.py", line 113, in <module>
29
+ main(args)
30
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/s1_train.py", line 100, in main
31
+ trainer.fit(model, data_module)
32
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/trainer.py", line 538, in fit
33
+ call._call_and_handle_interrupt(
34
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/call.py", line 46, in _call_and_handle_interrupt
35
+ return trainer.strategy.launcher.launch(trainer_fn, *args, trainer=trainer, **kwargs)
36
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/strategies/launchers/subprocess_script.py", line 105, in launch
37
+ return function(*args, **kwargs)
38
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/trainer.py", line 574, in _fit_impl
39
+ self._run(model, ckpt_path=ckpt_path)
40
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/trainer.py", line 943, in _run
41
+ call._call_setup_hook(self) # allow user to set up LightningModule in accelerator environment
42
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/call.py", line 102, in _call_setup_hook
43
+ _call_lightning_datamodule_hook(trainer, "setup", stage=fn)
44
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/call.py", line 189, in _call_lightning_datamodule_hook
45
+ return fn(*args, **kwargs)
46
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/data_module.py", line 30, in setup
47
+ self._train_dataset = Text2SemanticDataset(
48
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 117, in __init__
49
+ self.init_batch()
50
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 147, in init_batch
51
+ semantic_ids = [ord(c) if isinstance(c, str) else c for c in semantic_ids] # Convert to character codes if needed
52
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 147, in <listcomp>
53
+ semantic_ids = [ord(c) if isinstance(c, str) else c for c in semantic_ids] # Convert to character codes if needed
54
+ TypeError: ord() expected a character, but string of length 2 found
55
+ Traceback (most recent call last):
56
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/s1_train.py", line 113, in <module>
57
+ main(args)
58
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/s1_train.py", line 100, in main
59
+ trainer.fit(model, data_module)
60
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/trainer.py", line 538, in fit
61
+ call._call_and_handle_interrupt(
62
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/call.py", line 46, in _call_and_handle_interrupt
63
+ return trainer.strategy.launcher.launch(trainer_fn, *args, trainer=trainer, **kwargs)
64
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/strategies/launchers/subprocess_script.py", line 105, in launch
65
+ return function(*args, **kwargs)
66
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/trainer.py", line 574, in _fit_impl
67
+ self._run(model, ckpt_path=ckpt_path)
68
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/trainer.py", line 943, in _run
69
+ call._call_setup_hook(self) # allow user to set up LightningModule in accelerator environment
70
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/call.py", line 102, in _call_setup_hook
71
+ _call_lightning_datamodule_hook(trainer, "setup", stage=fn)
72
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/call.py", line 189, in _call_lightning_datamodule_hook
73
+ return fn(*args, **kwargs)
74
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/data_module.py", line 30, in setup
75
+ self._train_dataset = Text2SemanticDataset(
76
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 117, in __init__
77
+ self.init_batch()
78
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 147, in init_batch
79
+ semantic_ids = [ord(c) if isinstance(c, str) else c for c in semantic_ids] # Convert to character codes if needed
80
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 147, in <listcomp>
81
+ semantic_ids = [ord(c) if isinstance(c, str) else c for c in semantic_ids] # Convert to character codes if needed
82
+ TypeError: ord() expected a character, but string of length 2 found
wandb/run-20241203_185203-najxbup6/files/wandb-metadata.json ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.4.0-196-generic-x86_64-with-glibc2.35",
3
+ "python": "3.10.12",
4
+ "startedAt": "2024-12-03T18:52:04.054803Z",
5
+ "args": [
6
+ "-c",
7
+ "configs/s1.yaml"
8
+ ],
9
+ "program": "/workspace/GPT-SoVITS/GPT_SoVITS/s1_train.py",
10
+ "codePath": "GPT_SoVITS/s1_train.py",
11
+ "git": {
12
+ "remote": "https://github.com/RVC-Boss/GPT-SoVITS.git",
13
+ "commit": "a70e1ad30c072cdbcfb716962abdc8008fa41cc2"
14
+ },
15
+ "email": "[email protected]",
16
+ "root": "/workspace/GPT-SoVITS",
17
+ "host": "7a6bba088bf1",
18
+ "username": "root",
19
+ "executable": "/usr/bin/python",
20
+ "codePathLocal": "GPT_SoVITS/s1_train.py",
21
+ "cpu_count": 48,
22
+ "cpu_count_logical": 96,
23
+ "gpu": "NVIDIA A40",
24
+ "gpu_count": 1,
25
+ "disk": {
26
+ "/": {
27
+ "total": "42949672960",
28
+ "used": "3612413952"
29
+ }
30
+ },
31
+ "memory": {
32
+ "total": "540662632448"
33
+ },
34
+ "cpu": {
35
+ "count": 48,
36
+ "countLogical": 96
37
+ },
38
+ "gpu_nvidia": [
39
+ {
40
+ "name": "NVIDIA A40",
41
+ "memoryTotal": "48305799168",
42
+ "cudaCores": 10752,
43
+ "architecture": "Ampere"
44
+ }
45
+ ],
46
+ "cudaVersion": "12.7"
47
+ }
wandb/run-20241203_185203-najxbup6/logs/debug-core.log ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2024-12-03T18:52:03.57165883Z","level":"INFO","msg":"started logging, with flags","port-filename":"/tmp/tmp6yh_tti_/port-21367.txt","pid":21367,"debug":false,"disable-analytics":false}
2
+ {"time":"2024-12-03T18:52:03.571726063Z","level":"INFO","msg":"FeatureState","shutdownOnParentExitEnabled":false}
3
+ {"time":"2024-12-03T18:52:03.572934846Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":21367}
4
+ {"time":"2024-12-03T18:52:03.57285693Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":46379,"Zone":""}}
5
+ {"time":"2024-12-03T18:52:03.737251507Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:36504"}
6
+ {"time":"2024-12-03T18:52:04.058031183Z","level":"INFO","msg":"handleInformInit: received","streamId":"najxbup6","id":"127.0.0.1:36504"}
7
+ {"time":"2024-12-03T18:52:04.183470455Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"najxbup6","id":"127.0.0.1:36504"}
8
+ {"time":"2024-12-03T18:52:08.130482168Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:36504"}
9
+ {"time":"2024-12-03T18:52:08.130659224Z","level":"INFO","msg":"connection: Close: initiating connection closure","id":"127.0.0.1:36504"}
10
+ {"time":"2024-12-03T18:52:08.130755688Z","level":"INFO","msg":"server is shutting down"}
11
+ {"time":"2024-12-03T18:52:08.130860268Z","level":"INFO","msg":"connection: Close: connection successfully closed","id":"127.0.0.1:36504"}
12
+ {"time":"2024-12-03T18:52:08.858356392Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:36504"}
13
+ {"time":"2024-12-03T18:52:08.858424292Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:36504"}
14
+ {"time":"2024-12-03T18:52:08.85844791Z","level":"INFO","msg":"server is closed"}
wandb/run-20241203_185203-najxbup6/logs/debug-internal.log ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2024-12-03T18:52:04.061594638Z","level":"INFO","msg":"using version","core version":"0.18.7"}
2
+ {"time":"2024-12-03T18:52:04.062683181Z","level":"INFO","msg":"created symlink","path":"/workspace/GPT-SoVITS/wandb/run-20241203_185203-najxbup6/logs/debug-core.log"}
3
+ {"time":"2024-12-03T18:52:04.181494866Z","level":"INFO","msg":"created new stream","id":"najxbup6"}
4
+ {"time":"2024-12-03T18:52:04.182422476Z","level":"INFO","msg":"stream: started","id":"najxbup6"}
5
+ {"time":"2024-12-03T18:52:04.182576293Z","level":"INFO","msg":"writer: Do: started","stream_id":"najxbup6"}
6
+ {"time":"2024-12-03T18:52:04.182633903Z","level":"INFO","msg":"handler: started","stream_id":"najxbup6"}
7
+ {"time":"2024-12-03T18:52:04.182665857Z","level":"INFO","msg":"sender: started","stream_id":"najxbup6"}
8
+ {"time":"2024-12-03T18:52:04.568505243Z","level":"INFO","msg":"Starting system monitor"}
9
+ {"time":"2024-12-03T18:52:08.130657144Z","level":"INFO","msg":"stream: closing","id":"najxbup6"}
10
+ {"time":"2024-12-03T18:52:08.130915038Z","level":"INFO","msg":"Stopping system monitor"}
11
+ {"time":"2024-12-03T18:52:08.13386141Z","level":"INFO","msg":"Stopped system monitor"}
12
+ {"time":"2024-12-03T18:52:08.667615718Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
13
+ {"time":"2024-12-03T18:52:08.85195332Z","level":"INFO","msg":"handler: closed","stream_id":"najxbup6"}
14
+ {"time":"2024-12-03T18:52:08.852082887Z","level":"INFO","msg":"sender: closed","stream_id":"najxbup6"}
15
+ {"time":"2024-12-03T18:52:08.852047068Z","level":"INFO","msg":"writer: Close: closed","stream_id":"najxbup6"}
16
+ {"time":"2024-12-03T18:52:08.856221306Z","level":"INFO","msg":"stream: closed","id":"najxbup6"}
wandb/run-20241203_185203-najxbup6/run-najxbup6.wandb ADDED
Binary file (16.3 kB). View file
 
wandb/run-20241203_185257-7zp6kxhx/files/output.log ADDED
The diff for this file is too large to render. See raw diff
 
wandb/run-20241203_185257-7zp6kxhx/files/wandb-metadata.json ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.4.0-196-generic-x86_64-with-glibc2.35",
3
+ "python": "3.10.12",
4
+ "startedAt": "2024-12-03T18:52:57.631714Z",
5
+ "args": [
6
+ "-c",
7
+ "configs/s1.yaml"
8
+ ],
9
+ "program": "/workspace/GPT-SoVITS/GPT_SoVITS/s1_train.py",
10
+ "codePath": "GPT_SoVITS/s1_train.py",
11
+ "git": {
12
+ "remote": "https://github.com/RVC-Boss/GPT-SoVITS.git",
13
+ "commit": "a70e1ad30c072cdbcfb716962abdc8008fa41cc2"
14
+ },
15
+ "email": "[email protected]",
16
+ "root": "/workspace/GPT-SoVITS",
17
+ "host": "7a6bba088bf1",
18
+ "username": "root",
19
+ "executable": "/usr/bin/python",
20
+ "codePathLocal": "GPT_SoVITS/s1_train.py",
21
+ "cpu_count": 48,
22
+ "cpu_count_logical": 96,
23
+ "gpu": "NVIDIA A40",
24
+ "gpu_count": 1,
25
+ "disk": {
26
+ "/": {
27
+ "total": "42949672960",
28
+ "used": "3612430336"
29
+ }
30
+ },
31
+ "memory": {
32
+ "total": "540662632448"
33
+ },
34
+ "cpu": {
35
+ "count": 48,
36
+ "countLogical": 96
37
+ },
38
+ "gpu_nvidia": [
39
+ {
40
+ "name": "NVIDIA A40",
41
+ "memoryTotal": "48305799168",
42
+ "cudaCores": 10752,
43
+ "architecture": "Ampere"
44
+ }
45
+ ],
46
+ "cudaVersion": "12.7"
47
+ }
wandb/run-20241203_185257-7zp6kxhx/logs/debug-core.log ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2024-12-03T18:52:57.125257617Z","level":"INFO","msg":"started logging, with flags","port-filename":"/tmp/tmpg94bhy5s/port-21847.txt","pid":21847,"debug":false,"disable-analytics":false}
2
+ {"time":"2024-12-03T18:52:57.125312331Z","level":"INFO","msg":"FeatureState","shutdownOnParentExitEnabled":false}
3
+ {"time":"2024-12-03T18:52:57.126540671Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":21847}
4
+ {"time":"2024-12-03T18:52:57.126474677Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":44225,"Zone":""}}
5
+ {"time":"2024-12-03T18:52:57.297685759Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:35782"}
6
+ {"time":"2024-12-03T18:52:57.634807084Z","level":"INFO","msg":"handleInformInit: received","streamId":"7zp6kxhx","id":"127.0.0.1:35782"}
7
+ {"time":"2024-12-03T18:52:57.766148785Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"7zp6kxhx","id":"127.0.0.1:35782"}
8
+ {"time":"2024-12-03T18:53:07.832970963Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:35782"}
9
+ {"time":"2024-12-03T18:53:07.833273182Z","level":"INFO","msg":"server is shutting down"}
10
+ {"time":"2024-12-03T18:53:07.833246872Z","level":"INFO","msg":"connection: Close: initiating connection closure","id":"127.0.0.1:35782"}
11
+ {"time":"2024-12-03T18:53:07.833479032Z","level":"INFO","msg":"connection: Close: connection successfully closed","id":"127.0.0.1:35782"}
12
+ {"time":"2024-12-03T18:53:08.261131593Z","level":"INFO","msg":"Parent process exited, terminating service process."}
wandb/run-20241203_185257-7zp6kxhx/logs/debug-internal.log ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2024-12-03T18:52:57.637678482Z","level":"INFO","msg":"using version","core version":"0.18.7"}
2
+ {"time":"2024-12-03T18:52:57.638824343Z","level":"INFO","msg":"created symlink","path":"/workspace/GPT-SoVITS/wandb/run-20241203_185257-7zp6kxhx/logs/debug-core.log"}
3
+ {"time":"2024-12-03T18:52:57.7637763Z","level":"INFO","msg":"created new stream","id":"7zp6kxhx"}
4
+ {"time":"2024-12-03T18:52:57.764894735Z","level":"INFO","msg":"stream: started","id":"7zp6kxhx"}
5
+ {"time":"2024-12-03T18:52:57.765059685Z","level":"INFO","msg":"writer: Do: started","stream_id":"7zp6kxhx"}
6
+ {"time":"2024-12-03T18:52:57.76510391Z","level":"INFO","msg":"handler: started","stream_id":"7zp6kxhx"}
7
+ {"time":"2024-12-03T18:52:57.765125664Z","level":"INFO","msg":"sender: started","stream_id":"7zp6kxhx"}
8
+ {"time":"2024-12-03T18:52:58.098522624Z","level":"INFO","msg":"Starting system monitor"}
9
+ {"time":"2024-12-03T18:53:07.833139123Z","level":"INFO","msg":"stream: closing","id":"7zp6kxhx"}
10
+ {"time":"2024-12-03T18:53:07.833395573Z","level":"INFO","msg":"Stopping system monitor"}
11
+ {"time":"2024-12-03T18:53:07.835643464Z","level":"INFO","msg":"Stopped system monitor"}
wandb/run-20241203_185453-2eog1nt2/files/config.yaml ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _wandb:
2
+ value:
3
+ cli_version: 0.18.7
4
+ m: []
5
+ python_version: 3.10.12
6
+ t:
7
+ "1":
8
+ - 1
9
+ - 5
10
+ - 9
11
+ - 11
12
+ - 49
13
+ - 53
14
+ - 55
15
+ - 103
16
+ - 105
17
+ "2":
18
+ - 1
19
+ - 5
20
+ - 9
21
+ - 11
22
+ - 49
23
+ - 53
24
+ - 55
25
+ - 103
26
+ - 105
27
+ "3":
28
+ - 1
29
+ - 13
30
+ - 16
31
+ - 23
32
+ - 55
33
+ "4": 3.10.12
34
+ "5": 0.18.7
35
+ "6": 4.46.3
36
+ "8":
37
+ - 5
38
+ "12": 0.18.7
39
+ "13": linux-x86_64
40
+ data:
41
+ value:
42
+ batch_size: 8
43
+ cleaned_text: true
44
+ filter_length: 2048
45
+ hop_length: 640
46
+ max_frames: 60
47
+ max_sec: 60
48
+ mel_channels: 128
49
+ mel_fmax: null
50
+ mel_fmin: 0
51
+ num_workers: 4
52
+ pad_val: 1024
53
+ training_files: data8
54
+ win_length: 2048
55
+ model:
56
+ value:
57
+ EOS: 2047
58
+ attention_dropout: 0.1
59
+ attn_pdrop: 0.1
60
+ block_size: 1000
61
+ dropout: 0.1
62
+ embd_pdrop: 0.1
63
+ embedding_dim: 768
64
+ ffn_hidden: 3072
65
+ head: 12
66
+ hidden_dim: 768
67
+ hidden_dropout: 0.1
68
+ max_mel_positions: 8000
69
+ max_text_positions: 2048
70
+ n_embd: 768
71
+ n_layer: 12
72
+ num_layers: 6
73
+ pad_val: 1024
74
+ phoneme_vocab_size: 2048
75
+ postnet_dim: 384
76
+ postnet_layers: 3
77
+ prenet_dim: 384
78
+ prenet_layers: 3
79
+ resid_pdrop: 0.1
80
+ semantic_dim: 1024
81
+ vocab_size: 2048
82
+ output_dir:
83
+ value: logs/s1
84
+ train:
85
+ value:
86
+ batch_size: 8
87
+ epochs: 15
88
+ exp_name: gpt_training
89
+ half_weights_save_dir: weights/s1
90
+ if_save_every_weights: true
91
+ if_save_latest: true
92
+ precision: 32
93
+ save_every_n_epoch: 5
94
+ seed: 1234
95
+ wandb:
96
+ entity: null
97
+ log_interval: 100
98
+ name: stage1_training
99
+ project: gpt-sovits-hindi
100
+ train_phoneme_path:
101
+ value: data8/phoneme.txt
102
+ train_semantic_path:
103
+ value: data8/semantic.tsv
wandb/run-20241203_185453-2eog1nt2/files/output.log ADDED
@@ -0,0 +1,1140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ GPU available: True (cuda), used: True
2
+ TPU available: False, using: 0 TPU cores
3
+ HPU available: False, using: 0 HPUs
4
+ Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/1
5
+ ----------------------------------------------------------------------------------------------------
6
+ distributed_backend=nccl
7
+ All distributed processes registered. Starting with 1 processes
8
+ ----------------------------------------------------------------------------------------------------
9
+
10
+ /usr/local/lib/python3.10/dist-packages/pytorch_lightning/loggers/wandb.py:396: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.
11
+ semantic_data_len: 726
12
+ phoneme_data_len: 727
13
+ data8/wavs/0.wav अखरोट साइज में कैसी होगी ताकि फिर खोल सिविल डिफेंस पास तो इसमें क्या इशू जाते हैं ऐसी क्या प्रॉब्लम आती है 5 से रिलेटेड जॉब्स अपडेटेड साइट सौगंध पीपल फ्रॉम ऑल ग्रेंस बैकग्राउंड तो क्या ऐसे इशूज है जो पास से रिलेटेड आते हैं दायित्व प्रॉब्लम्स से मेरे पास आते हैं वह रिलेशनशिप को लेकर कि आते हैं इस पेस्ट को अनम्यूट रिलेशनशिप को लेकर किया पास भी किसी ने मेरे साथ कुछ किया मैं वह लेट को
14
+ 0 data8/wavs/1.wav साथ मुझे बहुत टाइम पहले कैंची याद आती है ऑफ मु...
15
+ 1 data8/wavs/2.wav कि वन भूल जाओ अब आगे देखो फीचर को देखो लेकिन प...
16
+ 2 data8/wavs/3.wav हैं बिकॉज़ क्या होता है कि किसी को भी उस पेन अ...
17
+ 3 data8/wavs/4.wav नेक्स्ट टो इंपासिबल जब तक कि हम यह न समझ लें क...
18
+ 4 data8/wavs/5.wav में रखी है तू जान उसको जिसमें वांट बट अगर उसको...
19
+ .. ... ...
20
+ 721 data8/wavs/722.wav होने लगेंगी तो इस सबको सलूशन किया है या जो भी ...
21
+ 722 data8/wavs/723.wav की फैट की सब कुछ इंपोर्टेंट है हो सकता पतले हो...
22
+ 723 data8/wavs/724.wav उस समय हर चीज द प्रॉब्लम है फ्रेगनेट चाहती है ...
23
+ 724 data8/wavs/725.wav है तो इसको थोड़ी सी देर के लिए बैटर फील होता ह...
24
+ 725 data8/wavs/726.wav हो यानी बेसिक्स के एकॉर्डिंग हो हैं तो इन जस्ट...
25
+
26
+ [726 rows x 2 columns]
27
+ Traceback (most recent call last):
28
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
29
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
30
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
31
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
32
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
33
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
34
+ KeyError: 'ऑ'
35
+ Traceback (most recent call last):
36
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
37
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
38
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
39
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
40
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
41
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
42
+ KeyError: 'ॉ'
43
+ Traceback (most recent call last):
44
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
45
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
46
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
47
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
48
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
49
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
50
+ KeyError: 'ॉ'
51
+ Traceback (most recent call last):
52
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
53
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
54
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
55
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
56
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
57
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
58
+ KeyError: 'ॉ'
59
+ Traceback (most recent call last):
60
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
61
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
62
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
63
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
64
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
65
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
66
+ KeyError: 'ॉ'
67
+ Traceback (most recent call last):
68
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
69
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
70
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
71
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
72
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
73
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
74
+ KeyError: 'ॉ'
75
+ Traceback (most recent call last):
76
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
77
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
78
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
79
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
80
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
81
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
82
+ KeyError: 'ॉ'
83
+ Traceback (most recent call last):
84
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
85
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
86
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
87
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
88
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
89
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
90
+ KeyError: 'ॉ'
91
+ Traceback (most recent call last):
92
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
93
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
94
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
95
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
96
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
97
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
98
+ KeyError: 'ॉ'
99
+ Traceback (most recent call last):
100
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
101
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
102
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
103
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
104
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
105
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
106
+ KeyError: 'ऑ'
107
+ Traceback (most recent call last):
108
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
109
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
110
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
111
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
112
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
113
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
114
+ KeyError: 'ॉ'
115
+ Traceback (most recent call last):
116
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
117
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
118
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
119
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
120
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
121
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
122
+ KeyError: 'ॉ'
123
+ Traceback (most recent call last):
124
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
125
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
126
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
127
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
128
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
129
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
130
+ KeyError: 'ॉ'
131
+ Traceback (most recent call last):
132
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
133
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
134
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
135
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
136
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
137
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
138
+ KeyError: 'ॉ'
139
+ Traceback (most recent call last):
140
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
141
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
142
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
143
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
144
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
145
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
146
+ KeyError: 'ॉ'
147
+ Traceback (most recent call last):
148
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
149
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
150
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
151
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
152
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
153
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
154
+ KeyError: 'ॉ'
155
+ Traceback (most recent call last):
156
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
157
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
158
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
159
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
160
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
161
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
162
+ KeyError: 'ॉ'
163
+ Traceback (most recent call last):
164
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
165
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
166
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
167
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
168
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
169
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
170
+ KeyError: 'ऑ'
171
+ Traceback (most recent call last):
172
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
173
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
174
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
175
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
176
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
177
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
178
+ KeyError: 'ॉ'
179
+ Traceback (most recent call last):
180
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
181
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
182
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
183
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
184
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
185
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
186
+ KeyError: 'ॉ'
187
+ Traceback (most recent call last):
188
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
189
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
190
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
191
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
192
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
193
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
194
+ KeyError: 'ऑ'
195
+ Traceback (most recent call last):
196
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
197
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
198
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
199
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
200
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
201
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
202
+ KeyError: 'ॉ'
203
+ Traceback (most recent call last):
204
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
205
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
206
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
207
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
208
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
209
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
210
+ KeyError: 'ॉ'
211
+ Traceback (most recent call last):
212
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
213
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
214
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
215
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
216
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
217
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
218
+ KeyError: 'ॉ'
219
+ Traceback (most recent call last):
220
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
221
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
222
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
223
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
224
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
225
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
226
+ KeyError: 'ॉ'
227
+ Traceback (most recent call last):
228
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
229
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
230
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
231
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
232
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
233
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
234
+ KeyError: 'ॉ'
235
+ Traceback (most recent call last):
236
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
237
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
238
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
239
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
240
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
241
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
242
+ KeyError: 'ॉ'
243
+ Traceback (most recent call last):
244
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
245
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
246
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
247
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
248
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
249
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
250
+ KeyError: 'ऑ'
251
+ Traceback (most recent call last):
252
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
253
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
254
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
255
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
256
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
257
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
258
+ KeyError: 'ॉ'
259
+ Traceback (most recent call last):
260
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
261
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
262
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
263
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
264
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
265
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
266
+ KeyError: 'ऑ'
267
+ Traceback (most recent call last):
268
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
269
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
270
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
271
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
272
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
273
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
274
+ KeyError: 'ॉ'
275
+ Traceback (most recent call last):
276
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
277
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
278
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
279
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
280
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
281
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
282
+ KeyError: 'ऑ'
283
+ Traceback (most recent call last):
284
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
285
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
286
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
287
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
288
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
289
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
290
+ KeyError: 'ऑ'
291
+ Traceback (most recent call last):
292
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
293
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
294
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
295
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
296
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
297
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
298
+ KeyError: 'ॉ'
299
+ Traceback (most recent call last):
300
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
301
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
302
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
303
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
304
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
305
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
306
+ KeyError: 'ॉ'
307
+ Traceback (most recent call last):
308
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
309
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
310
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
311
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
312
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
313
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
314
+ KeyError: 'ॉ'
315
+ Traceback (most recent call last):
316
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
317
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
318
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
319
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
320
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
321
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
322
+ KeyError: 'ऑ'
323
+ Traceback (most recent call last):
324
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
325
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
326
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
327
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
328
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
329
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
330
+ KeyError: 'ॉ'
331
+ Traceback (most recent call last):
332
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
333
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
334
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
335
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
336
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
337
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
338
+ KeyError: 'ॉ'
339
+ Traceback (most recent call last):
340
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
341
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
342
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
343
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
344
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
345
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
346
+ KeyError: 'ॉ'
347
+ Traceback (most recent call last):
348
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
349
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
350
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
351
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
352
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
353
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
354
+ KeyError: 'ॉ'
355
+ Traceback (most recent call last):
356
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
357
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
358
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
359
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
360
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
361
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
362
+ KeyError: 'ॉ'
363
+ Traceback (most recent call last):
364
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
365
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
366
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
367
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
368
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
369
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
370
+ KeyError: 'ॉ'
371
+ Traceback (most recent call last):
372
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
373
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
374
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
375
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
376
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
377
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
378
+ KeyError: 'ॉ'
379
+ Traceback (most recent call last):
380
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
381
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
382
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
383
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
384
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
385
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
386
+ KeyError: 'ॉ'
387
+ Traceback (most recent call last):
388
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
389
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
390
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
391
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
392
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
393
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
394
+ KeyError: 'ॉ'
395
+ Traceback (most recent call last):
396
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
397
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
398
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
399
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
400
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
401
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
402
+ KeyError: 'ॉ'
403
+ Traceback (most recent call last):
404
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
405
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
406
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
407
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
408
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
409
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
410
+ KeyError: 'ॉ'
411
+ Traceback (most recent call last):
412
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
413
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
414
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
415
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
416
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
417
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
418
+ KeyError: 'ॉ'
419
+ Traceback (most recent call last):
420
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
421
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
422
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
423
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
424
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
425
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
426
+ KeyError: 'ऑ'
427
+ Traceback (most recent call last):
428
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
429
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
430
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
431
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
432
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
433
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
434
+ KeyError: 'ऑ'
435
+ Traceback (most recent call last):
436
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
437
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
438
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
439
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
440
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
441
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
442
+ KeyError: 'ॉ'
443
+ Traceback (most recent call last):
444
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
445
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
446
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
447
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
448
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
449
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
450
+ KeyError: 'ॉ'
451
+ Traceback (most recent call last):
452
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
453
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
454
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
455
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
456
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
457
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
458
+ KeyError: 'ॉ'
459
+ Traceback (most recent call last):
460
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
461
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
462
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
463
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
464
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
465
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
466
+ KeyError: 'ॉ'
467
+ Traceback (most recent call last):
468
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
469
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
470
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
471
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
472
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
473
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
474
+ KeyError: 'ॉ'
475
+ Traceback (most recent call last):
476
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
477
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
478
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
479
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
480
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
481
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
482
+ KeyError: 'ॉ'
483
+ Traceback (most recent call last):
484
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
485
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
486
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
487
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
488
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
489
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
490
+ KeyError: 'ॉ'
491
+ Traceback (most recent call last):
492
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
493
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
494
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
495
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
496
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
497
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
498
+ KeyError: 'ॉ'
499
+ Traceback (most recent call last):
500
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
501
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
502
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
503
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
504
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
505
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
506
+ KeyError: 'ॉ'
507
+ Traceback (most recent call last):
508
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
509
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
510
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
511
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
512
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
513
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
514
+ KeyError: 'ॉ'
515
+ Traceback (most recent call last):
516
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
517
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
518
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
519
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
520
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
521
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
522
+ KeyError: 'ॉ'
523
+ Traceback (most recent call last):
524
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
525
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
526
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
527
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
528
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
529
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
530
+ KeyError: 'ॉ'
531
+ Traceback (most recent call last):
532
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
533
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
534
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
535
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
536
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
537
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
538
+ KeyError: 'ॉ'
539
+ Traceback (most recent call last):
540
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
541
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
542
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
543
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
544
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
545
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
546
+ KeyError: 'ॉ'
547
+ Traceback (most recent call last):
548
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
549
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
550
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
551
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
552
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
553
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
554
+ KeyError: 'ॉ'
555
+ Traceback (most recent call last):
556
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
557
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
558
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
559
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
560
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
561
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
562
+ KeyError: 'ॉ'
563
+ Traceback (most recent call last):
564
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
565
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
566
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
567
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
568
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
569
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
570
+ KeyError: 'ॉ'
571
+ Traceback (most recent call last):
572
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
573
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
574
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
575
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
576
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
577
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
578
+ KeyError: 'ॉ'
579
+ Traceback (most recent call last):
580
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
581
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
582
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
583
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
584
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
585
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
586
+ KeyError: 'ॉ'
587
+ Traceback (most recent call last):
588
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
589
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
590
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
591
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
592
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
593
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
594
+ KeyError: 'ॉ'
595
+ Traceback (most recent call last):
596
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
597
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
598
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
599
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
600
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
601
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
602
+ KeyError: 'ऑ'
603
+ Traceback (most recent call last):
604
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
605
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
606
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
607
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
608
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
609
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
610
+ KeyError: 'ॉ'
611
+ Traceback (most recent call last):
612
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
613
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
614
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
615
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
616
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
617
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
618
+ KeyError: 'ऑ'
619
+ Traceback (most recent call last):
620
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
621
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
622
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
623
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
624
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
625
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
626
+ KeyError: 'ॉ'
627
+ Traceback (most recent call last):
628
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
629
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
630
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
631
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
632
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
633
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
634
+ KeyError: 'ॉ'
635
+ Traceback (most recent call last):
636
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
637
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
638
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
639
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
640
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
641
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
642
+ KeyError: 'ॉ'
643
+ Traceback (most recent call last):
644
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
645
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
646
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
647
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
648
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
649
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
650
+ KeyError: 'ॉ'
651
+ Traceback (most recent call last):
652
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
653
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
654
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
655
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
656
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
657
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
658
+ KeyError: 'ॉ'
659
+ Traceback (most recent call last):
660
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
661
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
662
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
663
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
664
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
665
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
666
+ KeyError: 'ॉ'
667
+ Traceback (most recent call last):
668
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
669
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
670
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
671
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
672
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
673
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
674
+ KeyError: 'ॉ'
675
+ Traceback (most recent call last):
676
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
677
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
678
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
679
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
680
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
681
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
682
+ KeyError: 'ॉ'
683
+ Traceback (most recent call last):
684
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
685
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
686
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
687
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
688
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
689
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
690
+ KeyError: 'ॉ'
691
+ Traceback (most recent call last):
692
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
693
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
694
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
695
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
696
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
697
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
698
+ KeyError: 'ॉ'
699
+ Traceback (most recent call last):
700
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
701
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
702
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
703
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
704
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
705
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
706
+ KeyError: 'ऑ'
707
+ Traceback (most recent call last):
708
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
709
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
710
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
711
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
712
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
713
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
714
+ KeyError: 'ॉ'
715
+ Traceback (most recent call last):
716
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
717
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
718
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
719
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
720
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
721
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
722
+ KeyError: 'ॉ'
723
+ Traceback (most recent call last):
724
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
725
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
726
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
727
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
728
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
729
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
730
+ KeyError: 'ॉ'
731
+ Traceback (most recent call last):
732
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
733
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
734
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
735
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
736
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
737
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
738
+ KeyError: 'ॉ'
739
+ Traceback (most recent call last):
740
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
741
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
742
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
743
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
744
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
745
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
746
+ KeyError: 'ॉ'
747
+ Traceback (most recent call last):
748
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
749
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
750
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
751
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
752
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
753
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
754
+ KeyError: 'ॉ'
755
+ Traceback (most recent call last):
756
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
757
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
758
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
759
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
760
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
761
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
762
+ KeyError: 'ॉ'
763
+ Traceback (most recent call last):
764
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
765
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
766
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
767
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
768
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
769
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
770
+ KeyError: 'ऑ'
771
+ Traceback (most recent call last):
772
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
773
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
774
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
775
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
776
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
777
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
778
+ KeyError: 'ॉ'
779
+ Traceback (most recent call last):
780
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
781
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
782
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
783
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
784
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
785
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
786
+ KeyError: 'ॉ'
787
+ Traceback (most recent call last):
788
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
789
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
790
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
791
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
792
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
793
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
794
+ KeyError: 'ॉ'
795
+ Traceback (most recent call last):
796
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
797
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
798
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
799
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
800
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
801
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
802
+ KeyError: 'ॉ'
803
+ Traceback (most recent call last):
804
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
805
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
806
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
807
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
808
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
809
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
810
+ KeyError: 'ॉ'
811
+ Traceback (most recent call last):
812
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
813
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
814
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
815
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
816
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
817
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
818
+ KeyError: 'ॉ'
819
+ Traceback (most recent call last):
820
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
821
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
822
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
823
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
824
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
825
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
826
+ KeyError: 'ॉ'
827
+ Traceback (most recent call last):
828
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
829
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
830
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
831
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
832
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
833
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
834
+ KeyError: 'ऑ'
835
+ Traceback (most recent call last):
836
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
837
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
838
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
839
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
840
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
841
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
842
+ KeyError: 'ॉ'
843
+ Traceback (most recent call last):
844
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
845
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
846
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
847
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
848
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
849
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
850
+ KeyError: 'ऑ'
851
+ Traceback (most recent call last):
852
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
853
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
854
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
855
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
856
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
857
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
858
+ KeyError: 'ॉ'
859
+ Traceback (most recent call last):
860
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
861
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
862
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
863
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
864
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
865
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
866
+ KeyError: 'ऑ'
867
+ Traceback (most recent call last):
868
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
869
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
870
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
871
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
872
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
873
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
874
+ KeyError: 'ॉ'
875
+ Traceback (most recent call last):
876
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
877
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
878
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
879
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
880
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
881
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
882
+ KeyError: 'ॉ'
883
+ Traceback (most recent call last):
884
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
885
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
886
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
887
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
888
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
889
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
890
+ KeyError: 'ॉ'
891
+ Traceback (most recent call last):
892
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
893
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
894
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
895
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
896
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
897
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
898
+ KeyError: 'ॉ'
899
+ Traceback (most recent call last):
900
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
901
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
902
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
903
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
904
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
905
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
906
+ KeyError: 'ॉ'
907
+ Traceback (most recent call last):
908
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
909
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
910
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
911
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
912
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
913
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
914
+ KeyError: 'ॉ'
915
+ Traceback (most recent call last):
916
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
917
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
918
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
919
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
920
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
921
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
922
+ KeyError: 'ॉ'
923
+ Traceback (most recent call last):
924
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
925
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
926
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
927
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
928
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
929
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
930
+ KeyError: 'ॉ'
931
+ Traceback (most recent call last):
932
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
933
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
934
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
935
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
936
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
937
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
938
+ KeyError: 'ॉ'
939
+ Traceback (most recent call last):
940
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
941
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
942
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
943
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
944
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
945
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
946
+ KeyError: 'ॉ'
947
+ Traceback (most recent call last):
948
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
949
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
950
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
951
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
952
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
953
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
954
+ KeyError: 'ॉ'
955
+ Traceback (most recent call last):
956
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
957
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
958
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
959
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
960
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
961
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
962
+ KeyError: 'ऑ'
963
+ Traceback (most recent call last):
964
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
965
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
966
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
967
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
968
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
969
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
970
+ KeyError: 'ॉ'
971
+ Traceback (most recent call last):
972
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
973
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
974
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
975
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
976
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
977
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
978
+ KeyError: 'ऑ'
979
+ Traceback (most recent call last):
980
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
981
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
982
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
983
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
984
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
985
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
986
+ KeyError: 'ॉ'
987
+ Traceback (most recent call last):
988
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
989
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
990
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
991
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
992
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
993
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
994
+ KeyError: 'ॉ'
995
+ Traceback (most recent call last):
996
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
997
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
998
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
999
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1000
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
1001
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1002
+ KeyError: 'ऑ'
1003
+ Traceback (most recent call last):
1004
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
1005
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
1006
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
1007
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1008
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
1009
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1010
+ KeyError: 'ऑ'
1011
+ Traceback (most recent call last):
1012
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
1013
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
1014
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
1015
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1016
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
1017
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1018
+ KeyError: 'ॉ'
1019
+ Traceback (most recent call last):
1020
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
1021
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
1022
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
1023
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1024
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
1025
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1026
+ KeyError: 'ॉ'
1027
+ Traceback (most recent call last):
1028
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
1029
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
1030
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
1031
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1032
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
1033
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1034
+ KeyError: 'ऑ'
1035
+ Traceback (most recent call last):
1036
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
1037
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
1038
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
1039
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1040
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
1041
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1042
+ KeyError: 'ॉ'
1043
+ Traceback (most recent call last):
1044
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
1045
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
1046
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
1047
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1048
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
1049
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1050
+ KeyError: 'ॉ'
1051
+ Traceback (most recent call last):
1052
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
1053
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
1054
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
1055
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1056
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
1057
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1058
+ KeyError: 'ॉ'
1059
+ Traceback (most recent call last):
1060
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
1061
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
1062
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
1063
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1064
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
1065
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1066
+ KeyError: 'ॉ'
1067
+ Traceback (most recent call last):
1068
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
1069
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
1070
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
1071
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1072
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
1073
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1074
+ KeyError: 'ॉ'
1075
+ Traceback (most recent call last):
1076
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
1077
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
1078
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
1079
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1080
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
1081
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1082
+ KeyError: 'ॉ'
1083
+ Traceback (most recent call last):
1084
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
1085
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
1086
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
1087
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1088
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
1089
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1090
+ KeyError: 'ॉ'
1091
+ Traceback (most recent call last):
1092
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
1093
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
1094
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
1095
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1096
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
1097
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1098
+ KeyError: 'ॉ'
1099
+ Traceback (most recent call last):
1100
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
1101
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
1102
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
1103
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1104
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
1105
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1106
+ KeyError: 'ॉ'
1107
+ Traceback (most recent call last):
1108
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
1109
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
1110
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
1111
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1112
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
1113
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1114
+ KeyError: 'ॉ'
1115
+ Traceback (most recent call last):
1116
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
1117
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
1118
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
1119
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1120
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
1121
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1122
+ KeyError: 'ॉ'
1123
+ Traceback (most recent call last):
1124
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
1125
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
1126
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
1127
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1128
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
1129
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1130
+ KeyError: 'ॉ'
1131
+ Traceback (most recent call last):
1132
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/data/dataset.py", line 161, in init_batch
1133
+ phoneme_ids = cleaned_text_to_sequence(phoneme, version)
1134
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in cleaned_text_to_sequence
1135
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1136
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/text/__init__.py", line 24, in <listcomp>
1137
+ phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]
1138
+ KeyError: 'ऑ'
1139
+
1140
+ Detected KeyboardInterrupt, attempting graceful shutdown ...
wandb/run-20241203_185453-2eog1nt2/files/wandb-metadata.json ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.4.0-196-generic-x86_64-with-glibc2.35",
3
+ "python": "3.10.12",
4
+ "startedAt": "2024-12-03T18:54:53.423670Z",
5
+ "args": [
6
+ "-c",
7
+ "configs/s1.yaml"
8
+ ],
9
+ "program": "/workspace/GPT-SoVITS/GPT_SoVITS/s1_train.py",
10
+ "codePath": "GPT_SoVITS/s1_train.py",
11
+ "git": {
12
+ "remote": "https://github.com/RVC-Boss/GPT-SoVITS.git",
13
+ "commit": "a70e1ad30c072cdbcfb716962abdc8008fa41cc2"
14
+ },
15
+ "email": "[email protected]",
16
+ "root": "/workspace/GPT-SoVITS",
17
+ "host": "7a6bba088bf1",
18
+ "username": "root",
19
+ "executable": "/usr/bin/python",
20
+ "codePathLocal": "GPT_SoVITS/s1_train.py",
21
+ "cpu_count": 48,
22
+ "cpu_count_logical": 96,
23
+ "gpu": "NVIDIA A40",
24
+ "gpu_count": 1,
25
+ "disk": {
26
+ "/": {
27
+ "total": "42949672960",
28
+ "used": "3612602368"
29
+ }
30
+ },
31
+ "memory": {
32
+ "total": "540662632448"
33
+ },
34
+ "cpu": {
35
+ "count": 48,
36
+ "countLogical": 96
37
+ },
38
+ "gpu_nvidia": [
39
+ {
40
+ "name": "NVIDIA A40",
41
+ "memoryTotal": "48305799168",
42
+ "cudaCores": 10752,
43
+ "architecture": "Ampere"
44
+ }
45
+ ],
46
+ "cudaVersion": "12.7"
47
+ }
wandb/run-20241203_185453-2eog1nt2/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"_wandb":{"runtime":5}}
wandb/run-20241203_185453-2eog1nt2/logs/debug-core.log ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2024-12-03T18:54:52.901098169Z","level":"INFO","msg":"started logging, with flags","port-filename":"/tmp/tmp1th9fdp8/port-22344.txt","pid":22344,"debug":false,"disable-analytics":false}
2
+ {"time":"2024-12-03T18:54:52.901139573Z","level":"INFO","msg":"FeatureState","shutdownOnParentExitEnabled":false}
3
+ {"time":"2024-12-03T18:54:52.901707465Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":22344}
4
+ {"time":"2024-12-03T18:54:52.901751292Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":37953,"Zone":""}}
5
+ {"time":"2024-12-03T18:54:53.085077672Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:53598"}
6
+ {"time":"2024-12-03T18:54:53.427167893Z","level":"INFO","msg":"handleInformInit: received","streamId":"2eog1nt2","id":"127.0.0.1:53598"}
7
+ {"time":"2024-12-03T18:54:53.557164059Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"2eog1nt2","id":"127.0.0.1:53598"}
8
+ {"time":"2024-12-03T18:54:59.344386691Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:53598"}
9
+ {"time":"2024-12-03T18:54:59.344540917Z","level":"INFO","msg":"connection: Close: initiating connection closure","id":"127.0.0.1:53598"}
10
+ {"time":"2024-12-03T18:54:59.34469848Z","level":"INFO","msg":"connection: Close: connection successfully closed","id":"127.0.0.1:53598"}
11
+ {"time":"2024-12-03T18:54:59.344689478Z","level":"INFO","msg":"server is shutting down"}
12
+ {"time":"2024-12-03T18:55:00.055284536Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:53598"}
13
+ {"time":"2024-12-03T18:55:00.055376635Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:53598"}
14
+ {"time":"2024-12-03T18:55:00.055426719Z","level":"INFO","msg":"server is closed"}
wandb/run-20241203_185453-2eog1nt2/logs/debug-internal.log ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2024-12-03T18:54:53.430425698Z","level":"INFO","msg":"using version","core version":"0.18.7"}
2
+ {"time":"2024-12-03T18:54:53.431636023Z","level":"INFO","msg":"created symlink","path":"/workspace/GPT-SoVITS/wandb/run-20241203_185453-2eog1nt2/logs/debug-core.log"}
3
+ {"time":"2024-12-03T18:54:53.555785799Z","level":"INFO","msg":"created new stream","id":"2eog1nt2"}
4
+ {"time":"2024-12-03T18:54:53.556507344Z","level":"INFO","msg":"stream: started","id":"2eog1nt2"}
5
+ {"time":"2024-12-03T18:54:53.55667089Z","level":"INFO","msg":"writer: Do: started","stream_id":"2eog1nt2"}
6
+ {"time":"2024-12-03T18:54:53.556777182Z","level":"INFO","msg":"handler: started","stream_id":"2eog1nt2"}
7
+ {"time":"2024-12-03T18:54:53.556777662Z","level":"INFO","msg":"sender: started","stream_id":"2eog1nt2"}
8
+ {"time":"2024-12-03T18:54:53.924884022Z","level":"INFO","msg":"Starting system monitor"}
9
+ {"time":"2024-12-03T18:54:59.344524138Z","level":"INFO","msg":"stream: closing","id":"2eog1nt2"}
10
+ {"time":"2024-12-03T18:54:59.344765045Z","level":"INFO","msg":"Stopping system monitor"}
11
+ {"time":"2024-12-03T18:54:59.346382242Z","level":"INFO","msg":"Stopped system monitor"}
12
+ {"time":"2024-12-03T18:54:59.826491777Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
13
+ {"time":"2024-12-03T18:55:00.049648629Z","level":"INFO","msg":"handler: closed","stream_id":"2eog1nt2"}
14
+ {"time":"2024-12-03T18:55:00.049753825Z","level":"INFO","msg":"writer: Close: closed","stream_id":"2eog1nt2"}
15
+ {"time":"2024-12-03T18:55:00.049814222Z","level":"INFO","msg":"sender: closed","stream_id":"2eog1nt2"}
16
+ {"time":"2024-12-03T18:55:00.052569227Z","level":"INFO","msg":"stream: closed","id":"2eog1nt2"}
wandb/run-20241203_185453-2eog1nt2/run-2eog1nt2.wandb ADDED
Binary file (165 kB). View file
 
wandb/run-20241203_185605-tfl0uvkj/files/config.yaml ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _wandb:
2
+ value:
3
+ cli_version: 0.18.7
4
+ m: []
5
+ python_version: 3.10.12
6
+ t:
7
+ "1":
8
+ - 1
9
+ - 5
10
+ - 9
11
+ - 11
12
+ - 49
13
+ - 53
14
+ - 55
15
+ - 103
16
+ - 105
17
+ "2":
18
+ - 1
19
+ - 5
20
+ - 9
21
+ - 11
22
+ - 49
23
+ - 53
24
+ - 55
25
+ - 103
26
+ - 105
27
+ "3":
28
+ - 1
29
+ - 13
30
+ - 16
31
+ - 23
32
+ - 55
33
+ "4": 3.10.12
34
+ "5": 0.18.7
35
+ "6": 4.46.3
36
+ "8":
37
+ - 5
38
+ "12": 0.18.7
39
+ "13": linux-x86_64
40
+ data:
41
+ value:
42
+ batch_size: 8
43
+ cleaned_text: true
44
+ filter_length: 2048
45
+ hop_length: 640
46
+ max_frames: 60
47
+ max_sec: 60
48
+ mel_channels: 128
49
+ mel_fmax: null
50
+ mel_fmin: 0
51
+ num_workers: 4
52
+ pad_val: 1024
53
+ training_files: data8
54
+ win_length: 2048
55
+ model:
56
+ value:
57
+ EOS: 2047
58
+ attention_dropout: 0.1
59
+ attn_pdrop: 0.1
60
+ block_size: 1000
61
+ dropout: 0.1
62
+ embd_pdrop: 0.1
63
+ embedding_dim: 768
64
+ ffn_hidden: 3072
65
+ head: 12
66
+ hidden_dim: 768
67
+ hidden_dropout: 0.1
68
+ max_mel_positions: 8000
69
+ max_text_positions: 2048
70
+ n_embd: 768
71
+ n_layer: 12
72
+ num_layers: 6
73
+ pad_val: 1024
74
+ phoneme_vocab_size: 2048
75
+ postnet_dim: 384
76
+ postnet_layers: 3
77
+ prenet_dim: 384
78
+ prenet_layers: 3
79
+ resid_pdrop: 0.1
80
+ semantic_dim: 1024
81
+ vocab_size: 2048
82
+ output_dir:
83
+ value: logs/s1
84
+ train:
85
+ value:
86
+ batch_size: 8
87
+ epochs: 15
88
+ exp_name: gpt_training
89
+ half_weights_save_dir: weights/s1
90
+ if_save_every_weights: true
91
+ if_save_latest: true
92
+ precision: 32
93
+ save_every_n_epoch: 5
94
+ seed: 1234
95
+ wandb:
96
+ entity: null
97
+ log_interval: 100
98
+ name: stage1_training
99
+ project: gpt-sovits-hindi
100
+ train_phoneme_path:
101
+ value: data8/phoneme.txt
102
+ train_semantic_path:
103
+ value: data8/semantic.tsv
wandb/run-20241203_185605-tfl0uvkj/files/output.log ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ GPU available: True (cuda), used: True
2
+ TPU available: False, using: 0 TPU cores
3
+ HPU available: False, using: 0 HPUs
4
+ Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/1
5
+ ----------------------------------------------------------------------------------------------------
6
+ distributed_backend=nccl
7
+ All distributed processes registered. Starting with 1 processes
8
+ ----------------------------------------------------------------------------------------------------
9
+
10
+ /usr/local/lib/python3.10/dist-packages/pytorch_lightning/loggers/wandb.py:396: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.
11
+ semantic_data_len: 726
12
+ phoneme_data_len: 727
13
+ data8/wavs/0.wav अखरोट साइज में कैसी होगी ताकि फिर खोल सिविल डिफेंस पास तो इसमें क्या इशू जाते हैं ऐसी क्या प्रॉब्लम आती है 5 से रिलेटेड जॉब्स अपडेटेड साइट सौगंध पीपल फ्रॉम ऑल ग्रेंस बैकग्राउंड तो क्या ऐसे इशूज है जो पास से रिलेटेड आते हैं दायित्व प्रॉब्लम्स से मेरे पास आते हैं वह रिलेशनशिप को लेकर कि आते हैं इस पेस्ट को अनम्यूट रिलेशनशिप को लेकर किया पास भी किसी ने मेरे साथ कुछ किया मैं वह लेट को
14
+ 0 data8/wavs/1.wav साथ मुझे बहुत टाइम पहले कैंची याद आती है ऑफ मु...
15
+ 1 data8/wavs/2.wav कि वन भूल जाओ अब आगे देखो फीचर को देखो लेकिन प...
16
+ 2 data8/wavs/3.wav हैं बिकॉज़ क्या होता है कि किसी को भी उस पेन अ...
17
+ 3 data8/wavs/4.wav नेक्स्ट टो इंपासिबल जब तक कि हम यह न समझ लें क...
18
+ 4 data8/wavs/5.wav में रखी है तू जान उसको जिसमें वांट बट अगर उसको...
19
+ .. ... ...
20
+ 721 data8/wavs/722.wav होने लगेंगी तो इस सबको सलूशन किया है या जो भी ...
21
+ 722 data8/wavs/723.wav की फैट की सब कुछ इंपोर्टेंट है हो सकता पतले हो...
22
+ 723 data8/wavs/724.wav उस समय हर चीज द प्रॉब्लम है फ्रेगनेट चाहती है ...
23
+ 724 data8/wavs/725.wav है तो इसको थोड़ी सी देर के लिए बैटर फील होता ह...
24
+ 725 data8/wavs/726.wav हो यानी बेसिक्स के एकॉर्डिंग हो हैं तो इन जस्ट...
25
+
26
+ [726 rows x 2 columns]
27
+ deleted 48 audios who's phoneme/sec are bigger than 25 or smaller than 3
28
+ dataset.__len__(): 678
29
+ LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
30
+ Traceback (most recent call last):
31
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/s1_train.py", line 113, in <module>
32
+ main(args)
33
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/s1_train.py", line 100, in main
34
+ trainer.fit(model, data_module)
35
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/trainer.py", line 538, in fit
36
+ call._call_and_handle_interrupt(
37
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/call.py", line 46, in _call_and_handle_interrupt
38
+ return trainer.strategy.launcher.launch(trainer_fn, *args, trainer=trainer, **kwargs)
39
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/strategies/launchers/subprocess_script.py", line 105, in launch
40
+ return function(*args, **kwargs)
41
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/trainer.py", line 574, in _fit_impl
42
+ self._run(model, ckpt_path=ckpt_path)
43
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/trainer.py", line 957, in _run
44
+ self.strategy.setup(self)
45
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/strategies/ddp.py", line 174, in setup
46
+ self.setup_optimizers(trainer)
47
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/strategies/strategy.py", line 138, in setup_optimizers
48
+ self.optimizers, self.lr_scheduler_configs = _init_optimizers_and_lr_schedulers(self.lightning_module)
49
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/core/optimizer.py", line 179, in _init_optimizers_and_lr_schedulers
50
+ optim_conf = call._call_lightning_module_hook(model.trainer, "configure_optimizers", pl_module=model)
51
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/call.py", line 167, in _call_lightning_module_hook
52
+ output = fn(*args, **kwargs)
53
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/models/t2s_lightning_module.py", line 134, in configure_optimizers
54
+ init_lr=self.config["optimizer"]["lr_init"],
55
+ KeyError: 'optimizer'
56
+ Traceback (most recent call last):
57
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/s1_train.py", line 113, in <module>
58
+ main(args)
59
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/s1_train.py", line 100, in main
60
+ trainer.fit(model, data_module)
61
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/trainer.py", line 538, in fit
62
+ call._call_and_handle_interrupt(
63
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/call.py", line 46, in _call_and_handle_interrupt
64
+ return trainer.strategy.launcher.launch(trainer_fn, *args, trainer=trainer, **kwargs)
65
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/strategies/launchers/subprocess_script.py", line 105, in launch
66
+ return function(*args, **kwargs)
67
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/trainer.py", line 574, in _fit_impl
68
+ self._run(model, ckpt_path=ckpt_path)
69
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/trainer.py", line 957, in _run
70
+ self.strategy.setup(self)
71
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/strategies/ddp.py", line 174, in setup
72
+ self.setup_optimizers(trainer)
73
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/strategies/strategy.py", line 138, in setup_optimizers
74
+ self.optimizers, self.lr_scheduler_configs = _init_optimizers_and_lr_schedulers(self.lightning_module)
75
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/core/optimizer.py", line 179, in _init_optimizers_and_lr_schedulers
76
+ optim_conf = call._call_lightning_module_hook(model.trainer, "configure_optimizers", pl_module=model)
77
+ File "/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/call.py", line 167, in _call_lightning_module_hook
78
+ output = fn(*args, **kwargs)
79
+ File "/workspace/GPT-SoVITS/GPT_SoVITS/AR/models/t2s_lightning_module.py", line 134, in configure_optimizers
80
+ init_lr=self.config["optimizer"]["lr_init"],
81
+ KeyError: 'optimizer'
wandb/run-20241203_185605-tfl0uvkj/files/wandb-metadata.json ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.4.0-196-generic-x86_64-with-glibc2.35",
3
+ "python": "3.10.12",
4
+ "startedAt": "2024-12-03T18:56:05.716152Z",
5
+ "args": [
6
+ "-c",
7
+ "configs/s1.yaml"
8
+ ],
9
+ "program": "/workspace/GPT-SoVITS/GPT_SoVITS/s1_train.py",
10
+ "codePath": "GPT_SoVITS/s1_train.py",
11
+ "git": {
12
+ "remote": "https://github.com/RVC-Boss/GPT-SoVITS.git",
13
+ "commit": "a70e1ad30c072cdbcfb716962abdc8008fa41cc2"
14
+ },
15
+ "email": "[email protected]",
16
+ "root": "/workspace/GPT-SoVITS",
17
+ "host": "7a6bba088bf1",
18
+ "username": "root",
19
+ "executable": "/usr/bin/python",
20
+ "codePathLocal": "GPT_SoVITS/s1_train.py",
21
+ "cpu_count": 48,
22
+ "cpu_count_logical": 96,
23
+ "gpu": "NVIDIA A40",
24
+ "gpu_count": 1,
25
+ "disk": {
26
+ "/": {
27
+ "total": "42949672960",
28
+ "used": "3612618752"
29
+ }
30
+ },
31
+ "memory": {
32
+ "total": "540662632448"
33
+ },
34
+ "cpu": {
35
+ "count": 48,
36
+ "countLogical": 96
37
+ },
38
+ "gpu_nvidia": [
39
+ {
40
+ "name": "NVIDIA A40",
41
+ "memoryTotal": "48305799168",
42
+ "cudaCores": 10752,
43
+ "architecture": "Ampere"
44
+ }
45
+ ],
46
+ "cudaVersion": "12.7"
47
+ }
wandb/run-20241203_185605-tfl0uvkj/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"_wandb":{"runtime":12}}
wandb/run-20241203_185605-tfl0uvkj/logs/debug-core.log ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2024-12-03T18:56:05.255120107Z","level":"INFO","msg":"started logging, with flags","port-filename":"/tmp/tmpjj94wwee/port-22844.txt","pid":22844,"debug":false,"disable-analytics":false}
2
+ {"time":"2024-12-03T18:56:05.255154724Z","level":"INFO","msg":"FeatureState","shutdownOnParentExitEnabled":false}
3
+ {"time":"2024-12-03T18:56:05.255597668Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":22844}
4
+ {"time":"2024-12-03T18:56:05.255639452Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":43237,"Zone":""}}
5
+ {"time":"2024-12-03T18:56:05.437957868Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:57582"}
6
+ {"time":"2024-12-03T18:56:05.720550265Z","level":"INFO","msg":"handleInformInit: received","streamId":"tfl0uvkj","id":"127.0.0.1:57582"}
7
+ {"time":"2024-12-03T18:56:05.846473014Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"tfl0uvkj","id":"127.0.0.1:57582"}
8
+ {"time":"2024-12-03T18:56:17.728386531Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:57582"}
9
+ {"time":"2024-12-03T18:56:17.728524639Z","level":"INFO","msg":"connection: Close: initiating connection closure","id":"127.0.0.1:57582"}
10
+ {"time":"2024-12-03T18:56:17.728611687Z","level":"INFO","msg":"server is shutting down"}
11
+ {"time":"2024-12-03T18:56:17.728766722Z","level":"INFO","msg":"connection: Close: connection successfully closed","id":"127.0.0.1:57582"}
12
+ {"time":"2024-12-03T18:56:18.402657245Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:57582"}
13
+ {"time":"2024-12-03T18:56:18.402725927Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:57582"}
14
+ {"time":"2024-12-03T18:56:18.402744573Z","level":"INFO","msg":"server is closed"}
wandb/run-20241203_185605-tfl0uvkj/logs/debug-internal.log ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2024-12-03T18:56:05.722088979Z","level":"INFO","msg":"using version","core version":"0.18.7"}
2
+ {"time":"2024-12-03T18:56:05.723035183Z","level":"INFO","msg":"created symlink","path":"/workspace/GPT-SoVITS/wandb/run-20241203_185605-tfl0uvkj/logs/debug-core.log"}
3
+ {"time":"2024-12-03T18:56:05.845849169Z","level":"INFO","msg":"created new stream","id":"tfl0uvkj"}
4
+ {"time":"2024-12-03T18:56:05.846223002Z","level":"INFO","msg":"stream: started","id":"tfl0uvkj"}
5
+ {"time":"2024-12-03T18:56:05.846266483Z","level":"INFO","msg":"writer: Do: started","stream_id":"tfl0uvkj"}
6
+ {"time":"2024-12-03T18:56:05.846270711Z","level":"INFO","msg":"handler: started","stream_id":"tfl0uvkj"}
7
+ {"time":"2024-12-03T18:56:05.846290518Z","level":"INFO","msg":"sender: started","stream_id":"tfl0uvkj"}
8
+ {"time":"2024-12-03T18:56:06.133251456Z","level":"INFO","msg":"Starting system monitor"}
9
+ {"time":"2024-12-03T18:56:17.728529149Z","level":"INFO","msg":"stream: closing","id":"tfl0uvkj"}
10
+ {"time":"2024-12-03T18:56:17.728772449Z","level":"INFO","msg":"Stopping system monitor"}
11
+ {"time":"2024-12-03T18:56:17.731915231Z","level":"INFO","msg":"Stopped system monitor"}
12
+ {"time":"2024-12-03T18:56:18.14960212Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
13
+ {"time":"2024-12-03T18:56:18.392894708Z","level":"INFO","msg":"handler: closed","stream_id":"tfl0uvkj"}
14
+ {"time":"2024-12-03T18:56:18.392995863Z","level":"INFO","msg":"writer: Close: closed","stream_id":"tfl0uvkj"}
15
+ {"time":"2024-12-03T18:56:18.393014563Z","level":"INFO","msg":"sender: closed","stream_id":"tfl0uvkj"}
16
+ {"time":"2024-12-03T18:56:18.400170737Z","level":"INFO","msg":"stream: closed","id":"tfl0uvkj"}
wandb/run-20241203_185605-tfl0uvkj/logs/debug.log ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2024-12-03 18:56:05,696 INFO MainThread:22844 [wandb_setup.py:_flush():79] Current SDK version is 0.18.7
2
+ 2024-12-03 18:56:05,697 INFO MainThread:22844 [wandb_setup.py:_flush():79] Configure stats pid to 22844
3
+ 2024-12-03 18:56:05,698 INFO MainThread:22844 [wandb_setup.py:_flush():79] Loading settings from /root/.config/wandb/settings
4
+ 2024-12-03 18:56:05,698 INFO MainThread:22844 [wandb_setup.py:_flush():79] Loading settings from /workspace/GPT-SoVITS/wandb/settings
5
+ 2024-12-03 18:56:05,698 INFO MainThread:22844 [wandb_setup.py:_flush():79] Loading settings from environment variables: {}
6
+ 2024-12-03 18:56:05,698 INFO MainThread:22844 [wandb_setup.py:_flush():79] Applying setup settings: {'mode': None, '_disable_service': None}
7
+ 2024-12-03 18:56:05,698 INFO MainThread:22844 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'GPT_SoVITS/s1_train.py', 'program_abspath': '/workspace/GPT-SoVITS/GPT_SoVITS/s1_train.py', 'program': '/workspace/GPT-SoVITS/GPT_SoVITS/s1_train.py'}
8
+ 2024-12-03 18:56:05,699 INFO MainThread:22844 [wandb_setup.py:_flush():79] Applying login settings: {}
9
+ 2024-12-03 18:56:05,699 INFO MainThread:22844 [wandb_init.py:_log_setup():533] Logging user logs to /workspace/GPT-SoVITS/wandb/run-20241203_185605-tfl0uvkj/logs/debug.log
10
+ 2024-12-03 18:56:05,700 INFO MainThread:22844 [wandb_init.py:_log_setup():534] Logging internal logs to /workspace/GPT-SoVITS/wandb/run-20241203_185605-tfl0uvkj/logs/debug-internal.log
11
+ 2024-12-03 18:56:05,700 INFO MainThread:22844 [wandb_init.py:init():619] calling init triggers
12
+ 2024-12-03 18:56:05,700 INFO MainThread:22844 [wandb_init.py:init():626] wandb.init called with sweep_config: {}
13
+ config: {'output_dir': 'logs/s1', 'train': {'seed': 1234, 'epochs': 15, 'batch_size': 8, 'save_every_n_epoch': 5, 'precision': 32, 'if_save_latest': True, 'if_save_every_weights': True, 'exp_name': 'gpt_training', 'half_weights_save_dir': 'weights/s1', 'wandb': {'project': 'gpt-sovits-hindi', 'name': 'stage1_training', 'entity': None, 'log_interval': 100}}, 'data': {'training_files': 'data8', 'max_sec': 60, 'max_frames': 60, 'filter_length': 2048, 'hop_length': 640, 'win_length': 2048, 'mel_channels': 128, 'mel_fmin': 0.0, 'mel_fmax': None, 'cleaned_text': True, 'num_workers': 4, 'batch_size': 8, 'pad_val': 1024}, 'train_semantic_path': 'data8/semantic.tsv', 'train_phoneme_path': 'data8/phoneme.txt', 'model': {'hidden_dim': 768, 'embedding_dim': 768, 'n_layer': 12, 'head': 12, 'n_embd': 768, 'vocab_size': 2048, 'block_size': 1000, 'embd_pdrop': 0.1, 'resid_pdrop': 0.1, 'attn_pdrop': 0.1, 'semantic_dim': 1024, 'num_layers': 6, 'ffn_hidden': 3072, 'dropout': 0.1, 'attention_dropout': 0.1, 'hidden_dropout': 0.1, 'max_text_positions': 2048, 'max_mel_positions': 8000, 'prenet_dim': 384, 'postnet_dim': 384, 'prenet_layers': 3, 'postnet_layers': 3, 'phoneme_vocab_size': 2048, 'EOS': 2047, 'pad_val': 1024}}
14
+ 2024-12-03 18:56:05,701 INFO MainThread:22844 [wandb_init.py:init():669] starting backend
15
+ 2024-12-03 18:56:05,701 INFO MainThread:22844 [wandb_init.py:init():673] sending inform_init request
16
+ 2024-12-03 18:56:05,714 INFO MainThread:22844 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
17
+ 2024-12-03 18:56:05,715 INFO MainThread:22844 [wandb_init.py:init():686] backend started and connected
18
+ 2024-12-03 18:56:05,728 INFO MainThread:22844 [wandb_init.py:init():781] updated telemetry
19
+ 2024-12-03 18:56:05,760 INFO MainThread:22844 [wandb_init.py:init():814] communicating run to backend with 90.0 second timeout
20
+ 2024-12-03 18:56:06,101 INFO MainThread:22844 [wandb_init.py:init():867] starting run threads in backend
21
+ 2024-12-03 18:56:06,391 INFO MainThread:22844 [wandb_run.py:_console_start():2456] atexit reg
22
+ 2024-12-03 18:56:06,391 INFO MainThread:22844 [wandb_run.py:_redirect():2305] redirect: wrap_raw
23
+ 2024-12-03 18:56:06,393 INFO MainThread:22844 [wandb_run.py:_redirect():2370] Wrapping output streams.
24
+ 2024-12-03 18:56:06,393 INFO MainThread:22844 [wandb_run.py:_redirect():2395] Redirects installed.
25
+ 2024-12-03 18:56:06,395 INFO MainThread:22844 [wandb_init.py:init():911] run started, returning control to user process
26
+ 2024-12-03 18:56:06,941 INFO MainThread:22844 [wandb_watch.py:_watch():71] Watching
27
+ 2024-12-03 18:56:17,728 WARNING MsgRouterThr:22844 [router.py:message_loop():75] message_loop has been closed
wandb/run-20241203_185605-tfl0uvkj/run-tfl0uvkj.wandb ADDED
Binary file (21.4 kB). View file