kevinwang676 committed on
Commit
627c134
1 Parent(s): 79a08d6

Upload 6 files

Browse files
Dockerfile (2).txt ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM debian:stable
2
+
3
+ # Install system packages
4
+ RUN apt update && apt install -y git pip
5
+
6
+ # Create non-root user
7
+ RUN useradd -m -d /bark bark
8
+
9
+ # Run as new user
10
+ USER bark
11
+ WORKDIR /bark
12
+
13
+ # Clone git repo
14
+ RUN git clone https://github.com/C0untFloyd/bark-gui
15
+
16
+ # Switch to git directory
17
+ WORKDIR /bark/bark-gui
18
+
19
+ # Append pip bin path to PATH
20
+ ENV PATH=$PATH:/bark/.local/bin
21
+
22
+ # Install dependencies
23
+ RUN pip install .
24
+ RUN pip install -r requirements.txt
25
+
26
+ # Listen on all addresses, since we are in a container.
27
+ RUN sed -i "s/server_name: ''/server_name: 0.0.0.0/g" ./config.yaml
28
+
29
+ # Suggested volumes
30
+ VOLUME /bark/bark-gui/assets/prompts/custom
31
+ VOLUME /bark/bark-gui/models
32
+ VOLUME /bark/.cache/huggingface/hub
33
+
34
+ # Default port for web-ui
35
+ EXPOSE 7860/tcp
36
+
37
+ # Start script
38
+ CMD python3 webui.py
config (2).yaml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ input_text_desired_length: 110
2
+ input_text_max_length: 170
3
+ selected_theme: JohnSmith9982/small_and_pretty
4
+ server_name: ''
5
+ server_port: 0
6
+ server_share: false
7
+ silence_between_sentences: 250
8
+ silence_between_speakers: 500
gitignore (3).txt ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ /outputs
3
+ /speakers
4
+ .vs
5
+ *.npz
6
+ *.wav
7
+ *.npy
8
+ .vs/
9
+ /models
10
+ /bark_ui_enhanced.egg-info
11
+ /build/lib/bark
12
+ *.pth
13
+ *.pt
14
+ *.zip
pyproject (2).toml ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [build-system]
2
+ requires = ["setuptools"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "bark-ui-enhanced"
7
+ version = "0.7.0"
8
+ description = "Bark text to audio model with additional features and a Web UI"
9
+ readme = "README.md"
10
+ requires-python = ">=3.8"
11
+ authors = [
12
+ {name = "Suno Inc (original Bark)", email = "[email protected]"},
13
+ {name = "Count Floyd"},
14
+ ]
15
+ # MIT License
16
+ license = {file = "LICENSE"}
17
+
18
+ dependencies = [
19
+ "boto3",
20
+ "encodec",
21
+ "funcy",
22
+ "huggingface-hub>=0.14.1",
23
+ "numpy",
24
+ "scipy",
25
+ "tokenizers",
26
+ "torch",
27
+ "tqdm",
28
+ "transformers",
29
+ ]
30
+
31
+ [project.urls]
32
+ source = "https://github.com/C0untFloyd/bark-gui"
33
+
34
+ [project.optional-dependencies]
35
+ dev = [
36
+ "bandit",
37
+ "black",
38
+ "codecov",
39
+ "flake8",
40
+ "hypothesis>=6.14,<7",
41
+ "isort>=5.0.0,<6",
42
+ "jupyter",
43
+ "mypy",
44
+ "nbconvert",
45
+ "nbformat",
46
+ "pydocstyle",
47
+ "pylint",
48
+ "pytest",
49
+ "pytest-cov",
50
+ ]
51
+
52
+ [tool.setuptools]
53
+ packages = ["bark"]
54
+
55
+ [tool.setuptools.package-data]
56
+ bark = ["assets/prompts/*.npz", "assets/prompts/v2/*.npz"]
57
+
58
+
59
+ [tool.black]
60
+ line-length = 100
setup (2).py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from setuptools import setup
2
+
3
+ setup()
swap_voice (2).py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from bark.generation import load_codec_model, generate_text_semantic, grab_best_device
2
+ from bark import SAMPLE_RATE
3
+ from encodec.utils import convert_audio
4
+ from bark.hubert.hubert_manager import HuBERTManager
5
+ from bark.hubert.pre_kmeans_hubert import CustomHubert
6
+ from bark.hubert.customtokenizer import CustomTokenizer
7
+ from bark.api import semantic_to_waveform
8
+ from scipy.io.wavfile import write as write_wav
9
+ from util.helper import create_filename
10
+ from util.settings import Settings
11
+
12
+
13
+ import torchaudio
14
+ import torch
15
+ import os
16
+ import gradio
17
+
18
def swap_voice_from_audio(swap_audio_filename, selected_speaker, tokenizer_lang, seed, batchcount, progress=gradio.Progress(track_tqdm=True)):
    """Re-render an audio file with another speaker and save it as a WAV.

    The recording is loaded with torchaudio, converted to the codec model's
    sample rate and channel count, passed through the HuBERT model and the
    language-specific custom tokenizer to obtain semantic tokens, and those
    tokens are handed to ``semantic_to_waveform`` with ``selected_speaker``
    as the history prompt. The resulting audio is written to a file whose
    name comes from ``create_filename``.

    Args:
        swap_audio_filename: Path of the audio file to load.
        selected_speaker: Forwarded as ``history_prompt`` to
            ``semantic_to_waveform``.
        tokenizer_lang: Language key; selects the tokenizer checkpoint
            ``./models/hubert/{tokenizer_lang}_tokenizer.pth``.
        seed: Accepted but not read by this function.
        batchcount: Accepted but not read by this function.
        progress: Progress callback; called with a fraction and a ``desc``.

    Returns:
        The output filename the audio was written to.
    """
    # Any non-empty BARK_FORCE_CPU value turns GPU use off.
    force_cpu = os.environ.get("BARK_FORCE_CPU", False)
    use_gpu = not force_cpu
    progress(0, desc="Loading Codec")

    # From https://github.com/gitmylo/bark-voice-cloning-HuBERT-quantizer
    # Make sure the HuBERT checkpoint and the tokenizer for this language exist.
    manager = HuBERTManager()
    manager.make_sure_hubert_installed()
    manager.make_sure_tokenizer_installed(tokenizer_lang=tokenizer_lang)

    # Load HuBERT (semantic features) and the codec model on the chosen device.
    device = grab_best_device(use_gpu)
    hubert = CustomHubert(checkpoint_path='./models/hubert/hubert.pt').to(device)
    codec = load_codec_model(use_gpu=use_gpu)

    # Load the CustomTokenizer for the requested language.
    tokenizer_checkpoint = f'./models/hubert/{tokenizer_lang}_tokenizer.pth'
    tokenizer = CustomTokenizer.load_from_checkpoint(tokenizer_checkpoint).to(device)

    progress(0.25, desc="Converting WAV")

    # Load the input audio; a two-channel file is averaged down to one channel.
    waveform, source_rate = torchaudio.load(swap_audio_filename)
    if waveform.shape[0] == 2:
        waveform = waveform.mean(0, keepdim=True)

    # Match the codec's sample rate / channels, then move to the model device.
    waveform = convert_audio(
        waveform, source_rate, codec.sample_rate, codec.channels
    ).to(device)
    vectors = hubert.forward(waveform, input_sample_hz=codec.sample_rate)
    tokens = tokenizer.get_token(vectors)

    audio = semantic_to_waveform(
        tokens,
        history_prompt=selected_speaker,
        temp=0.7,
        silent=False,
        output_full=False,
    )

    # The output folder is read from the application's config file.
    output_folder = Settings('config.yaml').output_folder_path
    result = create_filename(output_folder, None, "swapvoice", ".wav")
    write_wav(result, SAMPLE_RATE, audio)
    return result
62
+