Clint Adams
committed on
Commit
•
b618cc5
1
Parent(s):
6c33c6b
change voice input
Browse files
- README.md +2 -3
- app.py +34 -33
- examples/1.npz +3 -0
- examples/2.npz +3 -0
- requirements.txt +4 -5
README.md
CHANGED
@@ -1,11 +1,10 @@
|
|
1 |
---
|
2 |
-
title: Bark
|
3 |
emoji: 🐶
|
4 |
colorFrom: gray
|
5 |
colorTo: blue
|
6 |
sdk: gradio
|
7 |
-
sdk_version:
|
8 |
-
python_version: 3.8.15
|
9 |
app_file: app.py
|
10 |
pinned: false
|
11 |
license: cc-by-nc-4.0
|
|
|
1 |
---
|
2 |
+
title: Bark (with user-supplied voices)
|
3 |
emoji: 🐶
|
4 |
colorFrom: gray
|
5 |
colorTo: blue
|
6 |
sdk: gradio
|
7 |
+
sdk_version: 4.26.0
|
|
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
license: cc-by-nc-4.0
|
app.py
CHANGED
@@ -3,39 +3,46 @@ import gradio as gr
|
|
3 |
from bark import SAMPLE_RATE, generate_audio, preload_models
|
4 |
from bark.generation import SUPPORTED_LANGS
|
5 |
from share_btn import community_icon_html, loading_icon_html, share_js
|
|
|
|
|
6 |
|
7 |
DEBUG_MODE = False
|
8 |
|
9 |
if not DEBUG_MODE:
|
10 |
_ = preload_models()
|
11 |
|
12 |
-
|
13 |
-
PROMPT_LOOKUP = {}
|
14 |
-
for _, lang in SUPPORTED_LANGS:
|
15 |
-
for n in range(10):
|
16 |
-
label = f"Speaker {n} ({lang})"
|
17 |
-
AVAILABLE_PROMPTS.append(label)
|
18 |
-
PROMPT_LOOKUP[label] = f"{lang}_speaker_{n}"
|
19 |
-
PROMPT_LOOKUP["Unconditional"] = None
|
20 |
-
PROMPT_LOOKUP["Announcer"] = "announcer"
|
21 |
|
22 |
-
|
23 |
-
|
24 |
-
title = "# 🐶 Bark</div>"
|
25 |
|
26 |
description = """
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
|
|
|
|
|
|
36 |
Use at your own risk.
|
37 |
"""
|
38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
article = """
|
40 |
|
41 |
## 🌎 Foreign Language
|
@@ -108,17 +115,12 @@ Gradio demo supported by 🤗 Hugging Face. Bark is licensed under a non-commerc
|
|
108 |
"""
|
109 |
|
110 |
examples = [
|
111 |
-
["
|
112 |
-
|
113 |
-
["Hello, my name is Suno. And, uh — and I like pizza. [laughs] But I also have other interests such as playing tic tac toe.",
|
114 |
-
"Speaker 1 (en)"], # , 0.7, 0.7],
|
115 |
-
["Buenos días Miguel. Tu colega piensa que tu alemán es extremadamente malo. But I suppose your english isn't terrible.",
|
116 |
-
"Speaker 0 (es)"], # , 0.7, 0.7],
|
117 |
]
|
118 |
|
119 |
-
|
120 |
def gen_tts(text, history_prompt): # , temp_semantic, temp_waveform):
|
121 |
-
history_prompt = PROMPT_LOOKUP[history_prompt]
|
122 |
if DEBUG_MODE:
|
123 |
audio_arr = np.zeros(SAMPLE_RATE)
|
124 |
else:
|
@@ -171,9 +173,8 @@ with gr.Blocks(css=css) as block:
|
|
171 |
with gr.Column():
|
172 |
input_text = gr.Textbox(
|
173 |
label="Input Text", lines=2, value=default_text, elem_id="input_text")
|
174 |
-
options = gr.
|
175 |
-
|
176 |
-
run_button = gr.Button(text="Generate Audio", type="button")
|
177 |
with gr.Column():
|
178 |
audio_out = gr.Audio(label="Generated Audio",
|
179 |
type="numpy", elem_id="audio_out")
|
@@ -183,7 +184,7 @@ with gr.Blocks(css=css) as block:
|
|
183 |
loading_icon = gr.HTML(loading_icon_html)
|
184 |
share_button = gr.Button(
|
185 |
"Share to community", elem_id="share-btn")
|
186 |
-
share_button.click(None, [], [],
|
187 |
inputs = [input_text, options]
|
188 |
outputs = [audio_out]
|
189 |
gr.Examples(examples=examples, fn=gen_tts, inputs=inputs,
|
|
|
3 |
from bark import SAMPLE_RATE, generate_audio, preload_models
|
4 |
from bark.generation import SUPPORTED_LANGS
|
5 |
from share_btn import community_icon_html, loading_icon_html, share_js
|
6 |
+
import spaces
|
7 |
+
import torch
|
8 |
|
9 |
DEBUG_MODE = False
|
10 |
|
11 |
if not DEBUG_MODE:
|
12 |
_ = preload_models()
|
13 |
|
14 |
+
default_text = "This is a fork of Suno's Bark Spaces that allows you to supply your own voice."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
|
16 |
+
title = "# 🐶 Bark (with user-supplied voices)</div>"
|
|
|
|
|
17 |
|
18 |
description = """
|
19 |
+
This is a fork of Suno's [Bark Space](https://huggingface.co/spaces/suno/bark)
|
20 |
+
that allows you to supply your own voice.
|
21 |
+
|
22 |
+
You can use [GitMyLo's bark-voice-cloning Space](https://huggingface.co/spaces/GitMylo/bark-voice-cloning)
|
23 |
+
to clone your own voice, or provide a voice file from
|
24 |
+
[this Bark speaker directory](https://rsxdalv.github.io/bark-speaker-directory/),
|
25 |
+
or use an alternate method to generate the same .npz format with semantic, coarse, and fine histories.
|
26 |
+
|
27 |
+
Bark is a universal text-to-audio model created by [Suno](www.suno.ai), with code publicly available [here](https://github.com/suno-ai/bark).
|
28 |
+
Bark can generate highly realistic, multilingual speech as well as other audio - including music, background noise and simple sound effects.
|
29 |
+
This demo should be used for research purposes only. Commercial use is strictly prohibited.
|
30 |
+
The model output is not censored and the authors do not endorse the opinions in the generated content.
|
31 |
Use at your own risk.
|
32 |
"""
|
33 |
|
34 |
+
if torch.cuda.is_available():
|
35 |
+
device = 'cuda'
|
36 |
+
device_dtype = torch.float16
|
37 |
+
xlp_kwargs['variant'] = 'fp16'
|
38 |
+
else:
|
39 |
+
device = 'cpu'
|
40 |
+
device_dtype = torch.float32
|
41 |
+
description+='''
|
42 |
+
|
43 |
+
This Space appears to be running on a CPU; it may take over 30 minutes to get results. You may [duplicate this space](https://huggingface.co/spaces/clinteroni/bark-with-custom-voice?duplicate=true) and pay for an upgraded runtime instead.
|
44 |
+
'''
|
45 |
+
|
46 |
article = """
|
47 |
|
48 |
## 🌎 Foreign Language
|
|
|
115 |
"""
|
116 |
|
117 |
examples = [
|
118 |
+
["I enjoy reading murder mysteries, long walks on the beach, sculpting mashed potatoes into the shape of a homicidal snowman, and telling you what's up.", 'examples/1.npz'],
|
119 |
+
['The space clown descended the long staircase and invaded New Jersey.', 'examples/2.npz'],
|
|
|
|
|
|
|
|
|
120 |
]
|
121 |
|
122 |
+
@spaces.GPU
|
123 |
def gen_tts(text, history_prompt): # , temp_semantic, temp_waveform):
|
|
|
124 |
if DEBUG_MODE:
|
125 |
audio_arr = np.zeros(SAMPLE_RATE)
|
126 |
else:
|
|
|
173 |
with gr.Column():
|
174 |
input_text = gr.Textbox(
|
175 |
label="Input Text", lines=2, value=default_text, elem_id="input_text")
|
176 |
+
options = gr.File(elem_id="speaker_option")
|
177 |
+
run_button = gr.Button("Generate Audio")
|
|
|
178 |
with gr.Column():
|
179 |
audio_out = gr.Audio(label="Generated Audio",
|
180 |
type="numpy", elem_id="audio_out")
|
|
|
184 |
loading_icon = gr.HTML(loading_icon_html)
|
185 |
share_button = gr.Button(
|
186 |
"Share to community", elem_id="share-btn")
|
187 |
+
share_button.click(None, [], [], js=share_js)
|
188 |
inputs = [input_text, options]
|
189 |
outputs = [audio_out]
|
190 |
gr.Examples(examples=examples, fn=gen_tts, inputs=inputs,
|
examples/1.npz
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5b16df700e5bc90c37eb502d769e6a7aa5cd7b8f4a59f7c46b3c39fdb85c02e2
|
3 |
+
size 77588
|
examples/2.npz
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:957c59a4490d1384020a2e2277a59a1d2b01b954c78ec4572520b04722512a27
|
3 |
+
size 90388
|
requirements.txt
CHANGED
@@ -1,5 +1,4 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
https://download.pytorch.org/whl/nightly/cu117/torchaudio-2.1.0.dev20230413%2Bcu117-cp38-cp38-linux_x86_64.whl
|
|
|
1 |
+
bark
|
2 |
+
torch
|
3 |
+
torchvision
|
4 |
+
torchaudio
|
|