File size: 1,700 Bytes
cfd58c5
2a5abe3
88cd06b
cfd58c5
 
 
88cd06b
 
 
 
 
 
 
cfd58c5
88cd06b
b026f7e
 
88cd06b
b026f7e
f4e63b7
191ed4a
a2597a8
 
 
f3aa612
cfd58c5
 
 
249facd
f3aa612
cfd58c5
c370655
f3aa612
cfd58c5
 
 
 
107d5cd
 
b026f7e
6e4cda4
9fb0a06
b026f7e
 
107d5cd
cfd58c5
107d5cd
 
 
b026f7e
cfd58c5
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import gradio as gr
import torch
from transformers import AutoProcessor, BarkModel
import scipy


# Inference is pinned to the CPU. A GPU variant would pick "cuda" when
# torch.cuda.is_available(), load the model with torch_dtype=torch.float16,
# and call model.enable_cpu_offload().
device = "cpu"

# Single source of truth for the checkpoint shared by processor and model.
_BARK_CHECKPOINT = "suno/bark-small"

processor = AutoProcessor.from_pretrained(_BARK_CHECKPOINT)
model = BarkModel.from_pretrained(_BARK_CHECKPOINT)
model = model.to(device)
# Choices offered by the speaker dropdowns. The selected values are
# interpolated into a Bark voice preset name "v2/<lang>_speaker_<n>".
# Bark's published v2 presets are numbered 0-9 for each language, so the
# speaker choices must be "0".."9" ("10" would name a preset that does not
# exist, and "0" would otherwise be unreachable).
num_list = [str(index) for index in range(10)]
lang_list = ["en", "fr"]

def run_bark(text, n, lang):
    """Generate speech for *text* with Bark and return the path of a WAV file.

    Args:
        text: Prompt handed unchanged to the Bark processor.
        n: Speaker index; converted with ``int()`` and inserted into the
            voice preset name.
        lang: Language code inserted into the voice preset name
            (for example ``"en"``).

    Returns:
        Path (``str``) of the WAV file holding the generated audio.
    """
    # Function-scope imports: a bare ``import scipy`` does not guarantee that
    # the ``scipy.io.wavfile`` submodule is loaded on every SciPy version.
    import tempfile

    from scipy.io import wavfile

    # NOTE(review): Bark's published v2 presets appear to be numbered 0-9 per
    # language -- confirm callers only pass indices in that range.
    voice_preset = f"v2/{lang}_speaker_{int(n)}"

    inputs = processor(
        text=text,
        voice_preset=voice_preset,
        return_tensors="pt",
    )

    speech_values = model.generate(**inputs, do_sample=True)
    sampling_rate = model.generation_config.sample_rate
    waveform = speech_values.cpu().numpy().squeeze()

    # Write to a unique file per call: a fixed output name would let
    # overlapping requests overwrite each other's audio. The handle is closed
    # before writing so the path can be reopened on every platform.
    with tempfile.NamedTemporaryFile(
        prefix="bark_", suffix=".wav", delete=False
    ) as out_file:
        out_path = out_file.name
    wavfile.write(out_path, rate=sampling_rate, data=waveform)
    return out_path

# UI layout: a text prompt with speaker/language selectors and a trigger
# button in the first column, the generated audio in the second.
with gr.Blocks() as app:
    with gr.Column():
        in_text = gr.Textbox()
        with gr.Row():
            speaker_num = gr.Dropdown(
                label="Speaker Voice",
                choices=num_list,
                value="1",
            )
            speaker_lang = gr.Dropdown(
                label="Speaker Language",
                choices=lang_list,
                value="en",
            )
        go_btn = gr.Button()

    with gr.Column():
        out_audio = gr.Audio()

    # Clicking the button feeds the three inputs to run_bark and shows the
    # value it returns in the audio component.
    go_btn.click(
        fn=run_bark,
        inputs=[in_text, speaker_num, speaker_lang],
        outputs=out_audio,
    )

app.launch()