Spaces:
Sleeping
Sleeping
File size: 1,702 Bytes
cfd58c5 2a5abe3 88cd06b cfd58c5 88cd06b cfd58c5 88cd06b b026f7e af8075f 88cd06b b026f7e f4e63b7 88472c9 a2597a8 f3aa612 cfd58c5 249facd f3aa612 cfd58c5 c370655 f3aa612 cfd58c5 107d5cd b026f7e 6e4cda4 9fb0a06 b026f7e 107d5cd cfd58c5 107d5cd b026f7e cfd58c5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
import gradio as gr
import torch
from transformers import AutoProcessor, BarkModel
import scipy
# device = "cuda" if torch.cuda.is_available() else "cpu"
# model = BarkModel.from_pretrained("suno/bark-small", torch_dtype=torch.float16).to(device)
# model.enable_cpu_offload()
device = "cpu"
processor = AutoProcessor.from_pretrained("suno/bark-small")
model = BarkModel.from_pretrained("suno/bark-small").to(device)
num_list = ["1","2","3","4","5","6","7","8","9","10"]
lang_list = ["en","de"]
def run_bark(text, n, lang):
#history_prompt = []
semantic_prompt=f"v2/{lang}_speaker_{int(n-1)}"
#text=["Hello, my name is Suno. And, uh — and I like pizza. [laughs] But I also have other interests such as playing tic tac toe."],
inputs = processor(text=text,
voice_preset = semantic_prompt,
return_tensors="pt",
)
speech_values = model.generate(**inputs, do_sample=True)
sampling_rate = model.generation_config.sample_rate
#sampling_rate = model.config.sample_rate
#sampling_rate = 24000
scipy.io.wavfile.write("bark_out.wav", rate=sampling_rate, data=speech_values.cpu().numpy().squeeze())
return ("bark_out.wav")
with gr.Blocks() as app:
with gr.Column():
in_text = gr.Textbox()
with gr.Row():
speaker_num = gr.Dropdown(label="Speaker Voice", choices=num_list,value="1")
speaker_lang = gr.Dropdown(label="Speaker Language", choices=lang_list,value="en")
#speaker_num = gr.Number(value=0)
go_btn = gr.Button()
with gr.Column():
out_audio = gr.Audio()
go_btn.click(run_bark,[in_text, speaker_num, speaker_lang],out_audio)
app.launch() |