Spaces:
Running
on
Zero
Running
on
Zero
File size: 2,025 Bytes
d646343 3123dc8 8058ab6 a365fcd 8058ab6 f62ad4c 3123dc8 a365fcd 3123dc8 8058ab6 3123dc8 873553c 3123dc8 8058ab6 3123dc8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 |
import spaces
import gradio as gr
from gradio_client import Client, handle_file
from zerorvc import RVC
import soundfile as sf
from pydub import AudioSegment
from joblib import memory
# On-disk joblib cache rooted at the relative path "cache".
# NOTE: this rebinds the name `memory` from the imported joblib module to the
# Memory instance; from here on `memory` refers to the cache object.
memory = memory.Memory(verbose=0, location="cache")
@memory.cache(ignore=["client"])
def split(client, audio):
    """Call the remote ``/separate`` endpoint and return its first two results.

    The call is memoized through ``memory.cache``; ``client`` is excluded from
    the cache key, so only ``audio`` determines whether a cached result is
    reused.

    Args:
        client: Object exposing ``predict`` (the per-session client).
        audio: Path of the audio file, wrapped with ``handle_file`` for upload.

    Returns:
        A tuple of the first two elements of the endpoint's result.
        ``process_audio`` treats them as the vocal and background tracks.
    """
    separated = client.predict(
        param_0=handle_file(audio),
        param_1="BS-RoFormer",
        api_name="/separate",
    )
    first_track, second_track = separated[0], separated[1]
    return first_track, second_track
@spaces.GPU
def convert(rvc, vocal, pitch_modification):
    """Convert ``vocal`` with the given RVC model and return the samples.

    The function is wrapped by the ``spaces.GPU`` decorator.

    Args:
        rvc: Model object exposing ``convert``.
        vocal: Vocal input forwarded unchanged to ``rvc.convert``.
        pitch_modification: Forwarded as the ``pitch_modification`` keyword.

    Returns:
        Whatever ``rvc.convert`` returns (the converted samples).
    """
    return rvc.convert(vocal, pitch_modification=pitch_modification)
def process_audio(client, model, audio, pitch_modification):
    """Separate a track, convert its vocal with an RVC model, and remix it.

    Steps: split ``audio`` into vocal and background via ``split``, load the
    RVC model named by ``model``, convert the vocal, write it as WAV, overlay
    it onto the background, and export the mix as MP3.

    Args:
        client: Per-session client passed through to ``split``.
        model: Identifier handed to ``RVC.from_pretrained``.
        audio: Path of the input audio file.
        pitch_modification: Forwarded to ``convert`` / ``rvc.convert``.

    Returns:
        A tuple ``(combined_path, vocal_path, bgm)``: the exported MP3 mix,
        the converted vocal WAV, and the background track returned by
        ``split``.
    """
    import os
    import tempfile

    vocal, bgm = split(client, audio)
    rvc = RVC.from_pretrained(model)
    samples = convert(rvc, vocal, pitch_modification)

    # Every call writes into its own directory. Fixed names in the working
    # directory would let overlapping requests overwrite each other's output.
    # The directory is intentionally not removed here: the returned paths must
    # stay readable after this function returns.
    out_dir = tempfile.mkdtemp(prefix="voice-conversion-")
    vocal_path = os.path.join(out_dir, "vocal.wav")
    combined_path = os.path.join(out_dir, "combined.mp3")

    sf.write(vocal_path, samples, rvc.sr)

    converted_vocal = AudioSegment.from_wav(vocal_path)
    background = AudioSegment.from_mp3(bgm)
    # The background is the base segment; the converted vocal is laid on top.
    combined = background.overlay(converted_vocal)
    combined.export(combined_path, format="mp3")

    return combined_path, vocal_path, bgm
# Stand-alone Interface wrapper around process_audio.
# NOTE(review): process_audio takes four parameters (client, model, audio,
# pitch_modification) but only two input components are wired here, and
# nothing in the visible code launches `iface` -- confirm whether it is still
# needed.
iface = gr.Interface(
    fn=process_audio,
    inputs=[
        gr.Audio(type="filepath"),
        gr.Slider(minimum=-36, maximum=36, value=0, step=1),
    ],
    outputs=[
        gr.Audio(label="Combined"),
        gr.Audio(label="Vocal"),
        gr.Audio(label="Background"),
    ],
    title="Voice Conversion",
    description="Upload an audio file and process it.",
)
def set_client_for_session(request: gr.Request):
    """Create the vocal-separation client for the current browser session.

    The caller's ``x-ip-token`` request header is forwarded to the remote
    Space as ``X-IP-Token``.

    Args:
        request: The Gradio request for the session being loaded.

    Returns:
        A ``Client`` for ``JacobLinCool/vocal-separation``. It carries the
        ``X-IP-Token`` header when the incoming request had one.
    """
    try:
        headers = {"X-IP-Token": request.headers["x-ip-token"]}
    except KeyError:
        # The header is absent for this request. Fall back to a client with
        # no extra headers rather than failing the page load and leaving the
        # session without a client.
        headers = None
    return Client("JacobLinCool/vocal-separation", headers=headers)
# Main UI: model name, input audio and pitch slider feed process_audio, whose
# three returned paths are shown in the Combined / Vocal / Background players.
with gr.Blocks() as demo:
    # Per-session state; filled by set_client_for_session when the page loads.
    client = gr.State()
    model = gr.Text("someone/model")
    audio = gr.Audio(type="filepath")
    pitch = gr.Slider(minimum=-36, maximum=36, value=0, step=1)
    btn = gr.Button("Run", variant="primary")
    btn.click(
        process_audio,
        [client, model, audio, pitch],
        [gr.Audio(label="Combined"), gr.Audio(label="Vocal"), gr.Audio(label="Background")],
    )
    # Build the session's client from the incoming request and store it in
    # the `client` state before the user presses Run.
    demo.load(set_client_for_session, None, client)

demo.launch()