File size: 2,037 Bytes
d646343
3123dc8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8058ab6
90ffed4
417e4c2
a365fcd
417e4c2
8058ab6
f62ad4c
3123dc8
417e4c2
3123dc8
417e4c2
3123dc8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8058ab6
 
 
3123dc8
 
873553c
3123dc8
8058ab6
3123dc8
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import os
import tempfile

import spaces
import gradio as gr
from gradio_client import Client, handle_file
from zerorvc import RVC
import soundfile as sf
from pydub import AudioSegment
from joblib import memory

# Disk-backed joblib cache rooted at the relative directory "cache".
# NOTE: this rebinds the name `memory` from the imported `joblib.memory` module
# to the Memory instance, so the module is no longer reachable under that name
# below this line; `@memory.cache` further down refers to the instance.
memory = memory.Memory(location="cache", verbose=0)

@memory.cache(ignore=["client"])
def split(client, audio):
    """Separate ``audio`` into two stems via the remote ``/separate`` endpoint.

    Results are memoised through ``memory.cache``. ``client`` is excluded from
    the cache key, so only ``audio`` distinguishes cached entries.

    Args:
        client: Object exposing ``predict``; in this file it is the
            ``gradio_client.Client`` built by ``set_client_for_session``.
        audio: Audio file reference handed to ``handle_file`` for upload.

    Returns:
        A 2-tuple holding the first two items of the endpoint's response.
        ``process_audio`` consumes them as (vocal, background).
    """
    separation_request = {
        "param_0": handle_file(audio),
        "param_1": "BS-RoFormer",
        "api_name": "/separate",
    }
    stems = client.predict(**separation_request)

    # Only the first two entries of the response are used.
    first_stem = stems[0]
    second_stem = stems[1]
    return first_stem, second_stem

@spaces.GPU
def convert(model, vocal, pitch_modification):
    """Run RVC voice conversion on ``vocal``.

    Args:
        model: Identifier passed to ``RVC.from_pretrained``.
        vocal: Vocal audio input forwarded to ``RVC.convert``.
        pitch_modification: Pitch shift forwarded to ``RVC.convert`` as the
            ``pitch_modification`` keyword.

    Returns:
        A ``(samples, sample_rate)`` tuple: the converted audio and the
        model's ``sr`` attribute.
    """
    # The model is loaded on every call; nothing is kept between invocations.
    voice_model = RVC.from_pretrained(model)
    converted = voice_model.convert(vocal, pitch_modification=pitch_modification)
    sample_rate = voice_model.sr
    return converted, sample_rate

def process_audio(client, model, audio, pitch_modification):
    """Separate, voice-convert and re-mix an audio file.

    Pipeline: ``split`` the input into vocal and background stems, ``convert``
    the vocal stem with the given RVC model, write the converted vocal to a WAV
    file, overlay it on the background and export the mix as MP3.

    Output files are written into a fresh temporary directory per call.
    Writing to fixed names in the working directory (``vocal.wav`` /
    ``combined.mp3``) lets concurrent requests overwrite each other's results.

    Args:
        client: Separation-service client forwarded to ``split``.
        model: RVC model identifier forwarded to ``convert``.
        audio: Path of the input audio file.
        pitch_modification: Pitch shift forwarded to ``convert``.

    Returns:
        A ``(combined_mp3_path, vocal_wav_path, background_path)`` tuple.
    """
    vocal, bgm = split(client, audio)
    samples, sr = convert(model, vocal, pitch_modification)

    # One private directory per request keeps simultaneous sessions isolated.
    # It is not removed here because the caller reads the files after return.
    out_dir = tempfile.mkdtemp(prefix="voice-conversion-")
    vocal_path = os.path.join(out_dir, "vocal.wav")
    combined_path = os.path.join(out_dir, "combined.mp3")

    sf.write(vocal_path, samples, sr)

    converted_vocal = AudioSegment.from_wav(vocal_path)
    background = AudioSegment.from_mp3(bgm)

    # overlay() keeps the background's length and mixes the vocal on top.
    combined = background.overlay(converted_vocal)
    combined.export(combined_path, format="mp3")
    return combined_path, vocal_path, bgm

# Stand-alone Interface wrapper around process_audio.
# NOTE(review): only two inputs (audio, pitch slider) are declared here, while
# process_audio takes four parameters (client, model, audio, pitch_modification),
# so this Interface cannot call it successfully as wired. `iface` is also never
# launched in the visible code -- only the Blocks app `demo` below is. This
# looks like a leftover from an earlier version; confirm before removing.
iface = gr.Interface(
    fn=process_audio,
    inputs=[gr.Audio(type="filepath"), gr.Slider(minimum=-36, maximum=36, value=0, step=1)],
    outputs=[gr.Audio(label="Combined"), gr.Audio(label="Vocal"), gr.Audio(label="Background")],
    title="Voice Conversion",
    description="Upload an audio file and process it.",
)


def set_client_for_session(request: gr.Request):
    """Create the vocal-separation client for the current browser session.

    When the incoming request carries an ``x-ip-token`` header, it is forwarded
    to the separation Space as ``X-IP-Token``. When the header is absent, a
    plain client is returned instead of raising ``KeyError``, so the page-load
    event still succeeds.

    Args:
        request: The Gradio request for the session being loaded.

    Returns:
        A ``gradio_client.Client`` connected to ``JacobLinCool/vocal-separation``.
    """
    try:
        x_ip_token = request.headers['x-ip-token']
    except KeyError:
        # No token to forward: fall back to a client without the header.
        return Client("JacobLinCool/vocal-separation")
    return Client("JacobLinCool/vocal-separation", headers={"X-IP-Token": x_ip_token})

with gr.Blocks() as demo:
    # Per-session slot for the separation client; populated by the load event.
    client = gr.State()

    # Input widgets, created in the order they appear on the page.
    model = gr.Text("someone/model")
    audio = gr.Audio(type="filepath")
    pitch = gr.Slider(minimum=-36, maximum=36, value=0, step=1)
    btn = gr.Button("Run", variant="primary")

    # Output players, created after the button so they are placed below it.
    combined_output = gr.Audio(label="Combined")
    vocal_output = gr.Audio(label="Vocal")
    background_output = gr.Audio(label="Background")

    # Run the full pipeline when the button is pressed.
    btn.click(
        fn=process_audio,
        inputs=[client, model, audio, pitch],
        outputs=[combined_output, vocal_output, background_output],
    )

    # Build the session's client as soon as the page loads.
    demo.load(fn=set_client_for_session, inputs=None, outputs=client)

demo.launch()