File size: 1,413 Bytes
3346920
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import yaml
import argbind

import audiotools as at

from vampnet.interface import Interface
import logging

# Local import, in keeping with this script's other mid-file imports; torch is
# only needed here to probe for CUDA support.
import torch

# getLogger() with no name returns the root logger; set it to DEBUG.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)

# Rebind the name to the argbind-bound class so the scoped config below can
# supply its constructor arguments.
Interface = argbind.bind(Interface)

# Explicit encoding: without it open() falls back to the platform's locale
# encoding, which can differ between machines.
with open("conf/interface/spotdl.yml", encoding="utf-8") as f:
    conf = yaml.safe_load(f)


with argbind.scope(conf):
    interface = Interface()
    # Prefer the GPU, but fall back to the CPU instead of failing outright on
    # machines where CUDA is not available.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    interface.to(device)

# Loader over a single audio source file.
loader = at.data.datasets.AudioLoader(sources=["input.wav"])

# Excerpt settings are taken from the loaded interface: the sample rate comes
# from its codec and the excerpt duration from the coarse model's
# `chunk_size_s` (presumably seconds, going by the suffix -- confirm).
dataset_options = {
    "sample_rate": interface.codec.sample_rate,
    "duration": interface.coarse.chunk_size_s,
    "n_examples": 200,
    "without_replacement": True,
}
dataset = at.data.datasets.AudioDataset(loader, **dataset_options)

import numpy as np
def load_random_audio():
    """Return one randomly chosen, preprocessed excerpt from ``dataset``.

    A random index in ``[0, len(dataset))`` is drawn with
    ``np.random.randint``; the ``"signal"`` entry of that dataset item is
    passed through ``interface.preprocess`` and the result is returned.

    Relies on the module-level ``dataset`` and ``interface`` objects.
    """
    n_items = len(dataset)
    pick = np.random.randint(low=0, high=n_items)
    excerpt = dataset[pick]["signal"]
    return interface.preprocess(excerpt)


# Draw one random excerpt and encode it with the interface.
sig = load_random_audio()
z = interface.encode(sig)

# NOTE(review): 'input.wav' is also the only path listed in the AudioLoader
# sources earlier in this script, so this write appears to replace the source
# file with the preprocessed excerpt -- confirm that this is intended.
sig.write('input.wav')

from vampnet import mask as pmask

# Build the mask from the encoded tokens (second argument 1.0 -- presumably
# the masking ratio; confirm against vampnet.mask.linear_random).
mask = pmask.linear_random(z, 1.0)

print("coarse")
# Options for the coarse pass. typical_mass / typical_min_tokens are not
# passed here; typical filtering is switched off.
coarse_options = dict(
    mask=mask,
    sampling_steps=36,
    temperature=8.0,
    return_mask=True,
    typical_filtering=False,
    gen_fn=interface.coarse.generate,
)
zv, mask_z = interface.coarse_vamp(z, **coarse_options)

print("coarse2fine")
zv = interface.coarse_to_fine(zv, temperature=0.8)

# Convert the tokens back to a signal, move it to the CPU and save it; the
# file name records the coarse temperature used above.
sig = interface.to_signal(zv).cpu()
sig.write('output-t=8.wav')