import os

import librosa
import torch
import torch.nn as nn

from academicodec.models.hificodec.vqvae import VQVAE


class VqvaeTester(nn.Module):
    def __init__(self, config_path, model_path, sample_rate=24000):
        super().__init__()
        self.vqvae = VQVAE(config_path, model_path, with_encoder=True)
        self.sample_rate = sample_rate

    @torch.no_grad()
    def forward(self, wav_path):
        # Mono audio: wav has shape (T,), loaded at the model's sample rate.
        wav, sr = librosa.load(wav_path, sr=self.sample_rate)
        # File ID: basename with its extension stripped (assumes a 3-character suffix such as ".wav").
        fid = os.path.basename(wav_path)[:-4]
        # Add a batch dimension and move to GPU.
        wav = torch.tensor(wav).unsqueeze(0)
        wav = wav.cuda()
        # vq_codes are the acoustic tokens produced by the encoder and quantizer.
        vq_codes = self.vqvae.encode(wav)
        # Decode the acoustic tokens back to a waveform.
        syn = self.vqvae(vq_codes)
        return fid, syn

    @torch.no_grad()
    def vq(self, wav_path):
        wav, sr = librosa.load(wav_path, sr=self.sample_rate)
        fid = os.path.basename(wav_path)[:-4]
        wav = torch.tensor(wav).unsqueeze(0)
        wav = wav.cuda()
        # vq_codes are the acoustic tokens produced by the encoder and quantizer.
        vq_codes = self.vqvae.encode(wav)
        return fid, vq_codes