# Spaces: Sleeping
# Example script: encode an audio file into discrete EnCodec codes.
#
# Steps: load a pretrained 24 kHz EnCodec model, pick a target bandwidth,
# load and convert the audio to the model's expected format, then run the
# encoder and concatenate the per-frame codes into a single tensor.
from encodec import EncodecModel
from encodec.utils import convert_audio
import torchaudio
import torch

# Instantiate a pretrained EnCodec model.
model = EncodecModel.encodec_model_24khz()

# The number of codebooks used will be determined by the bandwidth selected.
# E.g. for a bandwidth of 6 kbps, `n_q = 8` codebooks are used.
# Supported bandwidths are 1.5 kbps (n_q = 2), 3 kbps (n_q = 4),
# 6 kbps (n_q = 8), 12 kbps (n_q = 16) and 24 kbps (n_q = 32).
# For the 48 kHz model, only 3, 6, 12, and 24 kbps are supported. The number
# of codebooks for each is half that of the 24 kHz model as the frame rate
# is twice as much.
model.set_target_bandwidth(6.0)

# Load and pre-process the audio waveform.
# NOTE: replace the placeholder below with a real path before running.
wav, sr = torchaudio.load("<PATH_TO_AUDIO_FILE>")
# Convert from the file's sample rate `sr` to the sample rate and channel
# count the model reports.
wav = convert_audio(wav, sr, model.sample_rate, model.channels)
# Add a leading dimension of size 1 (presumably the batch axis, given the
# [B, n_q, T] shape noted below).
wav = wav.unsqueeze(0)

# Extract discrete codes from EnCodec.
# Gradients are not needed for inference, so disable autograd tracking.
with torch.no_grad():
    encoded_frames = model.encode(wav)

# The first element of each encoded frame is taken as its codes (assumed from
# usage here); join them along the last axis.
codes = torch.cat([encoded[0] for encoded in encoded_frames], dim=-1)  # [B, n_q, T]