Spaces:
Running
Running
File size: 3,978 Bytes
9791162 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 |
import numpy as np
import torch
import crepe
###############################################################################
# Pitch thresholding methods
###############################################################################
class At:
    """Threshold pitch at a fixed periodicity value"""

    def __init__(self, value):
        # Periodicity below this value marks a frame as unvoiced
        self.value = value

    def __call__(self, pitch, periodicity):
        """Return a copy of pitch with low-periodicity entries unvoiced

        Entries of `pitch` whose corresponding `periodicity` is strictly
        below `self.value` are replaced with `crepe.UNVOICED`. The input
        `pitch` tensor is left untouched.
        """
        # Locate the entries that fail the threshold
        unvoiced = periodicity < self.value

        # Work on a copy so the caller's tensor is not modified
        result = pitch.clone()
        result[unvoiced] = crepe.UNVOICED
        return result
class Hysteresis:
    """Hysteresis thresholding

    A frame stays voiced only if its periodicity is not below a per-frame
    threshold. The threshold starts at `lower_bound` and is raised for
    frames whose (whitened, log2) pitch lies far from the mean. A voiced
    region that is entered from an unvoiced frame is kept only if at least
    one of its frames has periodicity above `upper_bound`.

    Arguments
        lower_bound - base periodicity threshold
        upper_bound - periodicity a voiced region must exceed at least once
            to be kept
        width - scale of the parabola that raises the threshold for pitch
            far from the mean
        stds - distance from the mean, in standard deviations of log2
            pitch, beyond which the threshold starts to rise
        return_threshold - if True, `__call__` also returns the threshold
    """

    def __init__(self,
                 lower_bound=.19,
                 upper_bound=.31,
                 width=.2,
                 stds=1.7,
                 return_threshold=False):
        self.lower_bound = lower_bound
        self.upper_bound = upper_bound
        self.width = width
        self.stds = stds
        self.return_threshold = return_threshold

    def __call__(self, pitch, periodicity):
        """Apply hysteresis thresholding to pitch

        Both inputs are flattened, so all elements are processed as one
        sequence of frames.

        Arguments
            pitch - tensor of pitch values in Hz
            periodicity - tensor of periodicity values, one per pitch value

        Returns
            pitch - tensor of shape (1, frames) on the device of the input
                pitch, with rejected frames set to `crepe.UNVOICED`
            threshold - 1-D tensor of the per-frame thresholds that were
                applied. Only returned if `return_threshold` is True.
        """
        # Save output device
        device = pitch.device

        # Perform hysteresis in log-2 space
        pitch = torch.log2(pitch).detach().flatten().cpu().numpy()

        # Flatten periodicity. Detach first, as numpy() raises on tensors
        # that require grad.
        periodicity = periodicity.detach().flatten().cpu().numpy()

        # Ignore confidently unvoiced pitch
        # NOTE(review): the nan-aware statistics below only skip these
        # frames if crepe.UNVOICED is NaN - confirm
        pitch[periodicity < self.lower_bound] = crepe.UNVOICED

        # Whiten pitch
        mean, std = np.nanmean(pitch), np.nanstd(pitch)

        # A constant pitch track has zero spread. Dividing by it would turn
        # every voiced frame into NaN (0 / 0), and un-whitening would keep
        # it NaN, so use a unit scale instead.
        if std == 0:
            std = 1.
        pitch = (pitch - mean) / std

        # Require high confidence to make predictions far from the mean
        parabola = self.width * pitch ** 2 - self.width * self.stds ** 2
        threshold = \
            self.lower_bound + np.clip(parabola, 0, 1 - self.lower_bound)

        # Unvoiced frames have no pitch to measure; use the base threshold
        threshold[np.isnan(threshold)] = self.lower_bound

        # Apply hysteresis to prevent short, unconfident voiced regions
        i = 0
        while i < len(periodicity) - 1:

            # Detect unvoiced to voiced transition
            if periodicity[i] < threshold[i] and \
               periodicity[i + 1] > threshold[i + 1]:

                # Grow region until next unvoiced or end of array
                start, end, keep = i + 1, i + 1, False
                while end < len(periodicity) and \
                      periodicity[end] > threshold[end]:
                    if periodicity[end] > self.upper_bound:
                        keep = True
                    end += 1

                # Force unvoiced if we didn't pass the confidence required
                # by the hysteresis
                if not keep:
                    threshold[start:end] = 1

                # Resume scanning at the frame that ended the region
                i = end

            else:
                i += 1

        # Remove pitch with low periodicity
        pitch[periodicity < threshold] = crepe.UNVOICED

        # Unwhiten
        pitch = pitch * std + mean

        # Convert to Hz
        pitch = torch.tensor(2 ** pitch, device=device)[None, :]

        # Optionally return threshold
        if self.return_threshold:
            return pitch, torch.tensor(threshold, device=device)

        return pitch
###############################################################################
# Periodicity thresholding methods
###############################################################################
class Silence:
    """Zero out periodicity wherever the audio is quieter than a threshold"""

    def __init__(self, value=-60):
        # Loudness below this value counts as silence
        # NOTE(review): presumably in decibels - confirm against
        # crepe.loudness
        self.value = value

    def __call__(self,
                 periodicity,
                 audio,
                 sample_rate=crepe.SAMPLE_RATE,
                 hop_length=None,
                 pad=True):
        """Return a copy of periodicity with silent entries set to zero

        `audio`, `sample_rate`, `hop_length` and `pad` are forwarded as-is
        to `crepe.loudness.a_weighted`. Entries whose loudness is strictly
        below `self.value` are zeroed. The input `periodicity` tensor is
        left untouched.
        """
        # Measure loudness of the audio
        levels = crepe.loudness.a_weighted(
            audio, sample_rate, hop_length, pad)

        # Work on a copy so the caller's tensor is not modified
        silenced = periodicity.clone()

        # Suppress periodicity in silent regions
        silenced[levels < self.value] = 0.
        return silenced
|