File size: 3,978 Bytes
9791162
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import numpy as np
import torch

import crepe


###############################################################################
# Pitch thresholding methods
###############################################################################


class At:
    """Simple thresholding at a specified probability value"""

    def __init__(self, value):
        self.value = value

    def __call__(self, pitch, periodicity):
        # Make a copy to prevent in-place modification
        pitch = torch.clone(pitch)

        # Threshold
        pitch[periodicity < self.value] = crepe.UNVOICED
        return pitch


class Hysteresis:
    """Hysteresis thresholding"""

    def __init__(self,
                 lower_bound=.19,
                 upper_bound=.31,
                 width=.2,
                 stds=1.7,
                 return_threshold=False):
        self.lower_bound = lower_bound
        self.upper_bound = upper_bound
        self.width = width
        self.stds = stds
        self.return_threshold = return_threshold

    def __call__(self, pitch, periodicity):
        # Save output device
        device = pitch.device

        # Perform hysteresis in log-2 space
        pitch = torch.log2(pitch).detach().flatten().cpu().numpy()

        # Flatten periodicity
        periodicity = periodicity.flatten().cpu().numpy()

        # Ignore confidently unvoiced pitch
        pitch[periodicity < self.lower_bound] = crepe.UNVOICED

        # Whiten pitch
        mean, std = np.nanmean(pitch), np.nanstd(pitch)
        pitch = (pitch - mean) / std

        # Require high confidence to make predictions far from the mean
        parabola = self.width * pitch ** 2 - self.width * self.stds ** 2
        threshold = \
            self.lower_bound + np.clip(parabola, 0, 1 - self.lower_bound)
        threshold[np.isnan(threshold)] = self.lower_bound

        # Apply hysteresis to prevent short, unconfident voiced regions
        i = 0
        while i < len(periodicity) - 1:

            # Detect unvoiced to voiced transition
            if periodicity[i] < threshold[i] and \
               periodicity[i + 1] > threshold[i + 1]:

                # Grow region until next unvoiced or end of array
                start, end, keep = i + 1, i + 1, False
                while end < len(periodicity) and \
                      periodicity[end] > threshold[end]:
                    if periodicity[end] > self.upper_bound:
                        keep = True
                    end += 1

                # Force unvoiced if we didn't pass the confidence required by
                # the hysteresis
                if not keep:
                    threshold[start:end] = 1

                i = end

            else:
                i += 1

        # Remove pitch with low periodicity
        pitch[periodicity < threshold] = crepe.UNVOICED

        # Unwhiten
        pitch = pitch * std + mean

        # Convert to Hz
        pitch = torch.tensor(2 ** pitch, device=device)[None, :]

        # Optionally return threshold
        if self.return_threshold:
            return pitch, torch.tensor(threshold, device=device)

        return pitch


###############################################################################
# Periodicity thresholding methods
###############################################################################


class Silence:
    """Set periodicity to zero in silent regions"""

    def __init__(self, value=-60):
        self.value = value

    def __call__(self,
                 periodicity,
                 audio,
                 sample_rate=crepe.SAMPLE_RATE,
                 hop_length=None,
                 pad=True):
        # Don't modify in-place
        periodicity = torch.clone(periodicity)

        # Compute loudness
        loudness = crepe.loudness.a_weighted(
            audio, sample_rate, hop_length, pad)

        # Threshold silence
        periodicity[loudness < self.value] = 0.

        return periodicity