File size: 4,980 Bytes
9791162
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
import argparse
import os
import warnings

import crepe


###############################################################################
# Entry point
###############################################################################


def parse_args(argv=None):
    """Parse command-line arguments

    Arguments
        argv (list of str or None)
            The argument strings to parse. When None (the default),
            argparse reads them from sys.argv[1:].

    Returns
        args (argparse.Namespace)
            The parsed command-line arguments
    """
    parser = argparse.ArgumentParser()

    # Required arguments
    parser.add_argument(
        '--audio_files',
        nargs='+',
        required=True,
        help='The audio files to process')
    parser.add_argument(
        '--output_files',
        nargs='+',
        required=True,
        help='The files to save pitch or embedding')

    # Not required: None is forwarded to crepe when the flag is omitted
    parser.add_argument(
        '--hop_length',
        type=int,
        help='The hop length of the analysis window')

    # Optionally save harmonicity [DEPRECATED]
    parser.add_argument(
        '--output_harmonicity_files',
        nargs='+',
        help='The file to save harmonicity')
    # Optionally save periodicity
    parser.add_argument(
        '--output_periodicity_files',
        nargs='+',
        help='The files to save periodicity')

    # Optionally create embedding instead of pitch contour
    parser.add_argument(
        '--embed',
        action='store_true',
        help='Performs embedding instead of pitch prediction')

    # Optional arguments
    parser.add_argument(
        '--fmin',
        default=50.,
        type=float,
        help='The minimum frequency allowed')
    parser.add_argument(
        '--fmax',
        default=crepe.MAX_FMAX,
        type=float,
        help='The maximum frequency allowed')
    parser.add_argument(
        '--model',
        default='full',
        help='The model capacity. One of "tiny" or "full"')
    # Restrict to the decoders main() knows how to look up, so a typo is
    # reported by argparse instead of failing later with an unbound variable
    parser.add_argument(
        '--decoder',
        default='viterbi',
        choices=['argmax', 'viterbi', 'weighted_argmax'],
        help='The decoder to use. One of "argmax", "viterbi", or ' +
             '"weighted_argmax"')
    parser.add_argument(
        '--batch_size',
        type=int,
        help='The number of frames per batch')
    parser.add_argument(
        '--gpu',
        type=int,
        help='The gpu to perform inference on')
    # main() forwards `not args.no_pad`, so setting this flag turns padding off
    parser.add_argument(
        '--no_pad',
        action='store_true',
        help='Disables padding of the audio')

    return parser.parse_args(argv)


def make_parent_directory(file):
    """Ensure the directory that will contain file exists

    Arguments
        file (str)
            Path of the file whose containing directory should be created.
            The file itself is not created.
    """
    # Resolve relative paths against the current working directory first so
    # that a bare filename maps to a real directory rather than ''
    absolute_path = os.path.abspath(file)
    directory, _ = os.path.split(absolute_path)

    # Create intermediate directories as needed; an existing one is fine
    os.makedirs(directory, exist_ok=True)


def main():
    """Run pitch or embedding inference from the command line

    Parses the command-line arguments, creates the parent directories of
    every output file, and then writes either embeddings (--embed) or pitch
    (and optionally periodicity) for the given audio files.

    Raises
        ValueError
            If pitch prediction is requested with a decoder other than
            "argmax", "weighted_argmax", or "viterbi"
    """
    # Parse command-line arguments
    args = parse_args()

    # Deprecate output_harmonicity_files
    if args.output_harmonicity_files is not None:
        message = (
            'The crepe output_harmonicity_files argument is deprecated and '
            'will be removed in a future release. Please use '
            'output_periodicity_files. Rationale: if network confidence measured '
            'harmonic content, the value would be low for non-harmonic, periodic '
            'sounds (e.g., sine waves). But this is not observed.')
        warnings.warn(message, DeprecationWarning)

        # The deprecated argument takes over the periodicity outputs
        args.output_periodicity_files = args.output_harmonicity_files

    # Ensure output directories exist
    for file in args.output_files:
        make_parent_directory(file)
    if args.output_periodicity_files is not None:
        for file in args.output_periodicity_files:
            make_parent_directory(file)

    # Get inference device
    device = 'cpu' if args.gpu is None else f'cuda:{args.gpu}'

    # Embedding does not use a decoder, so handle it before decoder lookup
    if args.embed:
        crepe.embed_from_files_to_files(
            args.audio_files,
            args.output_files,
            args.hop_length,
            args.model,
            args.batch_size,
            device,
            not args.no_pad)
        return

    # Get decoder. Fail with a clear message on an unknown name rather than
    # leaving the decoder unbound.
    decoders = ('argmax', 'weighted_argmax', 'viterbi')
    if args.decoder not in decoders:
        raise ValueError(
            f'Unknown decoder "{args.decoder}". '
            f'Expected one of: {", ".join(decoders)}')
    decoder = getattr(crepe.decode, args.decoder)

    # Infer pitch and save to disk
    # NOTE(review): the None in third position is presumably the deprecated
    # harmonicity output slot - confirm against the crepe API
    crepe.predict_from_files_to_files(
        args.audio_files,
        args.output_files,
        None,
        args.output_periodicity_files,
        args.hop_length,
        args.fmin,
        args.fmax,
        args.model,
        decoder,
        args.batch_size,
        device,
        not args.no_pad)


# Run module entry point. The call is unconditional (there is no
# `if __name__ == '__main__'` guard), so it executes whenever this module is
# run or imported.
main()