Spaces:
Running
Running
File size: 4,980 Bytes
9791162 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 |
import argparse
import os
import warnings
import crepe
###############################################################################
# Entry point
###############################################################################
def parse_args():
    """Parse command-line arguments

    Returns
        The argparse.Namespace produced from sys.argv, with the attributes
        audio_files, output_files, hop_length, output_harmonicity_files,
        output_periodicity_files, embed, fmin, fmax, model, decoder,
        batch_size, gpu, and no_pad. Options given without a default are
        None when omitted.
    """
    parser = argparse.ArgumentParser()

    # Required arguments
    parser.add_argument(
        '--audio_files',
        nargs='+',
        required=True,
        help='The audio file to process')
    parser.add_argument(
        '--output_files',
        nargs='+',
        required=True,
        help='The file to save pitch or embedding')

    # Not marked required, so this is None when omitted
    parser.add_argument(
        '--hop_length',
        type=int,
        help='The hop length of the analysis window')

    # Optionally save harmonicity [DEPRECATED]
    parser.add_argument(
        '--output_harmonicity_files',
        nargs='+',
        help='The file to save harmonicity')

    # Optionally save periodicity
    parser.add_argument(
        '--output_periodicity_files',
        nargs='+',
        help='The files to save periodicity')

    # Optionally create embedding instead of pitch contour
    parser.add_argument(
        '--embed',
        action='store_true',
        help='Performs embedding instead of pitch prediction')

    # Optional arguments
    parser.add_argument(
        '--fmin',
        default=50.,
        type=float,
        help='The minimum frequency allowed')
    parser.add_argument(
        '--fmax',
        default=crepe.MAX_FMAX,
        type=float,
        help='The maximum frequency allowed')
    parser.add_argument(
        '--model',
        default='full',
        help='The model capacity. One of "tiny" or "full"')

    # Restrict to the decoder names that main() knows how to dispatch, so a
    # typo is reported as a usage error instead of failing later on
    parser.add_argument(
        '--decoder',
        default='viterbi',
        choices=('argmax', 'viterbi', 'weighted_argmax'),
        help='The decoder to use. One of "argmax", "viterbi", or ' +
             '"weighted_argmax"')
    parser.add_argument(
        '--batch_size',
        type=int,
        help='The number of frames per batch')
    parser.add_argument(
        '--gpu',
        type=int,
        help='The gpu to perform inference on')

    # Passing this flag turns padding off; main() forwards "not no_pad"
    parser.add_argument(
        '--no_pad',
        action='store_true',
        help='Disables padding of the audio')

    return parser.parse_args()
def make_parent_directory(file):
    """Ensure that the directory that will contain file exists

    Arguments
        file
            Path of a file that is about to be written. Relative paths are
            resolved against the current working directory.
    """
    # Split off the final path component; only the directory part is needed
    directory, _ = os.path.split(os.path.abspath(file))

    # Creates missing intermediate directories; a no-op if already present
    os.makedirs(directory, exist_ok=True)
def _resolve_decoder(name):
    """Return the function in crepe.decode selected by the decoder name"""
    if name not in ('argmax', 'weighted_argmax', 'viterbi'):
        raise ValueError(
            f'Unknown decoder "{name}". Expected one of "argmax", '
            '"viterbi", or "weighted_argmax"')

    # The accepted names coincide with the attribute names in crepe.decode
    return getattr(crepe.decode, name)


def main():
    """Run pitch prediction or embedding on files from the command line

    Parses the command-line arguments, creates the parent directories of
    all output files, and then calls either crepe.embed_from_files_to_files
    (when --embed is given) or crepe.predict_from_files_to_files.

    Raises
        ValueError
            If pitch prediction is requested with a decoder name other than
            "argmax", "viterbi", or "weighted_argmax".
    """
    # Parse command-line arguments
    args = parse_args()

    # Get decoder. It is only used for pitch prediction, so it is not
    # validated in embedding mode. Resolving it before touching the file
    # system lets a bad name fail without creating directories.
    decoder = None if args.embed else _resolve_decoder(args.decoder)

    # Deprecate output_harmonicity_files
    if args.output_harmonicity_files is not None:
        message = (
            'The crepe output_harmonicity_files argument is deprecated and '
            'will be removed in a future release. Please use '
            'output_periodicity_files. Rationale: if network confidence measured '
            'harmonic content, the value would be low for non-harmonic, periodic '
            'sounds (e.g., sine waves). But this is not observed.')
        warnings.warn(message, DeprecationWarning)

        # The deprecated argument takes over the periodicity outputs
        args.output_periodicity_files = args.output_harmonicity_files

    # Ensure output directories exist
    for file in args.output_files:
        make_parent_directory(file)
    if args.output_periodicity_files is not None:
        for file in args.output_periodicity_files:
            make_parent_directory(file)

    # Get inference device
    device = 'cpu' if args.gpu is None else f'cuda:{args.gpu}'

    # Infer pitch or embedding and save to disk
    if args.embed:
        crepe.embed_from_files_to_files(args.audio_files,
                                        args.output_files,
                                        args.hop_length,
                                        args.model,
                                        args.batch_size,
                                        device,
                                        not args.no_pad)
    else:
        # NOTE(review): the third positional argument is always None;
        # presumably it is the deprecated harmonicity output slot - confirm
        # against the signature of crepe.predict_from_files_to_files
        crepe.predict_from_files_to_files(args.audio_files,
                                          args.output_files,
                                          None,
                                          args.output_periodicity_files,
                                          args.hop_length,
                                          args.fmin,
                                          args.fmax,
                                          args.model,
                                          decoder,
                                          args.batch_size,
                                          device,
                                          not args.no_pad)
# Run module entry point
# NOTE(review): this call is unconditional (no __name__ guard), so the
# command-line interface runs whenever this module is executed or imported
main()
|