sovits-test / crepe /__main__.py
atsushieee's picture
Upload folder using huggingface_hub
9791162
import argparse
import os
import warnings
import crepe
###############################################################################
# Entry point
###############################################################################
def parse_args():
"""Parse command-line arguments"""
parser = argparse.ArgumentParser()
# Required arguments
parser.add_argument(
'--audio_files',
nargs='+',
required=True,
help='The audio file to process')
parser.add_argument(
'--output_files',
nargs='+',
required=True,
help='The file to save pitch or embedding')
parser.add_argument(
'--hop_length',
type=int,
help='The hop length of the analysis window')
# Optionally save harmonicity [DEPRECATED]
parser.add_argument(
'--output_harmonicity_files',
nargs='+',
help='The file to save harmonicity')
# Optionally save periodicity
parser.add_argument(
'--output_periodicity_files',
nargs='+',
help='The files to save periodicity')
# Optionally create embedding instead of pitch contour
parser.add_argument(
'--embed',
action='store_true',
help='Performs embedding instead of pitch prediction')
# Optional arguments
parser.add_argument(
'--fmin',
default=50.,
type=float,
help='The minimum frequency allowed')
parser.add_argument(
'--fmax',
default=crepe.MAX_FMAX,
type=float,
help='The maximum frequency allowed')
parser.add_argument(
'--model',
default='full',
help='The model capacity. One of "tiny" or "full"')
parser.add_argument(
'--decoder',
default='viterbi',
help='The decoder to use. One of "argmax", "viterbi", or ' +
'"weighted_argmax"')
parser.add_argument(
'--batch_size',
type=int,
help='The number of frames per batch')
parser.add_argument(
'--gpu',
type=int,
help='The gpu to perform inference on')
parser.add_argument(
'--no_pad',
action='store_true',
help='Whether to pad the audio')
return parser.parse_args()
def make_parent_directory(file):
"""Create parent directory for file if it does not already exist"""
parent = os.path.dirname(os.path.abspath(file))
os.makedirs(parent, exist_ok=True)
def main():
# Parse command-line arguments
args = parse_args()
# Deprecate output_harmonicity_files
if args.output_harmonicity_files is not None:
message = (
'The crepe output_harmonicity_files argument is deprecated and '
'will be removed in a future release. Please use '
'output_periodicity_files. Rationale: if network confidence measured '
'harmonic content, the value would be low for non-harmonic, periodic '
'sounds (e.g., sine waves). But this is not observed.')
warnings.warn(message, DeprecationWarning)
args.output_periodicity_files = args.output_harmonicity_files
# Ensure output directory exist
[make_parent_directory(file) for file in args.output_files]
if args.output_periodicity_files is not None:
[make_parent_directory(file) for file in args.output_periodicity_files]
# Get inference device
device = 'cpu' if args.gpu is None else f'cuda:{args.gpu}'
# Get decoder
if args.decoder == 'argmax':
decoder = crepe.decode.argmax
elif args.decoder == 'weighted_argmax':
decoder = crepe.decode.weighted_argmax
elif args.decoder == 'viterbi':
decoder = crepe.decode.viterbi
# Infer pitch or embedding and save to disk
if args.embed:
crepe.embed_from_files_to_files(args.audio_files,
args.output_files,
args.hop_length,
args.model,
args.batch_size,
device,
not args.no_pad)
else:
crepe.predict_from_files_to_files(args.audio_files,
args.output_files,
None,
args.output_periodicity_files,
args.hop_length,
args.fmin,
args.fmax,
args.model,
decoder,
args.batch_size,
device,
not args.no_pad)
# Run module entry point
main()