Spaces:

atsushieee
/

sovits-test

Running

File size: 4,980 Bytes
import argparse
import os
import warnings

import crepe


###############################################################################
# Entry point
###############################################################################


def parse_args(argv=None):
    """Parse command-line arguments

    Arguments
        argv (list of str or None)
            The argument strings to parse. When None (the default), argparse
            reads them from sys.argv[1:], which is what the command-line
            entry point relies on.

    Returns
        argparse.Namespace holding one attribute per option declared below
    """
    parser = argparse.ArgumentParser()

    # Required arguments
    parser.add_argument(
        '--audio_files',
        nargs='+',
        required=True,
        help='The audio files to process')
    parser.add_argument(
        '--output_files',
        nargs='+',
        required=True,
        help='The files to save pitch or embedding')

    # Not required; left as None when omitted
    parser.add_argument(
        '--hop_length',
        type=int,
        help='The hop length of the analysis window')

    # Optionally save harmonicity [DEPRECATED]
    parser.add_argument(
        '--output_harmonicity_files',
        nargs='+',
        help='The files to save harmonicity')
    # Optionally save periodicity
    parser.add_argument(
        '--output_periodicity_files',
        nargs='+',
        help='The files to save periodicity')

    # Optionally create embedding instead of pitch contour
    parser.add_argument(
        '--embed',
        action='store_true',
        help='Performs embedding instead of pitch prediction')

    # Optional arguments
    parser.add_argument(
        '--fmin',
        default=50.,
        type=float,
        help='The minimum frequency allowed')
    parser.add_argument(
        '--fmax',
        default=crepe.MAX_FMAX,
        type=float,
        help='The maximum frequency allowed')
    parser.add_argument(
        '--model',
        default='full',
        help='The model capacity. One of "tiny" or "full"')
    # Restrict the decoder to the names the entry point knows how to map to
    # a decoding function, so a typo is reported by argparse up front
    parser.add_argument(
        '--decoder',
        default='viterbi',
        choices=['argmax', 'viterbi', 'weighted_argmax'],
        help='The decoder to use. One of "argmax", "viterbi", or '
             '"weighted_argmax"')
    parser.add_argument(
        '--batch_size',
        type=int,
        help='The number of frames per batch')
    parser.add_argument(
        '--gpu',
        type=int,
        help='The gpu to perform inference on')
    # The flag is negated by the caller, so passing it turns padding off
    parser.add_argument(
        '--no_pad',
        action='store_true',
        help='Do not pad the audio')

    return parser.parse_args(argv)


def make_parent_directory(file):
    """Make sure the directory that will contain file exists

    Arguments
        file (str)
            Path of a file about to be written. Relative paths are made
            absolute before their directory component is taken. The file
            itself is not created.
    """
    # Split the absolute path into (directory, filename) and keep the former
    directory, _ = os.path.split(os.path.abspath(file))

    # Creates any missing intermediate directories; an existing directory
    # is not an error
    os.makedirs(directory, exist_ok=True)


def _resolve_decoder(name):
    """Look up the crepe decoding function for a decoder name

    Arguments
        name (str)
            One of "argmax", "weighted_argmax", or "viterbi"

    Returns
        The matching function from crepe.decode

    Raises
        ValueError
            If name is not one of the supported decoder names
    """
    if name == 'argmax':
        return crepe.decode.argmax
    if name == 'weighted_argmax':
        return crepe.decode.weighted_argmax
    if name == 'viterbi':
        return crepe.decode.viterbi
    raise ValueError(
        f'Unknown decoder "{name}". Expected one of "argmax", '
        '"viterbi", or "weighted_argmax"')


def main():
    """Run pitch or embedding inference on files from the command line

    Parses the command-line arguments, creates the parent directories of
    every output file, then dispatches to either
    crepe.embed_from_files_to_files (when --embed is given) or
    crepe.predict_from_files_to_files.

    Raises
        ValueError
            If pitch prediction is requested with an unsupported decoder name
    """
    # Parse command-line arguments
    args = parse_args()

    # Deprecate output_harmonicity_files
    if args.output_harmonicity_files is not None:
        message = (
            'The crepe output_harmonicity_files argument is deprecated and '
            'will be removed in a future release. Please use '
            'output_periodicity_files. Rationale: if network confidence measured '
            'harmonic content, the value would be low for non-harmonic, periodic '
            'sounds (e.g., sine waves). But this is not observed.')
        warnings.warn(message, DeprecationWarning)

        # The deprecated option is an alias: its value replaces whatever was
        # given for output_periodicity_files
        args.output_periodicity_files = args.output_harmonicity_files

    # Ensure output directories exist
    for file in args.output_files:
        make_parent_directory(file)
    if args.output_periodicity_files is not None:
        for file in args.output_periodicity_files:
            make_parent_directory(file)

    # Get inference device
    device = 'cpu' if args.gpu is None else f'cuda:{args.gpu}'

    # --no_pad is a negative flag; the crepe functions receive its inverse
    pad = not args.no_pad

    # Infer pitch or embedding and save to disk
    if args.embed:
        crepe.embed_from_files_to_files(args.audio_files,
                                        args.output_files,
                                        args.hop_length,
                                        args.model,
                                        args.batch_size,
                                        device,
                                        pad)
    else:
        # Only pitch prediction uses a decoder, so resolve it here; an
        # unsupported name fails with a clear error instead of leaving the
        # variable unbound
        decoder = _resolve_decoder(args.decoder)

        crepe.predict_from_files_to_files(args.audio_files,
                                          args.output_files,
                                          None,
                                          args.output_periodicity_files,
                                          args.hop_length,
                                          args.fmin,
                                          args.fmax,
                                          args.model,
                                          decoder,
                                          args.batch_size,
                                          device,
                                          pad)


# Run module entry point. The call is unconditional, so it executes whenever
# this file is run or imported.
# NOTE(review): there is no `if __name__ == '__main__':` guard - presumably
# intentional if this file serves as a package `__main__.py`; confirm.
main()