Spaces:
Runtime error
Runtime error
File size: 2,070 Bytes
96ee597 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 |
"""Get tokens using the SpeechTokenizer.
Apply SpeechTokenizer to extract acoustic and semantic tokens.
The tokens will be extracted to encoding_output/acoustic and
encoding_output/semantic.

Example usage:

    python utils/get_tokens_speech_tokenizer.py \
        --config_path ckpt/speechtokenizer/config.json \
        --ckpt_path ckpt/speechtokenizer/SpeechTokenizer.pt \
        --encoding_input datasets/example/audios \
        --encoding_output datasets/example/audios-speech-tokenizer

Copyright PolyAI Limited.
"""
import argparse
import pathlib
from modules.speech_tokenizer import SpeechTokenizer
# Directory containing this file; used as the base for the default paths.
MQTTS_ROOT_PATH = str(pathlib.Path(__file__).parent.resolve())


def build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the token extraction script.

    The default config, checkpoint and input-folder paths are resolved
    relative to ``MQTTS_ROOT_PATH`` (the directory containing this file).

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    root = pathlib.Path(MQTTS_ROOT_PATH)
    speech_tokenizer_dir = root / "ckpt" / "speechtokenizer"

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--config_path",
        type=str,
        help="Path to the SpeechTokenizer config",
        default=str(speech_tokenizer_dir / "config.json"),
    )
    parser.add_argument(
        "--ckpt_path",
        type=str,
        help="Path to the SpeechTokenizer checkpoint",
        default=str(speech_tokenizer_dir / "SpeechTokenizer.pt"),
    )
    parser.add_argument(
        "--encoding_input",
        type=str,
        help="Path to the input folder for encoding",
        default=str(root / "datasets" / "giga-training-data" / "audios"),
    )
    parser.add_argument(
        "--encoding_output",
        type=str,
        help="Path where to save the encoded tokens",
        default="/tmp/encoding_output",
    )
    parser.add_argument(
        "--start_percent",
        type=int,
        help="Lower percent bound (0-100) forwarded to the encoder",
        default=0,
    )
    parser.add_argument(
        "--end_percent",
        type=int,
        help="Upper percent bound (0-100) forwarded to the encoder",
        default=100,
    )
    return parser


def parse_args(argv=None) -> argparse.Namespace:
    """Parse and validate the command-line arguments.

    Args:
        argv: Sequence of argument strings to parse. ``None`` makes argparse
            read the arguments from ``sys.argv``.

    Returns:
        The parsed arguments namespace.

    Raises:
        SystemExit: Raised by ``parser.error`` when a percent value lies
            outside 0-100 or when ``--start_percent`` is greater than
            ``--end_percent``.
    """
    parser = build_parser()
    args = parser.parse_args(argv)

    # Reject impossible percent ranges up front instead of forwarding them
    # to the encoder, where they would not produce a clear error message.
    for name in ("start_percent", "end_percent"):
        value = getattr(args, name)
        if not 0 <= value <= 100:
            parser.error(f"--{name} must be between 0 and 100, got {value}")
    if args.start_percent > args.end_percent:
        parser.error(
            "--start_percent must not be greater than --end_percent, "
            f"got {args.start_percent} > {args.end_percent}"
        )
    return args


def main(argv=None) -> None:
    """Run token extraction with the SpeechTokenizer.

    Builds a ``SpeechTokenizer`` from the given config and checkpoint, then
    calls ``encode_files_with_model_concurrent`` with the input folder, the
    output folder and the percent bounds.

    Args:
        argv: Optional argument list; ``None`` uses the process arguments.
    """
    args = parse_args(argv)
    print("Parsed args")
    print(args)

    tokenizer = SpeechTokenizer(
        config_path=args.config_path,
        ckpt_path=args.ckpt_path,
    )
    tokenizer.encode_files_with_model_concurrent(
        folder_path=args.encoding_input,
        destination_folder=args.encoding_output,
        start_percent=args.start_percent,
        end_percent=args.end_percent,
    )


if __name__ == "__main__":
    main()
|