{
  "saving_path": "/home/ubuntu/experiments/a2s_mls",
  "resume_checkpoint": null,
  "vocoder_type": "SPEECHTOKENIZER",
  "vocoder_config_path": null,
  "vocoder_ckpt_path": null,
  "metapath": [
    "/var/data_mls/train.json"
  ],
  "val_metapath": [
    "/var/data_mls/test.json"
  ],
  "pretrained_path": null,
  "speaker_embedding_dir": null,
  "sampledir": "/home/ubuntu/experiments/a2s_mls",
  "lr": 0.0005,
  "batch_size": 100.0,
  "train_bucket_size": 8192,
  "training_step": 800000,
  "optim_flat_percent": 0.0,
  "warmup_step": 10000,
  "adam_beta1": 0.9,
  "adam_beta2": 0.98,
  "ffd_size": 1024,
  "hidden_size": 1024,
  "enc_nlayers": 8,
  "dec_nlayers": 6,
  "nheads": 8,
  "dropout": 0.1,
  "depthwise_conv_kernel_size": 5,
  "aligner_softmax_temp": 1.0,
  "layer_norm_eps": 1e-05,
  "use_sem_tokens": true,
  "use_spkr_emb": false,
  "use_text_emb": false,
  "fairseq": false,
  "only_inference": false,
  "speaker_embed_dropout": 0.05,
  "label_smoothing": 0.0,
  "val_check_interval": 1,
  "max_dataset_samples": -1,
  "check_val_every_n_epoch": 1,
  "precision": "bf16",
  "nworkers": 12,
  "distributed": true,
  "accelerator": "gpu",
  "version": null,
  "accumulate_grad_batches": 1,
  "sagemaker": false,
  "use_repetition_token": false,
  "use_repetition_gating": false,
  "repetition_penalty": 1.0,
  "sampling_temperature": 1.0,
  "top_k": -1,
  "min_top_k": 3,
  "top_p": 0.8,
  "sample_num": 4,
  "length_penalty_max_length": 150,
  "length_penalty_max_prob": 0.95,
  "max_input_length": 2048,
  "max_output_length": 2000,
  "phone_context_window": 3,
  "sample_rate": 16000,
  "n_codes": 1024,
  "n_cluster_groups": 7,
  "first_n_lvls": 7,
  "use_pretrained_ckpt_cfg": false,
  "n_semantic_codes": 1024
}