File size: 1,730 Bytes
a4005ad f0fb314 a4005ad f0fb314 a4005ad f0fb314 a4005ad f0fb314 a4005ad f0fb314 a4005ad f0fb314 a4005ad f0fb314 a4005ad f0fb314 a4005ad f0fb314 a4005ad f0fb314 a4005ad f0fb314 a4005ad f0fb314 a4005ad f0fb314 a4005ad f0fb314 a4005ad f0fb314 a4005ad f0fb314 1673dfc a4005ad f0fb314 1673dfc f0fb314 1673dfc f0fb314 a4005ad f0fb314 a4005ad f0fb314 f0a9fb7 a4005ad |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 |
# Dataset section: data source lists plus loader and sampling options.
# Every key below belongs to the `dataset` mapping.
dataset:
  # All three source lists are empty in this file.
  training: []
  validation: []
  noise: []

  # Kept double-quoted: the value contains ': ', braces and single quotes,
  # none of which are safe in a plain scalar.
  # NOTE(review): looks like Python lambda source that the consumer
  # evaluates -- confirm, and never fill it from untrusted input.
  speaker_name_getter: "lambda p: f'{p.parts[-3]}_{p.parts[-2]}'"

  use_hdf5: true
  use_metadata: true
  # NOTE(review): presumably a file open mode for the HDF5 store -- confirm.
  hdf5_flag: r
  validate: true

  workers: 4
  cache: true

  # Two-element numeric pairs, kept in flow style as short leaf values.
  phones_range: [4, 256]
  duration_range: [1.0, 16.0]

  random_utterance: 1.0
  max_prompts: 3
  prompt_duration: 3.0

  sample_type: speaker

  # Only "tts" is active; the names after '#' are commented out.
  tasks_list: ["tts"]  # , "ns", "sr", "tse", "cse", "nse", "tts"]
# Model section: two shared level settings and the list of model entries.
models:
  _prom_levels: 4
  _max_levels: 8

  # Sequence of mappings, one per model. Both entries carry the same key
  # set; they differ only in name, resp_levels and prom_levels.
  _models:
    - name: "ar"
      size: "full"
      resp_levels: 1
      prom_levels: 2
      tasks: 8
      arch_type: "retnet"
      training: true

    - name: "nar"
      size: "full"
      resp_levels: 3
      prom_levels: 4
      tasks: 8
      arch_type: "retnet"
      training: true
# Optimisation hyperparameters; every key below belongs to this mapping.
hyperparameters:
  batch_size: 8
  gradient_accumulation_steps: 1
  gradient_clipping: 100

  optimizer: AdamW
  # Has both a dot and a signed exponent, so YAML 1.1 and 1.2 resolvers
  # alike read it as a float.
  learning_rate: 1.0e-5

  # Explicit empty string (not null): no scheduler type is named here.
  scheduler_type: ""
# Evaluation settings; every key below belongs to this mapping.
evaluation:
  batch_size: 16
  frequency: 500
  size: 16

  steps: 300
  ar_temperature: 0.95
  nar_temperature: 0.25
  load_disabled_engines: true
# Trainer settings; every key below belongs to this mapping.
trainer:
  # One million. Written without '_' separators, which only YAML 1.1
  # resolvers read as an integer.
  iterations: 1000000

  save_tag: step
  save_on_oom: true
  save_on_quit: true
  save_frequency: 500
  export_on_save: true

  keep_last_checkpoints: 4

  aggressive_optimizations: false
  load_disabled_engines: false

  load_state_dict: true

  # NOTE(review): bare `None` is the string "None" in YAML, not null.
  # Confirm what the consumer expects before switching to `null`.
  gc_mode: None  # "global_step"

  weight_dtype: float32
  amp: false

  backend: local
  # NOTE(review): placed under `trainer` because it directly follows
  # `backend`; the original indentation was lost -- confirm the nesting.
  deepspeed:
    zero_optimization_level: 0
    use_compression_training: true
# Inference settings; every key below belongs to this mapping.
inference:
  weight_dtype: float32
  amp: false

  use_vocos: true
  normalize: false

  recurrent_chunk_size: 0
  recurrent_forward: false
# bitsandbytes settings. `enabled` is false in this file while the
# remaining three flags are true.
bitsandbytes:
  enabled: false
  injects: true
  linear: true
  embedding: true
# Top-level device value. It must be the bare word `cpu`; any trailing
# characters on this line would become part of the string.
device: cpu