File size: 2,124 Bytes
a4005ad c377f65 a4005ad c377f65 a4005ad c377f65 a4005ad c377f65 a4005ad c377f65 a4005ad c377f65 a4005ad c377f65 a4005ad c377f65 a4005ad c377f65 a4005ad c377f65 a4005ad c377f65 a4005ad c377f65 a4005ad c377f65 a4005ad c377f65 a4005ad c377f65 a4005ad c377f65 a4005ad c377f65 a4005ad c377f65 a4005ad c377f65 a4005ad c377f65 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 |
# Dataset settings.
# NOTE(review): nesting is reconstructed from key order because the source
# had no indentation; confirm against the consumer's schema.
dataset:
  # All three lists are empty in this file.
  training: []
  validation: []
  noise: []

  # NOTE(review): the value is Python lambda source text stored as a string;
  # presumably the consumer evaluates it. Confirm, and do not accept this
  # field from untrusted input.
  speaker_name_getter: "lambda p: f'{p.parts[-3]}_{p.parts[-2]}'"

  use_hdf5: true
  use_metadata: true
  hdf5_flag: r
  validate: true

  workers: 2
  cache: true

  phones_range: [4, 256]
  duration_range: [1.0, 16.0]

  random_utterance: 1.0
  max_prompts: 3
  prompt_duration: 6.0

  sample_type: speaker

  # Only "tts" is active. Alternative list kept from the original comment:
  # ["tts", "tts-c", "ns", "sr", "tse", "cse", "nse", "tts"]
  tasks_list: ["tts"]
# Model definitions.
# NOTE(review): nesting is reconstructed from key order because the source
# had no indentation. The keys from `name` through `version` are assumed to
# form a single `_models` list entry; confirm against the consumer's schema.
models:
  _prom_levels: 8
  _max_levels: 8

  _models:
    - name: "ar+nar"
      size: "full"
      resp_levels: 8
      prom_levels: 8
      tasks: 8
      arch_type: "retnet"
      training: true
      version: 2
# Optimizer and batching settings.
# NOTE(review): nesting is reconstructed from key order because the source
# had no indentation; confirm against the consumer's schema.
hyperparameters:
  batch_size: 8
  gradient_accumulation_steps: 32
  gradient_clipping: 100

  optimizer: Prodigy
  torch_optimizer: true
  learning_rate: 1.0

  # The active value is the empty string. The OneCycle settings below are
  # commented out and kept for reference only.
  scheduler_type: ""
  # scheduler_type: OneCycle
  # scheduler_params:
  #   cycle_first_step_size: 10_000
  #   cycle_first_stair_count: 10_000
  #   cycle_second_step_size: 15_000
  #   cycle_second_stair_count: 15_000
  #   decay_step_size: 5_000
  #   cycle_min_lr: 2.5e-4  # 1.0e-5
  #   cycle_max_lr: 2.5e-4  # 1.0e-4
  #   decay_lr_rate: 0.0
  #   cycle_min_mom: 0.90
  #   cycle_max_mom: 0.99
  #   decay_mom_rate: 0.0
# Evaluation settings.
# Keeping these keys nested matters: `batch_size`, `size` and
# `load_disabled_engines` also appear in other sections, and without nesting
# they would be duplicate keys in one flat mapping.
# NOTE(review): nesting is reconstructed from key order; confirm against the
# consumer's schema.
evaluation:
  batch_size: 16
  frequency: 250
  size: 16

  steps: 450
  ar_temperature: 0.95
  nar_temperature: 0.25

  load_disabled_engines: true
# Trainer settings.
# NOTE(review): nesting is reconstructed from key order because the source
# had no indentation; confirm against the consumer's schema.
trainer:
  # Written without digit separators: `1_000_000` is an integer only under
  # YAML 1.1 resolvers and stays a string under the YAML 1.2 core schema.
  iterations: 1000000

  save_tag: step
  save_on_oom: true
  save_on_quit: true
  save_frequency: 100
  export_on_save: true

  keep_last_checkpoints: 4

  aggressive_optimizations: false
  load_disabled_engines: false

  # Disabled loading options, kept for reference:
  # load_state_dict: true
  # strict_loading: false
  # load_tag: "9500"
  # load_states: false
  # restart_step_count: true

  # YAML has no `None` literal; the bare word loads as the string "None".
  # `null` yields a real null value. The alternative from the original
  # comment is the string "global_step".
  # NOTE(review): confirm the consumer treats null as "no gc mode".
  gc_mode: null

  weight_dtype: bfloat16
  amp: false

  backend: deepspeed
  deepspeed:
    zero_optimization_level: 0
    use_compression_training: true

  # NOTE(review): the source does not show whether this key belongs to
  # `deepspeed` or directly to `trainer`; it is placed under `trainer` here.
  # Confirm before relying on it.
  activation_checkpointing: true
# Inference settings.
# `weight_dtype` and `amp` also appear under `trainer`; keeping them nested
# here stops the two sections from overwriting each other.
# NOTE(review): nesting is reconstructed from key order; confirm against the
# consumer's schema.
inference:
  use_vocos: true
  normalize: false

  weight_dtype: bfloat16
  amp: false
# bitsandbytes settings. `enabled` is false in this file.
# NOTE(review): nesting is reconstructed from key order; confirm against the
# consumer's schema.
bitsandbytes:
  enabled: false

  injects: true
  linear: true
  embedding: true
|