ecker
/

vall-e

Model card · Files and versions

vall-e / models / config.retnet.yaml

ecker's picture

Update models/config.retnet.yaml

7307f7e about 1 year ago

1.66 kB

	# Top-level audio settings.
	# The sample rate is written without the `_` digit separator: YAML 1.1
	# loaders read 24_000 as the integer 24000, but YAML 1.2 core-schema loaders
	# read it as the string "24_000".
	sample_rate: 24000
	audio_backend: "vocos"

	# Model definitions: a list holding a single entry, "ar+nar".
	# Every key from `size` through `experimental` belongs to that entry, so it
	# is indented under the `- name` item; left at the top level, `size` and
	# `training` collide with same-named keys used by other sections.
	# NOTE(review): nesting reconstructed from a flattened listing — confirm
	# against the loader's schema.
	models:
	- name: "ar+nar"
	  size: "full"
	  resp_levels: 8
	  prom_levels: 8
	  tasks: 8
	  langs: 2
	  tones: 1
	  arch_type: retnet
	  training: false
	  version: 2
	  dropout: 0.1
	  capabilities: ["ar", "nar"]
	  # Sub-mapping of this model entry.
	  experimental:
	    audio_embedding_sums: true

	#loras:
	#- name: "lora"
	#  rank: 128
	#  alpha: 128
	#  training: True
	#  rvq_levels: []

	# Training hyperparameters. All keys below are children of
	# `hyperparameters`; at the top level `batch_size` would be a duplicate of
	# the `batch_size` listed for `evaluation`.
	# NOTE(review): nesting reconstructed from a flattened listing — confirm
	# against the loader's schema.
	hyperparameters:
	  batch_size: 32
	  gradient_accumulation_steps: 8
	  gradient_clipping: 1.0
	  warmup_steps: 10

	  optimizer: Prodigy
	  learning_rate: 1.0
	  torch_optimizer: true

	  # Scheduler name is the empty string; the author's trailing note is kept.
	  scheduler: ""  # ScheduleFree
	  torch_scheduler: true

	# Evaluation settings. Nested under `evaluation` so that `batch_size` and
	# `size` stay distinct from the same-named keys used for hyperparameters and
	# for the model entry.
	# NOTE(review): nesting reconstructed from a flattened listing — confirm
	# against the loader's schema.
	evaluation:
	  batch_size: 4
	  frequency: 250
	  size: 4

	  steps: 500
	  ar_temperature: 1.0
	  nar_temperature: 0.0

	# Trainer settings. Nested under `trainer`; `deepspeed` is a sub-mapping
	# with its own `amp`, separate from the trainer-level `amp` above it.
	# NOTE(review): nesting reconstructed from a flattened listing — confirm
	# against the loader's schema.
	trainer:
	  # Written without `_` separators (was 1_000_000) so YAML 1.2 core-schema
	  # loaders also read an integer.
	  iterations: 1000000
	  save_frequency: 250
	  keep_last_checkpoints: 4

	  resize_modules: true
	  gradient_checkpointing: true

	  weight_dtype: bfloat16
	  amp: true

	  backend: deepspeed
	  deepspeed:
	    inferencing: false
	    amp: false

	# Inference settings. Nested under `inference` so `backend`, `weight_dtype`
	# and `amp` do not duplicate the keys of the same name listed for `trainer`.
	# NOTE(review): nesting reconstructed from a flattened listing — confirm
	# against the loader's schema.
	inference:
	  backend: local
	  weight_dtype: bfloat16
	  amp: true

	# Optimization toggles; every value is a boolean flag.
	# NOTE(review): nesting reconstructed from a flattened listing — confirm
	# against the loader's schema.
	optimizations:
	  injects: false
	  replace: true

	  linear: false
	  embedding: false
	  optimizers: true

	  bitsandbytes: false
	  dadaptation: false
	  bitnet: false
	  fp8: false

	# Dataset settings. Nested under `dataset`; at the top level the
	# `training: []` list would duplicate the boolean `training` key of the
	# model entry.
	# NOTE(review): nesting reconstructed from a flattened listing — confirm
	# against the loader's schema.
	dataset:
	  use_hdf5: true
	  hdf5_flag: r

	  use_metadata: true
	  validate: true

	  workers: 1
	  cache: true

	  duration_range: [3.0, 12.0]

	  prompt_max_samples: 1
	  prompt_duration_range: [3.0, 3.0]

	  resps_max_samples: 1

	  sample_type: path  # path # speaker
	  sample_order: duration
	  sample_max_duration_batch: 300
	  sample_shuffle: false

	  tasks_list: ["tts", "stt"]

	  # Explicit empty lists (not null): no entries are configured here.
	  training: []
	  validation: []
	  noise: []