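# Training configuration for the AA_diffusion module of ttts (tortoise_plus_zh):
# a ControlLDM-style latent diffusion decoder for text-to-speech, wrapping a
# Tortoise-style DiffusionTts denoiser. Top-level sections: dataloader, model,
# train, dataset.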
dataloader:
  batch_size: 16
  shuffle: true
  num_workers: 64
  drop_last: true
  pin_memory: true
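# ControlLDM wrapper (from the ControlNet "cldm" codebase). The keys below mirror
# LatentDiffusion hyperparameters; first_stage_key / cond_stage_key / control_key
# are the batch-dict keys the model reads ("jpg"/"txt"/"hint" are names carried
# over from the original image configs, not file formats). channels: 100
# presumably matches the 100-bin mel/latent representation used elsewhere here.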
model:
  target: cldm.cldm.ControlLDM
  params:
    # linear_start: 0.00085
    # linear_end: 0.0120
    num_timesteps_cond: 1
    log_every_t: 200
    timesteps: 1000
    first_stage_key: "jpg"
    cond_stage_key: "txt"
    control_key: "hint"
    image_size: 64
    channels: 100
    cond_stage_trainable: true
    # conditioning_key: crossattn
    monitor: val/loss_simple_ema
    scale_factor: 0.18215
    use_ema: False
    only_mid_control: False
    # control_stage_config:
    #   target: cldm.cldm.ControlNet
    #   params:
    #     image_size: 32 # unused
    #     in_channels: 100
    #     hint_channels: 768
    #     model_channels: 128
    #     attention_resolutions: [ 4, 2, 1 ]
    #     num_res_blocks: 2
    #     channel_mult: [ 1, 2, 4, 4 ]
    #     num_heads: 8
    #     use_spatial_transformer: True
    #     transformer_depth: 1
    #     context_dim: 768
    #     use_checkpoint: True
    #     legacy: False
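    # The ControlNet control branch above is disabled; conditioning on the
    # reference signal appears to be handled by the ReferenceNet below instead.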
    refer_config:
      target: cldm.cldm.ReferenceNet
      params:
        image_size: 32 # unused
        hint_in_channels: 1024
        hint_out_channels: 128
        in_channels: 100
        out_channels: 100
        model_channels: 1024
        attention_resolutions: [ 4, 2, 1 ]
        num_res_blocks: 1
        channel_mult: [ 1, 1 ]
        num_heads: 8
        use_spatial_transformer: True
        transformer_depth: 1
        context_dim: 512
        use_checkpoint: True
        dims: 1
        legacy: False
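    # Denoiser: Tortoise-style DiffusionTts U-Net over 100-channel (mel-bin)
    # sequences; in_latent_channels: 1024 presumably matches the GPT latent
    # width, and unconditioned_percentage: 0.1 drops conditioning part of the
    # time for classifier-free guidance (interpretation follows the upstream
    # Tortoise implementation and is an assumption here).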
    unet_config:
      target: tortoise_model.DiffusionTts
      params:
        model_channels: 512
        num_layers: 8
        in_channels: 100
        in_latent_channels: 1024
        out_channels: 100
        dropout: 0
        use_fp16: False
        num_heads: 16
        layer_drop: 0.1
        unconditioned_percentage: 0.1
      # target: cldm.cldm.ControlledUnetModel
      # params:
      #   image_size: 32 # unused
      #   hint_in_channels: 1024
      #   hint_out_channels: 128
      #   in_channels: 100
      #   out_channels: 100
      #   model_channels: 1024
      #   attention_resolutions: [ 4, 2, 1 ]
      #   num_res_blocks: 1
      #   resblock_updown: True
      #   channel_mult: [ 1, 1 ]
      #   num_heads: 8
      #   use_spatial_transformer: True
      #   transformer_depth: 1
      #   context_dim: 512
      #   use_checkpoint: True
      #   dims: 1
      #   legacy: False
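    # Conditioning encoder: a CLIP-style vision tower applied to the 100-channel
    # input, producing 512-dim embeddings for cross-attention (context_dim: 512
    # above). Field meanings in vision_cfg are assumed to follow open_clip's
    # CLIPVisionCfg.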
    cond_stage_config:
      target: cldm.cond_emb.CLIP
      params:
        embed_dim: 512
        vision_cfg:
          layers: 6
          width: 512
          head_width: 64
          mlp_ratio: 4.0
          patch_dropout: 0.4
          attentional_pool: False
          patch_size: 64
          image_size: 1000
          in_channels: 100
          pool_type: 'tok'
          pos_embed_type: 'learnable'
          final_ln_after_pool: false
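# Trainer hyperparameters (learning rate, EMA, Adam betas, checkpointing).
# Note that train_batch_size here (32) and dataloader.batch_size above (16)
# differ; which one the training script actually consumes depends on the
# trainer code.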
train:
  train_batch_size: 32
  gradient_accumulate_every: 1
  train_lr: 0.0001
  train_num_steps: 1000000
  ema_update_every: 10
  ema_decay: 0.995
  adam_betas: [0.9, 0.99]
  save_and_sample_every: 1000
  timesteps: 1000
  sampling_timesteps: 1000
  results_folder: "results"
  logs_folder: "ttts/AA_diffusion/logs"
  num_workers: 32
  eps: 0.000000001
  keep_ckpts: 3
  all_in_mem: false
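# Dataset paths: a jsonl manifest (DataBaker Chinese TTS data, judging by the
# filename) and a pretrained GPT checkpoint, presumably used to produce the
# 1024-dim conditioning latents. Both are absolute paths on the original
# author's machine and need to be adjusted for other setups.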
dataset:
  path: "/home/hyc/tortoise_plus_zh/ttts/datasets/databaker_data.jsonl"
  gpt_path: "/home/hyc/tortoise_plus_zh/ttts/gpt/logs/2023-12-24-14-22-14/model-70.pt"
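# The target/params pairs above follow the latent-diffusion/ControlNet
# instantiation convention. A minimal loading sketch, assuming the ldm helpers
# from the ControlNet codebase are available (how this repo's training loop
# actually loads the file may differ):
#
#   from omegaconf import OmegaConf
#   from ldm.util import instantiate_from_config
#
#   cfg = OmegaConf.load("config.yaml")          # this file
#   model = instantiate_from_config(cfg.model)   # builds cldm.cldm.ControlLDM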