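# Resolved run configuration for exp489 (seed 10, fold 0): fine-tuning
# microsoft/deberta-v3-large to predict the binary "generated" label
# (AI-generated-text detection). The comments below are annotations
# inferred from the config values themselves, not confirmed against the
# training code.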
backbone_config_path: /llm-daig/models/exp489
best_model_path: /llm-daig/models/exp489/models/fold_0_10_best.pth
checkpoint_path: /llm-daig/models/exp489/chkp/fold_0_10_chkp.pth
config_path: /llm-daig/models/exp489/config.yaml
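# criterion: loss settings. criterion_type selects the active loss
# (BCEWithLogitsLoss, matching the single binary label); the mcrmse_loss,
# mse_loss, rmse_loss, and smooth_l1_loss blocks read as presets for
# alternative criteria that are presumably inactive for this run.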
criterion:
  criterion_type: BCEWithLogitsLoss
  mcrmse_loss:
    weights:
      - 0.5
      - 0.5
  mse_loss:
    reduction: mean
  rmse_loss:
    eps: 1.0e-09
    reduction: mean
  smooth_l1_loss:
    beta: 0.1
    reduction: mean
data_dir: /llm-daig/data
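# dataset: batching and data mixing. sampler_type selects
# StratifiedBatchSampler, so the bucket_batch_sampler parameters are
# presumably unused here. train_sources lists the corpora pooled into the
# training set; valid_sources is the literal "none", suggesting validation
# uses only the held-out fold rather than extra sources.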
dataset:
  bucket_batch_sampler:
    bucket_size: 400
    noise_factor: 0.2
  folds: true
  labels:
    - generated
  max_length: 256
  sampler_type: StratifiedBatchSampler
  train_batch_size: 48
  train_sources:
    - daigt
    - persuade
    - persuade_gpt
    - persuade_humanized_1
    - persuade_gpt_patially_rewritten
    - persuade_gpt_patially_rewritten_05
    - persuade_humanized_easy_1
    - daigt_gpt_patially_rewritten
    - llama-mistral-partially-r
    - moth
    - books
    - neural-chat-7b
    - nbroad
  valid_batch_size: 48
  valid_sources:
    - none
debug: false
exp_name: exp489_seed10
external_dir: /llm-daig/data/external
fold: 0
interim_dir: /llm-daig/data/interim
log_path: /llm-daig/models/exp489/logs/fold-0.log
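# logger: experiment tracking. use_wandb: true reports the run to the
# DAIGT-AIE project on Weights & Biases; the print frequencies are
# presumably measured in training/validation steps.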
logger:
  job_type: training
  project: DAIGT-AIE
  train_print_frequency: 100
  use_wandb: true
  valid_print_frequency: 100
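# model: DeBERTa-v3-large backbone (24 encoder layers) with MeanPooling
# over the last hidden states; the gem_pooling block is an alternative
# pooling preset, presumably inactive given pooling_type. The load_* flags
# restore embeddings and all 24 layers from the pretrained checkpoint but
# not a classification head, which is presumably trained from scratch.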
model:
  architecture_type: CustomModel
  attention_dropout: 0.1
  backbone_type: microsoft/deberta-v3-large
  dropout: 0.05
  freeze_embeddings: false
  freeze_n_layers: 0
  gem_pooling:
    eps: 1.0e-06
    p: 3
  gradient_checkpointing: false
  load_embeddings: true
  load_head: false
  load_n_layers: 24
  load_parts: true
  pooling_type: MeanPooling
  reinitialize_n_layers: 0
  state_from_model: None
models_dir: /llm-daig/models
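# optimizer: AdamW-style hyperparameters (betas 0.9/0.999, eps 1e-6,
# weight decay 0.01) with a uniform 2e-5 learning rate for the embeddings,
# encoder, and decoder/head groups. n_groups: 1 with group_lr_multiplier: 1
# implies no layer-wise learning-rate decay in this run.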
optimizer:
  beta1: 0.9
  beta2: 0.999
  decoder_lr: 2.0e-05
  embeddings_lr: 2.0e-05
  encoder_lr: 2.0e-05
  eps: 1.0e-06
  group_lr_multiplier: 1
  n_groups: 1
  weight_decay: 0.01
processed_dir: /llm-daig/data/processed
raw_dir: /llm-daig/data/raw
run_dir: /llm-daig/models/exp489
run_id: exp489_seed10_fold0
run_name: exp489_seed10_fold0
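# scheduler: the name mirrors Hugging Face's get_cosine_schedule_with_warmup.
# With n_warmup_steps: 0 and n_cycles: 0.5, the learning rate traces half a
# cosine period, decaying from 2e-5 toward 0 over training with no warmup.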
scheduler:
  cosine_schedule_with_warmup:
    n_cycles: 0.5
    n_warmup_steps: 0
  type: cosine_schedule_with_warmup
seed: 10
tokenizer: null
tokenizer_path: /llm-daig/models/exp489/tokenizer
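# training: 3 epochs of mixed-precision training (apex: true, presumably
# AMP), validation 4 times per epoch, and no gradient accumulation. A
# max_grad_norm of 1000 keeps clipping wired up but effectively never
# triggers, so gradients are essentially unclipped.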
training:
  apex: true
  epochs: 3
  evaluate_n_times_per_epoch: 4
  gradient_accumulation_steps: 1
  max_grad_norm: 1000
|