---
# Training run configuration for experiment exp478 (seed 10, fold 0).
# NOTE(review): the block structure below was reconstructed from a copy whose
# line breaks and indentation had been lost; nesting was inferred from the
# alphabetical key ordering at each level. Confirm against the original dump.

# --- Run artifact locations ---
backbone_config_path: /home/models/exp478
best_model_path: /home/models/exp478/models/fold_0_10_best.pth
checkpoint_path: /home/models/exp478/chkp/fold_0_10_chkp.pth
config_path: /home/models/exp478/config.yaml

# --- Loss configuration ---
# Presumably `criterion_type` picks the active loss and the sibling mappings
# hold per-loss parameters; verify against the consuming code.
criterion:
  criterion_type: BCEWithLogitsLoss
  mcrmse_loss:
    weights:
      - 0.5
      - 0.5
  mse_loss:
    reduction: mean
  rmse_loss:
    eps: 1.0e-09
    reduction: mean
  smooth_l1_loss:
    beta: 0.1
    reduction: mean

data_dir: /home/data

# --- Dataset and batching ---
dataset:
  bucket_batch_sampler:
    bucket_size: 400
    noise_factor: 0.2
  folds: true
  labels:
    - generated
  max_length: 1024
  sampler_type: StratifiedBatchSampler
  train_batch_size: 6
  # Source names are identifiers consumed at runtime; spellings such as
  # "patially" are kept exactly as given.
  train_sources:
    - daigt
    - persuade
    - persuade_gpt
    - persuade_humanized_1
    - persuade_gpt_patially_rewritten
    - persuade_gpt_patially_rewritten_05
    - persuade_humanized_easy_1
    - daigt_gpt_patially_rewritten
    - llama-mistral-partially-r
    - moth
    - books
    - neural-chat-7b
    - nbroad
  valid_batch_size: 6
  # Lowercase `none` is a plain string in YAML, not a null value.
  valid_sources:
    - none

debug: false
exp_name: exp478_seed10
external_dir: /home/data/external
fold: 0
interim_dir: /home/data/interim
log_path: /home/models/exp478/logs/fold-0.log

# --- Logging ---
logger:
  job_type: training
  project: DAIGT-AIE
  train_print_frequency: 100
  use_wandb: true
  valid_print_frequency: 100

# --- Model ---
model:
  architecture_type: CustomModel
  attention_dropout: 0.1
  backbone_type: microsoft/deberta-v3-large
  dropout: 0.05
  freeze_embeddings: true
  freeze_n_layers: 20
  gem_pooling:
    eps: 1.0e-06
    p: 3
  gradient_checkpointing: false
  load_embeddings: true
  load_head: false
  load_n_layers: 24
  load_parts: true
  pooling_type: MeanPooling
  reinitialize_n_layers: 0
  state_from_model: exp475

models_dir: /home/models

# --- Optimizer ---
optimizer:
  beta1: 0.9
  beta2: 0.999
  decoder_lr: 2.0e-05
  embeddings_lr: 2.0e-05
  encoder_lr: 2.0e-05
  eps: 1.0e-06
  group_lr_multiplier: 1
  n_groups: 1
  weight_decay: 0.01

processed_dir: /home/data/processed
raw_dir: /home/data/raw
run_dir: /home/models/exp478
run_id: exp478_seed10_fold0
run_name: exp478_seed10_fold0

# --- Learning-rate schedule ---
scheduler:
  cosine_schedule_with_warmup:
    n_cycles: 0.5
    n_warmup_steps: 0
  type: cosine_schedule_with_warmup

seed: 10
tokenizer: null
tokenizer_path: /home/models/exp478/tokenizer

# --- Training loop ---
training:
  apex: true
  epochs: 1
  evaluate_n_times_per_epoch: 1
  gradient_accumulation_steps: 1
  max_grad_norm: 1000