# Resolved training configuration for the `vsp_llm` model / `vsp_llm_training`
# task. Values written as ${section.key} are interpolations that point at
# another key in this same document (presumably resolved by the consuming
# config loader, e.g. OmegaConf/Hydra — confirm against the launcher).
#
# NOTE(review): several paths below are absolute paths under /home/theodore/...;
# they must be overridden when this config is used on another machine.
_name: null

common:
  _name: null
  no_progress_bar: false
  log_interval: 200
  log_format: json
  log_file: null
  tensorboard_logdir: tblog
  wandb_project: AVSP-LLM
  azureml_logging: false
  seed: 1337
  cpu: false
  tpu: false
  bf16: false
  memory_efficient_bf16: false
  fp16: true
  memory_efficient_fp16: false
  fp16_no_flatten_grads: false
  fp16_init_scale: 128
  fp16_scale_window: null
  fp16_scale_tolerance: 0.0
  on_cpu_convert_precision: false
  min_loss_scale: 0.0001
  threshold_loss_scale: null
  amp: false
  amp_batch_retries: 2
  amp_init_scale: 128
  amp_scale_window: null
  # Machine-specific absolute path.
  user_dir: /home/theodore/Projects/VSP-LLM/src
  empty_cache_freq: 0
  all_gather_list_size: 16384
  model_parallel_size: 1
  quantization_config_path: null
  profile: false
  reset_logging: false
  suppress_crashes: false
  use_plasma_view: false
  plasma_path: /tmp/plasma

common_eval:
  _name: null
  path: null
  post_process: null
  quiet: false
  # Quoted so the value stays the string "{}" rather than an empty mapping.
  model_overrides: '{}'
  results_path: null

distributed_training:
  _name: null
  distributed_world_size: 1
  distributed_num_procs: 1
  distributed_rank: 0
  distributed_backend: nccl
  distributed_init_method: null
  distributed_port: -1
  device_id: 0
  distributed_no_spawn: false
  ddp_backend: no_c10d
  ddp_comm_hook: none
  bucket_cap_mb: 25
  fix_batches_to_gpus: false
  find_unused_parameters: true
  fast_stat_sync: false
  heartbeat_timeout: -1
  broadcast_buffers: false
  slowmo_momentum: null
  slowmo_algorithm: LocalSGD
  localsgd_frequency: 3
  nprocs_per_node: 1
  pipeline_model_parallel: false
  pipeline_balance: null
  pipeline_devices: null
  pipeline_chunks: 0
  pipeline_encoder_balance: null
  pipeline_encoder_devices: null
  pipeline_decoder_balance: null
  pipeline_decoder_devices: null
  pipeline_checkpoint: never
  zero_sharding: none
  # The next three keys mirror the corresponding values in `common`.
  fp16: ${common.fp16}
  memory_efficient_fp16: ${common.memory_efficient_fp16}
  tpu: ${common.tpu}
  no_reshard_after_forward: false
  fp32_reduce_scatter: false
  cpu_offload: false
  use_sharded_state: false

dataset:
  _name: null
  num_workers: 0
  skip_invalid_size_inputs_valid_test: false
  max_tokens: null
  batch_size: 1
  required_batch_size_multiple: 8
  required_seq_len_multiple: 1
  dataset_impl: null
  data_buffer_size: 10
  train_subset: train
  valid_subset: valid
  combine_valid_subsets: null
  ignore_unused_valid_subsets: false
  validate_interval: 1
  validate_interval_updates: 0
  validate_after_updates: 0
  fixed_validation_seed: null
  disable_validation: false
  # Validation limits reuse the training limits defined above.
  max_tokens_valid: ${dataset.max_tokens}
  batch_size_valid: ${dataset.batch_size}
  max_valid_steps: null
  curriculum: 0
  gen_subset: test
  num_shards: 1
  shard_id: 0

optimization:
  _name: null
  max_epoch: 0
  max_update: 30000
  stop_time_hours: 0.0
  clip_norm: 0.0
  sentence_avg: true
  update_freq:
    - 8
  lr:
    - 0.0005
  stop_min_lr: -1.0
  use_bmuf: false

checkpoint:
  _name: null
  save_dir: checkpoints
  restore_file: checkpoint_last.pt
  finetune_from_model: null
  reset_dataloader: false
  reset_lr_scheduler: false
  reset_meters: false
  reset_optimizer: false
  # Quoted so the value stays the string "{}" rather than an empty mapping.
  optimizer_overrides: '{}'
  save_interval: 1
  save_interval_updates: 3000
  keep_interval_updates: 1
  keep_interval_updates_pattern: -1
  keep_last_epochs: -1
  keep_best_checkpoints: -1
  no_save: false
  no_epoch_checkpoints: true
  no_last_checkpoints: false
  no_save_optimizer_state: false
  best_checkpoint_metric: accuracy
  maximize_best_checkpoint_metric: true
  patience: -1
  # Explicit empty string (a bare `key:` would be null instead).
  checkpoint_suffix: ''
  checkpoint_shard_count: 1
  load_checkpoint_on_all_dp_ranks: false
  write_checkpoints_asynchronously: false
  model_parallel_size: ${common.model_parallel_size}

bmuf:
  _name: null
  block_lr: 1.0
  block_momentum: 0.875
  global_sync_iter: 50
  warmup_iterations: 500
  use_nbm: false
  average_sync: false
  distributed_world_size: ${distributed_training.distributed_world_size}

generation:
  _name: null
  beam: 5
  nbest: 1
  max_len_a: 0.0
  max_len_b: 200
  min_len: 1
  match_source_len: false
  unnormalized: false
  no_early_stop: false
  no_beamable_mm: false
  lenpen: 1.0
  unkpen: 0.0
  replace_unk: null
  sacrebleu: false
  score_reference: false
  prefix_size: 0
  no_repeat_ngram_size: 0
  sampling: false
  sampling_topk: -1
  sampling_topp: -1.0
  constraints: null
  temperature: 1.0
  diverse_beam_groups: -1
  diverse_beam_strength: 0.5
  diversity_rate: -1.0
  print_alignment: null
  print_step: false
  lm_path: null
  lm_weight: 0.0
  iter_decode_eos_penalty: 0.0
  iter_decode_max_iter: 10
  iter_decode_force_max_iter: false
  iter_decode_with_beam: 1
  iter_decode_with_external_reranker: false
  retain_iter_history: false
  retain_dropout: false
  retain_dropout_modules: null
  decoding_format: null
  no_seed_provided: false

eval_lm:
  _name: null
  output_word_probs: false
  output_word_stats: false
  context_window: 0
  # 9223372036854775807 == 2**63 - 1 (largest signed 64-bit integer).
  softmax_batch: 9223372036854775807

interactive:
  _name: null
  buffer_size: 0
  # Quoted: a bare `-` would be read as a sequence indicator.
  input: '-'

model:
  _name: vsp_llm
  # Machine-specific absolute path.
  w2v_path: /home/theodore/Projects/VSP-LLM/checkpoints/large_vox_iter5.pt
  # Same value as task.llm_ckpt_path below; keep the two in sync.
  llm_ckpt_path: vilm/vinallama-2.7b
  apply_mask: false
  mask_selection: static
  mask_length: 10
  mask_other: 0
  mask_prob: 0.75
  mask_channel_selection: static
  mask_channel_length: 64
  mask_channel_other: 0
  mask_channel_prob: 0.5
  layerdrop: 0.1
  dropout: 0.0
  activation_dropout: 0.1
  attention_dropout: 0.0
  feature_grad_mult: 1.0
  encoder_embed_dim: 1024
  decoder_embed_dim: 4096
  freeze_finetune_updates: 18000

task:
  _name: vsp_llm_training
  is_s2s: true
  # Machine-specific absolute paths; `data` and `label_dir` point at the same
  # directory.
  data: /home/theodore/Projects/VSP-LLM/data/processed/vasr
  label_dir: /home/theodore/Projects/VSP-LLM/data/processed/vasr
  normalize: true
  labels:
    - wrd
  single_target: true
  fine_tuning: true
  stack_order_audio: 4
  max_sample_size: 500
  modalities:
    - video
    - audio
  image_aug: true
  pad_audio: true
  random_crop: false
  # Same value as model.llm_ckpt_path above; keep the two in sync.
  llm_ckpt_path: vilm/vinallama-2.7b

criterion:
  _name: decoder_only_language_modeling_loss
  report_accuracy: true
  label_smoothing: 0.1

optimizer:
  _name: adam
  # Plain scalar, i.e. the string "(0.9,0.98)" — presumably parsed into a
  # tuple by the consumer; verify before reformatting.
  adam_betas: (0.9,0.98)
  adam_eps: 1.0e-08

lr_scheduler:
  _name: tri_stage
  # warmup_steps + hold_steps + decay_steps = 30000, which equals
  # optimization.max_update.
  warmup_steps: 10000
  hold_steps: 0
  decay_steps: 20000
  final_lr_scale: 0.05

scoring: null
bpe: null
tokenizer: null