# ViAVSP-LLM_v1.0 / .hydra/config.yaml
# Provenance (Hugging Face page residue): uploaded by tanthinhdt,
# commit 926a145 (verified), "Upload 9 files", 6.26 kB.
# Root config node name — unset (Hydra/fairseq dataclass convention).
_name: null
# Shared runtime settings: logging, random seed, numeric precision, paths.
common:
  _name: null
  no_progress_bar: false
  log_interval: 200
  log_format: json
  log_file: null
  tensorboard_logdir: tblog
  wandb_project: AVSP-LLM  # Weights & Biases project name
  azureml_logging: false
  seed: 1337
  cpu: false
  tpu: false
  bf16: false
  memory_efficient_bf16: false
  fp16: true  # fp16 mixed precision is on; loss-scale knobs below apply
  memory_efficient_fp16: false
  fp16_no_flatten_grads: false
  fp16_init_scale: 128
  fp16_scale_window: null
  fp16_scale_tolerance: 0.0
  on_cpu_convert_precision: false
  min_loss_scale: 0.0001
  threshold_loss_scale: null
  amp: false  # torch AMP disabled (fp16 path used instead)
  amp_batch_retries: 2
  amp_init_scale: 128
  amp_scale_window: null
  # NOTE(review): machine-specific absolute path — adjust when relocating.
  user_dir: /home/theodore/Projects/VSP-LLM/src
  empty_cache_freq: 0
  all_gather_list_size: 16384
  model_parallel_size: 1
  quantization_config_path: null
  profile: false
  reset_logging: false
  suppress_crashes: false
  use_plasma_view: false
  plasma_path: /tmp/plasma
# Evaluation-time defaults (all unset — evaluation configured elsewhere).
common_eval:
  _name: null
  path: null
  post_process: null
  quiet: false
  model_overrides: '{}'  # string, parsed downstream as a dict literal
  results_path: null
# Distributed training — configured for a single process on a single GPU.
distributed_training:
  _name: null
  distributed_world_size: 1
  distributed_num_procs: 1
  distributed_rank: 0
  distributed_backend: nccl
  distributed_init_method: null
  distributed_port: -1
  device_id: 0
  distributed_no_spawn: false
  ddp_backend: no_c10d
  ddp_comm_hook: none
  bucket_cap_mb: 25
  fix_batches_to_gpus: false
  find_unused_parameters: true
  fast_stat_sync: false
  heartbeat_timeout: -1
  broadcast_buffers: false
  slowmo_momentum: null
  slowmo_algorithm: LocalSGD
  localsgd_frequency: 3
  nprocs_per_node: 1
  pipeline_model_parallel: false
  pipeline_balance: null
  pipeline_devices: null
  pipeline_chunks: 0
  pipeline_encoder_balance: null
  pipeline_encoder_devices: null
  pipeline_decoder_balance: null
  pipeline_decoder_devices: null
  pipeline_checkpoint: never
  zero_sharding: none
  # Precision/TPU flags mirror `common` via OmegaConf interpolation.
  fp16: ${common.fp16}
  memory_efficient_fp16: ${common.memory_efficient_fp16}
  tpu: ${common.tpu}
  no_reshard_after_forward: false
  fp32_reduce_scatter: false
  cpu_offload: false
  use_sharded_state: false
# Data loading and batching.
dataset:
  _name: null
  num_workers: 0  # all loading happens in the main process
  skip_invalid_size_inputs_valid_test: false
  max_tokens: null
  batch_size: 1  # per-step batch of 1; see optimization.update_freq for accumulation
  required_batch_size_multiple: 8
  required_seq_len_multiple: 1
  dataset_impl: null
  data_buffer_size: 10
  train_subset: train
  valid_subset: valid
  combine_valid_subsets: null
  ignore_unused_valid_subsets: false
  validate_interval: 1
  validate_interval_updates: 0
  validate_after_updates: 0
  fixed_validation_seed: null
  disable_validation: false
  # Validation batching mirrors the training values via interpolation.
  max_tokens_valid: ${dataset.max_tokens}
  batch_size_valid: ${dataset.batch_size}
  max_valid_steps: null
  curriculum: 0
  gen_subset: test
  num_shards: 1
  shard_id: 0
# Optimization schedule: run for 30k updates with 8-step gradient accumulation.
optimization:
  _name: null
  max_epoch: 0  # NOTE(review): presumably 0 = unlimited epochs; max_update stops the run
  max_update: 30000
  stop_time_hours: 0.0
  clip_norm: 0.0  # NOTE(review): presumably 0.0 disables gradient clipping — confirm
  sentence_avg: true
  update_freq:
    - 8  # accumulate gradients over 8 steps before each optimizer update
  lr:
    - 0.0005
  stop_min_lr: -1.0
  use_bmuf: false
# Checkpointing: save every 2500 updates, keep only the latest interval
# checkpoint, select best by (maximized) accuracy.
checkpoint:
  _name: null
  save_dir: checkpoints
  restore_file: checkpoint_last.pt
  finetune_from_model: null
  reset_dataloader: false
  reset_lr_scheduler: false
  reset_meters: false
  reset_optimizer: false
  optimizer_overrides: '{}'  # string, parsed downstream as a dict literal
  save_interval: 1
  save_interval_updates: 2500
  keep_interval_updates: 1
  keep_interval_updates_pattern: -1
  keep_last_epochs: -1
  keep_best_checkpoints: -1
  no_save: false
  no_epoch_checkpoints: true  # skip per-epoch checkpoints; interval-based only
  no_last_checkpoints: false
  no_save_optimizer_state: false
  best_checkpoint_metric: accuracy
  maximize_best_checkpoint_metric: true  # higher accuracy = better
  patience: -1
  checkpoint_suffix: ''
  checkpoint_shard_count: 1
  load_checkpoint_on_all_dp_ranks: false
  write_checkpoints_asynchronously: false
  model_parallel_size: ${common.model_parallel_size}  # mirrors common
# Block-momentum update filtering — inactive (optimization.use_bmuf is false).
bmuf:
  _name: null
  block_lr: 1.0
  block_momentum: 0.875
  global_sync_iter: 50
  warmup_iterations: 500
  use_nbm: false
  average_sync: false
  distributed_world_size: ${distributed_training.distributed_world_size}
# Decoding/generation settings: beam search, width 5.
generation:
  _name: null
  beam: 5
  nbest: 1
  max_len_a: 0.0
  max_len_b: 200
  min_len: 1
  match_source_len: false
  unnormalized: false
  no_early_stop: false
  no_beamable_mm: false
  lenpen: 1.0  # length penalty: 1.0 is neutral
  unkpen: 0.0
  replace_unk: null
  sacrebleu: false
  score_reference: false
  prefix_size: 0
  no_repeat_ngram_size: 0
  sampling: false  # pure beam search; sampling knobs below are inactive
  sampling_topk: -1
  sampling_topp: -1.0
  constraints: null
  temperature: 1.0
  diverse_beam_groups: -1
  diverse_beam_strength: 0.5
  diversity_rate: -1.0
  print_alignment: null
  print_step: false
  lm_path: null
  lm_weight: 0.0
  iter_decode_eos_penalty: 0.0
  iter_decode_max_iter: 10
  iter_decode_force_max_iter: false
  iter_decode_with_beam: 1
  iter_decode_with_external_reranker: false
  retain_iter_history: false
  retain_dropout: false
  retain_dropout_modules: null
  decoding_format: null
  no_seed_provided: false
# Language-model evaluation defaults.
eval_lm:
  _name: null
  output_word_probs: false
  output_word_stats: false
  context_window: 0
  softmax_batch: 9223372036854775807  # int64 max — effectively unbounded
# Interactive-mode input settings.
interactive:
  _name: null
  buffer_size: 0
  input: '-'  # quoted: '-' means stdin, and a bare - would be YAML syntax
# VSP-LLM model: pretrained speech encoder checkpoint + vinallama-2.7b LLM.
model:
  _name: vsp_llm
  # NOTE(review): machine-specific absolute path to the pretrained encoder
  # checkpoint — adjust when relocating.
  w2v_path: /home/theodore/Projects/VSP-LLM/checkpoints/large_vox_iter5.pt
  llm_ckpt_path: vilm/vinallama-2.7b  # Hugging Face model id; must match task.llm_ckpt_path
  apply_mask: false  # masking knobs below are inactive while this is false
  mask_selection: static
  mask_length: 10
  mask_other: 0
  mask_prob: 0.75
  mask_channel_selection: static
  mask_channel_length: 64
  mask_channel_other: 0
  mask_channel_prob: 0.5
  layerdrop: 0.1
  dropout: 0.0
  activation_dropout: 0.1
  attention_dropout: 0.0
  feature_grad_mult: 1.0
  encoder_embed_dim: 1024
  decoder_embed_dim: 4096
  # NOTE(review): presumably the number of updates the pretrained encoder
  # stays frozen before fine-tuning — confirm against the model code.
  freeze_finetune_updates: 18000
# Task: audio-visual speech-to-text fine-tuning on the 100h VASR subset.
task:
  _name: vsp_llm_training
  is_s2s: true
  # NOTE(review): machine-specific absolute paths — adjust when relocating.
  data: /home/theodore/Projects/VSP-LLM/data/processed/vasr/100h
  label_dir: /home/theodore/Projects/VSP-LLM/data/processed/vasr/100h
  normalize: true
  labels:
    - wrd  # word-level transcripts
  single_target: true
  fine_tuning: true
  stack_order_audio: 4
  max_sample_size: 500
  modalities:
    - video
    - audio
  image_aug: true
  pad_audio: true
  random_crop: false
  llm_ckpt_path: vilm/vinallama-2.7b  # must match model.llm_ckpt_path
# Training loss: decoder-only LM loss with label smoothing.
criterion:
  _name: decoder_only_language_modeling_loss
  report_accuracy: true  # accuracy feeds checkpoint.best_checkpoint_metric
  label_smoothing: 0.1
# Adam optimizer.
optimizer:
  # Quoted so the comma-bearing value is unambiguously one literal string
  # (parsed downstream); unquoted it is fragile to YAML flow-context edits.
  adam_betas: '(0.9,0.98)'
  _name: adam
  adam_eps: 1.0e-08
# Tri-stage LR schedule: 10k warmup, no hold, 20k decay (= 30k max_update).
lr_scheduler:
  _name: tri_stage
  warmup_steps: 10000
  hold_steps: 0
  decay_steps: 20000
  final_lr_scale: 0.05
# No scorer, BPE, or tokenizer configured at the top level.
scoring: null
bpe: null
tokenizer: null