|
_name: null |
|
common: |
|
_name: null |
|
no_progress_bar: false |
|
log_interval: 200 |
|
log_format: json |
|
log_file: null |
|
tensorboard_logdir: tblog |
|
wandb_project: AVSP-LLM |
|
azureml_logging: false |
|
seed: 1337 |
|
cpu: false |
|
tpu: false |
|
bf16: false |
|
memory_efficient_bf16: false |
|
fp16: true |
|
memory_efficient_fp16: false |
|
fp16_no_flatten_grads: false |
|
fp16_init_scale: 128 |
|
fp16_scale_window: null |
|
fp16_scale_tolerance: 0.0 |
|
on_cpu_convert_precision: false |
|
min_loss_scale: 0.0001 |
|
threshold_loss_scale: null |
|
amp: false |
|
amp_batch_retries: 2 |
|
amp_init_scale: 128 |
|
amp_scale_window: null |
|
user_dir: /home/theodore/Projects/VSP-LLM/src |
|
empty_cache_freq: 0 |
|
all_gather_list_size: 16384 |
|
model_parallel_size: 1 |
|
quantization_config_path: null |
|
profile: false |
|
reset_logging: false |
|
suppress_crashes: false |
|
use_plasma_view: false |
|
plasma_path: /tmp/plasma |
|
common_eval: |
|
_name: null |
|
path: null |
|
post_process: null |
|
quiet: false |
|
model_overrides: '{}' |
|
results_path: null |
|
distributed_training: |
|
_name: null |
|
distributed_world_size: 1 |
|
distributed_num_procs: 1 |
|
distributed_rank: 0 |
|
distributed_backend: nccl |
|
distributed_init_method: null |
|
distributed_port: -1 |
|
device_id: 0 |
|
distributed_no_spawn: false |
|
ddp_backend: no_c10d |
|
ddp_comm_hook: none |
|
bucket_cap_mb: 25 |
|
fix_batches_to_gpus: false |
|
find_unused_parameters: true |
|
fast_stat_sync: false |
|
heartbeat_timeout: -1 |
|
broadcast_buffers: false |
|
slowmo_momentum: null |
|
slowmo_algorithm: LocalSGD |
|
localsgd_frequency: 3 |
|
nprocs_per_node: 1 |
|
pipeline_model_parallel: false |
|
pipeline_balance: null |
|
pipeline_devices: null |
|
pipeline_chunks: 0 |
|
pipeline_encoder_balance: null |
|
pipeline_encoder_devices: null |
|
pipeline_decoder_balance: null |
|
pipeline_decoder_devices: null |
|
pipeline_checkpoint: never |
|
zero_sharding: none |
|
fp16: ${common.fp16} |
|
memory_efficient_fp16: ${common.memory_efficient_fp16} |
|
tpu: ${common.tpu} |
|
no_reshard_after_forward: false |
|
fp32_reduce_scatter: false |
|
cpu_offload: false |
|
use_sharded_state: false |
|
dataset: |
|
_name: null |
|
num_workers: 0 |
|
skip_invalid_size_inputs_valid_test: false |
|
max_tokens: null |
|
batch_size: 1 |
|
required_batch_size_multiple: 8 |
|
required_seq_len_multiple: 1 |
|
dataset_impl: null |
|
data_buffer_size: 10 |
|
train_subset: train |
|
valid_subset: valid |
|
combine_valid_subsets: null |
|
ignore_unused_valid_subsets: false |
|
validate_interval: 1 |
|
validate_interval_updates: 0 |
|
validate_after_updates: 0 |
|
fixed_validation_seed: null |
|
disable_validation: false |
|
max_tokens_valid: ${dataset.max_tokens} |
|
batch_size_valid: ${dataset.batch_size} |
|
max_valid_steps: null |
|
curriculum: 0 |
|
gen_subset: test |
|
num_shards: 1 |
|
shard_id: 0 |
|
optimization: |
|
_name: null |
|
max_epoch: 0 |
|
max_update: 30000 |
|
stop_time_hours: 0.0 |
|
clip_norm: 0.0 |
|
sentence_avg: true |
|
update_freq: |
|
- 8 |
|
lr: |
|
- 0.0005 |
|
stop_min_lr: -1.0 |
|
use_bmuf: false |
|
checkpoint: |
|
_name: null |
|
save_dir: checkpoints |
|
restore_file: checkpoint_last.pt |
|
finetune_from_model: null |
|
reset_dataloader: false |
|
reset_lr_scheduler: false |
|
reset_meters: false |
|
reset_optimizer: false |
|
optimizer_overrides: '{}' |
|
save_interval: 1 |
|
save_interval_updates: 2500 |
|
keep_interval_updates: 1 |
|
keep_interval_updates_pattern: -1 |
|
keep_last_epochs: -1 |
|
keep_best_checkpoints: -1 |
|
no_save: false |
|
no_epoch_checkpoints: true |
|
no_last_checkpoints: false |
|
no_save_optimizer_state: false |
|
best_checkpoint_metric: accuracy |
|
maximize_best_checkpoint_metric: true |
|
patience: -1 |
|
checkpoint_suffix: '' |
|
checkpoint_shard_count: 1 |
|
load_checkpoint_on_all_dp_ranks: false |
|
write_checkpoints_asynchronously: false |
|
model_parallel_size: ${common.model_parallel_size} |
|
bmuf: |
|
_name: null |
|
block_lr: 1.0 |
|
block_momentum: 0.875 |
|
global_sync_iter: 50 |
|
warmup_iterations: 500 |
|
use_nbm: false |
|
average_sync: false |
|
distributed_world_size: ${distributed_training.distributed_world_size} |
|
generation: |
|
_name: null |
|
beam: 5 |
|
nbest: 1 |
|
max_len_a: 0.0 |
|
max_len_b: 200 |
|
min_len: 1 |
|
match_source_len: false |
|
unnormalized: false |
|
no_early_stop: false |
|
no_beamable_mm: false |
|
lenpen: 1.0 |
|
unkpen: 0.0 |
|
replace_unk: null |
|
sacrebleu: false |
|
score_reference: false |
|
prefix_size: 0 |
|
no_repeat_ngram_size: 0 |
|
sampling: false |
|
sampling_topk: -1 |
|
sampling_topp: -1.0 |
|
constraints: null |
|
temperature: 1.0 |
|
diverse_beam_groups: -1 |
|
diverse_beam_strength: 0.5 |
|
diversity_rate: -1.0 |
|
print_alignment: null |
|
print_step: false |
|
lm_path: null |
|
lm_weight: 0.0 |
|
iter_decode_eos_penalty: 0.0 |
|
iter_decode_max_iter: 10 |
|
iter_decode_force_max_iter: false |
|
iter_decode_with_beam: 1 |
|
iter_decode_with_external_reranker: false |
|
retain_iter_history: false |
|
retain_dropout: false |
|
retain_dropout_modules: null |
|
decoding_format: null |
|
no_seed_provided: false |
|
eval_lm: |
|
_name: null |
|
output_word_probs: false |
|
output_word_stats: false |
|
context_window: 0 |
|
softmax_batch: 9223372036854775807 |
|
interactive: |
|
_name: null |
|
buffer_size: 0 |
|
input: '-' |
|
model: |
|
_name: vsp_llm |
|
w2v_path: /home/theodore/Projects/VSP-LLM/checkpoints/large_vox_iter5.pt |
|
llm_ckpt_path: vilm/vinallama-2.7b |
|
apply_mask: false |
|
mask_selection: static |
|
mask_length: 10 |
|
mask_other: 0 |
|
mask_prob: 0.75 |
|
mask_channel_selection: static |
|
mask_channel_length: 64 |
|
mask_channel_other: 0 |
|
mask_channel_prob: 0.5 |
|
layerdrop: 0.1 |
|
dropout: 0.0 |
|
activation_dropout: 0.1 |
|
attention_dropout: 0.0 |
|
feature_grad_mult: 1.0 |
|
encoder_embed_dim: 1024 |
|
decoder_embed_dim: 4096 |
|
freeze_finetune_updates: 18000 |
|
task: |
|
_name: vsp_llm_training |
|
is_s2s: true |
|
data: /home/theodore/Projects/VSP-LLM/data/processed/vasr/100h |
|
label_dir: /home/theodore/Projects/VSP-LLM/data/processed/vasr/100h |
|
normalize: true |
|
labels: |
|
- wrd |
|
single_target: true |
|
fine_tuning: true |
|
stack_order_audio: 4 |
|
max_sample_size: 500 |
|
modalities: |
|
- video |
|
- audio |
|
image_aug: true |
|
pad_audio: true |
|
random_crop: false |
|
llm_ckpt_path: vilm/vinallama-2.7b |
|
criterion: |
|
_name: decoder_only_language_modeling_loss |
|
report_accuracy: true |
|
label_smoothing: 0.1 |
|
optimizer: |
|
_name: adam |
|
adam_betas: (0.9,0.98) |
|
adam_eps: 1.0e-08 |
|
lr_scheduler: |
|
_name: tri_stage |
|
warmup_steps: 10000 |
|
hold_steps: 0 |
|
decay_steps: 20000 |
|
final_lr_scale: 0.05 |
|
scoring: null |
|
bpe: null |
|
tokenizer: null |
|
|