Commit 926a145 by tanthinhdt (parent: 9470a5d)

Upload 9 files
.hydra/config.yaml ADDED
@@ -0,0 +1,267 @@
+ _name: null
+ common:
+   _name: null
+   no_progress_bar: false
+   log_interval: 200
+   log_format: json
+   log_file: null
+   tensorboard_logdir: tblog
+   wandb_project: AVSP-LLM
+   azureml_logging: false
+   seed: 1337
+   cpu: false
+   tpu: false
+   bf16: false
+   memory_efficient_bf16: false
+   fp16: true
+   memory_efficient_fp16: false
+   fp16_no_flatten_grads: false
+   fp16_init_scale: 128
+   fp16_scale_window: null
+   fp16_scale_tolerance: 0.0
+   on_cpu_convert_precision: false
+   min_loss_scale: 0.0001
+   threshold_loss_scale: null
+   amp: false
+   amp_batch_retries: 2
+   amp_init_scale: 128
+   amp_scale_window: null
+   user_dir: /home/theodore/Projects/VSP-LLM/src
+   empty_cache_freq: 0
+   all_gather_list_size: 16384
+   model_parallel_size: 1
+   quantization_config_path: null
+   profile: false
+   reset_logging: false
+   suppress_crashes: false
+   use_plasma_view: false
+   plasma_path: /tmp/plasma
+ common_eval:
+   _name: null
+   path: null
+   post_process: null
+   quiet: false
+   model_overrides: '{}'
+   results_path: null
+ distributed_training:
+   _name: null
+   distributed_world_size: 1
+   distributed_num_procs: 1
+   distributed_rank: 0
+   distributed_backend: nccl
+   distributed_init_method: null
+   distributed_port: -1
+   device_id: 0
+   distributed_no_spawn: false
+   ddp_backend: no_c10d
+   ddp_comm_hook: none
+   bucket_cap_mb: 25
+   fix_batches_to_gpus: false
+   find_unused_parameters: true
+   fast_stat_sync: false
+   heartbeat_timeout: -1
+   broadcast_buffers: false
+   slowmo_momentum: null
+   slowmo_algorithm: LocalSGD
+   localsgd_frequency: 3
+   nprocs_per_node: 1
+   pipeline_model_parallel: false
+   pipeline_balance: null
+   pipeline_devices: null
+   pipeline_chunks: 0
+   pipeline_encoder_balance: null
+   pipeline_encoder_devices: null
+   pipeline_decoder_balance: null
+   pipeline_decoder_devices: null
+   pipeline_checkpoint: never
+   zero_sharding: none
+   fp16: ${common.fp16}
+   memory_efficient_fp16: ${common.memory_efficient_fp16}
+   tpu: ${common.tpu}
+   no_reshard_after_forward: false
+   fp32_reduce_scatter: false
+   cpu_offload: false
+   use_sharded_state: false
+ dataset:
+   _name: null
+   num_workers: 0
+   skip_invalid_size_inputs_valid_test: false
+   max_tokens: null
+   batch_size: 1
+   required_batch_size_multiple: 8
+   required_seq_len_multiple: 1
+   dataset_impl: null
+   data_buffer_size: 10
+   train_subset: train
+   valid_subset: valid
+   combine_valid_subsets: null
+   ignore_unused_valid_subsets: false
+   validate_interval: 1
+   validate_interval_updates: 0
+   validate_after_updates: 0
+   fixed_validation_seed: null
+   disable_validation: false
+   max_tokens_valid: ${dataset.max_tokens}
+   batch_size_valid: ${dataset.batch_size}
+   max_valid_steps: null
+   curriculum: 0
+   gen_subset: test
+   num_shards: 1
+   shard_id: 0
+ optimization:
+   _name: null
+   max_epoch: 0
+   max_update: 30000
+   stop_time_hours: 0.0
+   clip_norm: 0.0
+   sentence_avg: true
+   update_freq:
+   - 8
+   lr:
+   - 0.0005
+   stop_min_lr: -1.0
+   use_bmuf: false
+ checkpoint:
+   _name: null
+   save_dir: checkpoints
+   restore_file: checkpoint_last.pt
+   finetune_from_model: null
+   reset_dataloader: false
+   reset_lr_scheduler: false
+   reset_meters: false
+   reset_optimizer: false
+   optimizer_overrides: '{}'
+   save_interval: 1
+   save_interval_updates: 2500
+   keep_interval_updates: 1
+   keep_interval_updates_pattern: -1
+   keep_last_epochs: -1
+   keep_best_checkpoints: -1
+   no_save: false
+   no_epoch_checkpoints: true
+   no_last_checkpoints: false
+   no_save_optimizer_state: false
+   best_checkpoint_metric: accuracy
+   maximize_best_checkpoint_metric: true
+   patience: -1
+   checkpoint_suffix: ''
+   checkpoint_shard_count: 1
+   load_checkpoint_on_all_dp_ranks: false
+   write_checkpoints_asynchronously: false
+   model_parallel_size: ${common.model_parallel_size}
+ bmuf:
+   _name: null
+   block_lr: 1.0
+   block_momentum: 0.875
+   global_sync_iter: 50
+   warmup_iterations: 500
+   use_nbm: false
+   average_sync: false
+   distributed_world_size: ${distributed_training.distributed_world_size}
+ generation:
+   _name: null
+   beam: 5
+   nbest: 1
+   max_len_a: 0.0
+   max_len_b: 200
+   min_len: 1
+   match_source_len: false
+   unnormalized: false
+   no_early_stop: false
+   no_beamable_mm: false
+   lenpen: 1.0
+   unkpen: 0.0
+   replace_unk: null
+   sacrebleu: false
+   score_reference: false
+   prefix_size: 0
+   no_repeat_ngram_size: 0
+   sampling: false
+   sampling_topk: -1
+   sampling_topp: -1.0
+   constraints: null
+   temperature: 1.0
+   diverse_beam_groups: -1
+   diverse_beam_strength: 0.5
+   diversity_rate: -1.0
+   print_alignment: null
+   print_step: false
+   lm_path: null
+   lm_weight: 0.0
+   iter_decode_eos_penalty: 0.0
+   iter_decode_max_iter: 10
+   iter_decode_force_max_iter: false
+   iter_decode_with_beam: 1
+   iter_decode_with_external_reranker: false
+   retain_iter_history: false
+   retain_dropout: false
+   retain_dropout_modules: null
+   decoding_format: null
+   no_seed_provided: false
+ eval_lm:
+   _name: null
+   output_word_probs: false
+   output_word_stats: false
+   context_window: 0
+   softmax_batch: 9223372036854775807
+ interactive:
+   _name: null
+   buffer_size: 0
+   input: '-'
+ model:
+   _name: vsp_llm
+   w2v_path: /home/theodore/Projects/VSP-LLM/checkpoints/large_vox_iter5.pt
+   llm_ckpt_path: vilm/vinallama-2.7b
+   apply_mask: false
+   mask_selection: static
+   mask_length: 10
+   mask_other: 0
+   mask_prob: 0.75
+   mask_channel_selection: static
+   mask_channel_length: 64
+   mask_channel_other: 0
+   mask_channel_prob: 0.5
+   layerdrop: 0.1
+   dropout: 0.0
+   activation_dropout: 0.1
+   attention_dropout: 0.0
+   feature_grad_mult: 1.0
+   encoder_embed_dim: 1024
+   decoder_embed_dim: 4096
+   freeze_finetune_updates: 18000
+ task:
+   _name: vsp_llm_training
+   is_s2s: true
+   data: /home/theodore/Projects/VSP-LLM/data/processed/vasr/100h
+   label_dir: /home/theodore/Projects/VSP-LLM/data/processed/vasr/100h
+   normalize: true
+   labels:
+   - wrd
+   single_target: true
+   fine_tuning: true
+   stack_order_audio: 4
+   max_sample_size: 500
+   modalities:
+   - video
+   - audio
+   image_aug: true
+   pad_audio: true
+   random_crop: false
+   llm_ckpt_path: vilm/vinallama-2.7b
+ criterion:
+   _name: decoder_only_language_modeling_loss
+   report_accuracy: true
+   label_smoothing: 0.1
+ optimizer:
+   _name: adam
+   adam_betas: (0.9,0.98)
+   adam_eps: 1.0e-08
+ lr_scheduler:
+   _name: tri_stage
+   warmup_steps: 10000
+   hold_steps: 0
+   decay_steps: 20000
+   final_lr_scale: 0.05
+ scoring: null
+ bpe: null
+ tokenizer: null
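
Note: the ${...} values in this config (for example distributed_training.fp16: ${common.fp16}) are OmegaConf interpolations resolved against the config root. A minimal sketch of inspecting them in Python, assuming omegaconf is installed and the file is checked out locally as .hydra/config.yaml:

# Load the training config and resolve its ${...} interpolations.
# Minimal sketch; assumes omegaconf is installed and the path below exists.
from omegaconf import OmegaConf

cfg = OmegaConf.load(".hydra/config.yaml")
print(cfg.common.fp16)                # True
print(cfg.distributed_training.fp16)  # True, resolved from ${common.fp16}

# Convert to plain Python containers with all interpolations resolved.
resolved = OmegaConf.to_container(cfg, resolve=True)
print(resolved["dataset"]["batch_size_valid"])  # 1, from ${dataset.batch_size}
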
.hydra/hydra.yaml ADDED
@@ -0,0 +1,142 @@
+ hydra:
+   run:
+     dir: /home/theodore/Projects/VSP-LLM/experiments/ViAVSP-LLM_v1.0
+   sweep:
+     dir: ???
+     subdir: ${hydra.job.config_name}__${hydra.job.override_dirname}
+   hydra_logging:
+     version: 1
+     formatters:
+       simple:
+         format: '[%(asctime)s][HYDRA] %(message)s'
+     handlers:
+       console:
+         class: logging.StreamHandler
+         formatter: simple
+         stream: ext://sys.stdout
+     root:
+       level: INFO
+       handlers:
+       - console
+     loggers:
+       logging_example:
+         level: DEBUG
+     disable_existing_loggers: false
+   job_logging:
+     version: 1
+     formatters:
+       simple:
+         format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
+     handlers:
+       console:
+         class: logging.StreamHandler
+         formatter: simple
+         stream: ext://sys.stdout
+       file:
+         class: logging.FileHandler
+         formatter: simple
+         filename: ${hydra.job.name}.log
+     root:
+       level: INFO
+       handlers:
+       - console
+       - file
+     disable_existing_loggers: false
+   sweeper:
+     _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
+     max_batch_size: null
+   launcher:
+     _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
+   help:
+     app_name: ${hydra.job.name}
+     header: '${hydra.help.app_name} is powered by Hydra.
+
+       '
+     footer: 'Powered by Hydra (https://hydra.cc)
+
+       Use --hydra-help to view Hydra specific help
+
+       '
+     template: '${hydra.help.header}
+
+       == Configuration groups ==
+
+       Compose your configuration from those groups (group=option)
+
+
+       $APP_CONFIG_GROUPS
+
+
+       == Config ==
+
+       Override anything in the config (foo.bar=value)
+
+
+       $CONFIG
+
+
+       ${hydra.help.footer}
+
+       '
+   hydra_help:
+     hydra_help: ???
+     template: 'Hydra (${hydra.runtime.version})
+
+       See https://hydra.cc for more info.
+
+
+       == Flags ==
+
+       $FLAGS_HELP
+
+
+       == Configuration groups ==
+
+       Compose your configuration from those groups (For example, append hydra/job_logging=disabled
+       to command line)
+
+
+       $HYDRA_CONFIG_GROUPS
+
+
+       Use ''--cfg hydra'' to Show the Hydra config.
+
+       '
+   output_subdir: .hydra
+   overrides:
+     hydra:
+     - hydra.run.dir=/home/theodore/Projects/VSP-LLM/experiments/ViAVSP-LLM_v1.0
+     task:
+     - common.user_dir=/home/theodore/Projects/VSP-LLM/src
+     - task.data=/home/theodore/Projects/VSP-LLM/data/processed/vasr/100h
+     - task.label_dir=/home/theodore/Projects/VSP-LLM/data/processed/vasr/100h
+     - task.llm_ckpt_path=vilm/vinallama-2.7b
+     - model.w2v_path=/home/theodore/Projects/VSP-LLM/checkpoints/large_vox_iter5.pt
+     - model.llm_ckpt_path=vilm/vinallama-2.7b
+     - distributed_training.distributed_world_size=1
+     - distributed_training.nprocs_per_node=1
+   job:
+     name: hydra_train
+     override_dirname: common.user_dir-/home/theodore/Projects/VSP-LLM/src__distributed_training.distributed_world_size-1__distributed_training.nprocs_per_node-1__model.llm_ckpt_path-vilm/vinallama-2.7b__task.llm_ckpt_path-vilm/vinallama-2.7b
+     id: ???
+     num: ???
+     config_name: vasr-100h-finetune
+     env_set: {}
+     env_copy: []
+     config:
+       override_dirname:
+         kv_sep: '-'
+         item_sep: __
+         exclude_keys:
+         - run
+         - task.data
+         - task.label_dir
+         - model.w2v_path
+         - dataset.train_subset
+         - dataset.valid_subset
+         - criterion.wer_kenlm_model
+         - criterion.wer_lexicon
+   runtime:
+     version: 1.0.7
+     cwd: /home/theodore/Projects/VSP-LLM
+   verbose: false
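
The job.override_dirname value above is derived from the task overrides using the job.config.override_dirname settings: keys in exclude_keys are dropped, the remaining key=value pairs are sorted, '=' is replaced by kv_sep, and the pieces are joined with item_sep. A small Python sketch reproducing that string for illustration (not Hydra's actual implementation):

# Rebuild job.override_dirname from the overrides listed in this file.
# Illustrative sketch only; Hydra computes this internally.
overrides = [
    "common.user_dir=/home/theodore/Projects/VSP-LLM/src",
    "task.data=/home/theodore/Projects/VSP-LLM/data/processed/vasr/100h",
    "task.label_dir=/home/theodore/Projects/VSP-LLM/data/processed/vasr/100h",
    "task.llm_ckpt_path=vilm/vinallama-2.7b",
    "model.w2v_path=/home/theodore/Projects/VSP-LLM/checkpoints/large_vox_iter5.pt",
    "model.llm_ckpt_path=vilm/vinallama-2.7b",
    "distributed_training.distributed_world_size=1",
    "distributed_training.nprocs_per_node=1",
]
exclude_keys = {
    "run", "task.data", "task.label_dir", "model.w2v_path",
    "dataset.train_subset", "dataset.valid_subset",
    "criterion.wer_kenlm_model", "criterion.wer_lexicon",
}
kv_sep, item_sep = "-", "__"

kept = sorted(o for o in overrides if o.split("=", 1)[0] not in exclude_keys)
print(item_sep.join(o.replace("=", kv_sep, 1) for o in kept))
# -> common.user_dir-/home/theodore/Projects/VSP-LLM/src__distributed_training. ...
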
.hydra/overrides.yaml ADDED
@@ -0,0 +1,8 @@
+ - common.user_dir=/home/theodore/Projects/VSP-LLM/src
+ - task.data=/home/theodore/Projects/VSP-LLM/data/processed/vasr/100h
+ - task.label_dir=/home/theodore/Projects/VSP-LLM/data/processed/vasr/100h
+ - task.llm_ckpt_path=vilm/vinallama-2.7b
+ - model.w2v_path=/home/theodore/Projects/VSP-LLM/checkpoints/large_vox_iter5.pt
+ - model.llm_ckpt_path=vilm/vinallama-2.7b
+ - distributed_training.distributed_world_size=1
+ - distributed_training.nprocs_per_node=1
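
Each line above is a dotted key=value override that Hydra merges over the composed config. A minimal sketch of the mechanics using OmegaConf.from_dotlist, assuming omegaconf and PyYAML are installed and the file is checked out locally:

# Parse the override list and turn it into a config delta.
# Minimal sketch; Hydra applies the same overrides during composition.
import yaml
from omegaconf import OmegaConf

with open(".hydra/overrides.yaml") as f:
    dotlist = yaml.safe_load(f)  # a plain list of 'key=value' strings

delta = OmegaConf.from_dotlist(dotlist)
print(delta.task.llm_ckpt_path)                    # vilm/vinallama-2.7b
print(delta.distributed_training.nprocs_per_node)  # 1
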
hydra_train.log ADDED
The diff for this file is too large to render. See raw diff
 
tblog/train/events.out.tfevents.1717633476.Turing.3006671.2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6fbf73ae597666f211ad890563fef9d2d17d761719e4fcd011ee7904523f41d5
+ size 3411
tblog/train_inner/events.out.tfevents.1717585012.Turing.2651421.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ba262c2f5c2b5dc3b2b3e16e69ad1440afb483acba9cba436000ee0a0a39105c
+ size 6451
tblog/train_inner/events.out.tfevents.1717602634.Turing.2997820.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:eb126093eaf167d049f7bf904d3661c53dac2b747d07a1ae491d00d2ce864d77
+ size 2431
tblog/train_inner/events.out.tfevents.1717604373.Turing.3006671.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:60e4da8e0635b9b5db4db8539ec45c3991b0e0ee20862dda817acf1e673793d9
+ size 129634
tblog/valid/events.out.tfevents.1717609502.Turing.3006671.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:590a3cfe5e3b0b43f048a7cfd2f1ea079fd608e3d28780f133a62015ddfcda56
+ size 9094
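
The tblog entries above are Git LFS pointer files (spec version, sha256 object id, and size in bytes); the actual TensorBoard event data is downloaded by git lfs pull. A hedged sketch of reading the logged scalars once the objects are present locally, assuming the tensorboard package is installed:

# Read scalar summaries from a TensorBoard event-file directory.
# Sketch only; assumes the LFS objects have been pulled locally.
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

acc = EventAccumulator("tblog/valid")  # directory containing an events file
acc.Reload()
for tag in acc.Tags()["scalars"]:
    last = acc.Scalars(tag)[-1]
    print(tag, last.step, last.value)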