Commit 926a145 by tanthinhdt (parent: 9470a5d)

Upload 9 files
.hydra/config.yaml ADDED
@@ -0,0 +1,267 @@
+ _name: null
+ common:
+   _name: null
+   no_progress_bar: false
+   log_interval: 200
+   log_format: json
+   log_file: null
+   tensorboard_logdir: tblog
+   wandb_project: AVSP-LLM
+   azureml_logging: false
+   seed: 1337
+   cpu: false
+   tpu: false
+   bf16: false
+   memory_efficient_bf16: false
+   fp16: true
+   memory_efficient_fp16: false
+   fp16_no_flatten_grads: false
+   fp16_init_scale: 128
+   fp16_scale_window: null
+   fp16_scale_tolerance: 0.0
+   on_cpu_convert_precision: false
+   min_loss_scale: 0.0001
+   threshold_loss_scale: null
+   amp: false
+   amp_batch_retries: 2
+   amp_init_scale: 128
+   amp_scale_window: null
+   user_dir: /home/theodore/Projects/VSP-LLM/src
+   empty_cache_freq: 0
+   all_gather_list_size: 16384
+   model_parallel_size: 1
+   quantization_config_path: null
+   profile: false
+   reset_logging: false
+   suppress_crashes: false
+   use_plasma_view: false
+   plasma_path: /tmp/plasma
+ common_eval:
+   _name: null
+   path: null
+   post_process: null
+   quiet: false
+   model_overrides: '{}'
+   results_path: null
+ distributed_training:
+   _name: null
+   distributed_world_size: 1
+   distributed_num_procs: 1
+   distributed_rank: 0
+   distributed_backend: nccl
+   distributed_init_method: null
+   distributed_port: -1
+   device_id: 0
+   distributed_no_spawn: false
+   ddp_backend: no_c10d
+   ddp_comm_hook: none
+   bucket_cap_mb: 25
+   fix_batches_to_gpus: false
+   find_unused_parameters: true
+   fast_stat_sync: false
+   heartbeat_timeout: -1
+   broadcast_buffers: false
+   slowmo_momentum: null
+   slowmo_algorithm: LocalSGD
+   localsgd_frequency: 3
+   nprocs_per_node: 1
+   pipeline_model_parallel: false
+   pipeline_balance: null
+   pipeline_devices: null
+   pipeline_chunks: 0
+   pipeline_encoder_balance: null
+   pipeline_encoder_devices: null
+   pipeline_decoder_balance: null
+   pipeline_decoder_devices: null
+   pipeline_checkpoint: never
+   zero_sharding: none
+   fp16: ${common.fp16}
+   memory_efficient_fp16: ${common.memory_efficient_fp16}
+   tpu: ${common.tpu}
+   no_reshard_after_forward: false
+   fp32_reduce_scatter: false
+   cpu_offload: false
+   use_sharded_state: false
+ dataset:
+   _name: null
+   num_workers: 0
+   skip_invalid_size_inputs_valid_test: false
+   max_tokens: null
+   batch_size: 1
+   required_batch_size_multiple: 8
+   required_seq_len_multiple: 1
+   dataset_impl: null
+   data_buffer_size: 10
+   train_subset: train
+   valid_subset: valid
+   combine_valid_subsets: null
+   ignore_unused_valid_subsets: false
+   validate_interval: 1
+   validate_interval_updates: 0
+   validate_after_updates: 0
+   fixed_validation_seed: null
+   disable_validation: false
+   max_tokens_valid: ${dataset.max_tokens}
+   batch_size_valid: ${dataset.batch_size}
+   max_valid_steps: null
+   curriculum: 0
+   gen_subset: test
+   num_shards: 1
+   shard_id: 0
+ optimization:
+   _name: null
+   max_epoch: 0
+   max_update: 30000
+   stop_time_hours: 0.0
+   clip_norm: 0.0
+   sentence_avg: true
+   update_freq:
+   - 8
+   lr:
+   - 0.0005
+   stop_min_lr: -1.0
+   use_bmuf: false
+ checkpoint:
+   _name: null
+   save_dir: checkpoints
+   restore_file: checkpoint_last.pt
+   finetune_from_model: null
+   reset_dataloader: false
+   reset_lr_scheduler: false
+   reset_meters: false
+   reset_optimizer: false
+   optimizer_overrides: '{}'
+   save_interval: 1
+   save_interval_updates: 2500
+   keep_interval_updates: 1
+   keep_interval_updates_pattern: -1
+   keep_last_epochs: -1
+   keep_best_checkpoints: -1
+   no_save: false
+   no_epoch_checkpoints: true
+   no_last_checkpoints: false
+   no_save_optimizer_state: false
+   best_checkpoint_metric: accuracy
+   maximize_best_checkpoint_metric: true
+   patience: -1
+   checkpoint_suffix: ''
+   checkpoint_shard_count: 1
+   load_checkpoint_on_all_dp_ranks: false
+   write_checkpoints_asynchronously: false
+   model_parallel_size: ${common.model_parallel_size}
+ bmuf:
+   _name: null
+   block_lr: 1.0
+   block_momentum: 0.875
+   global_sync_iter: 50
+   warmup_iterations: 500
+   use_nbm: false
+   average_sync: false
+   distributed_world_size: ${distributed_training.distributed_world_size}
+ generation:
+   _name: null
+   beam: 5
+   nbest: 1
+   max_len_a: 0.0
+   max_len_b: 200
+   min_len: 1
+   match_source_len: false
+   unnormalized: false
+   no_early_stop: false
+   no_beamable_mm: false
+   lenpen: 1.0
+   unkpen: 0.0
+   replace_unk: null
+   sacrebleu: false
+   score_reference: false
+   prefix_size: 0
+   no_repeat_ngram_size: 0
+   sampling: false
+   sampling_topk: -1
+   sampling_topp: -1.0
+   constraints: null
+   temperature: 1.0
+   diverse_beam_groups: -1
+   diverse_beam_strength: 0.5
+   diversity_rate: -1.0
+   print_alignment: null
+   print_step: false
+   lm_path: null
+   lm_weight: 0.0
+   iter_decode_eos_penalty: 0.0
+   iter_decode_max_iter: 10
+   iter_decode_force_max_iter: false
+   iter_decode_with_beam: 1
+   iter_decode_with_external_reranker: false
+   retain_iter_history: false
+   retain_dropout: false
+   retain_dropout_modules: null
+   decoding_format: null
+   no_seed_provided: false
+ eval_lm:
+   _name: null
+   output_word_probs: false
+   output_word_stats: false
+   context_window: 0
+   softmax_batch: 9223372036854775807
+ interactive:
+   _name: null
+   buffer_size: 0
+   input: '-'
+ model:
+   _name: vsp_llm
+   w2v_path: /home/theodore/Projects/VSP-LLM/checkpoints/large_vox_iter5.pt
+   llm_ckpt_path: vilm/vinallama-2.7b
+   apply_mask: false
+   mask_selection: static
+   mask_length: 10
+   mask_other: 0
+   mask_prob: 0.75
+   mask_channel_selection: static
+   mask_channel_length: 64
+   mask_channel_other: 0
+   mask_channel_prob: 0.5
+   layerdrop: 0.1
+   dropout: 0.0
+   activation_dropout: 0.1
+   attention_dropout: 0.0
+   feature_grad_mult: 1.0
+   encoder_embed_dim: 1024
+   decoder_embed_dim: 4096
+   freeze_finetune_updates: 18000
+ task:
+   _name: vsp_llm_training
+   is_s2s: true
+   data: /home/theodore/Projects/VSP-LLM/data/processed/vasr/100h
+   label_dir: /home/theodore/Projects/VSP-LLM/data/processed/vasr/100h
+   normalize: true
+   labels:
+   - wrd
+   single_target: true
+   fine_tuning: true
+   stack_order_audio: 4
+   max_sample_size: 500
+   modalities:
+   - video
+   - audio
+   image_aug: true
+   pad_audio: true
+   random_crop: false
+   llm_ckpt_path: vilm/vinallama-2.7b
+ criterion:
+   _name: decoder_only_language_modeling_loss
+   report_accuracy: true
+   label_smoothing: 0.1
+ optimizer:
+   _name: adam
+   adam_betas: (0.9,0.98)
+   adam_eps: 1.0e-08
+ lr_scheduler:
+   _name: tri_stage
+   warmup_steps: 10000
+   hold_steps: 0
+   decay_steps: 20000
+   final_lr_scale: 0.05
+ scoring: null
+ bpe: null
+ tokenizer: null
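
Note: the ${...} values in this config (for example distributed_training.fp16: ${common.fp16}) are OmegaConf interpolations resolved against the config root. A minimal sketch of inspecting them in Python, assuming omegaconf is installed and the file is checked out locally as .hydra/config.yaml:

# Load the training config and resolve its ${...} interpolations.
# Minimal sketch; assumes omegaconf is installed and the path below exists.
from omegaconf import OmegaConf

cfg = OmegaConf.load(".hydra/config.yaml")
print(cfg.common.fp16)                # True
print(cfg.distributed_training.fp16)  # True, resolved from ${common.fp16}

# Convert to plain Python containers with all interpolations resolved.
resolved = OmegaConf.to_container(cfg, resolve=True)
print(resolved["dataset"]["batch_size_valid"])  # 1, from ${dataset.batch_size}
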
.hydra/hydra.yaml ADDED
@@ -0,0 +1,142 @@
+ hydra:
+   run:
+     dir: /home/theodore/Projects/VSP-LLM/experiments/ViAVSP-LLM_v1.0
+   sweep:
+     dir: ???
+     subdir: ${hydra.job.config_name}__${hydra.job.override_dirname}
+   hydra_logging:
+     version: 1
+     formatters:
+       simple:
+         format: '[%(asctime)s][HYDRA] %(message)s'
+     handlers:
+       console:
+         class: logging.StreamHandler
+         formatter: simple
+         stream: ext://sys.stdout
+     root:
+       level: INFO
+       handlers:
+       - console
+     loggers:
+       logging_example:
+         level: DEBUG
+     disable_existing_loggers: false
+   job_logging:
+     version: 1
+     formatters:
+       simple:
+         format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
+     handlers:
+       console:
+         class: logging.StreamHandler
+         formatter: simple
+         stream: ext://sys.stdout
+       file:
+         class: logging.FileHandler
+         formatter: simple
+         filename: ${hydra.job.name}.log
+     root:
+       level: INFO
+       handlers:
+       - console
+       - file
+     disable_existing_loggers: false
+   sweeper:
+     _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
+     max_batch_size: null
+   launcher:
+     _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
+   help:
+     app_name: ${hydra.job.name}
+     header: '${hydra.help.app_name} is powered by Hydra.
+
+       '
+     footer: 'Powered by Hydra (https://hydra.cc)
+
+       Use --hydra-help to view Hydra specific help
+
+       '
+     template: '${hydra.help.header}
+
+       == Configuration groups ==
+
+       Compose your configuration from those groups (group=option)
+
+
+       $APP_CONFIG_GROUPS
+
+
+       == Config ==
+
+       Override anything in the config (foo.bar=value)
+
+
+       $CONFIG
+
+
+       ${hydra.help.footer}
+
+       '
+   hydra_help:
+     hydra_help: ???
+     template: 'Hydra (${hydra.runtime.version})
+
+       See https://hydra.cc for more info.
+
+
+       == Flags ==
+
+       $FLAGS_HELP
+
+
+       == Configuration groups ==
+
+       Compose your configuration from those groups (For example, append hydra/job_logging=disabled
+       to command line)
+
+
+       $HYDRA_CONFIG_GROUPS
+
+
+       Use ''--cfg hydra'' to Show the Hydra config.
+
+       '
+   output_subdir: .hydra
+   overrides:
+     hydra:
+     - hydra.run.dir=/home/theodore/Projects/VSP-LLM/experiments/ViAVSP-LLM_v1.0
+     task:
+     - common.user_dir=/home/theodore/Projects/VSP-LLM/src
+     - task.data=/home/theodore/Projects/VSP-LLM/data/processed/vasr/100h
+     - task.label_dir=/home/theodore/Projects/VSP-LLM/data/processed/vasr/100h
+     - task.llm_ckpt_path=vilm/vinallama-2.7b
+     - model.w2v_path=/home/theodore/Projects/VSP-LLM/checkpoints/large_vox_iter5.pt
+     - model.llm_ckpt_path=vilm/vinallama-2.7b
+     - distributed_training.distributed_world_size=1
+     - distributed_training.nprocs_per_node=1
+   job:
+     name: hydra_train
+     override_dirname: common.user_dir-/home/theodore/Projects/VSP-LLM/src__distributed_training.distributed_world_size-1__distributed_training.nprocs_per_node-1__model.llm_ckpt_path-vilm/vinallama-2.7b__task.llm_ckpt_path-vilm/vinallama-2.7b
+     id: ???
+     num: ???
+     config_name: vasr-100h-finetune
+     env_set: {}
+     env_copy: []
+     config:
+       override_dirname:
+         kv_sep: '-'
+         item_sep: __
+         exclude_keys:
+         - run
+         - task.data
+         - task.label_dir
+         - model.w2v_path
+         - dataset.train_subset
+         - dataset.valid_subset
+         - criterion.wer_kenlm_model
+         - criterion.wer_lexicon
+   runtime:
+     version: 1.0.7
+     cwd: /home/theodore/Projects/VSP-LLM
+   verbose: false
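
The job.override_dirname value above is derived from the task overrides using the job.config.override_dirname settings: keys in exclude_keys are dropped, the remaining key=value pairs are sorted, '=' is replaced by kv_sep, and the pieces are joined with item_sep. A small Python sketch reproducing that string for illustration (not Hydra's actual implementation):

# Rebuild job.override_dirname from the overrides listed in this file.
# Illustrative sketch only; Hydra computes this internally.
overrides = [
    "common.user_dir=/home/theodore/Projects/VSP-LLM/src",
    "task.data=/home/theodore/Projects/VSP-LLM/data/processed/vasr/100h",
    "task.label_dir=/home/theodore/Projects/VSP-LLM/data/processed/vasr/100h",
    "task.llm_ckpt_path=vilm/vinallama-2.7b",
    "model.w2v_path=/home/theodore/Projects/VSP-LLM/checkpoints/large_vox_iter5.pt",
    "model.llm_ckpt_path=vilm/vinallama-2.7b",
    "distributed_training.distributed_world_size=1",
    "distributed_training.nprocs_per_node=1",
]
exclude_keys = {
    "run", "task.data", "task.label_dir", "model.w2v_path",
    "dataset.train_subset", "dataset.valid_subset",
    "criterion.wer_kenlm_model", "criterion.wer_lexicon",
}
kv_sep, item_sep = "-", "__"

kept = sorted(o for o in overrides if o.split("=", 1)[0] not in exclude_keys)
print(item_sep.join(o.replace("=", kv_sep, 1) for o in kept))
# -> common.user_dir-/home/theodore/Projects/VSP-LLM/src__distributed_training. ...
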
.hydra/overrides.yaml ADDED
@@ -0,0 +1,8 @@
+ - common.user_dir=/home/theodore/Projects/VSP-LLM/src
+ - task.data=/home/theodore/Projects/VSP-LLM/data/processed/vasr/100h
+ - task.label_dir=/home/theodore/Projects/VSP-LLM/data/processed/vasr/100h
+ - task.llm_ckpt_path=vilm/vinallama-2.7b
+ - model.w2v_path=/home/theodore/Projects/VSP-LLM/checkpoints/large_vox_iter5.pt
+ - model.llm_ckpt_path=vilm/vinallama-2.7b
+ - distributed_training.distributed_world_size=1
+ - distributed_training.nprocs_per_node=1
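
Each line above is a dotted key=value override that Hydra merges over the composed config. A minimal sketch of the mechanics using OmegaConf.from_dotlist, assuming omegaconf and PyYAML are installed and the file is checked out locally:

# Parse the override list and turn it into a config delta.
# Minimal sketch; Hydra applies the same overrides during composition.
import yaml
from omegaconf import OmegaConf

with open(".hydra/overrides.yaml") as f:
    dotlist = yaml.safe_load(f)  # a plain list of 'key=value' strings

delta = OmegaConf.from_dotlist(dotlist)
print(delta.task.llm_ckpt_path)                    # vilm/vinallama-2.7b
print(delta.distributed_training.nprocs_per_node)  # 1
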
hydra_train.log ADDED
The diff for this file is too large to render. See raw diff
 
tblog/train/events.out.tfevents.1717633476.Turing.3006671.2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6fbf73ae597666f211ad890563fef9d2d17d761719e4fcd011ee7904523f41d5
+ size 3411
tblog/train_inner/events.out.tfevents.1717585012.Turing.2651421.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ba262c2f5c2b5dc3b2b3e16e69ad1440afb483acba9cba436000ee0a0a39105c
+ size 6451
tblog/train_inner/events.out.tfevents.1717602634.Turing.2997820.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:eb126093eaf167d049f7bf904d3661c53dac2b747d07a1ae491d00d2ce864d77
+ size 2431
tblog/train_inner/events.out.tfevents.1717604373.Turing.3006671.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:60e4da8e0635b9b5db4db8539ec45c3991b0e0ee20862dda817acf1e673793d9
+ size 129634
tblog/valid/events.out.tfevents.1717609502.Turing.3006671.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:590a3cfe5e3b0b43f048a7cfd2f1ea079fd608e3d28780f133a62015ddfcda56
+ size 9094
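
The tblog entries above are Git LFS pointer files (spec version, sha256 object id, and size in bytes); the actual TensorBoard event data is downloaded by git lfs pull. A hedged sketch of reading the logged scalars once the objects are present locally, assuming the tensorboard package is installed:

# Read scalar summaries from a TensorBoard event-file directory.
# Sketch only; assumes the LFS objects have been pulled locally.
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

acc = EventAccumulator("tblog/valid")  # directory containing an events file
acc.Reload()
for tag in acc.Tags()["scalars"]:
    last = acc.Scalars(tag)[-1]
    print(tag, last.step, last.value)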