ViAVSP-LLM_v1.0 / hydra_train.log

Upload 10 files

c523959 verified 7 months ago

204 kB

	[2024-06-12 12:23:20,053][fairseq_cli.train][INFO] - {'_name': None, 'common': {'_name': None, 'no_progress_bar': False, 'log_interval': 200, 'log_format': 'json', 'log_file': None, 'tensorboard_logdir': 'tblog', 'wandb_project': 'AVSP-LLM', 'azureml_logging': False, 'seed': 1337, 'cpu': False, 'tpu': False, 'bf16': False, 'memory_efficient_bf16': False, 'fp16': True, 'memory_efficient_fp16': False, 'fp16_no_flatten_grads': False, 'fp16_init_scale': 128, 'fp16_scale_window': None, 'fp16_scale_tolerance': 0.0, 'on_cpu_convert_precision': False, 'min_loss_scale': 0.0001, 'threshold_loss_scale': None, 'amp': False, 'amp_batch_retries': 2, 'amp_init_scale': 128, 'amp_scale_window': None, 'user_dir': '/home/theodore/Projects/VSP-LLM/src', 'empty_cache_freq': 0, 'all_gather_list_size': 16384, 'model_parallel_size': 1, 'quantization_config_path': None, 'profile': False, 'reset_logging': False, 'suppress_crashes': False, 'use_plasma_view': False, 'plasma_path': '/tmp/plasma'}, 'common_eval': {'_name': None, 'path': None, 'post_process': None, 'quiet': False, 'model_overrides': '{}', 'results_path': None}, 'distributed_training': {'_name': None, 'distributed_world_size': 1, 'distributed_num_procs': 1, 'distributed_rank': 0, 'distributed_backend': 'nccl', 'distributed_init_method': None, 'distributed_port': -1, 'device_id': 0, 'distributed_no_spawn': False, 'ddp_backend': 'no_c10d', 'ddp_comm_hook': 'none', 'bucket_cap_mb': 25, 'fix_batches_to_gpus': False, 'find_unused_parameters': True, 'fast_stat_sync': False, 'heartbeat_timeout': -1, 'broadcast_buffers': False, 'slowmo_momentum': None, 'slowmo_algorithm': 'LocalSGD', 'localsgd_frequency': 3, 'nprocs_per_node': 1, 'pipeline_model_parallel': False, 'pipeline_balance': None, 'pipeline_devices': None, 'pipeline_chunks': 0, 'pipeline_encoder_balance': None, 'pipeline_encoder_devices': None, 'pipeline_decoder_balance': None, 'pipeline_decoder_devices': None, 'pipeline_checkpoint': 'never', 'zero_sharding': 'none', 'fp16': True, 'memory_efficient_fp16': False, 'tpu': False, 'no_reshard_after_forward': False, 'fp32_reduce_scatter': False, 'cpu_offload': False, 'use_sharded_state': False}, 'dataset': {'_name': None, 'num_workers': 0, 'skip_invalid_size_inputs_valid_test': False, 'max_tokens': None, 'batch_size': 1, 'required_batch_size_multiple': 8, 'required_seq_len_multiple': 1, 'dataset_impl': None, 'data_buffer_size': 10, 'train_subset': 'train', 'valid_subset': 'valid', 'combine_valid_subsets': None, 'ignore_unused_valid_subsets': False, 'validate_interval': 1, 'validate_interval_updates': 0, 'validate_after_updates': 0, 'fixed_validation_seed': None, 'disable_validation': False, 'max_tokens_valid': None, 'batch_size_valid': 1, 'max_valid_steps': None, 'curriculum': 0, 'gen_subset': 'test', 'num_shards': 1, 'shard_id': 0}, 'optimization': {'_name': None, 'max_epoch': 0, 'max_update': 30000, 'stop_time_hours': 0.0, 'clip_norm': 0.0, 'sentence_avg': True, 'update_freq': [8], 'lr': [0.0005], 'stop_min_lr': -1.0, 'use_bmuf': False}, 'checkpoint': {'_name': None, 'save_dir': 'checkpoints', 'restore_file': 'checkpoint_last.pt', 'finetune_from_model': None, 'reset_dataloader': False, 'reset_lr_scheduler': False, 'reset_meters': False, 'reset_optimizer': False, 'optimizer_overrides': '{}', 'save_interval': 1, 'save_interval_updates': 3000, 'keep_interval_updates': 1, 'keep_interval_updates_pattern': -1, 'keep_last_epochs': -1, 'keep_best_checkpoints': -1, 'no_save': False, 'no_epoch_checkpoints': True, 'no_last_checkpoints': False, 'no_save_optimizer_state': False, 'best_checkpoint_metric': 'accuracy', 'maximize_best_checkpoint_metric': True, 'patience': -1, 'checkpoint_suffix': '', 'checkpoint_shard_count': 1, 'load_checkpoint_on_all_dp_ranks': False, 'write_checkpoints_asynchronously': False, 'model_parallel_size': 1}, 'bmuf': {'_name': None, 'block_lr': 1.0, 'block_momentum': 0.875, 'global_sync_iter': 50, 'warmup_iterations': 500, 'use_nbm': False, 'average_sync': False, 'distributed_world_size': 1}, 'generation': {'_name': None, 'beam': 5, 'nbest': 1, 'max_len_a': 0.0, 'max_len_b': 200, 'min_len': 1, 'match_source_len': False, 'unnormalized': False, 'no_early_stop': False, 'no_beamable_mm': False, 'lenpen': 1.0, 'unkpen': 0.0, 'replace_unk': None, 'sacrebleu': False, 'score_reference': False, 'prefix_size': 0, 'no_repeat_ngram_size': 0, 'sampling': False, 'sampling_topk': -1, 'sampling_topp': -1.0, 'constraints': None, 'temperature': 1.0, 'diverse_beam_groups': -1, 'diverse_beam_strength': 0.5, 'diversity_rate': -1.0, 'print_alignment': None, 'print_step': False, 'lm_path': None, 'lm_weight': 0.0, 'iter_decode_eos_penalty': 0.0, 'iter_decode_max_iter': 10, 'iter_decode_force_max_iter': False, 'iter_decode_with_beam': 1, 'iter_decode_with_external_reranker': False, 'retain_iter_history': False, 'retain_dropout': False, 'retain_dropout_modules': None, 'decoding_format': None, 'no_seed_provided': False}, 'eval_lm': {'_name': None, 'output_word_probs': False, 'output_word_stats': False, 'context_window': 0, 'softmax_batch': 9223372036854775807}, 'interactive': {'_name': None, 'buffer_size': 0, 'input': '-'}, 'model': {'_name': 'vsp_llm', 'w2v_path': '/home/theodore/Projects/VSP-LLM/checkpoints/large_vox_iter5.pt', 'llm_ckpt_path': 'vilm/vinallama-2.7b', 'apply_mask': False, 'mask_selection': 'static', 'mask_length': 10, 'mask_other': 0, 'mask_prob': 0.75, 'mask_channel_selection': 'static', 'mask_channel_length': 64, 'mask_channel_other': 0, 'mask_channel_prob': 0.5, 'layerdrop': 0.1, 'dropout': 0.0, 'activation_dropout': 0.1, 'attention_dropout': 0.0, 'feature_grad_mult': 1.0, 'encoder_embed_dim': 1024, 'decoder_embed_dim': 4096, 'freeze_finetune_updates': 18000}, 'task': {'_name': 'vsp_llm_training', 'is_s2s': True, 'data': '/home/theodore/Projects/VSP-LLM/data/processed/vasr', 'label_dir': '/home/theodore/Projects/VSP-LLM/data/processed/vasr', 'normalize': True, 'labels': ['wrd'], 'single_target': True, 'fine_tuning': True, 'stack_order_audio': 4, 'max_sample_size': 500, 'modalities': ['video', 'audio'], 'image_aug': True, 'pad_audio': True, 'random_crop': False, 'llm_ckpt_path': 'vilm/vinallama-2.7b'}, 'criterion': {'_name': 'decoder_only_language_modeling_loss', 'report_accuracy': True, 'label_smoothing': 0.1}, 'optimizer': {'_name': 'adam', 'adam_betas': '(0.9,0.98)', 'adam_eps': 1e-08, 'weight_decay': 0.0, 'use_old_adam': False, 'tpu': False, 'lr': [0.0005]}, 'lr_scheduler': {'_name': 'tri_stage', 'warmup_steps': 10000, 'hold_steps': 0, 'decay_steps': 20000, 'phase_ratio': None, 'init_lr_scale': 0.01, 'final_lr_scale': 0.05, 'max_update': 30000, 'lr': [0.0005]}, 'scoring': None, 'bpe': None, 'tokenizer': None, 'job_logging_cfg': {'version': 1, 'formatters': {'simple': {'format': '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'}}, 'handlers': {'console': {'class': 'logging.StreamHandler', 'formatter': 'simple', 'stream': 'ext://sys.stdout'}, 'file': {'class': 'logging.FileHandler', 'formatter': 'simple', 'filename': 'hydra_train.log'}}, 'root': {'level': 'INFO', 'handlers': ['console', 'file']}, 'disable_existing_loggers': False}}
	[2024-06-12 12:23:20,056][src.vsp_llm_training][INFO] - current directory is /home/theodore/Projects/VSP-LLM/experiments/ViAVSP-LLM_v1.0
	[2024-06-12 12:23:20,056][src.vsp_llm_training][INFO] - AVHubertPretrainingTask Config {'_name': 'vsp_llm_training', 'data': '/home/theodore/Projects/VSP-LLM/data/processed/vasr', 'labels': ['wrd'], 'label_dir': '/home/theodore/Projects/VSP-LLM/data/processed/vasr', 'label_rate': -1, 'sample_rate': 16000, 'llm_ckpt_path': 'vilm/vinallama-2.7b', 'normalize': True, 'enable_padding': False, 'max_sample_size': 500, 'min_sample_size': None, 'max_trim_sample_size': '${task.max_sample_size}', 'single_target': True, 'random_crop': False, 'pad_audio': True, 'pdb': False, 'stack_order_audio': 4, 'skip_verify': False, 'image_aug': True, 'image_crop_size': 88, 'image_mean': 0.421, 'image_std': 0.165, 'modalities': ['video', 'audio'], 'is_s2s': True, 'tokenizer_bpe_name': None, 'tokenizer_bpe_model': None, 'noise_wav': None, 'noise_prob': 0.0, 'noise_snr': '0', 'noise_num': 1, 'fine_tuning': True}
	[2024-06-12 12:23:21,822][src.hubert_pretraining][INFO] - current directory is /home/theodore/Projects/VSP-LLM/experiments/ViAVSP-LLM_v1.0
	[2024-06-12 12:23:21,822][src.hubert_pretraining][INFO] - AVHubertPretrainingTask Config {'_name': 'av_hubert_pretraining', 'data': '/home/theodore/Projects/VSP-LLM/data/processed/vasr', 'labels': ['km'], 'label_dir': '/checkpoint/bshi/data/lrs3//video/hubert/stitch-iters/envox-iter4-l12c2000/', 'label_rate': 25, 'sample_rate': 25, 'normalize': True, 'enable_padding': False, 'max_sample_size': 2000, 'min_sample_size': 5, 'max_trim_sample_size': 400, 'single_target': False, 'random_crop': True, 'pad_audio': False, 'pdb': False, 'stack_order_audio': 4, 'skip_verify': False, 'image_aug': True, 'image_crop_size': 88, 'image_mean': 0.421, 'image_std': 0.165, 'modalities': ['audio', 'video'], 'is_s2s': False, 'tokenizer_bpe_name': None, 'tokenizer_bpe_model': None, 'noise_wav': None, 'noise_prob': 0.0, 'noise_snr': '0', 'noise_num': 1, 'fine_tuning': False}
	[2024-06-12 12:23:21,826][src.hubert][INFO] - HubertModel Config: {'_name': 'av_hubert', 'label_rate': 25, 'input_modality': '${task.input_modality}', 'extractor_mode': default, 'encoder_layers': 24, 'encoder_embed_dim': 1024, 'encoder_ffn_embed_dim': 4096, 'encoder_attention_heads': 16, 'activation_fn': gelu, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.1, 'encoder_layerdrop': 0.1, 'dropout_input': 0.0, 'dropout_features': 0.1, 'final_dim': 256, 'untie_final_proj': True, 'layer_norm_first': True, 'conv_feature_layers': '[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2', 'conv_bias': False, 'logit_temp': 0.1, 'target_glu': False, 'feature_grad_mult': 1.0, 'mask_length_audio': 10, 'mask_prob_audio': 0.8, 'mask_length_image': 5, 'mask_prob_image': 0.3, 'mask_selection': static, 'mask_other': 0.0, 'no_mask_overlap': False, 'mask_min_space': 1, 'mask_channel_length': 10, 'mask_channel_prob': 0.0, 'mask_channel_selection': static, 'mask_channel_other': 0.0, 'no_mask_channel_overlap': False, 'mask_channel_min_space': 1, 'conv_pos': 128, 'conv_pos_groups': 16, 'latent_temp': [2.0, 0.5, 0.999995], 'skip_masked': False, 'skip_nomask': False, 'resnet_relu_type': 'prelu', 'resnet_weights': None, 'sim_type': 'cosine', 'sub_encoder_layers': 0, 'audio_feat_dim': 104, 'modality_dropout': 0.5, 'audio_dropout': 0.5, 'modality_fuse': 'concat', 'selection_type': 'same_seq', 'masking_type': 'input', 'decoder_embed_dim': 768, 'decoder_ffn_embed_dim': 3072, 'decoder_layers': 6, 'decoder_layerdrop': 0.0, 'decoder_attention_heads': 4, 'decoder_learned_pos': False, 'decoder_normalize_before': False, 'no_token_positional_embeddings': False, 'decoder_dropout': 0.1, 'decoder_attention_dropout': 0.1, 'decoder_activation_dropout': 0.0, 'max_target_positions': 2048, 'share_decoder_input_output_embed': False, 'no_scale_embedding': True}
	[2024-06-12 12:23:29,598][fairseq_cli.train][INFO] - avhubert_llm_seq2seq_cluster_count(
	(encoder): HubertEncoderWrapper(
	(w2v_model): AVHubertModel(
	(feature_extractor_audio): SubModel(
	(proj): Linear(in_features=104, out_features=1024, bias=True)
	)
	(feature_extractor_video): SubModel(
	(resnet): ResEncoder(
	(frontend3D): Sequential(
	(0): Conv3d(1, 64, kernel_size=(5, 7, 7), stride=(1, 2, 2), padding=(2, 3, 3), bias=False)
	(1): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
	(2): PReLU(num_parameters=64)
	(3): MaxPool3d(kernel_size=(1, 3, 3), stride=(1, 2, 2), padding=(0, 1, 1), dilation=1, ceil_mode=False)
	)
	(trunk): ResNet(
	(layer1): Sequential(
	(0): BasicBlock(
	(conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
	(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
	(relu1): PReLU(num_parameters=64)
	(relu2): PReLU(num_parameters=64)
	(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
	(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
	)
	(1): BasicBlock(
	(conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
	(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
	(relu1): PReLU(num_parameters=64)
	(relu2): PReLU(num_parameters=64)
	(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
	(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
	)
	)
	(layer2): Sequential(
	(0): BasicBlock(
	(conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
	(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
	(relu1): PReLU(num_parameters=128)
	(relu2): PReLU(num_parameters=128)
	(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
	(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
	(downsample): Sequential(
	(0): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2), bias=False)
	(1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
	)
	)
	(1): BasicBlock(
	(conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
	(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
	(relu1): PReLU(num_parameters=128)
	(relu2): PReLU(num_parameters=128)
	(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
	(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
	)
	)
	(layer3): Sequential(
	(0): BasicBlock(
	(conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
	(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
	(relu1): PReLU(num_parameters=256)
	(relu2): PReLU(num_parameters=256)
	(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
	(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
	(downsample): Sequential(
	(0): Conv2d(128, 256, kernel_size=(1, 1), stride=(2, 2), bias=False)
	(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
	)
	)
	(1): BasicBlock(
	(conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
	(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
	(relu1): PReLU(num_parameters=256)
	(relu2): PReLU(num_parameters=256)
	(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
	(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
	)
	)
	(layer4): Sequential(
	(0): BasicBlock(
	(conv1): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
	(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
	(relu1): PReLU(num_parameters=512)
	(relu2): PReLU(num_parameters=512)
	(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
	(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
	(downsample): Sequential(
	(0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
	(1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
	)
	)
	(1): BasicBlock(
	(conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
	(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
	(relu1): PReLU(num_parameters=512)
	(relu2): PReLU(num_parameters=512)
	(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
	(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
	)
	)
	(avgpool): AdaptiveAvgPool2d(output_size=1)
	)
	)
	(proj): Linear(in_features=512, out_features=1024, bias=True)
	)
	(post_extract_proj): Linear(in_features=2048, out_features=1024, bias=True)
	(dropout_input): Dropout(p=0.0, inplace=False)
	(dropout_features): Dropout(p=0.1, inplace=False)
	(encoder): TransformerEncoder(
	(pos_conv): Sequential(
	(0): Conv1d(1024, 1024, kernel_size=(128,), stride=(1,), padding=(64,), groups=16)
	(1): SamePad()
	(2): GELU(approximate='none')
	)
	(layers): ModuleList(
	(0-23): 24 x TransformerSentenceEncoderLayer(
	(self_attn): MultiheadAttention(
	(dropout_module): FairseqDropout()
	(k_proj): Linear(in_features=1024, out_features=1024, bias=True)
	(v_proj): Linear(in_features=1024, out_features=1024, bias=True)
	(q_proj): Linear(in_features=1024, out_features=1024, bias=True)
	(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
	)
	(dropout1): Dropout(p=0.0, inplace=False)
	(dropout2): Dropout(p=0.1, inplace=False)
	(dropout3): Dropout(p=0.0, inplace=False)
	(self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
	(fc1): Linear(in_features=1024, out_features=4096, bias=True)
	(fc2): Linear(in_features=4096, out_features=1024, bias=True)
	(final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
	)
	)
	(layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
	)
	(layer_norm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
	(final_proj): None
	)
	)
	(decoder): PeftModelForCausalLM(
	(base_model): LoraModel(
	(model): LlamaForCausalLM(
	(model): LlamaModel(
	(embed_tokens): Embedding(46304, 2560, padding_idx=0)
	(layers): ModuleList(
	(0-31): 32 x LlamaDecoderLayer(
	(self_attn): LlamaSdpaAttention(
	(q_proj): lora.Linear4bit(
	(base_layer): Linear4bit(in_features=2560, out_features=2560, bias=False)
	(lora_dropout): ModuleDict(
	(default): Dropout(p=0.05, inplace=False)
	)
	(lora_A): ModuleDict(
	(default): Linear(in_features=2560, out_features=16, bias=False)
	)
	(lora_B): ModuleDict(
	(default): Linear(in_features=16, out_features=2560, bias=False)
	)
	(lora_embedding_A): ParameterDict()
	(lora_embedding_B): ParameterDict()
	)
	(k_proj): lora.Linear4bit(
	(base_layer): Linear4bit(in_features=2560, out_features=2560, bias=False)
	(lora_dropout): ModuleDict(
	(default): Dropout(p=0.05, inplace=False)
	)
	(lora_A): ModuleDict(
	(default): Linear(in_features=2560, out_features=16, bias=False)
	)
	(lora_B): ModuleDict(
	(default): Linear(in_features=16, out_features=2560, bias=False)
	)
	(lora_embedding_A): ParameterDict()
	(lora_embedding_B): ParameterDict()
	)
	(v_proj): lora.Linear4bit(
	(base_layer): Linear4bit(in_features=2560, out_features=2560, bias=False)
	(lora_dropout): ModuleDict(
	(default): Dropout(p=0.05, inplace=False)
	)
	(lora_A): ModuleDict(
	(default): Linear(in_features=2560, out_features=16, bias=False)
	)
	(lora_B): ModuleDict(
	(default): Linear(in_features=16, out_features=2560, bias=False)
	)
	(lora_embedding_A): ParameterDict()
	(lora_embedding_B): ParameterDict()
	)
	(o_proj): Linear4bit(in_features=2560, out_features=2560, bias=False)
	(rotary_emb): LlamaRotaryEmbedding()
	)
	(mlp): LlamaMLP(
	(gate_proj): Linear4bit(in_features=2560, out_features=6912, bias=False)
	(up_proj): Linear4bit(in_features=2560, out_features=6912, bias=False)
	(down_proj): Linear4bit(in_features=6912, out_features=2560, bias=False)
	(act_fn): SiLU()
	)
	(input_layernorm): LlamaRMSNorm()
	(post_attention_layernorm): LlamaRMSNorm()
	)
	)
	(norm): LlamaRMSNorm()
	)
	(lm_head): Linear(in_features=2560, out_features=46304, bias=False)
	)
	)
	)
	(avfeat_to_llm): Linear(in_features=1024, out_features=2560, bias=True)
	)
	[2024-06-12 12:23:29,603][fairseq_cli.train][INFO] - task: VSP_LLM_TrainingTask
	[2024-06-12 12:23:29,603][fairseq_cli.train][INFO] - model: avhubert_llm_seq2seq_cluster_count
	[2024-06-12 12:23:29,603][fairseq_cli.train][INFO] - criterion: decoder_only_language_modeling_loss
	[2024-06-12 12:23:29,606][fairseq_cli.train][INFO] - num. shared model params: 1,841,644,264 (num. trained: 335,624,424)
	[2024-06-12 12:23:29,609][fairseq_cli.train][INFO] - num. expert model params: 0 (num. trained: 0)
	[2024-06-12 12:23:29,609][src.vsp_llm_training][INFO] - Using tokenizer
	[2024-06-12 12:23:29,651][src.vsp_llm_dataset][INFO] - max_keep=500, min_keep=None, loaded 23990, skipped 0 short and 0 long and 0 unaligned, longest-loaded=76, shortest-loaded=76
	[2024-06-12 12:23:30,491][src.vsp_llm_dataset][INFO] - /home/theodore/Projects/VSP-LLM/data/processed/vasr/valid.wrd is sequence label. skipped
	[2024-06-12 12:23:30,492][src.vsp_llm_dataset][INFO] - image transform: Compose(
	Normalize(mean=0.0, std=255.0)
	<src.utils_vsp_llm.CenterCrop object at 0x7bb483823c70>
	Normalize(mean=0.421, std=0.165)
	)
	[2024-06-12 12:23:30,492][src.vsp_llm_dataset][INFO] - pad_audio=True, random_crop=False, normalize=True, max_sample_size=500, seqs2seq data=True,
	[2024-06-12 12:23:30,492][src.vsp_llm_dataset][INFO] - Noise wav: None->0 wav, Prob: 0.0, SNR: 0, Number of mixture: 1
	[2024-06-12 12:23:30,661][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer1.0.conv1.bias
	[2024-06-12 12:23:30,661][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer1.0.conv2.bias
	[2024-06-12 12:23:30,661][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer1.1.conv1.bias
	[2024-06-12 12:23:30,661][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer1.1.conv2.bias
	[2024-06-12 12:23:30,661][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer2.0.conv1.bias
	[2024-06-12 12:23:30,661][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer2.0.conv2.bias
	[2024-06-12 12:23:30,661][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer2.0.downsample.0.bias
	[2024-06-12 12:23:30,661][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer2.1.conv1.bias
	[2024-06-12 12:23:30,661][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer2.1.conv2.bias
	[2024-06-12 12:23:30,661][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer3.0.conv1.bias
	[2024-06-12 12:23:30,661][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer3.0.conv2.bias
	[2024-06-12 12:23:30,661][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer3.0.downsample.0.bias
	[2024-06-12 12:23:30,661][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer3.1.conv1.bias
	[2024-06-12 12:23:30,661][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer3.1.conv2.bias
	[2024-06-12 12:23:30,661][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer4.0.conv1.bias
	[2024-06-12 12:23:30,661][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer4.0.conv2.bias
	[2024-06-12 12:23:30,661][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer4.0.downsample.0.bias
	[2024-06-12 12:23:30,661][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer4.1.conv1.bias
	[2024-06-12 12:23:30,661][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- encoder.w2v_model.feature_extractor_video.resnet.trunk.layer4.1.conv2.bias
	[2024-06-12 12:23:30,661][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.q_proj.base_layer.bias
	[2024-06-12 12:23:30,661][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.q_proj.lora_A.default.bias
	[2024-06-12 12:23:30,661][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.q_proj.lora_B.default.bias
	[2024-06-12 12:23:30,661][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.k_proj.base_layer.bias
	[2024-06-12 12:23:30,661][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.k_proj.lora_A.default.bias
	[2024-06-12 12:23:30,661][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.k_proj.lora_B.default.bias
	[2024-06-12 12:23:30,661][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.v_proj.base_layer.bias
	[2024-06-12 12:23:30,661][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.v_proj.lora_A.default.bias
	[2024-06-12 12:23:30,661][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.v_proj.lora_B.default.bias
	[2024-06-12 12:23:30,661][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.self_attn.o_proj.bias
	[2024-06-12 12:23:30,661][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.mlp.gate_proj.bias
	[2024-06-12 12:23:30,661][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.mlp.up_proj.bias
	[2024-06-12 12:23:30,661][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.0.mlp.down_proj.bias
	[2024-06-12 12:23:30,661][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.q_proj.base_layer.bias
	[2024-06-12 12:23:30,661][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.q_proj.lora_A.default.bias
	[2024-06-12 12:23:30,661][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.q_proj.lora_B.default.bias
	[2024-06-12 12:23:30,661][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.k_proj.base_layer.bias
	[2024-06-12 12:23:30,661][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.k_proj.lora_A.default.bias
	[2024-06-12 12:23:30,661][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.k_proj.lora_B.default.bias
	[2024-06-12 12:23:30,661][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.v_proj.base_layer.bias
	[2024-06-12 12:23:30,661][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.v_proj.lora_A.default.bias
	[2024-06-12 12:23:30,661][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.v_proj.lora_B.default.bias
	[2024-06-12 12:23:30,662][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.self_attn.o_proj.bias
	[2024-06-12 12:23:30,662][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.mlp.gate_proj.bias
	[2024-06-12 12:23:30,662][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.mlp.up_proj.bias
	[2024-06-12 12:23:30,662][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.1.mlp.down_proj.bias
	[2024-06-12 12:23:30,662][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.q_proj.base_layer.bias
	[2024-06-12 12:23:30,662][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.q_proj.lora_A.default.bias
	[2024-06-12 12:23:30,662][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.q_proj.lora_B.default.bias
	[2024-06-12 12:23:30,662][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.k_proj.base_layer.bias
	[2024-06-12 12:23:30,662][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.k_proj.lora_A.default.bias
	[2024-06-12 12:23:30,662][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.k_proj.lora_B.default.bias
	[2024-06-12 12:23:30,662][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.v_proj.base_layer.bias
	[2024-06-12 12:23:30,662][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.v_proj.lora_A.default.bias
	[2024-06-12 12:23:30,662][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.v_proj.lora_B.default.bias
	[2024-06-12 12:23:30,662][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.self_attn.o_proj.bias
	[2024-06-12 12:23:30,662][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.mlp.gate_proj.bias
	[2024-06-12 12:23:30,662][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.mlp.up_proj.bias
	[2024-06-12 12:23:30,662][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.2.mlp.down_proj.bias
	[2024-06-12 12:23:30,662][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.q_proj.base_layer.bias
	[2024-06-12 12:23:30,662][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.q_proj.lora_A.default.bias
	[2024-06-12 12:23:30,662][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.q_proj.lora_B.default.bias
	[2024-06-12 12:23:30,662][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.k_proj.base_layer.bias
	[2024-06-12 12:23:30,662][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.k_proj.lora_A.default.bias
	[2024-06-12 12:23:30,662][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.k_proj.lora_B.default.bias
	[2024-06-12 12:23:30,662][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.v_proj.base_layer.bias
	[2024-06-12 12:23:30,662][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.v_proj.lora_A.default.bias
	[2024-06-12 12:23:30,662][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.v_proj.lora_B.default.bias
	[2024-06-12 12:23:30,662][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.self_attn.o_proj.bias
	[2024-06-12 12:23:30,662][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.mlp.gate_proj.bias
	[2024-06-12 12:23:30,662][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.mlp.up_proj.bias
	[2024-06-12 12:23:30,662][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.3.mlp.down_proj.bias
	[2024-06-12 12:23:30,662][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.q_proj.base_layer.bias
	[2024-06-12 12:23:30,662][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.q_proj.lora_A.default.bias
	[2024-06-12 12:23:30,662][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.q_proj.lora_B.default.bias
	[2024-06-12 12:23:30,662][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.k_proj.base_layer.bias
	[2024-06-12 12:23:30,662][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.k_proj.lora_A.default.bias
	[2024-06-12 12:23:30,662][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.k_proj.lora_B.default.bias
	[2024-06-12 12:23:30,662][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.v_proj.base_layer.bias
	[2024-06-12 12:23:30,662][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.v_proj.lora_A.default.bias
	[2024-06-12 12:23:30,662][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.v_proj.lora_B.default.bias
	[2024-06-12 12:23:30,662][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.self_attn.o_proj.bias
	[2024-06-12 12:23:30,662][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.mlp.gate_proj.bias
	[2024-06-12 12:23:30,662][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.mlp.up_proj.bias
	[2024-06-12 12:23:30,662][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.4.mlp.down_proj.bias
	[2024-06-12 12:23:30,662][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.q_proj.base_layer.bias
	[2024-06-12 12:23:30,662][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.q_proj.lora_A.default.bias
	[2024-06-12 12:23:30,662][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.q_proj.lora_B.default.bias
	[2024-06-12 12:23:30,662][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.k_proj.base_layer.bias
	[2024-06-12 12:23:30,662][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.k_proj.lora_A.default.bias
	[2024-06-12 12:23:30,662][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.k_proj.lora_B.default.bias
	[2024-06-12 12:23:30,662][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.v_proj.base_layer.bias
	[2024-06-12 12:23:30,662][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.v_proj.lora_A.default.bias
	[2024-06-12 12:23:30,662][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.v_proj.lora_B.default.bias
	[2024-06-12 12:23:30,662][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.self_attn.o_proj.bias
	[2024-06-12 12:23:30,662][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.mlp.gate_proj.bias
	[2024-06-12 12:23:30,663][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.mlp.up_proj.bias
	[2024-06-12 12:23:30,663][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.5.mlp.down_proj.bias
	[2024-06-12 12:23:30,663][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.q_proj.base_layer.bias
	[2024-06-12 12:23:30,663][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.q_proj.lora_A.default.bias
	[2024-06-12 12:23:30,663][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.q_proj.lora_B.default.bias
	[2024-06-12 12:23:30,663][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.k_proj.base_layer.bias
	[2024-06-12 12:23:30,663][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.k_proj.lora_A.default.bias
	[2024-06-12 12:23:30,663][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.k_proj.lora_B.default.bias
	[2024-06-12 12:23:30,663][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.v_proj.base_layer.bias
	[2024-06-12 12:23:30,663][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.v_proj.lora_A.default.bias
	[2024-06-12 12:23:30,663][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.v_proj.lora_B.default.bias
	[2024-06-12 12:23:30,663][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.self_attn.o_proj.bias
	[2024-06-12 12:23:30,663][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.mlp.gate_proj.bias
	[2024-06-12 12:23:30,663][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.mlp.up_proj.bias
	[2024-06-12 12:23:30,663][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.6.mlp.down_proj.bias
	[2024-06-12 12:23:30,663][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.q_proj.base_layer.bias
	[2024-06-12 12:23:30,663][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.q_proj.lora_A.default.bias
	[2024-06-12 12:23:30,663][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.q_proj.lora_B.default.bias
	[2024-06-12 12:23:30,663][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.k_proj.base_layer.bias
	[2024-06-12 12:23:30,663][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.k_proj.lora_A.default.bias
	[2024-06-12 12:23:30,663][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.k_proj.lora_B.default.bias
	[2024-06-12 12:23:30,663][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.v_proj.base_layer.bias
	[2024-06-12 12:23:30,663][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.v_proj.lora_A.default.bias
	[2024-06-12 12:23:30,663][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.v_proj.lora_B.default.bias
	[2024-06-12 12:23:30,663][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.self_attn.o_proj.bias
	[2024-06-12 12:23:30,663][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.mlp.gate_proj.bias
	[2024-06-12 12:23:30,663][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.mlp.up_proj.bias
	[2024-06-12 12:23:30,663][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.7.mlp.down_proj.bias
	[2024-06-12 12:23:30,663][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.q_proj.base_layer.bias
	[2024-06-12 12:23:30,663][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.q_proj.lora_A.default.bias
	[2024-06-12 12:23:30,663][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.q_proj.lora_B.default.bias
	[2024-06-12 12:23:30,663][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.k_proj.base_layer.bias
	[2024-06-12 12:23:30,663][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.k_proj.lora_A.default.bias
	[2024-06-12 12:23:30,663][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.k_proj.lora_B.default.bias
	[2024-06-12 12:23:30,663][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.v_proj.base_layer.bias
	[2024-06-12 12:23:30,663][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.v_proj.lora_A.default.bias
	[2024-06-12 12:23:30,663][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.v_proj.lora_B.default.bias
	[2024-06-12 12:23:30,663][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.self_attn.o_proj.bias
	[2024-06-12 12:23:30,663][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.mlp.gate_proj.bias
	[2024-06-12 12:23:30,663][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.mlp.up_proj.bias
	[2024-06-12 12:23:30,663][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.8.mlp.down_proj.bias
	[2024-06-12 12:23:30,663][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.q_proj.base_layer.bias
	[2024-06-12 12:23:30,663][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.q_proj.lora_A.default.bias
	[2024-06-12 12:23:30,663][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.q_proj.lora_B.default.bias
	[2024-06-12 12:23:30,663][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.k_proj.base_layer.bias
	[2024-06-12 12:23:30,663][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.k_proj.lora_A.default.bias
	[2024-06-12 12:23:30,663][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.k_proj.lora_B.default.bias
	[2024-06-12 12:23:30,663][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.v_proj.base_layer.bias
	[2024-06-12 12:23:30,663][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.v_proj.lora_A.default.bias
	[2024-06-12 12:23:30,663][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.v_proj.lora_B.default.bias
	[2024-06-12 12:23:30,663][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.self_attn.o_proj.bias
	[2024-06-12 12:23:30,663][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.mlp.gate_proj.bias
	[2024-06-12 12:23:30,663][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.mlp.up_proj.bias
	[2024-06-12 12:23:30,663][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.9.mlp.down_proj.bias
	[2024-06-12 12:23:30,663][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.q_proj.base_layer.bias
	[2024-06-12 12:23:30,663][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.q_proj.lora_A.default.bias
	[2024-06-12 12:23:30,664][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.q_proj.lora_B.default.bias
	[2024-06-12 12:23:30,664][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.k_proj.base_layer.bias
	[2024-06-12 12:23:30,664][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.k_proj.lora_A.default.bias
	[2024-06-12 12:23:30,664][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.k_proj.lora_B.default.bias
	[2024-06-12 12:23:30,664][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.v_proj.base_layer.bias
	[2024-06-12 12:23:30,664][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.v_proj.lora_A.default.bias
	[2024-06-12 12:23:30,664][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.v_proj.lora_B.default.bias
	[2024-06-12 12:23:30,664][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.self_attn.o_proj.bias
	[2024-06-12 12:23:30,664][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.mlp.gate_proj.bias
	[2024-06-12 12:23:30,664][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.mlp.up_proj.bias
	[2024-06-12 12:23:30,664][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.10.mlp.down_proj.bias
	[2024-06-12 12:23:30,664][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.q_proj.base_layer.bias
	[2024-06-12 12:23:30,664][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.q_proj.lora_A.default.bias
	[2024-06-12 12:23:30,664][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.q_proj.lora_B.default.bias
	[2024-06-12 12:23:30,664][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.k_proj.base_layer.bias
	[2024-06-12 12:23:30,664][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.k_proj.lora_A.default.bias
	[2024-06-12 12:23:30,664][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.k_proj.lora_B.default.bias
	[2024-06-12 12:23:30,664][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.v_proj.base_layer.bias
	[2024-06-12 12:23:30,664][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.v_proj.lora_A.default.bias
	[2024-06-12 12:23:30,664][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.v_proj.lora_B.default.bias
	[2024-06-12 12:23:30,664][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.self_attn.o_proj.bias
	[2024-06-12 12:23:30,664][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.mlp.gate_proj.bias
	[2024-06-12 12:23:30,664][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.mlp.up_proj.bias
	[2024-06-12 12:23:30,664][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.11.mlp.down_proj.bias
	[2024-06-12 12:23:30,664][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.q_proj.base_layer.bias
	[2024-06-12 12:23:30,664][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.q_proj.lora_A.default.bias
	[2024-06-12 12:23:30,664][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.q_proj.lora_B.default.bias
	[2024-06-12 12:23:30,664][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.k_proj.base_layer.bias
	[2024-06-12 12:23:30,664][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.k_proj.lora_A.default.bias
	[2024-06-12 12:23:30,664][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.k_proj.lora_B.default.bias
	[2024-06-12 12:23:30,664][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.v_proj.base_layer.bias
	[2024-06-12 12:23:30,664][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.v_proj.lora_A.default.bias
	[2024-06-12 12:23:30,664][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.v_proj.lora_B.default.bias
	[2024-06-12 12:23:30,664][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.self_attn.o_proj.bias
	[2024-06-12 12:23:30,664][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.mlp.gate_proj.bias
	[2024-06-12 12:23:30,664][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.mlp.up_proj.bias
	[2024-06-12 12:23:30,664][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.12.mlp.down_proj.bias
	[2024-06-12 12:23:30,664][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.q_proj.base_layer.bias
	[2024-06-12 12:23:30,664][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.q_proj.lora_A.default.bias
	[2024-06-12 12:23:30,664][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.q_proj.lora_B.default.bias
	[2024-06-12 12:23:30,664][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.k_proj.base_layer.bias
	[2024-06-12 12:23:30,664][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.k_proj.lora_A.default.bias
	[2024-06-12 12:23:30,664][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.k_proj.lora_B.default.bias
	[2024-06-12 12:23:30,664][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.v_proj.base_layer.bias
	[2024-06-12 12:23:30,664][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.v_proj.lora_A.default.bias
	[2024-06-12 12:23:30,664][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.v_proj.lora_B.default.bias
	[2024-06-12 12:23:30,664][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.self_attn.o_proj.bias
	[2024-06-12 12:23:30,664][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.mlp.gate_proj.bias
	[2024-06-12 12:23:30,664][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.mlp.up_proj.bias
	[2024-06-12 12:23:30,664][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.13.mlp.down_proj.bias
	[2024-06-12 12:23:30,664][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.q_proj.base_layer.bias
	[2024-06-12 12:23:30,664][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.q_proj.lora_A.default.bias
	[2024-06-12 12:23:30,664][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.q_proj.lora_B.default.bias
	[2024-06-12 12:23:30,664][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.k_proj.base_layer.bias
	[2024-06-12 12:23:30,664][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.k_proj.lora_A.default.bias
	[2024-06-12 12:23:30,665][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.k_proj.lora_B.default.bias
	[2024-06-12 12:23:30,665][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.v_proj.base_layer.bias
	[2024-06-12 12:23:30,665][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.v_proj.lora_A.default.bias
	[2024-06-12 12:23:30,665][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.v_proj.lora_B.default.bias
	[2024-06-12 12:23:30,665][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.self_attn.o_proj.bias
	[2024-06-12 12:23:30,665][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.mlp.gate_proj.bias
	[2024-06-12 12:23:30,665][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.mlp.up_proj.bias
	[2024-06-12 12:23:30,665][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.14.mlp.down_proj.bias
	[2024-06-12 12:23:30,665][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.q_proj.base_layer.bias
	[2024-06-12 12:23:30,665][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.q_proj.lora_A.default.bias
	[2024-06-12 12:23:30,665][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.q_proj.lora_B.default.bias
	[2024-06-12 12:23:30,665][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.k_proj.base_layer.bias
	[2024-06-12 12:23:30,665][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.k_proj.lora_A.default.bias
	[2024-06-12 12:23:30,665][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.k_proj.lora_B.default.bias
	[2024-06-12 12:23:30,665][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.v_proj.base_layer.bias
	[2024-06-12 12:23:30,665][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.v_proj.lora_A.default.bias
	[2024-06-12 12:23:30,665][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.v_proj.lora_B.default.bias
	[2024-06-12 12:23:30,665][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.self_attn.o_proj.bias
	[2024-06-12 12:23:30,665][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.mlp.gate_proj.bias
	[2024-06-12 12:23:30,665][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.mlp.up_proj.bias
	[2024-06-12 12:23:30,665][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.15.mlp.down_proj.bias
	[2024-06-12 12:23:30,665][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.q_proj.base_layer.bias
	[2024-06-12 12:23:30,665][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.q_proj.lora_A.default.bias
	[2024-06-12 12:23:30,665][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.q_proj.lora_B.default.bias
	[2024-06-12 12:23:30,665][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.k_proj.base_layer.bias
	[2024-06-12 12:23:30,665][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.k_proj.lora_A.default.bias
	[2024-06-12 12:23:30,665][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.k_proj.lora_B.default.bias
	[2024-06-12 12:23:30,665][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.v_proj.base_layer.bias
	[2024-06-12 12:23:30,665][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.v_proj.lora_A.default.bias
	[2024-06-12 12:23:30,665][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.v_proj.lora_B.default.bias
	[2024-06-12 12:23:30,665][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.self_attn.o_proj.bias
	[2024-06-12 12:23:30,665][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.mlp.gate_proj.bias
	[2024-06-12 12:23:30,665][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.mlp.up_proj.bias
	[2024-06-12 12:23:30,665][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.16.mlp.down_proj.bias
	[2024-06-12 12:23:30,665][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.q_proj.base_layer.bias
	[2024-06-12 12:23:30,665][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.q_proj.lora_A.default.bias
	[2024-06-12 12:23:30,665][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.q_proj.lora_B.default.bias
	[2024-06-12 12:23:30,665][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.k_proj.base_layer.bias
	[2024-06-12 12:23:30,665][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.k_proj.lora_A.default.bias
	[2024-06-12 12:23:30,665][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.k_proj.lora_B.default.bias
	[2024-06-12 12:23:30,665][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.v_proj.base_layer.bias
	[2024-06-12 12:23:30,665][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.v_proj.lora_A.default.bias
	[2024-06-12 12:23:30,665][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.v_proj.lora_B.default.bias
	[2024-06-12 12:23:30,665][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.self_attn.o_proj.bias
	[2024-06-12 12:23:30,665][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.mlp.gate_proj.bias
	[2024-06-12 12:23:30,665][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.mlp.up_proj.bias
	[2024-06-12 12:23:30,665][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.17.mlp.down_proj.bias
	[2024-06-12 12:23:30,665][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.q_proj.base_layer.bias
	[2024-06-12 12:23:30,665][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.q_proj.lora_A.default.bias
	[2024-06-12 12:23:30,665][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.q_proj.lora_B.default.bias
	[2024-06-12 12:23:30,665][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.k_proj.base_layer.bias
	[2024-06-12 12:23:30,665][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.k_proj.lora_A.default.bias
	[2024-06-12 12:23:30,665][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.k_proj.lora_B.default.bias
	[2024-06-12 12:23:30,665][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.v_proj.base_layer.bias
	[2024-06-12 12:23:30,666][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.v_proj.lora_A.default.bias
	[2024-06-12 12:23:30,666][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.v_proj.lora_B.default.bias
	[2024-06-12 12:23:30,666][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.self_attn.o_proj.bias
	[2024-06-12 12:23:30,666][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.mlp.gate_proj.bias
	[2024-06-12 12:23:30,666][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.mlp.up_proj.bias
	[2024-06-12 12:23:30,666][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.18.mlp.down_proj.bias
	[2024-06-12 12:23:30,666][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.q_proj.base_layer.bias
	[2024-06-12 12:23:30,666][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.q_proj.lora_A.default.bias
	[2024-06-12 12:23:30,666][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.q_proj.lora_B.default.bias
	[2024-06-12 12:23:30,666][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.k_proj.base_layer.bias
	[2024-06-12 12:23:30,666][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.k_proj.lora_A.default.bias
	[2024-06-12 12:23:30,666][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.k_proj.lora_B.default.bias
	[2024-06-12 12:23:30,666][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.v_proj.base_layer.bias
	[2024-06-12 12:23:30,666][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.v_proj.lora_A.default.bias
	[2024-06-12 12:23:30,666][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.v_proj.lora_B.default.bias
	[2024-06-12 12:23:30,666][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.self_attn.o_proj.bias
	[2024-06-12 12:23:30,666][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.mlp.gate_proj.bias
	[2024-06-12 12:23:30,666][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.mlp.up_proj.bias
	[2024-06-12 12:23:30,666][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.19.mlp.down_proj.bias
	[2024-06-12 12:23:30,666][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.q_proj.base_layer.bias
	[2024-06-12 12:23:30,666][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.q_proj.lora_A.default.bias
	[2024-06-12 12:23:30,666][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.q_proj.lora_B.default.bias
	[2024-06-12 12:23:30,666][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.k_proj.base_layer.bias
	[2024-06-12 12:23:30,666][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.k_proj.lora_A.default.bias
	[2024-06-12 12:23:30,666][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.k_proj.lora_B.default.bias
	[2024-06-12 12:23:30,666][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.v_proj.base_layer.bias
	[2024-06-12 12:23:30,666][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.v_proj.lora_A.default.bias
	[2024-06-12 12:23:30,666][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.v_proj.lora_B.default.bias
	[2024-06-12 12:23:30,666][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.self_attn.o_proj.bias
	[2024-06-12 12:23:30,666][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.mlp.gate_proj.bias
	[2024-06-12 12:23:30,666][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.mlp.up_proj.bias
	[2024-06-12 12:23:30,666][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.20.mlp.down_proj.bias
	[2024-06-12 12:23:30,666][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.q_proj.base_layer.bias
	[2024-06-12 12:23:30,666][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.q_proj.lora_A.default.bias
	[2024-06-12 12:23:30,666][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.q_proj.lora_B.default.bias
	[2024-06-12 12:23:30,666][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.k_proj.base_layer.bias
	[2024-06-12 12:23:30,666][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.k_proj.lora_A.default.bias
	[2024-06-12 12:23:30,666][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.k_proj.lora_B.default.bias
	[2024-06-12 12:23:30,666][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.v_proj.base_layer.bias
	[2024-06-12 12:23:30,666][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.v_proj.lora_A.default.bias
	[2024-06-12 12:23:30,666][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.v_proj.lora_B.default.bias
	[2024-06-12 12:23:30,666][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.self_attn.o_proj.bias
	[2024-06-12 12:23:30,666][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.mlp.gate_proj.bias
	[2024-06-12 12:23:30,666][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.mlp.up_proj.bias
	[2024-06-12 12:23:30,666][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.21.mlp.down_proj.bias
	[2024-06-12 12:23:30,666][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.q_proj.base_layer.bias
	[2024-06-12 12:23:30,666][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.q_proj.lora_A.default.bias
	[2024-06-12 12:23:30,666][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.q_proj.lora_B.default.bias
	[2024-06-12 12:23:30,666][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.k_proj.base_layer.bias
	[2024-06-12 12:23:30,666][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.k_proj.lora_A.default.bias
	[2024-06-12 12:23:30,666][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.k_proj.lora_B.default.bias
	[2024-06-12 12:23:30,666][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.v_proj.base_layer.bias
	[2024-06-12 12:23:30,666][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.v_proj.lora_A.default.bias
	[2024-06-12 12:23:30,666][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.v_proj.lora_B.default.bias
	[2024-06-12 12:23:30,667][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.self_attn.o_proj.bias
	[2024-06-12 12:23:30,667][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.mlp.gate_proj.bias
	[2024-06-12 12:23:30,667][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.mlp.up_proj.bias
	[2024-06-12 12:23:30,667][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.22.mlp.down_proj.bias
	[2024-06-12 12:23:30,667][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.q_proj.base_layer.bias
	[2024-06-12 12:23:30,667][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.q_proj.lora_A.default.bias
	[2024-06-12 12:23:30,667][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.q_proj.lora_B.default.bias
	[2024-06-12 12:23:30,667][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.k_proj.base_layer.bias
	[2024-06-12 12:23:30,667][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.k_proj.lora_A.default.bias
	[2024-06-12 12:23:30,667][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.k_proj.lora_B.default.bias
	[2024-06-12 12:23:30,667][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.v_proj.base_layer.bias
	[2024-06-12 12:23:30,667][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.v_proj.lora_A.default.bias
	[2024-06-12 12:23:30,667][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.v_proj.lora_B.default.bias
	[2024-06-12 12:23:30,667][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.self_attn.o_proj.bias
	[2024-06-12 12:23:30,667][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.mlp.gate_proj.bias
	[2024-06-12 12:23:30,667][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.mlp.up_proj.bias
	[2024-06-12 12:23:30,667][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.23.mlp.down_proj.bias
	[2024-06-12 12:23:30,667][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.q_proj.base_layer.bias
	[2024-06-12 12:23:30,667][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.q_proj.lora_A.default.bias
	[2024-06-12 12:23:30,667][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.q_proj.lora_B.default.bias
	[2024-06-12 12:23:30,667][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.k_proj.base_layer.bias
	[2024-06-12 12:23:30,667][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.k_proj.lora_A.default.bias
	[2024-06-12 12:23:30,667][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.k_proj.lora_B.default.bias
	[2024-06-12 12:23:30,667][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.v_proj.base_layer.bias
	[2024-06-12 12:23:30,667][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.v_proj.lora_A.default.bias
	[2024-06-12 12:23:30,667][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.v_proj.lora_B.default.bias
	[2024-06-12 12:23:30,667][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.self_attn.o_proj.bias
	[2024-06-12 12:23:30,667][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.mlp.gate_proj.bias
	[2024-06-12 12:23:30,667][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.mlp.up_proj.bias
	[2024-06-12 12:23:30,667][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.24.mlp.down_proj.bias
	[2024-06-12 12:23:30,667][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.q_proj.base_layer.bias
	[2024-06-12 12:23:30,667][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.q_proj.lora_A.default.bias
	[2024-06-12 12:23:30,667][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.q_proj.lora_B.default.bias
	[2024-06-12 12:23:30,667][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.k_proj.base_layer.bias
	[2024-06-12 12:23:30,667][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.k_proj.lora_A.default.bias
	[2024-06-12 12:23:30,667][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.k_proj.lora_B.default.bias
	[2024-06-12 12:23:30,667][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.v_proj.base_layer.bias
	[2024-06-12 12:23:30,667][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.v_proj.lora_A.default.bias
	[2024-06-12 12:23:30,667][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.v_proj.lora_B.default.bias
	[2024-06-12 12:23:30,667][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.self_attn.o_proj.bias
	[2024-06-12 12:23:30,667][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.mlp.gate_proj.bias
	[2024-06-12 12:23:30,667][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.mlp.up_proj.bias
	[2024-06-12 12:23:30,667][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.25.mlp.down_proj.bias
	[2024-06-12 12:23:30,667][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.q_proj.base_layer.bias
	[2024-06-12 12:23:30,667][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.q_proj.lora_A.default.bias
	[2024-06-12 12:23:30,667][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.q_proj.lora_B.default.bias
	[2024-06-12 12:23:30,667][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.k_proj.base_layer.bias
	[2024-06-12 12:23:30,667][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.k_proj.lora_A.default.bias
	[2024-06-12 12:23:30,667][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.k_proj.lora_B.default.bias
	[2024-06-12 12:23:30,667][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.v_proj.base_layer.bias
	[2024-06-12 12:23:30,667][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.v_proj.lora_A.default.bias
	[2024-06-12 12:23:30,668][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.v_proj.lora_B.default.bias
	[2024-06-12 12:23:30,668][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.self_attn.o_proj.bias
	[2024-06-12 12:23:30,668][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.mlp.gate_proj.bias
	[2024-06-12 12:23:30,668][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.mlp.up_proj.bias
	[2024-06-12 12:23:30,668][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.26.mlp.down_proj.bias
	[2024-06-12 12:23:30,668][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.q_proj.base_layer.bias
	[2024-06-12 12:23:30,668][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.q_proj.lora_A.default.bias
	[2024-06-12 12:23:30,668][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.q_proj.lora_B.default.bias
	[2024-06-12 12:23:30,668][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.k_proj.base_layer.bias
	[2024-06-12 12:23:30,668][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.k_proj.lora_A.default.bias
	[2024-06-12 12:23:30,668][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.k_proj.lora_B.default.bias
	[2024-06-12 12:23:30,668][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.v_proj.base_layer.bias
	[2024-06-12 12:23:30,668][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.v_proj.lora_A.default.bias
	[2024-06-12 12:23:30,668][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.v_proj.lora_B.default.bias
	[2024-06-12 12:23:30,668][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.self_attn.o_proj.bias
	[2024-06-12 12:23:30,668][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.mlp.gate_proj.bias
	[2024-06-12 12:23:30,668][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.mlp.up_proj.bias
	[2024-06-12 12:23:30,668][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.27.mlp.down_proj.bias
	[2024-06-12 12:23:30,668][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.q_proj.base_layer.bias
	[2024-06-12 12:23:30,668][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.q_proj.lora_A.default.bias
	[2024-06-12 12:23:30,668][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.q_proj.lora_B.default.bias
	[2024-06-12 12:23:30,668][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.k_proj.base_layer.bias
	[2024-06-12 12:23:30,668][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.k_proj.lora_A.default.bias
	[2024-06-12 12:23:30,668][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.k_proj.lora_B.default.bias
	[2024-06-12 12:23:30,668][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.v_proj.base_layer.bias
	[2024-06-12 12:23:30,668][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.v_proj.lora_A.default.bias
	[2024-06-12 12:23:30,668][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.v_proj.lora_B.default.bias
	[2024-06-12 12:23:30,668][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.self_attn.o_proj.bias
	[2024-06-12 12:23:30,668][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.mlp.gate_proj.bias
	[2024-06-12 12:23:30,668][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.mlp.up_proj.bias
	[2024-06-12 12:23:30,668][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.28.mlp.down_proj.bias
	[2024-06-12 12:23:30,668][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.q_proj.base_layer.bias
	[2024-06-12 12:23:30,668][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.q_proj.lora_A.default.bias
	[2024-06-12 12:23:30,668][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.q_proj.lora_B.default.bias
	[2024-06-12 12:23:30,668][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.k_proj.base_layer.bias
	[2024-06-12 12:23:30,668][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.k_proj.lora_A.default.bias
	[2024-06-12 12:23:30,668][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.k_proj.lora_B.default.bias
	[2024-06-12 12:23:30,668][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.v_proj.base_layer.bias
	[2024-06-12 12:23:30,668][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.v_proj.lora_A.default.bias
	[2024-06-12 12:23:30,668][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.v_proj.lora_B.default.bias
	[2024-06-12 12:23:30,668][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.self_attn.o_proj.bias
	[2024-06-12 12:23:30,668][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.mlp.gate_proj.bias
	[2024-06-12 12:23:30,668][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.mlp.up_proj.bias
	[2024-06-12 12:23:30,668][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.29.mlp.down_proj.bias
	[2024-06-12 12:23:30,668][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.q_proj.base_layer.bias
	[2024-06-12 12:23:30,668][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.q_proj.lora_A.default.bias
	[2024-06-12 12:23:30,668][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.q_proj.lora_B.default.bias
	[2024-06-12 12:23:30,668][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.k_proj.base_layer.bias
	[2024-06-12 12:23:30,668][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.k_proj.lora_A.default.bias
	[2024-06-12 12:23:30,668][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.k_proj.lora_B.default.bias
	[2024-06-12 12:23:30,668][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.v_proj.base_layer.bias
	[2024-06-12 12:23:30,668][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.v_proj.lora_A.default.bias
	[2024-06-12 12:23:30,668][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.v_proj.lora_B.default.bias
	[2024-06-12 12:23:30,668][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.self_attn.o_proj.bias
	[2024-06-12 12:23:30,669][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.mlp.gate_proj.bias
	[2024-06-12 12:23:30,669][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.mlp.up_proj.bias
	[2024-06-12 12:23:30,669][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.30.mlp.down_proj.bias
	[2024-06-12 12:23:30,669][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.q_proj.base_layer.bias
	[2024-06-12 12:23:30,669][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.q_proj.lora_A.default.bias
	[2024-06-12 12:23:30,669][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.q_proj.lora_B.default.bias
	[2024-06-12 12:23:30,669][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.k_proj.base_layer.bias
	[2024-06-12 12:23:30,669][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.k_proj.lora_A.default.bias
	[2024-06-12 12:23:30,669][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.k_proj.lora_B.default.bias
	[2024-06-12 12:23:30,669][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.v_proj.base_layer.bias
	[2024-06-12 12:23:30,669][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.v_proj.lora_A.default.bias
	[2024-06-12 12:23:30,669][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.v_proj.lora_B.default.bias
	[2024-06-12 12:23:30,669][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.self_attn.o_proj.bias
	[2024-06-12 12:23:30,669][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.mlp.gate_proj.bias
	[2024-06-12 12:23:30,669][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.mlp.up_proj.bias
	[2024-06-12 12:23:30,669][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.model.layers.31.mlp.down_proj.bias
	[2024-06-12 12:23:30,669][fairseq.trainer][INFO] - detected shared parameter: encoder.w2v_model.feature_extractor_video.resnet.frontend3D.0.bias <- decoder.base_model.model.lm_head.bias
	[2024-06-12 12:23:30,669][fairseq.utils][INFO] - *********************CUDA enviroments for all 1 workers*********************
	[2024-06-12 12:23:30,669][fairseq.utils][INFO] - rank 0: capabilities = 8.6 ; total memory = 15.731 GB ; name = NVIDIA RTX A4000
	[2024-06-12 12:23:30,669][fairseq.utils][INFO] - *********************CUDA enviroments for all 1 workers*********************
	[2024-06-12 12:23:30,669][fairseq_cli.train][INFO] - training on 1 devices (GPUs/TPUs)
	[2024-06-12 12:23:30,669][fairseq_cli.train][INFO] - max tokens per device = None and max sentences per device = 1
	[2024-06-12 12:23:30,670][fairseq.trainer][INFO] - Preparing to load checkpoint checkpoints/checkpoint_last.pt
	[2024-06-12 12:23:30,670][fairseq.trainer][INFO] - No existing checkpoint found checkpoints/checkpoint_last.pt
	[2024-06-12 12:23:30,670][fairseq.trainer][INFO] - loading train data for epoch 1
	[2024-06-12 12:23:30,670][src.vsp_llm_training][INFO] - Using tokenizer
	[2024-06-12 12:23:30,839][src.vsp_llm_dataset][INFO] - max_keep=500, min_keep=None, loaded 120686, skipped 0 short and 0 long and 0 unaligned, longest-loaded=76, shortest-loaded=73
	[2024-06-12 12:23:31,211][src.vsp_llm_dataset][INFO] - /home/theodore/Projects/VSP-LLM/data/processed/vasr/train.wrd is sequence label. skipped
	[2024-06-12 12:23:31,212][src.vsp_llm_dataset][INFO] - image transform: Compose(
	Normalize(mean=0.0, std=255.0)
	RandomCrop(size=(88, 88))
	<src.utils_vsp_llm.HorizontalFlip object at 0x7bb483d3a5e0>
	Normalize(mean=0.421, std=0.165)
	)
	[2024-06-12 12:23:31,212][src.vsp_llm_dataset][INFO] - pad_audio=True, random_crop=False, normalize=True, max_sample_size=500, seqs2seq data=True,
	[2024-06-12 12:23:31,212][src.vsp_llm_dataset][INFO] - Noise wav: None->0 wav, Prob: 0.0, SNR: 0, Number of mixture: 1
	[2024-06-12 12:23:35,470][fairseq.trainer][INFO] - begin training epoch 1
	[2024-06-12 12:23:35,470][fairseq_cli.train][INFO] - Start iterating over samples
	[2024-06-12 12:29:11,545][train_inner][INFO] - {"epoch": 1, "update": 0.013, "loss": "7.621", "ntokens": "126.725", "acc_total": "126.725", "n_correct": "18.3", "wer_total": "126.725", "n_error": "108.34", "ppl": "196.85", "accuracy": "14.441", "wer": "85.492", "wps": "75.6", "ups": "0.6", "wpb": "126.7", "bsz": "8", "num_updates": "200", "lr": "1.49e-05", "gnorm": "8.772", "loss_scale": "128", "train_wall": "335", "gb_free": "7.1", "wall": "341"}
	[2024-06-12 12:34:48,292][train_inner][INFO] - {"epoch": 1, "update": 0.027, "loss": "6.197", "ntokens": "126.93", "acc_total": "126.93", "n_correct": "25.84", "wer_total": "126.93", "n_error": "100.905", "ppl": "73.37", "accuracy": "20.358", "wer": "79.497", "wps": "75.4", "ups": "0.59", "wpb": "126.9", "bsz": "8", "num_updates": "400", "lr": "2.48e-05", "gnorm": "3.724", "loss_scale": "128", "train_wall": "336", "gb_free": "7.1", "wall": "678"}
	[2024-06-12 12:40:24,687][train_inner][INFO] - {"epoch": 1, "update": 0.04, "loss": "6.074", "ntokens": "127.015", "acc_total": "127.015", "n_correct": "28.605", "wer_total": "127.015", "n_error": "98.125", "ppl": "67.36", "accuracy": "22.521", "wer": "77.255", "wps": "75.5", "ups": "0.59", "wpb": "127", "bsz": "8", "num_updates": "600", "lr": "3.47e-05", "gnorm": "3.96", "loss_scale": "128", "train_wall": "336", "gb_free": "7.1", "wall": "1014"}
	[2024-06-12 12:46:01,170][train_inner][INFO] - {"epoch": 1, "update": 0.053, "loss": "5.868", "ntokens": "126.865", "acc_total": "126.865", "n_correct": "30.655", "wer_total": "126.865", "n_error": "95.945", "ppl": "58.39", "accuracy": "24.163", "wer": "75.628", "wps": "75.4", "ups": "0.59", "wpb": "126.9", "bsz": "8", "num_updates": "800", "lr": "4.46e-05", "gnorm": "4.095", "loss_scale": "128", "train_wall": "336", "gb_free": "7.1", "wall": "1350"}
	[2024-06-12 12:51:37,718][train_inner][INFO] - {"epoch": 1, "update": 0.066, "loss": "5.931", "ntokens": "127.025", "acc_total": "127.025", "n_correct": "30.48", "wer_total": "127.025", "n_error": "96.285", "ppl": "61.01", "accuracy": "23.995", "wer": "75.8", "wps": "75.5", "ups": "0.59", "wpb": "127", "bsz": "8", "num_updates": "1000", "lr": "5.45e-05", "gnorm": "3.801", "loss_scale": "128", "train_wall": "336", "gb_free": "7.1", "wall": "1687"}
	[2024-06-12 12:57:14,725][train_inner][INFO] - {"epoch": 1, "update": 0.08, "loss": "5.883", "ntokens": "127.095", "acc_total": "127.095", "n_correct": "30.905", "wer_total": "127.095", "n_error": "95.94", "ppl": "59.01", "accuracy": "24.316", "wer": "75.487", "wps": "75.4", "ups": "0.59", "wpb": "127.1", "bsz": "8", "num_updates": "1200", "lr": "6.44e-05", "gnorm": "3.584", "loss_scale": "128", "train_wall": "336", "gb_free": "7.1", "wall": "2024"}
	[2024-06-12 13:02:51,187][train_inner][INFO] - {"epoch": 1, "update": 0.093, "loss": "5.723", "ntokens": "127.62", "acc_total": "127.62", "n_correct": "32.29", "wer_total": "127.62", "n_error": "95.125", "ppl": "52.83", "accuracy": "25.302", "wer": "74.538", "wps": "75.9", "ups": "0.59", "wpb": "127.6", "bsz": "8", "num_updates": "1400", "lr": "7.43e-05", "gnorm": "3.398", "loss_scale": "128", "train_wall": "336", "gb_free": "7.1", "wall": "2361"}
	[2024-06-12 13:08:27,809][train_inner][INFO] - {"epoch": 1, "update": 0.106, "loss": "5.736", "ntokens": "127.41", "acc_total": "127.41", "n_correct": "32.595", "wer_total": "127.41", "n_error": "94.54", "ppl": "53.3", "accuracy": "25.583", "wer": "74.201", "wps": "75.7", "ups": "0.59", "wpb": "127.4", "bsz": "8", "num_updates": "1600", "lr": "8.42e-05", "gnorm": "3.155", "loss_scale": "128", "train_wall": "336", "gb_free": "7.1", "wall": "2697"}
	[2024-06-12 13:14:03,760][train_inner][INFO] - {"epoch": 1, "update": 0.119, "loss": "5.776", "ntokens": "126.56", "acc_total": "126.56", "n_correct": "32.125", "wer_total": "126.56", "n_error": "94.19", "ppl": "54.81", "accuracy": "25.383", "wer": "74.423", "wps": "75.3", "ups": "0.6", "wpb": "126.6", "bsz": "8", "num_updates": "1800", "lr": "9.41e-05", "gnorm": "2.973", "loss_scale": "128", "train_wall": "335", "gb_free": "7.1", "wall": "3033"}
	[2024-06-12 13:19:39,496][train_inner][INFO] - {"epoch": 1, "update": 0.133, "loss": "5.677", "ntokens": "126.875", "acc_total": "126.875", "n_correct": "33.63", "wer_total": "126.875", "n_error": "93.05", "ppl": "51.17", "accuracy": "26.506", "wer": "73.34", "wps": "75.6", "ups": "0.6", "wpb": "126.9", "bsz": "8", "num_updates": "2000", "lr": "0.000104", "gnorm": "2.818", "loss_scale": "128", "train_wall": "335", "gb_free": "7.1", "wall": "3369"}
	[2024-06-12 13:25:16,543][train_inner][INFO] - {"epoch": 1, "update": 0.146, "loss": "5.594", "ntokens": "128.2", "acc_total": "128.2", "n_correct": "34.95", "wer_total": "128.2", "n_error": "92.94", "ppl": "48.3", "accuracy": "27.262", "wer": "72.496", "wps": "76.1", "ups": "0.59", "wpb": "128.2", "bsz": "8", "num_updates": "2200", "lr": "0.0001139", "gnorm": "2.791", "loss_scale": "256", "train_wall": "336", "gb_free": "7.1", "wall": "3706"}
	[2024-06-12 13:30:52,033][train_inner][INFO] - {"epoch": 1, "update": 0.159, "loss": "5.487", "ntokens": "127.775", "acc_total": "127.775", "n_correct": "37.185", "wer_total": "127.775", "n_error": "90.37", "ppl": "44.85", "accuracy": "29.102", "wer": "70.726", "wps": "76.2", "ups": "0.6", "wpb": "127.8", "bsz": "8", "num_updates": "2400", "lr": "0.0001238", "gnorm": "2.89", "loss_scale": "256", "train_wall": "335", "gb_free": "7.1", "wall": "4041"}
	[2024-06-12 13:36:29,126][train_inner][INFO] - {"epoch": 1, "update": 0.172, "loss": "5.285", "ntokens": "126.92", "acc_total": "126.92", "n_correct": "39.84", "wer_total": "126.92", "n_error": "86.81", "ppl": "38.99", "accuracy": "31.39", "wer": "68.397", "wps": "75.3", "ups": "0.59", "wpb": "126.9", "bsz": "8", "num_updates": "2600", "lr": "0.0001337", "gnorm": "3.334", "loss_scale": "256", "train_wall": "336", "gb_free": "7.1", "wall": "4378"}
	[2024-06-12 13:42:06,802][train_inner][INFO] - {"epoch": 1, "update": 0.186, "loss": "5.125", "ntokens": "125.685", "acc_total": "125.685", "n_correct": "42.48", "wer_total": "125.685", "n_error": "82.92", "ppl": "34.91", "accuracy": "33.799", "wer": "65.974", "wps": "74.4", "ups": "0.59", "wpb": "125.7", "bsz": "8", "num_updates": "2800", "lr": "0.0001436", "gnorm": "3.811", "loss_scale": "256", "train_wall": "337", "gb_free": "7.1", "wall": "4716"}
	[2024-06-12 13:47:43,577][train_inner][INFO] - {"epoch": 1, "update": 0.199, "loss": "4.901", "ntokens": "127.19", "acc_total": "127.19", "n_correct": "46.57", "wer_total": "127.19", "n_error": "80.35", "ppl": "29.87", "accuracy": "36.615", "wer": "63.173", "wps": "75.5", "ups": "0.59", "wpb": "127.2", "bsz": "8", "num_updates": "3000", "lr": "0.0001535", "gnorm": "4.027", "loss_scale": "256", "train_wall": "336", "gb_free": "7.1", "wall": "5053"}
	[2024-06-12 13:47:43,577][fairseq_cli.train][INFO] - begin validation on "valid" subset
	[2024-06-12 14:30:52,643][valid][INFO] - {"epoch": 1, "valid_loss": "4.602", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "7.31209", "valid_wer_total": "18.1585", "valid_n_error": "10.8328", "valid_ppl": "24.28", "valid_accuracy": "40.268", "valid_wer": "59.657", "valid_wps": "168.3", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "3000"}
	[2024-06-12 14:30:52,643][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 3000 updates
	[2024-06-12 14:30:52,644][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_3000.pt
	[2024-06-12 14:30:55,828][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_3000.pt
	[2024-06-12 14:30:58,721][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_3000.pt (epoch 1 @ 3000 updates, score 40.268) (writing took 6.077752015000442 seconds)
	[2024-06-12 14:36:35,084][train_inner][INFO] - {"epoch": 1, "update": 0.212, "loss": "4.631", "ntokens": "126.535", "acc_total": "126.535", "n_correct": "50.205", "wer_total": "126.535", "n_error": "76.125", "ppl": "24.78", "accuracy": "39.677", "wer": "60.161", "wps": "8.6", "ups": "0.07", "wpb": "126.5", "bsz": "8", "num_updates": "3200", "lr": "0.0001634", "gnorm": "4.2", "loss_scale": "256", "train_wall": "336", "gb_free": "7.1", "wall": "7984"}
	[2024-06-12 14:42:13,033][train_inner][INFO] - {"epoch": 1, "update": 0.225, "loss": "4.463", "ntokens": "126.53", "acc_total": "126.53", "n_correct": "52.3", "wer_total": "126.53", "n_error": "74.045", "ppl": "22.05", "accuracy": "41.334", "wer": "58.52", "wps": "74.9", "ups": "0.59", "wpb": "126.5", "bsz": "8", "num_updates": "3400", "lr": "0.0001733", "gnorm": "4.267", "loss_scale": "256", "train_wall": "337", "gb_free": "7.1", "wall": "8322"}
	[2024-06-12 14:47:50,828][train_inner][INFO] - {"epoch": 1, "update": 0.239, "loss": "4.236", "ntokens": "127.025", "acc_total": "127.025", "n_correct": "55.67", "wer_total": "127.025", "n_error": "71.24", "ppl": "18.85", "accuracy": "43.826", "wer": "56.083", "wps": "75.2", "ups": "0.59", "wpb": "127", "bsz": "8", "num_updates": "3600", "lr": "0.0001832", "gnorm": "4.4", "loss_scale": "256", "train_wall": "337", "gb_free": "7.1", "wall": "8660"}
	[2024-06-12 14:53:28,101][train_inner][INFO] - {"epoch": 1, "update": 0.252, "loss": "4.07", "ntokens": "127.35", "acc_total": "127.35", "n_correct": "57.62", "wer_total": "127.35", "n_error": "69.525", "ppl": "16.79", "accuracy": "45.245", "wer": "54.594", "wps": "75.5", "ups": "0.59", "wpb": "127.3", "bsz": "8", "num_updates": "3800", "lr": "0.0001931", "gnorm": "4.358", "loss_scale": "256", "train_wall": "337", "gb_free": "7.1", "wall": "8997"}
	[2024-06-12 14:59:04,977][train_inner][INFO] - {"epoch": 1, "update": 0.265, "loss": "3.95", "ntokens": "127.785", "acc_total": "127.785", "n_correct": "59.725", "wer_total": "127.785", "n_error": "67.895", "ppl": "15.46", "accuracy": "46.739", "wer": "53.132", "wps": "75.9", "ups": "0.59", "wpb": "127.8", "bsz": "8", "num_updates": "4000", "lr": "0.000203", "gnorm": "4.416", "loss_scale": "256", "train_wall": "336", "gb_free": "7.1", "wall": "9334"}
	[2024-06-12 15:04:41,368][train_inner][INFO] - {"epoch": 1, "update": 0.278, "loss": "3.788", "ntokens": "126.255", "acc_total": "126.255", "n_correct": "60.775", "wer_total": "126.255", "n_error": "65.32", "ppl": "13.81", "accuracy": "48.137", "wer": "51.737", "wps": "75.1", "ups": "0.59", "wpb": "126.3", "bsz": "8", "num_updates": "4200", "lr": "0.0002129", "gnorm": "4.421", "loss_scale": "512", "train_wall": "336", "gb_free": "7.1", "wall": "9671"}
	[2024-06-12 15:10:18,145][train_inner][INFO] - {"epoch": 1, "update": 0.292, "loss": "3.673", "ntokens": "125.9", "acc_total": "125.9", "n_correct": "62.14", "wer_total": "125.9", "n_error": "63.63", "ppl": "12.75", "accuracy": "49.357", "wer": "50.54", "wps": "74.8", "ups": "0.59", "wpb": "125.9", "bsz": "8", "num_updates": "4400", "lr": "0.0002228", "gnorm": "4.37", "loss_scale": "512", "train_wall": "336", "gb_free": "7.1", "wall": "10007"}
	[2024-06-12 15:15:55,142][train_inner][INFO] - {"epoch": 1, "update": 0.305, "loss": "3.525", "ntokens": "127.885", "acc_total": "127.885", "n_correct": "65.025", "wer_total": "127.885", "n_error": "62.72", "ppl": "11.52", "accuracy": "50.846", "wer": "49.044", "wps": "75.9", "ups": "0.59", "wpb": "127.9", "bsz": "8", "num_updates": "4600", "lr": "0.0002327", "gnorm": "4.404", "loss_scale": "512", "train_wall": "336", "gb_free": "7.1", "wall": "10344"}
	[2024-06-12 15:21:33,489][train_inner][INFO] - {"epoch": 1, "update": 0.318, "loss": "3.433", "ntokens": "126.21", "acc_total": "126.21", "n_correct": "65.295", "wer_total": "126.21", "n_error": "60.785", "ppl": "10.8", "accuracy": "51.735", "wer": "48.162", "wps": "74.6", "ups": "0.59", "wpb": "126.2", "bsz": "8", "num_updates": "4800", "lr": "0.0002426", "gnorm": "4.44", "loss_scale": "512", "train_wall": "337", "gb_free": "7.1", "wall": "10683"}
	[2024-06-12 15:27:10,699][train_inner][INFO] - {"epoch": 1, "update": 0.331, "loss": "3.362", "ntokens": "126.87", "acc_total": "126.87", "n_correct": "66.01", "wer_total": "126.87", "n_error": "60.705", "ppl": "10.28", "accuracy": "52.03", "wer": "47.848", "wps": "75.2", "ups": "0.59", "wpb": "126.9", "bsz": "8", "num_updates": "5000", "lr": "0.0002525", "gnorm": "4.329", "loss_scale": "512", "train_wall": "336", "gb_free": "7.1", "wall": "11020"}
	[2024-06-12 15:32:49,053][train_inner][INFO] - {"epoch": 1, "update": 0.345, "loss": "3.132", "ntokens": "126.51", "acc_total": "126.51", "n_correct": "69.12", "wer_total": "126.51", "n_error": "57.255", "ppl": "8.77", "accuracy": "54.636", "wer": "45.257", "wps": "74.8", "ups": "0.59", "wpb": "126.5", "bsz": "8", "num_updates": "5200", "lr": "0.0002624", "gnorm": "4.173", "loss_scale": "512", "train_wall": "338", "gb_free": "7.1", "wall": "11358"}
	[2024-06-12 15:38:25,766][train_inner][INFO] - {"epoch": 1, "update": 0.358, "loss": "3.154", "ntokens": "127.425", "acc_total": "127.425", "n_correct": "69.395", "wer_total": "127.425", "n_error": "57.935", "ppl": "8.9", "accuracy": "54.459", "wer": "45.466", "wps": "75.7", "ups": "0.59", "wpb": "127.4", "bsz": "8", "num_updates": "5400", "lr": "0.0002723", "gnorm": "4.276", "loss_scale": "512", "train_wall": "336", "gb_free": "7.1", "wall": "11695"}
	[2024-06-12 15:44:03,001][train_inner][INFO] - {"epoch": 1, "update": 0.371, "loss": "3.018", "ntokens": "127.52", "acc_total": "127.52", "n_correct": "70.84", "wer_total": "127.52", "n_error": "56.5", "ppl": "8.1", "accuracy": "55.552", "wer": "44.307", "wps": "75.6", "ups": "0.59", "wpb": "127.5", "bsz": "8", "num_updates": "5600", "lr": "0.0002822", "gnorm": "4.192", "loss_scale": "512", "train_wall": "337", "gb_free": "7.1", "wall": "12032"}
	[2024-06-12 15:49:39,617][train_inner][INFO] - {"epoch": 1, "update": 0.384, "loss": "3.123", "ntokens": "126.59", "acc_total": "126.59", "n_correct": "69.525", "wer_total": "126.59", "n_error": "56.895", "ppl": "8.71", "accuracy": "54.921", "wer": "44.944", "wps": "75.2", "ups": "0.59", "wpb": "126.6", "bsz": "8", "num_updates": "5800", "lr": "0.0002921", "gnorm": "4.146", "loss_scale": "512", "train_wall": "336", "gb_free": "7.1", "wall": "12369"}
	[2024-06-12 15:55:16,089][train_inner][INFO] - {"epoch": 1, "update": 0.398, "loss": "2.965", "ntokens": "127.395", "acc_total": "127.395", "n_correct": "71.705", "wer_total": "127.395", "n_error": "55.6", "ppl": "7.81", "accuracy": "56.286", "wer": "43.644", "wps": "75.7", "ups": "0.59", "wpb": "127.4", "bsz": "8", "num_updates": "6000", "lr": "0.000302", "gnorm": "4.068", "loss_scale": "512", "train_wall": "336", "gb_free": "7.1", "wall": "12705"}
	[2024-06-12 15:55:16,089][fairseq_cli.train][INFO] - begin validation on "valid" subset
	[2024-06-12 16:37:27,482][valid][INFO] - {"epoch": 1, "valid_loss": "2.643", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "10.9619", "valid_wer_total": "18.1585", "valid_n_error": "7.18274", "valid_ppl": "6.24", "valid_accuracy": "60.368", "valid_wer": "39.556", "valid_wps": "172.1", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "6000", "valid_best_accuracy": "60.368"}
	[2024-06-12 16:37:27,483][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 6000 updates
	[2024-06-12 16:37:27,483][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_6000.pt
	[2024-06-12 16:37:30,647][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_6000.pt
	[2024-06-12 16:37:35,223][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_6000.pt (epoch 1 @ 6000 updates, score 60.368) (writing took 7.739816330984468 seconds)
	[2024-06-12 16:43:11,630][train_inner][INFO] - {"epoch": 1, "update": 0.411, "loss": "2.887", "ntokens": "126.995", "acc_total": "126.995", "n_correct": "72.57", "wer_total": "126.995", "n_error": "54.255", "ppl": "7.4", "accuracy": "57.144", "wer": "42.722", "wps": "8.8", "ups": "0.07", "wpb": "127", "bsz": "8", "num_updates": "6200", "lr": "0.0003119", "gnorm": "4.016", "loss_scale": "1024", "train_wall": "335", "gb_free": "7.1", "wall": "15581"}
	[2024-06-12 16:48:48,948][train_inner][INFO] - {"epoch": 1, "update": 0.424, "loss": "2.941", "ntokens": "126.01", "acc_total": "126.01", "n_correct": "71.41", "wer_total": "126.01", "n_error": "54.47", "ppl": "7.68", "accuracy": "56.67", "wer": "43.227", "wps": "74.7", "ups": "0.59", "wpb": "126", "bsz": "8", "num_updates": "6400", "lr": "0.0003218", "gnorm": "4.072", "loss_scale": "1024", "train_wall": "337", "gb_free": "7.1", "wall": "15918"}
	[2024-06-12 16:54:25,865][train_inner][INFO] - {"epoch": 1, "update": 0.437, "loss": "2.839", "ntokens": "126.33", "acc_total": "126.33", "n_correct": "72.75", "wer_total": "126.33", "n_error": "53.45", "ppl": "7.16", "accuracy": "57.587", "wer": "42.31", "wps": "75", "ups": "0.59", "wpb": "126.3", "bsz": "8", "num_updates": "6600", "lr": "0.0003317", "gnorm": "4.074", "loss_scale": "1024", "train_wall": "336", "gb_free": "7.1", "wall": "16255"}
	[2024-06-12 17:00:04,482][train_inner][INFO] - {"epoch": 1, "update": 0.451, "loss": "2.797", "ntokens": "126.735", "acc_total": "126.735", "n_correct": "73.405", "wer_total": "126.735", "n_error": "53.185", "ppl": "6.95", "accuracy": "57.92", "wer": "41.966", "wps": "74.9", "ups": "0.59", "wpb": "126.7", "bsz": "8", "num_updates": "6800", "lr": "0.0003416", "gnorm": "3.958", "loss_scale": "1024", "train_wall": "337", "gb_free": "7.1", "wall": "16594"}
	[2024-06-12 17:05:41,900][train_inner][INFO] - {"epoch": 1, "update": 0.464, "loss": "2.718", "ntokens": "126.98", "acc_total": "126.98", "n_correct": "74.695", "wer_total": "126.98", "n_error": "52.17", "ppl": "6.58", "accuracy": "58.824", "wer": "41.085", "wps": "75.3", "ups": "0.59", "wpb": "127", "bsz": "8", "num_updates": "7000", "lr": "0.0003515", "gnorm": "3.968", "loss_scale": "1024", "train_wall": "337", "gb_free": "7.1", "wall": "16931"}
	[2024-06-12 17:11:18,224][train_inner][INFO] - {"epoch": 1, "update": 0.477, "loss": "2.798", "ntokens": "126.97", "acc_total": "126.97", "n_correct": "73.81", "wer_total": "126.97", "n_error": "53.015", "ppl": "6.96", "accuracy": "58.132", "wer": "41.754", "wps": "75.5", "ups": "0.59", "wpb": "127", "bsz": "8", "num_updates": "7200", "lr": "0.0003614", "gnorm": "4.074", "loss_scale": "1024", "train_wall": "336", "gb_free": "7.1", "wall": "17268"}
	[2024-06-12 17:16:55,466][train_inner][INFO] - {"epoch": 1, "update": 0.491, "loss": "2.615", "ntokens": "127.45", "acc_total": "127.45", "n_correct": "76.63", "wer_total": "127.45", "n_error": "50.7", "ppl": "6.12", "accuracy": "60.126", "wer": "39.78", "wps": "75.6", "ups": "0.59", "wpb": "127.5", "bsz": "8", "num_updates": "7400", "lr": "0.0003713", "gnorm": "3.952", "loss_scale": "1024", "train_wall": "336", "gb_free": "7.1", "wall": "17605"}
	[2024-06-12 17:22:33,421][train_inner][INFO] - {"epoch": 1, "update": 0.504, "loss": "2.64", "ntokens": "127.32", "acc_total": "127.32", "n_correct": "75.895", "wer_total": "127.32", "n_error": "51.29", "ppl": "6.23", "accuracy": "59.61", "wer": "40.284", "wps": "75.3", "ups": "0.59", "wpb": "127.3", "bsz": "8", "num_updates": "7600", "lr": "0.0003812", "gnorm": "4.053", "loss_scale": "1024", "train_wall": "337", "gb_free": "7.1", "wall": "17943"}
	[2024-06-12 17:28:09,809][train_inner][INFO] - {"epoch": 1, "update": 0.517, "loss": "2.647", "ntokens": "127.11", "acc_total": "127.11", "n_correct": "75.865", "wer_total": "127.11", "n_error": "51.18", "ppl": "6.26", "accuracy": "59.685", "wer": "40.264", "wps": "75.6", "ups": "0.59", "wpb": "127.1", "bsz": "8", "num_updates": "7800", "lr": "0.0003911", "gnorm": "4.096", "loss_scale": "1024", "train_wall": "336", "gb_free": "7.1", "wall": "18279"}
	[2024-06-12 17:33:47,881][train_inner][INFO] - {"epoch": 1, "update": 0.53, "loss": "2.606", "ntokens": "126.875", "acc_total": "126.875", "n_correct": "75.87", "wer_total": "126.875", "n_error": "50.88", "ppl": "6.09", "accuracy": "59.799", "wer": "40.102", "wps": "75.1", "ups": "0.59", "wpb": "126.9", "bsz": "8", "num_updates": "8000", "lr": "0.000401", "gnorm": "3.96", "loss_scale": "1024", "train_wall": "337", "gb_free": "7.1", "wall": "18617"}
	[2024-06-12 17:39:25,208][train_inner][INFO] - {"epoch": 1, "update": 0.544, "loss": "2.565", "ntokens": "126.05", "acc_total": "126.05", "n_correct": "76.09", "wer_total": "126.05", "n_error": "49.895", "ppl": "5.92", "accuracy": "60.365", "wer": "39.583", "wps": "74.7", "ups": "0.59", "wpb": "126", "bsz": "8", "num_updates": "8200", "lr": "0.0004109", "gnorm": "3.977", "loss_scale": "2048", "train_wall": "336", "gb_free": "7.1", "wall": "18955"}
	[2024-06-12 17:45:04,920][train_inner][INFO] - {"epoch": 1, "update": 0.557, "loss": "2.536", "ntokens": "126.785", "acc_total": "126.785", "n_correct": "77.1", "wer_total": "126.785", "n_error": "49.58", "ppl": "5.8", "accuracy": "60.812", "wer": "39.106", "wps": "74.6", "ups": "0.59", "wpb": "126.8", "bsz": "8", "num_updates": "8400", "lr": "0.0004208", "gnorm": "4.112", "loss_scale": "2048", "train_wall": "337", "gb_free": "7.1", "wall": "19294"}
	[2024-06-12 17:50:43,344][train_inner][INFO] - {"epoch": 1, "update": 0.57, "loss": "2.537", "ntokens": "126.675", "acc_total": "126.675", "n_correct": "76.815", "wer_total": "126.675", "n_error": "49.735", "ppl": "5.8", "accuracy": "60.639", "wer": "39.262", "wps": "74.9", "ups": "0.59", "wpb": "126.7", "bsz": "8", "num_updates": "8600", "lr": "0.0004307", "gnorm": "4.051", "loss_scale": "2048", "train_wall": "338", "gb_free": "7.1", "wall": "19633"}
	[2024-06-12 17:56:22,700][train_inner][INFO] - {"epoch": 1, "update": 0.583, "loss": "2.545", "ntokens": "127.09", "acc_total": "127.09", "n_correct": "77.395", "wer_total": "127.09", "n_error": "49.6", "ppl": "5.84", "accuracy": "60.898", "wer": "39.027", "wps": "74.9", "ups": "0.59", "wpb": "127.1", "bsz": "8", "num_updates": "8800", "lr": "0.0004406", "gnorm": "4.003", "loss_scale": "2048", "train_wall": "337", "gb_free": "7.1", "wall": "19972"}
	[2024-06-12 18:01:59,908][train_inner][INFO] - {"epoch": 1, "update": 0.597, "loss": "2.515", "ntokens": "127.43", "acc_total": "127.43", "n_correct": "78.42", "wer_total": "127.43", "n_error": "48.895", "ppl": "5.71", "accuracy": "61.54", "wer": "38.37", "wps": "75.6", "ups": "0.59", "wpb": "127.4", "bsz": "8", "num_updates": "9000", "lr": "0.0004505", "gnorm": "3.91", "loss_scale": "2048", "train_wall": "337", "gb_free": "7.1", "wall": "20309"}
	[2024-06-12 18:01:59,909][fairseq_cli.train][INFO] - begin validation on "valid" subset
	[2024-06-12 18:44:07,953][valid][INFO] - {"epoch": 1, "valid_loss": "2.174", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "11.8654", "valid_wer_total": "18.1585", "valid_n_error": "6.28253", "valid_ppl": "4.51", "valid_accuracy": "65.344", "valid_wer": "34.598", "valid_wps": "172.3", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "9000", "valid_best_accuracy": "65.344"}
	[2024-06-12 18:44:07,954][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 9000 updates
	[2024-06-12 18:44:07,954][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_9000.pt
	[2024-06-12 18:44:11,150][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_9000.pt
	[2024-06-12 18:44:15,629][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_9000.pt (epoch 1 @ 9000 updates, score 65.344) (writing took 7.674968866980635 seconds)
	[2024-06-12 18:49:51,782][train_inner][INFO] - {"epoch": 1, "update": 0.61, "loss": "2.494", "ntokens": "127.145", "acc_total": "127.145", "n_correct": "77.92", "wer_total": "127.145", "n_error": "49.15", "ppl": "5.63", "accuracy": "61.284", "wer": "38.657", "wps": "8.9", "ups": "0.07", "wpb": "127.1", "bsz": "8", "num_updates": "9200", "lr": "0.0004604", "gnorm": "4.134", "loss_scale": "2048", "train_wall": "335", "gb_free": "7.1", "wall": "23181"}
	[2024-06-12 18:55:26,787][train_inner][INFO] - {"epoch": 1, "update": 0.623, "loss": "2.506", "ntokens": "125.995", "acc_total": "125.995", "n_correct": "76.845", "wer_total": "125.995", "n_error": "49.07", "ppl": "5.68", "accuracy": "60.991", "wer": "38.946", "wps": "75.2", "ups": "0.6", "wpb": "126", "bsz": "8", "num_updates": "9400", "lr": "0.0004703", "gnorm": "4.018", "loss_scale": "2048", "train_wall": "334", "gb_free": "7.1", "wall": "23516"}
	[2024-06-12 19:01:02,880][train_inner][INFO] - {"epoch": 1, "update": 0.636, "loss": "2.458", "ntokens": "126.87", "acc_total": "126.87", "n_correct": "77.92", "wer_total": "126.87", "n_error": "48.815", "ppl": "5.49", "accuracy": "61.417", "wer": "38.476", "wps": "75.5", "ups": "0.6", "wpb": "126.9", "bsz": "8", "num_updates": "9600", "lr": "0.0004802", "gnorm": "4.03", "loss_scale": "2048", "train_wall": "335", "gb_free": "7.1", "wall": "23852"}
	[2024-06-12 19:02:19,781][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0
	[2024-06-12 19:06:39,784][train_inner][INFO] - {"epoch": 1, "update": 0.65, "loss": "2.432", "ntokens": "125.575", "acc_total": "125.575", "n_correct": "77.655", "wer_total": "125.575", "n_error": "47.82", "ppl": "5.4", "accuracy": "61.84", "wer": "38.081", "wps": "74.5", "ups": "0.59", "wpb": "125.6", "bsz": "8", "num_updates": "9800", "lr": "0.0004901", "gnorm": "4.054", "loss_scale": "1024", "train_wall": "336", "gb_free": "7.1", "wall": "24189"}
	[2024-06-12 19:12:16,188][train_inner][INFO] - {"epoch": 1, "update": 0.663, "loss": "2.421", "ntokens": "127.655", "acc_total": "127.655", "n_correct": "79.17", "wer_total": "127.655", "n_error": "48.375", "ppl": "5.36", "accuracy": "62.019", "wer": "37.895", "wps": "75.9", "ups": "0.59", "wpb": "127.7", "bsz": "8", "num_updates": "10000", "lr": "0.0005", "gnorm": "4.067", "loss_scale": "1024", "train_wall": "336", "gb_free": "7.1", "wall": "24526"}
	[2024-06-12 19:17:52,993][train_inner][INFO] - {"epoch": 1, "update": 0.676, "loss": "2.409", "ntokens": "126.56", "acc_total": "126.56", "n_correct": "79.24", "wer_total": "126.56", "n_error": "47.235", "ppl": "5.31", "accuracy": "62.611", "wer": "37.322", "wps": "75.2", "ups": "0.59", "wpb": "126.6", "bsz": "8", "num_updates": "10200", "lr": "0.000485243", "gnorm": "4.148", "loss_scale": "1024", "train_wall": "336", "gb_free": "7.1", "wall": "24862"}
	[2024-06-12 19:23:28,533][train_inner][INFO] - {"epoch": 1, "update": 0.689, "loss": "2.379", "ntokens": "127.215", "acc_total": "127.215", "n_correct": "80.38", "wer_total": "127.215", "n_error": "46.7", "ppl": "5.2", "accuracy": "63.184", "wer": "36.71", "wps": "75.8", "ups": "0.6", "wpb": "127.2", "bsz": "8", "num_updates": "10400", "lr": "0.000470922", "gnorm": "4.085", "loss_scale": "1024", "train_wall": "335", "gb_free": "7.1", "wall": "25198"}
	[2024-06-12 19:29:06,355][train_inner][INFO] - {"epoch": 1, "update": 0.703, "loss": "2.373", "ntokens": "127.885", "acc_total": "127.885", "n_correct": "79.73", "wer_total": "127.885", "n_error": "48.08", "ppl": "5.18", "accuracy": "62.345", "wer": "37.596", "wps": "75.7", "ups": "0.59", "wpb": "127.9", "bsz": "8", "num_updates": "10600", "lr": "0.000457024", "gnorm": "3.94", "loss_scale": "1024", "train_wall": "336", "gb_free": "7.1", "wall": "25536"}
	[2024-06-12 19:34:42,678][train_inner][INFO] - {"epoch": 1, "update": 0.716, "loss": "2.336", "ntokens": "127.33", "acc_total": "127.33", "n_correct": "80.505", "wer_total": "127.33", "n_error": "46.78", "ppl": "5.05", "accuracy": "63.225", "wer": "36.739", "wps": "75.7", "ups": "0.59", "wpb": "127.3", "bsz": "8", "num_updates": "10800", "lr": "0.000443536", "gnorm": "3.994", "loss_scale": "1024", "train_wall": "336", "gb_free": "7.1", "wall": "25872"}
	[2024-06-12 19:40:17,412][train_inner][INFO] - {"epoch": 1, "update": 0.729, "loss": "2.282", "ntokens": "126.09", "acc_total": "126.09", "n_correct": "81.35", "wer_total": "126.09", "n_error": "44.65", "ppl": "4.86", "accuracy": "64.517", "wer": "35.411", "wps": "75.3", "ups": "0.6", "wpb": "126.1", "bsz": "8", "num_updates": "11000", "lr": "0.000430446", "gnorm": "4.031", "loss_scale": "1024", "train_wall": "334", "gb_free": "7.1", "wall": "26207"}
	[2024-06-12 19:45:55,035][train_inner][INFO] - {"epoch": 1, "update": 0.742, "loss": "2.231", "ntokens": "126.625", "acc_total": "126.625", "n_correct": "81.92", "wer_total": "126.625", "n_error": "44.62", "ppl": "4.69", "accuracy": "64.695", "wer": "35.238", "wps": "75", "ups": "0.59", "wpb": "126.6", "bsz": "8", "num_updates": "11200", "lr": "0.000417742", "gnorm": "3.905", "loss_scale": "1024", "train_wall": "335", "gb_free": "7.1", "wall": "26544"}
	[2024-06-12 19:51:29,535][train_inner][INFO] - {"epoch": 1, "update": 0.756, "loss": "2.226", "ntokens": "125.82", "acc_total": "125.82", "n_correct": "83.8", "wer_total": "125.82", "n_error": "41.91", "ppl": "4.68", "accuracy": "66.603", "wer": "33.309", "wps": "75.2", "ups": "0.6", "wpb": "125.8", "bsz": "8", "num_updates": "11400", "lr": "0.000405413", "gnorm": "4.011", "loss_scale": "1024", "train_wall": "334", "gb_free": "7.1", "wall": "26879"}
	[2024-06-12 19:57:04,593][train_inner][INFO] - {"epoch": 1, "update": 0.769, "loss": "2.222", "ntokens": "126.955", "acc_total": "126.955", "n_correct": "82.895", "wer_total": "126.955", "n_error": "43.995", "ppl": "4.67", "accuracy": "65.295", "wer": "34.654", "wps": "75.8", "ups": "0.6", "wpb": "127", "bsz": "8", "num_updates": "11600", "lr": "0.000393448", "gnorm": "3.825", "loss_scale": "1024", "train_wall": "334", "gb_free": "7.1", "wall": "27214"}
	[2024-06-12 20:02:42,717][train_inner][INFO] - {"epoch": 1, "update": 0.782, "loss": "2.153", "ntokens": "126.705", "acc_total": "126.705", "n_correct": "83.16", "wer_total": "126.705", "n_error": "43.48", "ppl": "4.45", "accuracy": "65.633", "wer": "34.316", "wps": "74.9", "ups": "0.59", "wpb": "126.7", "bsz": "8", "num_updates": "11800", "lr": "0.000381836", "gnorm": "3.745", "loss_scale": "2048", "train_wall": "335", "gb_free": "7.1", "wall": "27552"}
	[2024-06-12 20:08:20,201][train_inner][INFO] - {"epoch": 1, "update": 0.796, "loss": "2.137", "ntokens": "126.675", "acc_total": "126.675", "n_correct": "83.845", "wer_total": "126.675", "n_error": "42.775", "ppl": "4.4", "accuracy": "66.189", "wer": "33.768", "wps": "75.1", "ups": "0.59", "wpb": "126.7", "bsz": "8", "num_updates": "12000", "lr": "0.000370567", "gnorm": "3.648", "loss_scale": "2048", "train_wall": "337", "gb_free": "7.1", "wall": "27890"}
	[2024-06-12 20:08:20,201][fairseq_cli.train][INFO] - begin validation on "valid" subset
	[2024-06-12 20:50:28,266][valid][INFO] - {"epoch": 1, "valid_loss": "1.856", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "13.2521", "valid_wer_total": "18.1585", "valid_n_error": "4.90083", "valid_ppl": "3.62", "valid_accuracy": "72.98", "valid_wer": "26.989", "valid_wps": "172.3", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "12000", "valid_best_accuracy": "72.98"}
	[2024-06-12 20:50:28,267][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 12000 updates
	[2024-06-12 20:50:28,267][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_12000.pt
	[2024-06-12 20:50:31,471][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_12000.pt
	[2024-06-12 20:50:35,774][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_12000.pt (epoch 1 @ 12000 updates, score 72.98) (writing took 7.507050585991237 seconds)
	[2024-06-12 20:55:39,444][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0
	[2024-06-12 20:56:15,053][train_inner][INFO] - {"epoch": 1, "update": 0.809, "loss": "2.162", "ntokens": "126.88", "acc_total": "126.88", "n_correct": "83.425", "wer_total": "126.88", "n_error": "43.425", "ppl": "4.48", "accuracy": "65.751", "wer": "34.225", "wps": "8.8", "ups": "0.07", "wpb": "126.9", "bsz": "8", "num_updates": "12200", "lr": "0.000359631", "gnorm": "3.917", "loss_scale": "1024", "train_wall": "338", "gb_free": "7.1", "wall": "30764"}
	[2024-06-12 21:01:50,869][train_inner][INFO] - {"epoch": 1, "update": 0.822, "loss": "2.143", "ntokens": "126.95", "acc_total": "126.95", "n_correct": "82.95", "wer_total": "126.95", "n_error": "43.94", "ppl": "4.42", "accuracy": "65.341", "wer": "34.612", "wps": "75.6", "ups": "0.6", "wpb": "127", "bsz": "8", "num_updates": "12400", "lr": "0.000349017", "gnorm": "4", "loss_scale": "1024", "train_wall": "335", "gb_free": "7.1", "wall": "31100"}
	[2024-06-12 21:07:27,328][train_inner][INFO] - {"epoch": 1, "update": 0.835, "loss": "2.051", "ntokens": "126.58", "acc_total": "126.58", "n_correct": "83.795", "wer_total": "126.58", "n_error": "42.7", "ppl": "4.14", "accuracy": "66.199", "wer": "33.734", "wps": "75.2", "ups": "0.59", "wpb": "126.6", "bsz": "8", "num_updates": "12600", "lr": "0.000338716", "gnorm": "3.732", "loss_scale": "1024", "train_wall": "335", "gb_free": "7.1", "wall": "31437"}
	[2024-06-12 21:13:03,830][train_inner][INFO] - {"epoch": 1, "update": 0.849, "loss": "2.025", "ntokens": "127.27", "acc_total": "127.27", "n_correct": "84.21", "wer_total": "127.27", "n_error": "43.005", "ppl": "4.07", "accuracy": "66.166", "wer": "33.79", "wps": "75.6", "ups": "0.59", "wpb": "127.3", "bsz": "8", "num_updates": "12800", "lr": "0.00032872", "gnorm": "3.667", "loss_scale": "1024", "train_wall": "335", "gb_free": "7.1", "wall": "31773"}
	[2024-06-12 21:18:39,738][train_inner][INFO] - {"epoch": 1, "update": 0.862, "loss": "2.037", "ntokens": "125.955", "acc_total": "125.955", "n_correct": "83.845", "wer_total": "125.955", "n_error": "42.055", "ppl": "4.1", "accuracy": "66.567", "wer": "33.389", "wps": "75", "ups": "0.6", "wpb": "126", "bsz": "8", "num_updates": "13000", "lr": "0.000319018", "gnorm": "3.624", "loss_scale": "1024", "train_wall": "335", "gb_free": "7.1", "wall": "32109"}
	[2024-06-12 21:24:15,926][train_inner][INFO] - {"epoch": 1, "update": 0.875, "loss": "2.056", "ntokens": "126.89", "acc_total": "126.89", "n_correct": "83.915", "wer_total": "126.89", "n_error": "42.895", "ppl": "4.16", "accuracy": "66.132", "wer": "33.805", "wps": "75.5", "ups": "0.59", "wpb": "126.9", "bsz": "8", "num_updates": "13200", "lr": "0.000309603", "gnorm": "3.698", "loss_scale": "1024", "train_wall": "335", "gb_free": "7.1", "wall": "32445"}
	[2024-06-12 21:29:51,885][train_inner][INFO] - {"epoch": 1, "update": 0.888, "loss": "2.039", "ntokens": "127.49", "acc_total": "127.49", "n_correct": "86.185", "wer_total": "127.49", "n_error": "41.21", "ppl": "4.11", "accuracy": "67.601", "wer": "32.324", "wps": "75.9", "ups": "0.6", "wpb": "127.5", "bsz": "8", "num_updates": "13400", "lr": "0.000300466", "gnorm": "3.749", "loss_scale": "1024", "train_wall": "335", "gb_free": "7.1", "wall": "32781"}
	[2024-06-12 21:35:27,548][train_inner][INFO] - {"epoch": 1, "update": 0.902, "loss": "1.944", "ntokens": "126.87", "acc_total": "126.87", "n_correct": "87.465", "wer_total": "126.87", "n_error": "39.37", "ppl": "3.85", "accuracy": "68.941", "wer": "31.032", "wps": "75.6", "ups": "0.6", "wpb": "126.9", "bsz": "8", "num_updates": "13600", "lr": "0.000291598", "gnorm": "3.487", "loss_scale": "1024", "train_wall": "335", "gb_free": "7.1", "wall": "33117"}
	[2024-06-12 21:41:04,263][train_inner][INFO] - {"epoch": 1, "update": 0.915, "loss": "1.909", "ntokens": "127.055", "acc_total": "127.055", "n_correct": "86.585", "wer_total": "127.055", "n_error": "40.385", "ppl": "3.76", "accuracy": "68.148", "wer": "31.785", "wps": "75.5", "ups": "0.59", "wpb": "127.1", "bsz": "8", "num_updates": "13800", "lr": "0.000282992", "gnorm": "3.517", "loss_scale": "1024", "train_wall": "336", "gb_free": "7.1", "wall": "33454"}
	[2024-06-12 21:46:40,763][train_inner][INFO] - {"epoch": 1, "update": 0.928, "loss": "1.945", "ntokens": "127.135", "acc_total": "127.135", "n_correct": "87.7", "wer_total": "127.135", "n_error": "39.37", "ppl": "3.85", "accuracy": "68.982", "wer": "30.967", "wps": "75.6", "ups": "0.59", "wpb": "127.1", "bsz": "8", "num_updates": "14000", "lr": "0.00027464", "gnorm": "3.585", "loss_scale": "1024", "train_wall": "336", "gb_free": "7.1", "wall": "33790"}
	[2024-06-12 21:52:16,402][train_inner][INFO] - {"epoch": 1, "update": 0.941, "loss": "1.906", "ntokens": "126.005", "acc_total": "126.005", "n_correct": "86.39", "wer_total": "126.005", "n_error": "39.545", "ppl": "3.75", "accuracy": "68.561", "wer": "31.384", "wps": "75.1", "ups": "0.6", "wpb": "126", "bsz": "8", "num_updates": "14200", "lr": "0.000266535", "gnorm": "3.372", "loss_scale": "1024", "train_wall": "335", "gb_free": "7.1", "wall": "34126"}
	[2024-06-12 21:57:53,188][train_inner][INFO] - {"epoch": 1, "update": 0.955, "loss": "1.921", "ntokens": "126.9", "acc_total": "126.9", "n_correct": "86.495", "wer_total": "126.9", "n_error": "40.335", "ppl": "3.79", "accuracy": "68.16", "wer": "31.785", "wps": "75.4", "ups": "0.59", "wpb": "126.9", "bsz": "8", "num_updates": "14400", "lr": "0.000258668", "gnorm": "3.487", "loss_scale": "2048", "train_wall": "336", "gb_free": "7.1", "wall": "34463"}
	[2024-06-12 22:03:28,820][train_inner][INFO] - {"epoch": 1, "update": 0.968, "loss": "1.863", "ntokens": "127.485", "acc_total": "127.485", "n_correct": "89.105", "wer_total": "127.485", "n_error": "38.345", "ppl": "3.64", "accuracy": "69.894", "wer": "30.078", "wps": "76", "ups": "0.6", "wpb": "127.5", "bsz": "8", "num_updates": "14600", "lr": "0.000251034", "gnorm": "3.37", "loss_scale": "2048", "train_wall": "335", "gb_free": "7.1", "wall": "34798"}
	[2024-06-12 22:09:06,584][train_inner][INFO] - {"epoch": 1, "update": 0.981, "loss": "1.893", "ntokens": "127.26", "acc_total": "127.26", "n_correct": "90.025", "wer_total": "127.26", "n_error": "37.2", "ppl": "3.71", "accuracy": "70.741", "wer": "29.231", "wps": "75.4", "ups": "0.59", "wpb": "127.3", "bsz": "8", "num_updates": "14800", "lr": "0.000243626", "gnorm": "3.388", "loss_scale": "2048", "train_wall": "335", "gb_free": "7.1", "wall": "35136"}
	[2024-06-12 22:14:42,303][train_inner][INFO] - {"epoch": 1, "update": 0.994, "loss": "1.866", "ntokens": "127.385", "acc_total": "127.385", "n_correct": "91.315", "wer_total": "127.385", "n_error": "36.015", "ppl": "3.64", "accuracy": "71.684", "wer": "28.273", "wps": "75.9", "ups": "0.6", "wpb": "127.4", "bsz": "8", "num_updates": "15000", "lr": "0.000236435", "gnorm": "3.455", "loss_scale": "2048", "train_wall": "335", "gb_free": "7.1", "wall": "35472"}
	[2024-06-12 22:14:42,303][fairseq_cli.train][INFO] - begin validation on "valid" subset
	[2024-06-12 22:56:44,979][valid][INFO] - {"epoch": 1, "valid_loss": "1.603", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "13.617", "valid_wer_total": "18.1585", "valid_n_error": "4.5371", "valid_ppl": "3.04", "valid_accuracy": "74.99", "valid_wer": "24.986", "valid_wps": "172.7", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "15000", "valid_best_accuracy": "74.99"}
	[2024-06-12 22:56:44,980][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 15000 updates
	[2024-06-12 22:56:44,980][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_1_15000.pt
	[2024-06-12 22:56:48,194][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_1_15000.pt
	[2024-06-12 22:56:52,406][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_1_15000.pt (epoch 1 @ 15000 updates, score 74.99) (writing took 7.426846079004463 seconds)
	[2024-06-12 22:59:12,169][fairseq_cli.train][INFO] - begin validation on "valid" subset
	[2024-06-12 23:41:16,996][valid][INFO] - {"epoch": 1, "valid_loss": "1.6", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "13.522", "valid_wer_total": "18.1585", "valid_n_error": "4.63168", "valid_ppl": "3.03", "valid_accuracy": "74.467", "valid_wer": "25.507", "valid_wps": "172.5", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "15084", "valid_best_accuracy": "74.99"}
	[2024-06-12 23:41:16,997][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 15084 updates
	[2024-06-12 23:41:16,997][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_last.pt
	[2024-06-12 23:41:20,944][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_last.pt
	[2024-06-12 23:41:21,018][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_last.pt (epoch 1 @ 15084 updates, score 74.467) (writing took 4.021296519989846 seconds)
	[2024-06-12 23:41:21,019][fairseq_cli.train][INFO] - end of epoch 1 (average epoch stats below)
	[2024-06-12 23:41:21,034][train][INFO] - {"epoch": 1, "train_loss": "3.278", "train_ntokens": "126.896", "train_acc_total": "126.896", "train_n_correct": "67.7191", "train_wer_total": "126.896", "train_n_error": "59.0457", "train_ppl": "9.7", "train_accuracy": "53.366", "train_wer": "46.531", "train_wps": "47.1", "train_ups": "0.37", "train_wpb": "126.9", "train_bsz": "8", "train_num_updates": "15084", "train_lr": "0.000233479", "train_gnorm": "3.938", "train_loss_scale": "2048", "train_train_wall": "25328", "train_gb_free": "7.1", "train_wall": "40670"}
	[2024-06-12 23:41:21,080][fairseq.trainer][INFO] - begin training epoch 2
	[2024-06-12 23:41:21,080][fairseq_cli.train][INFO] - Start iterating over samples
	[2024-06-12 23:44:32,178][train_inner][INFO] - {"epoch": 2, "update": 1.008, "loss": "1.775", "ntokens": "127.05", "acc_total": "127.05", "n_correct": "90.26", "wer_total": "127.05", "n_error": "36.785", "ppl": "3.42", "accuracy": "71.043", "wer": "28.953", "wps": "4.7", "ups": "0.04", "wpb": "127", "bsz": "8", "num_updates": "15200", "lr": "0.000229457", "gnorm": "3.415", "loss_scale": "2048", "train_wall": "330", "gb_free": "7.1", "wall": "40862"}
	[2024-06-12 23:50:01,492][train_inner][INFO] - {"epoch": 2, "update": 1.021, "loss": "1.712", "ntokens": "126.32", "acc_total": "126.32", "n_correct": "91.58", "wer_total": "126.32", "n_error": "34.705", "ppl": "3.28", "accuracy": "72.498", "wer": "27.474", "wps": "76.7", "ups": "0.61", "wpb": "126.3", "bsz": "8", "num_updates": "15400", "lr": "0.000222685", "gnorm": "3.105", "loss_scale": "2048", "train_wall": "329", "gb_free": "7.1", "wall": "41191"}
	[2024-06-12 23:55:30,869][train_inner][INFO] - {"epoch": 2, "update": 1.034, "loss": "1.69", "ntokens": "127.015", "acc_total": "127.015", "n_correct": "90.42", "wer_total": "127.015", "n_error": "36.555", "ppl": "3.23", "accuracy": "71.188", "wer": "28.78", "wps": "77.1", "ups": "0.61", "wpb": "127", "bsz": "8", "num_updates": "15600", "lr": "0.000216113", "gnorm": "3.236", "loss_scale": "2048", "train_wall": "329", "gb_free": "7.1", "wall": "41520"}
	[2024-06-12 23:59:54,000][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0
	[2024-06-13 00:01:01,541][train_inner][INFO] - {"epoch": 2, "update": 1.048, "loss": "1.757", "ntokens": "125.5", "acc_total": "125.5", "n_correct": "88.41", "wer_total": "125.5", "n_error": "37.045", "ppl": "3.38", "accuracy": "70.446", "wer": "29.518", "wps": "75.9", "ups": "0.6", "wpb": "125.5", "bsz": "8", "num_updates": "15800", "lr": "0.000209735", "gnorm": "3.135", "loss_scale": "1024", "train_wall": "330", "gb_free": "7.1", "wall": "41851"}
	[2024-06-13 00:06:30,932][train_inner][INFO] - {"epoch": 2, "update": 1.061, "loss": "1.719", "ntokens": "127.405", "acc_total": "127.405", "n_correct": "89.255", "wer_total": "127.405", "n_error": "38.105", "ppl": "3.29", "accuracy": "70.056", "wer": "29.909", "wps": "77.4", "ups": "0.61", "wpb": "127.4", "bsz": "8", "num_updates": "16000", "lr": "0.000203545", "gnorm": "3.255", "loss_scale": "1024", "train_wall": "329", "gb_free": "7.1", "wall": "42180"}
	[2024-06-13 00:12:00,016][train_inner][INFO] - {"epoch": 2, "update": 1.074, "loss": "1.602", "ntokens": "126.915", "acc_total": "126.915", "n_correct": "90.38", "wer_total": "126.915", "n_error": "36.505", "ppl": "3.03", "accuracy": "71.213", "wer": "28.763", "wps": "77.1", "ups": "0.61", "wpb": "126.9", "bsz": "8", "num_updates": "16200", "lr": "0.000197538", "gnorm": "3.155", "loss_scale": "1024", "train_wall": "328", "gb_free": "7.1", "wall": "42509"}
	[2024-06-13 00:17:29,009][train_inner][INFO] - {"epoch": 2, "update": 1.087, "loss": "1.639", "ntokens": "127.14", "acc_total": "127.14", "n_correct": "91.44", "wer_total": "127.14", "n_error": "35.655", "ppl": "3.11", "accuracy": "71.921", "wer": "28.044", "wps": "77.3", "ups": "0.61", "wpb": "127.1", "bsz": "8", "num_updates": "16400", "lr": "0.000191708", "gnorm": "3.184", "loss_scale": "1024", "train_wall": "328", "gb_free": "7.1", "wall": "42838"}
	[2024-06-13 00:22:58,152][train_inner][INFO] - {"epoch": 2, "update": 1.101, "loss": "1.62", "ntokens": "127.51", "acc_total": "127.51", "n_correct": "91.105", "wer_total": "127.51", "n_error": "36.355", "ppl": "3.07", "accuracy": "71.449", "wer": "28.511", "wps": "77.5", "ups": "0.61", "wpb": "127.5", "bsz": "8", "num_updates": "16600", "lr": "0.00018605", "gnorm": "3.087", "loss_scale": "1024", "train_wall": "328", "gb_free": "7.1", "wall": "43167"}
	[2024-06-13 00:28:27,224][train_inner][INFO] - {"epoch": 2, "update": 1.114, "loss": "1.676", "ntokens": "126.405", "acc_total": "126.405", "n_correct": "90.3", "wer_total": "126.405", "n_error": "36.08", "ppl": "3.2", "accuracy": "71.437", "wer": "28.543", "wps": "76.8", "ups": "0.61", "wpb": "126.4", "bsz": "8", "num_updates": "16800", "lr": "0.000180559", "gnorm": "3.26", "loss_scale": "1024", "train_wall": "328", "gb_free": "7.1", "wall": "43497"}
	[2024-06-13 00:33:56,493][train_inner][INFO] - {"epoch": 2, "update": 1.127, "loss": "1.59", "ntokens": "127.925", "acc_total": "127.925", "n_correct": "91.66", "wer_total": "127.925", "n_error": "36.225", "ppl": "3.01", "accuracy": "71.651", "wer": "28.317", "wps": "77.7", "ups": "0.61", "wpb": "127.9", "bsz": "8", "num_updates": "17000", "lr": "0.00017523", "gnorm": "3.06", "loss_scale": "1024", "train_wall": "329", "gb_free": "7.1", "wall": "43826"}
	[2024-06-13 00:39:25,603][train_inner][INFO] - {"epoch": 2, "update": 1.14, "loss": "1.612", "ntokens": "126.125", "acc_total": "126.125", "n_correct": "90.48", "wer_total": "126.125", "n_error": "35.61", "ppl": "3.06", "accuracy": "71.738", "wer": "28.234", "wps": "76.6", "ups": "0.61", "wpb": "126.1", "bsz": "8", "num_updates": "17200", "lr": "0.000170059", "gnorm": "3.11", "loss_scale": "1024", "train_wall": "328", "gb_free": "7.1", "wall": "44155"}
	[2024-06-13 00:44:54,667][train_inner][INFO] - {"epoch": 2, "update": 1.154, "loss": "1.639", "ntokens": "127.245", "acc_total": "127.245", "n_correct": "91.46", "wer_total": "127.245", "n_error": "35.74", "ppl": "3.12", "accuracy": "71.877", "wer": "28.088", "wps": "77.3", "ups": "0.61", "wpb": "127.2", "bsz": "8", "num_updates": "17400", "lr": "0.00016504", "gnorm": "3.231", "loss_scale": "1024", "train_wall": "328", "gb_free": "7.1", "wall": "44484"}
	[2024-06-13 00:50:23,793][train_inner][INFO] - {"epoch": 2, "update": 1.167, "loss": "1.617", "ntokens": "126.2", "acc_total": "126.2", "n_correct": "90.685", "wer_total": "126.2", "n_error": "35.47", "ppl": "3.07", "accuracy": "71.858", "wer": "28.106", "wps": "76.7", "ups": "0.61", "wpb": "126.2", "bsz": "8", "num_updates": "17600", "lr": "0.000160169", "gnorm": "3.163", "loss_scale": "1024", "train_wall": "328", "gb_free": "7.1", "wall": "44813"}
	[2024-06-13 00:55:52,818][train_inner][INFO] - {"epoch": 2, "update": 1.18, "loss": "1.535", "ntokens": "128.29", "acc_total": "128.29", "n_correct": "93.1", "wer_total": "128.29", "n_error": "35.16", "ppl": "2.9", "accuracy": "72.57", "wer": "27.407", "wps": "78", "ups": "0.61", "wpb": "128.3", "bsz": "8", "num_updates": "17800", "lr": "0.000155442", "gnorm": "2.971", "loss_scale": "1024", "train_wall": "328", "gb_free": "7.1", "wall": "45142"}
	[2024-06-13 01:01:22,047][train_inner][INFO] - {"epoch": 2, "update": 1.193, "loss": "1.606", "ntokens": "126.235", "acc_total": "126.235", "n_correct": "90.42", "wer_total": "126.235", "n_error": "35.805", "ppl": "3.04", "accuracy": "71.628", "wer": "28.364", "wps": "76.7", "ups": "0.61", "wpb": "126.2", "bsz": "8", "num_updates": "18000", "lr": "0.000150854", "gnorm": "3.094", "loss_scale": "2048", "train_wall": "329", "gb_free": "7.1", "wall": "45471"}
	[2024-06-13 01:01:22,047][fairseq_cli.train][INFO] - begin validation on "valid" subset
	[2024-06-13 01:43:25,808][valid][INFO] - {"epoch": 2, "valid_loss": "1.457", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "13.6381", "valid_wer_total": "18.1585", "valid_n_error": "4.51488", "valid_ppl": "2.74", "valid_accuracy": "75.106", "valid_wer": "24.864", "valid_wps": "172.6", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "18000", "valid_best_accuracy": "75.106"}
	[2024-06-13 01:43:25,809][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 18000 updates
	[2024-06-13 01:43:25,809][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_18000.pt
	[2024-06-13 01:43:28,987][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_18000.pt
	[2024-06-13 01:43:33,154][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_18000.pt (epoch 2 @ 18000 updates, score 75.106) (writing took 7.3454166339943185 seconds)
	[2024-06-13 01:48:25,127][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0
	[2024-06-13 01:49:39,830][train_inner][INFO] - {"epoch": 2, "update": 1.207, "loss": "1.638", "ntokens": "127.64", "acc_total": "127.64", "n_correct": "93.42", "wer_total": "127.64", "n_error": "34.18", "ppl": "3.11", "accuracy": "73.19", "wer": "26.778", "wps": "8.8", "ups": "0.07", "wpb": "127.6", "bsz": "8", "num_updates": "18200", "lr": "0.000146402", "gnorm": "10.161", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "48369"}
	[2024-06-13 01:55:44,757][train_inner][INFO] - {"epoch": 2, "update": 1.22, "loss": "1.589", "ntokens": "126.27", "acc_total": "126.27", "n_correct": "92.09", "wer_total": "126.27", "n_error": "34.135", "ppl": "3.01", "accuracy": "72.931", "wer": "27.033", "wps": "69.2", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "18400", "lr": "0.000142081", "gnorm": "9.759", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "48734"}
	[2024-06-13 02:01:49,918][train_inner][INFO] - {"epoch": 2, "update": 1.233, "loss": "1.564", "ntokens": "126.87", "acc_total": "126.87", "n_correct": "93.77", "wer_total": "126.87", "n_error": "33.07", "ppl": "2.96", "accuracy": "73.91", "wer": "26.066", "wps": "69.5", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "18600", "lr": "0.000137888", "gnorm": "9.264", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "49099"}
	[2024-06-13 02:07:54,943][train_inner][INFO] - {"epoch": 2, "update": 1.246, "loss": "1.511", "ntokens": "127.155", "acc_total": "127.155", "n_correct": "93.975", "wer_total": "127.155", "n_error": "33.14", "ppl": "2.85", "accuracy": "73.906", "wer": "26.063", "wps": "69.7", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "18800", "lr": "0.000133819", "gnorm": "9.183", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "49464"}
	[2024-06-13 02:13:59,889][train_inner][INFO] - {"epoch": 2, "update": 1.26, "loss": "1.477", "ntokens": "127.695", "acc_total": "127.695", "n_correct": "95.13", "wer_total": "127.695", "n_error": "32.53", "ppl": "2.78", "accuracy": "74.498", "wer": "25.475", "wps": "70", "ups": "0.55", "wpb": "127.7", "bsz": "8", "num_updates": "19000", "lr": "0.000129869", "gnorm": "8.568", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "49829"}
	[2024-06-13 02:20:04,913][train_inner][INFO] - {"epoch": 2, "update": 1.273, "loss": "1.536", "ntokens": "127.145", "acc_total": "127.145", "n_correct": "93.975", "wer_total": "127.145", "n_error": "33.15", "ppl": "2.9", "accuracy": "73.912", "wer": "26.073", "wps": "69.7", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "19200", "lr": "0.000126036", "gnorm": "9.153", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "50194"}
	[2024-06-13 02:26:09,850][train_inner][INFO] - {"epoch": 2, "update": 1.286, "loss": "1.4", "ntokens": "125.81", "acc_total": "125.81", "n_correct": "95.92", "wer_total": "125.81", "n_error": "29.865", "ppl": "2.64", "accuracy": "76.242", "wer": "23.738", "wps": "69", "ups": "0.55", "wpb": "125.8", "bsz": "8", "num_updates": "19400", "lr": "0.000122317", "gnorm": "8.373", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "50559"}
	[2024-06-13 02:32:14,883][train_inner][INFO] - {"epoch": 2, "update": 1.299, "loss": "1.423", "ntokens": "125.62", "acc_total": "125.62", "n_correct": "93.185", "wer_total": "125.62", "n_error": "32.42", "ppl": "2.68", "accuracy": "74.18", "wer": "25.808", "wps": "68.8", "ups": "0.55", "wpb": "125.6", "bsz": "8", "num_updates": "19600", "lr": "0.000118707", "gnorm": "8.314", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "50924"}
	[2024-06-13 02:38:20,194][train_inner][INFO] - {"epoch": 2, "update": 1.313, "loss": "1.376", "ntokens": "127.705", "acc_total": "127.705", "n_correct": "96.58", "wer_total": "127.705", "n_error": "31.085", "ppl": "2.6", "accuracy": "75.627", "wer": "24.341", "wps": "69.9", "ups": "0.55", "wpb": "127.7", "bsz": "8", "num_updates": "19800", "lr": "0.000115203", "gnorm": "7.792", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "51290"}
	[2024-06-13 02:44:25,435][train_inner][INFO] - {"epoch": 2, "update": 1.326, "loss": "1.426", "ntokens": "126.185", "acc_total": "126.185", "n_correct": "97.21", "wer_total": "126.185", "n_error": "28.95", "ppl": "2.69", "accuracy": "77.038", "wer": "22.943", "wps": "69.1", "ups": "0.55", "wpb": "126.2", "bsz": "8", "num_updates": "20000", "lr": "0.000111803", "gnorm": "8.016", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "51655"}
	[2024-06-13 02:50:30,554][train_inner][INFO] - {"epoch": 2, "update": 1.339, "loss": "1.411", "ntokens": "126.295", "acc_total": "126.295", "n_correct": "98.11", "wer_total": "126.295", "n_error": "28.175", "ppl": "2.66", "accuracy": "77.683", "wer": "22.309", "wps": "69.2", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "20200", "lr": "0.000108504", "gnorm": "8.015", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "52020"}
	[2024-06-13 02:56:35,710][train_inner][INFO] - {"epoch": 2, "update": 1.353, "loss": "1.429", "ntokens": "127.09", "acc_total": "127.09", "n_correct": "97.92", "wer_total": "127.09", "n_error": "29.145", "ppl": "2.69", "accuracy": "77.048", "wer": "22.933", "wps": "69.6", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "20400", "lr": "0.000105301", "gnorm": "8.113", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "52385"}
	[2024-06-13 03:02:40,905][train_inner][INFO] - {"epoch": 2, "update": 1.366, "loss": "1.376", "ntokens": "126.48", "acc_total": "126.48", "n_correct": "98.08", "wer_total": "126.48", "n_error": "28.39", "ppl": "2.59", "accuracy": "77.546", "wer": "22.446", "wps": "69.3", "ups": "0.55", "wpb": "126.5", "bsz": "8", "num_updates": "20600", "lr": "0.000102194", "gnorm": "8.132", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "52750"}
	[2024-06-13 03:08:46,243][train_inner][INFO] - {"epoch": 2, "update": 1.379, "loss": "1.345", "ntokens": "127.35", "acc_total": "127.35", "n_correct": "98.355", "wer_total": "127.35", "n_error": "28.95", "ppl": "2.54", "accuracy": "77.232", "wer": "22.733", "wps": "69.7", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "20800", "lr": "9.91776e-05", "gnorm": "7.4", "loss_scale": "2048", "train_wall": "365", "gb_free": "6.5", "wall": "53116"}
	[2024-06-13 03:14:51,403][train_inner][INFO] - {"epoch": 2, "update": 1.392, "loss": "1.348", "ntokens": "127.315", "acc_total": "127.315", "n_correct": "99.08", "wer_total": "127.315", "n_error": "28.215", "ppl": "2.55", "accuracy": "77.823", "wer": "22.162", "wps": "69.7", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "21000", "lr": "9.62506e-05", "gnorm": "7.468", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "53481"}
	[2024-06-13 03:14:51,403][fairseq_cli.train][INFO] - begin validation on "valid" subset
	[2024-06-13 03:56:56,721][valid][INFO] - {"epoch": 2, "valid_loss": "1.194", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "14.7151", "valid_wer_total": "18.1585", "valid_n_error": "3.44181", "valid_ppl": "2.29", "valid_accuracy": "81.037", "valid_wer": "18.954", "valid_wps": "172.5", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "21000", "valid_best_accuracy": "81.037"}
	[2024-06-13 03:56:56,722][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 21000 updates
	[2024-06-13 03:56:56,722][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_21000.pt
	[2024-06-13 03:56:59,912][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_21000.pt
	[2024-06-13 03:57:04,158][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_21000.pt (epoch 2 @ 21000 updates, score 81.037) (writing took 7.436629530013306 seconds)
	[2024-06-13 03:57:13,174][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0
	[2024-06-13 04:03:10,873][train_inner][INFO] - {"epoch": 2, "update": 1.406, "loss": "1.334", "ntokens": "126.81", "acc_total": "126.81", "n_correct": "100.06", "wer_total": "126.81", "n_error": "26.73", "ppl": "2.52", "accuracy": "78.905", "wer": "21.079", "wps": "8.7", "ups": "0.07", "wpb": "126.8", "bsz": "8", "num_updates": "21200", "lr": "9.341e-05", "gnorm": "7.524", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "56380"}
	[2024-06-13 04:09:16,283][train_inner][INFO] - {"epoch": 2, "update": 1.419, "loss": "1.304", "ntokens": "127.32", "acc_total": "127.32", "n_correct": "99.885", "wer_total": "127.32", "n_error": "27.42", "ppl": "2.47", "accuracy": "78.452", "wer": "21.536", "wps": "69.7", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "21400", "lr": "9.06532e-05", "gnorm": "7.483", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "56746"}
	[2024-06-13 04:15:21,675][train_inner][INFO] - {"epoch": 2, "update": 1.432, "loss": "1.32", "ntokens": "126.805", "acc_total": "126.805", "n_correct": "97.655", "wer_total": "126.805", "n_error": "29.12", "ppl": "2.5", "accuracy": "77.012", "wer": "22.964", "wps": "69.4", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "21600", "lr": "8.79777e-05", "gnorm": "7.923", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "57111"}
	[2024-06-13 04:21:26,918][train_inner][INFO] - {"epoch": 2, "update": 1.445, "loss": "1.273", "ntokens": "125.555", "acc_total": "125.555", "n_correct": "98.36", "wer_total": "125.555", "n_error": "27.185", "ppl": "2.42", "accuracy": "78.34", "wer": "21.652", "wps": "68.8", "ups": "0.55", "wpb": "125.6", "bsz": "8", "num_updates": "21800", "lr": "8.53812e-05", "gnorm": "7.63", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "57476"}
	[2024-06-13 04:27:32,248][train_inner][INFO] - {"epoch": 2, "update": 1.459, "loss": "1.345", "ntokens": "127.64", "acc_total": "127.64", "n_correct": "99.055", "wer_total": "127.64", "n_error": "28.575", "ppl": "2.54", "accuracy": "77.605", "wer": "22.387", "wps": "69.9", "ups": "0.55", "wpb": "127.6", "bsz": "8", "num_updates": "22000", "lr": "8.28614e-05", "gnorm": "7.442", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "57842"}
	[2024-06-13 04:33:37,591][train_inner][INFO] - {"epoch": 2, "update": 1.472, "loss": "1.25", "ntokens": "126.515", "acc_total": "126.515", "n_correct": "99.44", "wer_total": "126.515", "n_error": "27.06", "ppl": "2.38", "accuracy": "78.599", "wer": "21.389", "wps": "69.3", "ups": "0.55", "wpb": "126.5", "bsz": "8", "num_updates": "22200", "lr": "8.04159e-05", "gnorm": "7.258", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "58207"}
	[2024-06-13 04:39:42,713][train_inner][INFO] - {"epoch": 2, "update": 1.485, "loss": "1.263", "ntokens": "127.11", "acc_total": "127.11", "n_correct": "101.045", "wer_total": "127.11", "n_error": "26.045", "ppl": "2.4", "accuracy": "79.494", "wer": "20.49", "wps": "69.6", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "22400", "lr": "7.80425e-05", "gnorm": "7.508", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "58572"}
	[2024-06-13 04:45:47,899][train_inner][INFO] - {"epoch": 2, "update": 1.498, "loss": "1.283", "ntokens": "126.78", "acc_total": "126.78", "n_correct": "101.75", "wer_total": "126.78", "n_error": "25.015", "ppl": "2.43", "accuracy": "80.257", "wer": "19.731", "wps": "69.4", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "22600", "lr": "7.57393e-05", "gnorm": "7.417", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "58937"}
	[2024-06-13 04:51:53,081][train_inner][INFO] - {"epoch": 2, "update": 1.512, "loss": "1.24", "ntokens": "126.625", "acc_total": "126.625", "n_correct": "103.345", "wer_total": "126.625", "n_error": "23.27", "ppl": "2.36", "accuracy": "81.615", "wer": "18.377", "wps": "69.3", "ups": "0.55", "wpb": "126.6", "bsz": "8", "num_updates": "22800", "lr": "7.3504e-05", "gnorm": "7.214", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "59302"}
	[2024-06-13 04:57:58,095][train_inner][INFO] - {"epoch": 2, "update": 1.525, "loss": "1.301", "ntokens": "126.17", "acc_total": "126.17", "n_correct": "102.7", "wer_total": "126.17", "n_error": "23.46", "ppl": "2.46", "accuracy": "81.398", "wer": "18.594", "wps": "69.1", "ups": "0.55", "wpb": "126.2", "bsz": "8", "num_updates": "23000", "lr": "7.13346e-05", "gnorm": "7.344", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "59667"}
	[2024-06-13 05:04:03,572][train_inner][INFO] - {"epoch": 2, "update": 1.538, "loss": "1.232", "ntokens": "126.86", "acc_total": "126.86", "n_correct": "103.87", "wer_total": "126.86", "n_error": "22.985", "ppl": "2.35", "accuracy": "81.878", "wer": "18.118", "wps": "69.4", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "23200", "lr": "6.92293e-05", "gnorm": "7.162", "loss_scale": "2048", "train_wall": "365", "gb_free": "6.5", "wall": "60033"}
	[2024-06-13 05:10:08,772][train_inner][INFO] - {"epoch": 2, "update": 1.551, "loss": "1.167", "ntokens": "126.31", "acc_total": "126.31", "n_correct": "104.89", "wer_total": "126.31", "n_error": "21.42", "ppl": "2.25", "accuracy": "83.042", "wer": "16.958", "wps": "69.2", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "23400", "lr": "6.71862e-05", "gnorm": "6.883", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "60398"}
	[2024-06-13 05:16:14,108][train_inner][INFO] - {"epoch": 2, "update": 1.565, "loss": "1.287", "ntokens": "126.645", "acc_total": "126.645", "n_correct": "102.61", "wer_total": "126.645", "n_error": "24.035", "ppl": "2.44", "accuracy": "81.022", "wer": "18.978", "wps": "69.3", "ups": "0.55", "wpb": "126.6", "bsz": "8", "num_updates": "23600", "lr": "6.52033e-05", "gnorm": "7.399", "loss_scale": "2048", "train_wall": "365", "gb_free": "6.5", "wall": "60763"}
	[2024-06-13 05:22:19,225][train_inner][INFO] - {"epoch": 2, "update": 1.578, "loss": "1.237", "ntokens": "127.17", "acc_total": "127.17", "n_correct": "103.795", "wer_total": "127.17", "n_error": "23.37", "ppl": "2.36", "accuracy": "81.619", "wer": "18.377", "wps": "69.7", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "23800", "lr": "6.3279e-05", "gnorm": "7.127", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "61129"}
	[2024-06-13 05:28:24,122][train_inner][INFO] - {"epoch": 2, "update": 1.591, "loss": "1.243", "ntokens": "126.81", "acc_total": "126.81", "n_correct": "103.095", "wer_total": "126.81", "n_error": "23.705", "ppl": "2.37", "accuracy": "81.299", "wer": "18.693", "wps": "69.5", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "24000", "lr": "6.14114e-05", "gnorm": "7.272", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "61493"}
	[2024-06-13 05:28:24,123][fairseq_cli.train][INFO] - begin validation on "valid" subset
	[2024-06-13 06:10:29,226][valid][INFO] - {"epoch": 2, "valid_loss": "1.085", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.1193", "valid_wer_total": "18.1585", "valid_n_error": "3.03764", "valid_ppl": "2.12", "valid_accuracy": "83.263", "valid_wer": "16.728", "valid_wps": "172.5", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "24000", "valid_best_accuracy": "83.263"}
	[2024-06-13 06:10:29,227][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 24000 updates
	[2024-06-13 06:10:29,227][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_24000.pt
	[2024-06-13 06:10:32,454][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_24000.pt
	[2024-06-13 06:10:36,604][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_24000.pt (epoch 2 @ 24000 updates, score 83.263) (writing took 7.377722701989114 seconds)
	[2024-06-13 06:16:41,492][train_inner][INFO] - {"epoch": 2, "update": 1.604, "loss": "1.224", "ntokens": "126.145", "acc_total": "126.145", "n_correct": "101.7", "wer_total": "126.145", "n_error": "24.435", "ppl": "2.34", "accuracy": "80.622", "wer": "19.371", "wps": "8.7", "ups": "0.07", "wpb": "126.1", "bsz": "8", "num_updates": "24200", "lr": "5.9599e-05", "gnorm": "6.898", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "64391"}
	[2024-06-13 06:22:46,713][train_inner][INFO] - {"epoch": 2, "update": 1.618, "loss": "1.226", "ntokens": "127.23", "acc_total": "127.23", "n_correct": "103.15", "wer_total": "127.23", "n_error": "24.08", "ppl": "2.34", "accuracy": "81.074", "wer": "18.926", "wps": "69.7", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "24400", "lr": "5.784e-05", "gnorm": "7.099", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "64756"}
	[2024-06-13 06:22:52,118][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0
	[2024-06-13 06:28:53,812][train_inner][INFO] - {"epoch": 2, "update": 1.631, "loss": "1.26", "ntokens": "127.385", "acc_total": "127.385", "n_correct": "101.44", "wer_total": "127.385", "n_error": "25.92", "ppl": "2.39", "accuracy": "79.633", "wer": "20.348", "wps": "69.4", "ups": "0.54", "wpb": "127.4", "bsz": "8", "num_updates": "24600", "lr": "5.6133e-05", "gnorm": "7.108", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "65123"}
	[2024-06-13 06:34:59,004][train_inner][INFO] - {"epoch": 2, "update": 1.644, "loss": "1.194", "ntokens": "127.26", "acc_total": "127.26", "n_correct": "102.715", "wer_total": "127.26", "n_error": "24.535", "ppl": "2.29", "accuracy": "80.713", "wer": "19.279", "wps": "69.7", "ups": "0.55", "wpb": "127.3", "bsz": "8", "num_updates": "24800", "lr": "5.44763e-05", "gnorm": "6.953", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "65488"}
	[2024-06-13 06:41:04,172][train_inner][INFO] - {"epoch": 2, "update": 1.658, "loss": "1.231", "ntokens": "125.92", "acc_total": "125.92", "n_correct": "101.295", "wer_total": "125.92", "n_error": "24.61", "ppl": "2.35", "accuracy": "80.444", "wer": "19.544", "wps": "69", "ups": "0.55", "wpb": "125.9", "bsz": "8", "num_updates": "25000", "lr": "5.28686e-05", "gnorm": "6.799", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "65854"}
	[2024-06-13 06:47:09,304][train_inner][INFO] - {"epoch": 2, "update": 1.671, "loss": "1.141", "ntokens": "127.44", "acc_total": "127.44", "n_correct": "104.865", "wer_total": "127.44", "n_error": "22.56", "ppl": "2.2", "accuracy": "82.286", "wer": "17.702", "wps": "69.8", "ups": "0.55", "wpb": "127.4", "bsz": "8", "num_updates": "25200", "lr": "5.13083e-05", "gnorm": "6.781", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "66219"}
	[2024-06-13 06:53:14,428][train_inner][INFO] - {"epoch": 2, "update": 1.684, "loss": "1.194", "ntokens": "125.36", "acc_total": "125.36", "n_correct": "102.79", "wer_total": "125.36", "n_error": "22.555", "ppl": "2.29", "accuracy": "81.996", "wer": "17.992", "wps": "68.7", "ups": "0.55", "wpb": "125.4", "bsz": "8", "num_updates": "25400", "lr": "4.9794e-05", "gnorm": "6.944", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "66584"}
	[2024-06-13 06:59:19,183][train_inner][INFO] - {"epoch": 2, "update": 1.697, "loss": "1.134", "ntokens": "127.07", "acc_total": "127.07", "n_correct": "103.89", "wer_total": "127.07", "n_error": "23.175", "ppl": "2.2", "accuracy": "81.758", "wer": "18.238", "wps": "69.7", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "25600", "lr": "4.83244e-05", "gnorm": "6.987", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "66949"}
	[2024-06-13 07:05:23,910][train_inner][INFO] - {"epoch": 2, "update": 1.711, "loss": "1.137", "ntokens": "127.055", "acc_total": "127.055", "n_correct": "105.045", "wer_total": "127.055", "n_error": "22.005", "ppl": "2.2", "accuracy": "82.677", "wer": "17.319", "wps": "69.7", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "25800", "lr": "4.68982e-05", "gnorm": "6.823", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "67313"}
	[2024-06-13 07:11:28,762][train_inner][INFO] - {"epoch": 2, "update": 1.724, "loss": "1.184", "ntokens": "126.935", "acc_total": "126.935", "n_correct": "104.555", "wer_total": "126.935", "n_error": "22.36", "ppl": "2.27", "accuracy": "82.369", "wer": "17.615", "wps": "69.6", "ups": "0.55", "wpb": "126.9", "bsz": "8", "num_updates": "26000", "lr": "4.55141e-05", "gnorm": "6.855", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "67678"}
	[2024-06-13 07:17:33,648][train_inner][INFO] - {"epoch": 2, "update": 1.737, "loss": "1.166", "ntokens": "126.305", "acc_total": "126.305", "n_correct": "102.565", "wer_total": "126.305", "n_error": "23.735", "ppl": "2.24", "accuracy": "81.204", "wer": "18.792", "wps": "69.2", "ups": "0.55", "wpb": "126.3", "bsz": "8", "num_updates": "26200", "lr": "4.41708e-05", "gnorm": "6.915", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "68043"}
	[2024-06-13 07:23:38,699][train_inner][INFO] - {"epoch": 2, "update": 1.75, "loss": "1.15", "ntokens": "127.47", "acc_total": "127.47", "n_correct": "103.72", "wer_total": "127.47", "n_error": "23.735", "ppl": "2.22", "accuracy": "81.368", "wer": "18.62", "wps": "69.8", "ups": "0.55", "wpb": "127.5", "bsz": "8", "num_updates": "26400", "lr": "4.28672e-05", "gnorm": "6.62", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "68408"}
	[2024-06-13 07:29:43,686][train_inner][INFO] - {"epoch": 2, "update": 1.764, "loss": "1.117", "ntokens": "126.6", "acc_total": "126.6", "n_correct": "103.78", "wer_total": "126.6", "n_error": "22.82", "ppl": "2.17", "accuracy": "81.975", "wer": "18.025", "wps": "69.4", "ups": "0.55", "wpb": "126.6", "bsz": "8", "num_updates": "26600", "lr": "4.16021e-05", "gnorm": "6.756", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "68773"}
	[2024-06-13 07:33:20,788][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0
	[2024-06-13 07:35:50,343][train_inner][INFO] - {"epoch": 2, "update": 1.777, "loss": "1.13", "ntokens": "126.585", "acc_total": "126.585", "n_correct": "103.935", "wer_total": "126.585", "n_error": "22.635", "ppl": "2.19", "accuracy": "82.107", "wer": "17.881", "wps": "69", "ups": "0.55", "wpb": "126.6", "bsz": "8", "num_updates": "26800", "lr": "4.03743e-05", "gnorm": "6.853", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "69140"}
	[2024-06-13 07:41:55,005][train_inner][INFO] - {"epoch": 2, "update": 1.79, "loss": "1.18", "ntokens": "127.595", "acc_total": "127.595", "n_correct": "104.76", "wer_total": "127.595", "n_error": "22.82", "ppl": "2.27", "accuracy": "82.104", "wer": "17.885", "wps": "70", "ups": "0.55", "wpb": "127.6", "bsz": "8", "num_updates": "27000", "lr": "3.91827e-05", "gnorm": "6.777", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "69504"}
	[2024-06-13 07:41:55,006][fairseq_cli.train][INFO] - begin validation on "valid" subset
	[2024-06-13 08:24:00,292][valid][INFO] - {"epoch": 2, "valid_loss": "1.016", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.4384", "valid_wer_total": "18.1585", "valid_n_error": "2.71897", "valid_ppl": "2.02", "valid_accuracy": "85.02", "valid_wer": "14.974", "valid_wps": "172.5", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "27000", "valid_best_accuracy": "85.02"}
	[2024-06-13 08:24:00,293][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 27000 updates
	[2024-06-13 08:24:00,293][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_27000.pt
	[2024-06-13 08:24:03,488][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_27000.pt
	[2024-06-13 08:24:07,556][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_27000.pt (epoch 2 @ 27000 updates, score 85.02) (writing took 7.26307546699536 seconds)
	[2024-06-13 08:30:12,389][train_inner][INFO] - {"epoch": 2, "update": 1.803, "loss": "1.165", "ntokens": "127.94", "acc_total": "127.94", "n_correct": "105.665", "wer_total": "127.94", "n_error": "22.275", "ppl": "2.24", "accuracy": "82.589", "wer": "17.411", "wps": "8.8", "ups": "0.07", "wpb": "127.9", "bsz": "8", "num_updates": "27200", "lr": "3.80263e-05", "gnorm": "6.844", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "72402"}
	[2024-06-13 08:36:17,694][train_inner][INFO] - {"epoch": 2, "update": 1.817, "loss": "1.157", "ntokens": "126.68", "acc_total": "126.68", "n_correct": "104.855", "wer_total": "126.68", "n_error": "21.82", "ppl": "2.23", "accuracy": "82.772", "wer": "17.225", "wps": "69.4", "ups": "0.55", "wpb": "126.7", "bsz": "8", "num_updates": "27400", "lr": "3.6904e-05", "gnorm": "6.795", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "72767"}
	[2024-06-13 08:42:22,624][train_inner][INFO] - {"epoch": 2, "update": 1.83, "loss": "1.126", "ntokens": "126.45", "acc_total": "126.45", "n_correct": "104.375", "wer_total": "126.45", "n_error": "22.07", "ppl": "2.18", "accuracy": "82.543", "wer": "17.454", "wps": "69.3", "ups": "0.55", "wpb": "126.5", "bsz": "8", "num_updates": "27600", "lr": "3.58149e-05", "gnorm": "6.943", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "73132"}
	[2024-06-13 08:48:27,690][train_inner][INFO] - {"epoch": 2, "update": 1.843, "loss": "1.147", "ntokens": "127.465", "acc_total": "127.465", "n_correct": "105.07", "wer_total": "127.465", "n_error": "22.375", "ppl": "2.21", "accuracy": "82.43", "wer": "17.554", "wps": "69.8", "ups": "0.55", "wpb": "127.5", "bsz": "8", "num_updates": "27800", "lr": "3.47579e-05", "gnorm": "6.877", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "73497"}
	[2024-06-13 08:54:32,804][train_inner][INFO] - {"epoch": 2, "update": 1.856, "loss": "1.141", "ntokens": "127.49", "acc_total": "127.49", "n_correct": "105.37", "wer_total": "127.49", "n_error": "22.12", "ppl": "2.21", "accuracy": "82.65", "wer": "17.35", "wps": "69.8", "ups": "0.55", "wpb": "127.5", "bsz": "8", "num_updates": "28000", "lr": "3.37321e-05", "gnorm": "6.748", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "73862"}
	[2024-06-13 09:00:37,926][train_inner][INFO] - {"epoch": 2, "update": 1.87, "loss": "1.124", "ntokens": "127.75", "acc_total": "127.75", "n_correct": "106.195", "wer_total": "127.75", "n_error": "21.555", "ppl": "2.18", "accuracy": "83.127", "wer": "16.873", "wps": "70", "ups": "0.55", "wpb": "127.8", "bsz": "8", "num_updates": "28200", "lr": "3.27365e-05", "gnorm": "6.627", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "74227"}
	[2024-06-13 09:06:43,095][train_inner][INFO] - {"epoch": 2, "update": 1.883, "loss": "1.126", "ntokens": "127.125", "acc_total": "127.125", "n_correct": "105.5", "wer_total": "127.125", "n_error": "21.62", "ppl": "2.18", "accuracy": "82.989", "wer": "17.007", "wps": "69.6", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "28400", "lr": "3.17704e-05", "gnorm": "6.572", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "74592"}
	[2024-06-13 09:12:48,499][train_inner][INFO] - {"epoch": 2, "update": 1.896, "loss": "1.131", "ntokens": "128.015", "acc_total": "128.015", "n_correct": "106.05", "wer_total": "128.015", "n_error": "21.95", "ppl": "2.19", "accuracy": "82.842", "wer": "17.146", "wps": "70.1", "ups": "0.55", "wpb": "128", "bsz": "8", "num_updates": "28600", "lr": "3.08327e-05", "gnorm": "6.753", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "74958"}
	[2024-06-13 09:18:53,749][train_inner][INFO] - {"epoch": 2, "update": 1.91, "loss": "1.103", "ntokens": "126.755", "acc_total": "126.755", "n_correct": "105.41", "wer_total": "126.755", "n_error": "21.34", "ppl": "2.15", "accuracy": "83.16", "wer": "16.836", "wps": "69.4", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "28800", "lr": "2.99228e-05", "gnorm": "6.508", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "75323"}
	[2024-06-13 09:24:59,072][train_inner][INFO] - {"epoch": 2, "update": 1.923, "loss": "1.072", "ntokens": "127.74", "acc_total": "127.74", "n_correct": "107.025", "wer_total": "127.74", "n_error": "20.7", "ppl": "2.1", "accuracy": "83.783", "wer": "16.205", "wps": "69.9", "ups": "0.55", "wpb": "127.7", "bsz": "8", "num_updates": "29000", "lr": "2.90397e-05", "gnorm": "6.515", "loss_scale": "2048", "train_wall": "365", "gb_free": "6.5", "wall": "75688"}
	[2024-06-13 09:31:04,170][train_inner][INFO] - {"epoch": 2, "update": 1.936, "loss": "1.128", "ntokens": "126.765", "acc_total": "126.765", "n_correct": "105.145", "wer_total": "126.765", "n_error": "21.61", "ppl": "2.19", "accuracy": "82.945", "wer": "17.047", "wps": "69.4", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "29200", "lr": "2.81826e-05", "gnorm": "6.912", "loss_scale": "2048", "train_wall": "364", "gb_free": "6.5", "wall": "76053"}
	[2024-06-13 09:33:53,939][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1024.0
	[2024-06-13 09:37:11,140][train_inner][INFO] - {"epoch": 2, "update": 1.949, "loss": "1.114", "ntokens": "127.235", "acc_total": "127.235", "n_correct": "106.16", "wer_total": "127.235", "n_error": "21.07", "ppl": "2.17", "accuracy": "83.436", "wer": "16.56", "wps": "69.3", "ups": "0.55", "wpb": "127.2", "bsz": "8", "num_updates": "29400", "lr": "2.73509e-05", "gnorm": "6.721", "loss_scale": "1024", "train_wall": "366", "gb_free": "6.5", "wall": "76420"}
	[2024-06-13 09:43:16,490][train_inner][INFO] - {"epoch": 2, "update": 1.963, "loss": "1.086", "ntokens": "127.1", "acc_total": "127.1", "n_correct": "106.15", "wer_total": "127.1", "n_error": "20.94", "ppl": "2.12", "accuracy": "83.517", "wer": "16.475", "wps": "69.6", "ups": "0.55", "wpb": "127.1", "bsz": "8", "num_updates": "29600", "lr": "2.65436e-05", "gnorm": "6.838", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "76786"}
	[2024-06-13 09:49:21,812][train_inner][INFO] - {"epoch": 2, "update": 1.976, "loss": "1.114", "ntokens": "126.785", "acc_total": "126.785", "n_correct": "104.825", "wer_total": "126.785", "n_error": "21.945", "ppl": "2.16", "accuracy": "82.679", "wer": "17.309", "wps": "69.4", "ups": "0.55", "wpb": "126.8", "bsz": "8", "num_updates": "29800", "lr": "2.57603e-05", "gnorm": "6.715", "loss_scale": "1024", "train_wall": "365", "gb_free": "6.5", "wall": "77151"}
	[2024-06-13 09:55:27,037][train_inner][INFO] - {"epoch": 2, "update": 1.989, "loss": "1.113", "ntokens": "126.96", "acc_total": "126.96", "n_correct": "105.8", "wer_total": "126.96", "n_error": "21.14", "ppl": "2.16", "accuracy": "83.333", "wer": "16.651", "wps": "69.5", "ups": "0.55", "wpb": "127", "bsz": "8", "num_updates": "30000", "lr": "2.5e-05", "gnorm": "6.914", "loss_scale": "1024", "train_wall": "364", "gb_free": "6.5", "wall": "77516"}
	[2024-06-13 09:55:27,038][fairseq_cli.train][INFO] - Stopping training due to num_updates: 30000 >= max_update: 30000
	[2024-06-13 09:55:27,038][fairseq_cli.train][INFO] - begin validation on "valid" subset
	[2024-06-13 10:37:33,551][valid][INFO] - {"epoch": 2, "valid_loss": "0.97", "valid_ntokens": "18.1585", "valid_acc_total": "18.1585", "valid_n_correct": "15.5259", "valid_wer_total": "18.1585", "valid_n_error": "2.63176", "valid_ppl": "1.96", "valid_accuracy": "85.502", "valid_wer": "14.493", "valid_wps": "172.4", "valid_wpb": "18.2", "valid_bsz": "1", "valid_num_updates": "30000", "valid_best_accuracy": "85.502"}
	[2024-06-13 10:37:33,552][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 30000 updates
	[2024-06-13 10:37:33,552][fairseq.trainer][INFO] - Saving checkpoint to checkpoints/checkpoint_2_30000.pt
	[2024-06-13 10:37:36,767][fairseq.trainer][INFO] - Finished saving checkpoint to checkpoints/checkpoint_2_30000.pt
	[2024-06-13 10:37:40,880][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_2_30000.pt (epoch 2 @ 30000 updates, score 85.502) (writing took 7.327467755007092 seconds)
	[2024-06-13 10:37:40,913][fairseq_cli.train][INFO] - end of epoch 2 (average epoch stats below)
	[2024-06-13 10:37:40,915][train][INFO] - {"epoch": 2, "train_loss": "1.334", "train_ntokens": "126.909", "train_acc_total": "126.909", "train_n_correct": "99.4368", "train_wer_total": "126.909", "train_n_error": "27.4529", "train_ppl": "2.52", "train_accuracy": "78.353", "train_wer": "21.632", "train_wps": "48.1", "train_ups": "0.38", "train_wpb": "126.9", "train_bsz": "8", "train_num_updates": "30000", "train_lr": "2.5e-05", "train_gnorm": "6.553", "train_loss_scale": "1024", "train_train_wall": "26659", "train_gb_free": "6.5", "train_wall": "80050"}
	[2024-06-13 10:37:40,915][fairseq_cli.train][INFO] - done training in 80049.5 seconds