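# Supervised fine-tuning (SFT) configuration for a Gemma 7B text-to-SQL run.
# The layout (trainer.sft, exp_manager, model.data.{train,validation}_ds)
# appears to follow the NeMo / NeMo-Aligner SFT config schema.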
name: gemma-7b-sql-nemo
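
# PyTorch Lightning trainer settings. The nested `sft` block (epochs/steps,
# validation and checkpoint cadence, gradient clipping) follows the
# NeMo-Aligner SFT layout and controls the inner fine-tuning loop.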
trainer:
  num_nodes: 1
  devices: 8
  accelerator: gpu
  precision: bf16
  logger: false
  enable_checkpointing: false
  use_distributed_sampler: false
  max_time: null
  max_epochs: 1
  max_steps: -1
  sft:
    max_epochs: 1
    max_steps: -1
    val_check_interval: 1000
    save_interval: 1000
    limit_val_batches: 40
    gradient_clip_val: 1.0
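
# Experiment manager: checkpoints and logs go under models/gemma-7b-sql-nemo;
# the top-5 checkpoints by validation_loss are kept. model_parallel_size must
# equal tensor_model_parallel_size * pipeline_model_parallel_size (4 * 1 = 4
# in the model section below).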
exp_manager:
  explicit_log_dir: models/gemma-7b-sql-nemo
  exp_dir: null
  name: gemma-7b-sql-nemo
  create_wandb_logger: false
  wandb_logger_kwargs:
    project: null
    name: null
  resume_if_exists: true
  resume_ignore_no_checkpoint: true
  create_checkpoint_callback: true
  checkpoint_callback_params:
    monitor: validation_loss
    save_top_k: 5
    mode: min
    save_nemo_on_train_end: true
    filename: megatron_gpt_sft--{validation_loss:.3f}-{step}-{consumed_samples}-{epoch}
    model_parallel_size: 4
    save_best_model: false
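
# Model and runtime settings. With devices: 8 and tensor parallel 4
# (pipeline 1), the run has 8 / (4 * 1) = 2 data-parallel replicas.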
model:
  seed: 1234
  tensor_model_parallel_size: 4
  pipeline_model_parallel_size: 1
  restore_from_path: /workspace/models/pytorch-7b-pt.nemo
  resume_from_checkpoint: null
  save_nemo_on_validation_end: true
  sync_batch_comm: false
  megatron_amp_O2: true
  encoder_seq_length: 8192
  sequence_parallel: false
  activations_checkpoint_granularity: null
  activations_checkpoint_method: null
  activations_checkpoint_num_layers: null
  activations_checkpoint_layers_per_pipeline: null
  answer_only_loss: true
  gradient_as_bucket_view: false
  seq_len_interpolation_factor: null
  use_flash_attention: null
  hidden_dropout: 0.0
  attention_dropout: 0.0
  ffn_dropout: 0.0
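  # peft_scheme: none means full-parameter SFT; the lora_tuning block below
  # is inert unless peft_scheme is switched to lora.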
  peft:
    peft_scheme: none
    restore_from_path: null
    lora_tuning:
      target_modules:
      - attention_qkv
      adapter_dim: 32
      adapter_dropout: 0.0
      column_init_method: xavier
      row_init_method: zero
      layer_selection: null
      weight_tying: false
      position_embedding_strategy: null
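  # Dataset settings. chat: false selects the plain prompt_template path, so
  # the chat_prompt_tokens below are unused for this run.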
  data:
    chat: false
    chat_prompt_tokens:
      system_turn_start: "\0"
      turn_start: "\x11"
      label_start: "\x12"
      end_of_turn: "\n"
      end_of_name: "\n"
    sample: false
    num_workers: 0
    dataloader_type: single
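    # Batch math: global_batch_size = micro_batch_size * data_parallel *
    # gradient accumulation, so 128 = 1 * 2 * 64 accumulation steps.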
    train_ds:
      file_path: nsql.jsonl
      global_batch_size: 128
      micro_batch_size: 1
      shuffle: true
      memmap_workers: null
      max_seq_length: 8192
      min_seq_length: 1
      drop_last: true
      label_key: output
      add_eos: true
      add_sep: false
      add_bos: false
      truncation_field: input
      index_mapping_dir: null
      prompt_template: '{input} {output}'
      hf_dataset: false
      truncation_method: right
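    # validation_ds mirrors train_ds and points at the same nsql.jsonl file;
    # output_original_text: true additionally keeps the raw text in val batches.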
    validation_ds:
      file_path: nsql.jsonl
      global_batch_size: 128
      micro_batch_size: 1
      shuffle: false
      memmap_workers: null
      max_seq_length: 8192
      min_seq_length: 1
      drop_last: true
      label_key: output
      add_eos: true
      add_sep: false
      add_bos: false
      truncation_field: input
      index_mapping_dir: null
      prompt_template: '{input} {output}'
      hf_dataset: false
      truncation_method: right
      output_original_text: true
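  # Optimizer: distributed fused Adam at lr 5e-6, cosine-annealed to a floor
  # of 9e-7 after 10 warmup steps and a 1000-step constant phase.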
  optim:
    name: distributed_fused_adam
    lr: 5.0e-06
    weight_decay: 0.01
    betas:
    - 0.9
    - 0.98
    sched:
      name: CosineAnnealing
      warmup_steps: 10
      constant_steps: 1000
      min_lr: 9.0e-07
  bias_activation_fusion: true
  precision: bf16
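
# A typical launch, assuming the NeMo-Aligner SFT entry point (the script
# path and Hydra flags are illustrative, not taken from this file):
#   python examples/nlp/gpt/train_gpt_sft.py \
#     --config-path=/path/to/configs --config-name=gemma-7b-sql-nemo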