# accelerate launch ./scripts/finetune.py 2-PKTDC-llama-13B-gptq-lora-24gb.yml
#
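# multi-GPU sketch (assumption: two local GPUs, using accelerate's --num_processes flag):
# accelerate launch --num_processes 2 ./scripts/finetune.py 2-PKTDC-llama-13B-gptq-lora-24gb.yml
#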
# base model settings (local or huggingface repo)
base_model: PocketDoc/llama-13b-gptq-4bit-128g
base_model_config: PocketDoc/llama-13b-gptq-4bit-128g
model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer
trust_remote_code:
# wandb configuration
wandb_project: llama-13b-gptq-4bit-128g-lora
wandb_watch:
wandb_run_id:
wandb_log_model:
# where to save the finished model
output_dir: ./llama-13b-gptq-4bit-128g-lora
# dataset settings (local or huggingface repo)
datasets:
- path: dansmeth.json
type: pygmalion
dataset_prepared_path: data/last_run_prepared
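# a pygmalion-type record is assumed here (verify against axolotl's pygmalion prompt
# strategy) to be a JSON object of role-tagged turns, roughly:
#   {"conversations": [{"role": "system", "value": "..."},
#                      {"role": "user",   "value": "..."},
#                      {"role": "model",  "value": "..."}]}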
# fraction of the dataset to set aside for evaluation (0.02 = 2%)
val_set_size: 0.02
# max token length per prompt
sequence_len: 2048
# maximum length, up to sequence_len, to which training samples are packed together
# inspired by StackLLaMA. see https://huggingface.co/blog/stackllama#supervised-fine-tuning
max_packed_sequence_len: 2048
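# packing sketch: several short samples are concatenated (typically EOS-separated) into
# one sequence of up to max_packed_sequence_len tokens, so fewer tokens are wasted on padding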
# quantized model loading settings
gptq: true
gptq_groupsize: 128 # group size
gptq_model_v1: false # v1 or v2
strict: false
# attempt to load the model in 8-bit precision; pairs with the 8-bit AdamW optimizer set below
load_in_8bit: true
load_in_4bit:
# Use CUDA bf16
bf16: false
# Use CUDA fp16
fp16: true
# Use CUDA tf32
tf32: true
# training hyperparameters
gradient_accumulation_steps: 30
micro_batch_size: 6
eval_batch_size: 6
num_epochs: 12
warmup_steps: 10
learning_rate: 0.000004
logging_steps: 1
eval_steps: 5
save_steps: 10
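# effective batch size per optimizer step (single-GPU sketch):
#   micro_batch_size * gradient_accumulation_steps = 6 * 30 = 180 samples
# with N GPUs under accelerate this scales to 180 * N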
# stop training after this many evaluation losses have increased in a row
# https://huggingface.co/transformers/v4.2.2/_modules/transformers/trainer_callback.html#EarlyStoppingCallback
early_stopping_patience:
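# example (commented out): stop once eval loss has worsened 3 evaluations in a row
# early_stopping_patience: 3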
# specify a scheduler to use with the optimizer (e.g. linear, one_cycle)
lr_scheduler: linear
# specify optimizer
optimizer: paged_adamw_8bit
# specify weight decay
weight_decay: 0.0001
# if you already have a lora model trained that you want to load, put that here
lora_model_dir:
# LoRA hyperparameters
adapter: lora # leave blank for a full finetune
lora_r: 32
lora_alpha: 64
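# the LoRA update is scaled by lora_alpha / lora_r, here 64 / 32 = 2.0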
lora_dropout: 0.05
lora_target_linear:
lora_target_modules:
- q_proj
- v_proj
# - k_proj
# - o_proj
# - gate_proj
# - down_proj
# - up_proj
lora_modules_to_save:
# - embed_tokens
# - lm_head
lora_out_dir:
lora_fan_in_fan_out: false
# whether to include the human's prompt in the training labels (false masks it out)
train_on_inputs: false
# don't use this; it reportedly leads to wonky training (according to someone on the internet)
group_by_length: true
# does not work with the current implementation of 4-bit LoRA
gradient_checkpointing: true
# whether to use the xformers attention patch (https://github.com/facebookresearch/xformers)
xformers_attention: true
# whether to use the flash attention patch (https://github.com/HazyResearch/flash-attention)
flash_attention: # requires an A100 for LLaMA
# whether to use scaled-dot-product attention
# https://pytorch.org/docs/stable/generated/torch.nn.functional.scaled_dot_product_attention.html
sdp_attention:
# resume from a specific checkpoint dir
resume_from_checkpoint:
# if resume_from_checkpoint isn't set and you simply want training to pick up where it left off
# be careful leaving this on when switching between different models
auto_resume_from_checkpoints: false
# don't mess with this, it's here for accelerate and torchrun
local_rank:
# add or change special tokens
special_tokens:
# sys_role_token: "<|system|>"
# user_role_token: "<|user|>"
# model_role_token: "<|model|>"
bos_token: "<s>"
eos_token: "</s>"
unk_token: "<unk>"
# add extra tokens
tokens:
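# example entries (commented out, hypothetical): register the prompt format's role tokens
# - "<|system|>"
# - "<|user|>"
# - "<|model|>"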
# FSDP
fsdp:
fsdp_config:
# Deepspeed
deepspeed:
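# example (commented out; path is an assumption, point at your own ZeRO config JSON):
# deepspeed: ./deepspeed/zero2.json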
# TODO
torchdistx_path:
# Debug mode
debug: