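#!/bin/bash
# Training launcher: arguments are passed as key=value pairs on the command line
# (e.g. model=... data=... path=...) and evaluated below to override the defaults;
# the script then generates an accelerate config (DeepSpeed or FSDP) and starts run.py.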
for arg in "$@"; do
    eval "$arg"
done
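# Print the effective settings; ${var:=default} assigns the default only if the
# variable is unset or empty after parsing the key=value arguments above.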
echo "model: ${model:=mistralai/Mistral-7B-v0.1}"
echo "tokenizer: ${tokenizer:=mistralai/Mistral-7B-v0.1}"
echo "project: ${project:=fla}"
echo "type: ${type:=gla}"
echo "data: ${data:=}"
echo "name: ${name:=}"
echo "cache: ${cache:=}"
echo "seed: ${seed:=42}"
echo "context: ${context:=2048}"
echo "steps: ${steps:=0}"
echo "save: ${save:=2048}"
echo "limit: ${limit:=16}"
echo "preprocessing: ${preprocessing:=32}"
echo "workers: ${workers:=32}"
echo "logging: ${logging:=32}"
echo "config: ${config:=configs/deepspeed.yaml}"
echo "push: ${push:=False}"
echo "lr: ${lr:=3e-4}"
echo "scheduler: ${scheduler:=cosine_with_min_lr}"
echo "epochs: ${epochs:=1}"
echo "optim: ${optim:=adamw_torch_fused}"
echo "decay: ${decay:=0.01}"
echo "beta1: ${beta1:=0.9}"
echo "beta2: ${beta2:=0.95}"
echo "norm: ${norm:=1.0}"
echo "batch: ${batch:=32}"
echo "update: ${update:=4}"
echo "warmup: ${warmup:=512}"
echo "path: ${path:=}"
echo "checkpoint: ${checkpoint:=}"
echo "node: ${node:=}"
echo "rank: ${rank:=}"
echo "ip: ${ip:=}"
echo "port: ${port:=}"
echo "nodes: ${nodes:=1}"
params="--model_name_or_path $model \
--tokenizer $tokenizer \
--use_fast_tokenizer \
--do_train \
--dataset $data \
--context_length $context \
--streaming \
--preprocessing_num_workers $preprocessing \
--dataloader_num_workers $workers \
--dataloader_prefetch_factor 2 \
--ignore_data_skip \
--output_dir $path \
--overwrite_output_dir \
--logging_steps $logging \
--include_num_input_tokens_seen \
--save_steps $save \
--save_total_limit $limit \
--learning_rate $lr \
--lr_scheduler_type $scheduler \
--warmup_steps $warmup \
--optim $optim \
--weight_decay $decay \
--adam_beta1=$beta1 \
--adam_beta2=$beta2 \
--max_grad_norm $norm \
--num_train_epochs $epochs \
--per_device_train_batch_size $batch \
--gradient_accumulation_steps $update \
--seed $seed \
--logging_steps $logging \
--push_to_hub $push \
--bf16"
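# Optional arguments, appended only when the corresponding setting is provided;
# metrics are reported to wandb unless WANDB_DISABLED=true.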
if [ $steps -gt 0 ]; then
    params+=" --max_steps $steps"
fi
if [ "$name" != "" ]; then
    params+=" --dataset_name $name"
fi
if [ "$cache" != "" ]; then
    params+=" --cache_dir $cache"
fi
if [ "$checkpoint" != "" ]; then
    params+=" --resume_from_checkpoint $checkpoint"
fi
if [ "$WANDB_DISABLED" != "true" ]; then
    params+=" --report_to wandb \
        --run_name $type.$(basename $path)"
else
    params+=" --report_to none"
fi
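# Number of GPUs on this machine, taken from nvidia-smi.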
NUM_GPUS=$(nvidia-smi --list-gpus | wc -l)
echo "Launching training..."
accelerate_params=""
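# Multi-node launch: when a machine rank is given, pass the rendezvous settings to accelerate.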
if [ "$rank" != "" ]; then
    accelerate_params+=" --machine_rank $rank \
        --num_processes $((nodes * $NUM_GPUS)) \
        --num_machines $nodes \
        --main_process_ip $ip \
        --main_process_port $port \
        --same_network"
fi
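# DeepSpeed path: write a ZeRO stage-2 bf16 DeepSpeed config plus a matching accelerate config.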
if [[ $config == *"deepspeed"* ]]; then
cat <<EOF > "configs/ds_config.json"
{
    "train_batch_size": "auto",
    "train_micro_batch_size_per_gpu": "auto",
    "gradient_accumulation_steps": "auto",
    "gradient_clipping": "auto",
    "zero_allow_untested_optimizer": true,
    "bf16": {
        "enabled": true
    },
    "zero_optimization": {
        "stage": 2,
        "allgather_partitions": true,
        "allgather_bucket_size": 5e8,
        "reduce_scatter": true,
        "reduce_bucket_size": 5e8,
        "overlap_comm": false,
        "contiguous_gradients": true
    }
}
EOF
cat <<EOF > $config
compute_environment: LOCAL_MACHINE
distributed_type: DEEPSPEED
deepspeed_config:
  deepspeed_config_file: configs/ds_config.json
  zero3_init_flag: true
machine_rank: 0
main_training_function: main
num_machines: 1
num_processes: $NUM_GPUS
use_cpu: false
EOF
fi
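# FSDP path: write an accelerate FSDP config (HYBRID_SHARD_ZERO2 sharding, bf16 mixed precision).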
if [[ $config == *"fsdp"* ]]; then
cat <<EOF > $config
compute_environment: LOCAL_MACHINE
distributed_type: FSDP
fsdp_config:
  fsdp_auto_wrap_policy: TRANSFORMER_BASED_WRAP
  fsdp_backward_prefetch: BACKWARD_PRE
  fsdp_forward_prefetch: false
  fsdp_cpu_ram_efficient_loading: true
  fsdp_offload_params: false
  fsdp_sharding_strategy: HYBRID_SHARD_ZERO2
  fsdp_state_dict_type: SHARDED_STATE_DICT
  fsdp_sync_module_states: true
  fsdp_use_orig_params: true
machine_rank: 0
main_training_function: main
mixed_precision: bf16
num_machines: $nodes
num_processes: $((nodes * $NUM_GPUS))
rdzv_backend: static
same_network: true
tpu_env: []
tpu_use_cluster: false
tpu_use_sudo: false
use_cpu: false
EOF
fi
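# Print the final accelerate config, then snapshot the launch files, configs,
# flame, and ../fla sources into the output directory for reproducibility
# (cp without -r skips any top-level directories; those are copied explicitly below).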
cat $config
set -x
mkdir -p $path
cp * $path
cp -r configs $path
cp -r flame $path
cp -r ../fla $path
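# Run offline: transformers/datasets must resolve models and data from the local cache.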
export TRANSFORMERS_OFFLINE=1
export HF_DATASETS_OFFLINE=1
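# Build a stable, timestamped W&B run id so an interrupted run can be resumed
# (WANDB_RESUME=allow); the timestamp can be overridden by passing date=... on the command line.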
if [ "$date" == "" ]; then
date=$(date +%Y%m%d%H%M)
fi
export WANDB_RESUME=allow
export WANDB_NAME="$type.$(basename $path)"
export WANDB_PROJECT=$project
export WANDB_RUN_ID="$WANDB_NAME-$date"
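# Launch distributed training through accelerate with the generated config.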
accelerate launch $accelerate_params --config_file $config run.py $params
echo "RUNNING DONE!"