<!DOCTYPE html>
<html lang="en">
<head> |
|
<meta charset="UTF-8"> |
|
<meta name="viewport" content="width=device-width, initial-scale=1.0"> |
|
<title>Training Commands</title> |
|
<style> |
|
body { |
|
font-family: monospace; |
|
margin: 0; |
|
padding: 8px; |
|
} |
|
|
|
#command { |
|
padding: 8px; |
|
background-color: #f6f8fa; |
|
border: 1px solid #d1d5da; |
|
border-radius: 3px; |
|
display: inline-block; |
|
white-space: pre-wrap; |
|
|
|
word-break: break-all; |
|
|
|
} |
|
|
|
button { |
|
margin-left: 8px; |
|
cursor: pointer; |
|
} |
|
</style> |
|
</head> |
|
|
|
<body>
<!--
  Initial content: the OPT 6.7B command, shown until a button replaces it.
  The placeholder is written as &lt;YOUR PORT&gt; because a literal
  "<YOUR PORT>" would be parsed as an unknown element and vanish from the
  rendered text. Lines are not indented and not separated by blank lines,
  because this text is displayed (and copied) verbatim.
-->
<div id="command">torchrun --nproc_per_node=8 --master_port=&lt;YOUR PORT&gt; train.py \
--model_name_or_path "facebook/opt-6.7b" \
--data_path medalpaca_small.json \
--bf16 True \
--output_dir models \
--num_train_epochs 3 \
--per_device_train_batch_size 4 \
--per_device_eval_batch_size 4 \
--gradient_accumulation_steps 8 \
--evaluation_strategy "no" \
--save_strategy "steps" \
--save_steps 2000 \
--save_total_limit 1 \
--learning_rate 2e-5 \
--weight_decay 0. \
--warmup_ratio 0.03 \
--lr_scheduler_type "cosine" \
--logging_steps 1 \
--fsdp "full_shard auto_wrap" \
--fsdp_transformer_layer_cls_to_wrap 'OPTDecoderLayer' \
--tf32 True</div>
<!--
  Model selector. The argument of changeCommand() is the position of the
  model's command in the `commands` array defined in the script below:
  0 = OPT 6.7B, 1 = OPT 13B, 2 = Alpaca 7B (LLaMA weights).
-->
<div>
  <button type="button" onclick="changeCommand(0)">OPT 6.7B</button>
  <button type="button" onclick="changeCommand(1)">OPT 13B</button>
  <button type="button" onclick="changeCommand(2)">Alpaca 7B</button>
</div>

<script>
  /**
   * Per-model settings; everything else in the training command is shared.
   * Order matters: the buttons address these entries by array index.
   *
   * modelPath      - value for --model_name_or_path, including any quotes
   * batchSize      - used for both the train and eval per-device batch size
   * gradAccumSteps - value for --gradient_accumulation_steps
   * layerClass     - value for --fsdp_transformer_layer_cls_to_wrap
   */
  const MODEL_CONFIGS = [
    // OPT 6.7B
    { modelPath: '"facebook/opt-6.7b"', batchSize: 4, gradAccumSteps: 8, layerClass: "OPTDecoderLayer" },
    // OPT 13B
    { modelPath: '"facebook/opt-13b"', batchSize: 2, gradAccumSteps: 16, layerClass: "OPTDecoderLayer" },
    // Alpaca 7B (user-supplied LLaMA weights)
    { modelPath: "<PATH_TO_LLAMA_WEIGHTS>", batchSize: 4, gradAccumSteps: 8, layerClass: "LlamaDecoderLayer" },
  ];

  /**
   * Builds the multi-line torchrun command for one model configuration.
   * Lines are joined with " \" + newline so the result can be pasted into a
   * shell as a single command; no blank lines are emitted, because a blank
   * line after a backslash continuation would end the command early.
   *
   * @param {{modelPath: string, batchSize: number, gradAccumSteps: number, layerClass: string}} config
   * @returns {string} the complete command text
   */
  function buildCommand({ modelPath, batchSize, gradAccumSteps, layerClass }) {
    return [
      "torchrun --nproc_per_node=8 --master_port=<YOUR PORT> train.py",
      `--model_name_or_path ${modelPath}`,
      "--data_path medalpaca_small.json",
      "--bf16 True",
      "--output_dir models",
      "--num_train_epochs 3",
      `--per_device_train_batch_size ${batchSize}`,
      `--per_device_eval_batch_size ${batchSize}`,
      `--gradient_accumulation_steps ${gradAccumSteps}`,
      '--evaluation_strategy "no"',
      '--save_strategy "steps"',
      "--save_steps 2000",
      "--save_total_limit 1",
      "--learning_rate 2e-5",
      "--weight_decay 0.",
      "--warmup_ratio 0.03",
      '--lr_scheduler_type "cosine"',
      "--logging_steps 1",
      '--fsdp "full_shard auto_wrap"',
      `--fsdp_transformer_layer_cls_to_wrap '${layerClass}'`,
      "--tf32 True",
    ].join(" \\\n");
  }

  /** Command text per model: [OPT 6.7B, OPT 13B, Alpaca 7B]. */
  const commands = MODEL_CONFIGS.map(buildCommand);

  /**
   * Replaces the text of the #command element with the selected command.
   *
   * @param {number} appIndex index into `commands`; an index with no entry
   *   is ignored so the box never shows the string "undefined".
   */
  function changeCommand(appIndex) {
    const command = commands[appIndex];
    if (command === undefined) {
      return;
    }
    // innerText (not innerHTML): the text contains "<YOUR PORT>", which must
    // be shown literally rather than parsed as markup.
    document.getElementById("command").innerText = command;
  }
</script>

</body>
</html>