<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Training Commands</title>
  <style>
    body {
      font-family: monospace;
      margin: 0;
      padding: 8px;
    }
    #command {
      padding: 8px;
      background-color: #f6f8fa;
      border: 1px solid #d1d5da;
      border-radius: 3px;
      display: inline-block;
      white-space: pre-wrap;  /* allows the text to wrap */
      word-break: break-all;  /* breaks long words to fit the container width */
    }
    button {
      margin-left: 8px;
      cursor: pointer;
    }
  </style>
</head>
<body>
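  <!-- Currently selected training command; defaults to the OPT 6.7B preset (commands[0] in the script below). -->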
<div id="command">torchrun --nproc_per_node=8 --master_port=<YOUR PORT> train.py \
--model_name_or_path "facebook/opt-6.7b" \
--data_path medalpaca_small.json \
--bf16 True \
--output_dir models \
--num_train_epochs 3 \
--per_device_train_batch_size 4 \
--per_device_eval_batch_size 4 \
--gradient_accumulation_steps 8 \
--evaluation_strategy "no" \
--save_strategy "steps" \
--save_steps 2000 \
--save_total_limit 1 \
--learning_rate 2e-5 \
--weight_decay 0. \
--warmup_ratio 0.03 \
--lr_scheduler_type "cosine" \
--logging_steps 1 \
--fsdp "full_shard auto_wrap" \
--fsdp_transformer_layer_cls_to_wrap 'OPTDecoderLayer' \
--tf32 True</div>
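  <!-- Effective global batch size: 8 processes x 4 per-device batch x 8 gradient accumulation steps = 256.
       The OPT 13B preset keeps this constant at 8 x 2 x 16 = 256. -->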
  <div>
    <button onclick="changeCommand(0)">OPT 6.7B</button>
    <button onclick="changeCommand(1)">OPT 13B</button>
    <button onclick="changeCommand(2)">Alpaca 7B</button>
  </div>
  <script>
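    // Preset torchrun commands, indexed to match the buttons above:
    //   0 = OPT 6.7B, 1 = OPT 13B, 2 = Alpaca 7B (trained from LLaMA weights).
    // <YOUR PORT> and <PATH_TO_LLAMA_WEIGHTS> are placeholders to fill in before running.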
    const commands = [
      `torchrun --nproc_per_node=8 --master_port=<YOUR PORT> train.py \\
--model_name_or_path "facebook/opt-6.7b" \\
--data_path medalpaca_small.json \\
--bf16 True \\
--output_dir models \\
--num_train_epochs 3 \\
--per_device_train_batch_size 4 \\
--per_device_eval_batch_size 4 \\
--gradient_accumulation_steps 8 \\
--evaluation_strategy "no" \\
--save_strategy "steps" \\
--save_steps 2000 \\
--save_total_limit 1 \\
--learning_rate 2e-5 \\
--weight_decay 0. \\
--warmup_ratio 0.03 \\
--lr_scheduler_type "cosine" \\
--logging_steps 1 \\
--fsdp "full_shard auto_wrap" \\
--fsdp_transformer_layer_cls_to_wrap 'OPTDecoderLayer' \\
--tf32 True`,
      `torchrun --nproc_per_node=8 --master_port=<YOUR PORT> train.py \\
--model_name_or_path "facebook/opt-13b" \\
--data_path medalpaca_small.json \\
--bf16 True \\
--output_dir models \\
--num_train_epochs 3 \\
--per_device_train_batch_size 2 \\
--per_device_eval_batch_size 2 \\
--gradient_accumulation_steps 16 \\
--evaluation_strategy "no" \\
--save_strategy "steps" \\
--save_steps 2000 \\
--save_total_limit 1 \\
--learning_rate 2e-5 \\
--weight_decay 0. \\
--warmup_ratio 0.03 \\
--lr_scheduler_type "cosine" \\
--logging_steps 1 \\
--fsdp "full_shard auto_wrap" \\
--fsdp_transformer_layer_cls_to_wrap 'OPTDecoderLayer' \\
--tf32 True`,
      `torchrun --nproc_per_node=8 --master_port=<YOUR PORT> train.py \\
--model_name_or_path <PATH_TO_LLAMA_WEIGHTS> \\
--data_path medalpaca_small.json \\
--bf16 True \\
--output_dir models \\
--num_train_epochs 3 \\
--per_device_train_batch_size 4 \\
--per_device_eval_batch_size 4 \\
--gradient_accumulation_steps 8 \\
--evaluation_strategy "no" \\
--save_strategy "steps" \\
--save_steps 2000 \\
--save_total_limit 1 \\
--learning_rate 2e-5 \\
--weight_decay 0. \\
--warmup_ratio 0.03 \\
--lr_scheduler_type "cosine" \\
--logging_steps 1 \\
--fsdp "full_shard auto_wrap" \\
--fsdp_transformer_layer_cls_to_wrap 'LlamaDecoderLayer' \\
--tf32 True`,
    ];
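
    // Show the preset whose index matches the clicked button.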
    function changeCommand(appIndex) {
      document.getElementById("command").innerText = commands[appIndex];
    }
  </script>
</body>
</html>