Muennighoff committed on
Commit
e19438b
·
1 Parent(s): 20a9f7d
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. 2b812b4b/sbatch_2b812b4bval.sh +168 -0
  2. 2b816b4b/3479666.err +0 -0
  3. 2b816b4b/3479666.out +0 -0
  4. 2b816b4b/global_step15258/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
  5. 2b816b4b/global_step15258/bf16_zero_pp_rank_100_mp_rank_00_optim_states.pt +3 -0
  6. 2b816b4b/global_step15258/bf16_zero_pp_rank_101_mp_rank_00_optim_states.pt +3 -0
  7. 2b816b4b/global_step15258/bf16_zero_pp_rank_102_mp_rank_00_optim_states.pt +3 -0
  8. 2b816b4b/global_step15258/bf16_zero_pp_rank_103_mp_rank_00_optim_states.pt +3 -0
  9. 2b816b4b/global_step15258/bf16_zero_pp_rank_104_mp_rank_00_optim_states.pt +3 -0
  10. 2b816b4b/global_step15258/bf16_zero_pp_rank_105_mp_rank_00_optim_states.pt +3 -0
  11. 2b816b4b/global_step15258/bf16_zero_pp_rank_106_mp_rank_00_optim_states.pt +3 -0
  12. 2b816b4b/global_step15258/bf16_zero_pp_rank_107_mp_rank_00_optim_states.pt +3 -0
  13. 2b816b4b/global_step15258/bf16_zero_pp_rank_108_mp_rank_00_optim_states.pt +3 -0
  14. 2b816b4b/global_step15258/bf16_zero_pp_rank_109_mp_rank_00_optim_states.pt +3 -0
  15. 2b816b4b/global_step15258/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +3 -0
  16. 2b816b4b/global_step15258/bf16_zero_pp_rank_110_mp_rank_00_optim_states.pt +3 -0
  17. 2b816b4b/global_step15258/bf16_zero_pp_rank_111_mp_rank_00_optim_states.pt +3 -0
  18. 2b816b4b/global_step15258/bf16_zero_pp_rank_112_mp_rank_00_optim_states.pt +3 -0
  19. 2b816b4b/global_step15258/bf16_zero_pp_rank_113_mp_rank_00_optim_states.pt +3 -0
  20. 2b816b4b/global_step15258/bf16_zero_pp_rank_114_mp_rank_00_optim_states.pt +3 -0
  21. 2b816b4b/global_step15258/bf16_zero_pp_rank_115_mp_rank_00_optim_states.pt +3 -0
  22. 2b816b4b/global_step15258/bf16_zero_pp_rank_116_mp_rank_00_optim_states.pt +3 -0
  23. 2b816b4b/global_step15258/bf16_zero_pp_rank_117_mp_rank_00_optim_states.pt +3 -0
  24. 2b816b4b/global_step15258/bf16_zero_pp_rank_118_mp_rank_00_optim_states.pt +3 -0
  25. 2b816b4b/global_step15258/bf16_zero_pp_rank_119_mp_rank_00_optim_states.pt +3 -0
  26. 2b816b4b/global_step15258/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt +3 -0
  27. 2b816b4b/global_step15258/bf16_zero_pp_rank_120_mp_rank_00_optim_states.pt +3 -0
  28. 2b816b4b/global_step15258/bf16_zero_pp_rank_121_mp_rank_00_optim_states.pt +3 -0
  29. 2b816b4b/global_step15258/bf16_zero_pp_rank_122_mp_rank_00_optim_states.pt +3 -0
  30. 2b816b4b/global_step15258/bf16_zero_pp_rank_123_mp_rank_00_optim_states.pt +3 -0
  31. 2b816b4b/global_step15258/bf16_zero_pp_rank_124_mp_rank_00_optim_states.pt +3 -0
  32. 2b816b4b/global_step15258/bf16_zero_pp_rank_125_mp_rank_00_optim_states.pt +3 -0
  33. 2b816b4b/global_step15258/bf16_zero_pp_rank_126_mp_rank_00_optim_states.pt +3 -0
  34. 2b816b4b/global_step15258/bf16_zero_pp_rank_127_mp_rank_00_optim_states.pt +3 -0
  35. 2b816b4b/global_step15258/bf16_zero_pp_rank_128_mp_rank_00_optim_states.pt +3 -0
  36. 2b816b4b/global_step15258/bf16_zero_pp_rank_129_mp_rank_00_optim_states.pt +3 -0
  37. 2b816b4b/global_step15258/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt +3 -0
  38. 2b816b4b/global_step15258/bf16_zero_pp_rank_130_mp_rank_00_optim_states.pt +3 -0
  39. 2b816b4b/global_step15258/bf16_zero_pp_rank_131_mp_rank_00_optim_states.pt +3 -0
  40. 2b816b4b/global_step15258/bf16_zero_pp_rank_132_mp_rank_00_optim_states.pt +3 -0
  41. 2b816b4b/global_step15258/bf16_zero_pp_rank_133_mp_rank_00_optim_states.pt +3 -0
  42. 2b816b4b/global_step15258/bf16_zero_pp_rank_134_mp_rank_00_optim_states.pt +3 -0
  43. 2b816b4b/global_step15258/bf16_zero_pp_rank_135_mp_rank_00_optim_states.pt +3 -0
  44. 2b816b4b/global_step15258/bf16_zero_pp_rank_136_mp_rank_00_optim_states.pt +3 -0
  45. 2b816b4b/global_step15258/bf16_zero_pp_rank_137_mp_rank_00_optim_states.pt +3 -0
  46. 2b816b4b/global_step15258/bf16_zero_pp_rank_138_mp_rank_00_optim_states.pt +3 -0
  47. 2b816b4b/global_step15258/bf16_zero_pp_rank_139_mp_rank_00_optim_states.pt +3 -0
  48. 2b816b4b/global_step15258/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt +3 -0
  49. 2b816b4b/global_step15258/bf16_zero_pp_rank_140_mp_rank_00_optim_states.pt +3 -0
  50. 2b816b4b/global_step15258/bf16_zero_pp_rank_141_mp_rank_00_optim_states.pt +3 -0
2b812b4b/sbatch_2b812b4bval.sh ADDED
@@ -0,0 +1,168 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ #SBATCH --exclude=nid007571,nid007112,nid006774,nid007502,nid007506,nid007507,nid005145,nid006692,nid007218,nid007123,nid006124,nid006123,nid007496,nid007237,nid006852,nid007206,nid006947,nid007212,nid006977,nid007222,nid005444,nid007219,nid007493,nid007221,nid005300,nid005619,nid006118,nid005203,nid006113,nid006481,nid007077,nid005208,nid005207,nid005879,nid005901
3
+ #SBATCH --nodes=32
4
+ #SBATCH --ntasks-per-node=1
5
+ #SBATCH --cpus-per-task=32
6
+ #SBATCH --mem=256G
7
+ #SBATCH -p standard-g
8
+ #SBATCH -t 48:00:00
9
+ #SBATCH --gpus-per-node=mi250:8
10
+ #SBATCH --exclusive=user
11
+ #SBATCH --hint=nomultithread
12
+ #SBATCH --account=project_462000119
13
+ #SBATCH -o logs/%j.out
14
+ #SBATCH -e logs/%j.err
15
+
16
+ VARIANT=2b812b4bval
17
+ VARIANT_CKPT=checkpoints_2b812b4bc4
18
+
19
+ # if run without sbatch, invoke here
20
+ if [ -z $SLURM_JOB_ID ]; then
21
+ mkdir -p logs
22
+ sbatch "$0"
23
+ exit
24
+ fi
25
+
26
+ set -euo pipefail
27
+
28
+ # symlink logs/latest.out and logs/latest.err
29
+ ln -f -s $SLURM_JOB_ID.out logs/latest.out
30
+ ln -f -s $SLURM_JOB_ID.err logs/latest.err
31
+
32
+ KILL_SWITCH_PATH=kill-switch-$VARIANT
33
+ CHECKPOINT_PATH=$VARIANT_CKPT
34
+ TENSORBOARD_PATH=tensorboard_$VARIANT
35
+
36
+ # Data
37
+ VOCAB_FILE="gpt2/vocab.json"
38
+ MERGE_FILE="gpt2/merges.txt"
39
+ #DATA_PATH="/scratch/project_462000119/data/pile/megatron_data/meg-gpt2_pile_text_document"
40
+ TRAIN_DATA_PATH=train1b5.txt
41
+ # "train: 1.0 0:1 /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_12B_text_document"
42
+ VALID_DATA_PATH=val.txt
43
+ # "validation: 1.0 0:1 /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document"
44
+
45
+ PP_SIZE=1
46
+ TP_SIZE=1
47
+
48
+ MICRO_BATCH_SIZE=2
49
+ GRADIENT_ACCUMULATION_STEPS=1
50
+ WORLD_SIZE=$((SLURM_GPUS_ON_NODE*SLURM_JOB_NUM_NODES))
51
+ GLOBAL_BATCH_SIZE=$((MICRO_BATCH_SIZE*WORLD_SIZE*GRADIENT_ACCUMULATION_STEPS))
52
+
53
+ # Model parameters
54
+ source model_params.sh
55
+ MODEL_PARAM=("${PARAM_2980M[@]}")
56
+ NHIDDEN=${MODEL_PARAM[0]}
57
+ FFN_HIDDEN_SIZE=${MODEL_PARAM[1]}
58
+ KV_SIZE=${MODEL_PARAM[2]}
59
+ NHEADS=${MODEL_PARAM[3]}
60
+ NLAYERS=${MODEL_PARAM[4]}
61
+ SEQ_LEN=2048
62
+
63
+ echo "Model parameters: d_model $NHIDDEN ffw_size $FFN_HIDDEN_SIZE kv_size $KV_SIZE n_heads $NHEADS n_layers $NLAYERS"
64
+
65
+ SAVE_INTERVAL=1000
66
+
67
+ # Tokens: 35546190000
68
+ # -> Samples: 17356538
69
+ TRAIN_SAMPLES=1
70
+
71
+ OPTIMIZER_ARGS=" \
72
+ --optimizer adam \
73
+ --adam-beta1 0.9 \
74
+ --adam-beta2 0.999 \
75
+ --adam-eps 1e-8 \
76
+ --lr 2e-4 \
77
+ --min-lr 2e-5 \
78
+ --lr-decay-style cosine \
79
+ --lr-decay-samples $TRAIN_SAMPLES \
80
+ --lr-warmup-samples 0 \
81
+ --clip-grad 1.0 \
82
+ --weight-decay 1e-1 \
83
+ --override-lr-scheduler \
84
+ --reset-progress \
85
+ --no-load-optim \
86
+ "
87
+
88
+ GPT_ARGS=" \
89
+ --num-layers $NLAYERS \
90
+ --hidden-size $NHIDDEN \
91
+ --num-attention-heads $NHEADS \
92
+ --kv-channels $KV_SIZE \
93
+ --ffn-hidden-size $FFN_HIDDEN_SIZE \
94
+ --seq-length $SEQ_LEN \
95
+ --max-position-embeddings $SEQ_LEN \
96
+ --micro-batch-size $MICRO_BATCH_SIZE \
97
+ --global-batch-size $GLOBAL_BATCH_SIZE \
98
+ --train-samples $TRAIN_SAMPLES \
99
+ --vocab-file $VOCAB_FILE \
100
+ --merge-file $MERGE_FILE \
101
+ --clip-grad 1.0 \
102
+ --kill-switch-path $KILL_SWITCH_PATH \
103
+ --bf16 \
104
+ $OPTIMIZER_ARGS \
105
+ "
106
+
107
+ OUTPUT_ARGS=" \
108
+ --log-interval 10 \
109
+ --save-interval $SAVE_INTERVAL \
110
+ --eval-interval 1 \
111
+ --eval-iters 100 \
112
+ --eval-only true \
113
+ --tensorboard-dir $TENSORBOARD_PATH \
114
+ --tensorboard-queue-size 5 \
115
+ --log-timers-to-tensorboard \
116
+ --log-batch-size-to-tensorboard \
117
+ --log-validation-ppl-to-tensorboard \
118
+ "
119
+
120
+ ZERO_STAGE=0
121
+
122
+ mkdir -p ds_configs
123
+ DS_CONFIG_PATH="ds_configs/$SLURM_JOB_ID.json"
124
+
125
+ cat <<EOF > $DS_CONFIG_PATH
126
+ {
127
+ "train_micro_batch_size_per_gpu": $MICRO_BATCH_SIZE,
128
+ "train_batch_size": $GLOBAL_BATCH_SIZE,
129
+ "gradient_clipping": 1.0,
130
+ "zero_optimization": {
131
+ "stage": $ZERO_STAGE
132
+ },
133
+ "bf16": {
134
+ "enabled": true
135
+ },
136
+ "steps_per_print": 2000,
137
+ "wall_clock_breakdown": false
138
+ }
139
+ EOF
140
+
141
+ DEEPSPEED_ARGS=" \
142
+ --deepspeed \
143
+ --deepspeed_config $DS_CONFIG_PATH \
144
+ --zero-stage $ZERO_STAGE \
145
+ "
146
+
147
+ CMD=" \
148
+ Megatron-DeepSpeed/pretrain_gpt.py \
149
+ --tensor-model-parallel-size $TP_SIZE \
150
+ --pipeline-model-parallel-size $PP_SIZE \
151
+ $GPT_ARGS \
152
+ $OUTPUT_ARGS \
153
+ --save $CHECKPOINT_PATH \
154
+ --load $CHECKPOINT_PATH \
155
+ --train-weighted-split-paths-path $TRAIN_DATA_PATH \
156
+ --valid-weighted-split-paths-path $VALID_DATA_PATH \
157
+ --data-impl mmap \
158
+ $DEEPSPEED_ARGS \
159
+ "
160
+
161
+ echo $CMD
162
+
163
+ echo "START $SLURM_JOBID: $(date)"
164
+
165
+ # bash launch_srun_32.sh $CMD
166
+ srun --label launch.sh $CMD
167
+
168
+ echo "END $SLURM_JOBID: $(date)"
2b816b4b/3479666.err ADDED
The diff for this file is too large to render. See raw diff
 
2b816b4b/3479666.out ADDED
The diff for this file is too large to render. See raw diff
 
2b816b4b/global_step15258/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e29d549f5e4b0e9ff68e702c053696f13395359950e4c6501b202bf95d417cc6
3
+ size 131677719
2b816b4b/global_step15258/bf16_zero_pp_rank_100_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:203cb8c35fda4a3bd3d21f120758f8c215da12216b007147e777b018a0c50253
3
+ size 131677805
2b816b4b/global_step15258/bf16_zero_pp_rank_101_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c77bd9a62894989bdf775f1234817421750cdc502a795f2b74a9c433e16280e0
3
+ size 131677741
2b816b4b/global_step15258/bf16_zero_pp_rank_102_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0957e7811671a3c5c7ac25af57de3d750e17ce9c8b8359c58931657685e2eb7
3
+ size 131677741
2b816b4b/global_step15258/bf16_zero_pp_rank_103_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88b5369a7c50a79b1774dd2f32b17599c7c9315f2a1ef4657dcba1c4eeed5a6d
3
+ size 131677741
2b816b4b/global_step15258/bf16_zero_pp_rank_104_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed41ba2b9d183a3c2ca0b26969b97b62909594fa63aa3434959f31ae1672fde2
3
+ size 131677741
2b816b4b/global_step15258/bf16_zero_pp_rank_105_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c990d1adfb3d07bc8a21ff3ec30b65b6846d17ce5f8f10bf7bf0390fff9542c8
3
+ size 131677677
2b816b4b/global_step15258/bf16_zero_pp_rank_106_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7476a6063fc670796b965e385447053d6a21e458e44a9bf67b21f5bf990d7b8e
3
+ size 131677741
2b816b4b/global_step15258/bf16_zero_pp_rank_107_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91c2ba68395cce4c81893230ae91d074baf9f0859314e21100303c0950459c2f
3
+ size 131677805
2b816b4b/global_step15258/bf16_zero_pp_rank_108_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1fc0adac133cbcb07c52e2b316a0a50fcf2fddd46f120d420e1daea8fb3383e
3
+ size 131677869
2b816b4b/global_step15258/bf16_zero_pp_rank_109_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:586ee0f1c0f00666b2d5b8ba0e183347ecb07cc9d2971a9ce4df95ad651745ff
3
+ size 131677741
2b816b4b/global_step15258/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a049d429f456c95b4f476bd96ac9ce21e5576a20f056c4f4b42752596218399
3
+ size 131677794
2b816b4b/global_step15258/bf16_zero_pp_rank_110_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6d8160656b368020ca7b32bdf899165cea942399095319929566b8689d93cda
3
+ size 131677741
2b816b4b/global_step15258/bf16_zero_pp_rank_111_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:601dc6a55e76964aff40a3bb28fbbdbff8e689c68795e9c88035f5d762080e43
3
+ size 131677741
2b816b4b/global_step15258/bf16_zero_pp_rank_112_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31994a53f3563c32d6d725b0a9331b71525e9c5458a74d260fc225b12df964d3
3
+ size 131677741
2b816b4b/global_step15258/bf16_zero_pp_rank_113_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5884ec4f228541a5f84cc2e803016482201797dc9864977dcfc16ef2f3f38eb1
3
+ size 131677805
2b816b4b/global_step15258/bf16_zero_pp_rank_114_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25ec9f8dfcdf06fc30fe6e2bd354e4efed269f67014c1faaafd5214f11a2cc2f
3
+ size 131677677
2b816b4b/global_step15258/bf16_zero_pp_rank_115_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1995c914d0f4fc6c784f9f45a6ba07d11b7e10109c3977abe31129a18619ac10
3
+ size 131677805
2b816b4b/global_step15258/bf16_zero_pp_rank_116_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ed6eb75b6c038b14fe53ab093fcebcfb3d2e0b8ae3d98cf61d85dabfbd321f7
3
+ size 131677805
2b816b4b/global_step15258/bf16_zero_pp_rank_117_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a2c643f10e1c93e779baca28cf50d20097c15243a277ee70f155a1a793b4504
3
+ size 131677677
2b816b4b/global_step15258/bf16_zero_pp_rank_118_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48c7fe3b31847923a68f10843559d25e0f2f3057b6b4a01b4dd920f411f1b94c
3
+ size 131677741
2b816b4b/global_step15258/bf16_zero_pp_rank_119_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:566bc4a18118b84680d4f8dc0161de86b9ae18e70914d6f23749ff6b06af9b01
3
+ size 131677741
2b816b4b/global_step15258/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06e9c945a91d1bb1e5350c822dd35b8fae99c4a692710974b0373004d121620d
3
+ size 131677730
2b816b4b/global_step15258/bf16_zero_pp_rank_120_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f77657ab706ccc1480c14e3a8bf5e58c885ad680b72f6e6a8ea719fd3de2c026
3
+ size 131677677
2b816b4b/global_step15258/bf16_zero_pp_rank_121_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f03d8d9b31673645b06f028809072ccc55902f772c70a00ea5f981a4aef1429
3
+ size 131677805
2b816b4b/global_step15258/bf16_zero_pp_rank_122_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eda43ef32f0daa7c6c778a57b845a36f29f13bbac081619da2e5aff286a6cf3c
3
+ size 131677741
2b816b4b/global_step15258/bf16_zero_pp_rank_123_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7e714302d959dd72e6ab1c3147f5c31627f848d8ade2a744f08425a2f8c489f
3
+ size 131677869
2b816b4b/global_step15258/bf16_zero_pp_rank_124_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1db61d5ebefeb5627049f09fd4f9d3da35dae4d60eb5e89914ecf54ba998d2eb
3
+ size 131677805
2b816b4b/global_step15258/bf16_zero_pp_rank_125_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4bd46d162513f96ab618f38c6907285bf1aa751492e25f9cd60b0c6070e1dff
3
+ size 131677741
2b816b4b/global_step15258/bf16_zero_pp_rank_126_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b8ec5e7876d28c0cce28597ab3d684adad4a2c7fa967ea0090e610cb0c1eeb1
3
+ size 131677805
2b816b4b/global_step15258/bf16_zero_pp_rank_127_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb271777a10e04e27a679f1fce6a6711e4d1640f059f5b80a1b599054c0cd086
3
+ size 131677677
2b816b4b/global_step15258/bf16_zero_pp_rank_128_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2fcc93e874e78d10395539962b2ca3bddd715c9a0998839463be6bbcc735810
3
+ size 131677741
2b816b4b/global_step15258/bf16_zero_pp_rank_129_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71056755e16b674c4915ad9a9d7d84817b5e55a6fbff9870b7c3deaae3ccbcac
3
+ size 131677677
2b816b4b/global_step15258/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a442a72edb918844bed075c318fb5d5d96d0cee1cc07e933e1b8df5b0cbf017a
3
+ size 131677730
2b816b4b/global_step15258/bf16_zero_pp_rank_130_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8fb1004f3e0dcf47af0d3df9693792c07463cf461147a9cecd16952299a2978
3
+ size 131677869
2b816b4b/global_step15258/bf16_zero_pp_rank_131_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4cf79bd333c61b492c336c9e7b1a1eb23cf547d13d53e506bb5fcf300afe6bee
3
+ size 131677741
2b816b4b/global_step15258/bf16_zero_pp_rank_132_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4dff7dc803d8bb133e004266d41b8679f6490537c9a788ccc9cfeb2ccb0793ac
3
+ size 131677741
2b816b4b/global_step15258/bf16_zero_pp_rank_133_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e9c4040216af70c8906d9c5f8aee64e343b48e3ddaef9724c623d04a45c285e
3
+ size 131677677
2b816b4b/global_step15258/bf16_zero_pp_rank_134_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9f7bb4b5766b10f242d7e891364247f6470d410a456559a8fd7b470d0dbfa31
3
+ size 131677741
2b816b4b/global_step15258/bf16_zero_pp_rank_135_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97cb1ad5479eaa4a47a90a4a8faabe0c397e582a776bafc405c33d141d86493c
3
+ size 131677805
2b816b4b/global_step15258/bf16_zero_pp_rank_136_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c18c6939b3014896ccfb728ae780ee89bc5c1ce65401e72c0d8f73fa01a7580f
3
+ size 131677741
2b816b4b/global_step15258/bf16_zero_pp_rank_137_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7c91c2a782880e6185d9ee6b98311e9d95a4ac4171e4e9b216c8a3974560f13
3
+ size 131677805
2b816b4b/global_step15258/bf16_zero_pp_rank_138_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7bff1d682255c029feba399e1ad532400cf1ed1054d324eae06e88c672d95d6
3
+ size 131677869
2b816b4b/global_step15258/bf16_zero_pp_rank_139_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bda846bafb986c218cd7ded722f518b4c89206490a27b835bde343ea2bed8d78
3
+ size 131677805
2b816b4b/global_step15258/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1594db4a37ab0cb20aa414b15ae3ff41c8034677b3f89ade1dd94fc975711bb2
3
+ size 131677666
2b816b4b/global_step15258/bf16_zero_pp_rank_140_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db6e94111b141f59fcc69e366002b3b0455acd7ba95f863d373806fa97ca971e
3
+ size 131677677
2b816b4b/global_step15258/bf16_zero_pp_rank_141_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b36a27bdfac68c07a9c06fd02ab24669c23260bb72efd739c3e29b1847dfaa0b
3
+ size 131677805