global_batch_size: 256
Browse files
scripts/pretrain-core-model.yaml
CHANGED
@@ -63,8 +63,8 @@ train:
   log_interval: 1
 
   # Number of samples between optimizer steps across data-parallel ranks (type: int, default: 512)
-  global_batch_size: 512
-
+  # global_batch_size: 512
+  global_batch_size: 256
 
   # Number of samples per data-parallel rank (type: int, default: 4)
   # micro_batch_size: 4