mtasic85 committed
Commit c3905b0 · 1 parent: 356cb1c

new tokens

Files changed (1):
  1. scripts/pretrain-core-model.yaml +4 -4
scripts/pretrain-core-model.yaml CHANGED
@@ -18,7 +18,7 @@ model_config:
   bias: False
   norm_class_name: "RMSNorm"
   mlp_class_name: "LLaMAMLP"
- intermediate_size: 3584
+ intermediate_size: 2688 # n_embd * 5.25
   norm_eps: 1e-5
   rope_base: 500000
   rope_adjustments:
@@ -67,8 +67,8 @@ train:
   # global_batch_size: 256

   # Number of samples per data-parallel rank (type: int, default: 4)
- micro_batch_size: 4
- # micro_batch_size: 2
+ # micro_batch_size: 4
+ micro_batch_size: 2
   # micro_batch_size: 1

   # Number of iterations with learning rate warmup active (type: int, default: 2000)
@@ -78,7 +78,7 @@ train:
   epochs:

   # Total number of tokens to train on (type: Optional[int], default: 3000000000000)
- max_tokens: 7318364160
+ max_tokens: 6428475392

   # Limits the number of optimizer steps to run. (type: Optional[int], default: null)
   max_steps:
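The new intermediate_size follows the expansion-factor comment in the diff. A minimal sanity check, assuming n_embd = 512 (implied by the "n_embd * 5.25" comment; n_embd itself is not shown in this hunk):

```python
# Sanity check of the MLP width change, assuming n_embd = 512
# (implied by the "n_embd * 5.25" comment; n_embd is not part of this diff).
n_embd = 512

assert 3584 == n_embd * 7          # old width: 7.0x expansion
assert 2688 == int(n_embd * 5.25)  # new width: 5.25x expansion
```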
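Halving micro_batch_size trades throughput for per-step memory; in a litgpt-style trainer the effective batch is preserved by gradient accumulation. A sketch, assuming the commented-out global_batch_size of 256 and a single device (both assumptions, not settings shown in this diff):

```python
# Sketch: gradient accumulation under the new micro batch, assuming
# global_batch_size = 256 (the commented-out value above) and devices = 1.
global_batch_size = 256
devices = 1

for micro_batch_size in (4, 2):  # before / after this commit
    grad_accum_iters = global_batch_size // (micro_batch_size * devices)
    print(micro_batch_size, grad_accum_iters)  # 4 -> 64 iters, 2 -> 128 iters
```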
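The max_tokens update lines up with the commit message ("new tokens"): the corpus appears to have been retokenized, yielding roughly 12% fewer tokens, though that reading is an inference from this diff alone. A quick comparison of the two budgets:

```python
# Rough comparison of the old and new token budgets.
old_max_tokens = 7_318_364_160
new_max_tokens = 6_428_475_392

print(f"{1 - new_max_tokens / old_max_tokens:.1%}")  # ~12.2% fewer tokens

# Both counts divide evenly into 2048-token blocks (an assumed block size;
# block_size is not part of this diff).
assert old_max_tokens % 2048 == 0
assert new_max_tokens % 2048 == 0
```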