tangledgroup
/

tangled-llama-a-128k-base-v0.1

Text Generation

text-generation-inference

Inference Endpoints

Model card Files and versions Community

mtasic85 committed 9 days ago

Commit

bbec1b1

•

1 Parent(s): 4dc0a8b

pretrain model

Files changed (1) hide show

scripts/pretrain-model.yaml +8 -6

scripts/pretrain-model.yaml CHANGED Viewed

@@ -72,13 +72,15 @@ train:
  micro_batch_size: 16
  # Number of iterations with learning rate warmup active (type: int, default: 2000)
- lr_warmup_steps: 2000
  # Number of epochs to train on (type: Optional[int], default: null)
  epochs:
  # Total number of tokens to train on (type: Optional[int], default: 3000000000000)
- max_tokens: 21260243688 # 5187956 * 2049 * 2
  # Limits the number of optimizer steps to run. (type: Optional[int], default: null)
  max_steps:
@@ -121,15 +123,15 @@ optimizer:
  init_args:
  # (type: float, default: 0.001)
- lr: 1e-2
  # (type: float, default: 0.01)
- weight_decay: 0.1
  # (type: tuple, default: (0.9,0.999))
  betas:
  - 0.9
- - 0.95
 # How many devices/GPUs to use. Uses all GPUs by default. (type: Union[int, str], default: auto)
 devices: auto
@@ -145,4 +147,4 @@ tokenizer_dir: "../"
 logger_name: "wandb"
 # The random seed to use for reproducibility. (type: int, default: 42)
-seed: 42

  micro_batch_size: 16
  # Number of iterations with learning rate warmup active (type: int, default: 2000)
+ # lr_warmup_steps: 2000
+ lr_warmup_steps: 10
  # Number of epochs to train on (type: Optional[int], default: null)
  epochs:
  # Total number of tokens to train on (type: Optional[int], default: 3000000000000)
+ # max_tokens: 21260243688 # 5187956 * 2049 * 2
+ max_tokens: 10630121844 # 5187956 * 2049 * 1
  # Limits the number of optimizer steps to run. (type: Optional[int], default: null)
  max_steps:
  init_args:
  # (type: float, default: 0.001)
+ lr: 1e-3
  # (type: float, default: 0.01)
+ weight_decay: 1e-2
  # (type: tuple, default: (0.9,0.999))
  betas:
  - 0.9
+ - 0.999
 # How many devices/GPUs to use. Uses all GPUs by default. (type: Union[int, str], default: auto)
 devices: auto
 logger_name: "wandb"
 # The random seed to use for reproducibility. (type: int, default: 42)
+seed: 23