{
  "use_lora": false,
  "lora_rank": 8,
  "lora_alpha": 16,
  "lora_dropout": 0.0,

  "from_checkpoint": "/root/autodl-tmp/checkpoint_20241130_005942_step_307000.pt",
  "save_checkpoint_to": "/root/autodl-tmp/checkpoint",
  "dataset_path": [
    ["/root/autodl-tmp/sft_train.base64", "/root/autodl-tmp/sft_val.base64"]
  ],
  "tokenizer_path": "/root/Nano/tokenizer/tokenizer_16384.json",

  "random_seed": 39,
  "batch_size": 120,
  "gradient_accumulation_steps": 2,
  "grad_clip": 1.0,

  "dropout": 0.1,

  "learning_rate": 1e-6,
  "weight_decay": 1e-1,
  "beta1": 0.9,
  "beta2": 0.95,

  "decay_lr": false,
  "warmup_iters": 10000,
  "lr_decay_iters": 1e9,
  "min_lr": 1e-8,

  "eval_interval": 500,
  "log_interval": 10,
  "eval_iters": 5,

  "backend": "nccl",
  "device": "cuda",
  "sdp_kernel": "flash",
  "dtype": "bfloat16",
  "use_amp": true
}
|