{
  "output_dir": "output/user-baichuan2-13b-v2-3.6",
  "model_name_or_path": "/home/jiakangxiang/.cache/modelscope/hub/baichuan-inc/Baichuan2-13B-Chat",
  "train_file": "./data/train.jsonl",
  "template_name": "baichuan2",
  "num_train_epochs": 1,
  "per_device_train_batch_size": 1,
  "gradient_accumulation_steps": 16,
  "learning_rate": 0.0001,
  "max_seq_length": 3200,
  "logging_steps": 10,
  "save_steps": 100,
  "save_total_limit": 3,
  "lr_scheduler_type": "constant_with_warmup",
  "warmup_steps": 50,
  "lora_rank": 16,
  "lora_alpha": 16,
  "lora_dropout": 0.05,
  "gradient_checkpointing": true,
  "disable_tqdm": false,
  "optim": "paged_adamw_32bit",
  "seed": 42,
  "fp16": true,
  "bf16": false,
  "report_to": "tensorboard",
  "dataloader_num_workers": 0,
  "save_strategy": "steps",
  "weight_decay": 0,
  "max_grad_norm": 0.3,
  "remove_unused_columns": false
}