{
  "adapter_path": "adapters",
  "batch_size": 1,
  "config": null,
  "data": "data",
  "grad_checkpoint": null,
  "iters": 500,
  "learning_rate": 1e-05,
  "lora_layers": 16,
  "lora_parameters": {
    "rank": 8,
    "alpha": 16,
    "dropout": 0.0,
    "scale": 10.0
  },
  "lr_schedule": null,
  "max_seq_length": 8192,
  "model": "Qwen/Qwen2-1.5B-Instruct",
  "resume_adapter_file": null,
  "save_every": 100,
  "seed": 0,
  "steps_per_eval": 200,
  "steps_per_report": 10,
  "test": false,
  "test_batches": 500,
  "train": true,
  "use_dora": false,
  "val_batches": 25
}
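
The `lora_parameters` block (`rank`, `dropout`, `scale`; `alpha` is often folded in as `alpha/rank` in other LoRA implementations) controls the size of the low-rank adapter added to each adapted linear layer. The sketch below is a minimal, illustrative Python model of how those values typically enter the forward pass; the class name, shapes, and initialization are assumptions for illustration, not mlx-lm's actual `LoRALinear`.

```python
import numpy as np

class LoRALinearSketch:
    """Illustrative sketch of a LoRA-adapted linear layer (not mlx-lm's implementation)."""

    def __init__(self, in_features, out_features, rank=8, scale=10.0, dropout=0.0):
        rng = np.random.default_rng(0)
        # Frozen base weight (in practice this comes from the pretrained model).
        self.W = rng.standard_normal((out_features, in_features)) * 0.02
        # Trainable low-rank factors: A projects down to `rank`, B projects back up.
        self.A = rng.standard_normal((rank, in_features)) * 0.01
        # B is zero-initialized so the adapted layer starts out identical to the base layer.
        self.B = np.zeros((out_features, rank))
        self.scale = scale
        self.dropout = dropout

    def __call__(self, x):
        # Optional dropout on the adapter input only.
        x_adapter = x
        if self.dropout > 0.0:
            mask = np.random.rand(*x.shape) > self.dropout
            x_adapter = x * mask / (1.0 - self.dropout)
        # Base forward pass plus the scaled low-rank update:
        # y = x W^T + scale * (x A^T) B^T
        return x @ self.W.T + self.scale * ((x_adapter @ self.A.T) @ self.B.T)

layer = LoRALinearSketch(in_features=64, out_features=64, rank=8, scale=10.0, dropout=0.0)
y = layer(np.ones((1, 64)))
print(y.shape)  # (1, 64)
```

Because `B` starts at zero, the adapter contributes nothing at step 0, and only the small `A` and `B` matrices (plus `scale`) are trained, which is what keeps LoRA fine-tuning cheap relative to full fine-tuning of `W`.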