{
  "pretrained_model_name_or_path": "",
  "path_relm": "relm-m0.3.bin",
  "path_train": "csc_public.train.json",
  "path_dev": "csc_public.dev.json",
  "path_tet": "csc_public.tet.json",
  "model_save_path": "../output/relm_v1",
  "task_name": "relm_csc",
  "do_lower_case": true,
  "do_train": true,
  "do_eval": true,
  "do_test": true,
  "gradient_accumulation_steps": 4,
  "warmup_proportion": 0.1,
  "num_warmup_steps": null,
  "max_train_steps": null,
  "num_train_epochs": 3,
  "train_batch_size": 8,
  "eval_batch_size": 8,
  "learning_rate": 3e-05,
  "max_seq_length": 256,
  "max_grad_norm": 1.0,
  "weight_decay": 0.0005,
  "save_steps": 1000,
  "anchor": null,
  "seed": 42,
  "lr_scheduler_type": "cosine",
  "loss_type": "focal_loss",
  "mask_mode": "noerror",
  "loss_det_rate": 0.3,
  "prompt_length": 0,
  "mask_rate": 0.3,
  "threshold": 0.5,
  "flag_dynamic_encode": false,
  "flag_loss_period": false,
  "flag_cpo_loss": false,
  "flag_fast_tokenizer": true,
  "flag_pin_memory": true,
  "flag_train": false,
  "flag_fp16": false,
  "flag_cuda": true,
  "flag_skip": true,
  "flag_mft": true,
  "num_workers": 0,
  "CUDA_VISIBLE_DEVICES": "0",
  "USE_TORCH": "1"
}