marianna13 committed
Commit 28c91eb
1 Parent(s): f213d4d

Upload configs.yaml with huggingface_hub

Files changed (1)
  1. configs.yaml +46 -0
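
For context, here is a minimal sketch (not part of this commit) of how a file like this is typically pushed with huggingface_hub; the repo_id is inferred from the hub_model_id field in the config and is an assumption.

from huggingface_hub import HfApi

# Sketch only: repo_id is assumed from hub_model_id in configs.yaml.
api = HfApi()  # authentication comes from `huggingface-cli login` or the HF_TOKEN env var
api.upload_file(
    path_or_fileobj="configs.yaml",   # local file to upload
    path_in_repo="configs.yaml",      # destination path inside the repo
    repo_id="mlfoundations-dev/oh-mistral-bs2048_lr2_00E-06_schedulercosine_with_min_lr_warmup1_00E-01_minlr5_00E-07",
    commit_message="Upload configs.yaml with huggingface_hub",
)
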
configs.yaml ADDED
@@ -0,0 +1,46 @@
+ adam_beta1: 0.9
+ adam_beta2: 0.999
+ assistant_tag: gpt
+ bf16: true
+ content_tag: value
+ cutoff_len: 2048
+ dataset: /p/data1/mmlaion/marianna/lf_datasets/mlfoundations-dev/oh-dcft-v3.1-gpt-4o-mini
+ dataset_dir: ONLINE
+ deepspeed: dcft/train/zero3.json
+ do_train: true
+ enable_liger_kernel: false
+ eval_strategy: 'no'
+ finetuning_type: full
+ formatting: sharegpt
+ global_batch_size: 2048
+ gradient_accumulation_steps: 2
+ gradient_checkpointing: true
+ hub_model_id: mlfoundations-dev/oh-mistral-bs2048_lr2_00E-06_schedulercosine_with_min_lr_warmup1_00E-01_minlr5_00E-07
+ learning_rate: 2.0e-06
+ logging_steps: 10
+ lr_scheduler_kwargs:
+   min_lr: 5.0e-07
+ lr_scheduler_type: cosine_with_min_lr
+ max_grad_norm: 1.0
+ messages: conversations
+ model_name_or_path: mistralai/Mistral-7B-v0.3
+ neat_packing: true
+ num_train_epochs: 3.0
+ output_dir: /p/data1/mmlaion/marianna/dcft_checkpoints/oh-mistral-bs2048_lr2.00E-06_schedulercosine_with_min_lr_warmup1.00E-01_minlr5.00E-07
+ overwrite_cache: true
+ overwrite_output_dir: false
+ packing: true
+ per_device_train_batch_size: 8
+ plot_loss: false
+ preprocessing_num_workers: 32
+ push_to_db: false
+ push_to_hub: false
+ report_to: wandb
+ role_tag: from
+ run_name: oh-mistral-bs2048_lr2.00E-06_schedulercosine_with_min_lr_warmup1.00E-01_minlr5.00E-07
+ save_strategy: epoch
+ stage: sft
+ template: mistral
+ user_tag: human
+ warmup_ratio: 0.1
+ weight_decay: 0.1
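
The keys above match the LLaMA-Factory SFT training-config schema (stage: sft, ShareGPT-style formatting, a DeepSpeed ZeRO-3 json, the mistral chat template). As a hedged sketch, assuming the usual convention that global_batch_size equals per_device_train_batch_size × gradient_accumulation_steps × data-parallel world size (not stated anywhere in this commit), the batch settings imply a 128-process run:

import yaml

# Sketch: derive the data-parallel world size implied by the batch-size settings,
# under the convention assumed above.
with open("configs.yaml") as f:
    cfg = yaml.safe_load(f)

per_device = cfg["per_device_train_batch_size"]   # 8
grad_accum = cfg["gradient_accumulation_steps"]   # 2
global_bs = cfg["global_batch_size"]              # 2048

world_size = global_bs // (per_device * grad_accum)  # 2048 // 16 = 128
print(f"implied data-parallel world size: {world_size}")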