adam_beta1: 0.9
adam_beta2: 0.999
assistant_tag: gpt
bf16: true
content_tag: value
cutoff_len: 2048
dataset: /p/data1/mmlaion/marianna/lf_datasets/mlfoundations-dev/oh-dcft-v3.1-gpt-4o-mini
dataset_dir: ONLINE
deepspeed: dcft/train/zero3.json
do_train: true
enable_liger_kernel: false
eval_strategy: 'no'
finetuning_type: full
formatting: sharegpt
global_batch_size: 2048
gradient_accumulation_steps: 2
gradient_checkpointing: true
hub_model_id: mlfoundations-dev/oh-mistral-bs2048_lr5_00E-06_schedulerconstant_warmup5_00E-02_minlr
learning_rate: 5.0e-06
logging_steps: 10
lr_scheduler_kwargs:
  min_lr: null
lr_scheduler_type: constant
max_grad_norm: 1.0
messages: conversations
model_name_or_path: mistralai/Mistral-7B-v0.3
neat_packing: true
num_train_epochs: 3.0
output_dir: /p/data1/mmlaion/marianna/dcft_checkpoints/oh-mistral-bs2048_lr5.00E-06_schedulerconstant_warmup5.00E-02_minlr
overwrite_cache: true
overwrite_output_dir: false
packing: true
per_device_train_batch_size: 8
plot_loss: false
preprocessing_num_workers: 32
push_to_db: false
push_to_hub: false
report_to: wandb
role_tag: from
run_name: oh-mistral-bs2048_lr5.00E-06_schedulerconstant_warmup5.00E-02_minlr
save_strategy: epoch
stage: sft
template: mistral
user_tag: human
warmup_ratio: 0.05
weight_decay: 0.1
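
# The keys above follow LLaMA-Factory-style SFT arguments (stage, template,
# finetuning_type, neat_packing, etc.); assuming that layout, a run would
# typically be launched with `llamafactory-cli train <this-config>.yaml`
# (file path hypothetical). Note that global_batch_size: 2048 together with
# per_device_train_batch_size: 8 and gradient_accumulation_steps: 2 implies
# a world size of 2048 / (8 * 2) = 128 GPUs.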