abshetty committed · verified
Commit b51bdcc · 1 parent: b89f76b

Update README.md

Files changed (1): README.md (+41 -1)
README.md CHANGED
@@ -65,4 +65,44 @@ Cite TRL as:
   publisher = {GitHub},
   howpublished = {\url{https://github.com/huggingface/trl}}
 }
-```
+```
+
+
+# Train the model
+training_args = DPOConfig(
+    output_dir="llava-lora-12-06-lalpha-256",
+    bf16=True,
+    gradient_checkpointing=True,
+    per_device_train_batch_size=8,
+    per_device_eval_batch_size=4,
+    gradient_accumulation_steps=32,
+    evaluation_strategy="steps",
+    eval_steps=1,
+    learning_rate=1e-5,
+    beta=0.1,
+    warmup_ratio=0.1,
+    lr_scheduler_type="cosine",
+    num_train_epochs=2,
+    # rpo_alpha=0.1,
+    dataset_num_proc=32,  # tokenization will use 32 processes
+    dataloader_num_workers=32,  # data loading will use 32 workers
+    logging_steps=1,
+)
+
+# Define the LoRA configuration with the specified rank
+lora_config = LoraConfig(
+    r=64,  # set rank to 64
+    lora_alpha=256,  # set scaling factor to 256
+    target_modules="all-linear",  # target all linear layers
+    lora_dropout=0.1,
+)
+
+trainer = DPOTrainer(
+    model,
+    ref_model=None,  # not needed when using PEFT
+    args=training_args,
+    train_dataset=train_dataset,
+    eval_dataset=eval_dataset,
+    tokenizer=processor,
+    peft_config=lora_config,
+)
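
The added snippet references `model`, `processor`, `train_dataset`, and `eval_dataset` without defining them, and omits its imports. A minimal sketch of the assumed setup, for a TRL version where `DPOTrainer` still accepts a `tokenizer=` argument; the checkpoint and dataset names below are placeholders, not taken from the commit:

```python
# Hypothetical setup for the snippet above; model_id and the dataset
# are assumptions, not part of the original commit.
import torch
from datasets import load_dataset
from peft import LoraConfig
from transformers import AutoModelForVision2Seq, AutoProcessor
from trl import DPOConfig, DPOTrainer

model_id = "llava-hf/llava-1.5-7b-hf"  # assumed LLaVA checkpoint
model = AutoModelForVision2Seq.from_pretrained(model_id, torch_dtype=torch.bfloat16)
processor = AutoProcessor.from_pretrained(model_id)

# Any preference dataset with "prompt"/"chosen"/"rejected" columns works;
# this particular dataset is an assumption.
dataset = load_dataset("openbmb/RLAIF-V-Dataset", split="train")
splits = dataset.train_test_split(test_size=0.05)
train_dataset, eval_dataset = splits["train"], splits["test"]
```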
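From there, training and saving follow the standard `Trainer` flow; a short usage sketch:

```python
# Run DPO fine-tuning; with peft_config set, only the LoRA adapter
# weights are trained and saved, not the full base model.
trainer.train()
trainer.save_model(training_args.output_dir)
```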