Update README.md
README.md CHANGED
@@ -11,6 +11,35 @@ tags: []
 
 ## Model Details
 
+peft_config = LoraConfig(
+    r=5, # TODO: play with this number
+    lora_alpha=9, # TODO: play with this number
+    target_modules=['q_proj', 'v_proj', 'k_proj'],
+    lora_dropout=0.05,
+    bias="none",
+    task_type="CAUSAL_LM" # TODO: you need to figure this out. HINT https://github.com/huggingface/peft/blob/3d2bf9a8b261ed2960f26e61246cf0aa624a6115/src/peft/utils/peft_types.py#L67
+)
+
+training_args = TrainingArguments(
+    per_device_train_batch_size=2,
+    gradient_accumulation_steps=2,
+    gradient_checkpointing=False,
+    max_grad_norm=0.3,
+    num_train_epochs=2, # TODO: play with this number
+    save_steps=100,
+    learning_rate=0.0003, # TODO: play with this number
+    bf16=True,
+    save_total_limit=3,
+    logging_steps=10,
+    output_dir='./sft_models',
+    optim="adamw_torch",
+    lr_scheduler_type="cosine",
+    warmup_ratio=0.05,
+    remove_unused_columns=False,
+    report_to="none",
+)
+
+generate_max_length: int = 64
 ### Model Description
 
 <!-- Provide a longer summary of what this model is. -->
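For orientation, the diff above only declares the two config objects. Below is a minimal sketch of how they are typically wired together with `peft` and `transformers`; the base model, toy dataset, and data collator are illustrative assumptions, not part of this commit, and the trailing `generate_max_length: int = 64` appears to be a separate decode-time setting that the sketch does not use.

```python
# Minimal LoRA fine-tuning sketch. Assumptions (not from the commit):
# the base model "facebook/opt-125m", the toy dataset, and the collator.
from datasets import Dataset
from peft import LoraConfig, TaskType, get_peft_model
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
)

# Placeholder base model: OPT names its attention projections
# q_proj / k_proj / v_proj, matching target_modules above.
base_model = "facebook/opt-125m"
tokenizer = AutoTokenizer.from_pretrained(base_model)
model = AutoModelForCausalLM.from_pretrained(base_model)

peft_config = LoraConfig(
    r=5,
    lora_alpha=9,
    target_modules=["q_proj", "v_proj", "k_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.CAUSAL_LM,  # the enum the HINT link points at
)

# Wrap the base model so only the LoRA adapter weights are trainable.
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

# Tiny toy dataset so the sketch runs end to end.
def tokenize(batch):
    return tokenizer(batch["text"], truncation=True, max_length=64)

train_dataset = Dataset.from_dict(
    {"text": ["Hello world.", "LoRA fine-tuning sketch.", "Short example text."]}
).map(tokenize, batched=True, remove_columns=["text"])

# Mirrors the key settings from the diff; bf16=True there assumes
# bf16-capable hardware, so it is disabled here for portability.
training_args = TrainingArguments(
    output_dir="./sft_models",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=2,
    max_grad_norm=0.3,
    num_train_epochs=2,
    learning_rate=0.0003,
    bf16=False,
    logging_steps=10,
    optim="adamw_torch",
    lr_scheduler_type="cosine",
    warmup_ratio=0.05,
    remove_unused_columns=False,
    report_to="none",
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    # mlm=False produces plain causal-LM labels (inputs shifted by one).
    data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
trainer.train()
```

One note on the numbers the TODOs invite you to play with: LoRA scales adapter updates by `lora_alpha / r`, so `r=5` with `lora_alpha=9` gives a scaling of 1.8; raising `r` adds adapter capacity while the ratio controls how strongly the update perturbs the frozen weights.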