# Prompt Format
```
# The system line is Turkish for: "You are a doctor. Answer the questions accordingly."
alpaca_prompt = """Sen bir doktorsun. Soruları buna göre cevapla.
### <|reserved_special_token_0|>:
{}

### <|reserved_special_token_1|>:
{}"""
```
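
The template is consumed through the `text` field that `SFTTrainer` reads (`dataset_text_field = "text"` below). A minimal formatting sketch, assuming instruction-style columns named `instruction` and `output` (both column names are assumptions, not taken from the original setup):

```
# Hypothetical example: building the "text" field for SFTTrainer.
# The column names "instruction" and "output" are assumptions for illustration.
EOS_TOKEN = tokenizer.eos_token  # appended so the model learns to stop generating

def formatting_prompts_func(examples):
    texts = []
    for instruction, output in zip(examples["instruction"], examples["output"]):
        texts.append(alpaca_prompt.format(instruction, output) + EOS_TOKEN)
    return {"text": texts}

train_dataset = train_dataset.map(formatting_prompts_func, batched = True)
eval_dataset = eval_dataset.map(formatting_prompts_func, batched = True)
```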

# Training args
```
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

batch_size = 128                      # effective batch size per optimizer step
gradient_accumulation_steps = 32
num_train_epochs = 2
per_device_batch_size = int(batch_size / gradient_accumulation_steps)  # 128 / 32 = 4
output_dir = "outputs"                # placeholder; the original checkpoint directory is not shown
training_args = TrainingArguments(
        per_device_train_batch_size = per_device_batch_size,
        per_device_eval_batch_size = per_device_batch_size,
        gradient_accumulation_steps = gradient_accumulation_steps,
        save_total_limit = 1,
        warmup_steps = int(2000 / batch_size),
        num_train_epochs = num_train_epochs,
        learning_rate = 1e-4,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = output_dir,
        save_strategy = "steps",
        eval_strategy = "steps",
        logging_strategy = "steps",
        save_steps = int(5000 / batch_size * num_train_epochs),
        eval_steps = int(28900 / batch_size * num_train_epochs),
        logging_steps = int(28900 / batch_size * num_train_epochs),
    )
```
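
With these settings the step expressions above evaluate as follows (a quick arithmetic check, not part of the original script):

```
# batch_size = 128, gradient_accumulation_steps = 32, num_train_epochs = 2
int(128 / 32)         # per_device_batch_size               -> 4
4 * 32                # effective batch per optimizer step  -> 128
int(2000 / 128)       # warmup_steps                        -> 15
int(5000 / 128 * 2)   # save_steps                          -> 78
int(28900 / 128 * 2)  # eval_steps / logging_steps          -> 451
```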

# Trainer args

```
from trl import SFTTrainer

max_seq_length = 8192
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = train_dataset,
    eval_dataset = eval_dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 1,
    packing = False, # Setting this to True can make training up to 5x faster for short sequences.
    args = training_args
)
```
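
After the trainer is constructed, training would be launched and the adapter saved roughly like this (a sketch using standard Trainer/PEFT calls; the save path is a hypothetical placeholder):

```
# Launch fine-tuning with the configuration above.
trainer_stats = trainer.train()

# Save the LoRA adapter and tokenizer (directory name is a placeholder).
model.save_pretrained("lora_adapter")
tokenizer.save_pretrained("lora_adapter")
```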

# From pretrained args

```
from unsloth import FastLanguageModel
dtype = None          # None lets Unsloth auto-detect (bfloat16 if supported, else float16)
load_in_4bit = False  # load in full precision rather than 4-bit quantization

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = output_dir,
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)
```
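
With the fine-tuned weights loaded, inference can be run along these lines; a sketch using Unsloth's `FastLanguageModel.for_inference` helper and standard `generate`/`batch_decode` calls, with an illustrative question:

```
# Hypothetical inference example with the prompt template from above.
FastLanguageModel.for_inference(model)  # switch Unsloth into its faster inference mode

inputs = tokenizer(
    [alpaca_prompt.format("Baş ağrısı için ne önerirsiniz?", "")],  # "What do you recommend for a headache?"
    return_tensors = "pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens = 256)
print(tokenizer.batch_decode(outputs, skip_special_tokens = True)[0])
```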

# Peft model args

```
model = FastLanguageModel.get_peft_model(
    model,
    r = 8,
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0,
    bias = "none",
    use_gradient_checkpointing = True,
    random_state = 3407,
    use_rslora = False,
    loftq_config = None,
)
```
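
As an optional sanity check after attaching the LoRA adapters, the wrapped model can report how many parameters are actually trainable (`print_trainable_parameters` is a standard PEFT method, not part of the original script):

```
# Only the LoRA adapter weights (r = 8 on the listed projection modules) should be trainable.
model.print_trainable_parameters()
```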