Crystalcareai
committed on
Update schedulefree.py
Browse files- schedulefree.py +59 -62
schedulefree.py
CHANGED
@@ -1,12 +1,12 @@
|
|
1 |
import signal
|
2 |
import sys
|
|
|
3 |
from datasets import load_dataset
|
4 |
-
from transformers import TrainingArguments
|
5 |
from trl import SFTTrainer
|
6 |
-
|
7 |
-
|
8 |
-
from
|
9 |
-
from schedulefree import AdamWScheduleFree
|
10 |
|
11 |
# Signal handler function
|
12 |
def signal_handler(sig, frame):
|
@@ -16,88 +16,85 @@ def signal_handler(sig, frame):
|
|
16 |
# Register signal handler
|
17 |
signal.signal(signal.SIGINT, signal_handler)
|
18 |
|
19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
|
21 |
-
|
22 |
-
|
23 |
-
return {
|
24 |
-
"instruction": example['instruction'] if 'instruction' in example else " \n",
|
25 |
-
"input": example['input'] if 'input' in example else " \n",
|
26 |
-
"system": example['system'] if 'system' in example else " \n",
|
27 |
-
"output": example['output'] if 'output' in example else " \n",
|
28 |
-
}
|
29 |
|
30 |
-
|
31 |
-
|
32 |
|
33 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
model = AutoModelForCausalLM.from_pretrained(
|
35 |
model_id,
|
36 |
device_map="auto",
|
37 |
-
attn_implementation="flash_attention_2",
|
38 |
torch_dtype=torch.bfloat16,
|
39 |
-
|
40 |
-
tokenizer = AutoTokenizer.from_pretrained(model)
|
41 |
-
tokenizer.padding_side = 'right' # to prevent warnings
|
42 |
-
|
43 |
-
peft_config = LoraConfig(
|
44 |
-
lora_alpha=16,
|
45 |
-
lora_dropout=0.05,
|
46 |
-
r=32,
|
47 |
-
bias="none",
|
48 |
-
target_modules=[
|
49 |
-
"0.w1",
|
50 |
-
"0.w2",
|
51 |
-
"0.w3",
|
52 |
-
"q_proj",
|
53 |
-
"v_proj",
|
54 |
-
"k_proj",
|
55 |
-
"o_proj"
|
56 |
-
],
|
57 |
-
task_type="CAUSAL_LM",
|
58 |
-
use_dora=False, # Enable Dora method
|
59 |
)
|
60 |
|
|
|
61 |
args = TrainingArguments(
|
62 |
-
output_dir="./out",
|
63 |
-
num_train_epochs=3,
|
64 |
-
per_device_train_batch_size=4,
|
65 |
-
gradient_checkpointing=True,
|
66 |
-
optim="adamw_hf",
|
67 |
logging_steps=2,
|
68 |
save_strategy="steps",
|
69 |
save_steps=300,
|
70 |
-
bf16=True,
|
71 |
-
tf32=True,
|
72 |
-
|
73 |
-
|
74 |
-
max_grad_norm=0.3,
|
75 |
warmup_ratio=0.00,
|
76 |
-
lr_scheduler_type="
|
77 |
-
|
78 |
-
push_to_hub=False,
|
79 |
-
# push model to hub
|
80 |
)
|
81 |
|
82 |
-
max_seq_length = 2048
|
83 |
|
84 |
-
#
|
85 |
-
|
|
|
|
|
|
|
86 |
|
87 |
-
|
|
|
88 |
model=model,
|
89 |
args=args,
|
90 |
train_dataset=dataset,
|
91 |
-
### peft specific arguments ###
|
92 |
-
peft_config=peft_config,
|
93 |
max_seq_length=max_seq_length,
|
94 |
tokenizer=tokenizer,
|
|
|
95 |
packing=False,
|
96 |
-
optimizers=(optimizer, None), # Pass the schedulefree optimizer
|
97 |
)
|
98 |
|
99 |
-
#
|
100 |
trainer.train()
|
101 |
|
102 |
-
#
|
103 |
-
trainer.save_model()
|
|
|
1 |
import signal
|
2 |
import sys
|
3 |
+
import torch
|
4 |
from datasets import load_dataset
|
5 |
+
from transformers import Trainer, TrainingArguments, AutoTokenizer, AutoModelForCausalLM
|
6 |
from trl import SFTTrainer
|
7 |
+
|
8 |
+
# Importing Sophia optimizer
|
9 |
+
from sophia import SophiaG
|
|
|
10 |
|
11 |
# Signal handler function
|
12 |
def signal_handler(sig, frame):
|
|
|
16 |
# Register signal handler
|
17 |
signal.signal(signal.SIGINT, signal_handler)
|
18 |
|
19 |
+
# Load the dataset
|
20 |
+
dataset = load_dataset("Crystalcareai/Orca-Reka", split="train")
|
21 |
+
model_id = "./outkannn"
|
22 |
+
tokenizer_id = model_id
|
23 |
+
tokenizer = AutoTokenizer.from_pretrained(tokenizer_id)
|
24 |
+
tokenizer.padding_side = 'right'
|
25 |
+
|
26 |
+
# Formatting function for the dataset
|
27 |
+
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
|
28 |
|
29 |
+
### Instruction:
|
30 |
+
{}
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
|
32 |
+
### Input:
|
33 |
+
{}
|
34 |
|
35 |
+
### Response:
|
36 |
+
{}"""
|
37 |
+
def formatting_prompts_func(examples):
    """Render a batch of records into Alpaca-style training strings.

    Args:
        examples: Batched mapping (the shape passed by
            ``dataset.map(..., batched=True)``) with parallel
            "instruction", "input" and "output" sequences.

    Returns:
        A dict with a single "text" key holding one string per record:
        ``alpaca_prompt`` filled with the record's instruction, input and
        output, followed by the tokenizer's EOS token.
    """
    # Read once; every sample is terminated with the same EOS marker.
    eos_token = tokenizer.eos_token
    rows = zip(examples["instruction"], examples["input"], examples["output"])
    # Names deliberately avoid shadowing the builtin `input`.
    texts = [
        alpaca_prompt.format(instruction, context, response) + eos_token
        for instruction, context, response in rows
    ]
    return {"text": texts}
|
47 |
+
|
48 |
+
# Process and map the formatting function
|
49 |
+
dataset = dataset.map(formatting_prompts_func, batched=True)
|
50 |
+
|
51 |
+
# Load model
|
52 |
model = AutoModelForCausalLM.from_pretrained(
|
53 |
model_id,
|
54 |
device_map="auto",
|
|
|
55 |
torch_dtype=torch.bfloat16,
|
56 |
+
trust_remote_code=True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
57 |
)
|
58 |
|
59 |
+
# Define training arguments
|
60 |
args = TrainingArguments(
|
61 |
+
output_dir="./out",
|
62 |
+
num_train_epochs=3,
|
63 |
+
per_device_train_batch_size=4,
|
64 |
+
gradient_checkpointing=True,
|
|
|
65 |
logging_steps=2,
|
66 |
save_strategy="steps",
|
67 |
save_steps=300,
|
68 |
+
bf16=True,
|
69 |
+
tf32=True,
|
70 |
+
learning_rate=1e-4,
|
71 |
+
max_grad_norm=0.1,
|
|
|
72 |
warmup_ratio=0.00,
|
73 |
+
lr_scheduler_type="cosine",
|
74 |
+
push_to_hub=False
|
|
|
|
|
75 |
)
|
76 |
|
77 |
+
max_seq_length = 2048
|
78 |
|
79 |
+
# Custom Trainer Class
|
80 |
+
class CustomTrainer(SFTTrainer):
    """SFTTrainer subclass that trains with the SophiaG optimizer."""

    def create_optimizer(self):
        """Create the SophiaG optimizer (if none is set yet) and return it.

        The learning rate comes from ``self.args.learning_rate``; betas, rho
        and weight decay are fixed to the values used by this script.

        Returns:
            The optimizer stored on ``self.optimizer``.
        """
        # Mirror the base-class contract: keep an optimizer that was already
        # supplied, and always hand the optimizer back to the caller.
        if self.optimizer is None:
            self.optimizer = SophiaG(
                self.model.parameters(),
                lr=self.args.learning_rate,
                betas=(0.965, 0.99),
                rho=0.01,
                weight_decay=0.1,
            )
        return self.optimizer
|
84 |
|
85 |
+
# Trainer configuration
|
86 |
+
trainer = CustomTrainer(
    model=model,
    args=args,
    train_dataset=dataset,
    max_seq_length=max_seq_length,
    tokenizer=tokenizer,
    # Train on the "text" column built by formatting_prompts_func (full
    # Alpaca prompt + EOS). Pointing this at the raw "output" column would
    # discard the formatted instruction/input context entirely.
    dataset_text_field="text",
    packing=False,
)
|
95 |
|
96 |
+
# Start training
|
97 |
trainer.train()
|
98 |
|
99 |
+
# Save model
|
100 |
+
trainer.save_model()
|