#!/usr/bin/env python3
"""
QLoRA Fine-tuning script for OpenAI OSS 120B model
Using smangrul/ad-copy-generation dataset for advertisement copy generation
"""
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    logging,
)
from peft import LoraConfig, PeftModel, TaskType, get_peft_model, prepare_model_for_kbit_training
from trl import SFTTrainer
import warnings
# Suppress warnings
warnings.filterwarnings("ignore")
logging.set_verbosity(logging.CRITICAL)
# Configuration
class Config:
    # Model configuration
    model_name = "microsoft/DialoGPT-medium"  # Replace with actual OpenAI OSS 120B model name
    dataset_name = "smangrul/ad-copy-generation"
    # Training parameters
    output_dir = "./sft_results"
    num_train_epochs = 3
    per_device_train_batch_size = 1
    gradient_accumulation_steps = 4
    optim = "paged_adamw_32bit"
    save_steps = 25
    logging_steps = 25
    learning_rate = 2e-4
    weight_decay = 0.001
    fp16 = False
    bf16 = False
    max_grad_norm = 0.3
    max_steps = -1
    warmup_ratio = 0.03
    group_by_length = True
    lr_scheduler_type = "constant"
    report_to = "tensorboard"
    # QLoRA parameters
    lora_alpha = 16
    lora_dropout = 0.1
    lora_r = 64
    # bitsandbytes parameters
    use_4bit = True
    bnb_4bit_compute_dtype = "float16"
    bnb_4bit_quant_type = "nf4"
    use_nested_quant = False
    # SFT parameters
    max_seq_length = 512
    packing = False
def create_bnb_config():
"""Create BitsAndBytesConfig for 4-bit quantization"""
bnb_config = BitsAndBytesConfig(
load_in_4bit=Config.use_4bit,
bnb_4bit_quant_type=Config.bnb_4bit_quant_type,
bnb_4bit_compute_dtype=getattr(torch, Config.bnb_4bit_compute_dtype),
bnb_4bit_use_double_quant=Config.use_nested_quant,
)
return bnb_config
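# Rough sizing sketch (back-of-envelope, not measured): with 4-bit NF4 weights a
# 120B-parameter model occupies roughly 120e9 params * 0.5 bytes ≈ 60 GB before
# activations, LoRA parameters, and optimizer state, so a genuine 120B run still
# needs a very large GPU or a multi-GPU device_map; the placeholder model above
# is far smaller and fits easily.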
def load_model_and_tokenizer():
"""Load model and tokenizer with quantization"""
print("Loading model and tokenizer...")
# Create BnB config
bnb_config = create_bnb_config()
# Load model
model = AutoModelForCausalLM.from_pretrained(
Config.model_name,
quantization_config=bnb_config,
device_map="auto",
trust_remote_code=True,
use_auth_token=True, # If using gated model
)
model.config.use_cache = False
model.config.pretraining_tp = 1
# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(
Config.model_name,
trust_remote_code=True,
use_auth_token=True, # If using gated model
)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"
return model, tokenizer
def create_peft_config():
"""Create PEFT (LoRA) configuration"""
peft_config = LoraConfig(
task_type=TaskType.CAUSAL_LM,
inference_mode=False,
r=Config.lora_r,
lora_alpha=Config.lora_alpha,
lora_dropout=Config.lora_dropout,
target_modules=[
"q_proj",
"k_proj",
"v_proj",
"o_proj",
"gate_proj",
"up_proj",
"down_proj",
]
)
return peft_config
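# LoRA sizing: for a weight matrix of shape (d_out, d_in), a rank-r adapter adds
# r * (d_in + d_out) trainable parameters (the A and B matrices), a small
# fraction of the frozen matrix's d_out * d_in weights; this is the ratio that
# print_trainable_parameters() reports later in main().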
def load_and_prepare_dataset(tokenizer):
"""Load and prepare the dataset"""
print("Loading dataset...")
# Load dataset
dataset = load_dataset(Config.dataset_name, split="train")
print(f"Dataset loaded: {len(dataset)} samples")
# Format dataset for chat completion
def format_prompts(examples):
texts = []
for conversation in examples["conversations"]:
if len(conversation) >= 2:
user_msg = conversation[0]["value"]
assistant_msg = conversation[1]["value"]
# Format as chat template
text = f"### Human: {user_msg}\n### Assistant: {assistant_msg}{tokenizer.eos_token}"
texts.append(text)
else:
# Fallback for malformed data
texts.append(f"### Human: Create an advertisement\n### Assistant: {conversation[0]['value']}{tokenizer.eos_token}")
return {"text": texts}
# Apply formatting
dataset = dataset.map(
format_prompts,
batched=True,
remove_columns=dataset.column_names
)
return dataset
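# Illustrative shape of one formatted training record (the actual field contents
# depend on the dataset; the end-of-sequence token is whatever the tokenizer uses):
#   "### Human: Write an ad for a smart water bottle\n### Assistant: Stay hydrated in style...<eos>"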
def create_training_arguments():
"""Create training arguments"""
training_arguments = TrainingArguments(
output_dir=Config.output_dir,
num_train_epochs=Config.num_train_epochs,
per_device_train_batch_size=Config.per_device_train_batch_size,
gradient_accumulation_steps=Config.gradient_accumulation_steps,
optim=Config.optim,
save_steps=Config.save_steps,
logging_steps=Config.logging_steps,
learning_rate=Config.learning_rate,
weight_decay=Config.weight_decay,
fp16=Config.fp16,
bf16=Config.bf16,
max_grad_norm=Config.max_grad_norm,
max_steps=Config.max_steps,
warmup_ratio=Config.warmup_ratio,
group_by_length=Config.group_by_length,
lr_scheduler_type=Config.lr_scheduler_type,
report_to=Config.report_to,
save_strategy="steps",
evaluation_strategy="no",
load_best_model_at_end=False,
push_to_hub=False,
remove_unused_columns=False,
)
return training_arguments
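# Note: with the values in Config, the effective optimization batch size is
# per_device_train_batch_size * gradient_accumulation_steps = 1 * 4 = 4
# sequences per optimizer step on a single GPU.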
def main():
"""Main fine-tuning function"""
print("πŸš€ Starting QLoRA fine-tuning of OpenAI OSS 120B model")
# Check CUDA availability
if not torch.cuda.is_available():
raise RuntimeError("CUDA is required for this training script")
print(f"Using GPU: {torch.cuda.get_device_name()}")
print(f"Available VRAM: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")
# Load model and tokenizer
model, tokenizer = load_model_and_tokenizer()
# Apply PEFT
peft_config = create_peft_config()
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()
# Load and prepare dataset
dataset = load_and_prepare_dataset(tokenizer)
# Create training arguments
training_arguments = create_training_arguments()
# Create trainer
trainer = SFTTrainer(
model=model,
train_dataset=dataset,
peft_config=peft_config,
dataset_text_field="text",
max_seq_length=Config.max_seq_length,
tokenizer=tokenizer,
args=training_arguments,
packing=Config.packing,
)
# Start training
print("πŸ”₯ Starting training...")
trainer.train()
# Save model
print("πŸ’Ύ Saving model...")
trainer.model.save_pretrained(Config.output_dir)
tokenizer.save_pretrained(Config.output_dir)
print("βœ… Training completed!")
# Test the model
test_model(trainer.model, tokenizer)
def test_model(model, tokenizer):
"""Test the fine-tuned model"""
print("\nπŸ§ͺ Testing the fine-tuned model...")
# Test prompts
test_prompts = [
"Create an advertisement for a new smartphone with advanced camera features",
"Write ad copy for an eco-friendly clothing brand targeting young professionals",
"Generate marketing content for a fitness app with AI personal trainer",
]
for prompt in test_prompts:
formatted_prompt = f"### Human: {prompt}\n### Assistant:"
inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)
with torch.no_grad():
outputs = model.generate(
**inputs,
max_new_tokens=150,
do_sample=True,
temperature=0.7,
top_p=0.9,
pad_token_id=tokenizer.eos_token_id,
)
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
generated_text = response[len(formatted_prompt):].strip()
print(f"\nπŸ“ Prompt: {prompt}")
print(f"πŸ“„ Generated: {generated_text}")
print("-" * 50)
if __name__ == "__main__":
    # Set environment variables
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    os.environ["TOKENIZERS_PARALLELISM"] = "false"
    main()