# -*- coding: utf-8 -*-
"""MIXTRAL_Mixtral-8x7B (QLoRA)
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1GFRi-ND2WTbqCfPuhLkXvB6D3LieCpjK
This notebook shows how to fine-tune a Mistral-family model with QLoRA. It was originally written for Mixtral-8x7B on a sample of UltraChat; the code below fine-tunes mistralai/Mistral-7B-Instruct-v0.1 on a custom text dataset.
Fine-tuning Mixtral-8x7B requires at least 32 GB of VRAM (at least two 16 GB GPUs on consumer hardware); on Google Colab, you can use an A100.
First, we need all these dependencies:
"""
"""
!pip install -q bitsandbytes
!pip install -q transformers
!pip install -q peft
!pip install -q accelerate
!pip install -q datasets
!pip install -q trl
!pip install -q huggingface_hub
!pip install -q diffusers
"""
import torch
from datasets import Dataset
from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    Trainer
)
from trl import SFTTrainer
"""Load the tokenizer and configure padding"""
import os
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
from peft import PeftModel, prepare_model_for_kbit_training, LoraConfig
# Assuming you have the correct token set as an environment variable or directly in your script
os.environ['HF_TOKEN'] = 'XXXX'
# Name of the model you want to load
model_id = "mistralai/Mistral-7B-Instruct-v0.1"
try:
    # Attempt to load the tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_id, force_download=True)
    tokenizer.pad_token = tokenizer.unk_token
    tokenizer.pad_token_id = tokenizer.unk_token_id
    tokenizer.padding_side = 'right'
    print("Tokenizer loaded successfully.")
    # Attempt to load the model (this full-precision copy is superseded by the 4-bit load below)
    model = AutoModelForCausalLM.from_pretrained(model_id, force_download=True)
    print("Model loaded successfully.")
except Exception as e:
    print(f"Error loading the tokenizer or model: {e}")
"""Load the model and prepare it to be fine-tuned with QLoRA."""
# Data loading: one training example per non-empty line
def load_custom_dataset(file_path):
    with open(file_path, "r", encoding="utf-8") as file:
        lines = file.readlines()
    texts = [line.strip() for line in lines if line.strip()]
    return Dataset.from_dict({"text": texts})
# Update the paths to point to the correct files
dataset_train_sft = load_custom_dataset("MIXTRAL_DatosEntrenamiento.txt")
dataset_test_sft = load_custom_dataset("MIXTRAL_DatosValidacion.txt")
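"""Optional: a quick look at what was loaded (a small sketch; assumes the two .txt files above exist with one example per line)."""
print(f"Train examples: {len(dataset_train_sft)}, eval examples: {len(dataset_test_sft)}")
print(dataset_train_sft[0]["text"][:200])  # preview the first training example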
compute_dtype = getattr(torch, "float16")
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=True,
)
model = AutoModelForCausalLM.from_pretrained(
    model_id, quantization_config=bnb_config, device_map={"": 0}
)
model = prepare_model_for_kbit_training(model)
model.config.pad_token_id = tokenizer.pad_token_id
model.config.use_cache = False # Gradient checkpointing is used by default but not compatible with caching
"""The following cell only prints the architecture of the model."""
print(model)
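"""Optional: list the Linear-style projection names in the model; these are the kinds of modules LoRA adapters attach to (a sketch for inspection only)."""
linear_names = {name.split(".")[-1] for name, module in model.named_modules()
                if "Linear" in module.__class__.__name__}
print(sorted(linear_names))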
"""Define the configuration of LoRA."""
peft_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=64,
    task_type="CAUSAL_LM",  # so PEFT wraps the model as a causal-LM adapter
)
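"""Rough intuition for why r=64 is cheap (a sketch): for a Linear layer of shape (d_out, d_in), LoRA adds r*(d_in + d_out) trainable weights instead of touching the d_in*d_out frozen ones. The 4096 below is an assumed attention-projection size, for illustration only."""
d_in = d_out = 4096               # assumed projection size, for illustration
r = 64                            # the rank chosen in peft_config above
full = d_in * d_out               # frozen base weights in one projection
lora = r * (d_in + d_out)         # extra trainable LoRA weights (A and B matrices)
print(f"full: {full:,}  lora: {lora:,}  ratio: {lora / full:.2%}")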
import numpy as np
from sklearn.metrics import accuracy_score, f1_score
# Note: this compute_metrics is defined here but not passed to the trainer below
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return {
        "accuracy": accuracy_score(labels, predictions),
        "f1": f1_score(labels, predictions, average='macro')
    }
training_arguments = TrainingArguments(
    output_dir="./results_mixtral_sft/",
    evaluation_strategy="steps",
    do_eval=True,
    optim="paged_adamw_8bit",
    num_train_epochs=1,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=2,
    per_device_eval_batch_size=4,
    log_level="debug",
    save_steps=1000,
    logging_steps=100,  # assumed logging interval; adjust as needed
    learning_rate=2e-4,
    eval_steps=500,
    max_steps=-1,
    lr_scheduler_type="linear",
    report_to="tensorboard"  # Ensure TensorBoard is enabled
)
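"""A quick way to see how many optimizer steps one epoch will take with these settings (a sketch; assumes a single GPU)."""
import math
effective_batch = 4 * 2   # per_device_train_batch_size * gradient_accumulation_steps
steps_per_epoch = math.ceil(len(dataset_train_sft) / effective_batch)
print(f"Effective batch size: {effective_batch}, optimizer steps per epoch: {steps_per_epoch}")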
"""Start training:"""
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset_train_sft,
    eval_dataset=dataset_test_sft,
    peft_config=peft_config,
    dataset_text_field="text",
    max_seq_length=512,
    tokenizer=tokenizer,
    args=training_arguments,
)
trainer.train()
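"""After training, it is worth persisting the LoRA adapter; this is a sketch and the output path is an arbitrary choice."""
adapter_dir = "./results_mixtral_sft/final_adapter"
trainer.save_model(adapter_dir)           # saves the LoRA adapter weights and config
tokenizer.save_pretrained(adapter_dir)    # keep the tokenizer next to the adapter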
# Commented out IPython magic to ensure Python compatibility.
# Enable TensorBoard to visualize the training curves
# %load_ext tensorboard
# %tensorboard --logdir results_mixtral_sft/runs
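"""Optional: reload the 4-bit base model, attach the saved adapter, and run a quick generation test (a sketch; assumes the adapter was saved to adapter_dir above)."""
base_model = AutoModelForCausalLM.from_pretrained(
    model_id, quantization_config=bnb_config, device_map={"": 0}
)
tuned_model = PeftModel.from_pretrained(base_model, adapter_dir)
prompt = "Write a short greeting."
inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
outputs = tuned_model.generate(**inputs, max_new_tokens=50)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))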