{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Loading the LoRA Configuration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from peft import LoraConfig\n",
    "\n",
    "# Rank-8 LoRA adapters on all attention and MLP projection layers of Gemma\n",
    "lora_config = LoraConfig(\n",
    "    r=8,\n",
    "    target_modules=[\"q_proj\", \"o_proj\", \"k_proj\", \"v_proj\", \"gate_proj\", \"up_proj\", \"down_proj\"],\n",
    "    task_type=\"CAUSAL_LM\",\n",
    ")\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Loading the Model and Tokenizer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/ubuntu/Desktop/GEMMA_FINETUNE/.venv/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
      "  from .autonotebook import tqdm as notebook_tqdm\n",
      "`config.hidden_act` is ignored, you should use `config.hidden_activation` instead.\n",
      "Gemma's activation function will be set to `gelu_pytorch_tanh`. Please, use\n",
      "`config.hidden_activation` if you want to override this behaviour.\n",
      "See https://github.com/huggingface/transformers/pull/29402 for more details.\n",
      "Loading checkpoint shards: 100%|██████████| 2/2 [00:01<00:00, 1.03it/s]\n"
     ]
    }
   ],
   "source": [
    "import torch\n",
    "from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig\n",
    "\n",
    "model_id = \"google/gemma-2b\"\n",
    "\n",
    "# QLoRA-style loading: 4-bit NF4 quantization with bfloat16 compute\n",
    "bnb_config = BitsAndBytesConfig(\n",
    "    load_in_4bit=True,\n",
    "    bnb_4bit_quant_type=\"nf4\",\n",
    "    bnb_4bit_compute_dtype=torch.bfloat16,\n",
    ")\n",
    "\n",
    "# You must also request access to Gemma on Hugging Face before the weights can be downloaded\n",
    "hf_token = \"ADD_TOKEN_HERE\"\n",
    "\n",
    "tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)\n",
    "model = AutoModelForCausalLM.from_pretrained(\n",
    "    model_id,\n",
    "    quantization_config=bnb_config,\n",
    "    device_map={\"\": 0},\n",
    "    token=hf_token,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Loading the Dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from datasets import load_dataset\n",
    "\n",
    "# Cybersecurity instruction/response pairs (columns: INSTRUCTION, RESPONSE)\n",
    "data = load_dataset(\"Yemmy1000/cybersec_embedding_llama_chat\")\n",
    "data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Setting Training Parameters and Fine-Tuning"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import transformers\n",
    "from trl import SFTTrainer\n",
    "\n",
    "# Convert a batch of dataset rows into plain-text training examples\n",
    "def formatting_func(examples):\n",
    "    texts = []\n",
    "    instructions = examples['INSTRUCTION']\n",
    "    responses = examples['RESPONSE']\n",
    "\n",
    "    for idx in range(len(instructions)):\n",
    "        # The INSTRUCTION field is multi-line; keep only its second line as the instruction text\n",
    "        instruction = instructions[idx].split('\\n')[1]\n",
    "        response = responses[idx]\n",
    "        text = f\"Instruction: {instruction} \\n Output: {response}\"\n",
    "        texts.append(text)\n",
    "\n",
    "    return texts\n",
    "\n",
    "trainer = SFTTrainer(\n",
    "    model=model,\n",
    "    train_dataset=data[\"train\"],\n",
    "    args=transformers.TrainingArguments(\n",
    "        per_device_train_batch_size=1,\n",
    "        gradient_accumulation_steps=4,  # effective batch size of 4\n",
    "        warmup_steps=100,\n",
    "        num_train_epochs=4,\n",
    "        learning_rate=2e-4,\n",
    "        fp16=False,\n",
    "        logging_steps=200,\n",
    "        output_dir=\"outputs\",\n",
    "        optim=\"paged_adamw_8bit\",  # paged 8-bit AdamW keeps optimizer memory low\n",
    "    ),\n",
    "    peft_config=lora_config,\n",
    "    formatting_func=formatting_func,\n",
    ")\n",
    "\n",
    "trainer.train()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save the trained LoRA adapter weights and config to ./Model\n",
    "trainer.save_model('./Model')"
   ]
  },
  {
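   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Testing the Fine-Tuned Adapter (Illustrative Sketch)\n",
    "\n",
    "The next cell is an illustrative addition rather than part of the original run: a minimal inference sketch that reloads the 4-bit base model, attaches the LoRA adapter saved to `./Model` above, and generates a completion in the same `Instruction: ... Output:` format produced by `formatting_func`. It reuses `model_id`, `bnb_config`, `hf_token`, and `tokenizer` from the earlier cells; the sample prompt is only a placeholder."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "from peft import PeftModel\n",
    "from transformers import AutoModelForCausalLM\n",
    "\n",
    "# Reload the quantized base model and attach the fine-tuned adapter saved above\n",
    "base_model = AutoModelForCausalLM.from_pretrained(\n",
    "    model_id,\n",
    "    quantization_config=bnb_config,\n",
    "    device_map={\"\": 0},\n",
    "    token=hf_token,\n",
    ")\n",
    "ft_model = PeftModel.from_pretrained(base_model, \"./Model\")\n",
    "ft_model.eval()\n",
    "\n",
    "# Placeholder prompt in the same format used during fine-tuning\n",
    "prompt = \"Instruction: What is a SQL injection attack? \\n Output:\"\n",
    "inputs = tokenizer(prompt, return_tensors=\"pt\").to(ft_model.device)\n",
    "\n",
    "with torch.no_grad():\n",
    "    output_ids = ft_model.generate(**inputs, max_new_tokens=128)\n",
    "print(tokenizer.decode(output_ids[0], skip_special_tokens=True))"
   ]
  },
  {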
"cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "7e0c02a2059a451ebd0c8b8726b68003", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Loading checkpoint shards: 0%| | 0/2 [00:00