Amod
/

falcon7b-fine-tuned-therapy

PEFT

Model card Files Files and versions Community

Amod committed on Jun 22, 2023

Commit

bed93a9

•

1 Parent(s): c659014

Uploaded Jupyter Notebook

Browse files

Files changed (1) hide show

falcon7b-fine-tuned-therapy.ipynb +305 -0

falcon7b-fine-tuned-therapy.ipynb ADDED Viewed

	@@ -0,0 +1,305 @@

+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "provenance": [],
+ "private_outputs": true,
+ "gpuType": "T4"
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "language_info": {
+ "name": "python"
+ },
+ "accelerator": "GPU"
+ },
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "wVEwK8exTOmG"
+ },
+ "outputs": [],
+ "source": [
+ "!pip install -qU bitsandbytes transformers datasets accelerate loralib einops xformers"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "!pip install -q -U git+https://github.com/huggingface/peft.git"
+ ],
+ "metadata": {
+ "id": "xF0gYxCezHpC"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "from huggingface_hub import notebook_login\n",
+ "\n",
+ "notebook_login()"
+ ],
+ "metadata": {
+ "id": "S1Ny2qUfYe6c"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "!nvidia-smi"
+ ],
+ "metadata": {
+ "id": "eunFqJaXYmoE"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "import os\n",
+ "import bitsandbytes as bnb\n",
+ "import pandas as pd\n",
+ "import torch\n",
+ "import torch.nn as nn\n",
+ "import transformers\n",
+ "from datasets import load_dataset\n",
+ "from peft import (\n",
+ " LoraConfig,\n",
+ " PeftConfig,\n",
+ " get_peft_model,\n",
+ " prepare_model_for_kbit_training,\n",
+ ")\n",
+ "from transformers import (\n",
+ " AutoConfig,\n",
+ " AutoModelForCausalLM,\n",
+ " AutoTokenizer,\n",
+ " BitsAndBytesConfig,\n",
+ ")\n",
+ "\n",
+ "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\""
+ ],
+ "metadata": {
+ "id": "tgjXtugbYpmE"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "model_id = \"tiiuae/falcon-7b-instruct\"\n",
+ "\n",
+ "# 4-bit NF4 quantization settings handed to the model loader below.\n",
+ "bnb_config = BitsAndBytesConfig(\n",
+ "    load_in_4bit=True,\n",
+ "    # Previously spelled `load_4bit_use_double_quant`, which is not a\n",
+ "    # BitsAndBytesConfig argument, so double quantization was never requested.\n",
+ "    bnb_4bit_use_double_quant=True,\n",
+ "    bnb_4bit_quant_type=\"nf4\",\n",
+ "    bnb_4bit_compute_dtype=torch.bfloat16,\n",
+ ")\n",
+ "\n",
+ "model = AutoModelForCausalLM.from_pretrained(\n",
+ "    model_id,\n",
+ "    device_map=\"auto\",\n",
+ "    trust_remote_code=True,\n",
+ "    quantization_config=bnb_config,\n",
+ ")\n",
+ "\n",
+ "# Reuse the EOS token as the padding token.\n",
+ "tokenizer = AutoTokenizer.from_pretrained(model_id)\n",
+ "tokenizer.pad_token = tokenizer.eos_token"
+ ],
+ "metadata": {
+ "id": "SZzmS9kQZcds"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "def print_trainable_parameters(model):\n",
+ "    \"\"\"\n",
+ "    Print the trainable and total parameter counts of `model`.\n",
+ "\n",
+ "    Args:\n",
+ "        model: object whose `named_parameters()` yields (name, parameter)\n",
+ "            pairs; each parameter must provide `numel()` and `requires_grad`.\n",
+ "\n",
+ "    The percentage is reported as 0.0 for a model without parameters\n",
+ "    instead of raising ZeroDivisionError.\n",
+ "    \"\"\"\n",
+ "    trainable_params = 0\n",
+ "    all_param = 0\n",
+ "    for _, param in model.named_parameters():\n",
+ "        count = param.numel()\n",
+ "        all_param += count\n",
+ "        if param.requires_grad:\n",
+ "            trainable_params += count\n",
+ "    # Guard the division so an empty model does not crash the report.\n",
+ "    percent = 100 * trainable_params / all_param if all_param else 0.0\n",
+ "    print(\n",
+ "        f\"trainable params: {trainable_params} || all params: {all_param} || trainable%: {percent}\"\n",
+ "    )"
+ ],
+ "metadata": {
+ "id": "TOD9rLaWaTjG"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Turn on gradient checkpointing, then let PEFT prepare the quantized model\n",
+ "# for training (the closing parenthesis was missing, a SyntaxError).\n",
+ "model.gradient_checkpointing_enable()\n",
+ "model = prepare_model_for_kbit_training(model)"
+ ],
+ "metadata": {
+ "id": "IETKOBiRfLBM"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# LoRA settings; adapters are attached to the modules named in target_modules.\n",
+ "config = LoraConfig(\n",
+ "    task_type=\"CAUSAL_LM\",\n",
+ "    target_modules=[\"query_key_value\"],\n",
+ "    r=16,\n",
+ "    lora_alpha=32,\n",
+ "    lora_dropout=0.05,\n",
+ "    bias=\"none\",\n",
+ ")\n",
+ "\n",
+ "# Wrap the model with the adapters and report how much of it is trainable.\n",
+ "model = get_peft_model(model, config)\n",
+ "print_trainable_parameters(model)"
+ ],
+ "metadata": {
+ "id": "3-fkzAk9fM4c"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "def generate_prompt(data_point):\n",
+ "    \"\"\"Format one record as a two-line training prompt.\n",
+ "\n",
+ "    Reads the `Context` and `Response` entries of `data_point`, tags them\n",
+ "    `<human>:` and `<assistance>:`, joins them with a newline and strips\n",
+ "    surrounding whitespace.\n",
+ "    \"\"\"\n",
+ "    # NOTE(review): `<assistance>` looks like a typo for `<assistant>`; it is\n",
+ "    # kept unchanged because altering it would alter the training text.\n",
+ "    turns = [\n",
+ "        f\"<human>: {data_point['Context']}\",\n",
+ "        f\"<assistance>: {data_point['Response']}\",\n",
+ "    ]\n",
+ "    return \"\\n\".join(turns).strip()\n",
+ "\n",
+ "\n",
+ "def generate_and_tokenize_prompt(data_point):\n",
+ "    \"\"\"Build the prompt for `data_point` and pass it through `tokenizer`.\"\"\"\n",
+ "    return tokenizer(generate_prompt(data_point), padding=True, truncation=True)\n"
+ ],
+ "metadata": {
+ "id": "pisCY6iDfX2N"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "from datasets import load_dataset\n",
+ "\n",
+ "# Hub identifier of the conversation dataset; only its train split is loaded.\n",
+ "dataset_name = \"alexandreteles/mental-health-conversational-data\"\n",
+ "dataset = load_dataset(dataset_name, split=\"train\")"
+ ],
+ "metadata": {
+ "id": "9V_J1XY5fiit"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "dataset[320]"
+ ],
+ "metadata": {
+ "id": "K9TZWSXifl7G"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Fixed seed so the shuffled order is the same on every run.\n",
+ "dataset = dataset.shuffle(seed=42).map(generate_and_tokenize_prompt)"
+ ],
+ "metadata": {
+ "id": "G_4-L6RtukRM"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "print(dataset.shape)"
+ ],
+ "metadata": {
+ "id": "X_zzdEFuuw7a"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "OUTPUT_DIR = \"experiments\""
+ ],
+ "metadata": {
+ "id": "XP0hBNrFxzp8"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "%load_ext tensorboard\n",
+ "%tensorboard --logdir experiments/runs"
+ ],
+ "metadata": {
+ "id": "N1gfJRpdx15C"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "training_args = transformers.TrainingArguments(\n",
+ "    per_device_train_batch_size=2,\n",
+ "    gradient_accumulation_steps=4,\n",
+ "    # Training length is set by max_steps alone. The earlier\n",
+ "    # num_train_epochs=5 was dropped: a positive max_steps overrides it,\n",
+ "    # so it never had any effect.\n",
+ "    max_steps=800,\n",
+ "    learning_rate=2e-4,\n",
+ "    fp16=True,\n",
+ "    save_total_limit=4,\n",
+ "    logging_steps=10,\n",
+ "    output_dir=OUTPUT_DIR,\n",
+ "    optim=\"paged_adamw_8bit\",\n",
+ "    lr_scheduler_type=\"cosine\",\n",
+ "    warmup_ratio=0.05,\n",
+ "    report_to=\"tensorboard\",\n",
+ ")\n",
+ "\n",
+ "trainer = transformers.Trainer(\n",
+ "    model=model,\n",
+ "    train_dataset=dataset,\n",
+ "    args=training_args,\n",
+ "    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),\n",
+ ")\n",
+ "model.config.use_cache = False  # silence the warnings. Please re-enable for inference!\n",
+ "trainer.train()"
+ ],
+ "metadata": {
+ "id": "796I79rpx5tt"
+ },
+ "execution_count": null,
+ "outputs": []
+ }
+ ]
+}