{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "JHRpOZ5g3Flv"
   },
   "source": [
    "# Install Mergekit and its dependencies"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "x8548KdSbMs2"
   },
   "outputs": [],
   "source": [
    "!nvidia-smi"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "4alsYntU1gNU"
   },
   "outputs": [],
   "source": [
    "# NOTE(review): this installs the unpinned default branch, so the CLI names and flags used\n",
    "# below may drift over time; pin a tag or commit if you need reproducible runs.\n",
    "!pip install -qqq git+https://github.com/arcee-ai/mergekit.git"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "DtGY8BAo3alb"
   },
   "source": [
    "# Mergekit Config"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "CmfbveTblP0F"
   },
   "outputs": [],
   "source": [
    "# @markdown What will your model's name be?\n",
    "MODEL_NAME = 'SmolMoE' # @param {type:\"string\"}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "r2-rAjH93w8x"
   },
   "outputs": [],
   "source": [
    "mergekit_yaml = \"\"\"\n",
    "base_model: BEE-spoke-data/smol_llama-220M-GQA\n",
    "gate_mode: random\n",
    "dtype: bfloat16\n",
    "experts:\n",
    "  - source_model: BEE-spoke-data/smol_llama-220M-GQA\n",
    "  - source_model: BEE-spoke-data/smol_llama-220M-GQA\n",
    "\"\"\" # @param {type:\"string\"}\n",
    "# The run cell passes this file to the selected mergekit command.\n",
    "with open('config.yaml', 'w', encoding=\"utf-8\") as f:\n",
    "    f.write(mergekit_yaml)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "WiCGZXysn_mD"
   },
   "source": [
    "# Mergekit Runtime"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "0scr7Ed_4GPe"
   },
   "outputs": [],
   "source": [
    "low_cpu_ram = True # @param {type:\"boolean\"}\n",
    "runtime = \"GPU\" # @param [\"CPU\", \"GPU\"]\n",
    "# `task` has to match the config written above; the shipped config.yaml\n",
    "# (base_model / gate_mode / experts) is a mixture-of-experts config, hence \"moe\".\n",
    "task = \"moe\" # @param [\"merge\", \"merge-mega\", \"moe\", \"extract\"]\n",
    "# @markdown ### Mergekit arguments\n",
    "\n",
    "trust_remote_code = False # @param {type:\"boolean\"}\n",
    "clone_tensors = True # @param {type:\"boolean\"}\n",
    "low_ram = True # @param {type:\"boolean\"}\n",
    "out_shard_size = \"500M\" # @param {type:\"string\"}\n",
    "\n",
    "# @markdown ### Extract LoRA (experimental)\n",
    "base_model = \"unsloth/Llama-3.2-3B-Instruct\" # @param {type:\"string\"}\n",
    "finetuned_model = \"theprint/ReWiz-Llama-3.2-3B\" # @param {type:\"string\"}\n",
    "extract_rank = 32 # @param {type:\"number\"}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "QBhBgX7U52Xn"
   },
   "source": [
    "## Run the program"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true,
    "id": "3Y7aBJXL54GJ"
   },
   "outputs": [],
   "source": [
    "import os\n",
    "import shutil\n",
    "\n",
    "\n",
    "def empty_folder(folder_path):\n",
    "    \"\"\"Remove folder_path (if present) and recreate it as an empty directory.\"\"\"\n",
    "    if os.path.exists(folder_path):\n",
    "        shutil.rmtree(folder_path)\n",
    "    os.makedirs(folder_path)\n",
    "\n",
    "\n",
    "# Reset both output folders so files from an earlier run are never picked up\n",
    "# (the upload cell uploads whichever of the two folders is non-empty).\n",
    "empty_folder('merge')\n",
    "empty_folder('lora')\n",
    "\n",
    "# Console command used for each config.yaml-driven task.\n",
    "merge_commands = {\n",
    "    \"merge\": \"mergekit-yaml\",\n",
    "    \"merge-mega\": \"mergekit-mega\",\n",
    "    \"moe\": \"mergekit-moe\",\n",
    "}\n",
    "\n",
    "if task in merge_commands:\n",
    "    cli = f\"{merge_commands[task]} config.yaml merge --copy-tokenizer --allow-crimes\"\n",
    "    if runtime == \"GPU\":\n",
    "        # The moe command is given --device cuda in addition to --cuda.\n",
    "        cli += \" --device cuda --cuda\" if task == \"moe\" else \" --cuda\"\n",
    "    else:\n",
    "        cli += \" --no-cuda\"\n",
    "\n",
    "    if trust_remote_code:\n",
    "        cli += \" --trust-remote-code\"\n",
    "    if clone_tensors:\n",
    "        cli += \" --clone-tensors\"\n",
    "    if low_ram:\n",
    "        cli += f\" --out-shard-size {out_shard_size} --lazy-unpickle\"\n",
    "    if low_cpu_ram:\n",
    "        cli += \" --low-cpu-memory\"\n",
    "elif task == \"extract\":\n",
    "    if base_model == \"\" or finetuned_model == \"\":\n",
    "        raise ValueError(\"base_model and finetuned_model cannot be empty\")\n",
    "    !pip install -qqq bitsandbytes\n",
    "    # int() keeps the flag valid even if the form hands back a float such as 32.0.\n",
    "    cli = f\"mergekit-extract-lora {finetuned_model} {base_model} lora --rank={int(extract_rank)}\"\n",
    "else:\n",
    "    # Fail with a clear message instead of a NameError on an undefined `cli` below.\n",
    "    raise ValueError(f\"Unsupported task: {task!r}\")\n",
    "\n",
    "print(cli)\n",
    "!{cli}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "HyeGrtGrDn6S"
   },
   "source": [
    "# Run Inference on the Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "wpy7Ahw6hghH"
   },
   "outputs": [],
   "source": [
    "!pip install -qU transformers bitsandbytes accelerate\n",
    "from transformers import AutoTokenizer, pipeline\n",
    "import torch\n",
    "\n",
    "# Local folder that the run cell writes merged models to.\n",
    "model = \"merge\"\n",
    "\n",
    "tokenizer = AutoTokenizer.from_pretrained(model)\n",
    "generator = pipeline(\n",
    "    \"text-generation\",\n",
    "    model=model,\n",
    "    model_kwargs={\"torch_dtype\": torch.float16, \"load_in_4bit\": False},\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "f05D7q8wiF-5"
   },
   "outputs": [],
   "source": [
    "messages = [{\"role\": \"user\", \"content\": \"Explain what a Mixture of Experts is in less than 100 words.\"}]\n",
    "# Tokenizers without a chat template (common for base models) cannot be used with\n",
    "# apply_chat_template, so fall back to the plain user text in that case.\n",
    "if getattr(tokenizer, \"chat_template\", None):\n",
    "    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)\n",
    "else:\n",
    "    prompt = messages[0][\"content\"]\n",
    "outputs = generator(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)\n",
    "print(outputs[0][\"generated_text\"])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Upload to Hugging Face"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# @title ## Upload model to Hugging Face { display-mode: \"form\" }\n",
    "# @markdown Enter your HF username and the name of the Colab secret that stores your [Hugging Face access token](https://huggingface.co/settings/tokens).\n",
    "username = 'username' # @param {type:\"string\"}\n",
    "token_env = 'hf_token' # @param {type:\"string\"}\n",
    "\n",
    "!pip install -qU huggingface_hub\n",
    "\n",
    "import os\n",
    "import yaml\n",
    "\n",
    "from huggingface_hub import HfApi\n",
    "from google.colab import userdata\n",
    "\n",
    "\n",
    "def output_dir():\n",
    "    \"\"\"Return the first non-empty output folder, checking 'merge' before 'lora'.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: if neither folder exists with at least one entry.\n",
    "    \"\"\"\n",
    "    if os.path.exists('merge') and os.listdir('merge'):\n",
    "        return \"merge\"\n",
    "    if os.path.exists('lora') and os.listdir('lora'):\n",
    "        return \"lora\"\n",
    "    raise ValueError(\"Both folders are empty or do not exist.\")\n",
    "\n",
    "\n",
    "# Defined in the secrets tab in Google Colab\n",
    "api = HfApi(token=userdata.get(token_env))\n",
    "repo_id = f\"{username}/{MODEL_NAME}\"\n",
    "try:\n",
    "    # Store the result under its own name so the output_dir() helper is not\n",
    "    # replaced by a string in the kernel namespace.\n",
    "    upload_dir = output_dir()\n",
    "    api.create_repo(\n",
    "        repo_id=repo_id,\n",
    "        repo_type=\"model\",\n",
    "        exist_ok=True,\n",
    "    )\n",
    "    api.upload_folder(\n",
    "        repo_id=repo_id,\n",
    "        folder_path=upload_dir,\n",
    "    )\n",
    "except ValueError as e:\n",
    "    print(e)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.11.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}