arpieb
/

peft-lora-starcoderbase-3b-personal-copilot-elixir

PEFT

Safetensors

Model card Files Files and versions Community

arpieb committed on Nov 21, 2023

Commit

a435e41

•

1 Parent(s): 38712c7

Added testing notebook

Browse files

Files changed (1) hide show

test.ipynb +188 -0

test.ipynb ADDED Viewed

	@@ -0,0 +1,188 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "d2d5bc5c-d465-4483-b137-52e168fc6f6e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from peft import PeftModel, PeftConfig\n",
+    "from transformers import AutoModelForCausalLM, AutoTokenizer\n",
+    "\n",
+    "# Hub id of the base model; later cells load the tokenizer from it.\n",
+    "checkpoint = \"bigcode/starcoderbase-3b\"\n",
+    "# Use \"cuda\" for GPU or \"cpu\" for CPU. NOTE: no later cell in this notebook reads `device` yet.\n",
+    "device = \"cuda\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "def31126-da54-4099-b8f7-3236829d7559",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "CPU times: user 161 ms, sys: 8.12 ms, total: 169 ms\n",
+      "Wall time: 308 ms\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%time\n",
+    "# Tokenizer comes from the base checkpoint named in the first cell.\n",
+    "tokenizer = AutoTokenizer.from_pretrained(checkpoint)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "d6fa452a-33a3-4e57-983a-28e1020004cb",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "6cc50d551a9b48cf8bb09bd208155c2f",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading adapter_config.json:   0%|          | 0.00/517 [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "202be71fea7a4f369fa7e04109963bdd",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "910a9d258e3346f08b39b88770dfd66f",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading (…)er_model.safetensors:   0%|          | 0.00/91.5M [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "CPU times: user 1min 2s, sys: 24.7 s, total: 1min 27s\n",
+      "Wall time: 52.7 s\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%time\n",
+    "# Name the adapter repo once so the config and the weights cannot point at different repos.\n",
+    "adapter_id = \"arpieb/peft-lora-starcoderbase-3b-personal-copilot-elixir\"\n",
+    "config = PeftConfig.from_pretrained(adapter_id)\n",
+    "# Load the same base checkpoint the tokenizer was built from (instead of a second literal),\n",
+    "# attach the LoRA adapter, then fold its weights into the base model.\n",
+    "model = AutoModelForCausalLM.from_pretrained(checkpoint)\n",
+    "model = PeftModel.from_pretrained(model, adapter_id)\n",
+    "model = model.merge_and_unload()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "b8315302-801b-4b59-b158-25c86be30192",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "tensor([[ 589, 1459,   81, 7656,   81, 5860,  346,  745,   44]])\n",
+      "CPU times: user 3.85 ms, sys: 0 ns, total: 3.85 ms\n",
+      "Wall time: 1.47 ms\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%time\n",
+    "# Keep the prompt text in a named variable, then tokenise it to a PyTorch tensor.\n",
+    "prompt = \"def print_hello_world() do:\"\n",
+    "inputs = tokenizer.encode(prompt, return_tensors=\"pt\")\n",
+    "print(inputs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "53d735d7-5941-4793-8b50-cc8e00de5437",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
+      "Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
+      "/home/rbates/src/starcoder-elixir/DHS-LLM-Workshop/ENV/lib/python3.10/site-packages/transformers/generation/utils.py:1353: UserWarning: Using the model-agnostic default `max_length` (=20) to control the generation length. We recommend setting `max_new_tokens` to control the maximum length of the generation.\n",
+      "  warnings.warn(\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "def print_hello_world() do: IO.puts(\"Hello, world!\")\n",
+      "end\n",
+      "\n",
+      "CPU times: user 22.7 s, sys: 13.3 ms, total: 22.8 s\n",
+      "Wall time: 3.8 s\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%time\n",
+    "# Keep the prompt on the same device as the model.\n",
+    "prompt_ids = inputs.to(model.device)\n",
+    "outputs = model.generate(\n",
+    "    prompt_ids,\n",
+    "    # Single unpadded prompt, so every position is a real token.\n",
+    "    attention_mask=prompt_ids.new_ones(prompt_ids.shape),\n",
+    "    # No pad token id is configured for generation; pass EOS explicitly\n",
+    "    # instead of relying on generate()'s warning fallback.\n",
+    "    pad_token_id=tokenizer.eos_token_id,\n",
+    "    # 9 prompt tokens + 11 new tokens matches the previous implicit max_length=20.\n",
+    "    max_new_tokens=11,\n",
+    ")\n",
+    "print(tokenizer.decode(outputs[0]))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1a346bef-a007-4311-b0ac-275dd786713d",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}