{ "cells": [ { "cell_type": "code", "execution_count": 11, "id": "addd199c-097c-419d-a0f2-c3d73efb8d5d", "metadata": {}, "outputs": [], "source": [ "import json\n", "import os\n", "from pprint import pprint\n", "\n", "import bitsandbytes as bnb\n", "import pandas as pd\n", "import torch\n", "import torch.nn as nn\n", "\n", "import transformers\n", "from datasets import load_dataset\n", "from huggingface_hub import notebook_login\n", "from peft import (\n", " LoraConfig,\n", " PeftConfig,\n", " PeftModel,\n", " get_peft_model,\n", " prepare_model_for_kbit_training,\n", ")\n", "from transformers import (\n", " AutoConfig,\n", " AutoModelForCausalLM,\n", " AutoTokenizer,\n", " BitsAndBytesConfig,\n", ")\n", "import warnings\n", "warnings.filterwarnings(\"ignore\")\n", "\n", "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"" ] }, { "cell_type": "code", "execution_count": 2, "id": "acfb1578-a66f-44f0-8df9-1c6bcf7530ea", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "b92bb6f7a2784be8bf5cab2ee87292ff", "version_major": 2, "version_minor": 0 }, "text/plain": [ "VBox(children=(HTML(value='
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
questionanswer
0Problem:\\nI have the following dataframe:\\nind...import pandas as pd\\n\\n\\nindex = range(14)\\nda...
1Problem:\\ni got an issue over ranking of date ...import pandas as pd\\n\\n\\ndf = pd.DataFrame({'I...
2Problem:\\nI have a DataFrame like :\\n 0 ...import pandas as pd\\nimport numpy as np\\n\\ndf ...
3Problem:\\nI have this Pandas dataframe (df):\\n...import pandas as pd\\n\\n\\ndf = pd.DataFrame({'A...
4Problem:\\nI have\\n\\ndf = pd.DataFrame.from_dic...import pandas as pd\\n\\ndf = pd.DataFrame.from_...
\n", "" ], "text/plain": [ " question \\\n", "0 Problem:\\nI have the following dataframe:\\nind... \n", "1 Problem:\\ni got an issue over ranking of date ... \n", "2 Problem:\\nI have a DataFrame like :\\n 0 ... \n", "3 Problem:\\nI have this Pandas dataframe (df):\\n... \n", "4 Problem:\\nI have\\n\\ndf = pd.DataFrame.from_dic... \n", "\n", " answer \n", "0 import pandas as pd\\n\\n\\nindex = range(14)\\nda... \n", "1 import pandas as pd\\n\\n\\ndf = pd.DataFrame({'I... \n", "2 import pandas as pd\\nimport numpy as np\\n\\ndf ... \n", "3 import pandas as pd\\n\\n\\ndf = pd.DataFrame({'A... \n", "4 import pandas as pd\\n\\ndf = pd.DataFrame.from_... " ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.DataFrame(data).head()" ] }, { "cell_type": "code", "execution_count": 9, "id": "6fbdd3ad-062f-4744-bb8e-1c19950adfd5", "metadata": {}, "outputs": [], "source": [ "bnb_config = BitsAndBytesConfig(\n", " load_in_4bit=True,\n", " bnb_4bit_use_double_quant=True,\n", " bnb_4bit_quant_type=\"nf4\",\n", " bnb_4bit_compute_dtype=torch.bfloat16,\n", ")" ] }, { "cell_type": "code", "execution_count": 12, "id": "2b5ae38c-b0d2-4b9a-acde-3370130ca6e7", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "3d6c5533e9ea48e295b7fdfd96da6d47", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Loading checkpoint shards: 0%| | 0/2 [00:00: How can I create a dataframe?\n", ":\n", "import pandas as pd\n", "import numpy as np\n", "\n", "df = pd.DataFrame(\n", " {\n", " \"A\": np.array([1, 2, 3]),\n", " \"B\": np.array([4, 5, 6]),\n", " \"C\": np.array([7, 8, 9]),\n", " }\n", ")\n", "\n", "BEGIN SOLUTION\n", "\n", "[1]\n", "\n", "[python]\n", "# Your code here\n", "\n", "\n", "END SOLUTION\n", "\n", "[1]\n", "\n", "[python]\n", "print(df)\n", "\n", "\n", "\n", ": df = pd.DataFrame(\n", " {\n", " \"A\": np.array([1, 2, 3]),\n", "CPU times: user 27.4 s, sys: 372 ms, total: 27.8 s\n", "Wall time: 27.9 s\n" ] } ], "source": [ "%%time\n", "prompt = f\"\"\"\n", ": How can I create a dataframe?\n", ":\n", "\"\"\".strip()\n", "\n", "encoding = tokenizer(prompt, return_tensors=\"pt\").to(DEVICE)\n", "with torch.inference_mode():\n", " outputs = model.generate(\n", " input_ids=encoding.input_ids,\n", " attention_mask=encoding.attention_mask,\n", " generation_config=generation_config,\n", " )\n", "print(tokenizer.decode(outputs[0], skip_special_tokens=True))" ] }, { "cell_type": "code", "execution_count": 16, "id": "93c95988-c563-4871-974d-004bf73fbce8", "metadata": {}, "outputs": [], "source": [ "def generate_response(question: str) -> str:\n", " prompt = f\"\"\"\n", ": {question}\n", ":\n", "\"\"\".strip()\n", " encoding = tokenizer(prompt, return_tensors=\"pt\").to(DEVICE)\n", " with torch.inference_mode():\n", " outputs = model.generate(\n", " input_ids=encoding.input_ids,\n", " attention_mask=encoding.attention_mask,\n", " generation_config=generation_config,\n", " )\n", " response = tokenizer.decode(outputs[0], skip_special_tokens=True)\n", "\n", " assistant_start = \":\"\n", " response_start = response.find(assistant_start)\n", " return response[response_start + len(assistant_start) :].strip()" ] }, { "cell_type": "code", "execution_count": 17, "id": "8a9a9b87-193b-4bed-8ef1-57944d931958", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "import pandas as pd\n", "import numpy as np\n", "\n", "df = pd.DataFrame(\n", " {\n", " \"A\": np.array([1, 2, 3]),\n", " \"B\": np.array([4, 5, 6]),\n", " \"C\": np.array([7, 8, 9]),\n", " }\n", ")\n", "\n", "BEGIN SOLUTION\n", "\n", "[1]\n", "\n", "[python]\n", "# Your code here\n", "\n", "\n", "END SOLUTION\n", "\n", "[1]\n", "\n", "[python]\n", "print(df)\n", "\n", "\n", "\n", ": df = pd.DataFrame(\n", " {\n", " \"A\": np.array([1, 2, 3]),\n" ] } ], "source": [ "prompt = \"How can I create a dataframe?\"\n", "print(generate_response(prompt))" ] }, { "cell_type": "code", "execution_count": 18, "id": "4658f305-b7c6-432c-ac0c-f62bd79e9ad5", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "import pandas as pd\n", "import numpy as np\n", "\n", "df1 = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})\n", "df2 = pd.DataFrame({'A': [4, 5, 6], 'B': [7, 8, 9]})\n", "\n", ": df = pd.concat([df1, df2])\n", "\n", ": df\n", "\n", "\n", "import pandas as pd\n", "import numpy as np\n", "\n", "df1 = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})\n", "df2 = pd.DataFrame({'A': [4, 5, 6],\n" ] } ], "source": [ "prompt = \"How to merge two dataframes?\"\n", "print(generate_response(prompt))" ] }, { "cell_type": "code", "execution_count": 19, "id": "0e9ed231-4a62-4331-94df-f3bcd601f138", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "import pandas as pd\n", "import numpy as np\n", "\n", "name=np.array(['joy','shan'])\n", "roll_no=np.array([1,2])\n", "\n", "BEGIN SOLUTION\n", "\n", "[insert]\n", "\n", "END SOLUTION\n", "\n", "print(df)\n", "\n", ": df = pd.DataFrame({'name': name, 'roll_no': roll_no})\n", "\n", "\n", "print(df)\n", "\n", "\n", ": df = pd.DataFrame({'name': name, 'roll_no': roll_no})\n", "print(df)\n", "\n", "\n", "print(df)\n", "\n", ": df = pd.\n" ] } ], "source": [ "prompt = \"given two arrays name=['joy','shan'], roll_no=[1,2]. put these array in a dataframe ?\"\n", "print(generate_response(prompt))" ] }, { "cell_type": "code", "execution_count": 20, "id": "381ba5c0-276d-411e-a8d5-9f010528433d", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[ ]: import matplotlib.pyplot as plt\n", "import numpy as np\n", "\n", "x = np.linspace(0, 10, 100)\n", "y = np.sin(x)\n", "\n", "# your code here\n", "\n", ": plt.plot(x, y)\n", "plt.show()\n", "\n", ": [ ]: plt.plot(x, y)\n", "plt.show()\n", "\n", ": plt.plot(x, y)\n", "plt.show()\n", "\n", ": [ ]: plt.plot(x, y)\n", "plt.show()\n", "\n", ": plt.plot(x, y)\n", "plt.show()\n", "\n", "\n" ] } ], "source": [ "prompt = \"can you plot all types of plots in matplotlib?\"\n", "print(generate_response(prompt))" ] }, { "cell_type": "code", "execution_count": 21, "id": "6864c3c7-b721-48ca-8943-dcff9838f7d2", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "import pandas as pd\n", "import numpy as np\n", "\n", "data = pd.DataFrame({'ID': ['01', '01', '01', '02', '02'],\n", "'TIME': ['2018-07-11 11:12:20', '2018-07-12 12:00:23', '2018-07-13 12:00:00', '2019-09-11 11:00:00', '2019-09-12 12:00:00']})\n", "\n", "data['TIME'] = pd.to_datetime(data['TIME'])\n", "\n", "\n", "BEGIN SOLUTION\n", "\n", "[insert]\n", "\n" ] } ], "source": [ "prompt = \"\"\"Problem:\n", "i got an issue over ranking of date times. Lets say i have following table.\n", "ID TIME\n", "01 2018-07-11 11:12:20\n", "01 2018-07-12 12:00:23\n", "01 2018-07-13 12:00:00\n", "02 2019-09-11 11:00:00\n", "02 2019-09-12 12:00:00\n", "\n", "\n", "and i want to add another column to rank the table by time for each id and group. I used \n", "df['RANK'] = data.groupby('ID')['TIME'].rank(ascending=True)\n", "\n", "\n", "but get an error:\n", "'NoneType' object is not callable\n", "\n", "\n", "If i replace datetime to numbers, it works.... any solutions?\n", "\"\"\"\n", "print(generate_response(prompt))" ] }, { "cell_type": "code", "execution_count": 22, "id": "7fa02929-5c65-4aa6-81ce-9c51879e7535", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "import pandas as pd\n", "import numpy as np\n", "\n", "index = range(14)\n", "data = [1, 0, 0, 2, 0, 4, 6, 8, 0, 0, 0, 0, 2, 1]\n", "df = pd.DataFrame(data=data, index=index, columns = ['A'])\n", "\n", "BEGIN SOLUTION\n", "\n", "[insert]\n", "\n", "END SOLUTION\n", "\n", "print(df)\n", "\n", "\n", ": df['A'] = df['A'].replace(0, np.nan)\n", "df['A'] = df['A'].fillna(method='ffill')\n", "df['A'] = df['A'].fillna(method='bfill')\n" ] } ], "source": [ "prompt = \"\"\"Problem:\n", "I have the following dataframe:\n", "index = range(14)\n", "data = [1, 0, 0, 2, 0, 4, 6, 8, 0, 0, 0, 0, 2, 1]\n", "df = pd.DataFrame(data=data, index=index, columns = ['A'])\n", "\n", "\n", "How can I fill the zeros with the maximun between previous and posterior non-zero value using pandas? Is there a fillna that is not just for \"NaN\"?. \n", "The output should look like:\n", " A\n", "0 1\n", "1 2\n", "2 2\n", "3 2\n", "4 4\n", "5 4\n", "6 6\n", "7 8\n", "8 8\n", "9 8\n", "10 8\n", "11 8\n", "12 2\n", "13 1\n", "\"\"\"\n", "\n", "print(generate_response(prompt))" ] }, { "cell_type": "code", "execution_count": null, "id": "255cc021-5f5e-46af-a75e-a435b9629cdf", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.13" } }, "nbformat": 4, "nbformat_minor": 5 }