{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "4058fc54",
"metadata": {
"_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19",
"_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5",
"execution": {
"iopub.execute_input": "2023-07-08T12:37:04.657833Z",
"iopub.status.busy": "2023-07-08T12:37:04.657370Z",
"iopub.status.idle": "2023-07-08T12:37:04.671182Z",
"shell.execute_reply": "2023-07-08T12:37:04.669941Z"
},
"papermill": {
"duration": 0.028753,
"end_time": "2023-07-08T12:37:04.673965",
"exception": false,
"start_time": "2023-07-08T12:37:04.645212",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"/kaggle/input/icr-identify-age-related-conditions/sample_submission.csv\n",
"/kaggle/input/icr-identify-age-related-conditions/greeks.csv\n",
"/kaggle/input/icr-identify-age-related-conditions/train.csv\n",
"/kaggle/input/icr-identify-age-related-conditions/test.csv\n"
]
}
],
"source": [
"# This Python 3 environment comes with many helpful analytics libraries installed\n",
"# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python\n",
"# For example, here's several helpful packages to load\n",
"\n",
"import numpy as np # linear algebra\n",
"import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n",
"\n",
"# Input data files are available in the read-only \"../input/\" directory\n",
"# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory\n",
"\n",
"import os\n",
"# Walk the input tree and print the full path of every file found.\n",
"for folder, _subfolders, file_names in os.walk('/kaggle/input'):\n",
"    for file_name in file_names:\n",
"        print(os.path.join(folder, file_name))\n",
"\n",
"# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using \"Save & Run All\" \n",
"# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session"
]
},
{
"cell_type": "markdown",
"id": "3b574567",
"metadata": {
"papermill": {
"duration": 0.008144,
"end_time": "2023-07-08T12:37:04.690217",
"exception": false,
"start_time": "2023-07-08T12:37:04.682073",
"status": "completed"
},
"tags": []
},
"source": [
"# 1. Import the Libraries"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "82ba3941",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-08T12:37:04.706780Z",
"iopub.status.busy": "2023-07-08T12:37:04.706409Z",
"iopub.status.idle": "2023-07-08T12:37:11.980200Z",
"shell.execute_reply": "2023-07-08T12:37:11.979425Z"
},
"papermill": {
"duration": 7.284593,
"end_time": "2023-07-08T12:37:11.982471",
"exception": false,
"start_time": "2023-07-08T12:37:04.697878",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"import torch\n",
"from torch import tensor\n",
"from fastai.data.transforms import RandomSplitter\n",
"from sklearn.preprocessing import LabelEncoder\n",
"from sklearn.impute import SimpleImputer\n",
"from sklearn.model_selection import StratifiedKFold, train_test_split\n",
"from sklearn.tree import DecisionTreeClassifier\n",
"from sklearn.pipeline import Pipeline\n",
"from sklearn.preprocessing import MinMaxScaler, OneHotEncoder, LabelEncoder, StandardScaler\n",
"import lightgbm as lgb\n",
"from sklearn.metrics import log_loss, confusion_matrix, roc_curve, roc_auc_score\n",
"from sklearn.tree import DecisionTreeClassifier\n",
"from sklearn.linear_model import LogisticRegression"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "10789ecd",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-08T12:37:12.001266Z",
"iopub.status.busy": "2023-07-08T12:37:12.000101Z",
"iopub.status.idle": "2023-07-08T12:37:12.005279Z",
"shell.execute_reply": "2023-07-08T12:37:12.004195Z"
},
"papermill": {
"duration": 0.016583,
"end_time": "2023-07-08T12:37:12.007353",
"exception": false,
"start_time": "2023-07-08T12:37:11.990770",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"pd.set_option('display.max_columns', None)\n",
"pd.set_option('display.max_rows', None)"
]
},
{
"cell_type": "markdown",
"id": "e8123568",
"metadata": {
"papermill": {
"duration": 0.008009,
"end_time": "2023-07-08T12:37:12.025078",
"exception": false,
"start_time": "2023-07-08T12:37:12.017069",
"status": "completed"
},
"tags": []
},
"source": [
"# 2. Load the Datasets"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "255f98e5",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-08T12:37:12.044522Z",
"iopub.status.busy": "2023-07-08T12:37:12.042918Z",
"iopub.status.idle": "2023-07-08T12:37:12.098425Z",
"shell.execute_reply": "2023-07-08T12:37:12.097103Z"
},
"papermill": {
"duration": 0.067882,
"end_time": "2023-07-08T12:37:12.101115",
"exception": false,
"start_time": "2023-07-08T12:37:12.033233",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"# Directory holding the competition CSV files; defined once so the path\n",
"# is not repeated in every read below.\n",
"INPUT_DIR = '/kaggle/input/icr-identify-age-related-conditions'\n",
"\n",
"train_df = pd.read_csv(f'{INPUT_DIR}/train.csv')\n",
"test_df = pd.read_csv(f'{INPUT_DIR}/test.csv')\n",
"greeks_df = pd.read_csv(f'{INPUT_DIR}/greeks.csv')\n",
"sample_submission_df = pd.read_csv(f'{INPUT_DIR}/sample_submission.csv')"
]
},
{
"cell_type": "markdown",
"id": "606bc79e",
"metadata": {
"papermill": {
"duration": 0.00765,
"end_time": "2023-07-08T12:37:12.116804",
"exception": false,
"start_time": "2023-07-08T12:37:12.109154",
"status": "completed"
},
"tags": []
},
"source": [
"# 3. View the Dataset"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "acf04ce8",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-08T12:37:12.134314Z",
"iopub.status.busy": "2023-07-08T12:37:12.133766Z",
"iopub.status.idle": "2023-07-08T12:37:12.196768Z",
"shell.execute_reply": "2023-07-08T12:37:12.195381Z"
},
"papermill": {
"duration": 0.074372,
"end_time": "2023-07-08T12:37:12.198928",
"exception": false,
"start_time": "2023-07-08T12:37:12.124556",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Id | \n",
" AB | \n",
" AF | \n",
" AH | \n",
" AM | \n",
" AR | \n",
" AX | \n",
" AY | \n",
" AZ | \n",
" BC | \n",
" BD | \n",
" BN | \n",
" BP | \n",
" BQ | \n",
" BR | \n",
" BZ | \n",
" CB | \n",
" CC | \n",
" CD | \n",
" CF | \n",
" CH | \n",
" CL | \n",
" CR | \n",
" CS | \n",
" CU | \n",
" CW | \n",
" DA | \n",
" DE | \n",
" DF | \n",
" DH | \n",
" DI | \n",
" DL | \n",
" DN | \n",
" DU | \n",
" DV | \n",
" DY | \n",
" EB | \n",
" EE | \n",
" EG | \n",
" EH | \n",
" EJ | \n",
" EL | \n",
" EP | \n",
" EU | \n",
" FC | \n",
" FD | \n",
" FE | \n",
" FI | \n",
" FL | \n",
" FR | \n",
" FS | \n",
" GB | \n",
" GE | \n",
" GF | \n",
" GH | \n",
" GI | \n",
" GL | \n",
" Class | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 000ff2bfdfe9 | \n",
" 0.209377 | \n",
" 3109.03329 | \n",
" 85.200147 | \n",
" 22.394407 | \n",
" 8.138688 | \n",
" 0.699861 | \n",
" 0.025578 | \n",
" 9.812214 | \n",
" 5.555634 | \n",
" 4126.58731 | \n",
" 22.5984 | \n",
" 175.638726 | \n",
" 152.707705 | \n",
" 823.928241 | \n",
" 257.432377 | \n",
" 47.223358 | \n",
" 0.563481 | \n",
" 23.387600 | \n",
" 4.851915 | \n",
" 0.023482 | \n",
" 1.050225 | \n",
" 0.069225 | \n",
" 13.784111 | \n",
" 1.302012 | \n",
" 36.205956 | \n",
" 69.08340 | \n",
" 295.570575 | \n",
" 0.23868 | \n",
" 0.284232 | \n",
" 89.245560 | \n",
" 84.31664 | \n",
" 29.657104 | \n",
" 5.310690 | \n",
" 1.74307 | \n",
" 23.187704 | \n",
" 7.294176 | \n",
" 1.987283 | \n",
" 1433.166750 | \n",
" 0.949104 | \n",
" B | \n",
" 30.879420 | \n",
" 78.526968 | \n",
" 3.828384 | \n",
" 13.394640 | \n",
" 10.265073 | \n",
" 9028.291921 | \n",
" 3.583450 | \n",
" 7.298162 | \n",
" 1.73855 | \n",
" 0.094822 | \n",
" 11.339138 | \n",
" 72.611063 | \n",
" 2003.810319 | \n",
" 22.136229 | \n",
" 69.834944 | \n",
" 0.120343 | \n",
" 1 | \n",
"
\n",
" \n",
" 1 | \n",
" 007255e47698 | \n",
" 0.145282 | \n",
" 978.76416 | \n",
" 85.200147 | \n",
" 36.968889 | \n",
" 8.138688 | \n",
" 3.632190 | \n",
" 0.025578 | \n",
" 13.517790 | \n",
" 1.229900 | \n",
" 5496.92824 | \n",
" 19.4205 | \n",
" 155.868030 | \n",
" 14.754720 | \n",
" 51.216883 | \n",
" 257.432377 | \n",
" 30.284345 | \n",
" 0.484710 | \n",
" 50.628208 | \n",
" 6.085041 | \n",
" 0.031442 | \n",
" 1.113875 | \n",
" 1.117800 | \n",
" 28.310953 | \n",
" 1.357182 | \n",
" 37.476568 | \n",
" 70.79836 | \n",
" 178.553100 | \n",
" 0.23868 | \n",
" 0.363489 | \n",
" 110.581815 | \n",
" 75.74548 | \n",
" 37.532000 | \n",
" 0.005518 | \n",
" 1.74307 | \n",
" 17.222328 | \n",
" 4.926396 | \n",
" 0.858603 | \n",
" 1111.287150 | \n",
" 0.003042 | \n",
" A | \n",
" 109.125159 | \n",
" 95.415086 | \n",
" 52.260480 | \n",
" 17.175984 | \n",
" 0.296850 | \n",
" 6785.003474 | \n",
" 10.358927 | \n",
" 0.173229 | \n",
" 0.49706 | \n",
" 0.568932 | \n",
" 9.292698 | \n",
" 72.611063 | \n",
" 27981.562750 | \n",
" 29.135430 | \n",
" 32.131996 | \n",
" 21.978000 | \n",
" 0 | \n",
"
\n",
" \n",
" 2 | \n",
" 013f2bd269f5 | \n",
" 0.470030 | \n",
" 2635.10654 | \n",
" 85.200147 | \n",
" 32.360553 | \n",
" 8.138688 | \n",
" 6.732840 | \n",
" 0.025578 | \n",
" 12.824570 | \n",
" 1.229900 | \n",
" 5135.78024 | \n",
" 26.4825 | \n",
" 128.988531 | \n",
" 219.320160 | \n",
" 482.141594 | \n",
" 257.432377 | \n",
" 32.563713 | \n",
" 0.495852 | \n",
" 85.955376 | \n",
" 5.376488 | \n",
" 0.036218 | \n",
" 1.050225 | \n",
" 0.700350 | \n",
" 39.364743 | \n",
" 1.009611 | \n",
" 21.459644 | \n",
" 70.81970 | \n",
" 321.426625 | \n",
" 0.23868 | \n",
" 0.210441 | \n",
" 120.056438 | \n",
" 65.46984 | \n",
" 28.053464 | \n",
" 1.289739 | \n",
" 1.74307 | \n",
" 36.861352 | \n",
" 7.813674 | \n",
" 8.146651 | \n",
" 1494.076488 | \n",
" 0.377208 | \n",
" B | \n",
" 109.125159 | \n",
" 78.526968 | \n",
" 5.390628 | \n",
" 224.207424 | \n",
" 8.745201 | \n",
" 8338.906181 | \n",
" 11.626917 | \n",
" 7.709560 | \n",
" 0.97556 | \n",
" 1.198821 | \n",
" 37.077772 | \n",
" 88.609437 | \n",
" 13676.957810 | \n",
" 28.022851 | \n",
" 35.192676 | \n",
" 0.196941 | \n",
" 0 | \n",
"
\n",
" \n",
" 3 | \n",
" 043ac50845d5 | \n",
" 0.252107 | \n",
" 3819.65177 | \n",
" 120.201618 | \n",
" 77.112203 | \n",
" 8.138688 | \n",
" 3.685344 | \n",
" 0.025578 | \n",
" 11.053708 | \n",
" 1.229900 | \n",
" 4169.67738 | \n",
" 23.6577 | \n",
" 237.282264 | \n",
" 11.050410 | \n",
" 661.518640 | \n",
" 257.432377 | \n",
" 15.201914 | \n",
" 0.717882 | \n",
" 88.159360 | \n",
" 2.347652 | \n",
" 0.029054 | \n",
" 1.400300 | \n",
" 0.636075 | \n",
" 41.116960 | \n",
" 0.722727 | \n",
" 21.530392 | \n",
" 47.27586 | \n",
" 196.607985 | \n",
" 0.23868 | \n",
" 0.292431 | \n",
" 139.824570 | \n",
" 71.57120 | \n",
" 24.354856 | \n",
" 2.655345 | \n",
" 1.74307 | \n",
" 52.003884 | \n",
" 7.386060 | \n",
" 3.813326 | \n",
" 15691.552180 | \n",
" 0.614484 | \n",
" B | \n",
" 31.674357 | \n",
" 78.526968 | \n",
" 31.323372 | \n",
" 59.301984 | \n",
" 7.884336 | \n",
" 10965.766040 | \n",
" 14.852022 | \n",
" 6.122162 | \n",
" 0.49706 | \n",
" 0.284466 | \n",
" 18.529584 | \n",
" 82.416803 | \n",
" 2094.262452 | \n",
" 39.948656 | \n",
" 90.493248 | \n",
" 0.155829 | \n",
" 0 | \n",
"
\n",
" \n",
" 4 | \n",
" 044fb8a146ec | \n",
" 0.380297 | \n",
" 3733.04844 | \n",
" 85.200147 | \n",
" 14.103738 | \n",
" 8.138688 | \n",
" 3.942255 | \n",
" 0.054810 | \n",
" 3.396778 | \n",
" 102.151980 | \n",
" 5728.73412 | \n",
" 24.0108 | \n",
" 324.546318 | \n",
" 149.717165 | \n",
" 6074.859475 | \n",
" 257.432377 | \n",
" 82.213495 | \n",
" 0.536467 | \n",
" 72.644264 | \n",
" 30.537722 | \n",
" 0.025472 | \n",
" 1.050225 | \n",
" 0.693150 | \n",
" 31.724726 | \n",
" 0.827550 | \n",
" 34.415360 | \n",
" 74.06532 | \n",
" 200.178160 | \n",
" 0.23868 | \n",
" 0.207708 | \n",
" 97.920120 | \n",
" 52.83888 | \n",
" 26.019912 | \n",
" 1.144902 | \n",
" 1.74307 | \n",
" 9.064856 | \n",
" 7.350720 | \n",
" 3.490846 | \n",
" 1403.656300 | \n",
" 0.164268 | \n",
" B | \n",
" 109.125159 | \n",
" 91.994825 | \n",
" 51.141336 | \n",
" 29.102640 | \n",
" 4.274640 | \n",
" 16198.049590 | \n",
" 13.666727 | \n",
" 8.153058 | \n",
" 48.50134 | \n",
" 0.121914 | \n",
" 16.408728 | \n",
" 146.109943 | \n",
" 8524.370502 | \n",
" 45.381316 | \n",
" 36.262628 | \n",
" 0.096614 | \n",
" 1 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Id AB AF AH AM AR \\\n",
"0 000ff2bfdfe9 0.209377 3109.03329 85.200147 22.394407 8.138688 \n",
"1 007255e47698 0.145282 978.76416 85.200147 36.968889 8.138688 \n",
"2 013f2bd269f5 0.470030 2635.10654 85.200147 32.360553 8.138688 \n",
"3 043ac50845d5 0.252107 3819.65177 120.201618 77.112203 8.138688 \n",
"4 044fb8a146ec 0.380297 3733.04844 85.200147 14.103738 8.138688 \n",
"\n",
" AX AY AZ BC BD BN BP \\\n",
"0 0.699861 0.025578 9.812214 5.555634 4126.58731 22.5984 175.638726 \n",
"1 3.632190 0.025578 13.517790 1.229900 5496.92824 19.4205 155.868030 \n",
"2 6.732840 0.025578 12.824570 1.229900 5135.78024 26.4825 128.988531 \n",
"3 3.685344 0.025578 11.053708 1.229900 4169.67738 23.6577 237.282264 \n",
"4 3.942255 0.054810 3.396778 102.151980 5728.73412 24.0108 324.546318 \n",
"\n",
" BQ BR BZ CB CC CD \\\n",
"0 152.707705 823.928241 257.432377 47.223358 0.563481 23.387600 \n",
"1 14.754720 51.216883 257.432377 30.284345 0.484710 50.628208 \n",
"2 219.320160 482.141594 257.432377 32.563713 0.495852 85.955376 \n",
"3 11.050410 661.518640 257.432377 15.201914 0.717882 88.159360 \n",
"4 149.717165 6074.859475 257.432377 82.213495 0.536467 72.644264 \n",
"\n",
" CF CH CL CR CS CU CW \\\n",
"0 4.851915 0.023482 1.050225 0.069225 13.784111 1.302012 36.205956 \n",
"1 6.085041 0.031442 1.113875 1.117800 28.310953 1.357182 37.476568 \n",
"2 5.376488 0.036218 1.050225 0.700350 39.364743 1.009611 21.459644 \n",
"3 2.347652 0.029054 1.400300 0.636075 41.116960 0.722727 21.530392 \n",
"4 30.537722 0.025472 1.050225 0.693150 31.724726 0.827550 34.415360 \n",
"\n",
" DA DE DF DH DI DL DN \\\n",
"0 69.08340 295.570575 0.23868 0.284232 89.245560 84.31664 29.657104 \n",
"1 70.79836 178.553100 0.23868 0.363489 110.581815 75.74548 37.532000 \n",
"2 70.81970 321.426625 0.23868 0.210441 120.056438 65.46984 28.053464 \n",
"3 47.27586 196.607985 0.23868 0.292431 139.824570 71.57120 24.354856 \n",
"4 74.06532 200.178160 0.23868 0.207708 97.920120 52.83888 26.019912 \n",
"\n",
" DU DV DY EB EE EG EH \\\n",
"0 5.310690 1.74307 23.187704 7.294176 1.987283 1433.166750 0.949104 \n",
"1 0.005518 1.74307 17.222328 4.926396 0.858603 1111.287150 0.003042 \n",
"2 1.289739 1.74307 36.861352 7.813674 8.146651 1494.076488 0.377208 \n",
"3 2.655345 1.74307 52.003884 7.386060 3.813326 15691.552180 0.614484 \n",
"4 1.144902 1.74307 9.064856 7.350720 3.490846 1403.656300 0.164268 \n",
"\n",
" EJ EL EP EU FC FD FE \\\n",
"0 B 30.879420 78.526968 3.828384 13.394640 10.265073 9028.291921 \n",
"1 A 109.125159 95.415086 52.260480 17.175984 0.296850 6785.003474 \n",
"2 B 109.125159 78.526968 5.390628 224.207424 8.745201 8338.906181 \n",
"3 B 31.674357 78.526968 31.323372 59.301984 7.884336 10965.766040 \n",
"4 B 109.125159 91.994825 51.141336 29.102640 4.274640 16198.049590 \n",
"\n",
" FI FL FR FS GB GE \\\n",
"0 3.583450 7.298162 1.73855 0.094822 11.339138 72.611063 \n",
"1 10.358927 0.173229 0.49706 0.568932 9.292698 72.611063 \n",
"2 11.626917 7.709560 0.97556 1.198821 37.077772 88.609437 \n",
"3 14.852022 6.122162 0.49706 0.284466 18.529584 82.416803 \n",
"4 13.666727 8.153058 48.50134 0.121914 16.408728 146.109943 \n",
"\n",
" GF GH GI GL Class \n",
"0 2003.810319 22.136229 69.834944 0.120343 1 \n",
"1 27981.562750 29.135430 32.131996 21.978000 0 \n",
"2 13676.957810 28.022851 35.192676 0.196941 0 \n",
"3 2094.262452 39.948656 90.493248 0.155829 0 \n",
"4 8524.370502 45.381316 36.262628 0.096614 1 "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train_df.head()"
]
},
{
"cell_type": "markdown",
"id": "d59c5362",
"metadata": {
"papermill": {
"duration": 0.008915,
"end_time": "2023-07-08T12:37:12.216891",
"exception": false,
"start_time": "2023-07-08T12:37:12.207976",
"status": "completed"
},
"tags": []
},
"source": [
"# 4. Descriptive Statistics of the train dataset"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "823c6173",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-08T12:37:12.235375Z",
"iopub.status.busy": "2023-07-08T12:37:12.234988Z",
"iopub.status.idle": "2023-07-08T12:37:12.381088Z",
"shell.execute_reply": "2023-07-08T12:37:12.379852Z"
},
"papermill": {
"duration": 0.15796,
"end_time": "2023-07-08T12:37:12.383149",
"exception": false,
"start_time": "2023-07-08T12:37:12.225189",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" AB | \n",
" AF | \n",
" AH | \n",
" AM | \n",
" AR | \n",
" AX | \n",
" AY | \n",
" AZ | \n",
" BC | \n",
" BD | \n",
" BN | \n",
" BP | \n",
" BQ | \n",
" BR | \n",
" BZ | \n",
" CB | \n",
" CC | \n",
" CD | \n",
" CF | \n",
" CH | \n",
" CL | \n",
" CR | \n",
" CS | \n",
" CU | \n",
" CW | \n",
" DA | \n",
" DE | \n",
" DF | \n",
" DH | \n",
" DI | \n",
" DL | \n",
" DN | \n",
" DU | \n",
" DV | \n",
" DY | \n",
" EB | \n",
" EE | \n",
" EG | \n",
" EH | \n",
" EL | \n",
" EP | \n",
" EU | \n",
" FC | \n",
" FD | \n",
" FE | \n",
" FI | \n",
" FL | \n",
" FR | \n",
" FS | \n",
" GB | \n",
" GE | \n",
" GF | \n",
" GH | \n",
" GI | \n",
" GL | \n",
" Class | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 557.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 615.000000 | \n",
" 614.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 616.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 557.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 616.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 616.000000 | \n",
" 617.000000 | \n",
" 615.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 616.000000 | \n",
" 617.000000 | \n",
"
\n",
" \n",
" mean | \n",
" 0.477149 | \n",
" 3502.013221 | \n",
" 118.624513 | \n",
" 38.968552 | \n",
" 10.128242 | \n",
" 5.545576 | \n",
" 0.060320 | \n",
" 10.566447 | \n",
" 8.053012 | \n",
" 5350.388655 | \n",
" 21.419492 | \n",
" 231.322223 | \n",
" 98.328737 | \n",
" 1218.133238 | \n",
" 550.632525 | \n",
" 77.104151 | \n",
" 0.688801 | \n",
" 90.251735 | \n",
" 11.241064 | \n",
" 0.030615 | \n",
" 1.403761 | \n",
" 0.742262 | \n",
" 36.917590 | \n",
" 1.383792 | \n",
" 27.165653 | \n",
" 51.128326 | \n",
" 401.901299 | \n",
" 0.633884 | \n",
" 0.367002 | \n",
" 146.972099 | \n",
" 94.795377 | \n",
" 26.370568 | \n",
" 1.802900 | \n",
" 1.924830 | \n",
" 26.388989 | \n",
" 9.072700 | \n",
" 3.064778 | \n",
" 1731.248215 | \n",
" 0.305107 | \n",
" 69.582596 | \n",
" 105.060712 | \n",
" 69.117005 | \n",
" 71.341526 | \n",
" 6.930086 | \n",
" 10306.810737 | \n",
" 10.111079 | \n",
" 5.433199 | \n",
" 3.533905 | \n",
" 0.421501 | \n",
" 20.724856 | \n",
" 131.714987 | \n",
" 14679.595398 | \n",
" 31.489716 | \n",
" 50.584437 | \n",
" 8.530961 | \n",
" 0.175041 | \n",
"
\n",
" \n",
" std | \n",
" 0.468388 | \n",
" 2300.322717 | \n",
" 127.838950 | \n",
" 69.728226 | \n",
" 10.518877 | \n",
" 2.551696 | \n",
" 0.416817 | \n",
" 4.350645 | \n",
" 65.166943 | \n",
" 3021.326641 | \n",
" 3.478278 | \n",
" 183.992505 | \n",
" 96.479371 | \n",
" 7575.293707 | \n",
" 2076.371275 | \n",
" 159.049302 | \n",
" 0.263994 | \n",
" 51.585130 | \n",
" 13.571133 | \n",
" 0.014808 | \n",
" 1.922210 | \n",
" 0.281195 | \n",
" 17.266347 | \n",
" 0.538717 | \n",
" 14.645993 | \n",
" 21.210888 | \n",
" 317.745623 | \n",
" 1.912384 | \n",
" 0.112989 | \n",
" 86.084419 | \n",
" 28.243187 | \n",
" 8.038825 | \n",
" 9.034721 | \n",
" 1.484555 | \n",
" 18.116679 | \n",
" 6.200281 | \n",
" 2.058344 | \n",
" 1790.227476 | \n",
" 1.847499 | \n",
" 38.555707 | \n",
" 68.445620 | \n",
" 390.187057 | \n",
" 165.551545 | \n",
" 64.754262 | \n",
" 11331.294051 | \n",
" 2.934025 | \n",
" 11.496257 | \n",
" 50.181948 | \n",
" 1.305365 | \n",
" 9.991907 | \n",
" 144.181524 | \n",
" 19352.959387 | \n",
" 9.864239 | \n",
" 36.266251 | \n",
" 10.327010 | \n",
" 0.380310 | \n",
"
\n",
" \n",
" min | \n",
" 0.081187 | \n",
" 192.593280 | \n",
" 85.200147 | \n",
" 3.177522 | \n",
" 8.138688 | \n",
" 0.699861 | \n",
" 0.025578 | \n",
" 3.396778 | \n",
" 1.229900 | \n",
" 1693.624320 | \n",
" 9.886800 | \n",
" 72.948951 | \n",
" 1.331155 | \n",
" 51.216883 | \n",
" 257.432377 | \n",
" 12.499760 | \n",
" 0.176874 | \n",
" 23.387600 | \n",
" 0.510888 | \n",
" 0.003184 | \n",
" 1.050225 | \n",
" 0.069225 | \n",
" 13.784111 | \n",
" 0.137925 | \n",
" 7.030640 | \n",
" 6.906400 | \n",
" 35.998895 | \n",
" 0.238680 | \n",
" 0.040995 | \n",
" 60.232470 | \n",
" 10.345600 | \n",
" 6.339496 | \n",
" 0.005518 | \n",
" 1.743070 | \n",
" 0.804068 | \n",
" 4.926396 | \n",
" 0.286201 | \n",
" 185.594100 | \n",
" 0.003042 | \n",
" 5.394675 | \n",
" 78.526968 | \n",
" 3.828384 | \n",
" 7.534128 | \n",
" 0.296850 | \n",
" 1563.136688 | \n",
" 3.583450 | \n",
" 0.173229 | \n",
" 0.497060 | \n",
" 0.067730 | \n",
" 4.102182 | \n",
" 72.611063 | \n",
" 13.038894 | \n",
" 9.432735 | \n",
" 0.897628 | \n",
" 0.001129 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 25% | \n",
" 0.252107 | \n",
" 2197.345480 | \n",
" 85.200147 | \n",
" 12.270314 | \n",
" 8.138688 | \n",
" 4.128294 | \n",
" 0.025578 | \n",
" 8.129580 | \n",
" 1.229900 | \n",
" 4155.702870 | \n",
" 19.420500 | \n",
" 156.847239 | \n",
" 27.834425 | \n",
" 424.990642 | \n",
" 257.432377 | \n",
" 23.317567 | \n",
" 0.563688 | \n",
" 64.724192 | \n",
" 5.066306 | \n",
" 0.023482 | \n",
" 1.050225 | \n",
" 0.589575 | \n",
" 29.782467 | \n",
" 1.070298 | \n",
" 7.030640 | \n",
" 37.942520 | \n",
" 188.815690 | \n",
" 0.238680 | \n",
" 0.295164 | \n",
" 102.703553 | \n",
" 78.232240 | \n",
" 20.888264 | \n",
" 0.005518 | \n",
" 1.743070 | \n",
" 14.715792 | \n",
" 5.965392 | \n",
" 1.648679 | \n",
" 1111.160625 | \n",
" 0.003042 | \n",
" 30.927468 | \n",
" 78.526968 | \n",
" 4.324656 | \n",
" 25.815384 | \n",
" 0.296850 | \n",
" 5164.666260 | \n",
" 8.523098 | \n",
" 0.173229 | \n",
" 0.497060 | \n",
" 0.067730 | \n",
" 14.036718 | \n",
" 72.611063 | \n",
" 2798.992584 | \n",
" 25.034888 | \n",
" 23.011684 | \n",
" 0.124392 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 50% | \n",
" 0.354659 | \n",
" 3120.318960 | \n",
" 85.200147 | \n",
" 20.533110 | \n",
" 8.138688 | \n",
" 5.031912 | \n",
" 0.025578 | \n",
" 10.461320 | \n",
" 1.229900 | \n",
" 4997.960730 | \n",
" 21.186000 | \n",
" 193.908816 | \n",
" 61.642115 | \n",
" 627.417402 | \n",
" 257.432377 | \n",
" 42.554330 | \n",
" 0.658715 | \n",
" 79.819104 | \n",
" 9.123000 | \n",
" 0.027860 | \n",
" 1.050225 | \n",
" 0.730800 | \n",
" 34.835130 | \n",
" 1.351665 | \n",
" 36.019104 | \n",
" 49.180940 | \n",
" 307.509595 | \n",
" 0.238680 | \n",
" 0.358023 | \n",
" 130.050630 | \n",
" 96.264960 | \n",
" 25.248800 | \n",
" 0.251741 | \n",
" 1.743070 | \n",
" 21.642456 | \n",
" 8.149404 | \n",
" 2.616119 | \n",
" 1493.817413 | \n",
" 0.085176 | \n",
" 71.949306 | \n",
" 78.526968 | \n",
" 22.641144 | \n",
" 36.394008 | \n",
" 1.870155 | \n",
" 7345.143424 | \n",
" 9.945452 | \n",
" 3.028141 | \n",
" 1.131000 | \n",
" 0.250601 | \n",
" 18.771436 | \n",
" 72.611063 | \n",
" 7838.273610 | \n",
" 30.608946 | \n",
" 41.007968 | \n",
" 0.337827 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 75% | \n",
" 0.559763 | \n",
" 4361.637390 | \n",
" 113.739540 | \n",
" 39.139886 | \n",
" 8.138688 | \n",
" 6.431634 | \n",
" 0.036845 | \n",
" 12.969516 | \n",
" 5.081244 | \n",
" 6035.885700 | \n",
" 23.657700 | \n",
" 247.803462 | \n",
" 134.009015 | \n",
" 975.649259 | \n",
" 257.432377 | \n",
" 77.310097 | \n",
" 0.772206 | \n",
" 99.813520 | \n",
" 13.565901 | \n",
" 0.034427 | \n",
" 1.228445 | \n",
" 0.859350 | \n",
" 40.529401 | \n",
" 1.660617 | \n",
" 37.935832 | \n",
" 61.408760 | \n",
" 507.896200 | \n",
" 0.238680 | \n",
" 0.426348 | \n",
" 165.836955 | \n",
" 110.640680 | \n",
" 30.544224 | \n",
" 1.058690 | \n",
" 1.743070 | \n",
" 34.058344 | \n",
" 10.503048 | \n",
" 3.910070 | \n",
" 1905.701475 | \n",
" 0.237276 | \n",
" 109.125159 | \n",
" 112.766654 | \n",
" 49.085352 | \n",
" 56.714448 | \n",
" 4.880214 | \n",
" 10647.951650 | \n",
" 11.516657 | \n",
" 6.238814 | \n",
" 1.512060 | \n",
" 0.535067 | \n",
" 25.608406 | \n",
" 127.591671 | \n",
" 19035.709240 | \n",
" 36.863947 | \n",
" 67.931664 | \n",
" 21.978000 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" max | \n",
" 6.161666 | \n",
" 28688.187660 | \n",
" 1910.123198 | \n",
" 630.518230 | \n",
" 178.943634 | \n",
" 38.270880 | \n",
" 10.315851 | \n",
" 38.971568 | \n",
" 1463.693448 | \n",
" 53060.599240 | \n",
" 29.307300 | \n",
" 2447.810550 | \n",
" 344.644105 | \n",
" 179250.252900 | \n",
" 50092.459300 | \n",
" 2271.436167 | \n",
" 4.103032 | \n",
" 633.534408 | \n",
" 200.967526 | \n",
" 0.224074 | \n",
" 31.688153 | \n",
" 3.039675 | \n",
" 267.942823 | \n",
" 4.951507 | \n",
" 64.521624 | \n",
" 210.330920 | \n",
" 2103.405190 | \n",
" 37.895013 | \n",
" 1.060404 | \n",
" 1049.168078 | \n",
" 326.236200 | \n",
" 62.808096 | \n",
" 161.355315 | \n",
" 25.192930 | \n",
" 152.355164 | \n",
" 94.958580 | \n",
" 18.324926 | \n",
" 30243.758780 | \n",
" 42.569748 | \n",
" 109.125159 | \n",
" 1063.594578 | \n",
" 6501.264480 | \n",
" 3030.655824 | \n",
" 1578.654237 | \n",
" 143224.682300 | \n",
" 35.851039 | \n",
" 137.932739 | \n",
" 1244.227020 | \n",
" 31.365763 | \n",
" 135.781294 | \n",
" 1497.351958 | \n",
" 143790.071200 | \n",
" 81.210825 | \n",
" 191.194764 | \n",
" 21.978000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" AB AF AH AM AR \\\n",
"count 617.000000 617.000000 617.000000 617.000000 617.000000 \n",
"mean 0.477149 3502.013221 118.624513 38.968552 10.128242 \n",
"std 0.468388 2300.322717 127.838950 69.728226 10.518877 \n",
"min 0.081187 192.593280 85.200147 3.177522 8.138688 \n",
"25% 0.252107 2197.345480 85.200147 12.270314 8.138688 \n",
"50% 0.354659 3120.318960 85.200147 20.533110 8.138688 \n",
"75% 0.559763 4361.637390 113.739540 39.139886 8.138688 \n",
"max 6.161666 28688.187660 1910.123198 630.518230 178.943634 \n",
"\n",
" AX AY AZ BC BD \\\n",
"count 617.000000 617.000000 617.000000 617.000000 617.000000 \n",
"mean 5.545576 0.060320 10.566447 8.053012 5350.388655 \n",
"std 2.551696 0.416817 4.350645 65.166943 3021.326641 \n",
"min 0.699861 0.025578 3.396778 1.229900 1693.624320 \n",
"25% 4.128294 0.025578 8.129580 1.229900 4155.702870 \n",
"50% 5.031912 0.025578 10.461320 1.229900 4997.960730 \n",
"75% 6.431634 0.036845 12.969516 5.081244 6035.885700 \n",
"max 38.270880 10.315851 38.971568 1463.693448 53060.599240 \n",
"\n",
" BN BP BQ BR BZ \\\n",
"count 617.000000 617.000000 557.000000 617.000000 617.000000 \n",
"mean 21.419492 231.322223 98.328737 1218.133238 550.632525 \n",
"std 3.478278 183.992505 96.479371 7575.293707 2076.371275 \n",
"min 9.886800 72.948951 1.331155 51.216883 257.432377 \n",
"25% 19.420500 156.847239 27.834425 424.990642 257.432377 \n",
"50% 21.186000 193.908816 61.642115 627.417402 257.432377 \n",
"75% 23.657700 247.803462 134.009015 975.649259 257.432377 \n",
"max 29.307300 2447.810550 344.644105 179250.252900 50092.459300 \n",
"\n",
" CB CC CD CF CH \\\n",
"count 615.000000 614.000000 617.000000 617.000000 617.000000 \n",
"mean 77.104151 0.688801 90.251735 11.241064 0.030615 \n",
"std 159.049302 0.263994 51.585130 13.571133 0.014808 \n",
"min 12.499760 0.176874 23.387600 0.510888 0.003184 \n",
"25% 23.317567 0.563688 64.724192 5.066306 0.023482 \n",
"50% 42.554330 0.658715 79.819104 9.123000 0.027860 \n",
"75% 77.310097 0.772206 99.813520 13.565901 0.034427 \n",
"max 2271.436167 4.103032 633.534408 200.967526 0.224074 \n",
"\n",
" CL CR CS CU CW DA \\\n",
"count 617.000000 617.000000 617.000000 617.000000 617.000000 617.000000 \n",
"mean 1.403761 0.742262 36.917590 1.383792 27.165653 51.128326 \n",
"std 1.922210 0.281195 17.266347 0.538717 14.645993 21.210888 \n",
"min 1.050225 0.069225 13.784111 0.137925 7.030640 6.906400 \n",
"25% 1.050225 0.589575 29.782467 1.070298 7.030640 37.942520 \n",
"50% 1.050225 0.730800 34.835130 1.351665 36.019104 49.180940 \n",
"75% 1.228445 0.859350 40.529401 1.660617 37.935832 61.408760 \n",
"max 31.688153 3.039675 267.942823 4.951507 64.521624 210.330920 \n",
"\n",
" DE DF DH DI DL \\\n",
"count 617.000000 617.000000 617.000000 617.000000 617.000000 \n",
"mean 401.901299 0.633884 0.367002 146.972099 94.795377 \n",
"std 317.745623 1.912384 0.112989 86.084419 28.243187 \n",
"min 35.998895 0.238680 0.040995 60.232470 10.345600 \n",
"25% 188.815690 0.238680 0.295164 102.703553 78.232240 \n",
"50% 307.509595 0.238680 0.358023 130.050630 96.264960 \n",
"75% 507.896200 0.238680 0.426348 165.836955 110.640680 \n",
"max 2103.405190 37.895013 1.060404 1049.168078 326.236200 \n",
"\n",
" DN DU DV DY EB EE \\\n",
"count 617.000000 616.000000 617.000000 617.000000 617.000000 617.000000 \n",
"mean 26.370568 1.802900 1.924830 26.388989 9.072700 3.064778 \n",
"std 8.038825 9.034721 1.484555 18.116679 6.200281 2.058344 \n",
"min 6.339496 0.005518 1.743070 0.804068 4.926396 0.286201 \n",
"25% 20.888264 0.005518 1.743070 14.715792 5.965392 1.648679 \n",
"50% 25.248800 0.251741 1.743070 21.642456 8.149404 2.616119 \n",
"75% 30.544224 1.058690 1.743070 34.058344 10.503048 3.910070 \n",
"max 62.808096 161.355315 25.192930 152.355164 94.958580 18.324926 \n",
"\n",
" EG EH EL EP EU \\\n",
"count 617.000000 617.000000 557.000000 617.000000 617.000000 \n",
"mean 1731.248215 0.305107 69.582596 105.060712 69.117005 \n",
"std 1790.227476 1.847499 38.555707 68.445620 390.187057 \n",
"min 185.594100 0.003042 5.394675 78.526968 3.828384 \n",
"25% 1111.160625 0.003042 30.927468 78.526968 4.324656 \n",
"50% 1493.817413 0.085176 71.949306 78.526968 22.641144 \n",
"75% 1905.701475 0.237276 109.125159 112.766654 49.085352 \n",
"max 30243.758780 42.569748 109.125159 1063.594578 6501.264480 \n",
"\n",
" FC FD FE FI FL \\\n",
"count 616.000000 617.000000 617.000000 617.000000 616.000000 \n",
"mean 71.341526 6.930086 10306.810737 10.111079 5.433199 \n",
"std 165.551545 64.754262 11331.294051 2.934025 11.496257 \n",
"min 7.534128 0.296850 1563.136688 3.583450 0.173229 \n",
"25% 25.815384 0.296850 5164.666260 8.523098 0.173229 \n",
"50% 36.394008 1.870155 7345.143424 9.945452 3.028141 \n",
"75% 56.714448 4.880214 10647.951650 11.516657 6.238814 \n",
"max 3030.655824 1578.654237 143224.682300 35.851039 137.932739 \n",
"\n",
" FR FS GB GE GF \\\n",
"count 617.000000 615.000000 617.000000 617.000000 617.000000 \n",
"mean 3.533905 0.421501 20.724856 131.714987 14679.595398 \n",
"std 50.181948 1.305365 9.991907 144.181524 19352.959387 \n",
"min 0.497060 0.067730 4.102182 72.611063 13.038894 \n",
"25% 0.497060 0.067730 14.036718 72.611063 2798.992584 \n",
"50% 1.131000 0.250601 18.771436 72.611063 7838.273610 \n",
"75% 1.512060 0.535067 25.608406 127.591671 19035.709240 \n",
"max 1244.227020 31.365763 135.781294 1497.351958 143790.071200 \n",
"\n",
" GH GI GL Class \n",
"count 617.000000 617.000000 616.000000 617.000000 \n",
"mean 31.489716 50.584437 8.530961 0.175041 \n",
"std 9.864239 36.266251 10.327010 0.380310 \n",
"min 9.432735 0.897628 0.001129 0.000000 \n",
"25% 25.034888 23.011684 0.124392 0.000000 \n",
"50% 30.608946 41.007968 0.337827 0.000000 \n",
"75% 36.863947 67.931664 21.978000 0.000000 \n",
"max 81.210825 191.194764 21.978000 1.000000 "
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train_df.describe()"
]
},
{
"cell_type": "markdown",
"id": "83668022",
"metadata": {
"papermill": {
"duration": 0.008702,
"end_time": "2023-07-08T12:37:12.400982",
"exception": false,
"start_time": "2023-07-08T12:37:12.392280",
"status": "completed"
},
"tags": []
},
"source": [
"# 5. Encoding"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "38227807",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-08T12:37:12.420754Z",
"iopub.status.busy": "2023-07-08T12:37:12.420359Z",
"iopub.status.idle": "2023-07-08T12:37:12.426175Z",
"shell.execute_reply": "2023-07-08T12:37:12.425023Z"
},
"papermill": {
"duration": 0.018785,
"end_time": "2023-07-08T12:37:12.428835",
"exception": false,
"start_time": "2023-07-08T12:37:12.410050",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"def encode(dataframe):\n",
" le = LabelEncoder()\n",
" obj = list(dataframe.loc[:, dataframe.dtypes == 'object'].columns)\n",
" for i in obj:\n",
" if i not in ['id', 'Epsilon']:\n",
" dataframe[i] = le.fit_transform(dataframe[i])\n",
" return dataframe"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "0e25d090",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-08T12:37:12.448556Z",
"iopub.status.busy": "2023-07-08T12:37:12.448219Z",
"iopub.status.idle": "2023-07-08T12:37:12.461925Z",
"shell.execute_reply": "2023-07-08T12:37:12.460920Z"
},
"papermill": {
"duration": 0.026491,
"end_time": "2023-07-08T12:37:12.464641",
"exception": false,
"start_time": "2023-07-08T12:37:12.438150",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"df = encode(train_df)\n",
"test_df = encode(test_df)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "d8d92abb",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-08T12:37:12.484887Z",
"iopub.status.busy": "2023-07-08T12:37:12.484487Z",
"iopub.status.idle": "2023-07-08T12:37:12.492017Z",
"shell.execute_reply": "2023-07-08T12:37:12.490682Z"
},
"papermill": {
"duration": 0.020058,
"end_time": "2023-07-08T12:37:12.494463",
"exception": false,
"start_time": "2023-07-08T12:37:12.474405",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"Index(['Id', 'AB', 'AF', 'AH', 'AM', 'AR', 'AX', 'AY', 'AZ', 'BC', 'BD ', 'BN',\n",
" 'BP', 'BQ', 'BR', 'BZ', 'CB', 'CC', 'CD ', 'CF', 'CH', 'CL', 'CR', 'CS',\n",
" 'CU', 'CW ', 'DA', 'DE', 'DF', 'DH', 'DI', 'DL', 'DN', 'DU', 'DV', 'DY',\n",
" 'EB', 'EE', 'EG', 'EH', 'EJ', 'EL', 'EP', 'EU', 'FC', 'FD ', 'FE', 'FI',\n",
" 'FL', 'FR', 'FS', 'GB', 'GE', 'GF', 'GH', 'GI', 'GL', 'Class'],\n",
" dtype='object')"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.columns"
]
},
{
"cell_type": "markdown",
"id": "8c002e33",
"metadata": {
"papermill": {
"duration": 0.008727,
"end_time": "2023-07-08T12:37:12.512640",
"exception": false,
"start_time": "2023-07-08T12:37:12.503913",
"status": "completed"
},
"tags": []
},
"source": [
"# 6. Separate into Dependent and Independent "
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "4a189550",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-08T12:37:12.532567Z",
"iopub.status.busy": "2023-07-08T12:37:12.532205Z",
"iopub.status.idle": "2023-07-08T12:37:12.538788Z",
"shell.execute_reply": "2023-07-08T12:37:12.537366Z"
},
"papermill": {
"duration": 0.018953,
"end_time": "2023-07-08T12:37:12.540718",
"exception": false,
"start_time": "2023-07-08T12:37:12.521765",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"indep_cols = ['Id', 'AB', 'AF', 'AH', 'AM', 'AR', 'AX', 'AY', 'AZ', 'BC', 'BD ', 'BN',\n",
" 'BP', 'BQ', 'BR', 'BZ', 'CB', 'CC', 'CD ', 'CF', 'CH', 'CL', 'CR', 'CS',\n",
" 'CU', 'CW ', 'DA', 'DE', 'DF', 'DH', 'DI', 'DL', 'DN', 'DU', 'DV', 'DY',\n",
" 'EB', 'EE', 'EG', 'EH', 'EJ', 'EL', 'EP', 'EU', 'FC', 'FD ', 'FE', 'FI',\n",
" 'FL', 'FR', 'FS', 'GB', 'GE', 'GF', 'GH', 'GI', 'GL']\n",
"\n",
"dep_cols = ['Class']"
]
},
{
"cell_type": "markdown",
"id": "d4fc397f",
"metadata": {
"papermill": {
"duration": 0.008825,
"end_time": "2023-07-08T12:37:12.558875",
"exception": false,
"start_time": "2023-07-08T12:37:12.550050",
"status": "completed"
},
"tags": []
},
"source": [
"# 7. Simple Imputer"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "4021c983",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-08T12:37:12.579038Z",
"iopub.status.busy": "2023-07-08T12:37:12.578626Z",
"iopub.status.idle": "2023-07-08T12:37:12.615548Z",
"shell.execute_reply": "2023-07-08T12:37:12.614038Z"
},
"papermill": {
"duration": 0.050168,
"end_time": "2023-07-08T12:37:12.618109",
"exception": false,
"start_time": "2023-07-08T12:37:12.567941",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"imputer = SimpleImputer(strategy = 'mean')\n",
"df[indep_cols] = imputer.fit_transform(df[indep_cols])\n",
"test_df[indep_cols] = imputer.fit_transform(test_df[indep_cols])"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "c5748b87",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-08T12:37:12.638761Z",
"iopub.status.busy": "2023-07-08T12:37:12.638334Z",
"iopub.status.idle": "2023-07-08T12:37:12.645764Z",
"shell.execute_reply": "2023-07-08T12:37:12.644599Z"
},
"papermill": {
"duration": 0.019942,
"end_time": "2023-07-08T12:37:12.647818",
"exception": false,
"start_time": "2023-07-08T12:37:12.627876",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"[2.0,\n",
" 0.0,\n",
" 0.0,\n",
" 0.0,\n",
" 0.0,\n",
" 0.0,\n",
" 0.0,\n",
" 0.0,\n",
" 0.0,\n",
" 0.0,\n",
" 0.0,\n",
" 0.0,\n",
" 0.0,\n",
" 0.0,\n",
" 0.0,\n",
" 0.0,\n",
" 0.0,\n",
" 0.0,\n",
" 0.0,\n",
" 0.0,\n",
" 0.0,\n",
" 0.0,\n",
" 0.0,\n",
" 0.0,\n",
" 0.0,\n",
" 0.0,\n",
" 0.0,\n",
" 0.0,\n",
" 0.0,\n",
" 0.0,\n",
" 0.0,\n",
" 0.0,\n",
" 0.0,\n",
" 0.0,\n",
" 0.0,\n",
" 0.0,\n",
" 0.0,\n",
" 0.0,\n",
" 0.0,\n",
" 0.0,\n",
" 0.0,\n",
" 0.0,\n",
" 0.0,\n",
" 0.0,\n",
" 0.0,\n",
" 0.0,\n",
" 0.0,\n",
" 0.0,\n",
" 0.0,\n",
" 0.0,\n",
" 0.0,\n",
" 0.0,\n",
" 0.0,\n",
" 0.0,\n",
" 0.0,\n",
" 0.0,\n",
" 0.0]"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"list(imputer.statistics_)"
]
},
{
"cell_type": "markdown",
"id": "ccb402f0",
"metadata": {
"papermill": {
"duration": 0.009508,
"end_time": "2023-07-08T12:37:12.666898",
"exception": false,
"start_time": "2023-07-08T12:37:12.657390",
"status": "completed"
},
"tags": []
},
"source": [
"# 8. MinMax Scaler "
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "1bc5c2a2",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-08T12:37:12.687784Z",
"iopub.status.busy": "2023-07-08T12:37:12.687364Z",
"iopub.status.idle": "2023-07-08T12:37:12.692087Z",
"shell.execute_reply": "2023-07-08T12:37:12.691246Z"
},
"papermill": {
"duration": 0.01743,
"end_time": "2023-07-08T12:37:12.693923",
"exception": false,
"start_time": "2023-07-08T12:37:12.676493",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"scaler = MinMaxScaler()"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "7f9feb8b",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-08T12:37:12.714293Z",
"iopub.status.busy": "2023-07-08T12:37:12.713907Z",
"iopub.status.idle": "2023-07-08T12:37:12.731282Z",
"shell.execute_reply": "2023-07-08T12:37:12.729918Z"
},
"papermill": {
"duration": 0.030037,
"end_time": "2023-07-08T12:37:12.733256",
"exception": false,
"start_time": "2023-07-08T12:37:12.703219",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"MinMaxScaler()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org. "
],
"text/plain": [
"MinMaxScaler()"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"scaler.fit(df[indep_cols])"
]
},
{
"cell_type": "markdown",
"id": "b4b22656",
"metadata": {
"papermill": {
"duration": 0.008936,
"end_time": "2023-07-08T12:37:12.751338",
"exception": false,
"start_time": "2023-07-08T12:37:12.742402",
"status": "completed"
},
"tags": []
},
"source": [
"**Minimum**"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "dcc126ed",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-08T12:37:12.771383Z",
"iopub.status.busy": "2023-07-08T12:37:12.771043Z",
"iopub.status.idle": "2023-07-08T12:37:12.778332Z",
"shell.execute_reply": "2023-07-08T12:37:12.777449Z"
},
"papermill": {
"duration": 0.020392,
"end_time": "2023-07-08T12:37:12.780961",
"exception": false,
"start_time": "2023-07-08T12:37:12.760569",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Minimum:\n"
]
},
{
"data": {
"text/plain": [
"[0.0,\n",
" 0.081187,\n",
" 192.59328,\n",
" 85.200147,\n",
" 3.177522,\n",
" 8.138688,\n",
" 0.699861,\n",
" 0.025578,\n",
" 3.396778,\n",
" 1.2299,\n",
" 1693.62432,\n",
" 9.8868,\n",
" 72.948951,\n",
" 1.331155,\n",
" 51.216883,\n",
" 257.432377,\n",
" 12.49976,\n",
" 0.17687412,\n",
" 23.3876,\n",
" 0.510888,\n",
" 0.003184,\n",
" 1.050225,\n",
" 0.069225,\n",
" 13.784111,\n",
" 0.137925,\n",
" 7.03064,\n",
" 6.9064,\n",
" 35.998895,\n",
" 0.23868,\n",
" 0.040995,\n",
" 60.23247,\n",
" 10.3456,\n",
" 6.339496,\n",
" 0.0055176,\n",
" 1.74307,\n",
" 0.804068,\n",
" 4.926396,\n",
" 0.286201,\n",
" 185.5941,\n",
" 0.003042,\n",
" 0.0,\n",
" 5.394675,\n",
" 78.526968,\n",
" 3.828384,\n",
" 7.534128,\n",
" 0.29685,\n",
" 1563.136688,\n",
" 3.58345,\n",
" 0.173229,\n",
" 0.49706,\n",
" 0.06773,\n",
" 4.102182,\n",
" 72.611063,\n",
" 13.038894,\n",
" 9.432735,\n",
" 0.897628,\n",
" 0.001129278]"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"print('Minimum:')\n",
"list(scaler.data_min_)"
]
},
{
"cell_type": "markdown",
"id": "7f74d791",
"metadata": {
"papermill": {
"duration": 0.009625,
"end_time": "2023-07-08T12:37:12.800557",
"exception": false,
"start_time": "2023-07-08T12:37:12.790932",
"status": "completed"
},
"tags": []
},
"source": [
"**Maximum**"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "cb67e6d1",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-08T12:37:12.822334Z",
"iopub.status.busy": "2023-07-08T12:37:12.821896Z",
"iopub.status.idle": "2023-07-08T12:37:12.829448Z",
"shell.execute_reply": "2023-07-08T12:37:12.827991Z"
},
"papermill": {
"duration": 0.020683,
"end_time": "2023-07-08T12:37:12.831361",
"exception": false,
"start_time": "2023-07-08T12:37:12.810678",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Maximum:\n"
]
},
{
"data": {
"text/plain": [
"[616.0,\n",
" 6.161666,\n",
" 28688.18766,\n",
" 1910.123198,\n",
" 630.51823,\n",
" 178.943634,\n",
" 38.27088,\n",
" 10.315851,\n",
" 38.971568,\n",
" 1463.693448,\n",
" 53060.59924,\n",
" 29.3073,\n",
" 2447.81055,\n",
" 344.644105,\n",
" 179250.2529,\n",
" 50092.4593,\n",
" 2271.436167,\n",
" 4.1030316,\n",
" 633.534408,\n",
" 200.967526,\n",
" 0.224074,\n",
" 31.6881525,\n",
" 3.039675,\n",
" 267.9428235,\n",
" 4.9515075,\n",
" 64.521624,\n",
" 210.33092,\n",
" 2103.40519,\n",
" 37.895013,\n",
" 1.060404,\n",
" 1049.168078,\n",
" 326.2362,\n",
" 62.808096,\n",
" 161.355315,\n",
" 25.19293,\n",
" 152.355164,\n",
" 94.95858,\n",
" 18.324926,\n",
" 30243.75878,\n",
" 42.569748,\n",
" 1.0,\n",
" 109.125159,\n",
" 1063.594578,\n",
" 6501.26448,\n",
" 3030.655824,\n",
" 1578.654237,\n",
" 143224.6823,\n",
" 35.851039,\n",
" 137.9327388,\n",
" 1244.22702,\n",
" 31.365763,\n",
" 135.781294,\n",
" 1497.351958,\n",
" 143790.0712,\n",
" 81.210825,\n",
" 191.194764,\n",
" 21.978]"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"print('Maximum:')\n",
"list(scaler.data_max_)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "7e748f04",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-08T12:37:12.853646Z",
"iopub.status.busy": "2023-07-08T12:37:12.853285Z",
"iopub.status.idle": "2023-07-08T12:37:12.867262Z",
"shell.execute_reply": "2023-07-08T12:37:12.865854Z"
},
"papermill": {
"duration": 0.027491,
"end_time": "2023-07-08T12:37:12.869838",
"exception": false,
"start_time": "2023-07-08T12:37:12.842347",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"df[indep_cols] = scaler.transform(df[indep_cols])"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "39a0433b",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-08T12:37:12.891609Z",
"iopub.status.busy": "2023-07-08T12:37:12.891215Z",
"iopub.status.idle": "2023-07-08T12:37:13.033360Z",
"shell.execute_reply": "2023-07-08T12:37:13.032168Z"
},
"papermill": {
"duration": 0.155513,
"end_time": "2023-07-08T12:37:13.035553",
"exception": false,
"start_time": "2023-07-08T12:37:12.880040",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Id | \n",
" AB | \n",
" AF | \n",
" AH | \n",
" AM | \n",
" AR | \n",
" AX | \n",
" AY | \n",
" AZ | \n",
" BC | \n",
" BD | \n",
" BN | \n",
" BP | \n",
" BQ | \n",
" BR | \n",
" BZ | \n",
" CB | \n",
" CC | \n",
" CD | \n",
" CF | \n",
" CH | \n",
" CL | \n",
" CR | \n",
" CS | \n",
" CU | \n",
" CW | \n",
" DA | \n",
" DE | \n",
" DF | \n",
" DH | \n",
" DI | \n",
" DL | \n",
" DN | \n",
" DU | \n",
" DV | \n",
" DY | \n",
" EB | \n",
" EE | \n",
" EG | \n",
" EH | \n",
" EJ | \n",
" EL | \n",
" EP | \n",
" EU | \n",
" FC | \n",
" FD | \n",
" FE | \n",
" FI | \n",
" FL | \n",
" FR | \n",
" FS | \n",
" GB | \n",
" GE | \n",
" GF | \n",
" GH | \n",
" GI | \n",
" GL | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
" 617.000000 | \n",
"
\n",
" \n",
" mean | \n",
" 0.500000 | \n",
" 0.065120 | \n",
" 0.116138 | \n",
" 0.018315 | \n",
" 0.057052 | \n",
" 0.011648 | \n",
" 0.128975 | \n",
" 0.003376 | \n",
" 0.201538 | \n",
" 0.004665 | \n",
" 0.071189 | \n",
" 0.593841 | \n",
" 0.066687 | \n",
" 0.282534 | \n",
" 0.006512 | \n",
" 0.005883 | \n",
" 0.028599 | \n",
" 0.130389 | \n",
" 0.109587 | \n",
" 0.053529 | \n",
" 0.124183 | \n",
" 0.011539 | \n",
" 0.226577 | \n",
" 0.091020 | \n",
" 0.258823 | \n",
" 0.350229 | \n",
" 0.217387 | \n",
" 0.176986 | \n",
" 0.010495 | \n",
" 0.319800 | \n",
" 0.087710 | \n",
" 0.267339 | \n",
" 0.354729 | \n",
" 0.011140 | \n",
" 0.007751 | \n",
" 0.168820 | \n",
" 0.046054 | \n",
" 0.154034 | \n",
" 0.051422 | \n",
" 0.007096 | \n",
" 0.640194 | \n",
" 0.618795 | \n",
" 0.026936 | \n",
" 0.010048 | \n",
" 0.021106 | \n",
" 0.004203 | \n",
" 0.061722 | \n",
" 0.202297 | \n",
" 0.038182 | \n",
" 0.002442 | \n",
" 0.011303 | \n",
" 0.126236 | \n",
" 0.041484 | \n",
" 0.102009 | \n",
" 0.307294 | \n",
" 0.261101 | \n",
" 0.388128 | \n",
"
\n",
" \n",
" std | \n",
" 0.289378 | \n",
" 0.077031 | \n",
" 0.080726 | \n",
" 0.070052 | \n",
" 0.111149 | \n",
" 0.061584 | \n",
" 0.067917 | \n",
" 0.040506 | \n",
" 0.122296 | \n",
" 0.044560 | \n",
" 0.058818 | \n",
" 0.179103 | \n",
" 0.077475 | \n",
" 0.266988 | \n",
" 0.042273 | \n",
" 0.041665 | \n",
" 0.070295 | \n",
" 0.067076 | \n",
" 0.084545 | \n",
" 0.067701 | \n",
" 0.067039 | \n",
" 0.062740 | \n",
" 0.094664 | \n",
" 0.067935 | \n",
" 0.111916 | \n",
" 0.254753 | \n",
" 0.104269 | \n",
" 0.153693 | \n",
" 0.050785 | \n",
" 0.110838 | \n",
" 0.087048 | \n",
" 0.089408 | \n",
" 0.142359 | \n",
" 0.055949 | \n",
" 0.063308 | \n",
" 0.119542 | \n",
" 0.068867 | \n",
" 0.114107 | \n",
" 0.059559 | \n",
" 0.043402 | \n",
" 0.480333 | \n",
" 0.353126 | \n",
" 0.069483 | \n",
" 0.060052 | \n",
" 0.054717 | \n",
" 0.041026 | \n",
" 0.079988 | \n",
" 0.090928 | \n",
" 0.083384 | \n",
" 0.040348 | \n",
" 0.041640 | \n",
" 0.075881 | \n",
" 0.101198 | \n",
" 0.134604 | \n",
" 0.137427 | \n",
" 0.190577 | \n",
" 0.469522 | \n",
"
\n",
" \n",
" min | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 25% | \n",
" 0.250000 | \n",
" 0.028110 | \n",
" 0.070353 | \n",
" 0.000000 | \n",
" 0.014494 | \n",
" 0.000000 | \n",
" 0.091252 | \n",
" 0.000000 | \n",
" 0.133038 | \n",
" 0.000000 | \n",
" 0.047931 | \n",
" 0.490909 | \n",
" 0.035328 | \n",
" 0.083785 | \n",
" 0.002086 | \n",
" 0.000000 | \n",
" 0.004789 | \n",
" 0.098681 | \n",
" 0.067749 | \n",
" 0.022725 | \n",
" 0.091892 | \n",
" 0.000000 | \n",
" 0.175175 | \n",
" 0.062946 | \n",
" 0.193696 | \n",
" 0.000000 | \n",
" 0.152568 | \n",
" 0.073917 | \n",
" 0.000000 | \n",
" 0.249330 | \n",
" 0.042946 | \n",
" 0.214906 | \n",
" 0.257644 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.091796 | \n",
" 0.011540 | \n",
" 0.075531 | \n",
" 0.030793 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.269686 | \n",
" 0.000000 | \n",
" 0.000076 | \n",
" 0.006056 | \n",
" 0.000000 | \n",
" 0.025423 | \n",
" 0.153084 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.075445 | \n",
" 0.000000 | \n",
" 0.019377 | \n",
" 0.217367 | \n",
" 0.116208 | \n",
" 0.005610 | \n",
"
\n",
" \n",
" 50% | \n",
" 0.500000 | \n",
" 0.044975 | \n",
" 0.102743 | \n",
" 0.000000 | \n",
" 0.027665 | \n",
" 0.000000 | \n",
" 0.115303 | \n",
" 0.000000 | \n",
" 0.198583 | \n",
" 0.000000 | \n",
" 0.064328 | \n",
" 0.581818 | \n",
" 0.050933 | \n",
" 0.209618 | \n",
" 0.003215 | \n",
" 0.000000 | \n",
" 0.013358 | \n",
" 0.123009 | \n",
" 0.092488 | \n",
" 0.042962 | \n",
" 0.111712 | \n",
" 0.000000 | \n",
" 0.222719 | \n",
" 0.082826 | \n",
" 0.252149 | \n",
" 0.504226 | \n",
" 0.207814 | \n",
" 0.131329 | \n",
" 0.000000 | \n",
" 0.310992 | \n",
" 0.070599 | \n",
" 0.271991 | \n",
" 0.334864 | \n",
" 0.001547 | \n",
" 0.000000 | \n",
" 0.137501 | \n",
" 0.035798 | \n",
" 0.129162 | \n",
" 0.043523 | \n",
" 0.001930 | \n",
" 1.000000 | \n",
" 0.618795 | \n",
" 0.000000 | \n",
" 0.002895 | \n",
" 0.009566 | \n",
" 0.000997 | \n",
" 0.040816 | \n",
" 0.197164 | \n",
" 0.020787 | \n",
" 0.000510 | \n",
" 0.006059 | \n",
" 0.111402 | \n",
" 0.000000 | \n",
" 0.054426 | \n",
" 0.295023 | \n",
" 0.210777 | \n",
" 0.015393 | \n",
"
\n",
" \n",
" 75% | \n",
" 0.750000 | \n",
" 0.078707 | \n",
" 0.146305 | \n",
" 0.015639 | \n",
" 0.057325 | \n",
" 0.000000 | \n",
" 0.152558 | \n",
" 0.001095 | \n",
" 0.269088 | \n",
" 0.002633 | \n",
" 0.084534 | \n",
" 0.709091 | \n",
" 0.073627 | \n",
" 0.354078 | \n",
" 0.005159 | \n",
" 0.000000 | \n",
" 0.028670 | \n",
" 0.151557 | \n",
" 0.125258 | \n",
" 0.065126 | \n",
" 0.141441 | \n",
" 0.005817 | \n",
" 0.265995 | \n",
" 0.105231 | \n",
" 0.316332 | \n",
" 0.537566 | \n",
" 0.267924 | \n",
" 0.228256 | \n",
" 0.000000 | \n",
" 0.378016 | \n",
" 0.106786 | \n",
" 0.317499 | \n",
" 0.428640 | \n",
" 0.006591 | \n",
" 0.000000 | \n",
" 0.219426 | \n",
" 0.061941 | \n",
" 0.200894 | \n",
" 0.057226 | \n",
" 0.005503 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 0.034759 | \n",
" 0.006965 | \n",
" 0.016279 | \n",
" 0.002904 | \n",
" 0.064130 | \n",
" 0.245857 | \n",
" 0.044019 | \n",
" 0.000816 | \n",
" 0.014932 | \n",
" 0.163323 | \n",
" 0.038590 | \n",
" 0.132307 | \n",
" 0.382167 | \n",
" 0.352260 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" max | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Id AB AF AH AM AR \\\n",
"count 617.000000 617.000000 617.000000 617.000000 617.000000 617.000000 \n",
"mean 0.500000 0.065120 0.116138 0.018315 0.057052 0.011648 \n",
"std 0.289378 0.077031 0.080726 0.070052 0.111149 0.061584 \n",
"min 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n",
"25% 0.250000 0.028110 0.070353 0.000000 0.014494 0.000000 \n",
"50% 0.500000 0.044975 0.102743 0.000000 0.027665 0.000000 \n",
"75% 0.750000 0.078707 0.146305 0.015639 0.057325 0.000000 \n",
"max 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 \n",
"\n",
" AX AY AZ BC BD BN \\\n",
"count 617.000000 617.000000 617.000000 617.000000 617.000000 617.000000 \n",
"mean 0.128975 0.003376 0.201538 0.004665 0.071189 0.593841 \n",
"std 0.067917 0.040506 0.122296 0.044560 0.058818 0.179103 \n",
"min 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n",
"25% 0.091252 0.000000 0.133038 0.000000 0.047931 0.490909 \n",
"50% 0.115303 0.000000 0.198583 0.000000 0.064328 0.581818 \n",
"75% 0.152558 0.001095 0.269088 0.002633 0.084534 0.709091 \n",
"max 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 \n",
"\n",
" BP BQ BR BZ CB CC \\\n",
"count 617.000000 617.000000 617.000000 617.000000 617.000000 617.000000 \n",
"mean 0.066687 0.282534 0.006512 0.005883 0.028599 0.130389 \n",
"std 0.077475 0.266988 0.042273 0.041665 0.070295 0.067076 \n",
"min 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n",
"25% 0.035328 0.083785 0.002086 0.000000 0.004789 0.098681 \n",
"50% 0.050933 0.209618 0.003215 0.000000 0.013358 0.123009 \n",
"75% 0.073627 0.354078 0.005159 0.000000 0.028670 0.151557 \n",
"max 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 \n",
"\n",
" CD CF CH CL CR CS \\\n",
"count 617.000000 617.000000 617.000000 617.000000 617.000000 617.000000 \n",
"mean 0.109587 0.053529 0.124183 0.011539 0.226577 0.091020 \n",
"std 0.084545 0.067701 0.067039 0.062740 0.094664 0.067935 \n",
"min 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n",
"25% 0.067749 0.022725 0.091892 0.000000 0.175175 0.062946 \n",
"50% 0.092488 0.042962 0.111712 0.000000 0.222719 0.082826 \n",
"75% 0.125258 0.065126 0.141441 0.005817 0.265995 0.105231 \n",
"max 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 \n",
"\n",
" CU CW DA DE DF DH \\\n",
"count 617.000000 617.000000 617.000000 617.000000 617.000000 617.000000 \n",
"mean 0.258823 0.350229 0.217387 0.176986 0.010495 0.319800 \n",
"std 0.111916 0.254753 0.104269 0.153693 0.050785 0.110838 \n",
"min 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n",
"25% 0.193696 0.000000 0.152568 0.073917 0.000000 0.249330 \n",
"50% 0.252149 0.504226 0.207814 0.131329 0.000000 0.310992 \n",
"75% 0.316332 0.537566 0.267924 0.228256 0.000000 0.378016 \n",
"max 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 \n",
"\n",
" DI DL DN DU DV DY \\\n",
"count 617.000000 617.000000 617.000000 617.000000 617.000000 617.000000 \n",
"mean 0.087710 0.267339 0.354729 0.011140 0.007751 0.168820 \n",
"std 0.087048 0.089408 0.142359 0.055949 0.063308 0.119542 \n",
"min 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n",
"25% 0.042946 0.214906 0.257644 0.000000 0.000000 0.091796 \n",
"50% 0.070599 0.271991 0.334864 0.001547 0.000000 0.137501 \n",
"75% 0.106786 0.317499 0.428640 0.006591 0.000000 0.219426 \n",
"max 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 \n",
"\n",
" EB EE EG EH EJ EL \\\n",
"count 617.000000 617.000000 617.000000 617.000000 617.000000 617.000000 \n",
"mean 0.046054 0.154034 0.051422 0.007096 0.640194 0.618795 \n",
"std 0.068867 0.114107 0.059559 0.043402 0.480333 0.353126 \n",
"min 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n",
"25% 0.011540 0.075531 0.030793 0.000000 0.000000 0.269686 \n",
"50% 0.035798 0.129162 0.043523 0.001930 1.000000 0.618795 \n",
"75% 0.061941 0.200894 0.057226 0.005503 1.000000 1.000000 \n",
"max 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 \n",
"\n",
" EP EU FC FD FE FI \\\n",
"count 617.000000 617.000000 617.000000 617.000000 617.000000 617.000000 \n",
"mean 0.026936 0.010048 0.021106 0.004203 0.061722 0.202297 \n",
"std 0.069483 0.060052 0.054717 0.041026 0.079988 0.090928 \n",
"min 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n",
"25% 0.000000 0.000076 0.006056 0.000000 0.025423 0.153084 \n",
"50% 0.000000 0.002895 0.009566 0.000997 0.040816 0.197164 \n",
"75% 0.034759 0.006965 0.016279 0.002904 0.064130 0.245857 \n",
"max 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 \n",
"\n",
" FL FR FS GB GE GF \\\n",
"count 617.000000 617.000000 617.000000 617.000000 617.000000 617.000000 \n",
"mean 0.038182 0.002442 0.011303 0.126236 0.041484 0.102009 \n",
"std 0.083384 0.040348 0.041640 0.075881 0.101198 0.134604 \n",
"min 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n",
"25% 0.000000 0.000000 0.000000 0.075445 0.000000 0.019377 \n",
"50% 0.020787 0.000510 0.006059 0.111402 0.000000 0.054426 \n",
"75% 0.044019 0.000816 0.014932 0.163323 0.038590 0.132307 \n",
"max 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 \n",
"\n",
" GH GI GL \n",
"count 617.000000 617.000000 617.000000 \n",
"mean 0.307294 0.261101 0.388128 \n",
"std 0.137427 0.190577 0.469522 \n",
"min 0.000000 0.000000 0.000000 \n",
"25% 0.217367 0.116208 0.005610 \n",
"50% 0.295023 0.210777 0.015393 \n",
"75% 0.382167 0.352260 1.000000 \n",
"max 1.000000 1.000000 1.000000 "
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[indep_cols].describe()"
]
},
{
"cell_type": "markdown",
"id": "2134f930",
"metadata": {
"papermill": {
"duration": 0.010066,
"end_time": "2023-07-08T12:37:13.056108",
"exception": false,
"start_time": "2023-07-08T12:37:13.046042",
"status": "completed"
},
"tags": []
},
"source": [
"# 9. Train the model"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "7bd4d1c7",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-08T12:37:13.079638Z",
"iopub.status.busy": "2023-07-08T12:37:13.078489Z",
"iopub.status.idle": "2023-07-08T12:37:13.085119Z",
"shell.execute_reply": "2023-07-08T12:37:13.084292Z"
},
"papermill": {
"duration": 0.020513,
"end_time": "2023-07-08T12:37:13.087059",
"exception": false,
"start_time": "2023-07-08T12:37:13.066546",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"X = df[indep_cols]\n",
"y = df[dep_cols]"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "2e4439d4",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-08T12:37:13.111279Z",
"iopub.status.busy": "2023-07-08T12:37:13.110273Z",
"iopub.status.idle": "2023-07-08T12:37:13.114976Z",
"shell.execute_reply": "2023-07-08T12:37:13.114298Z"
},
"papermill": {
"duration": 0.018764,
"end_time": "2023-07-08T12:37:13.116778",
"exception": false,
"start_time": "2023-07-08T12:37:13.098014",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)\n",
"scores = []"
]
},
{
"cell_type": "markdown",
"id": "cbaf561e",
"metadata": {
"papermill": {
"duration": 0.010193,
"end_time": "2023-07-08T12:37:13.137558",
"exception": false,
"start_time": "2023-07-08T12:37:13.127365",
"status": "completed"
},
"tags": []
},
"source": [
"**Logistic Regression**"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "545fab1a",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-08T12:37:13.161255Z",
"iopub.status.busy": "2023-07-08T12:37:13.160534Z",
"iopub.status.idle": "2023-07-08T12:37:13.271565Z",
"shell.execute_reply": "2023-07-08T12:37:13.270303Z"
},
"papermill": {
"duration": 0.125613,
"end_time": "2023-07-08T12:37:13.273760",
"exception": false,
"start_time": "2023-07-08T12:37:13.148147",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Log-loss scores: [0.32225114643241054, 0.3403899503127844, 0.3592379987382104, 0.34510276634634535, 0.30361286139047855]\n",
"*********************************************\n",
"Mean Log-loss: 0.33411894464404585\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/opt/conda/lib/python3.10/site-packages/sklearn/utils/validation.py:1143: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" y = column_or_1d(y, warn=True)\n",
"/opt/conda/lib/python3.10/site-packages/sklearn/utils/validation.py:1143: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" y = column_or_1d(y, warn=True)\n",
"/opt/conda/lib/python3.10/site-packages/sklearn/utils/validation.py:1143: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" y = column_or_1d(y, warn=True)\n",
"/opt/conda/lib/python3.10/site-packages/sklearn/utils/validation.py:1143: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" y = column_or_1d(y, warn=True)\n",
"/opt/conda/lib/python3.10/site-packages/sklearn/utils/validation.py:1143: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" y = column_or_1d(y, warn=True)\n"
]
}
],
"source": [
"for train_idx, val_idx in skf.split(X, y):\n",
" X_train, y_train = X.iloc[train_idx], y.iloc[train_idx]\n",
" X_valid, y_valid = X.iloc[val_idx], y.iloc[val_idx]\n",
" model = LogisticRegression(solver='liblinear') \n",
" model.fit(X_train, y_train)\n",
" val_preds = model.predict_proba(X_valid)\n",
" val_score = log_loss(y_valid, val_preds)\n",
" scores.append(val_score)\n",
"\n",
"print(f'Log-loss scores: {scores}')\n",
"print('*' * 45)\n",
"print(f'Mean Log-loss: {np.mean(scores)}')"
]
},
{
"cell_type": "markdown",
"id": "12d5f7be",
"metadata": {
"papermill": {
"duration": 0.010444,
"end_time": "2023-07-08T12:37:13.295213",
"exception": false,
"start_time": "2023-07-08T12:37:13.284769",
"status": "completed"
},
"tags": []
},
"source": [
"***Checking the weights and biases of the trained mode***"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "2cbc9503",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-08T12:37:13.318905Z",
"iopub.status.busy": "2023-07-08T12:37:13.318441Z",
"iopub.status.idle": "2023-07-08T12:37:13.324221Z",
"shell.execute_reply": "2023-07-08T12:37:13.323113Z"
},
"papermill": {
"duration": 0.020425,
"end_time": "2023-07-08T12:37:13.326320",
"exception": false,
"start_time": "2023-07-08T12:37:13.305895",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[-0.048052289727862726, 0.8901303031883299, 1.3150279115943055, 0.13174306487470439, 0.2622248197824984, 0.0390137362811528, -0.26887734576414224, 0.3189733468464598, -0.20667907391503582, 0.5075642847440733, 0.4732638629119724, 1.588431634183748, 0.6643016265569007, 1.6813665369969006, 0.534784482343446, 0.1569225386396318, -0.18079557638749433, -0.71948639212963, 1.323381673155981, 0.5661967044583167, -0.6725976239897116, 0.44640002874247514, -2.104515356021582, -0.4106919581294929, -0.476265241357835, -0.33458320371675826, -0.9935583565069934, -0.6994955837121527, -0.2514334312777739, -1.5392087395159657, 1.0610628850468167, -1.094798666372673, -0.9686861797798433, 1.423025118877475, 0.4905439829928888, 0.8913732117099116, 0.4053340712554371, -1.05398978966617, -0.40837875387742967, 0.5336512972480261, -0.6399629894331432, 0.19323320859572463, -0.47744452211118044, -0.25097807252395554, 0.02380150507745937, 0.22365378392300575, 1.44986245473914, -0.966989469933129, 1.1012802725256605, 0.6576609075694497, -0.017191565566879646, 0.4430270764946399, -0.8875754351515566, -0.8787775777763472, 0.04030522478117337, 0.06678679668677007, -0.8578451912673534]]\n"
]
}
],
"source": [
"print(model.coef_.tolist())"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "969f3b95",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-08T12:37:13.352103Z",
"iopub.status.busy": "2023-07-08T12:37:13.351584Z",
"iopub.status.idle": "2023-07-08T12:37:13.357637Z",
"shell.execute_reply": "2023-07-08T12:37:13.356342Z"
},
"papermill": {
"duration": 0.021725,
"end_time": "2023-07-08T12:37:13.359917",
"exception": false,
"start_time": "2023-07-08T12:37:13.338192",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[-0.57293004]\n"
]
}
],
"source": [
"print(model.intercept_)"
]
},
{
"cell_type": "markdown",
"id": "f565b6f8",
"metadata": {
"papermill": {
"duration": 0.01066,
"end_time": "2023-07-08T12:37:13.381716",
"exception": false,
"start_time": "2023-07-08T12:37:13.371056",
"status": "completed"
},
"tags": []
},
"source": [
"# 10. Sample Submission File"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "e62489de",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-08T12:37:13.406292Z",
"iopub.status.busy": "2023-07-08T12:37:13.405815Z",
"iopub.status.idle": "2023-07-08T12:37:13.421639Z",
"shell.execute_reply": "2023-07-08T12:37:13.419698Z"
},
"papermill": {
"duration": 0.031693,
"end_time": "2023-07-08T12:37:13.424809",
"exception": false,
"start_time": "2023-07-08T12:37:13.393116",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Id | \n",
" class_0 | \n",
" class_1 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 00eed32682bb | \n",
" 0.5 | \n",
" 0.5 | \n",
"
\n",
" \n",
" 1 | \n",
" 010ebe33f668 | \n",
" 0.5 | \n",
" 0.5 | \n",
"
\n",
" \n",
" 2 | \n",
" 02fa521e1838 | \n",
" 0.5 | \n",
" 0.5 | \n",
"
\n",
" \n",
" 3 | \n",
" 040e15f562a2 | \n",
" 0.5 | \n",
" 0.5 | \n",
"
\n",
" \n",
" 4 | \n",
" 046e85c7cc7f | \n",
" 0.5 | \n",
" 0.5 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Id class_0 class_1\n",
"0 00eed32682bb 0.5 0.5\n",
"1 010ebe33f668 0.5 0.5\n",
"2 02fa521e1838 0.5 0.5\n",
"3 040e15f562a2 0.5 0.5\n",
"4 046e85c7cc7f 0.5 0.5"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sample_submission_df.head()"
]
},
{
"cell_type": "markdown",
"id": "2b7aaa18",
"metadata": {
"papermill": {
"duration": 0.011352,
"end_time": "2023-07-08T12:37:13.447797",
"exception": false,
"start_time": "2023-07-08T12:37:13.436445",
"status": "completed"
},
"tags": []
},
"source": [
"# 11. Prediction of Final Submission File"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "8844deaf",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-08T12:37:13.475396Z",
"iopub.status.busy": "2023-07-08T12:37:13.475006Z",
"iopub.status.idle": "2023-07-08T12:37:13.484283Z",
"shell.execute_reply": "2023-07-08T12:37:13.482881Z"
},
"papermill": {
"duration": 0.025612,
"end_time": "2023-07-08T12:37:13.486824",
"exception": false,
"start_time": "2023-07-08T12:37:13.461212",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"prediction = model.predict_proba(test_df[indep_cols])"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "32531474",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-08T12:37:13.510477Z",
"iopub.status.busy": "2023-07-08T12:37:13.510120Z",
"iopub.status.idle": "2023-07-08T12:37:13.516037Z",
"shell.execute_reply": "2023-07-08T12:37:13.514794Z"
},
"papermill": {
"duration": 0.020275,
"end_time": "2023-07-08T12:37:13.518043",
"exception": false,
"start_time": "2023-07-08T12:37:13.497768",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"sample_submission_df[['class_0', 'class_1']] = prediction"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "44999d42",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-08T12:37:13.542605Z",
"iopub.status.busy": "2023-07-08T12:37:13.542180Z",
"iopub.status.idle": "2023-07-08T12:37:13.554019Z",
"shell.execute_reply": "2023-07-08T12:37:13.552916Z"
},
"papermill": {
"duration": 0.027046,
"end_time": "2023-07-08T12:37:13.556262",
"exception": false,
"start_time": "2023-07-08T12:37:13.529216",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Id | \n",
" class_0 | \n",
" class_1 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 00eed32682bb | \n",
" 0.639439 | \n",
" 0.360561 | \n",
"
\n",
" \n",
" 1 | \n",
" 010ebe33f668 | \n",
" 0.650442 | \n",
" 0.349558 | \n",
"
\n",
" \n",
" 2 | \n",
" 02fa521e1838 | \n",
" 0.661287 | \n",
" 0.338713 | \n",
"
\n",
" \n",
" 3 | \n",
" 040e15f562a2 | \n",
" 0.671965 | \n",
" 0.328035 | \n",
"
\n",
" \n",
" 4 | \n",
" 046e85c7cc7f | \n",
" 0.682468 | \n",
" 0.317532 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Id class_0 class_1\n",
"0 00eed32682bb 0.639439 0.360561\n",
"1 010ebe33f668 0.650442 0.349558\n",
"2 02fa521e1838 0.661287 0.338713\n",
"3 040e15f562a2 0.671965 0.328035\n",
"4 046e85c7cc7f 0.682468 0.317532"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sample_submission_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "00bbb885",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-08T12:37:13.580094Z",
"iopub.status.busy": "2023-07-08T12:37:13.579775Z",
"iopub.status.idle": "2023-07-08T12:37:13.590826Z",
"shell.execute_reply": "2023-07-08T12:37:13.589760Z"
},
"papermill": {
"duration": 0.025435,
"end_time": "2023-07-08T12:37:13.593299",
"exception": false,
"start_time": "2023-07-08T12:37:13.567864",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"sample_submission_df.to_csv('submission.csv', index=False)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.10"
},
"papermill": {
"default_parameters": {},
"duration": 20.953622,
"end_time": "2023-07-08T12:37:15.230170",
"environment_variables": {},
"exception": null,
"input_path": "__notebook__.ipynb",
"output_path": "__notebook__.ipynb",
"parameters": {},
"start_time": "2023-07-08T12:36:54.276548",
"version": "2.4.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}