{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step by Step OIV 452-1 predictor Training"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Imports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import warnings\n",
    "from pathlib import Path\n",
    "import shutil\n",
    "\n",
    "from tqdm import tqdm\n",
    "\n",
    "import pandas as pd\n",
    "\n",
    "from sklearn.metrics import (\n",
    "    confusion_matrix,\n",
    "    mean_squared_error,\n",
    "    ConfusionMatrixDisplay,\n",
    "    classification_report,\n",
    ")\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import altair as alt\n",
    "\n",
    "import panel as pn\n",
    "\n",
    "import com_const as cc\n",
    "import com_func as cf\n",
    "import com_augmentations as ca\n",
    "import leaf_patch_oiv_predictor_model as lpopm"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Setup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Remove warnings\n",
    "warnings.simplefilter(action=\"ignore\", category=UserWarning)\n",
    "warnings.simplefilter(action=\"ignore\", category=FutureWarning)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pd.options.display.float_format = \"{:4,.4f}\".format\n",
    "\n",
    "pd.set_option(\"display.max_colwidth\", 500)\n",
    "pd.set_option(\"display.max_columns\", 500)\n",
    "pd.set_option(\"display.width\", 1000)\n",
    "pd.set_option(\"display.max_rows\", 16)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "alt.data_transformers.disable_max_rows()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pn.extension(\"plotly\", \"vega\", notifications=True, console_output=\"disable\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Dataset"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Load"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "train, val, test = [\n",
    "    cf.read_dataframe(cc.path_to_data.joinpath(f\"oiv_{d}.csv\"))\n",
    "    for d in [\"train\", \"val\", \"test\"]\n",
    "]\n",
    "alt.hconcat(\n",
    "    *[\n",
    "        alt.Chart(df.assign(oiv=lambda x: x.oiv.astype(str)))\n",
    "        .mark_bar()\n",
    "        .encode(x=\"oiv\", y=\"count()\", color=\"source\", tooltip=\"count()\")\n",
    "        .properties(width=200, height=300, title=title)\n",
    "        for (df, title) in [\n",
    "            (train, \"train\"),\n",
    "            (val, \"val\"),\n",
    "            (test, \"test\"),\n",
    "        ]\n",
    "    ]\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# src_patches = (\n",
    "#     Path(cc.path_to_root)\n",
    "#     .joinpath(\"..\")\n",
    "#     .joinpath(\"leafdisks_powderymildew\")\n",
    "#     .joinpath(\"data_in\")\n",
    "#     .joinpath(\"202311_dataset\")\n",
    "#     .joinpath(\"patches\")\n",
    "# )\n",
    "# src_patches.is_dir()\n",
    "\n",
    "# for d in [train, val, test]:\n",
    "#     for fn in tqdm(d.file_name):\n",
    "#         shutil.copy(src=src_patches.joinpath(fn), dst=cc.path_to_leaf_patches.joinpath(fn))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Augmentation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "augmentations_kinds = [\"fix_brightness\", \"resize\", \"affine\", \"color\", \"to_tensor\"]\n",
    "augmentations_params = dict(\n",
    "    gamma=(60, 120),\n",
    "    brightness_limit=0.15,\n",
    "    contrast_limit=0.25,\n",
    "    brightness_target=115,\n",
    "    brightness_thresholds=(115, 130),\n",
    ")\n",
    "\n",
    "ca.test_augmentations(\n",
    "    df=train,\n",
    "    image_size=224,\n",
    "    path_to_images=cc.path_to_leaf_patches,\n",
    "    kinds=augmentations_kinds,\n",
    "    columns=[\"oiv\"],\n",
    "    **augmentations_params\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Model"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Find Batch Size"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "batch_size = 615"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We trained the models on an NVIDIA A100 80GB PCIe that allowed us a batch size of 769 that we reduced to 615 t avoid monopolizing the GPU. Uncomment the the following block to calculate optimal batch size"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# batch_size = lpopm.get_bs(\n",
    "#     batch_size=300,\n",
    "#     train=train,\n",
    "#     val=val,\n",
    "#     test=test,\n",
    "#     augmentations_kinds=augmentations_kinds,\n",
    "#     augmentations_params=augmentations_params,\n",
    "#     shrink_factor=0.8,\n",
    "# )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "batch_size"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Find Learning Rate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "learning_rate = 0.000363"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We found that we our selected batch size the best learning rate was 0.000363. The function hereafter will calculate on optimal learning rate for your setup."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# learning_rate = lpopm.get_lr(\n",
    "#     train=train,\n",
    "#     val=val,\n",
    "#     test=test,\n",
    "#     augmentations_params=augmentations_params,\n",
    "#     augmentations_kinds=augmentations_kinds,\n",
    "#     batch_size=batch_size,\n",
    "#     lr_times=10,\n",
    "# )\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "learning_rate"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Train"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# lpopm.train_model(\n",
    "#     path_to_images=cc.path_to_leaf_patches,\n",
    "#     train=train,\n",
    "#     val=val,\n",
    "#     test=test,\n",
    "#     monitor_loss=\"mse\",\n",
    "#     augmentations_kinds=augmentations_kinds,\n",
    "#     augmentations_params=augmentations_params,\n",
    "#     batch_size=batch_size,\n",
    "#     learning_rate=learning_rate,\n",
    "# )"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Validate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = lpopm.OivDetPatchesNet.load_from_checkpoint(\n",
    "    cc.path_to_chk_oiv.joinpath(\"oiv_scorer.ckpt\")\n",
    ")\n",
    "model.path_to_images = cc.path_to_leaf_patches\n",
    "model.hr_desc()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_data = model.test_data.assign(oiv=lambda x :x.fixed_oiv)\n",
    "test_data[\"p_oiv\"] = model.predict(test_data)\n",
    "\n",
    "print(f\"MSE: {mean_squared_error(test_data.oiv.astype(int), test_data.p_oiv.astype(int)):.3f}\")\n",
    "ConfusionMatrixDisplay.from_predictions(\n",
    "    test_data.oiv.astype(int), test_data.p_oiv.astype(int)\n",
    ");"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "env",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}