{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "### Step by Step OIV 452-1 predictor Training" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Imports" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%load_ext autoreload\n", "%autoreload 2" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import warnings\n", "from pathlib import Path\n", "import shutil\n", "\n", "from tqdm import tqdm\n", "\n", "import pandas as pd\n", "\n", "from sklearn.metrics import (\n", " confusion_matrix,\n", " mean_squared_error,\n", " ConfusionMatrixDisplay,\n", " classification_report,\n", ")\n", "\n", "import matplotlib.pyplot as plt\n", "import altair as alt\n", "\n", "import panel as pn\n", "\n", "import com_const as cc\n", "import com_func as cf\n", "import com_augmentations as ca\n", "import leaf_patch_oiv_predictor_model as lpopm" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Setup" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Remove warnings\n", "warnings.simplefilter(action=\"ignore\", category=UserWarning)\n", "warnings.simplefilter(action=\"ignore\", category=FutureWarning)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pd.options.display.float_format = \"{:4,.4f}\".format\n", "\n", "pd.set_option(\"display.max_colwidth\", 500)\n", "pd.set_option(\"display.max_columns\", 500)\n", "pd.set_option(\"display.width\", 1000)\n", "pd.set_option(\"display.max_rows\", 16)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "alt.data_transformers.disable_max_rows()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pn.extension(\"plotly\", \"vega\", notifications=True, console_output=\"disable\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Dataset" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Load" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "train, val, test = [\n", " cf.read_dataframe(cc.path_to_data.joinpath(f\"oiv_{d}.csv\"))\n", " for d in [\"train\", \"val\", \"test\"]\n", "]\n", "alt.hconcat(\n", " *[\n", " alt.Chart(df.assign(oiv=lambda x: x.oiv.astype(str)))\n", " .mark_bar()\n", " .encode(x=\"oiv\", y=\"count()\", color=\"source\", tooltip=\"count()\")\n", " .properties(width=200, height=300, title=title)\n", " for (df, title) in [\n", " (train, \"train\"),\n", " (val, \"val\"),\n", " (test, \"test\"),\n", " ]\n", " ]\n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# src_patches = (\n", "# Path(cc.path_to_root)\n", "# .joinpath(\"..\")\n", "# .joinpath(\"leafdisks_powderymildew\")\n", "# .joinpath(\"data_in\")\n", "# .joinpath(\"202311_dataset\")\n", "# .joinpath(\"patches\")\n", "# )\n", "# src_patches.is_dir()\n", "\n", "# for d in [train, val, test]:\n", "# for fn in tqdm(d.file_name):\n", "# shutil.copy(src=src_patches.joinpath(fn), dst=cc.path_to_leaf_patches.joinpath(fn))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Augmentation" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "augmentations_kinds = [\"fix_brightness\", \"resize\", \"affine\", \"color\", \"to_tensor\"]\n", "augmentations_params = dict(\n", " gamma=(60, 120),\n", " brightness_limit=0.15,\n", " contrast_limit=0.25,\n", " brightness_target=115,\n", " brightness_thresholds=(115, 130),\n", ")\n", "\n", "ca.test_augmentations(\n", " df=train,\n", " image_size=224,\n", " path_to_images=cc.path_to_leaf_patches,\n", " kinds=augmentations_kinds,\n", " columns=[\"oiv\"],\n", " **augmentations_params\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Model" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Find Batch Size" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "batch_size = 615" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We trained the models on an NVIDIA A100 80GB PCIe that allowed us a batch size of 769 that we reduced to 615 t avoid monopolizing the GPU. Uncomment the the following block to calculate optimal batch size" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# batch_size = lpopm.get_bs(\n", "# batch_size=300,\n", "# train=train,\n", "# val=val,\n", "# test=test,\n", "# augmentations_kinds=augmentations_kinds,\n", "# augmentations_params=augmentations_params,\n", "# shrink_factor=0.8,\n", "# )" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "\n", "batch_size" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Find Learning Rate" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learning_rate = 0.000363" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We found that we our selected batch size the best learning rate was 0.000363. The function hereafter will calculate on optimal learning rate for your setup." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# learning_rate = lpopm.get_lr(\n", "# train=train,\n", "# val=val,\n", "# test=test,\n", "# augmentations_params=augmentations_params,\n", "# augmentations_kinds=augmentations_kinds,\n", "# batch_size=batch_size,\n", "# lr_times=10,\n", "# )\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learning_rate" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Train" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# lpopm.train_model(\n", "# path_to_images=cc.path_to_leaf_patches,\n", "# train=train,\n", "# val=val,\n", "# test=test,\n", "# monitor_loss=\"mse\",\n", "# augmentations_kinds=augmentations_kinds,\n", "# augmentations_params=augmentations_params,\n", "# batch_size=batch_size,\n", "# learning_rate=learning_rate,\n", "# )" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Validate" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "model = lpopm.OivDetPatchesNet.load_from_checkpoint(\n", " cc.path_to_chk_oiv.joinpath(\"oiv_scorer.ckpt\")\n", ")\n", "model.path_to_images = cc.path_to_leaf_patches\n", "model.hr_desc()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "test_data = model.test_data.assign(oiv=lambda x :x.fixed_oiv)\n", "test_data[\"p_oiv\"] = model.predict(test_data)\n", "\n", "print(f\"MSE: {mean_squared_error(test_data.oiv.astype(int), test_data.p_oiv.astype(int)):.3f}\")\n", "ConfusionMatrixDisplay.from_predictions(\n", " test_data.oiv.astype(int), test_data.p_oiv.astype(int)\n", ");" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "env", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.2" } }, "nbformat": 4, "nbformat_minor": 2 }