{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Extract Leaf Patches From Plates" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Imports" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%load_ext autoreload\n", "%autoreload 2" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from datetime import datetime as dt\n", "import warnings\n", "import random\n", "\n", "from tqdm import tqdm\n", "\n", "import cv2\n", "\n", "import pandas as pd\n", "\n", "from siuba import _ as s\n", "from siuba import filter as sfilter\n", "from siuba import mutate, select, if_else\n", "\n", "import panel as pn\n", "\n", "import torch\n", "\n", "from pytorch_lightning.callbacks import (\n", " RichProgressBar,\n", " ModelCheckpoint,\n", " LearningRateMonitor,\n", ")\n", "from pytorch_lightning import Trainer\n", "from pytorch_lightning.callbacks.early_stopping import EarlyStopping\n", "from pytorch_lightning.loggers import TensorBoardLogger\n", "\n", "\n", "import com_const as cc\n", "import com_image as ci\n", "import com_func as cf\n", "import leaf_patch_extractor_model as lpem" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Setup" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "warnings.simplefilter(action=\"ignore\", category=UserWarning)\n", "warnings.simplefilter(action=\"ignore\", category=FutureWarning)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pd.set_option(\"display.max_colwidth\", 500)\n", "pd.set_option(\"display.max_columns\", 500)\n", "pd.set_option(\"display.width\", 1000)\n", "pd.set_option(\"display.max_rows\", 16)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pn.extension(notifications=True, console_output=\"disable\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Train Disc Detector" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Load Datasets" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "train, val, test = [\n", " cf.read_dataframe(cc.path_to_data.joinpath(f\"ldd_{d}.csv\"))\n", " for d in [\"train\", \"val\", \"test\"]\n", "]\n", "\n", "print(len(train), len(test), len(val))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Test Augmentations" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# aug_ = lpem.get_augmentations(image_size=10, kinds=[\"resize\", \"train\"])\n", "\n", "# test_aug_dataset = lpem.LeafDiskDetectorDataset(csv=train, transform=aug_)\n", "\n", "# file_name = train.sample(n=1).plate_name.to_list()[0]\n", "\n", "# print(aug_[0].width, aug_[0].height)\n", "\n", "# lpem.make_patches_grid(\n", "# images=[\n", "# test_aug_dataset.draw_image_with_boxes(plate_name=file_name) for _ in range(12)\n", "# ],\n", "# row_count=3,\n", "# col_count=4,\n", "# figsize=(12, 6),\n", "# )" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Train" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# model = lpem.LeafDiskDetector(\n", "# batch_size=15,\n", "# learning_rate=7.0e-05,\n", "# image_factor=10,\n", "# max_epochs=1000,\n", "# train_data=train,\n", "# val_data=val,\n", "# test_data=test,\n", "# augmentations_kinds=[\"resize\", \"train\", \"to_tensor\"],\n", "# 
augmentations_params={\"gamma\": (60, 180)},\n", "# num_workers=2,\n", "# accumulate_grad_batches=5,\n", "# scheduler=\"steplr\",\n", "# scheduler_params={\"step_size\": 10, \"gamma\": 0.80},\n", "# )\n", "\n", "# model.eval()\n", "# len(model(torch.rand(2, 3, 128, 128)))\n", "\n", "# model.hr_desc()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# trainer = Trainer(\n", "# default_root_dir=str(cc.path_to_chk_detector),\n", "# logger=TensorBoardLogger(\n", "# save_dir=str(cc.path_to_chk_detector),\n", "# version=model.model_name + \"_\" + dt.now().strftime(\"%Y%m%d_%H%M%S\"),\n", "# name=\"lightning_logs\",\n", "# ),\n", "# accelerator=\"gpu\",\n", "# max_epochs=model.max_epochs,\n", "# log_every_n_steps=5,\n", "# callbacks=[\n", "# RichProgressBar(),\n", "# EarlyStopping(monitor=\"val_loss\", mode=\"min\", patience=10, min_delta=0.0005),\n", "# ModelCheckpoint(\n", "# save_top_k=1,\n", "# monitor=\"val_loss\",\n", "# auto_insert_metric_name=True,\n", "# filename=model.model_name\n", "# + \"-{val_loss:.3f}-{epoch}-{train_loss:.3f}-{step}\",\n", "# ),\n", "# LearningRateMonitor(logging_interval=\"epoch\"),\n", "# ],\n", "# accumulate_grad_batches=model.accumulate_grad_batches,\n", "# )\n", "\n", "# trainer.fit(model)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Extract Patches" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Load Model" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "ld_model: lpem.LeafDiskDetector = lpem.LeafDiskDetector.load_from_checkpoint(\n", " cc.path_to_chk_detector.joinpath(\"leaf_disc_detector.ckpt\")\n", ")\n", "ld_model.hr_desc()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Predict All Bounding Boxes" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "bb_predictions_path = cc.path_to_data.joinpath(\"train_ld_bounding_boxes.csv\")\n", "\n", "bb_predictions = (\n", " cf.read_dataframe(bb_predictions_path)\n", " if bb_predictions_path.is_file() is True\n", " else pd.DataFrame()\n", ")\n", "\n", "bb_predictions" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "plates = list(cc.path_to_plates.rglob(\"*.JPG\"))\n", "len(plates)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "errors = []\n", "handled_plates = bb_predictions.file_name.unique()\n", "\n", "for plate in tqdm(plates):\n", " if \"file_name\" in bb_predictions and plate.name in handled_plates:\n", " continue\n", " try:\n", " current_data = ld_model.index_plate(plate) >> mutate(\n", " disc_name=s.file_name.str.replace(\" \", \"\").replace(\".JPG\", \"\")\n", " + \"_\"\n", " + s.row.astype(str)\n", " + \"_\"\n", " + s.col.astype(str)\n", " + \".png\"\n", " )\n", " bb_predictions = pd.concat([bb_predictions, current_data])\n", " except:\n", " errors.append(plate)\n", "\n", "print(errors)\n", "cf.write_dataframe(\n", " bb_predictions.sort_values([\"file_name\", \"col\", \"row\"]).reset_index(drop=True)\n", " >> mutate(disc_name=s.disc_name.str.replace(\".JPG\", \"\")),\n", " bb_predictions_path,\n", ")\n", "\n", "bb_predictions = cf.read_dataframe(bb_predictions_path)\n", "bb_predictions" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "selected_image = random.choice(plates)\n", "bboxes = bb_predictions >> sfilter(s.file_name == selected_image.name)\n", "pn.Column(\n", " 
pn.pane.Markdown(f\"### {selected_image.name}\"),\n", " pn.pane.DataFrame(bboxes),\n", " pn.pane.Image(\n", " ci.to_pil(\n", " lpem.print_boxes(\n", " image_name=selected_image,\n", " boxes=bboxes,\n", " draw_first_line=True,\n", " return_plot=False,\n", " ) #\n", " ),\n", " sizing_mode=\"scale_width\",\n", " ),\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Extract Needed Patches" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Model Training" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df_model_training = pd.concat(\n", " [\n", " cf.read_dataframe(cc.path_to_data.joinpath(f\"oiv_{d}.csv\"))\n", " for d in [\"train\", \"val\", \"test\"]\n", " ]\n", ").sort_values([\"file_name\"]).reset_index(drop=True)\n", "df_model_training" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "err = {}\n", "\n", "for file_name in tqdm(df_model_training.file_name):\n", " row = (bb_predictions >> sfilter(s.disc_name == file_name)).reset_index(drop=True)\n", " lpem.handle_bbox(\n", " row.iloc[0],\n", " add_process_image=True,\n", " paths=dict(\n", " segmented_leaf_disc=cc.path_to_leaf_discs,\n", " leaf_disc_patch=cc.path_to_leaf_patches,\n", " plates=cc.path_to_plates,\n", " ),\n", " errors=err,\n", " )\n", "err" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Genotype differenciation" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df_gd = cf.read_dataframe(\n", " cc.path_to_data.joinpath(\"genotype_differenciation_dataset.csv\")\n", ")\n", "df_gd" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "err = {}\n", "\n", "for file_name in tqdm(df_gd.file_name):\n", " row = (bb_predictions >> sfilter(s.disc_name == file_name)).reset_index(drop=True)\n", " lpem.handle_bbox(\n", " row.iloc[0],\n", " add_process_image=True,\n", " paths=dict(\n", " segmented_leaf_disc=cc.path_to_leaf_discs,\n", " leaf_disc_patch=cc.path_to_leaf_patches,\n", " plates=cc.path_to_plates,\n", " ),\n", " errors=err,\n", " )\n", "err" ] } ], "metadata": { "kernelspec": { "display_name": "env", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.2" } }, "nbformat": 4, "nbformat_minor": 2 }