{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Extract Leaf Patches From Plates" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Imports" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%load_ext autoreload\n", "%autoreload 2" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from datetime import datetime as dt\n", "import warnings\n", "import random\n", "\n", "from tqdm import tqdm\n", "\n", "import cv2\n", "\n", "import pandas as pd\n", "\n", "from siuba import _ as s\n", "from siuba import filter as sfilter\n", "from siuba import mutate, select, if_else\n", "\n", "import panel as pn\n", "\n", "import torch\n", "\n", "from pytorch_lightning.callbacks import (\n", " RichProgressBar,\n", " ModelCheckpoint,\n", " LearningRateMonitor,\n", ")\n", "from pytorch_lightning import Trainer\n", "from pytorch_lightning.callbacks.early_stopping import EarlyStopping\n", "from pytorch_lightning.loggers import TensorBoardLogger\n", "\n", "\n", "import com_const as cc\n", "import com_image as ci\n", "import com_func as cf\n", "import leaf_patch_extractor_model as lpem" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Setup" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "warnings.simplefilter(action=\"ignore\", category=UserWarning)\n", "warnings.simplefilter(action=\"ignore\", category=FutureWarning)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pd.set_option(\"display.max_colwidth\", 500)\n", "pd.set_option(\"display.max_columns\", 500)\n", "pd.set_option(\"display.width\", 1000)\n", "pd.set_option(\"display.max_rows\", 16)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pn.extension(notifications=True, console_output=\"disable\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Train Disc Detector" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Load Datasets" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "train, val, test = [\n", " cf.read_dataframe(cc.path_to_data.joinpath(f\"ldd_{d}.csv\"))\n", " for d in [\"train\", \"val\", \"test\"]\n", "]\n", "\n", "print(len(train), len(test), len(val))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Test Augmentations" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# aug_ = lpem.get_augmentations(image_size=10, kinds=[\"resize\", \"train\"])\n", "\n", "# test_aug_dataset = lpem.LeafDiskDetectorDataset(csv=train, transform=aug_)\n", "\n", "# file_name = train.sample(n=1).plate_name.to_list()[0]\n", "\n", "# print(aug_[0].width, aug_[0].height)\n", "\n", "# lpem.make_patches_grid(\n", "# images=[\n", "# test_aug_dataset.draw_image_with_boxes(plate_name=file_name) for _ in range(12)\n", "# ],\n", "# row_count=3,\n", "# col_count=4,\n", "# figsize=(12, 6),\n", "# )" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Train" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# model = lpem.LeafDiskDetector(\n", "# batch_size=15,\n", "# learning_rate=7.0e-05,\n", "# image_factor=10,\n", "# max_epochs=1000,\n", "# train_data=train,\n", "# val_data=val,\n", "# test_data=test,\n", "# augmentations_kinds=[\"resize\", \"train\", \"to_tensor\"],\n", "# 
augmentations_params={\"gamma\": (60, 180)},\n", "# num_workers=2,\n", "# accumulate_grad_batches=5,\n", "# scheduler=\"steplr\",\n", "# scheduler_params={\"step_size\": 10, \"gamma\": 0.80},\n", "# )\n", "\n", "# model.eval()\n", "# len(model(torch.rand(2, 3, 128, 128)))\n", "\n", "# model.hr_desc()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# trainer = Trainer(\n", "# default_root_dir=str(cc.path_to_chk_detector),\n", "# logger=TensorBoardLogger(\n", "# save_dir=str(cc.path_to_chk_detector),\n", "# version=model.model_name + \"_\" + dt.now().strftime(\"%Y%m%d_%H%M%S\"),\n", "# name=\"lightning_logs\",\n", "# ),\n", "# accelerator=\"gpu\",\n", "# max_epochs=model.max_epochs,\n", "# log_every_n_steps=5,\n", "# callbacks=[\n", "# RichProgressBar(),\n", "# EarlyStopping(monitor=\"val_loss\", mode=\"min\", patience=10, min_delta=0.0005),\n", "# ModelCheckpoint(\n", "# save_top_k=1,\n", "# monitor=\"val_loss\",\n", "# auto_insert_metric_name=True,\n", "# filename=model.model_name\n", "# + \"-{val_loss:.3f}-{epoch}-{train_loss:.3f}-{step}\",\n", "# ),\n", "# LearningRateMonitor(logging_interval=\"epoch\"),\n", "# ],\n", "# accumulate_grad_batches=model.accumulate_grad_batches,\n", "# )\n", "\n", "# trainer.fit(model)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Extract Patches" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Load Model" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "ld_model: lpem.LeafDiskDetector = lpem.LeafDiskDetector.load_from_checkpoint(\n", " cc.path_to_chk_detector.joinpath(\"leaf_disc_detector.ckpt\")\n", ")\n", "ld_model.hr_desc()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Predict All Bounding Boxes" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "bb_predictions_path = cc.path_to_data.joinpath(\"train_ld_bounding_boxes.csv\")\n", "\n", "bb_predictions = (\n", " cf.read_dataframe(bb_predictions_path)\n", " if bb_predictions_path.is_file() is True\n", " else pd.DataFrame()\n", ")\n", "\n", "bb_predictions" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "plates = list(cc.path_to_plates.rglob(\"*.JPG\"))\n", "len(plates)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "errors = []\n", "handled_plates = bb_predictions.file_name.unique()\n", "\n", "for plate in tqdm(plates):\n", " if \"file_name\" in bb_predictions and plate.name in handled_plates:\n", " continue\n", " try:\n", " current_data = ld_model.index_plate(plate) >> mutate(\n", " disc_name=s.file_name.str.replace(\" \", \"\").replace(\".JPG\", \"\")\n", " + \"_\"\n", " + s.row.astype(str)\n", " + \"_\"\n", " + s.col.astype(str)\n", " + \".png\"\n", " )\n", " bb_predictions = pd.concat([bb_predictions, current_data])\n", " except:\n", " errors.append(plate)\n", "\n", "print(errors)\n", "cf.write_dataframe(\n", " bb_predictions.sort_values([\"file_name\", \"col\", \"row\"]).reset_index(drop=True)\n", " >> mutate(disc_name=s.disc_name.str.replace(\".JPG\", \"\")),\n", " bb_predictions_path,\n", ")\n", "\n", "bb_predictions = cf.read_dataframe(bb_predictions_path)\n", "bb_predictions" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "selected_image = random.choice(plates)\n", "bboxes = bb_predictions >> sfilter(s.file_name == selected_image.name)\n", "pn.Column(\n", " 
pn.pane.Markdown(f\"### {selected_image.name}\"),\n", " pn.pane.DataFrame(bboxes),\n", " pn.pane.Image(\n", " ci.to_pil(\n", " lpem.print_boxes(\n", " image_name=selected_image,\n", " boxes=bboxes,\n", " draw_first_line=True,\n", " return_plot=False,\n", " ) #\n", " ),\n", " sizing_mode=\"scale_width\",\n", " ),\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Extract Needed Patches" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Model Training" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df_model_training = pd.concat(\n", " [\n", " cf.read_dataframe(cc.path_to_data.joinpath(f\"oiv_{d}.csv\"))\n", " for d in [\"train\", \"val\", \"test\"]\n", " ]\n", ").sort_values([\"file_name\"]).reset_index(drop=True)\n", "df_model_training" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "err = {}\n", "\n", "for file_name in tqdm(df_model_training.file_name):\n", " row = (bb_predictions >> sfilter(s.disc_name == file_name)).reset_index(drop=True)\n", " lpem.handle_bbox(\n", " row.iloc[0],\n", " add_process_image=True,\n", " paths=dict(\n", " segmented_leaf_disc=cc.path_to_leaf_discs,\n", " leaf_disc_patch=cc.path_to_leaf_patches,\n", " plates=cc.path_to_plates,\n", " ),\n", " errors=err,\n", " )\n", "err" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Genotype differenciation" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df_gd = cf.read_dataframe(\n", " cc.path_to_data.joinpath(\"genotype_differenciation_dataset.csv\")\n", ")\n", "df_gd" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "err = {}\n", "\n", "for file_name in tqdm(df_gd.file_name):\n", " row = (bb_predictions >> sfilter(s.disc_name == file_name)).reset_index(drop=True)\n", " lpem.handle_bbox(\n", " row.iloc[0],\n", " add_process_image=True,\n", " paths=dict(\n", " segmented_leaf_disc=cc.path_to_leaf_discs,\n", " leaf_disc_patch=cc.path_to_leaf_patches,\n", " plates=cc.path_to_plates,\n", " ),\n", " errors=err,\n", " )\n", "err" ] } ], "metadata": { "kernelspec": { "display_name": "env", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.2" } }, "nbformat": 4, "nbformat_minor": 2 }