# Step-by-Step OIV 452-1 Predictor Training

## Imports

In [None]:
# Reload edited project modules automatically so changes to the local
# helper modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import warnings
from pathlib import Path
import shutil

from tqdm import tqdm

import pandas as pd

from sklearn.metrics import (
 confusion_matrix,
 mean_squared_error,
 ConfusionMatrixDisplay,
 classification_report,
)

import matplotlib.pyplot as plt
import altair as alt

import panel as pn

import com_const as cc
import com_func as cf
import com_augmentations as ca
import leaf_patch_oiv_predictor_model as lpopm

## Setup

In [None]:
# Remove warnings
warnings.simplefilter(action="ignore", category=UserWarning)
warnings.simplefilter(action="ignore", category=FutureWarning)

In [None]:
# Notebook-wide pandas display settings.
# Floats are rendered with a thousands separator and four decimals.
pd.set_option("display.float_format", "{:4,.4f}".format)

# Size limits applied when dataframes are displayed.
for option_name, option_value in {
    "display.max_colwidth": 500,
    "display.max_columns": 500,
    "display.width": 1000,
    "display.max_rows": 16,
}.items():
    pd.set_option(option_name, option_value)

In [None]:
# Turn off Altair's max-rows limit so charts can be built from the full dataframes.
alt.data_transformers.disable_max_rows()

In [None]:
# Initialise Panel with the plotly and vega extensions, notifications enabled
# and console output disabled.
pn.extension("plotly", "vega", notifications=True, console_output="disable")

## Dataset

### Load

In [None]:
# Read the train/val/test splits from their CSV files in cc.path_to_data.
split_names = ("train", "val", "test")
train, val, test = (
    cf.read_dataframe(cc.path_to_data.joinpath(f"oiv_{split_name}.csv"))
    for split_name in split_names
)

# One bar chart per split: row count per OIV value, coloured by source.
split_charts = []
for split_name, split_df in zip(split_names, (train, val, test)):
    # OIV is cast to string so it is charted as discrete categories.
    oiv_as_text = split_df.assign(oiv=split_df.oiv.astype(str))
    split_charts.append(
        alt.Chart(oiv_as_text)
        .mark_bar()
        .encode(x="oiv", y="count()", color="source", tooltip="count()")
        .properties(width=200, height=300, title=split_name)
    )

# Last expression of the cell: the three charts side by side.
alt.hconcat(*split_charts)

In [None]:
# One-off dataset preparation, kept commented out for reference.
# Uncomment to copy every file listed in the `file_name` column of the
# train/val/test frames from the source patches folder into
# cc.path_to_leaf_patches.

# src_patches = (
#     Path(cc.path_to_root)
#     .joinpath("..")
#     .joinpath("leafdisks_powderymildew")
#     .joinpath("data_in")
#     .joinpath("202311_dataset")
#     .joinpath("patches")
# )
# src_patches.is_dir()

# for d in [train, val, test]:
#     for fn in tqdm(d.file_name):
#         shutil.copy(src=src_patches.joinpath(fn), dst=cc.path_to_leaf_patches.joinpath(fn))

### Augmentation

In [None]:
# Names of the augmentation steps handed to ca.test_augmentations below.
augmentations_kinds = [
    "fix_brightness",
    "resize",
    "affine",
    "color",
    "to_tensor",
]

# Keyword parameters forwarded unchanged to the augmentation helper.
augmentations_params = {
    "gamma": (60, 120),
    "brightness_limit": 0.15,
    "contrast_limit": 0.25,
    "brightness_target": 115,
    "brightness_thresholds": (115, 130),
}

# Run the augmentation check on the training split.
# NOTE(review): presumably renders sample augmented patches; confirm in com_augmentations.
ca.test_augmentations(
    df=train,
    image_size=224,
    path_to_images=cc.path_to_leaf_patches,
    kinds=augmentations_kinds,
    columns=["oiv"],
    **augmentations_params,
)

## Model

### Find Batch Size

In [None]:
# Batch size used for training; replace it by running the commented-out
# lpopm.get_bs cell that follows.
batch_size = 615

We trained the models on an NVIDIA A100 80GB PCIe, which allowed a batch size of 769; we reduced it to 615 to avoid monopolizing the GPU. Uncomment the following block to calculate the optimal batch size for your hardware.

In [None]:
# Disabled by default: uncomment to overwrite `batch_size` with the value
# returned by lpopm.get_bs for the current setup.
# batch_size = lpopm.get_bs(
#     batch_size=300,
#     train=train,
#     val=val,
#     test=test,
#     augmentations_kinds=augmentations_kinds,
#     augmentations_params=augmentations_params,
#     shrink_factor=0.8,
# )

In [None]:

# Show the batch size currently in effect.
batch_size

### Find Learning Rate

In [None]:
# Learning rate used for training; replace it by running the commented-out
# lpopm.get_lr cell that follows.
learning_rate = 0.000363

We found that, with our selected batch size, the best learning rate was 0.000363. The function call below (commented out) will calculate an optimal learning rate for your setup.

In [None]:
# Disabled by default: uncomment to overwrite `learning_rate` with the value
# returned by lpopm.get_lr for the current batch size.
# learning_rate = lpopm.get_lr(
#     train=train,
#     val=val,
#     test=test,
#     augmentations_params=augmentations_params,
#     augmentations_kinds=augmentations_kinds,
#     batch_size=batch_size,
#     lr_times=10,
# )


In [None]:
# Show the learning rate currently in effect.
learning_rate

### Train

In [None]:
# Training is disabled by default; the Validate section loads a saved
# checkpoint instead. Uncomment to train with the settings defined above.
# NOTE(review): where train_model writes its checkpoint is not visible here; confirm
# it matches the file loaded in the Validate section.
# lpopm.train_model(
#     path_to_images=cc.path_to_leaf_patches,
#     train=train,
#     val=val,
#     test=test,
#     monitor_loss="mse",
#     augmentations_kinds=augmentations_kinds,
#     augmentations_params=augmentations_params,
#     batch_size=batch_size,
#     learning_rate=learning_rate,
# )

### Validate

In [None]:
# Restore the trained scorer from its checkpoint file.
checkpoint_path = cc.path_to_chk_oiv.joinpath("oiv_scorer.ckpt")
model = lpopm.OivDetPatchesNet.load_from_checkpoint(checkpoint_path)

# Point the restored model at the local patch images.
model.path_to_images = cc.path_to_leaf_patches

# Last expression of the cell: the model's description.
model.hr_desc()

In [None]:
# Score against the `fixed_oiv` labels: they replace `oiv` before predicting.
test_data = model.test_data.assign(oiv=lambda x: x.fixed_oiv)
test_data["p_oiv"] = model.predict(test_data)

# Compare labels and predictions as integers.
# NOTE(review): astype(int) truncates; if model.predict returns non-integer
# scores, rounding may be what is intended. Confirm.
oiv_true = test_data.oiv.astype(int)
oiv_pred = test_data.p_oiv.astype(int)

test_mse = mean_squared_error(oiv_true, oiv_pred)
print(f"MSE: {test_mse:.3f}")

# Trailing semicolon suppresses the display object's repr in the cell output.
ConfusionMatrixDisplay.from_predictions(oiv_true, oiv_pred);