Upload folder using huggingface_hub (#5)
Browse files- 4e1fb57dec599fbdb2a0e0d197d6792d7e3efabdaeb1ed4d3dfc2116b125be4b (1451339857657696f76a3d5dc8988f4ac4cd2728)
- .gitattributes +1 -0
- src/.gitattributes +5 -0
- src/com_const.py +1 -1
- src/leaf_patch_annotation.ipynb +49 -44
- src/leaf_patch_extractor.ipynb +3 -470
- src/leaf_patch_gen_diff.ipynb +0 -0
- src/leaf_patch_gen_diff.py +227 -0
- src/leaf_patch_oiv_predictor.ipynb +0 -0
- src/repo_manager.ipynb +95 -49
.gitattributes
CHANGED
@@ -5333,3 +5333,4 @@ images/plates/Exp23DM09_inoc2_T6_P059.JPG filter=lfs diff=lfs merge=lfs -text
|
|
5333 |
images/plates/Exp23DM09_inoc2_T6_P060.JPG filter=lfs diff=lfs merge=lfs -text
|
5334 |
images/plates/Exp23DM09_inoc2_T6_P061.JPG filter=lfs diff=lfs merge=lfs -text
|
5335 |
images/plates/Exp23DM09_inoc2_T6_P062.JPG filter=lfs diff=lfs merge=lfs -text
|
|
|
|
5333 |
images/plates/Exp23DM09_inoc2_T6_P060.JPG filter=lfs diff=lfs merge=lfs -text
|
5334 |
images/plates/Exp23DM09_inoc2_T6_P061.JPG filter=lfs diff=lfs merge=lfs -text
|
5335 |
images/plates/Exp23DM09_inoc2_T6_P062.JPG filter=lfs diff=lfs merge=lfs -text
|
5336 |
+
src/leaf_patch_extractor.ipynb filter=lfs diff=lfs merge=lfs -text
|
src/.gitattributes
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
leaf_patch_annotation.ipynb filter=lfs diff=lfs merge=lfs -text
|
2 |
+
leaf_patch_extractor.ipynb filter=lfs diff=lfs merge=lfs -text
|
3 |
+
leaf_patch_gen_diff.ipynb filter=lfs diff=lfs merge=lfs -text
|
4 |
+
leaf_patch_oiv_predictor.ipynb filter=lfs diff=lfs merge=lfs -text
|
5 |
+
repo_manager.ipynb filter=lfs diff=lfs merge=lfs -text
|
src/com_const.py
CHANGED
@@ -4,7 +4,7 @@ path_to_here = Path(__file__).resolve().parent
|
|
4 |
path_to_root = path_to_here.parent
|
5 |
|
6 |
path_to_data = path_to_root.joinpath("data")
|
7 |
-
|
8 |
path_to_images = path_to_root.joinpath("images")
|
9 |
path_to_plates = path_to_images.joinpath("plates")
|
10 |
path_to_leaf_discs = path_to_images.joinpath("leaf_discs")
|
|
|
4 |
path_to_root = path_to_here.parent
|
5 |
|
6 |
path_to_data = path_to_root.joinpath("data")
|
7 |
+
path_to_resources = path_to_root.joinpath("resources")
|
8 |
path_to_images = path_to_root.joinpath("images")
|
9 |
path_to_plates = path_to_images.joinpath("plates")
|
10 |
path_to_leaf_discs = path_to_images.joinpath("leaf_discs")
|
src/leaf_patch_annotation.ipynb
CHANGED
@@ -4,7 +4,40 @@
|
|
4 |
"cell_type": "markdown",
|
5 |
"metadata": {},
|
6 |
"source": [
|
7 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
]
|
9 |
},
|
10 |
{
|
@@ -158,11 +191,18 @@
|
|
158 |
"metadata": {},
|
159 |
"outputs": [],
|
160 |
"source": [
|
161 |
-
"df = cf.read_dataframe(path=cc.path_to_data.joinpath(
|
162 |
-
" [\"experiment\", \"inoc\", \"dpi\", \"plaque\", \"row\", \"col\"]\n",
|
163 |
-
")\n",
|
164 |
"if \"seen_at\" not in df:\n",
|
165 |
" df = df >> mutate(seen_at=np.nan)\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
166 |
"df.seen_at = pd.to_datetime(df.seen_at)\n",
|
167 |
"df = df.set_index(\"file_name\")\n",
|
168 |
"df"
|
@@ -183,7 +223,9 @@
|
|
183 |
"source": [
|
184 |
"def update_image(image_name:str, color, brightness, contrast, sharpness):\n",
|
185 |
" image_path = cc.path_to_leaf_patches.joinpath(image_name)\n",
|
186 |
-
" if
|
|
|
|
|
187 |
" fig = px.imshow(\n",
|
188 |
" np.array(\n",
|
189 |
" [\n",
|
@@ -342,12 +384,6 @@
|
|
342 |
" sizing_mode=\"scale_width\",\n",
|
343 |
")\n",
|
344 |
"\n",
|
345 |
-
"sw_ui_state = pn.widgets.Switch(name=\"active\", value=False)\n",
|
346 |
-
"alt_ui_state = pn.pane.Alert(\"Annotations will be stored\", alert_type=\"primary\")\n",
|
347 |
-
"\n",
|
348 |
-
"pn_ui_state = pn.Row(sw_ui_state, alt_ui_state)\n",
|
349 |
-
"\n",
|
350 |
-
"\n",
|
351 |
"bt_next = pn.widgets.Button(name=\"Next\", button_type=\"primary\")\n",
|
352 |
"bt_previous = pn.widgets.Button(name=\"Previous\", button_type=\"primary\")\n",
|
353 |
"\n",
|
@@ -389,20 +425,6 @@
|
|
389 |
" )\n",
|
390 |
"\n",
|
391 |
"\n",
|
392 |
-
"def update_ui_state(ui_state: bool):\n",
|
393 |
-
" if ui_state is True:\n",
|
394 |
-
" alt_ui_state.object = \"Annotations will be stored\"\n",
|
395 |
-
" alt_ui_state.alert_type = \"primary\"\n",
|
396 |
-
" else:\n",
|
397 |
-
" alt_ui_state.object = \"Annotations will be discarded\"\n",
|
398 |
-
" alt_ui_state.alert_type = \"danger\"\n",
|
399 |
-
"\n",
|
400 |
-
"\n",
|
401 |
-
"@pn.depends(sw_ui_state, watch=True)\n",
|
402 |
-
"def on_ui_State_changed(new_state: bool):\n",
|
403 |
-
" update_ui_state(new_state)\n",
|
404 |
-
"\n",
|
405 |
-
"\n",
|
406 |
"def select_next(event):\n",
|
407 |
" global current_row\n",
|
408 |
" global df\n",
|
@@ -419,9 +441,7 @@
|
|
419 |
" ]\n",
|
420 |
" cf.write_dataframe(\n",
|
421 |
" df=df.reset_index(),\n",
|
422 |
-
" path=cc.path_to_data.joinpath(
|
423 |
-
" \"oiv_annotation.csv\" if sw_ui_state.value is True else \"oiv_annotation_test.csv\"\n",
|
424 |
-
" ),\n",
|
425 |
" )\n",
|
426 |
" df.at[current_row.file_name, \"seen_at\"] = now\n",
|
427 |
"\n",
|
@@ -477,15 +497,9 @@
|
|
477 |
" rgb_source.disabled = target == \"OIV\"\n",
|
478 |
"\n",
|
479 |
"\n",
|
480 |
-
"# @pn.depends(rgb_oiv, watch=True)\n",
|
481 |
-
"# def on_oiv_changed(_):\n",
|
482 |
-
"# select_next(None)\n",
|
483 |
-
"\n",
|
484 |
-
"\n",
|
485 |
"bt_next.on_click(select_next)\n",
|
486 |
"bt_previous.on_click(select_next)\n",
|
487 |
"\n",
|
488 |
-
"update_ui_state(sw_ui_state.value)\n",
|
489 |
"select_next(None)"
|
490 |
]
|
491 |
},
|
@@ -502,14 +516,12 @@
|
|
502 |
"metadata": {},
|
503 |
"outputs": [],
|
504 |
"source": [
|
505 |
-
"template.sidebar.append(pn_ui_state)\n",
|
506 |
"template.sidebar.append(c_image_processing)\n",
|
507 |
"template.sidebar.append(c_anno_options)\n",
|
508 |
"\n",
|
509 |
"template.main.append(\n",
|
510 |
" pn.Row(\n",
|
511 |
" pn.Column(\n",
|
512 |
-
" # mkd_current,\n",
|
513 |
" img_current,\n",
|
514 |
" ui_annotation,\n",
|
515 |
" ),\n",
|
@@ -519,13 +531,6 @@
|
|
519 |
"\n",
|
520 |
"template.servable()"
|
521 |
]
|
522 |
-
},
|
523 |
-
{
|
524 |
-
"cell_type": "markdown",
|
525 |
-
"metadata": {},
|
526 |
-
"source": [
|
527 |
-
"# Please launch with command \"panel serve leaf_patch_annotation.ipynb --show --dev\" from the \"src\" folder"
|
528 |
-
]
|
529 |
}
|
530 |
],
|
531 |
"metadata": {
|
@@ -544,7 +549,7 @@
|
|
544 |
"name": "python",
|
545 |
"nbconvert_exporter": "python",
|
546 |
"pygments_lexer": "ipython3",
|
547 |
-
"version": "3.
|
548 |
}
|
549 |
},
|
550 |
"nbformat": 4,
|
|
|
4 |
"cell_type": "markdown",
|
5 |
"metadata": {},
|
6 |
"source": [
|
7 |
+
"# 202311 Dataset Annotation"
|
8 |
+
]
|
9 |
+
},
|
10 |
+
{
|
11 |
+
"cell_type": "markdown",
|
12 |
+
"metadata": {},
|
13 |
+
"source": [
|
14 |
+
"## Please launch with command \n",
|
15 |
+
"\n",
|
16 |
+
" panel serve leaf_patch_annotation.ipynb --show --dev\n",
|
17 |
+
" \n",
|
18 |
+
"from the \"src\" folder"
|
19 |
+
]
|
20 |
+
},
|
21 |
+
{
|
22 |
+
"cell_type": "markdown",
|
23 |
+
"metadata": {},
|
24 |
+
"source": [
|
25 |
+
"## Source Selection\n",
|
26 |
+
"Three options for SOURCE_FILE:\n",
|
27 |
+
"- oiv_annotation.csv for the already annotated CSV file\n",
|
28 |
+
"- oiv_annotation_empty.csv for an empty file ready to be annotated\n",
|
29 |
+
"- Your own semicolon separated CSV file containing at least a column named \"file_name\" with the name of patches located in the \"images/leaf_patches\" folder"
|
30 |
+
]
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"cell_type": "code",
|
34 |
+
"execution_count": null,
|
35 |
+
"metadata": {},
|
36 |
+
"outputs": [],
|
37 |
+
"source": [
|
38 |
+
"SOURCE_FILE = \"oiv_annotation.csv\"\n",
|
39 |
+
"# SOURCE_FILE = \"oiv_annotation_empty.csv\"\n",
|
40 |
+
"# SOURCE_FILE = \"my_csv.csv\""
|
41 |
]
|
42 |
},
|
43 |
{
|
|
|
191 |
"metadata": {},
|
192 |
"outputs": [],
|
193 |
"source": [
|
194 |
+
"df = cf.read_dataframe(path=cc.path_to_data.joinpath(SOURCE_FILE))\n",
|
|
|
|
|
195 |
"if \"seen_at\" not in df:\n",
|
196 |
" df = df >> mutate(seen_at=np.nan)\n",
|
197 |
+
"if \"oiv_annotated_at\" not in df:\n",
|
198 |
+
" df = df >> mutate(oiv_annotated_at=np.nan)\n",
|
199 |
+
"if \"source_annotated_at\" not in df:\n",
|
200 |
+
" df = df >> mutate(source_annotated_at=np.nan)\n",
|
201 |
+
"if \"source\" not in df:\n",
|
202 |
+
" df = df >> mutate(source=np.nan)\n",
|
203 |
+
"if \"oiv\" not in df:\n",
|
204 |
+
" df = df >> mutate(oiv=np.nan)\n",
|
205 |
+
"\n",
|
206 |
"df.seen_at = pd.to_datetime(df.seen_at)\n",
|
207 |
"df = df.set_index(\"file_name\")\n",
|
208 |
"df"
|
|
|
223 |
"source": [
|
224 |
"def update_image(image_name:str, color, brightness, contrast, sharpness):\n",
|
225 |
" image_path = cc.path_to_leaf_patches.joinpath(image_name)\n",
|
226 |
+
" if not image_name:\n",
|
227 |
+
" fig = px.imshow(Image.open(cc.path_to_resources.joinpath(\"well_done.png\")))\n",
|
228 |
+
" elif image_path.is_file() is False:\n",
|
229 |
" fig = px.imshow(\n",
|
230 |
" np.array(\n",
|
231 |
" [\n",
|
|
|
384 |
" sizing_mode=\"scale_width\",\n",
|
385 |
")\n",
|
386 |
"\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
387 |
"bt_next = pn.widgets.Button(name=\"Next\", button_type=\"primary\")\n",
|
388 |
"bt_previous = pn.widgets.Button(name=\"Previous\", button_type=\"primary\")\n",
|
389 |
"\n",
|
|
|
425 |
" )\n",
|
426 |
"\n",
|
427 |
"\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
428 |
"def select_next(event):\n",
|
429 |
" global current_row\n",
|
430 |
" global df\n",
|
|
|
441 |
" ]\n",
|
442 |
" cf.write_dataframe(\n",
|
443 |
" df=df.reset_index(),\n",
|
444 |
+
" path=cc.path_to_data.joinpath(SOURCE_FILE),\n",
|
|
|
|
|
445 |
" )\n",
|
446 |
" df.at[current_row.file_name, \"seen_at\"] = now\n",
|
447 |
"\n",
|
|
|
497 |
" rgb_source.disabled = target == \"OIV\"\n",
|
498 |
"\n",
|
499 |
"\n",
|
|
|
|
|
|
|
|
|
|
|
500 |
"bt_next.on_click(select_next)\n",
|
501 |
"bt_previous.on_click(select_next)\n",
|
502 |
"\n",
|
|
|
503 |
"select_next(None)"
|
504 |
]
|
505 |
},
|
|
|
516 |
"metadata": {},
|
517 |
"outputs": [],
|
518 |
"source": [
|
|
|
519 |
"template.sidebar.append(c_image_processing)\n",
|
520 |
"template.sidebar.append(c_anno_options)\n",
|
521 |
"\n",
|
522 |
"template.main.append(\n",
|
523 |
" pn.Row(\n",
|
524 |
" pn.Column(\n",
|
|
|
525 |
" img_current,\n",
|
526 |
" ui_annotation,\n",
|
527 |
" ),\n",
|
|
|
531 |
"\n",
|
532 |
"template.servable()"
|
533 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
534 |
}
|
535 |
],
|
536 |
"metadata": {
|
|
|
549 |
"name": "python",
|
550 |
"nbconvert_exporter": "python",
|
551 |
"pygments_lexer": "ipython3",
|
552 |
+
"version": "3.12.4"
|
553 |
}
|
554 |
},
|
555 |
"nbformat": 4,
|
src/leaf_patch_extractor.ipynb
CHANGED
@@ -1,470 +1,3 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
"cell_type": "markdown",
|
5 |
-
"metadata": {},
|
6 |
-
"source": [
|
7 |
-
"# Extract Leaf Patches From Plates"
|
8 |
-
]
|
9 |
-
},
|
10 |
-
{
|
11 |
-
"cell_type": "markdown",
|
12 |
-
"metadata": {},
|
13 |
-
"source": [
|
14 |
-
"## Imports"
|
15 |
-
]
|
16 |
-
},
|
17 |
-
{
|
18 |
-
"cell_type": "code",
|
19 |
-
"execution_count": null,
|
20 |
-
"metadata": {},
|
21 |
-
"outputs": [],
|
22 |
-
"source": [
|
23 |
-
"%load_ext autoreload\n",
|
24 |
-
"%autoreload 2"
|
25 |
-
]
|
26 |
-
},
|
27 |
-
{
|
28 |
-
"cell_type": "code",
|
29 |
-
"execution_count": null,
|
30 |
-
"metadata": {},
|
31 |
-
"outputs": [],
|
32 |
-
"source": [
|
33 |
-
"from datetime import datetime as dt\n",
|
34 |
-
"import warnings\n",
|
35 |
-
"import random\n",
|
36 |
-
"\n",
|
37 |
-
"from tqdm import tqdm\n",
|
38 |
-
"\n",
|
39 |
-
"import cv2\n",
|
40 |
-
"\n",
|
41 |
-
"import pandas as pd\n",
|
42 |
-
"\n",
|
43 |
-
"from siuba import _ as s\n",
|
44 |
-
"from siuba import filter as sfilter\n",
|
45 |
-
"from siuba import mutate, select, if_else\n",
|
46 |
-
"\n",
|
47 |
-
"import panel as pn\n",
|
48 |
-
"\n",
|
49 |
-
"import torch\n",
|
50 |
-
"\n",
|
51 |
-
"from pytorch_lightning.callbacks import (\n",
|
52 |
-
" RichProgressBar,\n",
|
53 |
-
" ModelCheckpoint,\n",
|
54 |
-
" LearningRateMonitor,\n",
|
55 |
-
")\n",
|
56 |
-
"from pytorch_lightning import Trainer\n",
|
57 |
-
"from pytorch_lightning.callbacks.early_stopping import EarlyStopping\n",
|
58 |
-
"from pytorch_lightning.loggers import TensorBoardLogger\n",
|
59 |
-
"\n",
|
60 |
-
"\n",
|
61 |
-
"import com_const as cc\n",
|
62 |
-
"import com_image as ci\n",
|
63 |
-
"import com_func as cf\n",
|
64 |
-
"import leaf_patch_extractor_model as lpem"
|
65 |
-
]
|
66 |
-
},
|
67 |
-
{
|
68 |
-
"cell_type": "markdown",
|
69 |
-
"metadata": {},
|
70 |
-
"source": [
|
71 |
-
"## Setup"
|
72 |
-
]
|
73 |
-
},
|
74 |
-
{
|
75 |
-
"cell_type": "code",
|
76 |
-
"execution_count": null,
|
77 |
-
"metadata": {},
|
78 |
-
"outputs": [],
|
79 |
-
"source": [
|
80 |
-
"warnings.simplefilter(action=\"ignore\", category=UserWarning)\n",
|
81 |
-
"warnings.simplefilter(action=\"ignore\", category=FutureWarning)"
|
82 |
-
]
|
83 |
-
},
|
84 |
-
{
|
85 |
-
"cell_type": "code",
|
86 |
-
"execution_count": null,
|
87 |
-
"metadata": {},
|
88 |
-
"outputs": [],
|
89 |
-
"source": [
|
90 |
-
"pd.set_option(\"display.max_colwidth\", 500)\n",
|
91 |
-
"pd.set_option(\"display.max_columns\", 500)\n",
|
92 |
-
"pd.set_option(\"display.width\", 1000)\n",
|
93 |
-
"pd.set_option(\"display.max_rows\", 16)"
|
94 |
-
]
|
95 |
-
},
|
96 |
-
{
|
97 |
-
"cell_type": "code",
|
98 |
-
"execution_count": null,
|
99 |
-
"metadata": {},
|
100 |
-
"outputs": [],
|
101 |
-
"source": [
|
102 |
-
"pn.extension(notifications=True, console_output=\"disable\")"
|
103 |
-
]
|
104 |
-
},
|
105 |
-
{
|
106 |
-
"cell_type": "markdown",
|
107 |
-
"metadata": {},
|
108 |
-
"source": [
|
109 |
-
"## Train Disc Detector"
|
110 |
-
]
|
111 |
-
},
|
112 |
-
{
|
113 |
-
"cell_type": "markdown",
|
114 |
-
"metadata": {},
|
115 |
-
"source": [
|
116 |
-
"### Load Datasets"
|
117 |
-
]
|
118 |
-
},
|
119 |
-
{
|
120 |
-
"cell_type": "code",
|
121 |
-
"execution_count": null,
|
122 |
-
"metadata": {},
|
123 |
-
"outputs": [],
|
124 |
-
"source": [
|
125 |
-
"train, val, test = [\n",
|
126 |
-
" cf.read_dataframe(cc.path_to_data.joinpath(f\"ldd_{d}.csv\"))\n",
|
127 |
-
" for d in [\"train\", \"val\", \"test\"]\n",
|
128 |
-
"]\n",
|
129 |
-
"\n",
|
130 |
-
"print(len(train), len(test), len(val))"
|
131 |
-
]
|
132 |
-
},
|
133 |
-
{
|
134 |
-
"cell_type": "markdown",
|
135 |
-
"metadata": {},
|
136 |
-
"source": [
|
137 |
-
"### Test Augmentations"
|
138 |
-
]
|
139 |
-
},
|
140 |
-
{
|
141 |
-
"cell_type": "code",
|
142 |
-
"execution_count": null,
|
143 |
-
"metadata": {},
|
144 |
-
"outputs": [],
|
145 |
-
"source": [
|
146 |
-
"# aug_ = lpem.get_augmentations(image_size=10, kinds=[\"resize\", \"train\"])\n",
|
147 |
-
"\n",
|
148 |
-
"# test_aug_dataset = lpem.LeafDiskDetectorDataset(csv=train, transform=aug_)\n",
|
149 |
-
"\n",
|
150 |
-
"# file_name = train.sample(n=1).plate_name.to_list()[0]\n",
|
151 |
-
"\n",
|
152 |
-
"# print(aug_[0].width, aug_[0].height)\n",
|
153 |
-
"\n",
|
154 |
-
"# lpem.make_patches_grid(\n",
|
155 |
-
"# images=[\n",
|
156 |
-
"# test_aug_dataset.draw_image_with_boxes(plate_name=file_name) for _ in range(12)\n",
|
157 |
-
"# ],\n",
|
158 |
-
"# row_count=3,\n",
|
159 |
-
"# col_count=4,\n",
|
160 |
-
"# figsize=(12, 6),\n",
|
161 |
-
"# )"
|
162 |
-
]
|
163 |
-
},
|
164 |
-
{
|
165 |
-
"cell_type": "markdown",
|
166 |
-
"metadata": {},
|
167 |
-
"source": [
|
168 |
-
"### Train"
|
169 |
-
]
|
170 |
-
},
|
171 |
-
{
|
172 |
-
"cell_type": "code",
|
173 |
-
"execution_count": null,
|
174 |
-
"metadata": {},
|
175 |
-
"outputs": [],
|
176 |
-
"source": [
|
177 |
-
"# model = lpem.LeafDiskDetector(\n",
|
178 |
-
"# batch_size=15,\n",
|
179 |
-
"# learning_rate=7.0e-05,\n",
|
180 |
-
"# image_factor=10,\n",
|
181 |
-
"# max_epochs=1000,\n",
|
182 |
-
"# train_data=train,\n",
|
183 |
-
"# val_data=val,\n",
|
184 |
-
"# test_data=test,\n",
|
185 |
-
"# augmentations_kinds=[\"resize\", \"train\", \"to_tensor\"],\n",
|
186 |
-
"# augmentations_params={\"gamma\": (60, 180)},\n",
|
187 |
-
"# num_workers=2,\n",
|
188 |
-
"# accumulate_grad_batches=5,\n",
|
189 |
-
"# scheduler=\"steplr\",\n",
|
190 |
-
"# scheduler_params={\"step_size\": 10, \"gamma\": 0.80},\n",
|
191 |
-
"# )\n",
|
192 |
-
"\n",
|
193 |
-
"# model.eval()\n",
|
194 |
-
"# len(model(torch.rand(2, 3, 128, 128)))\n",
|
195 |
-
"\n",
|
196 |
-
"# model.hr_desc()"
|
197 |
-
]
|
198 |
-
},
|
199 |
-
{
|
200 |
-
"cell_type": "code",
|
201 |
-
"execution_count": null,
|
202 |
-
"metadata": {},
|
203 |
-
"outputs": [],
|
204 |
-
"source": [
|
205 |
-
"# trainer = Trainer(\n",
|
206 |
-
"# default_root_dir=str(cc.path_to_chk_detector),\n",
|
207 |
-
"# logger=TensorBoardLogger(\n",
|
208 |
-
"# save_dir=str(cc.path_to_chk_detector),\n",
|
209 |
-
"# version=model.model_name + \"_\" + dt.now().strftime(\"%Y%m%d_%H%M%S\"),\n",
|
210 |
-
"# name=\"lightning_logs\",\n",
|
211 |
-
"# ),\n",
|
212 |
-
"# accelerator=\"gpu\",\n",
|
213 |
-
"# max_epochs=model.max_epochs,\n",
|
214 |
-
"# log_every_n_steps=5,\n",
|
215 |
-
"# callbacks=[\n",
|
216 |
-
"# RichProgressBar(),\n",
|
217 |
-
"# EarlyStopping(monitor=\"val_loss\", mode=\"min\", patience=10, min_delta=0.0005),\n",
|
218 |
-
"# ModelCheckpoint(\n",
|
219 |
-
"# save_top_k=1,\n",
|
220 |
-
"# monitor=\"val_loss\",\n",
|
221 |
-
"# auto_insert_metric_name=True,\n",
|
222 |
-
"# filename=model.model_name\n",
|
223 |
-
"# + \"-{val_loss:.3f}-{epoch}-{train_loss:.3f}-{step}\",\n",
|
224 |
-
"# ),\n",
|
225 |
-
"# LearningRateMonitor(logging_interval=\"epoch\"),\n",
|
226 |
-
"# ],\n",
|
227 |
-
"# accumulate_grad_batches=model.accumulate_grad_batches,\n",
|
228 |
-
"# )\n",
|
229 |
-
"\n",
|
230 |
-
"# trainer.fit(model)"
|
231 |
-
]
|
232 |
-
},
|
233 |
-
{
|
234 |
-
"cell_type": "markdown",
|
235 |
-
"metadata": {},
|
236 |
-
"source": [
|
237 |
-
"## Extract Patches"
|
238 |
-
]
|
239 |
-
},
|
240 |
-
{
|
241 |
-
"cell_type": "markdown",
|
242 |
-
"metadata": {},
|
243 |
-
"source": [
|
244 |
-
"### Load Model"
|
245 |
-
]
|
246 |
-
},
|
247 |
-
{
|
248 |
-
"cell_type": "code",
|
249 |
-
"execution_count": null,
|
250 |
-
"metadata": {},
|
251 |
-
"outputs": [],
|
252 |
-
"source": [
|
253 |
-
"ld_model: lpem.LeafDiskDetector = lpem.LeafDiskDetector.load_from_checkpoint(\n",
|
254 |
-
" cc.path_to_chk_detector.joinpath(\"leaf_disc_detector.ckpt\")\n",
|
255 |
-
")\n",
|
256 |
-
"ld_model.hr_desc()"
|
257 |
-
]
|
258 |
-
},
|
259 |
-
{
|
260 |
-
"cell_type": "markdown",
|
261 |
-
"metadata": {},
|
262 |
-
"source": [
|
263 |
-
"### Predict All Bounding Boxes"
|
264 |
-
]
|
265 |
-
},
|
266 |
-
{
|
267 |
-
"cell_type": "code",
|
268 |
-
"execution_count": null,
|
269 |
-
"metadata": {},
|
270 |
-
"outputs": [],
|
271 |
-
"source": [
|
272 |
-
"bb_predictions_path = cc.path_to_data.joinpath(\"train_ld_bounding_boxes.csv\")\n",
|
273 |
-
"\n",
|
274 |
-
"bb_predictions = (\n",
|
275 |
-
" cf.read_dataframe(bb_predictions_path)\n",
|
276 |
-
" if bb_predictions_path.is_file() is True\n",
|
277 |
-
" else pd.DataFrame()\n",
|
278 |
-
")\n",
|
279 |
-
"\n",
|
280 |
-
"bb_predictions"
|
281 |
-
]
|
282 |
-
},
|
283 |
-
{
|
284 |
-
"cell_type": "code",
|
285 |
-
"execution_count": null,
|
286 |
-
"metadata": {},
|
287 |
-
"outputs": [],
|
288 |
-
"source": [
|
289 |
-
"plates = list(cc.path_to_plates.rglob(\"*.JPG\"))\n",
|
290 |
-
"len(plates)"
|
291 |
-
]
|
292 |
-
},
|
293 |
-
{
|
294 |
-
"cell_type": "code",
|
295 |
-
"execution_count": null,
|
296 |
-
"metadata": {},
|
297 |
-
"outputs": [],
|
298 |
-
"source": [
|
299 |
-
"errors = []\n",
|
300 |
-
"handled_plates = bb_predictions.file_name.unique()\n",
|
301 |
-
"\n",
|
302 |
-
"for plate in tqdm(plates):\n",
|
303 |
-
" if \"file_name\" in bb_predictions and plate.name in handled_plates:\n",
|
304 |
-
" continue\n",
|
305 |
-
" try:\n",
|
306 |
-
" current_data = ld_model.index_plate(plate) >> mutate(\n",
|
307 |
-
" disc_name=s.file_name.str.replace(\" \", \"\").replace(\".JPG\", \"\")\n",
|
308 |
-
" + \"_\"\n",
|
309 |
-
" + s.row.astype(str)\n",
|
310 |
-
" + \"_\"\n",
|
311 |
-
" + s.col.astype(str)\n",
|
312 |
-
" + \".png\"\n",
|
313 |
-
" )\n",
|
314 |
-
" bb_predictions = pd.concat([bb_predictions, current_data])\n",
|
315 |
-
" except:\n",
|
316 |
-
" errors.append(plate)\n",
|
317 |
-
"\n",
|
318 |
-
"print(errors)\n",
|
319 |
-
"cf.write_dataframe(\n",
|
320 |
-
" bb_predictions.sort_values([\"file_name\", \"col\", \"row\"]).reset_index(drop=True)\n",
|
321 |
-
" >> mutate(disc_name=s.disc_name.str.replace(\".JPG\", \"\")),\n",
|
322 |
-
" bb_predictions_path,\n",
|
323 |
-
")\n",
|
324 |
-
"\n",
|
325 |
-
"bb_predictions = cf.read_dataframe(bb_predictions_path)\n",
|
326 |
-
"bb_predictions"
|
327 |
-
]
|
328 |
-
},
|
329 |
-
{
|
330 |
-
"cell_type": "code",
|
331 |
-
"execution_count": null,
|
332 |
-
"metadata": {},
|
333 |
-
"outputs": [],
|
334 |
-
"source": [
|
335 |
-
"selected_image = random.choice(plates)\n",
|
336 |
-
"bboxes = bb_predictions >> sfilter(s.file_name == selected_image.name)\n",
|
337 |
-
"pn.Column(\n",
|
338 |
-
" pn.pane.Markdown(f\"### {selected_image.name}\"),\n",
|
339 |
-
" pn.pane.DataFrame(bboxes),\n",
|
340 |
-
" pn.pane.Image(\n",
|
341 |
-
" ci.to_pil(\n",
|
342 |
-
" lpem.print_boxes(\n",
|
343 |
-
" image_name=selected_image,\n",
|
344 |
-
" boxes=bboxes,\n",
|
345 |
-
" draw_first_line=True,\n",
|
346 |
-
" return_plot=False,\n",
|
347 |
-
" ) #\n",
|
348 |
-
" ),\n",
|
349 |
-
" sizing_mode=\"scale_width\",\n",
|
350 |
-
" ),\n",
|
351 |
-
")"
|
352 |
-
]
|
353 |
-
},
|
354 |
-
{
|
355 |
-
"cell_type": "markdown",
|
356 |
-
"metadata": {},
|
357 |
-
"source": [
|
358 |
-
"### Extract Needed Patches"
|
359 |
-
]
|
360 |
-
},
|
361 |
-
{
|
362 |
-
"cell_type": "markdown",
|
363 |
-
"metadata": {},
|
364 |
-
"source": [
|
365 |
-
"#### Model Training"
|
366 |
-
]
|
367 |
-
},
|
368 |
-
{
|
369 |
-
"cell_type": "code",
|
370 |
-
"execution_count": null,
|
371 |
-
"metadata": {},
|
372 |
-
"outputs": [],
|
373 |
-
"source": [
|
374 |
-
"df_model_training = pd.concat(\n",
|
375 |
-
" [\n",
|
376 |
-
" cf.read_dataframe(cc.path_to_data.joinpath(f\"oiv_{d}.csv\"))\n",
|
377 |
-
" for d in [\"train\", \"val\", \"test\"]\n",
|
378 |
-
" ]\n",
|
379 |
-
").sort_values([\"file_name\"]).reset_index(drop=True)\n",
|
380 |
-
"df_model_training"
|
381 |
-
]
|
382 |
-
},
|
383 |
-
{
|
384 |
-
"cell_type": "code",
|
385 |
-
"execution_count": null,
|
386 |
-
"metadata": {},
|
387 |
-
"outputs": [],
|
388 |
-
"source": [
|
389 |
-
"err = {}\n",
|
390 |
-
"\n",
|
391 |
-
"for file_name in tqdm(df_model_training.file_name):\n",
|
392 |
-
" row = (bb_predictions >> sfilter(s.disc_name == file_name)).reset_index(drop=True)\n",
|
393 |
-
" lpem.handle_bbox(\n",
|
394 |
-
" row.iloc[0],\n",
|
395 |
-
" add_process_image=True,\n",
|
396 |
-
" paths=dict(\n",
|
397 |
-
" segmented_leaf_disc=cc.path_to_leaf_discs,\n",
|
398 |
-
" leaf_disc_patch=cc.path_to_leaf_patches,\n",
|
399 |
-
" plates=cc.path_to_plates,\n",
|
400 |
-
" ),\n",
|
401 |
-
" errors=err,\n",
|
402 |
-
" )\n",
|
403 |
-
"err"
|
404 |
-
]
|
405 |
-
},
|
406 |
-
{
|
407 |
-
"cell_type": "markdown",
|
408 |
-
"metadata": {},
|
409 |
-
"source": [
|
410 |
-
"#### Genotype differenciation"
|
411 |
-
]
|
412 |
-
},
|
413 |
-
{
|
414 |
-
"cell_type": "code",
|
415 |
-
"execution_count": null,
|
416 |
-
"metadata": {},
|
417 |
-
"outputs": [],
|
418 |
-
"source": [
|
419 |
-
"df_gd = cf.read_dataframe(\n",
|
420 |
-
" cc.path_to_data.joinpath(\"genotype_differenciation_dataset.csv\")\n",
|
421 |
-
")\n",
|
422 |
-
"df_gd"
|
423 |
-
]
|
424 |
-
},
|
425 |
-
{
|
426 |
-
"cell_type": "code",
|
427 |
-
"execution_count": null,
|
428 |
-
"metadata": {},
|
429 |
-
"outputs": [],
|
430 |
-
"source": [
|
431 |
-
"err = {}\n",
|
432 |
-
"\n",
|
433 |
-
"for file_name in tqdm(df_gd.file_name):\n",
|
434 |
-
" row = (bb_predictions >> sfilter(s.disc_name == file_name)).reset_index(drop=True)\n",
|
435 |
-
" lpem.handle_bbox(\n",
|
436 |
-
" row.iloc[0],\n",
|
437 |
-
" add_process_image=True,\n",
|
438 |
-
" paths=dict(\n",
|
439 |
-
" segmented_leaf_disc=cc.path_to_leaf_discs,\n",
|
440 |
-
" leaf_disc_patch=cc.path_to_leaf_patches,\n",
|
441 |
-
" plates=cc.path_to_plates,\n",
|
442 |
-
" ),\n",
|
443 |
-
" errors=err,\n",
|
444 |
-
" )\n",
|
445 |
-
"err"
|
446 |
-
]
|
447 |
-
}
|
448 |
-
],
|
449 |
-
"metadata": {
|
450 |
-
"kernelspec": {
|
451 |
-
"display_name": "env",
|
452 |
-
"language": "python",
|
453 |
-
"name": "python3"
|
454 |
-
},
|
455 |
-
"language_info": {
|
456 |
-
"codemirror_mode": {
|
457 |
-
"name": "ipython",
|
458 |
-
"version": 3
|
459 |
-
},
|
460 |
-
"file_extension": ".py",
|
461 |
-
"mimetype": "text/x-python",
|
462 |
-
"name": "python",
|
463 |
-
"nbconvert_exporter": "python",
|
464 |
-
"pygments_lexer": "ipython3",
|
465 |
-
"version": "3.9.2"
|
466 |
-
}
|
467 |
-
},
|
468 |
-
"nbformat": 4,
|
469 |
-
"nbformat_minor": 2
|
470 |
-
}
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e0baba4ac3a68926405c55c55f18e86a03e58db10eb26d91c6b2e496d6c11108
|
3 |
+
size 12983025
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/leaf_patch_gen_diff.ipynb
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
src/leaf_patch_gen_diff.py
ADDED
@@ -0,0 +1,227 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
import pandas as pd
|
3 |
+
|
4 |
+
import scipy.stats as stats
|
5 |
+
import statsmodels.api as sm
|
6 |
+
from statsmodels.formula.api import ols
|
7 |
+
from statsmodels.regression.linear_model import RegressionResultsWrapper
|
8 |
+
from statsmodels.stats.multicomp import pairwise_tukeyhsd
|
9 |
+
|
10 |
+
from matplotlib.figure import Figure
|
11 |
+
import seaborn as sns
|
12 |
+
import panel as pn
|
13 |
+
|
14 |
+
import com_const as cc
|
15 |
+
import com_func as cf
|
16 |
+
import com_image as ci
|
17 |
+
|
18 |
+
# Negative natural logarithms of the p-value cut-offs 0.05, 0.01, 0.001 and
# 0.0001, in increasing order.
# NOTE(review): presumably the thresholds for significance stars on a
# -log(p) scale -- confirm against the plotting code that consumes them.
stars = [-np.log(p_value) for p_value in (0.05, 0.01, 0.001, 0.0001)]
|
19 |
+
|
20 |
+
|
21 |
+
def plot_single_progression(
    ax,
    df,
    target,
    title: str,
    hue="gen",
    style="gen",
    show_legend: bool = False,
):
    """Draw one line plot of ``target`` against ``dpi`` on ``ax``.

    Args:
        ax: Matplotlib axes to draw on.
        df: Data frame providing the ``dpi`` column, the ``target`` column
            and the columns named by ``hue`` and ``style``.
        target: Name of the column plotted on the y axis.
        title: Title set on ``ax``.
        hue: Column used for the line colours; rows are sorted by it before
            plotting.
        style: Column used for the marker styles.
        show_legend: When True the legend is anchored to the upper-left
            corner just outside the right edge of the axes; otherwise the
            legend is hidden.
    """
    lp = sns.lineplot(
        df.sort_values(hue),
        x="dpi",
        y=target,
        hue=hue,
        markers=True,
        style=style,
        dashes=False,
        palette="tab10",
        markersize=12,
        ax=ax,
    )
    # NOTE(review): these eight labels are applied to whatever y ticks
    # matplotlib selected; this presumably assumes ticks at 2..9 -- confirm,
    # or pin the ticks explicitly before labelling them.
    lp.set_yticklabels(["", "3", "", "5", "", "7", "", "9"])
    ax.set_title(title)

    # No legend is attached when nothing was drawn (for instance an empty
    # data frame); both branches below would raise in that case.
    if ax.get_legend() is None:
        return

    if show_legend is True:
        sns.move_legend(ax, "upper left", bbox_to_anchor=(1, 1))
    else:
        ax.get_legend().set_visible(False)
|
48 |
+
|
49 |
+
|
50 |
+
def get_model(
    df: pd.DataFrame, target: str, formula: str, dpi: int = None
) -> RegressionResultsWrapper:
    """Fit an ordinary least squares model for ``target``.

    The model formula passed to ``ols`` is ``f"{target} {formula}"``, so
    ``formula`` must supply the operator and the right-hand side.

    Args:
        df: Data used to fit the model.
        target: Left-hand side of the model formula.
        formula: Remainder of the model formula, appended after a space.
        dpi: When not None, only the rows whose ``dpi`` column equals this
            value are used; otherwise the whole frame is used.

    Returns:
        The fitted model results.
    """
    subset = df
    if dpi is not None:
        subset = df[df.dpi == dpi]
    fitted = ols(f"{target} {formula}", data=subset).fit()
    return fitted
|
55 |
+
|
56 |
+
|
57 |
+
def anova_table(aov, add_columns: bool = True):
    """Complete a one-way ANOVA table with mean squares and effect sizes.

    Written for the one-way ANOVA table returned for Type II sums of
    squares; the last row of the table is used as the residual row.

    Args:
        aov: ANOVA data frame holding at least the ``sum_sq``, ``df``, ``F``
            and ``PR(>F)`` columns. It is modified in place when
            ``add_columns`` is True.
        add_columns: When True, the ``mean_sq``, ``eta_sq`` and ``omega_sq``
            columns are computed and added. When False they must already be
            present in ``aov``.

    Returns:
        The table restricted to the columns ``sum_sq``, ``df``, ``mean_sq``,
        ``F``, ``PR(>F)``, ``eta_sq`` and ``omega_sq``, in that order.
    """
    if add_columns is True:
        aov["mean_sq"] = aov["sum_sq"] / aov["df"]

        # Every row except the last one is an effect row. The index-aligned
        # assignments below therefore leave NaN in the last (residual) row.
        effects = aov.iloc[:-1]
        total_sum_sq = aov["sum_sq"].sum()
        # Explicit positional access: ``Series[-1]`` on a label index relies
        # on a deprecated label-to-position fallback.
        residual_mean_sq = aov["mean_sq"].iloc[-1]

        aov["eta_sq"] = effects["sum_sq"] / total_sum_sq

        aov["omega_sq"] = (
            effects["sum_sq"] - effects["df"] * residual_mean_sq
        ) / (total_sum_sq + residual_mean_sq)

    cols = ["sum_sq", "df", "mean_sq", "F", "PR(>F)", "eta_sq", "omega_sq"]
    return aov[cols]
|
74 |
+
|
75 |
+
|
76 |
+
def plot_assumptions(models: list, titles: list, figsize=(12, 4)):
    """Build a figure with one residual probability plot per model.

    Args:
        models: Fitted models; each must expose its residuals as ``resid``.
        titles: Titles for the individual plots, paired with ``models`` by
            position. Pairing uses ``zip``, so surplus models or titles are
            ignored.
        figsize: Size passed to the created ``Figure``.

    Returns:
        The figure, holding a single row with one axes per model.
    """
    fig = Figure(figsize=figsize)
    fig.suptitle("Probability plot of model residual's", fontsize="x-large")
    # squeeze=False always yields a 2-D array of axes. Without it a single
    # model gets a bare Axes back, which cannot be iterated by zip().
    axes_row = fig.subplots(1, len(models), squeeze=False)[0]
    for ax, model, title in zip(axes_row, models, titles):
        stats.probplot(model.resid, plot=ax, rvalue=True)
        # Set after probplot so the caller's title is the one that remains.
        ax.set_title(title)

    return fig
|
85 |
+
|
86 |
+
|
87 |
+
def hghlight_rejection(s):
    """Build a frame of CSS background styles for a Tukey comparison table.

    Args:
        s: Data frame with ``group1``, ``group2`` and the boolean columns
            ``reject_pred`` and ``reject_obs``.

    Returns:
        A frame with the same index and columns as ``s``. ``group1`` and
        ``group2`` are green where both reject flags agree and red where
        they differ; each reject column is green where it is True and red
        where it is False. All other cells are left empty (NaN).
    """
    styles = pd.DataFrame(columns=s.columns, index=s.index)

    # Colour the group pair by agreement between prediction and observation.
    flags_agree = s["reject_pred"].eq(s["reject_obs"])
    for rows, css in (
        (~flags_agree, "background: red"),
        (flags_agree, "background: green"),
    ):
        styles.loc[rows, ["group1", "group2"]] = css

    # Colour each reject flag by its own value.
    for flag in ("reject_pred", "reject_obs"):
        styles.loc[s[flag], [flag]] = "background: green"
        styles.loc[~s[flag], [flag]] = "background: red"

    return styles
|
100 |
+
|
101 |
+
|
102 |
+
def get_tuckey_df(endog, groups, df_genotypes) -> pd.DataFrame:
    """Run ``pairwise_tukeyhsd`` and return its result table as a DataFrame.

    The first row of the results table supplies the column names and is then
    dropped. ``group1``/``group2`` are cast to ``str``, ``reject`` becomes a
    boolean (its string form compared with ``"True"``) and ``p-adj`` is
    overwritten with ``tukey.pvalues``.

    When ``df_genotypes`` is given, it is left-joined twice on its ``gen``
    column -- once against ``group1``, once against ``group2`` -- and its
    ``rpvloci`` column is renamed ``group1_rpvloci`` / ``group2_rpvloci``.
    """
    tukey = pairwise_tukeyhsd(endog=endog, groups=groups)

    # NOTE(review): ``_results_table`` is a private statsmodels attribute.
    table = pd.DataFrame(tukey._results_table)
    header_row = table.iloc[0]
    table.columns = [str(cell) for cell in header_row]

    ret = table.drop(table.index[0]).assign(
        group1=lambda t: t.group1.astype(str),
        group2=lambda t: t.group2.astype(str),
        reject=lambda t: t.reject.astype(str) == "True",
    )
    ret["p-adj"] = tukey.pvalues

    if df_genotypes is None:
        return ret

    for side in ("group1", "group2"):
        ret = (
            ret.merge(right=df_genotypes, how="left", left_on=side, right_on="gen")
            .drop(["gen"], axis=1)
            .rename(columns={"rpvloci": f"{side}_rpvloci"})
        )
    return ret
|
124 |
+
|
125 |
+
|
126 |
+
def get_tuckey_compare(df, df_genotypes=None, groups: str = "gen"):
    """Join the Tukey tables computed on ``df.p_oiv`` and on ``df.oiv``.

    Both tables come from ``get_tuckey_df`` with ``df[groups]`` as grouping.
    They are merged on the pair columns (plus the two ``*_rpvloci`` columns
    when ``df_genotypes`` is provided); the remaining columns get the
    suffix ``_pred`` for the ``p_oiv`` table and ``_obs`` for the ``oiv`` one.
    """
    join_keys = ["group1", "group2"]
    if df_genotypes is not None:
        join_keys += ["group1_rpvloci", "group2_rpvloci"]

    predicted = get_tuckey_df(df.p_oiv, df[groups], df_genotypes=df_genotypes)
    observed = get_tuckey_df(df.oiv, df[groups], df_genotypes=df_genotypes)

    return pd.merge(
        left=predicted,
        right=observed,
        on=join_keys,
        suffixes=["_pred", "_obs"],
    )
|
136 |
+
|
137 |
+
|
138 |
+
def df_tukey_cmp_plot(df, groups):
    """Plot ``-log(p-adj)`` of predicted vs. observed Tukey tests per pair.

    Pairs are split over three panels sharing the y axis: both tests reject
    ("Rejected"), the tests disagree ("Conflict"), neither rejects
    ("Accepted"). Panel widths are proportional to the number of pairs in
    each. Green bars use ``p-adj_pred``, blue bars ``p-adj_obs``.

    ``stars`` is a module-level name defined outside this block; it supplies
    the y tick positions (labelled ``*`` to ``****``) and the horizontal
    guide lines.
    """
    tukey = get_tuckey_compare(df=df, groups=groups, df_genotypes=None)
    tukey = tukey.assign(pair_groups=lambda t: t.group1 + "\n" + t.group2)
    tukey = tukey.sort_values("p-adj_obs")

    rejected = tukey[tukey.reject_obs & tukey.reject_pred]
    accepted = tukey[~tukey.reject_obs & ~tukey.reject_pred]
    conflicting = tukey[tukey.reject_obs != tukey.reject_pred]

    fig = Figure(figsize=(20, 6))
    panel_widths = [len(rejected), len(conflicting), len(accepted)]
    ax_reject, ax_diverge, ax_accept = fig.subplots(
        1,
        3,
        gridspec_kw={"width_ratios": panel_widths},
        sharey=True,
    )
    # Same processing order as before: rejected, accepted, then conflicting.
    panels = [
        (ax_reject, rejected),
        (ax_accept, accepted),
        (ax_diverge, conflicting),
    ]

    for ax, _ in panels:
        ax.set_yticks(ticks=stars, labels=["*", "**", "***", "****"])
        ax.grid(False)

    ax_reject.set_title("Rejected")
    ax_diverge.set_title("Conflict")
    ax_accept.set_title("Accepted")

    # (p-value column, bar width, colour, legend label); with align="edge" a
    # negative width puts the bar on the left of the tick, a positive one on
    # the right.
    bar_series = (
        ("p-adj_pred", -0.4, "green", "predictions"),
        ("p-adj_obs", 0.4, "blue", "scorings"),
    )
    for ax, subset in panels:
        for level in stars:
            ax.axhline(y=level, linestyle="-", color="black", alpha=0.5)
        for column, bar_width, colour, label in bar_series:
            ax.bar(
                x=subset["pair_groups"],
                height=-np.log(subset[column]),
                width=bar_width,
                align="edge",
                color=colour,
                label=label,
            )
        ax.margins(0.01)

    ax_accept.legend(loc="upper left", bbox_to_anchor=[0, 1], ncols=1, fancybox=True)
    ax_reject.set_ylabel("-log(p value)")
    ax_reject.tick_params(axis="y", which="major", labelsize=16)

    fig.subplots_adjust(wspace=0.05, hspace=0.05)

    return fig
|
202 |
+
|
203 |
+
|
204 |
+
def plot_patches(df, diff_only: bool = True):
    """Lay out one captioned patch image per row of ``df`` in a single row.

    With ``diff_only`` set, only rows where ``oiv`` differs from ``p_oiv``
    are kept. Rows are ordered by a ``diff`` flag (``oiv != p_oiv``), then
    ``oiv``, then ``p_oiv``. Each tile is a ``pn.Column`` holding a Markdown
    caption ``file_name|oiv->pp_oiv`` and the image loaded from
    ``cc.path_to_leaf_patches`` and passed through ``ci.enhance_pil_image``
    with ``brightness=1.5``. The ``pn.GridBox`` gets as many columns as
    there are rows.
    """
    if diff_only is True:
        df = df[(df.oiv != df.p_oiv)]

    ordered = df.assign(diff=lambda t: t.oiv != t.p_oiv).sort_values(
        ["diff", "oiv", "p_oiv"]
    )

    tiles = []
    for _, row in ordered.iterrows():
        caption = pn.pane.Markdown(f"### {row.file_name}|{row.oiv}->p{row.p_oiv}")
        patch = ci.load_image(
            file_name=row.file_name,
            path_to_images=cc.path_to_leaf_patches,
        )
        picture = pn.pane.Image(
            object=ci.enhance_pil_image(image=patch, brightness=1.5)
        )
        tiles.append(pn.Column(caption, picture))

    return pn.GridBox(*tiles, ncols=len(ordered))
|
src/leaf_patch_oiv_predictor.ipynb
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
src/repo_manager.ipynb
CHANGED
@@ -2,10 +2,13 @@
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
-
"execution_count":
|
6 |
"metadata": {},
|
7 |
"outputs": [],
|
8 |
"source": [
|
|
|
|
|
|
|
9 |
"from huggingface_hub import create_repo, HfApi\n",
|
10 |
"\n",
|
11 |
"import com_const as cc"
|
@@ -13,7 +16,7 @@
|
|
13 |
},
|
14 |
{
|
15 |
"cell_type": "code",
|
16 |
-
"execution_count":
|
17 |
"metadata": {},
|
18 |
"outputs": [],
|
19 |
"source": [
|
@@ -22,27 +25,16 @@
|
|
22 |
},
|
23 |
{
|
24 |
"cell_type": "code",
|
25 |
-
"execution_count":
|
26 |
"metadata": {},
|
27 |
-
"outputs": [
|
28 |
-
{
|
29 |
-
"data": {
|
30 |
-
"text/plain": [
|
31 |
-
"RepoUrl('https://huggingface.co/treizh/oiv_ld_phenotyping', endpoint='https://huggingface.co', repo_type='model', repo_id='treizh/oiv_ld_phenotyping')"
|
32 |
-
]
|
33 |
-
},
|
34 |
-
"execution_count": 3,
|
35 |
-
"metadata": {},
|
36 |
-
"output_type": "execute_result"
|
37 |
-
}
|
38 |
-
],
|
39 |
"source": [
|
40 |
"create_repo(repo_id, exist_ok=True)"
|
41 |
]
|
42 |
},
|
43 |
{
|
44 |
"cell_type": "code",
|
45 |
-
"execution_count":
|
46 |
"metadata": {},
|
47 |
"outputs": [],
|
48 |
"source": [
|
@@ -51,22 +43,71 @@
|
|
51 |
},
|
52 |
{
|
53 |
"cell_type": "code",
|
54 |
-
"execution_count":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
"metadata": {},
|
56 |
-
"outputs": [
|
57 |
-
{
|
58 |
-
"data": {
|
59 |
-
"text/plain": [
|
60 |
-
"CommitInfo(commit_url='https://huggingface.co/treizh/oiv_ld_phenotyping/commit/ac0c4c71a2b6842d45cd5fa99ca15429f647027c', commit_message='Upload .gitignore with huggingface_hub', commit_description='', oid='ac0c4c71a2b6842d45cd5fa99ca15429f647027c', pr_url=None, pr_revision=None, pr_num=None)"
|
61 |
-
]
|
62 |
-
},
|
63 |
-
"execution_count": 5,
|
64 |
-
"metadata": {},
|
65 |
-
"output_type": "execute_result"
|
66 |
-
}
|
67 |
-
],
|
68 |
"source": [
|
69 |
-
"
|
|
|
70 |
]
|
71 |
},
|
72 |
{
|
@@ -75,28 +116,35 @@
|
|
75 |
"metadata": {},
|
76 |
"outputs": [],
|
77 |
"source": [
|
78 |
-
"
|
79 |
-
"
|
80 |
]
|
81 |
},
|
82 |
{
|
83 |
"cell_type": "code",
|
84 |
-
"execution_count":
|
85 |
"metadata": {},
|
86 |
-
"outputs": [
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
98 |
"source": [
|
99 |
-
"
|
100 |
]
|
101 |
},
|
102 |
{
|
@@ -105,9 +153,7 @@
|
|
105 |
"metadata": {},
|
106 |
"outputs": [],
|
107 |
"source": [
|
108 |
-
"#
|
109 |
-
"# if api.file_exists(repo_id=repo_name, filename=file.name) is True:\n",
|
110 |
-
"# api.delete_file(path_in_repo=file.name, repo_id=repo_name)"
|
111 |
]
|
112 |
},
|
113 |
{
|
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
+
"execution_count": null,
|
6 |
"metadata": {},
|
7 |
"outputs": [],
|
8 |
"source": [
|
9 |
+
"from pathlib import Path\n",
|
10 |
+
"import time\n",
|
11 |
+
"\n",
|
12 |
"from huggingface_hub import create_repo, HfApi\n",
|
13 |
"\n",
|
14 |
"import com_const as cc"
|
|
|
16 |
},
|
17 |
{
|
18 |
"cell_type": "code",
|
19 |
+
"execution_count": null,
|
20 |
"metadata": {},
|
21 |
"outputs": [],
|
22 |
"source": [
|
|
|
25 |
},
|
26 |
{
|
27 |
"cell_type": "code",
|
28 |
+
"execution_count": null,
|
29 |
"metadata": {},
|
30 |
+
"outputs": [],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
"source": [
|
32 |
"create_repo(repo_id, exist_ok=True)"
|
33 |
]
|
34 |
},
|
35 |
{
|
36 |
"cell_type": "code",
|
37 |
+
"execution_count": null,
|
38 |
"metadata": {},
|
39 |
"outputs": [],
|
40 |
"source": [
|
|
|
43 |
},
|
44 |
{
|
45 |
"cell_type": "code",
|
46 |
+
"execution_count": null,
|
47 |
+
"metadata": {},
|
48 |
+
"outputs": [],
|
49 |
+
"source": [
|
50 |
+
"def upload_file(file_path: Path, dst_path=None):\n",
|
51 |
+
" api.upload_file(\n",
|
52 |
+
" path_or_fileobj=file_path,\n",
|
53 |
+
" path_in_repo=(\n",
|
54 |
+
" dst_path\n",
|
55 |
+
" if dst_path is not None\n",
|
56 |
+
" else str(file_path.relative_to(cc.path_to_root))\n",
|
57 |
+
" ),\n",
|
58 |
+
" repo_id=repo_id,\n",
|
59 |
+
" )"
|
60 |
+
]
|
61 |
+
},
|
62 |
+
{
|
63 |
+
"cell_type": "code",
|
64 |
+
"execution_count": null,
|
65 |
+
"metadata": {},
|
66 |
+
"outputs": [],
|
67 |
+
"source": [
|
68 |
+
"def upload_folder(fld, multi_commits=True, multi_commits_verbose=True):\n",
|
69 |
+
" api.upload_folder(\n",
|
70 |
+
" folder_path=fld,\n",
|
71 |
+
" repo_id=repo_id,\n",
|
72 |
+
" path_in_repo=str(fld.relative_to(cc.path_to_root)),\n",
|
73 |
+
" multi_commits=multi_commits,\n",
|
74 |
+
" multi_commits_verbose=multi_commits_verbose,\n",
|
75 |
+
" )"
|
76 |
+
]
|
77 |
+
},
|
78 |
+
{
|
79 |
+
"cell_type": "code",
|
80 |
+
"execution_count": null,
|
81 |
+
"metadata": {},
|
82 |
+
"outputs": [],
|
83 |
+
"source": [
|
84 |
+
"def upload_big_folder(\n",
|
85 |
+
" fld, multi_commits=True, multi_commits_verbose=True, max_attempts: int = -1\n",
|
86 |
+
"):\n",
|
87 |
+
" i = 1\n",
|
88 |
+
" while i < max_attempts or max_attempts < 0:\n",
|
89 |
+
" try:\n",
|
90 |
+
" upload_folder(\n",
|
91 |
+
" fld=fld,\n",
|
92 |
+
" multi_commits=multi_commits,\n",
|
93 |
+
" multi_commits_verbose=multi_commits_verbose,\n",
|
94 |
+
" )\n",
|
95 |
+
" except:\n",
|
96 |
+
" print(f\"Trying once more: {i}\")\n",
|
97 |
+
" i += 1\n",
|
98 |
+
" time.sleep(100)\n",
|
99 |
+
" else:\n",
|
100 |
+
" break"
|
101 |
+
]
|
102 |
+
},
|
103 |
+
{
|
104 |
+
"cell_type": "code",
|
105 |
+
"execution_count": null,
|
106 |
"metadata": {},
|
107 |
+
"outputs": [],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
108 |
"source": [
|
109 |
+
"# for file in [\".gitignore\", \"LICENSE\", \"README.md\", \"requirements.txt\"]:\n",
|
110 |
+
"# upload_file(Path(\"..\").joinpath(file), dst_path=file)"
|
111 |
]
|
112 |
},
|
113 |
{
|
|
|
116 |
"metadata": {},
|
117 |
"outputs": [],
|
118 |
"source": [
|
119 |
+
"# upload_folder(cc.path_to_src)\n",
|
120 |
+
"# upload_folder(cc.path_to_data)"
|
121 |
]
|
122 |
},
|
123 |
{
|
124 |
"cell_type": "code",
|
125 |
+
"execution_count": null,
|
126 |
"metadata": {},
|
127 |
+
"outputs": [],
|
128 |
+
"source": [
|
129 |
+
"# upload_file(file_path=cc.path_to_chk_detector.joinpath(\"leaf_disc_detector.ckpt\"))"
|
130 |
+
]
|
131 |
+
},
|
132 |
+
{
|
133 |
+
"cell_type": "code",
|
134 |
+
"execution_count": null,
|
135 |
+
"metadata": {},
|
136 |
+
"outputs": [],
|
137 |
+
"source": [
|
138 |
+
"# upload_file(file_path=cc.path_to_chk_oiv.joinpath(\"oiv_scorer.ckpt\"))"
|
139 |
+
]
|
140 |
+
},
|
141 |
+
{
|
142 |
+
"cell_type": "code",
|
143 |
+
"execution_count": null,
|
144 |
+
"metadata": {},
|
145 |
+
"outputs": [],
|
146 |
"source": [
|
147 |
+
"# upload_big_folder(fld=cc.path_to_leaf_patches, multi_commits_verbose=False)"
|
148 |
]
|
149 |
},
|
150 |
{
|
|
|
153 |
"metadata": {},
|
154 |
"outputs": [],
|
155 |
"source": [
|
156 |
+
"# upload_big_folder(fld=cc.path_to_plates, multi_commits_verbose=False)"
|
|
|
|
|
157 |
]
|
158 |
},
|
159 |
{
|