Hannes Kuchelmeister committed on
Commit
b67f297
·
1 Parent(s): 93d387e

Add dataset in preparation for the data module

Browse files
models/notebooks/1.0-hfk-datamodules-exploration.ipynb ADDED
@@ -0,0 +1,394 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {},
6
+ "source": [
7
+ "## Exploring Code for Data"
8
+ ]
9
+ },
10
+ {
11
+ "cell_type": "code",
12
+ "execution_count": 1,
13
+ "metadata": {},
14
+ "outputs": [],
15
+ "source": [
16
+ "import pandas as pd"
17
+ ]
18
+ },
19
+ {
20
+ "cell_type": "code",
21
+ "execution_count": 2,
22
+ "metadata": {},
23
+ "outputs": [],
24
+ "source": [
25
+ "metadata = pd.read_csv(\"../data/focus/metadata.csv\")"
26
+ ]
27
+ },
28
+ {
29
+ "cell_type": "code",
30
+ "execution_count": 3,
31
+ "metadata": {},
32
+ "outputs": [
33
+ {
34
+ "data": {
35
+ "text/html": [
36
+ "<div>\n",
37
+ "<style scoped>\n",
38
+ " .dataframe tbody tr th:only-of-type {\n",
39
+ " vertical-align: middle;\n",
40
+ " }\n",
41
+ "\n",
42
+ " .dataframe tbody tr th {\n",
43
+ " vertical-align: top;\n",
44
+ " }\n",
45
+ "\n",
46
+ " .dataframe thead th {\n",
47
+ " text-align: right;\n",
48
+ " }\n",
49
+ "</style>\n",
50
+ "<table border=\"1\" class=\"dataframe\">\n",
51
+ " <thead>\n",
52
+ " <tr style=\"text-align: right;\">\n",
53
+ " <th></th>\n",
54
+ " <th>Unnamed: 0</th>\n",
55
+ " <th>image_path</th>\n",
56
+ " <th>original_filename</th>\n",
57
+ " <th>study_id</th>\n",
58
+ " <th>scan_uuid</th>\n",
59
+ " <th>focus_value</th>\n",
60
+ " <th>stack_id</th>\n",
61
+ " <th>obj_name</th>\n",
62
+ " </tr>\n",
63
+ " </thead>\n",
64
+ " <tbody>\n",
65
+ " <tr>\n",
66
+ " <th>0</th>\n",
67
+ " <td>0</td>\n",
68
+ " <td>31/fba56d57-656e-4b6f-ba63-e4ba3ad083f5/I01631...</td>\n",
69
+ " <td>I01631_X013_Y012_Z5107.jpg</td>\n",
70
+ " <td>31</td>\n",
71
+ " <td>fba56d57-656e-4b6f-ba63-e4ba3ad083f5</td>\n",
72
+ " <td>-2.82953</td>\n",
73
+ " <td>1658220</td>\n",
74
+ " <td>133</td>\n",
75
+ " </tr>\n",
76
+ " <tr>\n",
77
+ " <th>1</th>\n",
78
+ " <td>1</td>\n",
79
+ " <td>31/fba56d57-656e-4b6f-ba63-e4ba3ad083f5/I01632...</td>\n",
80
+ " <td>I01632_X013_Y012_Z5175.jpg</td>\n",
81
+ " <td>31</td>\n",
82
+ " <td>fba56d57-656e-4b6f-ba63-e4ba3ad083f5</td>\n",
83
+ " <td>-2.70408</td>\n",
84
+ " <td>1658220</td>\n",
85
+ " <td>133</td>\n",
86
+ " </tr>\n",
87
+ " <tr>\n",
88
+ " <th>2</th>\n",
89
+ " <td>2</td>\n",
90
+ " <td>31/fba56d57-656e-4b6f-ba63-e4ba3ad083f5/I01633...</td>\n",
91
+ " <td>I01633_X013_Y012_Z5722.jpg</td>\n",
92
+ " <td>31</td>\n",
93
+ " <td>fba56d57-656e-4b6f-ba63-e4ba3ad083f5</td>\n",
94
+ " <td>-2.69918</td>\n",
95
+ " <td>1658220</td>\n",
96
+ " <td>133</td>\n",
97
+ " </tr>\n",
98
+ " <tr>\n",
99
+ " <th>3</th>\n",
100
+ " <td>3</td>\n",
101
+ " <td>31/fba56d57-656e-4b6f-ba63-e4ba3ad083f5/I01634...</td>\n",
102
+ " <td>I01634_X013_Y012_Z5244.jpg</td>\n",
103
+ " <td>31</td>\n",
104
+ " <td>fba56d57-656e-4b6f-ba63-e4ba3ad083f5</td>\n",
105
+ " <td>-2.50266</td>\n",
106
+ " <td>1658220</td>\n",
107
+ " <td>133</td>\n",
108
+ " </tr>\n",
109
+ " <tr>\n",
110
+ " <th>4</th>\n",
111
+ " <td>4</td>\n",
112
+ " <td>31/fba56d57-656e-4b6f-ba63-e4ba3ad083f5/I01635...</td>\n",
113
+ " <td>I01635_X013_Y012_Z5654.jpg</td>\n",
114
+ " <td>31</td>\n",
115
+ " <td>fba56d57-656e-4b6f-ba63-e4ba3ad083f5</td>\n",
116
+ " <td>-2.36450</td>\n",
117
+ " <td>1658220</td>\n",
118
+ " <td>133</td>\n",
119
+ " </tr>\n",
120
+ " <tr>\n",
121
+ " <th>...</th>\n",
122
+ " <td>...</td>\n",
123
+ " <td>...</td>\n",
124
+ " <td>...</td>\n",
125
+ " <td>...</td>\n",
126
+ " <td>...</td>\n",
127
+ " <td>...</td>\n",
128
+ " <td>...</td>\n",
129
+ " <td>...</td>\n",
130
+ " </tr>\n",
131
+ " <tr>\n",
132
+ " <th>565</th>\n",
133
+ " <td>565</td>\n",
134
+ " <td>31/4c7e9e66-61a1-47ca-aa4e-340b0eef8db1/I01406...</td>\n",
135
+ " <td>I01406_X016_Y009_Z5361.jpg</td>\n",
136
+ " <td>31</td>\n",
137
+ " <td>4c7e9e66-61a1-47ca-aa4e-340b0eef8db1</td>\n",
138
+ " <td>-3.41147</td>\n",
139
+ " <td>1674918</td>\n",
140
+ " <td>217</td>\n",
141
+ " </tr>\n",
142
+ " <tr>\n",
143
+ " <th>566</th>\n",
144
+ " <td>566</td>\n",
145
+ " <td>31/4c7e9e66-61a1-47ca-aa4e-340b0eef8db1/I01407...</td>\n",
146
+ " <td>I01407_X016_Y009_Z5087.jpg</td>\n",
147
+ " <td>31</td>\n",
148
+ " <td>4c7e9e66-61a1-47ca-aa4e-340b0eef8db1</td>\n",
149
+ " <td>-3.05424</td>\n",
150
+ " <td>1674918</td>\n",
151
+ " <td>217</td>\n",
152
+ " </tr>\n",
153
+ " <tr>\n",
154
+ " <th>567</th>\n",
155
+ " <td>567</td>\n",
156
+ " <td>31/4c7e9e66-61a1-47ca-aa4e-340b0eef8db1/I01408...</td>\n",
157
+ " <td>I01408_X016_Y009_Z5292.jpg</td>\n",
158
+ " <td>31</td>\n",
159
+ " <td>4c7e9e66-61a1-47ca-aa4e-340b0eef8db1</td>\n",
160
+ " <td>-1.48608</td>\n",
161
+ " <td>1674918</td>\n",
162
+ " <td>217</td>\n",
163
+ " </tr>\n",
164
+ " <tr>\n",
165
+ " <th>568</th>\n",
166
+ " <td>568</td>\n",
167
+ " <td>31/4c7e9e66-61a1-47ca-aa4e-340b0eef8db1/I01409...</td>\n",
168
+ " <td>I01409_X016_Y009_Z5156.jpg</td>\n",
169
+ " <td>31</td>\n",
170
+ " <td>4c7e9e66-61a1-47ca-aa4e-340b0eef8db1</td>\n",
171
+ " <td>-0.52804</td>\n",
172
+ " <td>1674918</td>\n",
173
+ " <td>217</td>\n",
174
+ " </tr>\n",
175
+ " <tr>\n",
176
+ " <th>569</th>\n",
177
+ " <td>569</td>\n",
178
+ " <td>31/4c7e9e66-61a1-47ca-aa4e-340b0eef8db1/I01410...</td>\n",
179
+ " <td>I01410_X016_Y009_Z5224.jpg</td>\n",
180
+ " <td>31</td>\n",
181
+ " <td>4c7e9e66-61a1-47ca-aa4e-340b0eef8db1</td>\n",
182
+ " <td>0.00000</td>\n",
183
+ " <td>1674918</td>\n",
184
+ " <td>217</td>\n",
185
+ " </tr>\n",
186
+ " </tbody>\n",
187
+ "</table>\n",
188
+ "<p>570 rows × 8 columns</p>\n",
189
+ "</div>"
190
+ ],
191
+ "text/plain": [
192
+ " Unnamed: 0 image_path \\\n",
193
+ "0 0 31/fba56d57-656e-4b6f-ba63-e4ba3ad083f5/I01631... \n",
194
+ "1 1 31/fba56d57-656e-4b6f-ba63-e4ba3ad083f5/I01632... \n",
195
+ "2 2 31/fba56d57-656e-4b6f-ba63-e4ba3ad083f5/I01633... \n",
196
+ "3 3 31/fba56d57-656e-4b6f-ba63-e4ba3ad083f5/I01634... \n",
197
+ "4 4 31/fba56d57-656e-4b6f-ba63-e4ba3ad083f5/I01635... \n",
198
+ ".. ... ... \n",
199
+ "565 565 31/4c7e9e66-61a1-47ca-aa4e-340b0eef8db1/I01406... \n",
200
+ "566 566 31/4c7e9e66-61a1-47ca-aa4e-340b0eef8db1/I01407... \n",
201
+ "567 567 31/4c7e9e66-61a1-47ca-aa4e-340b0eef8db1/I01408... \n",
202
+ "568 568 31/4c7e9e66-61a1-47ca-aa4e-340b0eef8db1/I01409... \n",
203
+ "569 569 31/4c7e9e66-61a1-47ca-aa4e-340b0eef8db1/I01410... \n",
204
+ "\n",
205
+ " original_filename study_id \\\n",
206
+ "0 I01631_X013_Y012_Z5107.jpg 31 \n",
207
+ "1 I01632_X013_Y012_Z5175.jpg 31 \n",
208
+ "2 I01633_X013_Y012_Z5722.jpg 31 \n",
209
+ "3 I01634_X013_Y012_Z5244.jpg 31 \n",
210
+ "4 I01635_X013_Y012_Z5654.jpg 31 \n",
211
+ ".. ... ... \n",
212
+ "565 I01406_X016_Y009_Z5361.jpg 31 \n",
213
+ "566 I01407_X016_Y009_Z5087.jpg 31 \n",
214
+ "567 I01408_X016_Y009_Z5292.jpg 31 \n",
215
+ "568 I01409_X016_Y009_Z5156.jpg 31 \n",
216
+ "569 I01410_X016_Y009_Z5224.jpg 31 \n",
217
+ "\n",
218
+ " scan_uuid focus_value stack_id obj_name \n",
219
+ "0 fba56d57-656e-4b6f-ba63-e4ba3ad083f5 -2.82953 1658220 133 \n",
220
+ "1 fba56d57-656e-4b6f-ba63-e4ba3ad083f5 -2.70408 1658220 133 \n",
221
+ "2 fba56d57-656e-4b6f-ba63-e4ba3ad083f5 -2.69918 1658220 133 \n",
222
+ "3 fba56d57-656e-4b6f-ba63-e4ba3ad083f5 -2.50266 1658220 133 \n",
223
+ "4 fba56d57-656e-4b6f-ba63-e4ba3ad083f5 -2.36450 1658220 133 \n",
224
+ ".. ... ... ... ... \n",
225
+ "565 4c7e9e66-61a1-47ca-aa4e-340b0eef8db1 -3.41147 1674918 217 \n",
226
+ "566 4c7e9e66-61a1-47ca-aa4e-340b0eef8db1 -3.05424 1674918 217 \n",
227
+ "567 4c7e9e66-61a1-47ca-aa4e-340b0eef8db1 -1.48608 1674918 217 \n",
228
+ "568 4c7e9e66-61a1-47ca-aa4e-340b0eef8db1 -0.52804 1674918 217 \n",
229
+ "569 4c7e9e66-61a1-47ca-aa4e-340b0eef8db1 0.00000 1674918 217 \n",
230
+ "\n",
231
+ "[570 rows x 8 columns]"
232
+ ]
233
+ },
234
+ "execution_count": 3,
235
+ "metadata": {},
236
+ "output_type": "execute_result"
237
+ }
238
+ ],
239
+ "source": [
240
+ "metadata"
241
+ ]
242
+ },
243
+ {
244
+ "cell_type": "code",
245
+ "execution_count": 4,
246
+ "metadata": {},
247
+ "outputs": [
248
+ {
249
+ "data": {
250
+ "text/plain": [
251
+ "'31/fba56d57-656e-4b6f-ba63-e4ba3ad083f5/I01631_X013_Y012_Z5107_600_375.jpg'"
252
+ ]
253
+ },
254
+ "execution_count": 4,
255
+ "metadata": {},
256
+ "output_type": "execute_result"
257
+ }
258
+ ],
259
+ "source": [
260
+ "idx = 0\n",
261
+ "# File Path\n",
262
+ "metadata.iloc[idx, 1]"
263
+ ]
264
+ },
265
+ {
266
+ "cell_type": "code",
267
+ "execution_count": 5,
268
+ "metadata": {},
269
+ "outputs": [
270
+ {
271
+ "data": {
272
+ "text/plain": [
273
+ "-2.82953"
274
+ ]
275
+ },
276
+ "execution_count": 5,
277
+ "metadata": {},
278
+ "output_type": "execute_result"
279
+ }
280
+ ],
281
+ "source": [
282
+ "# Focus Value\n",
283
+ "metadata.iloc[idx, 5]"
284
+ ]
285
+ },
286
+ {
287
+ "cell_type": "markdown",
288
+ "metadata": {},
289
+ "source": [
290
+ "## Testing FocusDataSet"
291
+ ]
292
+ },
293
+ {
294
+ "cell_type": "code",
295
+ "execution_count": 7,
296
+ "metadata": {},
297
+ "outputs": [
298
+ {
299
+ "data": {
300
+ "text/plain": [
301
+ "{'image': array([[[181, 190, 171],\n",
302
+ " [180, 189, 170],\n",
303
+ " [180, 186, 172],\n",
304
+ " ...,\n",
305
+ " [172, 176, 177],\n",
306
+ " [171, 176, 179],\n",
307
+ " [170, 178, 180]],\n",
308
+ " \n",
309
+ " [[181, 190, 173],\n",
310
+ " [181, 190, 173],\n",
311
+ " [180, 188, 175],\n",
312
+ " ...,\n",
313
+ " [169, 173, 174],\n",
314
+ " [169, 175, 175],\n",
315
+ " [170, 176, 176]],\n",
316
+ " \n",
317
+ " [[179, 190, 176],\n",
318
+ " [179, 190, 176],\n",
319
+ " [179, 189, 180],\n",
320
+ " ...,\n",
321
+ " [169, 169, 167],\n",
322
+ " [169, 171, 170],\n",
323
+ " [169, 171, 170]],\n",
324
+ " \n",
325
+ " ...,\n",
326
+ " \n",
327
+ " [[195, 201, 197],\n",
328
+ " [195, 201, 197],\n",
329
+ " [195, 201, 197],\n",
330
+ " ...,\n",
331
+ " [198, 195, 188],\n",
332
+ " [199, 198, 196],\n",
333
+ " [202, 200, 205]],\n",
334
+ " \n",
335
+ " [[195, 201, 197],\n",
336
+ " [195, 201, 197],\n",
337
+ " [195, 201, 197],\n",
338
+ " ...,\n",
339
+ " [198, 195, 188],\n",
340
+ " [199, 198, 196],\n",
341
+ " [202, 200, 205]],\n",
342
+ " \n",
343
+ " [[195, 201, 197],\n",
344
+ " [195, 201, 197],\n",
345
+ " [195, 201, 197],\n",
346
+ " ...,\n",
347
+ " [198, 195, 188],\n",
348
+ " [199, 198, 196],\n",
349
+ " [202, 200, 203]]], dtype=uint8),\n",
350
+ " 'focus_value': -2.70408}"
351
+ ]
352
+ },
353
+ "execution_count": 7,
354
+ "metadata": {},
355
+ "output_type": "execute_result"
356
+ }
357
+ ],
358
+ "source": [
359
+ "from importlib.machinery import SourceFileLoader\n",
360
+ "\n",
361
+ "focus_datamodule = SourceFileLoader(\"focus_datamodule\", \"../src/datamodules/focus_datamodule.py\").load_module()\n",
362
+ "from focus_datamodule import FocusDataSet\n",
363
+ "\n",
364
+ "ds = FocusDataSet(\"../data/focus/metadata.csv\", \"../data/focus/\")\n",
365
+ "ds[1]"
366
+ ]
367
+ }
368
+ ],
369
+ "metadata": {
370
+ "interpreter": {
371
+ "hash": "f9f85f796d01129d0dd105a088854619f454435301f6ffec2fea96ecbd9be4ac"
372
+ },
373
+ "kernelspec": {
374
+ "display_name": "Python 3.9.7 64-bit",
375
+ "language": "python",
376
+ "name": "python3"
377
+ },
378
+ "language_info": {
379
+ "codemirror_mode": {
380
+ "name": "ipython",
381
+ "version": 3
382
+ },
383
+ "file_extension": ".py",
384
+ "mimetype": "text/x-python",
385
+ "name": "python",
386
+ "nbconvert_exporter": "python",
387
+ "pygments_lexer": "ipython3",
388
+ "version": "3.8.10"
389
+ },
390
+ "orig_nbformat": 4
391
+ },
392
+ "nbformat": 4,
393
+ "nbformat_minor": 2
394
+ }
models/requirements.txt CHANGED
@@ -4,6 +4,9 @@ torchvision>=0.11.0
4
  pytorch-lightning>=1.5.10
5
  torchmetrics>=0.7.0
6
 
 
 
 
7
  # --------- hydra --------- #
8
  hydra-core>=1.1.0
9
  hydra-colorlog>=1.1.0
 
4
  pytorch-lightning>=1.5.10
5
  torchmetrics>=0.7.0
6
 
7
+ # --------- data and model dependencies --------- #
8
+ scikit-image
9
+
10
  # --------- hydra --------- #
11
  hydra-core>=1.1.0
12
  hydra-colorlog>=1.1.0
models/src/datamodules/focus_datamodule.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import Any, Optional, Tuple, Union
3
+ from typing_extensions import Self
4
+ import numpy as np
5
+ import pandas as pd
6
+ from skimage import io
7
+
8
+ import torch
9
+ from pytorch_lightning import LightningDataModule
10
+ from torch.utils.data import ConcatDataset, DataLoader, Dataset, random_split
11
+ from torchvision.datasets import MNIST
12
+ from torchvision.transforms import transforms
13
+
14
+
15
class FocusDataSet(Dataset):
    """Dataset for z-stacked images of neglected tropical diseases.

    Every sample pairs one image file with the focus value stored for it in
    the metadata CSV. Samples are dictionaries with the keys ``"image"`` and
    ``"focus_value"``.
    """

    # Header names of the two metadata columns this dataset reads.
    IMAGE_PATH_COLUMN = "image_path"
    FOCUS_VALUE_COLUMN = "focus_value"

    # Positions that were hard-coded before columns were looked up by name.
    # They are kept as a fallback so CSV files without the expected headers
    # keep working exactly as they did.
    _LEGACY_IMAGE_PATH_POSITION = 1
    _LEGACY_FOCUS_VALUE_POSITION = 5

    def __init__(self, csv_file, root_dir, transform=None):
        """Initialize the focus stack dataset.

        Args:
            csv_file (string): Path to the csv file with annotations.
            root_dir (string): Directory with all the images. The image path
                stored in the csv file is joined onto this directory.
            transform (callable, optional): Optional transform to be applied
                on a sample (the whole sample dictionary, not only the image).
        """
        self.metadata = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self) -> int:
        """Get the length of the dataset.

        Returns:
            int: the number of rows in the metadata table
        """
        return len(self.metadata)

    def _resolve_column(self, name, fallback_position):
        """Return the position of column ``name``, or the legacy position.

        Looking the column up by name keeps the dataset correct when the csv
        file is written without its leading index column or with reordered
        columns; a fixed position would silently pick the wrong field.
        """
        columns = list(self.metadata.columns)
        if name in columns:
            return columns.index(name)
        return fallback_position

    def __getitem__(self, idx):
        """Get one item from the dataset.

        Args:
            idx (int): The index of the sample that is to be retrieved. A
                tensor index is converted with ``tolist()`` first.

        Returns:
            A dictionary containing "image" (the array returned by
            ``skimage.io.imread``) and "focus_value" (the value stored in the
            metadata row), after the optional transform has been applied.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        path_position = self._resolve_column(
            self.IMAGE_PATH_COLUMN, self._LEGACY_IMAGE_PATH_POSITION
        )
        focus_position = self._resolve_column(
            self.FOCUS_VALUE_COLUMN, self._LEGACY_FOCUS_VALUE_POSITION
        )

        img_name = os.path.join(self.root_dir, self.metadata.iloc[idx, path_position])
        image = io.imread(img_name)
        focus_value = self.metadata.iloc[idx, focus_position]
        sample = {"image": image, "focus_value": focus_value}

        if self.transform:
            sample = self.transform(sample)

        return sample
60
+