Hannes Kuchelmeister committed on
Commit
d170477
·
1 Parent(s): ba9c868

added in_memory loading to reduce disk reads and increase speed

Browse files
notebooks/1.0-hfk-datamodules-exploration.ipynb CHANGED
@@ -9,7 +9,7 @@
9
  },
10
  {
11
  "cell_type": "code",
12
- "execution_count": 1,
13
  "metadata": {},
14
  "outputs": [],
15
  "source": [
@@ -18,7 +18,7 @@
18
  },
19
  {
20
  "cell_type": "code",
21
- "execution_count": 2,
22
  "metadata": {},
23
  "outputs": [],
24
  "source": [
@@ -27,235 +27,18 @@
27
  },
28
  {
29
  "cell_type": "code",
30
- "execution_count": 3,
31
  "metadata": {},
32
- "outputs": [
33
- {
34
- "data": {
35
- "text/html": [
36
- "<div>\n",
37
- "<style scoped>\n",
38
- " .dataframe tbody tr th:only-of-type {\n",
39
- " vertical-align: middle;\n",
40
- " }\n",
41
- "\n",
42
- " .dataframe tbody tr th {\n",
43
- " vertical-align: top;\n",
44
- " }\n",
45
- "\n",
46
- " .dataframe thead th {\n",
47
- " text-align: right;\n",
48
- " }\n",
49
- "</style>\n",
50
- "<table border=\"1\" class=\"dataframe\">\n",
51
- " <thead>\n",
52
- " <tr style=\"text-align: right;\">\n",
53
- " <th></th>\n",
54
- " <th>Unnamed: 0</th>\n",
55
- " <th>image_path</th>\n",
56
- " <th>original_filename</th>\n",
57
- " <th>study_id</th>\n",
58
- " <th>scan_uuid</th>\n",
59
- " <th>focus_value</th>\n",
60
- " <th>stack_id</th>\n",
61
- " <th>obj_name</th>\n",
62
- " </tr>\n",
63
- " </thead>\n",
64
- " <tbody>\n",
65
- " <tr>\n",
66
- " <th>0</th>\n",
67
- " <td>0</td>\n",
68
- " <td>31/fba56d57-656e-4b6f-ba63-e4ba3ad083f5/I01631...</td>\n",
69
- " <td>I01631_X013_Y012_Z5107.jpg</td>\n",
70
- " <td>31</td>\n",
71
- " <td>fba56d57-656e-4b6f-ba63-e4ba3ad083f5</td>\n",
72
- " <td>-2.82953</td>\n",
73
- " <td>1658220</td>\n",
74
- " <td>133</td>\n",
75
- " </tr>\n",
76
- " <tr>\n",
77
- " <th>1</th>\n",
78
- " <td>1</td>\n",
79
- " <td>31/fba56d57-656e-4b6f-ba63-e4ba3ad083f5/I01632...</td>\n",
80
- " <td>I01632_X013_Y012_Z5175.jpg</td>\n",
81
- " <td>31</td>\n",
82
- " <td>fba56d57-656e-4b6f-ba63-e4ba3ad083f5</td>\n",
83
- " <td>-2.70408</td>\n",
84
- " <td>1658220</td>\n",
85
- " <td>133</td>\n",
86
- " </tr>\n",
87
- " <tr>\n",
88
- " <th>2</th>\n",
89
- " <td>2</td>\n",
90
- " <td>31/fba56d57-656e-4b6f-ba63-e4ba3ad083f5/I01633...</td>\n",
91
- " <td>I01633_X013_Y012_Z5722.jpg</td>\n",
92
- " <td>31</td>\n",
93
- " <td>fba56d57-656e-4b6f-ba63-e4ba3ad083f5</td>\n",
94
- " <td>-2.69918</td>\n",
95
- " <td>1658220</td>\n",
96
- " <td>133</td>\n",
97
- " </tr>\n",
98
- " <tr>\n",
99
- " <th>3</th>\n",
100
- " <td>3</td>\n",
101
- " <td>31/fba56d57-656e-4b6f-ba63-e4ba3ad083f5/I01634...</td>\n",
102
- " <td>I01634_X013_Y012_Z5244.jpg</td>\n",
103
- " <td>31</td>\n",
104
- " <td>fba56d57-656e-4b6f-ba63-e4ba3ad083f5</td>\n",
105
- " <td>-2.50266</td>\n",
106
- " <td>1658220</td>\n",
107
- " <td>133</td>\n",
108
- " </tr>\n",
109
- " <tr>\n",
110
- " <th>4</th>\n",
111
- " <td>4</td>\n",
112
- " <td>31/fba56d57-656e-4b6f-ba63-e4ba3ad083f5/I01635...</td>\n",
113
- " <td>I01635_X013_Y012_Z5654.jpg</td>\n",
114
- " <td>31</td>\n",
115
- " <td>fba56d57-656e-4b6f-ba63-e4ba3ad083f5</td>\n",
116
- " <td>-2.36450</td>\n",
117
- " <td>1658220</td>\n",
118
- " <td>133</td>\n",
119
- " </tr>\n",
120
- " <tr>\n",
121
- " <th>...</th>\n",
122
- " <td>...</td>\n",
123
- " <td>...</td>\n",
124
- " <td>...</td>\n",
125
- " <td>...</td>\n",
126
- " <td>...</td>\n",
127
- " <td>...</td>\n",
128
- " <td>...</td>\n",
129
- " <td>...</td>\n",
130
- " </tr>\n",
131
- " <tr>\n",
132
- " <th>565</th>\n",
133
- " <td>565</td>\n",
134
- " <td>31/4c7e9e66-61a1-47ca-aa4e-340b0eef8db1/I01406...</td>\n",
135
- " <td>I01406_X016_Y009_Z5361.jpg</td>\n",
136
- " <td>31</td>\n",
137
- " <td>4c7e9e66-61a1-47ca-aa4e-340b0eef8db1</td>\n",
138
- " <td>-3.41147</td>\n",
139
- " <td>1674918</td>\n",
140
- " <td>217</td>\n",
141
- " </tr>\n",
142
- " <tr>\n",
143
- " <th>566</th>\n",
144
- " <td>566</td>\n",
145
- " <td>31/4c7e9e66-61a1-47ca-aa4e-340b0eef8db1/I01407...</td>\n",
146
- " <td>I01407_X016_Y009_Z5087.jpg</td>\n",
147
- " <td>31</td>\n",
148
- " <td>4c7e9e66-61a1-47ca-aa4e-340b0eef8db1</td>\n",
149
- " <td>-3.05424</td>\n",
150
- " <td>1674918</td>\n",
151
- " <td>217</td>\n",
152
- " </tr>\n",
153
- " <tr>\n",
154
- " <th>567</th>\n",
155
- " <td>567</td>\n",
156
- " <td>31/4c7e9e66-61a1-47ca-aa4e-340b0eef8db1/I01408...</td>\n",
157
- " <td>I01408_X016_Y009_Z5292.jpg</td>\n",
158
- " <td>31</td>\n",
159
- " <td>4c7e9e66-61a1-47ca-aa4e-340b0eef8db1</td>\n",
160
- " <td>-1.48608</td>\n",
161
- " <td>1674918</td>\n",
162
- " <td>217</td>\n",
163
- " </tr>\n",
164
- " <tr>\n",
165
- " <th>568</th>\n",
166
- " <td>568</td>\n",
167
- " <td>31/4c7e9e66-61a1-47ca-aa4e-340b0eef8db1/I01409...</td>\n",
168
- " <td>I01409_X016_Y009_Z5156.jpg</td>\n",
169
- " <td>31</td>\n",
170
- " <td>4c7e9e66-61a1-47ca-aa4e-340b0eef8db1</td>\n",
171
- " <td>-0.52804</td>\n",
172
- " <td>1674918</td>\n",
173
- " <td>217</td>\n",
174
- " </tr>\n",
175
- " <tr>\n",
176
- " <th>569</th>\n",
177
- " <td>569</td>\n",
178
- " <td>31/4c7e9e66-61a1-47ca-aa4e-340b0eef8db1/I01410...</td>\n",
179
- " <td>I01410_X016_Y009_Z5224.jpg</td>\n",
180
- " <td>31</td>\n",
181
- " <td>4c7e9e66-61a1-47ca-aa4e-340b0eef8db1</td>\n",
182
- " <td>0.00000</td>\n",
183
- " <td>1674918</td>\n",
184
- " <td>217</td>\n",
185
- " </tr>\n",
186
- " </tbody>\n",
187
- "</table>\n",
188
- "<p>570 rows × 8 columns</p>\n",
189
- "</div>"
190
- ],
191
- "text/plain": [
192
- " Unnamed: 0 image_path \\\n",
193
- "0 0 31/fba56d57-656e-4b6f-ba63-e4ba3ad083f5/I01631... \n",
194
- "1 1 31/fba56d57-656e-4b6f-ba63-e4ba3ad083f5/I01632... \n",
195
- "2 2 31/fba56d57-656e-4b6f-ba63-e4ba3ad083f5/I01633... \n",
196
- "3 3 31/fba56d57-656e-4b6f-ba63-e4ba3ad083f5/I01634... \n",
197
- "4 4 31/fba56d57-656e-4b6f-ba63-e4ba3ad083f5/I01635... \n",
198
- ".. ... ... \n",
199
- "565 565 31/4c7e9e66-61a1-47ca-aa4e-340b0eef8db1/I01406... \n",
200
- "566 566 31/4c7e9e66-61a1-47ca-aa4e-340b0eef8db1/I01407... \n",
201
- "567 567 31/4c7e9e66-61a1-47ca-aa4e-340b0eef8db1/I01408... \n",
202
- "568 568 31/4c7e9e66-61a1-47ca-aa4e-340b0eef8db1/I01409... \n",
203
- "569 569 31/4c7e9e66-61a1-47ca-aa4e-340b0eef8db1/I01410... \n",
204
- "\n",
205
- " original_filename study_id \\\n",
206
- "0 I01631_X013_Y012_Z5107.jpg 31 \n",
207
- "1 I01632_X013_Y012_Z5175.jpg 31 \n",
208
- "2 I01633_X013_Y012_Z5722.jpg 31 \n",
209
- "3 I01634_X013_Y012_Z5244.jpg 31 \n",
210
- "4 I01635_X013_Y012_Z5654.jpg 31 \n",
211
- ".. ... ... \n",
212
- "565 I01406_X016_Y009_Z5361.jpg 31 \n",
213
- "566 I01407_X016_Y009_Z5087.jpg 31 \n",
214
- "567 I01408_X016_Y009_Z5292.jpg 31 \n",
215
- "568 I01409_X016_Y009_Z5156.jpg 31 \n",
216
- "569 I01410_X016_Y009_Z5224.jpg 31 \n",
217
- "\n",
218
- " scan_uuid focus_value stack_id obj_name \n",
219
- "0 fba56d57-656e-4b6f-ba63-e4ba3ad083f5 -2.82953 1658220 133 \n",
220
- "1 fba56d57-656e-4b6f-ba63-e4ba3ad083f5 -2.70408 1658220 133 \n",
221
- "2 fba56d57-656e-4b6f-ba63-e4ba3ad083f5 -2.69918 1658220 133 \n",
222
- "3 fba56d57-656e-4b6f-ba63-e4ba3ad083f5 -2.50266 1658220 133 \n",
223
- "4 fba56d57-656e-4b6f-ba63-e4ba3ad083f5 -2.36450 1658220 133 \n",
224
- ".. ... ... ... ... \n",
225
- "565 4c7e9e66-61a1-47ca-aa4e-340b0eef8db1 -3.41147 1674918 217 \n",
226
- "566 4c7e9e66-61a1-47ca-aa4e-340b0eef8db1 -3.05424 1674918 217 \n",
227
- "567 4c7e9e66-61a1-47ca-aa4e-340b0eef8db1 -1.48608 1674918 217 \n",
228
- "568 4c7e9e66-61a1-47ca-aa4e-340b0eef8db1 -0.52804 1674918 217 \n",
229
- "569 4c7e9e66-61a1-47ca-aa4e-340b0eef8db1 0.00000 1674918 217 \n",
230
- "\n",
231
- "[570 rows x 8 columns]"
232
- ]
233
- },
234
- "execution_count": 3,
235
- "metadata": {},
236
- "output_type": "execute_result"
237
- }
238
- ],
239
  "source": [
240
  "metadata"
241
  ]
242
  },
243
  {
244
  "cell_type": "code",
245
- "execution_count": 4,
246
  "metadata": {},
247
- "outputs": [
248
- {
249
- "data": {
250
- "text/plain": [
251
- "'31/fba56d57-656e-4b6f-ba63-e4ba3ad083f5/I01631_X013_Y012_Z5107_600_375.jpg'"
252
- ]
253
- },
254
- "execution_count": 4,
255
- "metadata": {},
256
- "output_type": "execute_result"
257
- }
258
- ],
259
  "source": [
260
  "idx = 0\n",
261
  "# File Path\n",
@@ -264,20 +47,9 @@
264
  },
265
  {
266
  "cell_type": "code",
267
- "execution_count": 5,
268
  "metadata": {},
269
- "outputs": [
270
- {
271
- "data": {
272
- "text/plain": [
273
- "-2.82953"
274
- ]
275
- },
276
- "execution_count": 5,
277
- "metadata": {},
278
- "output_type": "execute_result"
279
- }
280
- ],
281
  "source": [
282
  "# Focus Value\n",
283
  "metadata.iloc[idx, 5]"
@@ -292,76 +64,9 @@
292
  },
293
  {
294
  "cell_type": "code",
295
- "execution_count": 6,
296
  "metadata": {},
297
- "outputs": [
298
- {
299
- "name": "stdout",
300
- "output_type": "stream",
301
- "text": [
302
- "570\n"
303
- ]
304
- },
305
- {
306
- "data": {
307
- "text/plain": [
308
- "{'image': array([[[172, 173, 159],\n",
309
- " [166, 167, 153],\n",
310
- " [171, 173, 160],\n",
311
- " ...,\n",
312
- " [199, 202, 173],\n",
313
- " [199, 202, 173],\n",
314
- " [200, 201, 170]],\n",
315
- " \n",
316
- " [[167, 169, 155],\n",
317
- " [164, 166, 152],\n",
318
- " [171, 175, 160],\n",
319
- " ...,\n",
320
- " [194, 197, 168],\n",
321
- " [195, 198, 169],\n",
322
- " [199, 200, 169]],\n",
323
- " \n",
324
- " [[146, 153, 135],\n",
325
- " [149, 156, 138],\n",
326
- " [163, 172, 153],\n",
327
- " ...,\n",
328
- " [189, 192, 163],\n",
329
- " [191, 194, 165],\n",
330
- " [197, 198, 167]],\n",
331
- " \n",
332
- " ...,\n",
333
- " \n",
334
- " [[ 57, 62, 68],\n",
335
- " [ 41, 46, 52],\n",
336
- " [ 24, 31, 39],\n",
337
- " ...,\n",
338
- " [198, 189, 180],\n",
339
- " [188, 179, 170],\n",
340
- " [180, 171, 164]],\n",
341
- " \n",
342
- " [[ 46, 51, 57],\n",
343
- " [ 34, 39, 45],\n",
344
- " [ 21, 28, 36],\n",
345
- " ...,\n",
346
- " [208, 200, 189],\n",
347
- " [197, 190, 180],\n",
348
- " [188, 181, 173]],\n",
349
- " \n",
350
- " [[ 31, 39, 42],\n",
351
- " [ 23, 31, 34],\n",
352
- " [ 18, 25, 31],\n",
353
- " ...,\n",
354
- " [215, 209, 197],\n",
355
- " [205, 199, 187],\n",
356
- " [197, 190, 180]]], dtype=uint8),\n",
357
- " 'focus_value': 0.0}"
358
- ]
359
- },
360
- "execution_count": 6,
361
- "metadata": {},
362
- "output_type": "execute_result"
363
- }
364
- ],
365
  "source": [
366
  "from importlib.machinery import SourceFileLoader\n",
367
  "\n",
@@ -370,18 +75,15 @@
370
  "\n",
371
  "ds = FocusDataSet(\"../data/focus/metadata.csv\", \"../data/focus/\")\n",
372
  "\n",
373
- "counter = 0\n",
374
  "for d in ds:\n",
375
- " counter += 1\n",
376
- "\n",
377
- "print(counter)\n",
378
  "\n",
379
  "d"
380
  ]
381
  },
382
  {
383
  "cell_type": "code",
384
- "execution_count": 7,
385
  "metadata": {},
386
  "outputs": [],
387
  "source": [
@@ -393,20 +95,9 @@
393
  },
394
  {
395
  "cell_type": "code",
396
- "execution_count": 8,
397
  "metadata": {},
398
- "outputs": [
399
- {
400
- "data": {
401
- "text/plain": [
402
- "64"
403
- ]
404
- },
405
- "execution_count": 8,
406
- "metadata": {},
407
- "output_type": "execute_result"
408
- }
409
- ],
410
  "source": [
411
  "for data in datamodule.test_dataloader():\n",
412
  " break\n",
@@ -416,40 +107,9 @@
416
  },
417
  {
418
  "cell_type": "code",
419
- "execution_count": 9,
420
  "metadata": {},
421
- "outputs": [
422
- {
423
- "name": "stderr",
424
- "output_type": "stream",
425
- "text": [
426
- "/home/hku/.local/lib/python3.8/site-packages/torch/nn/modules/loss.py:96: UserWarning: Using a target size (torch.Size([64])) that is different to the input size (torch.Size([64, 1])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.\n",
427
- " return F.l1_loss(input, target, reduction=self.reduction)\n"
428
- ]
429
- },
430
- {
431
- "data": {
432
- "text/plain": [
433
- "(tensor(2.5787, grad_fn=<L1LossBackward0>),\n",
434
- " tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
435
- " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
436
- " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),\n",
437
- " tensor([-1.2805, -0.0943, -2.3645, 0.8542, -0.8047, -6.0020, 0.0000, -4.3352,\n",
438
- " -1.8066, -2.7189, -6.4697, -3.2557, -4.2778, -5.0264, -3.4891, 0.0000,\n",
439
- " -1.7181, -2.7314, 0.3324, -0.0943, -0.8991, 0.0000, -4.4178, 1.9723,\n",
440
- " -3.0026, -5.5685, 3.8374, 3.8625, -0.4125, -4.1936, -1.5781, -1.6393,\n",
441
- " -2.9583, -5.4933, -1.7807, -3.3135, -5.3423, -0.7978, -5.3971, -4.9412,\n",
442
- " 0.0000, -4.4128, -5.7744, -5.2755, -1.0996, -5.7482, 0.0000, -0.1737,\n",
443
- " -3.5851, -6.1429, -6.3642, -3.9653, -0.2081, -0.9539, -0.4159, -0.5388,\n",
444
- " -1.3643, -4.4441, -1.5161, 0.6395, -5.4710, -2.6482, 0.0000, -2.6257],\n",
445
- " dtype=torch.float64))"
446
- ]
447
- },
448
- "execution_count": 9,
449
- "metadata": {},
450
- "output_type": "execute_result"
451
- }
452
- ],
453
  "source": [
454
  "import types\n",
455
  "import importlib.machinery\n",
@@ -460,6 +120,60 @@
460
  "\n",
461
  "model.step(data)"
462
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
463
  }
464
  ],
465
  "metadata": {
 
9
  },
10
  {
11
  "cell_type": "code",
12
+ "execution_count": null,
13
  "metadata": {},
14
  "outputs": [],
15
  "source": [
 
18
  },
19
  {
20
  "cell_type": "code",
21
+ "execution_count": null,
22
  "metadata": {},
23
  "outputs": [],
24
  "source": [
 
27
  },
28
  {
29
  "cell_type": "code",
30
+ "execution_count": null,
31
  "metadata": {},
32
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  "source": [
34
  "metadata"
35
  ]
36
  },
37
  {
38
  "cell_type": "code",
39
+ "execution_count": null,
40
  "metadata": {},
41
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
42
  "source": [
43
  "idx = 0\n",
44
  "# File Path\n",
 
47
  },
48
  {
49
  "cell_type": "code",
50
+ "execution_count": null,
51
  "metadata": {},
52
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
53
  "source": [
54
  "# Focus Value\n",
55
  "metadata.iloc[idx, 5]"
 
64
  },
65
  {
66
  "cell_type": "code",
67
+ "execution_count": null,
68
  "metadata": {},
69
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
  "source": [
71
  "from importlib.machinery import SourceFileLoader\n",
72
  "\n",
 
75
  "\n",
76
  "ds = FocusDataSet(\"../data/focus/metadata.csv\", \"../data/focus/\")\n",
77
  "\n",
 
78
  "for d in ds:\n",
79
+ " break\n",
 
 
80
  "\n",
81
  "d"
82
  ]
83
  },
84
  {
85
  "cell_type": "code",
86
+ "execution_count": null,
87
  "metadata": {},
88
  "outputs": [],
89
  "source": [
 
95
  },
96
  {
97
  "cell_type": "code",
98
+ "execution_count": null,
99
  "metadata": {},
100
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
101
  "source": [
102
  "for data in datamodule.test_dataloader():\n",
103
  " break\n",
 
107
  },
108
  {
109
  "cell_type": "code",
110
+ "execution_count": null,
111
  "metadata": {},
112
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
  "source": [
114
  "import types\n",
115
  "import importlib.machinery\n",
 
120
  "\n",
121
  "model.step(data)"
122
  ]
123
+ },
124
+ {
125
+ "cell_type": "markdown",
126
+ "metadata": {},
127
+ "source": [
128
+ "## Benchmark in-memory and from disk"
129
+ ]
130
+ },
131
+ {
132
+ "cell_type": "code",
133
+ "execution_count": null,
134
+ "metadata": {},
135
+ "outputs": [],
136
+ "source": [
137
+ "import time\n",
138
+ "\n",
139
+ "iterations = 10"
140
+ ]
141
+ },
142
+ {
143
+ "cell_type": "code",
144
+ "execution_count": null,
145
+ "metadata": {},
146
+ "outputs": [],
147
+ "source": [
148
+ "datamodule = FocusDataModule(data_dir=\"../data/focus150\", csv_file=\"../data/focus150/metadata.csv\")\n",
149
+ "datamodule.setup()\n",
150
+ "\n",
151
+ "\n",
152
+ "start = time.perf_counter()\n",
153
+ "counter = 0\n",
154
+ "for i in range(iterations):\n",
155
+ " for data in datamodule.train_dataloader():\n",
156
+ " counter += 1\n",
157
+ "\n",
158
+ "print(time.perf_counter() - start)"
159
+ ]
160
+ },
161
+ {
162
+ "cell_type": "code",
163
+ "execution_count": null,
164
+ "metadata": {},
165
+ "outputs": [],
166
+ "source": [
167
+ "datamodule = FocusDataModule(data_dir=\"../data/focus150\", csv_file=\"../data/focus150/metadata.csv\", in_memory=False)\n",
168
+ "datamodule.setup()\n",
169
+ "\n",
170
+ "start = time.perf_counter()\n",
171
+ "counter = 0\n",
172
+ "for i in range(iterations):\n",
173
+ " for data in datamodule.train_dataloader():\n",
174
+ " counter += 1\n",
175
+ "print(time.perf_counter() - start)"
176
+ ]
177
  }
178
  ],
179
  "metadata": {
src/datamodules/focus_datamodule.py CHANGED
@@ -14,7 +14,7 @@ from torchvision.transforms import transforms
14
  class FocusDataSet(Dataset):
15
  """Dataset for z-stacked images of neglected tropical diseases."""
16
 
17
- def __init__(self, csv_file, root_dir, transform=None):
18
  """Initialize focus stack dataset.
19
 
20
  Args:
@@ -24,11 +24,23 @@ class FocusDataSet(Dataset):
24
  on a sample.
25
  """
26
  self.metadata = pd.read_csv(csv_file)
 
27
  self.col_index_path = self.metadata.columns.get_loc("image_path")
28
  self.col_index_focus = self.metadata.columns.get_loc("focus_value")
29
  self.root_dir = root_dir
30
  self.transform = transform
31
 
 
 
 
 
 
 
 
 
 
 
 
32
  def __len__(self) -> int:
33
  """Get the length of the dataset.
34
 
@@ -49,17 +61,19 @@ class FocusDataSet(Dataset):
49
  if torch.is_tensor(idx):
50
  idx = idx.tolist()
51
 
52
- img_name = os.path.join(
53
- self.root_dir, self.metadata.iloc[idx, self.col_index_path]
54
- )
55
- image = io.imread(img_name)
 
 
 
 
56
  focus_value = torch.from_numpy(
57
  np.asarray(self.metadata.iloc[idx, self.col_index_focus])
58
  ).float()
59
- sample = {"image": image, "focus_value": focus_value}
60
 
61
- if self.transform:
62
- sample["image"] = self.transform(sample["image"])
63
 
64
  return sample
65
 
@@ -77,6 +91,7 @@ class FocusDataModule(LightningDataModule):
77
  batch_size: int = 64,
78
  num_workers: int = 0,
79
  pin_memory: bool = False,
 
80
  ):
81
  super().__init__()
82
 
@@ -91,6 +106,7 @@ class FocusDataModule(LightningDataModule):
91
  self.data_train: Optional[Dataset] = None
92
  self.data_val: Optional[Dataset] = None
93
  self.data_test: Optional[Dataset] = None
 
94
 
95
  def prepare_data(self):
96
  """This method is not implemented as of yet.
@@ -108,7 +124,10 @@ class FocusDataModule(LightningDataModule):
108
  # load datasets only if they're not loaded already
109
  if not self.data_train and not self.data_val and not self.data_test:
110
  dataset = FocusDataSet(
111
- self.hparams.csv_file, self.hparams.data_dir, transform=self.transforms
 
 
 
112
  )
113
  train_length = int(
114
  len(dataset) * self.hparams.train_val_test_split_percentage[0]
 
14
  class FocusDataSet(Dataset):
15
  """Dataset for z-stacked images of neglected tropical diseases."""
16
 
17
+ def __init__(self, csv_file, root_dir, transform=None, in_memory=True):
18
  """Initialize focus stack dataset.
19
 
20
  Args:
 
24
  on a sample.
25
  """
26
  self.metadata = pd.read_csv(csv_file)
27
+ self.in_memory = in_memory
28
  self.col_index_path = self.metadata.columns.get_loc("image_path")
29
  self.col_index_focus = self.metadata.columns.get_loc("focus_value")
30
  self.root_dir = root_dir
31
  self.transform = transform
32
 
33
+ self.images = []
34
+ if self.in_memory:
35
+ self.images = np.array(
36
+ list(map(self._load_img, self.metadata["image_path"].tolist()))
37
+ )
38
+
39
+ def _load_img(self, img_path):
40
+ path = os.path.join(self.root_dir, img_path)
41
+ img = io.imread(path)
42
+ return img
43
+
44
  def __len__(self) -> int:
45
  """Get the length of the dataset.
46
 
 
61
  if torch.is_tensor(idx):
62
  idx = idx.tolist()
63
 
64
+ if self.in_memory:
65
+ image = self.images[idx]
66
+ else:
67
+ image = self._load_img(self.metadata.iloc[idx, self.col_index_path])
68
+
69
+ if self.transform:
70
+ image = self.transform(image)
71
+
72
  focus_value = torch.from_numpy(
73
  np.asarray(self.metadata.iloc[idx, self.col_index_focus])
74
  ).float()
 
75
 
76
+ sample = {"image": image, "focus_value": focus_value}
 
77
 
78
  return sample
79
 
 
91
  batch_size: int = 64,
92
  num_workers: int = 0,
93
  pin_memory: bool = False,
94
+ in_memory: bool = True,
95
  ):
96
  super().__init__()
97
 
 
106
  self.data_train: Optional[Dataset] = None
107
  self.data_val: Optional[Dataset] = None
108
  self.data_test: Optional[Dataset] = None
109
+ self.in_memory = in_memory
110
 
111
  def prepare_data(self):
112
  """This method is not implemented as of yet.
 
124
  # load datasets only if they're not loaded already
125
  if not self.data_train and not self.data_val and not self.data_test:
126
  dataset = FocusDataSet(
127
+ self.hparams.csv_file,
128
+ self.hparams.data_dir,
129
+ transform=self.transforms,
130
+ in_memory=self.in_memory,
131
  )
132
  train_length = int(
133
  len(dataset) * self.hparams.train_val_test_split_percentage[0]