Hannes Kuchelmeister committed
Commit a10a7fc
Parent: b67f297

implement data loader

models/notebooks/1.0-hfk-datamodules-exploration.ipynb CHANGED
@@ -292,65 +292,72 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "570\n"
+     ]
+    },
     {
      "data": {
       "text/plain": [
-       "{'image': array([[[181, 190, 171],\n",
-       "        [180, 189, 170],\n",
-       "        [180, 186, 172],\n",
+       "{'image': array([[[172, 173, 159],\n",
+       "        [166, 167, 153],\n",
+       "        [171, 173, 160],\n",
        "        ...,\n",
-       "        [172, 176, 177],\n",
-       "        [171, 176, 179],\n",
-       "        [170, 178, 180]],\n",
+       "        [199, 202, 173],\n",
+       "        [199, 202, 173],\n",
+       "        [200, 201, 170]],\n",
        "\n",
-       "       [[181, 190, 173],\n",
-       "        [181, 190, 173],\n",
-       "        [180, 188, 175],\n",
+       "       [[167, 169, 155],\n",
+       "        [164, 166, 152],\n",
+       "        [171, 175, 160],\n",
        "        ...,\n",
-       "        [169, 173, 174],\n",
-       "        [169, 175, 175],\n",
-       "        [170, 176, 176]],\n",
+       "        [194, 197, 168],\n",
+       "        [195, 198, 169],\n",
+       "        [199, 200, 169]],\n",
        "\n",
-       "       [[179, 190, 176],\n",
-       "        [179, 190, 176],\n",
-       "        [179, 189, 180],\n",
+       "       [[146, 153, 135],\n",
+       "        [149, 156, 138],\n",
+       "        [163, 172, 153],\n",
        "        ...,\n",
-       "        [169, 169, 167],\n",
-       "        [169, 171, 170],\n",
-       "        [169, 171, 170]],\n",
+       "        [189, 192, 163],\n",
+       "        [191, 194, 165],\n",
+       "        [197, 198, 167]],\n",
        "\n",
        "       ...,\n",
        "\n",
-       "       [[195, 201, 197],\n",
-       "        [195, 201, 197],\n",
-       "        [195, 201, 197],\n",
+       "       [[ 57,  62,  68],\n",
+       "        [ 41,  46,  52],\n",
+       "        [ 24,  31,  39],\n",
        "        ...,\n",
-       "        [198, 195, 188],\n",
-       "        [199, 198, 196],\n",
-       "        [202, 200, 205]],\n",
+       "        [198, 189, 180],\n",
+       "        [188, 179, 170],\n",
+       "        [180, 171, 164]],\n",
        "\n",
-       "       [[195, 201, 197],\n",
-       "        [195, 201, 197],\n",
-       "        [195, 201, 197],\n",
+       "       [[ 46,  51,  57],\n",
+       "        [ 34,  39,  45],\n",
+       "        [ 21,  28,  36],\n",
        "        ...,\n",
-       "        [198, 195, 188],\n",
-       "        [199, 198, 196],\n",
-       "        [202, 200, 205]],\n",
+       "        [208, 200, 189],\n",
+       "        [197, 190, 180],\n",
+       "        [188, 181, 173]],\n",
        "\n",
-       "       [[195, 201, 197],\n",
-       "        [195, 201, 197],\n",
-       "        [195, 201, 197],\n",
+       "       [[ 31,  39,  42],\n",
+       "        [ 23,  31,  34],\n",
+       "        [ 18,  25,  31],\n",
        "        ...,\n",
-       "        [198, 195, 188],\n",
-       "        [199, 198, 196],\n",
-       "        [202, 200, 203]]], dtype=uint8),\n",
-       " 'focus_value': -2.70408}"
+       "        [215, 209, 197],\n",
+       "        [205, 199, 187],\n",
+       "        [197, 190, 180]]], dtype=uint8),\n",
+       " 'focus_value': 0.0}"
       ]
      },
-     "execution_count": 7,
+     "execution_count": 6,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -362,7 +369,49 @@
     "from focus_datamodule import FocusDataSet\n",
     "\n",
     "ds = FocusDataSet(\"../data/focus/metadata.csv\", \"../data/focus/\")\n",
-    "ds[1]"
+    "\n",
+    "counter = 0\n",
+    "for d in ds:\n",
+    "    counter += 1\n",
+    "\n",
+    "print(counter)\n",
+    "\n",
+    "d"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from focus_datamodule import FocusDataModule\n",
+    "\n",
+    "datamodule = FocusDataModule(data_dir=\"../data/focus\", csv_file=\"../data/focus/metadata.csv\")\n",
+    "datamodule.setup()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "64"
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "for data in datamodule.test_dataloader():\n",
+    "    break\n",
+    "\n",
+    "len(data[\"focus_value\"])"
+   ]
+  }
 ],
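
Since random_split in the companion FocusDataModule requires FocusDataSet to implement __len__, the counting loop in the new notebook cell could likely be replaced by a direct len() call. A minimal sketch, assuming __len__ is consistent with iteration (not part of this commit):

    from focus_datamodule import FocusDataSet

    ds = FocusDataSet("../data/focus/metadata.csv", "../data/focus/")
    print(len(ds))            # should print 570, matching the manual counter
    sample = ds[len(ds) - 1]  # one sample: a dict with 'image' and 'focus_value'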
models/src/datamodules/focus_datamodule.py CHANGED
@@ -1,14 +1,11 @@
 import os
-from typing import Any, Optional, Tuple, Union
-from typing_extensions import Self
-import numpy as np
+from typing import Optional, Tuple
 import pandas as pd
 from skimage import io
 
 import torch
 from pytorch_lightning import LightningDataModule
-from torch.utils.data import ConcatDataset, DataLoader, Dataset, random_split
-from torchvision.datasets import MNIST
+from torch.utils.data import DataLoader, Dataset, random_split
 from torchvision.transforms import transforms
 
 
@@ -58,3 +55,88 @@ class FocusDataSet(Dataset):
 
         return sample
 
+
+class FocusDataModule(LightningDataModule):
+    """
+    LightningDataModule for the FocusStack dataset.
+    """
+
+    def __init__(
+        self,
+        data_dir: str = "data/",
+        csv_file: str = "data/metadata.csv",
+        train_val_test_split_percentage: Tuple[float, float, float] = (0.75, 0.15, 0.15),
+        batch_size: int = 64,
+        num_workers: int = 0,
+        pin_memory: bool = False,
+    ):
+        super().__init__()
+
+        # this line allows accessing init params with the 'self.hparams' attribute
+        self.save_hyperparameters(logger=False)
+
+        # data transformations
+        self.transforms = transforms.Compose([])
+
+        self.data_train: Optional[Dataset] = None
+        self.data_val: Optional[Dataset] = None
+        self.data_test: Optional[Dataset] = None
+
+    def prepare_data(self):
+        """This method is not implemented yet.
+
+        Download data if needed. This method is called only from a single GPU.
+        Do not use it to assign state (self.x = y).
+        """
+        pass
+
+    def setup(self, stage: Optional[str] = None):
+        """Load data. Set variables: `self.data_train`, `self.data_val`, `self.data_test`.
+        Lightning calls this method both for `trainer.fit()` and `trainer.test()`, so be careful if you do a random split!
+        The `stage` argument can be used to differentiate whether it is called before `trainer.fit()` or `trainer.test()`."""
+
+        # load datasets only if they're not loaded already
+        if not self.data_train and not self.data_val and not self.data_test:
+            dataset = FocusDataSet(
+                self.hparams.csv_file, self.hparams.data_dir, transform=self.transforms
+            )
+            train_length = int(
+                len(dataset) * self.hparams.train_val_test_split_percentage[0]
+            )
+            val_length = int(
+                len(dataset) * self.hparams.train_val_test_split_percentage[1]
+            )
+            test_length = len(dataset) - val_length - train_length
+
+            self.data_train, self.data_val, self.data_test = random_split(
+                dataset=dataset,
+                lengths=(train_length, val_length, test_length),
+                generator=torch.Generator().manual_seed(42),
+            )
+
+    def train_dataloader(self):
+        return DataLoader(
+            dataset=self.data_train,
+            batch_size=self.hparams.batch_size,
+            num_workers=self.hparams.num_workers,
+            pin_memory=self.hparams.pin_memory,
+            shuffle=True,
+        )
+
+    def val_dataloader(self):
+        return DataLoader(
+            dataset=self.data_val,
+            batch_size=self.hparams.batch_size,
+            num_workers=self.hparams.num_workers,
+            pin_memory=self.hparams.pin_memory,
+            shuffle=False,
+        )
+
+    def test_dataloader(self):
+        return DataLoader(
+            dataset=self.data_test,
+            batch_size=self.hparams.batch_size,
+            num_workers=self.hparams.num_workers,
+            pin_memory=self.hparams.pin_memory,
+            shuffle=False,
+        )
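
With the 570-sample dataset from the notebook, setup() yields int(570 * 0.75) = 427 training, int(570 * 0.15) = 85 validation, and 570 - 427 - 85 = 58 test samples. A minimal end-to-end sketch of the new datamodule (paths taken from the notebook; FocusModel is an assumed LightningModule and not part of this commit):

    from pytorch_lightning import Trainer

    from focus_datamodule import FocusDataModule

    datamodule = FocusDataModule(
        data_dir="../data/focus", csv_file="../data/focus/metadata.csv", batch_size=64
    )
    datamodule.setup()

    # the default collate_fn batches each sample dict into a dict of tensors,
    # assuming all images share the same shape
    batch = next(iter(datamodule.train_dataloader()))
    print(batch["image"].shape, len(batch["focus_value"]))

    # training with the assumed LightningModule:
    # Trainer(max_epochs=10).fit(model=FocusModel(), datamodule=datamodule)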