{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Exploring Code for Data\n",
    "\n",
    "Load the focus metadata CSV and look at the file path and focus value of a single row."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import importlib.util\n",
    "import sys\n",
    "import time\n",
    "\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def load_module_from_path(name, path):\n",
    "    \"\"\"Import the Python source file at `path` as a module called `name`.\n",
    "\n",
    "    Replaces the deprecated `SourceFileLoader(...).load_module()` call. The module is\n",
    "    registered in `sys.modules` before it is executed, so `import <name>` works afterwards.\n",
    "\n",
    "    Args:\n",
    "        name: Module name to register in `sys.modules`.\n",
    "        path: Path to the `.py` file to execute.\n",
    "\n",
    "    Returns:\n",
    "        The loaded module object.\n",
    "\n",
    "    Raises:\n",
    "        ImportError: If no import spec/loader can be created for `path`.\n",
    "    \"\"\"\n",
    "    spec = importlib.util.spec_from_file_location(name, path)\n",
    "    if spec is None or spec.loader is None:\n",
    "        raise ImportError(f\"Cannot load module {name!r} from {path!r}\")\n",
    "    module = importlib.util.module_from_spec(spec)\n",
    "    sys.modules[name] = module\n",
    "    try:\n",
    "        spec.loader.exec_module(module)\n",
    "    except BaseException:\n",
    "        # Do not leave a half-initialised module behind if its top-level code fails.\n",
    "        sys.modules.pop(name, None)\n",
    "        raise\n",
    "    return module"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "metadata = pd.read_csv(\"../data/focus/metadata.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "metadata.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "idx = 0\n",
    "# File path: positional column 1 of the metadata table\n",
    "metadata.iloc[idx, 1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Focus value: positional column 5 of the metadata table\n",
    "metadata.iloc[idx, 5]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Testing FocusDataSet\n",
    "\n",
    "The project modules are loaded straight from their source files under `../src`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "focus_datamodule = load_module_from_path(\"focus_datamodule\", \"../src/datamodules/focus_datamodule.py\")\n",
    "FocusDataSet = focus_datamodule.FocusDataSet\n",
    "\n",
    "ds = FocusDataSet(\"../data/focus/metadata.csv\", \"../data/focus/\")\n",
    "\n",
    "# First sample yielded by the dataset\n",
    "sample = next(iter(ds))\n",
    "\n",
    "sample"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "FocusDataModule = focus_datamodule.FocusDataModule\n",
    "\n",
    "datamodule = FocusDataModule(data_dir=\"../data/focus\", csv_file=\"../data/focus/metadata.csv\")\n",
    "datamodule.setup()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# First batch of the test dataloader; kept under its own name so the benchmark loops below cannot overwrite it\n",
    "test_batch = next(iter(datamodule.test_dataloader()))\n",
    "\n",
    "len(test_batch[\"focus_value\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "focus_module = load_module_from_path(\"focus_module\", \"../src/models/focus_module.py\")\n",
    "FocusLitModule = focus_module.FocusLitModule\n",
    "\n",
    "model = FocusLitModule()\n",
    "\n",
    "model.step(test_batch)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Benchmark in-memory and from disk\n",
    "\n",
    "Both runs time repeated passes over the train dataloader of the `focus150` data; `setup()` is excluded from the timing. The first run uses `FocusDataModule`'s default `in_memory` setting (presumably in-memory; confirm in `focus_datamodule.py`), the second passes `in_memory=False`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "BENCHMARK_ITERATIONS = 10\n",
    "BENCHMARK_DATA_DIR = \"../data/focus150\"\n",
    "BENCHMARK_CSV = \"../data/focus150/metadata.csv\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def time_train_epochs(datamodule, iterations):\n",
    "    \"\"\"Time `iterations` full passes over `datamodule.train_dataloader()`.\n",
    "\n",
    "    Args:\n",
    "        datamodule: Object exposing `train_dataloader()`; `setup()` must already have been called.\n",
    "        iterations: Number of complete passes over the train dataloader.\n",
    "\n",
    "    Returns:\n",
    "        Tuple `(elapsed_seconds, batches_seen)`.\n",
    "    \"\"\"\n",
    "    batches_seen = 0\n",
    "    start = time.perf_counter()\n",
    "    for _ in range(iterations):\n",
    "        for _batch in datamodule.train_dataloader():\n",
    "            batches_seen += 1\n",
    "    return time.perf_counter() - start, batches_seen"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "datamodule_default = FocusDataModule(data_dir=BENCHMARK_DATA_DIR, csv_file=BENCHMARK_CSV)\n",
    "datamodule_default.setup()\n",
    "\n",
    "elapsed_default, batches_default = time_train_epochs(datamodule_default, BENCHMARK_ITERATIONS)\n",
    "print(f\"default in_memory setting: {elapsed_default:.3f} s for {batches_default} batches\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "datamodule_disk = FocusDataModule(data_dir=BENCHMARK_DATA_DIR, csv_file=BENCHMARK_CSV, in_memory=False)\n",
    "datamodule_disk.setup()\n",
    "\n",
    "elapsed_disk, batches_disk = time_train_epochs(datamodule_disk, BENCHMARK_ITERATIONS)\n",
    "print(f\"in_memory=False: {elapsed_disk:.3f} s for {batches_disk} batches\")"
   ]
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "f9f85f796d01129d0dd105a088854619f454435301f6ffec2fea96ecbd9be4ac"
  },
  "kernelspec": {
   "display_name": "Python 3.9.7 64-bit",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}