sycod
committed on
Commit
·
0fe2514
1
Parent(s):
fc06865
start EfficientNetV2M
Browse files- EDA.ipynb +262 -310
- README.md +2 -3
- config.yaml +3 -1
- src/load_data.py +69 -26
EDA.ipynb
CHANGED
@@ -16,14 +16,13 @@
|
|
16 |
" - [🚧 Image exploration](#toc3_3_) \n",
|
17 |
"- [Data preprocessing](#toc4_) \n",
|
18 |
"- [🦄🦄 CHECKPOINT 🦄🦄](#toc5_) \n",
|
19 |
-
"- [
|
20 |
-
"- [
|
21 |
-
"
|
22 |
-
"- [
|
23 |
-
" - [Prepare
|
24 |
-
" - [
|
25 |
-
" - [
|
26 |
-
" - [Random Baseline](#toc8_4_) \n",
|
27 |
"\n",
|
28 |
"<!-- vscode-jupyter-toc-config\n",
|
29 |
"\tnumbering=false\n",
|
@@ -71,25 +70,25 @@
|
|
71 |
"text": [
|
72 |
"/Users/julmat/Documents/hugging_face/frugal_cviz/.venv/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
73 |
" from .autonotebook import tqdm as notebook_tqdm\n",
|
74 |
-
"[codecarbon WARNING @
|
75 |
-
"[codecarbon INFO @
|
76 |
-
"[codecarbon INFO @
|
77 |
-
"[codecarbon WARNING @
|
78 |
" Mac OS and ARM processor detected: Please enable PowerMetrics sudo to measure CPU\n",
|
79 |
"\n",
|
80 |
-
"[codecarbon INFO @
|
81 |
-
"[codecarbon INFO @
|
82 |
-
"[codecarbon INFO @
|
83 |
-
"[codecarbon INFO @
|
84 |
-
"[codecarbon INFO @
|
85 |
-
"[codecarbon INFO @
|
86 |
-
"[codecarbon INFO @
|
87 |
-
"[codecarbon INFO @
|
88 |
-
"[codecarbon INFO @
|
89 |
-
"[codecarbon INFO @
|
90 |
-
"[codecarbon INFO @
|
91 |
-
"[codecarbon INFO @
|
92 |
-
"[codecarbon INFO @
|
93 |
]
|
94 |
}
|
95 |
],
|
@@ -106,8 +105,8 @@
|
|
106 |
"\n",
|
107 |
"# ML\n",
|
108 |
"from keras import Model\n",
|
109 |
-
"from keras.applications import EfficientNetB0\n",
|
110 |
-
"from keras.layers import Flatten, Dense\n",
|
111 |
"from keras.metrics import Precision, Recall\n",
|
112 |
"from keras.optimizers import AdamW\n",
|
113 |
"from keras.utils import image_dataset_from_directory\n",
|
@@ -126,7 +125,7 @@
|
|
126 |
"from tasks.image import parse_boxes, compute_iou, compute_max_iou\n",
|
127 |
"\n",
|
128 |
"\n",
|
129 |
-
"# Logging
|
130 |
"logger = logging.getLogger()\n",
|
131 |
"logger.setLevel(logging.INFO)"
|
132 |
]
|
@@ -149,7 +148,9 @@
|
|
149 |
" cfg = yaml.safe_load(f)\n",
|
150 |
"# Data\n",
|
151 |
"OUTPUT_DIR = cfg[\"data_root_dir\"]\n",
|
152 |
-
"
|
|
|
|
|
153 |
"REPO_ID = cfg[\"repo_id\"]\n",
|
154 |
"SPLIT_SIZE = cfg[\"split_size\"]\n",
|
155 |
"RDM_SEED = cfg[\"rdm_seed\"]\n",
|
@@ -1858,16 +1859,14 @@
|
|
1858 |
"metadata": {},
|
1859 |
"outputs": [
|
1860 |
{
|
1861 |
-
"name": "
|
1862 |
"output_type": "stream",
|
1863 |
"text": [
|
1864 |
-
"
|
1865 |
-
"Wall time: 17.5 s\n"
|
1866 |
]
|
1867 |
}
|
1868 |
],
|
1869 |
"source": [
|
1870 |
-
"%%time\n",
|
1871 |
"df_format = format_data_keras(df_clean.copy())"
|
1872 |
]
|
1873 |
},
|
@@ -1880,6 +1879,15 @@
|
|
1880 |
"- update dataframe"
|
1881 |
]
|
1882 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1883 |
{
|
1884 |
"cell_type": "code",
|
1885 |
"execution_count": 13,
|
@@ -1889,34 +1897,12 @@
|
|
1889 |
"name": "stderr",
|
1890 |
"output_type": "stream",
|
1891 |
"text": [
|
1892 |
-
"INFO:root
|
1893 |
-
"INFO:root:⚙️ Transforming 16025 images...\n",
|
1894 |
-
"INFO:root:✅ Transformed 3802 images with blur_img\n",
|
1895 |
-
"INFO:root:✅ Transformed 3802 images with flip_img\n",
|
1896 |
-
"INFO:root:✅ Transformed 3802 images with blur_flip_img\n",
|
1897 |
-
"INFO:root:✅ Transformed 3802 images with eq_img\n",
|
1898 |
-
"INFO:root:✅ Transformed 817 images with 180_img\n",
|
1899 |
-
"INFO:root:⚙️ Processing val split...\n",
|
1900 |
-
"INFO:root:⚙️ Transforming 2571 images...\n",
|
1901 |
-
"INFO:root:✅ Transformed 764 images with blur_img\n",
|
1902 |
-
"INFO:root:✅ Transformed 764 images with flip_img\n",
|
1903 |
-
"INFO:root:✅ Transformed 764 images with blur_flip_img\n",
|
1904 |
-
"INFO:root:✅ Transformed 279 images with eq_img\n",
|
1905 |
-
"INFO:root:✅ Transformed 0 images with 180_img\n"
|
1906 |
-
]
|
1907 |
-
},
|
1908 |
-
{
|
1909 |
-
"name": "stdout",
|
1910 |
-
"output_type": "stream",
|
1911 |
-
"text": [
|
1912 |
-
"CPU times: user 3min 54s, sys: 15.9 s, total: 4min 10s\n",
|
1913 |
-
"Wall time: 4min 19s\n"
|
1914 |
]
|
1915 |
}
|
1916 |
],
|
1917 |
"source": [
|
1918 |
-
"
|
1919 |
-
"df_aug = oversample_class(df_format.copy())"
|
1920 |
]
|
1921 |
},
|
1922 |
{
|
@@ -1928,7 +1914,7 @@
|
|
1928 |
},
|
1929 |
{
|
1930 |
"cell_type": "code",
|
1931 |
-
"execution_count":
|
1932 |
"metadata": {},
|
1933 |
"outputs": [
|
1934 |
{
|
@@ -2841,21 +2827,23 @@
|
|
2841 |
"cell_type": "markdown",
|
2842 |
"metadata": {},
|
2843 |
"source": [
|
2844 |
-
"
|
2845 |
]
|
2846 |
},
|
2847 |
{
|
2848 |
-
"cell_type": "
|
|
|
2849 |
"metadata": {},
|
|
|
2850 |
"source": [
|
2851 |
-
"
|
2852 |
]
|
2853 |
},
|
2854 |
{
|
2855 |
"cell_type": "markdown",
|
2856 |
"metadata": {},
|
2857 |
"source": [
|
2858 |
-
"
|
2859 |
]
|
2860 |
},
|
2861 |
{
|
@@ -2864,68 +2852,96 @@
|
|
2864 |
"metadata": {},
|
2865 |
"outputs": [
|
2866 |
{
|
2867 |
-
"
|
2868 |
-
"evalue": "name 'stop' is not defined",
|
2869 |
-
|
2870 |
-
|
2871 |
-
|
2872 |
-
|
2873 |
-
|
2874 |
-
|
2875 |
-
]
|
2876 |
}
|
2877 |
],
|
2878 |
"source": [
|
2879 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2880 |
]
|
2881 |
},
|
2882 |
{
|
2883 |
"cell_type": "markdown",
|
2884 |
"metadata": {},
|
2885 |
"source": [
|
2886 |
-
"# <a id='
|
2887 |
]
|
2888 |
},
|
2889 |
{
|
2890 |
"cell_type": "markdown",
|
2891 |
"metadata": {},
|
2892 |
"source": [
|
2893 |
-
"## <a id='
|
2894 |
]
|
2895 |
},
|
2896 |
{
|
2897 |
"cell_type": "markdown",
|
2898 |
"metadata": {},
|
2899 |
"source": [
|
2900 |
-
"# <a id='
|
2901 |
"\n",
|
2902 |
"- https://paperswithcode.com/sota/image-classification-on-imagenet\n",
|
2903 |
-
"- https://keras.io/api/applications/efficientnet_v2/#efficientnetv2m-function"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2904 |
]
|
2905 |
},
|
2906 |
{
|
2907 |
"cell_type": "code",
|
2908 |
-
"execution_count":
|
2909 |
"metadata": {},
|
2910 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2911 |
"source": [
|
2912 |
-
"
|
2913 |
-
"\n",
|
2914 |
-
"# model = EfficientNetV2M("
|
2915 |
]
|
2916 |
},
|
2917 |
{
|
2918 |
"cell_type": "code",
|
2919 |
-
"execution_count":
|
2920 |
"metadata": {},
|
2921 |
"outputs": [],
|
2922 |
"source": [
|
2923 |
"# Model config\n",
|
2924 |
-
"model_name = \"
|
2925 |
"input_size = (224, 224)\n",
|
2926 |
-
"batch_size =
|
2927 |
-
"n_epochs = 100\n",
|
2928 |
"optimizer = AdamW(learning_rate=0.0002, weight_decay=0.05)\n",
|
|
|
|
|
|
|
|
|
|
|
2929 |
"loss = \"binary_crossentropy\"\n",
|
2930 |
"metrics = [\"accuracy\", Precision(), Recall()]\n",
|
2931 |
"# metrics = [\"accuracy\", Precision(), Recall(), \"f1_score\"]\n",
|
@@ -2945,97 +2961,22 @@
|
|
2945 |
"cell_type": "markdown",
|
2946 |
"metadata": {},
|
2947 |
"source": [
|
2948 |
-
"## <a id='
|
2949 |
-
]
|
2950 |
-
},
|
2951 |
-
{
|
2952 |
-
"cell_type": "markdown",
|
2953 |
-
"metadata": {},
|
2954 |
-
"source": [
|
2955 |
-
"Create datasets from local images and labels"
|
2956 |
-
]
|
2957 |
-
},
|
2958 |
-
{
|
2959 |
-
"cell_type": "code",
|
2960 |
-
"execution_count": 184,
|
2961 |
-
"metadata": {},
|
2962 |
-
"outputs": [
|
2963 |
-
{
|
2964 |
-
"name": "stdout",
|
2965 |
-
"output_type": "stream",
|
2966 |
-
"text": [
|
2967 |
-
"Train dataset:\n",
|
2968 |
-
"Found 23629 files belonging to 2 classes.\n",
|
2969 |
-
"\n",
|
2970 |
-
"Val dataset:\n",
|
2971 |
-
"Found 4099 files belonging to 2 classes.\n",
|
2972 |
-
"\n",
|
2973 |
-
"Test dataset:\n",
|
2974 |
-
"Found 5908 files belonging to 2 classes.\n"
|
2975 |
-
]
|
2976 |
-
}
|
2977 |
-
],
|
2978 |
-
"source": [
|
2979 |
-
"# Prepare for outputs\n",
|
2980 |
-
"os.makedirs(MODELS_ROOT_DIR, exist_ok=True)\n",
|
2981 |
-
"y = df[\"label\"]\n",
|
2982 |
-
"X = df[\"uri\"]\n",
|
2983 |
-
"\n",
|
2984 |
-
"# Create datasets\n",
|
2985 |
-
"print(\"Train dataset:\")\n",
|
2986 |
-
"train_ds = image_dataset_from_directory(\n",
|
2987 |
-
" train_dir,\n",
|
2988 |
-
" labels=\"inferred\", # class names upon folders structure\n",
|
2989 |
-
" label_mode=\"int\", # integer encoding\n",
|
2990 |
-
" shuffle=True, # shuffle images\n",
|
2991 |
-
" seed=42, # random seed\n",
|
2992 |
-
" image_size=input_size, # automatic resizing\n",
|
2993 |
-
" batch_size=batch_size, # tensor shape[0]\n",
|
2994 |
-
")\n",
|
2995 |
-
"\n",
|
2996 |
-
"print(\"\\nVal dataset:\")\n",
|
2997 |
-
"val_ds = image_dataset_from_directory(\n",
|
2998 |
-
" val_dir,\n",
|
2999 |
-
" labels=\"inferred\", # class names upon folders structure\n",
|
3000 |
-
" label_mode=\"int\", # integer encoding\n",
|
3001 |
-
" shuffle=True, # shuffle images\n",
|
3002 |
-
" seed=42, # random seed\n",
|
3003 |
-
" image_size=input_size, # automatic resizing\n",
|
3004 |
-
" batch_size=batch_size, # tensor shape[0]\n",
|
3005 |
-
")\n",
|
3006 |
-
"\n",
|
3007 |
-
"print(\"\\nTest dataset:\")\n",
|
3008 |
-
"test_ds = image_dataset_from_directory(\n",
|
3009 |
-
" test_dir,\n",
|
3010 |
-
" labels=\"inferred\", # class names upon folders structure\n",
|
3011 |
-
" label_mode=\"int\", # integer encoding\n",
|
3012 |
-
" shuffle=False, # do not shuffle images\n",
|
3013 |
-
" seed=42, # random seed\n",
|
3014 |
-
" image_size=input_size, # automatic resizing\n",
|
3015 |
-
" batch_size=batch_size, # tensor shape[0]\n",
|
3016 |
-
")"
|
3017 |
-
]
|
3018 |
-
},
|
3019 |
-
{
|
3020 |
-
"cell_type": "markdown",
|
3021 |
-
"metadata": {},
|
3022 |
-
"source": [
|
3023 |
-
"## <a id='toc8_2_'></a>[Prepare model](#toc0_)"
|
3024 |
]
|
3025 |
},
|
3026 |
{
|
3027 |
"cell_type": "code",
|
3028 |
-
"execution_count":
|
3029 |
"metadata": {},
|
3030 |
"outputs": [
|
3031 |
{
|
3032 |
"data": {
|
3033 |
"text/html": [
|
3034 |
-
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\">Model: \"
|
3035 |
"</pre>\n"
|
3036 |
],
|
3037 |
"text/plain": [
|
3038 |
-
"\u001b[1mModel: \"
|
3039 |
]
|
3040 |
},
|
3041 |
"metadata": {},
|
@@ -3044,31 +2985,33 @@
|
|
3044 |
{
|
3045 |
"data": {
|
3046 |
"text/html": [
|
3047 |
-
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"
|
3048 |
-
"┃<span style=\"font-weight: bold\"> Layer (type)
|
3049 |
-
"
|
3050 |
-
"│
|
3051 |
-
"
|
3052 |
-
"│
|
3053 |
-
"
|
3054 |
-
"
|
3055 |
-
"
|
3056 |
-
"
|
3057 |
-
"
|
|
|
3058 |
"</pre>\n"
|
3059 |
],
|
3060 |
"text/plain": [
|
3061 |
-
"
|
3062 |
-
"┃\u001b[1m \u001b[0m\u001b[1mLayer (type)
|
3063 |
-
"
|
3064 |
-
"│
|
3065 |
-
"
|
3066 |
-
"│
|
3067 |
-
"
|
3068 |
-
"
|
3069 |
-
"
|
3070 |
-
"
|
3071 |
-
"
|
|
|
3072 |
]
|
3073 |
},
|
3074 |
"metadata": {},
|
@@ -3077,11 +3020,11 @@
|
|
3077 |
{
|
3078 |
"data": {
|
3079 |
"text/html": [
|
3080 |
-
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Total params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">
|
3081 |
"</pre>\n"
|
3082 |
],
|
3083 |
"text/plain": [
|
3084 |
-
"\u001b[1m Total params: \u001b[0m\u001b[38;5;
|
3085 |
]
|
3086 |
},
|
3087 |
"metadata": {},
|
@@ -3103,11 +3046,11 @@
|
|
3103 |
{
|
3104 |
"data": {
|
3105 |
"text/html": [
|
3106 |
-
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Non-trainable params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">
|
3107 |
"</pre>\n"
|
3108 |
],
|
3109 |
"text/plain": [
|
3110 |
-
"\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;
|
3111 |
]
|
3112 |
},
|
3113 |
"metadata": {},
|
@@ -3116,176 +3059,152 @@
|
|
3116 |
],
|
3117 |
"source": [
|
3118 |
"# Load pre-trained model without top layers\n",
|
3119 |
-
"
|
3120 |
" weights=\"imagenet\", # pre-trained weights\n",
|
3121 |
" include_top=False, # no dense layer\n",
|
3122 |
-
" input_shape=(input_size[0], input_size[1], 3), # input shape
|
3123 |
")\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3124 |
"# Create explicit input layer\n",
|
3125 |
"inputs = tf.keras.Input(shape=(input_size[0], input_size[1], 3))\n",
|
3126 |
-
"# add data augmentation\n",
|
3127 |
-
"# augmented = ConditionalAugmentation(rate=0.4)(inputs)\n",
|
3128 |
-
"# x = model(augmented)\n",
|
3129 |
-
"x = model(inputs)\n",
|
3130 |
"\n",
|
|
|
|
|
|
|
|
|
3131 |
"# Flatten output\n",
|
3132 |
"x = Flatten()(x)\n",
|
3133 |
-
"# For feature extraction only\n",
|
3134 |
-
"for layer in model.layers:\n",
|
3135 |
-
" layer.trainable = False\n",
|
3136 |
"# New FC layer for binary classification\n",
|
3137 |
"predictions = Dense(1, activation=\"sigmoid\")(x)\n",
|
|
|
3138 |
"# Define new model\n",
|
3139 |
-
"
|
|
|
3140 |
"# Display model summary\n",
|
3141 |
-
"
|
3142 |
]
|
3143 |
},
|
3144 |
{
|
3145 |
"cell_type": "markdown",
|
3146 |
"metadata": {},
|
3147 |
"source": [
|
3148 |
-
"## <a id='
|
3149 |
]
|
3150 |
},
|
3151 |
{
|
3152 |
"cell_type": "markdown",
|
3153 |
"metadata": {},
|
3154 |
"source": [
|
3155 |
-
"
|
3156 |
]
|
3157 |
},
|
3158 |
{
|
3159 |
"cell_type": "code",
|
3160 |
-
"execution_count":
|
3161 |
"metadata": {},
|
3162 |
"outputs": [
|
3163 |
-
{
|
3164 |
-
"name": "stderr",
|
3165 |
-
"output_type": "stream",
|
3166 |
-
"text": [
|
3167 |
-
"INFO:root:⚙️ compiling\n",
|
3168 |
-
"INFO:root:🛎️ declaring callbacks\n",
|
3169 |
-
"INFO:root:💪 starting training\n"
|
3170 |
-
]
|
3171 |
-
},
|
3172 |
-
{
|
3173 |
-
"name": "stdout",
|
3174 |
-
"output_type": "stream",
|
3175 |
-
"text": [
|
3176 |
-
"Epoch 1/100\n",
|
3177 |
-
"\u001b[1m493/493\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m406s\u001b[0m 815ms/step - accuracy: 0.8178 - loss: 0.4710 - precision_3: 0.8521 - recall_3: 0.9467 - val_accuracy: 0.7821 - val_loss: 0.5143 - val_precision_3: 0.8256 - val_recall_3: 0.9283\n",
|
3178 |
-
"Epoch 2/100\n",
|
3179 |
-
"\u001b[1m493/493\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m398s\u001b[0m 808ms/step - accuracy: 0.8334 - loss: 0.4258 - precision_3: 0.8697 - recall_3: 0.9422 - val_accuracy: 0.8046 - val_loss: 0.5463 - val_precision_3: 0.8188 - val_recall_3: 0.9757\n",
|
3180 |
-
"Epoch 3/100\n",
|
3181 |
-
"\u001b[1m493/493\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m394s\u001b[0m 799ms/step - accuracy: 0.8461 - loss: 0.4060 - precision_3: 0.8763 - recall_3: 0.9506 - val_accuracy: 0.7590 - val_loss: 0.6189 - val_precision_3: 0.8438 - val_recall_3: 0.8636\n",
|
3182 |
-
"Epoch 4/100\n",
|
3183 |
-
"\u001b[1m493/493\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m383s\u001b[0m 776ms/step - accuracy: 0.8472 - loss: 0.3938 - precision_3: 0.8793 - recall_3: 0.9476 - val_accuracy: 0.7785 - val_loss: 0.5448 - val_precision_3: 0.8263 - val_recall_3: 0.9214\n",
|
3184 |
-
"Epoch 5/100\n",
|
3185 |
-
"\u001b[1m493/493\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m361s\u001b[0m 733ms/step - accuracy: 0.8579 - loss: 0.3752 - precision_3: 0.8865 - recall_3: 0.9524 - val_accuracy: 0.7856 - val_loss: 0.6732 - val_precision_3: 0.8133 - val_recall_3: 0.9559\n",
|
3186 |
-
"Epoch 6/100\n",
|
3187 |
-
"\u001b[1m493/493\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m368s\u001b[0m 747ms/step - accuracy: 0.8504 - loss: 0.3840 - precision_3: 0.8866 - recall_3: 0.9417 - val_accuracy: 0.8104 - val_loss: 0.5844 - val_precision_3: 0.8233 - val_recall_3: 0.9766\n",
|
3188 |
-
"Epoch 7/100\n",
|
3189 |
-
"\u001b[1m493/493\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m366s\u001b[0m 743ms/step - accuracy: 0.8543 - loss: 0.3848 - precision_3: 0.8870 - recall_3: 0.9468 - val_accuracy: 0.7868 - val_loss: 0.6400 - val_precision_3: 0.8209 - val_recall_3: 0.9439\n",
|
3190 |
-
"Epoch 8/100\n",
|
3191 |
-
"\u001b[1m493/493\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m359s\u001b[0m 728ms/step - accuracy: 0.8572 - loss: 0.3700 - precision_3: 0.8884 - recall_3: 0.9486 - val_accuracy: 0.7965 - val_loss: 0.5693 - val_precision_3: 0.8252 - val_recall_3: 0.9514\n",
|
3192 |
-
"Epoch 9/100\n",
|
3193 |
-
"\u001b[1m493/493\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m363s\u001b[0m 737ms/step - accuracy: 0.8564 - loss: 0.3684 - precision_3: 0.8890 - recall_3: 0.9467 - val_accuracy: 0.7939 - val_loss: 0.6147 - val_precision_3: 0.8341 - val_recall_3: 0.9319\n",
|
3194 |
-
"Epoch 10/100\n",
|
3195 |
-
"\u001b[1m493/493\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m369s\u001b[0m 749ms/step - accuracy: 0.8603 - loss: 0.3629 - precision_3: 0.8904 - recall_3: 0.9500 - val_accuracy: 0.8026 - val_loss: 0.5798 - val_precision_3: 0.8311 - val_recall_3: 0.9505\n",
|
3196 |
-
"Epoch 11/100\n",
|
3197 |
-
"\u001b[1m493/493\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m368s\u001b[0m 746ms/step - accuracy: 0.8634 - loss: 0.3589 - precision_3: 0.8931 - recall_3: 0.9507 - val_accuracy: 0.7573 - val_loss: 0.6606 - val_precision_3: 0.8164 - val_recall_3: 0.9052\n"
|
3198 |
-
]
|
3199 |
-
},
|
3200 |
-
{
|
3201 |
-
"name": "stderr",
|
3202 |
-
"output_type": "stream",
|
3203 |
-
"text": [
|
3204 |
-
"INFO:root:🧐 evaluating model\n"
|
3205 |
-
]
|
3206 |
-
},
|
3207 |
-
{
|
3208 |
-
"name": "stdout",
|
3209 |
-
"output_type": "stream",
|
3210 |
-
"text": [
|
3211 |
-
"\u001b[1m124/124\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m72s\u001b[0m 579ms/step - accuracy: 0.6824 - loss: 0.8199 - precision_3: 0.6028 - recall_3: 0.7630\n",
|
3212 |
-
"\u001b[1m124/124\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m74s\u001b[0m 586ms/step\n"
|
3213 |
-
]
|
3214 |
-
},
|
3215 |
-
{
|
3216 |
-
"name": "stderr",
|
3217 |
-
"output_type": "stream",
|
3218 |
-
"text": [
|
3219 |
-
"INFO:root:📈 plotting results\n"
|
3220 |
-
]
|
3221 |
-
},
|
3222 |
{
|
3223 |
"name": "stdout",
|
3224 |
"output_type": "stream",
|
3225 |
"text": [
|
3226 |
-
"
|
|
|
3227 |
"\n",
|
3228 |
-
"
|
3229 |
-
"
|
3230 |
"\n",
|
3231 |
-
"
|
3232 |
-
"
|
3233 |
-
"weighted avg 0.03 0.16 0.05 5908\n",
|
3234 |
-
"\n"
|
3235 |
]
|
3236 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3237 |
{
|
3238 |
"name": "stderr",
|
3239 |
"output_type": "stream",
|
3240 |
"text": [
|
3241 |
-
"
|
3242 |
-
"
|
3243 |
-
"\n"
|
3244 |
-
"Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
|
3245 |
-
"\n",
|
3246 |
-
"/Users/julmat/Documents/hugging_face/frugal_cviz/.venv/lib/python3.12/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning:\n",
|
3247 |
-
"\n",
|
3248 |
-
"Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
|
3249 |
-
"\n",
|
3250 |
-
"/Users/julmat/Documents/hugging_face/frugal_cviz/.venv/lib/python3.12/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning:\n",
|
3251 |
-
"\n",
|
3252 |
-
"Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
|
3253 |
-
"\n",
|
3254 |
-
"/Users/julmat/Documents/hugging_face/frugal_cviz/.venv/lib/python3.12/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning:\n",
|
3255 |
-
"\n",
|
3256 |
-
"Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
|
3257 |
-
"\n",
|
3258 |
-
"/Users/julmat/Documents/hugging_face/frugal_cviz/.venv/lib/python3.12/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning:\n",
|
3259 |
-
"\n",
|
3260 |
-
"Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
|
3261 |
-
"\n",
|
3262 |
-
"/Users/julmat/Documents/hugging_face/frugal_cviz/.venv/lib/python3.12/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning:\n",
|
3263 |
-
"\n",
|
3264 |
-
"Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
|
3265 |
-
"\n"
|
3266 |
]
|
3267 |
},
|
3268 |
{
|
3269 |
-
"
|
3270 |
-
"image/png": "",
|
3271 |
-
"text/plain": [
|
3272 |
-
"<Figure size 600x400 with 2 Axes>"
|
3273 |
-
]
|
3274 |
-
},
|
3275 |
-
"metadata": {},
|
3276 |
-
"output_type": "display_data"
|
3277 |
-
},
|
3278 |
-
{
|
3279 |
-
"name": "stderr",
|
3280 |
"output_type": "stream",
|
3281 |
"text": [
|
3282 |
-
"
|
|
|
3283 |
]
|
3284 |
}
|
3285 |
],
|
3286 |
"source": [
|
3287 |
"model_trained, history = eval_pretrained_model(\n",
|
3288 |
-
" model=
|
3289 |
" train_ds=train_ds,\n",
|
3290 |
" val_ds=val_ds,\n",
|
3291 |
" test_ds=test_ds,\n",
|
@@ -3366,7 +3285,7 @@
|
|
3366 |
"cell_type": "markdown",
|
3367 |
"metadata": {},
|
3368 |
"source": [
|
3369 |
-
"## <a id='
|
3370 |
]
|
3371 |
},
|
3372 |
{
|
@@ -3389,6 +3308,39 @@
|
|
3389 |
"tracker.start_task(\"inference\")"
|
3390 |
]
|
3391 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3392 |
{
|
3393 |
"cell_type": "code",
|
3394 |
"execution_count": 11,
|
|
|
16 |
" - [🚧 Image exploration](#toc3_3_) \n",
|
17 |
"- [Data preprocessing](#toc4_) \n",
|
18 |
"- [🦄🦄 CHECKPOINT 🦄🦄](#toc5_) \n",
|
19 |
+
"- [Model Training](#toc6_) \n",
|
20 |
+
" - [Load configuration](#toc6_1_) \n",
|
21 |
+
"- [🚧 MODEL CHOICE](#toc7_) \n",
|
22 |
+
" - [Prepare data](#toc7_1_) \n",
|
23 |
+
" - [Prepare model](#toc7_2_) \n",
|
24 |
+
" - [Training](#toc7_3_) \n",
|
25 |
+
" - [Random Baseline](#toc7_4_) \n",
|
|
|
26 |
"\n",
|
27 |
"<!-- vscode-jupyter-toc-config\n",
|
28 |
"\tnumbering=false\n",
|
|
|
70 |
"text": [
|
71 |
"/Users/julmat/Documents/hugging_face/frugal_cviz/.venv/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
72 |
" from .autonotebook import tqdm as notebook_tqdm\n",
|
73 |
+
"[codecarbon WARNING @ 23:01:31] Multiple instances of codecarbon are allowed to run at the same time.\n",
|
74 |
+
"[codecarbon INFO @ 23:01:31] [setup] RAM Tracking...\n",
|
75 |
+
"[codecarbon INFO @ 23:01:31] [setup] CPU Tracking...\n",
|
76 |
+
"[codecarbon WARNING @ 23:01:31] No CPU tracking mode found. Falling back on CPU constant mode. \n",
|
77 |
" Mac OS and ARM processor detected: Please enable PowerMetrics sudo to measure CPU\n",
|
78 |
"\n",
|
79 |
+
"[codecarbon INFO @ 23:01:31] CPU Model on constant consumption mode: Apple M1\n",
|
80 |
+
"[codecarbon INFO @ 23:01:31] [setup] GPU Tracking...\n",
|
81 |
+
"[codecarbon INFO @ 23:01:31] No GPU found.\n",
|
82 |
+
"[codecarbon INFO @ 23:01:31] >>> Tracker's metadata:\n",
|
83 |
+
"[codecarbon INFO @ 23:01:31] Platform system: macOS-15.2-arm64-arm-64bit\n",
|
84 |
+
"[codecarbon INFO @ 23:01:31] Python version: 3.12.7\n",
|
85 |
+
"[codecarbon INFO @ 23:01:31] CodeCarbon version: 2.8.3\n",
|
86 |
+
"[codecarbon INFO @ 23:01:31] Available RAM : 16.000 GB\n",
|
87 |
+
"[codecarbon INFO @ 23:01:31] CPU count: 8\n",
|
88 |
+
"[codecarbon INFO @ 23:01:31] CPU model: Apple M1\n",
|
89 |
+
"[codecarbon INFO @ 23:01:31] GPU count: None\n",
|
90 |
+
"[codecarbon INFO @ 23:01:31] GPU model: None\n",
|
91 |
+
"[codecarbon INFO @ 23:01:34] Saving emissions data to file /Users/julmat/Documents/hugging_face/frugal_cviz/emissions.csv\n"
|
92 |
]
|
93 |
}
|
94 |
],
|
|
|
105 |
"\n",
|
106 |
"# ML\n",
|
107 |
"from keras import Model\n",
|
108 |
+
"from keras.applications import EfficientNetB0, EfficientNetV2M\n",
|
109 |
+
"from keras.layers import Flatten, Dense, Dropout\n",
|
110 |
"from keras.metrics import Precision, Recall\n",
|
111 |
"from keras.optimizers import AdamW\n",
|
112 |
"from keras.utils import image_dataset_from_directory\n",
|
|
|
125 |
"from tasks.image import parse_boxes, compute_iou, compute_max_iou\n",
|
126 |
"\n",
|
127 |
"\n",
|
128 |
+
"# Logging outputs config (DEBUG < INFO)\n",
|
129 |
"logger = logging.getLogger()\n",
|
130 |
"logger.setLevel(logging.INFO)"
|
131 |
]
|
|
|
148 |
" cfg = yaml.safe_load(f)\n",
|
149 |
"# Data\n",
|
150 |
"OUTPUT_DIR = cfg[\"data_root_dir\"]\n",
|
151 |
+
"DB_RAW_INFO_URI = os.path.join(OUTPUT_DIR, cfg[\"db_raw_info_uri\"])\n",
|
152 |
+
"DB_KERAS_INFO_URI = os.path.join(OUTPUT_DIR, cfg[\"db_keras_info_uri\"])\n",
|
153 |
+
"DB_AUG_INFO_URI = os.path.join(OUTPUT_DIR, cfg[\"db_aug_info_uri\"])\n",
|
154 |
"REPO_ID = cfg[\"repo_id\"]\n",
|
155 |
"SPLIT_SIZE = cfg[\"split_size\"]\n",
|
156 |
"RDM_SEED = cfg[\"rdm_seed\"]\n",
|
|
|
1859 |
"metadata": {},
|
1860 |
"outputs": [
|
1861 |
{
|
1862 |
+
"name": "stderr",
|
1863 |
"output_type": "stream",
|
1864 |
"text": [
|
1865 |
+
"INFO:root:data/data_keras_info.csv already exists: data already formatted\n"
|
|
|
1866 |
]
|
1867 |
}
|
1868 |
],
|
1869 |
"source": [
|
|
|
1870 |
"df_format = format_data_keras(df_clean.copy())"
|
1871 |
]
|
1872 |
},
|
|
|
1879 |
"- update dataframe"
|
1880 |
]
|
1881 |
},
|
1882 |
+
{
|
1883 |
+
"cell_type": "markdown",
|
1884 |
+
"metadata": {},
|
1885 |
+
"source": [
|
1886 |
+
"> 💡 Much better... but a quality balance issue just appeared: \"no_smoke\" images are often blurred, flipped, equalized or rotated whereas \"smoke\" images are not.\n",
|
1887 |
+
"\n",
|
1888 |
+
"👉 The following algorithm oversamples and balances quality across classes by applying the same transformations to an equal number of images of the other class"
|
1889 |
+
]
|
1890 |
+
},
|
1891 |
{
|
1892 |
"cell_type": "code",
|
1893 |
"execution_count": 13,
|
|
|
1897 |
"name": "stderr",
|
1898 |
"output_type": "stream",
|
1899 |
"text": [
|
1900 |
+
"INFO:root:data/data_aug_info.csv already exists: data already formatted\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1901 |
]
|
1902 |
}
|
1903 |
],
|
1904 |
"source": [
|
1905 |
+
"df_aug = oversample_class(df_format)"
|
|
|
1906 |
]
|
1907 |
},
|
1908 |
{
|
|
|
1914 |
},
|
1915 |
{
|
1916 |
"cell_type": "code",
|
1917 |
+
"execution_count": 14,
|
1918 |
"metadata": {},
|
1919 |
"outputs": [
|
1920 |
{
|
|
|
2827 |
"cell_type": "markdown",
|
2828 |
"metadata": {},
|
2829 |
"source": [
|
2830 |
+
"Keep last dataframe as model basis"
|
2831 |
]
|
2832 |
},
|
2833 |
{
|
2834 |
+
"cell_type": "code",
|
2835 |
+
"execution_count": 15,
|
2836 |
"metadata": {},
|
2837 |
+
"outputs": [],
|
2838 |
"source": [
|
2839 |
+
"df = df_aug.copy()"
|
2840 |
]
|
2841 |
},
|
2842 |
{
|
2843 |
"cell_type": "markdown",
|
2844 |
"metadata": {},
|
2845 |
"source": [
|
2846 |
+
"New splits distribution"
|
2847 |
]
|
2848 |
},
|
2849 |
{
|
|
|
2852 |
"metadata": {},
|
2853 |
"outputs": [
|
2854 |
{
|
2855 |
+
"data": {
|
2856 |
+
"image/png": "",
|
2857 |
+
"text/plain": [
|
2858 |
+
"<Figure size 640x480 with 1 Axes>"
|
2859 |
+
]
|
2860 |
+
},
|
2861 |
+
"metadata": {},
|
2862 |
+
"output_type": "display_data"
|
|
|
2863 |
}
|
2864 |
],
|
2865 |
"source": [
|
2866 |
+
"labels_distr = df.split.value_counts()\n",
|
2867 |
+
"\n",
|
2868 |
+
"plt.pie(\n",
|
2869 |
+
" labels_distr,\n",
|
2870 |
+
" labels=labels_distr.index,\n",
|
2871 |
+
" autopct=make_autopct(labels_distr),\n",
|
2872 |
+
")\n",
|
2873 |
+
"plt.show()"
|
2874 |
]
|
2875 |
},
|
2876 |
{
|
2877 |
"cell_type": "markdown",
|
2878 |
"metadata": {},
|
2879 |
"source": [
|
2880 |
+
"# <a id='toc6_'></a>[Model Training](#toc0_)"
|
2881 |
]
|
2882 |
},
|
2883 |
{
|
2884 |
"cell_type": "markdown",
|
2885 |
"metadata": {},
|
2886 |
"source": [
|
2887 |
+
"## <a id='toc6_1_'></a>[Load configuration](#toc0_)"
|
2888 |
]
|
2889 |
},
|
2890 |
{
|
2891 |
"cell_type": "markdown",
|
2892 |
"metadata": {},
|
2893 |
"source": [
|
2894 |
+
"# <a id='toc7_'></a>[🚧 MODEL CHOICE](#toc0_)\n",
|
2895 |
"\n",
|
2896 |
"- https://paperswithcode.com/sota/image-classification-on-imagenet\n",
|
2897 |
+
"- https://keras.io/api/applications/efficientnet_v2/#efficientnetv2m-function\n",
|
2898 |
+
"- [ViTForImageClassification](https://huggingface.co/docs/transformers/main/en/model_doc/vit)"
|
2899 |
+
]
|
2900 |
+
},
|
2901 |
+
{
|
2902 |
+
"cell_type": "markdown",
|
2903 |
+
"metadata": {},
|
2904 |
+
"source": [
|
2905 |
+
"# <a id='toc5_'></a>[🦄🦄 CHECKPOINT 🦄🦄](#toc0_)"
|
2906 |
]
|
2907 |
},
|
2908 |
{
|
2909 |
"cell_type": "code",
|
2910 |
+
"execution_count": 17,
|
2911 |
"metadata": {},
|
2912 |
+
"outputs": [
|
2913 |
+
{
|
2914 |
+
"ename": "NameError",
|
2915 |
+
"evalue": "name 'stop' is not defined",
|
2916 |
+
"output_type": "error",
|
2917 |
+
"traceback": [
|
2918 |
+
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
2919 |
+
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
|
2920 |
+
"Cell \u001b[0;32mIn[17], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mstop\u001b[49m\n",
|
2921 |
+
"\u001b[0;31mNameError\u001b[0m: name 'stop' is not defined"
|
2922 |
+
]
|
2923 |
+
}
|
2924 |
+
],
|
2925 |
"source": [
|
2926 |
+
"stop"
|
|
|
|
|
2927 |
]
|
2928 |
},
|
2929 |
{
|
2930 |
"cell_type": "code",
|
2931 |
+
"execution_count": 18,
|
2932 |
"metadata": {},
|
2933 |
"outputs": [],
|
2934 |
"source": [
|
2935 |
"# Model config\n",
|
2936 |
+
"model_name = \"EfficientNetV2M\"\n",
|
2937 |
"input_size = (224, 224)\n",
|
2938 |
+
"batch_size = 32\n",
|
|
|
2939 |
"optimizer = AdamW(learning_rate=0.0002, weight_decay=0.05)\n",
|
2940 |
+
"# model_name = \"EfficientNetB0\"\n",
|
2941 |
+
"# input_size = (224, 224)\n",
|
2942 |
+
"# batch_size = 48\n",
|
2943 |
+
"# optimizer = AdamW(learning_rate=0.0002, weight_decay=0.05)\n",
|
2944 |
+
"n_epochs = 100\n",
|
2945 |
"loss = \"binary_crossentropy\"\n",
|
2946 |
"metrics = [\"accuracy\", Precision(), Recall()]\n",
|
2947 |
"# metrics = [\"accuracy\", Precision(), Recall(), \"f1_score\"]\n",
|
|
|
2961 |
"cell_type": "markdown",
|
2962 |
"metadata": {},
|
2963 |
"source": [
|
2964 |
+
"## <a id='toc7_2_'></a>[Prepare model](#toc0_)"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2965 |
]
|
2966 |
},
|
2967 |
{
|
2968 |
"cell_type": "code",
|
2969 |
+
"execution_count": 19,
|
2970 |
"metadata": {},
|
2971 |
"outputs": [
|
2972 |
{
|
2973 |
"data": {
|
2974 |
"text/html": [
|
2975 |
+
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\">Model: \"functional\"</span>\n",
|
2976 |
"</pre>\n"
|
2977 |
],
|
2978 |
"text/plain": [
|
2979 |
+
"\u001b[1mModel: \"functional\"\u001b[0m\n"
|
2980 |
]
|
2981 |
},
|
2982 |
"metadata": {},
|
|
|
2985 |
{
|
2986 |
"data": {
|
2987 |
"text/html": [
|
2988 |
+
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━┓\n",
|
2989 |
+
"┃<span style=\"font-weight: bold\"> Layer (type) </span>┃<span style=\"font-weight: bold\"> Output Shape </span>┃<span style=\"font-weight: bold\"> Param # </span>┃<span style=\"font-weight: bold\"> Trai… </span>┃\n",
|
2990 |
+
"┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━┩\n",
|
2991 |
+
"│ input_layer_1 (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">InputLayer</span>) │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">224</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">224</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">3</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ <span style=\"font-weight: bold\">-</span> │\n",
|
2992 |
+
"├─────────────────────────────┼───────────────────────┼────────────┼───────┤\n",
|
2993 |
+
"│ efficientnetv2-m │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">7</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">7</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">1280</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">53,150,388</span> │ <span style=\"color: #ff0000; text-decoration-color: #ff0000; font-weight: bold\">N</span> │\n",
|
2994 |
+
"│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Functional</span>) │ │ │ │\n",
|
2995 |
+
"├─────────────────────────────┼───────────────────────┼────────────┼───────┤\n",
|
2996 |
+
"│ flatten (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Flatten</span>) │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">62720</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ <span style=\"font-weight: bold\">-</span> │\n",
|
2997 |
+
"├─────────────────────────────┼───────────────────────┼────────────┼───────┤\n",
|
2998 |
+
"│ dense (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Dense</span>) │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">1</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">62,721</span> │ <span style=\"color: #00af00; text-decoration-color: #00af00; font-weight: bold\">Y</span> │\n",
|
2999 |
+
"└─────────────────────────────┴───────────────────────┴────────────┴───────┘\n",
|
3000 |
"</pre>\n"
|
3001 |
],
|
3002 |
"text/plain": [
|
3003 |
+
"┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━┓\n",
|
3004 |
+
"┃\u001b[1m \u001b[0m\u001b[1mLayer (type) \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mOutput Shape \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m Param #\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mTrai…\u001b[0m\u001b[1m \u001b[0m┃\n",
|
3005 |
+
"┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━┩\n",
|
3006 |
+
"│ input_layer_1 (\u001b[38;5;33mInputLayer\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m224\u001b[0m, \u001b[38;5;34m224\u001b[0m, \u001b[38;5;34m3\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ \u001b[1m-\u001b[0m │\n",
|
3007 |
+
"├─────────────────────────────┼───────────────────────┼────────────┼───────┤\n",
|
3008 |
+
"│ efficientnetv2-m │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m7\u001b[0m, \u001b[38;5;34m7\u001b[0m, \u001b[38;5;34m1280\u001b[0m) │ \u001b[38;5;34m53,150,388\u001b[0m │ \u001b[1;91mN\u001b[0m │\n",
|
3009 |
+
"│ (\u001b[38;5;33mFunctional\u001b[0m) │ │ │ │\n",
|
3010 |
+
"├─────────────────────────────┼───────────────────────┼────────────┼───────┤\n",
|
3011 |
+
"│ flatten (\u001b[38;5;33mFlatten\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m62720\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ \u001b[1m-\u001b[0m │\n",
|
3012 |
+
"├─────────────────────────────┼───────────────────────┼────────────┼───────┤\n",
|
3013 |
+
"│ dense (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m1\u001b[0m) │ \u001b[38;5;34m62,721\u001b[0m │ \u001b[1;38;5;34mY\u001b[0m │\n",
|
3014 |
+
"└─────────────────────────────┴───────────────────────┴────────────┴───────┘\n"
|
3015 |
]
|
3016 |
},
|
3017 |
"metadata": {},
|
|
|
3020 |
{
|
3021 |
"data": {
|
3022 |
"text/html": [
|
3023 |
+
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Total params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">53,213,109</span> (202.99 MB)\n",
|
3024 |
"</pre>\n"
|
3025 |
],
|
3026 |
"text/plain": [
|
3027 |
+
"\u001b[1m Total params: \u001b[0m\u001b[38;5;34m53,213,109\u001b[0m (202.99 MB)\n"
|
3028 |
]
|
3029 |
},
|
3030 |
"metadata": {},
|
|
|
3046 |
{
|
3047 |
"data": {
|
3048 |
"text/html": [
|
3049 |
+
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Non-trainable params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">53,150,388</span> (202.75 MB)\n",
|
3050 |
"</pre>\n"
|
3051 |
],
|
3052 |
"text/plain": [
|
3053 |
+
"\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;34m53,150,388\u001b[0m (202.75 MB)\n"
|
3054 |
]
|
3055 |
},
|
3056 |
"metadata": {},
|
|
|
3059 |
],
|
3060 |
"source": [
|
3061 |
"# Load pre-trained model without top layers\n",
|
3062 |
+
"base_model = EfficientNetV2M(\n",
|
3063 |
" weights=\"imagenet\", # pre-trained weights\n",
|
3064 |
" include_top=False, # no dense layer\n",
|
3065 |
+
" input_shape=(input_size[0], input_size[1], 3), # input shape,\n",
|
3066 |
")\n",
|
3067 |
+
"\n",
|
3068 |
+
"# For feature extraction\n",
|
3069 |
+
"base_model.trainable = False\n",
|
3070 |
+
"# # For partial fine-tuning: freeze bottom layers\n",
|
3071 |
+
"# for layer in base_model.layers[:735]:\n",
|
3072 |
+
"# layer.trainable = False\n",
|
3073 |
+
"\n",
|
3074 |
"# Create explicit input layer\n",
|
3075 |
"inputs = tf.keras.Input(shape=(input_size[0], input_size[1], 3))\n",
|
|
|
|
|
|
|
|
|
3076 |
"\n",
|
3077 |
+
"x = base_model(inputs)\n",
|
3078 |
+
"\n",
|
3079 |
+
"# # Dropout for regularization\n",
|
3080 |
+
"# x = Dropout(0.2)(x)\n",
|
3081 |
"# Flatten output\n",
|
3082 |
"x = Flatten()(x)\n",
|
|
|
|
|
|
|
3083 |
"# New FC layer for binary classification\n",
|
3084 |
"predictions = Dense(1, activation=\"sigmoid\")(x)\n",
|
3085 |
+
"\n",
|
3086 |
"# Define new model\n",
|
3087 |
+
"model_ready = Model(inputs=inputs, outputs=predictions)\n",
|
3088 |
+
"\n",
|
3089 |
"# Display model summary\n",
|
3090 |
+
"model_ready.summary(show_trainable=True)"
|
3091 |
]
|
3092 |
},
|
3093 |
{
|
3094 |
"cell_type": "markdown",
|
3095 |
"metadata": {},
|
3096 |
"source": [
|
3097 |
+
"## <a id='toc7_1_'></a>[Prepare data](#toc0_)"
|
3098 |
]
|
3099 |
},
|
3100 |
{
|
3101 |
"cell_type": "markdown",
|
3102 |
"metadata": {},
|
3103 |
"source": [
|
3104 |
+
"Create datasets from local images and labels"
|
3105 |
]
|
3106 |
},
|
3107 |
{
|
3108 |
"cell_type": "code",
|
3109 |
+
"execution_count": 20,
|
3110 |
"metadata": {},
|
3111 |
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3112 |
{
|
3113 |
"name": "stdout",
|
3114 |
"output_type": "stream",
|
3115 |
"text": [
|
3116 |
+
"Train dataset:\n",
|
3117 |
+
"Found 39654 files belonging to 2 classes.\n",
|
3118 |
"\n",
|
3119 |
+
"Val dataset:\n",
|
3120 |
+
"Found 6670 files belonging to 2 classes.\n",
|
3121 |
"\n",
|
3122 |
+
"Test dataset:\n",
|
3123 |
+
"Found 5908 files belonging to 2 classes.\n"
|
|
|
|
|
3124 |
]
|
3125 |
+
}
|
3126 |
+
],
|
3127 |
+
"source": [
|
3128 |
+
"# Prepare for outputs\n",
|
3129 |
+
"os.makedirs(MODELS_ROOT_DIR, exist_ok=True)\n",
|
3130 |
+
"y = df[\"label\"]\n",
|
3131 |
+
"X = df[\"uri\"]\n",
|
3132 |
+
"\n",
|
3133 |
+
"# Create datasets\n",
|
3134 |
+
"print(\"Train dataset:\")\n",
|
3135 |
+
"train_ds = image_dataset_from_directory(\n",
|
3136 |
+
" train_dir,\n",
|
3137 |
+
" labels=\"inferred\", # class names upon folders structure\n",
|
3138 |
+
" label_mode=\"int\", # integer encoding\n",
|
3139 |
+
" shuffle=True, # shuffle images\n",
|
3140 |
+
" seed=42, # random seed\n",
|
3141 |
+
" image_size=input_size, # automatic resizing\n",
|
3142 |
+
" batch_size=batch_size, # tensor shape[0]\n",
|
3143 |
+
")\n",
|
3144 |
+
"\n",
|
3145 |
+
"print(\"\\nVal dataset:\")\n",
|
3146 |
+
"val_ds = image_dataset_from_directory(\n",
|
3147 |
+
" val_dir,\n",
|
3148 |
+
" labels=\"inferred\", # class names upon folders structure\n",
|
3149 |
+
" label_mode=\"int\", # integer encoding\n",
|
3150 |
+
" shuffle=True, # shuffle images\n",
|
3151 |
+
" seed=42, # random seed\n",
|
3152 |
+
" image_size=input_size, # automatic resizing\n",
|
3153 |
+
" batch_size=batch_size, # tensor shape[0]\n",
|
3154 |
+
")\n",
|
3155 |
+
"\n",
|
3156 |
+
"print(\"\\nTest dataset:\")\n",
|
3157 |
+
"test_ds = image_dataset_from_directory(\n",
|
3158 |
+
" test_dir,\n",
|
3159 |
+
" labels=\"inferred\", # class names upon folders structure\n",
|
3160 |
+
" label_mode=\"int\", # integer encoding\n",
|
3161 |
+
" shuffle=False, # do not shuffle images\n",
|
3162 |
+
" seed=42, # random seed\n",
|
3163 |
+
" image_size=input_size, # automatic resizing\n",
|
3164 |
+
" batch_size=batch_size, # tensor shape[0]\n",
|
3165 |
+
")"
|
3166 |
+
]
|
3167 |
+
},
|
3168 |
+
{
|
3169 |
+
"cell_type": "markdown",
|
3170 |
+
"metadata": {},
|
3171 |
+
"source": [
|
3172 |
+
"## <a id='toc7_3_'></a>[Training](#toc0_)"
|
3173 |
+
]
|
3174 |
+
},
|
3175 |
+
{
|
3176 |
+
"cell_type": "markdown",
|
3177 |
+
"metadata": {},
|
3178 |
+
"source": [
|
3179 |
+
"Follow with : `tensorboard --logdir models/EfficientNetB0/runs`"
|
3180 |
+
]
|
3181 |
+
},
|
3182 |
+
{
|
3183 |
+
"cell_type": "code",
|
3184 |
+
"execution_count": 101,
|
3185 |
+
"metadata": {},
|
3186 |
+
"outputs": [
|
3187 |
{
|
3188 |
"name": "stderr",
|
3189 |
"output_type": "stream",
|
3190 |
"text": [
|
3191 |
+
"INFO:root:⚙️ compiling\n",
|
3192 |
+
"INFO:root:🛎️ declaring callbacks\n",
|
3193 |
+
"INFO:root:💪 starting training\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3194 |
]
|
3195 |
},
|
3196 |
{
|
3197 |
+
"name": "stdout",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3198 |
"output_type": "stream",
|
3199 |
"text": [
|
3200 |
+
"Epoch 1/100\n",
|
3201 |
+
"\u001b[1m 7/1240\u001b[0m \u001b[37m━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[1m36:58\u001b[0m 2s/step - accuracy: 0.4936 - loss: 0.8204 - precision: 0.5297 - recall: 0.5054"
|
3202 |
]
|
3203 |
}
|
3204 |
],
|
3205 |
"source": [
|
3206 |
"model_trained, history = eval_pretrained_model(\n",
|
3207 |
+
" model=model_ready,\n",
|
3208 |
" train_ds=train_ds,\n",
|
3209 |
" val_ds=val_ds,\n",
|
3210 |
" test_ds=test_ds,\n",
|
|
|
3285 |
"cell_type": "markdown",
|
3286 |
"metadata": {},
|
3287 |
"source": [
|
3288 |
+
"## <a id='toc7_4_'></a>[Random Baseline](#toc0_)"
|
3289 |
]
|
3290 |
},
|
3291 |
{
|
|
|
3308 |
"tracker.start_task(\"inference\")"
|
3309 |
]
|
3310 |
},
|
3311 |
+
{
|
3312 |
+
"cell_type": "markdown",
|
3313 |
+
"metadata": {},
|
3314 |
+
"source": [
|
3315 |
+
"# 🚧 INFERENCE"
|
3316 |
+
]
|
3317 |
+
},
|
3318 |
+
{
|
3319 |
+
"cell_type": "code",
|
3320 |
+
"execution_count": null,
|
3321 |
+
"metadata": {},
|
3322 |
+
"outputs": [],
|
3323 |
+
"source": [
|
3324 |
+
"# import keras\n",
|
3325 |
+
"# from keras.applications.resnet50 import ResNet50\n",
|
3326 |
+
"# from keras.applications.resnet50 import preprocess_input, decode_predictions\n",
|
3327 |
+
"# import numpy as np\n",
|
3328 |
+
"\n",
|
3329 |
+
"# model = ResNet50(weights='imagenet')\n",
|
3330 |
+
"\n",
|
3331 |
+
"# img_path = 'elephant.jpg'\n",
|
3332 |
+
"# img = keras.utils.load_img(img_path, target_size=(224, 224))\n",
|
3333 |
+
"# x = keras.utils.img_to_array(img)\n",
|
3334 |
+
"# x = np.expand_dims(x, axis=0)\n",
|
3335 |
+
"# x = preprocess_input(x)\n",
|
3336 |
+
"\n",
|
3337 |
+
"# preds = model.predict(x)\n",
|
3338 |
+
"# # decode the results into a list of tuples (class, description, probability)\n",
|
3339 |
+
"# # (one such list for each sample in the batch)\n",
|
3340 |
+
"# print('Predicted:', decode_predictions(preds, top=3)[0])\n",
|
3341 |
+
"# # Predicted: [(u'n02504013', u'Indian_elephant', 0.82658225), (u'n01871265', u'tusker', 0.1122357), (u'n02504458', u'African_elephant', 0.061040461)]\n"
|
3342 |
+
]
|
3343 |
+
},
|
3344 |
{
|
3345 |
"cell_type": "code",
|
3346 |
"execution_count": 11,
|
README.md
CHANGED
@@ -12,11 +12,10 @@ datasets:
|
|
12 |
|
13 |
# 🚧 TODO
|
14 |
|
15 |
-
- 👉
|
16 |
-
- réduire **n_epochs**
|
17 |
- voir répartition partenaires, caméras, temporalité, annotations
|
18 |
- métriques : **matrice de confusion** complète
|
19 |
-
- décrire
|
20 |
- tester plusieurs pré-entraînements (est-ce que charger un modèle pré-entraîné ImageNet aide vraiment ?)
|
21 |
- tester si amélioration inférence avec et sans égalisation
|
22 |
- voir répartition physique des annotations sur l'image
|
|
|
12 |
|
13 |
# 🚧 TODO
|
14 |
|
15 |
+
- 👉 tester EfficientNetV2M
|
|
|
16 |
- voir répartition partenaires, caméras, temporalité, annotations
|
17 |
- métriques : **matrice de confusion** complète
|
18 |
+
- décrire **erreurs de types et conséquences**
|
19 |
- tester plusieurs pré-entraînements (est-ce que charger un modèle pré-entraîné ImageNet aide vraiment ?)
|
20 |
- tester si amélioration inférence avec et sans égalisation
|
21 |
- voir répartition physique des annotations sur l'image
|
config.yaml
CHANGED
@@ -7,7 +7,9 @@ rdm_seed: 42
|
|
7 |
data_root_dir: "data"
|
8 |
raw_data_dir: "raw"
|
9 |
clr_hf_cache_script_abs_path: './src/clear_hf_cache.sh'
|
10 |
-
|
|
|
|
|
11 |
|
12 |
# Models
|
13 |
models_common:
|
|
|
7 |
data_root_dir: "data"
|
8 |
raw_data_dir: "raw"
|
9 |
clr_hf_cache_script_abs_path: './src/clear_hf_cache.sh'
|
10 |
+
db_raw_info_uri: "data_raw_info.csv"
|
11 |
+
db_keras_info_uri: "data_keras_info.csv"
|
12 |
+
db_aug_info_uri: "data_aug_info.csv"
|
13 |
|
14 |
# Models
|
15 |
models_common:
|
src/load_data.py
CHANGED
@@ -6,13 +6,14 @@ import os
|
|
6 |
import pandas as pd
|
7 |
from PIL import Image, ImageOps, ImageEnhance, ImageFilter
|
8 |
import shutil
|
|
|
9 |
import subprocess
|
10 |
import yaml
|
11 |
|
12 |
|
13 |
-
# Logging
|
14 |
logger = logging.getLogger()
|
15 |
-
logger.setLevel(logging.
|
16 |
|
17 |
# local config
|
18 |
with open("config.yaml", "r") as f:
|
@@ -23,7 +24,9 @@ RDM_SEED = cfg["rdm_seed"]
|
|
23 |
OUTPUT_DIR = cfg["data_root_dir"]
|
24 |
RAW_DATA_DIR = os.path.join(OUTPUT_DIR, cfg["raw_data_dir"])
|
25 |
CLR_CACHE_SCRIPT = cfg["clr_hf_cache_script_abs_path"]
|
26 |
-
|
|
|
|
|
27 |
|
28 |
|
29 |
# Save in Ultralytics format
|
@@ -81,8 +84,8 @@ def load_raw_data():
|
|
81 |
"""Main function for downloading, splitting and formatting data"""
|
82 |
|
83 |
# Check if data information already exists before eventually loading model
|
84 |
-
if os.path.exists(
|
85 |
-
df = pd.read_csv(
|
86 |
return df
|
87 |
|
88 |
# Load data
|
@@ -116,7 +119,7 @@ def load_raw_data():
|
|
116 |
df_train_4 = create_df(ds_train[18000:], "train", RAW_DATA_DIR)
|
117 |
df_val = create_df(ds_val, "val", RAW_DATA_DIR)
|
118 |
df_test = create_df(ds_test, "test", RAW_DATA_DIR)
|
119 |
-
# Save as one
|
120 |
df = pd.concat(
|
121 |
[df_train_1, df_train_2, df_train_3, df_train_4, df_val, df_test],
|
122 |
axis=0,
|
@@ -144,7 +147,8 @@ def load_raw_data():
|
|
144 |
],
|
145 |
]
|
146 |
# Save as CSV
|
147 |
-
|
|
|
148 |
df.to_csv(f)
|
149 |
|
150 |
# Clear HF default cache folder after it is done (6GB)
|
@@ -173,12 +177,14 @@ def format_data_keras(df):
|
|
173 |
logging.warning(f"{OUTPUT_DIR} doesn't exist: (re)load data first")
|
174 |
return df
|
175 |
|
176 |
-
# Create Keras parent folder
|
177 |
-
keras_dir = os.path.join(OUTPUT_DIR, "keras")
|
178 |
# Check if data already exists
|
179 |
-
if os.path.exists(
|
180 |
-
logging.info(f"{
|
|
|
181 |
return df
|
|
|
|
|
|
|
182 |
os.makedirs(keras_dir, exist_ok=True)
|
183 |
# Create splits folders
|
184 |
for split in df.split.unique():
|
@@ -199,11 +205,16 @@ def format_data_keras(df):
|
|
199 |
df.drop(columns="uri", inplace=True)
|
200 |
df.rename(columns={"uri_dest": "uri"}, inplace=True)
|
201 |
|
|
|
|
|
|
|
|
|
|
|
202 |
return df
|
203 |
|
204 |
|
205 |
-
def add_data_aug(aug_name, df_sample,
|
206 |
-
"""Add data augmentation
|
207 |
# Rename images and update URI
|
208 |
df_sample.loc[:, "name"] += f"_DA-{aug_name[:-4]}"
|
209 |
df_sample.rename(columns={'uri': 'input_uri'}, inplace=True)
|
@@ -225,25 +236,32 @@ def add_data_aug(aug_name, df_sample, df_aug):
|
|
225 |
img_aug = img.rotate(180)
|
226 |
else:
|
227 |
logging.warn("Wrong data augmentation name: passing")
|
228 |
-
return
|
229 |
|
230 |
img_aug.save(row['uri'])
|
|
|
|
|
231 |
|
232 |
-
# Add to dataframe
|
233 |
df_sample.drop(columns='input_uri', inplace=True)
|
234 |
-
result = pd.concat([df_aug, df_sample], axis=0, ignore_index=True)
|
235 |
|
236 |
-
logging.
|
237 |
|
238 |
-
return
|
239 |
|
240 |
|
241 |
def oversample_class(df):
|
242 |
"""Oversample an under-represented class"""
|
|
|
|
|
|
|
|
|
|
|
|
|
243 |
count_df = df.groupby(["split", "label"]).size().reset_index(name="count")
|
244 |
count_df = count_df.loc[count_df["split"] != "test"]
|
245 |
|
246 |
df_aug = df.copy()
|
|
|
247 |
|
248 |
for split in count_df.split.unique():
|
249 |
logging.info(f"⚙️ Processing {split} split...")
|
@@ -251,27 +269,52 @@ def oversample_class(df):
|
|
251 |
# Minimum label
|
252 |
idxmin = _["count"].idxmin()
|
253 |
min_row = _.loc[idxmin, :]
|
254 |
-
min_label = min_row["label"]
|
255 |
min_count = min_row["count"]
|
|
|
256 |
# Maximum label
|
257 |
idxmax = _["count"].idxmax()
|
258 |
max_row = _.loc[idxmax, :]
|
259 |
max_count = max_row["count"]
|
|
|
260 |
# Needed labels
|
261 |
need = max_count - min_count
|
262 |
|
263 |
-
logging.info(f"
|
|
|
264 |
|
265 |
# Loop over augmentation techniques until need is covered
|
266 |
for aug_name in ["blur_img", "flip_img", "blur_flip_img", "eq_img", "180_img"]:
|
267 |
-
|
268 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
269 |
|
270 |
-
|
271 |
-
|
|
|
|
|
272 |
|
273 |
-
|
274 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
275 |
|
276 |
return df_aug
|
277 |
|
|
|
6 |
import pandas as pd
|
7 |
from PIL import Image, ImageOps, ImageEnhance, ImageFilter
|
8 |
import shutil
|
9 |
+
from sklearn.model_selection import train_test_split
|
10 |
import subprocess
|
11 |
import yaml
|
12 |
|
13 |
|
14 |
+
# Logging outputs config (DEBUG < INFO)
|
15 |
logger = logging.getLogger()
|
16 |
+
logger.setLevel(logging.DEBUG)
|
17 |
|
18 |
# local config
|
19 |
with open("config.yaml", "r") as f:
|
|
|
24 |
OUTPUT_DIR = cfg["data_root_dir"]
|
25 |
RAW_DATA_DIR = os.path.join(OUTPUT_DIR, cfg["raw_data_dir"])
|
26 |
CLR_CACHE_SCRIPT = cfg["clr_hf_cache_script_abs_path"]
|
27 |
+
DB_RAW_INFO_URI = os.path.join(OUTPUT_DIR, cfg["db_raw_info_uri"])
|
28 |
+
DB_KERAS_INFO_URI = os.path.join(OUTPUT_DIR, cfg["db_keras_info_uri"])
|
29 |
+
DB_AUG_INFO_URI = os.path.join(OUTPUT_DIR, cfg["db_aug_info_uri"])
|
30 |
|
31 |
|
32 |
# Save in Ultralytics format
|
|
|
84 |
"""Main function for downloading, splitting and formatting data"""
|
85 |
|
86 |
# Check if data information already exists before eventually loading model
|
87 |
+
if os.path.exists(DB_RAW_INFO_URI):
|
88 |
+
df = pd.read_csv(DB_RAW_INFO_URI, index_col=0)
|
89 |
return df
|
90 |
|
91 |
# Load data
|
|
|
119 |
df_train_4 = create_df(ds_train[18000:], "train", RAW_DATA_DIR)
|
120 |
df_val = create_df(ds_val, "val", RAW_DATA_DIR)
|
121 |
df_test = create_df(ds_test, "test", RAW_DATA_DIR)
|
122 |
+
# Save as one dataframe
|
123 |
df = pd.concat(
|
124 |
[df_train_1, df_train_2, df_train_3, df_train_4, df_val, df_test],
|
125 |
axis=0,
|
|
|
147 |
],
|
148 |
]
|
149 |
# Save as CSV
|
150 |
+
logging.info(f"Dataframe saved in: {DB_RAW_INFO_URI}")
|
151 |
+
with open(DB_RAW_INFO_URI, "wb") as f:
|
152 |
df.to_csv(f)
|
153 |
|
154 |
# Clear HF default cache folder after it is done (6GB)
|
|
|
177 |
logging.warning(f"{OUTPUT_DIR} doesn't exist: (re)load data first")
|
178 |
return df
|
179 |
|
|
|
|
|
180 |
# Check if data already exists
|
181 |
+
if os.path.exists(DB_KERAS_INFO_URI):
|
182 |
+
logging.info(f"{DB_KERAS_INFO_URI} already exists: data already formatted")
|
183 |
+
df = pd.read_csv(DB_KERAS_INFO_URI, index_col=0)
|
184 |
return df
|
185 |
+
|
186 |
+
# Create Keras parent folder
|
187 |
+
keras_dir = os.path.join(OUTPUT_DIR, "keras")
|
188 |
os.makedirs(keras_dir, exist_ok=True)
|
189 |
# Create splits folders
|
190 |
for split in df.split.unique():
|
|
|
205 |
df.drop(columns="uri", inplace=True)
|
206 |
df.rename(columns={"uri_dest": "uri"}, inplace=True)
|
207 |
|
208 |
+
# Save as CSV
|
209 |
+
logging.info(f"Dataframe saved in: {DB_KERAS_INFO_URI}")
|
210 |
+
with open(DB_KERAS_INFO_URI, "wb") as f:
|
211 |
+
df.to_csv(f)
|
212 |
+
|
213 |
return df
|
214 |
|
215 |
|
216 |
+
def add_data_aug(aug_name, df_sample, replace=False):
|
217 |
+
"""Add data augmentation to a dataframe sample"""
|
218 |
# Rename images and update URI
|
219 |
df_sample.loc[:, "name"] += f"_DA-{aug_name[:-4]}"
|
220 |
df_sample.rename(columns={'uri': 'input_uri'}, inplace=True)
|
|
|
236 |
img_aug = img.rotate(180)
|
237 |
else:
|
238 |
logging.warn("Wrong data augmentation name: passing")
|
239 |
+
return None
|
240 |
|
241 |
img_aug.save(row['uri'])
|
242 |
+
if replace:
|
243 |
+
os.remove(row['input_uri'])
|
244 |
|
|
|
245 |
df_sample.drop(columns='input_uri', inplace=True)
|
|
|
246 |
|
247 |
+
logging.debug(f"\t✅ Transformed {len(df_sample)} images with {aug_name} (replace={replace})")
|
248 |
|
249 |
+
return df_sample
|
250 |
|
251 |
|
252 |
def oversample_class(df):
|
253 |
"""Oversample an under-represented class"""
|
254 |
+
# Check if data already exists
|
255 |
+
if os.path.exists(DB_AUG_INFO_URI):
|
256 |
+
logging.info(f"{DB_AUG_INFO_URI} already exists: data already formatted")
|
257 |
+
df = pd.read_csv(DB_AUG_INFO_URI, index_col=0)
|
258 |
+
return df
|
259 |
+
|
260 |
count_df = df.groupby(["split", "label"]).size().reset_index(name="count")
|
261 |
count_df = count_df.loc[count_df["split"] != "test"]
|
262 |
|
263 |
df_aug = df.copy()
|
264 |
+
df_aug.loc[:, "augmented"] = False
|
265 |
|
266 |
for split in count_df.split.unique():
|
267 |
logging.info(f"⚙️ Processing {split} split...")
|
|
|
269 |
# Minimum label
|
270 |
idxmin = _["count"].idxmin()
|
271 |
min_row = _.loc[idxmin, :]
|
|
|
272 |
min_count = min_row["count"]
|
273 |
+
min_label = min_row["label"]
|
274 |
# Maximum label
|
275 |
idxmax = _["count"].idxmax()
|
276 |
max_row = _.loc[idxmax, :]
|
277 |
max_count = max_row["count"]
|
278 |
+
max_label = max_row["label"]
|
279 |
# Needed labels
|
280 |
need = max_count - min_count
|
281 |
|
282 |
+
logging.info(f"Min class count = {min_count} ; Max class count = {max_count}")
|
283 |
+
logging.info(f"⚙️ Transforming {need} images...\n")
|
284 |
|
285 |
# Loop over augmentation techniques until need is covered
|
286 |
for aug_name in ["blur_img", "flip_img", "blur_flip_img", "eq_img", "180_img"]:
|
287 |
+
logging.info(f"\tApplying {aug_name[:-4]}")
|
288 |
+
if need <= 0:
|
289 |
+
break
|
290 |
+
|
291 |
+
# Prepare data samples to apply image transformation
|
292 |
+
df_sample_min = df.loc[(df["split"] == split) & (df["label"] == min_label)].iloc[:need].copy()
|
293 |
+
|
294 |
+
df_sample_max_full = df_aug.loc[(df_aug["split"] == split) & (df_aug["label"] == max_label) & (df_aug["augmented"] == False)].copy()
|
295 |
+
_, df_sample_max = train_test_split(df_sample_max_full, test_size=len(df_sample_min), random_state=RDM_SEED)
|
296 |
+
df_aug = df_aug[~df_aug['uri'].isin(df_sample_max['uri'])]
|
297 |
|
298 |
+
# Apply data augmentation for smaller class
|
299 |
+
df_sample_min_transf = add_data_aug(aug_name, df_sample_min)
|
300 |
+
if df_sample_min_transf is not None:
|
301 |
+
df_aug = pd.concat([df_aug, df_sample_min_transf], axis=0, ignore_index=True)
|
302 |
|
303 |
+
# Balance data quality by applying the same transformation to the most frequent class
|
304 |
+
df_sample_max_transf = add_data_aug(aug_name, df_sample_max, replace=True)
|
305 |
+
df_sample_max_transf.loc[:, "augmented"] = True
|
306 |
+
df_aug = pd.concat([df_aug, df_sample_max_transf], axis=0, ignore_index=True)
|
307 |
+
|
308 |
+
need -= len(df_sample_min_transf)
|
309 |
+
|
310 |
+
df_aug.drop(columns='augmented', inplace=True)
|
311 |
+
|
312 |
+
logging.info(f"Augmented dataframe shape = {df_aug.shape}\n")
|
313 |
+
|
314 |
+
# Save as CSV
|
315 |
+
logging.info(f"Dataframe saved in: {DB_AUG_INFO_URI}")
|
316 |
+
with open(DB_AUG_INFO_URI, "wb") as f:
|
317 |
+
df_aug.to_csv(f)
|
318 |
|
319 |
return df_aug
|
320 |
|