dnth committed on
Commit
ed9c2a5
·
0 Parent(s):

reduce train notebook

Browse files
Files changed (6) hide show
  1. .github/workflows/main.yml +21 -0
  2. .gitignore +3 -0
  3. app.py +73 -0
  4. gradioapp.ipynb +262 -0
  5. models/model.pth +3 -0
  6. train.ipynb +0 -0
.github/workflows/main.yml ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Sync to Hugging Face hub
2
+ on:
3
+ push:
4
+ branches: [main]
5
+
6
+ # to run this workflow manually from the Actions tab
7
+ workflow_dispatch:
8
+
9
+ jobs:
10
+ sync-to-hub:
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+
14
+
15
+ - uses: actions/checkout@v2
16
+ with:
17
+ fetch-depth: 0
18
+ - name: Push to hub
19
+ env:
20
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
21
+ run: git push --force https://dnth:$HF_TOKEN@huggingface.co/spaces/dnth/webdemo-fridge-detection main
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ .ipynb_checkpoints
2
+ .ipynb_checkpoints/*
3
+ models/*.pth
app.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from icevision.all import *
2
+ import icedata
3
+ import PIL, requests
4
+ import torch
5
+ from torchvision import transforms
6
+ import gradio as gr
7
+
8
+ # Download the dataset
9
+ url = "https://cvbp-secondary.z19.web.core.windows.net/datasets/object_detection/odFridgeObjects.zip"
10
+ dest_dir = "fridge"
11
+ data_dir = icedata.load_data(url, dest_dir)
12
+
13
+ # Create the parser
14
+ parser = parsers.VOCBBoxParser(annotations_dir=data_dir / "odFridgeObjects/annotations", images_dir=data_dir / "odFridgeObjects/images")
15
+
16
+ # Parse annotations to create records
17
+ train_records, valid_records = parser.parse()
18
+
19
+ class_map = parser.class_map
20
+
21
+ extra_args = {}
22
+ model_type = models.torchvision.retinanet
23
+ backbone = model_type.backbones.resnet50_fpn
24
+ # Instantiate the model
25
+ model = model_type.model(backbone=backbone(pretrained=True), num_classes=len(parser.class_map), **extra_args)
26
+
27
+ # Transforms
28
+ # size is set to 384; the divisible-by-128 requirement comes from EfficientDet, whereas the model used here is RetinaNet
29
+ image_size = 384
30
+ train_tfms = tfms.A.Adapter([*tfms.A.aug_tfms(size=image_size, presize=512), tfms.A.Normalize()])
31
+ valid_tfms = tfms.A.Adapter([*tfms.A.resize_and_pad(image_size), tfms.A.Normalize()])
32
+ # Datasets
33
+ train_ds = Dataset(train_records, train_tfms)
34
+ valid_ds = Dataset(valid_records, valid_tfms)
35
+ # Data Loaders
36
+ train_dl = model_type.train_dl(train_ds, batch_size=8, num_workers=4, shuffle=True)
37
+ valid_dl = model_type.valid_dl(valid_ds, batch_size=8, num_workers=4, shuffle=False)
38
+ metrics = [COCOMetric(metric_type=COCOMetricType.bbox)]
39
+ learn = model_type.fastai.learner(dls=[train_dl, valid_dl], model=model, metrics=metrics)
40
+
41
+ learn = learn.load('model')
42
+
43
+ def show_preds(input_image, display_label, display_bbox, detection_threshold):
44
+
45
+ if detection_threshold==0: detection_threshold=0.5
46
+
47
+ img = PIL.Image.fromarray(input_image, 'RGB')
48
+
49
+ pred_dict = model_type.end2end_detect(img, valid_tfms, model, class_map=class_map, detection_threshold=detection_threshold,
50
+ display_label=display_label, display_bbox=display_bbox, return_img=True,
51
+ font_size=16, label_color="#FF59D6")
52
+
53
+ return pred_dict['img']
54
+
55
+ # display_chkbox = gr.inputs.CheckboxGroup(["Label", "BBox"], label="Display", default=True)
56
+ display_chkbox_label = gr.inputs.Checkbox(label="Label", default=True)
57
+ display_chkbox_box = gr.inputs.Checkbox(label="Box", default=True)
58
+
59
+ detection_threshold_slider = gr.inputs.Slider(minimum=0, maximum=1, step=0.1, default=0.5, label="Detection Threshold")
60
+
61
+ outputs = gr.outputs.Image(type="pil")
62
+
63
+ # Option 1: Get an image from local drive
64
+ gr_interface = gr.Interface(fn=show_preds, inputs=["image", display_chkbox_label, display_chkbox_box, detection_threshold_slider], outputs=outputs, title='IceApp - Fridge Object')
65
+
66
+ # # Option 2: Grab an image from a webcam
67
+ # gr_interface = gr.Interface(fn=show_preds, inputs=["webcam", display_chkbox_label, display_chkbox_box, detection_threshold_slider], outputs=outputs, title='IceApp - COCO', live=False)
68
+
69
+ # # Option 3: Continuous image stream from the webcam
70
+ # gr_interface = gr.Interface(fn=show_preds, inputs=["webcam", display_chkbox_label, display_chkbox_box, detection_threshold_slider], outputs=outputs, title='IceApp - COCO', live=True)
71
+
72
+
73
+ gr_interface.launch(inline=False, share=True, debug=True)
gradioapp.ipynb ADDED
@@ -0,0 +1,262 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "ee7e0c23-3fa5-4547-8598-7df27a3876c5",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "from icevision.all import *\n",
11
+ "import icedata\n",
12
+ "import PIL, requests\n",
13
+ "import torch\n",
14
+ "from torchvision import transforms\n",
15
+ "import gradio as gr"
16
+ ]
17
+ },
18
+ {
19
+ "cell_type": "code",
20
+ "execution_count": 2,
21
+ "id": "646cc218-f7de-4f32-a3d9-fccdc9b54592",
22
+ "metadata": {},
23
+ "outputs": [],
24
+ "source": [
25
+ "# Download the dataset\n",
26
+ "url = \"https://cvbp-secondary.z19.web.core.windows.net/datasets/object_detection/odFridgeObjects.zip\"\n",
27
+ "dest_dir = \"fridge\"\n",
28
+ "data_dir = icedata.load_data(url, dest_dir)"
29
+ ]
30
+ },
31
+ {
32
+ "cell_type": "code",
33
+ "execution_count": 3,
34
+ "id": "96184ca0-0b0a-4a20-8ab9-30dee6096588",
35
+ "metadata": {},
36
+ "outputs": [],
37
+ "source": [
38
+ "# Create the parser\n",
39
+ "parser = parsers.VOCBBoxParser(annotations_dir=data_dir / \"odFridgeObjects/annotations\", images_dir=data_dir / \"odFridgeObjects/images\")"
40
+ ]
41
+ },
42
+ {
43
+ "cell_type": "code",
44
+ "execution_count": 4,
45
+ "id": "dfa20f76-4970-479a-9497-871fe4cfd170",
46
+ "metadata": {},
47
+ "outputs": [
48
+ {
49
+ "data": {
50
+ "application/vnd.jupyter.widget-view+json": {
51
+ "model_id": "fc8e676815314038a40c884c8c7f5b67",
52
+ "version_major": 2,
53
+ "version_minor": 0
54
+ },
55
+ "text/plain": [
56
+ " 0%| | 0/128 [00:00<?, ?it/s]"
57
+ ]
58
+ },
59
+ "metadata": {},
60
+ "output_type": "display_data"
61
+ },
62
+ {
63
+ "name": "stderr",
64
+ "output_type": "stream",
65
+ "text": [
66
+ "\u001b[1m\u001b[1mINFO \u001b[0m\u001b[1m\u001b[0m - \u001b[1m\u001b[34m\u001b[1mAutofixing records\u001b[0m\u001b[1m\u001b[34m\u001b[0m\u001b[1m\u001b[0m | \u001b[36micevision.parsers.parser\u001b[0m:\u001b[36mparse\u001b[0m:\u001b[36m122\u001b[0m\n"
67
+ ]
68
+ },
69
+ {
70
+ "data": {
71
+ "application/vnd.jupyter.widget-view+json": {
72
+ "model_id": "3db4bc1ae388495eb3b62289459a5c00",
73
+ "version_major": 2,
74
+ "version_minor": 0
75
+ },
76
+ "text/plain": [
77
+ " 0%| | 0/128 [00:00<?, ?it/s]"
78
+ ]
79
+ },
80
+ "metadata": {},
81
+ "output_type": "display_data"
82
+ },
83
+ {
84
+ "data": {
85
+ "text/plain": [
86
+ "<ClassMap: {'background': 0, 'carton': 1, 'milk_bottle': 2, 'can': 3, 'water_bottle': 4}>"
87
+ ]
88
+ },
89
+ "execution_count": 4,
90
+ "metadata": {},
91
+ "output_type": "execute_result"
92
+ }
93
+ ],
94
+ "source": [
95
+ "# Parse annotations to create records\n",
96
+ "train_records, valid_records = parser.parse()\n",
97
+ "parser.class_map"
98
+ ]
99
+ },
100
+ {
101
+ "cell_type": "code",
102
+ "execution_count": 5,
103
+ "id": "26d4f2f7-db51-413c-838f-f80c5898ab52",
104
+ "metadata": {},
105
+ "outputs": [],
106
+ "source": [
107
+ "class_map = parser.class_map"
108
+ ]
109
+ },
110
+ {
111
+ "cell_type": "code",
112
+ "execution_count": 6,
113
+ "id": "007b2e97-d546-4178-84e7-d4fe597f3731",
114
+ "metadata": {},
115
+ "outputs": [],
116
+ "source": [
117
+ "extra_args = {}\n",
118
+ "model_type = models.torchvision.retinanet\n",
119
+ "backbone = model_type.backbones.resnet50_fpn\n",
120
+ "# Instantiate the model\n",
121
+ "model = model_type.model(backbone=backbone(pretrained=True), num_classes=len(parser.class_map), **extra_args) "
122
+ ]
123
+ },
124
+ {
125
+ "cell_type": "code",
126
+ "execution_count": 7,
127
+ "id": "7b664cbf-3ab0-46df-a9d0-c4eb5c3c026d",
128
+ "metadata": {},
129
+ "outputs": [],
130
+ "source": [
131
+ "# Transforms\n",
132
+ "# size is set to 384; the divisible-by-128 requirement comes from EfficientDet, whereas the model used here is RetinaNet\n",
133
+ "image_size = 384\n",
134
+ "train_tfms = tfms.A.Adapter([*tfms.A.aug_tfms(size=image_size, presize=512), tfms.A.Normalize()])\n",
135
+ "valid_tfms = tfms.A.Adapter([*tfms.A.resize_and_pad(image_size), tfms.A.Normalize()])\n",
136
+ "# Datasets\n",
137
+ "train_ds = Dataset(train_records, train_tfms)\n",
138
+ "valid_ds = Dataset(valid_records, valid_tfms)\n",
139
+ "# Data Loaders\n",
140
+ "train_dl = model_type.train_dl(train_ds, batch_size=8, num_workers=4, shuffle=True)\n",
141
+ "valid_dl = model_type.valid_dl(valid_ds, batch_size=8, num_workers=4, shuffle=False)\n",
142
+ "metrics = [COCOMetric(metric_type=COCOMetricType.bbox)]\n",
143
+ "learn = model_type.fastai.learner(dls=[train_dl, valid_dl], model=model, metrics=metrics)"
144
+ ]
145
+ },
146
+ {
147
+ "cell_type": "code",
148
+ "execution_count": 8,
149
+ "id": "7bddb248-215d-4998-9d90-14ea6989c236",
150
+ "metadata": {},
151
+ "outputs": [
152
+ {
153
+ "name": "stderr",
154
+ "output_type": "stream",
155
+ "text": [
156
+ "/home/dnth/anaconda3/envs/icevision-gradio/lib/python3.8/site-packages/fastai/learner.py:56: UserWarning: Saved filed doesn't contain an optimizer state.\n",
157
+ " elif with_opt: warn(\"Saved filed doesn't contain an optimizer state.\")\n"
158
+ ]
159
+ }
160
+ ],
161
+ "source": [
162
+ "learn = learn.load('model')"
163
+ ]
164
+ },
165
+ {
166
+ "cell_type": "code",
167
+ "execution_count": 9,
168
+ "id": "745315f6-8aa5-486e-a7bc-e11348bec6a6",
169
+ "metadata": {},
170
+ "outputs": [],
171
+ "source": [
172
+ "def show_preds(input_image, display_label, display_bbox, detection_threshold):\n",
173
+ "\n",
174
+ " if detection_threshold==0: detection_threshold=0.5\n",
175
+ "\n",
176
+ " img = PIL.Image.fromarray(input_image, 'RGB')\n",
177
+ "\n",
178
+ " pred_dict = model_type.end2end_detect(img, valid_tfms, model, class_map=class_map, detection_threshold=detection_threshold,\n",
179
+ " display_label=display_label, display_bbox=display_bbox, return_img=True, \n",
180
+ " font_size=16, label_color=\"#FF59D6\")\n",
181
+ "\n",
182
+ " return pred_dict['img']"
183
+ ]
184
+ },
185
+ {
186
+ "cell_type": "code",
187
+ "execution_count": null,
188
+ "id": "63ac7fab-2068-4dbc-a464-0551b6fc12b2",
189
+ "metadata": {},
190
+ "outputs": [
191
+ {
192
+ "name": "stdout",
193
+ "output_type": "stream",
194
+ "text": [
195
+ "Running on local URL: http://127.0.0.1:7860/\n",
196
+ "Running on public URL: https://11839.gradio.app\n",
197
+ "\n",
198
+ "This share link will expire in 72 hours. To get longer links, send an email to: support@gradio.app\n"
199
+ ]
200
+ },
201
+ {
202
+ "name": "stderr",
203
+ "output_type": "stream",
204
+ "text": [
205
+ "/home/dnth/anaconda3/envs/icevision-gradio/lib/python3.8/site-packages/torch/functional.py:445: UserWarning: torch.meshgrid: in an upcoming release, it will be required to pass the indexing argument. (Triggered internally at ../aten/src/ATen/native/TensorShape.cpp:2157.)\n",
206
+ " return _VF.meshgrid(tensors, **kwargs) # type: ignore[attr-defined]\n"
207
+ ]
208
+ }
209
+ ],
210
+ "source": [
211
+ "# display_chkbox = gr.inputs.CheckboxGroup([\"Label\", \"BBox\"], label=\"Display\", default=True)\n",
212
+ "display_chkbox_label = gr.inputs.Checkbox(label=\"Label\", default=True)\n",
213
+ "display_chkbox_box = gr.inputs.Checkbox(label=\"Box\", default=True)\n",
214
+ "\n",
215
+ "detection_threshold_slider = gr.inputs.Slider(minimum=0, maximum=1, step=0.1, default=0.5, label=\"Detection Threshold\")\n",
216
+ "\n",
217
+ "outputs = gr.outputs.Image(type=\"pil\")\n",
218
+ "\n",
219
+ "# Option 1: Get an image from local drive\n",
220
+ "gr_interface = gr.Interface(fn=show_preds, inputs=[\"image\", display_chkbox_label, display_chkbox_box, detection_threshold_slider], outputs=outputs, title='IceApp - COCO')\n",
221
+ "\n",
222
+ "# # Option 2: Grab an image from a webcam\n",
223
+ "# gr_interface = gr.Interface(fn=show_preds, inputs=[\"webcam\", display_chkbox_label, display_chkbox_box, detection_threshold_slider], outputs=outputs, title='IceApp - COCO', live=False)\n",
224
+ "\n",
225
+ "# # Option 3: Continuous image stream from the webcam\n",
226
+ "# gr_interface = gr.Interface(fn=show_preds, inputs=[\"webcam\", display_chkbox_label, display_chkbox_box, detection_threshold_slider], outputs=outputs, title='IceApp - COCO', live=True)\n",
227
+ "\n",
228
+ "\n",
229
+ "gr_interface.launch(inline=False, share=True, debug=True)\n"
230
+ ]
231
+ },
232
+ {
233
+ "cell_type": "code",
234
+ "execution_count": null,
235
+ "id": "727a3589-364b-4bfd-9c32-bef5ebe34dbe",
236
+ "metadata": {},
237
+ "outputs": [],
238
+ "source": []
239
+ }
240
+ ],
241
+ "metadata": {
242
+ "kernelspec": {
243
+ "display_name": "Python 3",
244
+ "language": "python",
245
+ "name": "python3"
246
+ },
247
+ "language_info": {
248
+ "codemirror_mode": {
249
+ "name": "ipython",
250
+ "version": 3
251
+ },
252
+ "file_extension": ".py",
253
+ "mimetype": "text/x-python",
254
+ "name": "python",
255
+ "nbconvert_exporter": "python",
256
+ "pygments_lexer": "ipython3",
257
+ "version": "3.8.12"
258
+ }
259
+ },
260
+ "nbformat": 4,
261
+ "nbformat_minor": 5
262
+ }
models/model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb46c5796093f5921996b04f3d85ded01f4070366e38898e748e06c00d262972
3
+ size 129455527
train.ipynb ADDED
The diff for this file is too large to render. See raw diff