Spaces:
Runtime error
Runtime error
dnth
committed on
Commit
·
ed9c2a5
0
Parent(s):
reduce train notebook
Browse files- .github/workflows/main.yml +21 -0
- .gitignore +3 -0
- app.py +73 -0
- gradioapp.ipynb +262 -0
- models/model.pth +3 -0
- train.ipynb +0 -0
.github/workflows/main.yml
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name: Sync to Hugging Face hub
|
2 |
+
on:
|
3 |
+
push:
|
4 |
+
branches: [main]
|
5 |
+
|
6 |
+
# to run this workflow manually from the Actions tab
|
7 |
+
workflow_dispatch:
|
8 |
+
|
9 |
+
jobs:
|
10 |
+
sync-to-hub:
|
11 |
+
runs-on: ubuntu-latest
|
12 |
+
steps:
|
13 |
+
|
14 |
+
|
15 |
+
- uses: actions/checkout@v2
|
16 |
+
with:
|
17 |
+
fetch-depth: 0
|
18 |
+
- name: Push to hub
|
19 |
+
env:
|
20 |
+
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
21 |
+
run: git push --force https://dnth:$HF_TOKEN@huggingface.co/spaces/dnth/webdemo-fridge-detection main
|
.gitignore
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
.ipynb_checkpoints
|
2 |
+
.ipynb_checkpoints/*
|
3 |
+
models/*.pth
|
app.py
ADDED
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from icevision.all import *
|
2 |
+
import icedata
|
3 |
+
import PIL, requests
|
4 |
+
import torch
|
5 |
+
from torchvision import transforms
|
6 |
+
import gradio as gr
|
7 |
+
|
8 |
+
# Download the dataset
|
9 |
+
url = "https://cvbp-secondary.z19.web.core.windows.net/datasets/object_detection/odFridgeObjects.zip"
|
10 |
+
dest_dir = "fridge"
|
11 |
+
data_dir = icedata.load_data(url, dest_dir)
|
12 |
+
|
13 |
+
# Create the parser
|
14 |
+
parser = parsers.VOCBBoxParser(annotations_dir=data_dir / "odFridgeObjects/annotations", images_dir=data_dir / "odFridgeObjects/images")
|
15 |
+
|
16 |
+
# Parse annotations to create records
|
17 |
+
train_records, valid_records = parser.parse()
|
18 |
+
|
19 |
+
class_map = parser.class_map
|
20 |
+
|
21 |
+
extra_args = {}
|
22 |
+
model_type = models.torchvision.retinanet
|
23 |
+
backbone = model_type.backbones.resnet50_fpn
|
24 |
+
# Instantiate the model
|
25 |
+
model = model_type.model(backbone=backbone(pretrained=True), num_classes=len(parser.class_map), **extra_args)
|
26 |
+
|
27 |
+
# Transforms
|
28 |
+
# size 384 was chosen for EfficientDet (inputs must be divisible by 128); kept as-is for the RetinaNet model used here
|
29 |
+
image_size = 384
|
30 |
+
train_tfms = tfms.A.Adapter([*tfms.A.aug_tfms(size=image_size, presize=512), tfms.A.Normalize()])
|
31 |
+
valid_tfms = tfms.A.Adapter([*tfms.A.resize_and_pad(image_size), tfms.A.Normalize()])
|
32 |
+
# Datasets
|
33 |
+
train_ds = Dataset(train_records, train_tfms)
|
34 |
+
valid_ds = Dataset(valid_records, valid_tfms)
|
35 |
+
# Data Loaders
|
36 |
+
train_dl = model_type.train_dl(train_ds, batch_size=8, num_workers=4, shuffle=True)
|
37 |
+
valid_dl = model_type.valid_dl(valid_ds, batch_size=8, num_workers=4, shuffle=False)
|
38 |
+
metrics = [COCOMetric(metric_type=COCOMetricType.bbox)]
|
39 |
+
learn = model_type.fastai.learner(dls=[train_dl, valid_dl], model=model, metrics=metrics)
|
40 |
+
|
41 |
+
learn = learn.load('model')
|
42 |
+
|
43 |
+
def show_preds(input_image, display_label, display_bbox, detection_threshold):
|
44 |
+
|
45 |
+
if detection_threshold==0: detection_threshold=0.5
|
46 |
+
|
47 |
+
img = PIL.Image.fromarray(input_image, 'RGB')
|
48 |
+
|
49 |
+
pred_dict = model_type.end2end_detect(img, valid_tfms, model, class_map=class_map, detection_threshold=detection_threshold,
|
50 |
+
display_label=display_label, display_bbox=display_bbox, return_img=True,
|
51 |
+
font_size=16, label_color="#FF59D6")
|
52 |
+
|
53 |
+
return pred_dict['img']
|
54 |
+
|
55 |
+
# display_chkbox = gr.inputs.CheckboxGroup(["Label", "BBox"], label="Display", default=True)
|
56 |
+
display_chkbox_label = gr.inputs.Checkbox(label="Label", default=True)
|
57 |
+
display_chkbox_box = gr.inputs.Checkbox(label="Box", default=True)
|
58 |
+
|
59 |
+
detection_threshold_slider = gr.inputs.Slider(minimum=0, maximum=1, step=0.1, default=0.5, label="Detection Threshold")
|
60 |
+
|
61 |
+
outputs = gr.outputs.Image(type="pil")
|
62 |
+
|
63 |
+
# Option 1: Get an image from local drive
|
64 |
+
gr_interface = gr.Interface(fn=show_preds, inputs=["image", display_chkbox_label, display_chkbox_box, detection_threshold_slider], outputs=outputs, title='IceApp - Fridge Object')
|
65 |
+
|
66 |
+
# # Option 2: Grab an image from a webcam
|
67 |
+
# gr_interface = gr.Interface(fn=show_preds, inputs=["webcam", display_chkbox_label, display_chkbox_box, detection_threshold_slider], outputs=outputs, title='IceApp - COCO', live=False)
|
68 |
+
|
69 |
+
# # Option 3: Continuous image stream from the webcam
|
70 |
+
# gr_interface = gr.Interface(fn=show_preds, inputs=["webcam", display_chkbox_label, display_chkbox_box, detection_threshold_slider], outputs=outputs, title='IceApp - COCO', live=True)
|
71 |
+
|
72 |
+
|
73 |
+
gr_interface.launch(inline=False, share=True, debug=True)
|
gradioapp.ipynb
ADDED
@@ -0,0 +1,262 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 1,
|
6 |
+
"id": "ee7e0c23-3fa5-4547-8598-7df27a3876c5",
|
7 |
+
"metadata": {},
|
8 |
+
"outputs": [],
|
9 |
+
"source": [
|
10 |
+
"from icevision.all import *\n",
|
11 |
+
"import icedata\n",
|
12 |
+
"import PIL, requests\n",
|
13 |
+
"import torch\n",
|
14 |
+
"from torchvision import transforms\n",
|
15 |
+
"import gradio as gr"
|
16 |
+
]
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"cell_type": "code",
|
20 |
+
"execution_count": 2,
|
21 |
+
"id": "646cc218-f7de-4f32-a3d9-fccdc9b54592",
|
22 |
+
"metadata": {},
|
23 |
+
"outputs": [],
|
24 |
+
"source": [
|
25 |
+
"# Download the dataset\n",
|
26 |
+
"url = \"https://cvbp-secondary.z19.web.core.windows.net/datasets/object_detection/odFridgeObjects.zip\"\n",
|
27 |
+
"dest_dir = \"fridge\"\n",
|
28 |
+
"data_dir = icedata.load_data(url, dest_dir)"
|
29 |
+
]
|
30 |
+
},
|
31 |
+
{
|
32 |
+
"cell_type": "code",
|
33 |
+
"execution_count": 3,
|
34 |
+
"id": "96184ca0-0b0a-4a20-8ab9-30dee6096588",
|
35 |
+
"metadata": {},
|
36 |
+
"outputs": [],
|
37 |
+
"source": [
|
38 |
+
"# Create the parser\n",
|
39 |
+
"parser = parsers.VOCBBoxParser(annotations_dir=data_dir / \"odFridgeObjects/annotations\", images_dir=data_dir / \"odFridgeObjects/images\")"
|
40 |
+
]
|
41 |
+
},
|
42 |
+
{
|
43 |
+
"cell_type": "code",
|
44 |
+
"execution_count": 4,
|
45 |
+
"id": "dfa20f76-4970-479a-9497-871fe4cfd170",
|
46 |
+
"metadata": {},
|
47 |
+
"outputs": [
|
48 |
+
{
|
49 |
+
"data": {
|
50 |
+
"application/vnd.jupyter.widget-view+json": {
|
51 |
+
"model_id": "fc8e676815314038a40c884c8c7f5b67",
|
52 |
+
"version_major": 2,
|
53 |
+
"version_minor": 0
|
54 |
+
},
|
55 |
+
"text/plain": [
|
56 |
+
" 0%| | 0/128 [00:00<?, ?it/s]"
|
57 |
+
]
|
58 |
+
},
|
59 |
+
"metadata": {},
|
60 |
+
"output_type": "display_data"
|
61 |
+
},
|
62 |
+
{
|
63 |
+
"name": "stderr",
|
64 |
+
"output_type": "stream",
|
65 |
+
"text": [
|
66 |
+
"\u001b[1m\u001b[1mINFO \u001b[0m\u001b[1m\u001b[0m - \u001b[1m\u001b[34m\u001b[1mAutofixing records\u001b[0m\u001b[1m\u001b[34m\u001b[0m\u001b[1m\u001b[0m | \u001b[36micevision.parsers.parser\u001b[0m:\u001b[36mparse\u001b[0m:\u001b[36m122\u001b[0m\n"
|
67 |
+
]
|
68 |
+
},
|
69 |
+
{
|
70 |
+
"data": {
|
71 |
+
"application/vnd.jupyter.widget-view+json": {
|
72 |
+
"model_id": "3db4bc1ae388495eb3b62289459a5c00",
|
73 |
+
"version_major": 2,
|
74 |
+
"version_minor": 0
|
75 |
+
},
|
76 |
+
"text/plain": [
|
77 |
+
" 0%| | 0/128 [00:00<?, ?it/s]"
|
78 |
+
]
|
79 |
+
},
|
80 |
+
"metadata": {},
|
81 |
+
"output_type": "display_data"
|
82 |
+
},
|
83 |
+
{
|
84 |
+
"data": {
|
85 |
+
"text/plain": [
|
86 |
+
"<ClassMap: {'background': 0, 'carton': 1, 'milk_bottle': 2, 'can': 3, 'water_bottle': 4}>"
|
87 |
+
]
|
88 |
+
},
|
89 |
+
"execution_count": 4,
|
90 |
+
"metadata": {},
|
91 |
+
"output_type": "execute_result"
|
92 |
+
}
|
93 |
+
],
|
94 |
+
"source": [
|
95 |
+
"# Parse annotations to create records\n",
|
96 |
+
"train_records, valid_records = parser.parse()\n",
|
97 |
+
"parser.class_map"
|
98 |
+
]
|
99 |
+
},
|
100 |
+
{
|
101 |
+
"cell_type": "code",
|
102 |
+
"execution_count": 5,
|
103 |
+
"id": "26d4f2f7-db51-413c-838f-f80c5898ab52",
|
104 |
+
"metadata": {},
|
105 |
+
"outputs": [],
|
106 |
+
"source": [
|
107 |
+
"class_map = parser.class_map"
|
108 |
+
]
|
109 |
+
},
|
110 |
+
{
|
111 |
+
"cell_type": "code",
|
112 |
+
"execution_count": 6,
|
113 |
+
"id": "007b2e97-d546-4178-84e7-d4fe597f3731",
|
114 |
+
"metadata": {},
|
115 |
+
"outputs": [],
|
116 |
+
"source": [
|
117 |
+
"extra_args = {}\n",
|
118 |
+
"model_type = models.torchvision.retinanet\n",
|
119 |
+
"backbone = model_type.backbones.resnet50_fpn\n",
|
120 |
+
"# Instantiate the model\n",
|
121 |
+
"model = model_type.model(backbone=backbone(pretrained=True), num_classes=len(parser.class_map), **extra_args) "
|
122 |
+
]
|
123 |
+
},
|
124 |
+
{
|
125 |
+
"cell_type": "code",
|
126 |
+
"execution_count": 7,
|
127 |
+
"id": "7b664cbf-3ab0-46df-a9d0-c4eb5c3c026d",
|
128 |
+
"metadata": {},
|
129 |
+
"outputs": [],
|
130 |
+
"source": [
|
131 |
+
"# Transforms\n",
|
132 |
+
"# size 384 was chosen for EfficientDet (inputs must be divisible by 128); kept as-is for the RetinaNet model used here\n",
|
133 |
+
"image_size = 384\n",
|
134 |
+
"train_tfms = tfms.A.Adapter([*tfms.A.aug_tfms(size=image_size, presize=512), tfms.A.Normalize()])\n",
|
135 |
+
"valid_tfms = tfms.A.Adapter([*tfms.A.resize_and_pad(image_size), tfms.A.Normalize()])\n",
|
136 |
+
"# Datasets\n",
|
137 |
+
"train_ds = Dataset(train_records, train_tfms)\n",
|
138 |
+
"valid_ds = Dataset(valid_records, valid_tfms)\n",
|
139 |
+
"# Data Loaders\n",
|
140 |
+
"train_dl = model_type.train_dl(train_ds, batch_size=8, num_workers=4, shuffle=True)\n",
|
141 |
+
"valid_dl = model_type.valid_dl(valid_ds, batch_size=8, num_workers=4, shuffle=False)\n",
|
142 |
+
"metrics = [COCOMetric(metric_type=COCOMetricType.bbox)]\n",
|
143 |
+
"learn = model_type.fastai.learner(dls=[train_dl, valid_dl], model=model, metrics=metrics)"
|
144 |
+
]
|
145 |
+
},
|
146 |
+
{
|
147 |
+
"cell_type": "code",
|
148 |
+
"execution_count": 8,
|
149 |
+
"id": "7bddb248-215d-4998-9d90-14ea6989c236",
|
150 |
+
"metadata": {},
|
151 |
+
"outputs": [
|
152 |
+
{
|
153 |
+
"name": "stderr",
|
154 |
+
"output_type": "stream",
|
155 |
+
"text": [
|
156 |
+
"/home/dnth/anaconda3/envs/icevision-gradio/lib/python3.8/site-packages/fastai/learner.py:56: UserWarning: Saved filed doesn't contain an optimizer state.\n",
|
157 |
+
" elif with_opt: warn(\"Saved filed doesn't contain an optimizer state.\")\n"
|
158 |
+
]
|
159 |
+
}
|
160 |
+
],
|
161 |
+
"source": [
|
162 |
+
"learn = learn.load('model')"
|
163 |
+
]
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"cell_type": "code",
|
167 |
+
"execution_count": 9,
|
168 |
+
"id": "745315f6-8aa5-486e-a7bc-e11348bec6a6",
|
169 |
+
"metadata": {},
|
170 |
+
"outputs": [],
|
171 |
+
"source": [
|
172 |
+
"def show_preds(input_image, display_label, display_bbox, detection_threshold):\n",
|
173 |
+
"\n",
|
174 |
+
" if detection_threshold==0: detection_threshold=0.5\n",
|
175 |
+
"\n",
|
176 |
+
" img = PIL.Image.fromarray(input_image, 'RGB')\n",
|
177 |
+
"\n",
|
178 |
+
" pred_dict = model_type.end2end_detect(img, valid_tfms, model, class_map=class_map, detection_threshold=detection_threshold,\n",
|
179 |
+
" display_label=display_label, display_bbox=display_bbox, return_img=True, \n",
|
180 |
+
" font_size=16, label_color=\"#FF59D6\")\n",
|
181 |
+
"\n",
|
182 |
+
" return pred_dict['img']"
|
183 |
+
]
|
184 |
+
},
|
185 |
+
{
|
186 |
+
"cell_type": "code",
|
187 |
+
"execution_count": null,
|
188 |
+
"id": "63ac7fab-2068-4dbc-a464-0551b6fc12b2",
|
189 |
+
"metadata": {},
|
190 |
+
"outputs": [
|
191 |
+
{
|
192 |
+
"name": "stdout",
|
193 |
+
"output_type": "stream",
|
194 |
+
"text": [
|
195 |
+
"Running on local URL: http://127.0.0.1:7860/\n",
|
196 |
+
"Running on public URL: https://11839.gradio.app\n",
|
197 |
+
"\n",
|
198 |
+
"This share link will expire in 72 hours. To get longer links, send an email to: support@gradio.app\n"
|
199 |
+
]
|
200 |
+
},
|
201 |
+
{
|
202 |
+
"name": "stderr",
|
203 |
+
"output_type": "stream",
|
204 |
+
"text": [
|
205 |
+
"/home/dnth/anaconda3/envs/icevision-gradio/lib/python3.8/site-packages/torch/functional.py:445: UserWarning: torch.meshgrid: in an upcoming release, it will be required to pass the indexing argument. (Triggered internally at ../aten/src/ATen/native/TensorShape.cpp:2157.)\n",
|
206 |
+
" return _VF.meshgrid(tensors, **kwargs) # type: ignore[attr-defined]\n"
|
207 |
+
]
|
208 |
+
}
|
209 |
+
],
|
210 |
+
"source": [
|
211 |
+
"# display_chkbox = gr.inputs.CheckboxGroup([\"Label\", \"BBox\"], label=\"Display\", default=True)\n",
|
212 |
+
"display_chkbox_label = gr.inputs.Checkbox(label=\"Label\", default=True)\n",
|
213 |
+
"display_chkbox_box = gr.inputs.Checkbox(label=\"Box\", default=True)\n",
|
214 |
+
"\n",
|
215 |
+
"detection_threshold_slider = gr.inputs.Slider(minimum=0, maximum=1, step=0.1, default=0.5, label=\"Detection Threshold\")\n",
|
216 |
+
"\n",
|
217 |
+
"outputs = gr.outputs.Image(type=\"pil\")\n",
|
218 |
+
"\n",
|
219 |
+
"# Option 1: Get an image from local drive\n",
|
220 |
+
"gr_interface = gr.Interface(fn=show_preds, inputs=[\"image\", display_chkbox_label, display_chkbox_box, detection_threshold_slider], outputs=outputs, title='IceApp - COCO')\n",
|
221 |
+
"\n",
|
222 |
+
"# # Option 2: Grab an image from a webcam\n",
|
223 |
+
"# gr_interface = gr.Interface(fn=show_preds, inputs=[\"webcam\", display_chkbox_label, display_chkbox_box, detection_threshold_slider], outputs=outputs, title='IceApp - COCO', live=False)\n",
|
224 |
+
"\n",
|
225 |
+
"# # Option 3: Continuous image stream from the webcam\n",
|
226 |
+
"# gr_interface = gr.Interface(fn=show_preds, inputs=[\"webcam\", display_chkbox_label, display_chkbox_box, detection_threshold_slider], outputs=outputs, title='IceApp - COCO', live=True)\n",
|
227 |
+
"\n",
|
228 |
+
"\n",
|
229 |
+
"gr_interface.launch(inline=False, share=True, debug=True)\n"
|
230 |
+
]
|
231 |
+
},
|
232 |
+
{
|
233 |
+
"cell_type": "code",
|
234 |
+
"execution_count": null,
|
235 |
+
"id": "727a3589-364b-4bfd-9c32-bef5ebe34dbe",
|
236 |
+
"metadata": {},
|
237 |
+
"outputs": [],
|
238 |
+
"source": []
|
239 |
+
}
|
240 |
+
],
|
241 |
+
"metadata": {
|
242 |
+
"kernelspec": {
|
243 |
+
"display_name": "Python 3",
|
244 |
+
"language": "python",
|
245 |
+
"name": "python3"
|
246 |
+
},
|
247 |
+
"language_info": {
|
248 |
+
"codemirror_mode": {
|
249 |
+
"name": "ipython",
|
250 |
+
"version": 3
|
251 |
+
},
|
252 |
+
"file_extension": ".py",
|
253 |
+
"mimetype": "text/x-python",
|
254 |
+
"name": "python",
|
255 |
+
"nbconvert_exporter": "python",
|
256 |
+
"pygments_lexer": "ipython3",
|
257 |
+
"version": "3.8.12"
|
258 |
+
}
|
259 |
+
},
|
260 |
+
"nbformat": 4,
|
261 |
+
"nbformat_minor": 5
|
262 |
+
}
|
models/model.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eb46c5796093f5921996b04f3d85ded01f4070366e38898e748e06c00d262972
|
3 |
+
size 129455527
|
train.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|