Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -4,318 +4,8 @@ import imutils
|
|
4 |
import numpy as np
|
5 |
import torch
|
6 |
from PIL import Image
|
7 |
-
import torchvision.transforms as transforms
|
8 |
-
from typing import NamedTuple, List, Callable, List, Tuple, Optional
|
9 |
-
from torch import nn
|
10 |
-
import torch.nn.functional as F
|
11 |
|
12 |
|
13 |
-
|
14 |
-
class LinData(NamedTuple):
    """Settings for the fully connected stack consumed by ``LinLayers``.

    Every list field holds one entry per layer; ``LinLayers`` rejects
    configurations whose lists differ in length from ``hidden_layers``.
    """

    # Number of features fed into the first linear layer.
    in_dim: int
    # Width of each linear layer; the final entry is the output layer.
    hidden_layers: List[int]
    # Activation applied after each layer (a falsy entry means none).
    activations: List[Optional[Callable[[torch.Tensor], torch.Tensor]]]
    # Whether to add batch normalisation after each layer.
    bns: List[bool]
    # Dropout probability for each layer (a falsy entry means no dropout).
    dropouts: List[Optional[float]]
|
20 |
-
|
21 |
-
class CNNData(NamedTuple):
    """Settings for the 3D convolution stack consumed by ``CNNLayers``.

    Every list field holds one entry per convolution layer.
    """

    # Number of channels of the input volume.
    in_dim: int
    # Number of filters (output channels) of each layer.
    n_f: List[int]
    # 3D kernel size per layer, e.g. [(5, 5, 5), (3, 3, 3), (3, 3, 3)].
    kernel_size: List[Tuple]
    # Activation applied in each layer (a falsy entry means none).
    activations: List[Optional[Callable[[torch.Tensor], torch.Tensor]]]
    # Batch normalisation switch per layer, e.g. [True, True, False].
    bns: List[bool]
    # Dropout probability per layer, e.g. [.5, 0, 0].
    dropouts: List[Optional[float]]
    # 3D padding per layer, e.g. [(0, 0, 0), (0, 0, 0), (0, 0, 0)].
    paddings: List[Optional[Tuple]]
    # 3D stride per layer, e.g. [(1, 1, 1), (1, 1, 1), (1, 1, 1)].
    strides: List[Optional[Tuple]]
|
31 |
-
|
32 |
-
|
33 |
-
class NetData(NamedTuple):
    """Complete network description: convolution stack plus linear head."""

    # Configuration of the 3D convolution layers.
    cnn3d: CNNData
    # Configuration of the fully connected layers.
    lin: LinData
|
36 |
-
|
37 |
-
def conv3D_output_size(args, img_size):
    """Return the flattened feature count produced by a 3D convolution stack.

    For every layer listed in ``args.kernel_size`` the usual convolution
    size rule ``floor((size + 2 * padding - kernel) / stride) + 1`` is applied
    to each of the three convolved axes. The resulting volume is multiplied
    by the filter count of the last layer.

    Args:
        args: ``CNNData`` describing the convolution stack.
        img_size: three-element sequence giving the input extent along each
            convolved axis (``load_model`` passes ``[30, 256, 342]``).

    Returns:
        int: ``args.n_f[-1] * d * h * w`` for the final output extents.

    Raises:
        TypeError: if ``args`` is not a ``CNNData``.
    """
    if not isinstance(args, CNNData):
        raise TypeError("args must be a CNNData instance")

    depth, height, width = img_size
    for idx, kernel in enumerate(args.kernel_size):
        # The fields are typed Optional; a missing entry falls back to the
        # nn.Conv3d defaults (no padding, unit stride).
        padding = args.paddings[idx] or (0, 0, 0)
        stride = args.strides[idx] or (1, 1, 1)
        # Integer floor division is exact, unlike float division + np.floor.
        depth = (depth + 2 * padding[0] - kernel[0]) // stride[0] + 1
        height = (height + 2 * padding[1] - kernel[1]) // stride[1] + 1
        width = (width + 2 * padding[2] - kernel[2]) // stride[2] + 1

    return int(args.n_f[-1] * depth * height * width)
|
55 |
-
|
56 |
-
class CNN3D_Mike(nn.Module):
    """Fixed two-layer 3D CNN with a three-layer linear head and one output.

    Args:
        t_dim: extent of the first convolved axis of the input volume.
        img_x: extent of the second convolved axis.
        img_y: extent of the third convolved axis.
        drop_p: dropout probability used after each convolution and before
            the last linear layer.
        fc_hidden1: width of the first fully connected layer.
        fc_hidden2: width of the second fully connected layer.
    """

    def __init__(self, t_dim=30, img_x=256, img_y=342, drop_p=0, fc_hidden1=256, fc_hidden2=256):
        super(CNN3D_Mike, self).__init__()
        # input volume dimensions
        self.t_dim = t_dim
        self.img_x = img_x
        self.img_y = img_y
        # fully connected layer hidden nodes
        self.fc_hidden1, self.fc_hidden2 = fc_hidden1, fc_hidden2
        self.drop_p = drop_p
        self.ch1, self.ch2 = 32, 48
        self.k1, self.k2 = (5, 5, 5), (3, 3, 3)  # 3d kernel size
        self.s1, self.s2 = (2, 2, 2), (2, 2, 2)  # 3d strides
        self.pd1, self.pd2 = (0, 0, 0), (0, 0, 0)  # 3d padding

        # Output extents of conv1 and conv2. These are computed locally: the
        # module-level conv3D_output_size takes a CNNData and returns a
        # single int, so it cannot provide the per-axis shape needed here.
        self.conv1_outshape = self._conv3d_out_shape(
            (self.t_dim, self.img_x, self.img_y), self.pd1, self.k1, self.s1)
        self.conv2_outshape = self._conv3d_out_shape(
            self.conv1_outshape, self.pd2, self.k2, self.s2)

        self.conv1 = nn.Conv3d(in_channels=1, out_channels=self.ch1, kernel_size=self.k1,
                               stride=self.s1, padding=self.pd1)
        self.bn1 = nn.BatchNorm3d(self.ch1)
        self.conv2 = nn.Conv3d(in_channels=self.ch1, out_channels=self.ch2, kernel_size=self.k2,
                               stride=self.s2, padding=self.pd2)
        self.bn2 = nn.BatchNorm3d(self.ch2)
        self.relu = nn.ReLU(inplace=True)
        self.drop = nn.Dropout3d(self.drop_p)
        self.pool = nn.MaxPool3d(2)  # defined but not used in forward()
        flat_features = (self.ch2 * self.conv2_outshape[0]
                         * self.conv2_outshape[1] * self.conv2_outshape[2])
        self.fc1 = nn.Linear(flat_features, self.fc_hidden1)  # fully connected hidden layer
        self.fc2 = nn.Linear(self.fc_hidden1, self.fc_hidden2)
        self.fc3 = nn.Linear(self.fc_hidden2, 1)  # single output value

    @staticmethod
    def _conv3d_out_shape(size, padding, kernel, stride):
        """Return the per-axis output extents of one 3D convolution."""
        return tuple(
            (size[axis] + 2 * padding[axis] - kernel[axis]) // stride[axis] + 1
            for axis in range(3)
        )

    def forward(self, x_3d):
        """Run the network.

        Args:
            x_3d: input tensor; expected to be shaped
                ``(batch, 1, t_dim, img_x, img_y)`` since conv1 has one input
                channel and fc1 is sized from those extents.

        Returns:
            Tensor of shape ``(batch, 1)`` (raw output of the last linear
            layer, no activation).
        """
        # Conv 1
        x = self.conv1(x_3d)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.drop(x)
        # Conv 2
        x = self.conv2(x)
        x = self.bn2(x)
        x = self.relu(x)
        x = self.drop(x)
        # FC 1 and 2 on the flattened per-sample features
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.dropout(x, p=self.drop_p, training=self.training)
        x = self.fc3(x)
        return x
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
class CNNLayers(nn.Module):
    """Configurable stack of 3D convolution blocks.

    Each block is ``Conv3d`` followed, when enabled for that layer, by
    ``BatchNorm3d``, ``Dropout3d`` and the configured activation, in that
    order.

    Args:
        args: object exposing the ``CNNData`` fields (``in_dim``, ``n_f``,
            ``kernel_size``, ``activations``, ``bns``, ``dropouts``,
            ``paddings``, ``strides``).

    Raises:
        AssertionError: if the per-layer lists do not describe every layer.
    """

    def __init__(self, args):
        super(CNNLayers, self).__init__()

        self.in_dim = args.in_dim  # input channels, e.g. 1 or 3
        self.n_f = args.n_f  # e.g. [32, 64]
        self.kernel_size = args.kernel_size  # e.g. [(5, 5, 5), (3, 3, 3)]
        self.activations = args.activations  # e.g. [nn.ReLU(), nn.ReLU()]
        self.bns = args.bns  # e.g. [True, True]
        self.dropouts = args.dropouts  # e.g. [0.5, 0]
        self.paddings = args.paddings  # e.g. [(0, 0, 0), (0, 0, 0)]
        self.strides = args.strides  # e.g. [(1, 1, 1), (1, 1, 1)]

        # Explicit raise (not `assert`) so the check survives `python -O`;
        # AssertionError and its message are kept for existing callers.
        n_layers = len(self.n_f)
        same_length = (self.activations, self.bns, self.dropouts)
        at_least = (self.kernel_size, self.paddings, self.strides)
        if (any(len(seq) != n_layers for seq in same_length)
                or any(len(seq) < n_layers for seq in at_least)):
            raise AssertionError('dimensions mismatch : check dimensions!')

        # build the sequence of per-layer sequences
        self._get_layers()

    def _get_layers(self):
        """Build ``self.layers`` from the stored configuration."""
        blocks = []
        in_channels = self.in_dim

        for idx, out_channels in enumerate(self.n_f):
            stride = self.strides[idx]
            padding = self.paddings[idx]
            block = [nn.Conv3d(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=self.kernel_size[idx],
                # Optional entries fall back to the nn.Conv3d defaults.
                stride=1 if stride is None else stride,
                padding=0 if padding is None else padding,
            )]

            if self.bns[idx]:
                block.append(nn.BatchNorm3d(num_features=out_channels))
            if self.dropouts[idx]:
                block.append(nn.Dropout3d(p=self.dropouts[idx]))
            if self.activations[idx]:
                block.append(self.activations[idx])

            blocks.append(nn.Sequential(*block))
            in_channels = out_channels

        self.layers = nn.Sequential(*blocks)

    @staticmethod
    def get_activation(activation):
        """Map an activation name to a new module instance.

        Supported names are ``'relu'``, ``'leakyrelu'`` (negative slope 0.1)
        and ``'selu'``. Non-string arguments are returned unchanged.

        Raises:
            ValueError: if ``activation`` is a string that is not supported.
        """
        if not isinstance(activation, str):
            return activation

        factories = {
            'relu': nn.ReLU,
            'leakyrelu': lambda: nn.LeakyReLU(negative_slope=0.1),
            'selu': nn.SELU,
        }
        if activation not in factories:
            raise ValueError('unknown activation: %r' % (activation,))
        return factories[activation]()

    def forward(self, x):
        """Apply all convolution blocks to ``x`` and return the result."""
        return self.layers(x)
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
class CNN3D(nn.Module):
    """3D convolution stack followed by a fully connected head.

    Args:
        args: ``NetData`` holding the ``cnn3d`` and ``lin`` configurations.

    Raises:
        TypeError: if ``args`` is not a ``NetData``.
    """

    def __init__(self, args):
        super(CNN3D, self).__init__()
        # check datatype
        if not isinstance(args, NetData):
            raise TypeError("args must be a NetData instance")

        self.cnn3d = CNNLayers(args.cnn3d)
        self.lin = LinLayers(args.lin)
        # Feature count expected by the first linear layer; used to flatten
        # the convolution output in forward().
        self.in_dim = args.lin.in_dim

    def forward(self, x):
        """Run the convolution stack, flatten, then run the linear head."""
        # cnn 3d
        x = self.cnn3d(x)
        # reshape to rows of `in_dim` features
        x = x.view(-1, self.in_dim)
        # feedforward
        x = self.lin(x)
        return x
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
|
227 |
-
class LinLayers(nn.Module):
    """Configurable stack of fully connected blocks.

    Each block is ``Linear`` followed, when enabled for that layer, by
    ``BatchNorm1d``, the configured activation and ``Dropout``, in that
    order.

    Args:
        args: object exposing the ``LinData`` fields (``in_dim``,
            ``hidden_layers``, ``activations``, ``bns``, ``dropouts``).

    Raises:
        AssertionError: if the per-layer lists differ in length.
        ValueError: if ``hidden_layers`` is empty.
    """

    def __init__(self, args):
        super(LinLayers, self).__init__()

        in_dim = args.in_dim  # e.g. 16
        hidden_layers = args.hidden_layers  # e.g. [512, 256, 128, 2]
        activations = args.activations  # e.g. [nn.LeakyReLU(0.2), ..., None]
        batchnorms = args.bns  # e.g. [True, True, True, False]
        dropouts = args.dropouts  # e.g. [None, 0.2, 0.2, None]

        # Explicit raise (not `assert`) so the check survives `python -O`;
        # AssertionError and its message are kept for existing callers.
        n_layers = len(hidden_layers)
        if any(len(seq) != n_layers for seq in (activations, batchnorms, dropouts)):
            raise AssertionError('dimensions mismatch!')

        # The output width is the last entry of hidden_layers, so an empty
        # list leaves nothing to build (this path used to hit an undefined
        # `out_dim` and fail with NameError).
        if not hidden_layers:
            raise ValueError('hidden_layers must contain at least the output layer')

        blocks = []
        prev_dim = in_dim
        for idx, width in enumerate(hidden_layers):
            block = [nn.Linear(prev_dim, width)]
            if batchnorms[idx]:
                block.append(nn.BatchNorm1d(num_features=width))
            if activations[idx]:
                block.append(activations[idx])
            if dropouts[idx]:
                block.append(nn.Dropout(p=dropouts[idx]))
            blocks.append(nn.Sequential(*block))
            prev_dim = width

        self.layers = nn.Sequential(*blocks)

    def forward(self, x):
        """Apply all fully connected blocks to ``x`` and return the result."""
        return self.layers(x)
|
283 |
-
|
284 |
-
|
285 |
-
def load_model():
    """Build the CNN3D network on the CPU and return it in eval mode.

    The architecture is two 3D convolution layers (32 and 48 filters)
    followed by linear layers of width 256, 256 and 1, sized for an input
    of extent ``[30, 256, 342]``.

    NOTE: no weights are loaded here, because the ``load_state_dict`` call
    is disabled, so the returned model carries freshly initialised
    parameters.
    """
    # CNN3D layers' architecture
    conv_cfg = CNNData(
        in_dim=1,
        n_f=[32, 48],
        kernel_size=[(5, 5, 5), (3, 3, 3)],
        activations=[nn.ReLU(), nn.ReLU()],
        bns=[True, True],
        dropouts=[0, 0],
        paddings=[(0, 0, 0), (0, 0, 0)],
        strides=[(2, 2, 2), (2, 2, 2)],
    )

    # Feedforward layers' architecture; the input width is the flattened
    # output size of the convolution stack.
    flat_features = conv3D_output_size(conv_cfg, [30, 256, 342])
    dense_cfg = LinData(
        in_dim=flat_features,
        hidden_layers=[256, 256, 1],
        activations=[nn.ReLU(), nn.ReLU(), None],
        bns=[False, False, False],
        dropouts=[0.2, 0, 0],
    )

    # combined architecture
    net_cfg = NetData(conv_cfg, dense_cfg)

    # CNN3D model, always placed on the CPU
    cpu = torch.device('cpu')
    model = CNN3D(net_cfg).to(cpu)
    # weight file: 'cnn3d_epoch_300.pt'
    # model.load_state_dict(torch.load(os.path.join(base_path, 'weights', weight_file), map_location=cpu))
    model.eval()

    return model
|
318 |
-
|
319 |
def parse_video(video_file):
|
320 |
"""A utility to parse the input videos.
|
321 |
Reference: https://pyimagesearch.com/2018/11/12/yolo-object-detection-with-opencv/
|
|
|
4 |
import numpy as np
|
5 |
import torch
|
6 |
from PIL import Image
|
|
|
|
|
|
|
|
|
7 |
|
8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
def parse_video(video_file):
|
10 |
"""A utility to parse the input videos.
|
11 |
Reference: https://pyimagesearch.com/2018/11/12/yolo-object-detection-with-opencv/
|