Commit
·
f9c83e1
1
Parent(s):
3a5accc
Upload darknet.py
Browse files- darknet.py +322 -0
darknet.py
ADDED
@@ -0,0 +1,322 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# PyTorch implementation of Darknet
|
2 |
+
# This is a custom, hard-coded version of darknet with
|
3 |
+
# YOLOv3 implementation for openimages database. This
|
4 |
+
# was written to test viability of implementing YOLO
|
5 |
+
# for face detection followed by emotion / sentiment
|
6 |
+
# analysis.
|
7 |
+
#
|
8 |
+
# Configuration, weights and data are hardcoded.
|
9 |
+
# Additional options include the ability to create
|
10 |
+
# subset of data with faces extracted for labelling.
|
11 |
+
#
|
12 |
+
# Author : Saikiran Tharimena
|
13 |
+
# Co-Authors: Kjetil Marinius Sjulsen, Juan Carlos Calvet Lopez
|
14 |
+
# Project : Emotion / Sentiment Detection from news images
|
15 |
+
# Date : 12 September 2022
|
16 |
+
# Version : v0.1
|
17 |
+
#
|
18 |
+
# (C) Schibsted ASA
|
19 |
+
|
20 |
+
# Libraries
|
21 |
+
import torch
|
22 |
+
import torch.nn as nn
|
23 |
+
import torch.nn.functional as F
|
24 |
+
from torch.autograd import Variable
|
25 |
+
import numpy as np
|
26 |
+
from utils import *
|
27 |
+
|
28 |
+
|
29 |
+
def parse_cfg(cfgfile):
    """
    Parse a Darknet-style configuration file into a list of blocks.

    Every ``[section]`` header starts a new block. The section name is
    stored under the key "type" and each following ``key=value`` line is
    stored as a string entry of that block.

    Args:
        cfgfile: Path of the configuration file to read.

    Returns:
        A list of dictionaries, one per section, in file order. When the
        file holds no usable lines the list contains one empty dictionary.

    Raises:
        ValueError: If a non-header line contains no "=".
    """
    # The context manager closes the handle even when reading fails.
    with open(cfgfile, 'r') as cfg:
        raw_lines = cfg.read().split('\n')

    # Strip before filtering so that whitespace-only lines and indented
    # comments are dropped instead of reaching the parser below.
    stripped = [entry.strip() for entry in raw_lines]
    lines = [entry for entry in stripped if entry and not entry.startswith('#')]

    block = {}
    blocks = []

    for line in lines:
        if line[0] == "[":
            # A header closes the block collected so far (if any).
            if len(block) != 0:
                blocks.append(block)
                block = {}
            block["type"] = line[1:-1].rstrip()
        else:
            # Split on the first "=" only, so values may contain "=".
            key, value = line.split("=", 1)
            block[key.rstrip()] = value.lstrip()

    # The last block has no following header to flush it.
    blocks.append(block)

    return blocks
|
59 |
+
|
60 |
+
|
61 |
+
class EmptyLayer(nn.Module):
    """Module without parameters or logic of its own; used as a placeholder entry."""

    def __init__(self):
        nn.Module.__init__(self)
|
64 |
+
|
65 |
+
|
66 |
+
class DetectionLayer(nn.Module):
    """Module that only stores the anchors handed to it in ``self.anchors``."""

    def __init__(self, anchors):
        nn.Module.__init__(self)
        # Kept as given; no copy or conversion is made.
        self.anchors = anchors
|
70 |
+
|
71 |
+
|
72 |
+
def create_modules(blocks):
    """
    Build the PyTorch modules described by a parsed configuration.

    Args:
        blocks: List of block dictionaries (string values) such as the one
            returned by ``parse_cfg``. The first entry describes the
            network itself; every following entry describes one layer.
            The "layers" entry of each route block is replaced in place by
            a list of strings.

    Returns:
        A tuple ``(net_info, module_list)``: ``net_info`` is the first
        block, ``module_list`` is an ``nn.ModuleList`` with one
        ``nn.Sequential`` per remaining block, in order.

    Raises:
        KeyError: If a block lacks an entry its layer type requires.
        ValueError: If a numeric entry cannot be converted with ``int``.
    """
    net_info = blocks[0]  # Information about the input and pre-processing
    module_list = nn.ModuleList()
    prev_filters = 3  # Input channels of the first layer (presumably RGB)
    output_filters = []  # Output channel count of every layer built so far
    # Layers that do not change the channel count inherit this value.
    filters = prev_filters

    for index, x in enumerate(blocks[1:]):
        module = nn.Sequential()
        layer_type = x["type"]

        if layer_type == "convolutional":
            activation = x["activation"]
            batch_normalize = int(x.get("batch_normalize", 0))
            # The convolution needs its own bias exactly when no batch
            # norm follows it; this also covers an explicit
            # "batch_normalize=0".
            bias = not batch_normalize

            filters = int(x["filters"])
            padding = int(x["pad"])
            kernel_size = int(x["size"])
            stride = int(x["stride"])

            # "pad" is a flag: when set, pad by half the kernel size.
            pad = (kernel_size - 1) // 2 if padding else 0

            conv = nn.Conv2d(prev_filters, filters, kernel_size, stride, pad, bias=bias)
            module.add_module("conv_{0}".format(index), conv)

            if batch_normalize:
                bn = nn.BatchNorm2d(filters)
                module.add_module("batch_norm_{0}".format(index), bn)

            # Only "leaky" adds an activation module; any other value
            # leaves the layer linear.
            if activation == "leaky":
                activn = nn.LeakyReLU(0.1, inplace=True)
                module.add_module("leaky_{0}".format(index), activn)

        elif layer_type == "upsample":
            # Nearest-neighbour upsampling by the configured stride.
            stride = int(x["stride"])
            upsample = nn.Upsample(scale_factor=stride, mode="nearest")
            module.add_module("upsample_{}".format(index), upsample)

        elif layer_type == "route":
            # Keep the list form on the block; skip the split when a
            # previous call has already converted it.
            if isinstance(x["layers"], str):
                x["layers"] = x["layers"].split(',')

            # At most two source layers are considered. Negative entries
            # are relative to this layer, all others are absolute layer
            # indices; convert everything to relative offsets.
            offsets = [int(entry) for entry in x["layers"][:2]]
            offsets = [entry - index if entry >= 0 else entry for entry in offsets]

            route = EmptyLayer()
            module.add_module("route_{0}".format(index), route)

            # The routed feature maps are concatenated along the channel
            # axis, so their channel counts add up.
            filters = sum(output_filters[index + offset] for offset in offsets)

        elif layer_type == "shortcut":
            # Skip connection; the addition itself is not done by this module.
            shortcut = EmptyLayer()
            module.add_module("shortcut_{}".format(index), shortcut)

        elif layer_type == "yolo":
            mask = [int(entry) for entry in x["mask"].split(",")]

            # Anchors are listed as a flat "w,h,w,h,..." sequence; pair
            # them up and keep only the pairs selected by the mask.
            flat = [int(entry) for entry in x["anchors"].split(",")]
            anchors = [(flat[i], flat[i + 1]) for i in range(0, len(flat), 2)]
            anchors = [anchors[i] for i in mask]

            detection = DetectionLayer(anchors)
            module.add_module("Detection_{}".format(index), detection)

        module_list.append(module)
        prev_filters = filters
        output_filters.append(filters)

    return (net_info, module_list)
|
173 |
+
|
174 |
+
class Darknet(nn.Module):
    """
    Network assembled from a Darknet configuration file.

    ``__init__`` sets ``blocks`` (the parsed configuration), ``net_info``
    (its first block) and ``module_list`` (one module per layer block).
    ``load_weights`` additionally sets ``header`` and ``seen``.
    """

    def __init__(self, cfgfile):
        """Parse ``cfgfile`` and build the modules it describes."""
        super(Darknet, self).__init__()
        self.blocks = parse_cfg(cfgfile)
        self.net_info, self.module_list = create_modules(self.blocks)

    def forward(self, x, CUDA):
        """
        Run the network on ``x`` and collect the output of every yolo layer.

        Args:
            x: Input handed to the first layer.
            CUDA: Passed through unchanged to ``predict_transform``.

        Returns:
            The transformed outputs of all yolo layers, concatenated along
            dimension 1.

        Raises:
            ValueError: If the configuration contains no yolo layer.
        """
        modules = self.blocks[1:]
        outputs = {}  # Output of every layer, read by route and shortcut layers
        detections = None  # Filled by the first yolo layer

        for i, module in enumerate(modules):
            module_type = module["type"]

            if module_type in ("convolutional", "upsample"):
                x = self.module_list[i](x)

            elif module_type == "route":
                layers = module["layers"]
                # Accept the raw config string as well as the list form.
                if isinstance(layers, str):
                    layers = layers.split(",")
                offsets = [int(a) for a in layers]
                # Negative entries are relative to this layer, all others
                # are absolute layer indices; make them all relative.
                offsets = [a - i if a >= 0 else a for a in offsets]

                if len(offsets) == 1:
                    x = outputs[i + offsets[0]]
                else:
                    # Only the first two source layers are concatenated.
                    map1 = outputs[i + offsets[0]]
                    map2 = outputs[i + offsets[1]]
                    x = torch.cat((map1, map2), 1)

            elif module_type == "shortcut":
                from_ = int(module["from"])
                x = outputs[i - 1] + outputs[i + from_]

            elif module_type == "yolo":
                anchors = self.module_list[i][0].anchors
                # Input dimension is taken from the net block's height.
                inp_dim = int(self.net_info["height"])
                num_classes = int(module["classes"])

                # .data drops the autograd history before the transform.
                x = x.data
                x = predict_transform(x, inp_dim, anchors, num_classes, CUDA)
                if detections is None:
                    detections = x
                else:
                    detections = torch.cat((detections, x), 1)

            outputs[i] = x

        if detections is None:
            raise ValueError("configuration contains no yolo layer; nothing to return")
        return detections

    @staticmethod
    def _take(weights, ptr, like):
        """Return the next ``like.numel()`` values shaped like ``like``, plus the advanced offset."""
        count = like.numel()
        if ptr + count > weights.size:
            raise ValueError(
                "weight file ends early: need {} values at offset {}, {} left".format(
                    count, ptr, weights.size - ptr))
        chunk = torch.from_numpy(weights[ptr:ptr + count]).view_as(like)
        return chunk, ptr + count

    def load_weights(self, weightfile):
        """
        Load the parameters of all convolutional layers from a weight file.

        The file starts with five int32 header values (major version,
        minor version, subversion, and two values for the images seen
        during training), followed by float32 values. For each
        convolutional layer, in order, the file provides either the batch
        norm bias, weight, running mean and running variance, or the
        convolution bias, and then the convolution weights.

        Args:
            weightfile: Path of the binary weight file.

        Raises:
            ValueError: If the file holds fewer values than the layers need.
        """
        # Read everything up front so the handle is closed right away.
        with open(weightfile, "rb") as fp:
            header = np.fromfile(fp, dtype=np.int32, count=5)
            weights = np.fromfile(fp, dtype=np.float32)

        self.header = torch.from_numpy(header)
        self.seen = self.header[3]

        ptr = 0  # Offset of the next unread value in ``weights``
        for i, model in enumerate(self.module_list):
            block = self.blocks[i + 1]

            # Only convolutional layers have stored parameters.
            if block["type"] != "convolutional":
                continue

            batch_normalize = int(block.get("batch_normalize", 0))
            conv = model[0]

            if batch_normalize:
                bn = model[1]

                # Stored order: bias, weight, running mean, running variance.
                bn_biases, ptr = self._take(weights, ptr, bn.bias.data)
                bn_weights, ptr = self._take(weights, ptr, bn.weight.data)
                bn_running_mean, ptr = self._take(weights, ptr, bn.running_mean)
                bn_running_var, ptr = self._take(weights, ptr, bn.running_var)

                bn.bias.data.copy_(bn_biases)
                bn.weight.data.copy_(bn_weights)
                bn.running_mean.copy_(bn_running_mean)
                bn.running_var.copy_(bn_running_var)

            elif conv.bias is not None:
                conv_biases, ptr = self._take(weights, ptr, conv.bias.data)
                conv.bias.data.copy_(conv_biases)

            else:
                # NOTE(review): assumes the file still stores one bias per
                # filter for a conv built without bias - confirm.
                ptr += conv.out_channels

            conv_weights, ptr = self._take(weights, ptr, conv.weight.data)
            conv.weight.data.copy_(conv_weights)
|