josedolot committed on
Commit
20f7115
·
1 Parent(s): ca27009

Upload encoders/timm_efficientnet.py

Browse files
Files changed (1) hide show
  1. encoders/timm_efficientnet.py +382 -0
encoders/timm_efficientnet.py ADDED
@@ -0,0 +1,382 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from functools import partial
2
+
3
+ import torch
4
+ import torch.nn as nn
5
+
6
+ from timm.models.efficientnet import EfficientNet
7
+ from timm.models.efficientnet import decode_arch_def, round_channels, default_cfgs
8
+ from timm.models.layers.activations import Swish
9
+
10
+ from ._base import EncoderMixin
11
+
12
+
13
def get_efficientnet_kwargs(channel_multiplier=1.0, depth_multiplier=1.0, drop_rate=0.2):
    """Build the keyword arguments used to construct a standard EfficientNet.

    Nothing is instantiated here; the returned dict is meant to be unpacked
    into the ``EfficientNet`` constructor.

    Ref impl: https://github.com/tensorflow/tpu/blob/master/models/official/efficientnet/efficientnet_model.py
    Paper: https://arxiv.org/abs/1905.11946

    Reference scaling parameters,
    name: (channel_multiplier, depth_multiplier, resolution, dropout_rate)
        'efficientnet-b0': (1.0, 1.0, 224, 0.2),
        'efficientnet-b1': (1.0, 1.1, 240, 0.2),
        'efficientnet-b2': (1.1, 1.2, 260, 0.3),
        'efficientnet-b3': (1.2, 1.4, 300, 0.3),
        'efficientnet-b4': (1.4, 1.8, 380, 0.4),
        'efficientnet-b5': (1.6, 2.2, 456, 0.4),
        'efficientnet-b6': (1.8, 2.6, 528, 0.5),
        'efficientnet-b7': (2.0, 3.1, 600, 0.5),
        'efficientnet-b8': (2.2, 3.6, 672, 0.5),
        'efficientnet-l2': (4.3, 5.3, 800, 0.5),

    Args:
        channel_multiplier: multiplier to number of channels per layer
        depth_multiplier: multiplier to number of repeats per stage
        drop_rate: forwarded unchanged under the ``drop_rate`` key

    Returns:
        dict with the keys ``block_args``, ``num_features``, ``stem_size``,
        ``round_chs_fn``, ``act_layer``, ``drop_rate`` and ``drop_path_rate``.
    """
    # One inner list per stage; the strings are interpreted by decode_arch_def.
    stage_specs = [
        ['ds_r1_k3_s1_e1_c16_se0.25'],
        ['ir_r2_k3_s2_e6_c24_se0.25'],
        ['ir_r2_k5_s2_e6_c40_se0.25'],
        ['ir_r3_k3_s2_e6_c80_se0.25'],
        ['ir_r3_k5_s1_e6_c112_se0.25'],
        ['ir_r4_k5_s2_e6_c192_se0.25'],
        ['ir_r1_k3_s1_e6_c320_se0.25'],
    ]
    return {
        "block_args": decode_arch_def(stage_specs, depth_multiplier),
        # The head width is scaled with the same channel multiplier as the blocks.
        "num_features": round_channels(1280, channel_multiplier, 8, None),
        "stem_size": 32,
        "round_chs_fn": partial(round_channels, multiplier=channel_multiplier),
        "act_layer": Swish,
        "drop_rate": drop_rate,
        # Fixed for every variant; not exposed as a parameter.
        "drop_path_rate": 0.2,
    }
52
+
53
def gen_efficientnet_lite_kwargs(channel_multiplier=1.0, depth_multiplier=1.0, drop_rate=0.2):
    """Build the keyword arguments used to construct an EfficientNet-Lite.

    Nothing is instantiated here; the returned dict is meant to be unpacked
    into the ``EfficientNet`` constructor.

    Ref impl: https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet/lite
    Paper: https://arxiv.org/abs/1905.11946

    Reference scaling parameters,
    name: (channel_multiplier, depth_multiplier, resolution, dropout_rate)
        'efficientnet-lite0': (1.0, 1.0, 224, 0.2),
        'efficientnet-lite1': (1.0, 1.1, 240, 0.2),
        'efficientnet-lite2': (1.1, 1.2, 260, 0.3),
        'efficientnet-lite3': (1.2, 1.4, 280, 0.3),
        'efficientnet-lite4': (1.4, 1.8, 300, 0.3),

    Args:
        channel_multiplier: multiplier to number of channels per layer
        depth_multiplier: multiplier to number of repeats per stage
        drop_rate: forwarded unchanged under the ``drop_rate`` key

    Returns:
        dict with the keys ``block_args``, ``num_features``, ``stem_size``,
        ``fix_stem``, ``round_chs_fn``, ``act_layer``, ``drop_rate`` and
        ``drop_path_rate``.
    """
    # One inner list per stage; note that these strings carry no `se` field.
    stage_specs = [
        ['ds_r1_k3_s1_e1_c16'],
        ['ir_r2_k3_s2_e6_c24'],
        ['ir_r2_k5_s2_e6_c40'],
        ['ir_r3_k3_s2_e6_c80'],
        ['ir_r3_k5_s1_e6_c112'],
        ['ir_r4_k5_s2_e6_c192'],
        ['ir_r1_k3_s1_e6_c320'],
    ]
    return {
        "block_args": decode_arch_def(stage_specs, depth_multiplier, fix_first_last=True),
        # Head width is a constant here; it is not scaled by channel_multiplier.
        "num_features": 1280,
        "stem_size": 32,
        "fix_stem": True,
        "round_chs_fn": partial(round_channels, multiplier=channel_multiplier),
        "act_layer": nn.ReLU6,
        "drop_rate": drop_rate,
        # Fixed for every variant; not exposed as a parameter.
        "drop_path_rate": 0.2,
    }
91
+
92
class EfficientNetBaseEncoder(EfficientNet, EncoderMixin):
    """``EfficientNet`` that returns per-stage feature maps instead of logits.

    The ``classifier`` attribute is deleted after construction, and
    ``forward`` is replaced by a pass over the stages from ``get_stages``
    that collects every intermediate output.
    """

    def __init__(self, stage_idxs, out_channels, depth=5, **kwargs):
        """Construct the backbone and record the encoder bookkeeping.

        Args:
            stage_idxs: three indices into ``self.blocks`` marking where the
                block sequence is cut into stages.
            out_channels: stored as ``_out_channels``; presumably the channel
                count of each returned feature map (used by ``EncoderMixin``
                -- confirm there).
            depth: number of stages run after the identity stage; ``forward``
                returns ``depth + 1`` tensors.
            **kwargs: forwarded to ``EfficientNet.__init__``.
        """
        super().__init__(**kwargs)

        self._stage_idxs = stage_idxs
        self._out_channels = out_channels
        self._depth = depth
        self._in_channels = 3

        # The classifier attribute is not needed for feature extraction.
        del self.classifier

    def get_stages(self):
        """Return the six callables applied in sequence by ``forward``."""
        return [
            nn.Identity(),
            nn.Sequential(self.conv_stem, self.bn1, self.act1),
            self.blocks[:self._stage_idxs[0]],
            self.blocks[self._stage_idxs[0]:self._stage_idxs[1]],
            self.blocks[self._stage_idxs[1]:self._stage_idxs[2]],
            self.blocks[self._stage_idxs[2]:],
        ]

    def forward(self, x):
        """Run the first ``depth + 1`` stages and return all of their outputs.

        Returns:
            list whose first element is the input itself (identity stage),
            followed by the output of each subsequent stage.
        """
        stages = self.get_stages()

        features = []
        for i in range(self._depth + 1):
            x = stages[i](x)
            features.append(x)

        return features

    def load_state_dict(self, state_dict, **kwargs):
        """Load weights, ignoring the classifier parameters.

        ``classifier.bias`` and ``classifier.weight`` are removed from
        ``state_dict`` in place (the caller's mapping is modified) because
        this module no longer has a ``classifier`` attribute.

        Returns:
            Whatever the parent ``load_state_dict`` returns, so callers using
            ``strict=False`` can inspect missing / unexpected keys.
        """
        state_dict.pop("classifier.bias", None)
        state_dict.pop("classifier.weight", None)
        # Propagate the parent's result instead of silently discarding it.
        return super().load_state_dict(state_dict, **kwargs)
128
+
129
+
130
class EfficientNetEncoder(EfficientNetBaseEncoder):
    """Encoder configured through ``get_efficientnet_kwargs``."""

    def __init__(self, stage_idxs, out_channels, depth=5, channel_multiplier=1.0, depth_multiplier=1.0, drop_rate=0.2):
        """Translate the scaling parameters into backbone kwargs and build.

        Args:
            stage_idxs: passed through to ``EfficientNetBaseEncoder``.
            out_channels: passed through to ``EfficientNetBaseEncoder``.
            depth: passed through to ``EfficientNetBaseEncoder``.
            channel_multiplier: forwarded to ``get_efficientnet_kwargs``.
            depth_multiplier: forwarded to ``get_efficientnet_kwargs``.
            drop_rate: forwarded to ``get_efficientnet_kwargs``.
        """
        super().__init__(
            stage_idxs,
            out_channels,
            depth,
            **get_efficientnet_kwargs(
                channel_multiplier=channel_multiplier,
                depth_multiplier=depth_multiplier,
                drop_rate=drop_rate,
            ),
        )
135
+
136
+
137
class EfficientNetLiteEncoder(EfficientNetBaseEncoder):
    """Encoder configured through ``gen_efficientnet_lite_kwargs``."""

    def __init__(self, stage_idxs, out_channels, depth=5, channel_multiplier=1.0, depth_multiplier=1.0, drop_rate=0.2):
        """Translate the scaling parameters into backbone kwargs and build.

        Args:
            stage_idxs: passed through to ``EfficientNetBaseEncoder``.
            out_channels: passed through to ``EfficientNetBaseEncoder``.
            depth: passed through to ``EfficientNetBaseEncoder``.
            channel_multiplier: forwarded to ``gen_efficientnet_lite_kwargs``.
            depth_multiplier: forwarded to ``gen_efficientnet_lite_kwargs``.
            drop_rate: forwarded to ``gen_efficientnet_lite_kwargs``.
        """
        super().__init__(
            stage_idxs,
            out_channels,
            depth,
            **gen_efficientnet_lite_kwargs(
                channel_multiplier=channel_multiplier,
                depth_multiplier=depth_multiplier,
                drop_rate=drop_rate,
            ),
        )
142
+
143
+
144
def prepare_settings(settings):
    """Reduce a config mapping to the pretrained-settings layout used below.

    Args:
        settings: mapping that must provide ``"mean"``, ``"std"`` and
            ``"url"``; any other keys are ignored.

    Returns:
        A new dict holding those three values plus a fixed ``"input_range"``
        of ``(0, 1)`` and ``"input_space"`` of ``"RGB"``.

    Raises:
        KeyError: if one of the three required keys is missing.
    """
    prepared = {key: settings[key] for key in ("mean", "std", "url")}
    prepared["input_range"] = (0, 1)
    prepared["input_space"] = "RGB"
    return prepared
152
+
153
+
154
def _encoder_spec(encoder, cfg_names, out_channels, channel_multiplier, depth_multiplier, drop_rate):
    """Assemble one registry entry in the layout shared by every encoder.

    Args:
        encoder: encoder class stored under ``"encoder"``.
        cfg_names: mapping of weight-set label -> key in ``default_cfgs``.
        out_channels: stored under ``params["out_channels"]``.
        channel_multiplier: stored under ``params["channel_multiplier"]``.
        depth_multiplier: stored under ``params["depth_multiplier"]``.
        drop_rate: stored under ``params["drop_rate"]``.
    """
    return {
        "encoder": encoder,
        "pretrained_settings": {
            weights: prepare_settings(default_cfgs[cfg_name])
            for weights, cfg_name in cfg_names.items()
        },
        "params": {
            "out_channels": out_channels,
            # Every variant in this file uses the same stage boundaries.
            "stage_idxs": (2, 3, 5),
            "channel_multiplier": channel_multiplier,
            "depth_multiplier": depth_multiplier,
            "drop_rate": drop_rate,
        },
    }


# Registry of encoder name -> {"encoder", "pretrained_settings", "params"}.
# Positional values after the cfg-name mapping are, in order:
# out_channels, channel_multiplier, depth_multiplier, drop_rate.
timm_efficientnet_encoders = {
    "timm-efficientnet-b0": _encoder_spec(
        EfficientNetEncoder,
        {
            "imagenet": "tf_efficientnet_b0",
            "advprop": "tf_efficientnet_b0_ap",
            "noisy-student": "tf_efficientnet_b0_ns",
        },
        (3, 32, 24, 40, 112, 320), 1.0, 1.0, 0.2,
    ),
    "timm-efficientnet-b1": _encoder_spec(
        EfficientNetEncoder,
        {
            "imagenet": "tf_efficientnet_b1",
            "advprop": "tf_efficientnet_b1_ap",
            "noisy-student": "tf_efficientnet_b1_ns",
        },
        (3, 32, 24, 40, 112, 320), 1.0, 1.1, 0.2,
    ),
    "timm-efficientnet-b2": _encoder_spec(
        EfficientNetEncoder,
        {
            "imagenet": "tf_efficientnet_b2",
            "advprop": "tf_efficientnet_b2_ap",
            "noisy-student": "tf_efficientnet_b2_ns",
        },
        (3, 32, 24, 48, 120, 352), 1.1, 1.2, 0.3,
    ),
    "timm-efficientnet-b3": _encoder_spec(
        EfficientNetEncoder,
        {
            "imagenet": "tf_efficientnet_b3",
            "advprop": "tf_efficientnet_b3_ap",
            "noisy-student": "tf_efficientnet_b3_ns",
        },
        (3, 40, 32, 48, 136, 384), 1.2, 1.4, 0.3,
    ),
    "timm-efficientnet-b4": _encoder_spec(
        EfficientNetEncoder,
        {
            "imagenet": "tf_efficientnet_b4",
            "advprop": "tf_efficientnet_b4_ap",
            "noisy-student": "tf_efficientnet_b4_ns",
        },
        (3, 48, 32, 56, 160, 448), 1.4, 1.8, 0.4,
    ),
    "timm-efficientnet-b5": _encoder_spec(
        EfficientNetEncoder,
        {
            "imagenet": "tf_efficientnet_b5",
            "advprop": "tf_efficientnet_b5_ap",
            "noisy-student": "tf_efficientnet_b5_ns",
        },
        (3, 48, 40, 64, 176, 512), 1.6, 2.2, 0.4,
    ),
    "timm-efficientnet-b6": _encoder_spec(
        EfficientNetEncoder,
        {
            "imagenet": "tf_efficientnet_b6",
            "advprop": "tf_efficientnet_b6_ap",
            "noisy-student": "tf_efficientnet_b6_ns",
        },
        (3, 56, 40, 72, 200, 576), 1.8, 2.6, 0.5,
    ),
    "timm-efficientnet-b7": _encoder_spec(
        EfficientNetEncoder,
        {
            "imagenet": "tf_efficientnet_b7",
            "advprop": "tf_efficientnet_b7_ap",
            "noisy-student": "tf_efficientnet_b7_ns",
        },
        (3, 64, 48, 80, 224, 640), 2.0, 3.1, 0.5,
    ),
    "timm-efficientnet-b8": _encoder_spec(
        EfficientNetEncoder,
        {
            "imagenet": "tf_efficientnet_b8",
            "advprop": "tf_efficientnet_b8_ap",
        },
        (3, 72, 56, 88, 248, 704), 2.2, 3.6, 0.5,
    ),
    "timm-efficientnet-l2": _encoder_spec(
        EfficientNetEncoder,
        {
            "noisy-student": "tf_efficientnet_l2_ns",
        },
        (3, 136, 104, 176, 480, 1376), 4.3, 5.3, 0.5,
    ),
    "timm-tf_efficientnet_lite0": _encoder_spec(
        EfficientNetLiteEncoder,
        {
            "imagenet": "tf_efficientnet_lite0",
        },
        (3, 32, 24, 40, 112, 320), 1.0, 1.0, 0.2,
    ),
    "timm-tf_efficientnet_lite1": _encoder_spec(
        EfficientNetLiteEncoder,
        {
            "imagenet": "tf_efficientnet_lite1",
        },
        (3, 32, 24, 40, 112, 320), 1.0, 1.1, 0.2,
    ),
    "timm-tf_efficientnet_lite2": _encoder_spec(
        EfficientNetLiteEncoder,
        {
            "imagenet": "tf_efficientnet_lite2",
        },
        (3, 32, 24, 48, 120, 352), 1.1, 1.2, 0.3,
    ),
    "timm-tf_efficientnet_lite3": _encoder_spec(
        EfficientNetLiteEncoder,
        {
            "imagenet": "tf_efficientnet_lite3",
        },
        (3, 32, 32, 48, 136, 384), 1.2, 1.4, 0.3,
    ),
    "timm-tf_efficientnet_lite4": _encoder_spec(
        EfficientNetLiteEncoder,
        {
            "imagenet": "tf_efficientnet_lite4",
        },
        (3, 32, 32, 56, 160, 448), 1.4, 1.8, 0.4,
    ),
}