pengdadaaa committed on
Commit
786f6a6
·
verified ·
1 Parent(s): 2212d04

Upload 741 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +2 -0
  2. README.md +13 -3
  3. metadata/SnakeCLEF2023-TrainMetadata-HM.csv +0 -0
  4. metadata/SnakeCLEF2023-TrainMetadata-iNat.csv +3 -0
  5. metadata/SnakeCLEF2023-ValMetadata.csv +0 -0
  6. metadata/process.py +23 -0
  7. metadata/train_full.csv +3 -0
  8. metadata/venomous_status_list.csv +1785 -0
  9. moe.py +179 -0
  10. pytorch-image-models/.gitattributes +1 -0
  11. pytorch-image-models/.github/FUNDING.yml +2 -0
  12. pytorch-image-models/.github/ISSUE_TEMPLATE/bug_report.md +32 -0
  13. pytorch-image-models/.github/ISSUE_TEMPLATE/config.yml +5 -0
  14. pytorch-image-models/.github/ISSUE_TEMPLATE/feature_request.md +21 -0
  15. pytorch-image-models/.github/workflows/build_documentation.yml +20 -0
  16. pytorch-image-models/.github/workflows/build_pr_documentation.yml +19 -0
  17. pytorch-image-models/.github/workflows/tests.yml +65 -0
  18. pytorch-image-models/.github/workflows/upload_pr_documentation.yml +16 -0
  19. pytorch-image-models/.gitignore +121 -0
  20. pytorch-image-models/CONTRIBUTING.md +106 -0
  21. pytorch-image-models/LICENSE +201 -0
  22. pytorch-image-models/MANIFEST.in +3 -0
  23. pytorch-image-models/README.md +544 -0
  24. pytorch-image-models/avg_checkpoints.py +152 -0
  25. pytorch-image-models/benchmark.py +703 -0
  26. pytorch-image-models/bulk_runner.py +210 -0
  27. pytorch-image-models/clean_checkpoint.py +115 -0
  28. pytorch-image-models/convert/convert_from_mxnet.py +107 -0
  29. pytorch-image-models/convert/convert_nest_flax.py +109 -0
  30. pytorch-image-models/distributed_train.sh +5 -0
  31. pytorch-image-models/docs/archived_changes.md +406 -0
  32. pytorch-image-models/docs/changes.md +710 -0
  33. pytorch-image-models/docs/feature_extraction.md +174 -0
  34. pytorch-image-models/docs/index.md +80 -0
  35. pytorch-image-models/docs/javascripts/tables.js +6 -0
  36. pytorch-image-models/docs/models.md +171 -0
  37. pytorch-image-models/docs/models/.pages +1 -0
  38. pytorch-image-models/docs/models/.templates/code_snippets.md +62 -0
  39. pytorch-image-models/docs/models/.templates/generate_readmes.py +64 -0
  40. pytorch-image-models/docs/models/.templates/models/adversarial-inception-v3.md +98 -0
  41. pytorch-image-models/docs/models/.templates/models/advprop.md +457 -0
  42. pytorch-image-models/docs/models/.templates/models/big-transfer.md +295 -0
  43. pytorch-image-models/docs/models/.templates/models/csp-darknet.md +81 -0
  44. pytorch-image-models/docs/models/.templates/models/csp-resnet.md +76 -0
  45. pytorch-image-models/docs/models/.templates/models/csp-resnext.md +77 -0
  46. pytorch-image-models/docs/models/.templates/models/densenet.md +305 -0
  47. pytorch-image-models/docs/models/.templates/models/dla.md +545 -0
  48. pytorch-image-models/docs/models/.templates/models/dpn.md +256 -0
  49. pytorch-image-models/docs/models/.templates/models/ecaresnet.md +236 -0
  50. pytorch-image-models/docs/models/.templates/models/efficientnet-pruned.md +145 -0
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ metadata/SnakeCLEF2023-TrainMetadata-iNat.csv filter=lfs diff=lfs merge=lfs -text
37
+ metadata/train_full.csv filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,3 +1,13 @@
1
- ---
2
- license: mit
3
- ---
 
 
 
 
 
 
 
 
 
 
 
1
+ # CLEF2024
2
+
3
+ This repository contains the code for the SnakeCLEF competitions.
4
+
5
+ ## SnakeCLEF 2024
6
+
7
+ The dataset is read from the `*.csv` files in the `./metadata` directory, which include the labels and storage paths of the images. The `moe.py` file provides a model structure based on `convnext_large_mlp`, and you can choose other backbones based on this structure.
8
+
9
+ ### Usage
10
+
11
+ First, set the path to the pretrained weights (`LARGE_CP`) in the `moe.py` file.
12
+ Next, specify the paths for saving logs and checkpoints in the `train_moe.py` file.
13
+ Finally, run `python train_moe.py`.
metadata/SnakeCLEF2023-TrainMetadata-HM.csv ADDED
The diff for this file is too large to render. See raw diff
 
metadata/SnakeCLEF2023-TrainMetadata-iNat.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0748b85225610459038d5899312e7ca73e89eaa469ffeb56134c4793c5b653a6
3
+ size 13509584
metadata/SnakeCLEF2023-ValMetadata.csv ADDED
The diff for this file is too large to render. See raw diff
 
metadata/process.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import os
3
+ file1 = 'SnakeCLEF2023-TrainMetadata-iNat.csv'
4
+ root = '/data1/dataset/SnakeCLEF2024/'
5
+ filehmp = 'SnakeCLEF2023-TrainMetadata-HM.csv'
6
+
7
+ df1 = pd.read_csv(file1)
8
+ path1 = 'SnakeCLEF2023-large_size/'
9
+ df1['image_path'] = path1 + df1['image_path']
10
+ df2 = pd.read_csv(filehmp)
11
+ df_full = pd.concat([df1, df2],axis=0, ignore_index=True)
12
+ df_full['endemic'] = df_full['endemic'].astype(bool)
13
+ df_full['class_id'] = df_full['class_id'].astype(int)
14
+ for col in df_full.columns:
15
+ if col not in ['endemic', 'class_id']:
16
+ df_full[col] = df_full[col].astype(str)
17
+
18
+
19
+ image_exists = df_full['image_path'].apply(lambda x: os.path.exists(os.path.join(root, x)))
20
+ df_full = df_full[image_exists].reset_index(drop=True)
21
+
22
+ df_full.to_csv('train_full.csv', index=False)
23
+ print('suceess')
metadata/train_full.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b66b9f17ba0b4e99ad2072f4e830a1f16d27096c27db8c2ca2c51025d304f9f
3
+ size 18514388
metadata/venomous_status_list.csv ADDED
@@ -0,0 +1,1785 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ class_id,MIVS
2
+ 1779,0
3
+ 1772,0
4
+ 95,0
5
+ 1606,0
6
+ 561,0
7
+ 469,0
8
+ 161,0
9
+ 157,0
10
+ 861,0
11
+ 621,0
12
+ 741,1
13
+ 66,0
14
+ 857,1
15
+ 1182,0
16
+ 1736,1
17
+ 387,0
18
+ 267,0
19
+ 296,0
20
+ 1363,0
21
+ 1783,0
22
+ 619,0
23
+ 1608,0
24
+ 1422,0
25
+ 1118,1
26
+ 860,0
27
+ 1008,0
28
+ 415,1
29
+ 154,1
30
+ 96,0
31
+ 1353,0
32
+ 866,0
33
+ 1530,1
34
+ 694,0
35
+ 1359,0
36
+ 1702,0
37
+ 570,0
38
+ 431,1
39
+ 1317,1
40
+ 1190,1
41
+ 1447,0
42
+ 159,1
43
+ 224,1
44
+ 557,0
45
+ 773,0
46
+ 342,0
47
+ 205,1
48
+ 1305,0
49
+ 1238,0
50
+ 995,0
51
+ 1287,0
52
+ 1013,0
53
+ 1780,0
54
+ 998,0
55
+ 1482,0
56
+ 870,0
57
+ 1035,1
58
+ 1629,0
59
+ 1778,0
60
+ 797,0
61
+ 349,0
62
+ 1765,0
63
+ 124,0
64
+ 338,0
65
+ 556,0
66
+ 673,0
67
+ 221,1
68
+ 1612,0
69
+ 678,0
70
+ 1762,0
71
+ 1764,0
72
+ 228,1
73
+ 412,1
74
+ 1295,0
75
+ 11,0
76
+ 322,0
77
+ 1424,0
78
+ 238,1
79
+ 778,0
80
+ 1267,0
81
+ 201,0
82
+ 1076,1
83
+ 1226,0
84
+ 62,0
85
+ 339,0
86
+ 703,0
87
+ 880,0
88
+ 699,0
89
+ 1579,0
90
+ 864,0
91
+ 1107,1
92
+ 568,0
93
+ 1298,0
94
+ 1346,0
95
+ 56,0
96
+ 298,0
97
+ 113,1
98
+ 892,0
99
+ 1553,0
100
+ 788,0
101
+ 1132,0
102
+ 630,0
103
+ 562,1
104
+ 109,0
105
+ 573,0
106
+ 606,0
107
+ 57,0
108
+ 1459,0
109
+ 540,0
110
+ 1775,0
111
+ 1602,0
112
+ 1072,1
113
+ 695,0
114
+ 1655,0
115
+ 1119,1
116
+ 869,0
117
+ 358,0
118
+ 1545,0
119
+ 563,0
120
+ 1145,0
121
+ 1156,0
122
+ 391,0
123
+ 385,0
124
+ 912,0
125
+ 684,0
126
+ 893,0
127
+ 1646,0
128
+ 1284,0
129
+ 237,1
130
+ 629,0
131
+ 216,1
132
+ 248,1
133
+ 225,1
134
+ 714,0
135
+ 1563,0
136
+ 887,0
137
+ 1006,0
138
+ 321,0
139
+ 701,0
140
+ 1706,0
141
+ 388,0
142
+ 1280,0
143
+ 1289,0
144
+ 1430,0
145
+ 429,1
146
+ 1503,0
147
+ 1585,0
148
+ 512,1
149
+ 1052,1
150
+ 443,1
151
+ 50,0
152
+ 842,0
153
+ 859,0
154
+ 903,0
155
+ 670,0
156
+ 886,0
157
+ 894,0
158
+ 1492,0
159
+ 1085,0
160
+ 1084,0
161
+ 1649,0
162
+ 1648,0
163
+ 67,0
164
+ 487,0
165
+ 273,0
166
+ 1714,1
167
+ 1369,0
168
+ 1066,1
169
+ 1571,0
170
+ 166,0
171
+ 1749,1
172
+ 1205,0
173
+ 1734,1
174
+ 602,0
175
+ 867,0
176
+ 622,0
177
+ 1446,0
178
+ 896,0
179
+ 323,0
180
+ 83,0
181
+ 533,0
182
+ 1709,0
183
+ 615,0
184
+ 33,1
185
+ 1137,0
186
+ 901,0
187
+ 1700,0
188
+ 1286,0
189
+ 453,0
190
+ 283,0
191
+ 41,0
192
+ 12,0
193
+ 1366,0
194
+ 421,1
195
+ 27,1
196
+ 1081,1
197
+ 312,0
198
+ 564,0
199
+ 1303,0
200
+ 1691,0
201
+ 1432,0
202
+ 1504,0
203
+ 601,1
204
+ 1750,0
205
+ 569,0
206
+ 1311,0
207
+ 771,0
208
+ 58,0
209
+ 364,0
210
+ 537,0
211
+ 637,0
212
+ 1069,0
213
+ 1004,0
214
+ 1592,0
215
+ 907,0
216
+ 1025,0
217
+ 1546,0
218
+ 462,1
219
+ 571,0
220
+ 841,0
221
+ 340,0
222
+ 973,0
223
+ 1388,0
224
+ 898,0
225
+ 708,0
226
+ 1016,0
227
+ 798,0
228
+ 1594,0
229
+ 600,1
230
+ 953,0
231
+ 1274,0
232
+ 1138,0
233
+ 790,0
234
+ 874,0
235
+ 1207,0
236
+ 1361,0
237
+ 177,0
238
+ 1233,0
239
+ 928,0
240
+ 1093,1
241
+ 791,0
242
+ 690,0
243
+ 1345,0
244
+ 739,1
245
+ 1739,1
246
+ 1746,1
247
+ 792,0
248
+ 919,0
249
+ 327,0
250
+ 423,1
251
+ 833,0
252
+ 1316,1
253
+ 1493,0
254
+ 380,0
255
+ 211,1
256
+ 300,0
257
+ 425,1
258
+ 1438,0
259
+ 737,0
260
+ 1337,0
261
+ 1626,0
262
+ 223,1
263
+ 917,0
264
+ 882,0
265
+ 1073,1
266
+ 1460,0
267
+ 1297,0
268
+ 1658,0
269
+ 408,0
270
+ 1231,0
271
+ 933,0
272
+ 890,0
273
+ 810,0
274
+ 806,1
275
+ 167,0
276
+ 82,0
277
+ 1351,0
278
+ 1609,0
279
+ 1660,0
280
+ 1498,0
281
+ 669,0
282
+ 844,0
283
+ 76,0
284
+ 1393,1
285
+ 665,0
286
+ 1261,0
287
+ 492,0
288
+ 1017,0
289
+ 1450,0
290
+ 1721,0
291
+ 1584,0
292
+ 1671,1
293
+ 832,0
294
+ 679,0
295
+ 1670,1
296
+ 442,1
297
+ 521,0
298
+ 314,0
299
+ 687,0
300
+ 1495,0
301
+ 641,0
302
+ 464,1
303
+ 1742,1
304
+ 1567,0
305
+ 755,0
306
+ 794,0
307
+ 1728,0
308
+ 1159,0
309
+ 1206,0
310
+ 1486,0
311
+ 1590,0
312
+ 905,0
313
+ 365,0
314
+ 1499,0
315
+ 218,1
316
+ 288,0
317
+ 1528,1
318
+ 640,0
319
+ 1239,0
320
+ 934,0
321
+ 657,0
322
+ 653,0
323
+ 849,0
324
+ 927,0
325
+ 1627,0
326
+ 1737,1
327
+ 1520,0
328
+ 575,0
329
+ 1512,0
330
+ 1228,0
331
+ 479,0
332
+ 417,1
333
+ 1540,0
334
+ 1639,0
335
+ 130,0
336
+ 311,0
337
+ 681,0
338
+ 1377,0
339
+ 362,0
340
+ 702,0
341
+ 102,0
342
+ 623,0
343
+ 1099,0
344
+ 1136,0
345
+ 499,0
346
+ 457,0
347
+ 711,0
348
+ 656,0
349
+ 565,0
350
+ 1552,0
351
+ 1154,0
352
+ 1148,0
353
+ 1703,1
354
+ 192,0
355
+ 1457,0
356
+ 567,0
357
+ 1325,0
358
+ 3,1
359
+ 1378,0
360
+ 1421,0
361
+ 294,0
362
+ 1142,0
363
+ 932,0
364
+ 299,0
365
+ 1550,0
366
+ 372,0
367
+ 517,0
368
+ 1106,1
369
+ 1230,1
370
+ 847,0
371
+ 814,0
372
+ 1019,0
373
+ 536,0
374
+ 677,0
375
+ 331,0
376
+ 780,0
377
+ 1575,0
378
+ 1223,0
379
+ 1524,0
380
+ 1770,0
381
+ 566,0
382
+ 333,0
383
+ 1225,0
384
+ 1288,0
385
+ 1056,1
386
+ 1657,0
387
+ 247,1
388
+ 23,0
389
+ 1452,1
390
+ 1271,0
391
+ 1224,0
392
+ 1593,0
393
+ 822,0
394
+ 1500,0
395
+ 1632,0
396
+ 1650,1
397
+ 171,0
398
+ 430,1
399
+ 616,0
400
+ 1102,0
401
+ 1037,1
402
+ 1684,1
403
+ 437,1
404
+ 1418,0
405
+ 1125,1
406
+ 1578,0
407
+ 348,0
408
+ 424,1
409
+ 572,0
410
+ 1178,0
411
+ 49,0
412
+ 1310,0
413
+ 482,0
414
+ 520,0
415
+ 47,0
416
+ 1678,0
417
+ 922,0
418
+ 803,0
419
+ 840,0
420
+ 1487,0
421
+ 291,0
422
+ 862,0
423
+ 1202,0
424
+ 1507,0
425
+ 1745,1
426
+ 328,0
427
+ 189,0
428
+ 195,0
429
+ 494,0
430
+ 957,0
431
+ 427,1
432
+ 92,0
433
+ 1234,0
434
+ 885,0
435
+ 1364,0
436
+ 371,0
437
+ 81,0
438
+ 645,0
439
+ 0,1
440
+ 301,1
441
+ 1510,0
442
+ 1485,0
443
+ 36,0
444
+ 1001,0
445
+ 185,0
446
+ 271,0
447
+ 783,1
448
+ 68,0
449
+ 329,0
450
+ 624,0
451
+ 452,0
452
+ 982,0
453
+ 386,0
454
+ 751,0
455
+ 172,0
456
+ 1360,0
457
+ 4,1
458
+ 1730,0
459
+ 993,0
460
+ 1121,1
461
+ 14,0
462
+ 46,0
463
+ 845,0
464
+ 1054,0
465
+ 1115,1
466
+ 77,0
467
+ 1688,1
468
+ 758,0
469
+ 1379,0
470
+ 1283,0
471
+ 25,0
472
+ 1198,0
473
+ 776,0
474
+ 662,0
475
+ 638,0
476
+ 643,0
477
+ 911,0
478
+ 1713,1
479
+ 728,0
480
+ 1315,1
481
+ 1053,1
482
+ 5,1
483
+ 1669,1
484
+ 584,0
485
+ 1431,0
486
+ 253,0
487
+ 160,1
488
+ 112,1
489
+ 308,1
490
+ 1568,0
491
+ 1589,0
492
+ 357,0
493
+ 1357,0
494
+ 64,0
495
+ 618,0
496
+ 1423,0
497
+ 1748,0
498
+ 1020,0
499
+ 1071,1
500
+ 1300,0
501
+ 306,0
502
+ 1240,0
503
+ 608,0
504
+ 988,0
505
+ 1367,0
506
+ 433,1
507
+ 467,0
508
+ 1481,0
509
+ 1352,0
510
+ 1735,1
511
+ 1094,1
512
+ 1701,0
513
+ 444,0
514
+ 410,1
515
+ 1633,0
516
+ 873,0
517
+ 368,0
518
+ 1501,0
519
+ 214,1
520
+ 382,0
521
+ 743,1
522
+ 1619,0
523
+ 1227,0
524
+ 347,0
525
+ 243,1
526
+ 2,1
527
+ 1339,0
528
+ 419,1
529
+ 883,0
530
+ 796,0
531
+ 1635,0
532
+ 1696,0
533
+ 997,0
534
+ 55,0
535
+ 307,1
536
+ 1347,0
537
+ 1513,0
538
+ 586,0
539
+ 255,0
540
+ 97,0
541
+ 233,1
542
+ 713,0
543
+ 1558,0
544
+ 929,0
545
+ 6,1
546
+ 1000,0
547
+ 1269,0
548
+ 951,0
549
+ 858,1
550
+ 576,0
551
+ 627,0
552
+ 785,0
553
+ 1412,0
554
+ 1191,1
555
+ 79,0
556
+ 1278,0
557
+ 325,0
558
+ 86,0
559
+ 1414,0
560
+ 523,0
561
+ 1557,0
562
+ 764,0
563
+ 151,1
564
+ 1651,1
565
+ 1258,0
566
+ 763,0
567
+ 104,0
568
+ 183,0
569
+ 1686,1
570
+ 744,0
571
+ 915,0
572
+ 483,0
573
+ 1149,0
574
+ 1723,0
575
+ 310,1
576
+ 1152,0
577
+ 889,0
578
+ 913,0
579
+ 245,1
580
+ 78,0
581
+ 293,0
582
+ 538,0
583
+ 1643,0
584
+ 1018,0
585
+ 595,1
586
+ 1290,0
587
+ 91,0
588
+ 1292,0
589
+ 596,1
590
+ 407,1
591
+ 1380,0
592
+ 959,0
593
+ 184,0
594
+ 578,0
595
+ 318,0
596
+ 454,0
597
+ 1697,0
598
+ 1603,0
599
+ 405,1
600
+ 1090,1
601
+ 369,0
602
+ 496,0
603
+ 795,0
604
+ 946,0
605
+ 317,0
606
+ 1428,0
607
+ 1751,0
608
+ 750,0
609
+ 1664,0
610
+ 1217,0
611
+ 686,0
612
+ 1187,0
613
+ 871,0
614
+ 439,1
615
+ 1040,1
616
+ 664,0
617
+ 1070,0
618
+ 1135,0
619
+ 377,0
620
+ 834,0
621
+ 61,0
622
+ 852,0
623
+ 1551,0
624
+ 955,0
625
+ 409,1
626
+ 1009,0
627
+ 1580,0
628
+ 200,0
629
+ 772,0
630
+ 1624,0
631
+ 1475,0
632
+ 8,0
633
+ 1012,0
634
+ 546,0
635
+ 610,0
636
+ 991,0
637
+ 337,0
638
+ 440,1
639
+ 434,1
640
+ 1427,0
641
+ 1615,0
642
+ 403,1
643
+ 620,0
644
+ 356,0
645
+ 1059,1
646
+ 303,0
647
+ 1082,1
648
+ 809,0
649
+ 80,0
650
+ 1285,0
651
+ 1652,1
652
+ 1348,0
653
+ 416,1
654
+ 1114,1
655
+ 522,0
656
+ 17,0
657
+ 1476,0
658
+ 839,0
659
+ 1474,0
660
+ 1640,0
661
+ 1065,1
662
+ 1497,0
663
+ 287,0
664
+ 1402,0
665
+ 980,0
666
+ 1044,1
667
+ 1373,1
668
+ 1565,0
669
+ 110,1
670
+ 44,0
671
+ 399,0
672
+ 1394,0
673
+ 1596,0
674
+ 1026,0
675
+ 1221,0
676
+ 242,1
677
+ 344,0
678
+ 1616,0
679
+ 1334,0
680
+ 949,0
681
+ 824,0
682
+ 724,0
683
+ 1034,1
684
+ 511,1
685
+ 1505,0
686
+ 1068,1
687
+ 250,0
688
+ 1516,0
689
+ 829,1
690
+ 1116,1
691
+ 73,0
692
+ 1147,0
693
+ 594,0
694
+ 1480,0
695
+ 1698,0
696
+ 979,0
697
+ 1129,1
698
+ 355,0
699
+ 222,1
700
+ 1175,0
701
+ 823,0
702
+ 187,0
703
+ 969,0
704
+ 491,0
705
+ 1761,0
706
+ 1420,0
707
+ 1164,0
708
+ 1704,0
709
+ 1676,0
710
+ 1296,0
711
+ 1381,0
712
+ 895,0
713
+ 1666,1
714
+ 830,1
715
+ 939,0
716
+ 553,0
717
+ 1604,0
718
+ 370,0
719
+ 514,0
720
+ 1007,0
721
+ 1782,0
722
+ 1134,0
723
+ 963,0
724
+ 381,0
725
+ 422,1
726
+ 696,0
727
+ 1235,0
728
+ 1247,0
729
+ 525,0
730
+ 285,1
731
+ 1618,0
732
+ 965,0
733
+ 828,1
734
+ 281,0
735
+ 1556,0
736
+ 1645,0
737
+ 203,0
738
+ 367,0
739
+ 1729,0
740
+ 1491,0
741
+ 1014,0
742
+ 345,0
743
+ 324,0
744
+ 411,1
745
+ 257,1
746
+ 1201,0
747
+ 756,0
748
+ 1667,0
749
+ 548,0
750
+ 1711,0
751
+ 1719,0
752
+ 1192,1
753
+ 1591,0
754
+ 821,0
755
+ 1173,0
756
+ 1088,1
757
+ 1549,0
758
+ 877,0
759
+ 1174,0
760
+ 280,1
761
+ 747,0
762
+ 420,1
763
+ 13,0
764
+ 70,0
765
+ 251,0
766
+ 1370,1
767
+ 1391,1
768
+ 819,0
769
+ 1042,1
770
+ 1126,1
771
+ 854,0
772
+ 1404,0
773
+ 938,0
774
+ 1293,0
775
+ 985,0
776
+ 1220,0
777
+ 1335,0
778
+ 1113,1
779
+ 904,0
780
+ 941,0
781
+ 359,0
782
+ 447,0
783
+ 1759,0
784
+ 263,0
785
+ 1265,0
786
+ 804,1
787
+ 906,0
788
+ 510,1
789
+ 725,0
790
+ 1041,1
791
+ 1732,0
792
+ 264,0
793
+ 888,0
794
+ 811,0
795
+ 1529,1
796
+ 1158,1
797
+ 1766,0
798
+ 1636,0
799
+ 207,1
800
+ 305,0
801
+ 1318,1
802
+ 149,0
803
+ 1141,0
804
+ 1456,0
805
+ 1448,0
806
+ 554,0
807
+ 722,0
808
+ 1117,1
809
+ 1755,0
810
+ 1358,0
811
+ 406,1
812
+ 414,1
813
+ 697,0
814
+ 209,1
815
+ 275,0
816
+ 352,0
817
+ 1313,1
818
+ 698,0
819
+ 1535,0
820
+ 504,0
821
+ 990,0
822
+ 908,0
823
+ 1668,1
824
+ 182,0
825
+ 801,0
826
+ 1769,0
827
+ 1060,1
828
+ 799,0
829
+ 230,0
830
+ 1189,0
831
+ 633,0
832
+ 1628,0
833
+ 1108,1
834
+ 668,0
835
+ 165,0
836
+ 872,0
837
+ 351,0
838
+ 1681,0
839
+ 1582,0
840
+ 1248,0
841
+ 947,0
842
+ 1046,1
843
+ 259,1
844
+ 1015,0
845
+ 208,1
846
+ 1523,0
847
+ 1502,0
848
+ 1722,0
849
+ 1112,1
850
+ 1463,0
851
+ 213,1
852
+ 659,0
853
+ 693,0
854
+ 1172,0
855
+ 1144,0
856
+ 1074,0
857
+ 379,0
858
+ 897,0
859
+ 1237,0
860
+ 952,0
861
+ 334,0
862
+ 1756,0
863
+ 779,0
864
+ 326,0
865
+ 1654,0
866
+ 1241,0
867
+ 1390,1
868
+ 1199,0
869
+ 1413,0
870
+ 346,0
871
+ 1340,0
872
+ 1097,0
873
+ 1641,0
874
+ 1272,0
875
+ 1277,0
876
+ 1395,1
877
+ 1547,0
878
+ 448,0
879
+ 1183,0
880
+ 865,0
881
+ 374,0
882
+ 474,0
883
+ 975,0
884
+ 916,0
885
+ 490,0
886
+ 1471,0
887
+ 1389,1
888
+ 21,0
889
+ 1124,1
890
+ 777,0
891
+ 762,0
892
+ 1150,0
893
+ 1022,0
894
+ 404,0
895
+ 876,0
896
+ 500,0
897
+ 1403,0
898
+ 354,0
899
+ 1143,0
900
+ 760,0
901
+ 551,0
902
+ 1573,0
903
+ 1461,0
904
+ 1368,0
905
+ 1588,0
906
+ 309,1
907
+ 198,0
908
+ 158,0
909
+ 1675,0
910
+ 100,0
911
+ 1562,0
912
+ 1063,1
913
+ 692,0
914
+ 1560,0
915
+ 774,0
916
+ 461,0
917
+ 498,0
918
+ 256,1
919
+ 1747,1
920
+ 846,0
921
+ 1064,0
922
+ 1232,0
923
+ 1294,0
924
+ 1146,0
925
+ 613,0
926
+ 1140,0
927
+ 899,0
928
+ 418,1
929
+ 286,0
930
+ 930,0
931
+ 1554,0
932
+ 194,0
933
+ 1376,1
934
+ 942,0
935
+ 179,0
936
+ 260,1
937
+ 1437,0
938
+ 1623,0
939
+ 1027,0
940
+ 970,0
941
+ 1101,0
942
+ 31,0
943
+ 1653,1
944
+ 881,0
945
+ 1479,0
946
+ 1109,1
947
+ 1419,0
948
+ 715,0
949
+ 545,0
950
+ 402,1
951
+ 1634,0
952
+ 635,0
953
+ 438,0
954
+ 910,0
955
+ 726,0
956
+ 1731,0
957
+ 84,0
958
+ 1128,1
959
+ 43,0
960
+ 972,0
961
+ 393,0
962
+ 1708,0
963
+ 1637,0
964
+ 1387,0
965
+ 1693,0
966
+ 1098,0
967
+ 246,0
968
+ 38,0
969
+ 164,0
970
+ 1236,0
971
+ 1738,0
972
+ 163,0
973
+ 1050,1
974
+ 1002,0
975
+ 436,0
976
+ 1245,0
977
+ 1188,0
978
+ 651,0
979
+ 1564,0
980
+ 689,0
981
+ 1399,0
982
+ 1566,0
983
+ 547,0
984
+ 746,0
985
+ 1763,0
986
+ 279,1
987
+ 1506,0
988
+ 720,0
989
+ 1411,0
990
+ 502,0
991
+ 541,0
992
+ 577,0
993
+ 925,0
994
+ 971,0
995
+ 1517,0
996
+ 1127,1
997
+ 506,0
998
+ 302,1
999
+ 986,0
1000
+ 926,0
1001
+ 266,0
1002
+ 215,1
1003
+ 428,1
1004
+ 518,0
1005
+ 289,0
1006
+ 837,0
1007
+ 660,0
1008
+ 1385,0
1009
+ 1270,0
1010
+ 128,0
1011
+ 1758,0
1012
+ 1151,0
1013
+ 1445,0
1014
+ 432,0
1015
+ 558,0
1016
+ 28,0
1017
+ 143,0
1018
+ 1595,0
1019
+ 879,0
1020
+ 519,0
1021
+ 813,0
1022
+ 704,0
1023
+ 450,0
1024
+ 493,0
1025
+ 1319,1
1026
+ 190,0
1027
+ 1329,1
1028
+ 1473,0
1029
+ 599,1
1030
+ 1767,0
1031
+ 994,0
1032
+ 1120,1
1033
+ 875,0
1034
+ 392,0
1035
+ 206,1
1036
+ 759,0
1037
+ 1409,0
1038
+ 1434,0
1039
+ 1489,0
1040
+ 680,0
1041
+ 1705,0
1042
+ 1021,0
1043
+ 1122,1
1044
+ 170,0
1045
+ 181,0
1046
+ 948,0
1047
+ 1133,0
1048
+ 1386,0
1049
+ 1454,0
1050
+ 589,0
1051
+ 868,0
1052
+ 1327,1
1053
+ 1725,0
1054
+ 470,1
1055
+ 236,1
1056
+ 1672,0
1057
+ 644,0
1058
+ 1665,1
1059
+ 646,0
1060
+ 515,0
1061
+ 144,0
1062
+ 1773,0
1063
+ 1613,0
1064
+ 394,0
1065
+ 943,0
1066
+ 1543,0
1067
+ 597,1
1068
+ 688,0
1069
+ 98,0
1070
+ 900,0
1071
+ 1023,0
1072
+ 628,0
1073
+ 316,0
1074
+ 235,1
1075
+ 85,0
1076
+ 313,0
1077
+ 1715,0
1078
+ 977,0
1079
+ 1396,1
1080
+ 336,0
1081
+ 531,0
1082
+ 232,1
1083
+ 458,0
1084
+ 319,0
1085
+ 335,0
1086
+ 383,0
1087
+ 32,1
1088
+ 1757,0
1089
+ 535,0
1090
+ 902,0
1091
+ 395,0
1092
+ 1435,0
1093
+ 152,1
1094
+ 265,0
1095
+ 1083,0
1096
+ 1307,0
1097
+ 1537,0
1098
+ 1330,1
1099
+ 1061,1
1100
+ 1168,0
1101
+ 138,0
1102
+ 1611,0
1103
+ 155,0
1104
+ 320,0
1105
+ 9,0
1106
+ 1674,1
1107
+ 145,0
1108
+ 999,1
1109
+ 1266,0
1110
+ 1429,0
1111
+ 611,0
1112
+ 1036,1
1113
+ 29,1
1114
+ 706,0
1115
+ 1449,0
1116
+ 757,0
1117
+ 204,1
1118
+ 891,0
1119
+ 1690,0
1120
+ 1252,0
1121
+ 1193,0
1122
+ 1781,0
1123
+ 1242,0
1124
+ 1442,0
1125
+ 855,1
1126
+ 1195,0
1127
+ 863,0
1128
+ 1470,0
1129
+ 1620,0
1130
+ 274,0
1131
+ 1196,0
1132
+ 1707,0
1133
+ 501,0
1134
+ 241,1
1135
+ 261,1
1136
+ 752,0
1137
+ 1408,0
1138
+ 262,1
1139
+ 1679,1
1140
+ 652,0
1141
+ 226,1
1142
+ 378,0
1143
+ 1662,1
1144
+ 1216,0
1145
+ 1488,0
1146
+ 111,0
1147
+ 1161,0
1148
+ 590,0
1149
+ 749,0
1150
+ 1105,0
1151
+ 642,0
1152
+ 156,0
1153
+ 1333,0
1154
+ 1356,0
1155
+ 1263,0
1156
+ 663,0
1157
+ 754,0
1158
+ 1264,0
1159
+ 782,0
1160
+ 1514,0
1161
+ 240,1
1162
+ 1597,0
1163
+ 800,0
1164
+ 1687,1
1165
+ 1163,0
1166
+ 1365,0
1167
+ 1257,0
1168
+ 775,0
1169
+ 353,0
1170
+ 1160,0
1171
+ 966,0
1172
+ 88,0
1173
+ 229,1
1174
+ 784,0
1175
+ 269,0
1176
+ 954,0
1177
+ 1331,0
1178
+ 649,0
1179
+ 1685,1
1180
+ 766,0
1181
+ 134,0
1182
+ 526,0
1183
+ 836,0
1184
+ 1625,0
1185
+ 1401,0
1186
+ 127,0
1187
+ 710,0
1188
+ 1273,0
1189
+ 1484,0
1190
+ 460,0
1191
+ 1246,0
1192
+ 1531,0
1193
+ 191,0
1194
+ 1630,0
1195
+ 131,0
1196
+ 1577,0
1197
+ 1744,1
1198
+ 1268,0
1199
+ 1167,0
1200
+ 770,0
1201
+ 1222,0
1202
+ 593,0
1203
+ 48,0
1204
+ 1472,0
1205
+ 816,0
1206
+ 1349,0
1207
+ 918,0
1208
+ 121,0
1209
+ 827,0
1210
+ 375,0
1211
+ 1741,1
1212
+ 1110,1
1213
+ 1038,0
1214
+ 1509,0
1215
+ 956,0
1216
+ 650,0
1217
+ 1617,0
1218
+ 1057,1
1219
+ 1324,0
1220
+ 543,0
1221
+ 1508,0
1222
+ 1677,0
1223
+ 1095,0
1224
+ 1250,0
1225
+ 853,0
1226
+ 1647,0
1227
+ 252,0
1228
+ 484,0
1229
+ 1458,0
1230
+ 674,0
1231
+ 587,0
1232
+ 817,0
1233
+ 117,0
1234
+ 805,1
1235
+ 396,0
1236
+ 69,0
1237
+ 936,0
1238
+ 1555,0
1239
+ 1733,0
1240
+ 489,0
1241
+ 666,0
1242
+ 1415,0
1243
+ 477,0
1244
+ 219,1
1245
+ 277,0
1246
+ 16,0
1247
+ 1204,0
1248
+ 1576,0
1249
+ 363,0
1250
+ 736,0
1251
+ 1302,0
1252
+ 717,0
1253
+ 244,1
1254
+ 1328,1
1255
+ 967,0
1256
+ 1203,0
1257
+ 769,0
1258
+ 634,0
1259
+ 463,1
1260
+ 455,0
1261
+ 729,0
1262
+ 612,0
1263
+ 413,1
1264
+ 39,0
1265
+ 950,0
1266
+ 721,0
1267
+ 1599,0
1268
+ 981,0
1269
+ 1398,0
1270
+ 1683,0
1271
+ 473,0
1272
+ 1467,0
1273
+ 1249,0
1274
+ 1326,0
1275
+ 1176,0
1276
+ 1726,0
1277
+ 37,0
1278
+ 1465,0
1279
+ 18,0
1280
+ 1699,0
1281
+ 735,0
1282
+ 931,0
1283
+ 1692,0
1284
+ 1548,0
1285
+ 1607,0
1286
+ 968,0
1287
+ 1621,0
1288
+ 549,0
1289
+ 1260,0
1290
+ 935,0
1291
+ 631,0
1292
+ 123,0
1293
+ 426,0
1294
+ 1663,1
1295
+ 94,0
1296
+ 53,0
1297
+ 1003,0
1298
+ 513,1
1299
+ 1753,0
1300
+ 1279,0
1301
+ 937,0
1302
+ 1466,0
1303
+ 1306,0
1304
+ 397,0
1305
+ 1171,0
1306
+ 486,0
1307
+ 1644,0
1308
+ 555,0
1309
+ 658,0
1310
+ 733,0
1311
+ 765,0
1312
+ 789,0
1313
+ 1464,0
1314
+ 639,0
1315
+ 1494,0
1316
+ 1410,0
1317
+ 1253,0
1318
+ 35,0
1319
+ 1740,1
1320
+ 176,0
1321
+ 1718,0
1322
+ 700,0
1323
+ 1058,0
1324
+ 1631,0
1325
+ 1028,0
1326
+ 278,0
1327
+ 1282,0
1328
+ 940,0
1329
+ 1559,0
1330
+ 478,0
1331
+ 1598,0
1332
+ 884,0
1333
+ 90,0
1334
+ 1078,0
1335
+ 983,0
1336
+ 530,0
1337
+ 781,0
1338
+ 920,0
1339
+ 1259,0
1340
+ 468,0
1341
+ 485,0
1342
+ 1281,0
1343
+ 122,0
1344
+ 1392,1
1345
+ 71,0
1346
+ 718,0
1347
+ 103,0
1348
+ 276,0
1349
+ 227,0
1350
+ 1299,0
1351
+ 1362,0
1352
+ 1045,0
1353
+ 1104,0
1354
+ 914,0
1355
+ 1075,0
1356
+ 1043,0
1357
+ 1153,0
1358
+ 1251,0
1359
+ 1561,0
1360
+ 989,0
1361
+ 1468,0
1362
+ 330,0
1363
+ 459,0
1364
+ 1029,0
1365
+ 598,1
1366
+ 147,0
1367
+ 42,0
1368
+ 723,0
1369
+ 142,0
1370
+ 366,0
1371
+ 116,0
1372
+ 1752,0
1373
+ 1123,1
1374
+ 958,0
1375
+ 1342,0
1376
+ 856,1
1377
+ 1586,0
1378
+ 1244,0
1379
+ 476,0
1380
+ 60,0
1381
+ 350,0
1382
+ 398,0
1383
+ 1453,0
1384
+ 1407,0
1385
+ 1030,0
1386
+ 249,1
1387
+ 108,0
1388
+ 1515,0
1389
+ 1048,1
1390
+ 106,0
1391
+ 835,0
1392
+ 292,0
1393
+ 1416,0
1394
+ 1131,0
1395
+ 850,0
1396
+ 360,0
1397
+ 1642,0
1398
+ 34,1
1399
+ 480,0
1400
+ 1712,0
1401
+ 441,0
1402
+ 1322,0
1403
+ 748,0
1404
+ 1417,0
1405
+ 632,0
1406
+ 390,0
1407
+ 1382,1
1408
+ 1100,0
1409
+ 445,0
1410
+ 105,0
1411
+ 820,0
1412
+ 1581,0
1413
+ 1321,0
1414
+ 1051,0
1415
+ 524,0
1416
+ 826,0
1417
+ 1165,0
1418
+ 1383,1
1419
+ 1262,0
1420
+ 1343,0
1421
+ 1033,1
1422
+ 1,1
1423
+ 923,0
1424
+ 234,1
1425
+ 435,0
1426
+ 1384,0
1427
+ 574,0
1428
+ 544,0
1429
+ 1062,0
1430
+ 139,0
1431
+ 1583,0
1432
+ 1720,0
1433
+ 272,0
1434
+ 1354,0
1435
+ 675,0
1436
+ 1197,0
1437
+ 1194,0
1438
+ 1350,0
1439
+ 992,0
1440
+ 1214,0
1441
+ 745,0
1442
+ 505,0
1443
+ 740,1
1444
+ 1010,0
1445
+ 960,0
1446
+ 1673,0
1447
+ 1440,0
1448
+ 341,0
1449
+ 1570,0
1450
+ 1301,0
1451
+ 1375,1
1452
+ 671,0
1453
+ 1031,0
1454
+ 63,0
1455
+ 1155,0
1456
+ 1320,0
1457
+ 1323,0
1458
+ 617,0
1459
+ 465,1
1460
+ 1166,0
1461
+ 1754,0
1462
+ 1605,0
1463
+ 1478,0
1464
+ 626,0
1465
+ 162,1
1466
+ 231,0
1467
+ 542,0
1468
+ 550,0
1469
+ 978,0
1470
+ 507,0
1471
+ 168,0
1472
+ 1079,1
1473
+ 87,0
1474
+ 1186,0
1475
+ 767,0
1476
+ 212,1
1477
+ 654,0
1478
+ 268,0
1479
+ 107,0
1480
+ 1049,1
1481
+ 89,0
1482
+ 1332,0
1483
+ 1256,0
1484
+ 848,0
1485
+ 609,0
1486
+ 647,0
1487
+ 471,0
1488
+ 1397,0
1489
+ 10,0
1490
+ 1215,0
1491
+ 1047,0
1492
+ 126,0
1493
+ 1425,0
1494
+ 605,0
1495
+ 727,0
1496
+ 603,0
1497
+ 220,0
1498
+ 449,0
1499
+ 962,0
1500
+ 332,0
1501
+ 196,0
1502
+ 685,0
1503
+ 1587,0
1504
+ 343,0
1505
+ 786,0
1506
+ 258,1
1507
+ 1777,0
1508
+ 648,0
1509
+ 1439,0
1510
+ 976,0
1511
+ 691,0
1512
+ 944,0
1513
+ 1067,0
1514
+ 1039,0
1515
+ 373,0
1516
+ 987,0
1517
+ 1011,0
1518
+ 1462,0
1519
+ 282,0
1520
+ 1336,0
1521
+ 1180,0
1522
+ 30,1
1523
+ 1229,1
1524
+ 1344,0
1525
+ 1341,0
1526
+ 254,0
1527
+ 709,0
1528
+ 1209,0
1529
+ 51,0
1530
+ 1533,0
1531
+ 1441,0
1532
+ 583,0
1533
+ 705,0
1534
+ 1774,0
1535
+ 197,0
1536
+ 1024,0
1537
+ 793,0
1538
+ 1519,0
1539
+ 1087,1
1540
+ 1544,0
1541
+ 141,0
1542
+ 1682,1
1543
+ 1210,0
1544
+ 707,0
1545
+ 175,0
1546
+ 169,0
1547
+ 401,0
1548
+ 59,0
1549
+ 133,0
1550
+ 961,0
1551
+ 1032,0
1552
+ 1139,0
1553
+ 802,0
1554
+ 140,0
1555
+ 581,0
1556
+ 1433,0
1557
+ 527,0
1558
+ 815,0
1559
+ 1208,0
1560
+ 1213,0
1561
+ 1538,0
1562
+ 65,0
1563
+ 1211,0
1564
+ 150,1
1565
+ 716,0
1566
+ 580,0
1567
+ 1005,0
1568
+ 1539,0
1569
+ 45,0
1570
+ 1338,0
1571
+ 1542,0
1572
+ 1490,0
1573
+ 732,0
1574
+ 72,0
1575
+ 1534,0
1576
+ 528,0
1577
+ 488,0
1578
+ 1276,0
1579
+ 974,0
1580
+ 878,0
1581
+ 1536,0
1582
+ 825,0
1583
+ 52,0
1584
+ 682,0
1585
+ 1312,1
1586
+ 1518,0
1587
+ 93,0
1588
+ 1522,0
1589
+ 1511,0
1590
+ 1532,0
1591
+ 22,0
1592
+ 1308,0
1593
+ 588,0
1594
+ 604,0
1595
+ 1680,1
1596
+ 1451,0
1597
+ 1521,0
1598
+ 1077,0
1599
+ 1443,0
1600
+ 1177,0
1601
+ 7,0
1602
+ 1525,0
1603
+ 731,0
1604
+ 1768,0
1605
+ 1610,0
1606
+ 1304,0
1607
+ 753,0
1608
+ 831,0
1609
+ 503,0
1610
+ 481,0
1611
+ 552,0
1612
+ 186,0
1613
+ 1727,0
1614
+ 1743,1
1615
+ 1080,0
1616
+ 1185,0
1617
+ 114,1
1618
+ 1243,0
1619
+ 1444,0
1620
+ 1314,1
1621
+ 787,0
1622
+ 742,1
1623
+ 1181,0
1624
+ 1371,1
1625
+ 1218,0
1626
+ 26,0
1627
+ 270,0
1628
+ 446,0
1629
+ 101,0
1630
+ 137,0
1631
+ 40,0
1632
+ 1574,0
1633
+ 75,0
1634
+ 376,0
1635
+ 843,0
1636
+ 1096,0
1637
+ 1541,0
1638
+ 851,0
1639
+ 129,0
1640
+ 1169,0
1641
+ 1496,0
1642
+ 1716,0
1643
+ 1601,0
1644
+ 807,0
1645
+ 202,0
1646
+ 945,0
1647
+ 451,0
1648
+ 173,0
1649
+ 667,0
1650
+ 1760,0
1651
+ 217,1
1652
+ 1275,0
1653
+ 909,0
1654
+ 1572,0
1655
+ 1355,0
1656
+ 120,0
1657
+ 19,0
1658
+ 20,0
1659
+ 1477,0
1660
+ 984,0
1661
+ 539,0
1662
+ 1483,0
1663
+ 188,0
1664
+ 1091,1
1665
+ 295,0
1666
+ 193,0
1667
+ 672,0
1668
+ 738,0
1669
+ 1170,0
1670
+ 683,0
1671
+ 1724,0
1672
+ 1162,0
1673
+ 1092,0
1674
+ 384,0
1675
+ 466,0
1676
+ 118,0
1677
+ 1694,0
1678
+ 210,1
1679
+ 153,0
1680
+ 1055,0
1681
+ 591,0
1682
+ 1309,0
1683
+ 579,0
1684
+ 1436,0
1685
+ 99,0
1686
+ 178,0
1687
+ 497,0
1688
+ 730,0
1689
+ 54,0
1690
+ 560,0
1691
+ 180,0
1692
+ 1614,0
1693
+ 655,0
1694
+ 559,0
1695
+ 135,0
1696
+ 472,0
1697
+ 1771,0
1698
+ 661,0
1699
+ 508,0
1700
+ 838,0
1701
+ 1661,0
1702
+ 585,0
1703
+ 1455,0
1704
+ 115,0
1705
+ 1695,0
1706
+ 582,0
1707
+ 15,0
1708
+ 534,0
1709
+ 529,0
1710
+ 315,0
1711
+ 119,0
1712
+ 1717,0
1713
+ 389,0
1714
+ 132,0
1715
+ 1086,0
1716
+ 304,0
1717
+ 921,0
1718
+ 1469,0
1719
+ 1426,0
1720
+ 532,0
1721
+ 625,0
1722
+ 818,0
1723
+ 1659,0
1724
+ 768,0
1725
+ 509,0
1726
+ 1527,0
1727
+ 1406,0
1728
+ 1569,0
1729
+ 996,0
1730
+ 1638,0
1731
+ 614,0
1732
+ 1291,0
1733
+ 1255,0
1734
+ 734,0
1735
+ 1776,0
1736
+ 676,0
1737
+ 1130,0
1738
+ 1710,0
1739
+ 74,0
1740
+ 719,0
1741
+ 24,0
1742
+ 146,0
1743
+ 297,0
1744
+ 239,0
1745
+ 475,0
1746
+ 712,0
1747
+ 1405,0
1748
+ 808,0
1749
+ 1372,1
1750
+ 812,0
1751
+ 1089,0
1752
+ 1219,0
1753
+ 1656,0
1754
+ 148,0
1755
+ 125,0
1756
+ 400,0
1757
+ 592,0
1758
+ 636,0
1759
+ 1111,0
1760
+ 607,0
1761
+ 1400,0
1762
+ 361,0
1763
+ 199,0
1764
+ 924,0
1765
+ 1374,1
1766
+ 174,0
1767
+ 1689,1
1768
+ 516,0
1769
+ 1157,0
1770
+ 136,0
1771
+ 1212,0
1772
+ 1526,0
1773
+ 1622,0
1774
+ 1103,0
1775
+ 495,0
1776
+ 964,0
1777
+ 1200,0
1778
+ 284,0
1779
+ 1179,0
1780
+ 1600,0
1781
+ 290,0
1782
+ 1184,0
1783
+ 456,0
1784
+ 761,0
1785
+ 1254,0
moe.py ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ sys.path.insert(0, '/data1/PycharmProjects/FGVC11/pytorch-image-models-main')
3
+ # sys.path.insert(0, '/data1/PycharmProjects/FGVC11/submision/pytorch-image-models-main')
4
+ # sys.path.insert(0, '/data1/PycharmProjects/FGVC11/submision/pytorch-image-models')
5
+ import timm
6
+ import torch
7
+ from torch import nn
8
+ import torch.nn.functional as F
9
+ from timm.layers import LayerNorm2d, LayerNorm,NormMlpClassifierHead, ClassifierHead
10
+ from timm.models.convnext import ConvNeXtStage
11
+ import numpy as np
12
+
13
# Filesystem paths to the pretrained checkpoints. Both are read when a model
# is built with pretrain=True, so fill in the one matching the architecture.
LARGE_CP = ''  # path to pretrain weights for 'convnext_large_mlp'
# Moe looks this name up for 'convnextv2_base.fcmae_ft_in22k_in1k_384'; it was
# previously never defined, which made that branch fail with a NameError.
BASE_CP = ''  # path to pretrain weights for 'convnextv2_base.fcmae_ft_in22k_in1k_384'
14
+
15
+
16
+
17
class expert(nn.Module):
    """Expert branch: one ConvNeXt stage followed by a norm-MLP classifier head.

    The stage takes 768-channel feature maps and outputs 1536 channels
    (stride 2, depth 3); the head is configured with average pooling and
    emits ``num_classes`` logits.

    Args:
        model_arch: Architecture name used to pick the pretrained checkpoint.
            Only ``'convnext_large_mlp'`` has a checkpoint (``LARGE_CP``).
        num_classes: Number of output logits of the classifier head.
        pretrain: When True (default), initialise the stage from the
            ``stages.3.*`` entries of the checkpoint. When False, the stage
            keeps its random initialisation.

    Raises:
        ValueError: If ``pretrain`` is True and ``model_arch`` has no known
            checkpoint.
    """

    def __init__(self, model_arch, num_classes, pretrain=True) -> None:
        super().__init__()

        self.model = ConvNeXtStage(
            in_chs=768,
            out_chs=1536,
            kernel_size=7,
            stride=2,
            dilation=(1, 1),
            depth=3,
            drop_path_rates=[0.0, 0.0, 0.0],
            ls_init_value=1e-6,
            conv_mlp=False,
            conv_bias=True,
            use_grn=False,
            act_layer='gelu',
            norm_layer=LayerNorm2d,
            norm_layer_cl=LayerNorm,
        )

        self.cls_head = NormMlpClassifierHead(
            in_features=1536,
            num_classes=num_classes,
            hidden_size=1536,
            pool_type='avg',  # max
            drop_rate=0.0,
            norm_layer=LayerNorm2d,
            act_layer='gelu',
        )

        if pretrain:
            # The original code hit `assert False` even for the supported
            # architecture (the `else:` was missing); only reject the
            # architectures that really have no checkpoint.
            if model_arch != 'convnext_large_mlp':
                raise ValueError('pretrain weight not found')
            checkpoints = LARGE_CP
            print('use pretrain weight:', checkpoints)
            # Load on CPU so the checkpoint does not have to fit on (or even
            # have access to) the device it was saved from.
            state_dict = torch.load(checkpoints, map_location='cpu')
            self.model.load_state_dict(self._stage_weights(state_dict), strict=True)
            del state_dict

    @staticmethod
    def _stage_weights(state_dict):
        """Return the ``stages.3.*`` entries of ``state_dict`` with prefixes removed.

        A leading ``'module.'`` is stripped from every key first. Keys that
        do not contain ``'stages.3.'`` are dropped; keys that start with it
        lose that prefix so they match the parameter names of a bare stage.
        """
        module_prefix = 'module.'
        stage_prefix = 'stages.3.'
        stage_weights = {}
        for key, value in state_dict.items():
            if key.startswith(module_prefix):
                key = key[len(module_prefix):]
            if stage_prefix not in key:
                continue
            if key.startswith(stage_prefix):
                key = key[len(stage_prefix):]
            stage_weights[key] = value
        return stage_weights

    def forward(self, out_stage3):
        """Run the stage on ``out_stage3`` and classify the result."""
        out = self.model(out_stage3)
        out = self.cls_head(out)
        return out
73
+
74
class Moe(nn.Module):
    """Backbone with a main classifier head plus two masked expert branches.

    The backbone is created through ``timm.create_model`` with the extra
    ``out_stage3=True`` keyword and is expected to return two feature maps.
    NOTE(review): ``out_stage3`` looks like an option of a locally patched
    timm; the code unpacks the result as ``(out4, out3)`` - confirm against
    that fork.

    Args:
        model_arch: timm architecture name; also selects the checkpoint.
        num_classes: Number of output logits of the head and of each expert.
        mask: Tensor multiplied with the "venomous" expert's logits; its
            complement (``1 - mask``) is multiplied with the other expert's
            logits. Presumably one entry per class - verify against caller.
        pretrain: When True (default), load backbone and expert weights from
            the checkpoint files. When False, everything stays randomly
            initialised.

    Raises:
        ValueError: If ``pretrain`` is True and no checkpoint is known for
            ``model_arch``.
    """

    def __init__(self, model_arch, num_classes, mask, pretrain=True) -> None:
        super().__init__()
        # Build the backbone unconditionally: it used to be created only when
        # pretrain was True, leaving forward() without a backbone otherwise.
        self.backbone = timm.create_model(model_arch, num_classes=0, pretrained=False, out_stage3=True)
        if pretrain:
            checkpoints = self._checkpoint_path(model_arch)
            print('use pretrain weight:', checkpoints)
            # Load on CPU; load_state_dict copies into the existing parameters.
            state_dict = torch.load(checkpoints, map_location='cpu')
            self.backbone.load_state_dict(state_dict, strict=False)
            del state_dict

        self.head = NormMlpClassifierHead(
            in_features=1536,
            num_classes=num_classes,
            hidden_size=1536,
            pool_type='avg',  # max
            drop_rate=0.0,
            norm_layer=LayerNorm2d,
            act_layer='gelu',
        )
        self.expert_venomous = expert(model_arch, num_classes, pretrain=pretrain)
        self.expert_not_venomous = expert(model_arch, num_classes, pretrain=pretrain)
        # Binary "is venomous" logit from the pooled features of both maps
        # (768 + 1536 channels).
        self.venomous_head = nn.Linear(768 + 1536, 1, bias=False)
        torch.nn.init.xavier_uniform_(self.venomous_head.weight)
        # Non-persistent buffers follow module.to()/cuda() but stay out of the
        # state_dict, so existing checkpoints keep loading unchanged.
        self.register_buffer('venomous_mask', mask, persistent=False)
        self.register_buffer('not_venomous_mask', torch.ones_like(mask) - mask, persistent=False)

    @staticmethod
    def _checkpoint_path(model_arch):
        """Return the checkpoint path configured for ``model_arch``."""
        if model_arch == 'convnext_large_mlp':
            return LARGE_CP
        if model_arch == 'convnextv2_base.fcmae_ft_in22k_in1k_384':
            # BASE_CP is not guaranteed to exist in this module, so look it up
            # defensively instead of failing with a NameError.
            base_cp = globals().get('BASE_CP')
            if base_cp is not None:
                return base_cp
        raise ValueError('pretrain weight not found')

    def forward(self, x):
        """Return ``(y_hat, expert_pred, is_venomous, final_pred)``.

        ``y_hat`` is the main head's logits on ``out4``; ``expert_pred`` is
        the sum of the two masked expert outputs on ``out3``; ``is_venomous``
        is the raw (pre-sigmoid) output of the venomous head; ``final_pred``
        is ``y_hat + expert_pred``.
        """
        out4, out3 = self.backbone(x)
        pooled = torch.cat(
            [
                F.adaptive_max_pool2d(out3, 1).flatten(1),
                F.adaptive_max_pool2d(out4, 1).flatten(1),
            ],
            dim=-1,
        )
        is_venomous = self.venomous_head(pooled)
        # .to() is a no-op once the module lives on the input's device.
        venomous = self.expert_venomous(out3) * self.venomous_mask.to(x.device)
        not_venomous = self.expert_not_venomous(out3) * self.not_venomous_mask.to(x.device)
        y_hat = self.head(out4)

        # The experts are combined by their class masks only; the venomous
        # logit is returned to the caller but does not gate the mixture.
        expert_pred = venomous + not_venomous
        final_pred = y_hat + expert_pred
        return y_hat, expert_pred, is_venomous, final_pred
121
+
122
class SeesawLossWithLogits(nn.Module):
    """Unofficial implementation of the Seesaw loss.

    The loss was proposed in the technical report for the LVIS workshop at
    ECCV 2020. For more detail, please refer to
    https://arxiv.org/pdf/2008.10032.pdf.

    Args:
        class_counts: Number of samples for each class. Should have the same
            length as ``num_classes``.
        num_classes: Number of classes (width of the logits).
        p: Scale parameter which adjusts the strength of the punishment.
            Set to 0.8 by default, following the original paper.
    """

    def __init__(self, class_counts: np.ndarray, num_classes, p: float = 0.8):
        super().__init__()

        counts = torch.as_tensor(class_counts, dtype=torch.float32)
        # s[i, j] = (N_j / N_i) ** p when class i has more samples than
        # class j, and 1 otherwise.
        more_frequent = counts[:, None] > counts[None, :]
        ratios = (counts[None, :] / counts[:, None]) ** p
        s = torch.where(more_frequent, ratios, torch.ones_like(ratios))
        # Non-persistent buffer: moves with module.to()/cuda() while staying
        # out of the state_dict.
        self.register_buffer('s', s, persistent=False)
        self.num_classes = num_classes
        self.eps = 1.0e-6

    def forward(self, logits, targets):
        """Return the mean Seesaw loss.

        Args:
            logits: Tensor indexed as ``(batch, num_classes)``.
            targets: Integer class indices, one per batch element.
        """
        one_hot = nn.functional.one_hot(targets, num_classes=self.num_classes).float()
        # Keep the moved copy so the matrix is transferred once, not per call.
        if self.s.device != targets.device:
            self.s = self.s.to(targets.device)
        max_element, _ = logits.max(dim=-1, keepdim=True)
        exp_logits = torch.exp(logits - max_element)  # to prevent overflow

        # The loss only reads sigma at the target class, so use just the
        # target's row of s instead of a (batch, C, C) intermediate.
        target_exp = exp_logits.gather(-1, targets[:, None]).squeeze(-1)
        s_rows = self.s[targets]
        denominator = ((1 - one_hot) * s_rows * exp_logits).sum(dim=-1) + target_exp

        sigma = target_exp / (denominator + self.eps)
        loss = -torch.log(sigma + self.eps)
        return loss.mean()
161
+
162
+
163
class all_loss(nn.Module):
    """Mean of three Seesaw losses: main head, expert branch and fused logits.

    Args:
        class_counts: Per-class sample counts, passed to every Seesaw loss.
        num_classes: Number of classes, passed to every Seesaw loss.
    """

    def __init__(self, class_counts: np.array, num_classes):
        super().__init__()
        # One independent Seesaw criterion per prediction that is supervised.
        for criterion_name in ('main_loss', 'venomous_loss', 'final_pred_loss'):
            setattr(self, criterion_name, SeesawLossWithLogits(class_counts, num_classes))

    def forward(self, y_hat, expert_pred, alpha, final_pred, targets, is_venomous):
        """Average the three classification losses against ``targets``.

        ``alpha`` and ``is_venomous`` are accepted but not used here: no loss
        term is computed from them.
        """
        main_term = self.main_loss(y_hat, targets)
        expert_term = self.venomous_loss(expert_pred, targets)
        fused_term = self.final_pred_loss(final_pred, targets)

        total = main_term + expert_term + fused_term
        return total / 3
179
+
pytorch-image-models/.gitattributes ADDED
@@ -0,0 +1 @@
 
 
1
+ *.ipynb linguist-documentation
pytorch-image-models/.github/FUNDING.yml ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ # These are supported funding model platforms
2
+ github: rwightman
pytorch-image-models/.github/ISSUE_TEMPLATE/bug_report.md ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ name: Bug report
3
+ about: Create a bug report to help us improve. Issues are for reporting bugs or requesting
4
+ features, the discussion forum is available for asking questions or seeking help
5
+ from the community.
6
+ title: "[BUG] Issue title..."
7
+ labels: bug
8
+ assignees: rwightman
9
+
10
+ ---
11
+
12
+ **Describe the bug**
13
+ A clear and concise description of what the bug is.
14
+
15
+ **To Reproduce**
16
+ Steps to reproduce the behavior:
17
+ 1.
18
+ 2.
19
+
20
+ **Expected behavior**
21
+ A clear and concise description of what you expected to happen.
22
+
23
+ **Screenshots**
24
+ If applicable, add screenshots to help explain your problem.
25
+
26
+ **Desktop (please complete the following information):**
27
+ - OS: [e.g. Windows 10, Ubuntu 18.04]
28
+ - This repository version [e.g. pip 0.3.1 or commit ref]
29
+ - PyTorch version w/ CUDA/cuDNN [e.g. from `conda list`, 1.7.0 py3.8_cuda11.0.221_cudnn8.0.3_0]
30
+
31
+ **Additional context**
32
+ Add any other context about the problem here.
pytorch-image-models/.github/ISSUE_TEMPLATE/config.yml ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ blank_issues_enabled: false
2
+ contact_links:
3
+ - name: Community Discussions
4
+ url: https://github.com/rwightman/pytorch-image-models/discussions
5
+ about: Hparam request in issues will be ignored! Issues are for features and bugs. Questions can be asked in Discussions.
pytorch-image-models/.github/ISSUE_TEMPLATE/feature_request.md ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ name: Feature request
3
+ about: Suggest an idea for this project. Hparam requests, training help are not feature requests.
4
+ The discussion forum is available for asking questions or seeking help from the community.
5
+ title: "[FEATURE] Feature title..."
6
+ labels: enhancement
7
+ assignees: ''
8
+
9
+ ---
10
+
11
+ **Is your feature request related to a problem? Please describe.**
12
+ A clear and concise description of what the problem is.
13
+
14
+ **Describe the solution you'd like**
15
+ A clear and concise description of what you want to happen.
16
+
17
+ **Describe alternatives you've considered**
18
+ A clear and concise description of any alternative solutions or features you've considered.
19
+
20
+ **Additional context**
21
+ Add any other context or screenshots about the feature request here.
pytorch-image-models/.github/workflows/build_documentation.yml ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Build documentation
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+ - doc-builder*
8
+ - v*-release
9
+
10
+ jobs:
11
+ build:
12
+ uses: huggingface/doc-builder/.github/workflows/build_main_documentation.yml@main
13
+ with:
14
+ commit_sha: ${{ github.sha }}
15
+ package: pytorch-image-models
16
+ package_name: timm
17
+ path_to_docs: pytorch-image-models/hfdocs/source
18
+ version_tag_suffix: ""
19
+ secrets:
20
+ hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }}
pytorch-image-models/.github/workflows/build_pr_documentation.yml ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Build PR Documentation
2
+
3
+ on:
4
+ pull_request:
5
+
6
+ concurrency:
7
+ group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
8
+ cancel-in-progress: true
9
+
10
+ jobs:
11
+ build:
12
+ uses: huggingface/doc-builder/.github/workflows/build_pr_documentation.yml@main
13
+ with:
14
+ commit_sha: ${{ github.event.pull_request.head.sha }}
15
+ pr_number: ${{ github.event.number }}
16
+ package: pytorch-image-models
17
+ package_name: timm
18
+ path_to_docs: pytorch-image-models/hfdocs/source
19
+ version_tag_suffix: ""
pytorch-image-models/.github/workflows/tests.yml ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Python tests
2
+
3
+ on:
4
+ push:
5
+ branches: [ main ]
6
+ pull_request:
7
+ branches: [ main ]
8
+
9
+ env:
10
+ OMP_NUM_THREADS: 2
11
+ MKL_NUM_THREADS: 2
12
+
13
+ jobs:
14
+ test:
15
+ name: Run tests on ${{ matrix.os }} with Python ${{ matrix.python }}
16
+ strategy:
17
+ matrix:
18
+ os: [ubuntu-latest]
19
+ python: ['3.10', '3.11']
20
+ torch: [{base: '1.13.0', vision: '0.14.0'}, {base: '2.1.0', vision: '0.16.0'}]
21
+ testmarker: ['-k "not test_models"', '-m base', '-m cfg', '-m torchscript', '-m features', '-m fxforward', '-m fxbackward']
22
+ exclude:
23
+ - python: '3.11'
24
+ torch: {base: '1.13.0', vision: '0.14.0'}
25
+ runs-on: ${{ matrix.os }}
26
+
27
+ steps:
28
+ - uses: actions/checkout@v2
29
+ - name: Set up Python ${{ matrix.python }}
30
+ uses: actions/setup-python@v1
31
+ with:
32
+ python-version: ${{ matrix.python }}
33
+ - name: Install testing dependencies
34
+ run: |
35
+ python -m pip install --upgrade pip
36
+ pip install -r requirements-dev.txt
37
+ - name: Install torch on mac
38
+ if: startsWith(matrix.os, 'macOS')
39
+ run: pip install --no-cache-dir torch==${{ matrix.torch.base }} torchvision==${{ matrix.torch.vision }}
40
+ - name: Install torch on Windows
41
+ if: startsWith(matrix.os, 'windows')
42
+ run: pip install --no-cache-dir torch==${{ matrix.torch.base }} torchvision==${{ matrix.torch.vision }}
43
+ - name: Install torch on ubuntu
44
+ if: startsWith(matrix.os, 'ubuntu')
45
+ run: |
46
+ sudo sed -i 's/azure\.//' /etc/apt/sources.list
47
+ sudo apt update
48
+ sudo apt install -y google-perftools
49
+ pip install --no-cache-dir torch==${{ matrix.torch.base }}+cpu torchvision==${{ matrix.torch.vision }}+cpu -f https://download.pytorch.org/whl/torch_stable.html
50
+ - name: Install requirements
51
+ run: |
52
+ pip install -r requirements.txt
53
+ - name: Run tests on Windows
54
+ if: startsWith(matrix.os, 'windows')
55
+ env:
56
+ PYTHONDONTWRITEBYTECODE: 1
57
+ run: |
58
+ pytest -vv tests
59
+ - name: Run '${{ matrix.testmarker }}' tests on Linux / Mac
60
+ if: ${{ !startsWith(matrix.os, 'windows') }}
61
+ env:
62
+ LD_PRELOAD: /usr/lib/x86_64-linux-gnu/libtcmalloc.so.4
63
+ PYTHONDONTWRITEBYTECODE: 1
64
+ run: |
65
+ pytest -vv --forked --durations=0 ${{ matrix.testmarker }} tests
pytorch-image-models/.github/workflows/upload_pr_documentation.yml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Upload PR Documentation
2
+
3
+ on:
4
+ workflow_run:
5
+ workflows: ["Build PR Documentation"]
6
+ types:
7
+ - completed
8
+
9
+ jobs:
10
+ build:
11
+ uses: huggingface/doc-builder/.github/workflows/upload_pr_documentation.yml@main
12
+ with:
13
+ package_name: timm
14
+ secrets:
15
+ hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }}
16
+ comment_bot_token: ${{ secrets.COMMENT_BOT_TOKEN }}
pytorch-image-models/.gitignore ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ *.egg-info/
24
+ .installed.cfg
25
+ *.egg
26
+ MANIFEST
27
+
28
+ # PyInstaller
29
+ # Usually these files are written by a python script from a template
30
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
31
+ *.manifest
32
+ *.spec
33
+
34
+ # Installer logs
35
+ pip-log.txt
36
+ pip-delete-this-directory.txt
37
+
38
+ # Unit test / coverage reports
39
+ htmlcov/
40
+ .tox/
41
+ .coverage
42
+ .coverage.*
43
+ .cache
44
+ nosetests.xml
45
+ coverage.xml
46
+ *.cover
47
+ .hypothesis/
48
+ .pytest_cache/
49
+
50
+ # Translations
51
+ *.mo
52
+ *.pot
53
+
54
+ # Django stuff:
55
+ *.log
56
+ local_settings.py
57
+ db.sqlite3
58
+
59
+ # Flask stuff:
60
+ instance/
61
+ .webassets-cache
62
+
63
+ # Scrapy stuff:
64
+ .scrapy
65
+
66
+ # Sphinx documentation
67
+ docs/_build/
68
+
69
+ # PyBuilder
70
+ target/
71
+
72
+ # Jupyter Notebook
73
+ .ipynb_checkpoints
74
+
75
+ # pyenv
76
+ .python-version
77
+
78
+ # celery beat schedule file
79
+ celerybeat-schedule
80
+
81
+ # SageMath parsed files
82
+ *.sage.py
83
+
84
+ # Environments
85
+ .env
86
+ .venv
87
+ env/
88
+ venv/
89
+ ENV/
90
+ env.bak/
91
+ venv.bak/
92
+
93
+ # Spyder project settings
94
+ .spyderproject
95
+ .spyproject
96
+
97
+ # Rope project settings
98
+ .ropeproject
99
+
100
+ # PyCharm
101
+ .idea
102
+
103
+ output/
104
+
105
+ # PyTorch weights
106
+ *.tar
107
+ *.pth
108
+ *.pt
109
+ *.torch
110
+ *.gz
111
+ Untitled.ipynb
112
+ Testing notebook.ipynb
113
+
114
+ # Root dir exclusions
115
+ /*.csv
116
+ /*.yaml
117
+ /*.json
118
+ /*.jpg
119
+ /*.png
120
+ /*.zip
121
+ /*.tar.*
pytorch-image-models/CONTRIBUTING.md ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *This guideline is very much a work-in-progress.*
2
+
3
+ Contributions to `timm` for code, documentation, tests are more than welcome!
4
+
5
+ There haven't been any formal guidelines to date so please bear with me, and feel free to add to this guide.
6
+
7
+ # Coding style
8
+
9
+ Code linting and auto-format (black) are not currently in place but open to consideration. In the meantime, the style to follow is (mostly) aligned with Google's guide: https://google.github.io/styleguide/pyguide.html.
10
+
11
+ A few specific differences from Google style (or black)
12
+ 1. Line length is 120 char. Going over is okay in some cases (e.g. I prefer not to break URL across lines).
13
+ 2. Hanging indents are always preferred, please avoid aligning arguments with closing brackets or braces.
14
+
15
+ Example, from Google guide, but this is a NO here:
16
+ ```
17
+ # Aligned with opening delimiter.
18
+ foo = long_function_name(var_one, var_two,
19
+ var_three, var_four)
20
+ meal = (spam,
21
+ beans)
22
+
23
+ # Aligned with opening delimiter in a dictionary.
24
+ foo = {
25
+ 'long_dictionary_key': value1 +
26
+ value2,
27
+ ...
28
+ }
29
+ ```
30
+ This is YES:
31
+
32
+ ```
33
+ # 4-space hanging indent; nothing on first line,
34
+ # closing parenthesis on a new line.
35
+ foo = long_function_name(
36
+ var_one, var_two, var_three,
37
+ var_four
38
+ )
39
+ meal = (
40
+ spam,
41
+ beans,
42
+ )
43
+
44
+ # 4-space hanging indent in a dictionary.
45
+ foo = {
46
+ 'long_dictionary_key':
47
+ long_dictionary_value,
48
+ ...
49
+ }
50
+ ```
51
+
52
+ When there is discrepancy in a given source file (there are many origins for various bits of code and not all have been updated to what I consider current goal), please follow the style in a given file.
53
+
54
+ In general, if you add new code, formatting it with black using the following options should result in a style that is compatible with the rest of the code base:
55
+
56
+ ```
57
+ black --skip-string-normalization --line-length 120 <path-to-file>
58
+ ```
59
+
60
+ Avoid formatting code that is unrelated to your PR though.
61
+
62
+ PR with pure formatting / style fixes will be accepted but only in isolation from functional changes, best to ask before starting such a change.
63
+
64
+ # Documentation
65
+
66
+ As with code style, docstring style is based on the Google guide: https://google.github.io/styleguide/pyguide.html
67
+
68
+ The goal for the code is to eventually move to have all major functions and `__init__` methods use PEP484 type annotations.
69
+
70
+ When type annotations are used for a function, as per the Google pyguide, they should **NOT** be duplicated in the docstrings, please leave annotations as the one source of truth re typing.
71
+
72
+ There are a LOT of gaps in current documentation relative to the functionality in timm, please, document away!
73
+
74
+ # Installation
75
+
76
+ Create a Python virtual environment using Python 3.10. Inside the environment, install `torch` and `torchvision` using the instructions matching your system as listed on the [PyTorch website](https://pytorch.org/).
77
+
78
+ Then install the remaining dependencies:
79
+
80
+ ```
81
+ python -m pip install -r requirements.txt
82
+ python -m pip install -r requirements-dev.txt # for testing
83
+ python -m pip install -e .
84
+ ```
85
+
86
+ ## Unit tests
87
+
88
+ Run the tests using:
89
+
90
+ ```
91
+ pytest tests/
92
+ ```
93
+
94
+ Since the whole test suite takes a lot of time to run locally (a few hours), you may want to select a subset of tests relating to the changes you made by using the `-k` option of [`pytest`](https://docs.pytest.org/en/7.1.x/example/markers.html#using-k-expr-to-select-tests-based-on-their-name). Moreover, running tests in parallel (in this example 4 processes) with the `-n` option may help:
95
+
96
+ ```
97
+ pytest -k "substring-to-match" -n 4 tests/
98
+ ```
99
+
100
+ ## Building documentation
101
+
102
+ Please refer to [this document](https://github.com/huggingface/pytorch-image-models/tree/main/hfdocs).
103
+
104
+ # Questions
105
+
106
+ If you have any questions about contribution, where / how to contribute, please ask in the [Discussions](https://github.com/huggingface/pytorch-image-models/discussions/categories/contributing) (there is a `Contributing` topic).
pytorch-image-models/LICENSE ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "{}"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright 2019 Ross Wightman
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
pytorch-image-models/MANIFEST.in ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ include timm/models/_pruned/*.txt
2
+ include timm/data/_info/*.txt
3
+ include timm/data/_info/*.json
pytorch-image-models/README.md ADDED
@@ -0,0 +1,544 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # PyTorch Image Models
2
+ - [What's New](#whats-new)
3
+ - [Introduction](#introduction)
4
+ - [Models](#models)
5
+ - [Features](#features)
6
+ - [Results](#results)
7
+ - [Getting Started (Documentation)](#getting-started-documentation)
8
+ - [Train, Validation, Inference Scripts](#train-validation-inference-scripts)
9
+ - [Awesome PyTorch Resources](#awesome-pytorch-resources)
10
+ - [Licenses](#licenses)
11
+ - [Citing](#citing)
12
+
13
+ ## What's New
14
+
15
+ ❗Updates after Oct 10, 2022 are available in version >= 0.9❗
16
+ * Many changes since the last 0.6.x stable releases. They were previewed in 0.8.x dev releases but not everyone transitioned.
17
+ * `timm.models.layers` moved to `timm.layers`:
18
+ * `from timm.models.layers import name` will still work via deprecation mapping (but please transition to `timm.layers`).
19
+ * `import timm.models.layers.module` or `from timm.models.layers.module import name` needs to be changed now.
20
+ * Builder, helper, non-model modules in `timm.models` have a `_` prefix added, ie `timm.models.helpers` -> `timm.models._helpers`, there are temporary deprecation mapping files but those will be removed.
21
+ * All models now support `architecture.pretrained_tag` naming (ex `resnet50.rsb_a1`).
22
+ * The pretrained_tag is the specific weight variant (different head) for the architecture.
23
+ * Using only `architecture` defaults to the first weights in the default_cfgs for that model architecture.
24
+ * In adding pretrained tags, many model names that existed to differentiate were renamed to use the tag (ex: `vit_base_patch16_224_in21k` -> `vit_base_patch16_224.augreg_in21k`). There are deprecation mappings for these.
25
+ * A number of models had their checkpoints remapped to match architecture changes needed to better support `features_only=True`, there are `checkpoint_filter_fn` methods in any model module that was remapped. These can be passed to `timm.models.load_checkpoint(..., filter_fn=timm.models.swin_transformer_v2.checkpoint_filter_fn)` to remap your existing checkpoint.
26
+ * The Hugging Face Hub (https://huggingface.co/timm) is now the primary source for `timm` weights. Model cards include link to papers, original source, license.
27
+ * Previous 0.6.x can be cloned from [0.6.x](https://github.com/rwightman/pytorch-image-models/tree/0.6.x) branch or installed via pip with version.
28
+
29
+ ### April 11, 2024
30
+ * Prepping for a long overdue 1.0 release, things have been stable for a while now.
31
+ * Significant feature that's been missing for a while, `features_only=True` support for ViT models with flat hidden states or non-std module layouts (so far covering `'vit_*', 'twins_*', 'deit*', 'beit*', 'mvitv2*', 'eva*', 'samvit_*', 'flexivit*'`)
32
+ * Above feature support achieved through a new `forward_intermediates()` API that can be used with a feature wrapping module or directly.
33
+ ```python
34
+ model = timm.create_model('vit_base_patch16_224')
35
+ final_feat, intermediates = model.forward_intermediates(input)
36
+ output = model.forward_head(final_feat) # pooling + classifier head
37
+
38
+ print(final_feat.shape)
39
+ torch.Size([2, 197, 768])
40
+
41
+ for f in intermediates:
42
+ print(f.shape)
43
+ torch.Size([2, 768, 14, 14])
44
+ torch.Size([2, 768, 14, 14])
45
+ torch.Size([2, 768, 14, 14])
46
+ torch.Size([2, 768, 14, 14])
47
+ torch.Size([2, 768, 14, 14])
48
+ torch.Size([2, 768, 14, 14])
49
+ torch.Size([2, 768, 14, 14])
50
+ torch.Size([2, 768, 14, 14])
51
+ torch.Size([2, 768, 14, 14])
52
+ torch.Size([2, 768, 14, 14])
53
+ torch.Size([2, 768, 14, 14])
54
+ torch.Size([2, 768, 14, 14])
55
+
56
+ print(output.shape)
57
+ torch.Size([2, 1000])
58
+ ```
59
+
60
+ ```python
61
+ model = timm.create_model('eva02_base_patch16_clip_224', pretrained=True, img_size=512, features_only=True, out_indices=(-3, -2,))
62
+ output = model(torch.randn(2, 3, 512, 512))
63
+
64
+ for o in output:
65
+ print(o.shape)
66
+ torch.Size([2, 768, 32, 32])
67
+ torch.Size([2, 768, 32, 32])
68
+ ```
69
+ * TinyCLIP vision tower weights added, thx [Thien Tran](https://github.com/gau-nernst)
70
+
71
+ ### Feb 19, 2024
72
+ * Next-ViT models added. Adapted from https://github.com/bytedance/Next-ViT
73
+ * HGNet and PP-HGNetV2 models added. Adapted from https://github.com/PaddlePaddle/PaddleClas by [SeeFun](https://github.com/seefun)
74
+ * Removed setup.py, moved to pyproject.toml based build supported by PDM
75
+ * Add updated model EMA impl using _for_each for less overhead
76
+ * Support device args in train script for non GPU devices
77
+ * Other misc fixes and small additions
78
+ * Min supported Python version increased to 3.8
79
+ * Release 0.9.16
80
+
81
+ ### Jan 8, 2024
82
+ Datasets & transform refactoring
83
+ * HuggingFace streaming (iterable) dataset support (`--dataset hfids:org/dataset`)
84
+ * Webdataset wrapper tweaks for improved split info fetching, can auto fetch splits from supported HF hub webdataset
85
+ * Tested HF `datasets` and webdataset wrapper streaming from HF hub with recent `timm` ImageNet uploads to https://huggingface.co/timm
86
+ * Make input & target column/field keys consistent across datasets and pass via args
87
+ * Full monochrome support when using e.g. `--input-size 1 224 224` or `--in-chans 1`, sets PIL image conversion appropriately in dataset
88
+ * Improved several alternate crop & resize transforms (ResizeKeepRatio, RandomCropOrPad, etc) for use in PixParse document AI project
89
+ * Add SimCLR style color jitter prob along with grayscale and gaussian blur options to augmentations and args
90
+ * Allow train without validation set (`--val-split ''`) in train script
91
+ * Add `--bce-sum` (sum over class dim) and `--bce-pos-weight` (positive weighting) args for training as they're common BCE loss tweaks I was often hard coding
92
+
93
+ ### Nov 23, 2023
94
+ * Added EfficientViT-Large models, thanks [SeeFun](https://github.com/seefun)
95
+ * Fix Python 3.7 compat, will be dropping support for it soon
96
+ * Other misc fixes
97
+ * Release 0.9.12
98
+
99
+ ### Nov 20, 2023
100
+ * Added significant flexibility for Hugging Face Hub based timm models via `model_args` config entry. `model_args` will be passed as kwargs through to models on creation.
101
+ * See example at https://huggingface.co/gaunernst/vit_base_patch16_1024_128.audiomae_as2m_ft_as20k/blob/main/config.json
102
+ * Usage: https://github.com/huggingface/pytorch-image-models/discussions/2035
103
+ * Updated imagenet eval and test set csv files with latest models
104
+ * `vision_transformer.py` typing and doc cleanup by [Laureηt](https://github.com/Laurent2916)
105
+ * 0.9.11 release
106
+
107
+ ### Nov 3, 2023
108
+ * [DFN (Data Filtering Networks)](https://huggingface.co/papers/2309.17425) and [MetaCLIP](https://huggingface.co/papers/2309.16671) ViT weights added
109
+ * DINOv2 'register' ViT model weights added (https://huggingface.co/papers/2309.16588, https://huggingface.co/papers/2304.07193)
110
+ * Add `quickgelu` ViT variants for OpenAI, DFN, MetaCLIP weights that use it (less efficient)
111
+ * Improved typing added to ResNet, MobileNet-v3 thanks to [Aryan](https://github.com/a-r-r-o-w)
112
+ * ImageNet-12k fine-tuned (from LAION-2B CLIP) `convnext_xxlarge`
113
+ * 0.9.9 release
114
+
115
+ ### Oct 20, 2023
116
+ * [SigLIP](https://huggingface.co/papers/2303.15343) image tower weights supported in `vision_transformer.py`.
117
+ * Great potential for fine-tune and downstream feature use.
118
+ * Experimental 'register' support in vit models as per [Vision Transformers Need Registers](https://huggingface.co/papers/2309.16588)
119
+ * Updated RepViT with new weight release. Thanks [wangao](https://github.com/jameslahm)
120
+ * Add patch resizing support (on pretrained weight load) to Swin models
121
+ * 0.9.8 release pending
122
+
123
+ ### Sep 1, 2023
124
+ * TinyViT added by [SeeFun](https://github.com/seefun)
125
+ * Fix EfficientViT (MIT) to use torch.autocast so it works back to PT 1.10
126
+ * 0.9.7 release
127
+
128
+ ### Aug 28, 2023
129
+ * Add dynamic img size support to models in `vision_transformer.py`, `vision_transformer_hybrid.py`, `deit.py`, and `eva.py` w/o breaking backward compat.
130
+ * Add `dynamic_img_size=True` to args at model creation time to allow changing the grid size (interpolate abs and/or ROPE pos embed each forward pass).
131
+ * Add `dynamic_img_pad=True` to allow image sizes that aren't divisible by patch size (pad bottom right to patch size each forward pass).
132
+ * Enabling either dynamic mode will break FX tracing unless PatchEmbed module added as leaf.
133
+ * Existing method of resizing position embedding by passing different `img_size` (interpolate pretrained embed weights once) on creation still works.
134
+ * Existing method of changing `patch_size` (resize pretrained patch_embed weights once) on creation still works.
135
+ * Example validation cmd `python validate.py /imagenet --model vit_base_patch16_224 --amp --amp-dtype bfloat16 --img-size 255 --crop-pct 1.0 --model-kwargs dynamic_img_size=True dynamic_img_pad=True`
136
+
137
+ ### Aug 25, 2023
138
+ * Many new models since last release
139
+ * FastViT - https://arxiv.org/abs/2303.14189
140
+ * MobileOne - https://arxiv.org/abs/2206.04040
141
+ * InceptionNeXt - https://arxiv.org/abs/2303.16900
142
+ * RepGhostNet - https://arxiv.org/abs/2211.06088 (thanks https://github.com/ChengpengChen)
143
+ * GhostNetV2 - https://arxiv.org/abs/2211.12905 (thanks https://github.com/yehuitang)
144
+ * EfficientViT (MSRA) - https://arxiv.org/abs/2305.07027 (thanks https://github.com/seefun)
145
+ * EfficientViT (MIT) - https://arxiv.org/abs/2205.14756 (thanks https://github.com/seefun)
146
+ * Add `--reparam` arg to `benchmark.py`, `onnx_export.py`, and `validate.py` to trigger layer reparameterization / fusion for models with any one of `reparameterize()`, `switch_to_deploy()` or `fuse()`
147
+ * Including FastViT, MobileOne, RepGhostNet, EfficientViT (MSRA), RepViT, RepVGG, and LeViT
148
+ * Preparing 0.9.6 'back to school' release
149
+
150
+ ### Aug 11, 2023
151
+ * Swin, MaxViT, CoAtNet, and BEiT models support resizing of image/window size on creation with adaptation of pretrained weights
152
+ * Example validation cmd to test w/ non-square resize `python validate.py /imagenet --model swin_base_patch4_window7_224.ms_in22k_ft_in1k --amp --amp-dtype bfloat16 --input-size 3 256 320 --model-kwargs window_size=8,10 img_size=256,320`
153
+
154
+ ### Aug 3, 2023
155
+ * Add GluonCV weights for HRNet w18_small and w18_small_v2. Converted by [SeeFun](https://github.com/seefun)
156
+ * Fix `selecsls*` model naming regression
157
+ * Patch and position embedding for ViT/EVA works for bfloat16/float16 weights on load (or activations for on-the-fly resize)
158
+ * v0.9.5 release prep
159
+
160
+ ### July 27, 2023
161
+ * Added timm trained `seresnextaa201d_32x8d.sw_in12k_ft_in1k_384` weights (and `.sw_in12k` pretrain) with 87.3% top-1 on ImageNet-1k, best ImageNet ResNet family model I'm aware of.
162
+ * RepViT model and weights (https://arxiv.org/abs/2307.09283) added by [wangao](https://github.com/jameslahm)
163
+ * I-JEPA ViT feature weights (no classifier) added by [SeeFun](https://github.com/seefun)
164
+ * SAM-ViT (segment anything) feature weights (no classifier) added by [SeeFun](https://github.com/seefun)
165
+ * Add support for alternative feat extraction methods and -ve indices to EfficientNet
166
+ * Add NAdamW optimizer
167
+ * Misc fixes
168
+
169
+ ### May 11, 2023
170
+ * `timm` 0.9 released, transition from 0.8.xdev releases
171
+
172
+ ### May 10, 2023
173
+ * Hugging Face Hub downloading is now default, 1132 models on https://huggingface.co/timm, 1163 weights in `timm`
174
+ * DINOv2 vit feature backbone weights added thanks to [Leng Yue](https://github.com/leng-yue)
175
+ * FB MAE vit feature backbone weights added
176
+ * OpenCLIP DataComp-XL L/14 feat backbone weights added
177
+ * MetaFormer (poolformer-v2, caformer, convformer, updated poolformer (v1)) w/ weights added by [Fredo Guan](https://github.com/fffffgggg54)
178
+ * Experimental `get_intermediate_layers` function on vit/deit models for grabbing hidden states (inspired by DINO impl). This is WIP and may change significantly... feedback welcome.
179
+ * Model creation throws error if `pretrained=True` and no weights exist (instead of continuing with random initialization)
180
+ * Fix regression with inception / nasnet TF sourced weights with 1001 classes in original classifiers
181
+ * bitsandbytes (https://github.com/TimDettmers/bitsandbytes) optimizers added to factory, use `bnb` prefix, ie `bnbadam8bit`
182
+ * Misc cleanup and fixes
183
+ * Final testing before switching to a 0.9 and bringing `timm` out of pre-release state
184
+
185
+ ### April 27, 2023
186
+ * 97% of `timm` models uploaded to HF Hub and almost all updated to support multi-weight pretrained configs
187
+ * Minor cleanup and refactoring of another batch of models as multi-weight added. More fused_attn (F.sdpa) and features_only support, and torchscript fixes.
188
+
189
+ ### April 21, 2023
190
+ * Gradient accumulation support added to train script and tested (`--grad-accum-steps`), thanks [Taeksang Kim](https://github.com/voidbag)
191
+ * More weights on HF Hub (cspnet, cait, volo, xcit, tresnet, hardcorenas, densenet, dpn, vovnet, xception_aligned)
192
+ * Added `--head-init-scale` and `--head-init-bias` to train.py to scale classifier head and set fixed bias for fine-tune
193
+ * Remove all InplaceABN (`inplace_abn`) use, replaced use in tresnet with standard BatchNorm (modified weights accordingly).
194
+
195
+ ### April 12, 2023
196
+ * Add ONNX export script, validate script, helpers that I've had kicking around for a long time. Tweak 'same' padding for better export w/ recent ONNX + pytorch.
197
+ * Refactor dropout args for vit and vit-like models, separate drop_rate into `drop_rate` (classifier dropout), `proj_drop_rate` (block mlp / out projections), `pos_drop_rate` (position embedding drop), `attn_drop_rate` (attention dropout). Also add patch dropout (FLIP) to vit and eva models.
198
+ * fused F.scaled_dot_product_attention support to more vit models, add env var (TIMM_FUSED_ATTN) to control, and config interface to enable/disable
199
+ * Add EVA-CLIP backbones w/ image tower weights, all the way up to 4B param 'enormous' model, and 336x336 OpenAI ViT mode that was missed.
200
+
201
+ ### April 5, 2023
202
+ * ALL ResNet models pushed to Hugging Face Hub with multi-weight support
203
+ * All past `timm` trained weights added with recipe based tags to differentiate
204
+ * All ResNet strikes back A1/A2/A3 (seed 0) and R50 example B/C1/C2/D weights available
205
+ * Add torchvision v2 recipe weights to existing torchvision originals
206
+ * See comparison table in https://huggingface.co/timm/seresnextaa101d_32x8d.sw_in12k_ft_in1k_288#model-comparison
207
+ * New ImageNet-12k + ImageNet-1k fine-tunes available for a few anti-aliased ResNet models
208
+ * `resnetaa50d.sw_in12k_ft_in1k` - 81.7 @ 224, 82.6 @ 288
209
+ * `resnetaa101d.sw_in12k_ft_in1k` - 83.5 @ 224, 84.1 @ 288
210
+ * `seresnextaa101d_32x8d.sw_in12k_ft_in1k` - 86.0 @ 224, 86.5 @ 288
211
+ * `seresnextaa101d_32x8d.sw_in12k_ft_in1k_288` - 86.5 @ 288, 86.7 @ 320
212
+
213
+ ### March 31, 2023
214
+ * Add first ConvNext-XXLarge CLIP -> IN-1k fine-tune and IN-12k intermediate fine-tunes for convnext-base/large CLIP models.
215
+
216
+ | model |top1 |top5 |img_size|param_count|gmacs |macts |
217
+ |----------------------------------------------------------------------------------------------------------------------|------|------|--------|-----------|------|------|
218
+ | [convnext_xxlarge.clip_laion2b_soup_ft_in1k](https://huggingface.co/timm/convnext_xxlarge.clip_laion2b_soup_ft_in1k) |88.612|98.704|256 |846.47 |198.09|124.45|
219
+ | convnext_large_mlp.clip_laion2b_soup_ft_in12k_in1k_384 |88.312|98.578|384 |200.13 |101.11|126.74|
220
+ | convnext_large_mlp.clip_laion2b_soup_ft_in12k_in1k_320 |87.968|98.47 |320 |200.13 |70.21 |88.02 |
221
+ | convnext_base.clip_laion2b_augreg_ft_in12k_in1k_384 |87.138|98.212|384 |88.59 |45.21 |84.49 |
222
+ | convnext_base.clip_laion2b_augreg_ft_in12k_in1k |86.344|97.97 |256 |88.59 |20.09 |37.55 |
223
+
224
+ * Add EVA-02 MIM pretrained and fine-tuned weights, push to HF hub and update model cards for all EVA models. First model over 90% top-1 (99% top-5)! Check out the original code & weights at https://github.com/baaivision/EVA for more details on their work blending MIM, CLIP w/ many model, dataset, and train recipe tweaks.
225
+
226
+ | model |top1 |top5 |param_count|img_size|
227
+ |----------------------------------------------------|------|------|-----------|--------|
228
+ | [eva02_large_patch14_448.mim_m38m_ft_in22k_in1k](https://huggingface.co/timm/eva02_large_patch14_448.mim_m38m_ft_in22k_in1k) |90.054|99.042|305.08 |448 |
229
+ | eva02_large_patch14_448.mim_in22k_ft_in22k_in1k |89.946|99.01 |305.08 |448 |
230
+ | eva_giant_patch14_560.m30m_ft_in22k_in1k |89.792|98.992|1014.45 |560 |
231
+ | eva02_large_patch14_448.mim_in22k_ft_in1k |89.626|98.954|305.08 |448 |
232
+ | eva02_large_patch14_448.mim_m38m_ft_in1k |89.57 |98.918|305.08 |448 |
233
+ | eva_giant_patch14_336.m30m_ft_in22k_in1k |89.56 |98.956|1013.01 |336 |
234
+ | eva_giant_patch14_336.clip_ft_in1k |89.466|98.82 |1013.01 |336 |
235
+ | eva_large_patch14_336.in22k_ft_in22k_in1k |89.214|98.854|304.53 |336 |
236
+ | eva_giant_patch14_224.clip_ft_in1k |88.882|98.678|1012.56 |224 |
237
+ | eva02_base_patch14_448.mim_in22k_ft_in22k_in1k |88.692|98.722|87.12 |448 |
238
+ | eva_large_patch14_336.in22k_ft_in1k |88.652|98.722|304.53 |336 |
239
+ | eva_large_patch14_196.in22k_ft_in22k_in1k |88.592|98.656|304.14 |196 |
240
+ | eva02_base_patch14_448.mim_in22k_ft_in1k |88.23 |98.564|87.12 |448 |
241
+ | eva_large_patch14_196.in22k_ft_in1k |87.934|98.504|304.14 |196 |
242
+ | eva02_small_patch14_336.mim_in22k_ft_in1k |85.74 |97.614|22.13 |336 |
243
+ | eva02_tiny_patch14_336.mim_in22k_ft_in1k |80.658|95.524|5.76 |336 |
244
+
245
+ * Multi-weight and HF hub for DeiT and MLP-Mixer based models
246
+
247
+ ### March 22, 2023
248
+ * More weights pushed to HF hub along with multi-weight support, including: `regnet.py`, `rexnet.py`, `byobnet.py`, `resnetv2.py`, `swin_transformer.py`, `swin_transformer_v2.py`, `swin_transformer_v2_cr.py`
249
+ * Swin Transformer models support feature extraction (NCHW feat maps for `swinv2_cr_*`, and NHWC for all others) and spatial embedding outputs.
250
+ * FocalNet (from https://github.com/microsoft/FocalNet) models and weights added with significant refactoring, feature extraction, no fixed resolution / sizing constraint
251
+ * RegNet weights increased with HF hub push, SWAG, SEER, and torchvision v2 weights. SEER is pretty poor wrt performance for model size, but possibly useful.
252
+ * More ImageNet-12k pretrained and 1k fine-tuned `timm` weights:
253
+ * `rexnetr_200.sw_in12k_ft_in1k` - 82.6 @ 224, 83.2 @ 288
254
+ * `rexnetr_300.sw_in12k_ft_in1k` - 84.0 @ 224, 84.5 @ 288
255
+ * `regnety_120.sw_in12k_ft_in1k` - 85.0 @ 224, 85.4 @ 288
256
+ * `regnety_160.lion_in12k_ft_in1k` - 85.6 @ 224, 86.0 @ 288
257
+ * `regnety_160.sw_in12k_ft_in1k` - 85.6 @ 224, 86.0 @ 288 (compare to SWAG PT + 1k FT this is same BUT much lower res, blows SEER FT away)
258
+ * Model name deprecation + remapping functionality added (a milestone for bringing 0.8.x out of pre-release). Mappings being added...
259
+ * Minor bug fixes and improvements.
260
+
261
+ ### Feb 26, 2023
262
+ * Add ConvNeXt-XXLarge CLIP pretrained image tower weights for fine-tune & features (fine-tuning TBD) -- see [model card](https://huggingface.co/laion/CLIP-convnext_xxlarge-laion2B-s34B-b82K-augreg-soup)
263
+ * Update `convnext_xxlarge` default LayerNorm eps to 1e-5 (for CLIP weights, improved stability)
264
+ * 0.8.15dev0
265
+
266
+ ### Feb 20, 2023
267
+ * Add 320x320 `convnext_large_mlp.clip_laion2b_ft_320` and `convnext_large_mlp.clip_laion2b_ft_soup_320` CLIP image tower weights for features & fine-tune
268
+ * 0.8.13dev0 pypi release for latest changes w/ move to huggingface org
269
+
270
+ ### Feb 16, 2023
271
+ * `safetensor` checkpoint support added
272
+ * Add ideas from 'Scaling Vision Transformers to 22 B. Params' (https://arxiv.org/abs/2302.05442) -- qk norm, RmsNorm, parallel block
273
+ * Add F.scaled_dot_product_attention support (PyTorch 2.0 only) to `vit_*`, `vit_relpos*`, `coatnet` / `maxxvit` (to start)
274
+ * Lion optimizer (w/ multi-tensor option) added (https://arxiv.org/abs/2302.06675)
275
+ * gradient checkpointing works with `features_only=True`
276
+
277
+ ## Introduction
278
+
279
+ Py**T**orch **Im**age **M**odels (`timm`) is a collection of image models, layers, utilities, optimizers, schedulers, data-loaders / augmentations, and reference training / validation scripts that aim to pull together a wide variety of SOTA models with ability to reproduce ImageNet training results.
280
+
281
+ The work of many others is present here. I've tried to make sure all source material is acknowledged via links to github, arxiv papers, etc in the README, documentation, and code docstrings. Please let me know if I missed anything.
282
+
283
+ ## Features
284
+
285
+ ### Models
286
+
287
+ All model architecture families include variants with pretrained weights. There are specific model variants without any weights, it is NOT a bug. Help training new or better weights is always appreciated.
288
+
289
+ * Aggregating Nested Transformers - https://arxiv.org/abs/2105.12723
290
+ * BEiT - https://arxiv.org/abs/2106.08254
291
+ * Big Transfer ResNetV2 (BiT) - https://arxiv.org/abs/1912.11370
292
+ * Bottleneck Transformers - https://arxiv.org/abs/2101.11605
293
+ * CaiT (Class-Attention in Image Transformers) - https://arxiv.org/abs/2103.17239
294
+ * CoaT (Co-Scale Conv-Attentional Image Transformers) - https://arxiv.org/abs/2104.06399
295
+ * CoAtNet (Convolution and Attention) - https://arxiv.org/abs/2106.04803
296
+ * ConvNeXt - https://arxiv.org/abs/2201.03545
297
+ * ConvNeXt-V2 - http://arxiv.org/abs/2301.00808
298
+ * ConViT (Soft Convolutional Inductive Biases Vision Transformers) - https://arxiv.org/abs/2103.10697
299
+ * CspNet (Cross-Stage Partial Networks) - https://arxiv.org/abs/1911.11929
300
+ * DeiT - https://arxiv.org/abs/2012.12877
301
+ * DeiT-III - https://arxiv.org/pdf/2204.07118.pdf
302
+ * DenseNet - https://arxiv.org/abs/1608.06993
303
+ * DLA - https://arxiv.org/abs/1707.06484
304
+ * DPN (Dual-Path Network) - https://arxiv.org/abs/1707.01629
305
+ * EdgeNeXt - https://arxiv.org/abs/2206.10589
306
+ * EfficientFormer - https://arxiv.org/abs/2206.01191
307
+ * EfficientNet (MBConvNet Family)
308
+ * EfficientNet NoisyStudent (B0-B7, L2) - https://arxiv.org/abs/1911.04252
309
+ * EfficientNet AdvProp (B0-B8) - https://arxiv.org/abs/1911.09665
310
+ * EfficientNet (B0-B7) - https://arxiv.org/abs/1905.11946
311
+ * EfficientNet-EdgeTPU (S, M, L) - https://ai.googleblog.com/2019/08/efficientnet-edgetpu-creating.html
312
+ * EfficientNet V2 - https://arxiv.org/abs/2104.00298
313
+ * FBNet-C - https://arxiv.org/abs/1812.03443
314
+ * MixNet - https://arxiv.org/abs/1907.09595
315
+ * MNASNet B1, A1 (Squeeze-Excite), and Small - https://arxiv.org/abs/1807.11626
316
+ * MobileNet-V2 - https://arxiv.org/abs/1801.04381
317
+ * Single-Path NAS - https://arxiv.org/abs/1904.02877
318
+ * TinyNet - https://arxiv.org/abs/2010.14819
319
+ * EfficientViT (MIT) - https://arxiv.org/abs/2205.14756
320
+ * EfficientViT (MSRA) - https://arxiv.org/abs/2305.07027
321
+ * EVA - https://arxiv.org/abs/2211.07636
322
+ * EVA-02 - https://arxiv.org/abs/2303.11331
323
+ * FastViT - https://arxiv.org/abs/2303.14189
324
+ * FlexiViT - https://arxiv.org/abs/2212.08013
325
+ * FocalNet (Focal Modulation Networks) - https://arxiv.org/abs/2203.11926
326
+ * GCViT (Global Context Vision Transformer) - https://arxiv.org/abs/2206.09959
327
+ * GhostNet - https://arxiv.org/abs/1911.11907
328
+ * GhostNet-V2 - https://arxiv.org/abs/2211.12905
329
+ * gMLP - https://arxiv.org/abs/2105.08050
330
+ * GPU-Efficient Networks - https://arxiv.org/abs/2006.14090
331
+ * Halo Nets - https://arxiv.org/abs/2103.12731
332
+ * HGNet / HGNet-V2 - TBD
333
+ * HRNet - https://arxiv.org/abs/1908.07919
334
+ * InceptionNeXt - https://arxiv.org/abs/2303.16900
335
+ * Inception-V3 - https://arxiv.org/abs/1512.00567
336
+ * Inception-ResNet-V2 and Inception-V4 - https://arxiv.org/abs/1602.07261
337
+ * Lambda Networks - https://arxiv.org/abs/2102.08602
338
+ * LeViT (Vision Transformer in ConvNet's Clothing) - https://arxiv.org/abs/2104.01136
339
+ * MaxViT (Multi-Axis Vision Transformer) - https://arxiv.org/abs/2204.01697
340
+ * MetaFormer (PoolFormer-v2, ConvFormer, CAFormer) - https://arxiv.org/abs/2210.13452
341
+ * MLP-Mixer - https://arxiv.org/abs/2105.01601
342
+ * MobileNet-V3 (MBConvNet w/ Efficient Head) - https://arxiv.org/abs/1905.02244
343
+ * FBNet-V3 - https://arxiv.org/abs/2006.02049
344
+ * HardCoRe-NAS - https://arxiv.org/abs/2102.11646
345
+ * LCNet - https://arxiv.org/abs/2109.15099
346
+ * MobileOne - https://arxiv.org/abs/2206.04040
347
+ * MobileViT - https://arxiv.org/abs/2110.02178
348
+ * MobileViT-V2 - https://arxiv.org/abs/2206.02680
349
+ * MViT-V2 (Improved Multiscale Vision Transformer) - https://arxiv.org/abs/2112.01526
350
+ * NASNet-A - https://arxiv.org/abs/1707.07012
351
+ * NesT - https://arxiv.org/abs/2105.12723
352
+ * Next-ViT - https://arxiv.org/abs/2207.05501
353
+ * NFNet-F - https://arxiv.org/abs/2102.06171
354
+ * NF-RegNet / NF-ResNet - https://arxiv.org/abs/2101.08692
355
+ * PNasNet - https://arxiv.org/abs/1712.00559
356
+ * PoolFormer (MetaFormer) - https://arxiv.org/abs/2111.11418
357
+ * Pooling-based Vision Transformer (PiT) - https://arxiv.org/abs/2103.16302
358
+ * PVT-V2 (Improved Pyramid Vision Transformer) - https://arxiv.org/abs/2106.13797
359
+ * RegNet - https://arxiv.org/abs/2003.13678
360
+ * RegNetZ - https://arxiv.org/abs/2103.06877
361
+ * RepVGG - https://arxiv.org/abs/2101.03697
362
+ * RepGhostNet - https://arxiv.org/abs/2211.06088
363
+ * RepViT - https://arxiv.org/abs/2307.09283
364
+ * ResMLP - https://arxiv.org/abs/2105.03404
365
+ * ResNet/ResNeXt
366
+ * ResNet (v1b/v1.5) - https://arxiv.org/abs/1512.03385
367
+ * ResNeXt - https://arxiv.org/abs/1611.05431
368
+ * 'Bag of Tricks' / Gluon C, D, E, S variations - https://arxiv.org/abs/1812.01187
369
+ * Weakly-supervised (WSL) Instagram pretrained / ImageNet tuned ResNeXt101 - https://arxiv.org/abs/1805.00932
370
+ * Semi-supervised (SSL) / Semi-weakly Supervised (SWSL) ResNet/ResNeXts - https://arxiv.org/abs/1905.00546
371
+ * ECA-Net (ECAResNet) - https://arxiv.org/abs/1910.03151v4
372
+ * Squeeze-and-Excitation Networks (SEResNet) - https://arxiv.org/abs/1709.01507
373
+ * ResNet-RS - https://arxiv.org/abs/2103.07579
374
+ * Res2Net - https://arxiv.org/abs/1904.01169
375
+ * ResNeSt - https://arxiv.org/abs/2004.08955
376
+ * ReXNet - https://arxiv.org/abs/2007.00992
377
+ * SelecSLS - https://arxiv.org/abs/1907.00837
378
+ * Selective Kernel Networks - https://arxiv.org/abs/1903.06586
379
+ * Sequencer2D - https://arxiv.org/abs/2205.01972
380
+ * Swin S3 (AutoFormerV2) - https://arxiv.org/abs/2111.14725
381
+ * Swin Transformer - https://arxiv.org/abs/2103.14030
382
+ * Swin Transformer V2 - https://arxiv.org/abs/2111.09883
383
+ * Transformer-iN-Transformer (TNT) - https://arxiv.org/abs/2103.00112
384
+ * TResNet - https://arxiv.org/abs/2003.13630
385
+ * Twins (Spatial Attention in Vision Transformers) - https://arxiv.org/pdf/2104.13840.pdf
386
+ * Visformer - https://arxiv.org/abs/2104.12533
387
+ * Vision Transformer - https://arxiv.org/abs/2010.11929
388
+ * VOLO (Vision Outlooker) - https://arxiv.org/abs/2106.13112
389
+ * VovNet V2 and V1 - https://arxiv.org/abs/1911.06667
390
+ * Xception - https://arxiv.org/abs/1610.02357
391
+ * Xception (Modified Aligned, Gluon) - https://arxiv.org/abs/1802.02611
392
+ * Xception (Modified Aligned, TF) - https://arxiv.org/abs/1802.02611
393
+ * XCiT (Cross-Covariance Image Transformers) - https://arxiv.org/abs/2106.09681
394
+
395
+ ### Optimizers
396
+
397
+ Included optimizers available via `create_optimizer` / `create_optimizer_v2` factory methods:
398
+ * `adabelief` an implementation of AdaBelief adapted from https://github.com/juntang-zhuang/Adabelief-Optimizer - https://arxiv.org/abs/2010.07468
399
+ * `adafactor` adapted from [FAIRSeq impl](https://github.com/pytorch/fairseq/blob/master/fairseq/optim/adafactor.py) - https://arxiv.org/abs/1804.04235
400
+ * `adahessian` by [David Samuel](https://github.com/davda54/ada-hessian) - https://arxiv.org/abs/2006.00719
401
+ * `adamp` and `sgdp` by [Naver ClovAI](https://github.com/clovaai) - https://arxiv.org/abs/2006.08217
402
+ * `adan` an implementation of Adan adapted from https://github.com/sail-sg/Adan - https://arxiv.org/abs/2208.06677
403
+ * `lamb` an implementation of Lamb and LambC (w/ trust-clipping) cleaned up and modified to support use with XLA - https://arxiv.org/abs/1904.00962
404
+ * `lars` an implementation of LARS and LARC (w/ trust-clipping) - https://arxiv.org/abs/1708.03888
405
+ * `lion` an implementation of Lion adapted from https://github.com/google/automl/tree/master/lion - https://arxiv.org/abs/2302.06675
406
+ * `lookahead` adapted from impl by [Liam](https://github.com/alphadl/lookahead.pytorch) - https://arxiv.org/abs/1907.08610
407
+ * `madgrad` - an implementation of MADGRAD adapted from https://github.com/facebookresearch/madgrad - https://arxiv.org/abs/2101.11075
408
+ * `nadam` an implementation of Adam w/ Nesterov momentum
409
+ * `nadamw` an implementation of AdamW (Adam w/ decoupled weight-decay) w/ Nesterov momentum. A simplified impl based on https://github.com/mlcommons/algorithmic-efficiency
410
+ * `novograd` by [Masashi Kimura](https://github.com/convergence-lab/novograd) - https://arxiv.org/abs/1905.11286
411
+ * `radam` by [Liyuan Liu](https://github.com/LiyuanLucasLiu/RAdam) - https://arxiv.org/abs/1908.03265
412
+ * `rmsprop_tf` adapted from PyTorch RMSProp by myself. Reproduces much improved Tensorflow RMSProp behaviour
413
+ * `sgdw` an implementation of SGD w/ decoupled weight-decay
414
+ * `fused<name>` optimizers by name with [NVIDIA Apex](https://github.com/NVIDIA/apex/tree/master/apex/optimizers) installed
415
+ * `bnb<name>` optimizers by name with [BitsAndBytes](https://github.com/TimDettmers/bitsandbytes) installed
416
+
417
+ ### Augmentations
418
+ * Random Erasing from [Zhun Zhong](https://github.com/zhunzhong07/Random-Erasing/blob/master/transforms.py) - https://arxiv.org/abs/1708.04896
419
+ * Mixup - https://arxiv.org/abs/1710.09412
420
+ * CutMix - https://arxiv.org/abs/1905.04899
421
+ * AutoAugment (https://arxiv.org/abs/1805.09501) and RandAugment (https://arxiv.org/abs/1909.13719) ImageNet configurations modeled after impl for EfficientNet training (https://github.com/tensorflow/tpu/blob/master/models/official/efficientnet/autoaugment.py)
422
+ * AugMix w/ JSD loss, JSD w/ clean + augmented mixing support works with AutoAugment and RandAugment as well - https://arxiv.org/abs/1912.02781
423
+ * SplitBatchNorm - allows splitting batch norm layers between clean and augmented (auxiliary batch norm) data
424
+
425
+ ### Regularization
426
+ * DropPath aka "Stochastic Depth" - https://arxiv.org/abs/1603.09382
427
+ * DropBlock - https://arxiv.org/abs/1810.12890
428
+ * Blur Pooling - https://arxiv.org/abs/1904.11486
429
+
430
+ ### Other
431
+
432
+ Several (less common) features that I often utilize in my projects are included. Many of their additions are the reason why I maintain my own set of models, instead of using others' via PIP:
433
+
434
+ * All models have a common default configuration interface and API for
435
+ * accessing/changing the classifier - `get_classifier` and `reset_classifier`
436
+ * doing a forward pass on just the features - `forward_features` (see [documentation](https://huggingface.co/docs/timm/feature_extraction))
437
+ * these make it easy to write consistent network wrappers that work with any of the models
438
+ * All models support multi-scale feature map extraction (feature pyramids) via create_model (see [documentation](https://huggingface.co/docs/timm/feature_extraction))
439
+ * `create_model(name, features_only=True, out_indices=..., output_stride=...)`
440
+ * `out_indices` creation arg specifies which feature maps to return, these indices are 0 based and generally correspond to the `C(i + 1)` feature level.
441
+ * `output_stride` creation arg controls output stride of the network by using dilated convolutions. Most networks are stride 32 by default. Not all networks support this.
442
+ * feature map channel counts, reduction level (stride) can be queried AFTER model creation via the `.feature_info` member
443
+ * All models have a consistent pretrained weight loader that adapts last linear if necessary, and from 3 to 1 channel input if desired
444
+ * High performance [reference training, validation, and inference scripts](https://huggingface.co/docs/timm/training_script) that work in several process/GPU modes:
445
+ * NVIDIA DDP w/ a single GPU per process, multiple processes with APEX present (AMP mixed-precision optional)
446
+ * PyTorch DistributedDataParallel w/ multi-gpu, single process (AMP disabled as it crashes when enabled)
447
+ * PyTorch w/ single GPU single process (AMP optional)
448
+ * A dynamic global pool implementation that allows selecting from average pooling, max pooling, average + max, or concat([average, max]) at model creation. All global pooling is adaptive average by default and compatible with pretrained weights.
449
+ * A 'Test Time Pool' wrapper that can wrap any of the included models and usually provides improved performance doing inference with input images larger than the training size. Idea adapted from original DPN implementation when I ported (https://github.com/cypw/DPNs)
450
+ * Learning rate schedulers
451
+ * Ideas adopted from
452
+ * [AllenNLP schedulers](https://github.com/allenai/allennlp/tree/master/allennlp/training/learning_rate_schedulers)
453
+ * [FAIRseq lr_scheduler](https://github.com/pytorch/fairseq/tree/master/fairseq/optim/lr_scheduler)
454
+ * SGDR: Stochastic Gradient Descent with Warm Restarts (https://arxiv.org/abs/1608.03983)
455
+ * Schedulers include `step`, `cosine` w/ restarts, `tanh` w/ restarts, `plateau`
456
+ * Space-to-Depth by [mrT23](https://github.com/mrT23/TResNet/blob/master/src/models/tresnet/layers/space_to_depth.py) (https://arxiv.org/abs/1801.04590) -- original paper?
457
+ * Adaptive Gradient Clipping (https://arxiv.org/abs/2102.06171, https://github.com/deepmind/deepmind-research/tree/master/nfnets)
458
+ * An extensive selection of channel and/or spatial attention modules:
459
+ * Bottleneck Transformer - https://arxiv.org/abs/2101.11605
460
+ * CBAM - https://arxiv.org/abs/1807.06521
461
+ * Effective Squeeze-Excitation (ESE) - https://arxiv.org/abs/1911.06667
462
+ * Efficient Channel Attention (ECA) - https://arxiv.org/abs/1910.03151
463
+ * Gather-Excite (GE) - https://arxiv.org/abs/1810.12348
464
+ * Global Context (GC) - https://arxiv.org/abs/1904.11492
465
+ * Halo - https://arxiv.org/abs/2103.12731
466
+ * Involution - https://arxiv.org/abs/2103.06255
467
+ * Lambda Layer - https://arxiv.org/abs/2102.08602
468
+ * Non-Local (NL) - https://arxiv.org/abs/1711.07971
469
+ * Squeeze-and-Excitation (SE) - https://arxiv.org/abs/1709.01507
470
+ * Selective Kernel (SK) - https://arxiv.org/abs/1903.06586
471
+ * Split (SPLAT) - https://arxiv.org/abs/2004.08955
472
+ * Shifted Window (SWIN) - https://arxiv.org/abs/2103.14030
473
+
474
+ ## Results
475
+
476
+ Model validation results can be found in the [results tables](results/README.md)
477
+
478
+ ## Getting Started (Documentation)
479
+
480
+ The official documentation can be found at https://huggingface.co/docs/hub/timm. Documentation contributions are welcome.
481
+
482
+ [Getting Started with PyTorch Image Models (timm): A Practitioner’s Guide](https://towardsdatascience.com/getting-started-with-pytorch-image-models-timm-a-practitioners-guide-4e77b4bf9055) by [Chris Hughes](https://github.com/Chris-hughes10) is an extensive blog post covering many aspects of `timm` in detail.
483
+
484
+ [timmdocs](http://timm.fast.ai/) is an alternate set of documentation for `timm`. A big thanks to [Aman Arora](https://github.com/amaarora) for his efforts creating timmdocs.
485
+
486
+ [paperswithcode](https://paperswithcode.com/lib/timm) is a good resource for browsing the models within `timm`.
487
+
488
+ ## Train, Validation, Inference Scripts
489
+
490
+ The root folder of the repository contains reference train, validation, and inference scripts that work with the included models and other features of this repository. They are adaptable for other datasets and use cases with a little hacking. See [documentation](https://huggingface.co/docs/timm/training_script).
491
+
492
+ ## Awesome PyTorch Resources
493
+
494
+ One of the greatest assets of PyTorch is the community and their contributions. A few of my favourite resources that pair well with the models and components here are listed below.
495
+
496
+ ### Object Detection, Instance and Semantic Segmentation
497
+ * Detectron2 - https://github.com/facebookresearch/detectron2
498
+ * Segmentation Models (Semantic) - https://github.com/qubvel/segmentation_models.pytorch
499
+ * EfficientDet (Obj Det, Semantic soon) - https://github.com/rwightman/efficientdet-pytorch
500
+
501
+ ### Computer Vision / Image Augmentation
502
+ * Albumentations - https://github.com/albumentations-team/albumentations
503
+ * Kornia - https://github.com/kornia/kornia
504
+
505
+ ### Knowledge Distillation
506
+ * RepDistiller - https://github.com/HobbitLong/RepDistiller
507
+ * torchdistill - https://github.com/yoshitomo-matsubara/torchdistill
508
+
509
+ ### Metric Learning
510
+ * PyTorch Metric Learning - https://github.com/KevinMusgrave/pytorch-metric-learning
511
+
512
+ ### Training / Frameworks
513
+ * fastai - https://github.com/fastai/fastai
514
+
515
+ ## Licenses
516
+
517
+ ### Code
518
+ The code here is licensed Apache 2.0. I've taken care to make sure any third party code included or adapted has compatible (permissive) licenses such as MIT, BSD, etc. I've made an effort to avoid any GPL / LGPL conflicts. That said, it is your responsibility to ensure you comply with licenses here and conditions of any dependent licenses. Where applicable, I've linked the sources/references for various components in docstrings. If you think I've missed anything please create an issue.
519
+
520
+ ### Pretrained Weights
521
+ So far all of the pretrained weights available here are pretrained on ImageNet with a select few that have some additional pretraining (see extra note below). ImageNet was released for non-commercial research purposes only (https://image-net.org/download). It's not clear what the implications of that are for the use of pretrained weights from that dataset. Any models I have trained with ImageNet are done for research purposes and one should assume that the original dataset license applies to the weights. It's best to seek legal advice if you intend to use the pretrained weights in a commercial product.
522
+
523
+ #### Pretrained on more than ImageNet
524
+ Several weights included or referenced here were pretrained with proprietary datasets that I do not have access to. These include the Facebook WSL, SSL, SWSL ResNe(Xt) and the Google Noisy Student EfficientNet models. The Facebook models have an explicit non-commercial license (CC-BY-NC 4.0, https://github.com/facebookresearch/semi-supervised-ImageNet1K-models, https://github.com/facebookresearch/WSL-Images). The Google models do not appear to have any restriction beyond the Apache 2.0 license (and ImageNet concerns). In either case, you should contact Facebook or Google with any questions.
525
+
526
+ ## Citing
527
+
528
+ ### BibTeX
529
+
530
+ ```bibtex
531
+ @misc{rw2019timm,
532
+ author = {Ross Wightman},
533
+ title = {PyTorch Image Models},
534
+ year = {2019},
535
+ publisher = {GitHub},
536
+ journal = {GitHub repository},
537
+ doi = {10.5281/zenodo.4414861},
538
+ howpublished = {\url{https://github.com/rwightman/pytorch-image-models}}
539
+ }
540
+ ```
541
+
542
+ ### Latest DOI
543
+
544
+ [![DOI](https://zenodo.org/badge/168799526.svg)](https://zenodo.org/badge/latestdoi/168799526)
pytorch-image-models/avg_checkpoints.py ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """ Checkpoint Averaging Script
3
+
4
+ This script averages all model weights for checkpoints in specified path that match
5
+ the specified filter wildcard. All checkpoints must be from the exact same model.
6
+
7
+ For any hope of decent results, the checkpoints should be from the same or child
8
+ (via resumes) training session. This can be viewed as similar to maintaining running
9
+ EMA (exponential moving average) of the model weights or performing SWA (stochastic
10
+ weight averaging), but post-training.
11
+
12
+ Hacked together by / Copyright 2020 Ross Wightman (https://github.com/rwightman)
13
+ """
14
+ import torch
15
+ import argparse
16
+ import os
17
+ import glob
18
+ import hashlib
19
+ from timm.models import load_state_dict
20
+ try:
21
+ import safetensors.torch
22
+ _has_safetensors = True
23
+ except ImportError:
24
+ _has_safetensors = False
25
+
26
+ DEFAULT_OUTPUT = "./averaged.pth"
27
+ DEFAULT_SAFE_OUTPUT = "./averaged.safetensors"
28
+
29
+ parser = argparse.ArgumentParser(description='PyTorch Checkpoint Averager')
30
+ parser.add_argument('--input', default='', type=str, metavar='PATH',
31
+ help='path to base input folder containing checkpoints')
32
+ parser.add_argument('--filter', default='*.pth.tar', type=str, metavar='WILDCARD',
33
+ help='checkpoint filter (path wildcard)')
34
+ parser.add_argument('--output', default=DEFAULT_OUTPUT, type=str, metavar='PATH',
35
+ help=f'Output filename. Defaults to {DEFAULT_SAFE_OUTPUT} when passing --safetensors.')
36
+ parser.add_argument('--no-use-ema', dest='no_use_ema', action='store_true',
37
+ help='Force not using ema version of weights (if present)')
38
+ parser.add_argument('--no-sort', dest='no_sort', action='store_true',
39
+ help='Do not sort and select by checkpoint metric, also makes "n" argument irrelevant')
40
+ parser.add_argument('-n', type=int, default=10, metavar='N',
41
+ help='Number of checkpoints to average')
42
+ parser.add_argument('--safetensors', action='store_true',
43
+ help='Save weights using safetensors instead of the default torch way (pickle).')
44
+
45
+
46
def checkpoint_metric(checkpoint_path):
    """Return the selection metric stored in a checkpoint file, if any.

    Args:
        checkpoint_path: Path to a checkpoint file readable by ``torch.load``.

    Returns:
        ``checkpoint['metric']`` when that key is present; otherwise
        ``checkpoint['metrics'][checkpoint['metric_name']]`` when both of those
        keys are present; otherwise ``None``. ``None`` is also returned when
        the path is empty or does not refer to an existing file.
    """
    if not checkpoint_path or not os.path.isfile(checkpoint_path):
        # Use the same "no metric" value as every other path. The caller keeps
        # a checkpoint only when the result `is not None`, so the previous
        # `{}` return would have been treated as a real metric and then broken
        # the sort of (metric, path) tuples.
        return None
    print("=> Extracting metric from checkpoint '{}'".format(checkpoint_path))
    # NOTE: torch.load may unpickle arbitrary objects depending on the torch
    # version's `weights_only` default; only use this on trusted checkpoints.
    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    if 'metric' in checkpoint:
        return checkpoint['metric']
    if 'metrics' in checkpoint and 'metric_name' in checkpoint:
        metrics = checkpoint['metrics']
        print(metrics)
        return metrics[checkpoint['metric_name']]
    return None
59
+
60
+
61
def main():
    """Average the weights of the selected checkpoints and save the result.

    Reads options from the module-level ``parser``. Checkpoints matching
    ``--input``/``--filter`` are collected with ``glob``; unless ``--no-sort``
    is given they are ranked by ``checkpoint_metric`` and only the last ``-n``
    entries of the ascending sort are kept. Each state dict is accumulated in
    float64, divided by the per-key count, clamped to the float32 range, cast
    to float32, and written with ``torch.save`` or safetensors. The SHA256 of
    the written file is printed.

    Raises:
        SystemExit: with status 1 when safetensors output is requested but the
            package is not importable, when the output file already exists, or
            when no checkpoints are selected.
    """
    args = parser.parse_args()
    # by default use the EMA weights (if present)
    args.use_ema = not args.no_use_ema
    # by default sort by checkpoint metric (if present) and avg top n checkpoints
    args.sort = not args.no_sort

    if args.safetensors and not _has_safetensors:
        # Fail before doing any averaging work rather than at save time.
        print("Error: `pip install safetensors` to use .safetensors")
        raise SystemExit(1)

    if args.safetensors and args.output == DEFAULT_OUTPUT:
        # Default path changes if using safetensors
        args.output = DEFAULT_SAFE_OUTPUT

    output, output_ext = os.path.splitext(args.output)
    if not output_ext:
        output_ext = ('.safetensors' if args.safetensors else '.pth')
    output = output + output_ext

    if args.safetensors and not output_ext == ".safetensors":
        print(
            "Warning: saving weights as safetensors but output file extension is not "
            f"set to '.safetensors': {args.output}"
        )

    if os.path.exists(output):
        print("Error: Output filename ({}) already exists.".format(output))
        # `exit()` is injected by the `site` module and is not guaranteed to
        # exist; raising SystemExit has the same effect without relying on it.
        raise SystemExit(1)

    pattern = args.input
    if not args.input.endswith(os.path.sep) and not args.filter.startswith(os.path.sep):
        pattern += os.path.sep
    pattern += args.filter
    checkpoints = glob.glob(pattern, recursive=True)

    if args.sort:
        checkpoint_metrics = []
        for c in checkpoints:
            metric = checkpoint_metric(c)
            if metric is not None:
                checkpoint_metrics.append((metric, c))
        # Ascending sort, then keep the tail: the n largest metric values.
        checkpoint_metrics = sorted(checkpoint_metrics)
        checkpoint_metrics = checkpoint_metrics[-args.n:]
        if checkpoint_metrics:
            print("Selected checkpoints:")
            for m, c in checkpoint_metrics:
                print(m, c)
        avg_checkpoints = [c for m, c in checkpoint_metrics]
    else:
        avg_checkpoints = checkpoints
        if avg_checkpoints:
            print("Selected checkpoints:")
            for c in avg_checkpoints:
                print(c)

    if not avg_checkpoints:
        print('Error: No checkpoints found to average.')
        raise SystemExit(1)

    avg_state_dict = {}
    avg_counts = {}
    for c in avg_checkpoints:
        new_state_dict = load_state_dict(c, args.use_ema)
        if not new_state_dict:
            print(f"Error: Checkpoint ({c}) doesn't exist")
            continue
        for k, v in new_state_dict.items():
            # Accumulate in float64 to limit rounding error across checkpoints.
            if k not in avg_state_dict:
                avg_state_dict[k] = v.clone().to(dtype=torch.float64)
                avg_counts[k] = 1
            else:
                avg_state_dict[k] += v.to(dtype=torch.float64)
                avg_counts[k] += 1

    # Per-key counts are used so a key missing from some checkpoints is still
    # averaged over the checkpoints that do contain it.
    for k, v in avg_state_dict.items():
        v.div_(avg_counts[k])

    # float32 overflow seems unlikely based on weights seen to date, but who knows
    float32_info = torch.finfo(torch.float32)
    final_state_dict = {}
    for k, v in avg_state_dict.items():
        v = v.clamp(float32_info.min, float32_info.max)
        final_state_dict[k] = v.to(dtype=torch.float32)

    if args.safetensors:
        safetensors.torch.save_file(final_state_dict, output)
    else:
        torch.save(final_state_dict, output)

    with open(output, 'rb') as f:
        sha_hash = hashlib.sha256(f.read()).hexdigest()
    # The closing quote now ends right after the path instead of after the hash.
    print(f"=> Saved state_dict to '{output}', SHA256: {sha_hash}")
149
+
150
+
151
+ if __name__ == '__main__':
152
+ main()
pytorch-image-models/benchmark.py ADDED
@@ -0,0 +1,703 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """ Model Benchmark Script
3
+
4
+ An inference and train step benchmark script for timm models.
5
+
6
+ Hacked together by Ross Wightman (https://github.com/rwightman)
7
+ """
8
+ import argparse
9
+ import csv
10
+ import json
11
+ import logging
12
+ import time
13
+ from collections import OrderedDict
14
+ from contextlib import suppress
15
+ from functools import partial
16
+
17
+ import torch
18
+ import torch.nn as nn
19
+ import torch.nn.parallel
20
+
21
+ from timm.data import resolve_data_config
22
+ from timm.layers import set_fast_norm
23
+ from timm.models import create_model, is_model, list_models
24
+ from timm.optim import create_optimizer_v2
25
+ from timm.utils import setup_default_logging, set_jit_fuser, decay_batch_step, check_batch_size_retry, ParseKwargs,\
26
+ reparameterize_model
27
+
28
+ has_apex = False
29
+ try:
30
+ from apex import amp
31
+ has_apex = True
32
+ except ImportError:
33
+ pass
34
+
35
+ has_native_amp = False
36
+ try:
37
+ if getattr(torch.cuda.amp, 'autocast') is not None:
38
+ has_native_amp = True
39
+ except AttributeError:
40
+ pass
41
+
42
+ try:
43
+ from deepspeed.profiling.flops_profiler import get_model_profile
44
+ has_deepspeed_profiling = True
45
+ except ImportError as e:
46
+ has_deepspeed_profiling = False
47
+
48
+ try:
49
+ from fvcore.nn import FlopCountAnalysis, flop_count_str, ActivationCountAnalysis
50
+ has_fvcore_profiling = True
51
+ except ImportError as e:
52
+ FlopCountAnalysis = None
53
+ has_fvcore_profiling = False
54
+
55
+ try:
56
+ from functorch.compile import memory_efficient_fusion
57
+ has_functorch = True
58
+ except ImportError as e:
59
+ has_functorch = False
60
+
61
+ has_compile = hasattr(torch, 'compile')
62
+
63
+ if torch.cuda.is_available():
64
+ torch.backends.cuda.matmul.allow_tf32 = True
65
+ torch.backends.cudnn.benchmark = True
66
+ _logger = logging.getLogger('validate')
67
+
68
+
69
+ parser = argparse.ArgumentParser(description='PyTorch Benchmark')
70
+
71
+ # benchmark specific args
72
+ parser.add_argument('--model-list', metavar='NAME', default='',
73
+ help='txt file based list of model names to benchmark')
74
+ parser.add_argument('--bench', default='both', type=str,
75
+ help="Benchmark mode. One of 'inference', 'train', 'both'. Defaults to 'both'")
76
+ parser.add_argument('--detail', action='store_true', default=False,
77
+ help='Provide train fwd/bwd/opt breakdown detail if True. Defaults to False')
78
+ parser.add_argument('--no-retry', action='store_true', default=False,
79
+ help='Do not decay batch size and retry on error.')
80
+ parser.add_argument('--results-file', default='', type=str,
81
+ help='Output csv file for validation results (summary)')
82
+ parser.add_argument('--results-format', default='csv', type=str,
83
+ help='Format for results file one of (csv, json) (default: csv).')
84
+ parser.add_argument('--num-warm-iter', default=10, type=int,
85
+ help='Number of warmup iterations (default: 10)')
86
+ parser.add_argument('--num-bench-iter', default=40, type=int,
87
+ help='Number of benchmark iterations (default: 40)')
88
+ parser.add_argument('--device', default='cuda', type=str,
89
+ help="device to run benchmark on")
90
+
91
+ # common inference / train args
92
+ parser.add_argument('--model', '-m', metavar='NAME', default='resnet50',
93
+ help='model architecture (default: resnet50)')
94
+ parser.add_argument('-b', '--batch-size', default=256, type=int,
95
+ metavar='N', help='mini-batch size (default: 256)')
96
+ parser.add_argument('--img-size', default=None, type=int,
97
+ metavar='N', help='Input image dimension, uses model default if empty')
98
+ parser.add_argument('--input-size', default=None, nargs=3, type=int,
99
+ metavar='N N N', help='Input all image dimensions (d h w, e.g. --input-size 3 224 224), uses model default if empty')
100
+ parser.add_argument('--use-train-size', action='store_true', default=False,
101
+ help='Run inference at train size, not test-input-size if it exists.')
102
+ parser.add_argument('--num-classes', type=int, default=None,
103
+ help='Number classes in dataset')
104
+ parser.add_argument('--gp', default=None, type=str, metavar='POOL',
105
+ help='Global pool type, one of (fast, avg, max, avgmax, avgmaxc). Model default if None.')
106
+ parser.add_argument('--channels-last', action='store_true', default=False,
107
+ help='Use channels_last memory layout')
108
+ parser.add_argument('--grad-checkpointing', action='store_true', default=False,
109
+ help='Enable gradient checkpointing through model blocks/stages')
110
+ parser.add_argument('--amp', action='store_true', default=False,
111
+ help='use PyTorch Native AMP for mixed precision training. Overrides --precision arg.')
112
+ parser.add_argument('--amp-dtype', default='float16', type=str,
113
+ help='lower precision AMP dtype (default: float16). Overrides --precision arg if args.amp True.')
114
+ parser.add_argument('--precision', default='float32', type=str,
115
+ help='Numeric precision. One of (amp, float32, float16, bfloat16, tf32)')
116
+ parser.add_argument('--fuser', default='', type=str,
117
+ help="Select jit fuser. One of ('', 'te', 'old', 'nvfuser')")
118
+ parser.add_argument('--fast-norm', default=False, action='store_true',
119
+ help='enable experimental fast-norm')
120
+ parser.add_argument('--reparam', default=False, action='store_true',
121
+ help='Reparameterize model')
122
+ parser.add_argument('--model-kwargs', nargs='*', default={}, action=ParseKwargs)
123
+
124
+ # codegen (model compilation) options
125
+ scripting_group = parser.add_mutually_exclusive_group()
126
+ scripting_group.add_argument('--torchscript', dest='torchscript', action='store_true',
127
+ help='convert model torchscript for inference')
128
+ scripting_group.add_argument('--torchcompile', nargs='?', type=str, default=None, const='inductor',
129
+ help="Enable compilation w/ specified backend (default: inductor).")
130
+ scripting_group.add_argument('--aot-autograd', default=False, action='store_true',
131
+ help="Enable AOT Autograd optimization.")
132
+
133
+ # train optimizer parameters
134
+ parser.add_argument('--opt', default='sgd', type=str, metavar='OPTIMIZER',
135
+ help='Optimizer (default: "sgd"')
136
+ parser.add_argument('--opt-eps', default=None, type=float, metavar='EPSILON',
137
+ help='Optimizer Epsilon (default: None, use opt default)')
138
+ parser.add_argument('--opt-betas', default=None, type=float, nargs='+', metavar='BETA',
139
+ help='Optimizer Betas (default: None, use opt default)')
140
+ parser.add_argument('--momentum', type=float, default=0.9, metavar='M',
141
+ help='Optimizer momentum (default: 0.9)')
142
+ parser.add_argument('--weight-decay', type=float, default=0.0001,
143
+ help='weight decay (default: 0.0001)')
144
+ parser.add_argument('--clip-grad', type=float, default=None, metavar='NORM',
145
+ help='Clip gradient norm (default: None, no clipping)')
146
+ parser.add_argument('--clip-mode', type=str, default='norm',
147
+ help='Gradient clipping mode. One of ("norm", "value", "agc")')
148
+
149
+
150
+ # model regularization / loss params that impact model or loss fn
151
+ parser.add_argument('--smoothing', type=float, default=0.1,
152
+ help='Label smoothing (default: 0.1)')
153
+ parser.add_argument('--drop', type=float, default=0.0, metavar='PCT',
154
+ help='Dropout rate (default: 0.)')
155
+ parser.add_argument('--drop-path', type=float, default=None, metavar='PCT',
156
+ help='Drop path rate (default: None)')
157
+ parser.add_argument('--drop-block', type=float, default=None, metavar='PCT',
158
+ help='Drop block rate (default: None)')
159
+
160
+
161
def timestamp(sync=False):
    """Return the current ``time.perf_counter()`` reading.

    ``sync`` is accepted but ignored, so this function can be called with the
    same arguments as ``cuda_timestamp``.
    """
    now = time.perf_counter()
    return now
163
+
164
+
165
def cuda_timestamp(sync=False, device=None):
    """Return the current ``time.perf_counter()`` reading.

    When ``sync`` is truthy, ``torch.cuda.synchronize(device=device)`` is
    called first so the reading is taken after that call returns.
    """
    if not sync:
        return time.perf_counter()
    torch.cuda.synchronize(device=device)
    return time.perf_counter()
169
+
170
+
171
def count_params(model: nn.Module):
    """Return the total element count over all of ``model.parameters()``."""
    total = 0
    for param in model.parameters():
        total += param.numel()
    return total
173
+
174
+
175
def resolve_precision(precision: str):
    """Map a precision name to ``(amp_dtype, model_dtype, data_dtype)``.

    ``amp_dtype`` is ``None`` when autocast is disabled. The ``amp*`` modes
    keep model and data in float32; ``float16``/``bfloat16`` cast both model
    and data to that dtype. Any other name fails the assertion.
    """
    fp32 = torch.float32
    # precision name -> (amp_dtype, model_dtype, data_dtype)
    dtype_table = {
        'amp': (torch.float16, fp32, fp32),
        'amp_bfloat16': (torch.bfloat16, fp32, fp32),
        'float16': (None, torch.float16, torch.float16),
        'bfloat16': (None, torch.bfloat16, torch.bfloat16),
        'float32': (None, fp32, fp32),
    }
    assert precision in tuple(dtype_table)
    amp_dtype, model_dtype, data_dtype = dtype_table[precision]
    return amp_dtype, model_dtype, data_dtype
191
+
192
+
193
def profile_deepspeed(model, input_size=(3, 224, 224), batch_size=1, detailed=False):
    """Profile ``model`` with DeepSpeed's ``get_model_profile``.

    Returns:
        ``(macs, 0)``: the second element of the profiler's 3-tuple result and
        a constant 0, since no activation count is taken from DeepSpeed.
    """
    profiler_kwargs = dict(
        model=model,
        input_shape=(batch_size,) + input_size,  # input shape/resolution
        print_profile=detailed,  # prints the model graph with the measured profile attached to each module
        detailed=detailed,  # print the detailed profile
        warm_up=10,  # the number of warm-ups before measuring the time of each module
        as_string=False,  # print raw numbers (e.g. 1000) or as human-readable strings (e.g. 1k)
        output_file=None,  # path to the output file. If None, the profiler prints to stdout.
        ignore_modules=None,  # the list of modules to ignore in the profiling
    )
    _first, macs, _third = get_model_profile(**profiler_kwargs)
    activations = 0  # no activation count in DS
    return macs, activations
204
+
205
+
206
def profile_fvcore(model, input_size=(3, 224, 224), batch_size=1, detailed=False, force_cpu=False):
    """Profile ``model`` with fvcore's flop and activation counters.

    An all-ones input of shape ``(batch_size,) + input_size`` is created on the
    device and dtype of the model's first parameter. With ``force_cpu`` the
    model is moved to CPU first via ``model.to('cpu')``.

    Returns:
        ``(fca.total(), aca.total())`` from ``FlopCountAnalysis`` and
        ``ActivationCountAnalysis`` respectively.
    """
    if force_cpu:
        model = model.to('cpu')
    first_param = next(model.parameters())
    input_shape = (batch_size,) + input_size
    example_input = torch.ones(input_shape, device=first_param.device, dtype=first_param.dtype)
    flop_analysis = FlopCountAnalysis(model, example_input)
    act_analysis = ActivationCountAnalysis(model, example_input)
    if detailed:
        print(flop_count_str(flop_analysis))
    return flop_analysis.total(), act_analysis.total()
217
+
218
+
219
class BenchmarkRunner:
    """Base class that builds a model and the shared state used by benchmark runs.

    The constructor creates the model through ``create_model``, moves it to the
    requested device/dtype/memory format, resolves the input size via
    ``resolve_data_config``, optionally scripts/compiles the model, and selects
    a timing function. Subclasses use ``_init_input`` to create the example
    batch.
    """

    def __init__(
            self,
            model_name,
            detail=False,
            device='cuda',
            torchscript=False,
            torchcompile=None,
            aot_autograd=False,
            reparam=False,
            precision='float32',
            fuser='',
            num_warm_iter=10,
            num_bench_iter=50,
            use_train_size=False,
            **kwargs
    ):
        """Create the model and record benchmark settings.

        Args:
            model_name: Name passed to ``create_model``.
            detail: Stored as ``self.detail``.
            device: Device the model and inputs are placed on. It is tested
                with ``'cuda' in self.device``, so a string is expected.
            torchscript: Script the model with ``torch.jit.script``.
            torchcompile: Backend name for ``torch.compile``; falsy disables it.
            aot_autograd: Wrap the model with ``memory_efficient_fusion``.
            reparam: Apply ``reparameterize_model`` to the created model.
            precision: Name understood by ``resolve_precision``.
            fuser: If non-empty, passed to ``set_jit_fuser``.
            num_warm_iter: Number of warmup iterations.
            num_bench_iter: Number of benchmark iterations.
            use_train_size: Passed inverted as ``use_test_size`` to
                ``resolve_data_config``.
            **kwargs: Remaining options. ``channels_last``, ``num_classes``,
                ``gp``, ``drop``, ``drop_path``, ``drop_block``,
                ``model_kwargs`` and ``batch_size`` are popped here; the dict
                is also handed to ``resolve_data_config``.
        """
        self.model_name = model_name
        self.detail = detail
        self.device = device
        self.amp_dtype, self.model_dtype, self.data_dtype = resolve_precision(precision)
        self.channels_last = kwargs.pop('channels_last', False)
        if self.amp_dtype is not None:
            self.amp_autocast = partial(torch.cuda.amp.autocast, dtype=self.amp_dtype)
        else:
            # `suppress()` with no arguments acts as a do-nothing context manager.
            self.amp_autocast = suppress

        if fuser:
            set_jit_fuser(fuser)
        self.model = create_model(
            model_name,
            num_classes=kwargs.pop('num_classes', None),
            in_chans=3,
            global_pool=kwargs.pop('gp', 'fast'),
            scriptable=torchscript,
            drop_rate=kwargs.pop('drop', 0.),
            drop_path_rate=kwargs.pop('drop_path', None),
            drop_block_rate=kwargs.pop('drop_block', None),
            **kwargs.pop('model_kwargs', {}),
        )
        if reparam:
            self.model = reparameterize_model(self.model)
        self.model.to(
            device=self.device,
            dtype=self.model_dtype,
            memory_format=torch.channels_last if self.channels_last else None,
        )
        self.num_classes = self.model.num_classes
        self.param_count = count_params(self.model)
        _logger.info('Model %s created, param count: %d' % (model_name, self.param_count))

        # kwargs still contains batch_size here; it is popped only afterwards.
        data_config = resolve_data_config(kwargs, model=self.model, use_test_size=not use_train_size)
        self.input_size = data_config['input_size']
        self.batch_size = kwargs.pop('batch_size', 256)

        self.compiled = False
        if torchscript:
            self.model = torch.jit.script(self.model)
            self.compiled = True
        elif torchcompile:
            assert has_compile, 'A version of torch w/ torch.compile() is required, possibly a nightly.'
            torch._dynamo.reset()
            self.model = torch.compile(self.model, backend=torchcompile)
            self.compiled = True
        elif aot_autograd:
            assert has_functorch, "functorch is needed for --aot-autograd"
            self.model = memory_efficient_fusion(self.model)
            self.compiled = True

        self.example_inputs = None
        self.num_warm_iter = num_warm_iter
        self.num_bench_iter = num_bench_iter
        # Log about five times per run. Clamp to at least 1: with fewer than 5
        # benchmark iterations the integer division yields 0, and the
        # `num_steps % self.log_freq` check in the run loop would then raise
        # ZeroDivisionError.
        self.log_freq = max(1, num_bench_iter // 5)
        if 'cuda' in self.device:
            self.time_fn = partial(cuda_timestamp, device=self.device)
        else:
            self.time_fn = timestamp

    def _init_input(self):
        """Create ``self.example_inputs`` as a random batch on the target device.

        The tensor has shape ``(batch_size,) + input_size`` and dtype
        ``self.data_dtype``; it is converted to channels_last layout when that
        option is enabled.
        """
        self.example_inputs = torch.randn(
            (self.batch_size,) + self.input_size, device=self.device, dtype=self.data_dtype)
        if self.channels_last:
            self.example_inputs = self.example_inputs.contiguous(memory_format=torch.channels_last)
302
+
303
+
304
class InferenceBenchmarkRunner(BenchmarkRunner):
    """Benchmark forward passes of a model in eval mode."""

    def __init__(
            self,
            model_name,
            device='cuda',
            torchscript=False,
            **kwargs
    ):
        """Build the model via ``BenchmarkRunner`` and switch it to eval mode."""
        super().__init__(model_name=model_name, device=device, torchscript=torchscript, **kwargs)
        self.model.eval()

    def run(self):
        """Run warmup and timed inference steps and return a results dict.

        Returns:
            dict with ``samples_per_sec``, ``step_time`` (ms), ``batch_size``,
            ``img_size`` and ``param_count`` (millions). ``gmacs`` and, with
            fvcore, ``macts`` are added when a profiler is available, the
            model is not scripted/compiled, and profiling succeeds.
        """
        def _step():
            t_step_start = self.time_fn()
            with self.amp_autocast():
                self.model(self.example_inputs)
            # Passing True requests a sync before the end timestamp is taken.
            t_step_end = self.time_fn(True)
            return t_step_end - t_step_start

        _logger.info(
            f'Running inference benchmark on {self.model_name} for {self.num_bench_iter} steps w/ '
            f'input size {self.input_size} and batch size {self.batch_size}.')

        with torch.no_grad():
            self._init_input()

            for _ in range(self.num_warm_iter):
                _step()

            total_step = 0.
            num_samples = 0
            t_run_start = self.time_fn()
            for i in range(self.num_bench_iter):
                delta_fwd = _step()
                total_step += delta_fwd
                num_samples += self.batch_size
                num_steps = i + 1
                # log_freq is num_bench_iter // 5 and can be 0 for very short
                # runs; skip periodic logging then instead of dividing by zero.
                if self.log_freq and num_steps % self.log_freq == 0:
                    _logger.info(
                        f"Infer [{num_steps}/{self.num_bench_iter}]."
                        f" {num_samples / total_step:0.2f} samples/sec."
                        f" {1000 * total_step / num_steps:0.3f} ms/step.")
            t_run_end = self.time_fn(True)
            t_run_elapsed = t_run_end - t_run_start

        results = dict(
            samples_per_sec=round(num_samples / t_run_elapsed, 2),
            step_time=round(1000 * total_step / self.num_bench_iter, 3),
            batch_size=self.batch_size,
            img_size=self.input_size[-1],
            param_count=round(self.param_count / 1e6, 2),
        )

        retries = 0 if self.compiled else 2  # skip profiling if model is scripted
        while retries:
            retries -= 1
            try:
                if has_deepspeed_profiling:
                    macs, _ = profile_deepspeed(self.model, self.input_size)
                    results['gmacs'] = round(macs / 1e9, 2)
                elif has_fvcore_profiling:
                    # On the final attempt (retries == 0) fall back to CPU.
                    macs, activations = profile_fvcore(self.model, self.input_size, force_cpu=not retries)
                    results['gmacs'] = round(macs / 1e9, 2)
                    results['macts'] = round(activations / 1e6, 2)
            except RuntimeError as e:
                # Profiling is best-effort: record the failure and try again.
                _logger.debug('Profiling of %s failed: %s', self.model_name, e)
            else:
                # Stop once an attempt succeeds. Without this the loop always
                # ran a second pass, which with fvcore forced the model onto
                # the CPU even after a successful first pass.
                break

        _logger.info(
            f"Inference benchmark of {self.model_name} done. "
            f"{results['samples_per_sec']:.2f} samples/sec, {results['step_time']:.2f} ms/step")

        return results
377
+
378
+
379
class TrainBenchmarkRunner(BenchmarkRunner):
    """Benchmark runner that times full training steps (forward, backward, optimizer)."""

    def __init__(
            self,
            model_name,
            device='cuda',
            torchscript=False,
            **kwargs
    ):
        super().__init__(model_name=model_name, device=device, torchscript=torchscript, **kwargs)
        self.model.train()

        self.loss = nn.CrossEntropyLoss().to(self.device)
        self.target_shape = tuple()

        self.optimizer = create_optimizer_v2(
            self.model,
            opt=kwargs.pop('opt', 'sgd'),
            lr=kwargs.pop('lr', 1e-4))

        if kwargs.pop('grad_checkpointing', False):
            self.model.set_grad_checkpointing()

    def _gen_target(self, batch_size):
        """Return a random integer class target of shape ``(batch_size,) + self.target_shape``."""
        return torch.empty(
            (batch_size,) + self.target_shape, device=self.device, dtype=torch.long).random_(self.num_classes)

    def run(self):
        """Run warmup and timed training steps, then return a results dict.

        Returns:
            dict with ``samples_per_sec``, ``step_time`` (ms per step), ``batch_size``,
            ``img_size`` and ``param_count`` (in millions). When ``self.detail`` is set,
            ``fwd_time``, ``bwd_time`` and ``opt_time`` (ms per step) are included as well.
        """
        def _step(detail=False):
            self.optimizer.zero_grad()  # can this be ignored?
            t_start = self.time_fn()
            t_fwd_end = t_start
            t_bwd_end = t_start
            with self.amp_autocast():
                output = self.model(self.example_inputs)
                if isinstance(output, tuple):
                    output = output[0]
                if detail:
                    t_fwd_end = self.time_fn(True)
                target = self._gen_target(output.shape[0])
                self.loss(output, target).backward()
                if detail:
                    t_bwd_end = self.time_fn(True)
            self.optimizer.step()
            t_end = self.time_fn(True)
            if detail:
                delta_fwd = t_fwd_end - t_start
                delta_bwd = t_bwd_end - t_fwd_end
                delta_opt = t_end - t_bwd_end
                return delta_fwd, delta_bwd, delta_opt
            else:
                delta_step = t_end - t_start
                return delta_step

        _logger.info(
            f'Running train benchmark on {self.model_name} for {self.num_bench_iter} steps w/ '
            f'input size {self.input_size} and batch size {self.batch_size}.')

        self._init_input()

        for _ in range(self.num_warm_iter):
            _step()

        t_run_start = self.time_fn()
        if self.detail:
            total_fwd = 0.
            total_bwd = 0.
            total_opt = 0.
            num_samples = 0
            for i in range(self.num_bench_iter):
                delta_fwd, delta_bwd, delta_opt = _step(True)
                num_samples += self.batch_size
                total_fwd += delta_fwd
                total_bwd += delta_bwd
                total_opt += delta_opt
                num_steps = (i + 1)
                # log_freq is num_bench_iter // 5 and therefore 0 for fewer than 5 iterations;
                # skip periodic logging then instead of taking a modulo by zero
                if self.log_freq and num_steps % self.log_freq == 0:
                    total_step = total_fwd + total_bwd + total_opt
                    _logger.info(
                        f"Train [{num_steps}/{self.num_bench_iter}]."
                        f" {num_samples / total_step:0.2f} samples/sec."
                        f" {1000 * total_fwd / num_steps:0.3f} ms/step fwd,"
                        f" {1000 * total_bwd / num_steps:0.3f} ms/step bwd,"
                        f" {1000 * total_opt / num_steps:0.3f} ms/step opt."
                    )
            total_step = total_fwd + total_bwd + total_opt
            t_run_elapsed = self.time_fn() - t_run_start
            results = dict(
                samples_per_sec=round(num_samples / t_run_elapsed, 2),
                step_time=round(1000 * total_step / self.num_bench_iter, 3),
                fwd_time=round(1000 * total_fwd / self.num_bench_iter, 3),
                bwd_time=round(1000 * total_bwd / self.num_bench_iter, 3),
                opt_time=round(1000 * total_opt / self.num_bench_iter, 3),
                batch_size=self.batch_size,
                img_size=self.input_size[-1],
                param_count=round(self.param_count / 1e6, 2),
            )
        else:
            total_step = 0.
            num_samples = 0
            for i in range(self.num_bench_iter):
                delta_step = _step(False)
                num_samples += self.batch_size
                total_step += delta_step
                num_steps = (i + 1)
                if self.log_freq and num_steps % self.log_freq == 0:
                    _logger.info(
                        f"Train [{num_steps}/{self.num_bench_iter}]."
                        f" {num_samples / total_step:0.2f} samples/sec."
                        f" {1000 * total_step / num_steps:0.3f} ms/step.")
            t_run_elapsed = self.time_fn() - t_run_start
            results = dict(
                samples_per_sec=round(num_samples / t_run_elapsed, 2),
                step_time=round(1000 * total_step / self.num_bench_iter, 3),
                batch_size=self.batch_size,
                img_size=self.input_size[-1],
                param_count=round(self.param_count / 1e6, 2),
            )

        # step_time is milliseconds per optimizer step, so label it ms/step (not ms/sample)
        _logger.info(
            f"Train benchmark of {self.model_name} done. "
            f"{results['samples_per_sec']:.2f} samples/sec, {results['step_time']:.2f} ms/step")

        return results
503
+
504
+
505
class ProfileRunner(BenchmarkRunner):
    """Runner that reports MACs / activation counts via deepspeed or fvcore instead of timing."""

    def __init__(self, model_name, device='cuda', profiler='', **kwargs):
        super().__init__(model_name=model_name, device=device, **kwargs)
        selected = profiler
        # no explicit choice: prefer deepspeed, fall back to fvcore
        if not selected and has_deepspeed_profiling:
            selected = 'deepspeed'
        if not selected and has_fvcore_profiling:
            selected = 'fvcore'
        assert selected, "One of deepspeed or fvcore needs to be installed for profiling to work."
        self.profiler = selected
        self.model.eval()

    def run(self):
        """Profile the model once and return GMACs, activations (M), batch/img size and param count (M)."""
        _logger.info(
            f'Running profiler on {self.model_name} w/ '
            f'input size {self.input_size} and batch size {self.batch_size}.')

        # both counters stay 0 when the profiler name matches neither backend
        macs, activations = 0, 0
        if self.profiler == 'deepspeed':
            macs, _ = profile_deepspeed(self.model, self.input_size, batch_size=self.batch_size, detailed=True)
        elif self.profiler == 'fvcore':
            macs, activations = profile_fvcore(self.model, self.input_size, batch_size=self.batch_size, detailed=True)

        results = {
            'gmacs': round(macs / 1e9, 2),
            'macts': round(activations / 1e6, 2),
            'batch_size': self.batch_size,
            'img_size': self.input_size[-1],
            'param_count': round(self.param_count / 1e6, 2),
        }

        _logger.info(
            f"Profile of {self.model_name} done. "
            f"{results['gmacs']:.2f} GMACs, {results['param_count']:.2f} M params.")

        return results
543
+
544
+
545
+ def _try_run(
546
+ model_name,
547
+ bench_fn,
548
+ bench_kwargs,
549
+ initial_batch_size,
550
+ no_batch_size_retry=False
551
+ ):
552
+ batch_size = initial_batch_size
553
+ results = dict()
554
+ error_str = 'Unknown'
555
+ while batch_size:
556
+ try:
557
+ torch.cuda.empty_cache()
558
+ bench = bench_fn(model_name=model_name, batch_size=batch_size, **bench_kwargs)
559
+ results = bench.run()
560
+ return results
561
+ except RuntimeError as e:
562
+ error_str = str(e)
563
+ _logger.error(f'"{error_str}" while running benchmark.')
564
+ if not check_batch_size_retry(error_str):
565
+ _logger.error(f'Unrecoverable error encountered while benchmarking {model_name}, skipping.')
566
+ break
567
+ if no_batch_size_retry:
568
+ break
569
+ batch_size = decay_batch_step(batch_size)
570
+ _logger.warning(f'Reducing batch size to {batch_size} for retry.')
571
+ results['error'] = error_str
572
+ return results
573
+
574
+
575
def benchmark(args):
    """Benchmark the single model named in ``args.model`` and return an ordered results dict.

    Selects the runner(s) from ``args.bench``, prefixes each runner's result keys
    ('infer' / 'train'), and stops at the first runner that reports an error.
    """
    if args.amp:
        _logger.warning("Overriding precision to 'amp' since --amp flag set.")
        if args.amp_dtype == 'float16':
            args.precision = 'amp'
        else:
            args.precision = '_'.join(['amp', args.amp_dtype])
    _logger.info(f'Benchmarking in {args.precision} precision. '
                 f'{"NHWC" if args.channels_last else "NCHW"} layout. '
                 f'torchscript {"enabled" if args.torchscript else "disabled"}')

    # every remaining CLI arg is forwarded to the runner constructor
    bench_kwargs = vars(args).copy()
    bench_kwargs.pop('amp')
    model = bench_kwargs.pop('model')
    batch_size = bench_kwargs.pop('batch_size')

    mode = args.bench
    bench_fns = (InferenceBenchmarkRunner,)
    prefixes = ('infer',)
    if mode == 'both':
        bench_fns = (InferenceBenchmarkRunner, TrainBenchmarkRunner)
        prefixes = ('infer', 'train')
    elif mode == 'train':
        bench_fns = (TrainBenchmarkRunner,)
        prefixes = ('train',)
    elif mode.startswith('profile'):
        # specific profiler used if included in bench mode string, otherwise default to deepspeed, fallback to fvcore
        if 'deepspeed' in mode:
            assert has_deepspeed_profiling, "deepspeed must be installed to use deepspeed flop counter"
            bench_kwargs['profiler'] = 'deepspeed'
        elif 'fvcore' in mode:
            assert has_fvcore_profiling, "fvcore must be installed to use fvcore flop counter"
            bench_kwargs['profiler'] = 'fvcore'
        # prefixes stays ('infer',), so profile results are keyed 'infer_*'
        bench_fns = (ProfileRunner,)
        batch_size = 1

    model_results = OrderedDict(model=model)
    for prefix, bench_fn in zip(prefixes, bench_fns):
        run_results = _try_run(
            model,
            bench_fn,
            bench_kwargs=bench_kwargs,
            initial_batch_size=batch_size,
            no_batch_size_retry=args.no_retry,
        )
        failed = 'error' in run_results
        if prefix and not failed:
            run_results = {'_'.join([prefix, key]): value for key, value in run_results.items()}
        model_results.update(run_results)
        if failed:
            break

    if 'error' not in model_results:
        # collapse the per-runner param counts into one 'param_count' entry, preferring the infer value
        train_count = model_results.pop('train_param_count', 0)
        param_count = model_results.pop('infer_param_count', train_count)
        model_results.setdefault('param_count', param_count)
    return model_results
629
+
630
+
631
def main():
    """CLI entry point: benchmark one model or a list of models and emit the results."""
    setup_default_logging()
    args = parser.parse_args()
    model_names = []

    if args.fast_norm:
        set_fast_norm()

    if args.model_list:
        # one model name per line of the given text file
        args.model = ''
        with open(args.model_list) as f:
            model_names = [line.rstrip() for line in f]
    elif args.model == 'all':
        # validate all models in a list of names with pretrained checkpoints
        args.pretrained = True
        model_names = list_models(pretrained=True, exclude_filters=['*in21k'])
    elif not is_model(args.model):
        # model name doesn't exist, try as wildcard filter
        model_names = list_models(args.model)
    model_cfgs = [(name, None) for name in model_names]

    if len(model_cfgs):
        _logger.info('Running bulk validation on these pretrained models: {}'.format(', '.join(model_names)))
        collected = []
        try:
            for name, _ in model_cfgs:
                if not name:
                    continue
                args.model = name
                outcome = benchmark(args)
                if outcome:
                    collected.append(outcome)
                time.sleep(10)
        except KeyboardInterrupt as e:
            # allow Ctrl-C to stop the sweep while still reporting what finished
            pass

        if 'train' in args.bench:
            sort_key = 'train_samples_per_sec'
        elif 'profile' in args.bench:
            sort_key = 'infer_gmacs'
        else:
            sort_key = 'infer_samples_per_sec'
        # rows without the sort key (e.g. errored runs) are dropped from the bulk output
        ranked = [row for row in collected if sort_key in row]
        results = sorted(ranked, key=lambda row: row[sort_key], reverse=True)
    else:
        results = benchmark(args)

    if args.results_file:
        write_results(args.results_file, results, format=args.results_format)

    # output results in JSON to stdout w/ delimiter for runner script
    print(f'--result\n{json.dumps(results, indent=4)}')
684
+
685
+
686
def write_results(results_file, results, format='csv'):
    """Write benchmark results to ``results_file`` as JSON or CSV.

    Args:
        results_file: path of the file to (over)write.
        results: a single result dict or a list/tuple of result dicts.
        format: 'json' dumps ``results`` as-is; any other value writes CSV.

    For CSV the header is the union of keys across all rows (first-seen order), so rows
    that carry extra columns (e.g. profiling fields present only for some models) no
    longer raise ``ValueError``; missing values are written as empty cells.
    """
    if format == 'json':
        with open(results_file, mode='w') as cf:
            json.dump(results, cf, indent=4)
        return

    rows = results if isinstance(results, (list, tuple)) else [results]
    # newline='' is what the csv module expects so it controls line endings itself
    with open(results_file, mode='w', newline='') as cf:
        if not rows:
            return
        fieldnames = list(dict.fromkeys(key for row in rows for key in row))
        dw = csv.DictWriter(cf, fieldnames=fieldnames)
        dw.writeheader()
        dw.writerows(rows)
700
+
701
+
702
+ if __name__ == '__main__':
703
+ main()
pytorch-image-models/bulk_runner.py ADDED
@@ -0,0 +1,210 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """ Bulk Model Script Runner
3
+
4
+ Run validation or benchmark script in separate process for each model
5
+
6
+ Benchmark all 'vit*' models:
7
+ python bulk_runner.py --model-list 'vit*' --results-file vit_bench.csv benchmark.py --amp -b 512
8
+
9
+ Validate all models:
10
+ python bulk_runner.py --model-list all --results-file val.csv --pretrained validate.py /imagenet/validation/ --amp -b 512 --retry
11
+
12
+ Hacked together by Ross Wightman (https://github.com/rwightman)
13
+ """
14
+ import argparse
15
+ import os
16
+ import sys
17
+ import csv
18
+ import json
19
+ import subprocess
20
+ import time
21
+ from typing import Callable, List, Tuple, Union
22
+
23
+
24
+ from timm.models import is_model, list_models, get_pretrained_cfg
25
+
26
+
27
# Command line interface of the per-model launcher.
parser = argparse.ArgumentParser(description='Per-model process launcher')

# model and results args
parser.add_argument(
    '--model-list',
    metavar='NAME',
    default='',
    help='txt file based list of model names to benchmark',
)
parser.add_argument(
    '--results-file',
    default='',
    type=str,
    metavar='FILENAME',
    help='Output csv file for validation results (summary)',
)
parser.add_argument(
    '--sort-key',
    default='',
    type=str,
    metavar='COL',
    help='Specify sort key for results csv',
)
parser.add_argument(
    "--pretrained",
    action='store_true',
    help="only run models with pretrained weights",
)

# launcher behaviour
parser.add_argument(
    "--delay", type=float, default=0,
    help="Interval, in seconds, to delay between model invocations.",
)
parser.add_argument(
    "--start_method", type=str, default="spawn",
    choices=["spawn", "fork", "forkserver"],
    help="Multiprocessing start method to use when creating workers.",
)
# NOTE(review): --no_python and -m/--module are declared without action='store_true',
# so argparse expects a value after each of them although they are used as booleans.
parser.add_argument(
    "--no_python",
    help="Skip prepending the script with 'python' - just execute it directly. Useful "
         "when the script is not a Python script.",
)
parser.add_argument(
    "-m", "--module",
    help="Change each process to interpret the launch script as a Python module, executing "
         "with the same behavior as 'python -m'.",
)

# positional: the script to launch, followed by everything that is passed through to it
parser.add_argument(
    "script",
    type=str,
    help="Full path to the program/script to be launched for each model config.",
)
parser.add_argument("script_args", nargs=argparse.REMAINDER)
71
+
72
+
73
def cmd_from_args(args) -> Tuple[Union[Callable, str], List[str]]:
    """Build the executable and leading argument list used to launch the target script.

    Returns:
        ``(cmd, cmd_args)``: with ``--no_python`` the script itself is the command and
        ``cmd_args`` holds only the pass-through script args; otherwise the command is the
        Python interpreter (``PYTHON_EXEC`` env var or ``sys.executable``) and ``cmd_args``
        is ``-u [-m] <script> <script args...>``.

    Raises:
        ValueError: if ``--no_python`` and ``--module`` are both set.
    """
    if args.no_python:
        if args.module:
            raise ValueError(
                "Don't use both the '--no_python' flag"
                " and the '--module' flag at the same time."
            )
        return args.script, [*args.script_args]

    # -u: unbuffered output from the child interpreter
    interpreter_flags = ["-u", "-m"] if args.module else ["-u"]
    interpreter = os.getenv("PYTHON_EXEC", sys.executable)
    return interpreter, [*interpreter_flags, args.script, *args.script_args]
94
+
95
+
96
def main():
    """Launch the target script once per selected model and gather the JSON results.

    The model selection comes from ``--model-list``: the keywords 'all', 'all_in1k',
    'all_res', a wildcard filter, or (when nothing matched) a text file of model names.
    Successful results are sorted and written to the results csv.
    """
    args = parser.parse_args()
    cmd, cmd_args = cmd_from_args(args)

    selector = args.model_list
    model_cfgs = []
    if selector == 'all':
        names = list_models(
            pretrained=args.pretrained,  # only include models w/ pretrained checkpoints if set
        )
        model_cfgs = [(name, None) for name in names]
    elif selector == 'all_in1k':
        for name in list_models(pretrained=True):
            pt_cfg = get_pretrained_cfg(name)
            if getattr(pt_cfg, 'num_classes', 0) == 1000:
                print(name, pt_cfg.num_classes)
                model_cfgs.append((name, None))
    elif selector == 'all_res':
        names = list_models()
        names += list_models(pretrained=True)
        # a set de-duplicates (architecture, resolution) pairs
        res_cfgs = set()
        for name in names:
            pt_cfg = get_pretrained_cfg(name)
            if pt_cfg is None:
                print(f'Model {name} is missing pretrained cfg, skipping.')
                continue
            arch = name.split('.')[0]
            res_cfgs.add((arch, pt_cfg.input_size[-1]))
            if pt_cfg.test_input_size is not None:
                res_cfgs.add((arch, pt_cfg.test_input_size[-1]))
        model_cfgs = [(arch, {'img-size': res}) for arch, res in sorted(res_cfgs)]
    elif not is_model(selector):
        # model name doesn't exist, try as wildcard filter
        model_cfgs = [(name, None) for name in list_models(selector)]

    if not model_cfgs and os.path.exists(selector):
        # nothing matched so far: treat the value as a text file with one model name per line
        with open(selector) as f:
            model_cfgs = [(line.rstrip(), None) for line in f]

    if not model_cfgs:
        return

    results_file = args.results_file or './results.csv'
    results = []
    errors = []
    model_strings = '\n'.join(f'{cfg[0]}, {cfg[1]}' for cfg in model_cfgs)
    print(f"Running script on these models:\n {model_strings}")

    if args.sort_key:
        sort_key = args.sort_key
    elif 'benchmark' not in args.script:
        sort_key = 'top1'
    elif any('train' in a for a in args.script_args):
        sort_key = 'train_samples_per_sec'
    else:
        sort_key = 'infer_samples_per_sec'
    print(f'Script: {args.script}, Args: {args.script_args}, Sort key: {sort_key}')

    base_cmd = (cmd, *(str(a) for a in cmd_args))
    try:
        for model_name, extra in model_cfgs:
            if not model_name:
                continue
            invocation = base_cmd + ('--model', model_name)
            if extra is not None:
                for key, value in extra.items():
                    invocation += (f'--{key}', str(value))
            try:
                raw = subprocess.check_output(args=invocation)
                # the child prints its JSON payload after a '--result' delimiter
                payload = raw.decode('utf-8').split('--result')[-1]
                results.append(json.loads(payload))
            except Exception as e:
                # FIXME batch_size retry loop is currently done in either validation.py or benchmark.py
                # for further robustness (but more overhead), we may want to manage that by looping here...
                errors.append(dict(model=model_name, error=str(e)))
            if args.delay:
                time.sleep(args.delay)
    except KeyboardInterrupt as e:
        # allow Ctrl-C to stop the sweep while still saving what finished
        pass

    errors.extend(r for r in results if 'error' in r)
    if errors:
        print(f'{len(errors)} models had errors during run.')
        for e in errors:
            if 'model' in e:
                print(f"\t {e['model']} ({e.get('error', 'Unknown')})")
            else:
                print(e)

    results = [r for r in results if 'error' not in r]

    no_sortkey = [r for r in results if sort_key not in r]
    if no_sortkey:
        print(f'{len(no_sortkey)} results missing sort key, skipping sort.')
    else:
        results = sorted(results, key=lambda r: r[sort_key], reverse=True)

    if len(results):
        print(f'{len(results)} models run successfully. Saving results to {results_file}.')
        write_results(results_file, results)
198
+
199
+
200
def write_results(results_file, results):
    """Write a list of result dicts to ``results_file`` as CSV.

    The header is the union of keys across all rows (first-seen order), so rows with
    extra columns no longer raise ``ValueError``; missing values become empty cells.
    An empty ``results`` list produces an empty file instead of an ``IndexError``.
    """
    # newline='' is what the csv module expects so it controls line endings itself
    with open(results_file, mode='w', newline='') as cf:
        if not results:
            return
        fieldnames = list(dict.fromkeys(key for row in results for key in row))
        dw = csv.DictWriter(cf, fieldnames=fieldnames)
        dw.writeheader()
        dw.writerows(results)
207
+
208
+
209
+ if __name__ == '__main__':
210
+ main()
pytorch-image-models/clean_checkpoint.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """ Checkpoint Cleaning Script
3
+
4
+ Takes training checkpoints with GPU tensors, optimizer state, extra dict keys, etc.
5
+ and outputs a CPU tensor checkpoint with only the `state_dict` along with SHA256
6
+ calculation for model zoo compatibility.
7
+
8
+ Hacked together by / Copyright 2020 Ross Wightman (https://github.com/rwightman)
9
+ """
10
+ import torch
11
+ import argparse
12
+ import os
13
+ import hashlib
14
+ import shutil
15
+ import tempfile
16
+ from timm.models import load_state_dict
17
+ try:
18
+ import safetensors.torch
19
+ _has_safetensors = True
20
+ except ImportError:
21
+ _has_safetensors = False
22
+
23
# Command line interface of the checkpoint cleaner.
parser = argparse.ArgumentParser(description='PyTorch Checkpoint Cleaner')
parser.add_argument('--checkpoint', default='', type=str, metavar='PATH',
                    help='path to latest checkpoint (default: none)')
parser.add_argument('--output', default='', type=str, metavar='PATH',
                    help='output path')
# main() passes `not args.no_use_ema` as use_ema, so setting this flag DISABLES EMA weights
parser.add_argument('--no-use-ema', dest='no_use_ema', action='store_true',
                    help='do not use the EMA version of the weights, even if present')
parser.add_argument('--no-hash', dest='no_hash', action='store_true',
                    help='no hash in output filename')
parser.add_argument('--clean-aux-bn', dest='clean_aux_bn', action='store_true',
                    help='remove auxiliary batch norm layers (from SplitBN training) from checkpoint')
parser.add_argument('--safetensors', action='store_true',
                    help='Save weights using safetensors instead of the default torch way (pickle).')
36
+
37
+
38
def main():
    """CLI entry point: refuse to overwrite an existing output, then clean the checkpoint."""
    args = parser.parse_args()

    output_path = args.output
    if os.path.exists(output_path):
        print("Error: Output filename ({}) already exists.".format(output_path))
        exit(1)

    clean_checkpoint(
        args.checkpoint,
        output_path,
        use_ema=not args.no_use_ema,  # the CLI flag is the negation of the function argument
        no_hash=args.no_hash,
        clean_aux_bn=args.clean_aux_bn,
        safe_serialization=args.safetensors,
    )
53
+
54
+
55
def clean_checkpoint(
        checkpoint,
        output,
        use_ema=True,
        no_hash=False,
        clean_aux_bn=False,
        safe_serialization: bool=False,
):
    """Load a checkpoint, keep only its state_dict, and re-save it (optionally hash-suffixed).

    Args:
        checkpoint: path of the checkpoint file to load.
        output: output path; when empty the name is derived from ``checkpoint``.
        use_ema: forwarded to ``load_state_dict`` as ``use_ema``.
        no_hash: when True, do not append the first 8 SHA256 hex chars to the filename.
        clean_aux_bn: when True, drop every key containing 'aux_bn'.
        safe_serialization: save with safetensors instead of ``torch.save``.

    Returns:
        The final filename (without directory), or '' if the checkpoint does not exist.
    """
    if not (checkpoint and os.path.isfile(checkpoint)):
        print("Error: Checkpoint ({}) doesn't exist".format(checkpoint))
        return ''

    print("=> Loading checkpoint '{}'".format(checkpoint))
    state_dict = load_state_dict(checkpoint, use_ema=use_ema)
    # If all aux_bn keys are removed, the SplitBN layers will end up as normal and
    # load with the unmodified model using BatchNorm2d.
    cleaned = {
        (key[7:] if key.startswith('module.') else key): value  # strip a leading 'module.'
        for key, value in state_dict.items()
        if not (clean_aux_bn and 'aux_bn' in key)
    }
    print("=> Loaded state_dict from '{}'".format(checkpoint))

    ext = ''
    if output:
        checkpoint_root, out_name = os.path.split(output)
        checkpoint_base, ext = os.path.splitext(out_name)
    else:
        checkpoint_root = ''
        checkpoint_base = os.path.splitext(os.path.split(checkpoint)[1])[0]

    # save under a temporary name first; the hash of that file goes into the final name
    temp_filename = '__' + checkpoint_base
    if safe_serialization:
        assert _has_safetensors, "`pip install safetensors` to use .safetensors"
        safetensors.torch.save_file(cleaned, temp_filename)
    else:
        torch.save(cleaned, temp_filename)

    with open(temp_filename, 'rb') as f:
        sha_hash = hashlib.sha256(f.read()).hexdigest()

    # an extension given via `output` wins over the format default
    final_ext = ext or ('.safetensors' if safe_serialization else '.pth')
    stem = checkpoint_base if no_hash else '-'.join([checkpoint_base, sha_hash[:8]])
    final_filename = stem + final_ext

    shutil.move(temp_filename, os.path.join(checkpoint_root, final_filename))
    print("=> Saved state_dict to '{}, SHA256: {}'".format(final_filename, sha_hash))
    return final_filename
112
+
113
+
114
+ if __name__ == '__main__':
115
+ main()
pytorch-image-models/convert/convert_from_mxnet.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import hashlib
3
+ import os
4
+
5
+ import mxnet as mx
6
+ import gluoncv
7
+ import torch
8
+ from timm import create_model
9
+
10
# Command line interface of the MXNet -> PyTorch weight converter.
parser = argparse.ArgumentParser(description='Convert from MXNet')
parser.add_argument('--model', default='all', type=str, metavar='MODEL',
                    help='Name of model to convert (default: "all")')
13
+
14
+
15
def convert(mxnet_name, torch_name):
    """Convert a pretrained GluonCV model to a timm checkpoint saved as ``./<torch_name>-<sha8>.pth``.

    Parameters and batch-norm running stats are matched purely by position, so both
    models must enumerate them in the same order.
    """
    def _to_torch(mx_param):
        # MXNet parameter -> numpy array -> torch tensor
        return torch.from_numpy(mx_param.data().asnumpy())

    # download and load the pre-trained model
    net = gluoncv.model_zoo.get_model(mxnet_name, pretrained=True)

    # create corresponding torch model
    torch_net = create_model(torch_name)

    mx_weights = [(k, v) for k, v in net.collect_params().items() if 'running' not in k]
    torch_weights = list(torch_net.named_parameters())
    converted = {}

    # convert parameters
    # NOTE: we are relying on the fact that the order of parameters
    # are usually exactly the same between these models, thus no key name mapping
    # is necessary. Asserts will trip if this is not the case.
    bn_name_map = {'gamma': 'weight', 'beta': 'bias'}
    for (t_name, t_val), (m_name, m_val) in zip(torch_weights, mx_weights):
        m_parts = m_name.split('_')
        t_parts = t_name.split('.')
        print(t_parts, m_parts)
        print(t_val.shape, m_val.shape)

        # ensure ordering of BN params match since their sizes are not specific
        expected_suffix = bn_name_map.get(m_parts[-1])
        if expected_suffix is not None:
            assert t_parts[-1] == expected_suffix

        # ensure shapes match
        assert all(t == m for t, m in zip(t_val.shape, m_val.shape))

        converted[t_name] = _to_torch(m_val)

    # convert buffers (batch norm running stats)
    mx_buffers = [
        (k, v) for k, v in net.collect_params().items()
        if 'running_mean' in k or 'running_var' in k
    ]
    torch_buffers = [(k, v) for k, v in torch_net.named_buffers() if 'num_batches' not in k]
    for (t_name, t_val), (m_name, m_val) in zip(torch_buffers, mx_buffers):
        print(t_name, m_name)
        print(t_val.shape, m_val.shape)

        # ensure ordering of BN params match since their sizes are not specific
        for stat in ('running_var', 'running_mean'):
            if stat in t_name:
                assert stat in m_name

        converted[t_name] = _to_torch(m_val)

    torch_net.load_state_dict(converted)
    torch_filename = f'./{torch_name}.pth'
    torch.save(torch_net.state_dict(), torch_filename)
    with open(torch_filename, 'rb') as f:
        sha_hash = hashlib.sha256(f.read()).hexdigest()
    # rename so the first 8 hex chars of the file hash are part of the filename
    final_filename = os.path.splitext(torch_filename)[0] + '-' + sha_hash[:8] + '.pth'
    os.rename(torch_filename, final_filename)
    print("=> Saved converted model to '{}, SHA256: {}'".format(final_filename, sha_hash))
72
+
73
+
74
def map_mx_to_torch_model(mx_name):
    """Translate a GluonCV model name into the matching ``gluon_*`` timm model name."""
    # (prefix to look for, replacement); only the first matching rule is applied
    rewrites = (
        ('se_', 'se'),
        ('senet_', 'senet'),
        ('inceptionv3', 'inception_v3'),
    )
    name = mx_name.lower()
    for old, new in rewrites:
        if name.startswith(old):
            name = name.replace(old, new)
            break
    return 'gluon_' + name
84
+
85
+
86
+ ALL = ['resnet18_v1b', 'resnet34_v1b', 'resnet50_v1b', 'resnet101_v1b', 'resnet152_v1b',
87
+ 'resnet50_v1c', 'resnet101_v1c', 'resnet152_v1c', 'resnet50_v1d', 'resnet101_v1d', 'resnet152_v1d',
88
+ #'resnet50_v1e', 'resnet101_v1e', 'resnet152_v1e',
89
+ 'resnet50_v1s', 'resnet101_v1s', 'resnet152_v1s', 'resnext50_32x4d', 'resnext101_32x4d', 'resnext101_64x4d',
90
+ 'se_resnext50_32x4d', 'se_resnext101_32x4d', 'se_resnext101_64x4d', 'senet_154', 'inceptionv3']
91
+
92
+
93
def main():
    """CLI entry point: convert every model in ``ALL``, or only the one given via ``--model``."""
    args = parser.parse_args()

    convert_everything = not args.model or args.model == 'all'
    targets = ALL if convert_everything else [args.model]
    for mx_model in targets:
        torch_model = map_mx_to_torch_model(mx_model)
        convert(mx_model, torch_model)
104
+
105
+
106
+ if __name__ == '__main__':
107
+ main()
pytorch-image-models/convert/convert_nest_flax.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Convert weights from https://github.com/google-research/nested-transformer
3
+ NOTE: You'll need https://github.com/google/CommonLoopUtils, not included in requirements.txt
4
+ """
5
+
6
+ import sys
7
+
8
+ import numpy as np
9
+ import torch
10
+
11
+ from clu import checkpoint
12
+
13
+
14
+ arch_depths = {
15
+ 'nest_base': [2, 2, 20],
16
+ 'nest_small': [2, 2, 20],
17
+ 'nest_tiny': [2, 2, 8],
18
+ }
19
+
20
+
21
def convert_nest(checkpoint_path, arch):
    """
    Expects path to checkpoint which is a dir containing 4 files like in each of these folders
        - https://console.cloud.google.com/storage/browser/gresearch/nest-checkpoints
    `arch` selects the per-level transformer depths from `arch_depths`
    Returns a state dict that can be used with `torch.nn.Module.load_state_dict`
    Hint: Follow timm.models.nest.Nest.__init__ and
    https://github.com/google-research/nested-transformer/blob/main/models/nest_net.py
    """
    assert arch in ['nest_base', 'nest_small', 'nest_tiny'], "Your `arch` is not supported"

    flax_dict = checkpoint.load_state_dict(checkpoint_path)['optimizer']['target']
    state_dict = {}

    # Patch embedding
    patch_conv = flax_dict['PatchEmbedding_0']['Conv_0']
    state_dict['patch_embed.proj.weight'] = torch.tensor(patch_conv['kernel']).permute(3, 2, 0, 1)
    state_dict['patch_embed.proj.bias'] = torch.tensor(patch_conv['bias'])

    # Positional embeddings
    posemb_keys = [k for k in flax_dict.keys() if k.startswith('PositionEmbedding')]
    for level_ix, key in enumerate(posemb_keys):
        state_dict[f'levels.{level_ix}.pos_embed'] = torch.tensor(flax_dict[key]['pos_embedding'])

    # Transformer encoders
    depths = arch_depths[arch]
    for level, depth in enumerate(depths):
        # flax numbers encoder blocks globally, timm numbers them per level
        level_offset = sum(depths[:level])
        for layer in range(depth):
            block = flax_dict[f'EncoderNDBlock_{level_offset + layer}']
            attn = block['MultiHeadAttention_0']
            prefix = f'levels.{level}.transformer_encoder.{layer}'

            # Norms
            for n in range(2):
                norm = block[f'LayerNorm_{n}']
                state_dict[f'{prefix}.norm{n+1}.weight'] = torch.tensor(norm['scale'])
                state_dict[f'{prefix}.norm{n+1}.bias'] = torch.tensor(norm['bias'])

            # Attention qkv
            w_q = attn['DenseGeneral_0']['kernel']
            w_kv = attn['DenseGeneral_1']['kernel']
            # Pay attention to dims here (maybe get pen and paper)
            w_kv = np.concatenate(np.split(w_kv, 2, -1), 1)
            w_qkv = np.concatenate([w_q, w_kv], 1)
            state_dict[f'{prefix}.attn.qkv.weight'] = torch.tensor(w_qkv).flatten(1).permute(1,0)
            b_q = attn['DenseGeneral_0']['bias']
            b_kv = attn['DenseGeneral_1']['bias']
            # Pay attention to dims here (maybe get pen and paper)
            b_kv = np.concatenate(np.split(b_kv, 2, -1), 0)
            b_qkv = np.concatenate([b_q, b_kv], 0)
            state_dict[f'{prefix}.attn.qkv.bias'] = torch.tensor(b_qkv).reshape(-1)

            # Attention proj
            w_proj = torch.tensor(attn['proj_kernel']).permute(2, 1, 0).flatten(1)
            state_dict[f'{prefix}.attn.proj.weight'] = w_proj
            state_dict[f'{prefix}.attn.proj.bias'] = torch.tensor(attn['bias'])

            # MLP
            for n in range(2):
                dense = block['MlpBlock_0'][f'Dense_{n}']
                state_dict[f'{prefix}.mlp.fc{n+1}.weight'] = torch.tensor(dense['kernel']).permute(1, 0)
                state_dict[f'{prefix}.mlp.fc{n+1}.bias'] = torch.tensor(dense['bias'])

    # Block aggregations (ConvPool)
    for level in range(1, len(depths)):
        pool = flax_dict[f'ConvPool_{level-1}']
        # Convs
        state_dict[f'levels.{level}.pool.conv.weight'] = torch.tensor(
            pool['Conv_0']['kernel']).permute(3, 2, 0, 1)
        state_dict[f'levels.{level}.pool.conv.bias'] = torch.tensor(pool['Conv_0']['bias'])
        # Norms
        state_dict[f'levels.{level}.pool.norm.weight'] = torch.tensor(pool['LayerNorm_0']['scale'])
        state_dict[f'levels.{level}.pool.norm.bias'] = torch.tensor(pool['LayerNorm_0']['bias'])

    # Final norm
    final_norm = flax_dict['LayerNorm_0']
    state_dict['norm.weight'] = torch.tensor(final_norm['scale'])
    state_dict['norm.bias'] = torch.tensor(final_norm['bias'])

    # Classifier
    head = flax_dict['Dense_0']
    state_dict['head.weight'] = torch.tensor(head['kernel']).permute(1, 0)
    state_dict['head.bias'] = torch.tensor(head['bias'])

    return state_dict
104
+
105
+
106
+ if __name__ == '__main__':
107
+ variant = sys.argv[1] # base, small, or tiny
108
+ state_dict = convert_nest(f'./nest-{variant[0]}_imagenet', f'nest_{variant}')
109
+ torch.save(state_dict, f'./jx_nest_{variant}.pth')
pytorch-image-models/distributed_train.sh ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ NUM_PROC=$1
3
+ shift
4
+ torchrun --nproc_per_node=$NUM_PROC train.py "$@"
5
+
pytorch-image-models/docs/archived_changes.md ADDED
@@ -0,0 +1,406 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Archived Changes
2
+
3
+ ### Nov 22, 2021
4
+ * A number of updated weights and new model defs
5
+ * `eca_halonext26ts` - 79.5 @ 256
6
+ * `resnet50_gn` (new) - 80.1 @ 224, 81.3 @ 288
7
+ * `resnet50` - 80.7 @ 224, 80.9 @ 288 (trained at 176, not replacing current a1 weights as default since these don't scale as well to higher res, [weights](https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-rsb-weights/resnet50_a1h2_176-001a1197.pth))
8
+ * `resnext50_32x4d` - 81.1 @ 224, 82.0 @ 288
9
+ * `sebotnet33ts_256` (new) - 81.2 @ 224
10
+ * `lamhalobotnet50ts_256` - 81.5 @ 256
11
+ * `halonet50ts` - 81.7 @ 256
12
+ * `halo2botnet50ts_256` - 82.0 @ 256
13
+ * `resnet101` - 82.0 @ 224, 82.8 @ 288
14
+ * `resnetv2_101` (new) - 82.1 @ 224, 83.0 @ 288
15
+ * `resnet152` - 82.8 @ 224, 83.5 @ 288
16
+ * `regnetz_d8` (new) - 83.5 @ 256, 84.0 @ 320
17
+ * `regnetz_e8` (new) - 84.5 @ 256, 85.0 @ 320
18
+ * `vit_base_patch8_224` (85.8 top-1) & `in21k` variant weights added thanks [Martins Bruveris](https://github.com/martinsbruveris)
19
+ * Groundwork in for FX feature extraction thanks to [Alexander Soare](https://github.com/alexander-soare)
20
+ * models updated for tracing compatibility (almost full support with some distilled transformer exceptions)
21
+
22
+ ### Oct 19, 2021
23
+ * ResNet strikes back (https://arxiv.org/abs/2110.00476) weights added, plus any extra training components used. Model weights and some more details here (https://github.com/rwightman/pytorch-image-models/releases/tag/v0.1-rsb-weights)
24
+ * BCE loss and Repeated Augmentation support for RSB paper
25
+ * 4 series of ResNet based attention model experiments being added (implemented across byobnet.py/byoanet.py). These include all sorts of attention, from channel attn like SE, ECA to 2D QKV self-attention layers such as Halo, Bottleneck, Lambda. Details here (https://github.com/rwightman/pytorch-image-models/releases/tag/v0.1-attn-weights)
26
+ * Working implementations of the following 2D self-attention modules (likely to be differences from paper or eventual official impl):
27
+ * Halo (https://arxiv.org/abs/2103.12731)
28
+ * Bottleneck Transformer (https://arxiv.org/abs/2101.11605)
29
+ * LambdaNetworks (https://arxiv.org/abs/2102.08602)
30
+ * A RegNetZ series of models with some attention experiments (being added to). These do not follow the paper (https://arxiv.org/abs/2103.06877) in any way other than block architecture, details of official models are not available. See more here (https://github.com/rwightman/pytorch-image-models/releases/tag/v0.1-attn-weights)
31
+ * ConvMixer (https://openreview.net/forum?id=TVHS5Y4dNvM), CrossVit (https://arxiv.org/abs/2103.14899), and BeiT (https://arxiv.org/abs/2106.08254) architectures + weights added
32
+ * freeze/unfreeze helpers by [Alexander Soare](https://github.com/alexander-soare)
33
+
34
+ ### Aug 18, 2021
35
+ * Optimizer bonanza!
36
+ * Add LAMB and LARS optimizers, incl trust ratio clipping options. Tweaked to work properly in PyTorch XLA (tested on TPUs w/ `timm bits` [branch](https://github.com/rwightman/pytorch-image-models/tree/bits_and_tpu/timm/bits))
37
+ * Add MADGRAD from FB research w/ a few tweaks (decoupled decay option, step handling that works with PyTorch XLA)
38
+ * Some cleanup on all optimizers and factory. No more `.data`, a bit more consistency, unit tests for all!
39
+ * SGDP and AdamP still won't work with PyTorch XLA but others should (have yet to test Adabelief, Adafactor, Adahessian myself).
40
+ * EfficientNet-V2 XL TF ported weights added, but they don't validate well in PyTorch (L is better). The pre-processing for the V2 TF training is a bit diff and the fine-tuned 21k -> 1k weights are very sensitive and less robust than the 1k weights.
41
+ * Added PyTorch trained EfficientNet-V2 'Tiny' w/ GlobalContext attn weights. Only .1-.2 top-1 better than the SE so more of a curiosity for those interested.
42
+
43
+ ### July 12, 2021
44
+ * Add XCiT models from [official facebook impl](https://github.com/facebookresearch/xcit). Contributed by [Alexander Soare](https://github.com/alexander-soare)
45
+
46
+ ### July 5-9, 2021
47
+ * Add `efficientnetv2_rw_t` weights, a custom 'tiny' 13.6M param variant that is a bit better than (non NoisyStudent) B3 models. Both faster and better accuracy (at same or lower res)
48
+ * top-1 82.34 @ 288x288 and 82.54 @ 320x320
49
+ * Add [SAM pretrained](https://arxiv.org/abs/2106.01548) in1k weight for ViT B/16 (`vit_base_patch16_sam_224`) and B/32 (`vit_base_patch32_sam_224`) models.
50
+ * Add 'Aggregating Nested Transformer' (NesT) w/ weights converted from official [Flax impl](https://github.com/google-research/nested-transformer). Contributed by [Alexander Soare](https://github.com/alexander-soare).
51
+ * `jx_nest_base` - 83.534, `jx_nest_small` - 83.120, `jx_nest_tiny` - 81.426
52
+
53
+ ### June 23, 2021
54
+ * Reproduce gMLP model training, `gmlp_s16_224` trained to 79.6 top-1, matching [paper](https://arxiv.org/abs/2105.08050). Hparams for this and other recent MLP training [here](https://gist.github.com/rwightman/d6c264a9001f9167e06c209f630b2cc6)
55
+
56
+ ### June 20, 2021
57
+ * Release Vision Transformer 'AugReg' weights from [How to train your ViT? Data, Augmentation, and Regularization in Vision Transformers](https://arxiv.org/abs/2106.10270)
58
+ * .npz weight loading support added, can load any of the 50K+ weights from the [AugReg series](https://console.cloud.google.com/storage/browser/vit_models/augreg)
59
+ * See [example notebook](https://colab.research.google.com/github/google-research/vision_transformer/blob/master/vit_jax_augreg.ipynb) from [official impl](https://github.com/google-research/vision_transformer/) for navigating the augreg weights
60
+ * Replaced all default weights w/ best AugReg variant (if possible). All AugReg 21k classifiers work.
61
+ * Highlights: `vit_large_patch16_384` (87.1 top-1), `vit_large_r50_s32_384` (86.2 top-1), `vit_base_patch16_384` (86.0 top-1)
62
+ * `vit_deit_*` renamed to just `deit_*`
63
+ * Remove my old small model, replace with DeiT compatible small w/ AugReg weights
64
+ * Add 1st training of my `gmixer_24_224` MLP w/ GLU, 78.1 top-1 w/ 25M params.
65
+ * Add weights from official ResMLP release (https://github.com/facebookresearch/deit)
66
+ * Add `eca_nfnet_l2` weights from my 'lightweight' series. 84.7 top-1 at 384x384.
67
+ * Add distilled BiT 50x1 student and 152x2 Teacher weights from [Knowledge distillation: A good teacher is patient and consistent](https://arxiv.org/abs/2106.05237)
68
+ * NFNets and ResNetV2-BiT models work w/ Pytorch XLA now
69
+ * weight standardization uses F.batch_norm instead of std_mean (std_mean wasn't lowered)
70
+ * eps values adjusted, will be slight differences but should be quite close
71
+ * Improve test coverage and classifier interface of non-conv (vision transformer and mlp) models
72
+ * Cleanup a few classifier / flatten details for models w/ conv classifiers or early global pool
73
+ * Please report any regressions, this PR touched quite a few models.
74
+
75
+ ### June 8, 2021
76
+ * Add first ResMLP weights, trained in PyTorch XLA on TPU-VM w/ my XLA branch. 24 block variant, 79.2 top-1.
77
+ * Add ResNet51-Q model w/ pretrained weights at 82.36 top-1.
78
+ * NFNet inspired block layout with quad layer stem and no maxpool
79
+ * Same param count (35.7M) and throughput as ResNetRS-50 but +1.5 top-1 @ 224x224 and +2.5 top-1 at 288x288
80
+
81
+ ### May 25, 2021
82
+ * Add LeViT, Visformer, Convit (PR by Aman Arora), Twins (PR by paper authors) transformer models
83
+ * Cleanup input_size/img_size override handling and testing for all vision transformer models
84
+ * Add `efficientnetv2_rw_m` model and weights (started training before official code). 84.8 top-1, 53M params.
85
+
86
+ ### May 14, 2021
87
+ * Add EfficientNet-V2 official model defs w/ ported weights from official [Tensorflow/Keras](https://github.com/google/automl/tree/master/efficientnetv2) impl.
88
+ * 1k trained variants: `tf_efficientnetv2_s/m/l`
89
+ * 21k trained variants: `tf_efficientnetv2_s/m/l_in21k`
90
+ * 21k pretrained -> 1k fine-tuned: `tf_efficientnetv2_s/m/l_in21ft1k`
91
+ * v2 models w/ v1 scaling: `tf_efficientnetv2_b0` through `b3`
92
+ * Rename my prev V2 guess `efficientnet_v2s` -> `efficientnetv2_rw_s`
93
+ * Some blank `efficientnetv2_*` models in-place for future native PyTorch training
94
+
95
+ ### May 5, 2021
96
+ * Add MLP-Mixer models and port pretrained weights from [Google JAX impl](https://github.com/google-research/vision_transformer/tree/linen)
97
+ * Add CaiT models and pretrained weights from [FB](https://github.com/facebookresearch/deit)
98
+ * Add ResNet-RS models and weights from [TF](https://github.com/tensorflow/tpu/tree/master/models/official/resnet/resnet_rs). Thanks [Aman Arora](https://github.com/amaarora)
99
+ * Add CoaT models and weights. Thanks [Mohammed Rizin](https://github.com/morizin)
100
+ * Add new ImageNet-21k weights & finetuned weights for TResNet, MobileNet-V3, ViT models. Thanks [mrT](https://github.com/mrT23)
101
+ * Add GhostNet models and weights. Thanks [Kai Han](https://github.com/iamhankai)
102
+ * Update ByoaNet attention models
103
+ * Improve SA module inits
104
+ * Hack together experimental stand-alone Swin based attn module and `swinnet`
105
+ * Consistent '26t' model defs for experiments.
106
+ * Add improved Efficientnet-V2S (prelim model def) weights. 83.8 top-1.
107
+ * WandB logging support
108
+
109
+ ### April 13, 2021
110
+ * Add Swin Transformer models and weights from https://github.com/microsoft/Swin-Transformer
111
+
112
+ ### April 12, 2021
113
+ * Add ECA-NFNet-L1 (slimmed down F1 w/ SiLU, 41M params) trained with this code. 84% top-1 @ 320x320. Trained at 256x256.
114
+ * Add EfficientNet-V2S model (unverified model definition) weights. 83.3 top-1 @ 288x288. Only trained single res 224. Working on progressive training.
115
+ * Add ByoaNet model definition (Bring-your-own-attention) w/ SelfAttention block and corresponding SA/SA-like modules and model defs
116
+ * Lambda Networks - https://arxiv.org/abs/2102.08602
117
+ * Bottleneck Transformers - https://arxiv.org/abs/2101.11605
118
+ * Halo Nets - https://arxiv.org/abs/2103.12731
119
+ * Adabelief optimizer contributed by Juntang Zhuang
120
+
121
+ ### April 1, 2021
122
+ * Add snazzy `benchmark.py` script for bulk `timm` model benchmarking of train and/or inference
123
+ * Add Pooling-based Vision Transformer (PiT) models (from https://github.com/naver-ai/pit)
124
+ * Merged distilled variant into main for torchscript compatibility
125
+ * Some `timm` cleanup/style tweaks and weights have hub download support
126
+ * Cleanup Vision Transformer (ViT) models
127
+ * Merge distilled (DeiT) model into main so that torchscript can work
128
+ * Support updated weight init (defaults to old still) that closer matches original JAX impl (possibly better training from scratch)
129
+ * Separate hybrid model defs into different file and add several new model defs to fiddle with, support patch_size != 1 for hybrids
130
+ * Fix fine-tuning num_class changes (PiT and ViT) and pos_embed resizing (Vit) with distilled variants
131
+ * nn.Sequential for block stack (does not break downstream compat)
132
+ * TnT (Transformer-in-Transformer) models contributed by author (from https://gitee.com/mindspore/mindspore/tree/master/model_zoo/research/cv/TNT)
133
+ * Add RegNetY-160 weights from DeiT teacher model
134
+ * Add new NFNet-L0 w/ SE attn (rename `nfnet_l0b`->`nfnet_l0`) weights 82.75 top-1 @ 288x288
135
+ * Some fixes/improvements for TFDS dataset wrapper
136
+
137
+ ### March 7, 2021
138
+ * First 0.4.x PyPi release w/ NFNets (& related), ByoB (GPU-Efficient, RepVGG, etc).
139
+ * Change feature extraction for pre-activation nets (NFNets, ResNetV2) to return features before activation.
140
+
141
+ ### Feb 18, 2021
142
+ * Add pretrained weights and model variants for NFNet-F* models from [DeepMind Haiku impl](https://github.com/deepmind/deepmind-research/tree/master/nfnets).
143
+ * Models are prefixed with `dm_`. They require SAME padding conv, skipinit enabled, and activation gains applied in act fn.
144
+ * These models are big, expect to run out of GPU memory. With the GELU activation + other options, they are roughly 1/2 the inference speed of my SiLU PyTorch optimized `s` variants.
145
+ * Original model results are based on pre-processing that is not the same as all other models so you'll see different results in the results csv (once updated).
146
+ * Matching the original pre-processing as closely as possible I get these results:
147
+ * `dm_nfnet_f6` - 86.352
148
+ * `dm_nfnet_f5` - 86.100
149
+ * `dm_nfnet_f4` - 85.834
150
+ * `dm_nfnet_f3` - 85.676
151
+ * `dm_nfnet_f2` - 85.178
152
+ * `dm_nfnet_f1` - 84.696
153
+ * `dm_nfnet_f0` - 83.464
154
+
155
+ ### Feb 16, 2021
156
+ * Add Adaptive Gradient Clipping (AGC) as per https://arxiv.org/abs/2102.06171. Integrated w/ PyTorch gradient clipping via mode arg that defaults to prev 'norm' mode. For backward arg compat, clip-grad arg must be specified to enable when using train.py.
157
+ * AGC w/ default clipping factor `--clip-grad .01 --clip-mode agc`
158
+ * PyTorch global norm of 1.0 (old behaviour, always norm), `--clip-grad 1.0`
159
+ * PyTorch value clipping of 10, `--clip-grad 10. --clip-mode value`
160
+ * AGC performance is definitely sensitive to the clipping factor. More experimentation needed to determine good values for smaller batch sizes and optimizers besides those in paper. So far I've found .001-.005 is necessary for stable RMSProp training w/ NFNet/NF-ResNet.
161
+
162
+ ### Feb 12, 2021
163
+ * Update Normalization-Free nets to include new NFNet-F (https://arxiv.org/abs/2102.06171) model defs
164
+
165
+ ### Feb 10, 2021
166
+ * More model archs, incl a flexible ByobNet backbone ('Bring-your-own-blocks')
167
+ * GPU-Efficient-Networks (https://github.com/idstcv/GPU-Efficient-Networks), impl in `byobnet.py`
168
+ * RepVGG (https://github.com/DingXiaoH/RepVGG), impl in `byobnet.py`
169
+ * classic VGG (from torchvision, impl in `vgg`)
170
+ * Refinements to normalizer layer arg handling and normalizer+act layer handling in some models
171
+ * Default AMP mode changed to native PyTorch AMP instead of APEX. Issues not being fixed with APEX. Native works with `--channels-last` and `--torchscript` model training, APEX does not.
172
+ * Fix a few bugs introduced since last pypi release
173
+
174
+ ### Feb 8, 2021
175
+ * Add several ResNet weights with ECA attention. 26t & 50t trained @ 256, test @ 320. 269d train @ 256, fine-tune @320, test @ 352.
176
+ * `ecaresnet26t` - 79.88 top-1 @ 320x320, 79.08 @ 256x256
177
+ * `ecaresnet50t` - 82.35 top-1 @ 320x320, 81.52 @ 256x256
178
+ * `ecaresnet269d` - 84.93 top-1 @ 352x352, 84.87 @ 320x320
179
+ * Remove separate tiered (`t`) vs tiered_narrow (`tn`) ResNet model defs, all `tn` changed to `t` and `t` models removed (`seresnext26t_32x4d` only model w/ weights that was removed).
180
+ * Support model default_cfgs with separate train vs test resolution `test_input_size` and remove extra `_320` suffix ResNet model defs that were just for test.
181
+
182
+ ### Jan 30, 2021
183
+ * Add initial "Normalization Free" NF-RegNet-B* and NF-ResNet model definitions based on [paper](https://arxiv.org/abs/2101.08692)
184
+
185
+ ### Jan 25, 2021
186
+ * Add ResNetV2 Big Transfer (BiT) models w/ ImageNet-1k and 21k weights from https://github.com/google-research/big_transfer
187
+ * Add official R50+ViT-B/16 hybrid models + weights from https://github.com/google-research/vision_transformer
188
+ * ImageNet-21k ViT weights are added w/ model defs and representation layer (pre logits) support
189
+ * NOTE: ImageNet-21k classifier heads were zero'd in original weights, they are only useful for transfer learning
190
+ * Add model defs and weights for DeiT Vision Transformer models from https://github.com/facebookresearch/deit
191
+ * Refactor dataset classes into ImageDataset/IterableImageDataset + dataset specific parser classes
192
+ * Add Tensorflow-Datasets (TFDS) wrapper to allow use of TFDS image classification sets with train script
193
+ * Ex: `train.py /data/tfds --dataset tfds/oxford_iiit_pet --val-split test --model resnet50 -b 256 --amp --num-classes 37 --opt adamw --lr 3e-4 --weight-decay .001 --pretrained -j 2`
194
+ * Add improved .tar dataset parser that reads images from .tar, folder of .tar files, or .tar within .tar
195
+ * Run validation on full ImageNet-21k directly from tar w/ BiT model: `validate.py /data/fall11_whole.tar --model resnetv2_50x1_bitm_in21k --amp`
196
+ * Models in this update should be stable w/ possible exception of ViT/BiT, possibility of some regressions with train/val scripts and dataset handling
197
+
198
+ ### Jan 3, 2021
199
+ * Add SE-ResNet-152D weights
200
+ * 256x256 val, 0.94 crop top-1 - 83.75
201
+ * 320x320 val, 1.0 crop - 84.36
202
+ * Update results files
203
+
204
+ ### Dec 18, 2020
205
+ * Add ResNet-101D, ResNet-152D, and ResNet-200D weights trained @ 256x256
206
+ * 256x256 val, 0.94 crop (top-1) - 101D (82.33), 152D (83.08), 200D (83.25)
207
+ * 288x288 val, 1.0 crop - 101D (82.64), 152D (83.48), 200D (83.76)
208
+ * 320x320 val, 1.0 crop - 101D (83.00), 152D (83.66), 200D (84.01)
209
+
210
+ ### Dec 7, 2020
211
+ * Simplify EMA module (ModelEmaV2), compatible with fully torchscripted models
212
+ * Misc fixes for SiLU ONNX export, default_cfg missing from Feature extraction models, Linear layer w/ AMP + torchscript
213
+ * PyPi release @ 0.3.2 (needed by EfficientDet)
214
+
215
+
216
+ ### Oct 30, 2020
217
+ * Test with PyTorch 1.7 and fix a small top-n metric view vs reshape issue.
218
+ * Convert newly added 224x224 Vision Transformer weights from official JAX repo. 81.8 top-1 for B/16, 83.1 L/16.
219
+ * Support PyTorch 1.7 optimized, native SiLU (aka Swish) activation. Add mapping to 'silu' name, custom swish will eventually be deprecated.
220
+ * Fix regression for loading pretrained classifier via direct model entrypoint functions. Didn't impact create_model() factory usage.
221
+ * PyPi release @ 0.3.0 version!
222
+
223
+ ### Oct 26, 2020
224
+ * Update Vision Transformer models to be compatible with official code release at https://github.com/google-research/vision_transformer
225
+ * Add Vision Transformer weights (ImageNet-21k pretrain) for 384x384 base and large models converted from official jax impl
226
+ * ViT-B/16 - 84.2
227
+ * ViT-B/32 - 81.7
228
+ * ViT-L/16 - 85.2
229
+ * ViT-L/32 - 81.5
230
+
231
+ ### Oct 21, 2020
232
+ * Weights added for Vision Transformer (ViT) models. 77.86 top-1 for 'small' and 79.35 for 'base'. Thanks to [Christof](https://www.kaggle.com/christofhenkel) for training the base model w/ lots of GPUs.
233
+
234
+ ### Oct 13, 2020
235
+ * Initial impl of Vision Transformer models. Both patch and hybrid (CNN backbone) variants. Currently trying to train...
236
+ * Adafactor and AdaHessian (FP32 only, no AMP) optimizers
237
+ * EdgeTPU-M (`efficientnet_em`) model trained in PyTorch, 79.3 top-1
238
+ * Pip release, doc updates pending a few more changes...
239
+
240
+ ### Sept 18, 2020
241
+ * New ResNet 'D' weights. 72.7 (top-1) ResNet-18-D, 77.1 ResNet-34-D, 80.5 ResNet-50-D
242
+ * Added a few untrained defs for other ResNet models (66D, 101D, 152D, 200/200D)
243
+
244
+ ### Sept 3, 2020
245
+ * New weights
246
+ * Wide-ResNet50 - 81.5 top-1 (vs 78.5 torchvision)
247
+ * SEResNeXt50-32x4d - 81.3 top-1 (vs 79.1 cadene)
248
+ * Support for native Torch AMP and channels_last memory format added to train/validate scripts (`--channels-last`, `--native-amp` vs `--apex-amp`)
249
+ * Models tested with channels_last on latest NGC 20.08 container. AdaptiveAvgPool in attn layers changed to mean((2,3)) to work around bug with NHWC kernel.
250
+
251
+ ### Aug 12, 2020
252
+ * New/updated weights from training experiments
253
+ * EfficientNet-B3 - 82.1 top-1 (vs 81.6 for official with AA and 81.9 for AdvProp)
254
+ * RegNetY-3.2GF - 82.0 top-1 (78.9 from official ver)
255
+ * CSPResNet50 - 79.6 top-1 (76.6 from official ver)
256
+ * Add CutMix integrated w/ Mixup. See [pull request](https://github.com/rwightman/pytorch-image-models/pull/218) for some usage examples
257
+ * Some fixes for using pretrained weights with `in_chans` != 3 on several models.
258
+
259
+ ### Aug 5, 2020
260
+ Universal feature extraction, new models, new weights, new test sets.
261
+ * All models support the `features_only=True` argument for `create_model` call to return a network that extracts feature maps from the deepest layer at each stride.
262
+ * New models
263
+ * CSPResNet, CSPResNeXt, CSPDarkNet, DarkNet
264
+ * ReXNet
265
+ * (Modified Aligned) Xception41/65/71 (a proper port of TF models)
266
+ * New trained weights
267
+ * SEResNet50 - 80.3 top-1
268
+ * CSPDarkNet53 - 80.1 top-1
269
+ * CSPResNeXt50 - 80.0 top-1
270
+ * DPN68b - 79.2 top-1
271
+ * EfficientNet-Lite0 (non-TF ver) - 75.5 (submitted by [@hal-314](https://github.com/hal-314))
272
+ * Add 'real' labels for ImageNet and ImageNet-Renditions test set, see [`results/README.md`](results/README.md)
273
+ * Test set ranking/top-n diff script by [@KushajveerSingh](https://github.com/KushajveerSingh)
274
+ * Train script and loader/transform tweaks to punch through more aug arguments
275
+ * README and documentation overhaul. See initial (WIP) documentation at https://rwightman.github.io/pytorch-image-models/
276
+ * adamp and sgdp optimizers added by [@hellbell](https://github.com/hellbell)
277
+
278
+ ### June 11, 2020
279
+ Bunch of changes:
280
+ * DenseNet models updated with memory efficient addition from torchvision (fixed a bug), blur pooling and deep stem additions
281
+ * VoVNet V1 and V2 models added, 39 V2 variant (ese_vovnet_39b) trained to 79.3 top-1
282
+ * Activation factory added along with new activations:
283
+ * select act at model creation time for more flexibility in using activations compatible with scripting or tracing (ONNX export)
284
+ * hard_mish (experimental) added with memory-efficient grad, along with ME hard_swish
285
+ * context mgr for setting exportable/scriptable/no_jit states
286
+ * Norm + Activation combo layers added with initial trial support in DenseNet and VoVNet along with impl of EvoNorm and InplaceAbn wrapper that fit the interface
287
+ * Torchscript works for all but two of the model types as long as using Pytorch 1.5+, tests added for this
288
+ * Some import cleanup and classifier reset changes, all models will have classifier reset to nn.Identity on reset_classifier(0) call
289
+ * Prep for 0.1.28 pip release
290
+
291
+ ### May 12, 2020
292
+ * Add ResNeSt models (code adapted from https://github.com/zhanghang1989/ResNeSt, paper https://arxiv.org/abs/2004.08955)
293
+
294
+ ### May 3, 2020
295
+ * Pruned EfficientNet B1, B2, and B3 (https://arxiv.org/abs/2002.08258) contributed by [Yonathan Aflalo](https://github.com/yoniaflalo)
296
+
297
+ ### May 1, 2020
298
+ * Merged a number of excellent contributions in the ResNet model family over the past month
299
+ * BlurPool2D and resnetblur models initiated by [Chris Ha](https://github.com/VRandme), I trained resnetblur50 to 79.3.
300
+ * TResNet models and SpaceToDepth, AntiAliasDownsampleLayer layers by [mrT23](https://github.com/mrT23)
301
+ * ecaresnet (50d, 101d, light) models and two pruned variants using pruning as per (https://arxiv.org/abs/2002.08258) by [Yonathan Aflalo](https://github.com/yoniaflalo)
302
+ * 200 pretrained models in total now with updated results csv in results folder
303
+
304
+ ### April 5, 2020
305
+ * Add some newly trained MobileNet-V2 models trained with latest h-params, rand augment. They compare quite favourably to EfficientNet-Lite
306
+ * 3.5M param MobileNet-V2 100 @ 73%
307
+ * 4.5M param MobileNet-V2 110d @ 75%
308
+ * 6.1M param MobileNet-V2 140 @ 76.5%
309
+ * 5.8M param MobileNet-V2 120d @ 77.3%
310
+
311
+ ### March 18, 2020
312
+ * Add EfficientNet-Lite models w/ weights ported from [Tensorflow TPU](https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet/lite)
313
+ * Add RandAugment trained ResNeXt-50 32x4d weights with 79.8 top-1. Trained by [Andrew Lavin](https://github.com/andravin) (see Training section for hparams)
314
+
315
+ ### April 5, 2020
316
+ * Add some newly trained MobileNet-V2 models trained with latest h-params, rand augment. They compare quite favourably to EfficientNet-Lite
317
+ * 3.5M param MobileNet-V2 100 @ 73%
318
+ * 4.5M param MobileNet-V2 110d @ 75%
319
+ * 6.1M param MobileNet-V2 140 @ 76.5%
320
+ * 5.8M param MobileNet-V2 120d @ 77.3%
321
+
322
+ ### March 18, 2020
323
+ * Add EfficientNet-Lite models w/ weights ported from [Tensorflow TPU](https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet/lite)
324
+ * Add RandAugment trained ResNeXt-50 32x4d weights with 79.8 top-1. Trained by [Andrew Lavin](https://github.com/andravin) (see Training section for hparams)
325
+
326
+ ### Feb 29, 2020
327
+ * New MobileNet-V3 Large weights trained from scratch with this code to 75.77% top-1
328
+ * IMPORTANT CHANGE - default weight init changed for all MobilenetV3 / EfficientNet / related models
329
+ * overall results similar to a bit better training from scratch on a few smaller models tried
330
+ * performance early in training seems consistently improved but less difference by end
331
+ * set `fix_group_fanout=False` in `_init_weight_goog` fn if you need to reproduce past behaviour
332
+ * Experimental LR noise feature added; it applies a random perturbation to LR each epoch in a specified range of training
333
+
334
+ ### Feb 18, 2020
335
+ * Big refactor of model layers and addition of several attention mechanisms. Several additions motivated by 'Compounding the Performance Improvements...' (https://arxiv.org/abs/2001.06268):
336
+ * Move layer/module impl into `layers` subfolder/module of `models` and organize in a more granular fashion
337
+ * ResNet downsample paths now properly support dilation (output stride != 32) for avg_pool ('D' variant) and 3x3 (SENets) networks
338
+ * Add Selective Kernel Nets on top of ResNet base, pretrained weights
339
+ * skresnet18 - 73% top-1
340
+ * skresnet34 - 76.9% top-1
341
+ * skresnext50_32x4d (equiv to SKNet50) - 80.2% top-1
342
+ * ECA and CECA (circular padding) attention layer contributed by [Chris Ha](https://github.com/VRandme)
343
+ * CBAM attention experiment (not the best results so far, may remove)
344
+ * Attention factory to allow dynamically selecting one of SE, ECA, CBAM in the `.se` position for all ResNets
345
+ * Add DropBlock and DropPath (formerly DropConnect for EfficientNet/MobileNetv3) support to all ResNet variants
346
+ * Full dataset results updated that incl NoisyStudent weights and 2 of the 3 SK weights
347
+
348
+ ### Feb 12, 2020
349
+ * Add EfficientNet-L2 and B0-B7 NoisyStudent weights ported from [Tensorflow TPU](https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet)
350
+
351
+ ### Feb 6, 2020
352
+ * Add RandAugment trained EfficientNet-ES (EdgeTPU-Small) weights with 78.1 top-1. Trained by [Andrew Lavin](https://github.com/andravin) (see Training section for hparams)
353
+
354
+ ### Feb 1/2, 2020
355
+ * Port new EfficientNet-B8 (RandAugment) weights, these are different than the B8 AdvProp, different input normalization.
356
+ * Update results csv files on all models for ImageNet validation and three other test sets
357
+ * Push PyPi package update
358
+
359
+ ### Jan 31, 2020
360
+ * Update ResNet50 weights with a new 79.038 result from further JSD / AugMix experiments. Full command line for reproduction in training section below.
361
+
362
+ ### Jan 11/12, 2020
363
+ * Master may be a bit unstable wrt training, these changes have been tested but not all combos
364
+ * Implementations of AugMix added to existing RA and AA. Including numerous supporting pieces like JSD loss (Jensen-Shannon divergence + CE), and AugMixDataset
365
+ * SplitBatchNorm adaptation layer added for implementing Auxiliary BN as per AdvProp paper
366
+ * ResNet-50 AugMix trained model w/ 79% top-1 added
367
+ * `seresnext26tn_32x4d` - 77.99 top-1, 93.75 top-5 added to tiered experiment, higher img/s than 't' and 'd'
368
+
369
+ ### Jan 3, 2020
370
+ * Add RandAugment trained EfficientNet-B0 weight with 77.7 top-1. Trained by [Michael Klachko](https://github.com/michaelklachko) with this code and recent hparams (see Training section)
371
+ * Add `avg_checkpoints.py` script for post training weight averaging and update all scripts with header docstrings and shebangs.
372
+
373
+ ### Dec 30, 2019
374
+ * Merge [Dushyant Mehta's](https://github.com/mehtadushy) PR for SelecSLS (Selective Short and Long Range Skip Connections) networks. Good GPU memory consumption and throughput. Original: https://github.com/mehtadushy/SelecSLS-Pytorch
375
+
376
+ ### Dec 28, 2019
377
+ * Add new model weights and training hparams (see Training Hparams section)
378
+ * `efficientnet_b3` - 81.5 top-1, 95.7 top-5 at default res/crop, 81.9, 95.8 at 320x320 1.0 crop-pct
379
+ * trained with RandAugment, ended up with an interesting but less than perfect result (see training section)
380
+ * `seresnext26d_32x4d`- 77.6 top-1, 93.6 top-5
381
+ * deep stem (32, 32, 64), avgpool downsample
382
+ * stem/downsample from bag-of-tricks paper
383
+ * `seresnext26t_32x4d`- 78.0 top-1, 93.7 top-5
384
+ * deep tiered stem (24, 48, 64), avgpool downsample (a modified 'D' variant)
385
+ * stem sizing mods from Jeremy Howard and fastai devs discussing ResNet architecture experiments
386
+
387
+ ### Dec 23, 2019
388
+ * Add RandAugment trained MixNet-XL weights with 80.48 top-1.
389
+ * `--dist-bn` argument added to train.py, will distribute BN stats between nodes after each train epoch, before eval
390
+
391
+ ### Dec 4, 2019
392
+ * Added weights from the first training from scratch of an EfficientNet (B2) with my new RandAugment implementation. Much better than my previous B2 and very close to the official AdvProp ones (80.4 top-1, 95.08 top-5).
393
+
394
+ ### Nov 29, 2019
395
+ * Brought EfficientNet and MobileNetV3 up to date with my https://github.com/rwightman/gen-efficientnet-pytorch code. Torchscript and ONNX export compat excluded.
396
+ * AdvProp weights added
397
+ * Official TF MobileNetv3 weights added
398
+ * EfficientNet and MobileNetV3 hook based 'feature extraction' classes added. Will serve as basis for using models as backbones in obj detection/segmentation tasks. Lots more to be done here...
399
+ * HRNet classification models and weights added from https://github.com/HRNet/HRNet-Image-Classification
400
+ * Consistency in global pooling, `reset_classifier`, and `forward_features` across models
401
+ * `forward_features` always returns unpooled feature maps now
402
+ * Reasonable chance I broke something... let me know
403
+
404
+ ### Nov 22, 2019
405
+ * Add ImageNet training RandAugment implementation alongside AutoAugment. PyTorch Transform compatible format, using PIL. Currently training two EfficientNet models from scratch with promising results... will update.
406
+ * `drop-connect` cmd line arg finally added to `train.py`, no need to hack model fns. Works for efficientnet/mobilenetv3 based models, ignored otherwise.
pytorch-image-models/docs/changes.md ADDED
@@ -0,0 +1,710 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Recent Changes
2
+
3
+ ### Feb 7, 2023
4
+ * New inference benchmark numbers added in [results](results/) folder.
5
+ * Add convnext LAION CLIP trained weights and initial set of in1k fine-tunes
6
+ * `convnext_base.clip_laion2b_augreg_ft_in1k` - 86.2% @ 256x256
7
+ * `convnext_base.clip_laiona_augreg_ft_in1k_384` - 86.5% @ 384x384
8
+ * `convnext_large_mlp.clip_laion2b_augreg_ft_in1k` - 87.3% @ 256x256
9
+ * `convnext_large_mlp.clip_laion2b_augreg_ft_in1k_384` - 87.9% @ 384x384
10
+ * Add DaViT models. Supports `features_only=True`. Adapted from https://github.com/dingmyu/davit by [Fredo](https://github.com/fffffgggg54).
11
+ * Use a common NormMlpClassifierHead across MaxViT, ConvNeXt, DaViT
12
+ * Add EfficientFormer-V2 model, update EfficientFormer, and refactor LeViT (closely related architectures). Weights on HF hub.
13
+ * New EfficientFormer-V2 arch, significant refactor from original at (https://github.com/snap-research/EfficientFormer). Supports `features_only=True`.
14
+ * Minor updates to EfficientFormer.
15
+ * Refactor LeViT models to stages, add `features_only=True` support to new `conv` variants, weight remap required.
16
+ * Move ImageNet meta-data (synsets, indices) from `/results` to [`timm/data/_info`](timm/data/_info/).
17
+ * Add ImageNetInfo / DatasetInfo classes to provide labelling for various ImageNet classifier layouts in `timm`
18
+ * Update `inference.py` to use, try: `python inference.py /folder/to/images --model convnext_small.in12k --label-type detail --topk 5`
19
+ * Ready for 0.8.10 pypi pre-release (final testing).
20
+
21
+ ### Jan 20, 2023
22
+ * Add two convnext 12k -> 1k fine-tunes at 384x384
23
+ * `convnext_tiny.in12k_ft_in1k_384` - 85.1 @ 384
24
+ * `convnext_small.in12k_ft_in1k_384` - 86.2 @ 384
25
+
26
+ * Push all MaxxViT weights to HF hub, and add new ImageNet-12k -> 1k fine-tunes for `rw` base MaxViT and CoAtNet 1/2 models
27
+
28
+ |model |top1 |top5 |samples / sec |Params (M) |GMAC |Act (M)|
29
+ |------------------------------------------------------------------------------------------------------------------------|----:|----:|--------------:|--------------:|-----:|------:|
30
+ |[maxvit_xlarge_tf_512.in21k_ft_in1k](https://huggingface.co/timm/maxvit_xlarge_tf_512.in21k_ft_in1k) |88.53|98.64| 21.76| 475.77|534.14|1413.22|
31
+ |[maxvit_xlarge_tf_384.in21k_ft_in1k](https://huggingface.co/timm/maxvit_xlarge_tf_384.in21k_ft_in1k) |88.32|98.54| 42.53| 475.32|292.78| 668.76|
32
+ |[maxvit_base_tf_512.in21k_ft_in1k](https://huggingface.co/timm/maxvit_base_tf_512.in21k_ft_in1k) |88.20|98.53| 50.87| 119.88|138.02| 703.99|
33
+ |[maxvit_large_tf_512.in21k_ft_in1k](https://huggingface.co/timm/maxvit_large_tf_512.in21k_ft_in1k) |88.04|98.40| 36.42| 212.33|244.75| 942.15|
34
+ |[maxvit_large_tf_384.in21k_ft_in1k](https://huggingface.co/timm/maxvit_large_tf_384.in21k_ft_in1k) |87.98|98.56| 71.75| 212.03|132.55| 445.84|
35
+ |[maxvit_base_tf_384.in21k_ft_in1k](https://huggingface.co/timm/maxvit_base_tf_384.in21k_ft_in1k) |87.92|98.54| 104.71| 119.65| 73.80| 332.90|
36
+ |[maxvit_rmlp_base_rw_384.sw_in12k_ft_in1k](https://huggingface.co/timm/maxvit_rmlp_base_rw_384.sw_in12k_ft_in1k) |87.81|98.37| 106.55| 116.14| 70.97| 318.95|
37
+ |[maxxvitv2_rmlp_base_rw_384.sw_in12k_ft_in1k](https://huggingface.co/timm/maxxvitv2_rmlp_base_rw_384.sw_in12k_ft_in1k) |87.47|98.37| 149.49| 116.09| 72.98| 213.74|
38
+ |[coatnet_rmlp_2_rw_384.sw_in12k_ft_in1k](https://huggingface.co/timm/coatnet_rmlp_2_rw_384.sw_in12k_ft_in1k) |87.39|98.31| 160.80| 73.88| 47.69| 209.43|
39
+ |[maxvit_rmlp_base_rw_224.sw_in12k_ft_in1k](https://huggingface.co/timm/maxvit_rmlp_base_rw_224.sw_in12k_ft_in1k) |86.89|98.02| 375.86| 116.14| 23.15| 92.64|
40
+ |[maxxvitv2_rmlp_base_rw_224.sw_in12k_ft_in1k](https://huggingface.co/timm/maxxvitv2_rmlp_base_rw_224.sw_in12k_ft_in1k) |86.64|98.02| 501.03| 116.09| 24.20| 62.77|
41
+ |[maxvit_base_tf_512.in1k](https://huggingface.co/timm/maxvit_base_tf_512.in1k) |86.60|97.92| 50.75| 119.88|138.02| 703.99|
42
+ |[coatnet_2_rw_224.sw_in12k_ft_in1k](https://huggingface.co/timm/coatnet_2_rw_224.sw_in12k_ft_in1k) |86.57|97.89| 631.88| 73.87| 15.09| 49.22|
43
+ |[maxvit_large_tf_512.in1k](https://huggingface.co/timm/maxvit_large_tf_512.in1k) |86.52|97.88| 36.04| 212.33|244.75| 942.15|
44
+ |[coatnet_rmlp_2_rw_224.sw_in12k_ft_in1k](https://huggingface.co/timm/coatnet_rmlp_2_rw_224.sw_in12k_ft_in1k) |86.49|97.90| 620.58| 73.88| 15.18| 54.78|
45
+ |[maxvit_base_tf_384.in1k](https://huggingface.co/timm/maxvit_base_tf_384.in1k) |86.29|97.80| 101.09| 119.65| 73.80| 332.90|
46
+ |[maxvit_large_tf_384.in1k](https://huggingface.co/timm/maxvit_large_tf_384.in1k) |86.23|97.69| 70.56| 212.03|132.55| 445.84|
47
+ |[maxvit_small_tf_512.in1k](https://huggingface.co/timm/maxvit_small_tf_512.in1k) |86.10|97.76| 88.63| 69.13| 67.26| 383.77|
48
+ |[maxvit_tiny_tf_512.in1k](https://huggingface.co/timm/maxvit_tiny_tf_512.in1k) |85.67|97.58| 144.25| 31.05| 33.49| 257.59|
49
+ |[maxvit_small_tf_384.in1k](https://huggingface.co/timm/maxvit_small_tf_384.in1k) |85.54|97.46| 188.35| 69.02| 35.87| 183.65|
50
+ |[maxvit_tiny_tf_384.in1k](https://huggingface.co/timm/maxvit_tiny_tf_384.in1k) |85.11|97.38| 293.46| 30.98| 17.53| 123.42|
51
+ |[maxvit_large_tf_224.in1k](https://huggingface.co/timm/maxvit_large_tf_224.in1k) |84.93|96.97| 247.71| 211.79| 43.68| 127.35|
52
+ |[coatnet_rmlp_1_rw2_224.sw_in12k_ft_in1k](https://huggingface.co/timm/coatnet_rmlp_1_rw2_224.sw_in12k_ft_in1k) |84.90|96.96| 1025.45| 41.72| 8.11| 40.13|
53
+ |[maxvit_base_tf_224.in1k](https://huggingface.co/timm/maxvit_base_tf_224.in1k) |84.85|96.99| 358.25| 119.47| 24.04| 95.01|
54
+ |[maxxvit_rmlp_small_rw_256.sw_in1k](https://huggingface.co/timm/maxxvit_rmlp_small_rw_256.sw_in1k) |84.63|97.06| 575.53| 66.01| 14.67| 58.38|
55
+ |[coatnet_rmlp_2_rw_224.sw_in1k](https://huggingface.co/timm/coatnet_rmlp_2_rw_224.sw_in1k) |84.61|96.74| 625.81| 73.88| 15.18| 54.78|
56
+ |[maxvit_rmlp_small_rw_224.sw_in1k](https://huggingface.co/timm/maxvit_rmlp_small_rw_224.sw_in1k) |84.49|96.76| 693.82| 64.90| 10.75| 49.30|
57
+ |[maxvit_small_tf_224.in1k](https://huggingface.co/timm/maxvit_small_tf_224.in1k) |84.43|96.83| 647.96| 68.93| 11.66| 53.17|
58
+ |[maxvit_rmlp_tiny_rw_256.sw_in1k](https://huggingface.co/timm/maxvit_rmlp_tiny_rw_256.sw_in1k) |84.23|96.78| 807.21| 29.15| 6.77| 46.92|
59
+ |[coatnet_1_rw_224.sw_in1k](https://huggingface.co/timm/coatnet_1_rw_224.sw_in1k) |83.62|96.38| 989.59| 41.72| 8.04| 34.60|
60
+ |[maxvit_tiny_rw_224.sw_in1k](https://huggingface.co/timm/maxvit_tiny_rw_224.sw_in1k) |83.50|96.50| 1100.53| 29.06| 5.11| 33.11|
61
+ |[maxvit_tiny_tf_224.in1k](https://huggingface.co/timm/maxvit_tiny_tf_224.in1k) |83.41|96.59| 1004.94| 30.92| 5.60| 35.78|
62
+ |[coatnet_rmlp_1_rw_224.sw_in1k](https://huggingface.co/timm/coatnet_rmlp_1_rw_224.sw_in1k) |83.36|96.45| 1093.03| 41.69| 7.85| 35.47|
63
+ |[maxxvitv2_nano_rw_256.sw_in1k](https://huggingface.co/timm/maxxvitv2_nano_rw_256.sw_in1k) |83.11|96.33| 1276.88| 23.70| 6.26| 23.05|
64
+ |[maxxvit_rmlp_nano_rw_256.sw_in1k](https://huggingface.co/timm/maxxvit_rmlp_nano_rw_256.sw_in1k) |83.03|96.34| 1341.24| 16.78| 4.37| 26.05|
65
+ |[maxvit_rmlp_nano_rw_256.sw_in1k](https://huggingface.co/timm/maxvit_rmlp_nano_rw_256.sw_in1k) |82.96|96.26| 1283.24| 15.50| 4.47| 31.92|
66
+ |[maxvit_nano_rw_256.sw_in1k](https://huggingface.co/timm/maxvit_nano_rw_256.sw_in1k) |82.93|96.23| 1218.17| 15.45| 4.46| 30.28|
67
+ |[coatnet_bn_0_rw_224.sw_in1k](https://huggingface.co/timm/coatnet_bn_0_rw_224.sw_in1k) |82.39|96.19| 1600.14| 27.44| 4.67| 22.04|
68
+ |[coatnet_0_rw_224.sw_in1k](https://huggingface.co/timm/coatnet_0_rw_224.sw_in1k) |82.39|95.84| 1831.21| 27.44| 4.43| 18.73|
69
+ |[coatnet_rmlp_nano_rw_224.sw_in1k](https://huggingface.co/timm/coatnet_rmlp_nano_rw_224.sw_in1k) |82.05|95.87| 2109.09| 15.15| 2.62| 20.34|
70
+ |[coatnext_nano_rw_224.sw_in1k](https://huggingface.co/timm/coatnext_nano_rw_224.sw_in1k) |81.95|95.92| 2525.52| 14.70| 2.47| 12.80|
71
+ |[coatnet_nano_rw_224.sw_in1k](https://huggingface.co/timm/coatnet_nano_rw_224.sw_in1k) |81.70|95.64| 2344.52| 15.14| 2.41| 15.41|
72
+ |[maxvit_rmlp_pico_rw_256.sw_in1k](https://huggingface.co/timm/maxvit_rmlp_pico_rw_256.sw_in1k) |80.53|95.21| 1594.71| 7.52| 1.85| 24.86|
73
+
74
+ ### Jan 11, 2023
75
+ * Update ConvNeXt ImageNet-12k pretrain series w/ two new fine-tuned weights (and pre FT `.in12k` tags)
76
+ * `convnext_nano.in12k_ft_in1k` - 82.3 @ 224, 82.9 @ 288 (previously released)
77
+ * `convnext_tiny.in12k_ft_in1k` - 84.2 @ 224, 84.5 @ 288
78
+ * `convnext_small.in12k_ft_in1k` - 85.2 @ 224, 85.3 @ 288
79
+
80
+ ### Jan 6, 2023
81
+ * Finally got around to adding `--model-kwargs` and `--opt-kwargs` to scripts to pass through rare args directly to model classes from cmd line
82
+ * `train.py /imagenet --model resnet50 --amp --model-kwargs output_stride=16 act_layer=silu`
83
+ * `train.py /imagenet --model vit_base_patch16_clip_224 --img-size 240 --amp --model-kwargs img_size=240 patch_size=12`
84
+ * Cleanup some popular models to better support arg passthrough / merge with model configs, more to go.
85
+
86
+ ### Jan 5, 2023
87
+ * ConvNeXt-V2 models and weights added to existing `convnext.py`
88
+ * Paper: [ConvNeXt V2: Co-designing and Scaling ConvNets with Masked Autoencoders](http://arxiv.org/abs/2301.00808)
89
+ * Reference impl: https://github.com/facebookresearch/ConvNeXt-V2 (NOTE: weights currently CC-BY-NC)
90
+
91
+ ### Dec 23, 2022 🎄☃
92
+ * Add FlexiViT models and weights from https://github.com/google-research/big_vision (check out paper at https://arxiv.org/abs/2212.08013)
93
+ * NOTE currently resizing is static on model creation, on-the-fly dynamic / train patch size sampling is a WIP
94
+ * Many more models updated to multi-weight and downloadable via HF hub now (convnext, efficientnet, mobilenet, vision_transformer*, beit)
95
+ * More model pretrained tag and adjustments, some model names changed (working on deprecation translations, consider main branch DEV branch right now, use 0.6.x for stable use)
96
+ * More ImageNet-12k (subset of 22k) pretrain models popping up:
97
+ * `efficientnet_b5.in12k_ft_in1k` - 85.9 @ 448x448
98
+ * `vit_medium_patch16_gap_384.in12k_ft_in1k` - 85.5 @ 384x384
99
+ * `vit_medium_patch16_gap_256.in12k_ft_in1k` - 84.5 @ 256x256
100
+ * `convnext_nano.in12k_ft_in1k` - 82.9 @ 288x288
101
+
102
+ ### Dec 8, 2022
103
+ * Add 'EVA l' to `vision_transformer.py`, MAE style ViT-L/14 MIM pretrain w/ EVA-CLIP targets, FT on ImageNet-1k (w/ ImageNet-22k intermediate for some)
104
+ * original source: https://github.com/baaivision/EVA
105
+
106
+ | model | top1 | param_count | gmac | macts | hub |
107
+ |:------------------------------------------|-----:|------------:|------:|------:|:----------------------------------------|
108
+ | eva_large_patch14_336.in22k_ft_in22k_in1k | 89.2 | 304.5 | 191.1 | 270.2 | [link](https://huggingface.co/BAAI/EVA) |
109
+ | eva_large_patch14_336.in22k_ft_in1k | 88.7 | 304.5 | 191.1 | 270.2 | [link](https://huggingface.co/BAAI/EVA) |
110
+ | eva_large_patch14_196.in22k_ft_in22k_in1k | 88.6 | 304.1 | 61.6 | 63.5 | [link](https://huggingface.co/BAAI/EVA) |
111
+ | eva_large_patch14_196.in22k_ft_in1k | 87.9 | 304.1 | 61.6 | 63.5 | [link](https://huggingface.co/BAAI/EVA) |
112
+
113
+ ### Dec 6, 2022
114
+ * Add 'EVA g', BEiT style ViT-g/14 model weights w/ both MIM pretrain and CLIP pretrain to `beit.py`.
115
+ * original source: https://github.com/baaivision/EVA
116
+ * paper: https://arxiv.org/abs/2211.07636
117
+
118
+ | model | top1 | param_count | gmac | macts | hub |
119
+ |:-----------------------------------------|-------:|--------------:|-------:|--------:|:----------------------------------------|
120
+ | eva_giant_patch14_560.m30m_ft_in22k_in1k | 89.8 | 1014.4 | 1906.8 | 2577.2 | [link](https://huggingface.co/BAAI/EVA) |
121
+ | eva_giant_patch14_336.m30m_ft_in22k_in1k | 89.6 | 1013 | 620.6 | 550.7 | [link](https://huggingface.co/BAAI/EVA) |
122
+ | eva_giant_patch14_336.clip_ft_in1k | 89.4 | 1013 | 620.6 | 550.7 | [link](https://huggingface.co/BAAI/EVA) |
123
+ | eva_giant_patch14_224.clip_ft_in1k | 89.1 | 1012.6 | 267.2 | 192.6 | [link](https://huggingface.co/BAAI/EVA) |
124
+
125
+ ### Dec 5, 2022
126
+
127
+ * Pre-release (`0.8.0dev0`) of multi-weight support (`model_arch.pretrained_tag`). Install with `pip install --pre timm`
128
+ * vision_transformer, maxvit, convnext are the first three model impl w/ support
129
+ * model names are changing with this (previous _21k, etc. fn will merge), still sorting out deprecation handling
130
+ * bugs are likely, but I need feedback so please try it out
131
+ * if stability is needed, please use 0.6.x pypi releases or clone from [0.6.x branch](https://github.com/rwightman/pytorch-image-models/tree/0.6.x)
132
+ * Support for PyTorch 2.0 compile is added in train/validate/inference/benchmark, use `--torchcompile` argument
133
+ * Inference script allows more control over output, select k for top-class index + prob json, csv or parquet output
134
+ * Add a full set of fine-tuned CLIP image tower weights from both LAION-2B and original OpenAI CLIP models
135
+
136
+ | model | top1 | param_count | gmac | macts | hub |
137
+ |:-------------------------------------------------|-------:|--------------:|-------:|--------:|:-------------------------------------------------------------------------------------|
138
+ | vit_huge_patch14_clip_336.laion2b_ft_in12k_in1k | 88.6 | 632.5 | 391 | 407.5 | [link](https://huggingface.co/timm/vit_huge_patch14_clip_336.laion2b_ft_in12k_in1k) |
139
+ | vit_large_patch14_clip_336.openai_ft_in12k_in1k | 88.3 | 304.5 | 191.1 | 270.2 | [link](https://huggingface.co/timm/vit_large_patch14_clip_336.openai_ft_in12k_in1k) |
140
+ | vit_huge_patch14_clip_224.laion2b_ft_in12k_in1k | 88.2 | 632 | 167.4 | 139.4 | [link](https://huggingface.co/timm/vit_huge_patch14_clip_224.laion2b_ft_in12k_in1k) |
141
+ | vit_large_patch14_clip_336.laion2b_ft_in12k_in1k | 88.2 | 304.5 | 191.1 | 270.2 | [link](https://huggingface.co/timm/vit_large_patch14_clip_336.laion2b_ft_in12k_in1k) |
142
+ | vit_large_patch14_clip_224.openai_ft_in12k_in1k | 88.2 | 304.2 | 81.1 | 88.8 | [link](https://huggingface.co/timm/vit_large_patch14_clip_224.openai_ft_in12k_in1k) |
143
+ | vit_large_patch14_clip_224.laion2b_ft_in12k_in1k | 87.9 | 304.2 | 81.1 | 88.8 | [link](https://huggingface.co/timm/vit_large_patch14_clip_224.laion2b_ft_in12k_in1k) |
144
+ | vit_large_patch14_clip_224.openai_ft_in1k | 87.9 | 304.2 | 81.1 | 88.8 | [link](https://huggingface.co/timm/vit_large_patch14_clip_224.openai_ft_in1k) |
145
+ | vit_large_patch14_clip_336.laion2b_ft_in1k | 87.9 | 304.5 | 191.1 | 270.2 | [link](https://huggingface.co/timm/vit_large_patch14_clip_336.laion2b_ft_in1k) |
146
+ | vit_huge_patch14_clip_224.laion2b_ft_in1k | 87.6 | 632 | 167.4 | 139.4 | [link](https://huggingface.co/timm/vit_huge_patch14_clip_224.laion2b_ft_in1k) |
147
+ | vit_large_patch14_clip_224.laion2b_ft_in1k | 87.3 | 304.2 | 81.1 | 88.8 | [link](https://huggingface.co/timm/vit_large_patch14_clip_224.laion2b_ft_in1k) |
148
+ | vit_base_patch16_clip_384.laion2b_ft_in12k_in1k | 87.2 | 86.9 | 55.5 | 101.6 | [link](https://huggingface.co/timm/vit_base_patch16_clip_384.laion2b_ft_in12k_in1k) |
149
+ | vit_base_patch16_clip_384.openai_ft_in12k_in1k | 87 | 86.9 | 55.5 | 101.6 | [link](https://huggingface.co/timm/vit_base_patch16_clip_384.openai_ft_in12k_in1k) |
150
+ | vit_base_patch16_clip_384.laion2b_ft_in1k | 86.6 | 86.9 | 55.5 | 101.6 | [link](https://huggingface.co/timm/vit_base_patch16_clip_384.laion2b_ft_in1k) |
151
+ | vit_base_patch16_clip_384.openai_ft_in1k | 86.2 | 86.9 | 55.5 | 101.6 | [link](https://huggingface.co/timm/vit_base_patch16_clip_384.openai_ft_in1k) |
152
+ | vit_base_patch16_clip_224.laion2b_ft_in12k_in1k | 86.2 | 86.6 | 17.6 | 23.9 | [link](https://huggingface.co/timm/vit_base_patch16_clip_224.laion2b_ft_in12k_in1k) |
153
+ | vit_base_patch16_clip_224.openai_ft_in12k_in1k | 85.9 | 86.6 | 17.6 | 23.9 | [link](https://huggingface.co/timm/vit_base_patch16_clip_224.openai_ft_in12k_in1k) |
154
+ | vit_base_patch32_clip_448.laion2b_ft_in12k_in1k | 85.8 | 88.3 | 17.9 | 23.9 | [link](https://huggingface.co/timm/vit_base_patch32_clip_448.laion2b_ft_in12k_in1k) |
155
+ | vit_base_patch16_clip_224.laion2b_ft_in1k | 85.5 | 86.6 | 17.6 | 23.9 | [link](https://huggingface.co/timm/vit_base_patch16_clip_224.laion2b_ft_in1k) |
156
+ | vit_base_patch32_clip_384.laion2b_ft_in12k_in1k | 85.4 | 88.3 | 13.1 | 16.5 | [link](https://huggingface.co/timm/vit_base_patch32_clip_384.laion2b_ft_in12k_in1k) |
157
+ | vit_base_patch16_clip_224.openai_ft_in1k | 85.3 | 86.6 | 17.6 | 23.9 | [link](https://huggingface.co/timm/vit_base_patch16_clip_224.openai_ft_in1k) |
158
+ | vit_base_patch32_clip_384.openai_ft_in12k_in1k | 85.2 | 88.3 | 13.1 | 16.5 | [link](https://huggingface.co/timm/vit_base_patch32_clip_384.openai_ft_in12k_in1k) |
159
+ | vit_base_patch32_clip_224.laion2b_ft_in12k_in1k | 83.3 | 88.2 | 4.4 | 5 | [link](https://huggingface.co/timm/vit_base_patch32_clip_224.laion2b_ft_in12k_in1k) |
160
+ | vit_base_patch32_clip_224.laion2b_ft_in1k | 82.6 | 88.2 | 4.4 | 5 | [link](https://huggingface.co/timm/vit_base_patch32_clip_224.laion2b_ft_in1k) |
161
+ | vit_base_patch32_clip_224.openai_ft_in1k | 81.9 | 88.2 | 4.4 | 5 | [link](https://huggingface.co/timm/vit_base_patch32_clip_224.openai_ft_in1k) |
162
+
163
+ * Port of MaxViT Tensorflow Weights from official impl at https://github.com/google-research/maxvit
164
+ * There were larger than expected drops for the upscaled 384/512 in21k fine-tune weights, possible detail missing, but the 21k FT did seem sensitive to small preprocessing
165
+
166
+ | model | top1 | param_count | gmac | macts | hub |
167
+ |:-----------------------------------|-------:|--------------:|-------:|--------:|:-----------------------------------------------------------------------|
168
+ | maxvit_xlarge_tf_512.in21k_ft_in1k | 88.5 | 475.8 | 534.1 | 1413.2 | [link](https://huggingface.co/timm/maxvit_xlarge_tf_512.in21k_ft_in1k) |
169
+ | maxvit_xlarge_tf_384.in21k_ft_in1k | 88.3 | 475.3 | 292.8 | 668.8 | [link](https://huggingface.co/timm/maxvit_xlarge_tf_384.in21k_ft_in1k) |
170
+ | maxvit_base_tf_512.in21k_ft_in1k | 88.2 | 119.9 | 138 | 704 | [link](https://huggingface.co/timm/maxvit_base_tf_512.in21k_ft_in1k) |
171
+ | maxvit_large_tf_512.in21k_ft_in1k | 88 | 212.3 | 244.8 | 942.2 | [link](https://huggingface.co/timm/maxvit_large_tf_512.in21k_ft_in1k) |
172
+ | maxvit_large_tf_384.in21k_ft_in1k | 88 | 212 | 132.6 | 445.8 | [link](https://huggingface.co/timm/maxvit_large_tf_384.in21k_ft_in1k) |
173
+ | maxvit_base_tf_384.in21k_ft_in1k | 87.9 | 119.6 | 73.8 | 332.9 | [link](https://huggingface.co/timm/maxvit_base_tf_384.in21k_ft_in1k) |
174
+ | maxvit_base_tf_512.in1k | 86.6 | 119.9 | 138 | 704 | [link](https://huggingface.co/timm/maxvit_base_tf_512.in1k) |
175
+ | maxvit_large_tf_512.in1k | 86.5 | 212.3 | 244.8 | 942.2 | [link](https://huggingface.co/timm/maxvit_large_tf_512.in1k) |
176
+ | maxvit_base_tf_384.in1k | 86.3 | 119.6 | 73.8 | 332.9 | [link](https://huggingface.co/timm/maxvit_base_tf_384.in1k) |
177
+ | maxvit_large_tf_384.in1k | 86.2 | 212 | 132.6 | 445.8 | [link](https://huggingface.co/timm/maxvit_large_tf_384.in1k) |
178
+ | maxvit_small_tf_512.in1k | 86.1 | 69.1 | 67.3 | 383.8 | [link](https://huggingface.co/timm/maxvit_small_tf_512.in1k) |
179
+ | maxvit_tiny_tf_512.in1k | 85.7 | 31 | 33.5 | 257.6 | [link](https://huggingface.co/timm/maxvit_tiny_tf_512.in1k) |
180
+ | maxvit_small_tf_384.in1k | 85.5 | 69 | 35.9 | 183.6 | [link](https://huggingface.co/timm/maxvit_small_tf_384.in1k) |
181
+ | maxvit_tiny_tf_384.in1k | 85.1 | 31 | 17.5 | 123.4 | [link](https://huggingface.co/timm/maxvit_tiny_tf_384.in1k) |
182
+ | maxvit_large_tf_224.in1k | 84.9 | 211.8 | 43.7 | 127.4 | [link](https://huggingface.co/timm/maxvit_large_tf_224.in1k) |
183
+ | maxvit_base_tf_224.in1k | 84.9 | 119.5 | 24 | 95 | [link](https://huggingface.co/timm/maxvit_base_tf_224.in1k) |
184
+ | maxvit_small_tf_224.in1k | 84.4 | 68.9 | 11.7 | 53.2 | [link](https://huggingface.co/timm/maxvit_small_tf_224.in1k) |
185
+ | maxvit_tiny_tf_224.in1k | 83.4 | 30.9 | 5.6 | 35.8 | [link](https://huggingface.co/timm/maxvit_tiny_tf_224.in1k) |
186
+
187
+ ### Oct 15, 2022
188
+ * Train and validation script enhancements
189
+ * Non-GPU (ie CPU) device support
190
+ * SLURM compatibility for train script
191
+ * HF datasets support (via ReaderHfds)
192
+ * TFDS/WDS dataloading improvements (sample padding/wrap for distributed use fixed wrt sample count estimate)
193
+ * in_chans !=3 support for scripts / loader
194
+ * Adan optimizer
195
+ * Can enable per-step LR scheduling via args
196
+ * Dataset 'parsers' renamed to 'readers', more descriptive of purpose
197
+ * AMP args changed, APEX via `--amp-impl apex`, bfloat16 supported via `--amp-dtype bfloat16`
198
+ * main branch switched to 0.7.x version, 0.6.x forked for stable release of weight only adds
199
+ * master -> main branch rename
200
+
201
+ ### Oct 10, 2022
202
+ * More weights in `maxxvit` series, incl first ConvNeXt block based `coatnext` and `maxxvit` experiments:
203
+ * `coatnext_nano_rw_224` - 82.0 @ 224 (G) -- (uses ConvNeXt conv block, no BatchNorm)
204
+ * `maxxvit_rmlp_nano_rw_256` - 83.0 @ 256, 83.7 @ 320 (G) (uses ConvNeXt conv block, no BN)
205
+ * `maxvit_rmlp_small_rw_224` - 84.5 @ 224, 85.1 @ 320 (G)
206
+ * `maxxvit_rmlp_small_rw_256` - 84.6 @ 256, 84.9 @ 288 (G) -- could be trained better, hparams need tuning (uses ConvNeXt block, no BN)
207
+ * `coatnet_rmlp_2_rw_224` - 84.6 @ 224, 85 @ 320 (T)
208
+ * NOTE: official MaxVit weights (in1k) have been released at https://github.com/google-research/maxvit -- some extra work is needed to port and adapt since my impl was created independently of theirs and has a few small differences + the whole TF same padding fun.
209
+
210
+ ### Sept 23, 2022
211
+ * LAION-2B CLIP image towers supported as pretrained backbones for fine-tune or features (no classifier)
212
+ * vit_base_patch32_224_clip_laion2b
213
+ * vit_large_patch14_224_clip_laion2b
214
+ * vit_huge_patch14_224_clip_laion2b
215
+ * vit_giant_patch14_224_clip_laion2b
216
+
217
+ ### Sept 7, 2022
218
+ * Hugging Face [`timm` docs](https://huggingface.co/docs/hub/timm) home now exists, look for more here in the future
219
+ * Add BEiT-v2 weights for base and large 224x224 models from https://github.com/microsoft/unilm/tree/master/beit2
220
+ * Add more weights in `maxxvit` series incl a `pico` (7.5M params, 1.9 GMACs), two `tiny` variants:
221
+ * `maxvit_rmlp_pico_rw_256` - 80.5 @ 256, 81.3 @ 320 (T)
222
+ * `maxvit_tiny_rw_224` - 83.5 @ 224 (G)
223
+ * `maxvit_rmlp_tiny_rw_256` - 84.2 @ 256, 84.8 @ 320 (T)
224
+
225
+ ### Aug 29, 2022
226
+ * MaxVit window size scales with img_size by default. Add new RelPosMlp MaxViT weight that leverages this:
227
+ * `maxvit_rmlp_nano_rw_256` - 83.0 @ 256, 83.6 @ 320 (T)
228
+
229
+ ### Aug 26, 2022
230
+ * CoAtNet (https://arxiv.org/abs/2106.04803) and MaxVit (https://arxiv.org/abs/2204.01697) `timm` original models
231
+ * both found in [`maxxvit.py`](https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/maxxvit.py) model def, contains numerous experiments outside scope of original papers
232
+ * an unfinished Tensorflow version from MaxVit authors can be found https://github.com/google-research/maxvit
233
+ * Initial CoAtNet and MaxVit timm pretrained weights (working on more):
234
+ * `coatnet_nano_rw_224` - 81.7 @ 224 (T)
235
+ * `coatnet_rmlp_nano_rw_224` - 82.0 @ 224, 82.8 @ 320 (T)
236
+ * `coatnet_0_rw_224` - 82.4 (T) -- NOTE timm '0' coatnets have 2 more 3rd stage blocks
237
+ * `coatnet_bn_0_rw_224` - 82.4 (T)
238
+ * `maxvit_nano_rw_256` - 82.9 @ 256 (T)
239
+ * `coatnet_rmlp_1_rw_224` - 83.4 @ 224, 84 @ 320 (T)
240
+ * `coatnet_1_rw_224` - 83.6 @ 224 (G)
241
+ * (T) = TPU trained with `bits_and_tpu` branch training code, (G) = GPU trained
242
+ * GCVit (weights adapted from https://github.com/NVlabs/GCVit, code 100% `timm` re-write for license purposes)
243
+ * MViT-V2 (multi-scale vit, adapted from https://github.com/facebookresearch/mvit)
244
+ * EfficientFormer (adapted from https://github.com/snap-research/EfficientFormer)
245
+ * PyramidVisionTransformer-V2 (adapted from https://github.com/whai362/PVT)
246
+ * 'Fast Norm' support for LayerNorm and GroupNorm that avoids float32 upcast w/ AMP (uses APEX LN if available for further boost)
247
+
248
+ ### Aug 15, 2022
249
+ * ConvNeXt atto weights added
250
+ * `convnext_atto` - 75.7 @ 224, 77.0 @ 288
251
+ * `convnext_atto_ols` - 75.9 @ 224, 77.2 @ 288
252
+
253
+ ### Aug 5, 2022
254
+ * More custom ConvNeXt smaller model defs with weights
255
+ * `convnext_femto` - 77.5 @ 224, 78.7 @ 288
256
+ * `convnext_femto_ols` - 77.9 @ 224, 78.9 @ 288
257
+ * `convnext_pico` - 79.5 @ 224, 80.4 @ 288
258
+ * `convnext_pico_ols` - 79.5 @ 224, 80.5 @ 288
259
+ * `convnext_nano_ols` - 80.9 @ 224, 81.6 @ 288
260
+ * Updated EdgeNeXt to improve ONNX export, add new base variant and weights from original (https://github.com/mmaaz60/EdgeNeXt)
261
+
262
+ ### July 28, 2022
263
+ * Add freshly minted DeiT-III Medium (width=512, depth=12, num_heads=8) model weights. Thanks [Hugo Touvron](https://github.com/TouvronHugo)!
264
+
265
+ ### July 27, 2022
266
+ * All runtime benchmark and validation result csv files are finally up-to-date!
267
+ * A few more weights & model defs added:
268
+ * `darknetaa53` - 79.8 @ 256, 80.5 @ 288
269
+ * `convnext_nano` - 80.8 @ 224, 81.5 @ 288
270
+ * `cs3sedarknet_l` - 81.2 @ 256, 81.8 @ 288
271
+ * `cs3darknet_x` - 81.8 @ 256, 82.2 @ 288
272
+ * `cs3sedarknet_x` - 82.2 @ 256, 82.7 @ 288
273
+ * `cs3edgenet_x` - 82.2 @ 256, 82.7 @ 288
274
+ * `cs3se_edgenet_x` - 82.8 @ 256, 83.5 @ 320
275
+ * `cs3*` weights above all trained on TPU w/ `bits_and_tpu` branch. Thanks to TRC program!
276
+ * Add output_stride=8 and 16 support to ConvNeXt (dilation)
277
+ * deit3 models not being able to resize pos_emb fixed
278
+ * Version 0.6.7 PyPi release (w/ above bug fixes and new weights since 0.6.5)
279
+
280
+ ### July 8, 2022
281
+ More models, more fixes
282
+ * Official research models (w/ weights) added:
283
+ * EdgeNeXt from (https://github.com/mmaaz60/EdgeNeXt)
284
+ * MobileViT-V2 from (https://github.com/apple/ml-cvnets)
285
+ * DeiT III (Revenge of the ViT) from (https://github.com/facebookresearch/deit)
286
+ * My own models:
287
+ * Small `ResNet` defs added by request with 1 block repeats for both basic and bottleneck (resnet10 and resnet14)
288
+ * `CspNet` refactored with dataclass config, simplified CrossStage3 (`cs3`) option. These are closer to YOLO-v5+ backbone defs.
289
+ * More relative position vit fiddling. Two `srelpos` (shared relative position) models trained, and a medium w/ class token.
290
+ * Add an alternate downsample mode to EdgeNeXt and train a `small` model. Better than original small, but not their new USI trained weights.
291
+ * My own model weight results (all ImageNet-1k training)
292
+ * `resnet10t` - 66.5 @ 176, 68.3 @ 224
293
+ * `resnet14t` - 71.3 @ 176, 72.3 @ 224
294
+ * `resnetaa50` - 80.6 @ 224, 81.6 @ 288
295
+ * `darknet53` - 80.0 @ 256, 80.5 @ 288
296
+ * `cs3darknet_m` - 77.0 @ 256, 77.6 @ 288
297
+ * `cs3darknet_focus_m` - 76.7 @ 256, 77.3 @ 288
298
+ * `cs3darknet_l` - 80.4 @ 256, 80.9 @ 288
299
+ * `cs3darknet_focus_l` - 80.3 @ 256, 80.9 @ 288
300
+ * `vit_srelpos_small_patch16_224` - 81.1 @ 224, 82.1 @ 320
301
+ * `vit_srelpos_medium_patch16_224` - 82.3 @ 224, 83.1 @ 320
302
+ * `vit_relpos_small_patch16_cls_224` - 82.6 @ 224, 83.6 @ 320
303
+ * `edgenext_small_rw` - 79.6 @ 224, 80.4 @ 320
304
+ * `cs3`, `darknet`, and `vit_*relpos` weights above all trained on TPU thanks to TRC program! Rest trained on overheating GPUs.
305
+ * Hugging Face Hub support fixes verified, demo notebook TBA
306
+ * Pretrained weights / configs can be loaded externally (ie from local disk) w/ support for head adaptation.
307
+ * Add support to change image extensions scanned by `timm` datasets/readers. See (https://github.com/rwightman/pytorch-image-models/pull/1274#issuecomment-1178303103)
308
+ * Default ConvNeXt LayerNorm impl to use `F.layer_norm(x.permute(0, 2, 3, 1), ...).permute(0, 3, 1, 2)` via `LayerNorm2d` in all cases.
309
+ * a bit slower than previous custom impl on some hardware (ie Ampere w/ CL), but overall fewer regressions across wider HW / PyTorch version ranges.
310
+ * previous impl exists as `LayerNormExp2d` in `models/layers/norm.py`
311
+ * Numerous bug fixes
312
+ * Currently testing for imminent PyPi 0.6.x release
313
+ * LeViT pretraining of larger models still a WIP, they don't train well / easily without distillation. Time to add distill support (finally)?
314
+ * ImageNet-22k weight training + finetune ongoing, work on multi-weight support (slowly) chugging along (there are a LOT of weights, sigh) ...
315
+
316
+ ### May 13, 2022
317
+ * Official Swin-V2 models and weights added from (https://github.com/microsoft/Swin-Transformer). Cleaned up to support torchscript.
318
+ * Some refactoring for existing `timm` Swin-V2-CR impl, will likely do a bit more to bring parts closer to official and decide whether to merge some aspects.
319
+ * More Vision Transformer relative position / residual post-norm experiments (all trained on TPU thanks to TRC program)
320
+ * `vit_relpos_small_patch16_224` - 81.5 @ 224, 82.5 @ 320 -- rel pos, layer scale, no class token, avg pool
321
+ * `vit_relpos_medium_patch16_rpn_224` - 82.3 @ 224, 83.1 @ 320 -- rel pos + res-post-norm, no class token, avg pool
322
+ * `vit_relpos_medium_patch16_224` - 82.5 @ 224, 83.3 @ 320 -- rel pos, layer scale, no class token, avg pool
323
+ * `vit_relpos_base_patch16_gapcls_224` - 82.8 @ 224, 83.9 @ 320 -- rel pos, layer scale, class token, avg pool (by mistake)
324
+ * Bring 512 dim, 8-head 'medium' ViT model variant back to life (after using in a pre DeiT 'small' model for first ViT impl back in 2020)
325
+ * Add ViT relative position support for switching btw existing impl and some additions in official Swin-V2 impl for future trials
326
+ * Sequencer2D impl (https://arxiv.org/abs/2205.01972), added via PR from author (https://github.com/okojoalg)
327
+
328
+ ### May 2, 2022
329
+ * Vision Transformer experiments adding Relative Position (Swin-V2 log-coord) (`vision_transformer_relpos.py`) and Residual Post-Norm branches (from Swin-V2) (`vision_transformer*.py`)
330
+ * `vit_relpos_base_patch32_plus_rpn_256` - 79.5 @ 256, 80.6 @ 320 -- rel pos + extended width + res-post-norm, no class token, avg pool
331
+ * `vit_relpos_base_patch16_224` - 82.5 @ 224, 83.6 @ 320 -- rel pos, layer scale, no class token, avg pool
332
+ * `vit_base_patch16_rpn_224` - 82.3 @ 224 -- rel pos + res-post-norm, no class token, avg pool
333
+ * Vision Transformer refactor to remove representation layer that was only used in initial vit and rarely used since with newer pretrain (ie `How to Train Your ViT`)
334
+ * `vit_*` models support removal of class token, use of global average pool, use of fc_norm (ala beit, mae).
335
+
336
+ ### April 22, 2022
337
+ * `timm` models are now officially supported in [fast.ai](https://www.fast.ai/)! Just in time for the new Practical Deep Learning course. `timmdocs` documentation link updated to [timm.fast.ai](http://timm.fast.ai/).
338
+ * Two more model weights added in the TPU trained [series](https://github.com/rwightman/pytorch-image-models/releases/tag/v0.1-tpu-weights). Some In22k pretrain still in progress.
339
+ * `seresnext101d_32x8d` - 83.69 @ 224, 84.35 @ 288
340
+ * `seresnextaa101d_32x8d` (anti-aliased w/ AvgPool2d) - 83.85 @ 224, 84.57 @ 288
341
+
342
+ ### March 23, 2022
343
+ * Add `ParallelBlock` and `LayerScale` option to base vit models to support model configs in [Three things everyone should know about ViT](https://arxiv.org/abs/2203.09795)
344
+ * `convnext_tiny_hnf` (head norm first) weights trained with (close to) A2 recipe, 82.2% top-1, could do better with more epochs.
345
+
346
+ ### March 21, 2022
347
+ * Merge `norm_norm_norm`. **IMPORTANT** this update for a coming 0.6.x release will likely de-stabilize the master branch for a while. Branch [`0.5.x`](https://github.com/rwightman/pytorch-image-models/tree/0.5.x) or a previous 0.5.x release can be used if stability is required.
348
+ * Significant weights update (all TPU trained) as described in this [release](https://github.com/rwightman/pytorch-image-models/releases/tag/v0.1-tpu-weights)
349
+ * `regnety_040` - 82.3 @ 224, 82.96 @ 288
350
+ * `regnety_064` - 83.0 @ 224, 83.65 @ 288
351
+ * `regnety_080` - 83.17 @ 224, 83.86 @ 288
352
+ * `regnetv_040` - 82.44 @ 224, 83.18 @ 288 (timm pre-act)
353
+ * `regnetv_064` - 83.1 @ 224, 83.71 @ 288 (timm pre-act)
354
+ * `regnetz_040` - 83.67 @ 256, 84.25 @ 320
355
+ * `regnetz_040h` - 83.77 @ 256, 84.5 @ 320 (w/ extra fc in head)
356
+ * `resnetv2_50d_gn` - 80.8 @ 224, 81.96 @ 288 (pre-act GroupNorm)
357
+ * `resnetv2_50d_evos` - 80.77 @ 224, 82.04 @ 288 (pre-act EvoNormS)
358
+ * `regnetz_c16_evos` - 81.9 @ 256, 82.64 @ 320 (EvoNormS)
359
+ * `regnetz_d8_evos` - 83.42 @ 256, 84.04 @ 320 (EvoNormS)
360
+ * `xception41p` - 82 @ 299 (timm pre-act)
361
+ * `xception65` - 83.17 @ 299
362
+ * `xception65p` - 83.14 @ 299 (timm pre-act)
363
+ * `resnext101_64x4d` - 82.46 @ 224, 83.16 @ 288
364
+ * `seresnext101_32x8d` - 83.57 @ 224, 84.270 @ 288
365
+ * `resnetrs200` - 83.85 @ 256, 84.44 @ 320
366
+ * HuggingFace hub support fixed w/ initial groundwork for allowing alternative 'config sources' for pretrained model definitions and weights (generic local file / remote url support soon)
367
+ * SwinTransformer-V2 implementation added. Submitted by [Christoph Reich](https://github.com/ChristophReich1996). Training experiments and model changes by myself are ongoing so expect compat breaks.
368
+ * Swin-S3 (AutoFormerV2) models / weights added from https://github.com/microsoft/Cream/tree/main/AutoFormerV2
369
+ * MobileViT models w/ weights adapted from https://github.com/apple/ml-cvnets
370
+ * PoolFormer models w/ weights adapted from https://github.com/sail-sg/poolformer
371
+ * VOLO models w/ weights adapted from https://github.com/sail-sg/volo
372
+ * Significant work experimenting with non-BatchNorm norm layers such as EvoNorm, FilterResponseNorm, GroupNorm, etc
373
+ * Enhance support for alternate norm + act ('NormAct') layers added to a number of models, esp EfficientNet/MobileNetV3, RegNet, and aligned Xception
374
+ * Grouped conv support added to EfficientNet family
375
+ * Add 'group matching' API to all models to allow grouping model parameters for application of 'layer-wise' LR decay, lr scale added to LR scheduler
376
+ * Gradient checkpointing support added to many models
377
+ * `forward_head(x, pre_logits=False)` fn added to all models to allow separate calls of `forward_features` + `forward_head`
378
+ * All vision transformer and vision MLP models updated to return non-pooled / non-token selected features from `forward_features`, for consistency with CNN models, token selection or pooling now applied in `forward_head`
379
+
380
+ ### Feb 2, 2022
381
+ * [Chris Hughes](https://github.com/Chris-hughes10) posted an exhaustive run through of `timm` on his blog yesterday. Well worth a read. [Getting Started with PyTorch Image Models (timm): A Practitioner’s Guide](https://towardsdatascience.com/getting-started-with-pytorch-image-models-timm-a-practitioners-guide-4e77b4bf9055)
382
+ * I'm currently prepping to merge the `norm_norm_norm` branch back to master (ver 0.6.x) in next week or so.
383
+ * The changes are more extensive than usual and may destabilize and break some model API use (aiming for full backwards compat). So, beware `pip install git+https://github.com/rwightman/pytorch-image-models` installs!
384
+ * `0.5.x` releases and a `0.5.x` branch will remain stable with a cherry pick or two until dust clears. Recommend sticking to pypi install for a bit if you want stable.
385
+
386
+ ### Jan 14, 2022
387
+ * Version 0.5.4 w/ release to be pushed to pypi. It's been a while since last pypi update and riskier changes will be merged to main branch soon....
388
+ * Add ConvNeXT models w/ weights from official impl (https://github.com/facebookresearch/ConvNeXt), a few perf tweaks, compatible with timm features
389
+ * Tried training a few small (~1.8-3M param) / mobile optimized models, a few are good so far, more on the way...
390
+ * `mnasnet_small` - 65.6 top-1
391
+ * `mobilenetv2_050` - 65.9
392
+ * `lcnet_100/075/050` - 72.1 / 68.8 / 63.1
393
+ * `semnasnet_075` - 73
394
+ * `fbnetv3_b/d/g` - 79.1 / 79.7 / 82.0
395
+ * TinyNet models added by [rsomani95](https://github.com/rsomani95)
396
+ * LCNet added via MobileNetV3 architecture
397
+
398
+ ### Jan 5, 2023
399
+ * ConvNeXt-V2 models and weights added to existing `convnext.py`
400
+ * Paper: [ConvNeXt V2: Co-designing and Scaling ConvNets with Masked Autoencoders](http://arxiv.org/abs/2301.00808)
401
+ * Reference impl: https://github.com/facebookresearch/ConvNeXt-V2 (NOTE: weights currently CC-BY-NC)
402
+
403
+ ### Dec 23, 2022 🎄☃
404
+ * Add FlexiViT models and weights from https://github.com/google-research/big_vision (check out paper at https://arxiv.org/abs/2212.08013)
405
+ * NOTE currently resizing is static on model creation, on-the-fly dynamic / train patch size sampling is a WIP
406
+ * Many more models updated to multi-weight and downloadable via HF hub now (convnext, efficientnet, mobilenet, vision_transformer*, beit)
407
+ * More model pretrained tag and adjustments, some model names changed (working on deprecation translations, consider main branch DEV branch right now, use 0.6.x for stable use)
408
+ * More ImageNet-12k (subset of 22k) pretrain models popping up:
409
+ * `efficientnet_b5.in12k_ft_in1k` - 85.9 @ 448x448
410
+ * `vit_medium_patch16_gap_384.in12k_ft_in1k` - 85.5 @ 384x384
411
+ * `vit_medium_patch16_gap_256.in12k_ft_in1k` - 84.5 @ 256x256
412
+ * `convnext_nano.in12k_ft_in1k` - 82.9 @ 288x288
413
+
414
+ ### Dec 8, 2022
415
+ * Add 'EVA l' to `vision_transformer.py`, MAE style ViT-L/14 MIM pretrain w/ EVA-CLIP targets, FT on ImageNet-1k (w/ ImageNet-22k intermediate for some)
416
+ * original source: https://github.com/baaivision/EVA
417
+
418
+ | model | top1 | param_count | gmac | macts | hub |
419
+ |:------------------------------------------|-----:|------------:|------:|------:|:----------------------------------------|
420
+ | eva_large_patch14_336.in22k_ft_in22k_in1k | 89.2 | 304.5 | 191.1 | 270.2 | [link](https://huggingface.co/BAAI/EVA) |
421
+ | eva_large_patch14_336.in22k_ft_in1k | 88.7 | 304.5 | 191.1 | 270.2 | [link](https://huggingface.co/BAAI/EVA) |
422
+ | eva_large_patch14_196.in22k_ft_in22k_in1k | 88.6 | 304.1 | 61.6 | 63.5 | [link](https://huggingface.co/BAAI/EVA) |
423
+ | eva_large_patch14_196.in22k_ft_in1k | 87.9 | 304.1 | 61.6 | 63.5 | [link](https://huggingface.co/BAAI/EVA) |
424
+
425
+ ### Dec 6, 2022
426
+ * Add 'EVA g', BEiT style ViT-g/14 model weights w/ both MIM pretrain and CLIP pretrain to `beit.py`.
427
+ * original source: https://github.com/baaivision/EVA
428
+ * paper: https://arxiv.org/abs/2211.07636
429
+
430
+ | model | top1 | param_count | gmac | macts | hub |
431
+ |:-----------------------------------------|-------:|--------------:|-------:|--------:|:----------------------------------------|
432
+ | eva_giant_patch14_560.m30m_ft_in22k_in1k | 89.8 | 1014.4 | 1906.8 | 2577.2 | [link](https://huggingface.co/BAAI/EVA) |
433
+ | eva_giant_patch14_336.m30m_ft_in22k_in1k | 89.6 | 1013 | 620.6 | 550.7 | [link](https://huggingface.co/BAAI/EVA) |
434
+ | eva_giant_patch14_336.clip_ft_in1k | 89.4 | 1013 | 620.6 | 550.7 | [link](https://huggingface.co/BAAI/EVA) |
435
+ | eva_giant_patch14_224.clip_ft_in1k | 89.1 | 1012.6 | 267.2 | 192.6 | [link](https://huggingface.co/BAAI/EVA) |
436
+
437
+ ### Dec 5, 2022
438
+
439
+ * Pre-release (`0.8.0dev0`) of multi-weight support (`model_arch.pretrained_tag`). Install with `pip install --pre timm`
440
+ * vision_transformer, maxvit, convnext are the first three model impl w/ support
441
+ * model names are changing with this (previous _21k, etc. fn will merge), still sorting out deprecation handling
442
+ * bugs are likely, but I need feedback so please try it out
443
+ * if stability is needed, please use 0.6.x pypi releases or clone from [0.6.x branch](https://github.com/rwightman/pytorch-image-models/tree/0.6.x)
444
+ * Support for PyTorch 2.0 compile is added in train/validate/inference/benchmark, use `--torchcompile` argument
445
+ * Inference script allows more control over output, select k for top-class index + prob json, csv or parquet output
446
+ * Add a full set of fine-tuned CLIP image tower weights from both LAION-2B and original OpenAI CLIP models
447
+
448
+ | model | top1 | param_count | gmac | macts | hub |
449
+ |:-------------------------------------------------|-------:|--------------:|-------:|--------:|:-------------------------------------------------------------------------------------|
450
+ | vit_huge_patch14_clip_336.laion2b_ft_in12k_in1k | 88.6 | 632.5 | 391 | 407.5 | [link](https://huggingface.co/timm/vit_huge_patch14_clip_336.laion2b_ft_in12k_in1k) |
451
+ | vit_large_patch14_clip_336.openai_ft_in12k_in1k | 88.3 | 304.5 | 191.1 | 270.2 | [link](https://huggingface.co/timm/vit_large_patch14_clip_336.openai_ft_in12k_in1k) |
452
+ | vit_huge_patch14_clip_224.laion2b_ft_in12k_in1k | 88.2 | 632 | 167.4 | 139.4 | [link](https://huggingface.co/timm/vit_huge_patch14_clip_224.laion2b_ft_in12k_in1k) |
453
+ | vit_large_patch14_clip_336.laion2b_ft_in12k_in1k | 88.2 | 304.5 | 191.1 | 270.2 | [link](https://huggingface.co/timm/vit_large_patch14_clip_336.laion2b_ft_in12k_in1k) |
454
+ | vit_large_patch14_clip_224.openai_ft_in12k_in1k | 88.2 | 304.2 | 81.1 | 88.8 | [link](https://huggingface.co/timm/vit_large_patch14_clip_224.openai_ft_in12k_in1k) |
455
+ | vit_large_patch14_clip_224.laion2b_ft_in12k_in1k | 87.9 | 304.2 | 81.1 | 88.8 | [link](https://huggingface.co/timm/vit_large_patch14_clip_224.laion2b_ft_in12k_in1k) |
456
+ | vit_large_patch14_clip_224.openai_ft_in1k | 87.9 | 304.2 | 81.1 | 88.8 | [link](https://huggingface.co/timm/vit_large_patch14_clip_224.openai_ft_in1k) |
457
+ | vit_large_patch14_clip_336.laion2b_ft_in1k | 87.9 | 304.5 | 191.1 | 270.2 | [link](https://huggingface.co/timm/vit_large_patch14_clip_336.laion2b_ft_in1k) |
458
+ | vit_huge_patch14_clip_224.laion2b_ft_in1k | 87.6 | 632 | 167.4 | 139.4 | [link](https://huggingface.co/timm/vit_huge_patch14_clip_224.laion2b_ft_in1k) |
459
+ | vit_large_patch14_clip_224.laion2b_ft_in1k | 87.3 | 304.2 | 81.1 | 88.8 | [link](https://huggingface.co/timm/vit_large_patch14_clip_224.laion2b_ft_in1k) |
460
+ | vit_base_patch16_clip_384.laion2b_ft_in12k_in1k | 87.2 | 86.9 | 55.5 | 101.6 | [link](https://huggingface.co/timm/vit_base_patch16_clip_384.laion2b_ft_in12k_in1k) |
461
+ | vit_base_patch16_clip_384.openai_ft_in12k_in1k | 87 | 86.9 | 55.5 | 101.6 | [link](https://huggingface.co/timm/vit_base_patch16_clip_384.openai_ft_in12k_in1k) |
462
+ | vit_base_patch16_clip_384.laion2b_ft_in1k | 86.6 | 86.9 | 55.5 | 101.6 | [link](https://huggingface.co/timm/vit_base_patch16_clip_384.laion2b_ft_in1k) |
463
+ | vit_base_patch16_clip_384.openai_ft_in1k | 86.2 | 86.9 | 55.5 | 101.6 | [link](https://huggingface.co/timm/vit_base_patch16_clip_384.openai_ft_in1k) |
464
+ | vit_base_patch16_clip_224.laion2b_ft_in12k_in1k | 86.2 | 86.6 | 17.6 | 23.9 | [link](https://huggingface.co/timm/vit_base_patch16_clip_224.laion2b_ft_in12k_in1k) |
465
+ | vit_base_patch16_clip_224.openai_ft_in12k_in1k | 85.9 | 86.6 | 17.6 | 23.9 | [link](https://huggingface.co/timm/vit_base_patch16_clip_224.openai_ft_in12k_in1k) |
466
+ | vit_base_patch32_clip_448.laion2b_ft_in12k_in1k | 85.8 | 88.3 | 17.9 | 23.9 | [link](https://huggingface.co/timm/vit_base_patch32_clip_448.laion2b_ft_in12k_in1k) |
467
+ | vit_base_patch16_clip_224.laion2b_ft_in1k | 85.5 | 86.6 | 17.6 | 23.9 | [link](https://huggingface.co/timm/vit_base_patch16_clip_224.laion2b_ft_in1k) |
468
+ | vit_base_patch32_clip_384.laion2b_ft_in12k_in1k | 85.4 | 88.3 | 13.1 | 16.5 | [link](https://huggingface.co/timm/vit_base_patch32_clip_384.laion2b_ft_in12k_in1k) |
469
+ | vit_base_patch16_clip_224.openai_ft_in1k | 85.3 | 86.6 | 17.6 | 23.9 | [link](https://huggingface.co/timm/vit_base_patch16_clip_224.openai_ft_in1k) |
470
+ | vit_base_patch32_clip_384.openai_ft_in12k_in1k | 85.2 | 88.3 | 13.1 | 16.5 | [link](https://huggingface.co/timm/vit_base_patch32_clip_384.openai_ft_in12k_in1k) |
471
+ | vit_base_patch32_clip_224.laion2b_ft_in12k_in1k | 83.3 | 88.2 | 4.4 | 5 | [link](https://huggingface.co/timm/vit_base_patch32_clip_224.laion2b_ft_in12k_in1k) |
472
+ | vit_base_patch32_clip_224.laion2b_ft_in1k | 82.6 | 88.2 | 4.4 | 5 | [link](https://huggingface.co/timm/vit_base_patch32_clip_224.laion2b_ft_in1k) |
473
+ | vit_base_patch32_clip_224.openai_ft_in1k | 81.9 | 88.2 | 4.4 | 5 | [link](https://huggingface.co/timm/vit_base_patch32_clip_224.openai_ft_in1k) |
474
+
475
+ * Port of MaxViT Tensorflow Weights from official impl at https://github.com/google-research/maxvit
476
+ * There were larger than expected drops for the upscaled 384/512 in21k fine-tune weights, possible detail missing, but the 21k FT did seem sensitive to small preprocessing
477
+
478
+ | model | top1 | param_count | gmac | macts | hub |
479
+ |:-----------------------------------|-------:|--------------:|-------:|--------:|:-----------------------------------------------------------------------|
480
+ | maxvit_xlarge_tf_512.in21k_ft_in1k | 88.5 | 475.8 | 534.1 | 1413.2 | [link](https://huggingface.co/timm/maxvit_xlarge_tf_512.in21k_ft_in1k) |
481
+ | maxvit_xlarge_tf_384.in21k_ft_in1k | 88.3 | 475.3 | 292.8 | 668.8 | [link](https://huggingface.co/timm/maxvit_xlarge_tf_384.in21k_ft_in1k) |
482
+ | maxvit_base_tf_512.in21k_ft_in1k | 88.2 | 119.9 | 138 | 704 | [link](https://huggingface.co/timm/maxvit_base_tf_512.in21k_ft_in1k) |
483
+ | maxvit_large_tf_512.in21k_ft_in1k | 88 | 212.3 | 244.8 | 942.2 | [link](https://huggingface.co/timm/maxvit_large_tf_512.in21k_ft_in1k) |
484
+ | maxvit_large_tf_384.in21k_ft_in1k | 88 | 212 | 132.6 | 445.8 | [link](https://huggingface.co/timm/maxvit_large_tf_384.in21k_ft_in1k) |
485
+ | maxvit_base_tf_384.in21k_ft_in1k | 87.9 | 119.6 | 73.8 | 332.9 | [link](https://huggingface.co/timm/maxvit_base_tf_384.in21k_ft_in1k) |
486
+ | maxvit_base_tf_512.in1k | 86.6 | 119.9 | 138 | 704 | [link](https://huggingface.co/timm/maxvit_base_tf_512.in1k) |
487
+ | maxvit_large_tf_512.in1k | 86.5 | 212.3 | 244.8 | 942.2 | [link](https://huggingface.co/timm/maxvit_large_tf_512.in1k) |
488
+ | maxvit_base_tf_384.in1k | 86.3 | 119.6 | 73.8 | 332.9 | [link](https://huggingface.co/timm/maxvit_base_tf_384.in1k) |
489
+ | maxvit_large_tf_384.in1k | 86.2 | 212 | 132.6 | 445.8 | [link](https://huggingface.co/timm/maxvit_large_tf_384.in1k) |
490
+ | maxvit_small_tf_512.in1k | 86.1 | 69.1 | 67.3 | 383.8 | [link](https://huggingface.co/timm/maxvit_small_tf_512.in1k) |
491
+ | maxvit_tiny_tf_512.in1k | 85.7 | 31 | 33.5 | 257.6 | [link](https://huggingface.co/timm/maxvit_tiny_tf_512.in1k) |
492
+ | maxvit_small_tf_384.in1k | 85.5 | 69 | 35.9 | 183.6 | [link](https://huggingface.co/timm/maxvit_small_tf_384.in1k) |
493
+ | maxvit_tiny_tf_384.in1k | 85.1 | 31 | 17.5 | 123.4 | [link](https://huggingface.co/timm/maxvit_tiny_tf_384.in1k) |
494
+ | maxvit_large_tf_224.in1k | 84.9 | 211.8 | 43.7 | 127.4 | [link](https://huggingface.co/timm/maxvit_large_tf_224.in1k) |
495
+ | maxvit_base_tf_224.in1k | 84.9 | 119.5 | 24 | 95 | [link](https://huggingface.co/timm/maxvit_base_tf_224.in1k) |
496
+ | maxvit_small_tf_224.in1k | 84.4 | 68.9 | 11.7 | 53.2 | [link](https://huggingface.co/timm/maxvit_small_tf_224.in1k) |
497
+ | maxvit_tiny_tf_224.in1k | 83.4 | 30.9 | 5.6 | 35.8 | [link](https://huggingface.co/timm/maxvit_tiny_tf_224.in1k) |
498
+
499
+ ### Oct 15, 2022
500
+ * Train and validation script enhancements
501
+ * Non-GPU (ie CPU) device support
502
+ * SLURM compatibility for train script
503
+ * HF datasets support (via ReaderHfds)
504
+ * TFDS/WDS dataloading improvements (sample padding/wrap for distributed use fixed wrt sample count estimate)
505
+ * in_chans !=3 support for scripts / loader
506
+ * Adan optimizer
507
+ * Can enable per-step LR scheduling via args
508
+ * Dataset 'parsers' renamed to 'readers', more descriptive of purpose
509
+ * AMP args changed, APEX via `--amp-impl apex`, bfloat16 supported via `--amp-dtype bfloat16`
510
+ * main branch switched to 0.7.x version, 0.6.x forked for stable release of weight only adds
511
+ * master -> main branch rename
512
+
513
+ ### Oct 10, 2022
514
+ * More weights in `maxxvit` series, incl first ConvNeXt block based `coatnext` and `maxxvit` experiments:
515
+ * `coatnext_nano_rw_224` - 82.0 @ 224 (G) -- (uses ConvNeXt conv block, no BatchNorm)
516
+ * `maxxvit_rmlp_nano_rw_256` - 83.0 @ 256, 83.7 @ 320 (G) (uses ConvNeXt conv block, no BN)
517
+ * `maxvit_rmlp_small_rw_224` - 84.5 @ 224, 85.1 @ 320 (G)
518
+ * `maxxvit_rmlp_small_rw_256` - 84.6 @ 256, 84.9 @ 288 (G) -- could be trained better, hparams need tuning (uses ConvNeXt block, no BN)
519
+ * `coatnet_rmlp_2_rw_224` - 84.6 @ 224, 85 @ 320 (T)
520
+ * NOTE: official MaxVit weights (in1k) have been released at https://github.com/google-research/maxvit -- some extra work is needed to port and adapt since my impl was created independently of theirs and has a few small differences + the whole TF same padding fun.
521
+
522
+ ### Sept 23, 2022
523
+ * LAION-2B CLIP image towers supported as pretrained backbones for fine-tune or features (no classifier)
524
+ * vit_base_patch32_224_clip_laion2b
525
+ * vit_large_patch14_224_clip_laion2b
526
+ * vit_huge_patch14_224_clip_laion2b
527
+ * vit_giant_patch14_224_clip_laion2b
528
+
529
+ ### Sept 7, 2022
530
+ * Hugging Face [`timm` docs](https://huggingface.co/docs/hub/timm) home now exists, look for more here in the future
531
+ * Add BEiT-v2 weights for base and large 224x224 models from https://github.com/microsoft/unilm/tree/master/beit2
532
+ * Add more weights in `maxxvit` series incl a `pico` (7.5M params, 1.9 GMACs), two `tiny` variants:
533
+ * `maxvit_rmlp_pico_rw_256` - 80.5 @ 256, 81.3 @ 320 (T)
534
+ * `maxvit_tiny_rw_224` - 83.5 @ 224 (G)
535
+ * `maxvit_rmlp_tiny_rw_256` - 84.2 @ 256, 84.8 @ 320 (T)
536
+
537
+ ### Aug 29, 2022
538
+ * MaxVit window size scales with img_size by default. Add new RelPosMlp MaxViT weight that leverages this:
539
+ * `maxvit_rmlp_nano_rw_256` - 83.0 @ 256, 83.6 @ 320 (T)
540
+
541
+ ### Aug 26, 2022
542
+ * CoAtNet (https://arxiv.org/abs/2106.04803) and MaxVit (https://arxiv.org/abs/2204.01697) `timm` original models
543
+ * both found in [`maxxvit.py`](https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/maxxvit.py) model def, contains numerous experiments outside scope of original papers
544
+ * an unfinished Tensorflow version from MaxVit authors can be found https://github.com/google-research/maxvit
545
+ * Initial CoAtNet and MaxVit timm pretrained weights (working on more):
546
+ * `coatnet_nano_rw_224` - 81.7 @ 224 (T)
547
+ * `coatnet_rmlp_nano_rw_224` - 82.0 @ 224, 82.8 @ 320 (T)
548
+ * `coatnet_0_rw_224` - 82.4 (T) -- NOTE timm '0' coatnets have 2 more 3rd stage blocks
549
+ * `coatnet_bn_0_rw_224` - 82.4 (T)
550
+ * `maxvit_nano_rw_256` - 82.9 @ 256 (T)
551
+ * `coatnet_rmlp_1_rw_224` - 83.4 @ 224, 84 @ 320 (T)
552
+ * `coatnet_1_rw_224` - 83.6 @ 224 (G)
553
+ * (T) = TPU trained with `bits_and_tpu` branch training code, (G) = GPU trained
554
+ * GCVit (weights adapted from https://github.com/NVlabs/GCVit, code 100% `timm` re-write for license purposes)
555
+ * MViT-V2 (multi-scale vit, adapted from https://github.com/facebookresearch/mvit)
556
+ * EfficientFormer (adapted from https://github.com/snap-research/EfficientFormer)
557
+ * PyramidVisionTransformer-V2 (adapted from https://github.com/whai362/PVT)
558
+ * 'Fast Norm' support for LayerNorm and GroupNorm that avoids float32 upcast w/ AMP (uses APEX LN if available for further boost)
559
+
560
+
561
+ ### Aug 15, 2022
562
+ * ConvNeXt atto weights added
563
+ * `convnext_atto` - 75.7 @ 224, 77.0 @ 288
564
+ * `convnext_atto_ols` - 75.9 @ 224, 77.2 @ 288
565
+
566
+ ### Aug 5, 2022
567
+ * More custom ConvNeXt smaller model defs with weights
568
+ * `convnext_femto` - 77.5 @ 224, 78.7 @ 288
569
+ * `convnext_femto_ols` - 77.9 @ 224, 78.9 @ 288
570
+ * `convnext_pico` - 79.5 @ 224, 80.4 @ 288
571
+ * `convnext_pico_ols` - 79.5 @ 224, 80.5 @ 288
572
+ * `convnext_nano_ols` - 80.9 @ 224, 81.6 @ 288
573
+ * Updated EdgeNeXt to improve ONNX export, add new base variant and weights from original (https://github.com/mmaaz60/EdgeNeXt)
574
+
575
+ ### July 28, 2022
576
+ * Add freshly minted DeiT-III Medium (width=512, depth=12, num_heads=8) model weights. Thanks [Hugo Touvron](https://github.com/TouvronHugo)!
577
+
578
+ ### July 27, 2022
579
+ * All runtime benchmark and validation result csv files are up-to-date!
580
+ * A few more weights & model defs added:
581
+ * `darknetaa53` - 79.8 @ 256, 80.5 @ 288
582
+ * `convnext_nano` - 80.8 @ 224, 81.5 @ 288
583
+ * `cs3sedarknet_l` - 81.2 @ 256, 81.8 @ 288
584
+ * `cs3darknet_x` - 81.8 @ 256, 82.2 @ 288
585
+ * `cs3sedarknet_x` - 82.2 @ 256, 82.7 @ 288
586
+ * `cs3edgenet_x` - 82.2 @ 256, 82.7 @ 288
587
+ * `cs3se_edgenet_x` - 82.8 @ 256, 83.5 @ 320
588
+ * `cs3*` weights above all trained on TPU w/ `bits_and_tpu` branch. Thanks to TRC program!
589
+ * Add output_stride=8 and 16 support to ConvNeXt (dilation)
590
+ * deit3 models not being able to resize pos_emb fixed
591
+ * Version 0.6.7 PyPi release (w/ above bug fixes and new weights since 0.6.5)
592
+
593
+ ### July 8, 2022
594
+ More models, more fixes
595
+ * Official research models (w/ weights) added:
596
+ * EdgeNeXt from (https://github.com/mmaaz60/EdgeNeXt)
597
+ * MobileViT-V2 from (https://github.com/apple/ml-cvnets)
598
+ * DeiT III (Revenge of the ViT) from (https://github.com/facebookresearch/deit)
599
+ * My own models:
600
+ * Small `ResNet` defs added by request with 1 block repeats for both basic and bottleneck (resnet10 and resnet14)
601
+ * `CspNet` refactored with dataclass config, simplified CrossStage3 (`cs3`) option. These are closer to YOLO-v5+ backbone defs.
602
+ * More relative position vit fiddling. Two `srelpos` (shared relative position) models trained, and a medium w/ class token.
603
+ * Add an alternate downsample mode to EdgeNeXt and train a `small` model. Better than original small, but not their new USI trained weights.
604
+ * My own model weight results (all ImageNet-1k training)
605
+ * `resnet10t` - 66.5 @ 176, 68.3 @ 224
606
+ * `resnet14t` - 71.3 @ 176, 72.3 @ 224
607
+ * `resnetaa50` - 80.6 @ 224, 81.6 @ 288
608
+ * `darknet53` - 80.0 @ 256, 80.5 @ 288
609
+ * `cs3darknet_m` - 77.0 @ 256, 77.6 @ 288
610
+ * `cs3darknet_focus_m` - 76.7 @ 256, 77.3 @ 288
611
+ * `cs3darknet_l` - 80.4 @ 256, 80.9 @ 288
612
+ * `cs3darknet_focus_l` - 80.3 @ 256, 80.9 @ 288
613
+ * `vit_srelpos_small_patch16_224` - 81.1 @ 224, 82.1 @ 320
614
+ * `vit_srelpos_medium_patch16_224` - 82.3 @ 224, 83.1 @ 320
615
+ * `vit_relpos_small_patch16_cls_224` - 82.6 @ 224, 83.6 @ 320
616
+ * `edgenext_small_rw` - 79.6 @ 224, 80.4 @ 320
617
+ * `cs3`, `darknet`, and `vit_*relpos` weights above all trained on TPU thanks to TRC program! Rest trained on overheating GPUs.
618
+ * Hugging Face Hub support fixes verified, demo notebook TBA
619
+ * Pretrained weights / configs can be loaded externally (ie from local disk) w/ support for head adaptation.
620
+ * Add support to change image extensions scanned by `timm` datasets/parsers. See (https://github.com/rwightman/pytorch-image-models/pull/1274#issuecomment-1178303103)
621
+ * Default ConvNeXt LayerNorm impl to use `F.layer_norm(x.permute(0, 2, 3, 1), ...).permute(0, 3, 1, 2)` via `LayerNorm2d` in all cases.
622
+ * a bit slower than previous custom impl on some hardware (ie Ampere w/ CL), but overall fewer regressions across wider HW / PyTorch version ranges.
623
+ * previous impl exists as `LayerNormExp2d` in `models/layers/norm.py`
624
+ * Numerous bug fixes
625
+ * Currently testing for imminent PyPi 0.6.x release
626
+ * LeViT pretraining of larger models still a WIP, they don't train well / easily without distillation. Time to add distill support (finally)?
627
+ * ImageNet-22k weight training + finetune ongoing, work on multi-weight support (slowly) chugging along (there are a LOT of weights, sigh) ...
628
+
629
+ ### May 13, 2022
630
+ * Official Swin-V2 models and weights added from (https://github.com/microsoft/Swin-Transformer). Cleaned up to support torchscript.
631
+ * Some refactoring for existing `timm` Swin-V2-CR impl, will likely do a bit more to bring parts closer to official and decide whether to merge some aspects.
632
+ * More Vision Transformer relative position / residual post-norm experiments (all trained on TPU thanks to TRC program)
633
+ * `vit_relpos_small_patch16_224` - 81.5 @ 224, 82.5 @ 320 -- rel pos, layer scale, no class token, avg pool
634
+ * `vit_relpos_medium_patch16_rpn_224` - 82.3 @ 224, 83.1 @ 320 -- rel pos + res-post-norm, no class token, avg pool
635
+ * `vit_relpos_medium_patch16_224` - 82.5 @ 224, 83.3 @ 320 -- rel pos, layer scale, no class token, avg pool
636
+ * `vit_relpos_base_patch16_gapcls_224` - 82.8 @ 224, 83.9 @ 320 -- rel pos, layer scale, class token, avg pool (by mistake)
637
+ * Bring 512 dim, 8-head 'medium' ViT model variant back to life (after using in a pre DeiT 'small' model for first ViT impl back in 2020)
638
+ * Add ViT relative position support for switching btw existing impl and some additions in official Swin-V2 impl for future trials
639
+ * Sequencer2D impl (https://arxiv.org/abs/2205.01972), added via PR from author (https://github.com/okojoalg)
640
+
641
+ ### May 2, 2022
642
+ * Vision Transformer experiments adding Relative Position (Swin-V2 log-coord) (`vision_transformer_relpos.py`) and Residual Post-Norm branches (from Swin-V2) (`vision_transformer*.py`)
643
+ * `vit_relpos_base_patch32_plus_rpn_256` - 79.5 @ 256, 80.6 @ 320 -- rel pos + extended width + res-post-norm, no class token, avg pool
644
+ * `vit_relpos_base_patch16_224` - 82.5 @ 224, 83.6 @ 320 -- rel pos, layer scale, no class token, avg pool
645
+ * `vit_base_patch16_rpn_224` - 82.3 @ 224 -- rel pos + res-post-norm, no class token, avg pool
646
+ * Vision Transformer refactor to remove representation layer that was only used in initial vit and rarely used since with newer pretrain (ie `How to Train Your ViT`)
647
+ * `vit_*` models support removal of class token, use of global average pool, use of fc_norm (ala beit, mae).
648
+
649
+ ### April 22, 2022
650
+ * `timm` models are now officially supported in [fast.ai](https://www.fast.ai/)! Just in time for the new Practical Deep Learning course. `timmdocs` documentation link updated to [timm.fast.ai](http://timm.fast.ai/).
651
+ * Two more model weights added in the TPU trained [series](https://github.com/rwightman/pytorch-image-models/releases/tag/v0.1-tpu-weights). Some In22k pretrain still in progress.
652
+ * `seresnext101d_32x8d` - 83.69 @ 224, 84.35 @ 288
653
+ * `seresnextaa101d_32x8d` (anti-aliased w/ AvgPool2d) - 83.85 @ 224, 84.57 @ 288
654
+
655
+ ### March 23, 2022
656
+ * Add `ParallelBlock` and `LayerScale` option to base vit models to support model configs in [Three things everyone should know about ViT](https://arxiv.org/abs/2203.09795)
657
+ * `convnext_tiny_hnf` (head norm first) weights trained with (close to) A2 recipe, 82.2% top-1, could do better with more epochs.
658
+
659
+ ### March 21, 2022
660
+ * Merge `norm_norm_norm`. **IMPORTANT** this update for a coming 0.6.x release will likely de-stabilize the master branch for a while. Branch [`0.5.x`](https://github.com/rwightman/pytorch-image-models/tree/0.5.x) or a previous 0.5.x release can be used if stability is required.
661
+ * Significant weights update (all TPU trained) as described in this [release](https://github.com/rwightman/pytorch-image-models/releases/tag/v0.1-tpu-weights)
662
+ * `regnety_040` - 82.3 @ 224, 82.96 @ 288
663
+ * `regnety_064` - 83.0 @ 224, 83.65 @ 288
664
+ * `regnety_080` - 83.17 @ 224, 83.86 @ 288
665
+ * `regnetv_040` - 82.44 @ 224, 83.18 @ 288 (timm pre-act)
666
+ * `regnetv_064` - 83.1 @ 224, 83.71 @ 288 (timm pre-act)
667
+ * `regnetz_040` - 83.67 @ 256, 84.25 @ 320
668
+ * `regnetz_040h` - 83.77 @ 256, 84.5 @ 320 (w/ extra fc in head)
669
+ * `resnetv2_50d_gn` - 80.8 @ 224, 81.96 @ 288 (pre-act GroupNorm)
670
+ * `resnetv2_50d_evos` - 80.77 @ 224, 82.04 @ 288 (pre-act EvoNormS)
671
+ * `regnetz_c16_evos` - 81.9 @ 256, 82.64 @ 320 (EvoNormS)
672
+ * `regnetz_d8_evos` - 83.42 @ 256, 84.04 @ 320 (EvoNormS)
673
+ * `xception41p` - 82 @ 299 (timm pre-act)
674
+ * `xception65` - 83.17 @ 299
675
+ * `xception65p` - 83.14 @ 299 (timm pre-act)
676
+ * `resnext101_64x4d` - 82.46 @ 224, 83.16 @ 288
677
+ * `seresnext101_32x8d` - 83.57 @ 224, 84.270 @ 288
678
+ * `resnetrs200` - 83.85 @ 256, 84.44 @ 320
679
+ * HuggingFace hub support fixed w/ initial groundwork for allowing alternative 'config sources' for pretrained model definitions and weights (generic local file / remote url support soon)
680
+ * SwinTransformer-V2 implementation added. Submitted by [Christoph Reich](https://github.com/ChristophReich1996). Training experiments and model changes by myself are ongoing so expect compat breaks.
681
+ * Swin-S3 (AutoFormerV2) models / weights added from https://github.com/microsoft/Cream/tree/main/AutoFormerV2
682
+ * MobileViT models w/ weights adapted from https://github.com/apple/ml-cvnets
683
+ * PoolFormer models w/ weights adapted from https://github.com/sail-sg/poolformer
684
+ * VOLO models w/ weights adapted from https://github.com/sail-sg/volo
685
+ * Significant work experimenting with non-BatchNorm norm layers such as EvoNorm, FilterResponseNorm, GroupNorm, etc
686
+ * Enhance support for alternate norm + act ('NormAct') layers added to a number of models, esp EfficientNet/MobileNetV3, RegNet, and aligned Xception
687
+ * Grouped conv support added to EfficientNet family
688
+ * Add 'group matching' API to all models to allow grouping model parameters for application of 'layer-wise' LR decay, lr scale added to LR scheduler
689
+ * Gradient checkpointing support added to many models
690
+ * `forward_head(x, pre_logits=False)` fn added to all models to allow separate calls of `forward_features` + `forward_head`
691
+ * All vision transformer and vision MLP models updated to return non-pooled / non-token selected features from `forward_features`, for consistency with CNN models, token selection or pooling now applied in `forward_head`
692
+
693
+ ### Feb 2, 2022
694
+ * [Chris Hughes](https://github.com/Chris-hughes10) posted an exhaustive run through of `timm` on his blog yesterday. Well worth a read. [Getting Started with PyTorch Image Models (timm): A Practitioner’s Guide](https://towardsdatascience.com/getting-started-with-pytorch-image-models-timm-a-practitioners-guide-4e77b4bf9055)
695
+ * I'm currently prepping to merge the `norm_norm_norm` branch back to master (ver 0.6.x) in next week or so.
696
+ * The changes are more extensive than usual and may destabilize and break some model API use (aiming for full backwards compat). So, beware `pip install git+https://github.com/rwightman/pytorch-image-models` installs!
697
+ * `0.5.x` releases and a `0.5.x` branch will remain stable with a cherry pick or two until dust clears. Recommend sticking to pypi install for a bit if you want stable.
698
+
699
+ ### Jan 14, 2022
700
+ * Version 0.5.4 w/ release to be pushed to pypi. It's been a while since last pypi update and riskier changes will be merged to main branch soon....
701
+ * Add ConvNeXT models w/ weights from official impl (https://github.com/facebookresearch/ConvNeXt), a few perf tweaks, compatible with timm features
702
+ * Tried training a few small (~1.8-3M param) / mobile optimized models, a few are good so far, more on the way...
703
+ * `mnasnet_small` - 65.6 top-1
704
+ * `mobilenetv2_050` - 65.9
705
+ * `lcnet_100/075/050` - 72.1 / 68.8 / 63.1
706
+ * `semnasnet_075` - 73
707
+ * `fbnetv3_b/d/g` - 79.1 / 79.7 / 82.0
708
+ * TinyNet models added by [rsomani95](https://github.com/rsomani95)
709
+ * LCNet added via MobileNetV3 architecture
710
+
pytorch-image-models/docs/feature_extraction.md ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Feature Extraction
2
+
3
+ All of the models in `timm` have consistent mechanisms for obtaining various types of features from the model for tasks besides classification.
4
+
5
+ ## Penultimate Layer Features (Pre-Classifier Features)
6
+
7
+ The features from the penultimate model layer can be obtained in several ways without requiring model surgery (although feel free to do surgery). One must first decide if they want pooled or un-pooled features.
8
+
9
+ ### Unpooled
10
+
11
+ There are three ways to obtain unpooled features.
12
+
13
+ Without modifying the network, one can call `model.forward_features(input)` on any model instead of the usual `model(input)`. This will bypass the head classifier and global pooling for networks.
14
+
15
+ If one wants to explicitly modify the network to return unpooled features, they can either create the model without a classifier and pooling, or remove it later. Both paths remove the parameters associated with the classifier from the network.
16
+
17
+ #### forward_features()
18
+ ```python hl_lines="3 6"
19
+ import torch
20
+ import timm
21
+ m = timm.create_model('xception41', pretrained=True)
22
+ o = m(torch.randn(2, 3, 299, 299))
23
+ print(f'Original shape: {o.shape}')
24
+ o = m.forward_features(torch.randn(2, 3, 299, 299))
25
+ print(f'Unpooled shape: {o.shape}')
26
+ ```
27
+ Output:
28
+ ```text
29
+ Original shape: torch.Size([2, 1000])
30
+ Unpooled shape: torch.Size([2, 2048, 10, 10])
31
+ ```
32
+
33
+ #### Create with no classifier and pooling
34
+ ```python hl_lines="3"
35
+ import torch
36
+ import timm
37
+ m = timm.create_model('resnet50', pretrained=True, num_classes=0, global_pool='')
38
+ o = m(torch.randn(2, 3, 224, 224))
39
+ print(f'Unpooled shape: {o.shape}')
40
+ ```
41
+ Output:
42
+ ```text
43
+ Unpooled shape: torch.Size([2, 2048, 7, 7])
44
+ ```
45
+
46
+ #### Remove it later
47
+ ```python hl_lines="3 6"
48
+ import torch
49
+ import timm
50
+ m = timm.create_model('densenet121', pretrained=True)
51
+ o = m(torch.randn(2, 3, 224, 224))
52
+ print(f'Original shape: {o.shape}')
53
+ m.reset_classifier(0, '')
54
+ o = m(torch.randn(2, 3, 224, 224))
55
+ print(f'Unpooled shape: {o.shape}')
56
+ ```
57
+ Output:
58
+ ```text
59
+ Original shape: torch.Size([2, 1000])
60
+ Unpooled shape: torch.Size([2, 1024, 7, 7])
61
+ ```
62
+
63
+ ### Pooled
64
+
65
+ To modify the network to return pooled features, one can use `forward_features()` and pool/flatten the result themselves, or modify the network like above but keep pooling intact.
66
+
67
+ #### Create with no classifier
68
+ ```python hl_lines="3"
69
+ import torch
70
+ import timm
71
+ m = timm.create_model('resnet50', pretrained=True, num_classes=0)
72
+ o = m(torch.randn(2, 3, 224, 224))
73
+ print(f'Pooled shape: {o.shape}')
74
+ ```
75
+ Output:
76
+ ```text
77
+ Pooled shape: torch.Size([2, 2048])
78
+ ```
79
+
80
+ #### Remove it later
81
+ ```python hl_lines="3 6"
82
+ import torch
83
+ import timm
84
+ m = timm.create_model('ese_vovnet19b_dw', pretrained=True)
85
+ o = m(torch.randn(2, 3, 224, 224))
86
+ print(f'Original shape: {o.shape}')
87
+ m.reset_classifier(0)
88
+ o = m(torch.randn(2, 3, 224, 224))
89
+ print(f'Pooled shape: {o.shape}')
90
+ ```
91
+ Output:
92
+ ```text
93
+ Original shape: torch.Size([2, 1000])
94
+ Pooled shape: torch.Size([2, 1024])
95
+ ```
96
+
97
+
98
+ ## Multi-scale Feature Maps (Feature Pyramid)
99
+
100
+ Object detection, segmentation, keypoint, and a variety of dense pixel tasks require access to feature maps from the backbone network at multiple scales. This is often done by modifying the original classification network. Since each network varies quite a bit in structure, it's not uncommon to see only a few backbones supported in any given object detection or segmentation library.
101
+
102
+ `timm` allows a consistent interface for creating any of the included models as feature backbones that output feature maps for selected levels.
103
+
104
+ A feature backbone can be created by adding the argument `features_only=True` to any `create_model` call. By default 5 strides will be output from most models (not all have that many), with the first starting at 2 (some start at 1 or 4).
105
+
106
+ ### Create a feature map extraction model
107
+ ```python hl_lines="3"
108
+ import torch
109
+ import timm
110
+ m = timm.create_model('resnest26d', features_only=True, pretrained=True)
111
+ o = m(torch.randn(2, 3, 224, 224))
112
+ for x in o:
113
+ print(x.shape)
114
+ ```
115
+ Output:
116
+ ```text
117
+ torch.Size([2, 64, 112, 112])
118
+ torch.Size([2, 256, 56, 56])
119
+ torch.Size([2, 512, 28, 28])
120
+ torch.Size([2, 1024, 14, 14])
121
+ torch.Size([2, 2048, 7, 7])
122
+ ```
123
+
124
+ ### Query the feature information
125
+
126
+ After a feature backbone has been created, it can be queried to provide channel or resolution reduction information to the downstream heads without requiring static config or hardcoded constants. The `.feature_info` attribute is a class encapsulating the information about the feature extraction points.
127
+
128
+ ```python hl_lines="3 4"
129
+ import torch
130
+ import timm
131
+ m = timm.create_model('regnety_032', features_only=True, pretrained=True)
132
+ print(f'Feature channels: {m.feature_info.channels()}')
133
+ o = m(torch.randn(2, 3, 224, 224))
134
+ for x in o:
135
+ print(x.shape)
136
+ ```
137
+ Output:
138
+ ```text
139
+ Feature channels: [32, 72, 216, 576, 1512]
140
+ torch.Size([2, 32, 112, 112])
141
+ torch.Size([2, 72, 56, 56])
142
+ torch.Size([2, 216, 28, 28])
143
+ torch.Size([2, 576, 14, 14])
144
+ torch.Size([2, 1512, 7, 7])
145
+ ```
146
+
147
+ ### Select specific feature levels or limit the stride
148
+
149
+ There are two additional creation arguments impacting the output features.
150
+
151
+ * `out_indices` selects which indices to output
152
+ * `output_stride` limits the feature output stride of the network (also works in classification mode BTW)
153
+
154
+ `out_indices` is supported by all models, but not all models have the same index to feature stride mapping. Look at the code or check feature_info to compare. The out indices generally correspond to the `C(i+1)th` feature level (a `2^(i+1)` reduction). For most models, index 0 is the stride 2 features, and index 4 is stride 32.
155
+
156
+ `output_stride` is achieved by converting layers to use dilated convolutions. Doing so is not always straightforward, some networks only support `output_stride=32`.
157
+
158
+ ```python hl_lines="3 4 5"
159
+ import torch
160
+ import timm
161
+ m = timm.create_model('ecaresnet101d', features_only=True, output_stride=8, out_indices=(2, 4), pretrained=True)
162
+ print(f'Feature channels: {m.feature_info.channels()}')
163
+ print(f'Feature reduction: {m.feature_info.reduction()}')
164
+ o = m(torch.randn(2, 3, 320, 320))
165
+ for x in o:
166
+ print(x.shape)
167
+ ```
168
+ Output:
169
+ ```text
170
+ Feature channels: [512, 2048]
171
+ Feature reduction: [8, 8]
172
+ torch.Size([2, 512, 40, 40])
173
+ torch.Size([2, 2048, 40, 40])
174
+ ```
pytorch-image-models/docs/index.md ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Getting Started
2
+
3
+ ## Welcome
4
+
5
+ Welcome to the `timm` documentation, a lean set of docs that covers the basics of `timm`.
6
+
7
+ For a more comprehensive set of docs (currently under development), please visit [timmdocs](http://timm.fast.ai) by [Aman Arora](https://github.com/amaarora).
8
+
9
+ ## Install
10
+
11
+ The library can be installed with pip:
12
+
13
+ ```
14
+ pip install timm
15
+ ```
16
+
17
+ I update the PyPi (pip) packages when I'm confident there are no significant model regressions from previous releases. If you want to pip install the bleeding edge from GitHub, use:
18
+ ```
19
+ pip install git+https://github.com/rwightman/pytorch-image-models.git
20
+ ```
21
+
22
+ !!! info "Conda Environment"
23
+ All development and testing has been done in Conda Python 3 environments on Linux x86-64 systems, specifically 3.7, 3.8, 3.9, 3.10
24
+
25
+ Little to no care has been taken to be Python 2.x friendly, and I will not support it. If you run into any challenges running on Windows, or other OS, I'm definitely open to looking into those issues so long as it's in a reproducible (read Conda) environment.
26
+
27
+ PyTorch versions 1.9, 1.10, 1.11 have been tested with the latest versions of this code.
28
+
29
+ I've tried to keep the dependencies minimal, the setup is as per the PyTorch default install instructions for Conda:
30
+ ```
31
+ conda create -n torch-env
32
+ conda activate torch-env
33
+ conda install pytorch torchvision cudatoolkit=11.3 -c pytorch
34
+ conda install pyyaml
35
+ ```
36
+
37
+ ## Load a Pretrained Model
38
+
39
+ Pretrained models can be loaded using `timm.create_model`
40
+
41
+ ```python
42
+ import timm
43
+
44
+ m = timm.create_model('mobilenetv3_large_100', pretrained=True)
45
+ m.eval()
46
+ ```
47
+
48
+ ## List Models with Pretrained Weights
49
+ ```python
50
+ import timm
51
+ from pprint import pprint
52
+ model_names = timm.list_models(pretrained=True)
53
+ pprint(model_names)
54
+ >>> ['adv_inception_v3',
55
+ 'cspdarknet53',
56
+ 'cspresnext50',
57
+ 'densenet121',
58
+ 'densenet161',
59
+ 'densenet169',
60
+ 'densenet201',
61
+ 'densenetblur121d',
62
+ 'dla34',
63
+ 'dla46_c',
64
+ ...
65
+ ]
66
+ ```
67
+
68
+ ## List Model Architectures by Wildcard
69
+ ```python
70
+ import timm
71
+ from pprint import pprint
72
+ model_names = timm.list_models('*resne*t*')
73
+ pprint(model_names)
74
+ >>> ['cspresnet50',
75
+ 'cspresnet50d',
76
+ 'cspresnet50w',
77
+ 'cspresnext50',
78
+ ...
79
+ ]
80
+ ```
pytorch-image-models/docs/javascripts/tables.js ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ app.location$.subscribe(function() {
2
+ var tables = document.querySelectorAll("article table")
3
+ tables.forEach(function(table) {
4
+ new Tablesort(table)
5
+ })
6
+ })
pytorch-image-models/docs/models.md ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Model Summaries
2
+
3
+ The model architectures included come from a wide variety of sources. Sources, including papers, original impl ("reference code") that I rewrote / adapted, and PyTorch impl that I leveraged directly ("code") are listed below.
4
+
5
+ Most included models have pretrained weights. The weights are either:
6
+
7
+ 1. from their original sources
8
+ 2. ported by myself from their original impl in a different framework (e.g. Tensorflow models)
9
+ 3. trained from scratch using the included training script
10
+
11
+ The validation results for the pretrained weights are [here](results.md)
12
+
13
+ A more exciting view (with pretty pictures) of the models within `timm` can be found at [paperswithcode](https://paperswithcode.com/lib/timm).
14
+
15
+ ## Big Transfer ResNetV2 (BiT) [[resnetv2.py](https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/resnetv2.py)]
16
+ * Paper: `Big Transfer (BiT): General Visual Representation Learning` - https://arxiv.org/abs/1912.11370
17
+ * Reference code: https://github.com/google-research/big_transfer
18
+
19
+ ## Cross-Stage Partial Networks [[cspnet.py](https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/cspnet.py)]
20
+ * Paper: `CSPNet: A New Backbone that can Enhance Learning Capability of CNN` - https://arxiv.org/abs/1911.11929
21
+ * Reference impl: https://github.com/WongKinYiu/CrossStagePartialNetworks
22
+
23
+ ## DenseNet [[densenet.py](https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/densenet.py)]
24
+ * Paper: `Densely Connected Convolutional Networks` - https://arxiv.org/abs/1608.06993
25
+ * Code: https://github.com/pytorch/vision/tree/master/torchvision/models
26
+
27
+ ## DLA [[dla.py](https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/dla.py)]
28
+ * Paper: https://arxiv.org/abs/1707.06484
29
+ * Code: https://github.com/ucbdrive/dla
30
+
31
+ ## Dual-Path Networks [[dpn.py](https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/dpn.py)]
32
+ * Paper: `Dual Path Networks` - https://arxiv.org/abs/1707.01629
33
+ * My PyTorch code: https://github.com/rwightman/pytorch-dpn-pretrained
34
+ * Reference code: https://github.com/cypw/DPNs
35
+
36
+ ## GPU-Efficient Networks [[byobnet.py](https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/byobnet.py)]
37
+ * Paper: `Neural Architecture Design for GPU-Efficient Networks` - https://arxiv.org/abs/2006.14090
38
+ * Reference code: https://github.com/idstcv/GPU-Efficient-Networks
39
+
40
+ ## HRNet [[hrnet.py](https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/hrnet.py)]
41
+ * Paper: `Deep High-Resolution Representation Learning for Visual Recognition` - https://arxiv.org/abs/1908.07919
42
+ * Code: https://github.com/HRNet/HRNet-Image-Classification
43
+
44
+ ## Inception-V3 [[inception_v3.py](https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/inception_v3.py)]
45
+ * Paper: `Rethinking the Inception Architecture for Computer Vision` - https://arxiv.org/abs/1512.00567
46
+ * Code: https://github.com/pytorch/vision/tree/master/torchvision/models
47
+
48
+ ## Inception-V4 [[inception_v4.py](https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/inception_v4.py)]
49
+ * Paper: `Inception-v4, Inception-ResNet and the Impact of Residual Connections on Learning` - https://arxiv.org/abs/1602.07261
50
+ * Code: https://github.com/Cadene/pretrained-models.pytorch
51
+ * Reference code: https://github.com/tensorflow/models/tree/master/research/slim/nets
52
+
53
+ ## Inception-ResNet-V2 [[inception_resnet_v2.py](https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/inception_resnet_v2.py)]
54
+ * Paper: `Inception-v4, Inception-ResNet and the Impact of Residual Connections on Learning` - https://arxiv.org/abs/1602.07261
55
+ * Code: https://github.com/Cadene/pretrained-models.pytorch
56
+ * Reference code: https://github.com/tensorflow/models/tree/master/research/slim/nets
57
+
58
+ ## NASNet-A [[nasnet.py](https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/nasnet.py)]
59
+ * Paper: `Learning Transferable Architectures for Scalable Image Recognition` - https://arxiv.org/abs/1707.07012
60
+ * Code: https://github.com/Cadene/pretrained-models.pytorch
61
+ * Reference code: https://github.com/tensorflow/models/tree/master/research/slim/nets/nasnet
62
+
63
+ ## PNasNet-5 [[pnasnet.py](https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/pnasnet.py)]
64
+ * Paper: `Progressive Neural Architecture Search` - https://arxiv.org/abs/1712.00559
65
+ * Code: https://github.com/Cadene/pretrained-models.pytorch
66
+ * Reference code: https://github.com/tensorflow/models/tree/master/research/slim/nets/nasnet
67
+
68
+ ## EfficientNet [[efficientnet.py](https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/efficientnet.py)]
69
+
70
+ * Papers:
71
+ * EfficientNet NoisyStudent (B0-B7, L2) - https://arxiv.org/abs/1911.04252
72
+ * EfficientNet AdvProp (B0-B8) - https://arxiv.org/abs/1911.09665
73
+ * EfficientNet (B0-B7) - https://arxiv.org/abs/1905.11946
74
+ * EfficientNet-EdgeTPU (S, M, L) - https://ai.googleblog.com/2019/08/efficientnet-edgetpu-creating.html
75
+ * MixNet - https://arxiv.org/abs/1907.09595
76
+ * MNASNet B1, A1 (Squeeze-Excite), and Small - https://arxiv.org/abs/1807.11626
77
+ * MobileNet-V2 - https://arxiv.org/abs/1801.04381
78
+ * FBNet-C - https://arxiv.org/abs/1812.03443
79
+ * Single-Path NAS - https://arxiv.org/abs/1904.02877
80
+ * My PyTorch code: https://github.com/rwightman/gen-efficientnet-pytorch
81
+ * Reference code: https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet
82
+
83
+ ## MobileNet-V3 [[mobilenetv3.py](https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/mobilenetv3.py)]
84
+ * Paper: `Searching for MobileNetV3` - https://arxiv.org/abs/1905.02244
85
+ * Reference code: https://github.com/tensorflow/models/tree/master/research/slim/nets/mobilenet
86
+
87
+ ## RegNet [[regnet.py](https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/regnet.py)]
88
+ * Paper: `Designing Network Design Spaces` - https://arxiv.org/abs/2003.13678
89
+ * Reference code: https://github.com/facebookresearch/pycls/blob/master/pycls/models/regnet.py
90
+
91
+ ## RepVGG [[byobnet.py](https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/byobnet.py)]
92
+ * Paper: `Making VGG-style ConvNets Great Again` - https://arxiv.org/abs/2101.03697
93
+ * Reference code: https://github.com/DingXiaoH/RepVGG
94
+
95
+ ## ResNet, ResNeXt [[resnet.py](https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/resnet.py)]
96
+
97
+ * ResNet (V1B)
98
+ * Paper: `Deep Residual Learning for Image Recognition` - https://arxiv.org/abs/1512.03385
99
+ * Code: https://github.com/pytorch/vision/tree/master/torchvision/models
100
+ * ResNeXt
101
+ * Paper: `Aggregated Residual Transformations for Deep Neural Networks` - https://arxiv.org/abs/1611.05431
102
+ * Code: https://github.com/pytorch/vision/tree/master/torchvision/models
103
+ * 'Bag of Tricks' / Gluon C, D, E, S ResNet variants
104
+ * Paper: `Bag of Tricks for Image Classification with CNNs` - https://arxiv.org/abs/1812.01187
105
+ * Code: https://github.com/dmlc/gluon-cv/blob/master/gluoncv/model_zoo/resnetv1b.py
106
+ * Instagram pretrained / ImageNet tuned ResNeXt101
107
+ * Paper: `Exploring the Limits of Weakly Supervised Pretraining` - https://arxiv.org/abs/1805.00932
108
+ * Weights: https://pytorch.org/hub/facebookresearch_WSL-Images_resnext (NOTE: CC BY-NC 4.0 License, NOT commercial friendly)
109
+ * Semi-supervised (SSL) / Semi-weakly Supervised (SWSL) ResNet and ResNeXts
110
+ * Paper: `Billion-scale semi-supervised learning for image classification` - https://arxiv.org/abs/1905.00546
111
+ * Weights: https://github.com/facebookresearch/semi-supervised-ImageNet1K-models (NOTE: CC BY-NC 4.0 License, NOT commercial friendly)
112
+ * Squeeze-and-Excitation Networks
113
+ * Paper: `Squeeze-and-Excitation Networks` - https://arxiv.org/abs/1709.01507
114
+ * Code: Added to ResNet base, this is current version going forward, old `senet.py` is being deprecated
115
+ * ECAResNet (ECA-Net)
116
+ * Paper: `ECA-Net: Efficient Channel Attention for Deep CNN` - https://arxiv.org/abs/1910.03151v4
117
+ * Code: Added to ResNet base, ECA module contributed by @VRandme, reference https://github.com/BangguWu/ECANet
118
+
119
+ ## Res2Net [[res2net.py](https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/res2net.py)]
120
+ * Paper: `Res2Net: A New Multi-scale Backbone Architecture` - https://arxiv.org/abs/1904.01169
121
+ * Code: https://github.com/gasvn/Res2Net
122
+
123
+ ## ResNeSt [[resnest.py](https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/resnest.py)]
124
+ * Paper: `ResNeSt: Split-Attention Networks` - https://arxiv.org/abs/2004.08955
125
+ * Code: https://github.com/zhanghang1989/ResNeSt
126
+
127
+ ## ReXNet [[rexnet.py](https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/rexnet.py)]
128
+ * Paper: `ReXNet: Diminishing Representational Bottleneck on CNN` - https://arxiv.org/abs/2007.00992
129
+ * Code: https://github.com/clovaai/rexnet
130
+
131
+ ## Selective-Kernel Networks [[sknet.py](https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/sknet.py)]
132
+ * Paper: `Selective-Kernel Networks` - https://arxiv.org/abs/1903.06586
133
+ * Code: https://github.com/implus/SKNet, https://github.com/clovaai/assembled-cnn
134
+
135
+ ## SelecSLS [[selecsls.py](https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/selecsls.py)]
136
+ * Paper: `XNect: Real-time Multi-Person 3D Motion Capture with a Single RGB Camera` - https://arxiv.org/abs/1907.00837
137
+ * Code: https://github.com/mehtadushy/SelecSLS-Pytorch
138
+
139
+ ## Squeeze-and-Excitation Networks [[senet.py](https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/senet.py)]
140
+ NOTE: I am deprecating this version of the networks, the new ones are part of `resnet.py`
141
+
142
+ * Paper: `Squeeze-and-Excitation Networks` - https://arxiv.org/abs/1709.01507
143
+ * Code: https://github.com/Cadene/pretrained-models.pytorch
144
+
145
+ ## TResNet [[tresnet.py](https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/tresnet.py)]
146
+ * Paper: `TResNet: High Performance GPU-Dedicated Architecture` - https://arxiv.org/abs/2003.13630
147
+ * Code: https://github.com/mrT23/TResNet
148
+
149
+ ## VGG [[vgg.py](https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vgg.py)]
150
+ * Paper: `Very Deep Convolutional Networks For Large-Scale Image Recognition` - https://arxiv.org/pdf/1409.1556.pdf
151
+ * Reference code: https://github.com/pytorch/vision/blob/master/torchvision/models/vgg.py
152
+
153
+ ## Vision Transformer [[vision_transformer.py](https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py)]
154
+ * Paper: `An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale` - https://arxiv.org/abs/2010.11929
155
+ * Reference code and pretrained weights: https://github.com/google-research/vision_transformer
156
+
157
+ ## VovNet V2 and V1 [[vovnet.py](https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vovnet.py)]
158
+ * Paper: `CenterMask : Real-Time Anchor-Free Instance Segmentation` - https://arxiv.org/abs/1911.06667
159
+ * Reference code: https://github.com/youngwanLEE/vovnet-detectron2
160
+
161
+ ## Xception [[xception.py](https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/xception.py)]
162
+ * Paper: `Xception: Deep Learning with Depthwise Separable Convolutions` - https://arxiv.org/abs/1610.02357
163
+ * Code: https://github.com/Cadene/pretrained-models.pytorch
164
+
165
+ ## Xception (Modified Aligned, Gluon) [[gluon_xception.py](https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/gluon_xception.py)]
166
+ * Paper: `Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation` - https://arxiv.org/abs/1802.02611
167
+ * Reference code: https://github.com/dmlc/gluon-cv/tree/master/gluoncv/model_zoo, https://github.com/jfzhang95/pytorch-deeplab-xception/
168
+
169
+ ## Xception (Modified Aligned, TF) [[aligned_xception.py](https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/aligned_xception.py)]
170
+ * Paper: `Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation` - https://arxiv.org/abs/1802.02611
171
+ * Reference code: https://github.com/tensorflow/models/tree/master/research/deeplab
pytorch-image-models/docs/models/.pages ADDED
@@ -0,0 +1 @@
 
 
1
+ title: Model Pages
pytorch-image-models/docs/models/.templates/code_snippets.md ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## How do I use this model on an image?
2
+ To load a pretrained model:
3
+
4
+ ```python
5
+ import timm
6
+ model = timm.create_model('{{ model_name }}', pretrained=True)
7
+ model.eval()
8
+ ```
9
+
10
+ To load and preprocess the image:
11
+ ```python
12
+ import urllib
13
+ from PIL import Image
14
+ from timm.data import resolve_data_config
15
+ from timm.data.transforms_factory import create_transform
16
+
17
+ config = resolve_data_config({}, model=model)
18
+ transform = create_transform(**config)
19
+
20
+ url, filename = ("https://github.com/pytorch/hub/raw/master/images/dog.jpg", "dog.jpg")
21
+ urllib.request.urlretrieve(url, filename)
22
+ img = Image.open(filename).convert('RGB')
23
+ tensor = transform(img).unsqueeze(0) # transform and add batch dimension
24
+ ```
25
+
26
+ To get the model predictions:
27
+ ```python
28
+ import torch
29
+ with torch.no_grad():
30
+ out = model(tensor)
31
+ probabilities = torch.nn.functional.softmax(out[0], dim=0)
32
+ print(probabilities.shape)
33
+ # prints: torch.Size([1000])
34
+ ```
35
+
36
+ To get the top-5 predictions class names:
37
+ ```python
38
+ # Get imagenet class mappings
39
+ url, filename = ("https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt", "imagenet_classes.txt")
40
+ urllib.request.urlretrieve(url, filename)
41
+ with open("imagenet_classes.txt", "r") as f:
42
+ categories = [s.strip() for s in f.readlines()]
43
+
44
+ # Print top categories per image
45
+ top5_prob, top5_catid = torch.topk(probabilities, 5)
46
+ for i in range(top5_prob.size(0)):
47
+ print(categories[top5_catid[i]], top5_prob[i].item())
48
+ # prints class names and probabilities like:
49
+ # [('Samoyed', 0.6425196528434753), ('Pomeranian', 0.04062102362513542), ('keeshond', 0.03186424449086189), ('white wolf', 0.01739676296710968), ('Eskimo dog', 0.011717947199940681)]
50
+ ```
51
+
52
+ Replace the model name with the variant you want to use, e.g. `{{ model_name }}`. You can find the IDs in the model summaries at the top of this page.
53
+
54
+ To extract image features with this model, follow the [timm feature extraction examples](https://rwightman.github.io/pytorch-image-models/feature_extraction/), just change the name of the model you want to use.
55
+
56
+ ## How do I finetune this model?
57
+ You can finetune any of the pre-trained models just by changing the classifier (the last layer).
58
+ ```python
59
+ model = timm.create_model('{{ model_name }}', pretrained=True, num_classes=NUM_FINETUNE_CLASSES)
60
+ ```
61
+ To finetune on your own dataset, you have to write a training loop or adapt [timm's training
62
+ script](https://github.com/rwightman/pytorch-image-models/blob/master/train.py) to use your dataset.
pytorch-image-models/docs/models/.templates/generate_readmes.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Run this script to generate the model-index files in `models` from the templates in `.templates/models`.
3
+ """
4
+
5
+ import argparse
6
+ from pathlib import Path
7
+
8
+ from jinja2 import Environment, FileSystemLoader
9
+
10
+ import modelindex
11
+
12
+
13
+ def generate_readmes(templates_path: Path, dest_path: Path):
14
+ """Add the code snippet template to the readmes"""
15
+ readme_templates_path = templates_path / "models"
16
+ code_template_path = templates_path / "code_snippets.md"
17
+
18
+ env = Environment(
19
+ loader=FileSystemLoader([readme_templates_path, readme_templates_path.parent]),
20
+ )
21
+
22
+ for readme in readme_templates_path.iterdir():
23
+ if readme.suffix == ".md":
24
+ template = env.get_template(readme.name)
25
+
26
+ # get the first model_name for this model family
27
+ mi = modelindex.load(str(readme))
28
+ model_name = mi.models[0].name
29
+
30
+ full_content = template.render(model_name=model_name)
31
+
32
+ # generate full_readme
33
+ with open(dest_path / readme.name, "w") as f:
34
+ f.write(full_content)
35
+
36
+
37
+ def main():
38
+ parser = argparse.ArgumentParser(description="Model index generation config")
39
+ parser.add_argument(
40
+ "-t",
41
+ "--templates",
42
+ default=Path(__file__).parent / ".templates",
43
+ type=str,
44
+ help="Location of the markdown templates",
45
+ )
46
+ parser.add_argument(
47
+ "-d",
48
+ "--dest",
49
+ default=Path(__file__).parent / "models",
50
+ type=str,
51
+ help="Destination folder that contains the generated model-index files.",
52
+ )
53
+ args = parser.parse_args()
54
+ templates_path = Path(args.templates)
55
+ dest_readmes_path = Path(args.dest)
56
+
57
+ generate_readmes(
58
+ templates_path,
59
+ dest_readmes_path,
60
+ )
61
+
62
+
63
+ if __name__ == "__main__":
64
+ main()
pytorch-image-models/docs/models/.templates/models/adversarial-inception-v3.md ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Adversarial Inception v3
2
+
3
+ **Inception v3** is a convolutional neural network architecture from the Inception family that makes several improvements including using [Label Smoothing](https://paperswithcode.com/method/label-smoothing), Factorized 7 x 7 convolutions, and the use of an [auxiliary classifier](https://paperswithcode.com/method/auxiliary-classifier) to propagate label information lower down the network (along with the use of batch normalization for layers in the sidehead). The key building block is an [Inception Module](https://paperswithcode.com/method/inception-v3-module).
4
+
5
+ This particular model was trained for study of adversarial examples (adversarial training).
6
+
7
+ The weights from this model were ported from [Tensorflow/Models](https://github.com/tensorflow/models).
8
+
9
+ {% include 'code_snippets.md' %}
10
+
11
+ ## How do I train this model?
12
+
13
+ You can follow the [timm recipe scripts](https://rwightman.github.io/pytorch-image-models/scripts/) for training a new model afresh.
14
+
15
+ ## Citation
16
+
17
+ ```BibTeX
18
+ @article{DBLP:journals/corr/abs-1804-00097,
19
+ author = {Alexey Kurakin and
20
+ Ian J. Goodfellow and
21
+ Samy Bengio and
22
+ Yinpeng Dong and
23
+ Fangzhou Liao and
24
+ Ming Liang and
25
+ Tianyu Pang and
26
+ Jun Zhu and
27
+ Xiaolin Hu and
28
+ Cihang Xie and
29
+ Jianyu Wang and
30
+ Zhishuai Zhang and
31
+ Zhou Ren and
32
+ Alan L. Yuille and
33
+ Sangxia Huang and
34
+ Yao Zhao and
35
+ Yuzhe Zhao and
36
+ Zhonglin Han and
37
+ Junjiajia Long and
38
+ Yerkebulan Berdibekov and
39
+ Takuya Akiba and
40
+ Seiya Tokui and
41
+ Motoki Abe},
42
+ title = {Adversarial Attacks and Defences Competition},
43
+ journal = {CoRR},
44
+ volume = {abs/1804.00097},
45
+ year = {2018},
46
+ url = {http://arxiv.org/abs/1804.00097},
47
+ archivePrefix = {arXiv},
48
+ eprint = {1804.00097},
49
+ timestamp = {Thu, 31 Oct 2019 16:31:22 +0100},
50
+ biburl = {https://dblp.org/rec/journals/corr/abs-1804-00097.bib},
51
+ bibsource = {dblp computer science bibliography, https://dblp.org}
52
+ }
53
+ ```
54
+
55
+ <!--
56
+ Type: model-index
57
+ Collections:
58
+ - Name: Adversarial Inception v3
59
+ Paper:
60
+ Title: Adversarial Attacks and Defences Competition
61
+ URL: https://paperswithcode.com/paper/adversarial-attacks-and-defences-competition
62
+ Models:
63
+ - Name: adv_inception_v3
64
+ In Collection: Adversarial Inception v3
65
+ Metadata:
66
+ FLOPs: 7352418880
67
+ Parameters: 23830000
68
+ File Size: 95549439
69
+ Architecture:
70
+ - 1x1 Convolution
71
+ - Auxiliary Classifier
72
+ - Average Pooling
73
+ - Average Pooling
74
+ - Batch Normalization
75
+ - Convolution
76
+ - Dense Connections
77
+ - Dropout
78
+ - Inception-v3 Module
79
+ - Max Pooling
80
+ - ReLU
81
+ - Softmax
82
+ Tasks:
83
+ - Image Classification
84
+ Training Data:
85
+ - ImageNet
86
+ ID: adv_inception_v3
87
+ Crop Pct: '0.875'
88
+ Image Size: '299'
89
+ Interpolation: bicubic
90
+ Code: https://github.com/rwightman/pytorch-image-models/blob/d8e69206be253892b2956341fea09fdebfaae4e3/timm/models/inception_v3.py#L456
91
+ Weights: https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/adv_inception_v3-9e27bd63.pth
92
+ Results:
93
+ - Task: Image Classification
94
+ Dataset: ImageNet
95
+ Metrics:
96
+ Top 1 Accuracy: 77.58%
97
+ Top 5 Accuracy: 93.74%
98
+ -->
pytorch-image-models/docs/models/.templates/models/advprop.md ADDED
@@ -0,0 +1,457 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # AdvProp (EfficientNet)
2
+
3
+ **AdvProp** is an adversarial training scheme which treats adversarial examples as additional training examples to prevent overfitting. Key to the method is the use of a separate auxiliary batch norm for adversarial examples, as they have a different underlying distribution from normal examples.
4
+
5
+ The weights from this model were ported from [Tensorflow/TPU](https://github.com/tensorflow/tpu).
6
+
7
+ {% include 'code_snippets.md' %}
8
+
9
+ ## How do I train this model?
10
+
11
+ You can follow the [timm recipe scripts](https://rwightman.github.io/pytorch-image-models/scripts/) for training a new model afresh.
12
+
13
+ ## Citation
14
+
15
+ ```BibTeX
16
+ @misc{xie2020adversarial,
17
+ title={Adversarial Examples Improve Image Recognition},
18
+ author={Cihang Xie and Mingxing Tan and Boqing Gong and Jiang Wang and Alan Yuille and Quoc V. Le},
19
+ year={2020},
20
+ eprint={1911.09665},
21
+ archivePrefix={arXiv},
22
+ primaryClass={cs.CV}
23
+ }
24
+ ```
25
+
26
+ <!--
27
+ Type: model-index
28
+ Collections:
29
+ - Name: AdvProp
30
+ Paper:
31
+ Title: Adversarial Examples Improve Image Recognition
32
+ URL: https://paperswithcode.com/paper/adversarial-examples-improve-image
33
+ Models:
34
+ - Name: tf_efficientnet_b0_ap
35
+ In Collection: AdvProp
36
+ Metadata:
37
+ FLOPs: 488688572
38
+ Parameters: 5290000
39
+ File Size: 21385973
40
+ Architecture:
41
+ - 1x1 Convolution
42
+ - Average Pooling
43
+ - Batch Normalization
44
+ - Convolution
45
+ - Dense Connections
46
+ - Dropout
47
+ - Inverted Residual Block
48
+ - Squeeze-and-Excitation Block
49
+ - Swish
50
+ Tasks:
51
+ - Image Classification
52
+ Training Techniques:
53
+ - AdvProp
54
+ - AutoAugment
55
+ - Label Smoothing
56
+ - RMSProp
57
+ - Stochastic Depth
58
+ - Weight Decay
59
+ Training Data:
60
+ - ImageNet
61
+ ID: tf_efficientnet_b0_ap
62
+ LR: 0.256
63
+ Epochs: 350
64
+ Crop Pct: '0.875'
65
+ Momentum: 0.9
66
+ Batch Size: 2048
67
+ Image Size: '224'
68
+ Weight Decay: 1.0e-05
69
+ Interpolation: bicubic
70
+ RMSProp Decay: 0.9
71
+ Label Smoothing: 0.1
72
+ BatchNorm Momentum: 0.99
73
+ Code: https://github.com/rwightman/pytorch-image-models/blob/9a25fdf3ad0414b4d66da443fe60ae0aa14edc84/timm/models/efficientnet.py#L1334
74
+ Weights: https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b0_ap-f262efe1.pth
75
+ Results:
76
+ - Task: Image Classification
77
+ Dataset: ImageNet
78
+ Metrics:
79
+ Top 1 Accuracy: 77.1%
80
+ Top 5 Accuracy: 93.26%
81
+ - Name: tf_efficientnet_b1_ap
82
+ In Collection: AdvProp
83
+ Metadata:
84
+ FLOPs: 883633200
85
+ Parameters: 7790000
86
+ File Size: 31515350
87
+ Architecture:
88
+ - 1x1 Convolution
89
+ - Average Pooling
90
+ - Batch Normalization
91
+ - Convolution
92
+ - Dense Connections
93
+ - Dropout
94
+ - Inverted Residual Block
95
+ - Squeeze-and-Excitation Block
96
+ - Swish
97
+ Tasks:
98
+ - Image Classification
99
+ Training Techniques:
100
+ - AdvProp
101
+ - AutoAugment
102
+ - Label Smoothing
103
+ - RMSProp
104
+ - Stochastic Depth
105
+ - Weight Decay
106
+ Training Data:
107
+ - ImageNet
108
+ ID: tf_efficientnet_b1_ap
109
+ LR: 0.256
110
+ Epochs: 350
111
+ Crop Pct: '0.882'
112
+ Momentum: 0.9
113
+ Batch Size: 2048
114
+ Image Size: '240'
115
+ Weight Decay: 1.0e-05
116
+ Interpolation: bicubic
117
+ RMSProp Decay: 0.9
118
+ Label Smoothing: 0.1
119
+ BatchNorm Momentum: 0.99
120
+ Code: https://github.com/rwightman/pytorch-image-models/blob/9a25fdf3ad0414b4d66da443fe60ae0aa14edc84/timm/models/efficientnet.py#L1344
121
+ Weights: https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b1_ap-44ef0a3d.pth
122
+ Results:
123
+ - Task: Image Classification
124
+ Dataset: ImageNet
125
+ Metrics:
126
+ Top 1 Accuracy: 79.28%
127
+ Top 5 Accuracy: 94.3%
128
+ - Name: tf_efficientnet_b2_ap
129
+ In Collection: AdvProp
130
+ Metadata:
131
+ FLOPs: 1234321170
132
+ Parameters: 9110000
133
+ File Size: 36800745
134
+ Architecture:
135
+ - 1x1 Convolution
136
+ - Average Pooling
137
+ - Batch Normalization
138
+ - Convolution
139
+ - Dense Connections
140
+ - Dropout
141
+ - Inverted Residual Block
142
+ - Squeeze-and-Excitation Block
143
+ - Swish
144
+ Tasks:
145
+ - Image Classification
146
+ Training Techniques:
147
+ - AdvProp
148
+ - AutoAugment
149
+ - Label Smoothing
150
+ - RMSProp
151
+ - Stochastic Depth
152
+ - Weight Decay
153
+ Training Data:
154
+ - ImageNet
155
+ ID: tf_efficientnet_b2_ap
156
+ LR: 0.256
157
+ Epochs: 350
158
+ Crop Pct: '0.89'
159
+ Momentum: 0.9
160
+ Batch Size: 2048
161
+ Image Size: '260'
162
+ Weight Decay: 1.0e-05
163
+ Interpolation: bicubic
164
+ RMSProp Decay: 0.9
165
+ Label Smoothing: 0.1
166
+ BatchNorm Momentum: 0.99
167
+ Code: https://github.com/rwightman/pytorch-image-models/blob/9a25fdf3ad0414b4d66da443fe60ae0aa14edc84/timm/models/efficientnet.py#L1354
168
+ Weights: https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b2_ap-2f8e7636.pth
169
+ Results:
170
+ - Task: Image Classification
171
+ Dataset: ImageNet
172
+ Metrics:
173
+ Top 1 Accuracy: 80.3%
174
+ Top 5 Accuracy: 95.03%
175
+ - Name: tf_efficientnet_b3_ap
176
+ In Collection: AdvProp
177
+ Metadata:
178
+ FLOPs: 2275247568
179
+ Parameters: 12230000
180
+ File Size: 49384538
181
+ Architecture:
182
+ - 1x1 Convolution
183
+ - Average Pooling
184
+ - Batch Normalization
185
+ - Convolution
186
+ - Dense Connections
187
+ - Dropout
188
+ - Inverted Residual Block
189
+ - Squeeze-and-Excitation Block
190
+ - Swish
191
+ Tasks:
192
+ - Image Classification
193
+ Training Techniques:
194
+ - AdvProp
195
+ - AutoAugment
196
+ - Label Smoothing
197
+ - RMSProp
198
+ - Stochastic Depth
199
+ - Weight Decay
200
+ Training Data:
201
+ - ImageNet
202
+ ID: tf_efficientnet_b3_ap
203
+ LR: 0.256
204
+ Epochs: 350
205
+ Crop Pct: '0.904'
206
+ Momentum: 0.9
207
+ Batch Size: 2048
208
+ Image Size: '300'
209
+ Weight Decay: 1.0e-05
210
+ Interpolation: bicubic
211
+ RMSProp Decay: 0.9
212
+ Label Smoothing: 0.1
213
+ BatchNorm Momentum: 0.99
214
+ Code: https://github.com/rwightman/pytorch-image-models/blob/9a25fdf3ad0414b4d66da443fe60ae0aa14edc84/timm/models/efficientnet.py#L1364
215
+ Weights: https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b3_ap-aad25bdd.pth
216
+ Results:
217
+ - Task: Image Classification
218
+ Dataset: ImageNet
219
+ Metrics:
220
+ Top 1 Accuracy: 81.82%
221
+ Top 5 Accuracy: 95.62%
222
+ - Name: tf_efficientnet_b4_ap
223
+ In Collection: AdvProp
224
+ Metadata:
225
+ FLOPs: 5749638672
226
+ Parameters: 19340000
227
+ File Size: 77993585
228
+ Architecture:
229
+ - 1x1 Convolution
230
+ - Average Pooling
231
+ - Batch Normalization
232
+ - Convolution
233
+ - Dense Connections
234
+ - Dropout
235
+ - Inverted Residual Block
236
+ - Squeeze-and-Excitation Block
237
+ - Swish
238
+ Tasks:
239
+ - Image Classification
240
+ Training Techniques:
241
+ - AdvProp
242
+ - AutoAugment
243
+ - Label Smoothing
244
+ - RMSProp
245
+ - Stochastic Depth
246
+ - Weight Decay
247
+ Training Data:
248
+ - ImageNet
249
+ ID: tf_efficientnet_b4_ap
250
+ LR: 0.256
251
+ Epochs: 350
252
+ Crop Pct: '0.922'
253
+ Momentum: 0.9
254
+ Batch Size: 2048
255
+ Image Size: '380'
256
+ Weight Decay: 1.0e-05
257
+ Interpolation: bicubic
258
+ RMSProp Decay: 0.9
259
+ Label Smoothing: 0.1
260
+ BatchNorm Momentum: 0.99
261
+ Code: https://github.com/rwightman/pytorch-image-models/blob/9a25fdf3ad0414b4d66da443fe60ae0aa14edc84/timm/models/efficientnet.py#L1374
262
+ Weights: https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b4_ap-dedb23e6.pth
263
+ Results:
264
+ - Task: Image Classification
265
+ Dataset: ImageNet
266
+ Metrics:
267
+ Top 1 Accuracy: 83.26%
268
+ Top 5 Accuracy: 96.39%
269
+ - Name: tf_efficientnet_b5_ap
270
+ In Collection: AdvProp
271
+ Metadata:
272
+ FLOPs: 13176501888
273
+ Parameters: 30390000
274
+ File Size: 122403150
275
+ Architecture:
276
+ - 1x1 Convolution
277
+ - Average Pooling
278
+ - Batch Normalization
279
+ - Convolution
280
+ - Dense Connections
281
+ - Dropout
282
+ - Inverted Residual Block
283
+ - Squeeze-and-Excitation Block
284
+ - Swish
285
+ Tasks:
286
+ - Image Classification
287
+ Training Techniques:
288
+ - AdvProp
289
+ - AutoAugment
290
+ - Label Smoothing
291
+ - RMSProp
292
+ - Stochastic Depth
293
+ - Weight Decay
294
+ Training Data:
295
+ - ImageNet
296
+ ID: tf_efficientnet_b5_ap
297
+ LR: 0.256
298
+ Epochs: 350
299
+ Crop Pct: '0.934'
300
+ Momentum: 0.9
301
+ Batch Size: 2048
302
+ Image Size: '456'
303
+ Weight Decay: 1.0e-05
304
+ Interpolation: bicubic
305
+ RMSProp Decay: 0.9
306
+ Label Smoothing: 0.1
307
+ BatchNorm Momentum: 0.99
308
+ Code: https://github.com/rwightman/pytorch-image-models/blob/9a25fdf3ad0414b4d66da443fe60ae0aa14edc84/timm/models/efficientnet.py#L1384
309
+ Weights: https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b5_ap-9e82fae8.pth
310
+ Results:
311
+ - Task: Image Classification
312
+ Dataset: ImageNet
313
+ Metrics:
314
+ Top 1 Accuracy: 84.25%
315
+ Top 5 Accuracy: 96.97%
316
+ - Name: tf_efficientnet_b6_ap
317
+ In Collection: AdvProp
318
+ Metadata:
319
+ FLOPs: 24180518488
320
+ Parameters: 43040000
321
+ File Size: 173237466
322
+ Architecture:
323
+ - 1x1 Convolution
324
+ - Average Pooling
325
+ - Batch Normalization
326
+ - Convolution
327
+ - Dense Connections
328
+ - Dropout
329
+ - Inverted Residual Block
330
+ - Squeeze-and-Excitation Block
331
+ - Swish
332
+ Tasks:
333
+ - Image Classification
334
+ Training Techniques:
335
+ - AdvProp
336
+ - AutoAugment
337
+ - Label Smoothing
338
+ - RMSProp
339
+ - Stochastic Depth
340
+ - Weight Decay
341
+ Training Data:
342
+ - ImageNet
343
+ ID: tf_efficientnet_b6_ap
344
+ LR: 0.256
345
+ Epochs: 350
346
+ Crop Pct: '0.942'
347
+ Momentum: 0.9
348
+ Batch Size: 2048
349
+ Image Size: '528'
350
+ Weight Decay: 1.0e-05
351
+ Interpolation: bicubic
352
+ RMSProp Decay: 0.9
353
+ Label Smoothing: 0.1
354
+ BatchNorm Momentum: 0.99
355
+ Code: https://github.com/rwightman/pytorch-image-models/blob/9a25fdf3ad0414b4d66da443fe60ae0aa14edc84/timm/models/efficientnet.py#L1394
356
+ Weights: https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b6_ap-4ffb161f.pth
357
+ Results:
358
+ - Task: Image Classification
359
+ Dataset: ImageNet
360
+ Metrics:
361
+ Top 1 Accuracy: 84.79%
362
+ Top 5 Accuracy: 97.14%
363
+ - Name: tf_efficientnet_b7_ap
364
+ In Collection: AdvProp
365
+ Metadata:
366
+ FLOPs: 48205304880
367
+ Parameters: 66350000
368
+ File Size: 266850607
369
+ Architecture:
370
+ - 1x1 Convolution
371
+ - Average Pooling
372
+ - Batch Normalization
373
+ - Convolution
374
+ - Dense Connections
375
+ - Dropout
376
+ - Inverted Residual Block
377
+ - Squeeze-and-Excitation Block
378
+ - Swish
379
+ Tasks:
380
+ - Image Classification
381
+ Training Techniques:
382
+ - AdvProp
383
+ - AutoAugment
384
+ - Label Smoothing
385
+ - RMSProp
386
+ - Stochastic Depth
387
+ - Weight Decay
388
+ Training Data:
389
+ - ImageNet
390
+ ID: tf_efficientnet_b7_ap
391
+ LR: 0.256
392
+ Epochs: 350
393
+ Crop Pct: '0.949'
394
+ Momentum: 0.9
395
+ Batch Size: 2048
396
+ Image Size: '600'
397
+ Weight Decay: 1.0e-05
398
+ Interpolation: bicubic
399
+ RMSProp Decay: 0.9
400
+ Label Smoothing: 0.1
401
+ BatchNorm Momentum: 0.99
402
+ Code: https://github.com/rwightman/pytorch-image-models/blob/9a25fdf3ad0414b4d66da443fe60ae0aa14edc84/timm/models/efficientnet.py#L1405
403
+ Weights: https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b7_ap-ddb28fec.pth
404
+ Results:
405
+ - Task: Image Classification
406
+ Dataset: ImageNet
407
+ Metrics:
408
+ Top 1 Accuracy: 85.12%
409
+ Top 5 Accuracy: 97.25%
410
+ - Name: tf_efficientnet_b8_ap
411
+ In Collection: AdvProp
412
+ Metadata:
413
+ FLOPs: 80962956270
414
+ Parameters: 87410000
415
+ File Size: 351412563
416
+ Architecture:
417
+ - 1x1 Convolution
418
+ - Average Pooling
419
+ - Batch Normalization
420
+ - Convolution
421
+ - Dense Connections
422
+ - Dropout
423
+ - Inverted Residual Block
424
+ - Squeeze-and-Excitation Block
425
+ - Swish
426
+ Tasks:
427
+ - Image Classification
428
+ Training Techniques:
429
+ - AdvProp
430
+ - AutoAugment
431
+ - Label Smoothing
432
+ - RMSProp
433
+ - Stochastic Depth
434
+ - Weight Decay
435
+ Training Data:
436
+ - ImageNet
437
+ ID: tf_efficientnet_b8_ap
438
+ LR: 0.128
439
+ Epochs: 350
440
+ Crop Pct: '0.954'
441
+ Momentum: 0.9
442
+ Batch Size: 2048
443
+ Image Size: '672'
444
+ Weight Decay: 1.0e-05
445
+ Interpolation: bicubic
446
+ RMSProp Decay: 0.9
447
+ Label Smoothing: 0.1
448
+ BatchNorm Momentum: 0.99
449
+ Code: https://github.com/rwightman/pytorch-image-models/blob/9a25fdf3ad0414b4d66da443fe60ae0aa14edc84/timm/models/efficientnet.py#L1416
450
+ Weights: https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b8_ap-00e169fa.pth
451
+ Results:
452
+ - Task: Image Classification
453
+ Dataset: ImageNet
454
+ Metrics:
455
+ Top 1 Accuracy: 85.37%
456
+ Top 5 Accuracy: 97.3%
457
+ -->
pytorch-image-models/docs/models/.templates/models/big-transfer.md ADDED
@@ -0,0 +1,295 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Big Transfer (BiT)
2
+
3
+ **Big Transfer (BiT)** is a pre-training recipe that pre-trains a model on a large supervised source dataset and then fine-tunes the weights on the target task. Models are pre-trained on the JFT-300M dataset. The models contained in this collection are fine-tuned on ImageNet.
4
+
5
+ {% include 'code_snippets.md' %}
6
+
7
+ ## How do I train this model?
8
+
9
+ You can follow the [timm recipe scripts](https://rwightman.github.io/pytorch-image-models/scripts/) for training a new model afresh.
10
+
11
+ ## Citation
12
+
13
+ ```BibTeX
14
+ @misc{kolesnikov2020big,
15
+ title={Big Transfer (BiT): General Visual Representation Learning},
16
+ author={Alexander Kolesnikov and Lucas Beyer and Xiaohua Zhai and Joan Puigcerver and Jessica Yung and Sylvain Gelly and Neil Houlsby},
17
+ year={2020},
18
+ eprint={1912.11370},
19
+ archivePrefix={arXiv},
20
+ primaryClass={cs.CV}
21
+ }
22
+ ```
23
+
24
+ <!--
25
+ Type: model-index
26
+ Collections:
27
+ - Name: Big Transfer
28
+ Paper:
29
+ Title: 'Big Transfer (BiT): General Visual Representation Learning'
30
+ URL: https://paperswithcode.com/paper/large-scale-learning-of-general-visual
31
+ Models:
32
+ - Name: resnetv2_101x1_bitm
33
+ In Collection: Big Transfer
34
+ Metadata:
35
+ FLOPs: 5330896
36
+ Parameters: 44540000
37
+ File Size: 178256468
38
+ Architecture:
39
+ - 1x1 Convolution
40
+ - Bottleneck Residual Block
41
+ - Convolution
42
+ - Global Average Pooling
43
+ - Group Normalization
44
+ - Max Pooling
45
+ - ReLU
46
+ - Residual Block
47
+ - Residual Connection
48
+ - Softmax
49
+ - Weight Standardization
50
+ Tasks:
51
+ - Image Classification
52
+ Training Techniques:
53
+ - Mixup
54
+ - SGD with Momentum
55
+ - Weight Decay
56
+ Training Data:
57
+ - ImageNet
58
+ - JFT-300M
59
+ Training Resources: Cloud TPUv3-512
60
+ ID: resnetv2_101x1_bitm
61
+ LR: 0.03
62
+ Epochs: 90
63
+ Layers: 101
64
+ Crop Pct: '1.0'
65
+ Momentum: 0.9
66
+ Batch Size: 4096
67
+ Image Size: '480'
68
+ Weight Decay: 0.0001
69
+ Interpolation: bilinear
70
+ Code: https://github.com/rwightman/pytorch-image-models/blob/b9843f954b0457af2db4f9dea41a8538f51f5d78/timm/models/resnetv2.py#L444
71
+ Weights: https://storage.googleapis.com/bit_models/BiT-M-R101x1-ILSVRC2012.npz
72
+ Results:
73
+ - Task: Image Classification
74
+ Dataset: ImageNet
75
+ Metrics:
76
+ Top 1 Accuracy: 82.21%
77
+ Top 5 Accuracy: 96.47%
78
+ - Name: resnetv2_101x3_bitm
79
+ In Collection: Big Transfer
80
+ Metadata:
81
+ FLOPs: 15988688
82
+ Parameters: 387930000
83
+ File Size: 1551830100
84
+ Architecture:
85
+ - 1x1 Convolution
86
+ - Bottleneck Residual Block
87
+ - Convolution
88
+ - Global Average Pooling
89
+ - Group Normalization
90
+ - Max Pooling
91
+ - ReLU
92
+ - Residual Block
93
+ - Residual Connection
94
+ - Softmax
95
+ - Weight Standardization
96
+ Tasks:
97
+ - Image Classification
98
+ Training Techniques:
99
+ - Mixup
100
+ - SGD with Momentum
101
+ - Weight Decay
102
+ Training Data:
103
+ - ImageNet
104
+ - JFT-300M
105
+ Training Resources: Cloud TPUv3-512
106
+ ID: resnetv2_101x3_bitm
107
+ LR: 0.03
108
+ Epochs: 90
109
+ Layers: 101
110
+ Crop Pct: '1.0'
111
+ Momentum: 0.9
112
+ Batch Size: 4096
113
+ Image Size: '480'
114
+ Weight Decay: 0.0001
115
+ Interpolation: bilinear
116
+ Code: https://github.com/rwightman/pytorch-image-models/blob/b9843f954b0457af2db4f9dea41a8538f51f5d78/timm/models/resnetv2.py#L451
117
+ Weights: https://storage.googleapis.com/bit_models/BiT-M-R101x3-ILSVRC2012.npz
118
+ Results:
119
+ - Task: Image Classification
120
+ Dataset: ImageNet
121
+ Metrics:
122
+ Top 1 Accuracy: 84.38%
123
+ Top 5 Accuracy: 97.37%
124
+ - Name: resnetv2_152x2_bitm
125
+ In Collection: Big Transfer
126
+ Metadata:
127
+ FLOPs: 10659792
128
+ Parameters: 236340000
129
+ File Size: 945476668
130
+ Architecture:
131
+ - 1x1 Convolution
132
+ - Bottleneck Residual Block
133
+ - Convolution
134
+ - Global Average Pooling
135
+ - Group Normalization
136
+ - Max Pooling
137
+ - ReLU
138
+ - Residual Block
139
+ - Residual Connection
140
+ - Softmax
141
+ - Weight Standardization
142
+ Tasks:
143
+ - Image Classification
144
+ Training Techniques:
145
+ - Mixup
146
+ - SGD with Momentum
147
+ - Weight Decay
148
+ Training Data:
149
+ - ImageNet
150
+ - JFT-300M
151
+ ID: resnetv2_152x2_bitm
152
+ Crop Pct: '1.0'
153
+ Image Size: '480'
154
+ Interpolation: bilinear
155
+ Code: https://github.com/rwightman/pytorch-image-models/blob/b9843f954b0457af2db4f9dea41a8538f51f5d78/timm/models/resnetv2.py#L458
156
+ Weights: https://storage.googleapis.com/bit_models/BiT-M-R152x2-ILSVRC2012.npz
157
+ Results:
158
+ - Task: Image Classification
159
+ Dataset: ImageNet
160
+ Metrics:
161
+ Top 1 Accuracy: 84.4%
162
+ Top 5 Accuracy: 97.43%
163
+ - Name: resnetv2_152x4_bitm
164
+ In Collection: Big Transfer
165
+ Metadata:
166
+ FLOPs: 21317584
167
+ Parameters: 936530000
168
+ File Size: 3746270104
169
+ Architecture:
170
+ - 1x1 Convolution
171
+ - Bottleneck Residual Block
172
+ - Convolution
173
+ - Global Average Pooling
174
+ - Group Normalization
175
+ - Max Pooling
176
+ - ReLU
177
+ - Residual Block
178
+ - Residual Connection
179
+ - Softmax
180
+ - Weight Standardization
181
+ Tasks:
182
+ - Image Classification
183
+ Training Techniques:
184
+ - Mixup
185
+ - SGD with Momentum
186
+ - Weight Decay
187
+ Training Data:
188
+ - ImageNet
189
+ - JFT-300M
190
+ Training Resources: Cloud TPUv3-512
191
+ ID: resnetv2_152x4_bitm
192
+ Crop Pct: '1.0'
193
+ Image Size: '480'
194
+ Interpolation: bilinear
195
+ Code: https://github.com/rwightman/pytorch-image-models/blob/b9843f954b0457af2db4f9dea41a8538f51f5d78/timm/models/resnetv2.py#L465
196
+ Weights: https://storage.googleapis.com/bit_models/BiT-M-R152x4-ILSVRC2012.npz
197
+ Results:
198
+ - Task: Image Classification
199
+ Dataset: ImageNet
200
+ Metrics:
201
+ Top 1 Accuracy: 84.95%
202
+ Top 5 Accuracy: 97.45%
203
+ - Name: resnetv2_50x1_bitm
204
+ In Collection: Big Transfer
205
+ Metadata:
206
+ FLOPs: 5330896
207
+ Parameters: 25550000
208
+ File Size: 102242668
209
+ Architecture:
210
+ - 1x1 Convolution
211
+ - Bottleneck Residual Block
212
+ - Convolution
213
+ - Global Average Pooling
214
+ - Group Normalization
215
+ - Max Pooling
216
+ - ReLU
217
+ - Residual Block
218
+ - Residual Connection
219
+ - Softmax
220
+ - Weight Standardization
221
+ Tasks:
222
+ - Image Classification
223
+ Training Techniques:
224
+ - Mixup
225
+ - SGD with Momentum
226
+ - Weight Decay
227
+ Training Data:
228
+ - ImageNet
229
+ - JFT-300M
230
+ Training Resources: Cloud TPUv3-512
231
+ ID: resnetv2_50x1_bitm
232
+ LR: 0.03
233
+ Epochs: 90
234
+ Layers: 50
235
+ Crop Pct: '1.0'
236
+ Momentum: 0.9
237
+ Batch Size: 4096
238
+ Image Size: '480'
239
+ Weight Decay: 0.0001
240
+ Interpolation: bilinear
241
+ Code: https://github.com/rwightman/pytorch-image-models/blob/b9843f954b0457af2db4f9dea41a8538f51f5d78/timm/models/resnetv2.py#L430
242
+ Weights: https://storage.googleapis.com/bit_models/BiT-M-R50x1-ILSVRC2012.npz
243
+ Results:
244
+ - Task: Image Classification
245
+ Dataset: ImageNet
246
+ Metrics:
247
+ Top 1 Accuracy: 80.19%
248
+ Top 5 Accuracy: 95.63%
249
+ - Name: resnetv2_50x3_bitm
250
+ In Collection: Big Transfer
251
+ Metadata:
252
+ FLOPs: 15988688
253
+ Parameters: 217320000
254
+ File Size: 869321580
255
+ Architecture:
256
+ - 1x1 Convolution
257
+ - Bottleneck Residual Block
258
+ - Convolution
259
+ - Global Average Pooling
260
+ - Group Normalization
261
+ - Max Pooling
262
+ - ReLU
263
+ - Residual Block
264
+ - Residual Connection
265
+ - Softmax
266
+ - Weight Standardization
267
+ Tasks:
268
+ - Image Classification
269
+ Training Techniques:
270
+ - Mixup
271
+ - SGD with Momentum
272
+ - Weight Decay
273
+ Training Data:
274
+ - ImageNet
275
+ - JFT-300M
276
+ Training Resources: Cloud TPUv3-512
277
+ ID: resnetv2_50x3_bitm
278
+ LR: 0.03
279
+ Epochs: 90
280
+ Layers: 50
281
+ Crop Pct: '1.0'
282
+ Momentum: 0.9
283
+ Batch Size: 4096
284
+ Image Size: '480'
285
+ Weight Decay: 0.0001
286
+ Interpolation: bilinear
287
+ Code: https://github.com/rwightman/pytorch-image-models/blob/b9843f954b0457af2db4f9dea41a8538f51f5d78/timm/models/resnetv2.py#L437
288
+ Weights: https://storage.googleapis.com/bit_models/BiT-M-R50x3-ILSVRC2012.npz
289
+ Results:
290
+ - Task: Image Classification
291
+ Dataset: ImageNet
292
+ Metrics:
293
+ Top 1 Accuracy: 83.75%
294
+ Top 5 Accuracy: 97.12%
295
+ -->
pytorch-image-models/docs/models/.templates/models/csp-darknet.md ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # CSP-DarkNet
2
+
3
+ **CSPDarknet53** is a convolutional neural network and backbone for object detection that uses [DarkNet-53](https://paperswithcode.com/method/darknet-53). It employs a CSPNet strategy to partition the feature map of the base layer into two parts and then merges them through a cross-stage hierarchy. The use of a split and merge strategy allows for more gradient flow through the network.
4
+
5
+ This CNN is used as the backbone for [YOLOv4](https://paperswithcode.com/method/yolov4).
6
+
7
+ {% include 'code_snippets.md' %}
8
+
9
+ ## How do I train this model?
10
+
11
+ You can follow the [timm recipe scripts](https://rwightman.github.io/pytorch-image-models/scripts/) for training a new model afresh.
12
+
13
+ ## Citation
14
+
15
+ ```BibTeX
16
+ @misc{bochkovskiy2020yolov4,
17
+ title={YOLOv4: Optimal Speed and Accuracy of Object Detection},
18
+ author={Alexey Bochkovskiy and Chien-Yao Wang and Hong-Yuan Mark Liao},
19
+ year={2020},
20
+ eprint={2004.10934},
21
+ archivePrefix={arXiv},
22
+ primaryClass={cs.CV}
23
+ }
24
+ ```
25
+
26
+ <!--
27
+ Type: model-index
28
+ Collections:
29
+ - Name: CSP DarkNet
30
+ Paper:
31
+ Title: 'YOLOv4: Optimal Speed and Accuracy of Object Detection'
32
+ URL: https://paperswithcode.com/paper/yolov4-optimal-speed-and-accuracy-of-object
33
+ Models:
34
+ - Name: cspdarknet53
35
+ In Collection: CSP DarkNet
36
+ Metadata:
37
+ FLOPs: 8545018880
38
+ Parameters: 27640000
39
+ File Size: 110775135
40
+ Architecture:
41
+ - 1x1 Convolution
42
+ - Batch Normalization
43
+ - Convolution
44
+ - Global Average Pooling
45
+ - Mish
46
+ - Residual Connection
47
+ - Softmax
48
+ Tasks:
49
+ - Image Classification
50
+ Training Techniques:
51
+ - CutMix
52
+ - Label Smoothing
53
+ - Mosaic
54
+ - Polynomial Learning Rate Decay
55
+ - SGD with Momentum
56
+ - Self-Adversarial Training
57
+ - Weight Decay
58
+ Training Data:
59
+ - ImageNet
60
+ Training Resources: 1x NVIDIA RTX 2070 GPU
61
+ ID: cspdarknet53
62
+ LR: 0.1
63
+ Layers: 53
64
+ Crop Pct: '0.887'
65
+ Momentum: 0.9
66
+ Batch Size: 128
67
+ Image Size: '256'
68
+ Warmup Steps: 1000
69
+ Weight Decay: 0.0005
70
+ Interpolation: bilinear
71
+ Training Steps: 8000000
72
+ FPS (GPU RTX 2070): 66
73
+ Code: https://github.com/rwightman/pytorch-image-models/blob/d8e69206be253892b2956341fea09fdebfaae4e3/timm/models/cspnet.py#L441
74
+ Weights: https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/cspdarknet53_ra_256-d05c7c21.pth
75
+ Results:
76
+ - Task: Image Classification
77
+ Dataset: ImageNet
78
+ Metrics:
79
+ Top 1 Accuracy: 80.05%
80
+ Top 5 Accuracy: 95.09%
81
+ -->
pytorch-image-models/docs/models/.templates/models/csp-resnet.md ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # CSP-ResNet
2
+
3
+ **CSPResNet** is a convolutional neural network where we apply the Cross Stage Partial Network (CSPNet) approach to [ResNet](https://paperswithcode.com/method/resnet). The CSPNet partitions the feature map of the base layer into two parts and then merges them through a cross-stage hierarchy. The use of a split and merge strategy allows for more gradient flow through the network.
4
+
5
+ {% include 'code_snippets.md' %}
6
+
7
+ ## How do I train this model?
8
+
9
+ You can follow the [timm recipe scripts](https://rwightman.github.io/pytorch-image-models/scripts/) for training a new model afresh.
10
+
11
+ ## Citation
12
+
13
+ ```BibTeX
14
+ @misc{wang2019cspnet,
15
+ title={CSPNet: A New Backbone that can Enhance Learning Capability of CNN},
16
+ author={Chien-Yao Wang and Hong-Yuan Mark Liao and I-Hau Yeh and Yueh-Hua Wu and Ping-Yang Chen and Jun-Wei Hsieh},
17
+ year={2019},
18
+ eprint={1911.11929},
19
+ archivePrefix={arXiv},
20
+ primaryClass={cs.CV}
21
+ }
22
+ ```
23
+
24
+ <!--
25
+ Type: model-index
26
+ Collections:
27
+ - Name: CSP ResNet
28
+ Paper:
29
+ Title: 'CSPNet: A New Backbone that can Enhance Learning Capability of CNN'
30
+ URL: https://paperswithcode.com/paper/cspnet-a-new-backbone-that-can-enhance
31
+ Models:
32
+ - Name: cspresnet50
33
+ In Collection: CSP ResNet
34
+ Metadata:
35
+ FLOPs: 5924992000
36
+ Parameters: 21620000
37
+ File Size: 86679303
38
+ Architecture:
39
+ - 1x1 Convolution
40
+ - Batch Normalization
41
+ - Bottleneck Residual Block
42
+ - Convolution
43
+ - Global Average Pooling
44
+ - Max Pooling
45
+ - ReLU
46
+ - Residual Block
47
+ - Residual Connection
48
+ - Softmax
49
+ Tasks:
50
+ - Image Classification
51
+ Training Techniques:
52
+ - Label Smoothing
53
+ - Polynomial Learning Rate Decay
54
+ - SGD with Momentum
55
+ - Weight Decay
56
+ Training Data:
57
+ - ImageNet
58
+ ID: cspresnet50
59
+ LR: 0.1
60
+ Layers: 50
61
+ Crop Pct: '0.887'
62
+ Momentum: 0.9
63
+ Batch Size: 128
64
+ Image Size: '256'
65
+ Weight Decay: 0.005
66
+ Interpolation: bilinear
67
+ Training Steps: 8000000
68
+ Code: https://github.com/rwightman/pytorch-image-models/blob/d8e69206be253892b2956341fea09fdebfaae4e3/timm/models/cspnet.py#L415
69
+ Weights: https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/cspresnet50_ra-d3e8d487.pth
70
+ Results:
71
+ - Task: Image Classification
72
+ Dataset: ImageNet
73
+ Metrics:
74
+ Top 1 Accuracy: 79.57%
75
+ Top 5 Accuracy: 94.71%
76
+ -->
pytorch-image-models/docs/models/.templates/models/csp-resnext.md ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # CSP-ResNeXt
2
+
3
+ **CSPResNeXt** is a convolutional neural network where we apply the Cross Stage Partial Network (CSPNet) approach to [ResNeXt](https://paperswithcode.com/method/resnext). The CSPNet partitions the feature map of the base layer into two parts and then merges them through a cross-stage hierarchy. The use of a split and merge strategy allows for more gradient flow through the network.
4
+
5
+ {% include 'code_snippets.md' %}
6
+
7
+ ## How do I train this model?
8
+
9
+ You can follow the [timm recipe scripts](https://rwightman.github.io/pytorch-image-models/scripts/) for training a new model afresh.
10
+
11
+ ## Citation
12
+
13
+ ```BibTeX
14
+ @misc{wang2019cspnet,
15
+ title={CSPNet: A New Backbone that can Enhance Learning Capability of CNN},
16
+ author={Chien-Yao Wang and Hong-Yuan Mark Liao and I-Hau Yeh and Yueh-Hua Wu and Ping-Yang Chen and Jun-Wei Hsieh},
17
+ year={2019},
18
+ eprint={1911.11929},
19
+ archivePrefix={arXiv},
20
+ primaryClass={cs.CV}
21
+ }
22
+ ```
23
+
24
+ <!--
25
+ Type: model-index
26
+ Collections:
27
+ - Name: CSP ResNeXt
28
+ Paper:
29
+ Title: 'CSPNet: A New Backbone that can Enhance Learning Capability of CNN'
30
+ URL: https://paperswithcode.com/paper/cspnet-a-new-backbone-that-can-enhance
31
+ Models:
32
+ - Name: cspresnext50
33
+ In Collection: CSP ResNeXt
34
+ Metadata:
35
+ FLOPs: 3962945536
36
+ Parameters: 20570000
37
+ File Size: 82562887
38
+ Architecture:
39
+ - 1x1 Convolution
40
+ - Batch Normalization
41
+ - Convolution
42
+ - Global Average Pooling
43
+ - Grouped Convolution
44
+ - Max Pooling
45
+ - ReLU
46
+ - ResNeXt Block
47
+ - Residual Connection
48
+ - Softmax
49
+ Tasks:
50
+ - Image Classification
51
+ Training Techniques:
52
+ - Label Smoothing
53
+ - Polynomial Learning Rate Decay
54
+ - SGD with Momentum
55
+ - Weight Decay
56
+ Training Data:
57
+ - ImageNet
58
+ Training Resources: 1x GPU
59
+ ID: cspresnext50
60
+ LR: 0.1
61
+ Layers: 50
62
+ Crop Pct: '0.875'
63
+ Momentum: 0.9
64
+ Batch Size: 128
65
+ Image Size: '224'
66
+ Weight Decay: 0.005
67
+ Interpolation: bilinear
68
+ Training Steps: 8000000
69
+ Code: https://github.com/rwightman/pytorch-image-models/blob/d8e69206be253892b2956341fea09fdebfaae4e3/timm/models/cspnet.py#L430
70
+ Weights: https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/cspresnext50_ra_224-648b4713.pth
71
+ Results:
72
+ - Task: Image Classification
73
+ Dataset: ImageNet
74
+ Metrics:
75
+ Top 1 Accuracy: 80.05%
76
+ Top 5 Accuracy: 94.94%
77
+ -->
pytorch-image-models/docs/models/.templates/models/densenet.md ADDED
@@ -0,0 +1,305 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # DenseNet
2
+
3
+ **DenseNet** is a type of convolutional neural network that utilises dense connections between layers, through [Dense Blocks](http://www.paperswithcode.com/method/dense-block), where we connect *all layers* (with matching feature-map sizes) directly with each other. To preserve the feed-forward nature, each layer obtains additional inputs from all preceding layers and passes on its own feature-maps to all subsequent layers.
4
+
5
+ The **DenseNet Blur** variant in this collection by Ross Wightman employs [Blur Pooling](http://www.paperswithcode.com/method/blur-pooling).
6
+
7
+ {% include 'code_snippets.md' %}
8
+
9
+ ## How do I train this model?
10
+
11
+ You can follow the [timm recipe scripts](https://rwightman.github.io/pytorch-image-models/scripts/) for training a new model afresh.
12
+
13
+ ## Citation
14
+
15
+ ```BibTeX
16
+ @article{DBLP:journals/corr/HuangLW16a,
17
+ author = {Gao Huang and
18
+ Zhuang Liu and
19
+ Kilian Q. Weinberger},
20
+ title = {Densely Connected Convolutional Networks},
21
+ journal = {CoRR},
22
+ volume = {abs/1608.06993},
23
+ year = {2016},
24
+ url = {http://arxiv.org/abs/1608.06993},
25
+ archivePrefix = {arXiv},
26
+ eprint = {1608.06993},
27
+ timestamp = {Mon, 10 Sep 2018 15:49:32 +0200},
28
+ biburl = {https://dblp.org/rec/journals/corr/HuangLW16a.bib},
29
+ bibsource = {dblp computer science bibliography, https://dblp.org}
30
+ }
31
+ ```
32
+
33
+ ```BibTeX
34
+ @misc{rw2019timm,
35
+ author = {Ross Wightman},
36
+ title = {PyTorch Image Models},
37
+ year = {2019},
38
+ publisher = {GitHub},
39
+ journal = {GitHub repository},
40
+ doi = {10.5281/zenodo.4414861},
41
+ howpublished = {\url{https://github.com/rwightman/pytorch-image-models}}
42
+ }
43
+ ```
44
+
45
+ <!--
46
+ Type: model-index
47
+ Collections:
48
+ - Name: DenseNet
49
+ Paper:
50
+ Title: Densely Connected Convolutional Networks
51
+ URL: https://paperswithcode.com/paper/densely-connected-convolutional-networks
52
+ Models:
53
+ - Name: densenet121
54
+ In Collection: DenseNet
55
+ Metadata:
56
+ FLOPs: 3641843200
57
+ Parameters: 7980000
58
+ File Size: 32376726
59
+ Architecture:
60
+ - 1x1 Convolution
61
+ - Average Pooling
62
+ - Batch Normalization
63
+ - Convolution
64
+ - Dense Block
65
+ - Dense Connections
66
+ - Dropout
67
+ - Max Pooling
68
+ - ReLU
69
+ - Softmax
70
+ Tasks:
71
+ - Image Classification
72
+ Training Techniques:
73
+ - Kaiming Initialization
74
+ - Nesterov Accelerated Gradient
75
+ - Weight Decay
76
+ Training Data:
77
+ - ImageNet
78
+ ID: densenet121
79
+ LR: 0.1
80
+ Epochs: 90
81
+ Layers: 121
82
+ Dropout: 0.2
83
+ Crop Pct: '0.875'
84
+ Momentum: 0.9
85
+ Batch Size: 256
86
+ Image Size: '224'
87
+ Weight Decay: 0.0001
88
+ Interpolation: bicubic
89
+ Code: https://github.com/rwightman/pytorch-image-models/blob/d8e69206be253892b2956341fea09fdebfaae4e3/timm/models/densenet.py#L295
90
+ Weights: https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/densenet121_ra-50efcf5c.pth
91
+ Results:
92
+ - Task: Image Classification
93
+ Dataset: ImageNet
94
+ Metrics:
95
+ Top 1 Accuracy: 75.56%
96
+ Top 5 Accuracy: 92.65%
97
+ - Name: densenet161
98
+ In Collection: DenseNet
99
+ Metadata:
100
+ FLOPs: 9931959264
101
+ Parameters: 28680000
102
+ File Size: 115730790
103
+ Architecture:
104
+ - 1x1 Convolution
105
+ - Average Pooling
106
+ - Batch Normalization
107
+ - Convolution
108
+ - Dense Block
109
+ - Dense Connections
110
+ - Dropout
111
+ - Max Pooling
112
+ - ReLU
113
+ - Softmax
114
+ Tasks:
115
+ - Image Classification
116
+ Training Techniques:
117
+ - Kaiming Initialization
118
+ - Nesterov Accelerated Gradient
119
+ - Weight Decay
120
+ Training Data:
121
+ - ImageNet
122
+ ID: densenet161
123
+ LR: 0.1
124
+ Epochs: 90
125
+ Layers: 161
126
+ Dropout: 0.2
127
+ Crop Pct: '0.875'
128
+ Momentum: 0.9
129
+ Batch Size: 256
130
+ Image Size: '224'
131
+ Weight Decay: 0.0001
132
+ Interpolation: bicubic
133
+ Code: https://github.com/rwightman/pytorch-image-models/blob/d8e69206be253892b2956341fea09fdebfaae4e3/timm/models/densenet.py#L347
134
+ Weights: https://download.pytorch.org/models/densenet161-8d451a50.pth
135
+ Results:
136
+ - Task: Image Classification
137
+ Dataset: ImageNet
138
+ Metrics:
139
+ Top 1 Accuracy: 77.36%
140
+ Top 5 Accuracy: 93.63%
141
+ - Name: densenet169
142
+ In Collection: DenseNet
143
+ Metadata:
144
+ FLOPs: 4316945792
145
+ Parameters: 14150000
146
+ File Size: 57365526
147
+ Architecture:
148
+ - 1x1 Convolution
149
+ - Average Pooling
150
+ - Batch Normalization
151
+ - Convolution
152
+ - Dense Block
153
+ - Dense Connections
154
+ - Dropout
155
+ - Max Pooling
156
+ - ReLU
157
+ - Softmax
158
+ Tasks:
159
+ - Image Classification
160
+ Training Techniques:
161
+ - Kaiming Initialization
162
+ - Nesterov Accelerated Gradient
163
+ - Weight Decay
164
+ Training Data:
165
+ - ImageNet
166
+ ID: densenet169
167
+ LR: 0.1
168
+ Epochs: 90
169
+ Layers: 169
170
+ Dropout: 0.2
171
+ Crop Pct: '0.875'
172
+ Momentum: 0.9
173
+ Batch Size: 256
174
+ Image Size: '224'
175
+ Weight Decay: 0.0001
176
+ Interpolation: bicubic
177
+ Code: https://github.com/rwightman/pytorch-image-models/blob/d8e69206be253892b2956341fea09fdebfaae4e3/timm/models/densenet.py#L327
178
+ Weights: https://download.pytorch.org/models/densenet169-b2777c0a.pth
179
+ Results:
180
+ - Task: Image Classification
181
+ Dataset: ImageNet
182
+ Metrics:
183
+ Top 1 Accuracy: 75.9%
184
+ Top 5 Accuracy: 93.02%
185
+ - Name: densenet201
186
+ In Collection: DenseNet
187
+ Metadata:
188
+ FLOPs: 5514321024
189
+ Parameters: 20010000
190
+ File Size: 81131730
191
+ Architecture:
192
+ - 1x1 Convolution
193
+ - Average Pooling
194
+ - Batch Normalization
195
+ - Convolution
196
+ - Dense Block
197
+ - Dense Connections
198
+ - Dropout
199
+ - Max Pooling
200
+ - ReLU
201
+ - Softmax
202
+ Tasks:
203
+ - Image Classification
204
+ Training Techniques:
205
+ - Kaiming Initialization
206
+ - Nesterov Accelerated Gradient
207
+ - Weight Decay
208
+ Training Data:
209
+ - ImageNet
210
+ ID: densenet201
211
+ LR: 0.1
212
+ Epochs: 90
213
+ Layers: 201
214
+ Dropout: 0.2
215
+ Crop Pct: '0.875'
216
+ Momentum: 0.9
217
+ Batch Size: 256
218
+ Image Size: '224'
219
+ Weight Decay: 0.0001
220
+ Interpolation: bicubic
221
+ Code: https://github.com/rwightman/pytorch-image-models/blob/d8e69206be253892b2956341fea09fdebfaae4e3/timm/models/densenet.py#L337
222
+ Weights: https://download.pytorch.org/models/densenet201-c1103571.pth
223
+ Results:
224
+ - Task: Image Classification
225
+ Dataset: ImageNet
226
+ Metrics:
227
+ Top 1 Accuracy: 77.29%
228
+ Top 5 Accuracy: 93.48%
229
+ - Name: densenetblur121d
230
+ In Collection: DenseNet
231
+ Metadata:
232
+ FLOPs: 3947812864
233
+ Parameters: 8000000
234
+ File Size: 32456500
235
+ Architecture:
236
+ - 1x1 Convolution
237
+ - Batch Normalization
238
+ - Blur Pooling
239
+ - Convolution
240
+ - Dense Block
241
+ - Dense Connections
242
+ - Dropout
243
+ - Max Pooling
244
+ - ReLU
245
+ - Softmax
246
+ Tasks:
247
+ - Image Classification
248
+ Training Data:
249
+ - ImageNet
250
+ ID: densenetblur121d
251
+ Crop Pct: '0.875'
252
+ Image Size: '224'
253
+ Interpolation: bicubic
254
+ Code: https://github.com/rwightman/pytorch-image-models/blob/d8e69206be253892b2956341fea09fdebfaae4e3/timm/models/densenet.py#L305
255
+ Weights: https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/densenetblur121d_ra-100dcfbc.pth
256
+ Results:
257
+ - Task: Image Classification
258
+ Dataset: ImageNet
259
+ Metrics:
260
+ Top 1 Accuracy: 76.59%
261
+ Top 5 Accuracy: 93.2%
262
+ - Name: tv_densenet121
263
+ In Collection: DenseNet
264
+ Metadata:
265
+ FLOPs: 3641843200
266
+ Parameters: 7980000
267
+ File Size: 32342954
268
+ Architecture:
269
+ - 1x1 Convolution
270
+ - Average Pooling
271
+ - Batch Normalization
272
+ - Convolution
273
+ - Dense Block
274
+ - Dense Connections
275
+ - Dropout
276
+ - Max Pooling
277
+ - ReLU
278
+ - Softmax
279
+ Tasks:
280
+ - Image Classification
281
+ Training Techniques:
282
+ - SGD with Momentum
283
+ - Weight Decay
284
+ Training Data:
285
+ - ImageNet
286
+ ID: tv_densenet121
287
+ LR: 0.1
288
+ Epochs: 90
289
+ Crop Pct: '0.875'
290
+ LR Gamma: 0.1
291
+ Momentum: 0.9
292
+ Batch Size: 32
293
+ Image Size: '224'
294
+ LR Step Size: 30
295
+ Weight Decay: 0.0001
296
+ Interpolation: bicubic
297
+ Code: https://github.com/rwightman/pytorch-image-models/blob/d8e69206be253892b2956341fea09fdebfaae4e3/timm/models/densenet.py#L379
298
+ Weights: https://download.pytorch.org/models/densenet121-a639ec97.pth
299
+ Results:
300
+ - Task: Image Classification
301
+ Dataset: ImageNet
302
+ Metrics:
303
+ Top 1 Accuracy: 74.74%
304
+ Top 5 Accuracy: 92.15%
305
+ -->
pytorch-image-models/docs/models/.templates/models/dla.md ADDED
@@ -0,0 +1,545 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Deep Layer Aggregation
2
+
3
+ Extending “shallow” skip connections, **Deep Layer Aggregation (DLA)** incorporates more depth and sharing. The authors introduce two structures for deep layer aggregation (DLA): iterative deep aggregation (IDA) and hierarchical deep aggregation (HDA). These structures are expressed through an architectural framework, independent of the choice of backbone, for compatibility with current and future networks.
4
+
5
+ IDA focuses on fusing resolutions and scales while HDA focuses on merging features from all modules and channels. IDA follows the base hierarchy to refine resolution and aggregate scale stage-by-stage. HDA assembles its own hierarchy of tree-structured connections that cross and merge stages to aggregate different levels of representation.
6
+
7
+ {% include 'code_snippets.md' %}
8
+
9
+ ## How do I train this model?
10
+
11
+ You can follow the [timm recipe scripts](https://rwightman.github.io/pytorch-image-models/scripts/) for training a new model afresh.
12
+
13
+ ## Citation
14
+
15
+ ```BibTeX
16
+ @misc{yu2019deep,
17
+ title={Deep Layer Aggregation},
18
+ author={Fisher Yu and Dequan Wang and Evan Shelhamer and Trevor Darrell},
19
+ year={2019},
20
+ eprint={1707.06484},
21
+ archivePrefix={arXiv},
22
+ primaryClass={cs.CV}
23
+ }
24
+ ```
25
+
26
+ <!--
27
+ Type: model-index
28
+ Collections:
29
+ - Name: DLA
30
+ Paper:
31
+ Title: Deep Layer Aggregation
32
+ URL: https://paperswithcode.com/paper/deep-layer-aggregation
33
+ Models:
34
+ - Name: dla102
35
+ In Collection: DLA
36
+ Metadata:
37
+ FLOPs: 7192952808
38
+ Parameters: 33270000
39
+ File Size: 135290579
40
+ Architecture:
41
+ - 1x1 Convolution
42
+ - Batch Normalization
43
+ - Convolution
44
+ - DLA Bottleneck Residual Block
45
+ - DLA Residual Block
46
+ - Global Average Pooling
47
+ - Max Pooling
48
+ - ReLU
49
+ - Residual Block
50
+ - Residual Connection
51
+ - Softmax
52
+ Tasks:
53
+ - Image Classification
54
+ Training Techniques:
55
+ - SGD with Momentum
56
+ - Weight Decay
57
+ Training Data:
58
+ - ImageNet
59
+ Training Resources: 8x GPUs
60
+ ID: dla102
61
+ LR: 0.1
62
+ Epochs: 120
63
+ Layers: 102
64
+ Crop Pct: '0.875'
65
+ Momentum: 0.9
66
+ Batch Size: 256
67
+ Image Size: '224'
68
+ Weight Decay: 0.0001
69
+ Interpolation: bilinear
70
+ Code: https://github.com/rwightman/pytorch-image-models/blob/d8e69206be253892b2956341fea09fdebfaae4e3/timm/models/dla.py#L410
71
+ Weights: http://dl.yf.io/dla/models/imagenet/dla102-d94d9790.pth
72
+ Results:
73
+ - Task: Image Classification
74
+ Dataset: ImageNet
75
+ Metrics:
76
+ Top 1 Accuracy: 78.03%
77
+ Top 5 Accuracy: 93.95%
78
+ - Name: dla102x
79
+ In Collection: DLA
80
+ Metadata:
81
+ FLOPs: 5886821352
82
+ Parameters: 26310000
83
+ File Size: 107552695
84
+ Architecture:
85
+ - 1x1 Convolution
86
+ - Batch Normalization
87
+ - Convolution
88
+ - DLA Bottleneck Residual Block
89
+ - DLA Residual Block
90
+ - Global Average Pooling
91
+ - Max Pooling
92
+ - ReLU
93
+ - Residual Block
94
+ - Residual Connection
95
+ - Softmax
96
+ Tasks:
97
+ - Image Classification
98
+ Training Techniques:
99
+ - SGD with Momentum
100
+ - Weight Decay
101
+ Training Data:
102
+ - ImageNet
103
+ Training Resources: 8x GPUs
104
+ ID: dla102x
105
+ LR: 0.1
106
+ Epochs: 120
107
+ Layers: 102
108
+ Crop Pct: '0.875'
109
+ Momentum: 0.9
110
+ Batch Size: 256
111
+ Image Size: '224'
112
+ Weight Decay: 0.0001
113
+ Interpolation: bilinear
114
+ Code: https://github.com/rwightman/pytorch-image-models/blob/d8e69206be253892b2956341fea09fdebfaae4e3/timm/models/dla.py#L418
115
+ Weights: http://dl.yf.io/dla/models/imagenet/dla102x-ad62be81.pth
116
+ Results:
117
+ - Task: Image Classification
118
+ Dataset: ImageNet
119
+ Metrics:
120
+ Top 1 Accuracy: 78.51%
121
+ Top 5 Accuracy: 94.23%
122
+ - Name: dla102x2
123
+ In Collection: DLA
124
+ Metadata:
125
+ FLOPs: 9343847400
126
+ Parameters: 41280000
127
+ File Size: 167645295
128
+ Architecture:
129
+ - 1x1 Convolution
130
+ - Batch Normalization
131
+ - Convolution
132
+ - DLA Bottleneck Residual Block
133
+ - DLA Residual Block
134
+ - Global Average Pooling
135
+ - Max Pooling
136
+ - ReLU
137
+ - Residual Block
138
+ - Residual Connection
139
+ - Softmax
140
+ Tasks:
141
+ - Image Classification
142
+ Training Techniques:
143
+ - SGD with Momentum
144
+ - Weight Decay
145
+ Training Data:
146
+ - ImageNet
147
+ Training Resources: 8x GPUs
148
+ ID: dla102x2
149
+ LR: 0.1
150
+ Epochs: 120
151
+ Layers: 102
152
+ Crop Pct: '0.875'
153
+ Momentum: 0.9
154
+ Batch Size: 256
155
+ Image Size: '224'
156
+ Weight Decay: 0.0001
157
+ Interpolation: bilinear
158
+ Code: https://github.com/rwightman/pytorch-image-models/blob/d8e69206be253892b2956341fea09fdebfaae4e3/timm/models/dla.py#L426
159
+ Weights: http://dl.yf.io/dla/models/imagenet/dla102x2-262837b6.pth
160
+ Results:
161
+ - Task: Image Classification
162
+ Dataset: ImageNet
163
+ Metrics:
164
+ Top 1 Accuracy: 79.44%
165
+ Top 5 Accuracy: 94.65%
166
+ - Name: dla169
167
+ In Collection: DLA
168
+ Metadata:
169
+ FLOPs: 11598004200
170
+ Parameters: 53390000
171
+ File Size: 216547113
172
+ Architecture:
173
+ - 1x1 Convolution
174
+ - Batch Normalization
175
+ - Convolution
176
+ - DLA Bottleneck Residual Block
177
+ - DLA Residual Block
178
+ - Global Average Pooling
179
+ - Max Pooling
180
+ - ReLU
181
+ - Residual Block
182
+ - Residual Connection
183
+ - Softmax
184
+ Tasks:
185
+ - Image Classification
186
+ Training Techniques:
187
+ - SGD with Momentum
188
+ - Weight Decay
189
+ Training Data:
190
+ - ImageNet
191
+ Training Resources: 8x GPUs
192
+ ID: dla169
193
+ LR: 0.1
194
+ Epochs: 120
195
+ Layers: 169
196
+ Crop Pct: '0.875'
197
+ Momentum: 0.9
198
+ Batch Size: 256
199
+ Image Size: '224'
200
+ Weight Decay: 0.0001
201
+ Interpolation: bilinear
202
+ Code: https://github.com/rwightman/pytorch-image-models/blob/d8e69206be253892b2956341fea09fdebfaae4e3/timm/models/dla.py#L434
203
+ Weights: http://dl.yf.io/dla/models/imagenet/dla169-0914e092.pth
204
+ Results:
205
+ - Task: Image Classification
206
+ Dataset: ImageNet
207
+ Metrics:
208
+ Top 1 Accuracy: 78.69%
209
+ Top 5 Accuracy: 94.33%
210
+ - Name: dla34
211
+ In Collection: DLA
212
+ Metadata:
213
+ FLOPs: 3070105576
214
+ Parameters: 15740000
215
+ File Size: 63228658
216
+ Architecture:
217
+ - 1x1 Convolution
218
+ - Batch Normalization
219
+ - Convolution
220
+ - DLA Bottleneck Residual Block
221
+ - DLA Residual Block
222
+ - Global Average Pooling
223
+ - Max Pooling
224
+ - ReLU
225
+ - Residual Block
226
+ - Residual Connection
227
+ - Softmax
228
+ Tasks:
229
+ - Image Classification
230
+ Training Techniques:
231
+ - SGD with Momentum
232
+ - Weight Decay
233
+ Training Data:
234
+ - ImageNet
235
+ ID: dla34
236
+ LR: 0.1
237
+ Epochs: 120
238
+ Layers: 34
239
+ Crop Pct: '0.875'
240
+ Momentum: 0.9
241
+ Batch Size: 256
242
+ Image Size: '224'
243
+ Weight Decay: 0.0001
244
+ Interpolation: bilinear
245
+ Code: https://github.com/rwightman/pytorch-image-models/blob/d8e69206be253892b2956341fea09fdebfaae4e3/timm/models/dla.py#L362
246
+ Weights: http://dl.yf.io/dla/models/imagenet/dla34-ba72cf86.pth
247
+ Results:
248
+ - Task: Image Classification
249
+ Dataset: ImageNet
250
+ Metrics:
251
+ Top 1 Accuracy: 74.62%
252
+ Top 5 Accuracy: 92.06%
253
+ - Name: dla46_c
254
+ In Collection: DLA
255
+ Metadata:
256
+ FLOPs: 583277288
257
+ Parameters: 1300000
258
+ File Size: 5307963
259
+ Architecture:
260
+ - 1x1 Convolution
261
+ - Batch Normalization
262
+ - Convolution
263
+ - DLA Bottleneck Residual Block
264
+ - DLA Residual Block
265
+ - Global Average Pooling
266
+ - Max Pooling
267
+ - ReLU
268
+ - Residual Block
269
+ - Residual Connection
270
+ - Softmax
271
+ Tasks:
272
+ - Image Classification
273
+ Training Techniques:
274
+ - SGD with Momentum
275
+ - Weight Decay
276
+ Training Data:
277
+ - ImageNet
278
+ ID: dla46_c
279
+ LR: 0.1
280
+ Epochs: 120
281
+ Layers: 46
282
+ Crop Pct: '0.875'
283
+ Momentum: 0.9
284
+ Batch Size: 256
285
+ Image Size: '224'
286
+ Weight Decay: 0.0001
287
+ Interpolation: bilinear
288
+ Code: https://github.com/rwightman/pytorch-image-models/blob/d8e69206be253892b2956341fea09fdebfaae4e3/timm/models/dla.py#L369
289
+ Weights: http://dl.yf.io/dla/models/imagenet/dla46_c-2bfd52c3.pth
290
+ Results:
291
+ - Task: Image Classification
292
+ Dataset: ImageNet
293
+ Metrics:
294
+ Top 1 Accuracy: 64.87%
295
+ Top 5 Accuracy: 86.29%
296
+ - Name: dla46x_c
297
+ In Collection: DLA
298
+ Metadata:
299
+ FLOPs: 544052200
300
+ Parameters: 1070000
301
+ File Size: 4387641
302
+ Architecture:
303
+ - 1x1 Convolution
304
+ - Batch Normalization
305
+ - Convolution
306
+ - DLA Bottleneck Residual Block
307
+ - DLA Residual Block
308
+ - Global Average Pooling
309
+ - Max Pooling
310
+ - ReLU
311
+ - Residual Block
312
+ - Residual Connection
313
+ - Softmax
314
+ Tasks:
315
+ - Image Classification
316
+ Training Techniques:
317
+ - SGD with Momentum
318
+ - Weight Decay
319
+ Training Data:
320
+ - ImageNet
321
+ ID: dla46x_c
322
+ LR: 0.1
323
+ Epochs: 120
324
+ Layers: 46
325
+ Crop Pct: '0.875'
326
+ Momentum: 0.9
327
+ Batch Size: 256
328
+ Image Size: '224'
329
+ Weight Decay: 0.0001
330
+ Interpolation: bilinear
331
+ Code: https://github.com/rwightman/pytorch-image-models/blob/d8e69206be253892b2956341fea09fdebfaae4e3/timm/models/dla.py#L378
332
+ Weights: http://dl.yf.io/dla/models/imagenet/dla46x_c-d761bae7.pth
333
+ Results:
334
+ - Task: Image Classification
335
+ Dataset: ImageNet
336
+ Metrics:
337
+ Top 1 Accuracy: 65.98%
338
+ Top 5 Accuracy: 86.99%
339
+ - Name: dla60
340
+ In Collection: DLA
341
+ Metadata:
342
+ FLOPs: 4256251880
343
+ Parameters: 22040000
344
+ File Size: 89560235
345
+ Architecture:
346
+ - 1x1 Convolution
347
+ - Batch Normalization
348
+ - Convolution
349
+ - DLA Bottleneck Residual Block
350
+ - DLA Residual Block
351
+ - Global Average Pooling
352
+ - Max Pooling
353
+ - ReLU
354
+ - Residual Block
355
+ - Residual Connection
356
+ - Softmax
357
+ Tasks:
358
+ - Image Classification
359
+ Training Techniques:
360
+ - SGD with Momentum
361
+ - Weight Decay
362
+ Training Data:
363
+ - ImageNet
364
+ ID: dla60
365
+ LR: 0.1
366
+ Epochs: 120
367
+ Layers: 60
368
+ Dropout: 0.2
369
+ Crop Pct: '0.875'
370
+ Momentum: 0.9
371
+ Batch Size: 256
372
+ Image Size: '224'
373
+ Weight Decay: 0.0001
374
+ Interpolation: bilinear
375
+ Code: https://github.com/rwightman/pytorch-image-models/blob/d8e69206be253892b2956341fea09fdebfaae4e3/timm/models/dla.py#L394
376
+ Weights: http://dl.yf.io/dla/models/imagenet/dla60-24839fc4.pth
377
+ Results:
378
+ - Task: Image Classification
379
+ Dataset: ImageNet
380
+ Metrics:
381
+ Top 1 Accuracy: 77.04%
382
+ Top 5 Accuracy: 93.32%
383
+ - Name: dla60_res2net
384
+ In Collection: DLA
385
+ Metadata:
386
+ FLOPs: 4147578504
387
+ Parameters: 20850000
388
+ File Size: 84886593
389
+ Architecture:
390
+ - 1x1 Convolution
391
+ - Batch Normalization
392
+ - Convolution
393
+ - DLA Bottleneck Residual Block
394
+ - DLA Residual Block
395
+ - Global Average Pooling
396
+ - Max Pooling
397
+ - ReLU
398
+ - Residual Block
399
+ - Residual Connection
400
+ - Softmax
401
+ Tasks:
402
+ - Image Classification
403
+ Training Techniques:
404
+ - SGD with Momentum
405
+ - Weight Decay
406
+ Training Data:
407
+ - ImageNet
408
+ ID: dla60_res2net
409
+ Layers: 60
410
+ Crop Pct: '0.875'
411
+ Image Size: '224'
412
+ Interpolation: bilinear
413
+ Code: https://github.com/rwightman/pytorch-image-models/blob/d8e69206be253892b2956341fea09fdebfaae4e3/timm/models/dla.py#L346
414
+ Weights: https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-res2net/res2net_dla60_4s-d88db7f9.pth
415
+ Results:
416
+ - Task: Image Classification
417
+ Dataset: ImageNet
418
+ Metrics:
419
+ Top 1 Accuracy: 78.46%
420
+ Top 5 Accuracy: 94.21%
421
+ - Name: dla60_res2next
422
+ In Collection: DLA
423
+ Metadata:
424
+ FLOPs: 3485335272
425
+ Parameters: 17030000
426
+ File Size: 69639245
427
+ Architecture:
428
+ - 1x1 Convolution
429
+ - Batch Normalization
430
+ - Convolution
431
+ - DLA Bottleneck Residual Block
432
+ - DLA Residual Block
433
+ - Global Average Pooling
434
+ - Max Pooling
435
+ - ReLU
436
+ - Residual Block
437
+ - Residual Connection
438
+ - Softmax
439
+ Tasks:
440
+ - Image Classification
441
+ Training Techniques:
442
+ - SGD with Momentum
443
+ - Weight Decay
444
+ Training Data:
445
+ - ImageNet
446
+ ID: dla60_res2next
447
+ Layers: 60
448
+ Crop Pct: '0.875'
449
+ Image Size: '224'
450
+ Interpolation: bilinear
451
+ Code: https://github.com/rwightman/pytorch-image-models/blob/d8e69206be253892b2956341fea09fdebfaae4e3/timm/models/dla.py#L354
452
+ Weights: https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-res2net/res2next_dla60_4s-d327927b.pth
453
+ Results:
454
+ - Task: Image Classification
455
+ Dataset: ImageNet
456
+ Metrics:
457
+ Top 1 Accuracy: 78.44%
458
+ Top 5 Accuracy: 94.16%
459
+ - Name: dla60x
460
+ In Collection: DLA
461
+ Metadata:
462
+ FLOPs: 3544204264
463
+ Parameters: 17350000
464
+ File Size: 70883139
465
+ Architecture:
466
+ - 1x1 Convolution
467
+ - Batch Normalization
468
+ - Convolution
469
+ - DLA Bottleneck Residual Block
470
+ - DLA Residual Block
471
+ - Global Average Pooling
472
+ - Max Pooling
473
+ - ReLU
474
+ - Residual Block
475
+ - Residual Connection
476
+ - Softmax
477
+ Tasks:
478
+ - Image Classification
479
+ Training Techniques:
480
+ - SGD with Momentum
481
+ - Weight Decay
482
+ Training Data:
483
+ - ImageNet
484
+ ID: dla60x
485
+ LR: 0.1
486
+ Epochs: 120
487
+ Layers: 60
488
+ Crop Pct: '0.875'
489
+ Momentum: 0.9
490
+ Batch Size: 256
491
+ Image Size: '224'
492
+ Weight Decay: 0.0001
493
+ Interpolation: bilinear
494
+ Code: https://github.com/rwightman/pytorch-image-models/blob/d8e69206be253892b2956341fea09fdebfaae4e3/timm/models/dla.py#L402
495
+ Weights: http://dl.yf.io/dla/models/imagenet/dla60x-d15cacda.pth
496
+ Results:
497
+ - Task: Image Classification
498
+ Dataset: ImageNet
499
+ Metrics:
500
+ Top 1 Accuracy: 78.25%
501
+ Top 5 Accuracy: 94.02%
502
+ - Name: dla60x_c
503
+ In Collection: DLA
504
+ Metadata:
505
+ FLOPs: 593325032
506
+ Parameters: 1320000
507
+ File Size: 5454396
508
+ Architecture:
509
+ - 1x1 Convolution
510
+ - Batch Normalization
511
+ - Convolution
512
+ - DLA Bottleneck Residual Block
513
+ - DLA Residual Block
514
+ - Global Average Pooling
515
+ - Max Pooling
516
+ - ReLU
517
+ - Residual Block
518
+ - Residual Connection
519
+ - Softmax
520
+ Tasks:
521
+ - Image Classification
522
+ Training Techniques:
523
+ - SGD with Momentum
524
+ - Weight Decay
525
+ Training Data:
526
+ - ImageNet
527
+ ID: dla60x_c
528
+ LR: 0.1
529
+ Epochs: 120
530
+ Layers: 60
531
+ Crop Pct: '0.875'
532
+ Momentum: 0.9
533
+ Batch Size: 256
534
+ Image Size: '224'
535
+ Weight Decay: 0.0001
536
+ Interpolation: bilinear
537
+ Code: https://github.com/rwightman/pytorch-image-models/blob/d8e69206be253892b2956341fea09fdebfaae4e3/timm/models/dla.py#L386
538
+ Weights: http://dl.yf.io/dla/models/imagenet/dla60x_c-b870c45c.pth
539
+ Results:
540
+ - Task: Image Classification
541
+ Dataset: ImageNet
542
+ Metrics:
543
+ Top 1 Accuracy: 67.91%
544
+ Top 5 Accuracy: 88.42%
545
+ -->
pytorch-image-models/docs/models/.templates/models/dpn.md ADDED
@@ -0,0 +1,256 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Dual Path Network (DPN)
2
+
3
+ A **Dual Path Network (DPN)** is a convolutional neural network which presents a new topology of connection paths internally. The intuition is that [ResNets](https://paperswithcode.com/method/resnet) enable feature re-usage while DenseNets enable new feature exploration, and both are important for learning good representations. To enjoy the benefits from both path topologies, Dual Path Networks share common features while maintaining the flexibility to explore new features through dual path architectures.
4
+
5
+ The principal building block is a [DPN Block](https://paperswithcode.com/method/dpn-block).
6
+
7
+ {% include 'code_snippets.md' %}
8
+
9
+ ## How do I train this model?
10
+
11
+ You can follow the [timm recipe scripts](https://rwightman.github.io/pytorch-image-models/scripts/) for training a new model afresh.
12
+
13
+ ## Citation
14
+
15
+ ```BibTeX
16
+ @misc{chen2017dual,
17
+ title={Dual Path Networks},
18
+ author={Yunpeng Chen and Jianan Li and Huaxin Xiao and Xiaojie Jin and Shuicheng Yan and Jiashi Feng},
19
+ year={2017},
20
+ eprint={1707.01629},
21
+ archivePrefix={arXiv},
22
+ primaryClass={cs.CV}
23
+ }
24
+ ```
25
+
26
+ <!--
27
+ Type: model-index
28
+ Collections:
29
+ - Name: DPN
30
+ Paper:
31
+ Title: Dual Path Networks
32
+ URL: https://paperswithcode.com/paper/dual-path-networks
33
+ Models:
34
+ - Name: dpn107
35
+ In Collection: DPN
36
+ Metadata:
37
+ FLOPs: 23524280296
38
+ Parameters: 86920000
39
+ File Size: 348612331
40
+ Architecture:
41
+ - Batch Normalization
42
+ - Convolution
43
+ - DPN Block
44
+ - Dense Connections
45
+ - Global Average Pooling
46
+ - Max Pooling
47
+ - Softmax
48
+ Tasks:
49
+ - Image Classification
50
+ Training Techniques:
51
+ - SGD with Momentum
52
+ - Weight Decay
53
+ Training Data:
54
+ - ImageNet
55
+ Training Resources: 40x K80 GPUs
56
+ ID: dpn107
57
+ LR: 0.316
58
+ Layers: 107
59
+ Crop Pct: '0.875'
60
+ Batch Size: 1280
61
+ Image Size: '224'
62
+ Interpolation: bicubic
63
+ Code: https://github.com/rwightman/pytorch-image-models/blob/d8e69206be253892b2956341fea09fdebfaae4e3/timm/models/dpn.py#L310
64
+ Weights: https://github.com/rwightman/pytorch-dpn-pretrained/releases/download/v0.1/dpn107_extra-1ac7121e2.pth
65
+ Results:
66
+ - Task: Image Classification
67
+ Dataset: ImageNet
68
+ Metrics:
69
+ Top 1 Accuracy: 80.16%
70
+ Top 5 Accuracy: 94.91%
71
+ - Name: dpn131
72
+ In Collection: DPN
73
+ Metadata:
74
+ FLOPs: 20586274792
75
+ Parameters: 79250000
76
+ File Size: 318016207
77
+ Architecture:
78
+ - Batch Normalization
79
+ - Convolution
80
+ - DPN Block
81
+ - Dense Connections
82
+ - Global Average Pooling
83
+ - Max Pooling
84
+ - Softmax
85
+ Tasks:
86
+ - Image Classification
87
+ Training Techniques:
88
+ - SGD with Momentum
89
+ - Weight Decay
90
+ Training Data:
91
+ - ImageNet
92
+ Training Resources: 40x K80 GPUs
93
+ ID: dpn131
94
+ LR: 0.316
95
+ Layers: 131
96
+ Crop Pct: '0.875'
97
+ Batch Size: 960
98
+ Image Size: '224'
99
+ Interpolation: bicubic
100
+ Code: https://github.com/rwightman/pytorch-image-models/blob/d8e69206be253892b2956341fea09fdebfaae4e3/timm/models/dpn.py#L302
101
+ Weights: https://github.com/rwightman/pytorch-dpn-pretrained/releases/download/v0.1/dpn131-71dfe43e0.pth
102
+ Results:
103
+ - Task: Image Classification
104
+ Dataset: ImageNet
105
+ Metrics:
106
+ Top 1 Accuracy: 79.83%
107
+ Top 5 Accuracy: 94.71%
108
+ - Name: dpn68
109
+ In Collection: DPN
110
+ Metadata:
111
+ FLOPs: 2990567880
112
+ Parameters: 12610000
113
+ File Size: 50761994
114
+ Architecture:
115
+ - Batch Normalization
116
+ - Convolution
117
+ - DPN Block
118
+ - Dense Connections
119
+ - Global Average Pooling
120
+ - Max Pooling
121
+ - Softmax
122
+ Tasks:
123
+ - Image Classification
124
+ Training Techniques:
125
+ - SGD with Momentum
126
+ - Weight Decay
127
+ Training Data:
128
+ - ImageNet
129
+ Training Resources: 40x K80 GPUs
130
+ ID: dpn68
131
+ LR: 0.316
132
+ Layers: 68
133
+ Crop Pct: '0.875'
134
+ Batch Size: 1280
135
+ Image Size: '224'
136
+ Interpolation: bicubic
137
+ Code: https://github.com/rwightman/pytorch-image-models/blob/d8e69206be253892b2956341fea09fdebfaae4e3/timm/models/dpn.py#L270
138
+ Weights: https://github.com/rwightman/pytorch-dpn-pretrained/releases/download/v0.1/dpn68-66bebafa7.pth
139
+ Results:
140
+ - Task: Image Classification
141
+ Dataset: ImageNet
142
+ Metrics:
143
+ Top 1 Accuracy: 76.31%
144
+ Top 5 Accuracy: 92.97%
145
+ - Name: dpn68b
146
+ In Collection: DPN
147
+ Metadata:
148
+ FLOPs: 2990567880
149
+ Parameters: 12610000
150
+ File Size: 50781025
151
+ Architecture:
152
+ - Batch Normalization
153
+ - Convolution
154
+ - DPN Block
155
+ - Dense Connections
156
+ - Global Average Pooling
157
+ - Max Pooling
158
+ - Softmax
159
+ Tasks:
160
+ - Image Classification
161
+ Training Techniques:
162
+ - SGD with Momentum
163
+ - Weight Decay
164
+ Training Data:
165
+ - ImageNet
166
+ Training Resources: 40x K80 GPUs
167
+ ID: dpn68b
168
+ LR: 0.316
169
+ Layers: 68
170
+ Crop Pct: '0.875'
171
+ Batch Size: 1280
172
+ Image Size: '224'
173
+ Interpolation: bicubic
174
+ Code: https://github.com/rwightman/pytorch-image-models/blob/d8e69206be253892b2956341fea09fdebfaae4e3/timm/models/dpn.py#L278
175
+ Weights: https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/dpn68b_ra-a31ca160.pth
176
+ Results:
177
+ - Task: Image Classification
178
+ Dataset: ImageNet
179
+ Metrics:
180
+ Top 1 Accuracy: 79.21%
181
+ Top 5 Accuracy: 94.42%
182
+ - Name: dpn92
183
+ In Collection: DPN
184
+ Metadata:
185
+ FLOPs: 8357659624
186
+ Parameters: 37670000
187
+ File Size: 151248422
188
+ Architecture:
189
+ - Batch Normalization
190
+ - Convolution
191
+ - DPN Block
192
+ - Dense Connections
193
+ - Global Average Pooling
194
+ - Max Pooling
195
+ - Softmax
196
+ Tasks:
197
+ - Image Classification
198
+ Training Techniques:
199
+ - SGD with Momentum
200
+ - Weight Decay
201
+ Training Data:
202
+ - ImageNet
203
+ Training Resources: 40x K80 GPUs
204
+ ID: dpn92
205
+ LR: 0.316
206
+ Layers: 92
207
+ Crop Pct: '0.875'
208
+ Batch Size: 1280
209
+ Image Size: '224'
210
+ Interpolation: bicubic
211
+ Code: https://github.com/rwightman/pytorch-image-models/blob/d8e69206be253892b2956341fea09fdebfaae4e3/timm/models/dpn.py#L286
212
+ Weights: https://github.com/rwightman/pytorch-dpn-pretrained/releases/download/v0.1/dpn92_extra-b040e4a9b.pth
213
+ Results:
214
+ - Task: Image Classification
215
+ Dataset: ImageNet
216
+ Metrics:
217
+ Top 1 Accuracy: 79.99%
218
+ Top 5 Accuracy: 94.84%
219
+ - Name: dpn98
220
+ In Collection: DPN
221
+ Metadata:
222
+ FLOPs: 15003675112
223
+ Parameters: 61570000
224
+ File Size: 247021307
225
+ Architecture:
226
+ - Batch Normalization
227
+ - Convolution
228
+ - DPN Block
229
+ - Dense Connections
230
+ - Global Average Pooling
231
+ - Max Pooling
232
+ - Softmax
233
+ Tasks:
234
+ - Image Classification
235
+ Training Techniques:
236
+ - SGD with Momentum
237
+ - Weight Decay
238
+ Training Data:
239
+ - ImageNet
240
+ Training Resources: 40x K80 GPUs
241
+ ID: dpn98
242
+ LR: 0.4
243
+ Layers: 98
244
+ Crop Pct: '0.875'
245
+ Batch Size: 1280
246
+ Image Size: '224'
247
+ Interpolation: bicubic
248
+ Code: https://github.com/rwightman/pytorch-image-models/blob/d8e69206be253892b2956341fea09fdebfaae4e3/timm/models/dpn.py#L294
249
+ Weights: https://github.com/rwightman/pytorch-dpn-pretrained/releases/download/v0.1/dpn98-5b90dec4d.pth
250
+ Results:
251
+ - Task: Image Classification
252
+ Dataset: ImageNet
253
+ Metrics:
254
+ Top 1 Accuracy: 79.65%
255
+ Top 5 Accuracy: 94.61%
256
+ -->
pytorch-image-models/docs/models/.templates/models/ecaresnet.md ADDED
@@ -0,0 +1,236 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ECA-ResNet
2
+
3
+ An **ECA ResNet** is a variant on a [ResNet](https://paperswithcode.com/method/resnet) that utilises an [Efficient Channel Attention module](https://paperswithcode.com/method/efficient-channel-attention). Efficient Channel Attention is an architectural unit based on [squeeze-and-excitation blocks](https://paperswithcode.com/method/squeeze-and-excitation-block) that reduces model complexity without dimensionality reduction.
4
+
5
+ {% include 'code_snippets.md' %}
6
+
7
+ ## How do I train this model?
8
+
9
+ You can follow the [timm recipe scripts](https://rwightman.github.io/pytorch-image-models/scripts/) for training a new model afresh.
10
+
11
+ ## Citation
12
+
13
+ ```BibTeX
14
+ @misc{wang2020ecanet,
15
+ title={ECA-Net: Efficient Channel Attention for Deep Convolutional Neural Networks},
16
+ author={Qilong Wang and Banggu Wu and Pengfei Zhu and Peihua Li and Wangmeng Zuo and Qinghua Hu},
17
+ year={2020},
18
+ eprint={1910.03151},
19
+ archivePrefix={arXiv},
20
+ primaryClass={cs.CV}
21
+ }
22
+ ```
23
+
24
+ <!--
25
+ Type: model-index
26
+ Collections:
27
+ - Name: ECAResNet
28
+ Paper:
29
+ Title: 'ECA-Net: Efficient Channel Attention for Deep Convolutional Neural Networks'
30
+ URL: https://paperswithcode.com/paper/eca-net-efficient-channel-attention-for-deep
31
+ Models:
32
+ - Name: ecaresnet101d
33
+ In Collection: ECAResNet
34
+ Metadata:
35
+ FLOPs: 10377193728
36
+ Parameters: 44570000
37
+ File Size: 178815067
38
+ Architecture:
39
+ - 1x1 Convolution
40
+ - Batch Normalization
41
+ - Bottleneck Residual Block
42
+ - Convolution
43
+ - Efficient Channel Attention
44
+ - Global Average Pooling
45
+ - Max Pooling
46
+ - ReLU
47
+ - Residual Block
48
+ - Residual Connection
49
+ - Softmax
50
+ - Squeeze-and-Excitation Block
51
+ Tasks:
52
+ - Image Classification
53
+ Training Techniques:
54
+ - SGD with Momentum
55
+ - Weight Decay
56
+ Training Data:
57
+ - ImageNet
58
+ Training Resources: 4x RTX 2080Ti GPUs
59
+ ID: ecaresnet101d
60
+ LR: 0.1
61
+ Epochs: 100
62
+ Layers: 101
63
+ Crop Pct: '0.875'
64
+ Batch Size: 256
65
+ Image Size: '224'
66
+ Weight Decay: 0.0001
67
+ Interpolation: bicubic
68
+ Code: https://github.com/rwightman/pytorch-image-models/blob/a7f95818e44b281137503bcf4b3e3e94d8ffa52f/timm/models/resnet.py#L1087
69
+ Weights: https://imvl-automl-sh.oss-cn-shanghai.aliyuncs.com/darts/hyperml/hyperml/job_45402/outputs/ECAResNet101D_281c5844.pth
70
+ Results:
71
+ - Task: Image Classification
72
+ Dataset: ImageNet
73
+ Metrics:
74
+ Top 1 Accuracy: 82.18%
75
+ Top 5 Accuracy: 96.06%
76
+ - Name: ecaresnet101d_pruned
77
+ In Collection: ECAResNet
78
+ Metadata:
79
+ FLOPs: 4463972081
80
+ Parameters: 24880000
81
+ File Size: 99852736
82
+ Architecture:
83
+ - 1x1 Convolution
84
+ - Batch Normalization
85
+ - Bottleneck Residual Block
86
+ - Convolution
87
+ - Efficient Channel Attention
88
+ - Global Average Pooling
89
+ - Max Pooling
90
+ - ReLU
91
+ - Residual Block
92
+ - Residual Connection
93
+ - Softmax
94
+ - Squeeze-and-Excitation Block
95
+ Tasks:
96
+ - Image Classification
97
+ Training Techniques:
98
+ - SGD with Momentum
99
+ - Weight Decay
100
+ Training Data:
101
+ - ImageNet
102
+ ID: ecaresnet101d_pruned
103
+ Layers: 101
104
+ Crop Pct: '0.875'
105
+ Image Size: '224'
106
+ Interpolation: bicubic
107
+ Code: https://github.com/rwightman/pytorch-image-models/blob/a7f95818e44b281137503bcf4b3e3e94d8ffa52f/timm/models/resnet.py#L1097
108
+ Weights: https://imvl-automl-sh.oss-cn-shanghai.aliyuncs.com/darts/hyperml/hyperml/job_45610/outputs/ECAResNet101D_P_75a3370e.pth
109
+ Results:
110
+ - Task: Image Classification
111
+ Dataset: ImageNet
112
+ Metrics:
113
+ Top 1 Accuracy: 80.82%
114
+ Top 5 Accuracy: 95.64%
115
+ - Name: ecaresnet50d
116
+ In Collection: ECAResNet
117
+ Metadata:
118
+ FLOPs: 5591090432
119
+ Parameters: 25580000
120
+ File Size: 102579290
121
+ Architecture:
122
+ - 1x1 Convolution
123
+ - Batch Normalization
124
+ - Bottleneck Residual Block
125
+ - Convolution
126
+ - Efficient Channel Attention
127
+ - Global Average Pooling
128
+ - Max Pooling
129
+ - ReLU
130
+ - Residual Block
131
+ - Residual Connection
132
+ - Softmax
133
+ - Squeeze-and-Excitation Block
134
+ Tasks:
135
+ - Image Classification
136
+ Training Techniques:
137
+ - SGD with Momentum
138
+ - Weight Decay
139
+ Training Data:
140
+ - ImageNet
141
+ Training Resources: 4x RTX 2080Ti GPUs
142
+ ID: ecaresnet50d
143
+ LR: 0.1
144
+ Epochs: 100
145
+ Layers: 50
146
+ Crop Pct: '0.875'
147
+ Batch Size: 256
148
+ Image Size: '224'
149
+ Weight Decay: 0.0001
150
+ Interpolation: bicubic
151
+ Code: https://github.com/rwightman/pytorch-image-models/blob/a7f95818e44b281137503bcf4b3e3e94d8ffa52f/timm/models/resnet.py#L1045
152
+ Weights: https://imvl-automl-sh.oss-cn-shanghai.aliyuncs.com/darts/hyperml/hyperml/job_45402/outputs/ECAResNet50D_833caf58.pth
153
+ Results:
154
+ - Task: Image Classification
155
+ Dataset: ImageNet
156
+ Metrics:
157
+ Top 1 Accuracy: 80.61%
158
+ Top 5 Accuracy: 95.31%
159
+ - Name: ecaresnet50d_pruned
160
+ In Collection: ECAResNet
161
+ Metadata:
162
+ FLOPs: 3250730657
163
+ Parameters: 19940000
164
+ File Size: 79990436
165
+ Architecture:
166
+ - 1x1 Convolution
167
+ - Batch Normalization
168
+ - Bottleneck Residual Block
169
+ - Convolution
170
+ - Efficient Channel Attention
171
+ - Global Average Pooling
172
+ - Max Pooling
173
+ - ReLU
174
+ - Residual Block
175
+ - Residual Connection
176
+ - Softmax
177
+ - Squeeze-and-Excitation Block
178
+ Tasks:
179
+ - Image Classification
180
+ Training Techniques:
181
+ - SGD with Momentum
182
+ - Weight Decay
183
+ Training Data:
184
+ - ImageNet
185
+ ID: ecaresnet50d_pruned
186
+ Layers: 50
187
+ Crop Pct: '0.875'
188
+ Image Size: '224'
189
+ Interpolation: bicubic
190
+ Code: https://github.com/rwightman/pytorch-image-models/blob/a7f95818e44b281137503bcf4b3e3e94d8ffa52f/timm/models/resnet.py#L1055
191
+ Weights: https://imvl-automl-sh.oss-cn-shanghai.aliyuncs.com/darts/hyperml/hyperml/job_45899/outputs/ECAResNet50D_P_9c67f710.pth
192
+ Results:
193
+ - Task: Image Classification
194
+ Dataset: ImageNet
195
+ Metrics:
196
+ Top 1 Accuracy: 79.71%
197
+ Top 5 Accuracy: 94.88%
198
+ - Name: ecaresnetlight
199
+ In Collection: ECAResNet
200
+ Metadata:
201
+ FLOPs: 5276118784
202
+ Parameters: 30160000
203
+ File Size: 120956612
204
+ Architecture:
205
+ - 1x1 Convolution
206
+ - Batch Normalization
207
+ - Bottleneck Residual Block
208
+ - Convolution
209
+ - Efficient Channel Attention
210
+ - Global Average Pooling
211
+ - Max Pooling
212
+ - ReLU
213
+ - Residual Block
214
+ - Residual Connection
215
+ - Softmax
216
+ - Squeeze-and-Excitation Block
217
+ Tasks:
218
+ - Image Classification
219
+ Training Techniques:
220
+ - SGD with Momentum
221
+ - Weight Decay
222
+ Training Data:
223
+ - ImageNet
224
+ ID: ecaresnetlight
225
+ Crop Pct: '0.875'
226
+ Image Size: '224'
227
+ Interpolation: bicubic
228
+ Code: https://github.com/rwightman/pytorch-image-models/blob/a7f95818e44b281137503bcf4b3e3e94d8ffa52f/timm/models/resnet.py#L1077
229
+ Weights: https://imvl-automl-sh.oss-cn-shanghai.aliyuncs.com/darts/hyperml/hyperml/job_45402/outputs/ECAResNetLight_4f34b35b.pth
230
+ Results:
231
+ - Task: Image Classification
232
+ Dataset: ImageNet
233
+ Metrics:
234
+ Top 1 Accuracy: 80.46%
235
+ Top 5 Accuracy: 95.25%
236
+ -->
pytorch-image-models/docs/models/.templates/models/efficientnet-pruned.md ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # EfficientNet (Knapsack Pruned)
2
+
3
+ **EfficientNet** is a convolutional neural network architecture and scaling method that uniformly scales all dimensions of depth/width/resolution using a *compound coefficient*. Unlike conventional practice that arbitrarily scales these factors, the EfficientNet scaling method uniformly scales network width, depth, and resolution with a set of fixed scaling coefficients. For example, if we want to use $2^N$ times more computational resources, then we can simply increase the network depth by $\alpha ^ N$, width by $\beta ^ N$, and image size by $\gamma ^ N$, where $\alpha, \beta, \gamma$ are constant coefficients determined by a small grid search on the original small model. EfficientNet uses a compound coefficient $\phi$ to uniformly scale network width, depth, and resolution in a principled way.
4
+
5
+ The compound scaling method is justified by the intuition that if the input image is bigger, then the network needs more layers to increase the receptive field and more channels to capture more fine-grained patterns on the bigger image.
6
+
7
+ The base EfficientNet-B0 network is based on the inverted bottleneck residual blocks of [MobileNetV2](https://paperswithcode.com/method/mobilenetv2), in addition to [squeeze-and-excitation blocks](https://paperswithcode.com/method/squeeze-and-excitation-block).
8
+
9
+ This collection consists of pruned EfficientNet models.
10
+
11
+ {% include 'code_snippets.md' %}
12
+
13
+ ## How do I train this model?
14
+
15
+ You can follow the [timm recipe scripts](https://rwightman.github.io/pytorch-image-models/scripts/) for training a new model afresh.
16
+
17
+ ## Citation
18
+
19
+ ```BibTeX
20
+ @misc{tan2020efficientnet,
21
+ title={EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks},
22
+ author={Mingxing Tan and Quoc V. Le},
23
+ year={2020},
24
+ eprint={1905.11946},
25
+ archivePrefix={arXiv},
26
+ primaryClass={cs.LG}
27
+ }
28
+ ```
29
+
30
+ ```BibTeX
31
+ @misc{aflalo2020knapsack,
32
+ title={Knapsack Pruning with Inner Distillation},
33
+ author={Yonathan Aflalo and Asaf Noy and Ming Lin and Itamar Friedman and Lihi Zelnik},
34
+ year={2020},
35
+ eprint={2002.08258},
36
+ archivePrefix={arXiv},
37
+ primaryClass={cs.LG}
38
+ }
39
+ ```
40
+
41
+ <!--
42
+ Type: model-index
43
+ Collections:
44
+ - Name: EfficientNet Pruned
45
+ Paper:
46
+ Title: Knapsack Pruning with Inner Distillation
47
+ URL: https://paperswithcode.com/paper/knapsack-pruning-with-inner-distillation
48
+ Models:
49
+ - Name: efficientnet_b1_pruned
50
+ In Collection: EfficientNet Pruned
51
+ Metadata:
52
+ FLOPs: 489653114
53
+ Parameters: 6330000
54
+ File Size: 25595162
55
+ Architecture:
56
+ - 1x1 Convolution
57
+ - Average Pooling
58
+ - Batch Normalization
59
+ - Convolution
60
+ - Dense Connections
61
+ - Dropout
62
+ - Inverted Residual Block
63
+ - Squeeze-and-Excitation Block
64
+ - Swish
65
+ Tasks:
66
+ - Image Classification
67
+ Training Data:
68
+ - ImageNet
69
+ ID: efficientnet_b1_pruned
70
+ Crop Pct: '0.882'
71
+ Image Size: '240'
72
+ Interpolation: bicubic
73
+ Code: https://github.com/rwightman/pytorch-image-models/blob/a7f95818e44b281137503bcf4b3e3e94d8ffa52f/timm/models/efficientnet.py#L1208
74
+ Weights: https://imvl-automl-sh.oss-cn-shanghai.aliyuncs.com/darts/hyperml/hyperml/job_45403/outputs/effnetb1_pruned_9ebb3fe6.pth
75
+ Results:
76
+ - Task: Image Classification
77
+ Dataset: ImageNet
78
+ Metrics:
79
+ Top 1 Accuracy: 78.25%
80
+ Top 5 Accuracy: 93.84%
81
+ - Name: efficientnet_b2_pruned
82
+ In Collection: EfficientNet Pruned
83
+ Metadata:
84
+ FLOPs: 878133915
85
+ Parameters: 8310000
86
+ File Size: 33555005
87
+ Architecture:
88
+ - 1x1 Convolution
89
+ - Average Pooling
90
+ - Batch Normalization
91
+ - Convolution
92
+ - Dense Connections
93
+ - Dropout
94
+ - Inverted Residual Block
95
+ - Squeeze-and-Excitation Block
96
+ - Swish
97
+ Tasks:
98
+ - Image Classification
99
+ Training Data:
100
+ - ImageNet
101
+ ID: efficientnet_b2_pruned
102
+ Crop Pct: '0.89'
103
+ Image Size: '260'
104
+ Interpolation: bicubic
105
+ Code: https://github.com/rwightman/pytorch-image-models/blob/a7f95818e44b281137503bcf4b3e3e94d8ffa52f/timm/models/efficientnet.py#L1219
106
+ Weights: https://imvl-automl-sh.oss-cn-shanghai.aliyuncs.com/darts/hyperml/hyperml/job_45403/outputs/effnetb2_pruned_203f55bc.pth
107
+ Results:
108
+ - Task: Image Classification
109
+ Dataset: ImageNet
110
+ Metrics:
111
+ Top 1 Accuracy: 79.91%
112
+ Top 5 Accuracy: 94.86%
113
+ - Name: efficientnet_b3_pruned
114
+ In Collection: EfficientNet Pruned
115
+ Metadata:
116
+ FLOPs: 1239590641
117
+ Parameters: 9860000
118
+ File Size: 39770812
119
+ Architecture:
120
+ - 1x1 Convolution
121
+ - Average Pooling
122
+ - Batch Normalization
123
+ - Convolution
124
+ - Dense Connections
125
+ - Dropout
126
+ - Inverted Residual Block
127
+ - Squeeze-and-Excitation Block
128
+ - Swish
129
+ Tasks:
130
+ - Image Classification
131
+ Training Data:
132
+ - ImageNet
133
+ ID: efficientnet_b3_pruned
134
+ Crop Pct: '0.904'
135
+ Image Size: '300'
136
+ Interpolation: bicubic
137
+ Code: https://github.com/rwightman/pytorch-image-models/blob/a7f95818e44b281137503bcf4b3e3e94d8ffa52f/timm/models/efficientnet.py#L1230
138
+ Weights: https://imvl-automl-sh.oss-cn-shanghai.aliyuncs.com/darts/hyperml/hyperml/job_45403/outputs/effnetb3_pruned_5abcc29f.pth
139
+ Results:
140
+ - Task: Image Classification
141
+ Dataset: ImageNet
142
+ Metrics:
143
+ Top 1 Accuracy: 80.86%
144
+ Top 5 Accuracy: 95.24%
145
+ -->