commit files to HF hub
Browse files
README.md
ADDED
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# vit_base_patch16_224
|
2 |
+
Implementation of Vision Transformer (ViT) proposed in [An Image Is
|
3 |
+
Worth 16x16 Words: Transformers For Image Recognition At
|
4 |
+
Scale](https://arxiv.org/pdf/2010.11929.pdf)
|
5 |
+
|
6 |
+
The following image from the authors shows the architecture.
|
7 |
+
|
8 |
+
![image](https://github.com/FrancescoSaverioZuppichini/glasses/blob/develop/docs/_static/images/ViT.png?raw=true)
|
9 |
+
|
10 |
+
``` python
|
11 |
+
ViT.vit_small_patch16_224()
|
12 |
+
ViT.vit_base_patch16_224()
|
13 |
+
ViT.vit_base_patch16_384()
|
14 |
+
ViT.vit_base_patch32_384()
|
15 |
+
ViT.vit_huge_patch16_224()
|
16 |
+
ViT.vit_huge_patch32_384()
|
17 |
+
ViT.vit_large_patch16_224()
|
18 |
+
ViT.vit_large_patch16_384()
|
19 |
+
ViT.vit_large_patch32_384()
|
20 |
+
```
|
21 |
+
|
22 |
+
Examples:
|
23 |
+
|
24 |
+
``` python
|
25 |
+
# change activation
|
26 |
+
ViT.vit_base_patch16_224(activation = nn.SELU)
|
27 |
+
# change number of classes (default is 1000)
|
28 |
+
ViT.vit_base_patch16_224(n_classes=100)
|
29 |
+
# pass a different block, default is TransformerEncoderBlock
|
30 |
+
ViT.vit_base_patch16_224(block=MyCoolTransformerBlock)
|
31 |
+
# get features
|
32 |
+
model = ViT.vit_base_patch16_224()
|
33 |
+
# first call .features, this will activate the forward hooks and tell the model you'd like to get the features
|
34 |
+
model.encoder.features
|
35 |
+
model(torch.randn((1,3,224,224)))
|
36 |
+
# get the features from the encoder
|
37 |
+
features = model.encoder.features
|
38 |
+
print([x.shape for x in features])
|
39 |
+
# [torch.Size([1, 197, 768]), torch.Size([1, 197, 768]), ...]
|
40 |
+
# change the tokens, you have to subclass ViTTokens
|
41 |
+
class MyTokens(ViTTokens):
|
42 |
+
def __init__(self, emb_size: int):
|
43 |
+
super().__init__(emb_size)
|
44 |
+
self.my_new_token = nn.Parameter(torch.randn(1, 1, emb_size))
|
45 |
+
ViT(tokens=MyTokens)
|
46 |
+
```
|
47 |
+
|
48 |
+
|