KyanChen's picture
Upload 1861 files
3b96cb1
raw
history blame
1.3 kB
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Dict, List
import torch
from mmpretrain.registry import MODELS
from mmpretrain.structures import DataSample
from .base import BaseSelfSupervisor
@MODELS.register_module()
class EVA(BaseSelfSupervisor):
"""EVA.
Implementation of `EVA: Exploring the Limits of Masked Visual
Representation Learning at Scale <https://arxiv.org/abs/2211.07636>`_.
"""
def extract_feat(self, inputs: torch.Tensor):
return self.backbone(inputs, mask=None)
def loss(self, inputs: torch.Tensor, data_samples: List[DataSample],
**kwargs) -> Dict[str, torch.Tensor]:
"""The forward function in training.
Args:
inputs (torch.Tensor): The input images.
data_samples (List[DataSample]): All elements required
during the forward function.
Returns:
Dict[str, torch.Tensor]: A dictionary of loss components.
"""
clip_feature, _ = self.target_generator(inputs)
latent, mask, ids_restore = self.backbone(inputs)
pred = self.neck(latent, ids_restore)
clip_feature = clip_feature[:, 1:, :]
loss = self.head.loss(pred, clip_feature, mask)
losses = dict(loss=loss)
return losses