import torch
import torch.nn as nn
import torchvision.models as models
from transformers import AutoTokenizer, AutoModel, AutoConfig


class ImageModel(nn.Module):
    def __init__(self, num_genre) -> None:
        super(ImageModel, self).__init__()
        # Feature extraction backbone, pretrained on ImageNet.
        # Expects 200x200 RGB input images.
        self.features = models.mobilenet_v3_large(weights="IMAGENET1K_V2")
        in_features = self.features.classifier[0].in_features
        # Drop the ImageNet classifier so the backbone returns pooled features.
        self.features.classifier = nn.Identity()

        # Score head: single-output prediction.
        self.head_score = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(in_features=in_features, out_features=512, bias=True),
            nn.SiLU(),
            nn.Dropout(p=0.2),
            nn.Linear(in_features=512, out_features=1, bias=True),
        )
        # Award head: single-output prediction.
        self.head_award = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(in_features=in_features, out_features=512, bias=True),
            nn.SiLU(),
            nn.Dropout(p=0.2),
            nn.Linear(in_features=512, out_features=1, bias=True),
        )
        # Genre head: one logit per genre class.
        self.head_genre = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(in_features=in_features, out_features=1024, bias=True),
            nn.Hardswish(),
            nn.Dropout(p=0.2),
            nn.Linear(in_features=1024, out_features=num_genre, bias=True),
        )

        # Initialize the weights of the newly added heads.
        self._initialize_weights()

    def forward(self, x: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        x = self.features(x)
        return self.head_score(x), self.head_award(x), self.head_genre(x)

    def _initialize_weights(self) -> None:
        """Initialize the prediction heads with Xavier-normal weights and zero biases.

        Only the newly added heads are touched so that the pretrained
        MobileNetV3 backbone keeps its ImageNet weights.
        """
        for head in (self.head_score, self.head_award, self.head_genre):
            for m in head.modules():
                if isinstance(m, nn.Linear):
                    nn.init.xavier_normal_(m.weight)
                    nn.init.constant_(m.bias, 0)


class TextModel(nn.Module):
    def __init__(self, model_name, num_genre):
        super(TextModel, self).__init__()
        config = AutoConfig.from_pretrained(model_name)
        in_features = config.hidden_size
        self.transformer = AutoModel.from_pretrained(model_name)

        # Score head: single-output prediction from the pooled text embedding.
        self.head_score = nn.Sequential(
            # nn.Dropout(p=0.5),
            # nn.Linear(in_features=in_features, out_features=512, bias=True),
            # nn.SiLU(),
            nn.Dropout(p=0.2),
            nn.Linear(in_features=in_features, out_features=1, bias=True),
        )
        # Award head: single-output prediction.
        self.head_award = nn.Sequential(
            # nn.Dropout(p=0.5),
            # nn.Linear(in_features=in_features, out_features=512, bias=True),
            # nn.SiLU(),
            nn.Dropout(p=0.2),
            nn.Linear(in_features=in_features, out_features=1, bias=True),
        )
        # Genre head: one logit per genre class.
        self.head_genre = nn.Sequential(
            # nn.Linear(in_features=in_features, out_features=1024, bias=True),
            # nn.Hardswish(),
            nn.Dropout(p=0.2),
            nn.Linear(in_features=in_features, out_features=num_genre, bias=True),
        )

    def forward(self, x):
        # x is an (input_ids, attention_mask) pair. 'pooler_output' requires a
        # checkpoint that provides a pooling layer (e.g. BERT-style models).
        x = self.transformer(input_ids=x[0], attention_mask=x[1])['pooler_output']
        return self.head_score(x), self.head_award(x), self.head_genre(x)
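

# Minimal smoke-test sketch (assumed usage, not part of any training pipeline here):
# ImageModel takes a batch of 200x200 RGB images; TextModel takes tokenized text as an
# (input_ids, attention_mask) pair. "bert-base-uncased" and num_genre=10 are only
# illustrative choices; any checkpoint that exposes pooler_output should work. Both
# constructors download pretrained weights, so the first run needs network access.
if __name__ == "__main__":
    num_genre = 10  # hypothetical number of genre labels

    image_model = ImageModel(num_genre=num_genre).eval()
    images = torch.randn(2, 3, 200, 200)  # dummy batch of two images
    with torch.no_grad():
        score, award, genre = image_model(images)
    print(score.shape, award.shape, genre.shape)  # (2, 1), (2, 1), (2, num_genre)

    text_model = TextModel("bert-base-uncased", num_genre=num_genre).eval()
    tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
    batch = tokenizer(
        ["A space opera about family.", "A quiet coming-of-age drama."],
        padding=True, truncation=True, return_tensors="pt",
    )
    with torch.no_grad():
        score, award, genre = text_model((batch["input_ids"], batch["attention_mask"]))
    print(score.shape, award.shape, genre.shape)  # (2, 1), (2, 1), (2, num_genre)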