# Page-scrape residue (not part of the program): "Spaces: Runtime error".
import torch.nn as nn | |
import torchvision.models as models | |
import torch | |
from transformers import AutoTokenizer, AutoModel, AutoConfig | |
class ImageModel(nn.Module):
    """MobileNetV3-Large backbone feeding three task-specific MLP heads.

    The backbone is loaded with the ``IMAGENET1K_V2`` weights and its
    classifier is replaced by ``nn.Identity``, so it returns the feature
    vector that used to feed the classifier. Three independent heads consume
    that vector:

    * ``head_score`` -- 1 output,
    * ``head_award`` -- 1 output,
    * ``head_genre`` -- ``num_genre`` outputs.

    Args:
        num_genre: Number of outputs produced by the genre head.
    """

    def __init__(self, num_genre) -> None:
        super().__init__()

        # Feature extraction backbone (original author's note: input 200x200).
        self.features = models.mobilenet_v3_large(weights="IMAGENET1K_V2")
        # Read the classifier's input width before discarding the classifier.
        in_features = self.features.classifier[0].in_features
        self.features.classifier = nn.Identity()

        self.head_score = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(in_features=in_features, out_features=512, bias=True),
            nn.SiLU(),
            nn.Dropout(p=0.2),
            nn.Linear(in_features=512, out_features=1, bias=True),
        )
        self.head_award = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(in_features=in_features, out_features=512, bias=True),
            nn.SiLU(),
            nn.Dropout(p=0.2),
            nn.Linear(in_features=512, out_features=1, bias=True),
        )
        self.head_genre = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(in_features=in_features, out_features=1024, bias=True),
            nn.Hardswish(),
            nn.Dropout(p=0.2),
            nn.Linear(in_features=1024, out_features=num_genre, bias=True),
        )

        # Initialize the newly created heads (the backbone is left untouched).
        self._initialize_weights()

    def forward(
        self, x: torch.Tensor
    ) -> "tuple[torch.Tensor, torch.Tensor, torch.Tensor]":
        """Run the backbone once and apply all three heads to its output.

        Args:
            x: Input passed unchanged to the backbone.

        Returns:
            A ``(score, award, genre)`` tuple holding the output of
            ``head_score``, ``head_award`` and ``head_genre`` respectively.
        """
        x = self.features(x)
        return self.head_score(x), self.head_award(x), self.head_genre(x)

    def _initialize_weights(model):
        """Initialize the weights of the three prediction heads.

        Only ``head_score``, ``head_award`` and ``head_genre`` are visited.
        Walking ``model.modules()`` would also reach the backbone stored in
        ``model.features`` and overwrite the pretrained weights that
        ``__init__`` has just loaded.

        Conv2d and Linear weights get Xavier-normal initialization;
        BatchNorm2d layers are reset to weight 1 and bias 0. Linear and
        Conv2d biases keep the values they were constructed with.

        Args:
            model: The ``ImageModel`` instance (this parameter plays the role
                of ``self``; the name is kept for backward compatibility).
        """
        heads = (model.head_score, model.head_award, model.head_genre)
        for head in heads:
            for m in head.modules():
                if isinstance(m, (nn.Conv2d, nn.Linear)):
                    nn.init.xavier_normal_(m.weight)
                elif isinstance(m, nn.BatchNorm2d):
                    nn.init.constant_(m.weight, 1)
                    nn.init.constant_(m.bias, 0)
class TextModel(nn.Module):
    """Pretrained transformer encoder with three linear prediction heads.

    Each head is ``Dropout(p=0.2)`` followed by a single ``Linear`` layer
    applied to the encoder's ``pooler_output``:

    * ``head_score`` -- 1 output,
    * ``head_award`` -- 1 output,
    * ``head_genre`` -- ``num_genre`` outputs.

    Args:
        model_name: Identifier handed to ``AutoConfig.from_pretrained`` and
            ``AutoModel.from_pretrained``.
        num_genre: Number of outputs produced by the genre head.
    """

    def __init__(self, model_name, num_genre):
        super().__init__()

        # The encoder's hidden size determines the input width of every head.
        hidden_size = AutoConfig.from_pretrained(model_name).hidden_size
        self.transformer = AutoModel.from_pretrained(model_name)

        def make_head(out_features):
            """Build one Dropout -> Linear head on top of the pooled output."""
            return nn.Sequential(
                nn.Dropout(p=0.2),
                nn.Linear(
                    in_features=hidden_size,
                    out_features=out_features,
                    bias=True,
                ),
            )

        # Heads are created in a fixed order: score, award, genre.
        self.head_score = make_head(1)
        self.head_award = make_head(1)
        self.head_genre = make_head(num_genre)

    def forward(self, x):
        """Encode the input and apply all three heads to the pooled output.

        Args:
            x: Indexable input; ``x[0]`` is passed to the encoder as
                ``input_ids`` and ``x[1]`` as ``attention_mask``.

        Returns:
            A ``(score, award, genre)`` tuple holding the output of
            ``head_score``, ``head_award`` and ``head_genre`` respectively.
        """
        token_ids = x[0]
        mask = x[1]
        encoded = self.transformer(input_ids=token_ids, attention_mask=mask)
        pooled = encoded['pooler_output']
        score = self.head_score(pooled)
        award = self.head_award(pooled)
        genre = self.head_genre(pooled)
        return score, award, genre