|
|
|
|
|
|
|
|
|
|
|
|
|
import torch.nn as nn |
|
import torch |
|
|
|
# Default number of parallel branches; LayerNormParallel creates one
# LayerNorm instance per branch based on this value.
num_parallel = 2
|
|
|
|
|
class TokenExchange(nn.Module):
    """Exchange entries between two parallel tensors based on per-entry scores.

    For each of the two streams, entries whose score is at or above the
    threshold are kept from the stream itself, and entries whose score is
    below the threshold are replaced by the entry at the same position in
    the other stream.
    """

    def __init__(self):
        super(TokenExchange, self).__init__()

    @staticmethod
    def _exchange(own, other, score, threshold):
        """Return a new tensor mixing ``own`` and ``other`` according to ``score``.

        Positions where ``score >= threshold`` come from ``own``; positions
        where ``score < threshold`` come from ``other``. ``own`` and
        ``other`` are not modified.
        """
        # Each mask is computed once and reused for both the read and the write.
        keep = score >= threshold
        # Deliberately not ``~keep``: a NaN score satisfies neither
        # comparison, so such positions stay zero, as they always have.
        swap = score < threshold

        mixed = torch.zeros_like(own)
        mixed[keep] = own[keep]
        mixed[swap] = other[swap]
        return mixed

    def forward(self, x, mask, mask_threshold):
        """Exchange entries between ``x[0]`` and ``x[1]``.

        Args:
            x: Indexable holding the two stream tensors ``x[0]`` and ``x[1]``.
            mask: Indexable holding the score tensors ``mask[0]`` and
                ``mask[1]``. Each is used as a boolean index into the
                matching stream after thresholding, so its shape must match
                the leading dimensions of that stream (presumably
                ``(batch, tokens)`` scores for ``(batch, tokens, dim)``
                features -- confirm against the caller).
            mask_threshold: Scores at or above this value keep the stream's
                own entry; scores below it take the other stream's entry.

        Returns:
            A list ``[x0, x1]`` of newly allocated tensors shaped like
            ``x[0]`` and ``x[1]`` respectively.
        """
        return [
            self._exchange(x[0], x[1], mask[0], mask_threshold),
            self._exchange(x[1], x[0], mask[1], mask_threshold),
        ]
|
|
|
|
|
class ModuleParallel(nn.Module):
    """Wrap a single module so it is applied to every branch of a parallel input.

    The same wrapped module instance (and therefore the same parameters) is
    called on each element of the input sequence.
    """

    def __init__(self, module):
        super(ModuleParallel, self).__init__()
        self.module = module

    def forward(self, x_parallel):
        """Call the wrapped module on each element of ``x_parallel``.

        Returns a list with one output per input element, in input order.
        """
        shared = self.module
        outputs = []
        for branch in x_parallel:
            outputs.append(shared(branch))
        return outputs
|
|
|
|
|
class LayerNormParallel(nn.Module):
    """Hold one independent ``nn.LayerNorm`` per parallel branch.

    Unlike a shared module, each branch gets its own LayerNorm instance,
    registered as the submodules ``ln_0``, ``ln_1``, ...
    """

    def __init__(self, num_features, num_branches=None):
        """Create the per-branch LayerNorm modules.

        Args:
            num_features: Normalized shape passed to each ``nn.LayerNorm``.
            num_branches: How many branches to create LayerNorms for. When
                ``None`` (the default), the module-level ``num_parallel``
                constant is used, which matches the previous behaviour.
        """
        super(LayerNormParallel, self).__init__()
        if num_branches is None:
            # Backward-compatible default: fall back to the file-wide setting.
            num_branches = num_parallel
        for i in range(num_branches):
            # The attribute names define the state-dict keys; keep 'ln_<i>'.
            setattr(self, 'ln_' + str(i), nn.LayerNorm(num_features, eps=1e-6))

    def forward(self, x_parallel):
        """Normalize the i-th element of ``x_parallel`` with ``ln_<i>``.

        Returns a list with one output per input element, in input order.
        Passing more elements than there are registered LayerNorms raises
        ``AttributeError`` from the failed ``ln_<i>`` lookup.
        """
        return [getattr(self, 'ln_' + str(i))(x) for i, x in enumerate(x_parallel)]