# PreMode/model/model.py
import re
import warnings
from typing import Optional, List, Tuple, Dict
import torch
from torch import _dynamo
# Fall back to eager execution instead of raising when torch.compile/dynamo fails.
_dynamo.config.suppress_errors = True
from torch import nn, Tensor
from model.module.representation import eqStar2PAETransformerSoftMax, eqStar2WeightedPAETransformerSoftMax, eqStar2FullGraphPAETransformerSoftMax
from model.module import output
__all__ = ["PreMode", "PreMode_Star_CON", "PreMode_DIFF", "PreMode_SSP", "PreMode_Mask_Predict", "PreMode_Single"]
def create_model(args, model_class="PreMode"):
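    """Build a PreMode model: choose the representation backbone from
    args["model"], build the output head from args["output_model"], and wrap
    both in the class named by model_class."""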
shared_args = dict(
num_heads=args["num_heads"],
x_in_channels=args["x_in_channels"],
x_channels=args["x_channels"],
vec_channels=args["vec_channels"],
vec_in_channels=args["vec_in_channels"],
x_hidden_channels=args["x_hidden_channels"],
vec_hidden_channels=args["vec_hidden_channels"],
num_layers=args["num_layers"],
num_edge_attr=args["num_edge_attr"],
num_rbf=args["num_rbf"],
rbf_type=args["rbf_type"],
trainable_rbf=args["trainable_rbf"],
activation=args["activation"],
attn_activation=args["attn_activation"],
neighbor_embedding=args["neighbor_embedding"],
cutoff_lower=args["cutoff_lower"],
cutoff_upper=args["cutoff_upper"],
x_in_embedding_type=args["x_in_embedding_type"],
x_use_msa=args['add_msa'] or args['zero_msa'],
drop_out_rate=args["drop_out"],
)
# representation network
if args["model"] == "equivariant-transformer":
from model.module.representation import eqTransformer
model_fn = eqTransformer
elif args["model"] == "equivariant-transformer-star":
from model.module.representation import eqStarTransformer
model_fn = eqStarTransformer
elif args["model"] == "equivariant-transformer-softmax":
from model.module.representation import eqTransformerSoftMax
model_fn = eqTransformerSoftMax
elif args["model"] == "equivariant-transformer-star-softmax":
from model.module.representation import eqStarTransformerSoftMax
model_fn = eqStarTransformerSoftMax
elif args["model"] == "equivariant-transformer-star2-softmax":
from model.module.representation import eqStar2TransformerSoftMax
model_fn = eqStar2TransformerSoftMax
shared_args["use_lora"]=args["use_lora"]
shared_args["share_kv"]=args["share_kv"]
elif args["model"] == "equivariant-transformer-PAE-star2-softmax":
model_fn = eqStar2PAETransformerSoftMax
shared_args["use_lora"]=args["use_lora"]
shared_args["share_kv"]=args["share_kv"]
args["num_rbf"] = 0 # cancel the rbf in PAE model
elif args["model"] == "equivariant-transformer-weighted-PAE-star2-softmax":
model_fn = eqStar2WeightedPAETransformerSoftMax
shared_args["use_lora"]=args["use_lora"]
shared_args["share_kv"]=args["share_kv"]
args["num_rbf"] = 0 # cancel the rbf in PAE model
elif args["model"] == "equivariant-transformer-PAE-star2-fullgraph-softmax":
model_fn = eqStar2FullGraphPAETransformerSoftMax
shared_args["use_lora"]=args["use_lora"]
shared_args["share_kv"]=args["share_kv"]
elif args["model"] == "transformer-fullgraph-softmax":
from model.module.representation import FullGraphPAETransformerSoftMax
model_fn = FullGraphPAETransformerSoftMax
shared_args["use_lora"]=args["use_lora"]
shared_args["share_kv"]=args["share_kv"]
elif args["model"] == "equivariant-triangular-attention-transformer":
from model.module.representation import eqTriAttnTransformer
model_fn = eqTriAttnTransformer
shared_args["pariwise_state_dim"]=args["vec_hidden_channels"]
elif args["model"] == "equivariant-triangular-star-transformer":
from model.module.representation import eqTriStarTransformer
model_fn = eqTriStarTransformer
elif args["model"] == "equivariant-msa-triangular-star-transformer":
from model.module.representation import eqMSATriStarTransformer
model_fn = eqMSATriStarTransformer
shared_args["ee_channels"]=args["ee_channels"]
shared_args["triangular_update"]=args["triangular_update"]
elif args["model"] == "equivariant-msa-triangular-star-drop-transformer":
from model.module.representation import eqMSATriStarDropTransformer
model_fn = eqMSATriStarDropTransformer
shared_args["ee_channels"]=args["ee_channels"]
shared_args["triangular_update"]=args["triangular_update"]
shared_args["use_lora"]=args["use_lora"]
elif args["model"] == "equivariant-msa-triangular-star-gru-transformer":
from model.module.representation import eqMSATriStarGRUTransformer
model_fn = eqMSATriStarGRUTransformer
shared_args["ee_channels"]=args["ee_channels"]
shared_args["triangular_update"]=args["triangular_update"]
elif args["model"] == "equivariant-msa-triangular-star-drop-gru-transformer":
from model.module.representation import eqMSATriStarDropGRUTransformer
model_fn = eqMSATriStarDropGRUTransformer
shared_args["ee_channels"]=args["ee_channels"]
shared_args["triangular_update"]=args["triangular_update"]
shared_args["use_lora"]=args["use_lora"]
elif args["model"] == "pass-forward":
from model.module.representation import PassForward
model_fn = PassForward
elif args["model"] == "lora-esm":
from model.module.representation import LoRAESM2
model_fn = LoRAESM2
else:
raise ValueError(f'Unknown architecture: {args["model"]}')
representation_model = model_fn(
**shared_args,
)
# create output network
if "MaskPredict" in args["output_model"]:
output_model = getattr(output, args["output_model"])(
args=args,
lm_weight=representation_model.node_x_proj.weight,
)
elif "ESM" in args["output_model"]:
# get lm_weight from esm2
import esm
esm_model, _ = esm.pretrained.esm2_t33_650M_UR50D()
output_model = output.build_output_model(
args["output_model"],
args=args,
lm_head=esm_model.lm_head,
)
else:
# for non-clinvar tasks, use non_uniform init
if args["init_fn"] is None:
if args["data_type"] != "ClinVar":
args["init_fn"] = "non_uniform"
else:
args["init_fn"] = "uniform"
if hasattr(output, args["output_model"]):
output_model = getattr(output, args["output_model"])(
args=args,
)
else:
output_model = output.build_output_model(args["output_model"], args=args)
# combine representation and output network
model = globals()[model_class](
representation_model,
output_model,
alt_projector=args["alt_projector"],
)
return model
def create_model_and_load(args, model_class="PreMode"):
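    """Create a model via create_model and initialize it from the checkpoint
    at args["load_model"], splitting the state dict between the representation
    and output sub-modules."""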
model = create_model(args, model_class)
state_dict = torch.load(args["load_model"], map_location="cpu")
    # Split the checkpoint into output-model and representation-model state
    # dicts, stripping the "_orig_mod." prefix added by torch.compile.
output_model_state_dict = {}
representation_model_state_dict = {}
for key in state_dict.keys():
# delete _orig_mod
if key.startswith("_orig_mod"):
newkey = key.replace("_orig_mod.", "")
else:
newkey = key
if newkey.startswith("output_model"):
output_model_state_dict[newkey.replace("output_model.", "")] = state_dict[key]
elif newkey.startswith("representation_model"):
if newkey.startswith("representation_model.node_x_proj.weight"):
if args["partial_load_model"]:
embedding_weight = state_dict[key]
                    print("only using the first 26 embeddings of MaskPredict")
                    embedding_weight = embedding_weight[:26]  # drop the mask-token embedding
representation_model_state_dict["node_x_proj.weight"] = \
torch.concat((embedding_weight,
torch.zeros(args["x_in_channels"] - embedding_weight.shape[0],
embedding_weight.shape[1]))).T
representation_model_state_dict["node_x_proj.bias"] = \
torch.zeros(args["x_channels"])
else:
representation_model_state_dict[newkey.replace("representation_model.", "")] = state_dict[key]
else:
representation_model_state_dict[newkey.replace("representation_model.", "")] = state_dict[key]
model.representation_model.load_state_dict(representation_model_state_dict, strict=False)
if args["data_type"] == "ClinVar" \
or args['loss_fn'] == "combined_loss" \
or args['loss_fn'] == "weighted_combined_loss" \
or args['use_output_head']:
# or args['loss_fn'] == "weighted_loss":
try:
# check the output network module dimension
if output_model_state_dict['output_network.0.weight'].shape[0] != args['output_dim']:
                # if the output head is a OneSite variant, its weights can be tiled to output_dim
                if "OneSite" in args['output_model'] and args['use_output_head']:
                    rep_time = args['output_dim'] // output_model_state_dict['output_network.0.weight'].shape[0]
                    # repeat the weight and bias via repeat_interleave
output_model_state_dict['output_network.0.weight'] = output_model_state_dict['output_network.0.weight'].repeat_interleave(rep_time, 0)
output_model_state_dict['output_network.0.bias'] = output_model_state_dict['output_network.0.bias'].repeat_interleave(rep_time)
else:
                    print('Warning: output network dimension does not match output_dim; zero-padding to the requested size')
output_network_weight = torch.concat(
(output_model_state_dict['output_network.0.weight'],
torch.zeros(args['output_dim'] - output_model_state_dict['output_network.0.weight'].shape[0],
output_model_state_dict['output_network.0.weight'].shape[1])
)
)
output_network_bias = torch.concat(
(output_model_state_dict['output_network.0.bias'],
torch.zeros(args['output_dim'] - output_model_state_dict['output_network.0.bias'].shape[0])
)
)
output_model_state_dict['output_network.0.weight'] = output_network_weight
output_model_state_dict['output_network.0.bias'] = output_network_bias
model.output_model.load_state_dict(output_model_state_dict, strict=False)
print(f"loaded the output model state dict including the output module")
except RuntimeError:
print(f"Warning: Didn't load output model state dict because keys didn't match.")
else:
print(f"Warning: Didn't load output model because task is not ClinVar")
return model
def load_model(filepath, args=None, device="cpu", model_class="PreMode", **kwargs):
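    """Restore a model from a checkpoint file; hyperparameters default to the
    ones stored in the checkpoint, with kwargs applied as overrides."""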
ckpt = torch.load(filepath, map_location="cpu")
if args is None:
args = ckpt["hyper_parameters"]
for key, value in kwargs.items():
        if key not in args:
warnings.warn(f"Unknown hyperparameter: {key}={value}")
args[key] = value
model = create_model(args, model_class=model_class)
state_dict = {re.sub(r"^model\.", "", k): v for k, v in ckpt["state_dict"].items()}
model.load_state_dict(state_dict)
return model.to(device)
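
# Minimal usage sketch (hypothetical checkpoint path; args must contain the
# hyperparameters referenced throughout create_model):
#
#     model = load_model("path/to/checkpoint.ckpt", device="cuda")
#     y, x, attn = model(x=x, x_mask=x_mask, x_alt=x_alt, pos=pos, edge_index=edge_index)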
class PreMode(nn.Module):
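    """Base model: a graph representation backbone followed by an output head.

    The forward pass substitutes alternate residue features according to
    x_mask, encodes the graph, and predicts the variant effect from the
    reduced representation.
    """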
def __init__(
self,
representation_model,
output_model,
alt_projector=None,
):
super(PreMode, self).__init__()
self.representation_model = representation_model
self.output_model = output_model
if alt_projector is not None:
            # linear layer projecting the alternate features to the backbone input dimension
out_dim = representation_model.x_channels if representation_model.x_in_channels is None else representation_model.x_in_channels
self.alt_linear = nn.Linear(alt_projector, out_dim, bias=False)
else:
self.alt_linear = None
self.reset_parameters()
def reset_parameters(self):
self.representation_model.reset_parameters()
self.output_model.reset_parameters()
def forward(
self,
x: Tensor,
x_mask: Tensor,
x_alt: Tensor,
pos: Tensor,
edge_index: Tensor,
edge_index_star: Tensor = None,
edge_attr: Tensor = None,
edge_attr_star: Tensor = None,
node_vec_attr: Tensor = None,
batch: Optional[Tensor] = None,
extra_args: Optional[Dict[str, Tensor]] = None,
return_attn: bool = False,
) -> Tuple[Tensor, Tensor, List]:
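        """Encode the variant graph and return (prediction y, reduced
        representation x, per-layer attention weights)."""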
# assert x.dim() == 2
batch = torch.zeros(x.shape[0], dtype=torch.int64, device=x.device) if batch is None else batch
        # get the graph representation of the original protein first
        # if there is MSA appended to x, split it off
if (self.representation_model.x_in_channels is not None and x.shape[1] > self.representation_model.x_in_channels):
x_orig, _ = x[:, :self.representation_model.x_in_channels], x[:, self.representation_model.x_in_channels:]
elif x.shape[1] > self.representation_model.x_channels:
x_orig, _ = x[:, :self.representation_model.x_channels], x[:, self.representation_model.x_channels:]
else:
x_orig = x
if self.alt_linear is not None:
x_alt = self.alt_linear(x_alt)
# update x to alt aa
x = x * x_mask + x_alt * x_mask
# run the potentially wrapped representation model
if extra_args is not None and "y_mask" in extra_args:
x, v, pos, edge_attr, batch, attn_weight_layers = self.representation_model(
x=x,
pos=pos,
batch=batch,
edge_index=edge_index,
edge_index_star=edge_index_star,
edge_attr=edge_attr,
edge_attr_star=edge_attr_star,
node_vec_attr=node_vec_attr,
mask=extra_args["y_mask"].to(x.device, non_blocking=True),
return_attn=return_attn, )
else:
x, v, pos, edge_attr, batch, attn_weight_layers = self.representation_model(
x=x,
pos=pos,
batch=batch,
edge_index=edge_index,
edge_index_star=edge_index_star,
edge_attr=edge_attr,
edge_attr_star=edge_attr_star,
node_vec_attr=node_vec_attr,
return_attn=return_attn, )
# apply the output network
x = self.output_model.pre_reduce(x, v, pos, batch)
# aggregate residues
if extra_args is not None and "y_mask" in extra_args:
x = x * extra_args["y_mask"].unsqueeze(2).to(x.device, non_blocking=True)
# reduce nodes
x, attn_out = self.output_model.reduce(x - x_orig, edge_index, edge_attr, batch)
# x = self.output_model.reduce(x, edge_index, edge_attr, batch)
attn_weight_layers.append(attn_out)
# apply output model after reduction
y = self.output_model.post_reduce(x)
return y, x, attn_weight_layers
class PreMode_Star_CON(nn.Module):
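    """PreMode variant that reduces over star-graph end nodes and can consume
    structure-confidence inputs (pLDDT, PAE edge confidence) for PAE models."""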
def __init__(
self,
representation_model,
output_model,
alt_projector=None,
):
super(PreMode_Star_CON, self).__init__()
self.representation_model = representation_model
self.output_model = output_model
self.alt_projector = alt_projector
if alt_projector is not None:
            # linear layer projecting the alternate features to the backbone input dimension
out_dim = representation_model.x_channels if representation_model.x_in_channels is None else representation_model.x_in_channels
self.alt_linear = nn.Sequential(nn.Linear(alt_projector, out_dim, bias=False), nn.SiLU())
else:
self.alt_linear = None
self.reset_parameters()
def reset_parameters(self):
self.representation_model.reset_parameters()
self.output_model.reset_parameters()
def forward(
self,
x: Tensor,
x_mask: Tensor,
x_alt: Tensor,
pos: Tensor,
edge_index: Tensor,
edge_index_star: Tensor = None,
edge_attr: Tensor = None,
edge_attr_star: Tensor = None,
node_vec_attr: Tensor = None,
batch: Optional[Tensor] = None,
extra_args: Optional[Dict[str, Tensor]] = None,
return_attn: bool = False,
) -> Tuple[Tensor, Tensor, List]:
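        """Encode the variant graph, optionally with confidence features, and
        reduce over star-graph end nodes; returns (y, x, attention weights)."""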
# assert x.dim() == 2
batch = torch.zeros(x.shape[0], dtype=torch.int64, device=x.device) if batch is None else batch
        # get the graph representation of the original protein first
        # if there is MSA appended to x, split it off
if self.representation_model.x_in_channels is not None:
if x.shape[-1] > self.representation_model.x_in_channels:
x, msa = x[..., :self.representation_model.x_in_channels], x[..., self.representation_model.x_in_channels:]
split = True
else:
split = False
elif x.shape[-1] > self.representation_model.x_channels:
x, msa = x[..., :self.representation_model.x_channels], x[..., self.representation_model.x_channels:]
split = True
else:
split = False
if len(x.shape) == 3 or len(x_mask.shape) == 1:
x_mask = x_mask.unsqueeze(-1)
else:
x_mask = x_mask[:, 0].unsqueeze(1)
if self.alt_linear is not None:
x_alt = x_alt[..., :self.alt_projector]
x_alt = self.alt_linear(x_alt)
else:
x_alt = x_alt[..., :x.shape[-1]]
# update x to alt aa
x = x * x_mask + x_alt * (~x_mask)
# concat with msa
if split:
x = torch.cat((x, msa), dim=-1)
# run the potentially wrapped representation model
        # wrap input features ("inputs" avoids shadowing the builtin input())
        inputs = {"x": x,
                  "pos": pos,
                  "batch": batch,
                  "edge_index": edge_index,
                  "edge_index_star": edge_index_star,
                  "edge_attr": edge_attr,
                  "edge_attr_star": edge_attr_star,
                  "node_vec_attr": node_vec_attr,
                  "return_attn": return_attn}
if extra_args is not None and "y_mask" in extra_args:
input["mask"] = extra_args["y_mask"].to(x.device, non_blocking=True)
if extra_args is not None and "x_padding_mask" in extra_args:
input["x_padding_mask"] = extra_args["x_padding_mask"].to(x.device, non_blocking=True)
if isinstance(self.representation_model, eqStar2PAETransformerSoftMax) or \
isinstance(self.representation_model, eqStar2WeightedPAETransformerSoftMax) or \
isinstance(self.representation_model, eqStar2FullGraphPAETransformerSoftMax):
# means we are using PAE model
input["plddt"] = extra_args["plddt"].to(x.device, non_blocking=True) \
if "plddt" in extra_args else None
input["edge_confidence"] = extra_args["edge_confidence"].to(x.device, non_blocking=True) \
if "edge_confidence" in extra_args else None
input["edge_confidence_star"] = extra_args["edge_confidence_star"].to(x.device, non_blocking=True) \
if "edge_confidence_star" in extra_args else None
x, v, pos, edge_attr, batch, attn_weight_layers = self.representation_model(**input)
# apply the output network
x = self.output_model.pre_reduce(x, v, pos, batch)
# aggregate residues
if extra_args is not None and "y_mask" in extra_args:
x = x * extra_args["y_mask"].unsqueeze(2).to(x.device, non_blocking=True)
        if len(x.shape) < 3:
            # reduce nodes over the star graph, keeping only end nodes reached by more than one edge
            nodes, counts = edge_index_star[1].unique(return_counts=True)
            end_nodes = nodes[counts > 1]
            # if edge_attr has the same shape as edge_index_star, it was updated to edge_attr_star
            if edge_attr is not None and edge_attr.shape[0] == edge_index_star.shape[1]:
x, attn_out = self.output_model.reduce(x,
edge_index_star[:, torch.isin(edge_index_star[1], end_nodes)],
edge_attr[torch.isin(edge_index_star[1], end_nodes), :],
batch)
else:
# if edge_attr is not updated, use edge_attr_star
x, attn_out = self.output_model.reduce(x,
edge_index_star[:, torch.isin(edge_index_star[1], end_nodes)],
edge_attr_star[torch.isin(edge_index_star[1], end_nodes), :],
batch)
else:
            x, attn_out = self.output_model.reduce(
                x, (~x_mask).squeeze(2),
                edge_attr[0], edge_attr[1], edge_attr[2], edge_attr[3],
                inputs["x_padding_mask"])
            if extra_args is None or "score_mask" not in extra_args:
                x = x.unsqueeze(1)
# x = self.output_model.reduce(x, edge_index, edge_attr, batch)
attn_weight_layers.append(attn_out)
# apply output model after reduction
        # if esm_mask is present in extra_args, an ESM-style output head is in use
        if extra_args is not None and "esm_mask" in extra_args:
y = self.output_model.post_reduce(x, extra_args["esm_mask"].to(x.device, non_blocking=True))
else:
y = self.output_model.post_reduce(x)
return y, x, attn_weight_layers
class PreMode_SSP(PreMode):
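    """Self-supervised variant: additionally reconstructs the input node
    vector features from the encoded representation."""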
def __init__(
self,
representation_model,
output_model,
vec_in_channels=4,
):
super(PreMode_SSP, self).__init__(representation_model=representation_model,
output_model=output_model,)
self.vec_reconstruct = nn.Linear(representation_model.vec_channels, vec_in_channels, bias=False)
def forward(
self,
x: Tensor,
x_mask: Tensor,
x_alt: Tensor,
pos: Tensor,
edge_index: Tensor,
edge_index_star: Tensor = None,
edge_attr: Tensor = None,
edge_attr_star: Tensor = None,
edge_vec: Tensor = None,
edge_vec_star: Tensor = None,
node_vec_attr: Tensor = None,
batch: Optional[Tensor] = None,
extra_args: Optional[Dict[str, Tensor]] = None,
return_attn: bool = False,
) -> Tuple[Tensor, Tensor, Tensor, Tensor, List]:
assert x.dim() == 2 and x.dtype == torch.float
batch = torch.zeros(x.shape[0], dtype=torch.int64, device=x.device) if batch is None else batch
        # get the graph representation of the original protein first
x_orig = x
# update x to alt aa
x = x * x_mask + x_alt
# run the potentially wrapped representation model
x, v, pos, edge_attr, batch, attn_weight_layers = self.representation_model(
x=x,
pos=pos,
batch=batch,
edge_index=edge_index,
edge_index_star=edge_index_star,
edge_attr=edge_attr,
edge_attr_star=edge_attr_star,
node_vec_attr=node_vec_attr,
return_attn=return_attn, )
vec = self.vec_reconstruct(v)
# apply the output network
x_graph: Tensor = x
x = self.output_model.pre_reduce(x, v, pos, batch)
# aggregate residues
x, _ = self.output_model.reduce(x - x_orig, edge_index, edge_attr, batch)
# apply output model after reduction
y = self.output_model.post_reduce(x)
return x_graph, vec, y, x, attn_weight_layers
class PreMode_DIFF(PreMode):
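    """Variant that encodes reference and alternate graphs in two separate
    passes and scores the difference of their representations."""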
def __init__(
self,
representation_model,
output_model,
alt_projector=None,
):
super(PreMode_DIFF, self).__init__(representation_model=representation_model,
output_model=output_model,)
def forward(
self,
x: Tensor,
x_mask: Tensor,
x_alt: Tensor,
pos: Tensor,
edge_index: Tensor,
edge_index_star: Tensor = None,
edge_attr: Tensor = None,
edge_attr_star: Tensor = None,
edge_vec: Tensor = None,
edge_vec_star: Tensor = None,
node_vec_attr: Tensor = None,
batch: Optional[Tensor] = None,
extra_args: Optional[Dict[str, Tensor]] = None,
return_attn: bool = False,
) -> Tuple[Tensor, Tensor, List]:
# assert x.dim() == 2 and x.dtype == torch.float
batch = torch.zeros(x.shape[0], dtype=torch.int64, device=x.device) if batch is None else batch
        # get the graph representation of the original protein first
x_orig, v, pos, _, batch, attn_weight_layers_ref = self.representation_model(
x=x,
pos=pos,
batch=batch,
edge_index=edge_index,
edge_index_star=edge_index_star,
edge_attr=edge_attr,
edge_attr_star=edge_attr_star,
node_vec_attr=node_vec_attr,
return_attn=return_attn, )
x_orig = self.output_model.pre_reduce(x_orig, v, pos, batch)
# update x to alt aa
x = x * x_mask + x_alt
# run the potentially wrapped representation model
x, v, pos, edge_attr, batch, attn_weight_layers_alt = self.representation_model(
x=x,
pos=pos,
batch=batch,
edge_index=edge_index,
edge_index_star=edge_index_star,
edge_attr=edge_attr,
edge_attr_star=edge_attr_star,
node_vec_attr=node_vec_attr,
return_attn=return_attn, )
# apply the output network
x = self.output_model.pre_reduce(x, v, pos, batch)
# aggregate residues
x, _ = self.output_model.reduce(x - x_orig, edge_index, edge_attr, batch)
# apply output model after reduction
y = self.output_model.post_reduce(x)
return y, x, [attn_weight_layers_ref, attn_weight_layers_alt]
class PreMode_Mask_Predict(PreMode):
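    """Masked-residue prediction variant: returns per-residue alphabet logits
    from the output head's pre_reduce step, with no node reduction."""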
def __init__(
self,
representation_model,
output_model,
alt_projector=None,
):
super(PreMode_Mask_Predict, self).__init__(representation_model=representation_model,
output_model=output_model,)
def forward(
self,
x: Tensor,
x_mask: Tensor,
x_alt: Tensor,
pos: Tensor,
edge_index: Tensor,
edge_index_star: Tensor = None,
edge_attr: Tensor = None,
edge_attr_star: Tensor = None,
edge_vec: Tensor = None,
edge_vec_star: Tensor = None,
node_vec_attr: Tensor = None,
batch: Optional[Tensor] = None,
extra_args: Optional[Dict[str, Tensor]] = None,
return_attn: bool = False,
) -> Tuple[Tensor, Tensor, List]:
# assert x.dim() == 2 and x.dtype == torch.float
batch = torch.zeros(x.shape[0], dtype=torch.int64, device=x.device) if batch is None else batch
# update x to alt aa
x = x * x_mask + x_alt
        # get the graph representation of the original protein first
        if extra_args is not None and "y_mask" in extra_args:
# means that it is non-graph model
x_embed, v, pos, _, batch, attn_weight_layers_ref = self.representation_model(
x=x,
pos=pos,
mask=extra_args["y_mask"].to(x.device, non_blocking=True),
return_attn=return_attn, )
else:
x_embed, v, pos, _, batch, attn_weight_layers_ref = self.representation_model(
x=x,
pos=pos,
batch=batch,
edge_index=edge_index,
edge_index_star=edge_index_star,
edge_attr=edge_attr,
edge_attr_star=edge_attr_star,
node_vec_attr=node_vec_attr,
return_attn=return_attn, )
        # pre_reduce maps the embeddings to per-residue alphabet logits
y = self.output_model.pre_reduce(x_embed, v, pos, batch)
return y, y, attn_weight_layers_ref
class PreMode_Single(PreMode):
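    """Single-pass variant: encodes the alternate graph once and reduces over
    the plain edge graph without subtracting a reference representation."""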
def __init__(
self,
representation_model,
output_model,
alt_projector=None,
):
super(PreMode_Single, self).__init__(representation_model=representation_model,
output_model=output_model,)
def forward(
self,
x: Tensor,
x_mask: Tensor,
x_alt: Tensor,
pos: Tensor,
edge_index: Tensor,
edge_index_star: Tensor = None,
edge_attr: Tensor = None,
edge_attr_star: Tensor = None,
edge_vec: Tensor = None,
edge_vec_star: Tensor = None,
node_vec_attr: Tensor = None,
batch: Optional[Tensor] = None,
extra_args: Optional[Dict[str, Tensor]] = None,
return_attn: bool = False,
) -> Tuple[Tensor, Tensor, List]:
assert x.dim() == 2
batch = torch.zeros(x.shape[0], dtype=torch.int64, device=x.device) if batch is None else batch
        # get the graph representation of the original protein first
        # if there is MSA appended to x, split it off
if (self.representation_model.x_in_channels is not None and x.shape[1] > self.representation_model.x_in_channels):
x, msa = x[:, :self.representation_model.x_in_channels], x[:, self.representation_model.x_in_channels:]
split = True
elif x.shape[1] > self.representation_model.x_channels:
x, msa = x[:, :self.representation_model.x_channels], x[:, self.representation_model.x_channels:]
split = True
else:
split = False
x_mask = x_mask[:, 0]
if self.alt_linear is not None:
x_alt = x_alt[:, :self.alt_projector]
x_alt = self.alt_linear(x_alt)
else:
x_alt = x_alt[:, :x.shape[1]]
# update x to alt aa
x = x * x_mask.unsqueeze(1) + x_alt * (~x_mask).unsqueeze(1)
# concat with msa
if split:
x = torch.cat((x, msa), dim=1)
# run the potentially wrapped representation model
x, v, pos, edge_attr, batch, attn_weight_layers = self.representation_model(
x=x,
pos=pos,
batch=batch,
edge_index=edge_index,
edge_index_star=edge_index_star,
edge_attr=edge_attr,
edge_attr_star=edge_attr_star,
node_vec_attr=node_vec_attr,
return_attn=return_attn, )
# apply the output network
x = self.output_model.pre_reduce(x, v, pos, batch)
# aggregate residues
x, _ = self.output_model.reduce(x, edge_index, edge_attr, batch)
# apply output model after reduction
y = self.output_model.post_reduce(x)
return y, x, attn_weight_layers