cossim-bert-chinese-wwm-ext / modeling_bert.py
minskiter's picture
feat(model): update model parameters
57034b1
raw
history blame
2.95 kB
from transformers import PretrainedConfig, PreTrainedModel, BertModel, BertConfig
from .configuration_bert import SimBertConfig
from torch import nn
class SimBertModel(PreTrainedModel):
""" SimBert Model
"""
config_class = SimBertConfig
def __init__(
self,
config: PretrainedConfig
) -> None:
super().__init__(config)
self.bert = BertModel(config=config, add_pooling_layer=True)
self.fc = nn.Linear(config.hidden_size, 2)
# self.loss_fct = nn.CrossEntropyLoss()
self.loss_fct = nn.MSELoss()
self.softmax = nn.Softmax(dim=1)
def forward(
self,
input_ids,
token_type_ids,
attention_mask,
labels=None
):
outputs = self.bert(
input_ids=input_ids,
attention_mask=attention_mask,
token_type_ids=token_type_ids
)
pooled_output = outputs.pooler_output
logits = self.fc(pooled_output)
logits = self.softmax(logits)[:,1]
if labels is not None:
loss = self.loss_fct(logits.view(-1), labels.view(-1))
return loss, logits
return None, logits
class CosSimBertModel(PreTrainedModel):
""" CosSimBert Model
"""
config_class = SimBertConfig
def __init__(
self,
config: PretrainedConfig
) -> None:
super().__init__(config)
self.bert = BertModel(config=config, add_pooling_layer=True)
self.loss_fct = nn.MSELoss()
self.softmax = nn.Softmax(dim=1)
def forward(
self,
input_ids,
token_type_ids,
attention_mask,
labels=None
):
seq_length = input_ids.size(-1)
a = {
"input_ids": input_ids[:,:seq_length//2],
"token_type_ids": token_type_ids[:,:seq_length//2],
"attention_mask": attention_mask[:,:seq_length//2]
}
b = {
"input_ids": input_ids[:,seq_length//2:],
"token_type_ids": token_type_ids[:,seq_length//2:],
"attention_mask": attention_mask[:,seq_length//2:]
}
outputs_a = self.bert(**a)
outputs_b = self.bert(**b)
pooled_a_output = outputs_a.pooler_output
pooled_b_output = outputs_b.pooler_output
logits = nn.functional.cosine_similarity(pooled_a_output, pooled_b_output)
if labels is not None:
loss = self.loss_fct(logits.view(-1), labels.view(-1))
return loss, logits
return None, logits
def encode(
self,
input_ids,
token_type_ids,
attention_mask,
):
outputs = self.bert(
input_ids=input_ids,
attention_mask=attention_mask,
token_type_ids=token_type_ids
)
pooled_output = outputs.pooler_output
return pooled_output