Sifal
/

ClinicalMosaic

+from typing import Optional, Tuple, Union
+import torch.nn as nn
+import torch
+from utils.bert_layers_mosa import BertModel
+from transformers import BertPreTrainedModel
+from transformers.modeling_outputs import SequenceClassifierOutput
+from torch.nn.modules.utils import consume_prefix_in_state_dict_if_present
+import logging
+from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
+logger = logging.getLogger(__name__)
class MosaicBertForSequenceClassification(BertPreTrainedModel):
    """Bert Model transformer with a sequence classification/regression head.

    The head is dropout followed by a single linear layer applied to the
    pooled output of the encoder.
    """

    def __init__(self, config):
        """Build the encoder and the classification head.

        Args:
            config: Model configuration. The constructor reads ``num_labels``,
                ``hidden_size``, ``hidden_dropout_prob`` and
                ``classifier_dropout`` (which may be ``None``).
        """
        super().__init__(config)
        self.num_labels = config.num_labels
        self.config = config
        self.bert = BertModel(config, add_pooling_layer=True)
        # Fall back to the encoder dropout rate when no head-specific rate
        # is configured.
        classifier_dropout = (config.classifier_dropout
                              if config.classifier_dropout is not None else
                              config.hidden_dropout_prob)
        self.dropout = nn.Dropout(classifier_dropout)
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)
        # this resets the weights
        self.post_init()

    @classmethod
    def from_pretrained(cls,
                        pretrained_checkpoint,
                        state_dict=None,
                        config=None,
                        *inputs,
                        **kwargs):
        """Instantiate the model and load weights into it.

        Args:
            pretrained_checkpoint: Path (or file-like object) handed to
                ``torch.load``. Only read when ``state_dict`` is ``None``.
            state_dict: Optional already-loaded state dict. When provided it
                is used instead of reading ``pretrained_checkpoint``. A
                leading ``model.`` prefix is stripped from its keys in place.
            config: Model configuration forwarded to the constructor.
            *inputs: Extra positional arguments for the constructor.
            **kwargs: Extra keyword arguments for the constructor.

        Returns:
            The model with every matching weight loaded. Missing and
            unexpected keys are logged as warnings rather than raised,
            because the state dict is loaded with ``strict=False``.
        """
        # this gets a fresh init model
        model = cls(config, *inputs, **kwargs)
        # thus we need to load the state_dict, unless the caller supplied one
        if state_dict is None:
            # NOTE(security): torch.load unpickles the file; only load
            # checkpoints from trusted sources.
            # map_location="cpu" lets checkpoints saved on GPU load on
            # CPU-only hosts; load_state_dict copies values into the model's
            # existing parameters, so the model's device is unaffected.
            state_dict = torch.load(pretrained_checkpoint, map_location="cpu")
        # remove `model` prefix to avoid error
        consume_prefix_in_state_dict_if_present(state_dict, prefix='model.')
        missing_keys, unexpected_keys = model.load_state_dict(state_dict,
                                                              strict=False)
        if missing_keys:
            logger.warning("Found these missing keys in the checkpoint: %s",
                           ', '.join(missing_keys))
            logger.warning("the number of which is equal to %d",
                           len(missing_keys))
        if unexpected_keys:
            logger.warning("Found these unexpected keys in the checkpoint: %s",
                           ', '.join(unexpected_keys))
            logger.warning("the number of which is equal to %d",
                           len(unexpected_keys))
        return model

    def forward(
        self,
        input_ids: Optional[torch.Tensor] = None,
        attention_mask: Optional[torch.Tensor] = None,
        token_type_ids: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.Tensor] = None,
        head_mask: Optional[torch.Tensor] = None,
        inputs_embeds: Optional[torch.Tensor] = None,
        labels: Optional[torch.Tensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple[torch.Tensor], SequenceClassifierOutput]:
        """Run the encoder and the head, optionally computing a loss.

        All arguments except ``labels`` and ``return_dict`` are passed through
        to ``self.bert`` unchanged; ``return_dict`` is resolved against the
        config first and then forwarded as well.

        Args:
            labels: Optional targets. When given, a loss is computed according
                to ``config.problem_type``. If that is ``None`` it is inferred
                from ``num_labels`` and the label dtype and then written back
                to ``self.config`` (so the first labelled call fixes it).
            return_dict: Whether to return a ``SequenceClassifierOutput``.
                Defaults to ``config.use_return_dict``.

        Returns:
            A ``SequenceClassifierOutput`` holding ``loss`` and ``logits``
            (``hidden_states`` and ``attentions`` are always ``None``), or,
            when ``return_dict`` is false, a tuple of ``(loss?, logits, ...)``
            where the trailing items are ``outputs[2:]`` of the encoder.
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
        outputs = self.bert(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )
        # Index 1 of the encoder output is used as the pooled representation.
        pooled_output = outputs[1]
        pooled_output = self.dropout(pooled_output)
        logits = self.classifier(pooled_output)

        loss = None
        if labels is not None:
            if self.config.problem_type is None:
                # Infer the task once and remember it on the config.
                if self.num_labels == 1:
                    self.config.problem_type = "regression"
                elif self.num_labels > 1 and labels.dtype in (torch.long, torch.int):
                    self.config.problem_type = "single_label_classification"
                else:
                    self.config.problem_type = "multi_label_classification"

            if self.config.problem_type == "regression":
                loss_fct = MSELoss()
                if self.num_labels == 1:
                    loss = loss_fct(logits.squeeze(), labels.squeeze())
                else:
                    loss = loss_fct(logits, labels)
            elif self.config.problem_type == "single_label_classification":
                loss_fct = CrossEntropyLoss()
                loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
            elif self.config.problem_type == "multi_label_classification":
                loss_fct = BCEWithLogitsLoss()
                loss = loss_fct(logits, labels)

        if not return_dict:
            output = (logits,) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return SequenceClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=None,
            attentions=None,
        )
+from typing import Optional
+import torch.nn as nn
+import torch
+from utils.bert_layers_mosa import BertModel
+from transformers import BertPreTrainedModel
+from torch.nn.modules.utils import consume_prefix_in_state_dict_if_present
+import logging
+logger = logging.getLogger(__name__)
class MosaicBertForEmbeddingGeneration(BertPreTrainedModel):
    """BERT encoder wrapper that pools encoder layer outputs into embeddings.

    The last ``config.num_embedding_layers`` encoder layers are combined
    according to ``config.strategy`` (``'mean'``, ``'concat'`` or ``'all'``).
    """

    def __init__(self, config, add_pooling_layer=False):
        """Build the encoder.

        Args:
            config (BertConfig): The configuration for the BERT model. The
                constructor reads ``num_hidden_layers``,
                ``num_embedding_layers`` and ``strategy``.
            add_pooling_layer (bool, optional): Whether to add a pooling layer. Defaults to False.

        Raises:
            ValueError: If ``num_embedding_layers`` exceeds
                ``num_hidden_layers``.
        """
        super().__init__(config)
        # Raised explicitly (not asserted) so the check survives `python -O`.
        if config.num_hidden_layers < config.num_embedding_layers:
            raise ValueError(
                'num_hidden_layers should be greater than or equal to num_embedding_layers')
        self.config = config
        self.strategy = config.strategy
        self.bert = BertModel(config, add_pooling_layer=add_pooling_layer)
        # this resets the weights
        self.post_init()

    @classmethod
    def from_pretrained(cls,
                        pretrained_checkpoint,
                        state_dict=None,
                        config=None,
                        *inputs,
                        **kwargs):
        """Instantiate the model and load weights into it.

        Args:
            pretrained_checkpoint: Path (or file-like object) handed to
                ``torch.load``. Only read when ``state_dict`` is ``None``.
            state_dict: Optional already-loaded state dict. When provided it
                is used instead of reading ``pretrained_checkpoint``. A
                leading ``model.`` prefix is stripped from its keys in place.
            config: Model configuration forwarded to the constructor.
            *inputs: Extra positional arguments for the constructor.
            **kwargs: Extra keyword arguments for the constructor.

        Returns:
            The model with every matching weight loaded. Missing and
            unexpected keys are logged as warnings rather than raised,
            because the state dict is loaded with ``strict=False``.
        """
        # this gets a fresh init model
        model = cls(config, *inputs, **kwargs)
        # thus we need to load the state_dict, unless the caller supplied one
        if state_dict is None:
            # NOTE(security): torch.load unpickles the file; only load
            # checkpoints from trusted sources.
            # map_location="cpu" lets checkpoints saved on GPU load on
            # CPU-only hosts; load_state_dict copies values into the model's
            # existing parameters, so the model's device is unaffected.
            state_dict = torch.load(pretrained_checkpoint, map_location="cpu")
        # remove `model` prefix to avoid error
        consume_prefix_in_state_dict_if_present(state_dict, prefix='model.')
        missing_keys, unexpected_keys = model.load_state_dict(state_dict,
                                                              strict=False)
        if missing_keys:
            logger.warning("Found these missing keys in the checkpoint: %s",
                           ', '.join(missing_keys))
            logger.warning("the number of which is equal to %d",
                           len(missing_keys))
        if unexpected_keys:
            logger.warning("Found these unexpected keys in the checkpoint: %s",
                           ', '.join(unexpected_keys))
            logger.warning("the number of which is equal to %d",
                           len(unexpected_keys))
        return model

    def forward(
        self,
        input_ids: Optional[torch.Tensor] = None,
        attention_mask: Optional[torch.Tensor] = None,
        token_type_ids: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.Tensor] = None,
        subset_mask: Optional[torch.Tensor] = None,
        hospital_ids_lens: Optional[list] = None,
    ) -> torch.Tensor:
        """Encode the inputs and pool them into embeddings.

        Args:
            input_ids, token_type_ids, position_ids: Passed positionally to
                ``self.bert.embeddings``.
            attention_mask, subset_mask: Passed to ``self.bert.encoder``.
            hospital_ids_lens: Number of consecutive batch rows belonging to
                each group. Required for the ``'mean'`` and ``'concat'``
                strategies; unused for ``'all'``.

        Returns:
            The tensor produced by :meth:`get_embeddings` for
            ``config.num_embedding_layers`` and ``config.strategy``.
        """
        embedding_output = self.bert.embeddings(input_ids, token_type_ids,
                                                position_ids)
        encoder_outputs_all = self.bert.encoder(
            embedding_output,
            attention_mask,
            output_all_encoded_layers=True,
            subset_mask=subset_mask)
        return self.get_embeddings(encoder_outputs_all, hospital_ids_lens,
                                   self.config.num_embedding_layers,
                                   self.config.strategy)

    def get_embeddings(self, encoder_outputs_all, hospital_ids_lens, num_layers, strategy):
        """Pool the last ``num_layers`` encoder outputs according to ``strategy``.

        Shapes below follow the original author's notes and assume each entry
        of ``encoder_outputs_all`` is (rows, seq_len, hidden_dim), where rows
        are the concatenated visits of every group -- TODO confirm against
        the encoder.

        Args:
            encoder_outputs_all: Sequence of per-layer encoder outputs.
            hospital_ids_lens: Iterable of ints; consecutive runs of rows of
                these lengths are averaged together into one embedding each.
                Not used by the ``'all'`` strategy.
            num_layers: How many trailing layers to use.
            strategy: One of ``'mean'``, ``'concat'`` or ``'all'``.

        Returns:
            * ``'mean'``: layers and sequence positions are averaged, then
              rows are averaged per group -> (groups, hidden_dim).
            * ``'concat'``: sequence positions are averaged, layers are kept,
              then rows are averaged per group ->
              (groups, num_layers, hidden_dim).
            * ``'all'``: the stacked layers, unpooled ->
              (num_layers, rows, seq_len, hidden_dim).

        Raises:
            ValueError: If ``strategy`` is not one of the supported values.
            TypeError: If ``hospital_ids_lens`` is ``None`` for a strategy
                that needs it.
        """
        if strategy not in ('mean', 'concat', 'all'):
            raise ValueError(
                f'{strategy} is not a valid strategy, choose between mean, concat and all')

        # num_layer, batch_size (concatenated visits), seq_len, hidden_dim.
        stacked_layers = torch.stack(encoder_outputs_all[-num_layers:])
        if strategy == 'all':
            return stacked_layers

        if hospital_ids_lens is None:
            raise TypeError(
                f"hospital_ids_lens is required for the '{strategy}' strategy")

        # NOTE(review): the sequence-axis mean covers every position; if
        # padded positions are present they are averaged in -- confirm that
        # this is intended.
        if strategy == 'mean':
            # Average across layers and seq_len -> rows, hidden_dim.
            per_row = stacked_layers.mean(dim=[0, 2])
        else:
            # Average across seq_len only -> num_layer, rows, hidden_dim.
            per_row = stacked_layers.mean(dim=2)

        # NOTE(review): lengths are not checked against the number of rows;
        # a run that falls past the end yields an empty slice and a NaN mean.
        batch_embeddings = []
        start_idx = 0
        for length in hospital_ids_lens:
            end_idx = start_idx + length
            # Average the rows (visits) that belong to this group.
            if strategy == 'mean':
                group_embedding = per_row[start_idx:end_idx].mean(dim=0)
            else:
                group_embedding = per_row[:, start_idx:end_idx].mean(dim=1)
            batch_embeddings.append(group_embedding)
            start_idx = end_idx
        return torch.stack(batch_embeddings)