Update automodel.py
automodel.py  CHANGED  (+101 -168)
@@ -1,19 +1,91 @@
from typing import Optional, Tuple, Union
-
import torch
-
from transformers import BertPreTrainedModel
from transformers.modeling_outputs import SequenceClassifierOutput
-from torch.nn.modules.utils import consume_prefix_in_state_dict_if_present
-import logging
-from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss


logger = logging.getLogger(__name__)

class MosaicBertForSequenceClassification(BertPreTrainedModel):
    """Bert Model transformer with a sequence classification/regression head.
-
    This head is just a linear layer on top of the pooled output.
    """

@@ -22,48 +94,44 @@ class MosaicBertForSequenceClassification(BertPreTrainedModel):
        self.num_labels = config.num_labels
        self.config = config
        self.bert = BertModel(config, add_pooling_layer=True)
-        classifier_dropout = (
-
-
        self.dropout = nn.Dropout(classifier_dropout)
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)

-
        self.post_init()

-
    @classmethod
-    def from_pretrained(
-
-
-                        config=None,
-                        *inputs,
-                        **kwargs):
        """Load from pre-trained."""
        # this gets a fresh init model
        model = cls(config, *inputs, **kwargs)
-
        # thus we need to load the state_dict
        state_dict = torch.load(pretrained_checkpoint)
        # remove `model` prefix to avoid error
-        consume_prefix_in_state_dict_if_present(state_dict, prefix='model.')
-        missing_keys, unexpected_keys = model.load_state_dict(state_dict,
-                                                               strict=False)

        if len(missing_keys) > 0:
            logger.warning(
-                f"Found these missing keys in the checkpoint: {', '.join(missing_keys)}"
-
-            logger.warning(f"the number of which is equal to {len(missing_keys)}"
            )

        if len(unexpected_keys) > 0:
            logger.warning(
                f"Found these unexpected keys in the checkpoint: {', '.join(unexpected_keys)}",
            )
            logger.warning(f"the number of which is equal to {len(unexpected_keys)}")

-
        return model

    def forward(
@@ -80,7 +148,9 @@ class MosaicBertForSequenceClassification(BertPreTrainedModel):
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple[torch.Tensor], SequenceClassifierOutput]:

-        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.bert(
            input_ids,
@@ -104,7 +174,9 @@ class MosaicBertForSequenceClassification(BertPreTrainedModel):
            if self.config.problem_type is None:
                if self.num_labels == 1:
                    self.config.problem_type = "regression"
-                elif self.num_labels > 1 and (labels.dtype == torch.long or labels.dtype == torch.int):
                    self.config.problem_type = "single_label_classification"
                else:
                    self.config.problem_type = "multi_label_classification"
@@ -129,144 +201,5 @@ class MosaicBertForSequenceClassification(BertPreTrainedModel):
            loss=loss,
            logits=logits,
            hidden_states=None,
-            attentions=None,
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-from typing import Optional
-import torch.nn as nn
-import torch
-from utils.bert_layers_mosa import BertModel
-from transformers import BertPreTrainedModel
-from torch.nn.modules.utils import consume_prefix_in_state_dict_if_present
-import logging
-
-logger = logging.getLogger(__name__)
-
-class MosaicBertForEmbeddingGeneration(BertPreTrainedModel):
-
-    def __init__(self, config, add_pooling_layer=False):
-        """
-        Initializes the BertEmbeddings class.
-
-        Args:
-            config (BertConfig): The configuration for the BERT model.
-            add_pooling_layer (bool, optional): Whether to add a pooling layer. Defaults to False.
-        """
-        super().__init__(config)
-        assert config.num_hidden_layers >= config.num_embedding_layers, 'num_hidden_layers should be greater than or equal to num_embedding_layers'
-        self.config = config
-        self.strategy = config.strategy
-        self.bert = BertModel(config, add_pooling_layer=add_pooling_layer)
-        # this resets the weights
-        self.post_init()
-
-
-    @classmethod
-    def from_pretrained(cls,
-                        pretrained_checkpoint,
-                        state_dict=None,
-                        config=None,
-                        *inputs,
-                        **kwargs):
-        """Load from pre-trained."""
-        # this gets a fresh init model
-        model = cls(config, *inputs, **kwargs)
-
-        # thus we need to load the state_dict
-        state_dict = torch.load(pretrained_checkpoint)
-        # remove `model` prefix to avoid error
-        consume_prefix_in_state_dict_if_present(state_dict, prefix='model.')
-        missing_keys, unexpected_keys = model.load_state_dict(state_dict,
-                                                               strict=False)
-
-        if len(missing_keys) > 0:
-            logger.warning(
-                f"Found these missing keys in the checkpoint: {', '.join(missing_keys)}")
-
-            logger.warning(f"the number of which is equal to {len(missing_keys)}"
-                           )
-
-        if len(unexpected_keys) > 0:
-            logger.warning(
-                f"Found these unexpected keys in the checkpoint: {', '.join(unexpected_keys)}",
-            )
-            logger.warning(f"the number of which is equal to {len(unexpected_keys)}")
-
-
-        return model
-
-    def forward(
-        self,
-        input_ids: Optional[torch.Tensor] = None,
-        attention_mask: Optional[torch.Tensor] = None,
-        token_type_ids: Optional[torch.Tensor] = None,
-        position_ids: Optional[torch.Tensor] = None,
-        subset_mask : Optional[torch.Tensor] = None,
-        hospital_ids_lens: list = None,
-    ) -> torch.Tensor:
-
-        embedding_output = self.bert.embeddings(input_ids, token_type_ids,
-                                                position_ids)
-
-        encoder_outputs_all = self.bert.encoder(
-            embedding_output,
-            attention_mask,
-            output_all_encoded_layers=True,
-            subset_mask=subset_mask)
-
-        # batch_size, hidden_dim
-        return self.get_embeddings(encoder_outputs_all, hospital_ids_lens, self.config.num_embedding_layers, self.config.strategy)
-
-    def get_embeddings(self, encoder_outputs_all, hospital_ids_lens, num_layers, strategy):
-
-        batch_embeddings = []
-        start_idx = 0
-
-        # num_layer (we use default = 4), batch_size (concatenated visits), seq_len (clinical note sequences), hidden_dim.
-        # average across num_layers and seq_len
-        if strategy == 'mean':
-            # batch_size (concatenated visits), hidden_dim.
-            sentence_representation = torch.stack(encoder_outputs_all[-num_layers:]).mean(dim=[0, 2])
-
-            for length in hospital_ids_lens:
-                # We then average across visits
-                # batch_size (true batch size), hidden_dim.
-                batch_embeddings.append(torch.mean(sentence_representation[start_idx:start_idx + length],dim=0))
-                start_idx += length
-
-            return torch.stack(batch_embeddings)
-
-        elif strategy == 'concat':
-            # num_layer, batch_size (concatenated visits), hidden_dim.
-            sentence_representation = torch.stack(encoder_outputs_all[-num_layers:]).mean(dim=2)
-
-            for length in hospital_ids_lens:
-                # We then average across visits
-                # num_layer, batch_size (true batch size), hidden_dim.
-                batch_embeddings.append(torch.mean(sentence_representation[:,start_idx:start_idx + length],dim=1))
-                start_idx += length
-
-            return torch.stack(batch_embeddings)
-
-        elif strategy == 'all':
-            # num_layer, batch_size (concatenated visits), seq_len (clinical note sequences), hidden_dim.
-            sentence_representation = torch.stack(encoder_outputs_all[-num_layers:])
-            return sentence_representation
-        else:
-            raise ValueError(f'{strategy} is not a valid strategy, choose between mean and concat')
-
-
-
-
@@ -1,19 +1,91 @@
+import logging
from typing import Optional, Tuple, Union
+
import torch
+import torch.nn as nn
+from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
+from torch.nn.modules.utils import consume_prefix_in_state_dict_if_present
from transformers import BertPreTrainedModel
from transformers.modeling_outputs import SequenceClassifierOutput

+from bert_layers_mosa import BertModel

logger = logging.getLogger(__name__)

+
+class MosaicBertForEmbeddingGeneration(BertPreTrainedModel):
+
+    def __init__(self, config, add_pooling_layer=False):
+        """
+        Initializes the BertEmbeddings class.
+
+        Args:
+            config (BertConfig): The configuration for the BERT model.
+            add_pooling_layer (bool, optional): Whether to add a pooling layer. Defaults to False.
+        """
+        super().__init__(config)
+        assert (
+            config.num_hidden_layers >= config.num_embedding_layers
+        ), "num_hidden_layers should be greater than or equal to num_embedding_layers"
+        self.config = config
+        self.strategy = config.strategy
+        self.bert = BertModel(config, add_pooling_layer=add_pooling_layer)
+        # this resets the weights
+        self.post_init()
+
+    @classmethod
+    def from_pretrained(
+        cls, pretrained_checkpoint, state_dict=None, config=None, *inputs, **kwargs
+    ):
+        """Load from pre-trained."""
+        # this gets a fresh init model
+        model = cls(config, *inputs, **kwargs)
+
+        # thus we need to load the state_dict
+        state_dict = torch.load(pretrained_checkpoint)
+        # remove `model` prefix to avoid error
+        consume_prefix_in_state_dict_if_present(state_dict, prefix="model.")
+        missing_keys, unexpected_keys = model.load_state_dict(state_dict, strict=False)
+
+        if len(missing_keys) > 0:
+            logger.warning(
+                f"Found these missing keys in the checkpoint: {', '.join(missing_keys)}"
+            )
+
+            logger.warning(f"the number of which is equal to {len(missing_keys)}")
+
+        if len(unexpected_keys) > 0:
+            logger.warning(
+                f"Found these unexpected keys in the checkpoint: {', '.join(unexpected_keys)}",
+            )
+            logger.warning(f"the number of which is equal to {len(unexpected_keys)}")
+
+        return model
+
+    def forward(
+        self,
+        input_ids: Optional[torch.Tensor] = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        token_type_ids: Optional[torch.Tensor] = None,
+        position_ids: Optional[torch.Tensor] = None,
+        subset_mask: Optional[torch.Tensor] = None,
+        output_all_encoded_layers: bool = True,
+    ) -> torch.Tensor:
+
+        embedding_output = self.bert.embeddings(input_ids, token_type_ids, position_ids)
+
+        encoder_outputs_all = self.bert.encoder(
+            embedding_output,
+            attention_mask,
+            output_all_encoded_layers=output_all_encoded_layers,
+            subset_mask=subset_mask,
+        )
+
+        # batch_size, hidden_dim
+        return encoder_outputs_all
+
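The new MosaicBertForEmbeddingGeneration.forward above returns the per-layer encoder outputs directly instead of pooling them inside the model, so the pooling that the removed get_embeddings helper performed now has to happen in the caller. Below is a minimal sketch of how the old 'mean' strategy could be reproduced on top of the new return value; mean_pool and group_lens are hypothetical names, and the shapes assume each element of the returned list is a [batch, seq_len, hidden] tensor, as in the removed code.

import torch

def mean_pool(encoder_outputs_all, group_lens, num_layers=4):
    # Average the last `num_layers` layers over the layer and seq_len dimensions,
    # mirroring the removed 'mean' strategy: result is [batch, hidden].
    sentence_representation = torch.stack(encoder_outputs_all[-num_layers:]).mean(dim=[0, 2])

    pooled, start_idx = [], 0
    for length in group_lens:
        # Then average the rows that belong to one group (the old hospital_ids_lens grouping).
        pooled.append(sentence_representation[start_idx:start_idx + length].mean(dim=0))
        start_idx += length
    return torch.stack(pooled)  # [num_groups, hidden]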
class MosaicBertForSequenceClassification(BertPreTrainedModel):
    """Bert Model transformer with a sequence classification/regression head.
    This head is just a linear layer on top of the pooled output.
    """

@@ -22,48 +94,44 @@ class MosaicBertForSequenceClassification(BertPreTrainedModel):
        self.num_labels = config.num_labels
        self.config = config
        self.bert = BertModel(config, add_pooling_layer=True)
+        classifier_dropout = (
+            config.classifier_dropout
+            if config.classifier_dropout is not None
+            else config.hidden_dropout_prob
+        )
        self.dropout = nn.Dropout(classifier_dropout)
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)

+        # this resets the weights
        self.post_init()

    @classmethod
+    def from_pretrained(
+        cls, pretrained_checkpoint, state_dict=None, config=None, *inputs, **kwargs
+    ):
        """Load from pre-trained."""
        # this gets a fresh init model
        model = cls(config, *inputs, **kwargs)
+
        # thus we need to load the state_dict
        state_dict = torch.load(pretrained_checkpoint)
        # remove `model` prefix to avoid error
+        consume_prefix_in_state_dict_if_present(state_dict, prefix="model.")
+        missing_keys, unexpected_keys = model.load_state_dict(state_dict, strict=False)

        if len(missing_keys) > 0:
            logger.warning(
+                f"Found these missing keys in the checkpoint: {', '.join(missing_keys)}"
            )

+            logger.warning(f"the number of which is equal to {len(missing_keys)}")
+
        if len(unexpected_keys) > 0:
            logger.warning(
                f"Found these unexpected keys in the checkpoint: {', '.join(unexpected_keys)}",
            )
            logger.warning(f"the number of which is equal to {len(unexpected_keys)}")

        return model

    def forward(
@@ -80,7 +148,9 @@ class MosaicBertForSequenceClassification(BertPreTrainedModel):
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple[torch.Tensor], SequenceClassifierOutput]:

+        return_dict = (
+            return_dict if return_dict is not None else self.config.use_return_dict
+        )

        outputs = self.bert(
            input_ids,
@@ -104,7 +174,9 @@ class MosaicBertForSequenceClassification(BertPreTrainedModel):
            if self.config.problem_type is None:
                if self.num_labels == 1:
                    self.config.problem_type = "regression"
+                elif self.num_labels > 1 and (
+                    labels.dtype == torch.long or labels.dtype == torch.int
+                ):
                    self.config.problem_type = "single_label_classification"
                else:
                    self.config.problem_type = "multi_label_classification"
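For context on the problem_type branch above: the unchanged lines that follow it in the file (elided from this diff) are expected to turn that setting into a loss using the three criteria imported at the top, following the standard Hugging Face sequence-classification pattern. The sketch below is an assumption about those elided lines, not a copy of them.

# Assumed continuation (standard Hugging Face pattern); not taken verbatim from automodel.py.
if self.config.problem_type == "regression":
    loss_fct = MSELoss()
    loss = loss_fct(logits.squeeze(), labels.squeeze()) if self.num_labels == 1 else loss_fct(logits, labels)
elif self.config.problem_type == "single_label_classification":
    loss_fct = CrossEntropyLoss()
    loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
elif self.config.problem_type == "multi_label_classification":
    loss_fct = BCEWithLogitsLoss()
    loss = loss_fct(logits, labels)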
@@ -129,144 +201,5 @@ class MosaicBertForSequenceClassification(BertPreTrainedModel):
            loss=loss,
            logits=logits,
            hidden_states=None,
+            attentions=None,
+        )
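Finally, a hedged end-to-end sketch of how the updated classifier might be loaded and called. The checkpoint path is a placeholder, and building the config with BertConfig.from_pretrained plus num_labels is an assumption based on the attributes the class reads (num_labels, classifier_dropout, hidden_dropout_prob); it is not taken from this repository.

import torch
from transformers import BertConfig

from automodel import MosaicBertForSequenceClassification

# Placeholder config and checkpoint path; adjust to the actual training setup.
config = BertConfig.from_pretrained("bert-base-uncased", num_labels=2)
model = MosaicBertForSequenceClassification.from_pretrained(
    "path/to/checkpoint.pt",  # torch.load()-able state dict whose keys carry a `model.` prefix
    config=config,
)
model.eval()

input_ids = torch.tensor([[101, 2023, 2003, 1037, 3231, 102]])  # toy token ids
attention_mask = torch.ones_like(input_ids)
with torch.no_grad():
    out = model(input_ids=input_ids, attention_mask=attention_mask)
print(out.logits.shape)  # torch.Size([1, 2])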