Spaces (runtime error)
spitzc32 committed · commit 24d0437 · 1 parent: d13f1d3
Added initial structure of the model
Files changed:
- Dockerfile (+11 -0)
- app.py (+26 -0)
- model/.DS_Store (+0 -0)
- model/__init__.py (+2 -0)
- model/embedding/__init__.py (+63 -0)
- model/layer/__init__.py (+652 -0)
- model/layer/bioes.py (+62 -0)
- model/layer/crf.py (+47 -0)
- model/layer/lstm.py (+47 -0)
- model/layer/span.py (+211 -0)
- model/layer/viterbi.py (+241 -0)
- part/__init__.py (+2 -0)
- part/data.py (+142 -0)
- part/dropout.py (+60 -0)
- requirements.txt (+9 -0)
Dockerfile ADDED @@ -0,0 +1,11 @@

FROM python:3.9

WORKDIR /code

COPY ./requirements.txt /code/requirements.txt

RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

COPY . .

CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED @@ -0,0 +1,26 @@

from model.layer import Bi_LSTM_CRF
from flair.data import Sentence

tagger = Bi_LSTM_CRF.load("checkpoints/best-model.pt")

def model(word: str):
    """
    A function for serving the model for PHI classification.
    :param word: list of word tokens in a paragraph.

    :returns: dict that contains the labeled
        tags and their respective classification.
    """
    txt = Sentence(word)
    tagger.predict(txt)
    labels, tags = [], []

    for entity in txt.get_spans('ner'):
        labels.append(entity.text)
        tags.append(entity.get_label("ner").value)

    return {
        "labels": labels,
        "tags": tags
    }
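Note that the Dockerfile's CMD points uvicorn at app.main:app, while app.py above only defines the model() helper and no ASGI app object. A minimal sketch of the wiring that CMD appears to expect is below; it is not part of this commit, and the module path, endpoint name, and query parameter are all assumptions:

# app/main.py -- hypothetical FastAPI wiring for the uvicorn CMD; not in this commit
from fastapi import FastAPI

from app import model  # assumes the model() helper above is importable as module `app`

app = FastAPI()

@app.get("/predict")
def predict(text: str):
    # delegate to the Flair tagger wrapped by model()
    return model(text)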
model/.DS_Store ADDED Binary file (6.15 kB)
model/__init__.py ADDED @@ -0,0 +1,2 @@

import model.embedding
import model.layer
model/embedding/__init__.py ADDED @@ -0,0 +1,63 @@

import torch.nn as nn
from flair.embeddings import (
    TransformerWordEmbeddings,
    FlairEmbeddings,
    CharacterEmbeddings,
    StackedEmbeddings,
    OneHotEmbeddings
)
from flair.data import Sentence


class PretrainedEmbeddings():
    """
    This is the implementation of the PretrainedEmbeddings we will use to embed our own
    corpus for the purpose of generating a Tensor (the pre_word_embeds) that we will pass
    to the model.

    Plan:
    * Word-level embeddings: we will utilize BERT-based transformer word embeddings
      in order to achieve more functionality.
    * Context-level embeddings: we will stick to FlairEmbeddings first, then check whether
      pooled Flair embeddings are better than FlairEmbeddings.
    """

    def __init__(self,
                 word_embedding: str,
                 forward_embedding: str,
                 backward_embedding: str
                 ) -> None:
        self.word_embedding = word_embedding
        self.forward_embedding = forward_embedding
        self.backward_embedding = backward_embedding

    def forward(self):
        # First, gather all pretrained embeddings accessible in
        # Flair for our requirement
        flair_forward_embedding = FlairEmbeddings(self.forward_embedding)
        flair_backward_embedding = FlairEmbeddings(self.backward_embedding)

        bert_embedding = TransformerWordEmbeddings(model=self.word_embedding,
                                                   fine_tune=True,
                                                   use_context=True,)

        # Next, concatenate all embeddings above
        stacked_embeddings = StackedEmbeddings(
            embeddings=[
                flair_forward_embedding,
                flair_backward_embedding,
                bert_embedding,
            ])

        return stacked_embeddings
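As a quick sanity check, the stacked embedding returned by forward() can be applied to a Flair Sentence directly. A minimal sketch (not part of this commit; the model identifiers are common Flair/HuggingFace names, assumed here for illustration):

from flair.data import Sentence

# hypothetical identifiers; any Flair forward/backward LM and HF transformer would do
embedder = PretrainedEmbeddings(
    word_embedding="bert-base-uncased",
    forward_embedding="news-forward",
    backward_embedding="news-backward",
)
stacked = embedder.forward()

sentence = Sentence("John visited Berlin in May .")
stacked.embed(sentence)
for token in sentence:
    # one concatenated vector per token (Flair fwd + Flair bwd + BERT)
    print(token.text, token.embedding.shape)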
model/layer/__init__.py ADDED @@ -0,0 +1,652 @@

import logging
from typing import Dict, List, Optional, Tuple, Union

import torch
import torch.nn
import torch.nn.functional as F
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
from tqdm import tqdm

import flair.nn
from part import *
from flair.data import Dictionary, Label, Sentence, Span
from flair.datasets import DataLoader, FlairDatapointDataset
from flair.embeddings import TokenEmbeddings
from flair.file_utils import cached_path
from flair.training_utils import store_embeddings

from model.layer.bioes import get_spans_from_bio
from model.layer.lstm import LSTM
from model.layer.crf import CRF
from model.layer.viterbi import ViterbiDecoder, ViterbiLoss

log = logging.getLogger("flair")


class Bi_LSTM_CRF(flair.nn.Classifier[Sentence]):
    def __init__(
        self,
        embeddings: TokenEmbeddings,
        tag_dictionary: Dictionary,
        tag_type: str,
        rnn: Optional[torch.nn.RNN] = None,
        tag_format: str = "BIOES",
        hidden_size: int = 256,
        rnn_layers: int = 1,
        bidirectional: bool = True,
        use_crf: bool = True,
        ave_embeddings: bool = True,
        dropout: float = 0.0,
        word_dropout: float = 0.05,
        locked_dropout: float = 0.5,
        loss_weights: Optional[Dict[str, float]] = None,
        init_from_state_dict: bool = False,
        allow_unk_predictions: bool = False,
    ):
        """
        BiLSTM Span CRF class for predicting labels for single tokens. Can be parameterized by several attributes.
        Span prediction is utilized if there are nested entities such as Address and Organization. Since the researchers
        observed that tokens have different lengths for a given dataset, we made the Span useful by incorporating it
        only if the data needs it.

        :param embeddings: Embeddings to use during training and prediction
        :param tag_dictionary: Dictionary containing all tags from corpus which can be predicted
        :param tag_type: type of tag which is going to be predicted in case a corpus has multiple annotations
        :param rnn: (Optional) Takes a torch.nn.Module as parameter by which you can pass a shared RNN between
            different tasks.
        :param hidden_size: Hidden size of RNN layer
        :param rnn_layers: number of RNN layers
        :param bidirectional: If True, RNN becomes bidirectional
        :param use_crf: If True, use a Conditional Random Field for prediction, else linear map to tag space.
        :param ave_embeddings: If True, add a linear layer on top of embeddings, if you want to imitate
            fine-tuning of non-trainable embeddings.
        :param dropout: If > 0, then use dropout.
        :param word_dropout: If > 0, then use word dropout.
        :param locked_dropout: If > 0, then use locked dropout.
        :param loss_weights: Dictionary of weights for labels for the loss function
            (if any label's weight is unspecified it will default to 1.0)
        :param init_from_state_dict: Indicator whether we are loading a model from state dict
            since we need to transform previous models' weights into CRF instance weights
        """
        super(Bi_LSTM_CRF, self).__init__()

        # ----- Create the internal tag dictionary -----
        self.tag_type = tag_type
        self.tag_format = tag_format.upper()
        if init_from_state_dict:
            self.label_dictionary = tag_dictionary
        else:
            # span-labels need special encoding (BIO or BIOES)
            if tag_dictionary.span_labels:
                # the big question is whether the label dictionary should contain an UNK or not
                # without UNK, we cannot evaluate on data that contains labels not seen in test
                # with UNK, the model learns less well if there are no UNK examples
                self.label_dictionary = Dictionary(add_unk=allow_unk_predictions)
                assert self.tag_format in ["BIOES", "BIO"]
                for label in tag_dictionary.get_items():
                    if label == "<unk>":
                        continue
                    self.label_dictionary.add_item("O")
                    if self.tag_format == "BIOES":
                        self.label_dictionary.add_item("S-" + label)
                        self.label_dictionary.add_item("B-" + label)
                        self.label_dictionary.add_item("E-" + label)
                        self.label_dictionary.add_item("I-" + label)
                    if self.tag_format == "BIO":
                        self.label_dictionary.add_item("B-" + label)
                        self.label_dictionary.add_item("I-" + label)
            else:
                self.label_dictionary = tag_dictionary

        # is this a span prediction problem?
        self.predict_spans = self._determine_if_span_prediction_problem(self.label_dictionary)

        self.tagset_size = len(self.label_dictionary)
        log.info(f"SequenceTagger predicts: {self.label_dictionary}")

        # ----- Embeddings -----
        # We set the first initial embeddings gathered from Flair,
        # stacked and concatenated, then averaged using Linear
        self.embeddings = embeddings
        embedding_dim: int = embeddings.embedding_length

        # ----- Initial loss weights parameters -----
        # This is for the reiteration process of training.
        # Initially we don't have any loss weights, but as we proceed to training,
        # we get loss computations from the evaluation stage.
        self.weight_dict = loss_weights
        self.loss_weights = self._init_loss_weights(loss_weights) if loss_weights else None

        # ----- RNN specific parameters -----
        # These parameters are for setting up self.rnn
        self.hidden_size = hidden_size if not rnn else rnn.hidden_size
        self.rnn_layers = rnn_layers if not rnn else rnn.num_layers
        self.bidirectional = bidirectional if not rnn else rnn.bidirectional

        # ----- Conditional Random Field parameters -----
        self.use_crf = use_crf
        # Previously trained models have been trained without an explicit CRF, thus it is required to check
        # whether we are loading a model from state dict in order to skip or add START and STOP token
        if use_crf and not init_from_state_dict and not self.label_dictionary.start_stop_tags_are_set():
            self.label_dictionary.set_start_stop_tags()
            self.tagset_size += 2

        # ----- Dropout parameters -----
        self.use_dropout: float = dropout
        self.use_word_dropout: float = word_dropout
        self.use_locked_dropout: float = locked_dropout

        if dropout > 0.0:
            self.dropout = torch.nn.Dropout(dropout)

        if word_dropout > 0.0:
            self.word_dropout = flair.nn.WordDropout(word_dropout)

        if locked_dropout > 0.0:
            self.locked_dropout = flair.nn.LockedDropout(locked_dropout)

        # ----- Model layers -----
        # Initialize a linear layer over the embedding dim for the purpose of averaging the embeddings
        self.ave_embeddings = ave_embeddings
        if self.ave_embeddings:
            self.embedding2nn = torch.nn.Linear(embedding_dim, embedding_dim)

        # ----- RNN layer -----
        # Use the shared RNN if provided, else create one for the model
        self.rnn: torch.nn.RNN = (
            rnn
            if rnn
            else LSTM(
                rnn_layers,
                hidden_size,
                bidirectional,
                rnn_input_dim=embedding_dim,
            )
        )

        num_directions = 2 if self.bidirectional else 1
        hidden_output_dim = self.rnn.hidden_size * num_directions

        # final linear map to tag space
        self.linear = torch.nn.Linear(hidden_output_dim, len(self.label_dictionary))

        # the loss function is Viterbi if using CRF, else regular Cross Entropy Loss
        self.loss_function = (
            ViterbiLoss(self.label_dictionary)
            if use_crf
            else torch.nn.CrossEntropyLoss(weight=self.loss_weights, reduction="sum")
        )

        # if using CRF, we also require a CRF and a Viterbi decoder
        if use_crf:
            self.crf = CRF(self.label_dictionary, self.tagset_size, init_from_state_dict)
            self.viterbi_decoder = ViterbiDecoder(self.label_dictionary)

        self.to(flair.device)

    @property
    def label_type(self):
        return self.tag_type

    def _init_loss_weights(self, loss_weights: Dict[str, float]) -> torch.Tensor:
        """
        Initializes the loss weights based on the given dictionary:
        :param loss_weights: dictionary containing loss weights
        """
        n_classes = len(self.label_dictionary)
        weight_list = [1.0 for _ in range(n_classes)]
        for i, tag in enumerate(self.label_dictionary.get_items()):
            if tag in loss_weights.keys():
                weight_list[i] = loss_weights[tag]

        return torch.tensor(weight_list).to(flair.device)

    def forward_loss(self, sentences: Union[List[Sentence], Sentence]) -> Tuple[torch.Tensor, int]:
        """
        Calculates the loss of the forward propagation of the model.
        :param sentences: either a list of sentences or just a sentence
        """
        # if there are no sentences, there is no loss
        if len(sentences) == 0:
            return torch.tensor(0.0, dtype=torch.float, device=flair.device, requires_grad=True), 0

        # forward pass to get scores
        scores, gold_labels = self.forward(sentences)  # type: ignore

        # calculate loss given scores and labels
        return self._calculate_loss(scores, gold_labels)

    def forward(self, sentences: Union[List[Sentence], Sentence]):
        """
        Forward propagation through network. Returns gold labels of batch in addition.
        :param sentences: Batch of current sentences
        """
        if not isinstance(sentences, list):
            sentences = [sentences]
        self.embeddings.embed(sentences)

        # make a zero-padded tensor for the whole sentence
        lengths, sentence_tensor = self._make_padded_tensor_for_batch(sentences)

        # sort tensor in decreasing order based on lengths of sentences in batch
        sorted_lengths, length_indices = lengths.sort(dim=0, descending=True)
        sentences = [sentences[i] for i in length_indices]
        sentence_tensor = sentence_tensor[length_indices]

        # ----- Forward Propagation -----
        # apply the dropouts we initialized for the regularization of our inputs
        if self.use_dropout:
            sentence_tensor = self.dropout(sentence_tensor)
        if self.use_word_dropout:
            sentence_tensor = self.word_dropout(sentence_tensor)
        if self.use_locked_dropout:
            sentence_tensor = self.locked_dropout(sentence_tensor)

        # Average the embeddings using a linear transform
        if self.ave_embeddings:
            sentence_tensor = self.embedding2nn(sentence_tensor)

        # This packs our sentence tensor, the process for weighting our LSTM model
        sentence_tensor, output_lengths = self.rnn(sentence_tensor, sorted_lengths)

        # Regularize the sentence tensor computed from the LSTM model
        if self.use_dropout:
            sentence_tensor = self.dropout(sentence_tensor)
        if self.use_locked_dropout:
            sentence_tensor = self.locked_dropout(sentence_tensor)

        # linear map to tag space
        features = self.linear(sentence_tensor)

        # Depending on whether we are using CRF or a linear layer, scores is either:
        # -- A tensor of shape (batch size, sequence length, tagset size, tagset size) for CRF
        # -- A tensor of shape (aggregated sequence length for all sentences in batch, tagset size) for linear layer
        if self.use_crf:
            features = self.crf(features)
            scores = (features, sorted_lengths, self.crf.transitions)
        else:
            scores = self._get_scores_from_features(features, sorted_lengths)

        # get the gold labels
        gold_labels = self._get_gold_labels(sentences)

        return scores, gold_labels

    def _calculate_loss(self, scores, labels) -> Tuple[torch.Tensor, int]:

        if not any(labels):
            return torch.tensor(0.0, requires_grad=True, device=flair.device), 1

        labels = torch.tensor(
            [
                self.label_dictionary.get_idx_for_item(label[0])
                if len(label) > 0
                else self.label_dictionary.get_idx_for_item("O")
                for label in labels
            ],
            dtype=torch.long,
            device=flair.device,
        )

        return self.loss_function(scores, labels), len(labels)

    def _make_padded_tensor_for_batch(self, sentences: List[Sentence]) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Makes zero-padded tensors in the shape of the longest sentence and the embedding_length to match
        the shape of the embedding when feeding to our LSTM model.
        :param sentences: Batch of current sentences
        """
        names = self.embeddings.get_names()
        tok_lengths: List[int] = [len(sentence.tokens) for sentence in sentences]
        longest_token_sequence_in_batch: int = max(tok_lengths)
        zero_tensor = torch.zeros(
            self.embeddings.embedding_length * longest_token_sequence_in_batch,
            dtype=torch.float,
            device=flair.device,
        )
        all_embs = list()
        for sentence in sentences:
            all_embs += [emb for token in sentence for emb in token.get_each_embedding(names)]
            nb_padding_tokens = longest_token_sequence_in_batch - len(sentence)

            if nb_padding_tokens > 0:
                t = zero_tensor[: self.embeddings.embedding_length * nb_padding_tokens]
                all_embs.append(t)

        sentence_tensor = torch.cat(all_embs).view(
            [
                len(sentences),
                longest_token_sequence_in_batch,
                self.embeddings.embedding_length,
            ]
        )
        return torch.tensor(tok_lengths, dtype=torch.long), sentence_tensor

    @staticmethod
    def _get_scores_from_features(features: torch.Tensor, lengths: torch.Tensor):
        """
        Trims the current batch tensor in shape (batch size, sequence length, tagset size) in such a way that all
        pads are removed.
        :param features: torch.Tensor containing all features from forward propagation
        :param lengths: length of each sentence in batch in order to trim padding tokens
        """
        features_formatted = []
        for feat, lens in zip(features, lengths):
            features_formatted.append(feat[:lens])
        scores = torch.cat(features_formatted)

        return scores

    def _get_gold_labels(self, sentences: Union[List[Sentence], Sentence]):
        """
        Extracts gold labels from each sentence.
        :param sentences: List of sentences in batch
        """
        # spans need to be encoded as token-level predictions
        if self.predict_spans:
            all_sentence_labels = []
            for sentence in sentences:
                sentence_labels = ["O"] * len(sentence)
                for label in sentence.get_labels(self.label_type):
                    span: Span = label.data_point
                    if self.tag_format == "BIOES":
                        if len(span) == 1:
                            sentence_labels[span[0].idx - 1] = "S-" + label.value
                        else:
                            sentence_labels[span[0].idx - 1] = "B-" + label.value
                            sentence_labels[span[-1].idx - 1] = "E-" + label.value
                            for i in range(span[0].idx, span[-1].idx - 1):
                                sentence_labels[i] = "I-" + label.value
                    else:
                        sentence_labels[span[0].idx - 1] = "B-" + label.value
                        for i in range(span[0].idx, span[-1].idx):
                            sentence_labels[i] = "I-" + label.value
                all_sentence_labels.extend(sentence_labels)
            labels = [[label] for label in all_sentence_labels]

        # all others are regular labels for each token
        else:
            labels = [[token.get_label(self.label_type, "O").value] for sentence in sentences for token in sentence]

        return labels

    def predict(
        self,
        sentences: Union[List[Sentence], Sentence],
        mini_batch_size: int = 32,
        return_probabilities_for_all_classes: bool = False,
        verbose: bool = False,
        label_name: Optional[str] = None,
        return_loss=False,
        embedding_storage_mode="none",
        force_token_predictions: bool = False,
    ):  # type: ignore
        """
        Predicts labels for the current batch with the CRF.
        :param sentences: List of sentences in batch
        :param mini_batch_size: batch size for test data
        :param return_probabilities_for_all_classes: whether to return probabilities for all classes
        :param verbose: whether to use progress bar
        :param label_name: which label to predict
        :param return_loss: whether to return loss value
        :param embedding_storage_mode: determines where to store embeddings - can be "gpu", "cpu" or None.
        """
        if label_name is None:
            label_name = self.tag_type

        with torch.no_grad():
            if not sentences:
                return sentences

            # make sure it's a list
            if not isinstance(sentences, list) and not isinstance(sentences, flair.data.Dataset):
                sentences = [sentences]

            # filter empty sentences
            sentences = [sentence for sentence in sentences if len(sentence) > 0]

            # reverse sort all sequences by their length
            reordered_sentences = sorted(sentences, key=lambda s: len(s), reverse=True)

            if len(reordered_sentences) == 0:
                return sentences

            dataloader = DataLoader(
                dataset=FlairDatapointDataset(reordered_sentences),
                batch_size=mini_batch_size,
            )
            # progress bar for verbosity
            if verbose:
                dataloader = tqdm(dataloader, desc="Batch inference")

            overall_loss = torch.zeros(1, device=flair.device)
            batch_no = 0
            label_count = 0
            for batch in dataloader:

                batch_no += 1

                # stop if all sentences are empty
                if not batch:
                    continue

                # get features from forward propagation
                features, gold_labels = self.forward(batch)

                # remove previously predicted labels of this type
                for sentence in batch:
                    sentence.remove_labels(label_name)

                # if return_loss, get loss value
                if return_loss:
                    loss = self._calculate_loss(features, gold_labels)
                    overall_loss += loss[0]
                    label_count += loss[1]

                # sort batch in the same way as forward propagation
                lengths = torch.LongTensor([len(sentence) for sentence in batch])
                _, sort_indices = lengths.sort(dim=0, descending=True)
                batch = [batch[i] for i in sort_indices]

                # make predictions
                if self.use_crf:
                    predictions, all_tags = self.viterbi_decoder.decode(
                        features, return_probabilities_for_all_classes, batch
                    )
                else:
                    predictions, all_tags = self._standard_inference(
                        features, batch, return_probabilities_for_all_classes
                    )

                # add predictions to Sentence
                for sentence, sentence_predictions in zip(batch, predictions):

                    # BIOES-labels need to be converted to spans
                    if self.predict_spans and not force_token_predictions:
                        sentence_tags = [label[0] for label in sentence_predictions]
                        sentence_scores = [label[1] for label in sentence_predictions]
                        predicted_spans = get_spans_from_bio(sentence_tags, sentence_scores)
                        for predicted_span in predicted_spans:
                            span: Span = sentence[predicted_span[0][0] : predicted_span[0][-1] + 1]
                            span.add_label(label_name, value=predicted_span[2], score=predicted_span[1])

                    # token-labels can be added directly ("O" and legacy "_" predictions are skipped)
                    else:
                        for token, label in zip(sentence.tokens, sentence_predictions):
                            if label[0] in ["O", "_"]:
                                continue
                            token.add_label(typename=label_name, value=label[0], score=label[1])

                # all_tags will be empty if all_tag_prob is set to False, so the for loop will be avoided
                for (sentence, sent_all_tags) in zip(batch, all_tags):
                    for (token, token_all_tags) in zip(sentence.tokens, sent_all_tags):
                        token.add_tags_proba_dist(label_name, token_all_tags)

                store_embeddings(sentences, storage_mode=embedding_storage_mode)

            if return_loss:
                return overall_loss, label_count

    def _standard_inference(self, features: torch.Tensor, batch: List[Sentence], probabilities_for_all_classes: bool):
        """
        Softmax over emission scores from forward propagation.
        :param features: sentence tensor from forward propagation
        :param batch: list of sentences
        :param probabilities_for_all_classes: whether to return the score for each tag in the tag dictionary
        """
        softmax_batch = F.softmax(features, dim=1).cpu()
        scores_batch, prediction_batch = torch.max(softmax_batch, dim=1)
        predictions = []
        all_tags = []

        for sentence in batch:
            scores = scores_batch[: len(sentence)]
            predictions_for_sentence = prediction_batch[: len(sentence)]
            predictions.append(
                [
                    (self.label_dictionary.get_item_for_index(prediction), score.item())
                    for token, score, prediction in zip(sentence, scores, predictions_for_sentence)
                ]
            )
            scores_batch = scores_batch[len(sentence) :]
            prediction_batch = prediction_batch[len(sentence) :]

        if probabilities_for_all_classes:
            lengths = [len(sentence) for sentence in batch]
            all_tags = self._all_scores_for_token(batch, softmax_batch, lengths)

        return predictions, all_tags

    def _all_scores_for_token(self, sentences: List[Sentence], scores: torch.Tensor, lengths: List[int]):
        """
        Returns all scores for each tag in the tag dictionary.
        :param scores: scores for the current sentence.
        """
        scores = scores.numpy()
        tokens = [token for sentence in sentences for token in sentence]
        prob_all_tags = [
            [
                Label(token, self.label_dictionary.get_item_for_index(score_id), score)
                for score_id, score in enumerate(score_dist)
            ]
            for score_dist, token in zip(scores, tokens)
        ]

        prob_tags_per_sentence = []
        previous = 0
        for length in lengths:
            prob_tags_per_sentence.append(prob_all_tags[previous : previous + length])
            previous += length
        return prob_tags_per_sentence

    def _get_state_dict(self):
        """Returns the state dictionary for this model."""
        model_state = {
            **super()._get_state_dict(),
            "embeddings": self.embeddings,
            "hidden_size": self.hidden_size,
            "tag_dictionary": self.label_dictionary,
            "tag_format": self.tag_format,
            "tag_type": self.tag_type,
            "use_crf": self.use_crf,
            "rnn_layers": self.rnn_layers,
            "use_dropout": self.use_dropout,
            "use_word_dropout": self.use_word_dropout,
            "use_locked_dropout": self.use_locked_dropout,
            "ave_embeddings": self.ave_embeddings,
            "weight_dict": self.weight_dict,
        }

        return model_state

    @classmethod
    def _init_model_with_state_dict(cls, state, **kwargs):

        if state["use_crf"]:
            if "transitions" in state["state_dict"]:
                state["state_dict"]["crf.transitions"] = state["state_dict"]["transitions"]
                del state["state_dict"]["transitions"]

        return super()._init_model_with_state_dict(
            state,
            embeddings=state.get("embeddings"),
            tag_dictionary=state.get("tag_dictionary"),
            tag_format=state.get("tag_format", "BIOES"),
            tag_type=state.get("tag_type"),
            use_crf=state.get("use_crf"),
            rnn_layers=state.get("rnn_layers"),
            hidden_size=state.get("hidden_size"),
            dropout=state.get("use_dropout", 0.0),
            word_dropout=state.get("use_word_dropout", 0.0),
            locked_dropout=state.get("use_locked_dropout", 0.0),
            ave_embeddings=state.get("ave_embeddings", True),
            loss_weights=state.get("weight_dict"),
            init_from_state_dict=True,
            **kwargs,
        )

    @staticmethod
    def _filter_empty_sentences(sentences: List[Sentence]) -> List[Sentence]:
        filtered_sentences = [sentence for sentence in sentences if sentence.tokens]
        if len(sentences) != len(filtered_sentences):
            log.warning(f"Ignore {len(sentences) - len(filtered_sentences)} sentence(s) with no tokens.")
        return filtered_sentences

    def _determine_if_span_prediction_problem(self, dictionary: Dictionary) -> bool:
        for item in dictionary.get_items():
            if item.startswith("B-") or item.startswith("S-") or item.startswith("I-"):
                return True
        return False

    def _print_predictions(self, batch, gold_label_type):

        lines = []
        if self.predict_spans:
            for datapoint in batch:
                # all labels default to "O"
                for token in datapoint:
                    token.set_label("gold_bio", "O")
                    token.set_label("predicted_bio", "O")

                # set gold token-level
                for gold_label in datapoint.get_labels(gold_label_type):
                    gold_span: Span = gold_label.data_point
                    prefix = "B-"
                    for token in gold_span:
                        token.set_label("gold_bio", prefix + gold_label.value)
                        prefix = "I-"

                # set predicted token-level
                for predicted_label in datapoint.get_labels("predicted"):
                    predicted_span: Span = predicted_label.data_point
                    prefix = "B-"
                    for token in predicted_span:
                        token.set_label("predicted_bio", prefix + predicted_label.value)
                        prefix = "I-"

                # now print labels in CoNLL format
                for token in datapoint:
                    eval_line = (
                        f"{token.text} "
                        f"{token.get_label('gold_bio').value} "
                        f"{token.get_label('predicted_bio').value}\n"
                    )
                    lines.append(eval_line)
                lines.append("\n")

        else:
            for datapoint in batch:
                # print labels in CoNLL format
                for token in datapoint:
                    eval_line = (
                        f"{token.text} "
                        f"{token.get_label(gold_label_type).value} "
                        f"{token.get_label('predicted').value}\n"
                    )
                    lines.append(eval_line)
                lines.append("\n")
        return lines
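For orientation, a minimal construction sketch follows (not part of this commit): the corpus, the label-type name, and the embedding identifiers are assumptions that follow Flair's standard API, combined with the PretrainedEmbeddings helper from model/embedding.

from flair.datasets import CONLL_03  # hypothetical choice; any token-level corpus with span labels works

from model.embedding import PretrainedEmbeddings

corpus = CONLL_03()
tag_dictionary = corpus.make_label_dictionary(label_type="ner")

embedder = PretrainedEmbeddings(
    word_embedding="bert-base-uncased",
    forward_embedding="news-forward",
    backward_embedding="news-backward",
)

tagger = Bi_LSTM_CRF(
    embeddings=embedder.forward(),   # stacked Flair + BERT token embeddings
    tag_dictionary=tag_dictionary,
    tag_type="ner",
    hidden_size=256,
    use_crf=True,
)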
model/layer/bioes.py ADDED @@ -0,0 +1,62 @@

from collections import defaultdict
from typing import Dict


def get_spans_from_bio(bioes_tags, bioes_scores=None):
    # add a dummy "O" to close the final prediction
    bioes_tags.append("O")
    # return complex list
    found_spans = []
    # internal variables
    current_tag_weights: Dict[str, float] = defaultdict(lambda: 0.0)
    previous_tag = "O-"
    current_span = []
    current_span_scores = []
    for idx, bioes_tag in enumerate(bioes_tags):

        # non-set tags are OUT tags
        if bioes_tag == "" or bioes_tag == "O" or bioes_tag == "_":
            bioes_tag = "O-"

        # anything that is not OUT is IN
        in_span = False if bioes_tag == "O-" else True

        # does this prediction start a new span?
        starts_new_span = False

        # begin and single tags start new spans
        if bioes_tag[0:2] in ["B-", "S-"]:
            starts_new_span = True

        # in IOB format, an I tag starts a span if it follows an O or is a different span
        if bioes_tag[0:2] == "I-" and previous_tag[2:] != bioes_tag[2:]:
            starts_new_span = True

        # single tags that change prediction start new spans
        if bioes_tag[0:2] in ["S-"] and previous_tag[2:] != bioes_tag[2:]:
            starts_new_span = True

        # if an existing span is ended (either by reaching O or starting a new span)
        if (starts_new_span or not in_span) and len(current_span) > 0:
            # determine score and value
            span_score = sum(current_span_scores) / len(current_span_scores)
            span_value = sorted(current_tag_weights.items(), key=lambda k_v: k_v[1], reverse=True)[0][0]

            # append to result list
            found_spans.append((current_span, span_score, span_value))

            # reset for-loop variables for new span
            current_span = []
            current_span_scores = []
            current_tag_weights = defaultdict(lambda: 0.0)

        if in_span:
            current_span.append(idx)
            current_span_scores.append(bioes_scores[idx] if bioes_scores else 1.0)
            weight = 1.1 if starts_new_span else 1.0
            current_tag_weights[bioes_tag[2:]] += weight

        # remember previous tag
        previous_tag = bioes_tag

    return found_spans
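A small worked example of get_spans_from_bio (not part of this commit): for the BIOES sequence below, tokens 0-1 form one PER span and token 3 a single-token LOC span; with no scores supplied, each span's score defaults to 1.0.

tags = ["B-PER", "E-PER", "O", "S-LOC"]
print(get_spans_from_bio(tags))
# [([0, 1], 1.0, 'PER'), ([3], 1.0, 'LOC')]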
model/layer/crf.py ADDED @@ -0,0 +1,47 @@

import torch

import flair

START_TAG: str = "<START>"
STOP_TAG: str = "<STOP>"


class CRF(torch.nn.Module):
    """
    Conditional Random Field implementation according to sgrvinod, modified to not
    only look at the current word, but also at the previously seen annotation.
    """

    def __init__(self, tag_dictionary, tagset_size: int, init_from_state_dict: bool):
        """
        :param tag_dictionary: tag dictionary in order to find the IDs for the start and stop tags
        :param tagset_size: number of tags from the tag dictionary
        :param init_from_state_dict: whether we load a pretrained model from state dict
        """
        super(CRF, self).__init__()

        self.tagset_size = tagset_size
        # Transitions are used in the following way: transitions[to, from].
        self.transitions = torch.nn.Parameter(torch.randn(tagset_size, tagset_size))
        # If we are not using a pretrained model and train a fresh one, we need to set transitions from any tag
        # to START-tag and from STOP-tag to any other tag to -10000.
        if not init_from_state_dict:
            self.transitions.detach()[tag_dictionary.get_idx_for_item(START_TAG), :] = -10000

            self.transitions.detach()[:, tag_dictionary.get_idx_for_item(STOP_TAG)] = -10000
        self.to(flair.device)

    def forward(self, features: torch.Tensor) -> torch.Tensor:
        """
        Forward propagation of the Conditional Random Field.
        :param features: output from LSTM layer in shape (batch size, seq len, hidden size)
        :return: CRF scores (emission scores for each token + transition prob from previous state) in
            shape (batch_size, seq len, tagset size, tagset size)
        """
        batch_size, seq_len = features.size()[:2]

        emission_scores = features
        emission_scores = emission_scores.unsqueeze(-1).expand(batch_size, seq_len, self.tagset_size, self.tagset_size)

        crf_scores = emission_scores + self.transitions.unsqueeze(0).unsqueeze(0)
        return crf_scores
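The broadcast in forward() is easiest to see on shapes: each emission score is replicated across the "from tag" axis, and the (tagset_size x tagset_size) transition matrix is added once per token. A quick shape check (not part of this commit; the tiny tag dictionary and random emissions are assumptions for illustration):

import torch
import flair
from flair.data import Dictionary

d = Dictionary(add_unk=False)
for item in ["O", "B-PER", "I-PER", "<START>", "<STOP>"]:
    d.add_item(item)

crf = CRF(d, tagset_size=len(d), init_from_state_dict=False)
emissions = torch.randn(2, 7, len(d), device=flair.device)  # (batch, seq len, tagset size)
scores = crf(emissions)
print(scores.shape)  # torch.Size([2, 7, 5, 5]) -> (batch, seq len, to-tag, from-tag)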
model/layer/lstm.py ADDED @@ -0,0 +1,47 @@

from typing import Tuple

import torch
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
import flair

class LSTM(torch.nn.Module):
    """
    Simple LSTM implementation that returns the features used for the (1) CRF and (2) Span classifier.
    """
    def __init__(self, rnn_layers: int, hidden_size: int, bidirectional: bool, rnn_input_dim: int,):
        """
        :param rnn_layers: number of RNN layers to be used, default 1
        :param hidden_size: hidden size of the LSTM layer
        :param bidirectional: whether we use a bidirectional LSTM or not, default True
        :param rnn_input_dim: the shape of our max sentence token and embeddings
        """
        super(LSTM, self).__init__()

        self.hidden_size = hidden_size
        self.rnn_input_dim = rnn_input_dim
        self.num_layers = rnn_layers
        self.dropout = 0.0 if rnn_layers == 1 else 0.5
        self.bidirectional = bidirectional
        self.batch_first = True
        self.lstm = torch.nn.LSTM(
            self.rnn_input_dim,
            self.hidden_size,
            num_layers=self.num_layers,
            dropout=self.dropout,
            bidirectional=self.bidirectional,
            batch_first=self.batch_first,
        )

        self.to(flair.device)

    def forward(self, sentence_tensor: torch.Tensor, sorted_lengths: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Forward propagation of the LSTM model by packing the tensors.
        :param sentence_tensor: padded embeddings in shape (batch size, seq len, embedding dim)
        :param sorted_lengths: lengths of the sentences in the batch
        :return: padded LSTM output in shape (batch size, seq len, hidden size * num directions) and the output lengths
        """
        packed = pack_padded_sequence(sentence_tensor, sorted_lengths, batch_first=True, enforce_sorted=False)
        rnn_output, hidden = self.lstm(packed)
        sentence_tensor, output_lengths = pad_packed_sequence(rnn_output, batch_first=True)

        return sentence_tensor, output_lengths
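A quick sketch exercising the wrapper with random embeddings (not part of this commit; dimensions are arbitrary): pack_padded_sequence drops the padding during the LSTM pass and pad_packed_sequence restores a padded batch afterwards.

import torch
import flair

lstm = LSTM(rnn_layers=1, hidden_size=256, bidirectional=True, rnn_input_dim=100)
batch = torch.randn(4, 12, 100, device=flair.device)  # (batch, max seq len, embedding dim)
lengths = torch.tensor([12, 9, 7, 3])                 # CPU lengths; enforce_sorted=False handles ordering
out, out_lengths = lstm(batch, lengths)
print(out.shape)  # torch.Size([4, 12, 512]) -- hidden_size * 2 directions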
model/layer/span.py ADDED @@ -0,0 +1,211 @@

from functools import lru_cache
from itertools import chain
from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union

import torch
import torch.nn as nn
import torch.nn.functional as F


def enumerate_spans(n):
    for i in range(n):
        for j in range(i, n):
            yield (i, j)

@lru_cache  # type: ignore
def get_all_spans(n: int) -> torch.Tensor:
    return torch.tensor(list(enumerate_spans(n)), dtype=torch.long)


class SpanClassifier(nn.Module):
    num_additional_labels = 1

    def __init__(self, encoder, scorer: "SpanScorer"):
        super().__init__()
        self.encoder = encoder
        self.scorer = scorer

    def forward(
        self, *input_ids: Sequence[torch.Tensor]
    ) -> Tuple[List[torch.Tensor], List[torch.Tensor]]:
        hs, lengths = self.encoder(*input_ids)
        spans = list(map(get_all_spans, lengths))
        scores = self.scorer(hs, spans)
        return spans, scores

    @torch.no_grad()
    def decode(
        self,
        spans: Sequence[torch.Tensor],
        scores: Sequence[torch.Tensor],
    ) -> List[List[Tuple[int, int, int]]]:
        spans_flatten = torch.cat(spans)
        scores_flatten = torch.cat(scores)
        assert len(spans_flatten) == len(scores_flatten)
        labels_flatten = scores_flatten.argmax(dim=1).cpu()
        mask = labels_flatten < self.scorer.num_labels - 1
        mentions = torch.hstack((spans_flatten[mask], labels_flatten[mask, None]))

        output = []
        offset = 0
        sizes = [m.sum() for m in torch.split(mask, [len(idxs) for idxs in spans])]
        for size in sizes:
            output.append([tuple(m) for m in mentions[offset : offset + size].tolist()])
            offset += size
        return output  # type: ignore

    def compute_metrics(
        self,
        spans: Sequence[torch.Tensor],
        scores: Sequence[torch.Tensor],
        true_mentions: Sequence[Sequence[Tuple[int, int, int]]],
        decode=True,
    ) -> Dict[str, Any]:
        assert len(spans) == len(scores) == len(true_mentions)
        num_labels = self.scorer.num_labels
        true_labels = []
        for spans_i, scores_i, true_mentions_i in zip(spans, scores, true_mentions):
            assert len(spans_i) == len(scores_i)
            span2idx = {tuple(s): idx for idx, s in enumerate(spans_i.tolist())}
            labels_i = torch.full((len(spans_i),), fill_value=num_labels - 1)
            for (start, end, label) in true_mentions_i:
                idx = span2idx.get((start, end))
                if idx is not None:
                    labels_i[idx] = label
            true_labels.append(labels_i)

        scores_flatten = torch.cat(scores)
        true_labels_flatten = torch.cat(true_labels).to(scores_flatten.device)
        assert len(scores_flatten) == len(true_labels_flatten)
        loss = F.cross_entropy(scores_flatten, true_labels_flatten)
        accuracy = categorical_accuracy(scores_flatten, true_labels_flatten)
        result = {"loss": loss, "accuracy": accuracy}

        if decode:
            pred_mentions = self.decode(spans, scores)
            tp, fn, fp = 0, 0, 0
            for pred_mentions_i, true_mentions_i in zip(pred_mentions, true_mentions):
                pred, gold = set(pred_mentions_i), set(true_mentions_i)
                tp += len(gold & pred)
                fn += len(gold - pred)
                fp += len(pred - gold)
            result["precision"] = (tp, tp + fp)
            result["recall"] = (tp, tp + fn)
            result["mentions"] = pred_mentions

        return result


@torch.no_grad()
def categorical_accuracy(
    y: torch.Tensor, t: torch.Tensor, ignore_index: Optional[int] = None
) -> Tuple[int, int]:
    pred = y.argmax(dim=1)
    if ignore_index is not None:
        mask = t == ignore_index
        ignore_cnt = mask.sum()
        pred.masked_fill_(mask, ignore_index)
        count = ((pred == t).sum() - ignore_cnt).item()
        total = (t.numel() - ignore_cnt).item()
    else:
        count = (pred == t).sum().item()
        total = t.numel()
    return count, total


class SpanScorer(torch.nn.Module):
    def __init__(self, num_labels: int):
        super().__init__()
        self.num_labels = num_labels

    def forward(
        self, xs: torch.Tensor, spans: Sequence[torch.Tensor]
    ):
        raise NotImplementedError


class BaselineSpanScorer(SpanScorer):
    def __init__(
        self,
        input_size: int,
        num_labels: int,
        mlp_units: Union[int, Sequence[int]] = 150,
        mlp_dropout: float = 0.0,
        feature="concat",
    ):
        super().__init__(num_labels)
        input_size *= 2 if feature == "concat" else 1
        self.mlp = MLP(input_size, num_labels, mlp_units, F.relu, mlp_dropout)
        self.feature = feature

    def forward(
        self, xs: torch.Tensor, spans: Sequence[torch.Tensor]
    ):
        max_length = xs.size(1)
        xs_flatten = xs.reshape(-1, xs.size(-1))
        spans_flatten = torch.cat([idxs + max_length * i for i, idxs in enumerate(spans)])
        features = self._compute_feature(xs_flatten, spans_flatten)
        scores = self.mlp(features)
        return torch.split(scores, [len(idxs) for idxs in spans])

    def _compute_feature(self, xs, spans):
        if self.feature == "concat":
            return xs[spans.ravel()].view(len(spans), -1)
        elif self.feature == "minus":
            begins, ends = spans.T
            return xs[ends] - xs[begins]
        else:
            raise NotImplementedError


class MLP(nn.Sequential):
    def __init__(
        self,
        in_features: int,
        out_features: Optional[int],
        units: Optional[Union[int, Sequence[int]]] = None,
        activate: Optional[Callable[[torch.Tensor], torch.Tensor]] = None,
        dropout: float = 0.0,
        bias: bool = True,
    ):
        units = [units] if isinstance(units, int) else units
        if not units and out_features is None:
            raise ValueError("'out_features' or 'units' must be specified")
        layers = []
        for u in units or []:
            layers.append(MLP.Layer(in_features, u, activate, dropout, bias))
            in_features = u
        if out_features is not None:
            layers.append(MLP.Layer(in_features, out_features, None, 0.0, bias))
        super().__init__(*layers)

    class Layer(nn.Module):
        def __init__(
            self,
            in_features: int,
            out_features: int,
            activate: Optional[Callable[[torch.Tensor], torch.Tensor]] = None,
            dropout: float = 0.0,
            bias: bool = True,
        ):
            super().__init__()
            if activate is not None and not callable(activate):
                raise TypeError("activate must be callable: type={}".format(type(activate)))
            self.linear = nn.Linear(in_features, out_features, bias)
            self.activate = activate
            self.dropout = nn.Dropout(dropout)

        def forward(self, x: torch.Tensor) -> torch.Tensor:
            h = self.linear(x)
            if self.activate is not None:
                h = self.activate(h)
            return self.dropout(h)

        def extra_repr(self) -> str:
            return "{}, activate={}, dropout={}".format(
                self.linear.extra_repr(), self.activate, self.dropout.p
            )

        def __repr__(self):
            return "{}.{}({})".format(MLP.__name__, self._get_name(), self.extra_repr())
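enumerate_spans generates all O(n²) contiguous (start, end) pairs, which is what the scorer classifies. A tiny demonstration (not part of this commit; the sizes below are arbitrary):

import torch

print(list(enumerate_spans(3)))
# [(0, 0), (0, 1), (0, 2), (1, 1), (1, 2), (2, 2)]
print(get_all_spans(3).shape)  # torch.Size([6, 2]) -- cached per sentence length

# scoring random encoder states for two sentences of lengths 3 and 2
scorer = BaselineSpanScorer(input_size=8, num_labels=4, feature="concat")
xs = torch.randn(2, 3, 8)                      # (batch, padded seq len, hidden)
spans = [get_all_spans(3), get_all_spans(2)]   # per-sentence span index tensors
scores = scorer(xs, spans)
print([s.shape for s in scores])  # [torch.Size([6, 4]), torch.Size([3, 4])]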
model/layer/viterbi.py
ADDED
@@ -0,0 +1,241 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import Tuple
|
2 |
+
|
3 |
+
import numpy as np
|
4 |
+
import torch
|
5 |
+
import torch.nn
|
6 |
+
from torch.nn.functional import softmax
|
7 |
+
from torch.nn.utils.rnn import pack_padded_sequence
|
8 |
+
|
9 |
+
import flair
|
10 |
+
from flair.data import Dictionary, Label, List, Sentence
|
11 |
+
|
12 |
+
START_TAG: str = "<START>"
|
13 |
+
STOP_TAG: str = "<STOP>"
|
14 |
+
|
15 |
+
|
16 |
+
class ViterbiLoss(torch.nn.Module):
|
17 |
+
"""
|
18 |
+
Calculates the loss for each sequence up to its length t.
|
19 |
+
"""
|
20 |
+
|
21 |
+
def __init__(self, tag_dictionary: Dictionary):
|
22 |
+
"""
|
23 |
+
:param tag_dictionary: tag_dictionary of task
|
24 |
+
"""
|
25 |
+
super(ViterbiLoss, self).__init__()
|
26 |
+
self.tag_dictionary = tag_dictionary
|
27 |
+
self.tagset_size = len(tag_dictionary)
|
28 |
+
self.start_tag = tag_dictionary.get_idx_for_item(START_TAG)
|
29 |
+
self.stop_tag = tag_dictionary.get_idx_for_item(STOP_TAG)
|
30 |
+
|
31 |
+
def forward(self, features_tuple: tuple, targets: torch.Tensor) -> torch.Tensor:
|
32 |
+
"""
|
33 |
+
Forward propagation of Viterbi Loss
|
34 |
+
:param features_tuple: CRF scores from forward method in shape (batch size, seq len, tagset size, tagset size),
|
35 |
+
lengths of sentences in batch, transitions from CRF
|
36 |
+
:param targets: true tags for sentences which will be converted to matrix indices.
|
37 |
+
:return: average Viterbi Loss over batch size
|
38 |
+
"""
|
39 |
+
features, lengths, transitions = features_tuple
|
40 |
+
|
41 |
+
batch_size = features.size(0)
|
42 |
+
seq_len = features.size(1)
|
43 |
+
|
44 |
+
targets, targets_matrix_indices = self._format_targets(targets, lengths)
|
45 |
+
targets_matrix_indices = torch.tensor(targets_matrix_indices, dtype=torch.long).unsqueeze(2).to(flair.device)
|
46 |
+
|
47 |
+
# scores_at_targets[range(features.shape[0]), lengths.values -1]
|
48 |
+
# Squeeze crf scores matrices in 1-dim shape and gather scores at targets by matrix indices
|
49 |
+
scores_at_targets = torch.gather(features.view(batch_size, seq_len, -1), 2, targets_matrix_indices)
|
50 |
+
scores_at_targets = pack_padded_sequence(scores_at_targets, lengths, batch_first=True)[0]
|
51 |
+
transitions_to_stop = transitions[
|
52 |
+
np.repeat(self.stop_tag, features.shape[0]),
|
53 |
+
[target[length - 1] for target, length in zip(targets, lengths)],
|
54 |
+
]
|
55 |
+
gold_score = scores_at_targets.sum() + transitions_to_stop.sum()
|
56 |
+
|
57 |
+
scores_upto_t = torch.zeros(batch_size, self.tagset_size, device=flair.device)
|
58 |
+
|
59 |
+
for t in range(max(lengths)):
|
60 |
+
batch_size_t = sum(
|
61 |
+
[length > t for length in lengths]
|
62 |
+
) # since batch is ordered, we can save computation time by reducing our effective batch_size
|
63 |
+
|
64 |
+
if t == 0:
|
65 |
+
# Initially, get scores from <start> tag to all other tags
|
66 |
+
scores_upto_t[:batch_size_t] = (
|
67 |
+
scores_upto_t[:batch_size_t] + features[:batch_size_t, t, :, self.start_tag]
|
68 |
+
)
|
69 |
+
else:
|
70 |
+
# We add scores at current timestep to scores accumulated up to previous timestep, and log-sum-exp
|
71 |
+
# Remember, the cur_tag of the previous timestep is the prev_tag of this timestep
|
72 |
+
scores_upto_t[:batch_size_t] = self._log_sum_exp(
|
73 |
+
features[:batch_size_t, t, :, :] + scores_upto_t[:batch_size_t].unsqueeze(1), dim=2
|
74 |
+
)
|
75 |
+
|
76 |
+
all_paths_scores = self._log_sum_exp(scores_upto_t + transitions[self.stop_tag].unsqueeze(0), dim=1).sum()
|
77 |
+
|
78 |
+
viterbi_loss = all_paths_scores - gold_score
|
79 |
+
|
80 |
+
return viterbi_loss
|
81 |
+
|
82 |
+
@staticmethod
|
83 |
+
def _log_sum_exp(tensor, dim):
|
84 |
+
"""
|
85 |
+
Calculates the log-sum-exponent of a tensor's dimension in a numerically stable way.
|
86 |
+
:param tensor: tensor
|
87 |
+
:param dim: dimension to calculate log-sum-exp of
|
88 |
+
:return: log-sum-exp
|
89 |
+
"""
|
90 |
+
m, _ = torch.max(tensor, dim)
|
91 |
+
m_expanded = m.unsqueeze(dim).expand_as(tensor)
|
92 |
+
return m + torch.log(torch.sum(torch.exp(tensor - m_expanded), dim))
|
93 |
+
    def _format_targets(self, targets: torch.Tensor, lengths: torch.IntTensor):
        """
        Formats targets into matrix indices.
        CRF scores contain, per sentence and per token, a (tagset_size x tagset_size) matrix holding the emission
        score for token j plus the transition score from the previous token i. That means, if we think of our rows
        as "to tag" and our columns as "from tag", the matrix cell [10, 5] contains the emission score for tag 10
        plus the transition score from previous tag 5, and can be addressed directly through the 1-dim index
        (10 + tagset_size * 5) = 70, if our tagset consists of 12 tags.
        :param targets: targets as in tag dictionary
        :param lengths: lengths of sentences in batch
        """
        targets_per_sentence = []

        targets_list = targets.tolist()
        for cut in lengths:
            targets_per_sentence.append(targets_list[:cut])
            targets_list = targets_list[cut:]

        # Pad every sentence's tag sequence to the maximum length with the STOP tag
        for t in targets_per_sentence:
            t += [self.tag_dictionary.get_idx_for_item(STOP_TAG)] * (int(lengths.max().item()) - len(t))

        matrix_indices = list(
            map(
                lambda s: [self.tag_dictionary.get_idx_for_item(START_TAG) + (s[0] * self.tagset_size)]
                + [s[i] + (s[i + 1] * self.tagset_size) for i in range(0, len(s) - 1)],
                targets_per_sentence,
            )
        )

        return targets_per_sentence, matrix_indices


class ViterbiDecoder:
    """
    Decodes a given sequence using the Viterbi algorithm.
    """

    def __init__(self, tag_dictionary: Dictionary):
        """
        :param tag_dictionary: Dictionary of tags for sequence labeling task
        """
        self.tag_dictionary = tag_dictionary
        self.tagset_size = len(tag_dictionary)
        self.start_tag = tag_dictionary.get_idx_for_item(START_TAG)
        self.stop_tag = tag_dictionary.get_idx_for_item(STOP_TAG)

    def decode(
        self, features_tuple: tuple, probabilities_for_all_classes: bool, sentences: List[Sentence]
    ) -> Tuple[List, List]:
        """
        Decoding function returning the most likely sequence of tags.
        :param features_tuple: CRF scores from the forward method in shape (batch size, seq len, tagset size, tagset size),
            lengths of sentences in batch, transitions of CRF
        :param probabilities_for_all_classes: whether to return probabilities for all tags
        :param sentences: list of the batch's sentences
        :return: decoded sequences
        """
        features, lengths, transitions = features_tuple
        all_tags = []

        batch_size = features.size(0)
        seq_len = features.size(1)

        # Create a tensor to hold accumulated sequence scores at each current tag
        scores_upto_t = torch.zeros(batch_size, seq_len + 1, self.tagset_size).to(flair.device)
        # Create a tensor to hold back-pointers,
        # i.e. indices of the previous tag that correspond to the maximum accumulated score at the current tag.
        # Let pads be the <end> tag index, since that was the last tag in the decoded sequence.
        backpointers = (
            torch.ones((batch_size, seq_len + 1, self.tagset_size), dtype=torch.long, device=flair.device)
            * self.stop_tag
        )

        for t in range(seq_len):
            batch_size_t = sum([length > t for length in lengths])  # effective batch size (sans pads) at this timestep
            terminates = [i for i, length in enumerate(lengths) if length == t + 1]

            if t == 0:
                scores_upto_t[:batch_size_t, t] = features[:batch_size_t, t, :, self.start_tag]
                backpointers[:batch_size_t, t, :] = (
                    torch.ones((batch_size_t, self.tagset_size), dtype=torch.long) * self.start_tag
                )
            else:
                # We add scores at the current timestep to scores accumulated up to the previous timestep, and
                # choose the previous tag that corresponds to the max. accumulated score for each current tag
                scores_upto_t[:batch_size_t, t], backpointers[:batch_size_t, t, :] = torch.max(
                    features[:batch_size_t, t, :, :] + scores_upto_t[:batch_size_t, t - 1].unsqueeze(1), dim=2
                )

            # If a sentence is over, add the transition to the STOP tag
            if terminates:
                scores_upto_t[terminates, t + 1], backpointers[terminates, t + 1, :] = torch.max(
                    scores_upto_t[terminates, t].unsqueeze(1) + transitions[self.stop_tag].unsqueeze(0), dim=2
                )

        # Decode/trace best path backwards
        decoded = torch.zeros((batch_size, backpointers.size(1)), dtype=torch.long, device=flair.device)
        pointer = torch.ones((batch_size, 1), dtype=torch.long, device=flair.device) * self.stop_tag

        for t in list(reversed(range(backpointers.size(1)))):
            decoded[:, t] = torch.gather(backpointers[:, t, :], 1, pointer).squeeze(1)
            pointer = decoded[:, t].unsqueeze(1)

        # Sanity check
        assert torch.equal(
            decoded[:, 0], torch.ones((batch_size), dtype=torch.long, device=flair.device) * self.start_tag
        )

        # Remove the start tag and the backscore to the stop tag
        scores_upto_t = scores_upto_t[:, :-1, :]
        decoded = decoded[:, 1:]

        # Max + softmax to get a confidence score for the predicted label and append a label to each token
        scores = softmax(scores_upto_t, dim=2)
        confidences = torch.max(scores, dim=2)

        tags = []
        for tag_seq, tag_seq_conf, length_seq in zip(decoded, confidences.values, lengths):
            tags.append(
                [
                    (self.tag_dictionary.get_item_for_index(tag), conf.item())
                    for tag, conf in list(zip(tag_seq, tag_seq_conf))[:length_seq]
                ]
            )

        if probabilities_for_all_classes:
            all_tags = self._all_scores_for_token(scores.cpu(), lengths, sentences)

        return tags, all_tags

    def _all_scores_for_token(self, scores: torch.Tensor, lengths: torch.IntTensor, sentences: List[Sentence]):
        """
        Returns all scores for each tag in the tag dictionary.
        :param scores: softmax scores for the current batch of sentences
        :param lengths: lengths of sentences in batch
        :param sentences: list of the batch's sentences
        """
        scores = scores.numpy()
        prob_tags_per_sentence = []
        for scores_sentence, length, sentence in zip(scores, lengths, sentences):
            scores_sentence = scores_sentence[:length]
            prob_tags_per_sentence.append(
                [
                    [
                        Label(token, self.tag_dictionary.get_item_for_index(score_id), score)
                        for score_id, score in enumerate(score_dist)
                    ]
                    for score_dist, token in zip(scores_sentence, sentence)
                ]
            )
        return prob_tags_per_sentence
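Two details in viterbi.py are easy to get wrong, so here is a small standalone sanity check. This sketch is illustrative only and not part of the commit: it checks the 1-dim matrix addressing described in the _format_targets docstring, and shows why _log_sum_exp subtracts the max before exponentiating.

import torch

# 1) Flattened addressing from the _format_targets docstring: with a tagset of
#    12 tags, the cell for "to tag 10, from tag 5" is addressed by the 1-dim
#    index 10 + tagset_size * 5 = 70.
tagset_size, to_tag, from_tag = 12, 10, 5
assert to_tag + tagset_size * from_tag == 70

# 2) Numerically stable log-sum-exp, as in ViterbiLoss._log_sum_exp: the naive
#    form overflows for large scores, the max-shifted form does not.
x = torch.tensor([[1000.0, 1001.0, 999.0]])
naive = torch.log(torch.exp(x).sum(dim=1))  # exp(1000.) overflows in float32 -> inf
m, _ = torch.max(x, dim=1)
stable = m + torch.log(torch.exp(x - m.unsqueeze(1)).sum(dim=1))
assert torch.isinf(naive).all()
assert torch.allclose(stable, torch.logsumexp(x, dim=1))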
part/__init__.py
ADDED
@@ -0,0 +1,2 @@
from part.data import *
from part.dropout import *
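Because part/__init__.py re-exports everything from part.data and part.dropout via wildcard imports, downstream code can pull these classes from the package root. A small illustrative sketch, not part of the commit:

import part

# Token/Span and both dropout modules are available at package level
# thanks to the wildcard imports above.
token = part.Token("admission")
dropout = part.WordDropout(dropout_rate=0.05)
print(token.text, dropout)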
part/data.py
ADDED
@@ -0,0 +1,142 @@
from typing import Dict, List, Optional
from flair.data import _PartOfSentence, DataPoint, Label

class Token(_PartOfSentence):
    """
    This class represents one word in a tokenized sentence. Each token may have any number of tags. It may also point
    to its head in a dependency tree.

    :param text: the token's text
    :param head_id: index of this token's head within the sentence (for dependency trees)
    :param whitespace_after: whether the token is followed by whitespace
    :param start_position: the character offset at which this token starts in the document
    :param sentence: the Sentence this token belongs to, if any
    """

    def __init__(
        self,
        text: str,
        head_id: Optional[int] = None,
        whitespace_after: int = 1,
        start_position: int = 0,
        sentence=None,
    ):
        super().__init__(sentence=sentence)

        self.form: str = text
        self._internal_index: Optional[int] = None
        self.head_id: Optional[int] = head_id
        self.whitespace_after: int = whitespace_after

        self.start_pos = start_position
        self.end_pos = start_position + len(text)

        self._embeddings: Dict = {}
        self.tags_proba_dist: Dict[str, List[Label]] = {}

    @property
    def idx(self) -> int:
        if isinstance(self._internal_index, int):
            return self._internal_index
        else:
            raise ValueError

    @property
    def text(self):
        return self.form

    @property
    def unlabeled_identifier(self) -> str:
        return f'Token[{self.idx - 1}]: "{self.text}"'

    def add_tags_proba_dist(self, tag_type: str, tags: List[Label]):
        self.tags_proba_dist[tag_type] = tags

    def get_tags_proba_dist(self, tag_type: str) -> List[Label]:
        if tag_type in self.tags_proba_dist:
            return self.tags_proba_dist[tag_type]
        return []

    def get_head(self):
        return self.sentence.get_token(self.head_id)

    @property
    def start_position(self) -> int:
        return self.start_pos

    @property
    def end_position(self) -> int:
        return self.end_pos

    @property
    def embedding(self):
        return self.get_embedding()

    def __repr__(self):
        return self.__str__()

    def add_label(self, typename: str, value: str, score: float = 1.0):
        """
        The Token is a special _PartOfSentence in that it may be initialized without a Sentence.
        Therefore, labels are added to the Sentence only if it exists.
        """
        if self.sentence:
            super().add_label(typename=typename, value=value, score=score)
        else:
            DataPoint.add_label(self, typename=typename, value=value, score=score)

    def set_label(self, typename: str, value: str, score: float = 1.0):
        """
        The Token is a special _PartOfSentence in that it may be initialized without a Sentence.
        Therefore, labels are set on the Sentence only if it exists.
        """
        if self.sentence:
            super().set_label(typename=typename, value=value, score=score)
        else:
            DataPoint.set_label(self, typename=typename, value=value, score=score)


class Span(_PartOfSentence):
    """
    This class represents one textual span consisting of Tokens. It is used when tokens are nested,
    i.e. when several tokens together form a longer phrase.

    :param tokens: List of tokens in the span
    """

    def __init__(self, tokens: List[Token]):
        super().__init__(tokens[0].sentence)
        self.tokens = tokens
        super()._init_labels()

    @property
    def start_position(self) -> int:
        return self.tokens[0].start_position

    @property
    def end_position(self) -> int:
        return self.tokens[-1].end_position

    @property
    def text(self) -> str:
        return " ".join([t.text for t in self.tokens])

    @property
    def unlabeled_identifier(self) -> str:
        return f'Span[{self.tokens[0].idx - 1}:{self.tokens[-1].idx}]: "{self.text}"'

    def __repr__(self):
        return self.__str__()

    def __getitem__(self, idx: int) -> Token:
        return self.tokens[idx]

    def __iter__(self):
        return iter(self.tokens)

    def __len__(self) -> int:
        return len(self.tokens)

    @property
    def embedding(self):
        pass
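Since part/data.py mirrors flair's data model, a minimal sketch of how a detached Token behaves follows. It is illustrative only, not part of the commit; it assumes flair's _PartOfSentence and DataPoint base classes are importable as above, and the "B-DOCTOR" tag value is a hypothetical PHI label.

from part.data import Token

# A standalone token: character offsets are derived from start_position and the text length.
token = Token("Dr.", start_position=0)
assert token.text == "Dr."
assert token.start_position == 0 and token.end_position == 3

# With no Sentence attached, add_label falls back to DataPoint.add_label (see the override above).
token.add_label("ner", "B-DOCTOR", score=0.98)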
part/dropout.py
ADDED
@@ -0,0 +1,60 @@
import torch


class LockedDropout(torch.nn.Module):
    """
    Implementation of locked (or variational) dropout.
    Randomly drops out entire parameters in embedding space, reusing the same dropout mask at every timestep.

    :param dropout_rate: the fraction of input units to drop, between 0 and 1
    :param batch_first: whether the input is shaped (batch, seq, features) rather than (seq, batch, features)
    :param inplace: whether the operation is reported as in-place in the module repr
    """

    def __init__(self, dropout_rate=0.5, batch_first=True, inplace=False):
        super(LockedDropout, self).__init__()
        self.dropout_rate = dropout_rate
        self.batch_first = batch_first
        self.inplace = inplace

    def forward(self, x):
        if not self.training or not self.dropout_rate:
            return x

        # Sample one mask per sequence (not per timestep) and reuse it across the time dimension
        if not self.batch_first:
            m = x.data.new(1, x.size(1), x.size(2)).bernoulli_(1 - self.dropout_rate)
        else:
            m = x.data.new(x.size(0), 1, x.size(2)).bernoulli_(1 - self.dropout_rate)

        # Rescale surviving units by 1 / (1 - p) so expected activations stay unchanged
        mask = torch.autograd.Variable(m, requires_grad=False) / (1 - self.dropout_rate)
        mask = mask.expand_as(x)
        return mask * x

    def extra_repr(self):
        inplace_str = ", inplace" if self.inplace else ""
        return "p={}{}".format(self.dropout_rate, inplace_str)


class WordDropout(torch.nn.Module):
    """
    Implementation of word dropout. Randomly drops out entire words
    (or characters) in embedding space.
    """

    def __init__(self, dropout_rate=0.05, inplace=False):
        super(WordDropout, self).__init__()
        self.dropout_rate = dropout_rate
        self.inplace = inplace

    def forward(self, x):
        if not self.training or not self.dropout_rate:
            return x

        # Sample one keep/drop decision per word position, shared across the embedding dimension
        m = x.data.new(x.size(0), x.size(1), 1).bernoulli_(1 - self.dropout_rate)

        mask = torch.autograd.Variable(m, requires_grad=False)
        return mask * x

    def extra_repr(self):
        inplace_str = ", inplace" if self.inplace else ""
        return "p={}{}".format(self.dropout_rate, inplace_str)
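The difference between the two dropout variants is easiest to see on a tensor of ones. A minimal sketch, illustrative only and not part of the commit:

import torch
from part.dropout import LockedDropout, WordDropout

torch.manual_seed(0)
x = torch.ones(2, 4, 6)  # (batch, seq_len, embedding_dim)

locked = LockedDropout(dropout_rate=0.5)
locked.train()
out = locked(x)
# The same embedding dimensions are zeroed at every timestep of a sequence,
# and the survivors are rescaled to 1 / (1 - 0.5) = 2.0.
assert torch.equal(out[:, 0, :], out[:, 1, :])

word = WordDropout(dropout_rate=0.05)
word.train()
out = word(x)
# Whole word vectors are either zeroed or passed through unchanged (no rescaling).
assert set(out.flatten().tolist()) <= {0.0, 1.0}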
requirements.txt
ADDED
@@ -0,0 +1,9 @@
torch
torchvision
torchaudio
flair
numpy
pandas
nltk
panel
hvplot