Commit · 66a3123
1 Parent(s): 23bd3af
first commit

Files changed:
- NeuralTextGenerator.py +484 -0
- app.py +7 -0
- requirements +4 -0
- textprocessing.py +90 -0
- utils.py +38 -0
NeuralTextGenerator.py
ADDED
@@ -0,0 +1,484 @@
import math
import time
import torch
import numpy as np
from transformers import AutoModelForMaskedLM, AutoTokenizer
from torch.utils.data import TensorDataset, DataLoader, RandomSampler
from transformers import AdamW, get_linear_schedule_with_warmup
import torch.nn.functional as F
from textprocessing import *
from utils import *

try:
    from apex import amp

    APEX_AVAILABLE = True
except ModuleNotFoundError:
    APEX_AVAILABLE = False

DEFAULT_DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'


class BertTextGenerator:
    def __init__(self, model_version, device=DEFAULT_DEVICE, use_apex=APEX_AVAILABLE, use_fast=True,
                 do_basic_tokenize=True):
        """
        Wrapper around a BERT model loaded through Hugging Face's AutoModelForMaskedLM.
        This class implements methods to generate text with the BERT model.

        Parameters
        ----------
        model_version : str
            The name of the BERT model to initialize from AutoModelForMaskedLM
        device : str
            Type of pytorch device to adopt. By default it is set to DEFAULT_DEVICE,
            that is 'cuda' if cuda is available, otherwise 'cpu'
        use_apex : boolean
            Flag to adopt nvidia apex
        """
        self.device = device
        self.model_version = model_version
        self.model = AutoModelForMaskedLM.from_pretrained(model_version, output_attentions=True)
        self.model.to(self.device)
        self.use_apex = use_apex

        # Move to finetune
        if use_apex:
            optimizer = torch.optim.SGD(self.model.parameters(), lr=1e-3)
            self.model, optimizer = amp.initialize(self.model, optimizer, opt_level="O2", keep_batchnorm_fp32=True,
                                                   loss_scale="dynamic")

        self.tokenizer = AutoTokenizer.from_pretrained(model_version, do_lower_case="uncased" in model_version,
                                                       use_fast=use_fast,
                                                       do_basic_tokenize=do_basic_tokenize)  # added to avoid splitting of unused tokens
        self.num_attention_masks = len(self.model.base_model.base_model.encoder.layer)
        self.has_format_tokenizer = False

    def generate(self, save_to_path=None, n_sentences=100, seed_text="", batch_size=10, max_iter=500, verbose=False,
                 print_every=50, max_len=40, min_len=4, avg_len=20, std_len=4, init_mask_prob=1,
                 generation_method="parallel", masked_portion=1, temperature=1.0, sample=True, top_k=100, burnin=None):
        '''
        Principal method of the class, used to generate sentences. The methodology used to generate a batch of
        sentences can be decomposed into 3 main steps:
        1) Initialization: each batch is initialized as a matrix of tokens where each row represents a sentence
        2) Selection: at each iteration, one or more tokens are selected and masked in each sentence
        3) Sampling: at each iteration BERT computes the logits of the masked tokens, which are then used to sample
           the new tokens that replace the masked ones

        Parameters
        ==============================
        (General)
        ------------------------------
        save_to_path: str, default = None
            path of the txt file where the generated sentences are stored
        n_sentences: int, default = 100
            total number of sentences to generate
        seed_text: str, default = ""
            initial text used to generate the sentences
        batch_size: int, default = 10
            number of sentences in each batch
        max_iter: int, default = 500
            number of iterations
        verbose: boolean, default = False
        print_every: int, default = 50
            print a sample from the batch every print_every iterations. Used only if verbose is True
        (Length of the sentences)
        ------------------------------
        The method can generate sentences of different lengths. For each batch, the length of its sentences
        is sampled from a normal distribution N(avg_len, std_len) and then rounded to the closest int.
        max_len and min_len are used to clip the length.
        max_len: int, default = 40
            maximum length of each sentence
        min_len: int, default = 4
            minimum length of each sentence
        avg_len: float or int, default = 20
            average length of the sentences
        std_len: float or int, default = 4
            standard deviation of the sentence length
        (Initialization)
        ------------------------------
        Each batch is initialized as a matrix of tokens of dimension (batch_size x batch_len + 2), where batch_len is
        selected as described above. A cls_token is added at the beginning of each sentence and a sep_token at the end.
        Every other token is selected based on the value of init_mask_prob:
        - if init_mask_prob == 1 -> each token is [MASK] with probability 1 (the whole batch is [MASK]s)
        - if init_mask_prob == 0 -> each token is a random token from the tokenizer vocabulary (the batch is
          initialized as random sentences)
        - if init_mask_prob in (0, 1) -> each token is sampled as [MASK] with probability init_mask_prob or, with
          probability (1 - init_mask_prob), as any other token in the tokenizer vocabulary
        init_mask_prob: float in [0,1], default = 1
            probability of the mask token
        (Selection)
        ------------------------------
        generation_method: str, default = "parallel"
            method used to select the tokens to replace at each iteration
            - 'parallel': for each sentence one token, or a percentage of tokens, is selected randomly based on the
              value of masked_portion
            - 'sequential': the tokens are selected sequentially. At iteration i the token in position i % batch_len
              is selected
            - 'attention': at the first iteration one token is selected randomly for each sentence. In later
              iterations the token of each sentence is selected with a probability distribution based on the
              attention mask of the token sampled in the previous iteration
        masked_portion: int or float in [0, 1], default = 1
            percentage of tokens to mask in each sentence. Used only if generation_method is 'parallel'
        (Sampling)
        ------------------------------
        temperature: float, default = 1
            temperature for the logits (logits <- logits / temperature)
        sample: boolean, default = True
            when sample is True each masked token is replaced by sampling randomly according to the corresponding logits
        top_k: int or None, default = 100
            when top_k > 0 each masked token is replaced by sampling randomly according to the logits of the top_k
            tokens only. If set to None, all the tokens are considered
        burnin: int, default = None
            after burnin iterations the tokens are chosen deterministically, selecting the one with the maximum
            logit score

        Returns
        -------
        list
            a list of sentences (str) already detokenized and cleaned
        '''

        n_batches = math.ceil(n_sentences / batch_size)

        if burnin is None:
            burnin = max_iter

        sentences = []
        start_time = time.time()

        for batch_n in range(n_batches):
            batch_sentence_len = np.round(np.random.normal(avg_len, std_len))
            batch_sentence_len = int(np.clip(batch_sentence_len, min_len, max_len))

            # Generate and append batch of sentences
            sentences += self.generate_batch(seed_text, batch_size, max_iter, verbose=verbose, print_every=print_every,
                                             sent_len=batch_sentence_len, init_mask_prob=init_mask_prob,
                                             generation_method=generation_method,
                                             masked_portion=masked_portion, temperature=temperature, sample=sample,
                                             top_k=top_k, burnin=burnin)

            # Print if verbose
            if verbose and (batch_n + 1) % print_every == 0:
                print("Finished batch %d in %.3fs" % (batch_n + 1, time.time() - start_time))
                start_time = time.time()

        # Store results
        if save_to_path is not None:
            with open(save_to_path, 'w') as f:
                for sent in sentences:
                    f.write(sent + '\n')

        return sentences

    def generate_batch(self, seed_text, batch_size, max_iter, verbose, print_every, sent_len, init_mask_prob,
                       generation_method, masked_portion, temperature, sample, top_k, burnin):

        # Init batch
        seed_text = self.tokenizer.tokenize(
            self.tokenizer.cls_token + seed_text)  # add the [CLS] token at the beginning of the seed_text
        seed_len = len(seed_text)
        batch = self.get_init_text(seed_text, sent_len, batch_size, init_mask_prob)

        # Init sampling parameters
        if generation_method == "parallel":
            if type(masked_portion) is int:
                num_mask = masked_portion
            else:
                num_mask = int(np.round(sent_len * masked_portion))
            list_probs = None
        elif generation_method == "sequential":
            list_probs = None
            num_mask = 1
        else:
            # One probability distribution for each sentence in the batch (initially uniform among all tokens)
            num_mask = 1
            list_probs = [np.full(sent_len, 1.0 / sent_len)] * batch_size
            counter = np.zeros((batch_size, sent_len))

        with torch.no_grad():
            for ii in range(max_iter):

                # 1. Select indices to replace
                idx_to_replace = self.__select_tokens_to_replace(generation_method, sent_len, batch_size, num_mask,
                                                                 ii, seed_len, list_probs)

                # 2. Replace with mask
                self.__replace_tokens(batch, idx_to_replace, tokens=self.tokenizer.mask_token_id)

                # 3. Sample new tokens
                out = self.model(batch)
                logits = out['logits']

                if generation_method == 'attention':
                    counter[np.arange(batch_size), idx_to_replace.flatten() - seed_len] += 1
                    attentions = torch.stack(out['attentions'])
                    list_probs = self.__compute_probs(attentions, batch_size, idx_to_replace, seed_len, counter)

                sample = False if ii >= burnin else sample
                idxs = self.generate_step(logits, gen_idx=idx_to_replace, temperature=temperature, sample=sample,
                                          top_k=top_k)

                # 4. Replace tokens
                self.__replace_tokens(batch, idx_to_replace, tokens=idxs)

                if verbose and ii % print_every == 0:
                    print_batch(self.tokenizer, batch, 3)

        return self.tokenizer.batch_decode(batch, skip_special_tokens=True)

    def get_init_text(self, seed_text, sent_len, batch_size, init_mask_prob):
        """ Get the initial sentences by padding seed_text with either masks or random words up to sent_len """

        seed_text = self.tokenizer.convert_tokens_to_ids(seed_text)

        if init_mask_prob == 1:
            batch = [seed_text + [self.tokenizer.mask_token_id] * sent_len + [self.tokenizer.sep_token_id] for _ in
                     range(batch_size)]
        elif init_mask_prob == 0:
            batch = [seed_text + np.random.randint(0, self.tokenizer.vocab_size, sent_len).tolist() + [
                self.tokenizer.sep_token_id] for _ in range(batch_size)]
        else:
            p = [(1 - init_mask_prob) / (self.tokenizer.vocab_size - 1)] * self.tokenizer.vocab_size
            p[self.tokenizer.mask_token_id] = init_mask_prob

            batch = [seed_text + np.random.choice(np.arange(self.tokenizer.vocab_size), sent_len, p=p).tolist() + [
                self.tokenizer.sep_token_id] for _ in range(batch_size)]

        return torch.tensor(batch).to(self.device)

    def __select_tokens_to_replace(self, generation_method, sent_len, batch_size, num_mask, ii, seed_len, list_probs):
        if generation_method == "sequential":
            kk = [[ii % sent_len] for _ in range(batch_size)]
        elif generation_method == "attention":
            kk = [np.random.choice(range(sent_len), num_mask, p=p).tolist() for p in list_probs]
        elif generation_method == 'parallel':
            # kk = np.random.randint(0, sent_len, (batch_size, num_mask))
            x = np.random.randint(0, sent_len)
            kk = [[x] for _ in range(batch_size)]
        # elif generation_method == 'parallel original':
        #     x = np.random.randint(0, sent_len)
        #     kk = [[x] for _ in range(batch_size)]

        return np.array(kk) + seed_len

    def __replace_tokens(self, batch, idx_to_replace, tokens):
        rows_idx = np.repeat(range(len(batch)), idx_to_replace.shape[-1]).reshape(idx_to_replace.shape)

        if type(tokens) is not int:
            tokens = tokens.reshape(idx_to_replace.shape)

        batch[rows_idx, idx_to_replace] = tokens

    def __compute_probs(self, attentions, batch_size, idx, seed_len, counter):
        ''' Compute probabilities from the attention masks '''
        # list_probs = []
        #
        # # attentions has dimension (batch_size, num_attention_masks, sentence_len, sentence_len)
        # for i in range(batch_size):
        #     average_prob = attentions[i, :, idx[i], :].mean(axis=0).flatten().cpu().numpy()
        #     average_prob = average_prob[seed_len:-1]  # avoid seed_text and last token ([SEP])
        #     average_prob = average_prob / average_prob.sum()  # normalize
        #     list_probs.append(average_prob)
        #
        # return list_probs

        avg_attentions = attentions.mean(axis=(0, 2)).cpu().detach().numpy()  # mean over encoders and attention masks
        avg_attentions = avg_attentions[np.arange(batch_size), seed_len:-1, idx.flatten()]  # for each sentence extract
        # the attention corresponding to the masked token (avoiding the special tokens and the seed)

        c = counter + 1
        prob = avg_attentions / c

        return prob / prob.sum(axis=1)[:, np.newaxis]

    # def counter_penalization(attention, idx_mask, counter, **kwargs):
    #     a = attention.mean(
    #         axis=(0, 1)).cpu().detach().numpy()  # mean over ax0 that is encoders and ax1 that is attention_mask
    #     a = a[1:-1, idx_mask].reshape(-1, 1)
    #     c = np.array(counter) + 1
    #     prob = a.flatten() / c
    #     prob = prob / sum(prob)
    #     return prob

    def generate_step(self, out, gen_idx, temperature=1, sample=True, top_k=None):
        """ Generate a word from out[gen_idx]
        args:
            - out (torch.Tensor): tensor of logits of size batch_size x seq_len x vocab_size
            - gen_idx (int): location for which to generate
            - top_k (int): if > 0, only sample from the top k most probable words
            - sample (Bool): if True, sample from the full distribution. Overridden by top_k
        """
        if type(gen_idx) is int:
            gen_idx = np.array(gen_idx)

        rows_idx = np.repeat(range(len(out)), gen_idx.shape[-1]).reshape(gen_idx.shape)

        logits = out[rows_idx, gen_idx]

        if temperature is not None:
            logits = logits / temperature

        if sample:
            # general sampling
            if top_k is None:
                dist = torch.distributions.categorical.Categorical(logits=logits)
                idx = dist.sample().squeeze(-1)
            # top_k sampling
            else:
                kth_vals, kth_idx = logits.topk(top_k, dim=-1)
                dist = torch.distributions.categorical.Categorical(logits=kth_vals)
                idx = kth_idx.gather(dim=-1, index=dist.sample().unsqueeze(-1)).squeeze(-1)

        # burnin - deterministic
        else:
            idx = torch.argmax(logits, dim=-1)

        return idx

    def finetune(self, sentences, labels=None, encoded_dict=None, mask_percentage=0.15, epochs=4, batch_size=32,
                 optimizer=AdamW, optimizer_parameters=dict(lr=2e-5, eps=1e-8),
                 scheduler=get_linear_schedule_with_warmup, scheduler_parameters=dict(num_warmup_steps=0),
                 num_tokens_per_class=3
                 ):

        if encoded_dict is None:
            # set encoder
            if labels is None:
                self.encoder = Encoder(self.tokenizer)
                encoded_dict = self.encoder.encode(sentences)
            else:
                classes = np.unique(labels)
                self.encoder = LabelEncoder(self.model, self.tokenizer, classes=classes,
                                            num_tokens_per_class=num_tokens_per_class)
                encoded_dict = self.encoder.encode(sentences, labels)

        # Retrieve tokenized sentences and attention masks
        input_ids = encoded_dict['input_ids']
        attention_mask = encoded_dict['attention_mask']

        dataset = TensorDataset(input_ids, attention_mask)
        dataloader = DataLoader(dataset, sampler=RandomSampler(dataset), batch_size=batch_size)

        # Setting optimizer and scheduler
        optimizer = optimizer(self.model.parameters(), **optimizer_parameters)
        if self.use_apex:
            self.model, optimizer = amp.initialize(self.model, optimizer, opt_level="O2", keep_batchnorm_fp32=True,
                                                   loss_scale="dynamic")

        total_steps = len(dataloader) * epochs
        scheduler = scheduler(optimizer, num_training_steps=total_steps, **scheduler_parameters)

        # TODO add stats
        training_stats = []
        test_stats = []
        total_t0 = time.time()

        self.model.train()

        for epoch_i in range(0, epochs):

            print(f'\n======== Epoch {epoch_i + 1} / {epochs} ========')
            print('Training...')

            t0 = time.time()
            total_train_loss = 0

            for step, batch in enumerate(dataloader):

                if step % 25 == 0 and not step == 0:
                    elapsed = format_time(time.time() - t0)
                    print('  Batch {:>5,}  of  {:>5,}.    Elapsed: {:}.'.format(step, len(dataloader), elapsed))

                batch_input = batch[0].to(self.device)
                batch_attention = batch[1].to(self.device)

                # Truncate to BERT's maximum input length of 512 tokens
                if batch_input.shape[1] > 512:
                    batch_input = batch_input[:, :512]
                    batch_attention = batch_attention[:, :512]

                # Computing the number of tokens to mask based on mask_percentage
                num_sent, num_tokens = batch_input.shape
                num_tokens_to_mask = int(mask_percentage * num_tokens)

                # Randomly generating num_tokens_to_mask positions to mask for each sentence, considering only real
                # tokens (not [CLS] nor the label-tokens that are at the beginning of the sentence)
                start_id = 1 + num_tokens_per_class  # mask only
                batch_mask_ids = torch.randint(start_id, num_tokens - 1, size=(num_sent, num_tokens_to_mask))

                # Each sentence needs to be indexed num_tokens_to_mask times.
                # This array is of the type [0,0,0, ..., 1,1,1, ..., 2,2,2, ..., num_sentences - 1]
                sentence_ids = np.repeat(np.arange(len(batch_input)), num_tokens_to_mask)

                # Retrieve the original tokens to mask
                batch_masked_tokens = batch_input[sentence_ids, batch_mask_ids.flatten()]

                # Mask the tokens
                batch_input[sentence_ids, batch_mask_ids.flatten()] = self.tokenizer.mask_token_id

                # Forward pass
                self.model.zero_grad()
                result = self.model(batch_input, attention_mask=batch_attention, return_dict=True)
                logits = result['logits']

                # Retrieve logits only for the masked tokens. logits is a tensor of dim [batch_size, num_tokens, len_vocab]
                # logits = logits[np.concatenate([[i] * batch_mask_ids.shape[1] for i in range(len(batch_mask_ids))], 0),
                #                 batch_mask_ids.flatten(), :]
                logits = logits[sentence_ids, batch_mask_ids.flatten(), :]

                loss = F.cross_entropy(logits, batch_masked_tokens.flatten())
                total_train_loss += loss.item()

                # Backward pass
                if self.use_apex:
                    with amp.scale_loss(loss, optimizer) as scaled_loss:
                        scaled_loss.backward()
                else:
                    loss.backward()

                # Clip the norm of the gradients to 1.0.
                # This is to help prevent the "exploding gradients" problem.
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)

                optimizer.step()
                scheduler.step()

                # Restoring the masked tokens
                batch_input[sentence_ids, batch_mask_ids.flatten()] = batch_masked_tokens.flatten()

            avg_train_loss = total_train_loss / len(dataloader)
            training_time = format_time(time.time() - t0)

            print("")
            print("  Average training loss: {0:.2f}".format(avg_train_loss))
            print("  Training epoch took: {:}".format(training_time))

        print("")
        print("Training complete!")

        print("Total training took {:} (h:mm:ss)".format(format_time(time.time() - total_t0)))


if __name__ == '__main__':

    # model initialization
    en_bert_model = BertTextGenerator('bert-base-uncased')

    # text generation
    parameters = {'n_sentences': 10,  # 1000
                  'seed_text': "",
                  'batch_size': 10,  # 50
                  'max_iter': 150,
                  'init_mask_prob': 1,
                  'generation_method': "attention",
                  'masked_portion': 1,
                  'temperature': 1,
                  'sample': True,
                  'top_k': 100,
                  }

    file_path = None
    print('\n\n ENGLISH TEXT GENERATION')
    en_bert_sents = en_bert_model.generate(save_to_path=file_path, **parameters)
    print("\nEnglish text generated: ")
    for sent in en_bert_sents:
        print(f"\t{sent}")
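The __main__ block above only exercises generation. As a quick orientation for the fine-tuning path, here is a minimal, hypothetical usage sketch (not part of this commit): the sentences and labels are made up, and it assumes textprocessing.py and utils.py are importable from the same directory.

# Hypothetical sketch of BertTextGenerator.finetune with labels, then conditioned generation.
from NeuralTextGenerator import BertTextGenerator

model = BertTextGenerator('bert-base-uncased')
train_sentences = ["the movie was great", "the plot was terrible"]  # made-up data
train_labels = ["pos", "neg"]                                       # made-up labels
model.finetune(train_sentences, labels=train_labels, epochs=1, batch_size=2, num_tokens_per_class=3)
generated = model.generate(n_sentences=5, batch_size=5, max_iter=50, generation_method="attention")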
app.py
ADDED
@@ -0,0 +1,7 @@
import gradio as gr

def greet(name):
    return "Hello " + name + "!!"

iface = gr.Interface(fn=greet, inputs="text", outputs="text")
iface.launch()
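app.py is, for now, just the stock Gradio "hello" demo. A hedged sketch of how the generator could be exposed through the same Interface pattern (hypothetical, not what this commit does; it assumes NeuralTextGenerator.py is on the path and that loading bert-base-uncased at startup is acceptable):

import gradio as gr
from NeuralTextGenerator import BertTextGenerator

model = BertTextGenerator('bert-base-uncased')  # loaded once at startup

def generate_text(seed_text):
    # generate a few short sentences from the optional seed text
    sents = model.generate(n_sentences=3, batch_size=3, seed_text=seed_text, max_iter=50)
    return "\n".join(sents)

iface = gr.Interface(fn=generate_text, inputs="text", outputs="text")
iface.launch()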
requirements
ADDED
@@ -0,0 +1,4 @@
transformers
datasets
torch
evaluate
textprocessing.py
ADDED
@@ -0,0 +1,90 @@
import re
import numpy as np


class Formatter():

    def __init__(self, replace_tokens, unused_type='[unusedi]'):
        self.dict_token_replace = {k: ' ' + unused_type.replace('i', str(i + 1)) + ' ' for i, k in
                                   enumerate(replace_tokens)}

    def format(self, path, pattern):
        lines = []

        re_line = re.compile(pattern)
        with open(path, 'r') as f:
            for match in re_line.finditer(''.join(f.readlines())):
                line = match[0]

                # Replace
                for k, v in self.dict_token_replace.items():
                    line = line.replace(k, v)

                lines.append(line)

        return lines

    def unformat(self, sentences):
        unformatted_sentences = []
        for sent in sentences:
            # Replace
            for k, v in self.dict_token_replace.items():
                sent = sent.replace(v.strip(), k)

            unformatted_sentences.append(sent)

        return unformatted_sentences


class Encoder():
    def __init__(self, tokenizer):
        self.set_tokenizer(tokenizer)

    def set_tokenizer(self, tokenizer):
        self.tokenizer = tokenizer

    def set_model(self, model):
        self.model = model

    def encode(self, lines):
        encoded_dict = self.tokenizer.batch_encode_plus(
            lines,  # Sentences to encode.
            padding=True,
            add_special_tokens=True,  # Add '[CLS]' and '[SEP]'
            return_attention_mask=True,  # Construct attn. masks.
            return_tensors='pt',  # Return pytorch tensors.
        )

        return encoded_dict


class LabelEncoder(Encoder):
    def __init__(self, model, tokenizer, classes=[], num_tokens_per_class=3):
        super().__init__(tokenizer)
        self.set_model(model)

        # Preparing the special tokens related to the labels.
        # Each token is of the type '[cls-k]' where cls is a class in classes and
        # k is an integer value in range(0, num_tokens_per_class)
        self.num_tokens_per_class = num_tokens_per_class
        self.label_special_tokens_dict = {cls: [f'[{cls}-{i}]' for i in range(num_tokens_per_class)] for cls in classes}
        self.label_special_tokens_list = np.concatenate([list(x) for x in self.label_special_tokens_dict.values()]).tolist()

        # Add the special tokens to the tokenizer and resize the model's embeddings accordingly
        self.tokenizer.add_special_tokens({'additional_special_tokens': self.label_special_tokens_list})
        self.model.resize_token_embeddings(len(self.tokenizer))

    def encode(self, lines, labels):
        labeled_lines = [' '.join(self.label_special_tokens_dict[label]) + ' ' + line for line, label in zip(lines, labels)]
        return super().encode(labeled_lines)
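For reference, a small hypothetical sketch of what LabelEncoder does to its input before tokenization (the model, tokenizer, and the class/label names here are made up and assumed to be already available):

# With classes=['pos', 'neg'] and num_tokens_per_class=3, the encoder registers the special tokens
# [pos-0] [pos-1] [pos-2] [neg-0] [neg-1] [neg-2], resizes the model embeddings, and prepends them:
#   "the movie was great" with label 'pos'  ->  "[pos-0] [pos-1] [pos-2] the movie was great"
encoder = LabelEncoder(model, tokenizer, classes=['pos', 'neg'], num_tokens_per_class=3)
encoded = encoder.encode(["the movie was great"], ["pos"])  # dict with 'input_ids' and 'attention_mask'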
utils.py
ADDED
@@ -0,0 +1,38 @@
import numpy as np
import datetime


def print_batch(tokenizer, batch, n, header=None):
    '''
    Print a batch of tokens. Used mainly for debugging

    Parameters
    ------------
    tokenizer : Tokenizer (https://huggingface.co/docs/tokenizers/python/latest/api/reference.html#tokenizers.Tokenizer)

    batch : List of List[int]

    n : int
        number of sentences to print from the batch
    header : str
        header of the batch, printed before the sentences
    '''
    print(f'=== {header or "Batch"} ===')
    print(tokenizer.batch_decode(batch[:n], skip_special_tokens=True))
    print('...\n' if n < len(batch) else '')


def flat_accuracy(preds, labels):
    pred_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()
    return np.sum(pred_flat == labels_flat) / len(labels_flat)


def format_time(elapsed):
    '''
    Takes a time in seconds and returns a string hh:mm:ss
    '''
    elapsed_rounded = int(round((elapsed)))
    return str(datetime.timedelta(seconds=elapsed_rounded))