step 50000
- .gitattributes +3 -0
- codeparrot_training.py +39 -47
- config.json +2 -1
- log/debug_0.log +0 -0
- log/debug_1.log +1 -0
- log/debug_10.log +1 -0
- log/debug_11.log +1 -0
- log/debug_12.log +1 -0
- log/debug_13.log +1 -0
- log/debug_14.log +1 -0
- log/debug_15.log +1 -0
- log/debug_2.log +1 -0
- log/debug_3.log +1 -0
- log/debug_4.log +1 -0
- log/debug_5.log +1 -0
- log/debug_6.log +1 -0
- log/debug_7.log +1 -0
- log/debug_8.log +1 -0
- log/debug_9.log +1 -0
- pytorch_model.bin +1 -1
- requirements.txt +1 -1
- runs/Sep20_14-28-12_leandro-16x-v100/1632148092.8874874/events.out.tfevents.1632148092.leandro-16x-v100.8660.1 +3 -0
- runs/Sep20_14-28-12_leandro-16x-v100/events.out.tfevents.1632148092.leandro-16x-v100.8660.0 +3 -0
- wandb/debug-internal.log +1 -0
- wandb/debug.log +1 -0
- wandb/latest-run +1 -0
- wandb/run-20210920_142810-36cw69uv/files/conda-environment.yaml +131 -0
- wandb/run-20210920_142810-36cw69uv/files/config.yaml +89 -0
- wandb/run-20210920_142810-36cw69uv/files/output.log +0 -0
- wandb/run-20210920_142810-36cw69uv/files/requirements.txt +81 -0
- wandb/run-20210920_142810-36cw69uv/files/wandb-metadata.json +24 -0
- wandb/run-20210920_142810-36cw69uv/files/wandb-summary.json +1 -0
- wandb/run-20210920_142810-36cw69uv/logs/debug-internal.log +3 -0
- wandb/run-20210920_142810-36cw69uv/logs/debug.log +22 -0
- wandb/run-20210920_142810-36cw69uv/run-36cw69uv.wandb +3 -0
    	
        .gitattributes
    CHANGED
    
@@ -15,3 +15,6 @@
 *.pt filter=lfs diff=lfs merge=lfs -text
 *.pth filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+wandb/debug-internal.log filter=lfs diff=lfs merge=lfs -text
+wandb/run-20210920_142810-36cw69uv/logs/debug-internal.log filter=lfs diff=lfs merge=lfs -text
+wandb/run-20210920_142810-36cw69uv/run-36cw69uv.wandb filter=lfs diff=lfs merge=lfs -text
    	
        codeparrot_training.py
    CHANGED
    
@@ -12,24 +12,22 @@ from argparse import Namespace
 import torch
 import logging
 import wandb
-import time
-
 
 class ConstantLengthDataset(IterableDataset):
+
     def __init__(self, tokenizer, dataset, seq_length=1024,
                  num_of_sequences=1024, chars_per_token=3.6):
         self.tokenizer = tokenizer
-        self.
+        self.concat_token_id = tokenizer.bos_token_id
         self.dataset = dataset
         self.seq_length = seq_length
         self.input_characters = seq_length * chars_per_token * num_of_sequences
-
+
     def __iter__(self):
         iterator = iter(self.dataset)
         more_examples = True
         while more_examples:
-            buffer = []
-            buffer_len = 0
+            buffer, buffer_len = [], 0
             while True:
                 if buffer_len >= self.input_characters:
                     break
@@ -42,7 +40,7 @@ class ConstantLengthDataset(IterableDataset):
             tokenized_inputs = tokenizer(buffer, truncation=False)['input_ids']
             all_token_ids = []
             for tokenized_input in tokenized_inputs:
-                all_token_ids.extend(tokenized_input + [self.
+                all_token_ids.extend(tokenized_input + [self.concat_token_id])
             for i in range(0, len(all_token_ids), self.seq_length):
                 input_ids = all_token_ids[i : i + self.seq_length]
                 if len(input_ids) == self.seq_length:
@@ -52,14 +50,16 @@ def setup_logging(project_name):
     logger = logging.getLogger(__name__)
     logging.basicConfig(
         format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
-        datefmt="%m/%d/%Y %H:%M:%S", level=logging.INFO,
+        datefmt="%m/%d/%Y %H:%M:%S", level=logging.INFO, handlers=[
+        logging.FileHandler(f"log/debug_{accelerator.process_index}.log"),
+        logging.StreamHandler()])
     if accelerator.is_main_process: # we only want to setup logging once
         wandb.init(project=project_name, config=args)
         run_name = wandb.run.name
         tb_writer = SummaryWriter()
         tb_writer.add_hparams(vars(args), {'0': 0})
         logger.setLevel(logging.INFO)
-        datasets.utils.logging.
+        datasets.utils.logging.set_verbosity_info()
         transformers.utils.logging.set_verbosity_info()
     else:
         tb_writer = None
@@ -69,13 +69,12 @@ def setup_logging(project_name):
         transformers.utils.logging.set_verbosity_error()
     return logger, tb_writer, run_name

-def create_dataloaders(dataset_name):
-
-
+def create_dataloaders(dataset_name, args):
+    ds_kwargs = {"streaming":True, "chunksize":40<<20, "error_bad_chunk":False}
+    train_data = load_dataset(dataset_name+'-train', split='train', **ds_kwargs)
     train_data = train_data.shuffle(buffer_size=args.shuffle_buffer,
                                     seed=args.seed)
-    valid_data = load_dataset(dataset_name+'-valid', split="train",
-                              streaming=True)
+    valid_data = load_dataset(dataset_name+'-valid', split="train", **ds_kwargs)
     train_dataset = ConstantLengthDataset(tokenizer, train_data,
                                           seq_length=args.seq_length)
     valid_dataset = ConstantLengthDataset(tokenizer, valid_data,
@@ -84,7 +83,7 @@ def create_dataloaders(dataset_name):
     eval_dataloader=DataLoader(valid_dataset, batch_size=args.valid_batch_size)
     return train_dataloader, eval_dataloader

-def get_grouped_params(model, no_decay=["bias", "LayerNorm.weight"]):
+def get_grouped_params(model, args, no_decay=["bias", "LayerNorm.weight"]):
     params_with_wd, params_without_wd = [], []
     for n, p in model.named_parameters():
         if any(nd in n for nd in no_decay): params_without_wd.append(p)
@@ -98,7 +97,7 @@ def log_metrics(step, metrics):
         wandb.log(metrics)
         [tb_writer.add_scalar(k, v, step) for k, v in metrics.items()]

-def evaluate():
+def evaluate(args):
     model.eval()
     losses = []
     for step, batch in enumerate(eval_dataloader):
@@ -112,44 +111,44 @@ def evaluate():
     except OverflowError: perplexity = float("inf")
     return loss.item(), perplexity.item()

+# Accelerator
+accelerator = Accelerator(dispatch_batches=True)
+acc_state = {str(k): str(v) for k, v in accelerator.state.__dict__.items()}
 # Hyperparameters
 project_name = 'transformersbook/codeparrot'
-dataset_name = '
-config = {"train_batch_size": 
-          "valid_batch_size": 
+dataset_name = '../codeparrot'
+config = {"train_batch_size": 2,
+          "valid_batch_size": 2,
           "weight_decay": 0.1,
-          "shuffle_buffer": 
-          "learning_rate": 
+          "shuffle_buffer": 1_000,
+          "learning_rate": 2e-4,
           "lr_scheduler_type": "cosine",
-          "num_warmup_steps": 
-          "gradient_accumulation_steps": 
-          "max_train_steps": 
-          "max_eval_steps": 
+          "num_warmup_steps": 750,
+          "gradient_accumulation_steps": 16,
+          "max_train_steps": 50_000,
+          "max_eval_steps": -1,
           "seq_length": 1024,
           "seed": 1,
-          "save_checkpoint_steps":
-args = Namespace(**config)
-set_seed(args.seed)
-
-# Accelerator
-accelerator = Accelerator()
+          "save_checkpoint_steps": 50_000}
+args = Namespace(**config, **acc_state)
 samples_per_step = accelerator.state.num_processes * args.train_batch_size
+set_seed(args.seed)

 # Logging
 logger, tb_writer, run_name = setup_logging(project_name.split("/")[1])
 logger.info(accelerator.state)

 # Load model and tokenizer
-if accelerator.is_main_process:
+if accelerator.is_main_process:
     hf_repo = Repository("./", clone_from=project_name, revision=run_name)
-model = GPT2LMHeadModel.from_pretrained("./")
+model = GPT2LMHeadModel.from_pretrained("./", gradient_checkpointing=True)
 tokenizer = AutoTokenizer.from_pretrained("./")

 # Load dataset and dataloader
-train_dataloader, eval_dataloader = create_dataloaders(dataset_name)
+train_dataloader, eval_dataloader = create_dataloaders(dataset_name, args)

 # Prepare the optimizer and learning rate scheduler
-optimizer = AdamW(get_grouped_params(model), lr=args.learning_rate)
+optimizer = AdamW(get_grouped_params(model, args), lr=args.learning_rate)
 lr_scheduler = get_scheduler(name=args.lr_scheduler_type, optimizer=optimizer,
                              num_warmup_steps=args.num_warmup_steps,
                              num_training_steps=args.max_train_steps,)
@@ -162,24 +161,21 @@ model, optimizer, train_dataloader, eval_dataloader = accelerator.prepare(
 # Train model
 model.train()
 completed_steps = 0
-t0 = time.time()
 for step, batch in enumerate(train_dataloader, start=1):
-
-    loss = model(batch, labels=batch).loss
-    t2 = time.time()
+    loss = model(batch, labels=batch, use_cache=False).loss
     log_metrics(step, {'lr': get_lr(), 'samples': step*samples_per_step,
                        'steps': completed_steps, 'loss/train': loss.item()})
     loss = loss / args.gradient_accumulation_steps
     accelerator.backward(loss)
-    t3 = time.time()
     if step % args.gradient_accumulation_steps == 0:
+        accelerator.clip_grad_norm_(model.parameters(), 1.0)
         optimizer.step()
         lr_scheduler.step()
         optimizer.zero_grad()
         completed_steps += 1
     if step % args.save_checkpoint_steps == 0:
         logger.info('Evaluating and saving model checkpoint')
-        eval_loss, perplexity = evaluate()
+        eval_loss, perplexity = evaluate(args)
         log_metrics(step, {'loss/eval': eval_loss, 'perplexity': perplexity})
         accelerator.wait_for_everyone()
         unwrapped_model = accelerator.unwrap_model(model)
@@ -189,17 +185,13 @@ for step, batch in enumerate(train_dataloader, start=1):
         model.train()
     if completed_steps >= args.max_train_steps:
         break
-    t4 = time.time()
-    #logger.info(f'ITER: {t1-t0:.3f}, FRWD: {t2-t1:.3f}, BKWD: {t3-t2:.3f}, OPT: {t4-t3:.3f}, ALL: {t4-t0}')
-    t0 = time.time()

 # Evaluate and save the last checkpoint
 logger.info('Evaluating and saving model after training')
-eval_loss, perplexity = evaluate()
+eval_loss, perplexity = evaluate(args)
 log_metrics(step, {'loss/eval': eval_loss, 'perplexity': perplexity})
 accelerator.wait_for_everyone()
 unwrapped_model = accelerator.unwrap_model(model)
 if accelerator.is_main_process:
     unwrapped_model.save_pretrained("./")
-
-    except: logger.info('No changes to previously saved model.')
+    hf_repo.push_to_hub(commit_message=f'final model')
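
Note on the ConstantLengthDataset change above: tokenized examples are joined with a concatenation token and then sliced into fixed-length blocks, so training needs no padding. The snippet below is a minimal, hypothetical illustration of that packing step; the pack_examples helper and the toy token ids are not part of the repository.

# Hypothetical, self-contained sketch of the packing scheme used by
# ConstantLengthDataset: concatenate tokenized examples with a separator
# token and keep only full seq_length chunks.
def pack_examples(tokenized_inputs, concat_token_id, seq_length=1024):
    all_token_ids = []
    for tokenized_input in tokenized_inputs:
        all_token_ids.extend(tokenized_input + [concat_token_id])
    sequences = []
    for i in range(0, len(all_token_ids), seq_length):
        input_ids = all_token_ids[i : i + seq_length]
        if len(input_ids) == seq_length:  # drop the trailing partial chunk
            sequences.append(input_ids)
    return sequences

# Toy usage with made-up token ids (concat_token_id=0, seq_length=8):
print(pack_examples([[1, 2, 3], [4, 5, 6, 7], [8, 9]], 0, seq_length=8))
# -> [[1, 2, 3, 0, 4, 5, 6, 7]]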
    	
        config.json
    CHANGED
    
@@ -1,4 +1,5 @@
 {
+  "_name_or_path": "./",
   "activation_function": "gelu_new",
   "architectures": [
     "GPT2LMHeadModel"
@@ -7,7 +8,7 @@
   "bos_token_id": 50256,
   "embd_pdrop": 0.1,
   "eos_token_id": 50256,
-  "gradient_checkpointing":
+  "gradient_checkpointing": true,
   "initializer_range": 0.02,
   "layer_norm_epsilon": 1e-05,
   "model_type": "gpt2",
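
The config change above enables gradient checkpointing, which recomputes activations during the backward pass to reduce memory at the cost of extra compute. A minimal sketch of how the flag is picked up when loading the checkpoint, mirroring the from_pretrained call in codeparrot_training.py; the dummy batch is an assumption for illustration only.

import torch
from transformers import GPT2LMHeadModel

# Sketch only: load the local GPT-2 checkpoint with gradient checkpointing
# enabled, as done in codeparrot_training.py above.
model = GPT2LMHeadModel.from_pretrained("./", gradient_checkpointing=True)
model.train()

# Checkpointing recomputes activations in backward, so the key/value cache
# is disabled in the forward pass. The batch below is a dummy example.
batch = torch.randint(0, model.config.vocab_size, (2, 1024))
loss = model(batch, labels=batch, use_cache=False).loss
loss.backward()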
    	
        log/debug_0.log
    ADDED
    
The diff for this file is too large to render.
    	
        log/debug_1.log
    ADDED
    
@@ -0,0 +1 @@
+09/20/2021 14:29:09 - INFO - root - Reducer buckets have been rebuilt in this iteration.
    	
        log/debug_10.log
    ADDED
    
@@ -0,0 +1 @@
+09/20/2021 14:29:09 - INFO - root - Reducer buckets have been rebuilt in this iteration.
    	
        log/debug_11.log
    ADDED
    
@@ -0,0 +1 @@
+09/20/2021 14:29:09 - INFO - root - Reducer buckets have been rebuilt in this iteration.
    	
        log/debug_12.log
    ADDED
    
@@ -0,0 +1 @@
+09/20/2021 14:29:09 - INFO - root - Reducer buckets have been rebuilt in this iteration.
    	
        log/debug_13.log
    ADDED
    
@@ -0,0 +1 @@
+09/20/2021 14:29:09 - INFO - root - Reducer buckets have been rebuilt in this iteration.
    	
        log/debug_14.log
    ADDED
    
@@ -0,0 +1 @@
+09/20/2021 14:29:09 - INFO - root - Reducer buckets have been rebuilt in this iteration.
    	
        log/debug_15.log
    ADDED
    
@@ -0,0 +1 @@
+09/20/2021 14:29:09 - INFO - root - Reducer buckets have been rebuilt in this iteration.
    	
        log/debug_2.log
    ADDED
    
@@ -0,0 +1 @@
+09/20/2021 14:29:09 - INFO - root - Reducer buckets have been rebuilt in this iteration.
    	
        log/debug_3.log
    ADDED
    
@@ -0,0 +1 @@
+09/20/2021 14:29:09 - INFO - root - Reducer buckets have been rebuilt in this iteration.
    	
        log/debug_4.log
    ADDED
    
@@ -0,0 +1 @@
+09/20/2021 14:29:09 - INFO - root - Reducer buckets have been rebuilt in this iteration.
    	
        log/debug_5.log
    ADDED
    
@@ -0,0 +1 @@
+09/20/2021 14:29:09 - INFO - root - Reducer buckets have been rebuilt in this iteration.
    	
        log/debug_6.log
    ADDED
    
@@ -0,0 +1 @@
+09/20/2021 14:29:09 - INFO - root - Reducer buckets have been rebuilt in this iteration.
    	
        log/debug_7.log
    ADDED
    
@@ -0,0 +1 @@
+09/20/2021 14:29:09 - INFO - root - Reducer buckets have been rebuilt in this iteration.
    	
        log/debug_8.log
    ADDED
    
@@ -0,0 +1 @@
+09/20/2021 14:29:09 - INFO - root - Reducer buckets have been rebuilt in this iteration.
    	
        log/debug_9.log
    ADDED
    
@@ -0,0 +1 @@
+09/20/2021 14:29:09 - INFO - root - Reducer buckets have been rebuilt in this iteration.
    	
        pytorch_model.bin
    CHANGED
    
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:00ec35b14b049e5188c1ba8fd432ffa094b481d96393f02052b1c9a9fa4fdc2a
 size 6169094681
    	
        requirements.txt
    CHANGED
    
@@ -3,5 +3,5 @@ wandb
 tensorboard
 git+https://github.com/huggingface/huggingface_hub.git
 git+https://github.com/huggingface/transformers.git
-git+https://github.com/huggingface/datasets.git@
+git+https://github.com/huggingface/datasets.git@json-dont-raise
 git+https://github.com/huggingface/accelerate.git
    	
        runs/Sep20_14-28-12_leandro-16x-v100/1632148092.8874874/events.out.tfevents.1632148092.leandro-16x-v100.8660.1
    ADDED
    
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:222b05fb22ccb39b7d43f507f7c672d8c741e4281e65c71c12d98b19c1d3ff1f
+size 1373
    	
        runs/Sep20_14-28-12_leandro-16x-v100/events.out.tfevents.1632148092.leandro-16x-v100.8660.0
    ADDED
    
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:349e549f0e23501888f84c37ff54aff187c6c97313a732fe502a7cf7c77c3a64
+size 9134099
    	
        wandb/debug-internal.log
    ADDED
    
@@ -0,0 +1 @@
+run-20210920_142810-36cw69uv/logs/debug-internal.log
    	
        wandb/debug.log
    ADDED
    
@@ -0,0 +1 @@
+run-20210920_142810-36cw69uv/logs/debug.log
    	
        wandb/latest-run
    ADDED
    
@@ -0,0 +1 @@
+run-20210920_142810-36cw69uv
    	
        wandb/run-20210920_142810-36cw69uv/files/conda-environment.yaml
    ADDED
    
@@ -0,0 +1,131 @@
+name: codeparrot
+channels:
+  - pytorch
+  - nvidia
+  - defaults
+dependencies:
+  - _libgcc_mutex=0.1=main
+  - _openmp_mutex=4.5=1_gnu
+  - blas=1.0=mkl
+  - bzip2=1.0.8=h7b6447c_0
+  - ca-certificates=2021.7.5=h06a4308_1
+  - certifi=2021.5.30=py38h06a4308_0
+  - cudatoolkit=11.1.74=h6bb024c_0
+  - ffmpeg=4.3=hf484d3e_0
+  - freetype=2.10.4=h5ab3b9f_0
+  - gmp=6.2.1=h2531618_2
+  - gnutls=3.6.15=he1e5248_0
+  - intel-openmp=2021.3.0=h06a4308_3350
+  - jpeg=9b=h024ee3a_2
+  - lame=3.100=h7b6447c_0
+  - lcms2=2.12=h3be6417_0
+  - ld_impl_linux-64=2.35.1=h7274673_9
+  - libffi=3.3=he6710b0_2
+  - libgcc-ng=9.3.0=h5101ec6_17
+  - libgomp=9.3.0=h5101ec6_17
+  - libiconv=1.15=h63c8f33_5
+  - libidn2=2.3.2=h7f8727e_0
+  - libpng=1.6.37=hbc83047_0
+  - libstdcxx-ng=9.3.0=hd4cf53a_17
+  - libtasn1=4.16.0=h27cfd23_0
+  - libtiff=4.2.0=h85742a9_0
+  - libunistring=0.9.10=h27cfd23_0
+  - libuv=1.40.0=h7b6447c_0
+  - libwebp-base=1.2.0=h27cfd23_0
+  - lz4-c=1.9.3=h295c915_1
+  - mkl=2021.3.0=h06a4308_520
+  - mkl-service=2.4.0=py38h7f8727e_0
+  - mkl_fft=1.3.0=py38h42c9631_2
+  - mkl_random=1.2.2=py38h51133e4_0
+  - ncurses=6.2=he6710b0_1
+  - nettle=3.7.3=hbbd107a_1
+  - numpy=1.20.3=py38hf144106_0
+  - numpy-base=1.20.3=py38h74d4b33_0
+  - olefile=0.46=pyhd3eb1b0_0
+  - openh264=2.1.0=hd408876_0
+  - openjpeg=2.4.0=h3ad879b_0
+  - openssl=1.1.1l=h7f8727e_0
+  - pillow=8.3.1=py38h2c7a002_0
+  - pip=21.0.1=py38h06a4308_0
+  - python=3.8.11=h12debd9_0_cpython
+  - pytorch=1.9.0=py3.8_cuda11.1_cudnn8.0.5_0
+  - readline=8.1=h27cfd23_0
+  - setuptools=52.0.0=py38h06a4308_0
+  - six=1.16.0=pyhd3eb1b0_0
+  - sqlite=3.36.0=hc218d9a_0
+  - tk=8.6.10=hbc83047_0
+  - torchaudio=0.9.0=py38
+  - torchvision=0.10.0=py38_cu111
+  - typing_extensions=3.10.0.0=pyhca03da5_0
+  - wheel=0.37.0=pyhd3eb1b0_1
+  - xz=5.2.5=h7b6447c_0
+  - zlib=1.2.11=h7b6447c_3
+  - zstd=1.4.9=haebb681_0
+  - pip:
+    - absl-py==0.13.0
+    - accelerate==0.5.0.dev0
+    - aiohttp==3.7.4.post0
+    - async-timeout==3.0.1
+    - attrs==21.2.0
+    - cachetools==4.2.2
+    - chardet==4.0.0
+    - charset-normalizer==2.0.5
+    - click==8.0.1
+    - configparser==5.0.2
+    - datasets==1.10.3.dev0
+    - deepspeed==0.5.2
+    - dill==0.3.4
+    - docker-pycreds==0.4.0
+    - filelock==3.0.12
+    - fsspec==2021.8.1
+    - gitdb==4.0.7
+    - gitpython==3.1.18
+    - google-auth==1.35.0
+    - google-auth-oauthlib==0.4.6
+    - grpcio==1.40.0
+    - huggingface-hub==0.0.17
+    - idna==3.2
+    - joblib==1.0.1
+    - markdown==3.3.4
+    - multidict==5.1.0
+    - multiprocess==0.70.12.2
+    - ninja==1.10.2
+    - oauthlib==3.1.1
+    - packaging==21.0
+    - pandas==1.3.3
+    - pathtools==0.1.2
+    - promise==2.3
+    - protobuf==3.18.0
+    - psutil==5.8.0
+    - pyarrow==5.0.0
+    - pyasn1==0.4.8
+    - pyasn1-modules==0.2.8
+    - pyparsing==2.4.7
+    - python-dateutil==2.8.2
+    - pytz==2021.1
+    - pyyaml==5.4.1
+    - regex==2021.8.28
+    - requests==2.26.0
+    - requests-oauthlib==1.3.0
+    - rsa==4.7.2
+    - sacremoses==0.0.45
+    - sentry-sdk==1.3.1
+    - shortuuid==1.0.1
+    - smmap==4.0.0
+    - subprocess32==3.5.4
+    - tensorboard==2.6.0
+    - tensorboard-data-server==0.6.1
+    - tensorboard-plugin-wit==1.8.0
+    - tensorboardx==1.8
+    - termcolor==1.1.0
+    - tokenizers==0.10.3
+    - tqdm==4.62.2
+    - transformers==4.11.0.dev0
+    - triton==1.0.0
+    - urllib3==1.26.6
+    - wandb==0.12.2
+    - werkzeug==2.0.1
+    - xxhash==2.0.2
+    - yarl==1.6.3
+    - yaspin==2.1.0
+prefix: /home/leandro/miniconda3/envs/codeparrot
    	
        wandb/run-20210920_142810-36cw69uv/files/config.yaml
    ADDED
    
@@ -0,0 +1,89 @@
+wandb_version: 1
+
+_wandb:
+  desc: null
+  value:
+    cli_version: 0.12.2
+    framework: huggingface
+    huggingface_version: 4.11.0.dev0
+    is_jupyter_run: false
+    is_kaggle_kernel: false
+    python_version: 3.8.11
+    start_time: 1632148090
+    t:
+      1:
+      - 1
+      - 11
+      3:
+      - 16
+      4: 3.8.11
+      5: 0.12.2
+      6: 4.11.0.dev0
+      8:
+      - 5
+backend:
+  desc: null
+  value: nccl
+deepspeed_plugin:
+  desc: null
+  value: None
+device:
+  desc: null
+  value: cuda:0
+distributed_type:
+  desc: null
+  value: DistributedType.MULTI_GPU
+gradient_accumulation_steps:
+  desc: null
+  value: 16
+initialized:
+  desc: null
+  value: 'True'
+learning_rate:
+  desc: null
+  value: 0.0002
+local_process_index:
+  desc: null
+  value: '0'
+lr_scheduler_type:
+  desc: null
+  value: cosine
+max_eval_steps:
+  desc: null
+  value: -1
+max_train_steps:
+  desc: null
+  value: 50000
+num_processes:
+  desc: null
+  value: '16'
+num_warmup_steps:
+  desc: null
+  value: 750
+process_index:
+  desc: null
+  value: '0'
+save_checkpoint_steps:
+  desc: null
+  value: 50000
+seed:
+  desc: null
+  value: 1
+seq_length:
+  desc: null
+  value: 1024
+shuffle_buffer:
+  desc: null
+  value: 1000
+train_batch_size:
+  desc: null
+  value: 2
+use_fp16:
+  desc: null
+  value: 'True'
+valid_batch_size:
+  desc: null
+  value: 2
+weight_decay:
+  desc: null
+  value: 0.1
    	
        wandb/run-20210920_142810-36cw69uv/files/output.log
    ADDED
    
The diff for this file is too large to render.
    	
        wandb/run-20210920_142810-36cw69uv/files/requirements.txt
    ADDED
    
    | @@ -0,0 +1,81 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | + absl-py==0.13.0
| 2 | + accelerate==0.5.0.dev0
| 3 | + aiohttp==3.7.4.post0
| 4 | + async-timeout==3.0.1
| 5 | + attrs==21.2.0
| 6 | + cachetools==4.2.2
| 7 | + certifi==2021.5.30
| 8 | + chardet==4.0.0
| 9 | + charset-normalizer==2.0.5
| 10 | + click==8.0.1
| 11 | + configparser==5.0.2
| 12 | + datasets==1.10.3.dev0
| 13 | + deepspeed==0.5.2
| 14 | + dill==0.3.4
| 15 | + docker-pycreds==0.4.0
| 16 | + filelock==3.0.12
| 17 | + fsspec==2021.8.1
| 18 | + gitdb==4.0.7
| 19 | + gitpython==3.1.18
| 20 | + google-auth-oauthlib==0.4.6
| 21 | + google-auth==1.35.0
| 22 | + grpcio==1.40.0
| 23 | + huggingface-hub==0.0.17
| 24 | + idna==3.2
| 25 | + joblib==1.0.1
| 26 | + markdown==3.3.4
| 27 | + mkl-fft==1.3.0
| 28 | + mkl-random==1.2.2
| 29 | + mkl-service==2.4.0
| 30 | + multidict==5.1.0
| 31 | + multiprocess==0.70.12.2
| 32 | + ninja==1.10.2
| 33 | + numpy==1.20.3
| 34 | + oauthlib==3.1.1
| 35 | + olefile==0.46
| 36 | + packaging==21.0
| 37 | + pandas==1.3.3
| 38 | + pathtools==0.1.2
| 39 | + pillow==8.3.1
| 40 | + pip==21.0.1
| 41 | + promise==2.3
| 42 | + protobuf==3.18.0
| 43 | + psutil==5.8.0
| 44 | + pyarrow==5.0.0
| 45 | + pyasn1-modules==0.2.8
| 46 | + pyasn1==0.4.8
| 47 | + pyparsing==2.4.7
| 48 | + python-dateutil==2.8.2
| 49 | + pytz==2021.1
| 50 | + pyyaml==5.4.1
| 51 | + regex==2021.8.28
| 52 | + requests-oauthlib==1.3.0
| 53 | + requests==2.26.0
| 54 | + rsa==4.7.2
| 55 | + sacremoses==0.0.45
| 56 | + sentry-sdk==1.3.1
| 57 | + setuptools==52.0.0.post20210125
| 58 | + shortuuid==1.0.1
| 59 | + six==1.16.0
| 60 | + smmap==4.0.0
| 61 | + subprocess32==3.5.4
| 62 | + tensorboard-data-server==0.6.1
| 63 | + tensorboard-plugin-wit==1.8.0
| 64 | + tensorboard==2.6.0
| 65 | + tensorboardx==1.8
| 66 | + termcolor==1.1.0
| 67 | + tokenizers==0.10.3
| 68 | + torch==1.9.0
| 69 | + torchaudio==0.9.0a0+33b2469
| 70 | + torchvision==0.10.0
| 71 | + tqdm==4.62.2
| 72 | + transformers==4.11.0.dev0
| 73 | + triton==1.0.0
| 74 | + typing-extensions==3.10.0.0
| 75 | + urllib3==1.26.6
| 76 | + wandb==0.12.2
| 77 | + werkzeug==2.0.1
| 78 | + wheel==0.37.0
| 79 | + xxhash==2.0.2
| 80 | + yarl==1.6.3
| 81 | + yaspin==2.1.0
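
The requirements.txt above is the package snapshot wandb captured for this run. As an illustrative convenience (not part of the training code), one way to diff these pins against a local environment, assuming the file path introduced in this commit:

```python
# Sketch only: compare the pinned versions captured by wandb against whatever
# is installed in the current environment.
from importlib.metadata import version, PackageNotFoundError

REQS_PATH = "wandb/run-20210920_142810-36cw69uv/files/requirements.txt"

with open(REQS_PATH) as f:
    pins = dict(line.strip().split("==", 1) for line in f if "==" in line)

for name, pinned in sorted(pins.items()):
    try:
        installed = version(name)
    except PackageNotFoundError:
        installed = "missing"
    if installed != pinned:
        print(f"{name}: pinned {pinned}, installed {installed}")
```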
    	
        wandb/run-20210920_142810-36cw69uv/files/wandb-metadata.json
    ADDED
    
    | @@ -0,0 +1,24 @@ | |
| 1 | + {
| 2 | +     "os": "Linux-5.4.0-1052-gcp-x86_64-with-glibc2.17",
| 3 | +     "python": "3.8.11",
| 4 | +     "heartbeatAt": "2021-09-20T14:28:11.537999",
| 5 | +     "startedAt": "2021-09-20T14:28:10.785470",
| 6 | +     "docker": null,
| 7 | +     "gpu": "NVIDIA A100-SXM4-40GB",
| 8 | +     "gpu_count": 16,
| 9 | +     "cpu_count": 96,
| 10 | +     "cuda": "10.1.243",
| 11 | +     "args": [],
| 12 | +     "state": "running",
| 13 | +     "program": "codeparrot_training.py",
| 14 | +     "codePath": "codeparrot_training.py",
| 15 | +     "git": {
| 16 | +         "remote": "https://huggingface.co/transformersbook/codeparrot",
| 17 | +         "commit": "ea70f93cfbf64eb723d41b350d14827e68b0a6c3"
| 18 | +     },
| 19 | +     "email": "[email protected]",
| 20 | +     "root": "/home/leandro/codeparrot",
| 21 | +     "host": "leandro-16x-v100",
| 22 | +     "username": "leandro",
| 23 | +     "executable": "/home/leandro/miniconda3/envs/codeparrot/bin/python"
| 24 | + }
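
wandb-metadata.json records the hardware and git provenance of the run. A small illustrative sketch (not from this repository) for pulling out the fields shown above:

```python
# Sketch only: summarize the run's hardware and code provenance from the
# wandb-metadata.json added in this commit.
import json

META_PATH = "wandb/run-20210920_142810-36cw69uv/files/wandb-metadata.json"

with open(META_PATH) as f:
    meta = json.load(f)

print(f"{meta['gpu_count']}x {meta['gpu']}, {meta['cpu_count']} CPUs, CUDA {meta['cuda']}")
print(f"code: {meta['git']['remote']} @ {meta['git']['commit'][:8]}")
```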
    	
        wandb/run-20210920_142810-36cw69uv/files/wandb-summary.json
    ADDED
    
    | @@ -0,0 +1 @@ | |
| 1 | + {"lr": 0.00019885557166781018, "samples": 1600000, "steps": 3124, "loss/train": 1.4012274742126465, "_runtime": 40644, "_timestamp": 1632188734, "_step": 50000, "loss/eval": 1.7745720148086548, "perplexity": 5.897756576538086}
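
For reference, the perplexity in this summary is consistent with exponentiating the evaluation loss; a quick check (the derivation is inferred from the numbers, not taken from the training script):

```python
# Sanity check: perplexity ~= exp(loss/eval) for the values logged above.
import math

eval_loss = 1.7745720148086548
perplexity = math.exp(eval_loss)
print(round(perplexity, 4))  # ~5.8978, matching the "perplexity" field
```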
    	
        wandb/run-20210920_142810-36cw69uv/logs/debug-internal.log
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
| 1 | + version https://git-lfs.github.com/spec/v1
| 2 | + oid sha256:f4b2174d10551473549ba5ce66d8348228b83bacf57bbc30dca02a46c5e0319c
| 3 | + size 26678411
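
debug-internal.log (and run-36cw69uv.wandb below) are committed as Git LFS pointers rather than raw content. A minimal sketch for reading the pointer fields, assuming the repository was cloned with GIT_LFS_SKIP_SMUDGE=1 so the pointer text itself is on disk:

```python
# Sketch only: parse a Git LFS pointer file (version / oid / size lines),
# as shown in the diff above. Assumes the pointer text, not the LFS object,
# is what sits at this path locally.
def parse_lfs_pointer(path):
    fields = {}
    with open(path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields

pointer = parse_lfs_pointer(
    "wandb/run-20210920_142810-36cw69uv/logs/debug-internal.log"
)
print(pointer["oid"], int(pointer["size"]))  # sha256:f4b2... 26678411
```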
    	
        wandb/run-20210920_142810-36cw69uv/logs/debug.log
    ADDED
    
    | @@ -0,0 +1,22 @@ | |
| 1 | + 2021-09-20 14:28:10,787 INFO    MainThread:8660 [wandb_setup.py:_flush():69] setting env: {}
| 2 | + 2021-09-20 14:28:10,787 INFO    MainThread:8660 [wandb_setup.py:_flush():69] setting login settings: {}
| 3 | + 2021-09-20 14:28:10,787 INFO    MainThread:8660 [wandb_init.py:_log_setup():348] Logging user logs to /home/leandro/codeparrot/wandb/run-20210920_142810-36cw69uv/logs/debug.log
| 4 | + 2021-09-20 14:28:10,787 INFO    MainThread:8660 [wandb_init.py:_log_setup():349] Logging internal logs to /home/leandro/codeparrot/wandb/run-20210920_142810-36cw69uv/logs/debug-internal.log
| 5 | + 2021-09-20 14:28:10,788 INFO    MainThread:8660 [wandb_init.py:init():381] calling init triggers
| 6 | + 2021-09-20 14:28:10,788 INFO    MainThread:8660 [wandb_init.py:init():386] wandb.init called with sweep_config: {}
| 7 | + config: {'train_batch_size': 2, 'valid_batch_size': 2, 'weight_decay': 0.1, 'shuffle_buffer': 1000, 'learning_rate': 0.0002, 'lr_scheduler_type': 'cosine', 'num_warmup_steps': 750, 'gradient_accumulation_steps': 16, 'max_train_steps': 50000, 'max_eval_steps': -1, 'seq_length': 1024, 'seed': 1, 'save_checkpoint_steps': 50000, 'backend': 'nccl', 'deepspeed_plugin': 'None', 'distributed_type': 'DistributedType.MULTI_GPU', 'num_processes': '16', 'process_index': '0', 'local_process_index': '0', 'device': 'cuda:0', 'use_fp16': 'True', 'initialized': 'True'}
| 8 | + 2021-09-20 14:28:10,788 INFO    MainThread:8660 [wandb_init.py:init():430] starting backend
| 9 | + 2021-09-20 14:28:10,788 INFO    MainThread:8660 [backend.py:_multiprocessing_setup():70] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
| 10 | + 2021-09-20 14:28:10,805 INFO    MainThread:8660 [backend.py:ensure_launched():135] starting backend process...
| 11 | + 2021-09-20 14:28:10,816 INFO    MainThread:8660 [backend.py:ensure_launched():139] started backend process with pid: 9038
| 12 | + 2021-09-20 14:28:10,818 INFO    MainThread:8660 [wandb_init.py:init():435] backend started and connected
| 13 | + 2021-09-20 14:28:10,825 INFO    MainThread:8660 [wandb_init.py:init():494] updated telemetry
| 14 | + 2021-09-20 14:28:10,826 INFO    MainThread:8660 [wandb_init.py:init():517] communicating current version
| 15 | + 2021-09-20 14:28:11,406 INFO    MainThread:8660 [wandb_init.py:init():522] got version response
| 16 | + 2021-09-20 14:28:11,406 INFO    MainThread:8660 [wandb_init.py:init():530] communicating run to backend with 30 second timeout
| 17 | + 2021-09-20 14:28:11,486 INFO    MainThread:8660 [wandb_init.py:init():557] starting run threads in backend
| 18 | + 2021-09-20 14:28:12,872 INFO    MainThread:8660 [wandb_run.py:_console_start():1605] atexit reg
| 19 | + 2021-09-20 14:28:12,873 INFO    MainThread:8660 [wandb_run.py:_redirect():1479] redirect: SettingsConsole.REDIRECT
| 20 | + 2021-09-20 14:28:12,873 INFO    MainThread:8660 [wandb_run.py:_redirect():1484] Redirecting console.
| 21 | + 2021-09-20 14:28:12,876 INFO    MainThread:8660 [wandb_run.py:_redirect():1540] Redirects installed.
| 22 | + 2021-09-20 14:28:12,876 INFO    MainThread:8660 [wandb_init.py:init():582] run started, returning control to user process
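
The config dict logged in debug.log pairs up with the wandb-summary.json earlier in this commit; a back-of-the-envelope check (the step accounting is an assumption, not read from the training script):

```python
# Cross-check of the logged run config against the wandb summary for this run.
train_batch_size = 2              # per process
num_processes = 16
gradient_accumulation_steps = 16
seq_length = 1024
max_train_steps = 50000           # "_step" in the summary

samples_per_step = train_batch_size * num_processes            # 32 sequences per logged step
print(samples_per_step * max_train_steps)                      # 1_600_000, matches "samples"

effective_batch = samples_per_step * gradient_accumulation_steps   # 512 sequences per optimizer step
print(effective_batch * seq_length)                                # 524_288 tokens per optimizer step
print(max_train_steps // gradient_accumulation_steps)              # 3125, vs. the 3124 logged as "steps"
```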
    	
        wandb/run-20210920_142810-36cw69uv/run-36cw69uv.wandb
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
| 1 | + version https://git-lfs.github.com/spec/v1
| 2 | + oid sha256:d3dea2a070cd3c7d6079d138e9461283968f789819a375be8fd99762250f9064
| 3 | + size 20083529

