updating tokenizers
- modeling_gpt2.py +0 -3
- special_tokens_map.json +0 -5
- tokenizer.json +0 -0
- tokenizer_config.json +0 -9
- vocab.json +0 -0
modeling_gpt2.py
CHANGED
@@ -48,9 +48,6 @@ from transformers.utils import (
 from transformers.utils.model_parallel_utils import assert_device_map, get_device_map
 from .configuration_gpt2 import GPT2Config
 
-##let's test again
-
-
 logger = logging.get_logger(__name__)
 
 _CHECKPOINT_FOR_DOC = "gpt2"
special_tokens_map.json
DELETED
@@ -1,5 +0,0 @@
-{
-  "bos_token": "<|endoftext|>",
-  "eos_token": "<|endoftext|>",
-  "unk_token": "<|endoftext|>"
-}
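For reference, every entry in the deleted map points at GPT-2's single `<|endoftext|>` token, which the `GPT2Tokenizer` class already uses as its default for all three roles. A minimal sketch of how those defaults surface at load time, using the stock `gpt2` Hub checkpoint as a stand-in for this repo:

```python
# Sketch (not part of this commit): inspect the special tokens that the
# deleted special_tokens_map.json declared, via the public "gpt2" checkpoint.
from transformers import GPT2Tokenizer

tok = GPT2Tokenizer.from_pretrained("gpt2")

# For GPT-2, bos/eos/unk all resolve to the single <|endoftext|> token.
print(tok.bos_token, tok.eos_token, tok.unk_token)  # <|endoftext|> three times
print(tok.eos_token_id)  # 50256
```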
tokenizer.json
DELETED
The diff for this file is too large to render.
See raw diff
tokenizer_config.json
DELETED
@@ -1,9 +0,0 @@
-{
-  "add_prefix_space": false,
-  "bos_token": "<|endoftext|>",
-  "clean_up_tokenization_spaces": true,
-  "eos_token": "<|endoftext|>",
-  "model_max_length": 1024,
-  "tokenizer_class": "GPT2Tokenizer",
-  "unk_token": "<|endoftext|>"
-}
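The deleted config mirrors GPT-2's stock tokenizer settings. A minimal sketch of the equivalent keyword arguments, again with the public `gpt2` checkpoint standing in for this repo (the repo id is an assumption, not part of the commit):

```python
# Sketch (not part of this commit): constructing the tokenizer with keyword
# arguments equivalent to the deleted tokenizer_config.json.
from transformers import GPT2Tokenizer

tok = GPT2Tokenizer.from_pretrained(
    "gpt2",                   # stand-in repo id, not taken from this commit
    add_prefix_space=False,   # do not prepend a space to the first word
    model_max_length=1024,    # GPT-2's context window
    bos_token="<|endoftext|>",
    eos_token="<|endoftext|>",
    unk_token="<|endoftext|>",
)

# Round-tripping text exercises clean_up_tokenization_spaces on decode.
ids = tok("hello world").input_ids
print(tok.decode(ids, clean_up_tokenization_spaces=True))
```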
vocab.json
DELETED
The diff for this file is too large to render.
See raw diff
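With all five tokenizer files removed, the repo has no loadable tokenizer until replacements are pushed. A minimal smoke test for that state, where `user/model` is a hypothetical repo id standing in for this one:

```python
# Sketch (not part of this commit): verify whether the repo still serves a
# tokenizer after the deletions. "user/model" is a placeholder repo id.
from transformers import AutoTokenizer

try:
    tok = AutoTokenizer.from_pretrained("user/model")
    print("tokenizer loads, vocab size:", tok.vocab_size)
except OSError as err:
    # Raised when tokenizer.json / vocab.json are missing from the repo.
    print("tokenizer files missing:", err)
```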