diff --git "a/tokenizer.json" "b/tokenizer.json"
new file mode 100644
--- /dev/null
+++ "b/tokenizer.json"
@@ -0,0 +1,250345 @@
+{
+ "version": "1.0",
+ "truncation": null,
+ "padding": null,
+ "added_tokens": [
+ {
+ "id": 0,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": true,
+ "special": true
+ },
+ {
+ "id": 1,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": true,
+ "special": true
+ },
+ {
+ "id": 2,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": true,
+ "special": true
+ },
+ {
+ "id": 3,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": true,
+ "special": true
+ },
+ {
+ "id": 4,
+ "content": "",
+ "single_word": false,
+ "lstrip": true,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ }
+ ],
+ "normalizer": null,
+ "pre_tokenizer": {
+ "type": "ByteLevel",
+ "add_prefix_space": false,
+ "trim_offsets": true,
+ "use_regex": true
+ },
+ "post_processor": {
+ "type": "RobertaProcessing",
+ "sep": [
+ "",
+ 2
+ ],
+ "cls": [
+ "",
+ 0
+ ],
+ "trim_offsets": true,
+ "add_prefix_space": false
+ },
+ "decoder": {
+ "type": "ByteLevel",
+ "add_prefix_space": true,
+ "trim_offsets": true,
+ "use_regex": true
+ },
+ "model": {
+ "type": "BPE",
+ "dropout": null,
+ "unk_token": null,
+ "continuing_subword_prefix": "",
+ "end_of_word_suffix": "",
+ "fuse_unk": false,
+ "byte_fallback": false,
+ "ignore_merges": false,
+ "vocab": {
+ "": 0,
+ "": 1,
+ "": 2,
+ "": 3,
+ "": 4,
+ "": 5,
+ "": 6,
+ "": 7,
+ "