File size: 1,691 Bytes
dc6c577
 
 
 
 
 
af62b73
dc6c577
 
 
 
 
 
 
 
af62b73
be9d154
dc6c577
 
 
 
 
 
be9d154
af62b73
be9d154
dc6c577
 
 
be9d154
 
dc6c577
be9d154
af62b73
be9d154
dc6c577
 
 
be9d154
 
dc6c577
be9d154
dc6c577
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e16c918
dc6c577
 
 
 
 
e7e81f8
dc6c577
210ee5b
be9d154
 
 
 
e7e81f8
 
51e2ff0
be9d154
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
{
  "version": "1.0",
  "truncation": null,
  "padding": null,
  "added_tokens": [
    {
      "id": 0,
      "content": "<|beginoftext|>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 1,
      "content": "<|endoftext|>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 2,
      "content": "<|unknown|>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 3,
      "content": "<|padding|>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    }
  ],
  "normalizer": {
    "type": "NFC"
  },
  "pre_tokenizer": {
    "type": "ByteLevel",
    "add_prefix_space": false,
    "trim_offsets": true,
    "use_regex": true
  },
  "post_processor": {
    "type": "ByteLevel",
    "add_prefix_space": false,
    "trim_offsets": true,
    "use_regex": true
  },
  "decoder": {
    "type": "ByteLevel",
    "add_prefix_space": false,
    "trim_offsets": true,
    "use_regex": true
  },
  "model": {
    "type": "BPE",
    "dropout": null,
    "unk_token": "<|unknown|>",
    "continuing_subword_prefix": null,
    "end_of_word_suffix": null,
    "fuse_unk": false,
    "byte_fallback": false,
    "vocab": {
      "<|beginoftext|>": 0,
      "<|endoftext|>": 1,
      "<|unknown|>": 2,
      "<|padding|>": 3,
      "a": 28,
      "c": 29,
      "g": 30,
      "t": 31,
      "n": 32
    },
    "merges": []
  }
}