Rachel Bawden
committed on
Commit
·
7f2f93b
1
Parent(s):
c22a4d0
update creative tokeniser
Browse files
- tokenizer.json +14 -76
tokenizer.json
CHANGED
@@ -68,75 +68,7 @@
|
|
68 |
"replacement": "▁",
|
69 |
"add_prefix_space": true
|
70 |
},
|
71 |
-
"post_processor":
|
72 |
-
"type": "TemplateProcessing",
|
73 |
-
"single": [
|
74 |
-
{
|
75 |
-
"SpecialToken": {
|
76 |
-
"id": "<s>",
|
77 |
-
"type_id": 0
|
78 |
-
}
|
79 |
-
},
|
80 |
-
{
|
81 |
-
"Sequence": {
|
82 |
-
"id": "A",
|
83 |
-
"type_id": 0
|
84 |
-
}
|
85 |
-
},
|
86 |
-
{
|
87 |
-
"SpecialToken": {
|
88 |
-
"id": "</s>",
|
89 |
-
"type_id": 0
|
90 |
-
}
|
91 |
-
}
|
92 |
-
],
|
93 |
-
"pair": [
|
94 |
-
{
|
95 |
-
"SpecialToken": {
|
96 |
-
"id": "<s>",
|
97 |
-
"type_id": 0
|
98 |
-
}
|
99 |
-
},
|
100 |
-
{
|
101 |
-
"Sequence": {
|
102 |
-
"id": "A",
|
103 |
-
"type_id": 0
|
104 |
-
}
|
105 |
-
},
|
106 |
-
{
|
107 |
-
"Sequence": {
|
108 |
-
"id": "B",
|
109 |
-
"type_id": 1
|
110 |
-
}
|
111 |
-
},
|
112 |
-
{
|
113 |
-
"SpecialToken": {
|
114 |
-
"id": "</s>",
|
115 |
-
"type_id": 1
|
116 |
-
}
|
117 |
-
}
|
118 |
-
],
|
119 |
-
"special_tokens": {
|
120 |
-
"</s>": {
|
121 |
-
"id": "</s>",
|
122 |
-
"ids": [
|
123 |
-
2
|
124 |
-
],
|
125 |
-
"tokens": [
|
126 |
-
"</s>"
|
127 |
-
]
|
128 |
-
},
|
129 |
-
"<s>": {
|
130 |
-
"id": "<s>",
|
131 |
-
"ids": [
|
132 |
-
1
|
133 |
-
],
|
134 |
-
"tokens": [
|
135 |
-
"<s>"
|
136 |
-
]
|
137 |
-
}
|
138 |
-
}
|
139 |
-
},
|
140 |
"decoder": {
|
141 |
"type": "Metaspace",
|
142 |
"replacement": "▁",
|
@@ -8112,12 +8044,18 @@
|
|
8112 |
"▁précé": 7959,
|
8113 |
"▁satis": 7960,
|
8114 |
"▁unilat": 7961,
|
8115 |
-
"
|
8116 |
-
"
|
8117 |
-
"
|
8118 |
-
"
|
8119 |
-
"
|
8120 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
8121 |
},
|
8122 |
"merges": [
|
8123 |
"▁ d",
|
@@ -21716,4 +21654,4 @@
|
|
21716 |
"</s >"
|
21717 |
]
|
21718 |
}
|
21719 |
-
}
|
|
|
68 |
"replacement": "▁",
|
69 |
"add_prefix_space": true
|
70 |
},
|
71 |
+
"post_processor": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
72 |
"decoder": {
|
73 |
"type": "Metaspace",
|
74 |
"replacement": "▁",
|
|
|
8044 |
"▁précé": 7959,
|
8045 |
"▁satis": 7960,
|
8046 |
"▁unilat": 7961,
|
8047 |
+
"madeupword0000": 7962,
|
8048 |
+
"madeupword0001": 7963,
|
8049 |
+
"madeupword0002": 7964,
|
8050 |
+
"madeupword0003": 7965,
|
8051 |
+
"madeupword0004": 7966,
|
8052 |
+
"madeupword0005": 7967,
|
8053 |
+
"<": 7968,
|
8054 |
+
"<t": 7969,
|
8055 |
+
"▁<t": 7970,
|
8056 |
+
"▁<t>": 7971,
|
8057 |
+
"</": 7972,
|
8058 |
+
"</s": 7973
|
8059 |
},
|
8060 |
"merges": [
|
8061 |
"▁ d",
|
|
|
21654 |
"</s >"
|
21655 |
]
|
21656 |
}
|
21657 |
+
}
|