Rachel Bawden committed on
Commit
d1188c3
·
1 Parent(s): 7f2f93b

update creative tokeniser

Browse files
Files changed (1) hide show
  1. tokenizer.json +7 -13
tokenizer.json CHANGED
@@ -8044,18 +8044,12 @@
8044
  "▁précé": 7959,
8045
  "▁satis": 7960,
8046
  "▁unilat": 7961,
8047
- "madeupword0000": 7962,
8048
- "madeupword0001": 7963,
8049
- "madeupword0002": 7964,
8050
- "madeupword0003": 7965,
8051
- "madeupword0004": 7966,
8052
- "madeupword0005": 7967,
8053
- "<": 7968,
8054
- "<t": 7969,
8055
- "▁<t": 7970,
8056
- "▁<t>": 7971,
8057
- "</": 7972,
8058
- "</s": 7973
8059
  },
8060
  "merges": [
8061
  "▁ d",
@@ -21654,4 +21648,4 @@
21654
  "</s >"
21655
  ]
21656
  }
21657
- }
 
8044
  "▁précé": 7959,
8045
  "▁satis": 7960,
8046
  "▁unilat": 7961,
8047
+ "<": 7962,
8048
+ "<t": 7963,
8049
+ "▁<t": 7964,
8050
+ "▁<t>": 7967,
8051
+ "</": 7965,
8052
+ "</s": 7966
 
 
 
 
 
 
8053
  },
8054
  "merges": [
8055
  "▁ d",
 
21648
  "</s >"
21649
  ]
21650
  }
21651
+ }