Kesian committed on
Commit
5605150
1 Parent(s): 6704638

Upload tokenizer

Files changed (3)
  1. special_tokens_map.json +111 -0
  2. tokenizer.json +0 -0
  3. tokenizer_config.json +6 -0
special_tokens_map.json ADDED
@@ -0,0 +1,111 @@
+ {
+   "additional_special_tokens": [
+     "[sentinel_0]",
+     "[sentinel_1]",
+     "[sentinel_2]",
+     "[sentinel_3]",
+     "[sentinel_4]",
+     "[sentinel_5]",
+     "[sentinel_6]",
+     "[sentinel_7]",
+     "[sentinel_8]",
+     "[sentinel_9]",
+     "[sentinel_10]",
+     "[sentinel_11]",
+     "[sentinel_12]",
+     "[sentinel_13]",
+     "[sentinel_14]",
+     "[sentinel_15]",
+     "[sentinel_16]",
+     "[sentinel_17]",
+     "[sentinel_18]",
+     "[sentinel_19]",
+     "[sentinel_20]",
+     "[sentinel_21]",
+     "[sentinel_22]",
+     "[sentinel_23]",
+     "[sentinel_24]",
+     "[sentinel_25]",
+     "[sentinel_26]",
+     "[sentinel_27]",
+     "[sentinel_28]",
+     "[sentinel_29]",
+     "[sentinel_30]",
+     "[sentinel_31]",
+     "[sentinel_32]",
+     "[sentinel_33]",
+     "[sentinel_34]",
+     "[sentinel_35]",
+     "[sentinel_36]",
+     "[sentinel_37]",
+     "[sentinel_38]",
+     "[sentinel_39]",
+     "[sentinel_40]",
+     "[sentinel_41]",
+     "[sentinel_42]",
+     "[sentinel_43]",
+     "[sentinel_44]",
+     "[sentinel_45]",
+     "[sentinel_46]",
+     "[sentinel_47]",
+     "[sentinel_48]",
+     "[sentinel_49]",
+     "[sentinel_50]",
+     "[sentinel_51]",
+     "[sentinel_52]",
+     "[sentinel_53]",
+     "[sentinel_54]",
+     "[sentinel_55]",
+     "[sentinel_56]",
+     "[sentinel_57]",
+     "[sentinel_58]",
+     "[sentinel_59]",
+     "[sentinel_60]",
+     "[sentinel_61]",
+     "[sentinel_62]",
+     "[sentinel_63]",
+     "[sentinel_64]",
+     "[sentinel_65]",
+     "[sentinel_66]",
+     "[sentinel_67]",
+     "[sentinel_68]",
+     "[sentinel_69]",
+     "[sentinel_70]",
+     "[sentinel_71]",
+     "[sentinel_72]",
+     "[sentinel_73]",
+     "[sentinel_74]",
+     "[sentinel_75]",
+     "[sentinel_76]",
+     "[sentinel_77]",
+     "[sentinel_78]",
+     "[sentinel_79]",
+     "[sentinel_80]",
+     "[sentinel_81]",
+     "[sentinel_82]",
+     "[sentinel_83]",
+     "[sentinel_84]",
+     "[sentinel_85]",
+     "[sentinel_86]",
+     "[sentinel_87]",
+     "[sentinel_88]",
+     "[sentinel_89]",
+     "[sentinel_90]",
+     "[sentinel_91]",
+     "[sentinel_92]",
+     "[sentinel_93]",
+     "[sentinel_94]",
+     "[sentinel_95]",
+     "[sentinel_96]",
+     "[sentinel_97]",
+     "[sentinel_98]",
+     "[sentinel_99]",
+     "[R]",
+     "[X]",
+     "[S]"
+   ],
+   "eos_token": "[EOS]",
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "unk_token": "[UNK]"
+ }
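
For context, a minimal sketch of loading this tokenizer and inspecting the special tokens defined above, assuming the three uploaded files live in a local directory named law_tokenizer (taken from name_or_path in tokenizer_config.json; substitute the actual repo id or path):

from transformers import AutoTokenizer

# Load the uploaded files (tokenizer.json, tokenizer_config.json,
# special_tokens_map.json). "law_tokenizer" is an assumed local path.
tokenizer = AutoTokenizer.from_pretrained("law_tokenizer")

# The 100 [sentinel_*] tokens plus [R], [X], and [S] are registered as
# additional special tokens, so each one maps to a single id and is
# never split by the underlying tokenization model.
print(tokenizer.additional_special_tokens[:3])    # ['[sentinel_0]', '[sentinel_1]', '[sentinel_2]']
print(tokenizer.convert_tokens_to_ids("[sentinel_0]"))
print(tokenizer.eos_token, tokenizer.pad_token, tokenizer.sep_token, tokenizer.unk_token)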
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,6 @@
+ {
+   "model_max_length": 1000000000000000019884624838656,
+   "name_or_path": "law_tokenizer",
+   "special_tokens_map_file": "test_tokenizer/special_tokens_map.json",
+   "tokenizer_class": "PreTrainedTokenizerFast"
+ }
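
The model_max_length value above is transformers' "very large integer" sentinel, int(1e30), which is serialized when no real length limit was set on the tokenizer. A downstream caller would normally cap it explicitly; a minimal sketch (the 512 cap and the law_tokenizer path are illustrative assumptions, not values from this repo):

from transformers import PreTrainedTokenizerFast

# tokenizer_class above is PreTrainedTokenizerFast, so loading directly
# through that class also works. "law_tokenizer" is an assumed local path.
tokenizer = PreTrainedTokenizerFast.from_pretrained("law_tokenizer")
tokenizer.model_max_length = 512  # assumed cap; use the real model's context size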