Geraldine commited on
Commit
d73b15f
·
verified ·
1 Parent(s): 9242b5e

Upload tokenizer

Browse files
added_tokens.json ADDED
@@ -0,0 +1,557 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</abbr>": 30905,
3
+ "</abstract>": 30856,
4
+ "</accessrestrict>": 30857,
5
+ "</accruals>": 30858,
6
+ "</acqinfo>": 30859,
7
+ "</address>": 30906,
8
+ "</addressline>": 30907,
9
+ "</altformavail>": 30860,
10
+ "</appraisal>": 30861,
11
+ "</arc>": 30944,
12
+ "</archdesc>": 30837,
13
+ "</archdescgrp>": 30838,
14
+ "</archref>": 30862,
15
+ "</arrangement>": 30863,
16
+ "</author>": 30814,
17
+ "</bibliography>": 30864,
18
+ "</bibref>": 30865,
19
+ "</bibseries>": 30908,
20
+ "</bioghist>": 30866,
21
+ "</blockquote>": 30909,
22
+ "</c01>": 30840,
23
+ "</c02>": 30841,
24
+ "</c03>": 30842,
25
+ "</c04>": 30843,
26
+ "</c05>": 30844,
27
+ "</c06>": 30845,
28
+ "</c07>": 30846,
29
+ "</c08>": 30847,
30
+ "</c09>": 30848,
31
+ "</c10>": 30849,
32
+ "</c11>": 30850,
33
+ "</c12>": 30851,
34
+ "</c>": 30839,
35
+ "</change>": 30815,
36
+ "</chronitem>": 30910,
37
+ "</chronlist>": 30911,
38
+ "</colspec>": 30912,
39
+ "</container>": 30867,
40
+ "</controlaccess>": 30852,
41
+ "</corpname>": 30894,
42
+ "</creation>": 30816,
43
+ "</custodhist>": 30868,
44
+ "</dao>": 30945,
45
+ "</daodesc>": 30946,
46
+ "</daogrp>": 30947,
47
+ "</daoloc>": 30948,
48
+ "</date>": 30869,
49
+ "</defitem>": 30913,
50
+ "</descgrp>": 30914,
51
+ "</descrules>": 30817,
52
+ "</did>": 30853,
53
+ "</dimensions>": 30870,
54
+ "</div>": 30915,
55
+ "</dsc>": 30854,
56
+ "</dscgrp>": 30855,
57
+ "</ead>": 30818,
58
+ "</eadgrp>": 30819,
59
+ "</eadheader>": 30820,
60
+ "</eadid>": 30821,
61
+ "</edition>": 30822,
62
+ "</editionstmt>": 30823,
63
+ "</emph>": 30916,
64
+ "</entry>": 30917,
65
+ "</event>": 30918,
66
+ "</eventgrp>": 30919,
67
+ "</expan>": 30920,
68
+ "</extent>": 30871,
69
+ "</extptr>": 30949,
70
+ "</extptrloc>": 30950,
71
+ "</extref>": 30951,
72
+ "</extrefloc>": 30952,
73
+ "</famname>": 30895,
74
+ "</filedesc>": 30824,
75
+ "</fileplan>": 30872,
76
+ "</frontmatter>": 30825,
77
+ "</function>": 30896,
78
+ "</genreform>": 30897,
79
+ "</geogname>": 30898,
80
+ "</head01>": 30922,
81
+ "</head02>": 30923,
82
+ "</head>": 30921,
83
+ "</imprint>": 30924,
84
+ "</index>": 30925,
85
+ "</indexentry>": 30926,
86
+ "</item>": 30927,
87
+ "</label>": 30928,
88
+ "</langmaterial>": 30873,
89
+ "</language>": 30899,
90
+ "</langusage>": 30826,
91
+ "</lb>": 30929,
92
+ "</legalstatus>": 30874,
93
+ "</linkgrp>": 30953,
94
+ "</list>": 30930,
95
+ "</listhead>": 30931,
96
+ "</materialspec>": 30875,
97
+ "</name>": 30900,
98
+ "</namegrp>": 30932,
99
+ "</note>": 30933,
100
+ "</notestmt>": 30827,
101
+ "</num>": 30934,
102
+ "</occupation>": 30901,
103
+ "</odd>": 30876,
104
+ "</originalsloc>": 30877,
105
+ "</origination>": 30878,
106
+ "</otherfindaid>": 30879,
107
+ "</p>": 30935,
108
+ "</persname>": 30902,
109
+ "</physdesc>": 30880,
110
+ "</physfacet>": 30881,
111
+ "</physloc>": 30882,
112
+ "</phystech>": 30883,
113
+ "</prefercite>": 30884,
114
+ "</processinfo>": 30885,
115
+ "</profiledesc>": 30828,
116
+ "</ptr>": 30954,
117
+ "</ptrgrp>": 30955,
118
+ "</ptrloc>": 30956,
119
+ "</publicationstmt>": 30829,
120
+ "</publisher>": 30830,
121
+ "</ref>": 30957,
122
+ "</refloc>": 30958,
123
+ "</relatedmaterial>": 30886,
124
+ "</repository>": 30887,
125
+ "</resource>": 30959,
126
+ "</revisiondesc>": 30831,
127
+ "</row>": 30936,
128
+ "</runner>": 30937,
129
+ "</scopecontent>": 30888,
130
+ "</separatedmaterial>": 30889,
131
+ "</seriesstmt>": 30832,
132
+ "</sponsor>": 30833,
133
+ "</subarea>": 30938,
134
+ "</subject>": 30903,
135
+ "</subtitle>": 30834,
136
+ "</table>": 30939,
137
+ "</tbody>": 30940,
138
+ "</tgroup>": 30941,
139
+ "</thead>": 30942,
140
+ "</title>": 30904,
141
+ "</titlepage>": 30943,
142
+ "</titleproper>": 30835,
143
+ "</titlestmt>": 30836,
144
+ "</unitdate>": 30890,
145
+ "</unitid>": 30891,
146
+ "</unittitle>": 30892,
147
+ "</userestrict>": 30893,
148
+ "<abbr": 30759,
149
+ "<abbr>": 30613,
150
+ "<abstract": 30710,
151
+ "<abstract>": 30564,
152
+ "<accessrestrict": 30711,
153
+ "<accessrestrict>": 30565,
154
+ "<accruals": 30712,
155
+ "<accruals>": 30566,
156
+ "<acqinfo": 30713,
157
+ "<acqinfo>": 30567,
158
+ "<address": 30760,
159
+ "<address>": 30614,
160
+ "<addressline": 30761,
161
+ "<addressline>": 30615,
162
+ "<altformavail": 30714,
163
+ "<altformavail>": 30568,
164
+ "<appraisal": 30715,
165
+ "<appraisal>": 30569,
166
+ "<arc": 30798,
167
+ "<arc>": 30652,
168
+ "<archdesc": 30691,
169
+ "<archdesc>": 30545,
170
+ "<archdescgrp": 30692,
171
+ "<archdescgrp>": 30546,
172
+ "<archref": 30716,
173
+ "<archref>": 30570,
174
+ "<arrangement": 30717,
175
+ "<arrangement>": 30571,
176
+ "<author": 30668,
177
+ "<author>": 30522,
178
+ "<bibliography": 30718,
179
+ "<bibliography>": 30572,
180
+ "<bibref": 30719,
181
+ "<bibref>": 30573,
182
+ "<bibseries": 30762,
183
+ "<bibseries>": 30616,
184
+ "<bioghist": 30720,
185
+ "<bioghist>": 30574,
186
+ "<blockquote": 30763,
187
+ "<blockquote>": 30617,
188
+ "<c": 30693,
189
+ "<c01": 30694,
190
+ "<c01>": 30548,
191
+ "<c02": 30695,
192
+ "<c02>": 30549,
193
+ "<c03": 30696,
194
+ "<c03>": 30550,
195
+ "<c04": 30697,
196
+ "<c04>": 30551,
197
+ "<c05": 30698,
198
+ "<c05>": 30552,
199
+ "<c06": 30699,
200
+ "<c06>": 30553,
201
+ "<c07": 30700,
202
+ "<c07>": 30554,
203
+ "<c08": 30701,
204
+ "<c08>": 30555,
205
+ "<c09": 30702,
206
+ "<c09>": 30556,
207
+ "<c10": 30703,
208
+ "<c10>": 30557,
209
+ "<c11": 30704,
210
+ "<c11>": 30558,
211
+ "<c12": 30705,
212
+ "<c12>": 30559,
213
+ "<c>": 30547,
214
+ "<change": 30669,
215
+ "<change>": 30523,
216
+ "<chronitem": 30764,
217
+ "<chronitem>": 30618,
218
+ "<chronlist": 30765,
219
+ "<chronlist>": 30619,
220
+ "<colspec": 30766,
221
+ "<colspec>": 30620,
222
+ "<container": 30721,
223
+ "<container>": 30575,
224
+ "<controlaccess": 30706,
225
+ "<controlaccess>": 30560,
226
+ "<corpname": 30748,
227
+ "<corpname>": 30602,
228
+ "<creation": 30670,
229
+ "<creation>": 30524,
230
+ "<custodhist": 30722,
231
+ "<custodhist>": 30576,
232
+ "<dao": 30799,
233
+ "<dao>": 30653,
234
+ "<daodesc": 30800,
235
+ "<daodesc>": 30654,
236
+ "<daogrp": 30801,
237
+ "<daogrp>": 30655,
238
+ "<daoloc": 30802,
239
+ "<daoloc>": 30656,
240
+ "<date": 30723,
241
+ "<date>": 30577,
242
+ "<defitem": 30767,
243
+ "<defitem>": 30621,
244
+ "<descgrp": 30768,
245
+ "<descgrp>": 30622,
246
+ "<descrules": 30671,
247
+ "<descrules>": 30525,
248
+ "<did": 30707,
249
+ "<did>": 30561,
250
+ "<dimensions": 30724,
251
+ "<dimensions>": 30578,
252
+ "<div": 30769,
253
+ "<div>": 30623,
254
+ "<dsc": 30708,
255
+ "<dsc>": 30562,
256
+ "<dscgrp": 30709,
257
+ "<dscgrp>": 30563,
258
+ "<ead": 30672,
259
+ "<ead>": 30526,
260
+ "<eadgrp": 30673,
261
+ "<eadgrp>": 30527,
262
+ "<eadheader": 30674,
263
+ "<eadheader>": 30528,
264
+ "<eadid": 30675,
265
+ "<eadid>": 30529,
266
+ "<edition": 30676,
267
+ "<edition>": 30530,
268
+ "<editionstmt": 30677,
269
+ "<editionstmt>": 30531,
270
+ "<emph": 30770,
271
+ "<emph>": 30624,
272
+ "<entry": 30771,
273
+ "<entry>": 30625,
274
+ "<event": 30772,
275
+ "<event>": 30626,
276
+ "<eventgrp": 30773,
277
+ "<eventgrp>": 30627,
278
+ "<expan": 30774,
279
+ "<expan>": 30628,
280
+ "<extent": 30725,
281
+ "<extent>": 30579,
282
+ "<extptr": 30803,
283
+ "<extptr>": 30657,
284
+ "<extptrloc": 30804,
285
+ "<extptrloc>": 30658,
286
+ "<extref": 30805,
287
+ "<extref>": 30659,
288
+ "<extrefloc": 30806,
289
+ "<extrefloc>": 30660,
290
+ "<famname": 30749,
291
+ "<famname>": 30603,
292
+ "<filedesc": 30678,
293
+ "<filedesc>": 30532,
294
+ "<fileplan": 30726,
295
+ "<fileplan>": 30580,
296
+ "<frontmatter": 30679,
297
+ "<frontmatter>": 30533,
298
+ "<function": 30750,
299
+ "<function>": 30604,
300
+ "<genreform": 30751,
301
+ "<genreform>": 30605,
302
+ "<geogname": 30752,
303
+ "<geogname>": 30606,
304
+ "<head": 30775,
305
+ "<head01": 30776,
306
+ "<head01>": 30630,
307
+ "<head02": 30777,
308
+ "<head02>": 30631,
309
+ "<head>": 30629,
310
+ "<imprint": 30778,
311
+ "<imprint>": 30632,
312
+ "<index": 30779,
313
+ "<index>": 30633,
314
+ "<indexentry": 30780,
315
+ "<indexentry>": 30634,
316
+ "<item": 30781,
317
+ "<item>": 30635,
318
+ "<label": 30782,
319
+ "<label>": 30636,
320
+ "<langmaterial": 30727,
321
+ "<langmaterial>": 30581,
322
+ "<language": 30753,
323
+ "<language>": 30607,
324
+ "<langusage": 30680,
325
+ "<langusage>": 30534,
326
+ "<lb": 30783,
327
+ "<lb>": 30637,
328
+ "<legalstatus": 30728,
329
+ "<legalstatus>": 30582,
330
+ "<linkgrp": 30807,
331
+ "<linkgrp>": 30661,
332
+ "<list": 30784,
333
+ "<list>": 30638,
334
+ "<listhead": 30785,
335
+ "<listhead>": 30639,
336
+ "<materialspec": 30729,
337
+ "<materialspec>": 30583,
338
+ "<name": 30754,
339
+ "<name>": 30608,
340
+ "<namegrp": 30786,
341
+ "<namegrp>": 30640,
342
+ "<note": 30787,
343
+ "<note>": 30641,
344
+ "<notestmt": 30681,
345
+ "<notestmt>": 30535,
346
+ "<num": 30788,
347
+ "<num>": 30642,
348
+ "<occupation": 30755,
349
+ "<occupation>": 30609,
350
+ "<odd": 30730,
351
+ "<odd>": 30584,
352
+ "<originalsloc": 30731,
353
+ "<originalsloc>": 30585,
354
+ "<origination": 30732,
355
+ "<origination>": 30586,
356
+ "<otherfindaid": 30733,
357
+ "<otherfindaid>": 30587,
358
+ "<p": 30789,
359
+ "<p>": 30643,
360
+ "<persname": 30756,
361
+ "<persname>": 30610,
362
+ "<physdesc": 30734,
363
+ "<physdesc>": 30588,
364
+ "<physfacet": 30735,
365
+ "<physfacet>": 30589,
366
+ "<physloc": 30736,
367
+ "<physloc>": 30590,
368
+ "<phystech": 30737,
369
+ "<phystech>": 30591,
370
+ "<prefercite": 30738,
371
+ "<prefercite>": 30592,
372
+ "<processinfo": 30739,
373
+ "<processinfo>": 30593,
374
+ "<profiledesc": 30682,
375
+ "<profiledesc>": 30536,
376
+ "<ptr": 30808,
377
+ "<ptr>": 30662,
378
+ "<ptrgrp": 30809,
379
+ "<ptrgrp>": 30663,
380
+ "<ptrloc": 30810,
381
+ "<ptrloc>": 30664,
382
+ "<publicationstmt": 30683,
383
+ "<publicationstmt>": 30537,
384
+ "<publisher": 30684,
385
+ "<publisher>": 30538,
386
+ "<ref": 30811,
387
+ "<ref>": 30665,
388
+ "<refloc": 30812,
389
+ "<refloc>": 30666,
390
+ "<relatedmaterial": 30740,
391
+ "<relatedmaterial>": 30594,
392
+ "<repository": 30741,
393
+ "<repository>": 30595,
394
+ "<resource": 30813,
395
+ "<resource>": 30667,
396
+ "<revisiondesc": 30685,
397
+ "<revisiondesc>": 30539,
398
+ "<row": 30790,
399
+ "<row>": 30644,
400
+ "<runner": 30791,
401
+ "<runner>": 30645,
402
+ "<scopecontent": 30742,
403
+ "<scopecontent>": 30596,
404
+ "<separatedmaterial": 30743,
405
+ "<separatedmaterial>": 30597,
406
+ "<seriesstmt": 30686,
407
+ "<seriesstmt>": 30540,
408
+ "<sponsor": 30687,
409
+ "<sponsor>": 30541,
410
+ "<subarea": 30792,
411
+ "<subarea>": 30646,
412
+ "<subject": 30757,
413
+ "<subject>": 30611,
414
+ "<subtitle": 30688,
415
+ "<subtitle>": 30542,
416
+ "<table": 30793,
417
+ "<table>": 30647,
418
+ "<tbody": 30794,
419
+ "<tbody>": 30648,
420
+ "<tgroup": 30795,
421
+ "<tgroup>": 30649,
422
+ "<thead": 30796,
423
+ "<thead>": 30650,
424
+ "<title": 30758,
425
+ "<title>": 30612,
426
+ "<titlepage": 30797,
427
+ "<titlepage>": 30651,
428
+ "<titleproper": 30689,
429
+ "<titleproper>": 30543,
430
+ "<titlestmt": 30690,
431
+ "<titlestmt>": 30544,
432
+ "<unitdate": 30744,
433
+ "<unitdate>": 30598,
434
+ "<unitid": 30745,
435
+ "<unitid>": 30599,
436
+ "<unittitle": 30746,
437
+ "<unittitle>": 30600,
438
+ "<userestrict": 30747,
439
+ "<userestrict>": 30601,
440
+ "ABBR": 30970,
441
+ "ALIGN": 31015,
442
+ "ALTHEAD": 30971,
443
+ "ALTRENDER": 30972,
444
+ "ARCROLE": 31003,
445
+ "AUDIENCE": 30973,
446
+ "AUTHFILENUMBER": 30993,
447
+ "CALENDAR": 30994,
448
+ "CERTAINTY": 30974,
449
+ "CHAR": 31016,
450
+ "CHAROFF": 31017,
451
+ "COLNAME": 31018,
452
+ "COLNUM": 31019,
453
+ "COLS": 31020,
454
+ "COLSEP": 31021,
455
+ "COLWIDTH": 31022,
456
+ "CONTINUATION": 30975,
457
+ "COUNTRYCODE": 30976,
458
+ "COUNTRYENCODING": 30960,
459
+ "DATECHAR": 30977,
460
+ "DATEENCODING": 30961,
461
+ "ENCODINGANALOG": 30978,
462
+ "ENTITYREF": 31004,
463
+ "ERA": 30995,
464
+ "EXPAN": 30979,
465
+ "FINDAIDSTATUS": 30962,
466
+ "FRAME": 31023,
467
+ "FROM": 31005,
468
+ "HREF": 31006,
469
+ "ID": 31007,
470
+ "IDENTIFIER": 30963,
471
+ "LABEL": 30980,
472
+ "LANGCODE": 30996,
473
+ "LANGENCODING": 30964,
474
+ "LEVEL": 30981,
475
+ "LINKTYPE": 31008,
476
+ "MAINAGENCYCODE": 30982,
477
+ "MARK": 30983,
478
+ "MOREROWS": 31024,
479
+ "NAMEEND": 31025,
480
+ "NAMEST": 31026,
481
+ "NORMAL": 30997,
482
+ "NUMERATION": 30984,
483
+ "OTHERLEVEL": 30985,
484
+ "OTHERTYPE": 30986,
485
+ "PARENT": 31009,
486
+ "PGWIDE": 31027,
487
+ "PLACEMENT": 30987,
488
+ "PUBLICID": 30988,
489
+ "RELATEDENCODING": 30965,
490
+ "RENDER": 30989,
491
+ "REPOSITORYCODE": 30990,
492
+ "REPOSITORYENCODING": 30966,
493
+ "ROLE": 30998,
494
+ "ROWSEP": 31028,
495
+ "RULES": 30999,
496
+ "SCRIPTCODE": 31000,
497
+ "SCRIPTENCODING": 30967,
498
+ "SHOW": 31010,
499
+ "SOURCE": 31001,
500
+ "TARGET": 31011,
501
+ "TITLE": 31012,
502
+ "TO": 31013,
503
+ "TPATTERN": 31029,
504
+ "TYPECTUATE": 31002,
505
+ "UNIT": 30991,
506
+ "URL": 30968,
507
+ "URN": 30969,
508
+ "VALIGN": 31030,
509
+ "XMLNS": 30992,
510
+ "XPOINTER": 31014,
511
+ "abbr": 31041,
512
+ "althead": 31042,
513
+ "altrender": 31043,
514
+ "arcrole": 31059,
515
+ "authfilenumber": 31055,
516
+ "charoff": 31064,
517
+ "colname": 31065,
518
+ "colnum": 31066,
519
+ "cols": 31067,
520
+ "colsep": 31068,
521
+ "colwidth": 31069,
522
+ "countrycode": 31044,
523
+ "countryencoding": 31031,
524
+ "datechar": 31045,
525
+ "dateencoding": 31032,
526
+ "encodinganalog": 31046,
527
+ "entityref": 31060,
528
+ "expan": 31047,
529
+ "findaidstatus": 31033,
530
+ "href": 31061,
531
+ "identifier": 31034,
532
+ "langcode": 31056,
533
+ "langencoding": 31035,
534
+ "linktype": 31062,
535
+ "mainagencycode": 31048,
536
+ "morerows": 31070,
537
+ "nameend": 31071,
538
+ "namest": 31072,
539
+ "numeration": 31049,
540
+ "otherlevel": 31050,
541
+ "othertype": 31051,
542
+ "pgwide": 31073,
543
+ "publicid": 31052,
544
+ "relatedencoding": 31036,
545
+ "repositorycode": 31053,
546
+ "repositoryencoding": 31037,
547
+ "rowsep": 31074,
548
+ "scriptcode": 31057,
549
+ "scriptencoding": 31038,
550
+ "tpattern": 31075,
551
+ "typectuate": 31058,
552
+ "url": 31039,
553
+ "urn": 31040,
554
+ "valign": 31076,
555
+ "xmlns": 31054,
556
+ "xpointer": 31063
557
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,619 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<author>",
4
+ "<change>",
5
+ "<creation>",
6
+ "<descrules>",
7
+ "<ead>",
8
+ "<eadgrp>",
9
+ "<eadheader>",
10
+ "<eadid>",
11
+ "<edition>",
12
+ "<editionstmt>",
13
+ "<filedesc>",
14
+ "<frontmatter>",
15
+ "<langusage>",
16
+ "<notestmt>",
17
+ "<profiledesc>",
18
+ "<publicationstmt>",
19
+ "<publisher>",
20
+ "<revisiondesc>",
21
+ "<seriesstmt>",
22
+ "<sponsor>",
23
+ "<subtitle>",
24
+ "<titleproper>",
25
+ "<titlestmt>",
26
+ "<archdesc>",
27
+ "<archdescgrp>",
28
+ "<c>",
29
+ "<c01>",
30
+ "<c02>",
31
+ "<c03>",
32
+ "<c04>",
33
+ "<c05>",
34
+ "<c06>",
35
+ "<c07>",
36
+ "<c08>",
37
+ "<c09>",
38
+ "<c10>",
39
+ "<c11>",
40
+ "<c12>",
41
+ "<controlaccess>",
42
+ "<did>",
43
+ "<dsc>",
44
+ "<dscgrp>",
45
+ "<abstract>",
46
+ "<accessrestrict>",
47
+ "<accruals>",
48
+ "<acqinfo>",
49
+ "<altformavail>",
50
+ "<appraisal>",
51
+ "<archref>",
52
+ "<arrangement>",
53
+ "<bibliography>",
54
+ "<bibref>",
55
+ "<bioghist>",
56
+ "<container>",
57
+ "<custodhist>",
58
+ "<date>",
59
+ "<dimensions>",
60
+ "<extent>",
61
+ "<fileplan>",
62
+ "<langmaterial>",
63
+ "<legalstatus>",
64
+ "<materialspec>",
65
+ "<odd>",
66
+ "<originalsloc>",
67
+ "<origination>",
68
+ "<otherfindaid>",
69
+ "<physdesc>",
70
+ "<physfacet>",
71
+ "<physloc>",
72
+ "<phystech>",
73
+ "<prefercite>",
74
+ "<processinfo>",
75
+ "<relatedmaterial>",
76
+ "<repository>",
77
+ "<scopecontent>",
78
+ "<separatedmaterial>",
79
+ "<unitdate>",
80
+ "<unitid>",
81
+ "<unittitle>",
82
+ "<userestrict>",
83
+ "<corpname>",
84
+ "<famname>",
85
+ "<function>",
86
+ "<genreform>",
87
+ "<geogname>",
88
+ "<language>",
89
+ "<name>",
90
+ "<occupation>",
91
+ "<persname>",
92
+ "<subject>",
93
+ "<title>",
94
+ "<abbr>",
95
+ "<address>",
96
+ "<addressline>",
97
+ "<bibseries>",
98
+ "<blockquote>",
99
+ "<chronitem>",
100
+ "<chronlist>",
101
+ "<colspec>",
102
+ "<defitem>",
103
+ "<descgrp>",
104
+ "<div>",
105
+ "<emph>",
106
+ "<entry>",
107
+ "<event>",
108
+ "<eventgrp>",
109
+ "<expan>",
110
+ "<head>",
111
+ "<head01>",
112
+ "<head02>",
113
+ "<imprint>",
114
+ "<index>",
115
+ "<indexentry>",
116
+ "<item>",
117
+ "<label>",
118
+ "<lb>",
119
+ "<list>",
120
+ "<listhead>",
121
+ "<namegrp>",
122
+ "<note>",
123
+ "<num>",
124
+ "<p>",
125
+ "<row>",
126
+ "<runner>",
127
+ "<subarea>",
128
+ "<table>",
129
+ "<tbody>",
130
+ "<tgroup>",
131
+ "<thead>",
132
+ "<titlepage>",
133
+ "<arc>",
134
+ "<dao>",
135
+ "<daodesc>",
136
+ "<daogrp>",
137
+ "<daoloc>",
138
+ "<extptr>",
139
+ "<extptrloc>",
140
+ "<extref>",
141
+ "<extrefloc>",
142
+ "<linkgrp>",
143
+ "<ptr>",
144
+ "<ptrgrp>",
145
+ "<ptrloc>",
146
+ "<ref>",
147
+ "<refloc>",
148
+ "<resource>",
149
+ "<author",
150
+ "<change",
151
+ "<creation",
152
+ "<descrules",
153
+ "<ead",
154
+ "<eadgrp",
155
+ "<eadheader",
156
+ "<eadid",
157
+ "<edition",
158
+ "<editionstmt",
159
+ "<filedesc",
160
+ "<frontmatter",
161
+ "<langusage",
162
+ "<notestmt",
163
+ "<profiledesc",
164
+ "<publicationstmt",
165
+ "<publisher",
166
+ "<revisiondesc",
167
+ "<seriesstmt",
168
+ "<sponsor",
169
+ "<subtitle",
170
+ "<titleproper",
171
+ "<titlestmt",
172
+ "<archdesc",
173
+ "<archdescgrp",
174
+ "<c",
175
+ "<c01",
176
+ "<c02",
177
+ "<c03",
178
+ "<c04",
179
+ "<c05",
180
+ "<c06",
181
+ "<c07",
182
+ "<c08",
183
+ "<c09",
184
+ "<c10",
185
+ "<c11",
186
+ "<c12",
187
+ "<controlaccess",
188
+ "<did",
189
+ "<dsc",
190
+ "<dscgrp",
191
+ "<abstract",
192
+ "<accessrestrict",
193
+ "<accruals",
194
+ "<acqinfo",
195
+ "<altformavail",
196
+ "<appraisal",
197
+ "<archref",
198
+ "<arrangement",
199
+ "<bibliography",
200
+ "<bibref",
201
+ "<bioghist",
202
+ "<container",
203
+ "<custodhist",
204
+ "<date",
205
+ "<dimensions",
206
+ "<extent",
207
+ "<fileplan",
208
+ "<langmaterial",
209
+ "<legalstatus",
210
+ "<materialspec",
211
+ "<odd",
212
+ "<originalsloc",
213
+ "<origination",
214
+ "<otherfindaid",
215
+ "<physdesc",
216
+ "<physfacet",
217
+ "<physloc",
218
+ "<phystech",
219
+ "<prefercite",
220
+ "<processinfo",
221
+ "<relatedmaterial",
222
+ "<repository",
223
+ "<scopecontent",
224
+ "<separatedmaterial",
225
+ "<unitdate",
226
+ "<unitid",
227
+ "<unittitle",
228
+ "<userestrict",
229
+ "<corpname",
230
+ "<famname",
231
+ "<function",
232
+ "<genreform",
233
+ "<geogname",
234
+ "<language",
235
+ "<name",
236
+ "<occupation",
237
+ "<persname",
238
+ "<subject",
239
+ "<title",
240
+ "<abbr",
241
+ "<address",
242
+ "<addressline",
243
+ "<bibseries",
244
+ "<blockquote",
245
+ "<chronitem",
246
+ "<chronlist",
247
+ "<colspec",
248
+ "<defitem",
249
+ "<descgrp",
250
+ "<div",
251
+ "<emph",
252
+ "<entry",
253
+ "<event",
254
+ "<eventgrp",
255
+ "<expan",
256
+ "<head",
257
+ "<head01",
258
+ "<head02",
259
+ "<imprint",
260
+ "<index",
261
+ "<indexentry",
262
+ "<item",
263
+ "<label",
264
+ "<lb",
265
+ "<list",
266
+ "<listhead",
267
+ "<namegrp",
268
+ "<note",
269
+ "<num",
270
+ "<p",
271
+ "<row",
272
+ "<runner",
273
+ "<subarea",
274
+ "<table",
275
+ "<tbody",
276
+ "<tgroup",
277
+ "<thead",
278
+ "<titlepage",
279
+ "<arc",
280
+ "<dao",
281
+ "<daodesc",
282
+ "<daogrp",
283
+ "<daoloc",
284
+ "<extptr",
285
+ "<extptrloc",
286
+ "<extref",
287
+ "<extrefloc",
288
+ "<linkgrp",
289
+ "<ptr",
290
+ "<ptrgrp",
291
+ "<ptrloc",
292
+ "<ref",
293
+ "<refloc",
294
+ "<resource",
295
+ "</author>",
296
+ "</change>",
297
+ "</creation>",
298
+ "</descrules>",
299
+ "</ead>",
300
+ "</eadgrp>",
301
+ "</eadheader>",
302
+ "</eadid>",
303
+ "</edition>",
304
+ "</editionstmt>",
305
+ "</filedesc>",
306
+ "</frontmatter>",
307
+ "</langusage>",
308
+ "</notestmt>",
309
+ "</profiledesc>",
310
+ "</publicationstmt>",
311
+ "</publisher>",
312
+ "</revisiondesc>",
313
+ "</seriesstmt>",
314
+ "</sponsor>",
315
+ "</subtitle>",
316
+ "</titleproper>",
317
+ "</titlestmt>",
318
+ "</archdesc>",
319
+ "</archdescgrp>",
320
+ "</c>",
321
+ "</c01>",
322
+ "</c02>",
323
+ "</c03>",
324
+ "</c04>",
325
+ "</c05>",
326
+ "</c06>",
327
+ "</c07>",
328
+ "</c08>",
329
+ "</c09>",
330
+ "</c10>",
331
+ "</c11>",
332
+ "</c12>",
333
+ "</controlaccess>",
334
+ "</did>",
335
+ "</dsc>",
336
+ "</dscgrp>",
337
+ "</abstract>",
338
+ "</accessrestrict>",
339
+ "</accruals>",
340
+ "</acqinfo>",
341
+ "</altformavail>",
342
+ "</appraisal>",
343
+ "</archref>",
344
+ "</arrangement>",
345
+ "</bibliography>",
346
+ "</bibref>",
347
+ "</bioghist>",
348
+ "</container>",
349
+ "</custodhist>",
350
+ "</date>",
351
+ "</dimensions>",
352
+ "</extent>",
353
+ "</fileplan>",
354
+ "</langmaterial>",
355
+ "</legalstatus>",
356
+ "</materialspec>",
357
+ "</odd>",
358
+ "</originalsloc>",
359
+ "</origination>",
360
+ "</otherfindaid>",
361
+ "</physdesc>",
362
+ "</physfacet>",
363
+ "</physloc>",
364
+ "</phystech>",
365
+ "</prefercite>",
366
+ "</processinfo>",
367
+ "</relatedmaterial>",
368
+ "</repository>",
369
+ "</scopecontent>",
370
+ "</separatedmaterial>",
371
+ "</unitdate>",
372
+ "</unitid>",
373
+ "</unittitle>",
374
+ "</userestrict>",
375
+ "</corpname>",
376
+ "</famname>",
377
+ "</function>",
378
+ "</genreform>",
379
+ "</geogname>",
380
+ "</language>",
381
+ "</name>",
382
+ "</occupation>",
383
+ "</persname>",
384
+ "</subject>",
385
+ "</title>",
386
+ "</abbr>",
387
+ "</address>",
388
+ "</addressline>",
389
+ "</bibseries>",
390
+ "</blockquote>",
391
+ "</chronitem>",
392
+ "</chronlist>",
393
+ "</colspec>",
394
+ "</defitem>",
395
+ "</descgrp>",
396
+ "</div>",
397
+ "</emph>",
398
+ "</entry>",
399
+ "</event>",
400
+ "</eventgrp>",
401
+ "</expan>",
402
+ "</head>",
403
+ "</head01>",
404
+ "</head02>",
405
+ "</imprint>",
406
+ "</index>",
407
+ "</indexentry>",
408
+ "</item>",
409
+ "</label>",
410
+ "</lb>",
411
+ "</list>",
412
+ "</listhead>",
413
+ "</namegrp>",
414
+ "</note>",
415
+ "</num>",
416
+ "</p>",
417
+ "</row>",
418
+ "</runner>",
419
+ "</subarea>",
420
+ "</table>",
421
+ "</tbody>",
422
+ "</tgroup>",
423
+ "</thead>",
424
+ "</titlepage>",
425
+ "</arc>",
426
+ "</dao>",
427
+ "</daodesc>",
428
+ "</daogrp>",
429
+ "</daoloc>",
430
+ "</extptr>",
431
+ "</extptrloc>",
432
+ "</extref>",
433
+ "</extrefloc>",
434
+ "</linkgrp>",
435
+ "</ptr>",
436
+ "</ptrgrp>",
437
+ "</ptrloc>",
438
+ "</ref>",
439
+ "</refloc>",
440
+ "</resource>",
441
+ "COUNTRYENCODING",
442
+ "DATEENCODING",
443
+ "FINDAIDSTATUS",
444
+ "IDENTIFIER",
445
+ "LANGENCODING",
446
+ "RELATEDENCODING",
447
+ "REPOSITORYENCODING",
448
+ "SCRIPTENCODING",
449
+ "URL",
450
+ "URN",
451
+ "ABBR",
452
+ "ALTHEAD",
453
+ "ALTRENDER",
454
+ "AUDIENCE",
455
+ "CERTAINTY",
456
+ "CONTINUATION",
457
+ "COUNTRYCODE",
458
+ "DATECHAR",
459
+ "ENCODINGANALOG",
460
+ "EXPAN",
461
+ "LABEL",
462
+ "LEVEL",
463
+ "MAINAGENCYCODE",
464
+ "MARK",
465
+ "NUMERATION",
466
+ "OTHERLEVEL",
467
+ "OTHERTYPE",
468
+ "PLACEMENT",
469
+ "PUBLICID",
470
+ "RENDER",
471
+ "REPOSITORYCODE",
472
+ "UNIT",
473
+ "XMLNS",
474
+ "AUTHFILENUMBER",
475
+ "CALENDAR",
476
+ "ERA",
477
+ "LANGCODE",
478
+ "NORMAL",
479
+ "ROLE",
480
+ "RULES",
481
+ "SCRIPTCODE",
482
+ "SOURCE",
483
+ "TYPECTUATE",
484
+ "ARCROLE",
485
+ "ENTITYREF",
486
+ "FROM",
487
+ "HREF",
488
+ "ID",
489
+ "LINKTYPE",
490
+ "PARENT",
491
+ "SHOW",
492
+ "TARGET",
493
+ "TITLE",
494
+ "TO",
495
+ "XPOINTER",
496
+ "ALIGN",
497
+ "CHAR",
498
+ "CHAROFF",
499
+ "COLNAME",
500
+ "COLNUM",
501
+ "COLS",
502
+ "COLSEP",
503
+ "COLWIDTH",
504
+ "FRAME",
505
+ "MOREROWS",
506
+ "NAMEEND",
507
+ "NAMEST",
508
+ "PGWIDE",
509
+ "ROWSEP",
510
+ "TPATTERN",
511
+ "VALIGN",
512
+ "countryencoding",
513
+ "dateencoding",
514
+ "findaidstatus",
515
+ "identifier",
516
+ "langencoding",
517
+ "relatedencoding",
518
+ "repositoryencoding",
519
+ "scriptencoding",
520
+ "url",
521
+ "urn",
522
+ "abbr",
523
+ "althead",
524
+ "altrender",
525
+ "audience",
526
+ "certainty",
527
+ "continuation",
528
+ "countrycode",
529
+ "datechar",
530
+ "encodinganalog",
531
+ "expan",
532
+ "label",
533
+ "level",
534
+ "mainagencycode",
535
+ "mark",
536
+ "numeration",
537
+ "otherlevel",
538
+ "othertype",
539
+ "placement",
540
+ "publicid",
541
+ "render",
542
+ "repositorycode",
543
+ "unit",
544
+ "xmlns",
545
+ "authfilenumber",
546
+ "calendar",
547
+ "era",
548
+ "langcode",
549
+ "normal",
550
+ "role",
551
+ "rules",
552
+ "scriptcode",
553
+ "source",
554
+ "typectuate",
555
+ "arcrole",
556
+ "entityref",
557
+ "from",
558
+ "href",
559
+ "id",
560
+ "linktype",
561
+ "parent",
562
+ "show",
563
+ "target",
564
+ "title",
565
+ "to",
566
+ "xpointer",
567
+ "align",
568
+ "char",
569
+ "charoff",
570
+ "colname",
571
+ "colnum",
572
+ "cols",
573
+ "colsep",
574
+ "colwidth",
575
+ "frame",
576
+ "morerows",
577
+ "nameend",
578
+ "namest",
579
+ "pgwide",
580
+ "rowsep",
581
+ "tpattern",
582
+ "valign"
583
+ ],
584
+ "cls_token": {
585
+ "content": "[CLS]",
586
+ "lstrip": false,
587
+ "normalized": false,
588
+ "rstrip": false,
589
+ "single_word": false
590
+ },
591
+ "mask_token": {
592
+ "content": "[MASK]",
593
+ "lstrip": false,
594
+ "normalized": false,
595
+ "rstrip": false,
596
+ "single_word": false
597
+ },
598
+ "pad_token": {
599
+ "content": "[PAD]",
600
+ "lstrip": false,
601
+ "normalized": false,
602
+ "rstrip": false,
603
+ "single_word": false
604
+ },
605
+ "sep_token": {
606
+ "content": "[SEP]",
607
+ "lstrip": false,
608
+ "normalized": false,
609
+ "rstrip": false,
610
+ "single_word": false
611
+ },
612
+ "unk_token": {
613
+ "content": "[UNK]",
614
+ "lstrip": false,
615
+ "normalized": false,
616
+ "rstrip": false,
617
+ "single_word": false
618
+ }
619
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff
 
vocab.txt ADDED
The diff for this file is too large to render. See raw diff