goldfish-models committed on
Commit
183ba99
1 Parent(s): b585c23

Upload tlh_latn_full tokenizer.

Browse files
added_tokens.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"[XXXXX176]": 47000, "[XXXXX131]": 46955, "[XXXXX136]": 46960, "[XXXXX217]": 47041, "[XXXXX117]": 46941, "[XXXXX19]": 46843, "[XXXXX162]": 46986, "[XXXXX20]": 46844, "[XXXXX179]": 47003, "[XXXXX273]": 47097, "[XXXXX21]": 46845, "[XXXXX191]": 47015, "[XXXXX32]": 46856, "[XXXXX1]": 46825, "[XXXXX92]": 46916, "[XXXXX270]": 47094, "[XXXXX182]": 47006, "[XXXXX50]": 46874, "[XXXXX214]": 47038, "[XXXXX265]": 47089, "[XXXXX124]": 46948, "[XXXXX249]": 47073, "[XXXXX104]": 46928, "[XXXXX105]": 46929, "[XXXXX67]": 46891, "[XXXXX193]": 47017, "[XXXXX143]": 46967, "[XXXXX141]": 46965, "[XXXXX79]": 46903, "[XXXXX116]": 46940, "[XXXXX253]": 47077, "[XXXXX34]": 46858, "[XXXXX87]": 46911, "[XXXXX139]": 46963, "[SEP]": 46821, "[XXXXX188]": 47012, "[XXXXX242]": 47066, "[XXXXX47]": 46871, "[XXXXX69]": 46893, "[XXXXX240]": 47064, "[XXXXX175]": 46999, "[XXXXX76]": 46900, "[XXXXX203]": 47027, "[XXXXX6]": 46830, "[XXXXX199]": 47023, "<pad>": 46822, "[XXXXX215]": 47039, "[XXXXX247]": 47071, "[XXXXX212]": 47036, "[XXXXX83]": 46907, "[XXXXX267]": 47091, "[XXXXX145]": 46969, "[XXXXX89]": 46913, "[XXXXX97]": 46921, "[XXXXX266]": 47090, "[XXXXX56]": 46880, "[XXXXX123]": 46947, "[XXXXX26]": 46850, "[XXXXX250]": 47074, "[XXXXX252]": 47076, "[XXXXX22]": 46846, "[XXXXX211]": 47035, "[XXXXX115]": 46939, "[XXXXX192]": 47016, "[XXXXX257]": 47081, "[XXXXX243]": 47067, "[XXXXX263]": 47087, "[XXXXX28]": 46852, "[XXXXX103]": 46927, "[XXXXX178]": 47002, "[XXXXX51]": 46875, "[XXXXX160]": 46984, "[XXXXX7]": 46831, "[XXXXX111]": 46935, "[XXXXX144]": 46968, "[XXXXX85]": 46909, "[XXXXX35]": 46859, "[XXXXX41]": 46865, "[XXXXX70]": 46894, "[XXXXX135]": 46959, "[XXXXX46]": 46870, "[XXXXX100]": 46924, "[XXXXX11]": 46835, "[XXXXX54]": 46878, "[XXXXX216]": 47040, "[XXXXX256]": 47080, "[XXXXX180]": 47004, "[XXXXX63]": 46887, "[XXXXX164]": 46988, "[XXXXX189]": 47013, "[XXXXX110]": 46934, "[XXXXX37]": 46861, "[XXXXX201]": 47025, "[XXXXX165]": 46989, "[XXXXX146]": 46970, "[XXXXX150]": 46974, "[XXXXX262]": 47086, 
"[XXXXX221]": 47045, "[XXXXX18]": 46842, "[XXXXX52]": 46876, "[XXXXX209]": 47033, "[XXXXX274]": 47098, "[XXXXX233]": 47057, "[XXXXX77]": 46901, "[XXXXX172]": 46996, "[XXXXX174]": 46998, "[XXXXX71]": 46895, "[CLS]": 46820, "[XXXXX205]": 47029, "[XXXXX112]": 46936, "[XXXXX130]": 46954, "[XXXXX239]": 47063, "[XXXXX23]": 46847, "[XXXXX86]": 46910, "[XXXXX94]": 46918, "[XXXXX66]": 46890, "[XXXXX229]": 47053, "[XXXXX155]": 46979, "[XXXXX244]": 47068, "[XXXXX278]": 47102, "[XXXXX246]": 47070, "[XXXXX156]": 46980, "[XXXXX55]": 46879, "[XXXXX9]": 46833, "[XXXXX129]": 46953, "[XXXXX177]": 47001, "[XXXXX38]": 46862, "[XXXXX49]": 46873, "[XXXXX200]": 47024, "[XXXXX161]": 46985, "[XXXXX13]": 46837, "[XXXXX268]": 47092, "[XXXXX207]": 47031, "[XXXXX186]": 47010, "[XXXXX198]": 47022, "[XXXXX163]": 46987, "[XXXXX151]": 46975, "[XXXXX157]": 46981, "[XXXXX62]": 46886, "[XXXXX185]": 47009, "[XXXXX226]": 47050, "[MASK]": 46823, "[XXXXX251]": 47075, "[XXXXX68]": 46892, "[XXXXX44]": 46868, "[XXXXX17]": 46841, "[XXXXX5]": 46829, "[XXXXX88]": 46912, "[XXXXX121]": 46945, "[XXXXX72]": 46896, "[XXXXX196]": 47020, "[XXXXX90]": 46914, "[XXXXX238]": 47062, "[XXXXX264]": 47088, "[XXXXX30]": 46854, "[XXXXX210]": 47034, "[XXXXX39]": 46863, "[XXXXX40]": 46864, "[XXXXX272]": 47096, "[XXXXX12]": 46836, "[XXXXX73]": 46897, "[XXXXX58]": 46882, "[XXXXX101]": 46925, "[XXXXX208]": 47032, "[XXXXX271]": 47095, "[XXXXX122]": 46946, "[XXXXX75]": 46899, "[XXXXX114]": 46938, "[XXXXX133]": 46957, "[XXXXX109]": 46933, "[XXXXX120]": 46944, "[XXXXX108]": 46932, "[XXXXX190]": 47014, "[XXXXX154]": 46978, "[XXXXX241]": 47065, "[XXXXX159]": 46983, "[XXXXX93]": 46917, "[XXXXX102]": 46926, "[XXXXX125]": 46949, "[XXXXX98]": 46922, "[XXXXX228]": 47052, "[XXXXX171]": 46995, "[XXXXX277]": 47101, "[XXXXX254]": 47078, "[XXXXX234]": 47058, "[XXXXX59]": 46883, "[XXXXX84]": 46908, "[XXXXX153]": 46977, "[XXXXX24]": 46848, "[XXXXX134]": 46958, "[XXXXX27]": 46851, "[XXXXX167]": 46991, "[XXXXX64]": 46888, "[XXXXX158]": 46982, 
"[XXXXX166]": 46990, "[XXXXX43]": 46867, "[XXXXX60]": 46884, "[XXXXX147]": 46971, "[XXXXX230]": 47054, "[XXXXX152]": 46976, "[XXXXX140]": 46964, "[XXXXX132]": 46956, "[XXXXX107]": 46931, "[XXXXX2]": 46826, "[XXXXX187]": 47011, "[XXXXX220]": 47044, "[XXXXX218]": 47042, "[XXXXX0]": 46824, "[XXXXX3]": 46827, "[XXXXX224]": 47048, "[XXXXX237]": 47061, "[XXXXX16]": 46840, "[XXXXX61]": 46885, "[XXXXX204]": 47028, "[XXXXX223]": 47047, "[XXXXX258]": 47082, "[XXXXX127]": 46951, "[XXXXX78]": 46902, "[XXXXX8]": 46832, "[XXXXX219]": 47043, "[XXXXX227]": 47051, "[XXXXX45]": 46869, "[XXXXX248]": 47072, "[XXXXX260]": 47084, "[XXXXX14]": 46838, "[XXXXX113]": 46937, "[XXXXX173]": 46997, "[XXXXX4]": 46828, "[XXXXX48]": 46872, "[XXXXX82]": 46906, "[XXXXX65]": 46889, "[XXXXX225]": 47049, "[XXXXX170]": 46994, "[XXXXX57]": 46881, "[XXXXX126]": 46950, "[XXXXX194]": 47018, "[XXXXX235]": 47059, "[XXXXX74]": 46898, "[XXXXX276]": 47100, "[XXXXX245]": 47069, "[XXXXX168]": 46992, "[XXXXX213]": 47037, "[XXXXX53]": 46877, "[XXXXX206]": 47030, "[XXXXX80]": 46904, "[XXXXX119]": 46943, "[XXXXX118]": 46942, "[XXXXX142]": 46966, "[XXXXX202]": 47026, "[XXXXX269]": 47093, "[XXXXX148]": 46972, "[XXXXX259]": 47083, "[XXXXX222]": 47046, "[XXXXX279]": 47103, "[XXXXX15]": 46839, "[XXXXX95]": 46919, "[XXXXX91]": 46915, "[XXXXX169]": 46993, "[XXXXX10]": 46834, "[XXXXX81]": 46905, "[XXXXX106]": 46930, "[XXXXX99]": 46923, "[XXXXX195]": 47019, "[XXXXX33]": 46857, "[XXXXX25]": 46849, "[XXXXX261]": 47085, "[XXXXX255]": 47079, "[XXXXX231]": 47055, "[XXXXX183]": 47007, "[XXXXX181]": 47005, "[XXXXX137]": 46961, "[XXXXX36]": 46860, "[XXXXX197]": 47021, "[XXXXX149]": 46973, "[XXXXX184]": 47008, "[XXXXX138]": 46962, "[XXXXX96]": 46920, "[XXXXX29]": 46853, "[XXXXX42]": 46866, "[XXXXX128]": 46952, "[XXXXX236]": 47060, "[XXXXX31]": 46855, "[XXXXX232]": 47056, "[XXXXX275]": 47099}
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "<unk>", "sep_token": "[SEP]", "pad_token": "<pad>", "cls_token": "[CLS]", "mask_token": {"content": "[MASK]", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false}, "additional_special_tokens": ["[XXXXX0]", "[XXXXX1]", "[XXXXX2]", "[XXXXX3]", "[XXXXX4]", "[XXXXX5]", "[XXXXX6]", "[XXXXX7]", "[XXXXX8]", "[XXXXX9]", "[XXXXX10]", "[XXXXX11]", "[XXXXX12]", "[XXXXX13]", "[XXXXX14]", "[XXXXX15]", "[XXXXX16]", "[XXXXX17]", "[XXXXX18]", "[XXXXX19]", "[XXXXX20]", "[XXXXX21]", "[XXXXX22]", "[XXXXX23]", "[XXXXX24]", "[XXXXX25]", "[XXXXX26]", "[XXXXX27]", "[XXXXX28]", "[XXXXX29]", "[XXXXX30]", "[XXXXX31]", "[XXXXX32]", "[XXXXX33]", "[XXXXX34]", "[XXXXX35]", "[XXXXX36]", "[XXXXX37]", "[XXXXX38]", "[XXXXX39]", "[XXXXX40]", "[XXXXX41]", "[XXXXX42]", "[XXXXX43]", "[XXXXX44]", "[XXXXX45]", "[XXXXX46]", "[XXXXX47]", "[XXXXX48]", "[XXXXX49]", "[XXXXX50]", "[XXXXX51]", "[XXXXX52]", "[XXXXX53]", "[XXXXX54]", "[XXXXX55]", "[XXXXX56]", "[XXXXX57]", "[XXXXX58]", "[XXXXX59]", "[XXXXX60]", "[XXXXX61]", "[XXXXX62]", "[XXXXX63]", "[XXXXX64]", "[XXXXX65]", "[XXXXX66]", "[XXXXX67]", "[XXXXX68]", "[XXXXX69]", "[XXXXX70]", "[XXXXX71]", "[XXXXX72]", "[XXXXX73]", "[XXXXX74]", "[XXXXX75]", "[XXXXX76]", "[XXXXX77]", "[XXXXX78]", "[XXXXX79]", "[XXXXX80]", "[XXXXX81]", "[XXXXX82]", "[XXXXX83]", "[XXXXX84]", "[XXXXX85]", "[XXXXX86]", "[XXXXX87]", "[XXXXX88]", "[XXXXX89]", "[XXXXX90]", "[XXXXX91]", "[XXXXX92]", "[XXXXX93]", "[XXXXX94]", "[XXXXX95]", "[XXXXX96]", "[XXXXX97]", "[XXXXX98]", "[XXXXX99]", "[XXXXX100]", "[XXXXX101]", "[XXXXX102]", "[XXXXX103]", "[XXXXX104]", "[XXXXX105]", "[XXXXX106]", "[XXXXX107]", "[XXXXX108]", "[XXXXX109]", "[XXXXX110]", "[XXXXX111]", "[XXXXX112]", "[XXXXX113]", "[XXXXX114]", "[XXXXX115]", "[XXXXX116]", "[XXXXX117]", "[XXXXX118]", "[XXXXX119]", "[XXXXX120]", "[XXXXX121]", "[XXXXX122]", "[XXXXX123]", "[XXXXX124]", "[XXXXX125]", "[XXXXX126]", "[XXXXX127]", "[XXXXX128]", "[XXXXX129]", 
"[XXXXX130]", "[XXXXX131]", "[XXXXX132]", "[XXXXX133]", "[XXXXX134]", "[XXXXX135]", "[XXXXX136]", "[XXXXX137]", "[XXXXX138]", "[XXXXX139]", "[XXXXX140]", "[XXXXX141]", "[XXXXX142]", "[XXXXX143]", "[XXXXX144]", "[XXXXX145]", "[XXXXX146]", "[XXXXX147]", "[XXXXX148]", "[XXXXX149]", "[XXXXX150]", "[XXXXX151]", "[XXXXX152]", "[XXXXX153]", "[XXXXX154]", "[XXXXX155]", "[XXXXX156]", "[XXXXX157]", "[XXXXX158]", "[XXXXX159]", "[XXXXX160]", "[XXXXX161]", "[XXXXX162]", "[XXXXX163]", "[XXXXX164]", "[XXXXX165]", "[XXXXX166]", "[XXXXX167]", "[XXXXX168]", "[XXXXX169]", "[XXXXX170]", "[XXXXX171]", "[XXXXX172]", "[XXXXX173]", "[XXXXX174]", "[XXXXX175]", "[XXXXX176]", "[XXXXX177]", "[XXXXX178]", "[XXXXX179]", "[XXXXX180]", "[XXXXX181]", "[XXXXX182]", "[XXXXX183]", "[XXXXX184]", "[XXXXX185]", "[XXXXX186]", "[XXXXX187]", "[XXXXX188]", "[XXXXX189]", "[XXXXX190]", "[XXXXX191]", "[XXXXX192]", "[XXXXX193]", "[XXXXX194]", "[XXXXX195]", "[XXXXX196]", "[XXXXX197]", "[XXXXX198]", "[XXXXX199]", "[XXXXX200]", "[XXXXX201]", "[XXXXX202]", "[XXXXX203]", "[XXXXX204]", "[XXXXX205]", "[XXXXX206]", "[XXXXX207]", "[XXXXX208]", "[XXXXX209]", "[XXXXX210]", "[XXXXX211]", "[XXXXX212]", "[XXXXX213]", "[XXXXX214]", "[XXXXX215]", "[XXXXX216]", "[XXXXX217]", "[XXXXX218]", "[XXXXX219]", "[XXXXX220]", "[XXXXX221]", "[XXXXX222]", "[XXXXX223]", "[XXXXX224]", "[XXXXX225]", "[XXXXX226]", "[XXXXX227]", "[XXXXX228]", "[XXXXX229]", "[XXXXX230]", "[XXXXX231]", "[XXXXX232]", "[XXXXX233]", "[XXXXX234]", "[XXXXX235]", "[XXXXX236]", "[XXXXX237]", "[XXXXX238]", "[XXXXX239]", "[XXXXX240]", "[XXXXX241]", "[XXXXX242]", "[XXXXX243]", "[XXXXX244]", "[XXXXX245]", "[XXXXX246]", "[XXXXX247]", "[XXXXX248]", "[XXXXX249]", "[XXXXX250]", "[XXXXX251]", "[XXXXX252]", "[XXXXX253]", "[XXXXX254]", "[XXXXX255]", "[XXXXX256]", "[XXXXX257]", "[XXXXX258]", "[XXXXX259]", "[XXXXX260]", "[XXXXX261]", "[XXXXX262]", "[XXXXX263]", "[XXXXX264]", "[XXXXX265]", "[XXXXX266]", "[XXXXX267]", "[XXXXX268]", "[XXXXX269]", "[XXXXX270]", "[XXXXX271]", 
"[XXXXX272]", "[XXXXX273]", "[XXXXX274]", "[XXXXX275]", "[XXXXX276]", "[XXXXX277]", "[XXXXX278]", "[XXXXX279]"]}
spiece.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:189cc1c3f31d8a5e66233a739802d666886ca52ae2abfa3a1a83dbc70c462889
3
+ size 1009836
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"do_lower_case": false, "remove_space": true, "keep_accents": true, "bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "<unk>", "sep_token": "[SEP]", "pad_token": "<pad>", "cls_token": "[CLS]", "mask_token": {"content": "[MASK]", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false, "__type": "AddedToken"}, "sp_model_kwargs": {}, "name_or_path": "models/full/tlh_latn_full", "model_input_names": ["input_ids", "attention_mask"], "special_tokens_map_file": "models/full/tlh_latn_full/special_tokens_map.json", "tokenizer_class": "AlbertTokenizer"}