{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "^", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "_", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": " ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "§", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 4, "content": "°", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": null, "pre_tokenizer": null, "post_processor": { "type": "TemplateProcessing", "single": [ { "SpecialToken": { "id": "^", "type_id": 0 } }, { "Sequence": { "id": "A", "type_id": 0 } }, { "SpecialToken": { "id": "_", "type_id": 0 } } ], "pair": [ { "Sequence": { "id": "A", "type_id": 0 } }, { "Sequence": { "id": "B", "type_id": 1 } } ], "special_tokens": { "^": { "id": "^", "ids": [ 0 ], "tokens": [ "^" ] }, "_": { "id": "_", "ids": [ 1 ], "tokens": [ "_" ] } } }, "decoder": null, "model": { "type": "Unigram", "unk_id": 3, "vocab": [ [ "^", 0.0 ], [ "_", 0.0 ], [ " ", 0.0 ], [ "§", 0.0 ], [ "°", 0.0 ], [ "C", -2.2339120063759843 ], [ "c1", -3.108178186508052 ], [ "cc", -3.2719370147830347 ], [ "N", -3.2932537094066436 ], [ "O", -3.294834188186176 ], [ "n", -3.3053461817854917 ], [ ")", -3.3475212288619822 ], [ "CC", -3.4312206506012544 ], [ "(", -3.5624560302888746 ], [ "c(", -3.765813889966134 ], [ "c", -3.8218193366886393 ], [ "C(=O)", -3.856958418790814 ], [ "c3", -3.9254062324983607 ], [ "=", -3.975334503903719 ], [ "c2", -4.003423620506245 ], [ "1", -4.112680698145304 ], [ "(C)", -4.121209869936163 ], [ "2", -4.1361761826809555 ], [ "C1", -4.21768981835341 ], [ "c1ccc(", -4.251735247120498 ], [ "-", -4.252411803847931 ], [ "2)", -4.270562896654599 ], [ "cc(", -4.276471688527964 ], [ "C(", -4.3086368671374995 ], [ "CO", -4.337904719405833 ], [ "C(=O)N", -4.428846224698317 ], [ "3)", -4.449247292348714 ], [ "C2", -4.522375726063721 ], [ "O)", -4.5371556203299654 ], [ "CCC", -4.589271525409394 ], [ "3", -4.724794788518322 ], [ "C(C)", -4.750491328820496 ], [ "cc1", -4.774712806549024 ], [ "cn", -4.786244699312933 ], [ "NC(=O)", -4.789019365908235 ], [ "CC1", -4.79366206708262 ], [ "=O)", -4.843480092939821 ], [ "C)", -4.8437048190443175 ], [ "S", -4.850239832224922 ], [ "n1", -4.864510173862389 ], [ "c1ccccc1", -4.92499844194878 ], [ "c2ccc(", -5.024291675089154 ], [ "[nH]", -5.110243493192227 ], [ "c4", -5.110439906920133 ], [ "s", -5.124712107023752 ], [ "N1", -5.13109013422166 ], [ "o", -5.144341645447907 ], [ "F)", -5.173986710079323 ], [ "N(C", -5.185845166641148 ], [ "S(=O)(=O)", -5.212557352968634 ], [ "Cl)", -5.220168182623269 ], [ "c2ccc", -5.2702619038121234 ], [ "C(O)", -5.276647874822254 ], [ "2)cc1", -5.314256517516519 ], [ "O=C(", -5.316263615262265 ], [ "c3ccccc3", -5.35319737432803 ], [ "4", -5.365584577986091 ], [ "c(Cl)c", -5.396247268723645 ], [ "C=C", -5.435516000963592 ], [ "5", -5.521017741806011 ], [ "N2CC", -5.731120719935269 ], [ "c(F)c", -5.761758311176596 ], [ "C(F)(F)F)", -5.816389560959275 ], [ "[", -5.9137028227376565 ], [ "]", -5.9137028227376565 ], [ "c(OC)c", -5.948373907493249 ], [ "c(-c3cc", -6.076591700432848 ], [ "Br)", -6.173257390983636 ], [ "#", -6.406719606169013 ], [ "[N+](=O)[O-])", -6.4134780734073065 ], [ "+", -6.459711891957882 ], [ "F", -6.6661526991253 ], [ "P", -7.117461698495431 ], [ "6", -7.298200155096458 ], [ "B", -8.020755755896921 ], [ "I", -8.076335551264686 ], [ "7", -9.064406674700315 ], [ "H", -9.766986825131063 ], [ "8", -10.67648343715202 ], [ "9", -11.6208264490498 ], [ "%", -13.14094059722758 ], [ "0", -13.512972359677438 ], [ "p", -13.73258273972798 ], [ "l", -17.659453778747075 ], [ "e", -17.659553778747075 ], [ "i", -17.659653778747074 ], [ "r", -17.659753778747074 ], [ "b", -17.659753778747074 ] ] } }