{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "accelerator": "GPU", "gpuClass": "standard" }, "cells": [ { "cell_type": "code", "execution_count": null, "metadata": { "id": "v8u-nj2QMdOe" }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "NEhxV9lTIJSC", "outputId": "591217f5-abc4-4813-a8f0-f4c54e6040ac" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", "Requirement already satisfied: rdkit in /usr/local/lib/python3.10/dist-packages (2023.3.1)\n", "Requirement already satisfied: Pillow in /usr/local/lib/python3.10/dist-packages (from rdkit) (8.4.0)\n", "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from rdkit) (1.22.4)\n" ] } ], "source": [ "pip install rdkit" ] }, { "cell_type": "code", "source": [ "'''\n", "from google.colab import drive\n", "drive.mount('/content/drive')\n", "'''" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 36 }, "id": "z296l7b2M6CV", "outputId": "b02b2c1c-4e65-4f2d-c8a3-7fdceb18e79e" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "\"\\nfrom google.colab import drive\\ndrive.mount('/content/drive')\\n\"" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "string" } }, "metadata": {}, "execution_count": 2 } ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "5m_uOzL0IJOQ" }, "outputs": [], "source": [ "import math\n", "import logging\n", "\n", "import torch\n", "import torch.nn as nn\n", "from torch.nn import functional as F\n", "\n", "logger = logging.getLogger(__name__)\n", "\n", "class GPTConfig:\n", " \"\"\" base GPT 
class CausalSelfAttention(nn.Module):
    """
    Multi-head masked (causal) self-attention followed by an output projection.

    Each position may attend only to itself and to earlier positions of the
    same sequence; this is enforced with a lower-triangular mask registered as
    the ``mask`` buffer.

    Args:
        config: object exposing ``n_embd``, ``n_head``, ``attn_pdrop``,
            ``resid_pdrop`` and ``block_size``. ``n_embd`` must be divisible
            by ``n_head``.
    """

    def __init__(self, config):
        super().__init__()
        assert config.n_embd % config.n_head == 0
        # key, query, value projections for all heads
        self.key = nn.Linear(config.n_embd, config.n_embd)
        self.query = nn.Linear(config.n_embd, config.n_embd)
        self.value = nn.Linear(config.n_embd, config.n_embd)
        # regularization
        self.attn_drop = nn.Dropout(config.attn_pdrop)
        self.resid_drop = nn.Dropout(config.resid_pdrop)
        # output projection
        self.proj = nn.Linear(config.n_embd, config.n_embd)
        # causal mask to ensure that attention is only applied to the left in the input sequence
        causal = torch.tril(torch.ones(config.block_size, config.block_size))
        self.register_buffer("mask", causal.view(1, 1, config.block_size, config.block_size))
        self.n_head = config.n_head

    def forward(self, x, layer_past=None):
        """
        Apply causal self-attention.

        Args:
            x: tensor of size (B, T, C); C is split evenly across ``n_head`` heads
                and T must not exceed the ``block_size`` the mask was built with.
            layer_past: accepted for interface compatibility; not used.

        Returns:
            Tensor of size (B, T, C).
        """
        B, T, C = x.size()
        head_size = C // self.n_head

        # calculate query, key, values for all heads in batch and move head forward to be the batch dim
        k = self.key(x).view(B, T, self.n_head, head_size).transpose(1, 2)    # (B, nh, T, hs)
        q = self.query(x).view(B, T, self.n_head, head_size).transpose(1, 2)  # (B, nh, T, hs)
        v = self.value(x).view(B, T, self.n_head, head_size).transpose(1, 2)  # (B, nh, T, hs)

        # causal self-attention; Self-attend: (B, nh, T, hs) x (B, nh, hs, T) -> (B, nh, T, T)
        att = (q @ k.transpose(-2, -1)) * (1.0 / math.sqrt(k.size(-1)))
        # Use -inf rather than a large finite constant: it is representable in every
        # floating dtype (-1e10 is not in float16) and gives exactly zero weight after
        # softmax. The diagonal is never masked, so no row is entirely -inf.
        att = att.masked_fill(self.mask[:, :, :T, :T] == 0, float('-inf'))
        att = F.softmax(att, dim=-1)
        att = self.attn_drop(att)
        y = att @ v  # (B, nh, T, T) x (B, nh, T, hs) -> (B, nh, T, hs)
        y = y.transpose(1, 2).contiguous().view(B, T, C)  # re-assemble all head outputs side by side

        # output projection
        y = self.resid_drop(self.proj(y))
        return y
self.block_size = config.block_size\n", " self.apply(self._init_weights)\n", "\n", " logger.info(\"number of parameters: %e\", sum(p.numel() for p in self.parameters()))\n", "\n", " def _init_weights(self, module):\n", " if isinstance(module, (nn.Linear, nn.Embedding)):\n", " module.weight.data.normal_(mean=0.0, std=0.02)\n", " if isinstance(module, nn.Linear) and module.bias is not None:\n", " module.bias.data.zero_()\n", " elif isinstance(module, nn.LayerNorm):\n", " module.bias.data.zero_()\n", " module.weight.data.fill_(1.0)\n", "\n", " def get_block_size(self):\n", " return self.block_size\n", "\n", " def forward(self, idx, targets=None):\n", " b, t = idx.size()\n", " assert t <= self.block_size, \"Cannot forward, model block size is exhausted.\"\n", "\n", " # forward the GPT model\n", " token_embeddings = self.tok_emb(idx) # each index maps to a (learnable) vector\n", " position_embeddings = self.pos_emb[:, :t, :] # each position maps to a (learnable) vector\n", " x = self.drop(token_embeddings + position_embeddings)\n", " x = self.blocks(x)\n", " x = self.ln_f(x)\n", " logits = self.head(x)\n", "\n", " # if we are given some desired targets also calculate the loss\n", " loss = None\n", " if targets is not None:\n", " loss = F.cross_entropy(logits.view(-1, logits.size(-1)), targets.view(-1))\n", "\n", " return logits, loss" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "1cbaiuQ4IJLF" }, "outputs": [], "source": [ "import math\n", "import logging\n", "\n", "from tqdm import tqdm\n", "import numpy as np\n", "\n", "import torch\n", "import torch.optim as optim\n", "from torch.optim.lr_scheduler import LambdaLR\n", "from torch.utils.data.dataloader import DataLoader\n", "\n", "logger = logging.getLogger(__name__)\n", "\n", "class TrainerConfig:\n", " # optimization parameters\n", " max_epochs = 10\n", " batch_size = 64\n", " learning_rate = 3e-4\n", " betas = (0.9, 0.95)\n", " grad_norm_clip = 1.0\n", " weight_decay = 0.1 # only applied 
class Trainer:
    """
    Simple training loop for a model whose forward pass returns ``(logits, loss)``.

    Args:
        model: the ``torch.nn.Module`` to optimise.
        train_dataset: dataset yielding ``(x, y)`` pairs used for training.
        test_dataset: optional dataset evaluated after every epoch, or ``None``.
        config: object providing ``max_epochs``, ``batch_size``, ``learning_rate``,
            ``betas``, ``grad_norm_clip``, ``weight_decay``, ``lr_decay``,
            ``warmup_tokens``, ``final_tokens``, ``ckpt_path`` and ``num_workers``.
    """

    def __init__(self, model, train_dataset, test_dataset, config):
        self.model = model
        self.train_dataset = train_dataset
        self.test_dataset = test_dataset
        self.config = config

        # take over whatever gpus are on the system
        self.device = 'cpu'
        if torch.cuda.is_available():
            self.device = torch.cuda.current_device()
            self.model = torch.nn.DataParallel(self.model).to(self.device)

    def save_checkpoint(self):
        """Write the model's ``state_dict`` to ``config.ckpt_path``; no-op when the path is ``None``."""
        if self.config.ckpt_path is not None:
            # unwrap DataParallel so the saved keys carry no "module." prefix
            ckpt_model = self.model.module if hasattr(self.model, "module") else self.model
            logger.info("saving %s", self.config.ckpt_path)
            torch.save(ckpt_model.state_dict(), self.config.ckpt_path)

    def _build_optimizer(self):
        """Create AdamW with weight decay applied only to ``nn.Linear`` (matmul) weights."""
        model, config = self.model, self.config
        # Classify by module type instead of by parameter name: a name filter such as
        # "LayerNorm.weight" only matches when the attribute itself is called LayerNorm,
        # which silently leaves normalisation gains and embeddings in the decayed group.
        decay_ids = {
            id(module.weight)
            for module in model.modules()
            if isinstance(module, torch.nn.Linear)
        }
        params_decay, params_nodecay = [], []
        for param in model.parameters():
            if id(param) in decay_ids:
                params_decay.append(param)
            else:
                params_nodecay.append(param)
        optim_groups = [
            {"params": params_decay, "weight_decay": config.weight_decay},
            {"params": params_nodecay, "weight_decay": 0.0},
        ]
        return optim.AdamW(optim_groups, lr=config.learning_rate, betas=config.betas)

    def train(self):
        """Run ``config.max_epochs`` epochs of training, evaluating and checkpointing after each one."""
        model, config = self.model, self.config
        optimizer = self._build_optimizer()

        def run_epoch(split, epoch):
            is_train = split == 'train'
            model.train(is_train)
            data = self.train_dataset if is_train else self.test_dataset
            # shuffle training batches every epoch; keep evaluation order fixed
            loader = DataLoader(data, shuffle=is_train, batch_size=config.batch_size,
                                num_workers=config.num_workers)

            losses = []
            pbar = tqdm(enumerate(loader), total=len(loader)) if is_train else enumerate(loader)
            for it, (x, y) in pbar:

                # place data on the correct device
                x = x.to(self.device)
                y = y.to(self.device)

                # forward the model
                with torch.set_grad_enabled(is_train):
                    logits, loss = model(x, y)
                    loss = loss.mean()  # collapse all losses if they are scattered on multiple gpus
                    losses.append(loss.item())

                if is_train:

                    # backprop and update the parameters
                    model.zero_grad()
                    loss.backward()
                    torch.nn.utils.clip_grad_norm_(model.parameters(), config.grad_norm_clip)
                    optimizer.step()

                    # decay the learning rate based on our progress
                    if config.lr_decay:
                        # number of tokens processed this step (i.e. label is not -100);
                        # int() keeps the running counter a plain Python number
                        self.tokens += int((y >= 0).sum())
                        if self.tokens < config.warmup_tokens:
                            # linear warmup
                            lr_mult = float(self.tokens) / float(max(1, config.warmup_tokens))
                        else:
                            # cosine learning rate decay, floored at 10% of the base rate
                            progress = float(self.tokens - config.warmup_tokens) / float(max(1, config.final_tokens - config.warmup_tokens))
                            lr_mult = max(0.1, 0.5 * (1.0 + math.cos(math.pi * progress)))
                        lr = config.learning_rate * lr_mult
                        for param_group in optimizer.param_groups:
                            param_group['lr'] = lr
                    else:
                        lr = config.learning_rate

                    # report progress
                    pbar.set_description(f"epoch {epoch+1} iter {it}: train loss {loss.item():.5f}. lr {lr:e}")

            if not is_train:
                logger.info("test loss: %f", np.mean(losses))

        self.tokens = 0  # counter used for learning rate decay
        for epoch in range(config.max_epochs):

            run_epoch('train', epoch)
            if self.test_dataset is not None:
                run_epoch('test', epoch)

            self.save_checkpoint()
Clearly the sampling\n", " has quadratic complexity unlike an RNN that is only linear, and has a finite context window\n", " of block_size, unlike an RNN that has an infinite context window.\n", " \"\"\"\n", " block_size = model.get_block_size()\n", " model.eval()\n", " for k in range(steps):\n", " x_cond = x if x.size(1) <= block_size else x[:, -block_size:] # crop context if needed\n", " logits, _ = model(x_cond)\n", " # pluck the logits at the final step and scale by temperature\n", " logits = logits[:, -1, :] / temperature\n", " # optionally crop probabilities to only the top k options\n", " if top_k is not None:\n", " logits = top_k_logits(logits, top_k)\n", " # apply softmax to convert to probabilities\n", " probs = F.softmax(logits, dim=-1)\n", " # sample from the distribution or take the most likely\n", " if sample:\n", " ix = torch.multinomial(probs, num_samples=1)\n", " else:\n", " _, ix = torch.topk(probs, k=1, dim=-1)\n", " # append to the sequence and continue\n", " x = torch.cat((x, ix), dim=1)\n", "\n", " return x" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "_7F51OUaIJDB" }, "outputs": [], "source": [ "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "ElaOntuyH37Z" }, "outputs": [], "source": [ "# set up logging\n", "import logging\n", "import pandas as pd\n", "logging.basicConfig(\n", " format=\"%(asctime)s - %(levelname)s - %(name)s - %(message)s\",\n", " datefmt=\"%m/%d/%Y %H:%M:%S\",\n", " level=logging.INFO,\n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "UbFxi0jzH37a" }, "outputs": [], "source": [ "# make deterministic\n", "#from mingpt.utils import set_seed\n", "set_seed(42)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "1ycyj9FEH37b" }, "outputs": [], "source": [ "import numpy as np\n", "import torch\n", "import torch.nn as nn\n", "from torch.nn import functional as F" ] }, { "cell_type": "code", "execution_count": null, 
class CharDataset(Dataset):
    """
    Character-level dataset with one SMILES string per item.

    Args:
        data: sequence of SMILES strings; item ``idx`` is built from ``data[idx]``.
        content: text whose distinct characters define the vocabulary.
        block_size: context length reported to the model. Defaults to the length
            of the longest string in ``data``.

    Each item is a pair ``(x, y)`` of ``torch.long`` tensors where ``y`` is ``x``
    shifted one character to the left (next-character targets).
    """

    def __init__(self, data, content, block_size=None):
        chars = sorted(list(set(content)))
        data_size, vocab_size = len(data), len(chars)
        print('data has %d smiles, %d unique characters.' % (data_size, vocab_size))

        self.stoi = {ch: i for i, ch in enumerate(chars)}
        self.itos = {i: ch for i, ch in enumerate(chars)}
        if block_size is None:
            # derive the context length from the data instead of reading a notebook global
            block_size = max((len(s) for s in data), default=0)
        self.block_size = block_size
        self.vocab_size = vocab_size
        self.data = data

    def __len__(self):
        # __getitem__ serves one whole string per index, so the dataset has exactly
        # one item per string (not one per block_size-sized chunk of text).
        return len(self.data)

    def __getitem__(self, idx):
        smiles = self.data[idx]
        dix = [self.stoi[s] for s in smiles]
        x = torch.tensor(dix[:-1], dtype=torch.long)
        y = torch.tensor(dix[1:], dtype=torch.long)

        return x, y
"execution_count": null, "metadata": { "id": "juqft0fIH37e", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "9ad94fc2-41c7-43b6-c295-7a7adc27a8b4" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "data has 1584783 smiles, 35 unique characters.\n" ] } ], "source": [ "train_dataset = CharDataset(smiles, content, )" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "HvmcQyMOH37f" }, "outputs": [], "source": [ "#from mingpt.model import GPT, GPTConfig\n", "mconf = GPTConfig(train_dataset.vocab_size, train_dataset.block_size,\n", " n_layer=8, n_head=8, n_embd=256)\n", "model = GPT(mconf)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "qEd0ltlOH37f", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "14d01029-44ee-4722-f31f-956c44c128db" }, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py:561: UserWarning: This DataLoader will create 10 worker processes in total. Our suggested max number of worker in current system is 2, which is smaller than what this DataLoader is going to create. Please be aware that excessive worker creation might get DataLoader running slow or even freeze, lower the worker number to avoid potential slowness/freeze if necessary.\n", " warnings.warn(_create_warning_msg(\n", "epoch 1 iter 50: train loss 0.28502. lr 5.999633e-04: 100%|██████████| 51/51 [00:35<00:00, 1.42it/s]\n", "epoch 2 iter 50: train loss 0.23235. lr 5.998532e-04: 100%|██████████| 51/51 [00:33<00:00, 1.51it/s]\n", "epoch 3 iter 50: train loss 0.20597. lr 5.996698e-04: 100%|██████████| 51/51 [00:34<00:00, 1.49it/s]\n", "epoch 4 iter 50: train loss 0.18653. lr 5.994130e-04: 100%|██████████| 51/51 [00:34<00:00, 1.48it/s]\n", "epoch 5 iter 50: train loss 0.17477. lr 5.990830e-04: 100%|██████████| 51/51 [00:34<00:00, 1.47it/s]\n", "epoch 6 iter 50: train loss 0.16794. 
lr 5.986797e-04: 100%|██████████| 51/51 [00:34<00:00, 1.46it/s]\n", "epoch 7 iter 50: train loss 0.15940. lr 5.982034e-04: 100%|██████████| 51/51 [00:35<00:00, 1.45it/s]\n", "epoch 8 iter 50: train loss 0.15124. lr 5.976541e-04: 100%|██████████| 51/51 [00:35<00:00, 1.44it/s]\n", "epoch 9 iter 50: train loss 0.14482. lr 5.970320e-04: 100%|██████████| 51/51 [00:36<00:00, 1.38it/s]\n", "epoch 10 iter 50: train loss 0.13632. lr 5.963372e-04: 100%|██████████| 51/51 [00:35<00:00, 1.43it/s]\n", "epoch 11 iter 50: train loss 0.13094. lr 5.955699e-04: 100%|██████████| 51/51 [00:36<00:00, 1.40it/s]\n", "epoch 12 iter 50: train loss 0.12819. lr 5.947302e-04: 100%|██████████| 51/51 [00:36<00:00, 1.41it/s]\n", "epoch 13 iter 50: train loss 0.12430. lr 5.938184e-04: 100%|██████████| 51/51 [00:36<00:00, 1.42it/s]\n", "epoch 14 iter 50: train loss 0.12056. lr 5.928348e-04: 100%|██████████| 51/51 [00:35<00:00, 1.43it/s]\n", "epoch 15 iter 50: train loss 0.12150. lr 5.917794e-04: 100%|██████████| 51/51 [00:35<00:00, 1.44it/s]\n", "epoch 16 iter 50: train loss 0.11648. lr 5.906527e-04: 100%|██████████| 51/51 [00:35<00:00, 1.44it/s]\n", "epoch 17 iter 50: train loss 0.11410. lr 5.894549e-04: 100%|██████████| 51/51 [00:35<00:00, 1.44it/s]\n", "epoch 18 iter 50: train loss 0.11176. lr 5.881862e-04: 100%|██████████| 51/51 [00:35<00:00, 1.44it/s]\n", "epoch 19 iter 50: train loss 0.10950. lr 5.868470e-04: 100%|██████████| 51/51 [00:35<00:00, 1.44it/s]\n", "epoch 20 iter 50: train loss 0.10975. lr 5.854376e-04: 100%|██████████| 51/51 [00:35<00:00, 1.45it/s]\n", "epoch 21 iter 50: train loss 0.10889. lr 5.839584e-04: 100%|██████████| 51/51 [00:35<00:00, 1.44it/s]\n", "epoch 22 iter 50: train loss 0.10522. lr 5.824096e-04: 100%|██████████| 51/51 [00:35<00:00, 1.44it/s]\n", "epoch 23 iter 50: train loss 0.10198. lr 5.807918e-04: 100%|██████████| 51/51 [00:35<00:00, 1.45it/s]\n", "epoch 24 iter 50: train loss 0.10006. 
lr 5.791053e-04: 100%|██████████| 51/51 [00:35<00:00, 1.44it/s]\n", "epoch 25 iter 50: train loss 0.10136. lr 5.773504e-04: 100%|██████████| 51/51 [00:35<00:00, 1.44it/s]\n", "epoch 26 iter 50: train loss 0.09903. lr 5.755277e-04: 100%|██████████| 51/51 [00:35<00:00, 1.45it/s]\n", "epoch 27 iter 50: train loss 0.09688. lr 5.736376e-04: 100%|██████████| 51/51 [00:35<00:00, 1.44it/s]\n", "epoch 28 iter 50: train loss 0.09715. lr 5.716805e-04: 100%|██████████| 51/51 [00:35<00:00, 1.45it/s]\n", "epoch 29 iter 50: train loss 0.09342. lr 5.696569e-04: 100%|██████████| 51/51 [00:35<00:00, 1.44it/s]\n", "epoch 30 iter 50: train loss 0.09446. lr 5.675674e-04: 100%|██████████| 51/51 [00:35<00:00, 1.44it/s]\n" ] } ], "source": [ "#from mingpt.trainer import Trainer, TrainerConfig\n", "import math\n", "# initialize a trainer instance and kick off training\n", "tconf = TrainerConfig(max_epochs=30, batch_size=128, learning_rate=6e-4,\n", " lr_decay=True, warmup_tokens=32*20, final_tokens=200*len(train_dataset)*block_size,\n", " num_workers=10)\n", "trainer = Trainer(model, train_dataset, None, tconf)\n", "trainer.train()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "zFZyEKG0FeGK" }, "outputs": [], "source": [ "#torch.save(model.state_dict(), '/content/gpt_model_state')\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "eC3Vw3lHIImL" }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "10Vy3IcOM8Jj" }, "outputs": [], "source": [ "# alright, let's sample some molecules and draw them using rdkit\n", "\n", "from rdkit import Chem\n", "from rdkit.Chem.Draw import IPythonConsole\n", "from IPython.core.display import HTML\n", "from rdkit.Chem.QED import qed\n", "from rdkit.Chem import PandasTools\n", "#from mingpt.utils import sample\n", "import seaborn as sns" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "5LsEKNc-M8Jj" }, "outputs": [], "source": [ "def 
show(df):\n", " return HTML(df.to_html(notebook=True))\n", "PandasTools.RenderImagesInAllDataFrames(images=True)" ] }, { "cell_type": "code", "source": [ "import tqdm as tqdm" ], "metadata": { "id": "Xa3IuwDeey5k" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 234 }, "outputId": "828d570d-084a-441f-f133-67535d88179a", "id": "6-hBepJvM8Jk" }, "outputs": [ { "output_type": "error", "ename": "NameError", "evalue": "ignored", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mgen_smiles\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mcontext\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"C\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mtqdm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtqdm\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m500\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5\u001b[0m \u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtensor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mtrain_dataset\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstoi\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0ms\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mcontext\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m 
# Sample SMILES strings from the trained model and keep the ones RDKit can parse.
# Import the progress bar under its own alias: elsewhere in this notebook `tqdm` is
# bound both to the function (`from tqdm import tqdm`) and to the module
# (`import tqdm as tqdm`), so relying on the bare name depends on cell execution order.
from tqdm import tqdm as progress_bar

molecules = []
gen_smiles = []
context = "C"
# The prompt is identical for every draw, so encode it once outside the loop.
x = torch.tensor([train_dataset.stoi[s] for s in context], dtype=torch.long)[None, ...].to(trainer.device)
for _ in progress_bar(range(500)):
    y = sample(model, x, block_size, temperature=0.7, sample=True, top_k=5)[0]
    completion = ''.join([train_dataset.itos[int(tok)] for tok in y])
    # '<' is the end/padding token: keep only the text before its first occurrence
    # so that characters generated after the end are not spliced into the molecule.
    completion = completion.split('<', 1)[0]
    mol = Chem.MolFromSmiles(completion)
    if mol:
        molecules.append(mol)
        gen_smiles.append(completion)
"['COc1cc(C(=O)Nc2ccccc2)cc(OC)c1OC',\n", " 'Cc1ccc(C(=O)Nc2ccccc2Cl)cc1',\n", " 'COc1ccc(C(=O)Nc2ccc(OC)c(C)c2)cc1',\n", " 'CCOc1ccc(OC)c(NC(=O)CSc2nnc3ccccc3n2)c1',\n", " 'CCOC(=O)c1ccc(NC(=O)c2ccc(C)o2)cc1',\n", " 'Cc1sc2ncnc(NC(=O)c3ccccc3)c2c1C',\n", " 'COc1ccc(-c2nc(C)c(C)c(C(=O)Nc3ccccc3)c2)cc1',\n", " 'COC(=O)c1cccc(NC(=O)COc2ccc(C)c(C)c2)c1',\n", " 'CC(=O)Nc1ccc(-c2ccc3c(c2)OCO3)cc1',\n", " 'Cc1ccc(-c2nnc(C(=O)Nc3ncncc3N)cc2)cc1',\n", " 'COc1cc2nc(SCC(=O)Nc3ccccc3)c(=O)c2cc1',\n", " 'Cc1ccc(C(=O)N2CCN(c3ccccc3)CC2)cc1',\n", " 'COc1ccc(S(=O)(=O)Nc2ccccc2C)cc1',\n", " 'COc1ccc(NC(=O)c2ccc3c(c2)OCO3)cc1',\n", " 'COc1ccc(-n2nnnc2-c2ccccc2C(C)(C)C)cc1',\n", " 'Cc1ccc(S(=O)(=O)Nc2ccccc2C(F)(F)F)cc1',\n", " 'COc1ccc(NC(=O)c2cccc(Cl)c2)cc1',\n", " 'COc1ccc(C(=O)Nc2ccc(F)cc2)cc1OC',\n", " 'Cc1cccc(C(=O)Nc2ccc(C(F)(F)F)cc2)c1',\n", " 'COc1cccc(OC)c1C(=O)Nc1ccc(C)cc1',\n", " 'CCOC(=O)Nc1cccc(NC(=O)c2cccs2)c1',\n", " 'COc1nc2c(cc1S(=O)(=O)c1ccccc1)CC2',\n", " 'Cc1nnc(NC(=O)CSc2ncnc3ccccc3n2C)s1',\n", " 'COc1cccc(C(=O)Nc2ccc(OC)cn2)c1',\n", " 'COC(=O)c1ccccc1NC(=O)c1ccccc1',\n", " 'COc1ccc(CNS(=O)(=O)c2ccccc2)cc1',\n", " 'Cc1cccc(NC(=O)Nc2ccc(Cl)cc2)c1',\n", " 'Cc1ccc(NS(=O)(=O)c2ccc(Cl)c(Cl)c2)cc1',\n", " 'Cc1ccc(NC(=O)Cn2nnc3ccccc3c2=O)cc1',\n", " 'COc1c(Cl)cc(Cl)cc1OC(=O)NCc1ccccc1',\n", " 'CC(=O)C1CCCN(C(=O)Nc2ccc(F)cc2)c2ccccc21',\n", " 'Cc1nc(-c2ccccc2)c(C#N)c(Cl)c1C#N',\n", " 'Cc1cccc(NC(=O)NCc2cccs2)c1',\n", " 'Cc1cc(C)nc(SCC(=O)NCc2cccs2)n1',\n", " 'Cc1ccc(-c2ccccc2)cc1OCC(=O)Nc1ccccc1',\n", " 'Cn1cnnc1SCC(=O)Nc1ccc(Cl)cc1',\n", " 'Cn1c(=O)c2ccc(Nc3ccccc3)cc2n(C)c1=O',\n", " 'CC(C)C(=O)Nc1ccc(C)cc1C(=O)Nc1cccs1',\n", " 'CCOc1ccc(C(=O)Nc2cccc(C(F)(F)F)c2)cc1',\n", " 'COc1ccc(NC(=O)CSc2nc3ccccc3n2C)cc1',\n", " 'Cc1ccc(NC(=O)Cn2c(-c3ccco3)nc3ccccc23)cc1',\n", " 'COc1ccc(NC(=O)c2ccc(Cl)c(Cl)c2)cc1',\n", " 'CN(C)CC(=O)Nc1ccc(C)cc1Cl',\n", " 'CC(=O)Nc1ccc(NC(=O)c2ccc(F)cc2)cc1',\n", " 'CCOc1ccc(C(=O)Nc2ccc(C)cc2)cc1',\n", " 'COC(=O)c1cc(C)nc(SCc2ccc(F)cc2)n1',\n", " 
'CC(C)(C)CC(=O)Nc1ccc(Cl)cc1',\n", " 'CCOC(=O)c1c(NC(=O)c2ccccc2)nc2ccccc2c1C',\n", " 'Cn1cnnc1SCC(=O)Nc1ccc(F)cc1',\n", " 'COc1ccc(-c2ccc(NC(=O)c3ccncc3)cn2)cc1',\n", " 'COC(=O)c1sc2nc(SCC(=O)N(C)C)c2c1C',\n", " 'CC(C)S(=O)(=O)Nc1ccc(Cl)cc1',\n", " 'COc1ccc(C(=O)Nc2cccs2)cc1OC',\n", " 'CC(=O)Nc1ccc(NC(=O)c2ccccc2)cc1',\n", " 'CCOC(=O)c1ccc(NC(=O)c2ccccc2)cc1',\n", " 'COC(=O)c1c(N)n(-c2cccs2)c2ccccc2[nH]1',\n", " 'COc1ccc(NC(=O)c2ccc(NC(C)=O)cc2)cc1',\n", " 'CCOC(=O)c1ccc(NC(=O)c2ccccc2)cc1',\n", " 'CCOC(=O)c1ccc(C(=O)NCc2ccco2)cc1',\n", " 'Cc1cc(C)c(C#N)c(SCC(=O)N2CCCOCC2)n1',\n", " 'Cc1ccc(-c2cc(NC(=O)CC(C)C)cc(C)c2)cc1',\n", " 'COC(=O)c1nn(-c2ccc(C(F)(F)F)cc2)cc1C#N',\n", " 'COC(=O)C1=C(C)N(C)C(=O)NC1c1ccc(OC)cc1',\n", " 'COc1ccccc1NC(=O)c1ccc(Cl)cc1',\n", " 'Cc1ccc(-c2nc3ccccc3n2CC(=O)Nc2ccccc2)cc1',\n", " 'Cc1cc(C)n(-c2ccc(Cl)cc2)n1',\n", " 'COc1ccc(NC(=O)c2ccccc2)cc1',\n", " 'COC(=O)c1ccc(NC(=O)c2cccc(Cl)c2)cc1',\n", " 'COc1ccc(OC)c(C(=O)Nc2ccc(C)cc2C#N)c1',\n", " 'Cc1nc2cc(Cl)cc2c(Cl)c(=O)n1Cc1ccccc1',\n", " 'COc1ccc(C(=O)Nc2ccccc2C)cc1',\n", " 'Cc1ccc(NC(=O)CSc2nc(C)cc(C)n2)cc1',\n", " 'COC(=O)c1ccc(S(=O)(=O)NCc2ccccc2)cc1',\n", " 'COc1ccc(C(=O)NCCc2ccccc2C)cc1',\n", " 'COc1ccc(NC(=O)c2ccncc2)cc1',\n", " 'CC1=C(C(=O)Nc2ccc(Cl)cc2)NC(c2ccccc2)NC(=O)N1',\n", " 'CCOC(=O)Cn1nc2cc(Cl)cc2c(=O)n1CCc1ccccc1',\n", " 'COc1ccc2c(c1OC)C(O)(C(=O)c1ccccc1)N2',\n", " 'CCOC(=O)Nc1ccc(NC(=O)c2ccccc2)cc1',\n", " 'COc1ccc(NC(=O)CSc2nc3ccccc3c(=O)s2)cc1',\n", " 'Cc1ccc(OC(=O)Nc2ccccc2)c(C(N)=O)c1',\n", " 'Cc1nc2cccc(-c3ccc(Cl)cc3)cc2n1',\n", " 'CN(C)S(=O)(=O)c1ccc(NC(=O)c2ccccn2)cc1',\n", " 'COc1ccccc1NC(=O)c1cccc(OC)c1',\n", " 'COc1cc(-c2ccc(C)cc2)cc(O)c1OC',\n", " 'CC(=O)Nc1ccc(NC(=O)c2cncc(Cl)c2Cl)cc1',\n", " 'Cc1ccc2c(c1)N(c1ccccc1)S(=O)(=O)N2',\n", " 'CC(=O)c1ccc(NC(=O)c2ccc(NC(C)=O)cc2)cc1',\n", " 'COc1ccccc1NS(=O)(=O)c1ccc(NC(=O)c2ccccc2)cc1',\n", " 'COc1ccc(C(=O)Nc2nc(C)cc(C)c2C#N)cc1',\n", " 'COc1ccc(CC(=O)Nc2cccc(C)c2)cc1',\n", " 
'Cc1noc(-c2cccc(Cl)c2)c1C(=O)N1CCOCC1',\n", " 'CCOC(=O)c1ccc(NC(=O)c2cccc(Cl)c2)cc1',\n", " 'CC(=O)OC1CC(C)N(C(=O)c2ccccc2)c2ccccc21',\n", " 'Cc1cccc(NC(=O)Nc2ccccc2)c1',\n", " 'CCOC(=O)c1ccc(NC(=O)c2ccc(Cl)cc2)cc1',\n", " 'Cc1cccc(NC(=O)CSc2nc3ccccc3[nH]2)c1',\n", " 'Cc1cc(C)c(C#N)c(SCC(=O)Nc2ccccc2)n1',\n", " 'Cc1cc(C)c(C#N)c(SCC(=O)N2CCOCC2)n1',\n", " 'CCOC(=O)c1c(NC(=O)c2cccnc2)sc2c1CCCC2',\n", " 'Cc1cc(C)c(NC(=O)c2ccccc2)c(C#N)c1',\n", " 'CC(=O)Nc1ccc(NC(=O)COc2ccccc2)cc1',\n", " 'Cc1nc(NC(=O)CSc2ccc(Cl)cc2)no1',\n", " 'COc1ccc(NC(=O)N2CCN(c3ccc(C)cc3)CC2)cc1',\n", " 'COC(=O)c1scc(C(=O)NCc2ccccc2)c1Cl',\n", " 'Cc1ccc(C(=O)Nc2ccccn2)cc1',\n", " 'COc1ccc(CCNC(=O)CSc2nnc(C)c(C)c2C#N)cc1',\n", " 'CCOC(=O)c1cn(-c2ccccc2)c(C#N)c1C#N',\n", " 'Cc1ccc(S(=O)(=O)Nc2ccc(Cl)cc2)cc1',\n", " 'CCC(C)(C)C(=O)N(c1ccccc1)c1ccc(OC)cc1',\n", " 'CCOC(=O)c1c(NC(=O)c2ccccc2)noc1C',\n", " 'Cc1cccc(NC(=O)CSc2nc3ccccc3n2C)c1',\n", " 'COc1ccc(NC(=O)c2ccc(Cl)cc2OC)cc1',\n", " 'CCN(CC)S(=O)(=O)c1ccc(C)cc1',\n", " 'COc1ccc(OC)c(NC(=O)CSc2nc3ccccc3[nH]2)c1',\n", " 'COc1ccc(C2OC(=O)c3ccccc32)cc1',\n", " 'Cc1ccc(NC(=O)c2ccc(Cl)cc2)cc1',\n", " 'Cc1ccc(NC(=O)c2ccc(NC(C)=O)cc2)cc1',\n", " 'COc1ccc(C2NC(=O)Nc3ccccc3C(=O)OCc3ccccc32)cc1',\n", " 'COc1ccc(C(=O)Oc2ccc(OC)c(OC)c2)cc1',\n", " 'CC(C)(C)NC(=O)N(C)Cc1ccc(OC)cc1',\n", " 'CC(Oc1ccccc1)S(=O)(=O)N1CCN(C)C(C)C1',\n", " 'COc1cccc(C(=O)Nc2cccc(C)c2C(=O)N)c1',\n", " 'COC(=O)c1ccc(C(=O)Nc2cccc(F)c2)cc1',\n", " 'Cc1cccc(NC(=O)c2ccc(Br)cc2)n1',\n", " 'Cc1ccc(S(=O)(=O)NC(C)(C)C)cc1',\n", " 'Cc1cccc(NC(=O)c2ccc(Cl)cc2Cl)c1',\n", " 'Cc1ccc(C(=O)Nc2ccc(S(N)(=O)=O)cc2)cc1',\n", " 'CC1=C(C(=O)Oc2ccccc2Cl)NC(c2ccco2)n2ncnc2N1',\n", " 'COc1ccc(-c2nc(-c3ccccc3)cs2)cc1',\n", " 'COc1ccc(C(=O)Nc2cccc(C)c2)cc1',\n", " 'Cc1ccc(NC(=O)c2ccccc2)c(OC)c1',\n", " 'CC1=C(C(=O)Nc2cccc(F)c2)NC(=O)NC1c1ccccn1',\n", " 'CC(=O)Nc1ccc(NC(=O)c2ccc(Cl)cc2)cc1',\n", " 'Cc1cc(C)nc(NC(=O)c2ccc(Cl)cc2)n1',\n", " 'COc1cccc(NC(=O)c2ccccc2Cl)c1',\n", " 
'COc1ccc(C(=O)Nc2ccccc2C#N)cc1',\n", " 'Cc1nc2ccccc2c(=O)n1-c1ccc(Cl)cc1',\n", " 'Cc1nc2ccccc2n1Cc1ccccc1Cl',\n", " 'COc1ccc(C(=O)Nc2ccc(Cl)c(SC)c2)cc1',\n", " 'Cc1onc(-c2ccccc2)c1CNc1cccnc1',\n", " 'Cc1ccc(C(=O)Nc2cc(Cl)c(Cl)cc2)cc1',\n", " 'CC(=O)N1CCN(C(=O)c2cccnc2C)c2ccccc21',\n", " 'CC(=O)Nc1ccc(-c2ccc(C(C)=O)cc2)cc1',\n", " 'CC(=O)Nc1ccc(NC(=O)c2ccccc2)cc1',\n", " 'COc1ccc(C2CC(=O)Nc3ccccc3C2)cc1',\n", " 'COc1cc(C(=O)N(C)c2ccc(O)cc2)cc(OC)c1OC',\n", " 'CC(=O)c1ccc(NC(=O)c2ccccc2)c(OC)c1',\n", " 'COc1cccc(NC(=O)CSc2nc(C)cn2C)c1',\n", " 'COc1ccccc1CNC(=O)CSc1nc2ccccc2o1',\n", " 'CCOC(=O)c1cccc(NC(=O)c2ccc(OC)cc2)c1',\n", " 'Cc1ccc(NC(=O)C2CC(=O)N(C)c3ccccc32)cc1',\n", " 'COc1ccc(C(=O)Nc2ccc(Cl)cc2)cc1OC',\n", " 'Cc1nc2ccccc2c(=O)n1-c1ccc(Cl)cc1',\n", " 'Cc1cccc(NC(=O)c2cccc(C(=O)Nc3ccccc3)c2)n1',\n", " 'COc1ccc(C2CC(=O)Nc3ccccc3C2)cc1OC',\n", " 'COc1ccc(C(=O)NC(C)c2ccc(OC)cc2)cc1',\n", " 'COc1ccc(CNC(=O)c2ccccc2C)cc1',\n", " 'CCN(CC)C(=O)c1ccc(OC)c(NC(=O)c2ccccc2)c1',\n", " 'CC(=O)Nc1ccc(NC(=O)COc2cccc(F)c2)cc1',\n", " 'CC(=O)Nc1ccc(NC(=O)c2ccc3ccccc3n2)cc1',\n", " 'CCOC(=O)c1ccc(O)c(C)c1NC(=O)c1ccccc1',\n", " 'Cc1ccc(C(=O)Nc2ccc(Cl)cc2Cl)cc1',\n", " 'CC(C)(C)N(CC(=O)c1cccnc1)c1ccccc1',\n", " 'Cc1cc(C)n(-c2ccc(Cl)cc2)n1',\n", " 'Cc1sc(NC(=O)c2ccccc2)c(C#N)c1C',\n", " 'COc1ccc(CC(=O)Nc2ccccc2C(F)(F)F)cc1',\n", " 'Cc1cc(S(=O)(=O)NC2CCCC2)cc(N)c1',\n", " 'CC(=O)Nc1ccc(S(=O)(=O)NCc2ccccc2)cc1',\n", " 'Cc1noc(-c2ccc(Cl)cc2)c1C(=O)NC1CCCCC1',\n", " 'Cc1cccc(-c2cccnc2)c1C(=O)NCc1cccnc1',\n", " 'CCOC(=O)Nc1cccc(NC(=O)c2ccccc2)c1',\n", " 'COc1cc(C(=O)Nc2ccc(C)c(C)c2)cc(OC)c1OC',\n", " 'COc1ccc(C(=O)Nc2ccc(Cl)c(Cl)c2)cc1',\n", " 'Cc1cc(C(=O)Nc2ccccc2)cc(OC)c1',\n", " 'CC(=O)Nc1ccc(C(=O)Nc2ccccc2)cc1',\n", " 'CC(=O)Nc1ccc(NC(=O)COc2ccccc2)cc1',\n", " 'CC(=O)Nc1ccc(C(=O)Nc2ccc(Cl)cc2)cc1',\n", " 'COc1cc(Cl)ccc1S(=O)(=O)Nc1ccc(C(F)(F)F)cc1',\n", " 'COc1ccc(CC(=O)Nc2ccc(CC(F)(F)F)cc2)nc1',\n", " 'Cc1noc(C)c1C(=O)Nc1ccc(Cl)cc1',\n", " 'CC(=O)Nc1ccc(S(=O)(=O)N2CCCCC2)cc1',\n", 
" 'Cc1ccc(NC(=O)CSc2nnc(N)c(C)n2)cc1',\n", " 'COc1ccc(OC)c(C(=O)Nc2cccnc2)c1',\n", " 'CC(=O)N(C)c1ccc(OC)cc1C(=O)Nc1ccco1',\n", " 'Cc1ccc(S(=O)(=O)NCc2ccc3ccccc3n2)cc1',\n", " 'COc1ccc(C2C(=O)Nc3ccccc3C2)cc1OC',\n", " 'Cc1ccc(C(=O)Nc2cc(Cl)ccc2Cl)cc1',\n", " 'CC(=O)Oc1ccc(-c2ccc(Cl)cc2)cc1OC',\n", " 'COc1ccc(-c2nnc3ccccc3c2N)cc1',\n", " 'COc1ccc(C(=O)Nc2ccc(C#N)c(C)c2)cc1',\n", " 'COc1ccc(C(=O)Nc2ccccc2C(C)(C)C)c(C)c1',\n", " 'COc1ccc(-c2nc(C(=O)NCc3ccccc3)no2)cc1',\n", " 'CCOc1ccc(C(=O)Nc2ccccc2)cc1',\n", " 'CCOC(=O)c1sc2ncn(CC(=O)c3ccccc3)c2c1C',\n", " 'CCOC(=O)c1cc2cc(O)c(OC)c(OC)c2[nH]c1=O',\n", " 'COc1ccc(C(=O)Nc2ccc(F)cc2)cc1',\n", " 'CC(=O)c1c(NC(=O)COc2ccccc2)sc(C)c1C#N',\n", " 'COc1cccc(NC(=O)c2ccc(Cl)cc2Cl)c1',\n", " 'COc1ccc(C(=O)NCc2ccccc2)cc1OC',\n", " 'CCOC(=O)c1ccn(C(=O)Nc2ccccc2)c1C',\n", " 'Cc1ccc(-c2noc(-c3ccccc3)n2)cc1',\n", " 'COc1ccccc1-c1nnc(C2CC(=O)Nc3ccccc32)cc1',\n", " 'CC(=O)Nc1ccc(Nc2nc3ccccc3[nH]2)cc1',\n", " 'CC(=O)Nc1ccc(C(=O)Nc2ccc(C)cc2)cc1',\n", " 'CCOC(=O)N1CCN(C(=O)c2ccc(F)cc2)CC1',\n", " 'COc1ccc(S(=O)(=O)N2CCCC2)cc1',\n", " 'COC(=O)c1ccc(NS(=O)(=O)c2cccs2)cc1',\n", " 'COc1ccc(C(=O)N2CCN(C(=O)c3ccco3)CCC2)cc1',\n", " 'Cc1onc(-c2ccccc2)c1CNC(=O)c1ccccc1',\n", " 'COc1ccc(S(=O)(=O)N2CCCCC2)cc1',\n", " 'COc1ccc(C(=O)Nc2ccccc2)cc1',\n", " 'COc1ccc(S(=O)(=O)N2CCN(c3ccc(N)cc3)CC2)cc1',\n", " 'CCOc1cccc(NC(=O)c2ccc(Cl)cc2C#N)c1',\n", " 'Cc1ccc(-c2csc3ncnc3n2)cc1Cl',\n", " 'COc1ccc(NC(=O)c2ccc(Cl)cc2)cc1',\n", " 'Cc1cccc(NC(=O)c2ccc(Cl)cc2)c1',\n", " 'Cc1cc(C)n(C(=O)Nc2ccc(F)cc2)c(=O)c1',\n", " 'Cc1ccc(NC(=O)C2CCCN(c3ccccc3)NC2)cc1',\n", " 'CC(=O)NC1CCN(C(=O)c2cccs2)CC1',\n", " 'COC(=O)c1ccc(NC(=O)c2ccccc2)cc1',\n", " 'CCOC(=O)c1cnn(-c2ccccc2)c1N',\n", " 'CCC(=O)Nc1ccc(NC(=O)c2ccccc2C(N)=O)cc1',\n", " 'CCOC(=O)c1c(NC(=O)c2ccccc2)sc2c1CCCC2',\n", " 'COc1cc(C(=O)Nc2ccccc2)cc(OC)c1OC',\n", " 'CCOC(=O)C1=C(C)N(C)C(=O)NC1c1cccs1',\n", " 'CCOC(=O)c1cn(C)c(NC(=O)c2ccc(Cl)cc2)c1C',\n", " 'COc1cc(CNC(=O)c2ccc(OC)cc2)ccc1OC',\n", " 
'CC(C)NC(=O)CN1CCN(C(=O)c2cccs2)CC1',\n", " 'COc1ccc(Nc2nc(-c3ccccc3)no2)cc1',\n", " 'CCOC(=O)c1ccc(NC(=O)c2ccccc2)cc1',\n", " 'COc1cccc(C(=O)Nc2cccc(C)c2)c1',\n", " 'COc1ccc(C(=O)Nc2cccs2)cc1OC',\n", " 'COc1ccc(C(=O)Nc2ccc(Cl)cc2Cl)cc1',\n", " 'COc1ccc(C(=O)Nc2ccc(Cl)cc2)cc1',\n", " 'COc1ccc(NC(=O)c2ccc(F)cc2)cc1',\n", " 'COc1ccccc1NC(=O)c1cccc(NC(=O)c2ccccn2)c1',\n", " 'CCOc1ccc(NC(=O)c2ccc(Cl)c(Cl)c2)cc1',\n", " 'COc1ccc(-c2nnc(C)c(N)n2)cc1OC',\n", " 'COc1ccccc1CNC(=O)c1ccc(C)cc1',\n", " 'Cc1cccc(NC(=O)c2ccc(Cl)cc2)c1',\n", " 'COC(=O)c1cc(C)n(-c2ccc(Cl)cc2)c1C',\n", " 'Cc1sc2ncnc(NC(=O)c3cccs3)c2c1C',\n", " 'COc1ccc(NC(=O)c2ccc(OC)cc2)cc1',\n", " 'CCOC(=O)c1sc2nc[nH]c(=O)c2c1C#N',\n", " 'CCN(CC)S(=O)(=O)c1ccc(NC(=O)c2ccccn2)cc1',\n", " 'Cc1ccc(S(=O)(=O)Nc2ccccc2)cc1',\n", " 'CCOC(=O)Cn1nc2ccc(Br)cc2n1',\n", " 'Cc1cccc(C(=O)Nc2ccc3ccccc3c2)c1',\n", " 'COC(=O)C1=C(C)NC(=O)NC1c1ccc(F)cc1',\n", " 'COc1cc(C(=O)Nc2ccccn2)cc(OC)c1OC',\n", " 'Cc1ccc(NC(=O)c2ccccc2)c(OC)c1',\n", " 'COc1ccc(OC)c(NC(=O)c2ccc(C)cc2)c1',\n", " 'COc1ccc(C(=O)N2CCN(C(=O)c3ccccc3)CCC2)cc1',\n", " 'COc1ccc(C2OC(=O)Nc3ccccc3O2)cc1',\n", " 'COc1ccc(OC)c(-c2nnc(N)nc(NC(C)=O)s2)c1',\n", " 'COc1ccc(C2C(=O)Nc3ccccc3S2)cc1',\n", " 'COc1ccc(C2CC(=O)Nc3ccccc32)cc1OC',\n", " 'CC(=O)N1CCN(S(=O)(=O)c2ccccc2)CC1',\n", " 'COC(=O)c1sc(NC(=O)c2ccccc2)cc1C#N',\n", " 'CC(=O)Nc1ccc(NC(=O)c2ccc(F)cc2)cc1',\n", " 'CCOC(=O)c1ccc(NC(=O)c2cncc(C)c2)cc1',\n", " 'Cc1ccc(NC(=O)CSc2nnc(C)c(-c3ccccc3)n2)cc1',\n", " 'COc1ccc(C(=O)Nc2cccc(Cl)c2)cc1',\n", " 'COc1cccc(C2OC(=O)c3ccccc32)c1',\n", " 'Cn1c(=O)c2c(ncn2Cc2ccccc2)c(=O)n(C)c1=O',\n", " 'CC(=O)Nc1ccc(S(=O)(=O)NCCc2ccccn2)cc1',\n", " 'CCOC(=O)c1cnc2cccc2n1NC(=O)CCc1ccccc1',\n", " 'COc1ccc(NC(=O)c2ccc(Cl)cc2)cc1',\n", " 'COc1cccc(NC(=O)CSc2nc3ccccc3n2C)c1',\n", " 'Cc1cc(C)n(C(=O)Nc2ccccc2)c(=O)c1C#N',\n", " 'CCOC(=O)Nc1ccc(NC(=O)c2cccs2)cc1',\n", " 'COC(=O)c1nc2cc(-c3ccccc3)n2ccc1',\n", " 'CC(=O)Nc1scc(NC(=O)c2ccccc2)c1C',\n", " 'CC(=O)c1ccc(C(=O)NCCc2cccs2)cc1',\n", " 
'COc1ccccc1C1CC(=O)Nc2ccc(C(F)(F)F)cc21',\n", " 'COc1ccc(NC(=O)CSc2ncc3cccc23)cc1',\n", " 'COC(=O)c1sc(NC(=O)c2ccc(Cl)cc2)nc1C',\n", " 'CC(=O)Nc1ccc(NC(=O)CSc2nc3ccccc3o2)cc1',\n", " 'Cc1ccc(NC(=O)c2cccc3cc(c2)OCO3)cc1',\n", " 'COc1ccccc1C(=O)Oc1sc(C)c(C(N)=O)c1',\n", " 'CCOC(=O)c1sc2ncn(CC(C)C)c2c1C',\n", " 'COc1ccc(NC(=O)c2ccc(OC)cc2)cc1',\n", " 'COc1ccc(C(=O)Nc2cccc(C)c2)cc1OC',\n", " 'COc1cc(-c2cc(F)c(C)cc2C)cc(OC)c1OC',\n", " 'COc1ccc(NC(=O)c2ccccc2Cl)cc1',\n", " 'COc1ccccc1NC(=O)c1ccccc1C(=O)Nc1cccs1',\n", " 'CC(C)(C)c1ccc(NC(=O)COc2ccccc2)cc1',\n", " 'Cc1nc2ccccc2c(NC(=O)c2ccccc2)c1',\n", " 'COc1cc(C(=O)Nc2ccccc2C(F)(F)F)ccc1OC',\n", " 'CC(=O)Nc1ccc(NC(=O)c2ccc(OC)c(OC)c2)cc1',\n", " 'Cc1onc(-c2ccccc2Cl)c1C(=O)Nc1ccccc1',\n", " 'CC(=O)Nc1ccc(C(=O)Nc2ccccc2CC(C)(C)C)cc1',\n", " 'Cn1c(=O)c2ccccc2cc(SCc2ccccc2)c1=O',\n", " 'COc1ncc2c3c(=O)oc(C(=O)NCC(C)C)c3c(=O)c(C)c2c1',\n", " 'CC(=O)N1CCN(C(=O)c2ccccc2F)c2ccccc21',\n", " 'Cc1cccc(C(=O)Nc2ccc(Cl)cc2)c1',\n", " 'Cc1ccc(NC(=O)CSc2nc(C)cc(C)c2C)cc1',\n", " 'CC(=O)Nc1ccc(NC(=O)c2c(Cl)cccc2C#N)cc1',\n", " 'Cc1c(N)nc(SCC(=O)c2ccccc2)n1CC(=O)Nc1cccnc1',\n", " 'CN(C)S(=O)(=O)c1ccc(NC(=O)c2ccc(F)cc2)cc1',\n", " 'CN(C)C(=O)c1nnc2ccc(Cl)cc2c1OCC',\n", " 'Cc1cc(C)n(Cc2c(Cl)cccc2Cl)c(=O)n1',\n", " 'COc1ccc(-c2nc(-c3cccs3)no2)cc1',\n", " 'COc1ccc(Cl)cc1NC(=O)c1ccc(-c2nonc2N)cc1',\n", " 'CCOC(=O)c1cccc(NC(=O)c2ccc(NC(C)=O)cc2)c1',\n", " 'COc1ccc(C(=O)NCC2CCCCCCC2)cc1',\n", " 'CC(=O)Nc1ccc(NS(=O)(=O)c2ccc(Cl)cc2)cc1',\n", " 'Cc1cc(C)n(C(=O)Nc2ccc(Cl)cc2)n1',\n", " 'COc1cccc(CNC(=O)c2ccc(OC)cc2)c1',\n", " 'COc1cc(C(=O)Nc2nc(C)ccc2C)cc(OC)c1OC',\n", " 'CC(=O)Nc1ccc(C(=O)Nc2ccn(C)c2)cc1',\n", " 'COc1ccc(NC(=O)c2ccccc2Cl)cc1',\n", " 'CC(=O)Nc1ccc(NC(=O)c2cccc(F)c2)cc1',\n", " 'CCN(CC)C(=O)c1ccc(OC(=O)Nc2ccccc2)cc1',\n", " 'CC(=O)Nc1ccc(NC2C(=O)c3ccccc3C2)cc1',\n", " 'COc1cc(OC)cc(C(=O)Nc2cc(Cl)ccc2Cl)c1',\n", " 'COc1cc(C(=O)Nc2ccccc2C#N)cc(OC)c1OC',\n", " 'COc1ccc(C(=O)NC2CCN(c3cccs3)CC2)cc1']" ] }, "metadata": {}, 
"execution_count": 23 } ], "source": [ "gen_smiles" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 36 }, "outputId": "3db18e7e-4824-4bf4-a32c-5a5075168929", "id": "Sb4rde_0M8Jl" }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "'Valid molecules % = 319'" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "string" } }, "metadata": {}, "execution_count": 25 } ], "source": [ "\"Valid molecules % = {}\".format(len(molecules))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "kzAjOnY_M8Jl" }, "outputs": [], "source": [ "mol_dict = []\n", "for i in molecules:\n", " mol_dict.append({'molecule' : i, 'qed': qed(i), 'smiles': Chem.MolToSmiles(i)})" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "39BNrxcTM8Jl" }, "outputs": [], "source": [ "results = pd.DataFrame(mol_dict)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 1000 }, "outputId": "f40b484b-fcba-4ced-b653-de378758d2f8", "id": "GytZST97M8Jl" }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " molecule qed \\\n", "0 0.878882 \n", "1 0.881082 \n", "2 0.707781 \n", "3 0.678701 \n", "4 0.783312 \n", "5 0.941276 \n", "6 0.807600 \n", "7 0.878873 \n", "8 0.876200 \n", "9 0.864057 \n", "10 0.860424 \n", "11 0.811711 \n", "12 0.837287 \n", "13 0.874613 \n", "14 0.717143 \n", "15 0.855347 \n", "16 0.841536 \n", "17 0.873587 \n", "18 0.663220 \n", "19 0.899845 \n", "20 0.932065 \n", "21 0.794003 \n", "22 0.788492 \n", "23 0.829125 \n", "24 0.895830 \n", "25 0.726468 \n", "26 0.856963 \n", "27 0.501545 \n", "28 0.946285 \n", "29 0.933409 \n", "30 0.712747 \n", "31 0.884463 \n", "32 0.924984 \n", "33 0.877083 \n", "34 0.831132 \n", "35 0.864707 \n", "36 0.868283 \n", "37 0.866578 \n", "38 0.855010 \n", "39 0.767471 \n", "40 0.745913 \n", "41 0.887220 
\n", "42 0.888703 \n", "43 0.841396 \n", "44 0.627649 \n", "45 0.792612 \n", "46 0.773102 \n", "47 0.875962 \n", "48 0.833716 \n", "49 0.879208 \n", "50 0.713352 \n", "51 0.918215 \n", "52 0.909016 \n", "53 0.654133 \n", "54 0.780083 \n", "55 0.798360 \n", "56 0.907214 \n", "\n", " smiles \n", "0 COC(=O)c1cccc(NC(=O)c2ccc(C)c(C)c2)c1 \n", "1 COC(=O)c1sc(NS(=O)(=O)c2ccccc2)cc1C \n", "2 Cc1onc(-c2ccccc2)c1OCc1ccccc1 \n", "3 COC(=O)c1nc(N)cc(C(=O)c2cccs2)c1C#N \n", "4 Cc1ccc(OCC(=O)Nc2ccc3ccccc3c2)cc1 \n", "5 CN(C)S(=O)(=O)c1cccc(NC(=O)c2ccccc2)c1 \n", "6 Cc1ccc(-n2nc(C(N)=O)c(-c3csc(C)c3)n2)cc1 \n", "7 COc1ccc(C2Cc3ccccc3NC(=O)N2)cc1 \n", "8 COc1cc(S(=O)(=O)Nc2ccc(C)c(C)c2)cc(OC)c1O \n", "9 CCOC(=O)N1CCN(c2cccc(OC)c2)c2ccccc21 \n", "10 CCOC(=O)N1CCN(C(=O)Cc2ccccc2)CC(C)(C)C1 \n", "11 CCOC(=O)c1cc(C)n(Cc2ccc(F)cc2)c1C#N \n", "12 Cc1sc(NC(=O)c2ccc(Cl)cc2)c(Cl)c1C \n", "13 CCOC(=O)c1cccc(C(=O)Nc2ccc(C)cc2)c1 \n", "14 CC1CCN(C(=O)c2ccccc2Cl)C1 \n", "15 Cc1onc(-c2ccccc2)c1C(=O)N1CCCN(C)C1C \n", "16 COC(=O)Cn1cnc(Nc2ccccc2)c1C#N \n", "17 Cn1cnnc1SCC(=O)Nc1ccc(Cl)cc1 \n", "18 Cc1cc(C)nc(SCC(=O)Nc2ccc(C(C)(F)F)cc2)n1 \n", "19 CCN(CC)C(=O)Nc1ccc2ccccc2n1 \n", "20 COc1ccc(C(=O)Nc2ccc3c(c2)OCO3)cc1 \n", "21 COc1ccc(NC(C)=O)cc1Cl \n", "22 CCOC(=O)c1ccc(-c2ccc(OC)c(OC)c2)cc1 \n", "23 COC(=O)c1ccc(NC(=O)COc2ccccc2)c(OC)c1 \n", "24 COc1ccc(C(=O)Nc2cccc(S(N)(=O)=O)c2)cc1 \n", "25 Cn1cnnc1SCC(=O)NCc1nc2ccccc2s1 \n", "26 COc1cccc(NC(=O)CNc2ccccc2OC)c1 \n", "27 Cc1cc(N)nc(SCC(=O)Nc2nc3ccccc3[nH]2)n1 \n", "28 Cc1cc(C)c(S(=O)(=O)Nc2ccccn2)cc1Cl \n", "29 Cc1ccc(NS(=O)(=O)c2ccccc2C#N)cc1 \n", "30 CCc1c(C)sc2ncnc(Cc3ccccc3)c12 \n", "31 CC(=O)Nc1ccc(NC(=O)c2conc2C)cc1 \n", "32 COc1cccc(CNC(=O)c2ccc(C)cc2C)c1 \n", "33 COC(=O)c1ccc(NS(=O)(=O)c2ccccc2)cc1 \n", "34 CCOC(=O)C1=C(C)NC(=O)NC1c1ccccc1F \n", "35 CCOC(=O)C1=C(C)N(C)C(=O)NC1c1ccc(OCl)cc1 \n", "36 COc1ccc(S(=O)(=O)c2ccc(Cl)cc2)cc1 \n", "37 CC(C)(C)CNS(=O)(=O)c1ccc(NC(=O)c2cccs2)cc1 \n", "38 COc1ccc(CNC(=O)Cn2nc(C)cc2N)cc1 \n", "39 
Cn1cncc1SCc1cccc(F)c1F \n", "40 CC(=O)Nc1ccc2oc(-c3ccccc3)cc2c1 \n", "41 COc1cc(OC)cc(OCC(=O)Nc2ccccc2)c1 \n", "42 CC(=O)NC1Nc2ccccc2C1c1ccccc1Cl \n", "43 COc1ccc(OC)c(NC(=O)c2ccc(NC(C)=O)cc2OC)c1 \n", "44 COc1ccc2c(=O)c3cc(Cl)ccc3oc2c1 \n", "45 CCOC(=O)NC1=CC2=C(C)C=C(Cl)C2=C1N \n", "46 CC(=O)Nc1ccc(-c2nc3ccccc3n2C)cc1 \n", "47 CCOC(=O)c1c(NC(=O)c2ccccc2)sc2c1CCC2 \n", "48 Cn1cnnc1SCC(=O)Nc1ccc(N2CCOCC2)cc1 \n", "49 COC(=O)c1sc(NC(=O)c2cccs2)cc1C \n", "50 Cn1c(=O)c2c(ncn2C(=O)Nc2ccc(F)cc2)n(C)c1=O \n", "51 COc1cccc(NC(=O)c2ccc(OC)c(OC)c2)c1 \n", "52 COc1cc(O)c(OC)c(C(=O)Nc2cccc(Cl)c2)c1 \n", "53 Cc1cccc(C)c1NC(=O)CSc1nc(-c2ccc(Cl)cc2)no1 \n", "54 Cc1onc(-c2ccccc2)c1CNC(=O)c1ccccc1Cl \n", "55 Cc1onc(-c2ccccc2)c1C(=O)Nc1ccccn1 \n", "56 CC(=O)Nc1ccc(C(=O)Nc2ccc(Cl)cc2)cc1 " ], "text/html": [ "\n", "
\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
moleculeqedsmiles
0
\"Mol\"/
0.878882COC(=O)c1cccc(NC(=O)c2ccc(C)c(C)c2)c1
1
\"Mol\"/
0.881082COC(=O)c1sc(NS(=O)(=O)c2ccccc2)cc1C
2
\"Mol\"/
0.707781Cc1onc(-c2ccccc2)c1OCc1ccccc1
3
\"Mol\"/
0.678701COC(=O)c1nc(N)cc(C(=O)c2cccs2)c1C#N
4
\"Mol\"/
0.783312Cc1ccc(OCC(=O)Nc2ccc3ccccc3c2)cc1
5
\"Mol\"/
0.941276CN(C)S(=O)(=O)c1cccc(NC(=O)c2ccccc2)c1
6
\"Mol\"/
0.807600Cc1ccc(-n2nc(C(N)=O)c(-c3csc(C)c3)n2)cc1
7
\"Mol\"/
0.878873COc1ccc(C2Cc3ccccc3NC(=O)N2)cc1
8
\"Mol\"/
0.876200COc1cc(S(=O)(=O)Nc2ccc(C)c(C)c2)cc(OC)c1O
9
\"Mol\"/
0.864057CCOC(=O)N1CCN(c2cccc(OC)c2)c2ccccc21
10
\"Mol\"/
0.860424CCOC(=O)N1CCN(C(=O)Cc2ccccc2)CC(C)(C)C1
11
\"Mol\"/
0.811711CCOC(=O)c1cc(C)n(Cc2ccc(F)cc2)c1C#N
12
\"Mol\"/
0.837287Cc1sc(NC(=O)c2ccc(Cl)cc2)c(Cl)c1C
13
\"Mol\"/
0.874613CCOC(=O)c1cccc(C(=O)Nc2ccc(C)cc2)c1
14
\"Mol\"/
0.717143CC1CCN(C(=O)c2ccccc2Cl)C1
15
\"Mol\"/
0.855347Cc1onc(-c2ccccc2)c1C(=O)N1CCCN(C)C1C
16
\"Mol\"/
0.841536COC(=O)Cn1cnc(Nc2ccccc2)c1C#N
17
\"Mol\"/
0.873587Cn1cnnc1SCC(=O)Nc1ccc(Cl)cc1
18
\"Mol\"/
0.663220Cc1cc(C)nc(SCC(=O)Nc2ccc(C(C)(F)F)cc2)n1
19
\"Mol\"/
0.899845CCN(CC)C(=O)Nc1ccc2ccccc2n1
20
\"Mol\"/
0.932065COc1ccc(C(=O)Nc2ccc3c(c2)OCO3)cc1
21
\"Mol\"/
0.794003COc1ccc(NC(C)=O)cc1Cl
22
\"Mol\"/
0.788492CCOC(=O)c1ccc(-c2ccc(OC)c(OC)c2)cc1
23
\"Mol\"/
0.829125COC(=O)c1ccc(NC(=O)COc2ccccc2)c(OC)c1
24
\"Mol\"/
0.895830COc1ccc(C(=O)Nc2cccc(S(N)(=O)=O)c2)cc1
25
\"Mol\"/
0.726468Cn1cnnc1SCC(=O)NCc1nc2ccccc2s1
26
\"Mol\"/
0.856963COc1cccc(NC(=O)CNc2ccccc2OC)c1
27
\"Mol\"/
0.501545Cc1cc(N)nc(SCC(=O)Nc2nc3ccccc3[nH]2)n1
28
\"Mol\"/
0.946285Cc1cc(C)c(S(=O)(=O)Nc2ccccn2)cc1Cl
29
\"Mol\"/
0.933409Cc1ccc(NS(=O)(=O)c2ccccc2C#N)cc1
30
\"Mol\"/
0.712747CCc1c(C)sc2ncnc(Cc3ccccc3)c12
31
\"Mol\"/
0.884463CC(=O)Nc1ccc(NC(=O)c2conc2C)cc1
32
\"Mol\"/
0.924984COc1cccc(CNC(=O)c2ccc(C)cc2C)c1
33
\"Mol\"/
0.877083COC(=O)c1ccc(NS(=O)(=O)c2ccccc2)cc1
34
\"Mol\"/
0.831132CCOC(=O)C1=C(C)NC(=O)NC1c1ccccc1F
35
\"Mol\"/
0.864707CCOC(=O)C1=C(C)N(C)C(=O)NC1c1ccc(OCl)cc1
36
\"Mol\"/
0.868283COc1ccc(S(=O)(=O)c2ccc(Cl)cc2)cc1
37
\"Mol\"/
0.866578CC(C)(C)CNS(=O)(=O)c1ccc(NC(=O)c2cccs2)cc1
38
\"Mol\"/
0.855010COc1ccc(CNC(=O)Cn2nc(C)cc2N)cc1
39
\"Mol\"/
0.767471Cn1cncc1SCc1cccc(F)c1F
40
\"Mol\"/
0.745913CC(=O)Nc1ccc2oc(-c3ccccc3)cc2c1
41
\"Mol\"/
0.887220COc1cc(OC)cc(OCC(=O)Nc2ccccc2)c1
42
\"Mol\"/
0.888703CC(=O)NC1Nc2ccccc2C1c1ccccc1Cl
43
\"Mol\"/
0.841396COc1ccc(OC)c(NC(=O)c2ccc(NC(C)=O)cc2OC)c1
44
\"Mol\"/
0.627649COc1ccc2c(=O)c3cc(Cl)ccc3oc2c1
45
\"Mol\"/
0.792612CCOC(=O)NC1=CC2=C(C)C=C(Cl)C2=C1N
46
\"Mol\"/
0.773102CC(=O)Nc1ccc(-c2nc3ccccc3n2C)cc1
47
\"Mol\"/
0.875962CCOC(=O)c1c(NC(=O)c2ccccc2)sc2c1CCC2
48
\"Mol\"/
0.833716Cn1cnnc1SCC(=O)Nc1ccc(N2CCOCC2)cc1
49
\"Mol\"/
0.879208COC(=O)c1sc(NC(=O)c2cccs2)cc1C
50
\"Mol\"/
0.713352Cn1c(=O)c2c(ncn2C(=O)Nc2ccc(F)cc2)n(C)c1=O
51
\"Mol\"/
0.918215COc1cccc(NC(=O)c2ccc(OC)c(OC)c2)c1
52
\"Mol\"/
0.909016COc1cc(O)c(OC)c(C(=O)Nc2cccc(Cl)c2)c1
53
\"Mol\"/
0.654133Cc1cccc(C)c1NC(=O)CSc1nc(-c2ccc(Cl)cc2)no1
54
\"Mol\"/
0.780083Cc1onc(-c2ccccc2)c1CNC(=O)c1ccccc1Cl
55
\"Mol\"/
0.798360Cc1onc(-c2ccccc2)c1C(=O)Nc1ccccn1
56
\"Mol\"/
0.907214CC(=O)Nc1ccc(C(=O)Nc2ccc(Cl)cc2)cc1
\n", "
\n", " \n", " \n", " \n", "\n", " \n", "
\n", "
\n", " " ] }, "metadata": {}, "execution_count": 25 } ], "source": [ "results" ] }, { "cell_type": "code", "source": [], "metadata": { "id": "uYgVRVT5CXft" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 469 }, "id": "tmwbGaSTH37h", "outputId": "8cc62188-d8bb-46d2-d345-dd75f4c816e3" }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "Text(0.5, 1.0, 'QED plot')" ] }, "metadata": {}, "execution_count": 26 }, { "output_type": "display_data", "data": { "text/plain": [ "
" ], "image/png": "iVBORw0KGgoAAAANSUhEUgAAAioAAAGzCAYAAAABsTylAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAABQBklEQVR4nO3deXiU5aE28HuWzEyWSUL2FchCEnbCKltRBNyKuLQVlLVq1UKrpZ4W9NSqVbHHT+qOG4JUFKuiVkEEkUUUBMIWtoTsIfs+WWeSmff7Y5KRSIAsM/O8M3P/rmuuUyaTyc17knD7bK9CkiQJRERERDKkFB2AiIiI6FJYVIiIiEi2WFSIiIhItlhUiIiISLZYVIiIiEi2WFSIiIhItlhUiIiISLZYVIiIiEi2WFSIiIhItlhUiMitrV+/HgqFAnl5eaKjEFEvsKgQUZdOnTqF+fPnIzo6GlqtFlFRUZg/fz5Onz590Ws7ysClHgcOHLC99sLn1Wo1goKCMGbMGDz44INdvrdI77//Pl544QXRMYg8mlp0ACKSn82bN2PevHkICgrC3Xffjbi4OOTl5WHt2rX4+OOP8eGHH2LOnDkXfd6TTz6JuLi4i55PTEzs9OeZM2di4cKFkCQJdXV1OH78ON5991289tpr+Oc//4nly5c77O/WE++//z5OnjyJhx56SHQUIo/FokJEnWRnZ2PBggWIj4/H3r17ERoaavvYgw8+iKlTp2L+/Pk4ceLERaXkhhtuwNixY6/4NZKSkjB//vxOzz377LOYPXs2/vznPyMlJQU33nijff5CROTSOPVDRJ0899xzaGpqwptvvtmppABASEgI3njjDTQ0NOC5556z69cNDg7Gpk2boFar8fTTT1/x9QqFAsuWLcPGjRuRnJwMnU6HMWPGYO/evd36eq+99hqGDh1qm9ZaunQpamtrbR+/+uqrsWXLFuTn59umqgYOHNjLvx0R9RZHVIioky+++AIDBw7E1KlTu/z4L37xCwwcOBBffPEFXnvttU4fq6urQ2VlZafnFAoFgoODu/W1+/fvj2nTpmHXrl0wGAzw9/e/7Ov37NmDDz/8EH/84x+h1Wrx2muv4frrr8fBgwcxbNiwS37e448/jieeeAIzZszAAw88gIyMDKxZswaHDh3C999/Dy8vLzz66KOoq6vD+fPn8a9//QsA4Ofn162/BxHZD4sKEdnU1dWhuLi4y/UnFxoxYgT++9//or6+Hnq93vb8jBkzLnqtVqtFS0tLtzMMGzYMO3fuRF5eHkaMGHHZ1548eRKHDx/GmDFjAABz585FcnIyHnvsMWzevLnLz6moqMCqVaswa9YsfPXVV1AqrQPLKSkpWLZsGd577z0sWbIEM2fORHR0NGpqai6apiIi52FRISKb+vp6AOhUPrrS8fGfF5VXX30VSUlJnV6rUql6lKFj1KIjy+VMnDjRVlIA64jMnDlz8MUXX8BsNnf5tb/55huYTCY89NBDtpICAPfeey8eeeQRbNmyBUuWLOlRZiJyHBYVIrK5sIBcTn19PRQKBUJCQjo9P378+G4tpr2choaGTlkuZ9CgQRc9l5SUhKamJlRUVCAiIuKij+fn5wMAkpOTOz2v0WgQHx9v+zgRyQMX0xKRTUBAAKKionDixInLvu7EiROIiYmBRqOxe4aTJ09CpVJ1uc2ZiDwPiwoRdTJ79mzk5uZi3759XX78u+++Q15eHn7961/b/WsXFBRgz549mDhxYrdGVM6dO3fRc5mZmfDx8blox1KHAQMGAAAyMjI6PW8ymZCbm2v7OGBdCExEYrGoEFEnDz/8MHx8fHDfffehqqqq08eqq6tx//33w9/fH8uWLbPr162ursa8efNgNpvx6KOPdutz9u/fjyNHjtj+XFhYiM8//xyzZs265NqYGTNmQKPR4KWXXoIkSbbn165di7q6Otx0002253x9fVFXV9fLvxER2QPXqBBRJ4mJi
diwYQPmzZuH4cOHX3QybU1NDTZt2tTl1MxXX32Fs2fPXvT8pEmTEB8fb/tzZmYm3nvvPUiSBIPBgOPHj+Ojjz5CQ0MDVq9ejeuvv75bWYcNG4brrruu0/ZkAHjiiScu+TmhoaFYuXIlnnjiCVx//fW4+eabkZGRgddeew3jxo3rtMNnzJgx+PDDD7F8+XKMGzcOfn5+mD17dreyEZGdSEREXUhPT5fuvPNOKSIiQlIqlRIASafTSadOnbrotevWrZMAXPKxbt0622svfF6pVEqBgYFSamqq9OCDD3b53pcCQFq6dKn03nvvSYMGDZK0Wq2Umpoq7dq1q8tsubm5nZ5/5ZVXpJSUFMnLy0sKDw+XHnjgAammpqbTaxoaGqQ777xTCgwMlABIAwYM6HY+IrIPhSRdMPZJRHQJGzZswOLFizF//nxs2LBBdBwoFAosXboUr7zyiugoRORAnPohom5ZuHAhSkpKsGLFCsTExOCZZ54RHYmIPABHVIjIJXFEhcgzcNcPERERyRanfojIJXEwmMgzcESFiIiIZItFhYiIiGTLpad+LBYLiouLodfredQ1ERGRi5AkCfX19YiKiup0F/OuuHRRKS4uRmxsrOgYRERE1AuFhYWIiYm57Gtcuqh03LSssLAQ/v7+gtMQERFRdxgMBsTGxnbr5qMuXVQ6pnv8/f1ZVIiIiFxMd5ZtcDEtERERyRaLChEREckWiwoRERHJFosKERERyRaLChEREckWiwoRERHJFosKERERyRaLChEREckWiwoRERHJFosKERERyRaLChEREckWiwoRERHJFosKERERyZZL3z2ZiIjsy9DSij0ZFTiUV43cykaMGdAPM4eEY0ikf7fudEtkbywqREQEADiUV41l7x9BmcFoe+67c5V44ZtzmJ4ShpfmpcJPy382yLk49UNERHj7uxzMffMAygxGxPTzxuJJA/H47CGYOSQcGpUS354tx29e34/SuhbRUcnDKCRJkkSH6C2DwYCAgADU1dXB399fdBwiIpf0n0OF+MsnJwAAt4yKwtO3DofvBSMnxwprcc+7h1DZYEJMP29s+eNUBHh7iYpLbqAn/35zRIWIyIOdOF+L//38JADgD9MT8a87RnUqKQAwKjYQn/5+MmKDvHG+phmPtb+eyBlYVIiIPFR1own3/zsNpjYLZgwOw59mJF1ywWxskA9enJsKlVKBz48V4/NjRU5OS56KRYWIyEM99/VZFNe1IC7EF6vvGAWl8vK7ekb374c/TE8EAPzvZydRZuB6FXI8FhUiIg90sqgOmw4VAgCe+9UI+Ou6t+Zk2TWJGBkTgPqWNry6K8uREYkAsKgQEXkcSZLw5BenIUnAzSOjMHZgULc/V61SYsUNgwEAmw4Wori22VExiQCwqBAReZwt6SU4mFcNnZcSK25I6fHnT0wIxlXxQTCZLRxVIYdjUSEi8iAWi4QXvjkHAHhgWiKiAr179T5/mpEEAPjP4UKcr2myWz6in2NRISLyIDvPliOrvAF6nRq/nTKw1+8zIT4YkxOD0WqW8PZ3ufYLSPQzQovK448/DoVC0emRktLzYUgiIuqe1/dkAwDmXzUA+m4uoL2U+36RAADYfOQ8mk3mPmcj6orwEZWhQ4eipKTE9ti3b5/oSEREbulQXjXS8mugUSmxZNLAPr/flMQQxPTzhqGlDVvTS/oekKgLwouKWq1GRESE7RESEiI6EhGRW3qjfTTl9jHRCPPX9fn9lEoF5o3vDwD44GBBn9+PqCvCi8q5c+cQFRWF+Ph43HXXXSgouPQ3u9FohMFg6PQgIqIrK6xuws6z5QCAe6bG2+19fz0mBiqlAofza5BZVm+39yXqILSoTJgwAevXr8e2bduwZs0a5ObmYurUqaiv7/qbfdWqVQgICLA9YmNjnZyYiMg1fXS4EJIETE4MRkKon93eN8xfh2tTwgBwVIUcQ1Z3T66trcWAAQOwevVq3H333Rd93Gg0wmg02v5sM
BgQGxvLuycTEV1Gm9mCKf/chVJDC16el4rZI6Ps+v67zpZjyfpDCPbV4MdHroVaJXywnmSuJ3dPVl/2o04WGBiIpKQkZGV1fYCQVquFVqt1cioiIte2J7MCpYYW9PPxwqyh4XZ//ymDQhDo44WqRhMO5lZjUiLXGpL9yKr2NjQ0IDs7G5GRkaKjEBG5jQ8OWu/pc/voGGjVKru/v5dKieuGRAAAtp7k7h+yL6FF5eGHH8aePXuQl5eHH374AbfeeitUKhXmzZsnMhYRkdsoN7RgV4Z1Ee3c8Y5b13fDcGtR2XayDGaLbFYUkBsQOvVz/vx5zJs3D1VVVQgNDcWUKVNw4MABhIaGioxFROQ2/nu8GGaLhNH9A5EYpnfY15mcGIIAby9UNhhxMLcaExOCHfa1yLMILSqbNm0S+eWJiNzeF8eLAQC3pEY79Ot4qZSYNSQcH6Wdx1cnS1hUyG5ktUaFiIjsJ7eyEcfP10GlVODG4Y5f+9fxNb46WcrpH7IbFhUiIjfVMZoyOTEEIX6O3zE5OTEEeq0aFfVGnDhf6/CvR56BRYWIyA1JkoTPjxUBAG6287kpl6JRKzFlkHVr8u6MCqd8TXJ/LCpERG7odIkB2RWN0KiVuM4BZ6dcytXJ1s0QuzNZVMg+WFSIiNzQlhPW80ymJ4dBr/Ny2tedlmQ9Tv/E+VpUNRiv8GqiK2NRISJyQ9tOlQL46XwTZ4kI0GFwpD8kCfjuXKVTvza5JxYVIiI3k1Vej5yKRmhUSkxvv2GgM9mmf9oPmiPqCxYVIiI3s+2kdTRlcmKwU6d9OlydZC0qezIruE2Z+oxFhYjIzXx9qgwAcN1Q5077dBg9oB/0WjVqmlq5TZn6jEWFiMiNnK9pQnpRHZQKYMYQ5+32uZCXSonJ7XdQ/j6L61Sob1hUiIjcyPb20ZSxA4OccsjbpUxKtB6hvz+nSlgGcg8sKkREbmT7aev6FFHTPh2uircWlcN5NTC2mYVmIdfGokJE5CYMLa04nFcDAJg5WMy0T4dBYX4I8dPA2GbBsYJaoVnItbGoEBG5ie/PVaLNIiE+1Bf9g32EZlEoFJgQz+kf6jsWFSIiN7Gr/dySa5Kdf3ZKVyZ2FJVsFhXqPRYVIiI3IEkSdrXfCFA2RSXBWlSOFtSipZXrVKh3WFSIiNzAqWIDKuqN8NGoMC6un+g4AID4EF+E6bUwmS04UlAjOg65KBYVIiI30HFc/eTEEGjVKsFprBQKhW1U5QCnf6iXWFSIiNyA3KZ9OkyIsxaVg3nVgpOQq2JRISJycTWNJhxtn1rpuCGgXIwdaJ2GOlZYi1azRXAackUsKkRELm7vuQpYJCAlQo+oQG/RcTpJDPWDv06NllYLThcbRMchF8SiQkTk4nadta5PuVpm0z4AoFQqMGaAdVTlcD4X1FLPsagQEbkws0XCnkzr+pTpKfIrKoD1vkMAkJbPdSrUcywqREQu7Pj5WtQ0tUKvU2N0/0DRcbpkG1HJq4EkSYLTkKthUSEicmG726d9fpEUCrVKnr/SR8YEQq1UoLzeiPM1zaLjkIuR53c1ERF1i1y3JV/IW6PC0OgAAMBhTv9QD7GoEBG5qPL6FqQX1QEApiXJa1vyz429YPqHqCdYVIiIXNR3mZUAgOHRAQjVawWnubyOopLGnT/UQywqREQu6vssa1GZMihEcJIr61hQm1FWjwZjm+A05EpYVIiIXJAkSdjXXlSmJsq/qIT56xAVoIMkAenn60THIRfCokJE5ILOlTegvN4IrVqJ0QPkcbfkKxkZGwjAepw+UXexqBARuaDvzllHU8bHBUHnJY+7JV/JqPaicpxFhXqARYWIyAXtO2fdljzVBdandOCICvUGiwoRkYsxtVnwY671PJLJLrA+pcPw6AAoFUCpoQWldS2i45CLYFEhInIxRwtq0GQyI9hXg8ER/qLjdJuvVo2kcD0AjqpQ97GoEBG5mI5tyZMSQ6BUKgSn6RnbOpXztUJzkOtgU
SEicjHfudC25J/rKCrHCmqF5iDXwaJCRORC6ppbbbtmJrvQQtoOHQtqT5yvhdnCOynTlbGoEBG5kAM5VbBIQHyIL6IDvUXH6bGkcD18NCo0mszIrmgQHYdcAIsKEZEL2XfOdY7N74pKqcCQSOsC4FPFPKGWroxFhYjIhXQspHWlbck/Nyw6AABwssggOAm5AhYVIiIXUVTbjJzKRigVwMSEYNFxem1olHVE5WQRR1ToylhUiIhcxPft0z4jYwPhr/MSnKb3hsdYR1ROFxtg4YJaugIWFSIiF+HK25IvlBjqB61aiXpjGwqqm0THIZljUSEicgEWi2RbnzJlUKjgNH2jVimR0r6gNp3TP3QFLCpERC7gTKkB1Y0m+GhUtkPTXNmwjnUq3PlDV8CiQkTkAjq2JV8VHwyN2vV/dXfs/DnFnT90Ba7/3U5E5AH2ucG25AsNi2rfolxcB0niglq6NBYVIiKZa2k142BuNQBgqose9PZzSRF+UCsVqG1qRVFts+g4JGMsKkREMpeWXwNjmwVhei0GhfmJjmMXWrUKSeF6ADz4jS6PRYWISOY6pn2mJIZAoVAITmM/w6J5lD5dGYsKEZHMufr9fS7lp6P0WVTo0lhUiIhkrKbRZNvC6y4LaTsMtS2o5dQPXRqLChGRjO3PqYIkAUnhfgj314mOY1dDIv2hVAAV9UaUG1pExyGZYlEhIpKx786517bkC3lrVEhsXxzMg9/oUlhUiIhk7PsLFtK6I9t5Ktz5Q5fAokJEJFMFVU0oqG6CWqnAhPhg0XEcYigX1NIVyKaoPPvss1AoFHjooYdERyEikoXvs62jKan9A+GnVQtO4xgd9/w5xQW1dAmyKCqHDh3CG2+8gREjRoiOQkQkG/vceH1KhyHtRaWothnVjSbBaUiOhBeVhoYG3HXXXXjrrbfQr18/0XGIiGTBYpFsIyruuj4FAPQ6L8SF+ALg9A91TXhRWbp0KW666SbMmDHjiq81Go0wGAydHkRE7uh0iQG1Ta3w06oxMjZQdByHGto+qsKdP9QVoUVl06ZNOHLkCFatWtWt169atQoBAQG2R2xsrIMTEhGJ0XFs/lXxQfBSCf9vSofqOKH2FHf+UBeEffcXFhbiwQcfxMaNG6HTde8Qo5UrV6Kurs72KCwsdHBKIiIxPGF9SochkdYRlTOlLCp0MWHLyNPS0lBeXo7Ro0fbnjObzdi7dy9eeeUVGI1GqFSqTp+j1Wqh1WqdHZWIyKlaWs04mFcNAJjqZvf36UpKpPUuynmVjWg2meGtUV3hM8iTCCsq1157LdLT0zs9t2TJEqSkpOCvf/3rRSWFiMhTpOXXwNRmQbi/FgmhfqLjOFyYXocQPw0qG0zILKt3+zU51DPCioper8ewYcM6Pefr64vg4OCLnici8iQd61MmJ4ZAoVAITuMcKRH+2JdViTMlBhYV6sS9V2gREbmgjvUp7rwt+ecGt0//nC2tF5yE5EZWRx3u3r1bdAQiIqFqGk22bbqeVFRSIqwLak+XcEEtdcYRFSIiGdmfUwVJApLC/RDm370dke5gcPvOn7MlBkiSJDgNyQmLChGRjFy4PsWTJIT5Qq1UwNDShuK6FtFxSEZYVIiIZKRjfYonbEu+kFatQmKYdYfTWU7/0AVYVIiIZKKgqgkF1U1QKxUYHxcsOo7TpURYF9SeYVGhC7CoEBHJRMdNCFP7B8JPK6u9Dk4x2HZCLXf+0E9YVIiIZKJjfcqUxFDBScRI6SgqHFGhC7CoEBHJgMUi4YeOojLI86Z9gJ/OUuk4Sp8IYFEhIpKF0yUG1DS1wk+rxoiYQNFxhAj10yLYVwOLBGSWcfqHrFhUiIhkoGPa56r4IHipPPNXs0Kh+Ok8Fd5Jmdp55k8DEZHMeOKx+V35aecPR1TIikWFiEiwZpMZB/OqAQBTPOz8lJ/jglr6ORYVIiLBDuRWwdRmQXSgNxJC/UTHEerCmxPyKH0CWFSIiITbm1kBAPhFUggUC
oXgNGIlhvlBrVSgrrkVJTxKn8CiQkQk3J72ojItyTPPT7mQVq2yjSpxQS0BLCpEREIVVjchp6IRKqUCkzx8IW2HlEguqKWfsKgQEQm095x1NGV0/0D467wEp5GHwVxQSxdgUSEiEsi2PmUQp3068OaEdCEWFSIiQVrNFnyfVQUAmJbMotJhSPuISm5lI1paeZS+p2NRISIS5GhBLRqMbQjy1WBYVIDoOLIRqtciiEfpUzsWFSIiQfZklgMApg4KgVLp2duSL6RQKGzTP2dLWVQ8HYsKEZEgezOtx+ZzfcrFktuLSgaLisdjUSEiEqCywYj0ojoAwNQkbkv+uRQWFWrHokJEJEDHTQiHRPojTK8TnEZ+kiN4F2WyYlEhIhJgj+3YfE77dCUp3A8KBVDZYEJlg1F0HBKIRYWIyMksFgnfneOx+Zfjo1FjQJAPAE7/eDoWFSIiJztdYkBlgwm+GhXGDOgnOo5sJXPnD4FFhYjI6XZnWLclT0wIgUbNX8OX0rFOJYPrVDwaf0KIiJxs51lrUZmeEiY4ibxx5w8BLCpERE5V1WDEscJaACwqV2I7S6WsHmaLJDgNicKiQkTkRLszKiBJwNAof0QEcFvy5QwM9oVWrURLqwUF1U2i45AgLCpERE70Lad9uk2lVCApvGP6h+tUPBWLChGRk5jaLNjbfn4Ki0r3cOcPsagQETnJ4bxq1BvbEOyrwciYQNFxXAIX1BKLChGRk3RM+1yTEsa7JXcTR1SIRYWIyEm4PqXnUtrPUsmrakSzySw4DYnAokJE5AS5lY3IqWyEWqnA1EG8W3J3heq1CPbVQJKAc+UcVfFELCpERE7QMZoyIT4Iep2X4DSuhdM/no1FhYjICb49WwYAmJ4SLjiJ60nmglqPxqJCRORg9S2t+DGnGgDXp/RGim1EhWepeCIWFSIiB9t3rhJtFgnxIb6IC/EVHcflpNhuTsgRFU/EokJE5GC8CWHfJIXroVAAlQ0mVDYYRcchJ2NRISJyILNFwi4WlT7x1qgwIMgHAEdVPBGLChGRA6Xl16Cq0YQAby+MiwsSHcdlceeP52JRISJyoG0nSwEA16aEwUvFX7m9ldy+TuVsCRfUehr+1BAROYgkSfj6lLWozBoaITiNaxvcsUW5jCMqnoZFhYjIQU4VG1BU2wydlxLTkkJFx3FpHVM/mWX1MFskwWnImVhUiIgcZHv7aMq0pFB4a1SC07i2AcG+0Hkp0dJqQUF1k+g45EQsKkREDvL1KetptNdx2qfPVEoFBoV1nFDLdSqehEWFiMgBcisbkVFWD7VSgWt5bL5dcOePZ2JRISJygI5FtFfFByPAhzchtAfbUfolLCqehEWFiMgBOorKdcM47WMvtqP0ufPHo7CoEBHZWZmhBUcLagEAs4Zw2sdeOqZ+8qoa0WwyC05DzsKiQkRkZx27fVL7ByLcXyc4jfsI1WsR7KuBJAHnyjmq4ilYVIiI7Iy7fRyHC2o9D4sKEZEd1TW14kBOFQAWFUdIsR2lz6LiKXpVVHJycuydg4jILXxzpgxtFglJ4X6IC/EVHcftpNiO0udZKp6iV0UlMTER11xzDd577z20tLT0+ouvWbMGI0aMgL+/P/z9/TFx4kR89dVXvX4/IiLRtqSXAACuHxYpOIl76pj6yeDUj8foVVE5cuQIRowYgeXLlyMiIgL33XcfDh482OP3iYmJwbPPPou0tDQcPnwY06dPx5w5c3Dq1KnexCIiEqq2yYS9mRUAgJtHsqg4QlK4HgoFUNlgQmWDUXQccoJeFZVRo0bhxRdfRHFxMd555x2UlJRgypQpGDZsGFavXo2Kiopuvc/s2bNx4403YtCgQUhKSsLTTz8NPz8/HDhwoDexiIiE+vpUKdosElIi9EhsP+6d7Mtbo8KAIB8AHFXxFH1aTKtWq3Hbbbfho48+wj//+U9kZWXh4YcfRmxsLBYuXIiSkpJuv5fZbMamTZvQ2NiIiRMndvkao9EIg8HQ6
UFEJBdfHLf+zps9MkpwEvfWsaD2TAn/DfAEfSoqhw8fxu9//3tERkZi9erVePjhh5GdnY0dO3aguLgYc+bMueJ7pKenw8/PD1qtFvfffz8+/fRTDBkypMvXrlq1CgEBAbZHbGxsX+ITEdlNZYMRP2RXAgB+OYLTPo7EdSqepVdFZfXq1Rg+fDgmTZqE4uJibNiwAfn5+XjqqacQFxeHqVOnYv369Thy5MgV3ys5ORnHjh3Djz/+iAceeACLFi3C6dOnu3ztypUrUVdXZ3sUFhb2Jj4Rkd19lV4CiwSMiAnAgGDu9nGkn3b+sKh4AnVvPmnNmjX47W9/i8WLFyMysuv/cggLC8PatWuv+F4ajQaJiYkAgDFjxuDQoUN48cUX8cYbb1z0Wq1WC61W25vIREQO9cWJ9mmfEZz2cbSOEZXMsnqYLRJUSoXgRORIvSoqO3bsQP/+/aFUdh6QkSQJhYWF6N+/PzQaDRYtWtTj97ZYLDAauZKbiFxHSV0zDuVVAwBu4rSPww0I9oXOS4mWVgsKqpt4Xo2b69XUT0JCAiorKy96vrq6GnFxcd1+n5UrV2Lv3r3Iy8tDeno6Vq5cid27d+Ouu+7qTSwiIiG2nCiBJAFjB/RDVKC36DhuT6VUICm8/Sh9Lqh1e70qKpIkdfl8Q0MDdLru34CrvLwcCxcuRHJyMq699locOnQIX3/9NWbOnNmbWEREQnzZPu3DRbTOkxzOe/54ih5N/SxfvhwAoFAo8Nhjj8HHx8f2MbPZjB9//BGjRo3q9vt1Zw0LEZGcFVY34VhhLZQK4EYWFafhzh/P0aOicvToUQDWEZX09HRoNBrbxzQaDUaOHImHH37YvgmJiGSsYzTlqvhghOm7P6JMfdNxlgp3/ri/HhWVXbt2AQCWLFmCF198Ef7+/g4JRUTkKr44XgwA+CV3+zhVx4hKXlUjmk1meGtUghORo/Rqjcq6detYUojI42WU1uN0iQFqpQLXD4sQHcejhOq1CPHTQJKAc+UcVXFn3R5Rue2227B+/Xr4+/vjtttuu+xrN2/e3OdgRERyt/nIeQDANSlhCPLVXOHVZG/JEXpUZlXhbEk9RsQEio5DDtLtohIQEACFQmH730REnqzNbMGnR4sAALePjhGcxjMlh/vj+6wq7vxxc90uKuvWrevyfxMReaJ9WZUorzein48XpqeEiY7jkX46Sp9nqbizXq1RaW5uRlNTk+3P+fn5eOGFF7B9+3a7BSMikrNPjlhHU24eGQWNuk/3d6Ve4hZlz9Crn645c+Zgw4YNAIDa2lqMHz8ezz//PObMmYM1a9bYNSARkdwYWlqx/VQpAOD2MZz2ESUpXA+FAqhsMKGinrdecVe9KipHjhzB1KlTAQAff/wxIiIikJ+fjw0bNuCll16ya0AiIrnZcqIExjYLBoX5YXg01+yJ4q1RYWD7nao5quK+elVUmpqaoNdbh9y2b9+O2267DUqlEldddRXy8/PtGpCISG4+SbPu9rl9TIxtkwGJ8dNR+lyn4q56VVQSExPx2WefobCwEF9//TVmzZoFwHrvHp6vQkTuLK+yEYfza6BUALemRouO4/G4TsX99aqoPPbYY3j44YcxcOBATJgwARMnTgRgHV1JTU21a0AiIjnpODtlcmIIwv15ZL5oP+38YVFxVz06Qr/Dr371K0yZMgUlJSUYOXKk7flrr70Wt956q93CERHJicUiYXP72Sm/4iJaWUiJtI7iZ5bVw2yRoFJyKs7d9KqoAEBERAQiIjofGT1+/Pg+ByIikquDedU4X9MMP60as4bwyHw56B/kA52XEi2tFuRXNSI+1E90JLKzXhWVxsZGPPvss9i5cyfKy8thsVg6fTwnJ8cu4YiI5OQ/hwoBADcNj+RN8GRCpVQgKVyPE+frkFFaz6LihnpVVO655x7s2bMHCxYsQGRkJFe9E5Hbq2k04cv0EgDA3PGxgtPQhZLbi8rZ0nrcMDxSdByys14Vla+++gpbt
mzB5MmT7Z2HiEiWPjlyHqY2C4ZE+mNUbKDoOHQB7vxxb73a9dOvXz8EBQXZOwsRkSxJkoT3DxYAAO6c0J+jyDKTEmFdUMudP+6pV0XlH//4Bx577LFO9/shInJXP+ZWI6eiET4aFeaMihIdh34mJdI6opJX1YgmU5vgNGRvvZr6ef7555GdnY3w8HAMHDgQXl5enT5+5MgRu4QjIpKDjT9aR1PmjIqCXud1hVeTs4X4aRHip0FlgwnnyhowklNzbqVXReWWW26xcwwiInmqajBi20nrIto7xw8QnIYuJTlCj8qsKmSU1rOouJleFZW///3v9s5BRCRLH6edR6tZwoiYAAyP4Q0I5So53B/fZ1XhLBfUup1erVEBgNraWrz99ttYuXIlqqurAVinfIqKiuwWjohIJItFwgcdi2jH9xechi7np6P0eXNCd9OrEZUTJ05gxowZCAgIQF5eHu69914EBQVh8+bNKCgowIYNG+ydk4jI6X7IrkJeVRP8tGrMHslFtHLWsaCWW5TdT69GVJYvX47Fixfj3Llz0Ol+uinXjTfeiL1799otHBGRSO8fzAcA3JIaBV9tr+84Qk4wKEwPhQKobDChot4oOg7ZUa+KyqFDh3Dfffdd9Hx0dDRKS0v7HIqISLTSuhZsP1UGgItoXYG3RoWBwb4AOKribnpVVLRaLQyGi+cBMzMzERoa2udQRESibdifhzaLhHED+2FIlL/oONQNyeHW6Z+zpVyn4k56VVRuvvlmPPnkk2htbQUAKBQKFBQU4K9//Stuv/12uwYkInK2ZpPZdhLt3VPiBaeh7uJR+u6pV0Xl+eefR0NDA0JDQ9Hc3Ixp06YhMTERer0eTz/9tL0zEhE51SdHzqO2qRX9g3wwc0i46DjUTYMjO0ZUWFTcSa9WhwUEBGDHjh34/vvvcfz4cTQ0NGD06NGYMWOGvfMRETmVxSLhne9zAQCLJw2ESsn7+riKwZE/3fOnzWyBWtXrEzhIRnpcVCwWC9avX4/NmzcjLy8PCoUCcXFxiIiIgCRJvFkXEbm0PZkVyKlohF6rxm/GxYqOQz0Q288Hflo1GoxtyK5otE0FkWvrUd2UJAk333wz7rnnHhQVFWH48OEYOnQo8vPzsXjxYtx6662OyklE5BRr91lHU+4YFws/bkl2KUqlwjb9c7qkTnAaspce/RSuX78ee/fuxc6dO3HNNdd0+ti3336LW265BRs2bMDChQvtGpKIyBnOlhqwL6sSSgWwaNJA0XGoF4ZGBeBQXg1OFxtwa6roNGQPPRpR+eCDD/DII49cVFIAYPr06VixYgU2btxot3BERM609jvraMr1wyIQG+QjOA31xpD2dSqnS7hF2V30qKicOHEC119//SU/fsMNN+D48eN9DkVE5GwV9UZ8fqwYALcku7KOM29OFxsgSZLgNGQPPSoq1dXVCA+/9Fa98PBw1NTU9DkUEZGz/ftAPkxmC0bFBmLMgH6i41AvJYb5Qa1UoKapFSV1LaLjkB30qKiYzWao1Zde1qJSqdDW1tbnUEREzlTf0or17VuS753K0RRXpvNSITHMD4B1VIVcX48W00qShMWLF0Or1Xb5caORN4IiItezYX8+DC1tSAzzww3DIkTHoT4aEumPs6X1OF1iwAwe2OfyelRUFi1adMXXcMcPEbmSRmMb3v4uBwCw7JpEKHnAm8sbEuWPzUeLOKLiJnpUVNatW+eoHEREQmz8MR81Ta0YGOyDX46IFB2H7MC2oJY7f9wCzxcmIo/V0mrGm3uta1N+f00ij1x3Ex1blAuqm2BoaRWchvqKP5VE5LE+OFiAygYjogO9cWtqtOg4ZCeBPhpEB3oDAM5w+sflsagQkUcytpnxxh7r2pQHrk6AF0dT3MpgHvzmNviTSUQe6aPD51FqaEGEvw6/HhsjOg7Z2YUHv5FrY1EhIo9jarNgze5sAMB90+KhVasEJyJ741H67oNFhYg8zoeHClBU24wQPy3mje8vO
g45wND2EZXMsnqY2iyC01BfsKgQkUdpMrXhxZ1ZAIA/XpsInRdHU9xRTD9v6HVqtJolZJU3iI5DfcCiQkQeZd33eahsMCI2yBtzx3E0xV0pFApO/7gJFhUi8hi1TSa8vse6NmX5zCRo1PwV6M64oNY98KeUiDzGK99mob6lDcnhetw8kuemuLuOEZVTxXWCk1BfsKgQkUfIr2rEu/vzAAArb0yBivf0cXsXHqUvSZLgNNRbLCpE5BH+ue0sWs0Spg4KwdXJYaLjkBMMCtPDS6VAfUsbztc0i45DvcSiQkRuLy2/GlvTS6FUAI/eNFh0HHISjVqJQWF6AMAprlNxWSwqROTWzBYJj//3NADgN2NjkRLhLzgROdOwaOv/v08WcZ2Kq2JRISK39p/DhUgvqoNeq8afZyWLjkNONjw6AACQzqLislhUiMht1TaZ8H/bzgIA/jQzCaF6reBE5GzDYwIBWIsKF9S6JhYVInJb/297BmqaWpEcrsfCiQNExyEBUiL0UCsVqG40oaiWC2pdkdCismrVKowbNw56vR5hYWG45ZZbkJGRITISEbmJIwU12PhjAQDg8ZuHQq3if5d5Ip2XCskR1gW1XKfimoT+5O7ZswdLly7FgQMHsGPHDrS2tmLWrFlobGwUGYuIXFyr2YJHNqdDkoDbRkdjYkKw6EgkUMc6lRPnWVRckVrkF9+2bVunP69fvx5hYWFIS0vDL37xi4tebzQaYTQabX82GLjdjIgu9vZ3uThbWo9+Pl7435uGiI5Dgg2PCcCmQ4VcUOuiZDUWWldn/SYKCgrq8uOrVq1CQECA7REbG+vMeETkAnIrG/HCN5kAgP+9aQiCfDWCE5FoI6IDAXBBrauSTVGxWCx46KGHMHnyZAwbNqzL16xcuRJ1dXW2R2FhoZNTEpGcmS0S/uej4zC2WTAlMQS3jeb9fAhIivCDl0qB2qZWnlDrgoRO/Vxo6dKlOHnyJPbt23fJ12i1Wmi13F5IRF1b930uDufXwFejwrO3D4dCwfv5EKBVq5AS4Y/0ojqcOF+H2CAf0ZGoB2QxorJs2TJ8+eWX2LVrF2JiYkTHISIXlF3RgOe+tu4afPSmIYjpx3+M6CfDY3jwm6sSOqIiSRL+8Ic/4NNPP8Xu3bsRFxcnMg4RuShjmxl//OAojG0WTB0UgnnjuX6NOvvphNpasUGox4QWlaVLl+L999/H559/Dr1ej9LSUgBAQEAAvL29RUYjIhfy7FdncarYgH4+XnjuVyM55UMXsRWV89YFtfwecR1Cp37WrFmDuro6XH311YiMjLQ9PvzwQ5GxiMiF7DxThnXf5wEA/t+vRyIiQCc2EMlSUrgeGrUShpY2FFQ3iY5DPSB86oeIqLdK61rw8EfHAQBLJg/EtYPDBSciudKolRgcocfx89YFtQOCfUVHom6SxWJaIqKeMlskPPThUdQ0tWJolD9W3JAiOhLJHBfUuiYWFSJySa/tysKBnGr4aFR4eV4qtGqV6Egkc7aD33iUvkthUSEil7M7oxz/aj999sk5wxAf6ic4EbmCYe0Lak8W1cFi4dIDV8GiQkQuJaeiAX/44CgsEvCbsTG4nafPUjcNCveDVq1EvbENeVW8+a2rYFEhIpdhaGnFvRsOo76lDWMG9MM/bhnGbabUbV4qJYZE+QPgOhVXwqJCRC7BbJHw0KZjyK5oRIS/Dmvmj+a6FOqxC89TIdfAokJELuH57Rn49mw5tGol3lw4BmF6npdCPddRVE5wRMVlsKgQkex9drQIr+3OBgD8369GYERMoNhA5LJGxgYCsC6obTNbxIahbmFRISJZ+z6rEv/zsfVQt/umxWPOKC6epd5LDPWDXqtGk8mMzLIG0XGoG1hUiEi2zpQYcP+/09BqlnDTiEj89Toe6kZ9o1QqbKMqRwpqxIahbmFRISJZKqptxuJ1B1FvbMP4uCA8/+uRUCq5w4f6bnT/QAAsKq6CRYWIZKeuqRWL3zmIMoMRSeF+e
GvBWOi8uMOH7CO1fz8AwLGCWrFBqFtYVIhIVppNZtz778M4V96ACH8d1i8ZjwAfL9GxyI2kto+o5FQ2oqbRJDYMXRGLChHJhqnNggc2puFgbjX0WjXW/3YcogK9RcciNxPoo0F8qPXuyccKa8WGoStiUSEiWWgzW/DgpqPYnVEBnZcS7ywZh5QIf9GxyE2lxlqnf7hORf5YVIhIOItFwl8+PoGvTpZCo1LirYVjMW5gkOhY5MZGDwgEABzlOhXZY1EhIqEkScJj/z2JzUeLoFIq8MqdqZg6KFR0LHJzHSMqxwprYeadlGWNRYWIhJEkCc9+dRbvHSiAQgGs/s1IzBoaIToWeYDkCD18NSo0GNuQVc6D3+SMRYWIhHn52yy8sTcHALDq1uE8dZacRsWD31wGiwoRCfH2dzlYvSMTAPC3Xw7B3PH9BSciT9OxTfkoi4qssagQkdN9cLAAT205AwBYPjMJd0+JE5yIPNHo/h07f2rFBqHLYlEhIqf6/FgRHvk0HYD1JoN/mJ4oOBF5qlHtUz9Z5Q2oa2oVG4YuiUWFiJxm+6lSLP/PcUgSsOCqAVhxfQoUCt6/h8QI9tNiYLAPAODY+VqxYeiSWFSIyCm+O1eBZe8fhdki4bbR0Xji5qEsKSScbfonn+tU5IpFhYgc7nBeNe7dcBgmswU3DIvA/90+gndCJlmwLajlUfqyxaJCRA51qrgOS9YfQkurBdOSQvHi3FSoVfzVQ/LQcSflowU1sPDgN1nibwsicpicigYsXHsQ9S1tGD8wCK/PHwONmr92SD5SIvTw9lKhvqUNOZU8+E2O+BuDiByiqLYZ89/+EVWNJgyL9sfbi8fCW6MSHYuoE7VKiRExAQCANK5TkSUWFSKyu8oGIxa8/SOK61oQH+qLd5eMh7/OS3Qsoi6NHWid/jmUx6IiRywqRGRXdc2tWLj2IHIqGxEd6I337p6AYD+t6FhEl9Rxp+6DudWCk1BXWFSIyG6aTWbc8+4hnC4xIMRPg3/fPR5Rgd6iYxFd1pgB/aBUAAXVTSgztIiOQz/DokJEdmFqs+D+99JwKK8Gep0aG347AfGhfqJjEV2RXueFIVH+ADiqIkcsKkTUZ5IkYcXmE9iTWQGdlxLrFo+z/eIncgUd0z+H8lhU5IZFhYj67Pntmdh8pAgqpQJr7hqDse2/9IlcxXiuU5EtFhUi6pONP+bjlV1ZAIBnbh2Ga1LCBCci6rlxcdaiklFWzxsUygyLChH12o7TZfjbZycBAA/NGIQ7xvUXnIiod0L8tIgP9YUkAYfzOaoiJywqRNQrRwpq8IcPjsAiAXPHxeLBaweJjkTUJ5z+kScWFSLqsdzKRtzz7mG0tFpwTXIonrplGO+ETC5vfPv0zwEWFVlhUSGiHqmoN2LROwdR3WjCiJgAvHLnaN5kkNzCxIRgAMDJojrUt3CdilzwtwsRdVvHgW4F1U0YEOyDdxaPg69WLToWkV1EBnhjYLAPzBaJ25RlhEWFiLrFYpHw54+O4fj5OvTz8cL6JeMRwqPxyc10jKr8kFUlOAl1YFEhom751zeZ2JpeCi+VAm8sGIu4EF/RkYjs7qp4a1HZn8OiIhcsKkR0RZ8dLcLL31rPSll12wjbokMidzOxvaicLjGgtskkOA0BLCpEdAVp+TX4yycnAAD3T0vAr8bECE5E5Dhh/joktJ+n8iN3/8gCiwoRXVJhdRPu+/dhmNosmDUkHH+5Lll0JCKH61insj+b0z9ywKJCRF2qb2nFPe8eRmWDCUOj/PHC3FFQKnlWCrm/ifEhAFhU5IJFhYguYrZIeHDTMWSU1SNMr8Xbi8bCR8NtyOQZror/6b4/FfVGwWmIRYWILvLsV2fw7dlyaNVKvLVwLCIDvEVHInKaYD8thkb5AwC+z6oUnIZYVIiok/8eL8Zb3+UCAJ7/zUiMjA0UG4hIgKmDQgEAe89VCE5CLCpEZJNRWo+/fmzd4fPA1Qn45YgowYmIxJg6yLpOZ
d+5SkiSJDiNZ2NRISIAgKGlFfe/l4bmVjOmJIbg4Vnc4UOea8yAftCqlSivN+JceYPoOB6NRYWIYLFIWP7hceRWNiI60BsvzUuFijt8yIPpvFSY0H74295MTv+IxKJCRHh1Vxa+OVMGjVqJ1+ePQZCvRnQkIuGmJrZP/3BBrVAsKkQebndGOVZ/kwkAeGrOMAyPCRCciEgepiZZi8qPOdUwtpkFp/FcLCpEHqywugkPbjoGSQLunNAfvxkXKzoSkWwkh+sRqteiudWMw3k1ouN4LBYVIg/VbDLjvn+noa65FaNiA/H32UNERyKSFYVCgWlJ1m3Ku86WC07juYQWlb1792L27NmIioqCQqHAZ599JjIOkceQJAmPfpaO0yUGBPtqsGb+aGjVKtGxiGRnekoYAGBXBouKKEKLSmNjI0aOHIlXX31VZAwij/PegXxsPlIEpQJ4+c5UnjxLdAlTBoVApVQgu6IRBVVNouN4JKE377jhhhtwww03iIxA5HHS8qvx5JenAQArbxiMSQkhghMRyZe/zgtjB/TDj7nV2JVRjkWTBoqO5HFcao2K0WiEwWDo9CCi7iuvb8ED7x1Bq1nCTSMicc/UONGRiGSvY/rnW65TEcKlisqqVasQEBBge8TGcocCUXe1mi1YtvEoyuuNGBTmh/+7fQQUCh7qRnQl17QXlf05VWg2cZuys7lUUVm5ciXq6upsj8LCQtGRiFzGqq1ncTCvGnqtGq8vGANfrdCZXyKXMSjMD9GB3jC1WfBDNg9/czaXKiparRb+/v6dHkR0ZZ8fK8I73/90R+SEUD/BiYhch0KhwDUp1m3K35zh9I+zuVRRIaKeO1tqwIpP0gEAS69JwKyhEYITEbmeWUOsPzc7TpfBYuHdlJ1J6NhvQ0MDsrKybH/Ozc3FsWPHEBQUhP79+wtMRuQe6ppbcd+/rXdEnjooBMtn8o7IRL1xVXww9Fo1KhuMOFpYizED+omO5DGEjqgcPnwYqampSE1NBQAsX74cqampeOyxx0TGInIL1jsiH0N+VRNi+nnjpbm8IzJRb2nUStui2u2nSgWn8SxCi8rVV18NSZIueqxfv15kLCK38PK3Wdh5thza9jsi9+MdkYn65Lr2adOvT5VCkjj94yxco0LkhnZllOOFndY7Ij9963AMi+YdkYn6alpyKDQqJfKqmpBV3iA6jsdgUSFyMwVVTXjwg6OQJGD+Vf3xqzExoiMRuQU/rRqTE4MBANtPlwlO4zlYVIjcSLPJjPveS4OhpQ2p/QPx2C+Hio5E5FY6ds19dbJEcBLPwaJC5CYkScIjn6bjTIkBIX4arLlrDDRq/ogT2dN1QyOgUipwssiAvMpG0XE8An+LEbmJDfvz8enRIqiUCrxy52hEBOhERyJyO0G+GkxKsE7/fHmiWHAaz8CiQuQGDudV4x+2OyKn4Kr4YMGJiNzX7BFRAIAvT3D6xxlYVIhcXLmhBQ9sPII2i4RfjojE3VN4R2QiR7puaAS8VAqcLa1HVnm96Dhuj0WFyIW1mi1Y+v4RVNQbkRyux//9indEJnK0AB8vTB1kvffPF8c5quJoLCpELuzpLWdwKK/GdkdkHw3viEzkDL8cEQnAuk6Fh785FosKkYv67GgR1v+QBwD41x2jEBfiKzYQkQeZOSQcWrUS2RWNOFlkEB3HrbGoELmg08UGrNh8AgDwx+mJmDEkXHAiIs+i13nZzlT55Mh5wWncG4sKkYupa2rF/e+loaXVgmlJoXhwRpLoSEQe6fbR0QCAz48VwdRmEZzGfbGoELkQs0XCgx8eRUF1E2KDvPHi3FG8IzKRIFMSQxCq16KmqRW7MspFx3FbLCpELmT1jgzszqiAzst6R+RAH94RmUgUtUqJW1OtoyqbOf3jMCwqRC7iq/QSvLorGwDwz9tHYGgU74hMJNpt7dM/354tR3WjSXAa98SiQuQCMkrr8eePjgMA7p0ahzmjogUnIiIASInwx7Bof7SaJY6qO
AiLCpHM1TW14nf/PowmkxmTE4Px1+tTREciogvMG98fAPD+wQKeqeIALCpEMma2SPjjpqPIr2pCdKA3Xp43GmoVf2yJ5GTOqGj4alTIqWjEgZxq0XHcDn/jEcnY89szsCfTunj2zYVjEOTLxbNEcuOnVWNO+6LajT/mC07jflhUiGRqa3oJXtvNxbNEruDO9umfr0+VorLBKDiNe2FRIZKhs6UGPMzFs0QuY1h0AEbFBqLVLOHDQ4Wi47gVFhUimalsMOLu9Vw8S+RqFlw1AACwYX8eT6q1IxYVIhlpaTXjdxsOo6i2GXEhvnj1Ti6eJXIVs0dGIUyvRZnBiC3pxaLjuA3+BiSSCUmSsOKTEzhSUAt/nRprF43lybNELkSjVmLRpIEAgLe/y+VWZTthUSGSiVe+zcJnx4qhViqwZv4YxIf6iY5ERD1014T+8PZS4VSxgVuV7YRFhUgGtpwowfM7MgEAT84ZhsmJIYITEVFvBPpo8KsxMQCAN/dmC07jHlhUiAQ7nFeN5f85BgD47eQ43Dmhv9hARNQnd0+Jg1IB7MqoQPr5OtFxXB6LCpFA58rqcfe7h2Fss2DG4DA8etNg0ZGIqI8GhvjajhR46dtzgtO4PhYVIkFK6pqx6J2DqGtuRWr/QLw8bzRUSoXoWERkB8umJ0KhAHacLsOpYo6q9AWLCpEAdc2tWPzOIRTXtSA+1BfvLBoHb41KdCwispOEUD/MHhEFAHhpJ0dV+oJFhcjJWlrNuHfDYWSU1SNUr8W7S8ajH+/hQ+R2/nitdVTl61NlOF5YKzqOy2JRIXIiU5sFy94/goO51dBr1Xh3yXjEBvmIjkVEDpAYpset7TcrfHrrGZ6r0kssKkRO0mq24I8fHMU3Z8qhVSvxxsIxGBLlLzoWETnQw7OSoVUrcTC3Gt+cKRcdxyWxqBA5QZvZguX/OY5tp0qhUSnx5sKxmJTAs1KI3F1UoDd+OyUOAPDsV2fQZuY9gHqKRYXIwcwWCX/5+AS+OF4ML5UCa+aPxrSkUNGxiMhJHrg6AUG+GmRXNGLD/nzRcVwOiwqRA5kt1vv3bD5aBLVSgVfuHI1rB4eLjkVETuSv88KfZyUBAFbvyERpXYvgRK6FRYXIQVpazVi68Qg+SjsPpQJ4cW4qrhsaIToWEQkwb1x/pPYPRIOxDU9+eUp0HJfCokLkAIaWVixed9C2JuXVO0fjphGRomMRkSBKpQJP3zIcKqUCW9NL8e3ZMtGRXAaLCpGdVdQbMfeNAziQUw0/rRrrfzsONwxnSSHydEOi/PHbyQMBACs3p6O2ySQ2kItgUSGyo7zKRvzq9R9wusSAED8NNv3uKu7uISKb5TOTER/qizKDEY9+epJnq3QDiwqRnXyfVYk5r36P/Kom9A/ywcf3T8Kw6ADRsYhIRrw1KrxwxyiolQpsSS/BZ8eKREeSPRYVoj6SJAlr9+ViYfsNBkfGBuLjByZiYIiv6GhEJEMjYgLx4LWDAAD/++lJnCurF5xI3lhUiPqgrrkV97+Xhn98eRpmi4TbRkfjw99dhTC9TnQ0IpKxB65OwMT4YDSazPjdv9NgaGkVHUm2WFSIeulAThVueuk7fH2qDF4qBR6fPQTP/3okdF68CzIRXZ5apcQrd6YiKkCH3MpGLP/wGMwWrlfpCosKUQ81mdrwjy9PY95bB3C+phmxQd745IFJWDw5DgqFQnQ8InIRwX5avLFgLDRqJb45U44nvjjFxbVdYFEh6oHtp0oxc/VerN2XC0kC5o2PxdY/TsWImEDR0YjIBQ2PCcDq34yEQgFs2J+PV77NEh1JdtSiAxC5gpNFdfjntrP47lwlACA60BtP3TIM16SECU5GRK7ulyOiUFlvxONfnMbzOzLhrVHhnqnxomPJBosK9Uh9SyvO1zSj1NCC0jrro6LBiGaTGS2t1oexzXp3UJ2XCjovJXReKvhoVAj21SLcX4dwfy3C9DpEBeoQ5KuR9XRJ+vk6vL4nG1vSS
wAAXioF7p0aj2XTE+Gj4Y8PEdnH4slxqG404aVvs/DUljMwtlmw9JpE0bFkgb9pqUuSJKGwuhmH86uRUVaPzNJ6ZJTWo9jON9Py16kRF+qH+BBfxIf4IjHMD4PC/TAg2BdeKjEzk8Y2M3acLsPGAwXYn1Nle/6WUVFYPjMZ/YN9hOQiIvf2p5lJUCmV+Nc3mXju6wzUNpmw4obBUCnl+x9zzqCQXHjljsFgQEBAAOrq6uDv7y86jssrrWvB/pxK/JBVhR+yq1BU29zl6/r5eCEiwBsR/lpEBHgjTK+Fr1ZlHUFRq6D1shYMY6sFLW1mNJvMaDSZUdlgRLmhBWUGI8oM1pGYS333eakUiA+xlpakcD0GhflhULgeA4N9oHZAgTG2mXEgpxrbTpZg28lS1DRZtwqqlArMHhGJ+6YlYHAkv8eIyPHe3JuNZ7aeBQBMTwnDi3NHQa/zEpzKvnry7zeLigerajDiQE41fsiuxP7sKuRUNnb6uFqpwMjYQAyN8kdSuB7JEXokhekR4GOfH5iWVjPyq5qQU9GAnMpGZFc0ILu8AefKG9BkMnf5ORqVEvGh1pGXpHA9ksL9EBfihzC9FoE+Xt2aRjK2mVFU04zMsnqcLjbgcH4N0vJrbFNWABDur8VvxsbijnGxiOnHERQicq4vjhfj4Y+Ow9hmQXyoL16am+pWJ12zqFCXDC2tOJhTjR+yq/BDdiXOlnY+DVGpAIZHB2BiQggmJQRj7MB+QtZhWCwSimqbkVXegMyyemSWNeBceT3OlTWgubXrAgNYR2GCfbUI0WugU6ugVCigUFhHRdosEgzNrahuNF1yJCdMr8XMIeG4flgEJsYHO2Tkhoiou06cr8XvNqSh1NACL5UCy2cm496pcW7xu4lFhQBYz/s4nFeD/TnWqZz087X4+XlCKRF6TEwIxqSEEIyPC0KAt3yHFzsKTEdp6Sgw+VVNqGvu2amOOi8lEsP8MCTSH8NjAjExPggJoX6yXthLRJ6nptGElZvTse1UKQDr7+ynbhmGsQODBCfrGxYVD2VsM+NYQS1+yK7C/uwqHC2sQau58/9740N8bcXkqvggBPtpBaW1L1ObBVWNRlTUG1HZYISpzQKLBFgkCWaLBKVCgUAfLwR6axAZqEOwzHcbERF1kCQJH6WdxzNbz6C2ff3c9UMj8KeZSUiO0AtO1zssKh6iwdiGtPwaHMqtxsG8ahwvrO20zgKwnvdhLSbBmJgQjMgAb0FpiYioL6obTfi/bWfx4eFCSBKgUAAzB4djyeQ4XBUf5FL/8cWi4oYsFgn51U1IL6rD0YIaHMqrxuliw0VTOSF+WkxqLyaTEkIQG+TtUt+8RER0eZll9Xjhm0xsTS+1PZcY5odbU6Nx88goxAbJfwOAyxWVV199Fc899xxKS0sxcuRIvPzyyxg/fvwVP89di0p9Syuy2ne/ZJbW42RxHU4VGVBvbLvotbFB3hg3MAjjBwZhXFwQ4kN8WUyIiDzAubJ6vLs/D5+kFXXaaJASocf0lDCMjwtCamw/u+3UtCeXKioffvghFi5ciNdffx0TJkzACy+8gI8++ggZGRkIC7v88eSuWlRMbRaU1rXgfG0Tztc0o6im2fp/a5uQX9WEkkscqqZRKzE40h8jogMwLs5aTiICdE5OT0REcmJoacW2k6X47GgRDuRUXTTSnhjmh9TYQAyPCUBciC8GBvsiKtBb6EFyLlVUJkyYgHHjxuGVV14BAFgsFsTGxuIPf/gDVqxYcdnPdXZRaTNb0GqWYDJb0Gq2wNRmQXOrGU1GMxqMbWgytaHRZEaTsQ0NxjbUNJlQ3Wh91DS2oqrRiJqmVtQ0mS550FmHcH8tBoXpbTtThkUHYFC4n7DTWomISP5qGk3YnVmOvZmVOFJQg/yqpi5fp1Er0T/IBxH+OoTqtdaHnxb9fDXQ69TQ69Tw13lBr1Mj0Edj9x2hPfn3W+gR+iaTCWlpaVi5cqXtOaVSiRkzZmD//v0Xv
d5oNMJoNNr+XFdXB8D6F7anHafLsGrrabSaJWshMUtoM1suaql9oVErERWoQ2SAN6IDdYgK9EZUgDei+3kjPtSvy2+K5sYGdH1WLBEREaACcG2CP65NsP7jX91gxPHzdTheWIusigbkVzXifE0zWowSMhsbkNmN95wxOAwvzE21a86Of7e7M1YitKhUVlbCbDYjPDy80/Ph4eE4e/bsRa9ftWoVnnjiiYuej42NdVhGR8oWHYCIiOgK1gFYd59j3ru+vh4BAZc/cdelbkq4cuVKLF++3PZni8WC6upqBAcHC11AajAYEBsbi8LCQpdaK2NvvA5WvA5WvA5WvA5WvA4/4bWwjqTU19cjKirqiq8VWlRCQkKgUqlQVlbW6fmysjJERERc9HqtVguttvMBZYGBgY6M2CP+/v4e+013IV4HK14HK14HK14HK16Hn3j6tbjSSEoHoSszNRoNxowZg507d9qes1gs2LlzJyZOnCgwGREREcmB8Kmf5cuXY9GiRRg7dizGjx+PF154AY2NjViyZInoaERERCSY8KJyxx13oKKiAo899hhKS0sxatQobNu27aIFtnKm1Wrx97///aJpKU/D62DF62DF62DF62DF6/ATXoueEX6OChEREdGl8PQwIiIiki0WFSIiIpItFhUiIiKSLRYVIiIiki0WFSIiIpItFpVuevXVVzFw4EDodDpMmDABBw8e7Nbnbdq0CQqFArfccotjAzpJT67D+vXroVAoOj10Op0T0zpOT78famtrsXTpUkRGRkKr1SIpKQlbt251UlrH6cl1uPrqqy/6flAoFLjpppucmNgxevr98MILLyA5ORne3t6IjY3Fn/70J7S0tDgpreP05Dq0trbiySefREJCAnQ6HUaOHIlt27Y5Ma1j7N27F7Nnz0ZUVBQUCgU+++yzK37O7t27MXr0aGi1WiQmJmL9+vUOz+lSJLqiTZs2SRqNRnrnnXekU6dOSffee68UGBgolZWVXfbzcnNzpejoaGnq1KnSnDlznBPWgXp6HdatWyf5+/tLJSUltkdpaamTU9tfT6+D0WiUxo4dK914443Svn37pNzcXGn37t3SsWPHnJzcvnp6Haqqqjp9L5w8eVJSqVTSunXrnBvcznp6HTZu3ChptVpp48aNUm5urvT1119LkZGR0p/+9CcnJ7evnl6Hv/zlL1JUVJS0ZcsWKTs7W3rttdcknU4nHTlyxMnJ7Wvr1q3So48+Km3evFkCIH366aeXfX1OTo7k4+MjLV++XDp9+rT08ssvSyqVStq2bZtzArsAFpVuGD9+vLR06VLbn81msxQVFSWtWrXqkp/T1tYmTZo0SXr77belRYsWuUVR6el1WLdunRQQEOCkdM7T0+uwZs0aKT4+XjKZTM6K6BS9+bm40L/+9S9Jr9dLDQ0NjoroFD29DkuXLpWmT5/e6bnly5dLkydPdmhOR+vpdYiMjJReeeWVTs/ddttt0l133eXQnM7UnaLyl7/8RRo6dGin5+644w7puuuuc2Ay18KpnyswmUxIS0vDjBkzbM8plUrMmDED+/fvv+TnPfnkkwgLC8Pdd9/tjJgO19vr0NDQgAEDBiA2NhZz5szBqVOnnBHXYXpzHf773/9i4sSJWLp0KcLDwzFs2DA888wzMJvNzoptd739frjQ2rVrMXfuXPj6+joqpsP15jpMmjQJaWlptmmRnJwcbN26FTfeeKNTMjtCb66D0Wi8aCrY29sb+/btc2hWudm/f3+n6wYA1113Xbd/jjwBi8oVVFZWwmw2X3Skf3h4OEpLS7v8nH379mHt2rV46623nBHRKXpzHZKTk/HOO+/g888/x3vvvQeLxYJJkybh/PnzzojsEL25Djk5Ofj4449hNpuxdetW/O1vf8Pzzz+Pp556yhmRHaI31+FCBw8exMmTJ3HPPfc4KqJT9OY63HnnnXjyyScxZcoUeHl5ISEhAVdffTUeeeQRZ0R2iN5ch+uuuw6rV6/Gu
XPnYLFYsGPHDmzevBklJSXOiCwbpaWlXV43g8GA5uZmQankhUXFzurr67FgwQK89dZbCAkJER1HqIkTJ2LhwoUYNWoUpk2bhs2bNyM0NBRvvPGG6GhOZbFYEBYWhjfffBNjxozBHXfcgUcffRSvv/666GjCrF27FsOHD8f48eNFR3G63bt345lnnsFrr72GI0eOYPPmzdiyZQv+8Y9/iI7mVC+++CIGDRqElJQUaDQaLFu2DEuWLIFSyX+WqDPhNyWUu5CQEKhUKpSVlXV6vqysDBERERe9Pjs7G3l5eZg9e7btOYvFAgBQq9XIyMhAQkKCY0M7QE+vQ1e8vLyQmpqKrKwsR0R0it5ch8jISHh5eUGlUtmeGzx4MEpLS2EymaDRaBya2RH68v3Q2NiITZs24cknn3RkRKfozXX429/+hgULFthGk4YPH47Gxkb87ne/w6OPPuqS/1D35jqEhobis88+Q0tLC6qqqhAVFYUVK1YgPj7eGZFlIyIiosvr5u/vD29vb0Gp5MX1fiKcTKPRYMyYMdi5c6ftOYvFgp07d2LixIkXvT4lJQXp6ek4duyY7XHzzTfjmmuuwbFjxxAbG+vM+HbT0+vQFbPZjPT0dERGRjoqpsP15jpMnjwZWVlZtsIKAJmZmYiMjHTJkgL07fvho48+gtFoxPz58x0d0+F6cx2ampouKiMdJVZy0XvE9uX7QafTITo6Gm1tbfjkk08wZ84cR8eVlYkTJ3a6bgCwY8eObv9e9QiiV/O6gk2bNklarVZav369dPr0ael3v/udFBgYaNtqu2DBAmnFihWX/Hx32fXT0+vwxBNPSF9//bWUnZ0tpaWlSXPnzpV0Op106tQpUX8Fu+jpdSgoKJD0er20bNkyKSMjQ/ryyy+lsLAw6amnnhL1V7CL3v5cTJkyRbrjjjucHddhenod/v73v0t6vV764IMPpJycHGn79u1SQkKC9Jvf/EbUX8EuenodDhw4IH3yySdSdna2tHfvXmn69OlSXFycVFNTI+hvYB/19fXS0aNHpaNHj0oApNWrV0tHjx6V8vPzJUmSpBUrVkgLFiywvb5je/L//M//SGfOnJFeffVVbk/+GRaVbnr55Zel/v37SxqNRho/frx04MAB28emTZsmLVq06JKf6y5FRZJ6dh0eeugh22vDw8OlG2+80eXPSOjQ0++HH374QZowYYKk1Wql+Ph46emnn5ba2tqcnNr+enodzp49KwGQtm/f7uSkjtWT69Da2io9/vjjUkJCgqTT6aTY2Fjp97//vcv/Ay1JPbsOu3fvlgYPHixptVopODhYWrBggVRUVCQgtX3t2rVLAnDRo+PvvmjRImnatGkXfc6oUaMkjUYjxcfHu/zZQvamkCQXHWskIiIit8c1KkRERCRbLCpEREQkWywqREREJFssKkRERCRbLCpEREQkWywqREREJFssKkRERCRbLCpEREQkWywqREREJFssKkRERCRbLCpEREQkW/8fJ+IH5VE6WQ8AAAAASUVORK5CYII=\n" }, "metadata": {} } ], "source": [ "import matplotlib.pyplot as plt\n", "sns.kdeplot(results['qed'].values)\n", "plt.title('QED plot')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 1000 }, "id": "SPrWfwl3H37h", "outputId": "128c91c0-435f-44f1-e860-aa3de030824f" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
moleculeqedsmiles
0
\"Mol\"/
0.677112CCOC(=O)c1cnn(-c2nc(C)nc3sc(CC)c(C)c23)c1C
1
\"Mol\"/
0.728099CCc1cccc(CC(=O)Cn2cnc3ccccc3c2=O)c1
2
\"Mol\"/
0.783137CC(C)(C)NC(=O)Nc1ccc(C(=O)NC(C)(C)C)cc1
3
\"Mol\"/
0.895424CC(=O)Nc1ccc(NC(=O)Cc2ccccc2)cc1
4
\"Mol\"/
0.742755Cn1c(=O)c2c(ncn2CC(=O)Nc2ccc(F)cc2)n(C)c1=O
5
\"Mol\"/
0.806002CCOC(=O)c1c[nH]c2ccc(OC)cc12
6
\"Mol\"/
0.745607CC(=O)c1ccc(NC(=O)c2cccc(-n3cnnn3)c2)cc1
7
\"Mol\"/
0.727011Cc1ccccc1NC(=O)CSc1nnc(-c2ccco2)o1
8
\"Mol\"/
0.689722COc1ccc(NC(=O)Cn2nnc(-c3ccccc3N)n2)cc1
9
\"Mol\"/
0.622855CN(C)C(=O)CSc1nc2ccccc2c(=O)n1C
10
\"Mol\"/
0.771935COc1ccc(-n2nnnc2SCC(=O)N2CCCCC2C)cc1
11
\"Mol\"/
0.775285CN(C)C(=O)CSc1nnc(-c2cccnc2)o1
12
\"Mol\"/
0.719536Cc1ccccc1NC(=O)CSc1nnc(-c2cccs2)o1
13
\"Mol\"/
0.786158CC(=O)N(Cc1cccs1)Cc1cc2cc(C)cc(C)c2[nH]c1=O
14
\"Mol\"/
0.682315Cc1ccc(-n2nnnc2SCC(=O)NCc2cccnc2)cc1C
15
\"Mol\"/
0.798353Cc1ccc(-c2nnc(SCC(=O)N3CCOCC3)n2C)cc1
16
\"Mol\"/
0.778989COc1ccc(NC(=O)Cn2nnc(-c3cccs3)n2)cc1
17
\"Mol\"/
0.938534CC(=O)N(Cc1cc2ccc(C)cc2[nH]c1=O)CC1CCCCO1
18
\"Mol\"/
0.677132CCC(C)NC(=O)CSc1nc2ccccc2c(=O)n1C
19
\"Mol\"/
0.642878CC(=O)Nc1ccc(NC(=O)CSc2nnc(-c3cccnc3)o2)cc1
20
\"Mol\"/
0.456887COC(=O)c1ccc(C(=O)CSc2nnc(C3CC3)n2C)o1
21
\"Mol\"/
0.800048CC(=O)Nc1ccccc1-c1nnn(CCC(=O)NCC(C)C)n1
22
\"Mol\"/
0.642831CCC(=O)c1ccc(NC(=O)CSc2nnnn2C)cc1
23
\"Mol\"/
0.862908CCc1nnc(SCC(=O)Nc2ccc(C)cc2C)n1C
24
\"Mol\"/
0.692685COc1cccc(C(=O)Nc2ccc(OC(C)=O)cc2)c1
25
\"Mol\"/
0.800350Cc1cccc(C)c1NC(=O)Cn1nnc(-c2ccccc2F)n1
26
\"Mol\"/
0.822039CCC(C)(C)c1nnc(SCC(=O)Nc2ccccc2C)n1C
27
\"Mol\"/
0.736507Cc1cccc2cc(C(=O)Nc3ccccc3F)c(=O)oc12
28
\"Mol\"/
0.719212Cc1ccc(NC(=O)CSc2nnc(-c3ccco3)o2)cc1C
29
\"Mol\"/
0.890984CC(=O)Nc1ccc(NC(=O)c2cccs2)cc1
30
\"Mol\"/
0.593680CC(=O)Oc1ccc(NC(=O)Cn2cnc3ccccc32)cc1
31
\"Mol\"/
0.784833CC(=O)Nc1ccc(NC(=O)CSc2nnc(C3CC3)n2C)cc1
32
\"Mol\"/
0.802323Cc1ccc(NC(=O)Cn2nnc(-c3cccs3)n2)cc1
33
\"Mol\"/
0.784833CC(=O)Nc1ccc(NC(=O)CSc2nnc(C3CC3)n2C)cc1
34
\"Mol\"/
0.876471COc1ccc(NC(=O)N(C(C)C)C(C)C)cc1NC(C)=O
35
\"Mol\"/
0.677053COc1ccc(-n2nnnc2SCc2ccccc2F)cc1
36
\"Mol\"/
0.514397Cc1ccc(NC(=O)CCSc2nnc(-c3ccc(N)cc3)n2C)cc1
37
\"Mol\"/
0.703942COC(=O)c1ccc(NC(=O)Cn2nnc(-c3ccco3)n2)cc1
38
\"Mol\"/
0.871646CNC(=O)CSc1nnc(-c2cccc(Cl)c2)o1
39
\"Mol\"/
0.889039CC(=O)Nc1ccc(NC(=O)c2ccc(Cl)cc2Cl)cc1
40
\"Mol\"/
0.597320CC(C)CCSc1nc2ccccc2c(=O)n1CC1CCCO1
41
\"Mol\"/
0.820129COC(=O)C1=C(C)NC(=O)NC1c1cccc(OC)c1
42
\"Mol\"/
0.828800COc1ccc(NC(=O)CSc2nnc(C(C)C)n2C)cc1
43
\"Mol\"/
0.589398COC(=O)c1ccc(-n2nnnc2SCC(=O)N2CCCCC2)cc1
44
\"Mol\"/
0.807891Cc1ccc(-n2nnnc2SCC(=O)N2CCCCC2)cc1
45
\"Mol\"/
0.879891CC(C)CNC(=O)Cn1nnc(-c2ccc3c(c2)OCO3)n1
46
\"Mol\"/
0.869387Cc1ccc(NC(=O)CSc2nncn2C)cc1C
47
\"Mol\"/
0.880821Cc1nnc(SCC(=O)Nc2ccccc2Cl)n1C
48
\"Mol\"/
0.613201CCOC(=O)c1cc(CC)c(C(=O)c2ccc(Cl)cc2)nc1C
49
\"Mol\"/
0.592722COC(=O)CSc1nnc(-c2ccco2)o1
50
\"Mol\"/
0.803457Cc1cccc(NC(=O)Cn2nnc(-c3cccs3)n2)c1C
51
\"Mol\"/
0.776652COc1ccc(-n2nnnc2SCC(=O)N2CCCC2C)cc1
52
\"Mol\"/
0.830220CC(C)CNC(=O)CSc1nnc(-c2cccs2)o1
53
\"Mol\"/
0.797543Cc1ccccc1NC(=O)c1cccc(-n2cnnn2)c1
54
\"Mol\"/
0.938409CC(=O)N(Cc1cc2cc(C)cc(C)c2[nH]c1=O)CC1CCCO1
55
\"Mol\"/
0.807891Cc1ccc(-n2nnnc2SCC(=O)N2CCCCC2)cc1
56
\"Mol\"/
0.884997CC(C)C(=O)Nc1ccc(NC(=O)C2CCCCC2)cc1
57
\"Mol\"/
0.877238CC(C)(C)NC(=O)CSc1nnc(-c2ccccc2)o1
58
\"Mol\"/
0.669455CC(C)c1cccc(C(C)c2nc3ccccc3n2C)c1
59
\"Mol\"/
0.828525COc1ccc(NC(=O)CSc2nnc(C3CC3)n2C)cc1
60
\"Mol\"/
0.695628CCc1ccccc1NC(=O)CSc1nnc(-c2ccco2)o1
61
\"Mol\"/
0.644309CCOC(=O)CSc1nnc(-c2ccc(NC(C)=O)cc2)o1
62
\"Mol\"/
0.795909CCn1c(SCC(=O)N2CCCC2)nnc1-c1ccccc1
63
\"Mol\"/
0.838070Cc1ccccc1NC(=O)Nc1cccc(F)c1Cl
64
\"Mol\"/
0.515580COC(=O)c1ccc(-n2nnnc2SCC(=O)Nc2ccccc2C)cc1
65
\"Mol\"/
0.842617CCn1nnnc1SCC(=O)Nc1ccc(C)cc1
66
\"Mol\"/
0.861415Cc1cccc(NC(=O)CSc2nnc(C3CC3)n2C)c1C
67
\"Mol\"/
0.635345CNC(=O)CSc1nnc(-c2cccc(N)c2)o1
68
\"Mol\"/
0.802731CC(=O)Nc1ccc(-c2nnc(SCC(N)=O)o2)cc1
69
\"Mol\"/
0.731640Cc1cc(=O)oc2cc(OC(=O)Nc3ccccc3)ccc12
70
\"Mol\"/
0.862762Cc1ccc(NC(=O)CSc2nnc(C(C)C)n2C)cc1
71
\"Mol\"/
0.641072Cc1ccc(-n2nnnc2SCC(=O)Nc2ccccc2C(N)=O)cc1
72
\"Mol\"/
0.763287CC(=O)Nc1ccc(NC(=O)c2cccc(-n3cnnn3)c2)cc1
73
\"Mol\"/
0.812857Cc1ccc(-c2nnc(SCC(=O)N3CCCC3)o2)cc1
74
\"Mol\"/
0.881461CCCC(=O)Nc1nnc(COc2ccccc2)s1
75
\"Mol\"/
0.879589CC(C)(C)NC(=O)CSc1nnc(-c2ccc(F)cc2)o1
76
\"Mol\"/
0.810280Cc1ccc2nc(SCC(=O)N3CCCCC3)c(C#N)cc2c1
77
\"Mol\"/
0.689292Cc1ccc(C(C)C)cc1NC(=O)Cn1nnc(-c2ccccc2N)n1
78
\"Mol\"/
0.862933CC(C)c1nnc(SCC(=O)Nc2ccccc2F)n1C
79
\"Mol\"/
0.857897COc1ccc(CC(=O)Nc2cccc(C(C)=O)c2)cc1
80
\"Mol\"/
0.765705CCCn1c(SCC(=O)N2CCCC2)nnc1-c1ccco1
81
\"Mol\"/
0.694083COc1ccc(NC(=O)CSc2nnc(-c3ccco3)o2)cc1
82
\"Mol\"/
0.500273Cc1nn(-c2cc3ccccc3oc2=O)c2ccccc12
83
\"Mol\"/
0.829153CCn1c(SCC(=O)Nc2cccc(C)n2)nnc1C1CC1
84
\"Mol\"/
0.678305COc1ccc(C(=O)CC(=O)Nc2cccc(C)c2C)cc1
85
\"Mol\"/
0.915541COc1ccc(CC(=O)Nc2ccc(CC#N)cc2)cc1
86
\"Mol\"/
0.702972CCc1ccc(-n2c(SCC(N)=O)nnc2-c2ccco2)cc1
87
\"Mol\"/
0.594450CC(C)(C)C(=O)Oc1ccc(-n2cnnn2)cc1
88
\"Mol\"/
0.874446Cc1ccc(-n2nnnc2SCC(=O)NC(C)(C)C)cc1
89
\"Mol\"/
0.489481CCOC(=O)CSc1nc(C)nc2sc3c(c12)CCCC3
90
\"Mol\"/
0.745955COc1ccc(CNC(=O)Cn2nnc(-c3ccccc3)n2)cc1
91
\"Mol\"/
0.877238CC(C)(C)NC(=O)CSc1nnc(-c2ccccc2)o1
92
\"Mol\"/
0.725361Cc1ccc(NC(=O)CSc2nnc(-c3cccnc3)o2)cc1
93
\"Mol\"/
0.806911CC(C)C(=O)N(Cc1ccco1)CC1CCCO1
94
\"Mol\"/
0.784833CC(=O)Nc1cccc(NC(=O)CSc2nnc(C3CC3)n2C)c1
95
\"Mol\"/
0.662338COC(=O)c1ccc(NC(=O)CSc2nncn2C)cc1
96
\"Mol\"/
0.879391CC(=O)Nc1ccc(NC(=O)CCc2ccccc2Cl)cc1
97
\"Mol\"/
0.643678CCc1nc2sc3c(c2c(=O)n1CC(=O)OC)CCCCC3
\n", "
" ], "text/plain": [ "" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "show(results)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "-eHjSOTHH37i" }, "outputs": [], "source": [ "from rdkit.DataStructs import BulkTanimotoSimilarity, TanimotoSimilarity\n", "from rdkit.Chem import AllChem" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 36 }, "id": "nCyiYi2MH37i", "outputId": "62c3d694-22af-4fb7-f3eb-3605d03f2a0b" }, "outputs": [ { "data": { "application/vnd.google.colaboratory.intrinsic+json": { "type": "string" }, "text/plain": [ "'Diversity of molecules % = 0.7729722352118349'" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Internal diversity of the generated set: mean pairwise Tanimoto distance\n", "# (1 - similarity) between Morgan fingerprints (radius 2, 1024 bits).\n", "fp_list = [\n", "    AllChem.GetMorganFingerprintAsBitVect(molecule, 2, nBits=1024)\n", "    for molecule in molecules\n", "]\n", "\n", "diversity = []\n", "for i in range(len(fp_list) - 1):\n", "    # One bulk call scores fingerprint i against every later fingerprint,\n", "    # replacing the per-pair Python loop while keeping the same pair order.\n", "    similarities = BulkTanimotoSimilarity(fp_list[i], fp_list[i + 1:])\n", "    diversity.extend(1 - float(similarity) for similarity in similarities)\n", "\n", "# Fewer than two molecules yields no pairs; report NaN without a numpy warning.\n", "mean_diversity = np.mean(diversity) if diversity else float('nan')\n", "# NOTE: the value is a fraction in [0, 1], not a percentage, despite the label.\n", "\"Diversity of molecules % = {}\".format(mean_diversity)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "aLRmtmWVH37i" }, "outputs": [], "source": [ "# Write each entry of gen_smiles on its own line.\n", "with open('gen_gpt_moses_1k.txt', 'w') as f:\n", "    for line in gen_smiles:\n", "        f.write(f\"{line}\\n\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "G4Lfc2rtB4yM" }, "outputs": [], "source": [] } ] }