# Install the notebook's dependencies into the *kernel's* environment.
#
# `%pip` is used instead of `!pip`: the shell escape can resolve to a different
# Python than the one running this kernel (the saved output of the original
# `!pip install deepchem` cell shows that interpreter failing with
# "SSL module is not available" while the kernel-side installs worked).
#
# Versions are pinned to the ones recorded in this notebook's saved outputs so
# a fresh environment reproduces the same stack.
%pip install -q --upgrade pip
%pip install -q rdkit==2022.3.5 torch==1.13.1 pytorch_lightning==1.7.7 subword_nmt==0.3.8 pandas==1.5.0
# `--user` is kept from the original cell; drop it when running inside a virtualenv.
%pip install -q --user deepchem==2.6.1
# Imports used throughout the notebook (order kept: the project module first).
from train_cli import FsrFgLightning
from rdkit import Chem
import codecs
import numpy as np
import pandas as pd
from rdkit.Chem import Descriptors
from subword_nmt.apply_bpe import BPE

# Trained checkpoint to restore; the path is relative to the notebook's working directory.
CHECKPOINT_PATH = 'epoch=22-step=5359.ckpt'

# Restore the Lightning module from the checkpoint and switch it to evaluation
# mode. `eval()` returns the module, so as the last expression of the cell it
# also displays the network architecture.
model = FsrFgLightning.load_from_checkpoint(CHECKPOINT_PATH)
model.eval()
def smiles2index(s1, words2idx, bpe):
    """Tokenise a SMILES string with BPE and map each sub-word to its index.

    Args:
        s1: SMILES string to encode.
        words2idx: Mapping from sub-word token to integer index.
        bpe: Object exposing ``process_line(str) -> str``; the returned string
            is split on whitespace to obtain the tokens.

    Returns:
        List of indices, one per token, in token order (duplicates preserved).

    Raises:
        KeyError: If ``words2idx`` is a dict and a token is not in it.
    """
    return [words2idx[token] for token in bpe.process_line(s1).split()]


def index2multi_hot(i1, idx2word):
    """Build a multi-hot vector with ones at the given positions.

    Args:
        i1: Sequence of integer positions to switch on (may be empty,
            duplicates are harmless).
        idx2word: Vocabulary sequence; only its length is used.

    Returns:
        1-D float NumPy array of length ``len(idx2word)``.
    """
    v1 = np.zeros(len(idx2word))
    # An explicit integer dtype keeps an empty index list a valid index array.
    v1[np.asarray(i1, dtype=np.intp)] = 1
    return v1


def index2multi_hot_fg(molecule, fgroups_list):
    """Build a multi-hot vector marking which substructure patterns a molecule contains.

    Args:
        molecule: Object exposing ``HasSubstructMatch(pattern)``
            (here an RDKit molecule).
        fgroups_list: Sequence of substructure patterns to test, in output order.

    Returns:
        1-D float NumPy array of length ``len(fgroups_list)`` with a one at
        every position whose pattern matches.

    Raises:
        ValueError: If ``molecule`` is ``None`` (e.g. the SMILES failed to parse).
    """
    if molecule is None:
        raise ValueError(
            "molecule is None (unparsable SMILES?); cannot match functional groups"
        )
    v1 = np.zeros(len(fgroups_list))
    for idx, pattern in enumerate(fgroups_list):
        if molecule.HasSubstructMatch(pattern):
            v1[idx] = 1
    return v1


def smiles2vector_fgr(s1, words2idx, bpe, idx2word, fgroups_list):
    """Encode a SMILES string as a functional-group vector and a BPE sub-word vector.

    Args:
        s1: SMILES string.
        words2idx: Mapping from sub-word token to index (see ``smiles2index``).
        bpe: BPE tokenizer exposing ``process_line``.
        idx2word: Vocabulary sequence defining the length of the sub-word vector.
        fgroups_list: Substructure patterns defining the functional-group vector.

    Returns:
        Tuple ``(fg, mfg)``: the functional-group multi-hot vector and the
        sub-word multi-hot vector.

    Raises:
        ValueError: If the SMILES string cannot be parsed into a molecule.
        KeyError: If a BPE token is missing from ``words2idx``.
    """
    # Parse first: MolFromSmiles signals failure by returning None, which would
    # otherwise surface later as an opaque AttributeError.
    molecule = Chem.MolFromSmiles(s1)
    if molecule is None:
        raise ValueError(f"Could not parse SMILES: {s1!r}")
    fg = index2multi_hot_fg(molecule, fgroups_list)
    mfg = index2multi_hot(smiles2index(s1, words2idx, bpe), idx2word)
    return fg, mfg
from tqdm.auto import tqdm
import torch

# --- Functional-group patterns -------------------------------------------
fgroups = pd.read_csv('Functional_groups_filtered.csv')
fgroups_list = [Chem.MolFromSmarts(smarts) for smarts in fgroups['SMARTS'].tolist()]
# MolFromSmarts yields None for patterns it cannot parse; drop those.
# NOTE(review): dropping patterns shortens the fg vector - confirm its length
# still matches what the checkpoint was trained with.
fgroups_list = [pattern for pattern in fgroups_list if pattern is not None]
descriptor_funcs = {name: func for name, func in Descriptors.descList}

# --- BPE vocabulary ---------------------------------------------------------
vocab_path = 'codes_drug_chembl_1500.txt'
# The handle is closed once BPE is built.
# NOTE(review): this presumes BPE reads the whole codes file in its
# constructor - confirm against the installed subword_nmt version.
with codecs.open(vocab_path) as bpe_codes_fin:
    bpe = BPE(bpe_codes_fin, merges=-1, separator='')
vocab_map = pd.read_csv('subword_units_map_drug_chembl_1500.csv')
idx2word = vocab_map['index'].values
words2idx = dict(zip(idx2word, range(0, len(idx2word))))

# --- Input SMILES -----------------------------------------------------------
# One SMILES per line; blank lines are skipped so they are not scored as
# empty molecules. The context manager closes the file.
with open('saff_gen_gpt.txt') as f:
    smiles = [line.strip() for line in f.read().splitlines() if line.strip()]

# --- Inference --------------------------------------------------------------
# Relies on `model` having been loaded and put in eval mode by the earlier cell.
sm = torch.nn.Softmax(dim=1)
preds = []   # predicted class index per molecule
probab = []  # softmax probability of that predicted class
# no_grad: pure inference, so skip building an autograd graph per molecule.
with torch.no_grad():
    for smile in tqdm(smiles):
        fg, mfg = smiles2vector_fgr(smile, words2idx, bpe, idx2word, fgroups_list)
        y_pred, _ = model(
            fg=torch.as_tensor(fg, dtype=torch.float32).unsqueeze(0),
            mfg=torch.as_tensor(mfg, dtype=torch.float32).unsqueeze(0),
            num_features=None,
        )
        probabilities = sm(y_pred)
        probab.append(probabilities.max().item())
        preds.append(int(torch.argmax(y_pred)))

# --- Results ----------------------------------------------------------------
data = {'SMILES': smiles, 'Predicted_activity': preds, 'Probability': probab}
results = pd.DataFrame(data)  # built once, reused for export and summary
# NOTE(review): the output name does not mirror the input file name
# ('saff_gen_gpt.txt') - confirm this is the intended destination.
results.to_csv('moses250.csv', index=False)
results['Predicted_activity'].value_counts()
"execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3.9.5 64-bit", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.5" }, "vscode": { "interpreter": { "hash": "a4329ea539b1232b51730207fd9c93849c82cf9ff2a2d6356a1e6b85d15167f8" } } }, "nbformat": 4, "nbformat_minor": 0 }