{ "cells": [ { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [], "source": [ "import os\n", "import sys\n", "\n", "# sys.path entries are used verbatim: '~' is NOT expanded by the import system,\n", "# so resolve the home directory explicitly before appending.\n", "sys.path.append(os.path.expanduser('~/PROTAC-Degradation-Predictor/protac_degradation_predictor'))\n", "sys.path.append(os.path.expanduser('~/PROTAC-Degradation-Predictor'))\n", "\n", "import protac_degradation_predictor as pdp\n", "\n", "from collections import defaultdict\n", "\n", "from rdkit import DataStructs\n", "import numpy as np\n", "import pandas as pd\n", "from tqdm import tqdm\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | Compound ID | \n", "Uniprot | \n", "Smiles | \n", "E3 Ligase | \n", "InChI | \n", "InChI Key | \n", "Molecular Weight | \n", "Heavy Atom Count | \n", "Ring Count | \n", "Rotatable Bond Count | \n", "... | \n", "Assay (DC50/Dmax) | \n", "Exact Mass | \n", "XLogP3 | \n", "Target (Parsed) | \n", "POI Sequence | \n", "E3 Ligase Uniprot | \n", "E3 Ligase Sequence | \n", "Cell Line Identifier | \n", "Active - OR | \n", "Active (Dmax 0.6, pDC50 6.0) | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "1 | \n", "Q07817 | \n", "Cc1ncsc1-c1ccc([C@H](C)NC(=O)[C@@H]2C[C@@H](O)... | \n", "VHL | \n", "InChI=1S/C73H88ClF3N10O10S4/c1-47(49-13-15-51(... | \n", "SXPDUCVNMGMWBJ-FMZBIETASA-N | \n", "1486.282 | \n", "101 | \n", "10 | \n", "24 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "MSQSNRELVVDFLSYKLSQKGYSWSQFSDVEENRTEAPEGTESEME... | \n", "P40337 | \n", "MPRRAENWDEAEVGAEEAGVEEYGPEEDGGEESGAEESGPEESGPE... | \n", "MOLT-4 | \n", "NaN | \n", "NaN | \n", "
1 | \n", "2 | \n", "Q07817 | \n", "Cc1ncsc1-c1ccc([C@H](C)NC(=O)[C@@H]2C[C@@H](O)... | \n", "VHL | \n", "InChI=1S/C74H90ClF3N10O10S4/c1-48(50-13-15-52(... | \n", "HQKUMELJMUNTTF-NMKDNUEVSA-N | \n", "1500.309 | \n", "102 | \n", "10 | \n", "25 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "MSQSNRELVVDFLSYKLSQKGYSWSQFSDVEENRTEAPEGTESEME... | \n", "P40337 | \n", "MPRRAENWDEAEVGAEEAGVEEYGPEEDGGEESGAEESGPEESGPE... | \n", "MOLT-4 | \n", "NaN | \n", "NaN | \n", "
2 | \n", "3 | \n", "Q07817 | \n", "Cc1ncsc1-c1ccc([C@H](C)NC(=O)[C@@H]2C[C@@H](O)... | \n", "VHL | \n", "InChI=1S/C75H92ClF3N10O10S4/c1-49(51-16-18-53(... | \n", "ATQCEJKUPSBDMA-QARNUTPLSA-N | \n", "1514.336 | \n", "103 | \n", "10 | \n", "26 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "MSQSNRELVVDFLSYKLSQKGYSWSQFSDVEENRTEAPEGTESEME... | \n", "P40337 | \n", "MPRRAENWDEAEVGAEEAGVEEYGPEEDGGEESGAEESGPEESGPE... | \n", "MOLT-4 | \n", "NaN | \n", "NaN | \n", "
3 | \n", "4 | \n", "Q07817 | \n", "Cc1ncsc1-c1ccc([C@H](C)NC(=O)[C@@H]2C[C@@H](O)... | \n", "VHL | \n", "InChI=1S/C76H94ClF3N10O10S4/c1-50(52-17-19-54(... | \n", "FNKQAGMHNFFSEI-DTTPTBRMSA-N | \n", "1528.363 | \n", "104 | \n", "10 | \n", "27 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "MSQSNRELVVDFLSYKLSQKGYSWSQFSDVEENRTEAPEGTESEME... | \n", "P40337 | \n", "MPRRAENWDEAEVGAEEAGVEEYGPEEDGGEESGAEESGPEESGPE... | \n", "MOLT-4 | \n", "NaN | \n", "NaN | \n", "
4 | \n", "5 | \n", "Q07817 | \n", "Cc1ncsc1-c1ccc([C@H](C)NC(=O)[C@@H]2C[C@@H](O)... | \n", "VHL | \n", "InChI=1S/C77H96ClF3N10O10S4/c1-51(53-18-20-55(... | \n", "PXVFFBGSTYQHRO-REQIQPEASA-N | \n", "1542.390 | \n", "105 | \n", "10 | \n", "28 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "MSQSNRELVVDFLSYKLSQKGYSWSQFSDVEENRTEAPEGTESEME... | \n", "P40337 | \n", "MPRRAENWDEAEVGAEEAGVEEYGPEEDGGEESGAEESGPEESGPE... | \n", "MOLT-4 | \n", "True | \n", "True | \n", "
5 rows × 36 columns
\n", "