{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
molCIDClassModelpIC50MWAlogPHBAHBDRB...PEOE6 (PEOE6)PEOE7 (PEOE7)PEOE8 (PEOE8)PEOE9 (PEOE9)PEOE10 (PEOE10)PEOE11 (PEOE11)PEOE12 (PEOE12)PEOE13 (PEOE13)PEOE14 (PEOE14)canvasUID
0O1CC[C@@H](NC(=O)[C@@H](Cc2cc3cc(ccc3nc2N)-c2c...BACE_11Train9.154901431.569794.4014325...53.20571178.640335226.855410107.43491037.1338460.0000007.9801700.0000000.0000001
1Fc1cc(cc(F)c1)C[C@H](NC(=O)[C@@H](N1CC[C@](NC(...BACE_21Train8.853872657.810732.64125416...73.81716247.171600365.676940174.07675034.9238897.98017024.1486680.00000024.6637882
2S1(=O)(=O)N(c2cc(cc3c2n(cc3CC)CC1)C(=O)N[C@H](...BACE_31Train8.698970591.740912.54994311...70.36570747.941147192.406520255.75255023.6544780.23015915.8797900.00000024.6637883
3S1(=O)(=O)C[C@@H](Cc2cc(O[C@H](COCC)C(F)(F)F)c...BACE_41Train8.698970591.678283.16804312...56.65716637.954151194.353040202.76335036.4986340.9809138.1883270.00000026.3851814
4S1(=O)(=O)N(c2cc(cc3c2n(cc3CC)CC1)C(=O)N[C@H](...BACE_51Train8.698970629.712833.50863311...78.94570239.361153179.712880220.46130023.6544780.23015915.8797900.00000026.1001435
..................................................................
1508Clc1cc2nc(n(c2cc1)C(CC(=O)NCC1CCOCC1)CC)NBACE_15430Test3.000000364.869692.5942326...37.21279937.681076180.22641095.67012830.1075869.3681597.9801700.0000000.0000001543
1509Clc1cc2nc(n(c2cc1)C(CC(=O)NCc1ncccc1)CC)NBACE_15440Test3.000000357.837312.8229326...45.79279747.349350122.40150099.87714430.1075869.3681597.9801700.0000000.0000001544
1510Brc1cc(ccc1)C1CC1C=1N=C(N)N(C)C(=O)C=1BACE_15450Test2.953115320.184513.0895212...47.79060022.56357496.29079458.79893520.0717249.3681590.0000006.9041040.0000001545
1511O=C1N(C)C(=NC(=C1)C1CC1c1cc(ccc1)-c1ccccc1)NBACE_15460Test2.733298317.384403.8595213...77.2199789.31623495.907784112.60972020.0717249.3681590.0000006.9041040.0000001546
1512Clc1cc2nc(n(c2cc1)CCCC(=O)NCC1CC1)NBACE_15470Test2.544546306.790593.4271226...26.90707637.681076161.45078040.88291930.1075869.3681597.9801700.0000000.0000001547
\n", "

1513 rows × 595 columns

\n", "
" ], "text/plain": [ " mol CID Class \\\n", "0 O1CC[C@@H](NC(=O)[C@@H](Cc2cc3cc(ccc3nc2N)-c2c... BACE_1 1 \n", "1 Fc1cc(cc(F)c1)C[C@H](NC(=O)[C@@H](N1CC[C@](NC(... BACE_2 1 \n", "2 S1(=O)(=O)N(c2cc(cc3c2n(cc3CC)CC1)C(=O)N[C@H](... BACE_3 1 \n", "3 S1(=O)(=O)C[C@@H](Cc2cc(O[C@H](COCC)C(F)(F)F)c... BACE_4 1 \n", "4 S1(=O)(=O)N(c2cc(cc3c2n(cc3CC)CC1)C(=O)N[C@H](... BACE_5 1 \n", "... ... ... ... \n", "1508 Clc1cc2nc(n(c2cc1)C(CC(=O)NCC1CCOCC1)CC)N BACE_1543 0 \n", "1509 Clc1cc2nc(n(c2cc1)C(CC(=O)NCc1ncccc1)CC)N BACE_1544 0 \n", "1510 Brc1cc(ccc1)C1CC1C=1N=C(N)N(C)C(=O)C=1 BACE_1545 0 \n", "1511 O=C1N(C)C(=NC(=C1)C1CC1c1cc(ccc1)-c1ccccc1)N BACE_1546 0 \n", "1512 Clc1cc2nc(n(c2cc1)CCCC(=O)NCC1CC1)N BACE_1547 0 \n", "\n", " Model pIC50 MW AlogP HBA HBD RB ... PEOE6 (PEOE6) \\\n", "0 Train 9.154901 431.56979 4.4014 3 2 5 ... 53.205711 \n", "1 Train 8.853872 657.81073 2.6412 5 4 16 ... 73.817162 \n", "2 Train 8.698970 591.74091 2.5499 4 3 11 ... 70.365707 \n", "3 Train 8.698970 591.67828 3.1680 4 3 12 ... 56.657166 \n", "4 Train 8.698970 629.71283 3.5086 3 3 11 ... 78.945702 \n", "... ... ... ... ... ... ... .. ... ... \n", "1508 Test 3.000000 364.86969 2.5942 3 2 6 ... 37.212799 \n", "1509 Test 3.000000 357.83731 2.8229 3 2 6 ... 45.792797 \n", "1510 Test 2.953115 320.18451 3.0895 2 1 2 ... 47.790600 \n", "1511 Test 2.733298 317.38440 3.8595 2 1 3 ... 77.219978 \n", "1512 Test 2.544546 306.79059 3.4271 2 2 6 ... 26.907076 \n", "\n", " PEOE7 (PEOE7) PEOE8 (PEOE8) PEOE9 (PEOE9) PEOE10 (PEOE10) \\\n", "0 78.640335 226.855410 107.434910 37.133846 \n", "1 47.171600 365.676940 174.076750 34.923889 \n", "2 47.941147 192.406520 255.752550 23.654478 \n", "3 37.954151 194.353040 202.763350 36.498634 \n", "4 39.361153 179.712880 220.461300 23.654478 \n", "... ... ... ... ... 
\n", "1508 37.681076 180.226410 95.670128 30.107586 \n", "1509 47.349350 122.401500 99.877144 30.107586 \n", "1510 22.563574 96.290794 58.798935 20.071724 \n", "1511 9.316234 95.907784 112.609720 20.071724 \n", "1512 37.681076 161.450780 40.882919 30.107586 \n", "\n", " PEOE11 (PEOE11) PEOE12 (PEOE12) PEOE13 (PEOE13) PEOE14 (PEOE14) \\\n", "0 0.000000 7.980170 0.000000 0.000000 \n", "1 7.980170 24.148668 0.000000 24.663788 \n", "2 0.230159 15.879790 0.000000 24.663788 \n", "3 0.980913 8.188327 0.000000 26.385181 \n", "4 0.230159 15.879790 0.000000 26.100143 \n", "... ... ... ... ... \n", "1508 9.368159 7.980170 0.000000 0.000000 \n", "1509 9.368159 7.980170 0.000000 0.000000 \n", "1510 9.368159 0.000000 6.904104 0.000000 \n", "1511 9.368159 0.000000 6.904104 0.000000 \n", "1512 9.368159 7.980170 0.000000 0.000000 \n", "\n", " canvasUID \n", "0 1 \n", "1 2 \n", "2 3 \n", "3 4 \n", "4 5 \n", "... ... \n", "1508 1543 \n", "1509 1544 \n", "1510 1545 \n", "1511 1546 \n", "1512 1547 \n", "\n", "[1513 rows x 595 columns]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Read BACE.csv once and keep the unfiltered table under its own name,\n", "# so later cells never overwrite it.\n", "bace_raw = pd.read_csv('BACE.csv')\n", "bace_raw" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# Keep only the rows whose Class column equals 1.\n", "# Derived from bace_raw (not from df itself), so the raw table stays available.\n", "df = bace_raw[bace_raw['Class'] == 1]" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
molCIDClassModelpIC50MWAlogPHBAHBDRB...PEOE6 (PEOE6)PEOE7 (PEOE7)PEOE8 (PEOE8)PEOE9 (PEOE9)PEOE10 (PEOE10)PEOE11 (PEOE11)PEOE12 (PEOE12)PEOE13 (PEOE13)PEOE14 (PEOE14)canvasUID
0O1CC[C@@H](NC(=O)[C@@H](Cc2cc3cc(ccc3nc2N)-c2c...BACE_11Train9.154901431.569794.4014325...53.20571178.640335226.855410107.4349137.1338460.0000007.9801700.0000000.0000001
1Fc1cc(cc(F)c1)C[C@H](NC(=O)[C@@H](N1CC[C@](NC(...BACE_21Train8.853872657.810732.64125416...73.81716247.171600365.676940174.0767534.9238897.98017024.1486680.00000024.6637882
2S1(=O)(=O)N(c2cc(cc3c2n(cc3CC)CC1)C(=O)N[C@H](...BACE_31Train8.698970591.740912.54994311...70.36570747.941147192.406520255.7525523.6544780.23015915.8797900.00000024.6637883
3S1(=O)(=O)C[C@@H](Cc2cc(O[C@H](COCC)C(F)(F)F)c...BACE_41Train8.698970591.678283.16804312...56.65716637.954151194.353040202.7633536.4986340.9809138.1883270.00000026.3851814
4S1(=O)(=O)N(c2cc(cc3c2n(cc3CC)CC1)C(=O)N[C@H](...BACE_51Train8.698970629.712833.50863311...78.94570239.361153179.712880220.4613023.6544780.23015915.8797900.00000026.1001435
..................................................................
789Fc1cc(cc(F)c1)CC(NC(=O)C)C(O)C[NH2+]C1(CCCCC1)...BACE_8211Test7.000000457.575813.8757239...32.84419668.638565260.466370108.2074223.3021037.9801708.1883270.00000024.663788821
790Fc1ccc(cc1OCCCCF)[C@]1(N=C(N)N(C)C1=O)c1ccc(OC...BACE_8221Test7.000000439.403414.4032409...25.73999248.51122389.019829138.5116136.0943538.9519147.9801700.0000006.970751822
791O=C1N(C)C(=NC1(c1cc(ccc1)-c1cncnc1)c1cn(nc1)C)NBACE_8231Test7.000000347.373900.2846503...25.73999213.182580127.74838097.0191544.0976753.2717397.9801700.0000000.000000823
792FC1(F)CN2C(=NC1)C(N=C2N)(c1cc(ccc1)-c1cncnc1)c...BACE_8241Test7.000000434.441312.6596504...42.89998632.98883158.408638173.9353357.8991433.2717390.000000-1.2735240.000000824
793S1(=O)(=O)CC(Cc2cc(OCCOC)c(N)c(F)c2)C(O)C([NH2...BACE_8251Test7.000000509.653691.78314310...56.65716627.648426167.337080231.5155236.4986340.0000008.1883270.00000024.663788825
\n", "

691 rows × 595 columns

\n", "
" ], "text/plain": [ " mol CID Class \\\n", "0 O1CC[C@@H](NC(=O)[C@@H](Cc2cc3cc(ccc3nc2N)-c2c... BACE_1 1 \n", "1 Fc1cc(cc(F)c1)C[C@H](NC(=O)[C@@H](N1CC[C@](NC(... BACE_2 1 \n", "2 S1(=O)(=O)N(c2cc(cc3c2n(cc3CC)CC1)C(=O)N[C@H](... BACE_3 1 \n", "3 S1(=O)(=O)C[C@@H](Cc2cc(O[C@H](COCC)C(F)(F)F)c... BACE_4 1 \n", "4 S1(=O)(=O)N(c2cc(cc3c2n(cc3CC)CC1)C(=O)N[C@H](... BACE_5 1 \n", ".. ... ... ... \n", "789 Fc1cc(cc(F)c1)CC(NC(=O)C)C(O)C[NH2+]C1(CCCCC1)... BACE_821 1 \n", "790 Fc1ccc(cc1OCCCCF)[C@]1(N=C(N)N(C)C1=O)c1ccc(OC... BACE_822 1 \n", "791 O=C1N(C)C(=NC1(c1cc(ccc1)-c1cncnc1)c1cn(nc1)C)N BACE_823 1 \n", "792 FC1(F)CN2C(=NC1)C(N=C2N)(c1cc(ccc1)-c1cncnc1)c... BACE_824 1 \n", "793 S1(=O)(=O)CC(Cc2cc(OCCOC)c(N)c(F)c2)C(O)C([NH2... BACE_825 1 \n", "\n", " Model pIC50 MW AlogP HBA HBD RB ... PEOE6 (PEOE6) \\\n", "0 Train 9.154901 431.56979 4.4014 3 2 5 ... 53.205711 \n", "1 Train 8.853872 657.81073 2.6412 5 4 16 ... 73.817162 \n", "2 Train 8.698970 591.74091 2.5499 4 3 11 ... 70.365707 \n", "3 Train 8.698970 591.67828 3.1680 4 3 12 ... 56.657166 \n", "4 Train 8.698970 629.71283 3.5086 3 3 11 ... 78.945702 \n", ".. ... ... ... ... ... ... .. ... ... \n", "789 Test 7.000000 457.57581 3.8757 2 3 9 ... 32.844196 \n", "790 Test 7.000000 439.40341 4.4032 4 0 9 ... 25.739992 \n", "791 Test 7.000000 347.37390 0.2846 5 0 3 ... 25.739992 \n", "792 Test 7.000000 434.44131 2.6596 5 0 4 ... 42.899986 \n", "793 Test 7.000000 509.65369 1.7831 4 3 10 ... 56.657166 \n", "\n", " PEOE7 (PEOE7) PEOE8 (PEOE8) PEOE9 (PEOE9) PEOE10 (PEOE10) \\\n", "0 78.640335 226.855410 107.43491 37.133846 \n", "1 47.171600 365.676940 174.07675 34.923889 \n", "2 47.941147 192.406520 255.75255 23.654478 \n", "3 37.954151 194.353040 202.76335 36.498634 \n", "4 39.361153 179.712880 220.46130 23.654478 \n", ".. ... ... ... ... 
\n", "789 68.638565 260.466370 108.20742 23.302103 \n", "790 48.511223 89.019829 138.51161 36.094353 \n", "791 13.182580 127.748380 97.01915 44.097675 \n", "792 32.988831 58.408638 173.93533 57.899143 \n", "793 27.648426 167.337080 231.51552 36.498634 \n", "\n", " PEOE11 (PEOE11) PEOE12 (PEOE12) PEOE13 (PEOE13) PEOE14 (PEOE14) \\\n", "0 0.000000 7.980170 0.000000 0.000000 \n", "1 7.980170 24.148668 0.000000 24.663788 \n", "2 0.230159 15.879790 0.000000 24.663788 \n", "3 0.980913 8.188327 0.000000 26.385181 \n", "4 0.230159 15.879790 0.000000 26.100143 \n", ".. ... ... ... ... \n", "789 7.980170 8.188327 0.000000 24.663788 \n", "790 8.951914 7.980170 0.000000 6.970751 \n", "791 3.271739 7.980170 0.000000 0.000000 \n", "792 3.271739 0.000000 -1.273524 0.000000 \n", "793 0.000000 8.188327 0.000000 24.663788 \n", "\n", " canvasUID \n", "0 1 \n", "1 2 \n", "2 3 \n", "3 4 \n", "4 5 \n", ".. ... \n", "789 821 \n", "790 822 \n", "791 823 \n", "792 824 \n", "793 825 \n", "\n", "[691 rows x 595 columns]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Display the filtered frame (pandas abbreviates the repr of a large frame).\n", "df" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "\n", "# Write the values of the mol column to bace.txt, each formatted with '%s'.\n", "np.savetxt(fname=r'bace.txt', X=df['mol'].values, fmt='%s')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.5" }, "orig_nbformat": 4, "vscode": { "interpreter": { "hash": "a4329ea539b1232b51730207fd9c93849c82cf9ff2a2d6356a1e6b85d15167f8" } } }, "nbformat": 4, "nbformat_minor": 2 }