Spaces:
Sleeping
Sleeping
File size: 4,717 Bytes
996ea1d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 |
'''
This is a standalone, importable SCScorer model. It does not have tensorflow as a
dependency and is a more attractive option for deployment. The calculations are
fast enough that there is no real reason to use GPUs (via tf) instead of CPUs (via np)
'''
import math, sys, random, os
import numpy as np
import time
import rdkit.Chem as Chem
import rdkit.Chem.AllChem as AllChem
import json
import gzip
import six
import os
# Directory two levels up from this file's path (the parent of the directory
# that contains this module).
project_root = os.path.dirname(os.path.dirname(__file__))
# Default upper bound of the output score: scores are computed as
# 1 + (scale - 1) * sigmoid(network_output).
score_scale = 5.0
# NOTE(review): not referenced anywhere in the visible code; presumably a
# leftover training-time margin -- confirm before removing.
min_separation = 0.25
# Default fingerprint length (number of folded bits/counters) and Morgan
# fingerprint radius used by SCScorer.restore().
FP_len = 1024
FP_rad = 2
def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)) of a scalar.

    The computation is split by the sign of ``x`` so that the argument
    passed to ``math.exp`` is never positive.  The naive form
    ``1 / (1 + math.exp(-x))`` raises ``OverflowError`` for large negative
    ``x`` (roughly ``x < -709``); this version returns 0.0 instead.

    Args:
        x: A real number (anything ``math.exp`` accepts, which includes a
            size-1 numpy array).

    Returns:
        float: A value in the closed interval [0, 1].
    """
    if x >= 0:
        return 1.0 / (1.0 + math.exp(-x))
    # x < 0: exp(x) is in (0, 1), so this cannot overflow.
    z = math.exp(x)
    return z / (1.0 + z)
class SCScorer():
    """Numpy-only feed-forward scorer operating on Morgan fingerprints.

    The model is a stack of dense layers (ReLU between layers, sigmoid on
    the output) whose weights are loaded from disk by :meth:`restore`.
    Scores produced by :meth:`apply` lie between 1 and ``score_scale``; a
    score of 0 is used by :meth:`get_score_from_smi` to signal that no
    fingerprint could be computed.

    Typical use::

        model = SCScorer().restore(weight_path)
        smi, score = model.get_score_from_smi('CCO')
    """

    def __init__(self, score_scale=score_scale):
        """Create an empty scorer; call :meth:`restore` before scoring.

        Args:
            score_scale: Upper bound of the score range produced by
                :meth:`apply`.
        """
        self.vars = []  # alternating weight / bias arrays, one pair per layer
        self.score_scale = score_scale
        self._restored = False

    def _require_restored(self):
        """Raise ``ValueError`` unless :meth:`restore` has completed."""
        if not self._restored:
            raise ValueError('Must restore model weights!')

    def restore(self, weight_path=os.path.join('model.ckpt-10654.as_numpy.json.gz'), FP_rad=FP_rad, FP_len=FP_len):
        """Load weights and select the fingerprint function.

        If ``weight_path`` contains ``'uint8'`` or ``'counts'`` the model
        uses folded count fingerprints (uint8); otherwise it uses boolean
        bit-vector fingerprints.

        The chosen function is stored on the instance as ``self.mol_to_fp``.
        Because it is a plain function stored on the instance (not a bound
        method), it must be called as ``self.mol_to_fp(self, mol)``.

        Args:
            weight_path: Path to a ``*pickle`` or ``*json.gz`` weight file.
            FP_rad: Morgan fingerprint radius.
            FP_len: Length of the (folded) fingerprint vector.

        Returns:
            SCScorer: ``self``, so calls can be chained.

        Raises:
            ValueError: If the weight file extension is not supported.
        """
        self.FP_len = FP_len
        self.FP_rad = FP_rad
        self._load_vars(weight_path)
        # print('Restored variables from {}'.format(weight_path))

        if 'uint8' in weight_path or 'counts' in weight_path:
            def mol_to_fp(self, mol):
                """Return a folded uint8 count fingerprint (zeros if mol is None)."""
                if mol is None:
                    # Zero vector of the full length (not an array holding
                    # the length itself).
                    return np.zeros((self.FP_len,), dtype=np.uint8)
                fp = AllChem.GetMorganFingerprint(mol, self.FP_rad, useChirality=True)  # UIntSparseIntVect
                fp_folded = np.zeros((self.FP_len,), dtype=np.uint8)
                # Fold the sparse counts into a fixed-length vector.
                for k, v in fp.GetNonzeroElements().items():
                    fp_folded[k % self.FP_len] += v
                return np.array(fp_folded)
        else:
            def mol_to_fp(self, mol):
                """Return a boolean bit-vector fingerprint (float32 zeros if mol is None)."""
                if mol is None:
                    return np.zeros((self.FP_len,), dtype=np.float32)
                bit_vect = AllChem.GetMorganFingerprintAsBitVect(
                    mol, self.FP_rad, nBits=self.FP_len, useChirality=True)
                return np.array(bit_vect, dtype=np.bool_)

        self.mol_to_fp = mol_to_fp
        self._restored = True
        return self

    def smi_to_fp(self, smi):
        """Return the fingerprint for a SMILES string.

        Args:
            smi: SMILES string; an empty/falsy value yields a float32 zero
                vector of length ``FP_len``.

        Raises:
            ValueError: If :meth:`restore` has not been called.
        """
        self._require_restored()
        if not smi:
            return np.zeros((self.FP_len,), dtype=np.float32)
        return self.mol_to_fp(self, Chem.MolFromSmiles(smi))

    def apply(self, x):
        """Run the network on a fingerprint vector and return the score.

        Args:
            x: Fingerprint vector compatible with the first weight matrix.
                The final layer must produce a single value, because the
                module-level ``sigmoid`` works on scalars.

        Returns:
            The score ``1 + (self.score_scale - 1) * sigmoid(output)``.

        Raises:
            ValueError: If :meth:`restore` has not been called.
        """
        self._require_restored()
        # Each pair of vars is a weight and bias term
        for i in range(0, len(self.vars), 2):
            last_layer = (i == len(self.vars) - 2)
            W = self.vars[i]
            b = self.vars[i + 1]
            x = np.matmul(x, W) + b
            if not last_layer:
                x = x * (x > 0)  # ReLU
        # Use the instance's scale so the constructor argument takes effect.
        x = 1 + (self.score_scale - 1) * sigmoid(x)
        return x

    def get_score_from_smi(self, smi='', v=False):
        """Score a single SMILES string.

        Args:
            smi: SMILES string to score.
            v: If true, print progress messages.

        Returns:
            tuple: ``(smiles, score)`` where ``smiles`` is the isomeric,
            kekulized SMILES written back by RDKit (``''`` if the input is
            empty or cannot be parsed) and ``score`` is the model score, or
            ``0.`` if no non-zero fingerprint could be computed.

        Raises:
            ValueError: If ``smi`` is non-empty and :meth:`restore` has not
                been called.
        """
        if not smi:
            return ('', 0.)
        self._require_restored()

        # Parse once and reuse the molecule for both the fingerprint and
        # the canonical SMILES output.
        mol = Chem.MolFromSmiles(smi)
        fp = np.array(self.mol_to_fp(self, mol), dtype=np.float32)
        if not fp.any():
            if v:
                print('Could not get fingerprint?')
            cur_score = 0.
        else:
            # Run
            cur_score = self.apply(fp)
            if v:
                print('Score: {}'.format(cur_score))

        if mol is not None:
            smi = Chem.MolToSmiles(mol, isomericSmiles=True, kekuleSmiles=True)
        else:
            smi = ''
        return (smi, cur_score)

    def get_avg_score(self, smis):
        """
        Compute the average score for a list of SMILES strings.

        Entries whose score is not positive (empty or unparsable SMILES)
        are excluded from the average.

        Args:
            smis (list of str): A list of SMILES strings.
        Returns:
            float: The average score of the scorable SMILES strings, or 0.0
            if there are none.
        """
        if not smis:  # Check if the list is empty
            return 0.0

        total_score = 0.0
        valid_smiles_count = 0
        for smi in smis:
            _, score = self.get_score_from_smi(smi)
            if score > 0:  # a score of 0 marks a failed fingerprint
                total_score += score
                valid_smiles_count += 1

        # Avoid division by zero
        if valid_smiles_count == 0:
            return 0.0
        return total_score / valid_smiles_count

    def _load_vars(self, weight_path):
        """Load the layer weights from ``weight_path`` into ``self.vars``.

        Supported formats are chosen by file name suffix: ``pickle`` (a
        pickled sequence of arrays, stored as nested lists) and ``json.gz``
        (gzip-compressed UTF-8 JSON, stored as numpy arrays).

        Raises:
            ValueError: If the suffix is neither ``pickle`` nor ``json.gz``.
        """
        if weight_path.endswith('pickle'):
            import pickle
            # NOTE: pickle.load can execute arbitrary code; only load weight
            # files from a trusted source.
            with open(weight_path, 'rb') as fid:
                self.vars = pickle.load(fid)
            self.vars = [x.tolist() for x in self.vars]
        elif weight_path.endswith('json.gz'):
            with gzip.GzipFile(weight_path, 'r') as fin:  # gzip -> bytes
                json_bytes = fin.read()
            json_str = json_bytes.decode('utf-8')  # bytes -> JSON text
            self.vars = [np.array(x) for x in json.loads(json_str)]
        else:
            # Previously this fell through silently, leaving the model
            # "restored" with no weights.
            raise ValueError(
                'Unsupported weight file format: {}'.format(weight_path))
|