Spaces:

saicharan2804
/

molgenevalmetric

Sleeping

molgenevalmetric / SCScore.py

saicharan2804

Adding needed things for scs

996ea1d 10 months ago

4.72 kB

	'''
	This is a standalone, importable SCScorer model. It does not have tensorflow as a
	dependency and is a more attractive option for deployment. The calculations are
	fast enough that there is no real reason to use GPUs (via tf) instead of CPUs (via np)
	'''

	import math, sys, random, os
	import numpy as np
	import time
	import rdkit.Chem as Chem
	import rdkit.Chem.AllChem as AllChem
	import json
	import gzip
	import six

	import os
	# Directory two levels above this file's path (dirname of dirname of __file__).
	# NOTE(review): not referenced anywhere else in the visible code.
	project_root = os.path.dirname(os.path.dirname(__file__))

	# Default upper bound of the score range: SCScorer maps the network output
	# to 1 + (score_scale - 1) * sigmoid(output), i.e. into (1, score_scale).
	score_scale = 5.0
	# NOTE(review): unused in the visible code; presumably a training-time
	# margin kept for parity with the original model code -- confirm.
	min_separation = 0.25

	# Default fingerprint length (number of entries in the folded vector).
	FP_len = 1024
	# Default Morgan fingerprint radius passed to RDKit.
	FP_rad = 2

	def sigmoid(x):
	    """Return the logistic function 1 / (1 + exp(-x)) of a scalar.

	    Args:
	        x: A real number (anything ``math.exp`` accepts).

	    Returns:
	        float: A value in [0, 1].
	    """
	    try:
	        return 1 / (1 + math.exp(-x))
	    except OverflowError:
	        # exp(-x) exceeds the float range for strongly negative x (roughly
	        # x < -709); the mathematical limit of the sigmoid there is 0.
	        return 0.0

	class SCScorer():
	    """Standalone NumPy implementation of the SCScore feed-forward model.

	    Typical use::

	        model = SCScorer().restore()
	        smi, score = model.get_score_from_smi('CCO')

	    Attributes:
	        vars: Flat list of layer parameters, alternating weight, bias.
	        score_scale: Upper bound of the score range; scores produced by
	            ``apply`` lie in (1, score_scale).
	        FP_len: Fingerprint length (set by ``restore``).
	        FP_rad: Morgan fingerprint radius (set by ``restore``).
	        mol_to_fp: Fingerprint function selected by ``restore``. It is a
	            plain function stored on the instance, so it must be called as
	            ``self.mol_to_fp(self, mol)``.
	    """

	    def __init__(self, score_scale=score_scale):
	        self.vars = []
	        self.score_scale = score_scale
	        self._restored = False

	    def restore(self, weight_path=os.path.join('model.ckpt-10654.as_numpy.json.gz'), FP_rad=FP_rad, FP_len=FP_len):
	        """Load model weights and select the matching fingerprint function.

	        Args:
	            weight_path: Path to the weights; must end in ``pickle`` or
	                ``json.gz``. If the path contains ``uint8`` or ``counts``,
	                folded count fingerprints are used, otherwise bit vectors.
	            FP_rad: Morgan fingerprint radius.
	            FP_len: Fingerprint length.

	        Returns:
	            SCScorer: ``self``, so calls can be chained.

	        Raises:
	            ValueError: If ``weight_path`` has an unsupported extension.
	        """
	        self.FP_len = FP_len
	        self.FP_rad = FP_rad
	        self._load_vars(weight_path)

	        if 'uint8' in weight_path or 'counts' in weight_path:
	            def mol_to_fp(self, mol):
	                # Count fingerprint folded into FP_len uint8 bins.
	                if mol is None:
	                    return np.zeros((self.FP_len,), dtype=np.uint8)
	                fp = AllChem.GetMorganFingerprint(mol, self.FP_rad, useChirality=True)  # UIntSparseIntVect
	                fp_folded = np.zeros((self.FP_len,), dtype=np.uint8)
	                for k, v in six.iteritems(fp.GetNonzeroElements()):
	                    fp_folded[k % self.FP_len] += v
	                return fp_folded
	        else:
	            def mol_to_fp(self, mol):
	                # Boolean bit-vector fingerprint of length FP_len.
	                if mol is None:
	                    return np.zeros((self.FP_len,), dtype=np.float32)
	                bit_vect = AllChem.GetMorganFingerprintAsBitVect(
	                    mol, self.FP_rad, nBits=self.FP_len, useChirality=True)
	                return np.array(bit_vect, dtype=np.bool_)
	        # Stored as an instance attribute (not a bound method), hence the
	        # explicit ``self`` argument at every call site.
	        self.mol_to_fp = mol_to_fp

	        self._restored = True
	        return self

	    def smi_to_fp(self, smi):
	        """Return the fingerprint of a SMILES string (all zeros if empty)."""
	        if not smi:
	            return np.zeros((self.FP_len,), dtype=np.float32)
	        return self.mol_to_fp(self, Chem.MolFromSmiles(smi))

	    def apply(self, x):
	        """Run the network on one fingerprint and return its score.

	        Args:
	            x: Fingerprint vector accepted by the first weight matrix.

	        Returns:
	            float: ``1 + (self.score_scale - 1) * sigmoid(output)``.

	        Raises:
	            ValueError: If ``restore`` has not been called, or if the
	                network output is not a single value.
	        """
	        if not self._restored:
	            raise ValueError('Must restore model weights!')
	        n_vars = len(self.vars)
	        # Each pair of vars is a weight and bias term
	        for i in range(0, n_vars, 2):
	            x = np.matmul(x, self.vars[i]) + self.vars[i + 1]
	            if i != n_vars - 2:
	                x = x * (x > 0)  # ReLU on every layer except the last
	        # Convert explicitly: implicit conversion of a non-0-d array to a
	        # Python scalar (as math.exp would do) is deprecated in NumPy.
	        logit = np.asarray(x).item()
	        # Use the instance's scale so the constructor argument takes effect.
	        return 1 + (self.score_scale - 1) * sigmoid(logit)

	    def get_score_from_smi(self, smi='', v=False):
	        """Score a single SMILES string.

	        Args:
	            smi: SMILES string to score.
	            v: If true, print progress messages.

	        Returns:
	            tuple: ``(smiles, score)``. ``smiles`` is the kekulized isomeric
	            SMILES written by RDKit, or ``''`` if the input could not be
	            parsed. ``score`` is ``0.`` when no fingerprint was obtained.
	        """
	        if not smi:
	            return ('', 0.)
	        # Parse once; the molecule is reused for fingerprint and output SMILES.
	        mol = Chem.MolFromSmiles(smi)
	        fp = np.array(self.mol_to_fp(self, mol), dtype=np.float32)
	        if not fp.any():
	            if v:
	                print('Could not get fingerprint?')
	            cur_score = 0.
	        else:
	            cur_score = self.apply(fp)
	        if v:
	            print('Score: {}'.format(cur_score))
	        if mol is not None:
	            smi = Chem.MolToSmiles(mol, isomericSmiles=True, kekuleSmiles=True)
	        else:
	            smi = ''
	        return (smi, cur_score)

	    def get_avg_score(self, smis):
	        """Compute the average score over the scorable SMILES in ``smis``.

	        Entries whose score is not positive (empty, unparsable, or without
	        a fingerprint) are left out of the average.

	        Args:
	            smis (list of str): A list of SMILES strings.

	        Returns:
	            float: Mean of the positive scores, or 0.0 if there are none.
	        """
	        if not smis:
	            return 0.0
	        scores = [self.get_score_from_smi(smi)[1] for smi in smis]
	        valid_scores = [score for score in scores if score > 0]
	        if not valid_scores:
	            return 0.0
	        return sum(valid_scores, 0.0) / len(valid_scores)

	    def _load_vars(self, weight_path):
	        """Populate ``self.vars`` with NumPy arrays read from ``weight_path``.

	        Raises:
	            ValueError: If the path ends in neither ``pickle`` nor ``json.gz``.
	        """
	        if weight_path.endswith('pickle'):
	            import pickle
	            # NOTE(security): unpickling executes arbitrary code; only load
	            # weight files from a trusted source.
	            with open(weight_path, 'rb') as fid:
	                raw_vars = pickle.load(fid)
	            # Round-trip through lists (as before) but store arrays, so both
	            # formats yield the same container type and nothing is
	            # re-converted on every forward pass.
	            self.vars = [np.array(x.tolist()) for x in raw_vars]
	        elif weight_path.endswith('json.gz'):
	            with gzip.GzipFile(weight_path, 'r') as fin:
	                json_str = fin.read().decode('utf-8')
	            self.vars = [np.array(x) for x in json.loads(json_str)]
	        else:
	            # Previously this fell through silently, leaving the model
	            # without weights while still being marked as restored.
	            raise ValueError(
	                "Unsupported weight file {!r}: expected a path ending in "
	                "'pickle' or 'json.gz'".format(weight_path))