"""Run the ``saicharan2804/molgenevalmetric`` metric on a CSV of SMILES strings.

The script reads a CSV file with a ``SMILES`` column, passes the first
``split_size`` entries to the metric as ``gensmi`` and the following
``split_size`` entries as ``trainsmi``, and prints the result.

Usage:
    python <this file> [path/to/smiles.csv]

When no path is given, ``DEFAULT_CSV_PATH`` is used.
"""

import sys

import evaluate
import pandas as pd

# Location used when no path is supplied on the command line.
DEFAULT_CSV_PATH = '/Users/saicharan/chembl_10000.csv'

# Identifier handed to ``evaluate.load``.
METRIC_ID = "saicharan2804/molgenevalmetric"

# Number of SMILES strings placed in each of the two slices.
DEFAULT_SPLIT_SIZE = 5000


def main(csv_path: str = DEFAULT_CSV_PATH,
         split_size: int = DEFAULT_SPLIT_SIZE) -> None:
    """Compute the metric on two consecutive slices of the CSV and print it.

    Args:
        csv_path: CSV file to read; it must contain a ``SMILES`` column.
        split_size: Length of each slice. Rows ``[0, split_size)`` are passed
            as ``gensmi`` and rows ``[split_size, 2 * split_size)`` as
            ``trainsmi``.

    Raises:
        KeyError: If the CSV has no ``SMILES`` column.
    """
    # Read the data before loading the metric so that a wrong path fails
    # before the metric is loaded.
    df = pd.read_csv(csv_path)
    molgenevalmetric = evaluate.load(METRIC_ID)

    ls = df['SMILES'].tolist()
    ls_gen = ls[0:split_size]
    # Slicing never raises: a file with fewer than 2 * split_size rows simply
    # yields a shorter (possibly empty) second slice.
    ls_train = ls[split_size:2 * split_size]

    print('computing')
    print(molgenevalmetric.compute(gensmi=ls_gen, trainsmi=ls_train))


if __name__ == "__main__":
    # An optional first command-line argument overrides the CSV location.
    main(sys.argv[1] if len(sys.argv) > 1 else DEFAULT_CSV_PATH)


# ---------------------------------------------------------------------------
# Everything below is commented-out scratch code kept for reference.
# None of it is executed.
# ---------------------------------------------------------------------------

# import evaluate
# from evaluate.utils import launch_gradio_widget
# import gradio as gr

# module = evaluate.load("saicharan2804/molgenevalmetric")
# # launch_gradio_widget(module)

# iface = gr.Interface(
#     fn = module,
#     inputs=[
#         gr.File(label="Generated SMILES"),
#         gr.File(label="Training Data", value=None),
#     ],
#     outputs="text"
# )

# iface.launch()

# import pandas as pd
# df = pd.read_csv('/home/saicharan/Downloads/chembl.csv')
# df = df.rename(columns={'canonical_smiles': 'SMILES'})
# df = df[0:10000]
# print(df[['SMILES']].to_csv('/home/saicharan/Downloads/chembl_10000.csv'))

# from SCScore import SCScorer

# '''
# This is a standalone, importable SCScorer model. It does not have tensorflow as a
# dependency and is a more attractive option for deployment. The calculations are
# fast enough that there is no real reason to use GPUs (via tf) instead of CPUs (via np)
# '''

# import numpy as np
# import time
# import rdkit.Chem as Chem
# import rdkit.Chem.AllChem as AllChem
# import json
# import gzip
# import six

# import os
# project_root = os.path.dirname(os.path.dirname(__file__))

# score_scale = 5.0
# min_separation = 0.25

# FP_len = 1024
# FP_rad = 2

# def sigmoid(x):
#     return 1 / (1 + np.exp(-x))

# class SCScorer():
#     def __init__(self, score_scale=score_scale):
#         self.vars = []
#         self.score_scale = score_scale
#         self._restored = False

#     def restore(self, weight_path=os.path.join('model.ckpt-10654.as_numpy.json.gz'), FP_rad=FP_rad, FP_len=FP_len):
#         self.FP_len = FP_len; self.FP_rad = FP_rad
#         self._load_vars(weight_path)
#         # print('Restored variables from {}'.format(weight_path))

#         if 'uint8' in weight_path or 'counts' in weight_path:
#             def mol_to_fp(self, mol):
#                 if mol is None:
#                     return np.array((self.FP_len,), dtype=np.uint8)
#                 fp = AllChem.GetMorganFingerprint(mol, self.FP_rad, useChirality=True) # uitnsparsevect
#                 fp_folded = np.zeros((self.FP_len,), dtype=np.uint8)
#                 for k, v in six.iteritems(fp.GetNonzeroElements()):
#                     fp_folded[k % self.FP_len] += v
#                 return np.array(fp_folded)
#         else:
#             def mol_to_fp(self, mol):
#                 if mol is None:
#                     return np.zeros((self.FP_len,), dtype=np.float32)
#                 return np.array(AllChem.GetMorganFingerprintAsBitVect(mol, self.FP_rad, nBits=self.FP_len,
#                                                                       useChirality=True), dtype=np.bool_)
#         self.mol_to_fp = mol_to_fp

#         self._restored = True
#         return self

#     def smi_to_fp(self, smi):
#         if not smi:
#             return np.zeros((self.FP_len,), dtype=np.float32)
#         return self.mol_to_fp(self, Chem.MolFromSmiles(smi))

#     def apply(self, x):
#         if not self._restored:
#             raise ValueError('Must restore model weights!')
#         # Each pair of vars is a weight and bias term
#         for i in range(0, len(self.vars), 2):
#             last_layer = (i == len(self.vars)-2)
#             W = self.vars[i]
#             b = self.vars[i+1]
#             x = np.matmul(x, W) + b
#             if not last_layer:
#                 x = x * (x > 0) # ReLU
#         x = 1 + (score_scale - 1) * sigmoid(x)
#         return x

#     def get_score_from_smi(self, smi='', v=False):
#         if not smi:
#             return ('', 0.)
#         fp = np.array((self.smi_to_fp(smi)), dtype=np.float32)
#         if sum(fp) == 0:
#             if v: print('Could not get fingerprint?')
#             cur_score = 0.
#         else:
#             # Run
#             cur_score = self.apply(fp)
#             if v: print('Score: {}'.format(cur_score))
#         mol = Chem.MolFromSmiles(smi)
#         if mol:
#             smi = Chem.MolToSmiles(mol, isomericSmiles=True, kekuleSmiles=True)
#         else:
#             smi = ''
#         return (smi, cur_score)

#     def get_avg_score(self, smis):
#         """
#         Compute the average score for a list of SMILES strings.
#
#         Args:
#             smis (list of str): A list of SMILES strings.
#
#         Returns:
#             float: The average score of the given SMILES strings.
#         """
#         if not smis:  # Check if the list is empty
#             return 0.0

#         total_score = 0.0
#         valid_smiles_count = 0

#         for smi in smis:
#             _, score = self.get_score_from_smi(smi)
#             if score > 0:  # Assuming only positive scores are valid
#                 total_score += score
#                 valid_smiles_count += 1

#         # Avoid division by zero
#         if valid_smiles_count == 0:
#             return 0.0
#         else:
#             return total_score / valid_smiles_count

#     def _load_vars(self, weight_path):
#         if weight_path.endswith('pickle'):
#             import pickle
#             with open(weight_path, 'rb') as fid:
#                 self.vars = pickle.load(fid)
#                 self.vars = [x.tolist() for x in self.vars]
#         elif weight_path.endswith('json.gz'):
#             with gzip.GzipFile(weight_path, 'r') as fin:    # 4. gzip
#                 json_bytes = fin.read()                      # 3. bytes (i.e. UTF-8)
#                 json_str = json_bytes.decode('utf-8')        # 2. string (i.e. JSON)
#                 self.vars = json.loads(json_str)
#                 self.vars = [np.array(x) for x in self.vars]

# from myscscore.SCScore import SCScorer
# import pandas as pd

# model = SCScorer()
# model.restore()

# # import evaluate
# # molgenevalmetric = evaluate.load("saicharan2804/molgenevalmetric")

# df = pd.read_csv('/home/saicharan/Downloads/chembl_10000.csv')
# ls= df['SMILES'].tolist()
# ls_gen = ls[0:5000]
# ls_train = ls[5000:10000]
# print('computing')
# average_score = model.get_avg_score(ls_gen)

# # Print the average score
# print('Average score:', average_score)

# # print(molgenevalmetric.compute(gensmi = ls_gen, trainsmi = ls_train))