Spaces:

saicharan2804
/

molgenevalmetric

Sleeping

App Files Files Community

saicharan2804 committed on Apr 12, 2024

Commit

e8c430f

•

1 Parent(s): e9ff6dc

Change

Browse files

Files changed (3) hide show

__pycache__/SCScore.cpython-39.pyc +0 -0
app.py +151 -6
molgenevalmetric.py +5 -5

__pycache__/SCScore.cpython-39.pyc ADDED Viewed

Binary file (4.63 kB). View file

app.py CHANGED Viewed

@@ -25,20 +25,165 @@
 # df = df[0:10000]
 # print(df[['SMILES']].to_csv('/home/saicharan/Downloads/chembl_10000.csv'))
-from scscore.scscore.standalone_model_numpy import SCScorer
 import pandas as pd
 model = SCScorer()
 model.restore()
-pubchem = pd.read_csv('/home/saicharan/Downloads/chembl_10000.csv')
-# smis = ['CCCOCCC', 'CCCNc1ccccc1']
-smis = pubchem['SMILES'].tolist()
-smis = smis[0:1000]
 print('computing')
-average_score = model.get_avg_score(smis)
 # Print the average score
 print('Average score:', average_score)

 # df = df[0:10000]
 # print(df[['SMILES']].to_csv('/home/saicharan/Downloads/chembl_10000.csv'))
+# from SCScore import SCScorer
+# '''
+# This is a standalone, importable SCScorer model. It does not have tensorflow as a
+# dependency and is a more attractive option for deployment. The calculations are
+# fast enough that there is no real reason to use GPUs (via tf) instead of CPUs (via np)
+# '''
+# import numpy as np
+# import time
+# import rdkit.Chem as Chem
+# import rdkit.Chem.AllChem as AllChem
+# import json
+# import gzip
+# import six
+# import os
+# project_root = os.path.dirname(os.path.dirname(__file__))
+# score_scale = 5.0
+# min_separation = 0.25
+# FP_len = 1024
+# FP_rad = 2
+# def sigmoid(x):
+#   return 1 / (1 + np.exp(-x))
+# class SCScorer():
+#     def __init__(self, score_scale=score_scale):
+#         self.vars = []
+#         self.score_scale = score_scale
+#         self._restored = False
+#     def restore(self, weight_path=os.path.join('model.ckpt-10654.as_numpy.json.gz'), FP_rad=FP_rad, FP_len=FP_len):
+#         self.FP_len = FP_len; self.FP_rad = FP_rad
+#         self._load_vars(weight_path)
+#         # print('Restored variables from {}'.format(weight_path))
+#         if 'uint8' in weight_path or 'counts' in weight_path:
+#             def mol_to_fp(self, mol):
+#                 if mol is None:
+#                     return np.array((self.FP_len,), dtype=np.uint8)
+#                 fp = AllChem.GetMorganFingerprint(mol, self.FP_rad, useChirality=True) # UIntSparseIntVect
+#                 fp_folded = np.zeros((self.FP_len,), dtype=np.uint8)
+#                 for k, v in six.iteritems(fp.GetNonzeroElements()):
+#                     fp_folded[k % self.FP_len] += v
+#                 return np.array(fp_folded)
+#         else:
+#             def mol_to_fp(self, mol):
+#                 if mol is None:
+#                     return np.zeros((self.FP_len,), dtype=np.float32)
+#                 return np.array(AllChem.GetMorganFingerprintAsBitVect(mol, self.FP_rad, nBits=self.FP_len,
+#                     useChirality=True), dtype=np.bool_)
+#         self.mol_to_fp = mol_to_fp
+#         self._restored = True
+#         return self
+#     def smi_to_fp(self, smi):
+#         if not smi:
+#             return np.zeros((self.FP_len,), dtype=np.float32)
+#         return self.mol_to_fp(self, Chem.MolFromSmiles(smi))
+#     def apply(self, x):
+#         if not self._restored:
+#             raise ValueError('Must restore model weights!')
+#         # Each pair of vars is a weight and bias term
+#         for i in range(0, len(self.vars), 2):
+#             last_layer = (i == len(self.vars)-2)
+#             W = self.vars[i]
+#             b = self.vars[i+1]
+#             x = np.matmul(x, W) + b
+#             if not last_layer:
+#                 x = x * (x > 0) # ReLU
+#         x = 1 + (score_scale - 1) * sigmoid(x)
+#         return x
+#     def get_score_from_smi(self, smi='', v=False):
+#         if not smi:
+#             return ('', 0.)
+#         fp = np.array((self.smi_to_fp(smi)), dtype=np.float32)
+#         if sum(fp) == 0:
+#             if v: print('Could not get fingerprint?')
+#             cur_score = 0.
+#         else:
+#             # Run
+#             cur_score = self.apply(fp)
+#             if v: print('Score: {}'.format(cur_score))
+#         mol = Chem.MolFromSmiles(smi)
+#         if mol:
+#             smi = Chem.MolToSmiles(mol, isomericSmiles=True, kekuleSmiles=True)
+#         else:
+#             smi = ''
+#         return (smi, cur_score)
+#     def get_avg_score(self, smis):
+#         """
+#         Compute the average score for a list of SMILES strings.
+#         Args:
+#             smis (list of str): A list of SMILES strings.
+#         Returns:
+#             float: The average score of the given SMILES strings.
+#         """
+#         if not smis:  # Check if the list is empty
+#             return 0.0
+#         total_score = 0.0
+#         valid_smiles_count = 0
+#         for smi in smis:
+#             _, score = self.get_score_from_smi(smi)
+#             if score > 0:  # Assuming only positive scores are valid
+#                 total_score += score
+#                 valid_smiles_count += 1
+#         # Avoid division by zero
+#         if valid_smiles_count == 0:
+#             return 0.0
+#         else:
+#             return total_score / valid_smiles_count
+#     def _load_vars(self, weight_path):
+#         if weight_path.endswith('pickle'):
+#             import pickle
+#             with open(weight_path, 'rb') as fid:
+#                 self.vars = pickle.load(fid)
+#                 self.vars = [x.tolist() for x in self.vars]
+#         elif weight_path.endswith('json.gz'):
+#             with gzip.GzipFile(weight_path, 'r') as fin:    # 4. gzip
+#                 json_bytes = fin.read()                      # 3. bytes (i.e. UTF-8)
+#                 json_str = json_bytes.decode('utf-8')            # 2. string (i.e. JSON)
+#                 self.vars = json.loads(json_str)
+#                 self.vars = [np.array(x) for x in self.vars]
+from myscscore.SCScore import SCScorer
 import pandas as pd
 model = SCScorer()
 model.restore()
+# import evaluate
+# molgenevalmetric = evaluate.load("saicharan2804/molgenevalmetric")
+df = pd.read_csv('/home/saicharan/Downloads/chembl_10000.csv')
+ls= df['SMILES'].tolist()
+ls_gen = ls[0:5000]
+ls_train = ls[5000:10000]
 print('computing')
+average_score = model.get_avg_score(ls_gen)
 # Print the average score
 print('Average score:', average_score)
+# print(molgenevalmetric.compute(gensmi = ls_gen, trainsmi = ls_train))

molgenevalmetric.py CHANGED Viewed

@@ -517,11 +517,11 @@ class molgenevalmetric(evaluate.Metric):
     def _compute(self, gensmi, trainsmi):
         metrics = {}
-        metrics['novelty'] = novelty(gen = gensmi, train = trainsmi)
-        metrics['valid'] = fraction_valid(gen=gensmi)
-        metrics['unique'] = fraction_unique(gen=gensmi)
-        metrics['IntDiv'] = internal_diversity(gen=gensmi)
-        metrics['FCD'] = fcd_metric(gen = gensmi, train = trainsmi)
         # metrics['Oracles'] = oracles(gen = gensmi, train = trainsmi)
         # metrics['SA'] = SAscore(gen=gensmi)

     def _compute(self, gensmi, trainsmi):
         metrics = {}
+        # metrics['novelty'] = novelty(gen = gensmi, train = trainsmi)
+        # metrics['valid'] = fraction_valid(gen=gensmi)
+        # metrics['unique'] = fraction_unique(gen=gensmi)
+        # metrics['IntDiv'] = internal_diversity(gen=gensmi)
+        # metrics['FCD'] = fcd_metric(gen = gensmi, train = trainsmi)
         # metrics['Oracles'] = oracles(gen = gensmi, train = trainsmi)
         # metrics['SA'] = SAscore(gen=gensmi)