saicharan2804 committed on
Commit
e8c430f
1 Parent(s): e9ff6dc
__pycache__/SCScore.cpython-39.pyc ADDED
Binary file (4.63 kB). View file
 
app.py CHANGED
@@ -25,20 +25,165 @@
25
  # df = df[0:10000]
26
 
27
  # print(df[['SMILES']].to_csv('/home/saicharan/Downloads/chembl_10000.csv'))
28
- from scscore.scscore.standalone_model_numpy import SCScorer
29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  import pandas as pd
31
 
32
  model = SCScorer()
33
  model.restore()
 
 
34
 
35
- pubchem = pd.read_csv('/home/saicharan/Downloads/chembl_10000.csv')
36
 
37
- # smis = ['CCCOCCC', 'CCCNc1ccccc1']
38
- smis = pubchem['SMILES'].tolist()
39
- smis = smis[0:1000]
 
40
  print('computing')
41
- average_score = model.get_avg_score(smis)
42
 
43
  # Print the average score
44
  print('Average score:', average_score)
 
 
25
  # df = df[0:10000]
26
 
27
  # print(df[['SMILES']].to_csv('/home/saicharan/Downloads/chembl_10000.csv'))
28
+ # from SCScore import SCScorer
29
 
30
+
31
+ # '''
32
+ # This is a standalone, importable SCScorer model. It does not have tensorflow as a
33
+ # dependency and is a more attractive option for deployment. The calculations are
34
+ # fast enough that there is no real reason to use GPUs (via tf) instead of CPUs (via np)
35
+ # '''
36
+
37
+ # import numpy as np
38
+ # import time
39
+ # import rdkit.Chem as Chem
40
+ # import rdkit.Chem.AllChem as AllChem
41
+ # import json
42
+ # import gzip
43
+ # import six
44
+
45
+ # import os
46
+ # project_root = os.path.dirname(os.path.dirname(__file__))
47
+
48
+ # score_scale = 5.0
49
+ # min_separation = 0.25
50
+
51
+ # FP_len = 1024
52
+ # FP_rad = 2
53
+
54
+ # def sigmoid(x):
55
+ # return 1 / (1 + np.exp(-x))
56
+
57
+ # class SCScorer():
58
+ # def __init__(self, score_scale=score_scale):
59
+ # self.vars = []
60
+ # self.score_scale = score_scale
61
+ # self._restored = False
62
+
63
+ # def restore(self, weight_path=os.path.join('model.ckpt-10654.as_numpy.json.gz'), FP_rad=FP_rad, FP_len=FP_len):
64
+ # self.FP_len = FP_len; self.FP_rad = FP_rad
65
+ # self._load_vars(weight_path)
66
+ # # print('Restored variables from {}'.format(weight_path))
67
+
68
+ # if 'uint8' in weight_path or 'counts' in weight_path:
69
+ # def mol_to_fp(self, mol):
70
+ # if mol is None:
71
+ # return np.array((self.FP_len,), dtype=np.uint8)
72
+ # fp = AllChem.GetMorganFingerprint(mol, self.FP_rad, useChirality=True) # UIntSparseIntVect
73
+ # fp_folded = np.zeros((self.FP_len,), dtype=np.uint8)
74
+ # for k, v in six.iteritems(fp.GetNonzeroElements()):
75
+ # fp_folded[k % self.FP_len] += v
76
+ # return np.array(fp_folded)
77
+ # else:
78
+ # def mol_to_fp(self, mol):
79
+ # if mol is None:
80
+ # return np.zeros((self.FP_len,), dtype=np.float32)
81
+ # return np.array(AllChem.GetMorganFingerprintAsBitVect(mol, self.FP_rad, nBits=self.FP_len,
82
+ # useChirality=True), dtype=np.bool_)
83
+ # self.mol_to_fp = mol_to_fp
84
+
85
+ # self._restored = True
86
+ # return self
87
+
88
+ # def smi_to_fp(self, smi):
89
+ # if not smi:
90
+ # return np.zeros((self.FP_len,), dtype=np.float32)
91
+ # return self.mol_to_fp(self, Chem.MolFromSmiles(smi))
92
+
93
+ # def apply(self, x):
94
+ # if not self._restored:
95
+ # raise ValueError('Must restore model weights!')
96
+ # # Each pair of vars is a weight and bias term
97
+ # for i in range(0, len(self.vars), 2):
98
+ # last_layer = (i == len(self.vars)-2)
99
+ # W = self.vars[i]
100
+ # b = self.vars[i+1]
101
+ # x = np.matmul(x, W) + b
102
+ # if not last_layer:
103
+ # x = x * (x > 0) # ReLU
104
+ # x = 1 + (score_scale - 1) * sigmoid(x)
105
+ # return x
106
+
107
+ # def get_score_from_smi(self, smi='', v=False):
108
+ # if not smi:
109
+ # return ('', 0.)
110
+ # fp = np.array((self.smi_to_fp(smi)), dtype=np.float32)
111
+ # if sum(fp) == 0:
112
+ # if v: print('Could not get fingerprint?')
113
+ # cur_score = 0.
114
+ # else:
115
+ # # Run
116
+ # cur_score = self.apply(fp)
117
+ # if v: print('Score: {}'.format(cur_score))
118
+ # mol = Chem.MolFromSmiles(smi)
119
+ # if mol:
120
+ # smi = Chem.MolToSmiles(mol, isomericSmiles=True, kekuleSmiles=True)
121
+ # else:
122
+ # smi = ''
123
+ # return (smi, cur_score)
124
+
125
+ # def get_avg_score(self, smis):
126
+ # """
127
+ # Compute the average score for a list of SMILES strings.
128
+
129
+ # Args:
130
+ # smis (list of str): A list of SMILES strings.
131
+
132
+ # Returns:
133
+ # float: The average score of the given SMILES strings.
134
+ # """
135
+ # if not smis: # Check if the list is empty
136
+ # return 0.0
137
+
138
+ # total_score = 0.0
139
+ # valid_smiles_count = 0
140
+
141
+ # for smi in smis:
142
+ # _, score = self.get_score_from_smi(smi)
143
+ # if score > 0: # Assuming only positive scores are valid
144
+ # total_score += score
145
+ # valid_smiles_count += 1
146
+
147
+ # # Avoid division by zero
148
+ # if valid_smiles_count == 0:
149
+ # return 0.0
150
+ # else:
151
+ # return total_score / valid_smiles_count
152
+
153
+ # def _load_vars(self, weight_path):
154
+ # if weight_path.endswith('pickle'):
155
+ # import pickle
156
+ # with open(weight_path, 'rb') as fid:
157
+ # self.vars = pickle.load(fid)
158
+ # self.vars = [x.tolist() for x in self.vars]
159
+ # elif weight_path.endswith('json.gz'):
160
+ # with gzip.GzipFile(weight_path, 'r') as fin: # 4. gzip
161
+ # json_bytes = fin.read() # 3. bytes (i.e. UTF-8)
162
+ # json_str = json_bytes.decode('utf-8') # 2. string (i.e. JSON)
163
+ # self.vars = json.loads(json_str)
164
+ # self.vars = [np.array(x) for x in self.vars]
165
+
166
+
167
+
168
+
169
+
170
+ from myscscore.SCScore import SCScorer
171
  import pandas as pd
172
 
173
  model = SCScorer()
174
  model.restore()
175
+ # import evaluate
176
+ # molgenevalmetric = evaluate.load("saicharan2804/molgenevalmetric")
177
 
178
+ df = pd.read_csv('/home/saicharan/Downloads/chembl_10000.csv')
179
 
180
+ ls= df['SMILES'].tolist()
181
+ ls_gen = ls[0:5000]
182
+ ls_train = ls[5000:10000]
183
+
184
  print('computing')
185
+ average_score = model.get_avg_score(ls_gen)
186
 
187
  # Print the average score
188
  print('Average score:', average_score)
189
+ # print(molgenevalmetric.compute(gensmi = ls_gen, trainsmi = ls_train))
molgenevalmetric.py CHANGED
@@ -517,11 +517,11 @@ class molgenevalmetric(evaluate.Metric):
517
  def _compute(self, gensmi, trainsmi):
518
 
519
  metrics = {}
520
- metrics['novelty'] = novelty(gen = gensmi, train = trainsmi)
521
- metrics['valid'] = fraction_valid(gen=gensmi)
522
- metrics['unique'] = fraction_unique(gen=gensmi)
523
- metrics['IntDiv'] = internal_diversity(gen=gensmi)
524
- metrics['FCD'] = fcd_metric(gen = gensmi, train = trainsmi)
525
  # metrics['Oracles'] = oracles(gen = gensmi, train = trainsmi)
526
 
527
  # metrics['SA'] = SAscore(gen=gensmi)
 
517
  def _compute(self, gensmi, trainsmi):
518
 
519
  metrics = {}
520
+ # metrics['novelty'] = novelty(gen = gensmi, train = trainsmi)
521
+ # metrics['valid'] = fraction_valid(gen=gensmi)
522
+ # metrics['unique'] = fraction_unique(gen=gensmi)
523
+ # metrics['IntDiv'] = internal_diversity(gen=gensmi)
524
+ # metrics['FCD'] = fcd_metric(gen = gensmi, train = trainsmi)
525
  # metrics['Oracles'] = oracles(gen = gensmi, train = trainsmi)
526
 
527
  # metrics['SA'] = SAscore(gen=gensmi)