Spaces:
Sleeping
Sleeping
File size: 6,469 Bytes
c36a10c 9b06241 e8c430f 9b06241 e8c430f c36a10c 9b06241 c36a10c 9b06241 c36a10c 9b06241 c36a10c e8c430f c36a10c 9b06241 c36a10c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 |
# Driver script: read SMILES strings from a local CSV, split them into a
# "generated" half and a "training" half, and print whatever the
# molgenevalmetric evaluation module computes for that pair.
import pandas as pd
df = pd.read_csv('/Users/saicharan/chembl_10000.csv')

import evaluate
molgenevalmetric = evaluate.load("saicharan2804/molgenevalmetric")

# Rows 0-4999 play the role of generated molecules, rows 5000-9999 the
# role of the training set.
ls = df['SMILES'].tolist()
ls_gen, ls_train = ls[:5000], ls[5000:10000]

print('computing')
result = molgenevalmetric.compute(gensmi=ls_gen, trainsmi=ls_train)
print(result)
# import evaluate
# from evaluate.utils import launch_gradio_widget
# import gradio as gr
# module = evaluate.load("saicharan2804/molgenevalmetric")
# # launch_gradio_widget(module)
# iface = gr.Interface(
# fn = module,
# inputs=[
# gr.File(label="Generated SMILES"),
# gr.File(label="Training Data", value=None),
# ],
# outputs="text"
# )
# iface.launch()
# import pandas as pd
# df = pd.read_csv('/home/saicharan/Downloads/chembl.csv')
# df = df.rename(columns={'canonical_smiles': 'SMILES'})
# df = df[0:10000]
# print(df[['SMILES']].to_csv('/home/saicharan/Downloads/chembl_10000.csv'))
# from SCScore import SCScorer
# '''
# This is a standalone, importable SCScorer model. It does not have tensorflow as a
# dependency and is a more attractive option for deployment. The calculations are
# fast enough that there is no real reason to use GPUs (via tf) instead of CPUs (via np)
# '''
# import numpy as np
# import time
# import rdkit.Chem as Chem
# import rdkit.Chem.AllChem as AllChem
# import json
# import gzip
# import six
# import os
# project_root = os.path.dirname(os.path.dirname(__file__))
# score_scale = 5.0
# min_separation = 0.25
# FP_len = 1024
# FP_rad = 2
# def sigmoid(x):
# return 1 / (1 + np.exp(-x))
# class SCScorer():
# def __init__(self, score_scale=score_scale):
# self.vars = []
# self.score_scale = score_scale
# self._restored = False
# def restore(self, weight_path=os.path.join('model.ckpt-10654.as_numpy.json.gz'), FP_rad=FP_rad, FP_len=FP_len):
# self.FP_len = FP_len; self.FP_rad = FP_rad
# self._load_vars(weight_path)
# # print('Restored variables from {}'.format(weight_path))
# if 'uint8' in weight_path or 'counts' in weight_path:
# def mol_to_fp(self, mol):
# if mol is None:
# return np.array((self.FP_len,), dtype=np.uint8)
# fp = AllChem.GetMorganFingerprint(mol, self.FP_rad, useChirality=True) # UIntSparseIntVect
# fp_folded = np.zeros((self.FP_len,), dtype=np.uint8)
# for k, v in six.iteritems(fp.GetNonzeroElements()):
# fp_folded[k % self.FP_len] += v
# return np.array(fp_folded)
# else:
# def mol_to_fp(self, mol):
# if mol is None:
# return np.zeros((self.FP_len,), dtype=np.float32)
# return np.array(AllChem.GetMorganFingerprintAsBitVect(mol, self.FP_rad, nBits=self.FP_len,
# useChirality=True), dtype=np.bool_)
# self.mol_to_fp = mol_to_fp
# self._restored = True
# return self
# def smi_to_fp(self, smi):
# if not smi:
# return np.zeros((self.FP_len,), dtype=np.float32)
# return self.mol_to_fp(self, Chem.MolFromSmiles(smi))
# def apply(self, x):
# if not self._restored:
# raise ValueError('Must restore model weights!')
# # Each pair of vars is a weight and bias term
# for i in range(0, len(self.vars), 2):
# last_layer = (i == len(self.vars)-2)
# W = self.vars[i]
# b = self.vars[i+1]
# x = np.matmul(x, W) + b
# if not last_layer:
# x = x * (x > 0) # ReLU
# x = 1 + (score_scale - 1) * sigmoid(x)
# return x
# def get_score_from_smi(self, smi='', v=False):
# if not smi:
# return ('', 0.)
# fp = np.array((self.smi_to_fp(smi)), dtype=np.float32)
# if sum(fp) == 0:
# if v: print('Could not get fingerprint?')
# cur_score = 0.
# else:
# # Run
# cur_score = self.apply(fp)
# if v: print('Score: {}'.format(cur_score))
# mol = Chem.MolFromSmiles(smi)
# if mol:
# smi = Chem.MolToSmiles(mol, isomericSmiles=True, kekuleSmiles=True)
# else:
# smi = ''
# return (smi, cur_score)
# def get_avg_score(self, smis):
# """
# Compute the average score for a list of SMILES strings.
# Args:
# smis (list of str): A list of SMILES strings.
# Returns:
# float: The average score of the given SMILES strings.
# """
# if not smis: # Check if the list is empty
# return 0.0
# total_score = 0.0
# valid_smiles_count = 0
# for smi in smis:
# _, score = self.get_score_from_smi(smi)
# if score > 0: # Assuming only positive scores are valid
# total_score += score
# valid_smiles_count += 1
# # Avoid division by zero
# if valid_smiles_count == 0:
# return 0.0
# else:
# return total_score / valid_smiles_count
# def _load_vars(self, weight_path):
# if weight_path.endswith('pickle'):
# import pickle
# with open(weight_path, 'rb') as fid:
# self.vars = pickle.load(fid)
# self.vars = [x.tolist() for x in self.vars]
# elif weight_path.endswith('json.gz'):
# with gzip.GzipFile(weight_path, 'r') as fin: # 4. gzip
# json_bytes = fin.read() # 3. bytes (i.e. UTF-8)
# json_str = json_bytes.decode('utf-8') # 2. string (i.e. JSON)
# self.vars = json.loads(json_str)
# self.vars = [np.array(x) for x in self.vars]
# from myscscore.SCScore import SCScorer
# import pandas as pd
# model = SCScorer()
# model.restore()
# # import evaluate
# # molgenevalmetric = evaluate.load("saicharan2804/molgenevalmetric")
# df = pd.read_csv('/home/saicharan/Downloads/chembl_10000.csv')
# ls= df['SMILES'].tolist()
# ls_gen = ls[0:5000]
# ls_train = ls[5000:10000]
# print('computing')
# average_score = model.get_avg_score(ls_gen)
# # Print the average score
# print('Average score:', average_score)
# # print(molgenevalmetric.compute(gensmi = ls_gen, trainsmi = ls_train))
|