# NOTE(review): "Spaces: Sleeping" looks like page-status residue captured with this file; it is not part of the script.
# Driver script: load a CSV of SMILES strings, split the list in two halves,
# and print the result of the "molgenevalmetric" evaluation module on them.
import evaluate
import pandas as pd

# Hard-coded absolute input path; the CSV must contain a 'SMILES' column.
df = pd.read_csv('/Users/saicharan/chembl_10000.csv')

# Metric module resolved by name through `evaluate.load`.
molgenevalmetric = evaluate.load("saicharan2804/molgenevalmetric")

ls = df['SMILES'].tolist()
# The first 5000 entries are passed as `gensmi`, the next 5000 as `trainsmi`.
ls_gen = ls[0:5000]
ls_train = ls[5000:10000]

print('computing')
print(molgenevalmetric.compute(gensmi=ls_gen, trainsmi=ls_train))
# import evaluate | |
# from evaluate.utils import launch_gradio_widget | |
# import gradio as gr | |
# module = evaluate.load("saicharan2804/molgenevalmetric") | |
# # launch_gradio_widget(module) | |
# iface = gr.Interface( | |
# fn = module, | |
# inputs=[ | |
# gr.File(label="Generated SMILES"), | |
# gr.File(label="Training Data", value=None), | |
# ], | |
# outputs="text" | |
# ) | |
# iface.launch() | |
# import pandas as pd | |
# df = pd.read_csv('/home/saicharan/Downloads/chembl.csv') | |
# df = df.rename(columns={'canonical_smiles': 'SMILES'}) | |
# df = df[0:10000] | |
# print(df[['SMILES']].to_csv('/home/saicharan/Downloads/chembl_10000.csv')) | |
# from SCScore import SCScorer | |
# ''' | |
# This is a standalone, importable SCScorer model. It does not have tensorflow as a | |
# dependency and is a more attractive option for deployment. The calculations are | |
# fast enough that there is no real reason to use GPUs (via tf) instead of CPUs (via np) | |
# ''' | |
# import numpy as np | |
# import time | |
# import rdkit.Chem as Chem | |
# import rdkit.Chem.AllChem as AllChem | |
# import json | |
# import gzip | |
# import six | |
# import os | |
# project_root = os.path.dirname(os.path.dirname(__file__)) | |
# score_scale = 5.0 | |
# min_separation = 0.25 | |
# FP_len = 1024 | |
# FP_rad = 2 | |
# def sigmoid(x): | |
# return 1 / (1 + np.exp(-x)) | |
# class SCScorer(): | |
# def __init__(self, score_scale=score_scale): | |
# self.vars = [] | |
# self.score_scale = score_scale | |
# self._restored = False | |
# def restore(self, weight_path=os.path.join('model.ckpt-10654.as_numpy.json.gz'), FP_rad=FP_rad, FP_len=FP_len): | |
# self.FP_len = FP_len; self.FP_rad = FP_rad | |
# self._load_vars(weight_path) | |
# # print('Restored variables from {}'.format(weight_path)) | |
# if 'uint8' in weight_path or 'counts' in weight_path: | |
# def mol_to_fp(self, mol): | |
# if mol is None: | |
# return np.array((self.FP_len,), dtype=np.uint8) | |
# fp = AllChem.GetMorganFingerprint(mol, self.FP_rad, useChirality=True)  # returns a UIntSparseIntVect
# fp_folded = np.zeros((self.FP_len,), dtype=np.uint8) | |
# for k, v in six.iteritems(fp.GetNonzeroElements()): | |
# fp_folded[k % self.FP_len] += v | |
# return np.array(fp_folded) | |
# else: | |
# def mol_to_fp(self, mol): | |
# if mol is None: | |
# return np.zeros((self.FP_len,), dtype=np.float32) | |
# return np.array(AllChem.GetMorganFingerprintAsBitVect(mol, self.FP_rad, nBits=self.FP_len, | |
# useChirality=True), dtype=np.bool_) | |
# self.mol_to_fp = mol_to_fp | |
# self._restored = True | |
# return self | |
# def smi_to_fp(self, smi): | |
# if not smi: | |
# return np.zeros((self.FP_len,), dtype=np.float32) | |
# return self.mol_to_fp(self, Chem.MolFromSmiles(smi)) | |
# def apply(self, x): | |
# if not self._restored: | |
# raise ValueError('Must restore model weights!') | |
# # Each pair of vars is a weight and bias term | |
# for i in range(0, len(self.vars), 2): | |
# last_layer = (i == len(self.vars)-2) | |
# W = self.vars[i] | |
# b = self.vars[i+1] | |
# x = np.matmul(x, W) + b | |
# if not last_layer: | |
# x = x * (x > 0) # ReLU | |
# x = 1 + (score_scale - 1) * sigmoid(x) | |
# return x | |
# def get_score_from_smi(self, smi='', v=False): | |
# if not smi: | |
# return ('', 0.) | |
# fp = np.array((self.smi_to_fp(smi)), dtype=np.float32) | |
# if sum(fp) == 0: | |
# if v: print('Could not get fingerprint?') | |
# cur_score = 0. | |
# else: | |
# # Run | |
# cur_score = self.apply(fp) | |
# if v: print('Score: {}'.format(cur_score)) | |
# mol = Chem.MolFromSmiles(smi) | |
# if mol: | |
# smi = Chem.MolToSmiles(mol, isomericSmiles=True, kekuleSmiles=True) | |
# else: | |
# smi = '' | |
# return (smi, cur_score) | |
# def get_avg_score(self, smis): | |
# """ | |
# Compute the average score for a list of SMILES strings. | |
# Args: | |
# smis (list of str): A list of SMILES strings. | |
# Returns: | |
# float: The average score of the given SMILES strings. | |
# """ | |
# if not smis: # Check if the list is empty | |
# return 0.0 | |
# total_score = 0.0 | |
# valid_smiles_count = 0 | |
# for smi in smis: | |
# _, score = self.get_score_from_smi(smi) | |
# if score > 0: # Assuming only positive scores are valid | |
# total_score += score | |
# valid_smiles_count += 1 | |
# # Avoid division by zero | |
# if valid_smiles_count == 0: | |
# return 0.0 | |
# else: | |
# return total_score / valid_smiles_count | |
# def _load_vars(self, weight_path): | |
# if weight_path.endswith('pickle'): | |
# import pickle | |
# with open(weight_path, 'rb') as fid: | |
# self.vars = pickle.load(fid) | |
# self.vars = [x.tolist() for x in self.vars] | |
# elif weight_path.endswith('json.gz'): | |
# with gzip.GzipFile(weight_path, 'r') as fin: # 4. gzip | |
# json_bytes = fin.read() # 3. bytes (i.e. UTF-8) | |
# json_str = json_bytes.decode('utf-8') # 2. string (i.e. JSON) | |
# self.vars = json.loads(json_str) | |
# self.vars = [np.array(x) for x in self.vars] | |
# from myscscore.SCScore import SCScorer | |
# import pandas as pd | |
# model = SCScorer() | |
# model.restore() | |
# # import evaluate | |
# # molgenevalmetric = evaluate.load("saicharan2804/molgenevalmetric") | |
# df = pd.read_csv('/home/saicharan/Downloads/chembl_10000.csv') | |
# ls= df['SMILES'].tolist() | |
# ls_gen = ls[0:5000] | |
# ls_train = ls[5000:10000] | |
# print('computing') | |
# average_score = model.get_avg_score(ls_gen) | |
# # Print the average score | |
# print('Average score:', average_score) | |
# # print(molgenevalmetric.compute(gensmi = ls_gen, trainsmi = ls_train)) | |