# Hugging Face Space: saicharan2804/molgenevalmetric (status: Sleeping)
# File size: 6,657 Bytes

import evaluate
import pandas as pd

from molgenevalmetric import SYBAscore  # used only by the commented-out check below

# Machine-specific location of the input CSV; it must provide a 'SMILES' column.
CSV_PATH = '/Users/saicharan/chembl_10000.csv'

df = pd.read_csv(CSV_PATH)

# Load the metric module identified by "saicharan2804/molgenevalmetric".
met = evaluate.load("saicharan2804/molgenevalmetric")

# Split one SMILES list into two disjoint 500-entry slices: the first is passed
# to the metric as the "generated" set, the second as the "training" set.
smiles = df['SMILES'].tolist()
ls_gen = smiles[:500]
ls_train = smiles[500:1000]

print('computing')
# print(SYBAscore(gen=ls_gen))
print(met.compute(gensmi = ls_gen, trainsmi = ls_train))
# print(qed_metric(gen=ls_gen))
# print(logP_metric(gen=ls_gen))
# print(average_sascore(gen=ls_gen))

# print(oracles(gen=ls_gen, train=ls_train))

# import evaluate
# from evaluate.utils import launch_gradio_widget
# import gradio as gr

# module = evaluate.load("saicharan2804/molgenevalmetric")
# # launch_gradio_widget(module)

# iface = gr.Interface(
#     fn = module,
#     inputs=[
#         gr.File(label="Generated SMILES"), 
#         gr.File(label="Training Data", value=None),
#         ],
#     outputs="text"
# )

# iface.launch()

# import pandas as pd

# df = pd.read_csv('/home/saicharan/Downloads/chembl.csv')

# df = df.rename(columns={'canonical_smiles': 'SMILES'})

# df = df[0:10000]

# print(df[['SMILES']].to_csv('/home/saicharan/Downloads/chembl_10000.csv'))
# from SCScore import SCScorer


# '''
# This is a standalone, importable SCScorer model. It does not have tensorflow as a
# dependency and is a more attractive option for deployment. The calculations are
# fast enough that there is no real reason to use GPUs (via tf) instead of CPUs (via np)
# '''

# import numpy as np
# import time
# import rdkit.Chem as Chem
# import rdkit.Chem.AllChem as AllChem
# import json
# import gzip
# import six

# import os
# project_root = os.path.dirname(os.path.dirname(__file__))

# score_scale = 5.0
# min_separation = 0.25

# FP_len = 1024
# FP_rad = 2

# def sigmoid(x):
#   return 1 / (1 + np.exp(-x))

# class SCScorer():
#     def __init__(self, score_scale=score_scale):
#         self.vars = []
#         self.score_scale = score_scale
#         self._restored = False

#     def restore(self, weight_path=os.path.join('model.ckpt-10654.as_numpy.json.gz'), FP_rad=FP_rad, FP_len=FP_len):
#         self.FP_len = FP_len; self.FP_rad = FP_rad
#         self._load_vars(weight_path)
#         # print('Restored variables from {}'.format(weight_path))

#         if 'uint8' in weight_path or 'counts' in weight_path:
#             def mol_to_fp(self, mol):
#                 if mol is None:
#                     return np.array((self.FP_len,), dtype=np.uint8)
#                 fp = AllChem.GetMorganFingerprint(mol, self.FP_rad, useChirality=True) # returns a UIntSparseIntVect
#                 fp_folded = np.zeros((self.FP_len,), dtype=np.uint8)
#                 for k, v in six.iteritems(fp.GetNonzeroElements()):
#                     fp_folded[k % self.FP_len] += v
#                 return np.array(fp_folded)
#         else:
#             def mol_to_fp(self, mol):
#                 if mol is None:
#                     return np.zeros((self.FP_len,), dtype=np.float32)
#                 return np.array(AllChem.GetMorganFingerprintAsBitVect(mol, self.FP_rad, nBits=self.FP_len,
#                     useChirality=True), dtype=np.bool_)
#         self.mol_to_fp = mol_to_fp

#         self._restored = True
#         return self

#     def smi_to_fp(self, smi):
#         if not smi:
#             return np.zeros((self.FP_len,), dtype=np.float32)
#         return self.mol_to_fp(self, Chem.MolFromSmiles(smi))

#     def apply(self, x):
#         if not self._restored:
#             raise ValueError('Must restore model weights!')
#         # Each pair of vars is a weight and bias term
#         for i in range(0, len(self.vars), 2):
#             last_layer = (i == len(self.vars)-2)
#             W = self.vars[i]
#             b = self.vars[i+1]
#             x = np.matmul(x, W) + b
#             if not last_layer:
#                 x = x * (x > 0) # ReLU
#         x = 1 + (score_scale - 1) * sigmoid(x)
#         return x

#     def get_score_from_smi(self, smi='', v=False):
#         if not smi:
#             return ('', 0.)
#         fp = np.array((self.smi_to_fp(smi)), dtype=np.float32)
#         if sum(fp) == 0:
#             if v: print('Could not get fingerprint?')
#             cur_score = 0.
#         else:
#             # Run
#             cur_score = self.apply(fp)
#             if v: print('Score: {}'.format(cur_score))
#         mol = Chem.MolFromSmiles(smi)
#         if mol:
#             smi = Chem.MolToSmiles(mol, isomericSmiles=True, kekuleSmiles=True)
#         else:
#             smi = ''
#         return (smi, cur_score)
    
#     def get_avg_score(self, smis):
#         """
#         Compute the average score for a list of SMILES strings.

#         Args:
#             smis (list of str): A list of SMILES strings.

#         Returns:
#             float: The average score of the given SMILES strings.
#         """
#         if not smis:  # Check if the list is empty
#             return 0.0
        
#         total_score = 0.0
#         valid_smiles_count = 0
        
#         for smi in smis:
#             _, score = self.get_score_from_smi(smi)
#             if score > 0:  # Assuming only positive scores are valid
#                 total_score += score
#                 valid_smiles_count += 1

#         # Avoid division by zero
#         if valid_smiles_count == 0:
#             return 0.0
#         else:
#             return total_score / valid_smiles_count

#     def _load_vars(self, weight_path):
#         if weight_path.endswith('pickle'):
#             import pickle
#             with open(weight_path, 'rb') as fid:
#                 self.vars = pickle.load(fid)
#                 self.vars = [x.tolist() for x in self.vars]
#         elif weight_path.endswith('json.gz'):
#             with gzip.GzipFile(weight_path, 'r') as fin:    # 4. gzip
#                 json_bytes = fin.read()                      # 3. bytes (i.e. UTF-8)
#                 json_str = json_bytes.decode('utf-8')            # 2. string (i.e. JSON)
#                 self.vars = json.loads(json_str)
#                 self.vars = [np.array(x) for x in self.vars]





# from myscscore.SCScore import SCScorer
# import pandas as pd

# model = SCScorer()
# model.restore()
# # import evaluate
# # molgenevalmetric = evaluate.load("saicharan2804/molgenevalmetric")

# df = pd.read_csv('/home/saicharan/Downloads/chembl_10000.csv')
    
# ls= df['SMILES'].tolist()
# ls_gen = ls[0:5000]
# ls_train = ls[5000:10000]

# print('computing')
# average_score = model.get_avg_score(ls_gen)

# # Print the average score
# print('Average score:', average_score)
# # print(molgenevalmetric.compute(gensmi = ls_gen, trainsmi = ls_train))