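# Spaces: Sleeping
# NOTE(review): the line above looks like web-page banner text captured along with the file; it is not module code.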
import evaluate | |
import datasets | |
# import moses | |
# from moses import metrics | |
import pandas as pd | |
# from tdc import Evaluator | |
# from tdc import Oracle | |
from metrics import novelty, fraction_valid, fraction_unique, SAscore, internal_diversity,fcd_metric, SYBAscore, oracles | |
_DESCRIPTION = """ | |
Comprehensive suite of metrics designed to assess the performance of molecular generation models, for understanding how well a model can produce novel, chemically valid molecules that are relevant to specific research objectives. | |
""" | |
_KWARGS_DESCRIPTION = """ | |
Args: | |
generated_smiles (`list` of `string`): A collection of SMILES (Simplified Molecular Input Line Entry System) strings generated by the model, ideally encompassing more than 30,000 samples. | |
train_smiles (`list` of `string`): The dataset of SMILES strings used to train the model, serving as a reference to evaluate the novelty and diversity of the generated molecules. | |
Returns: | |
Dectionary item containing various metrics to evaluate model performance | |
""" | |
_CITATION = """ | |
@article{DBLP:journals/corr/abs-1811-12823, | |
author = {Daniil Polykovskiy and | |
Alexander Zhebrak and | |
Benjam{\'{\i}}n S{\'{a}}nchez{-}Lengeling and | |
Sergey Golovanov and | |
Oktai Tatanov and | |
Stanislav Belyaev and | |
Rauf Kurbanov and | |
Aleksey Artamonov and | |
Vladimir Aladinskiy and | |
Mark Veselov and | |
Artur Kadurin and | |
Sergey I. Nikolenko and | |
Al{\'{a}}n Aspuru{-}Guzik and | |
Alex Zhavoronkov}, | |
title = {Molecular Sets {(MOSES):} {A} Benchmarking Platform for Molecular | |
Generation Models}, | |
journal = {CoRR}, | |
volume = {abs/1811.12823}, | |
year = {2018}, | |
url = {http://arxiv.org/abs/1811.12823}, | |
eprinttype = {arXiv}, | |
eprint = {1811.12823}, | |
timestamp = {Fri, 26 Nov 2021 15:34:30 +0100}, | |
biburl = {https://dblp.org/rec/journals/corr/abs-1811-12823.bib}, | |
bibsource = {dblp computer science bibliography, https://dblp.org} | |
} | |
""" | |
class molgenevalmetric(evaluate.Metric):
    """Evaluation module that scores generated SMILES against a training set.

    The individual scores are delegated to helper functions imported from the
    project-local ``metrics`` module; this class only wires them into the
    ``evaluate`` framework.
    """

    def _info(self):
        """Return the ``evaluate.MetricInfo`` describing this metric.

        The declared input columns are ``generated_smiles`` and
        ``train_smiles``. With the ``"multilabel"`` configuration each example
        is a sequence of strings; otherwise each example is a single string.
        """
        if self.config_name == "multilabel":
            input_features = {
                "generated_smiles": datasets.Sequence(datasets.Value("string")),
                "train_smiles": datasets.Sequence(datasets.Value("string")),
            }
        else:
            input_features = {
                "generated_smiles": datasets.Value("string"),
                "train_smiles": datasets.Value("string"),
            }

        return evaluate.MetricInfo(
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            features=datasets.Features(input_features),
            reference_urls=[
                "https://github.com/molecularsets/moses",
                "https://tdcommons.ai/functions/oracles/",
            ],
        )

    def _compute(self, generated_smiles=None, train_smiles=None, gensmi=None, trainsmi=None):
        """Compute all metrics for the generated molecules.

        ``evaluate`` forwards the collected inputs to ``_compute`` as keyword
        arguments named after the feature keys declared in ``_info``, so the
        parameters must be called ``generated_smiles`` and ``train_smiles``.

        Args:
            generated_smiles: SMILES strings produced by the model.
            train_smiles: SMILES strings the model was trained on.
            gensmi: Legacy alias for ``generated_smiles``; used only when
                ``generated_smiles`` is not given.
            trainsmi: Legacy alias for ``train_smiles``; used only when
                ``train_smiles`` is not given.

        Returns:
            A dict with the keys ``novelty``, ``valid``, ``unique``,
            ``IntDiv``, ``FCD``, ``SA`` and ``SCS``; each value is whatever
            the corresponding helper from the ``metrics`` module returns.

        Raises:
            TypeError: If either SMILES collection is missing.
        """
        # Keep callers that still use the old parameter names working.
        if generated_smiles is None:
            generated_smiles = gensmi
        if train_smiles is None:
            train_smiles = trainsmi
        if generated_smiles is None or train_smiles is None:
            raise TypeError(
                "_compute() requires both 'generated_smiles' and 'train_smiles'"
            )

        return {
            "novelty": novelty(gen=generated_smiles, train=train_smiles),
            "valid": fraction_valid(gen=generated_smiles),
            "unique": fraction_unique(gen=generated_smiles),
            "IntDiv": internal_diversity(gen=generated_smiles),
            "FCD": fcd_metric(gen=generated_smiles, train=train_smiles),
            "SA": SAscore(gen=generated_smiles),
            # NOTE(review): 'SCS' is computed with SAscore on the *training*
            # SMILES, exactly as before. Confirm whether a different scorer
            # (e.g. the imported SYBAscore) or the generated set was intended.
            "SCS": SAscore(gen=train_smiles),
        }
# generated_smiles = [s for s in generated_smiles if s != ''] | |
# evaluator = Evaluator(name = 'KL_Divergence') | |
# KL_Divergence = evaluator(generated_smiles, train_smiles) | |
# Results.update({ | |
# "KL_Divergence": KL_Divergence, | |
# }) | |
# oracle_list = [ | |
# 'QED', 'SA', 'MPO', 'GSK3B', 'JNK3', | |
# 'DRD2', 'LogP', 'Rediscovery', 'Similarity', | |
# 'Median', 'Isomers', 'Valsartan_SMARTS', 'Hop' | |
# ] | |
# for oracle_name in oracle_list: | |
# oracle = Oracle(name=oracle_name) | |
# if oracle_name in ['Rediscovery', 'MPO', 'Similarity', 'Median', 'Isomers', 'Hop']: | |
# score = oracle(generated_smiles) | |
# if isinstance(score, dict): | |
# score = {key: sum(values)/len(values) for key, values in score.items()} | |
# else: | |
# score = oracle(generated_smiles) | |
# if isinstance(score, list): | |
# score = sum(score) / len(score) | |
# Results.update({f"{oracle_name}": score}) | |
# # keys_to_remove = ["FCD/TestSF", "SNN/TestSF", "Frag/TestSF", "Scaf/TestSF"] | |
# # for key in keys_to_remove: | |
# # Results.pop(key, None) | |
# return {"results": Results} | |