saicharan2804 committed on
Commit
b3f9149
·
1 Parent(s): ddc012e

SYBA added

Browse files
.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
  title: Molgenevalmetric
3
- emoji: 😻
4
  colorFrom: pink
5
  colorTo: indigo
6
  sdk: gradio
 
1
  ---
2
  title: Molgenevalmetric
3
+ emoji:
4
  colorFrom: pink
5
  colorTo: indigo
6
  sdk: gradio
__pycache__/molgenevalmetric.cpython-311.pyc ADDED
Binary file (23.5 kB). View file
 
__pycache__/molgenevalmetric.cpython-312.pyc ADDED
Binary file (24.3 kB). View file
 
app.py CHANGED
@@ -1,16 +1,22 @@
1
  import pandas as pd
2
  df = pd.read_csv('/Users/saicharan/chembl_10000.csv')
 
3
 
4
  import evaluate
5
- molgenevalmetric = evaluate.load("saicharan2804/molgenevalmetric")
6
 
7
  ls= df['SMILES'].tolist()
8
- ls_gen = ls[0:5000]
9
- ls_train = ls[5000:10000]
10
 
11
  print('computing')
 
 
 
 
 
12
 
13
- print(molgenevalmetric.compute(gensmi = ls_gen, trainsmi = ls_train))
14
 
15
  # import evaluate
16
  # from evaluate.utils import launch_gradio_widget
 
1
  import pandas as pd
2
  df = pd.read_csv('/Users/saicharan/chembl_10000.csv')
3
+ from molgenevalmetric import SYBAscore
4
 
5
  import evaluate
6
+ met = evaluate.load("saicharan2804/molgenevalmetric")
7
 
8
  ls= df['SMILES'].tolist()
9
+ ls_gen = ls[0:500]
10
+ ls_train = ls[500:1000]
11
 
12
  print('computing')
13
+ print(SYBAscore(gen=ls_gen))
14
+ # print(met.compute(gensmi = ls_gen, trainsmi = ls_train))
15
+ # print(qed_metric(gen=ls_gen))
16
+ # print(logP_metric(gen=ls_gen))
17
+ # print(average_sascore(gen=ls_gen))
18
 
19
+ # print(oracles(gen=ls_gen, train=ls_train))
20
 
21
  # import evaluate
22
  # from evaluate.utils import launch_gradio_widget
molgenevalmetric.py CHANGED
@@ -1,12 +1,9 @@
1
 
2
  import evaluate
3
  import datasets
4
- # import moses
5
- # from moses import metrics
6
  import pandas as pd
7
  from tdc import Evaluator
8
  from tdc import Oracle
9
- # from metrics import novelty, fraction_valid, fraction_unique, SAscore, internal_diversity,fcd_metric, SYBAscore, oracles
10
  from rdkit.Chem.QED import qed
11
  from rdkit.Chem.Crippen import MolLogP
12
  import os
@@ -30,16 +27,11 @@ import pandas as pd
30
  from rdkit import rdBase
31
  from rdkit.Contrib.SA_Score import sascorer
32
  import sys
33
-
34
  from rdkit.Chem import RDConfig
35
  import os
36
- # sys.path.append(os.path.join(RDConfig.RDContribDir, 'SA_Score'))
37
- # import sascorer
38
  import pandas as pd
39
  from fcd_torch import FCD
40
- # from syba.syba import SybaClassifier
41
-
42
- # from SCScore import SCScorer
43
 
44
  from myscscore.SCScore import SCScorer
45
  import warnings
@@ -401,32 +393,32 @@ def fcd_metric(gen, train, n_jobs = 1, device = None):
401
  fcd = FCD(device=device, n_jobs= n_jobs)
402
  return fcd(gen, train)
403
 
404
- # def SYBAscore(gen):
405
- # """
406
- # Compute the average SYBA score for a list of SMILES strings.
407
-
408
- # Parameters:
409
- # - smiles_list (list of str): A list of SMILES strings representing molecules.
410
-
411
- # Returns:
412
- # - float: The average SYBA score for the list of molecules.
413
- # """
414
- # syba = SybaClassifier()
415
- # syba.fitDefaultScore()
416
- # scores = []
417
-
418
- # for smiles in gen:
419
- # try:
420
- # score = syba.predict(smi=smiles)
421
- # scores.append(score)
422
- # except Exception as e:
423
- # print(f"Error processing SMILES '{smiles}': {e}")
424
- # continue
425
-
426
- # if scores:
427
- # return sum(scores) / len(scores)
428
- # else:
429
- # return None # Or handle empty list or all failed predictions as needed
430
 
431
  def qed_metric(gen):
432
  """
@@ -604,11 +596,9 @@ class molgenevalmetric(evaluate.Metric):
604
  metrics['Oracles'] = oracles(gen = gensmi, train = trainsmi)
605
  metrics['QED'] = qed_metric(gen=gensmi)
606
  metrics['LogP'] = logP_metric(gen=gensmi)
607
-
608
- # print('computing')
609
-
610
  metrics['SA'] = average_sascore(gen=gensmi)
611
  metrics['SCS'] = synthetic_complexity_score(gen=gensmi)
 
612
 
613
  return metrics
614
 
 
1
 
2
  import evaluate
3
  import datasets
 
 
4
  import pandas as pd
5
  from tdc import Evaluator
6
  from tdc import Oracle
 
7
  from rdkit.Chem.QED import qed
8
  from rdkit.Chem.Crippen import MolLogP
9
  import os
 
27
  from rdkit import rdBase
28
  from rdkit.Contrib.SA_Score import sascorer
29
  import sys
 
30
  from rdkit.Chem import RDConfig
31
  import os
 
 
32
  import pandas as pd
33
  from fcd_torch import FCD
34
+ from syba.syba import SybaClassifier
 
 
35
 
36
  from myscscore.SCScore import SCScorer
37
  import warnings
 
393
  fcd = FCD(device=device, n_jobs= n_jobs)
394
  return fcd(gen, train)
395
 
396
def SYBAscore(gen):
    """
    Compute the average SYBA score for a list of SMILES strings.

    Parameters:
    - gen (iterable of str): SMILES strings of the molecules to score.

    Returns:
    - float or None: The mean of the scores that could be computed. None is
      returned when `gen` is empty or when every prediction failed.

    SMILES strings whose prediction raises are reported on stdout and left
    out of the average; they do not abort the computation.
    """
    # Materialize once so one-shot iterables (generators) can be both
    # checked for emptiness and iterated.
    smiles_list = list(gen)

    # Nothing to score: return the same "no result" value as before, but
    # without paying for building and fitting the classifier first.
    if not smiles_list:
        return None

    syba = SybaClassifier()
    syba.fitDefaultScore()

    scores = []
    for smiles in smiles_list:
        try:
            scores.append(syba.predict(smi=smiles))
        except Exception as e:
            # Best-effort metric: report the failing SMILES and keep going.
            print(f"Error processing SMILES '{smiles}': {e}")

    if not scores:
        return None  # every prediction failed
    return sum(scores) / len(scores)
422
 
423
  def qed_metric(gen):
424
  """
 
596
  metrics['Oracles'] = oracles(gen = gensmi, train = trainsmi)
597
  metrics['QED'] = qed_metric(gen=gensmi)
598
  metrics['LogP'] = logP_metric(gen=gensmi)
 
 
 
599
  metrics['SA'] = average_sascore(gen=gensmi)
600
  metrics['SCS'] = synthetic_complexity_score(gen=gensmi)
601
+ metrics['SYBA'] = SYBAscore(gen=gensmi)
602
 
603
  return metrics
604
 
requirements.txt CHANGED
@@ -1,9 +1,11 @@
1
  git+https://github.com/huggingface/evaluate@main
2
  git+https://github.com/saicharan2804/myscscore
 
3
  numpy
4
  pandas
5
  scipy
6
  torch
7
  rdkit
8
  pyarrow
9
- fcd-torch
 
 
1
  git+https://github.com/huggingface/evaluate@main
2
  git+https://github.com/saicharan2804/myscscore
3
+ git+https://github.com/saicharan2804/mysybascore
4
  numpy
5
  pandas
6
  scipy
7
  torch
8
  rdkit
9
  pyarrow
10
+ fcd-torch
11
+ PyTDC