ribesstefano committed on
Commit
b2864fc
·
1 Parent(s): 1b2eb07

Normalized some metrics in CV results

Browse files
notebooks/protac_degradation_predictor.ipynb CHANGED
@@ -8615,10 +8615,12 @@
8615
  " 'group_type': group_type,\n",
8616
  " 'train_len': len(train_df),\n",
8617
  " 'val_len': len(val_df),\n",
8618
- " 'train_active_perc': train_df['Active'].sum() / len(train_df) * 100,\n",
8619
- " 'train_inactive_perc': (len(train_df) - train_df['Active'].sum()) / len(train_df) * 100,\n",
8620
- " 'val_active_perc': val_df['Active'].sum() / len(val_df) * 100,\n",
8621
- " 'val_inactive_perc': (len(val_df) - val_df['Active'].sum()) / len(val_df) * 100,\n",
 
 
8622
  " 'num_leaking_uniprot': len(set(train_df['Uniprot']).intersection(set(val_df['Uniprot']))),\n",
8623
  " 'num_leaking_smiles': len(set(train_df['Smiles']).intersection(set(val_df['Smiles']))),\n",
8624
  " }\n",
@@ -8640,7 +8642,7 @@
8640
  },
8641
  {
8642
  "cell_type": "code",
8643
- "execution_count": 55,
8644
  "metadata": {},
8645
  "outputs": [
8646
  {
@@ -8664,8 +8666,10 @@
8664
  " <thead>\n",
8665
  " <tr style=\"text-align: right;\">\n",
8666
  " <th></th>\n",
 
8667
  " <th>val_acc</th>\n",
8668
  " <th>val_roc_auc</th>\n",
 
8669
  " <th>test_acc</th>\n",
8670
  " <th>test_roc_auc</th>\n",
8671
  " </tr>\n",
@@ -8675,27 +8679,35 @@
8675
  " <th></th>\n",
8676
  " <th></th>\n",
8677
  " <th></th>\n",
 
 
8678
  " </tr>\n",
8679
  " </thead>\n",
8680
  " <tbody>\n",
8681
  " <tr>\n",
8682
  " <th>random</th>\n",
 
8683
  " <td>0.834</td>\n",
8684
  " <td>0.903</td>\n",
 
8685
  " <td>0.846</td>\n",
8686
  " <td>0.050</td>\n",
8687
  " </tr>\n",
8688
  " <tr>\n",
8689
  " <th>tanimoto</th>\n",
 
8690
  " <td>0.774</td>\n",
8691
  " <td>0.784</td>\n",
 
8692
  " <td>0.892</td>\n",
8693
  " <td>0.117</td>\n",
8694
  " </tr>\n",
8695
  " <tr>\n",
8696
  " <th>uniprot</th>\n",
 
8697
  " <td>0.658</td>\n",
8698
  " <td>0.626</td>\n",
 
8699
  " <td>0.862</td>\n",
8700
  " <td>0.100</td>\n",
8701
  " </tr>\n",
@@ -8704,20 +8716,26 @@
8704
  "</div>"
8705
  ],
8706
  "text/plain": [
8707
- " val_acc val_roc_auc test_acc test_roc_auc\n",
8708
- "group_type \n",
8709
- "random 0.834 0.903 0.846 0.050\n",
8710
- "tanimoto 0.774 0.784 0.892 0.117\n",
8711
- "uniprot 0.658 0.626 0.862 0.100"
 
 
 
 
 
 
8712
  ]
8713
  },
8714
- "execution_count": 55,
8715
  "metadata": {},
8716
  "output_type": "execute_result"
8717
  }
8718
  ],
8719
  "source": [
8720
- "report.groupby('group_type').mean().round(3)[['val_acc', 'val_roc_auc', 'test_acc', 'test_roc_auc']]"
8721
  ]
8722
  },
8723
  {
 
8615
  " 'group_type': group_type,\n",
8616
  " 'train_len': len(train_df),\n",
8617
  " 'val_len': len(val_df),\n",
8618
+ " 'train_active_perc': train_df['Active'].sum() / len(train_df),\n",
8619
+ " 'train_inactive_perc': (len(train_df) - train_df['Active'].sum()) / len(train_df),\n",
8620
+ " 'val_active_perc': val_df['Active'].sum() / len(val_df),\n",
8621
+ " 'val_inactive_perc': (len(val_df) - val_df['Active'].sum()) / len(val_df),\n",
8622
+ " 'test_active_perc': test_df['Active'].sum() / len(test_df),\n",
8623
+ " 'test_inactive_perc': (len(test_df) - test_df['Active'].sum()) / len(test_df),\n",
8624
  " 'num_leaking_uniprot': len(set(train_df['Uniprot']).intersection(set(val_df['Uniprot']))),\n",
8625
  " 'num_leaking_smiles': len(set(train_df['Smiles']).intersection(set(val_df['Smiles']))),\n",
8626
  " }\n",
 
8642
  },
8643
  {
8644
  "cell_type": "code",
8645
+ "execution_count": 60,
8646
  "metadata": {},
8647
  "outputs": [
8648
  {
 
8666
  " <thead>\n",
8667
  " <tr style=\"text-align: right;\">\n",
8668
  " <th></th>\n",
8669
+ " <th>val_inactive_perc</th>\n",
8670
  " <th>val_acc</th>\n",
8671
  " <th>val_roc_auc</th>\n",
8672
+ " <th>test_inactive_perc</th>\n",
8673
  " <th>test_acc</th>\n",
8674
  " <th>test_roc_auc</th>\n",
8675
  " </tr>\n",
 
8679
  " <th></th>\n",
8680
  " <th></th>\n",
8681
  " <th></th>\n",
8682
+ " <th></th>\n",
8683
+ " <th></th>\n",
8684
  " </tr>\n",
8685
  " </thead>\n",
8686
  " <tbody>\n",
8687
  " <tr>\n",
8688
  " <th>random</th>\n",
8689
+ " <td>0.740</td>\n",
8690
  " <td>0.834</td>\n",
8691
  " <td>0.903</td>\n",
8692
+ " <td>0.923</td>\n",
8693
  " <td>0.846</td>\n",
8694
  " <td>0.050</td>\n",
8695
  " </tr>\n",
8696
  " <tr>\n",
8697
  " <th>tanimoto</th>\n",
8698
+ " <td>0.753</td>\n",
8699
  " <td>0.774</td>\n",
8700
  " <td>0.784</td>\n",
8701
+ " <td>0.923</td>\n",
8702
  " <td>0.892</td>\n",
8703
  " <td>0.117</td>\n",
8704
  " </tr>\n",
8705
  " <tr>\n",
8706
  " <th>uniprot</th>\n",
8707
+ " <td>0.705</td>\n",
8708
  " <td>0.658</td>\n",
8709
  " <td>0.626</td>\n",
8710
+ " <td>0.923</td>\n",
8711
  " <td>0.862</td>\n",
8712
  " <td>0.100</td>\n",
8713
  " </tr>\n",
 
8716
  "</div>"
8717
  ],
8718
  "text/plain": [
8719
+ " val_inactive_perc val_acc val_roc_auc test_inactive_perc \\\n",
8720
+ "group_type \n",
8721
+ "random 0.740 0.834 0.903 0.923 \n",
8722
+ "tanimoto 0.753 0.774 0.784 0.923 \n",
8723
+ "uniprot 0.705 0.658 0.626 0.923 \n",
8724
+ "\n",
8725
+ " test_acc test_roc_auc \n",
8726
+ "group_type \n",
8727
+ "random 0.846 0.050 \n",
8728
+ "tanimoto 0.892 0.117 \n",
8729
+ "uniprot 0.862 0.100 "
8730
  ]
8731
  },
8732
+ "execution_count": 60,
8733
  "metadata": {},
8734
  "output_type": "execute_result"
8735
  }
8736
  ],
8737
  "source": [
8738
+ "report.groupby('group_type').mean().round(3)[['val_inactive_perc', 'val_acc', 'val_roc_auc', 'test_inactive_perc', 'test_acc', 'test_roc_auc']]"
8739
  ]
8740
  },
8741
  {
reports/cv_report_5-splits.csv CHANGED
@@ -1,16 +1,16 @@
1
- fold,group_type,train_len,val_len,train_active_perc,train_inactive_perc,val_active_perc,val_inactive_perc,num_leaking_uniprot,num_leaking_smiles,val_loss,val_acc,val_f1_score,val_hp_metric,val_opt_score,val_precision,val_recall,val_roc_auc,test_loss,test_acc,test_f1_score,test_hp_metric,test_opt_score,test_precision,test_recall,test_roc_auc,train_unique_groups,val_unique_groups
2
- 0,random,859,215,25.960419091967402,74.0395809080326,26.046511627906977,73.95348837209302,47,119,0.32951581478118896,0.8604651093482971,0.7540983557701111,0.8604651093482971,1.6145634651184082,0.6969696879386902,0.8214285969734192,0.936657726764679,0.8791427612304688,0.8461538553237915,0.0,0.8461538553237915,0.8461538553237915,0.0,0.0,0.0,,
3
- 1,random,859,215,25.960419091967402,74.0395809080326,26.046511627906977,73.95348837209302,47,102,0.5149048566818237,0.804651141166687,0.6315789222717285,0.804651141166687,1.4362300634384155,0.6206896305084229,0.6428571343421936,0.8723045587539673,0.6578966379165649,0.9230769276618958,0.0,0.9230769276618958,0.9230769276618958,0.0,0.0,0.08333331346511841,,
4
- 2,random,859,215,25.960419091967402,74.0395809080326,26.046511627906977,73.95348837209302,50,93,0.466389000415802,0.8186046481132507,0.6776859760284424,0.8186046481132507,1.496290683746338,0.6307692527770996,0.7321428656578064,0.8792678117752075,0.8081092834472656,0.8461538553237915,0.0,0.8461538553237915,0.8461538553237915,0.0,0.0,0.08333331346511841,,
5
- 3,random,859,215,25.960419091967402,74.0395809080326,26.046511627906977,73.95348837209302,48,102,0.5054850578308105,0.8186046481132507,0.7111111283302307,0.8186046481132507,1.5297157764434814,0.607594907283783,0.8571428656578064,0.9024034142494202,0.7945694327354431,0.7692307829856873,0.0,0.7692307829856873,0.7692307829856873,0.0,0.0,0.08333331346511841,,
6
- 4,random,860,214,26.046511627906977,73.95348837209302,25.70093457943925,74.29906542056075,43,108,0.4100916087627411,0.8691588640213013,0.7704917788505554,0.8691588640213013,1.639650583267212,0.7014925479888916,0.8545454740524292,0.9222984313964844,0.7853931188583374,0.8461538553237915,0.0,0.8461538553237915,0.8461538553237915,0.0,0.0,0.0,,
7
- 0,uniprot,893,181,28.331466965285557,71.66853303471444,14.3646408839779,85.6353591160221,0,20,0.520714282989502,0.8176795840263367,0.2666666805744171,0.8176795840263367,1.0843462944030762,0.31578946113586426,0.23076923191547394,0.6401985287666321,0.7757952213287354,0.8461538553237915,0.0,0.8461538553237915,0.8461538553237915,0.0,0.0,0.0,55.0,10.0
8
- 1,uniprot,876,198,25.228310502283108,74.77168949771689,29.292929292929294,70.70707070707071,0,14,1.2874281406402588,0.5757575631141663,0.2222222238779068,0.5757575631141663,0.7979797720909119,0.23999999463558197,0.2068965584039688,0.5036945939064026,0.5239871740341187,0.9230769276618958,0.0,0.9230769276618958,0.9230769276618958,0.0,0.0,0.08333331346511841,56.0,9.0
9
- 2,uniprot,1015,59,24.433497536945815,75.56650246305419,52.54237288135594,47.45762711864407,0,11,2.313382387161255,0.5254237055778503,0.3333333432674408,0.5254237055778503,0.8587570190429688,0.6363636255264282,0.22580644488334656,0.6849077939987183,0.8297387361526489,0.8461538553237915,0.0,0.8461538553237915,0.8461538553237915,0.0,0.0,0.08333331346511841,56.0,9.0
10
- 3,uniprot,733,341,24.829467939972716,75.17053206002728,28.445747800586513,71.5542521994135,0,50,1.2226158380508423,0.6891495585441589,0.22058823704719543,0.6891495585441589,0.9097378253936768,0.38461539149284363,0.15463916957378387,0.5923609733581543,0.6087363958358765,0.9230769276618958,0.0,0.9230769276618958,0.9230769276618958,0.0,0.0,0.08333331346511841,51.0,14.0
11
- 4,uniprot,779,295,27.214377406931966,72.78562259306803,22.71186440677966,77.28813559322035,0,62,0.7730569243431091,0.6813559532165527,0.4337349534034729,0.6813559532165527,1.1150908470153809,0.3636363744735718,0.5373134613037109,0.7085297107696533,0.7675966620445251,0.7692307829856873,0.0,0.7692307829856873,0.7692307829856873,0.0,0.0,0.25,42.0,23.0
12
- 0,tanimoto,1005,69,26.666666666666668,73.33333333333333,15.942028985507244,84.05797101449275,19,0,0.6027061939239502,0.782608687877655,0.3478260934352875,0.782608687877655,1.1304347515106201,0.3333333432674408,0.3636363744735718,0.8087774515151978,0.7111766338348389,0.9230769276618958,0.0,0.9230769276618958,0.9230769276618958,0.0,0.0,0.08333331346511841,25.0,4.0
13
- 1,tanimoto,907,167,25.468577728776186,74.53142227122382,28.74251497005988,71.25748502994011,36,0,0.9109691977500916,0.7604790329933167,0.5,0.7604790329933167,1.2604789733886719,0.625,0.4166666567325592,0.7228641510009766,0.7257124781608582,0.8461538553237915,0.0,0.8461538553237915,0.8461538553237915,0.0,0.0,0.08333331346511841,22.0,7.0
14
- 2,tanimoto,895,179,25.921787709497206,74.0782122905028,26.256983240223462,73.74301675977654,40,0,0.7800794243812561,0.7597765326499939,0.44155845046043396,0.7597765326499939,1.2013349533081055,0.5666666626930237,0.3617021143436432,0.7964216470718384,0.8443781733512878,0.8461538553237915,0.0,0.8461538553237915,0.8461538553237915,0.0,0.0,0.0,22.0,7.0
15
- 3,tanimoto,746,328,24.664879356568363,75.33512064343164,28.96341463414634,71.03658536585365,43,0,0.7938919067382812,0.7865853905677795,0.5138888955116272,0.7865853905677795,1.3004742860794067,0.7551020383834839,0.38947367668151855,0.7958889007568359,0.5916397571563721,0.9230769276618958,0.0,0.9230769276618958,0.9230769276618958,0.0,0.0,0.1666666865348816,23.0,6.0
16
- 4,tanimoto,743,331,27.052489905787347,72.94751009421265,23.564954682779458,76.43504531722054,45,0,0.662976861000061,0.7824773192405701,0.47058823704719543,0.7824773192405701,1.253065586090088,0.5517241358757019,0.41025641560554504,0.7972534894943237,0.612966001033783,0.9230769276618958,0.0,0.9230769276618958,0.9230769276618958,0.0,0.0,0.25,24.0,5.0
 
1
+ fold,group_type,train_len,val_len,train_active_perc,train_inactive_perc,val_active_perc,val_inactive_perc,num_leaking_uniprot,num_leaking_smiles,val_loss,val_acc,val_f1_score,val_hp_metric,val_opt_score,val_precision,val_recall,val_roc_auc,test_loss,test_acc,test_f1_score,test_hp_metric,test_opt_score,test_precision,test_recall,test_roc_auc,train_unique_groups,val_unique_groups,test_active_perc,test_inactive_perc
2
+ 0,random,859,215,0.259604190919674,0.7403958090803261,0.26046511627906976,0.7395348837209302,47,119,0.32951581478118896,0.8604651093482971,0.7540983557701111,0.8604651093482971,1.6145634651184082,0.6969696879386902,0.8214285969734192,0.936657726764679,0.8791427612304688,0.8461538553237915,0.0,0.8461538553237915,0.8461538553237915,0.0,0.0,0.0,,,0.07692307692307693,0.9230769230769231
3
+ 1,random,859,215,0.259604190919674,0.7403958090803261,0.26046511627906976,0.7395348837209302,47,102,0.5149048566818237,0.804651141166687,0.6315789222717285,0.804651141166687,1.4362300634384155,0.6206896305084229,0.6428571343421936,0.8723045587539673,0.6578966379165649,0.9230769276618958,0.0,0.9230769276618958,0.9230769276618958,0.0,0.0,0.08333331346511841,,,0.07692307692307693,0.9230769230769231
4
+ 2,random,859,215,0.259604190919674,0.7403958090803261,0.26046511627906976,0.7395348837209302,50,93,0.466389000415802,0.8186046481132507,0.6776859760284424,0.8186046481132507,1.496290683746338,0.6307692527770996,0.7321428656578064,0.8792678117752075,0.8081092834472656,0.8461538553237915,0.0,0.8461538553237915,0.8461538553237915,0.0,0.0,0.08333331346511841,,,0.07692307692307693,0.9230769230769231
5
+ 3,random,859,215,0.259604190919674,0.7403958090803261,0.26046511627906976,0.7395348837209302,48,102,0.5054850578308105,0.8186046481132507,0.7111111283302307,0.8186046481132507,1.5297157764434814,0.607594907283783,0.8571428656578064,0.9024034142494202,0.7945694327354431,0.7692307829856873,0.0,0.7692307829856873,0.7692307829856873,0.0,0.0,0.08333331346511841,,,0.07692307692307693,0.9230769230769231
6
+ 4,random,860,214,0.26046511627906976,0.7395348837209302,0.2570093457943925,0.7429906542056075,43,108,0.4100916087627411,0.8691588640213013,0.7704917788505554,0.8691588640213013,1.639650583267212,0.7014925479888916,0.8545454740524292,0.9222984313964844,0.7853931188583374,0.8461538553237915,0.0,0.8461538553237915,0.8461538553237915,0.0,0.0,0.0,,,0.07692307692307693,0.9230769230769231
7
+ 0,uniprot,893,181,0.28331466965285557,0.7166853303471444,0.143646408839779,0.856353591160221,0,20,0.520714282989502,0.8176795840263367,0.2666666805744171,0.8176795840263367,1.0843462944030762,0.31578946113586426,0.23076923191547394,0.6401985287666321,0.7757952213287354,0.8461538553237915,0.0,0.8461538553237915,0.8461538553237915,0.0,0.0,0.0,55.0,10.0,0.07692307692307693,0.9230769230769231
8
+ 1,uniprot,876,198,0.2522831050228311,0.7477168949771689,0.29292929292929293,0.7070707070707072,0,14,1.2874281406402588,0.5757575631141663,0.2222222238779068,0.5757575631141663,0.7979797720909119,0.23999999463558197,0.2068965584039688,0.5036945939064026,0.5239871740341187,0.9230769276618958,0.0,0.9230769276618958,0.9230769276618958,0.0,0.0,0.08333331346511841,56.0,9.0,0.07692307692307693,0.9230769230769231
9
+ 2,uniprot,1015,59,0.24433497536945814,0.755665024630542,0.5254237288135594,0.4745762711864407,0,11,2.313382387161255,0.5254237055778503,0.3333333432674408,0.5254237055778503,0.8587570190429688,0.6363636255264282,0.22580644488334656,0.6849077939987183,0.8297387361526489,0.8461538553237915,0.0,0.8461538553237915,0.8461538553237915,0.0,0.0,0.08333331346511841,56.0,9.0,0.07692307692307693,0.9230769230769231
10
+ 3,uniprot,733,341,0.24829467939972716,0.7517053206002728,0.2844574780058651,0.7155425219941349,0,50,1.2226158380508423,0.6891495585441589,0.22058823704719543,0.6891495585441589,0.9097378253936768,0.38461539149284363,0.15463916957378387,0.5923609733581543,0.6087363958358765,0.9230769276618958,0.0,0.9230769276618958,0.9230769276618958,0.0,0.0,0.08333331346511841,51.0,14.0,0.07692307692307693,0.9230769230769231
11
+ 4,uniprot,779,295,0.27214377406931967,0.7278562259306802,0.2271186440677966,0.7728813559322034,0,62,0.7730569243431091,0.6813559532165527,0.4337349534034729,0.6813559532165527,1.1150908470153809,0.3636363744735718,0.5373134613037109,0.7085297107696533,0.7675966620445251,0.7692307829856873,0.0,0.7692307829856873,0.7692307829856873,0.0,0.0,0.25,42.0,23.0,0.07692307692307693,0.9230769230769231
12
+ 0,tanimoto,1005,69,0.26666666666666666,0.7333333333333333,0.15942028985507245,0.8405797101449275,19,0,0.6027061939239502,0.782608687877655,0.3478260934352875,0.782608687877655,1.1304347515106201,0.3333333432674408,0.3636363744735718,0.8087774515151978,0.7111766338348389,0.9230769276618958,0.0,0.9230769276618958,0.9230769276618958,0.0,0.0,0.08333331346511841,25.0,4.0,0.07692307692307693,0.9230769230769231
13
+ 1,tanimoto,907,167,0.25468577728776187,0.7453142227122381,0.2874251497005988,0.7125748502994012,36,0,0.9109691977500916,0.7604790329933167,0.5,0.7604790329933167,1.2604789733886719,0.625,0.4166666567325592,0.7228641510009766,0.7257124781608582,0.8461538553237915,0.0,0.8461538553237915,0.8461538553237915,0.0,0.0,0.08333331346511841,22.0,7.0,0.07692307692307693,0.9230769230769231
14
+ 2,tanimoto,895,179,0.25921787709497207,0.7407821229050281,0.26256983240223464,0.7374301675977654,40,0,0.7800794243812561,0.7597765326499939,0.44155845046043396,0.7597765326499939,1.2013349533081055,0.5666666626930237,0.3617021143436432,0.7964216470718384,0.8443781733512878,0.8461538553237915,0.0,0.8461538553237915,0.8461538553237915,0.0,0.0,0.0,22.0,7.0,0.07692307692307693,0.9230769230769231
15
+ 3,tanimoto,746,328,0.24664879356568364,0.7533512064343164,0.2896341463414634,0.7103658536585366,43,0,0.7938919067382812,0.7865853905677795,0.5138888955116272,0.7865853905677795,1.3004742860794067,0.7551020383834839,0.38947367668151855,0.7958889007568359,0.5916397571563721,0.9230769276618958,0.0,0.9230769276618958,0.9230769276618958,0.0,0.0,0.1666666865348816,23.0,6.0,0.07692307692307693,0.9230769230769231
16
+ 4,tanimoto,743,331,0.27052489905787347,0.7294751009421264,0.23564954682779457,0.7643504531722054,45,0,0.662976861000061,0.7824773192405701,0.47058823704719543,0.7824773192405701,1.253065586090088,0.5517241358757019,0.41025641560554504,0.7972534894943237,0.612966001033783,0.9230769276618958,0.0,0.9230769276618958,0.9230769276618958,0.0,0.0,0.25,24.0,5.0,0.07692307692307693,0.9230769230769231