"""
===============================
Metrics calculation
===============================
Includes a few metric as well as functions composing metrics on results files.
"""
import numpy as np
import torch
from sklearn.metrics import roc_auc_score, accuracy_score, balanced_accuracy_score, average_precision_score
from scipy.stats import rankdata
import pandas as pd
"""
===============================
Metrics calculation
===============================
"""
def auc_metric(target, pred, multi_class='ovo', numpy=False):
    lib = np if numpy else torch
    try:
        if not numpy:
            target = torch.tensor(target) if not torch.is_tensor(target) else target
            pred = torch.tensor(pred) if not torch.is_tensor(pred) else pred
        if len(lib.unique(target)) > 2:
            if not numpy:
                return torch.tensor(roc_auc_score(target, pred, multi_class=multi_class))
            return roc_auc_score(target, pred, multi_class=multi_class)
        else:
            if len(pred.shape) == 2:
                pred = pred[:, 1]
            if not numpy:
                return torch.tensor(roc_auc_score(target, pred))
            return roc_auc_score(target, pred)
    except ValueError as e:
        print(e)
        return np.nan
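
# Illustrative usage note (added, not from the original file): for binary targets
# a (n_samples, 2) probability matrix is reduced to its positive-class column
# before scoring; with more than two classes, one probability column per class is
# expected and multi-class AUC (default 'ovo') is computed.
# >>> y = np.array([0, 1, 1, 0])
# >>> p = np.array([[0.9, 0.1], [0.2, 0.8], [0.3, 0.7], [0.6, 0.4]])
# >>> auc_metric(y, p)  # -> AUC of 1.0, returned as a torch scalar
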
def accuracy_metric(target, pred):
    target = torch.tensor(target) if not torch.is_tensor(target) else target
    pred = torch.tensor(pred) if not torch.is_tensor(pred) else pred
    if len(torch.unique(target)) > 2:
        return torch.tensor(accuracy_score(target, torch.argmax(pred, -1)))
    else:
        return torch.tensor(accuracy_score(target, pred[:, 1] > 0.5))


def average_precision_metric(target, pred):
    target = torch.tensor(target) if not torch.is_tensor(target) else target
    pred = torch.tensor(pred) if not torch.is_tensor(pred) else pred
    if len(torch.unique(target)) > 2:
        return torch.tensor(average_precision_score(target, torch.argmax(pred, -1)))
    else:
        return torch.tensor(average_precision_score(target, pred[:, 1] > 0.5))


def balanced_accuracy_metric(target, pred):
    target = torch.tensor(target) if not torch.is_tensor(target) else target
    pred = torch.tensor(pred) if not torch.is_tensor(pred) else pred
    if len(torch.unique(target)) > 2:
        return torch.tensor(balanced_accuracy_score(target, torch.argmax(pred, -1)))
    else:
        return torch.tensor(balanced_accuracy_score(target, pred[:, 1] > 0.5))
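
# Note (added): for binary tasks the three metrics above score hard predictions
# obtained by thresholding the positive-class probability at 0.5
# (pred[:, 1] > 0.5); for more than two classes, the argmax over the class
# dimension is scored instead.
# >>> accuracy_metric(np.array([0, 1, 1]), np.array([[0.7, 0.3], [0.2, 0.8], [0.6, 0.4]]))
# -> 2 of 3 thresholded predictions are correct, i.e. roughly 0.667
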
def cross_entropy(target, pred):
    target = torch.tensor(target) if not torch.is_tensor(target) else target
    pred = torch.tensor(pred) if not torch.is_tensor(pred) else pred
    if len(torch.unique(target)) > 2:
        ce = torch.nn.CrossEntropyLoss()
        return ce(pred.float(), target.long())
    else:
        bce = torch.nn.BCELoss()
        return bce(pred[:, 1].float(), target.float())
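
# Note (added): the two branches expect different inputs. With more than two
# classes, CrossEntropyLoss is used, so 'pred' should contain unnormalised
# logits; for binary targets, BCELoss is applied to pred[:, 1], which must
# already be a probability in [0, 1].
# >>> cross_entropy(np.array([0, 1]), np.array([[0.8, 0.2], [0.1, 0.9]]))
# -> -(log(0.8) + log(0.9)) / 2, roughly 0.164
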
def time_metric():
    """
    Dummy function; only used as a handle to select the timing results in calculate_score_per_method.
    """
    pass


def count_metric(x, y):
    """
    Dummy function, returns one count per dataset.
    """
    return 1
"""
===============================
Metrics composition
===============================
"""
def calculate_score_per_method(metric, name: str, global_results: dict, ds: list, eval_positions: list, aggregator: str = 'mean'):
    """
    Calculates the metric given by 'metric' and saves it under 'name' in 'global_results'.

    :param metric: Metric function
    :param name: Name of the metric in 'global_results'
    :param global_results: Dictionary containing the results of the current method for a collection of datasets
    :param ds: Datasets to calculate metrics on, a list of dataset properties
    :param eval_positions: List of positions to calculate metrics on
    :param aggregator: Specifies the way to aggregate results across evaluation positions
    :return:
    """
    aggregator_f = np.nanmean if aggregator == 'mean' else np.nansum
    for pos in eval_positions:
        valid_positions = 0
        for d in ds:
            if f'{d[0]}_outputs_at_{pos}' in global_results:
                preds = global_results[f'{d[0]}_outputs_at_{pos}']
                y = global_results[f'{d[0]}_ys_at_{pos}']

                preds = preds.detach().cpu().numpy() if torch.is_tensor(preds) else preds
                y = y.detach().cpu().numpy() if torch.is_tensor(y) else y

                try:
                    if metric == time_metric:
                        global_results[f'{d[0]}_{name}_at_{pos}'] = global_results[f'{d[0]}_time_at_{pos}']
                        valid_positions = valid_positions + 1
                    else:
                        global_results[f'{d[0]}_{name}_at_{pos}'] = aggregator_f(
                            [metric(y[split], preds[split]) for split in range(y.shape[0])])
                        valid_positions = valid_positions + 1
                except Exception as err:
                    print(f'Error calculating metric with {err}, {type(err)} at {d[0]} {pos} {name}')
                    global_results[f'{d[0]}_{name}_at_{pos}'] = np.nan
            else:
                global_results[f'{d[0]}_{name}_at_{pos}'] = np.nan

        if valid_positions > 0:
            global_results[f'{aggregator}_{name}_at_{pos}'] = aggregator_f(
                [global_results[f'{d[0]}_{name}_at_{pos}'] for d in ds])
        else:
            global_results[f'{aggregator}_{name}_at_{pos}'] = np.nan

    for d in ds:
        metrics = [global_results[f'{d[0]}_{name}_at_{pos}'] for pos in eval_positions]
        metrics = [m for m in metrics if not np.isnan(m)]
        global_results[f'{d[0]}_{aggregator}_{name}'] = aggregator_f(metrics) if len(metrics) > 0 else np.nan

    metrics = [global_results[f'{aggregator}_{name}_at_{pos}'] for pos in eval_positions]
    metrics = [m for m in metrics if not np.isnan(m)]
    global_results[f'{aggregator}_{name}'] = aggregator_f(metrics) if len(metrics) > 0 else np.nan
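
# Expected layout (illustrative, added): for each dataset d and evaluation
# position pos, the per-method results dict holds predictions and labels indexed
# per split, e.g.
#   global_results['my_ds_outputs_at_1000']  # shape (n_splits, n_samples, n_classes)
#   global_results['my_ds_ys_at_1000']       # shape (n_splits, n_samples)
#   global_results['my_ds_time_at_1000']     # wall-clock time, read by time_metric
# The call below would then add 'my_ds_roc_at_1000', 'mean_roc_at_1000',
# 'my_ds_mean_roc' and 'mean_roc' entries (dataset and metric names are made up):
# >>> calculate_score_per_method(auc_metric, 'roc', global_results, [('my_ds',)], [1000])
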
def calculate_score(metric, name, global_results, ds, eval_positions, aggregator='mean', limit_to=''):
    """
    Calls calculate_score_per_method for each method in 'global_results'. See that function for the arguments.

    :param limit_to: Only methods whose name contains this substring get metric calculations (the default empty string matches all methods).
    """
    for m in global_results:
        if limit_to not in m:
            continue
        calculate_score_per_method(metric, name, global_results[m], ds, eval_positions, aggregator=aggregator)
def make_metric_matrix(global_results, methods, pos, name, ds):
    result = []
    for m in global_results:
        result += [[global_results[m][d[0] + '_' + name + '_at_' + str(pos)] for d in ds]]
    result = np.array(result)
    result = pd.DataFrame(result.T, index=[d[0] for d in ds], columns=[k[:-8] for k in list(global_results.keys())])

    matrix_means, matrix_stds = [], []

    for method in methods:
        matrix_means += [result.iloc[:, [(method) in c for c in result.columns]].mean(axis=1)]
        matrix_stds += [result.iloc[:, [(method) in c for c in result.columns]].std(axis=1)]

    matrix_means = pd.DataFrame(matrix_means, index=methods).T
    matrix_stds = pd.DataFrame(matrix_stds, index=methods).T

    return matrix_means, matrix_stds
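
# Note (added): column names are formed by stripping the last eight characters of
# each top-level result key, and each entry of 'methods' is then matched to
# columns by substring. Mean and std are therefore taken across all result keys
# that contain the method string, e.g. several runs or seeds of one method.
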
def make_ranks_and_wins_table(matrix):
    # Ranks methods per dataset in place (rank 1 = best, i.e. highest value),
    # then averages ranks and counts first places per method.
    for dss in matrix.T:
        matrix.loc[dss] = rankdata(-matrix.round(3).loc[dss])
    ranks_acc = matrix.mean()
    wins_acc = (matrix == 1).sum()

    return ranks_acc, wins_acc
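
# ---------------------------------------------------------------------------
# Illustrative end-to-end sketch (added; not part of the original module).
# It builds a fake 'global_results' structure with the key layout the helpers
# above expect ({dataset}_outputs_at_{pos}, {dataset}_ys_at_{pos}, indexed per
# split) and runs the composition functions on it. All dataset and method
# names below are made up.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    rng = np.random.default_rng(0)
    datasets = [('toy_ds_a',), ('toy_ds_b',)]  # only d[0], the name, is used above
    eval_positions = [1000]

    def fake_results():
        # Predictions with shape (n_splits, n_samples, n_classes), labels with
        # shape (n_splits, n_samples), plus a dummy timing entry.
        res = {}
        for (ds_name,) in datasets:
            y = rng.integers(0, 2, size=(3, 50))
            p = rng.random(size=(3, 50, 2))
            p = p / p.sum(-1, keepdims=True)
            res[f'{ds_name}_outputs_at_{eval_positions[0]}'] = p
            res[f'{ds_name}_ys_at_{eval_positions[0]}'] = y
            res[f'{ds_name}_time_at_{eval_positions[0]}'] = 0.1
        return res

    global_results = {'toy_method_a': fake_results(), 'toy_method_b': fake_results()}

    calculate_score(auc_metric, 'roc', global_results, datasets, eval_positions)
    calculate_score(accuracy_metric, 'acc', global_results, datasets, eval_positions)

    # One aggregated AUC per (dataset, method); rank methods per dataset and
    # count how often each method comes first. The ranking mutates the matrix,
    # hence the copy.
    matrix = pd.DataFrame(
        {m: [global_results[m][f'{d[0]}_mean_roc'] for d in datasets] for m in global_results},
        index=[d[0] for d in datasets])
    ranks, wins = make_ranks_and_wins_table(matrix.copy())
    print(ranks, wins, sep='\n')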