|
import json |
|
from copy import deepcopy |
|
from typing import Dict, List |
|
|
|
import numpy as np |
|
from sklearn.metrics import f1_score, precision_score, recall_score |
|
from sklearn.preprocessing import MultiLabelBinarizer |
|
|
|
|
|
def _transform( |
|
y_pred: List[List], |
|
y_true: List[List], |
|
): |
|
|
|
y_pred = [[y if type(y) == str else "" for y in y_ls] for y_ls in y_pred] |
|
|
|
mlb = MultiLabelBinarizer() |
|
tmp = deepcopy(y_true) |
|
tmp.extend(y_pred) |
|
mlb.fit(tmp) |
|
y_true_binary = mlb.transform(y_true) |
|
y_pred_binary = mlb.transform(y_pred) |
|
return y_pred_binary, y_true_binary |
|
|
|
|
|
class Metric: |
|
|
|
@classmethod |
|
def averaged( |
|
cls, |
|
y_pred: List[List], |
|
y_true: List[List], |
|
metric_types: List[str] = ["macro"], |
|
) -> Dict: |
|
y_pred_binary, y_true_binary = _transform(y_pred, y_true) |
|
resp = {} |
|
for metric_type in metric_types: |
|
assert metric_type in [ |
|
"micro", |
|
"macro", |
|
"samples", |
|
"weighted", |
|
], "metric type error." |
|
|
|
|
|
|
|
resp["{}-Averaged Recall".format(metric_type)] = round( |
|
recall_score(y_true_binary, y_pred_binary, average=metric_type), 3 |
|
) |
|
resp["{}-Averaged F1".format(metric_type)] = round( |
|
f1_score(y_true_binary, y_pred_binary, average=metric_type), 3 |
|
) |
|
return resp |
|
|
|
@classmethod |
|
def jaccard( |
|
cls, |
|
y_pred: List[List], |
|
y_true: List[List], |
|
) -> Dict: |
|
def jaccard_similarity(l_pred: List, l_true: List) -> float: |
|
intersection = len(set(l_pred) & set(l_true)) |
|
union = len(set(l_pred) | set(l_true)) |
|
if union == 0: |
|
return 0 |
|
else: |
|
return intersection / union |
|
|
|
similarities = [ |
|
jaccard_similarity(l_pred, l_true) for l_pred, l_true in zip(y_pred, y_true) |
|
] |
|
|
|
jaccard = sum(similarities) / len(similarities) |
|
return {"Jaccard Similarity": round(jaccard, 3)} |
|
|
|
@classmethod |
|
def hamming( |
|
cls, |
|
y_pred: List[List], |
|
y_true: List[List], |
|
) -> Dict: |
|
y_pred_binary, y_true_binary = _transform(y_pred, y_true) |
|
hamming_loss = np.sum(y_true_binary != y_pred_binary) / ( |
|
y_true_binary.shape[0] * y_true_binary.shape[1] |
|
) |
|
return {"Hamming Loss": round(hamming_loss, 3)} |
|
|