Spaces:
Runtime error
Runtime error
from unittest import TestCase | |
from multi_label_precision_recall_accuracy_fscore import MultiLabelPrecisionRecallAccuracyFscore | |
class MultiLabelPrecisionRecallAccuracyFscoreTest(TestCase):
    """
    Tests of the multi-label precision/recall/accuracy/F-score metric in its default configuration.

    All of these tests are also used for the multiset configuration (a subclass re-runs them with a
    differently configured metric). So please mind this and write every test in a way that it is
    valid for both configurations (do not use the same label multiple times within one sample).
    """

    def setUp(self):
        """Create a fresh metric instance for every test."""
        self.multi_label_precision_recall_accuracy_fscore = MultiLabelPrecisionRecallAccuracyFscore()

    def _assert_scores_almost_equal(self, expected, actual):
        """
        Assert that ``actual`` is a dict with exactly the keys of ``expected`` and matching values.

        The values are floats such as ``2/3``; they are compared with ``assertAlmostEqual`` so that
        the tests do not depend on the exact order of floating point operations inside the metric.

        :param expected: mapping of score name to expected value
        :param actual: result returned by the metric's ``compute``
        """
        self.assertIsInstance(actual, dict)
        self.assertEqual(set(expected), set(actual))
        for name, value in expected.items():
            self.assertAlmostEqual(value, actual[name], msg=name)

    def test_eok(self):
        """Identical integer labels in predictions and references give 1.0 for every score."""
        self._assert_scores_almost_equal(
            {"precision": 1.0, "recall": 1.0, "accuracy": 1.0, "fscore": 1.0},
            self.multi_label_precision_recall_accuracy_fscore.compute(
                predictions=[[0, 1], [1, 2], [0, 1, 2]],
                references=[[0, 1], [1, 2], [0, 1, 2]],
            ),
        )

    def test_eok_string(self):
        """Identical string labels in predictions and references give 1.0 for every score."""
        self._assert_scores_almost_equal(
            {"precision": 1.0, "recall": 1.0, "accuracy": 1.0, "fscore": 1.0},
            self.multi_label_precision_recall_accuracy_fscore.compute(
                predictions=[["0", "1"], ["1", "2"], ["0", "1", "2"]],
                references=[["0", "1"], ["1", "2"], ["0", "1", "2"]],
            ),
        )

    def test_empty(self):
        """Empty predictions matched with empty references are expected to score 1.0 everywhere."""
        self._assert_scores_almost_equal(
            {"precision": 1.0, "recall": 1.0, "accuracy": 1.0, "fscore": 1.0},
            self.multi_label_precision_recall_accuracy_fscore.compute(
                predictions=[[], [], []],
                references=[[], [], []],
            ),
        )

    def test_empty_reference(self):
        """Non-empty predictions against empty references are expected to score 0.0 everywhere."""
        self._assert_scores_almost_equal(
            {"precision": 0.0, "recall": 0.0, "accuracy": 0.0, "fscore": 0.0},
            self.multi_label_precision_recall_accuracy_fscore.compute(
                predictions=[[0, 1], [1, 2], [0, 1, 2]],
                references=[[], [], []],
            ),
        )

    def test_empty_prediction(self):
        """Empty predictions against non-empty references are expected to score 0.0 everywhere."""
        self._assert_scores_almost_equal(
            {"precision": 0.0, "recall": 0.0, "accuracy": 0.0, "fscore": 0.0},
            self.multi_label_precision_recall_accuracy_fscore.compute(
                predictions=[[], [], []],
                references=[[0, 1], [1, 2], [0, 1, 2]],
            ),
        )

    def test_completely_different(self):
        """Predictions sharing no label with the references are expected to score 0.0 everywhere."""
        self._assert_scores_almost_equal(
            {"precision": 0.0, "recall": 0.0, "accuracy": 0.0, "fscore": 0.0},
            self.multi_label_precision_recall_accuracy_fscore.compute(
                predictions=[[0, 1], [1, 2], [0, 1, 2]],
                references=[[3, 4], [5, 6], [7, 8, 9]],
            ),
        )

    def test_max_precision(self):
        """All predicted labels are correct, but only half of the reference labels are predicted."""
        self._assert_scores_almost_equal(
            {"precision": 1.0, "recall": 0.5, "accuracy": 0.5, "fscore": 2 / 3},
            self.multi_label_precision_recall_accuracy_fscore.compute(
                predictions=[[0, 1]],
                references=[[0, 1, 2, 3]],
            ),
        )

    def test_max_recall(self):
        """All reference labels are predicted, but only half of the predicted labels are correct."""
        self._assert_scores_almost_equal(
            {"precision": 0.5, "recall": 1.0, "accuracy": 0.5, "fscore": 2 / 3},
            self.multi_label_precision_recall_accuracy_fscore.compute(
                predictions=[[0, 1, 2, 3]],
                references=[[0, 1]],
            ),
        )

    def test_partial_match(self):
        """One of two predicted labels is correct and one of two reference labels is found."""
        self._assert_scores_almost_equal(
            {"precision": 0.5, "recall": 0.5, "accuracy": 1 / 3, "fscore": 0.5},
            self.multi_label_precision_recall_accuracy_fscore.compute(
                predictions=[[0, 1]],
                references=[[0, 2]],
            ),
        )

    def test_partial_match_multi_sample(self):
        """Partially matching predictions over three samples; F-score uses the default beta."""
        # NOTE(review): the expected values look like per-sample scores averaged over the
        # three samples (e.g. precision (1 + 1 + 0.5) / 3) - confirm against the metric.
        p = 2.5 / 3
        r = 2 / 3
        self._assert_scores_almost_equal(
            {"precision": p, "recall": r, "accuracy": 0.5, "fscore": 2 * p * r / (p + r)},
            self.multi_label_precision_recall_accuracy_fscore.compute(
                predictions=[[0, 1], [0, 1], [2, 3]],
                references=[[0, 1, 2, 3], [0, 1, 2, 3], [2]],
            ),
        )

    def test_beta(self):
        """The F-score follows (1 + beta^2) * p * r / (beta^2 * p + r) for a non-default beta."""
        p = 2.5 / 3
        r = 2 / 3

        # First scenario: beta is configured through the attribute of the metric.
        self.multi_label_precision_recall_accuracy_fscore.beta = 2
        self._assert_scores_almost_equal(
            {"precision": p, "recall": r, "accuracy": 0.5, "fscore": 5 * p * r / (4 * p + r)},
            self.multi_label_precision_recall_accuracy_fscore.compute(
                predictions=[[0, 1], [0, 1], [2, 3]],
                references=[[0, 1, 2, 3], [0, 1, 2, 3], [2]],
            ),
        )

        # Second scenario: beta is passed to compute while the attribute is still 2, so the
        # expected value (beta = 3) requires the keyword argument to take precedence.
        self._assert_scores_almost_equal(
            {"precision": p, "recall": r, "accuracy": 0.5, "fscore": 10 * p * r / (9 * p + r)},
            self.multi_label_precision_recall_accuracy_fscore.compute(
                predictions=[[0, 1], [0, 1], [2, 3]],
                references=[[0, 1, 2, 3], [0, 1, 2, 3], [2]],
                beta=3,
            ),
        )
class MultiLabelPrecisionRecallAccuracyFscoreTestMultiset(MultiLabelPrecisionRecallAccuracyFscoreTest):
    """
    Tests of the metric created with ``config_name="multiset"``.

    All tests inherited from the parent class are run against the multiset configuration as well;
    the tests defined here additionally use samples in which a label occurs more than once.
    """

    def setUp(self):
        """Create a fresh metric instance in the multiset configuration for every test."""
        self.multi_label_precision_recall_accuracy_fscore = MultiLabelPrecisionRecallAccuracyFscore(config_name="multiset")

    def _assert_scores_almost_equal(self, expected, actual):
        """
        Assert that ``actual`` is a dict with exactly the keys of ``expected`` and matching values.

        The values are floats such as ``2/3``; they are compared with ``assertAlmostEqual`` so that
        the tests do not depend on the exact order of floating point operations inside the metric.
        Only assertions of ``unittest.TestCase`` are used, so the helper is self-contained.

        :param expected: mapping of score name to expected value
        :param actual: result returned by the metric's ``compute``
        """
        self.assertIsInstance(actual, dict)
        self.assertEqual(set(expected), set(actual))
        for name, value in expected.items():
            self.assertAlmostEqual(value, actual[name], msg=name)

    def test_multiset_eok(self):
        """Samples with the same labels and multiplicities, in any order, score 1.0 everywhere."""
        self._assert_scores_almost_equal(
            {"precision": 1.0, "recall": 1.0, "accuracy": 1.0, "fscore": 1.0},
            self.multi_label_precision_recall_accuracy_fscore.compute(
                predictions=[[0, 1, 1], [1, 2, 2], [0, 1, 2, 1]],
                references=[[1, 0, 1], [1, 2, 2], [0, 1, 1, 2]],
            ),
        )

    def test_multiset_partial_match(self):
        """All three predicted items are matched, but they cover only half of the six references."""
        self._assert_scores_almost_equal(
            {"precision": 1.0, "recall": 0.5, "accuracy": 0.5, "fscore": 2 / 3},
            self.multi_label_precision_recall_accuracy_fscore.compute(
                predictions=[[0, 1, 1]],
                references=[[1, 0, 1, 1, 0, 0]],
            ),
        )

    def test_multiset_partial_match_multi_sample(self):
        """Partially matching multiset predictions over two samples."""
        # Expected scores are written as the mean of the two per-sample values.
        p = (1 + 2 / 3) / 2
        r = (3 / 4 + 1) / 2
        self._assert_scores_almost_equal(
            {
                "precision": p,
                "recall": r,
                "accuracy": (3 / 4 + 2 / 3) / 2,
                "fscore": 2 * p * r / (p + r),
            },
            self.multi_label_precision_recall_accuracy_fscore.compute(
                predictions=[[0, 1, 1], [1, 2, 2]],
                references=[[1, 0, 1, 1], [1, 2]],
            ),
        )