Spaces:

evaluate-metric
/

matthews_correlation

Running

App Files Files Community

lvwerra HF staff committed on Nov 17, 2022

Commit

adb7eea

•

1 Parent(s): e7de28c

Update Space (evaluate main: 4ca8eed5)

Browse files

Files changed (3) hide show

README.md +2 -1
matthews_correlation.py +41 -4
requirements.txt +1 -1

README.md CHANGED Viewed

@@ -48,9 +48,10 @@ At minimum, this metric requires a list of predictions and a list of references:
 - **`predictions`** (`list` of `int`s): Predicted class labels.
 - **`references`** (`list` of `int`s): Ground truth labels.
 - **`sample_weight`** (`list` of `int`s, `float`s, or `bool`s): Sample weights. Defaults to `None`.
 ### Output Values
-- **`matthews_correlation`** (`float`): Matthews correlation coefficient.
 The metric output takes the following form:
 ```python

 - **`predictions`** (`list` of `int`s): Predicted class labels.
 - **`references`** (`list` of `int`s): Ground truth labels.
 - **`sample_weight`** (`list` of `int`s, `float`s, or `bool`s): Sample weights. Defaults to `None`.
+- **`average`** (`None` or `'macro'`): For the multilabel configuration, whether to return one correlation coefficient per feature (`average=None`) or their unweighted mean (`average='macro'`). Defaults to `None`.
 ### Output Values
+- **`matthews_correlation`** (`float` or `list` of `float`s): Matthews correlation coefficient, or list of them in the multilabel case without averaging.
 The metric output takes the following form:
 ```python

matthews_correlation.py CHANGED Viewed

@@ -14,6 +14,7 @@
 """Matthews Correlation metric."""
 import datasets
 from sklearn.metrics import matthews_corrcoef
 import evaluate
@@ -36,6 +37,9 @@ _KWARGS_DESCRIPTION = """
 Args:
     predictions (list of int): Predicted labels, as returned by a model.
     references (list of int): Ground truth labels.
     sample_weight (list of int, float, or bool): Sample weights. Defaults to `None`.
 Returns:
     matthews_correlation (dict containing float): Matthews correlation.
@@ -62,6 +66,21 @@ Examples:
         ...                                     sample_weight=[0.5, 1, 0, 0, 0, 1])
         >>> print(round(results['matthews_correlation'], 2))
         -0.25
 """
 _CITATION = """\
@@ -88,6 +107,11 @@ class MatthewsCorrelation(evaluate.Metric):
             inputs_description=_KWARGS_DESCRIPTION,
             features=datasets.Features(
                 {
                     "predictions": datasets.Value("int32"),
                     "references": datasets.Value("int32"),
                 }
@@ -97,7 +121,20 @@ class MatthewsCorrelation(evaluate.Metric):
             ],
         )
-    def _compute(self, predictions, references, sample_weight=None):
-        return {
-            "matthews_correlation": float(matthews_corrcoef(references, predictions, sample_weight=sample_weight)),
-        }

 """Matthews Correlation metric."""
 import datasets
+import numpy as np
 from sklearn.metrics import matthews_corrcoef
 import evaluate
 Args:
     predictions (list of int): Predicted labels, as returned by a model.
     references (list of int): Ground truth labels.
+    average (`string`): This parameter is used for multilabel configs. Defaults to `None`.
+        - None (default): Returns an array of Matthews correlation coefficients, one for each feature
+        - 'macro': Calculate metrics for each feature, and find their unweighted mean.
     sample_weight (list of int, float, or bool): Sample weights. Defaults to `None`.
 Returns:
     matthews_correlation (dict containing float): Matthews correlation.
         ...                                     sample_weight=[0.5, 1, 0, 0, 0, 1])
         >>> print(round(results['matthews_correlation'], 2))
         -0.25
+    Example 4, Multi-label without averaging:
+        >>> matthews_metric = evaluate.load("matthews_correlation", config_name="multilabel")
+        >>> results = matthews_metric.compute(references=[[0,1], [1,0], [1,1]],
+        ...                                     predictions=[[0,1], [1,1], [0,1]])
+        >>> print(results['matthews_correlation'])
+        [0.5, 0.0]
+    Example 5, Multi-label with averaging:
+        >>> matthews_metric = evaluate.load("matthews_correlation", config_name="multilabel")
+        >>> results = matthews_metric.compute(references=[[0,1], [1,0], [1,1]],
+        ...                                     predictions=[[0,1], [1,1], [0,1]],
+        ...                                     average='macro')
+        >>> print(round(results['matthews_correlation'], 2))
+        0.25
 """
 _CITATION = """\
             inputs_description=_KWARGS_DESCRIPTION,
             features=datasets.Features(
                 {
+                    "predictions": datasets.Sequence(datasets.Value("int32")),
+                    "references": datasets.Sequence(datasets.Value("int32")),
+                }
+                if self.config_name == "multilabel"
+                else {
                     "predictions": datasets.Value("int32"),
                     "references": datasets.Value("int32"),
                 }
             ],
         )
+    def _compute(self, predictions, references, sample_weight=None, average=None):
+        if self.config_name == "multilabel":
+            references = np.array(references)
+            predictions = np.array(predictions)
+            if not (references.ndim == 2 and predictions.ndim == 2):
+                raise ValueError("For multi-label inputs, both references and predictions should be 2-dimensional")
+            matthews_corr = [
+                matthews_corrcoef(predictions[:, i], references[:, i], sample_weight=sample_weight)
+                for i in range(references.shape[1])
+            ]
+            if average == "macro":
+                matthews_corr = np.mean(matthews_corr)
+            elif average is not None:
+                raise ValueError("Invalid `average`: expected `macro`, or None ")
+        else:
+            matthews_corr = float(matthews_corrcoef(references, predictions, sample_weight=sample_weight))
+        return {"matthews_correlation": matthews_corr}

requirements.txt CHANGED Viewed

	@@ -1,2 +1,2 @@
1	- git+https://github.com/huggingface/evaluate@a12836bcf91d5e2fb5d1649df9b63dbee9b5e5de
2	scikit-learn


1	+ git+https://github.com/huggingface/evaluate@4ca8eed54000a52e542145f2d8d6201032423acb
2	scikit-learn