lvwerra (HF staff) committed
Commit adb7eea
1 parent: e7de28c

Update Space (evaluate main: 4ca8eed5)

Files changed (3):
  1. README.md +2 -1
  2. matthews_correlation.py +41 -4
  3. requirements.txt +1 -1
README.md CHANGED
@@ -48,9 +48,10 @@ At minimum, this metric requires a list of predictions and a list of references:
 - **`predictions`** (`list` of `int`s): Predicted class labels.
 - **`references`** (`list` of `int`s): Ground truth labels.
 - **`sample_weight`** (`list` of `int`s, `float`s, or `bool`s): Sample weights. Defaults to `None`.
+- **`average`** (`None` or `macro`): For the multilabel case, whether to return one correlation coefficient per feature (`average=None`), or the average of them (`average='macro'`). Defaults to `None`.
 
 ### Output Values
-- **`matthews_correlation`** (`float`): Matthews correlation coefficient.
+- **`matthews_correlation`** (`float` or `list` of `float`s): Matthews correlation coefficient, or list of them in the multilabel case without averaging.
 
 The metric output takes the following form:
 ```python
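
Taken together, the arguments documented above can be exercised as in the short sketch below; the single-label values are illustrative, and the multilabel calls mirror the doctest examples added to `matthews_correlation.py` further down:

```python
import evaluate

# Single-label usage: flat lists of int labels.
matthews_metric = evaluate.load("matthews_correlation")
results = matthews_metric.compute(references=[0, 1, 0, 1], predictions=[0, 1, 1, 1])
print(round(results["matthews_correlation"], 2))  # 0.58

# Multilabel usage: load the "multilabel" config and pass one row of labels per sample.
multilabel_metric = evaluate.load("matthews_correlation", config_name="multilabel")
refs = [[0, 1], [1, 0], [1, 1]]
preds = [[0, 1], [1, 1], [0, 1]]

# average=None (default): one coefficient per label column.
results = multilabel_metric.compute(references=refs, predictions=preds)
print(results["matthews_correlation"])  # [0.5, 0.0]

# average='macro': unweighted mean of the per-label coefficients.
results = multilabel_metric.compute(references=refs, predictions=preds, average="macro")
print(round(results["matthews_correlation"], 2))  # 0.25
```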
matthews_correlation.py CHANGED
@@ -14,6 +14,7 @@
 """Matthews Correlation metric."""
 
 import datasets
+import numpy as np
 from sklearn.metrics import matthews_corrcoef
 
 import evaluate
@@ -36,6 +37,9 @@ _KWARGS_DESCRIPTION = """
 Args:
     predictions (list of int): Predicted labels, as returned by a model.
     references (list of int): Ground truth labels.
+    average (`string`): This parameter is used for multilabel configs. Defaults to `None`.
+        - None (default): Returns an array of Matthews correlation coefficients, one for each feature
+        - 'macro': Calculate metrics for each feature, and find their unweighted mean.
     sample_weight (list of int, float, or bool): Sample weights. Defaults to `None`.
 Returns:
     matthews_correlation (dict containing float): Matthews correlation.
@@ -62,6 +66,21 @@ Examples:
        ...                                     sample_weight=[0.5, 1, 0, 0, 0, 1])
        >>> print(round(results['matthews_correlation'], 2))
        -0.25
+
+    Example 4, Multi-label without averaging:
+        >>> matthews_metric = evaluate.load("matthews_correlation", config_name="multilabel")
+        >>> results = matthews_metric.compute(references=[[0,1], [1,0], [1,1]],
+        ...                                   predictions=[[0,1], [1,1], [0,1]])
+        >>> print(results['matthews_correlation'])
+        [0.5, 0.0]
+
+    Example 5, Multi-label with averaging:
+        >>> matthews_metric = evaluate.load("matthews_correlation", config_name="multilabel")
+        >>> results = matthews_metric.compute(references=[[0,1], [1,0], [1,1]],
+        ...                                   predictions=[[0,1], [1,1], [0,1]],
+        ...                                   average='macro')
+        >>> print(round(results['matthews_correlation'], 2))
+        0.25
 """
 
 _CITATION = """\
@@ -88,6 +107,11 @@ class MatthewsCorrelation(evaluate.Metric):
             inputs_description=_KWARGS_DESCRIPTION,
             features=datasets.Features(
                 {
+                    "predictions": datasets.Sequence(datasets.Value("int32")),
+                    "references": datasets.Sequence(datasets.Value("int32")),
+                }
+                if self.config_name == "multilabel"
+                else {
                     "predictions": datasets.Value("int32"),
                     "references": datasets.Value("int32"),
                 }
@@ -97,7 +121,20 @@ class MatthewsCorrelation(evaluate.Metric):
             ],
         )
 
-    def _compute(self, predictions, references, sample_weight=None):
-        return {
-            "matthews_correlation": float(matthews_corrcoef(references, predictions, sample_weight=sample_weight)),
-        }
+    def _compute(self, predictions, references, sample_weight=None, average=None):
+        if self.config_name == "multilabel":
+            references = np.array(references)
+            predictions = np.array(predictions)
+            if not (references.ndim == 2 and predictions.ndim == 2):
+                raise ValueError("For multi-label inputs, both references and predictions should be 2-dimensional")
+            matthews_corr = [
+                matthews_corrcoef(predictions[:, i], references[:, i], sample_weight=sample_weight)
+                for i in range(references.shape[1])
+            ]
+            if average == "macro":
+                matthews_corr = np.mean(matthews_corr)
+            elif average is not None:
+                raise ValueError("Invalid `average`: expected `macro`, or None")
+        else:
+            matthews_corr = float(matthews_corrcoef(references, predictions, sample_weight=sample_weight))
+        return {"matthews_correlation": matthews_corr}
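
The multilabel branch of the new `_compute` amounts to applying scikit-learn's `matthews_corrcoef` to each label column and then optionally taking an unweighted mean. A minimal standalone sketch of that logic, reusing the doctest values above (the `float` cast only keeps the printed output tidy):

```python
import numpy as np
from sklearn.metrics import matthews_corrcoef

# Doctest values from the examples above: 3 samples, 2 labels.
references = np.array([[0, 1], [1, 0], [1, 1]])
predictions = np.array([[0, 1], [1, 1], [0, 1]])

# average=None: one Matthews correlation coefficient per label column.
per_label = [
    float(matthews_corrcoef(references[:, i], predictions[:, i]))
    for i in range(references.shape[1])
]
print(per_label)           # [0.5, 0.0]

# average='macro': unweighted mean of the per-label coefficients.
print(np.mean(per_label))  # 0.25
```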
requirements.txt CHANGED
@@ -1,2 +1,2 @@
-git+https://github.com/huggingface/evaluate@a12836bcf91d5e2fb5d1649df9b63dbee9b5e5de
+git+https://github.com/huggingface/evaluate@4ca8eed54000a52e542145f2d8d6201032423acb
 scikit-learn