Update f_beta.py

f_beta.py CHANGED
@@ -11,58 +11,71 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 
 import evaluate
 import datasets
 
 
-# TODO: Add BibTeX citation
 _CITATION = """\
-@InProceedings{huggingface:module,
-title = {A great new module},
-authors={huggingface, Inc.},
-year={2020}
 }
 """
 
-# TODO: Add description of the module here
 _DESCRIPTION = """\
-This new module is designed to solve this great ML task and is crafted with a lot of care.
 """
 
 
-# TODO: Add description of the arguments of the module here
 _KWARGS_DESCRIPTION = """
-Calculates how good are predictions given some references, using certain scores
 Args:
-    predictions: list of predictions to score. Each predictions
-        should be a string with tokens separated by spaces.
-    references: list of reference for each prediction. Each
-        reference should be a string with tokens separated by spaces.
 Returns:
-    accuracy: description of the first score,
-    another_score: description of the second score,
 Examples:
-    Examples should be written in doctest format, and should illustrate how
-    to use the function.
 
-    >>> my_new_module = evaluate.load("my_new_module")
-    >>> results = my_new_module.compute(references=[0, 1], predictions=[0, 1])
     >>> print(results)
-    {'accuracy': 1.0}
 """
 
-# TODO: Define external resources urls if needed
-BAD_WORDS_URL = "http://url/to/external/resource/bad_words.txt"
-
 
 @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
 class F_Beta(evaluate.Metric):
-    """TODO: Short description of my evaluation module."""
-
     def _info(self):
-        # TODO: Specifies the evaluate.EvaluationModuleInfo object
         return evaluate.MetricInfo(
             # This is the description that will appear on the modules page.
             module_type="metric",

@@ -71,25 +84,19 @@ class F_Beta(evaluate.Metric):
             inputs_description=_KWARGS_DESCRIPTION,
             # This defines the format of each prediction and reference
             features=datasets.Features({
-                'predictions': datasets.Value('int64'),
-                'references': datasets.Value('int64'),
             }),
             # Homepage of the module for documentation
-            homepage="http://module.homepage",
             # Additional links to the codebase or references
-            codebase_urls=["http://github.com/path/to/codebase/of/new_module"],
-            reference_urls=["http://path.to.reference.url/new_module"]
         )
 
-    def _download_and_prepare(self, dl_manager):
-        """Optional: download external resources useful to compute the scores"""
-        # TODO: Download external resources if needed
-        pass
 
-    def _compute(self, predictions, references):
-        """Returns the scores"""
-        # TODO: Compute the different scores of the module
-        accuracy = sum(i == j for i, j in zip(predictions, references)) / len(predictions)
-        return {
-            "accuracy": accuracy,
-        }
The updated file:

# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""F-Beta score"""

import evaluate
import datasets
from sklearn.metrics import fbeta_score


_CITATION = """\
@article{scikit-learn,
  title={Scikit-learn: Machine Learning in {P}ython},
  author={Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V.
          and Thirion, B. and Grisel, O. and Blondel, M. and Prettenhofer, P.
          and Weiss, R. and Dubourg, V. and Vanderplas, J. and Passos, A. and
          Cournapeau, D. and Brucher, M. and Perrot, M. and Duchesnay, E.},
  journal={Journal of Machine Learning Research},
  volume={12},
  pages={2825--2830},
  year={2011}
}
"""


_DESCRIPTION = """\
Compute the F-beta score.

The F-beta score is the weighted harmonic mean of precision and recall, reaching its optimal value at 1 and its worst value at 0.

The beta parameter determines the weight of recall in the combined score. beta < 1 lends more weight to precision, while beta > 1 favors recall (beta -> 0 considers only precision, beta -> +inf only recall).
"""


_KWARGS_DESCRIPTION = """
Args:
    predictions (`list` of `int`): Predicted labels.
    references (`list` of `int`): Ground truth labels.
    labels (`list` of `int`): The set of labels to include when `average` is not set to `'binary'`, and the order of the labels if `average` is `None`. Labels present in the data can be excluded, for example to calculate a multiclass average ignoring a majority negative class. Labels not present in the data will result in 0 components in a macro average. For multilabel targets, labels are column indices. By default, all labels in `predictions` and `references` are used in sorted order. Defaults to None.
    pos_label (`int`): The class to be considered the positive class, in the case where `average` is set to `'binary'`. Defaults to 1.
    average (`string`): This parameter is required for multiclass/multilabel targets. If set to `None`, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data. Defaults to `'binary'`.
        - 'binary': Only report results for the class specified by `pos_label`. This is applicable only if the classes found in `predictions` and `references` are binary.
        - 'micro': Calculate metrics globally by counting the total true positives, false negatives and false positives.
        - 'macro': Calculate metrics for each label, and find their unweighted mean. This does not take label imbalance into account.
        - 'weighted': Calculate metrics for each label, and find their average weighted by support (the number of true instances for each label). This alters 'macro' to account for label imbalance. This option can result in an F-score that is not between precision and recall.
        - 'samples': Calculate metrics for each instance, and find their average (only meaningful for multilabel classification).
    sample_weight (`list` of `float`): Sample weights. Defaults to None.
    beta (`float`): Determines the weight of recall in the combined score. Defaults to 1.0.

Returns:
    f_beta_score (`float` (if `average` is not None) or `array` of `float`, shape = [n_unique_labels]): F-beta score of the positive class in binary classification, or the average of the F-beta scores of each class for the multiclass task.
        The F-beta score is the weighted harmonic mean of precision and recall, reaching its optimal value at 1 and its worst value at 0.

Examples:

    >>> f_beta = evaluate.load("leslyarun/f_beta")
    >>> results = f_beta.compute(references=[0, 1], predictions=[0, 1])
    >>> print(results)
    {'f_beta_score': 1.0}

    For further examples, refer to https://scikit-learn.org/stable/modules/generated/sklearn.metrics.fbeta_score.html#sklearn.metrics.fbeta_score
"""


@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class F_Beta(evaluate.Metric):
    def _info(self):
        return evaluate.MetricInfo(
            # This is the description that will appear on the modules page.
            module_type="metric",
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            # This defines the format of each prediction and reference
            features=datasets.Features({
                'predictions': datasets.Value('int32'),
                'references': datasets.Value('int32'),
            }),
            # Homepage of the module for documentation
            homepage="https://huggingface.co/spaces/leslyarun/f_beta",
            # Additional links to the codebase or references
            codebase_urls=["https://github.com/scikit-learn/scikit-learn/blob/f3f51f9b6/sklearn/metrics/_classification.py#L1148"],
            reference_urls=["https://scikit-learn.org/stable/modules/generated/sklearn.metrics.fbeta_score.html#sklearn.metrics.fbeta_score"]
        )

    def _compute(self, predictions, references, labels=None, pos_label=1, average="binary", sample_weight=None, beta=1.0):
        # beta needs a default (a non-default parameter may not follow default
        # ones) and must be passed by keyword, since sklearn's fbeta_score takes
        # it as a keyword argument. beta=1.0 makes the metric reduce to F1.
        score = fbeta_score(
            references, predictions, beta=beta, labels=labels, pos_label=pos_label,
            average=average, sample_weight=sample_weight
        )
        # average=None yields an array of per-class scores; otherwise a scalar.
        return {"f_beta_score": float(score) if score.size == 1 else score}
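
For multiclass targets, the average argument described in _KWARGS_DESCRIPTION controls how per-class scores are combined. A short sketch of those options with illustrative labels; it assumes the fixed _compute signature above, to which compute forwards extra keyword arguments:

import evaluate

f_beta = evaluate.load("leslyarun/f_beta")
references = [0, 1, 2, 2, 1, 0]   # illustrative multiclass labels
predictions = [0, 2, 2, 2, 1, 1]

# average=None returns one score per class; the other modes reduce them to a scalar.
for average in (None, "micro", "macro", "weighted"):
    results = f_beta.compute(references=references, predictions=predictions,
                             beta=1.0, average=average)
    print(average, results["f_beta_score"])

With average=None the module returns the raw per-class array, which is why _compute casts to float only when the score is a single value.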