Different sizes can be input and the Python version is reverted
- README.md +17 -8
- dicrect_compute_metric.py +49 -0
- matching_series.py +11 -3
README.md
CHANGED
@@ -12,19 +12,28 @@ pinned: false
 
 # Metric Card for matching_series
 
-***Module Card Instructions:*** *Fill out the following subsections. Feel free to take a look at existing metric cards if you'd like examples.*
-
 ## Metric Description
-
+Matching Series is a metric for evaluating time-series generation models. It is based on the idea of matching the generated time series with the original time series. The metric calculates the Mean Squared Error (MSE) between the generated and original time series over matched instances. The metric outputs a score greater than or equal to 0, where 0 indicates a perfect generation.
 
 ## How to Use
-
-
-
+At minimum, the metric requires the original time series and the generated time series as input. The metric can be used to evaluate the performance of time-series generation models.
+
+```python
+>>> num_generation = 100
+>>> num_reference = 10
+>>> seq_len = 100
+>>> num_features = 10
+>>> references = np.random.rand(num_reference, seq_len, num_features)
+>>> predictions = np.random.rand(num_generation, seq_len, num_features)
+>>> metric = evaluate.load("bowdbeg/matching_series")
+>>> results = metric.compute(references=references, predictions=predictions, batch_size=1000)
+>>> print(results)
+{'matching_mse': 0.15250070138019745, 'harmonic_mean': 0.15246672297315564, 'covered_mse': 0.15243275970407652, 'index_mse': 0.16772539808686357, 'matching_mse_features': [0.11976368411913872, 0.1238622735860897, 0.1235259257706047, 0.12385236248438022, 0.12241466736218365, 0.12328439290438079, 0.1232240061707885, 0.12342319803028035, 0.12235222572924524, 0.12437865819262514], 'harmonic_mean_features': [0.12010478503934609, 0.12379899085819131, 0.12321441761307182, 0.12273884163905005, 0.12256126537300535, 0.12323289686030311, 0.12323847434641247, 0.12333469339243568, 0.12273530480438972, 0.12390254295493403], 'covered_mse_features': [0.12044783449951382, 0.1237357727610885, 0.12290447662839017, 0.12164516506865233, 0.12270821492248948, 0.12318144381818667, 0.12325294591995689, 0.12324631559392285, 0.12312079021887229, 0.12343005890751833], 'index_mse_features': [0.16331894487549958, 0.1679797859239729, 0.16904075114728268, 0.16962427920551068, 0.16915910655024802, 0.16686197230602684, 0.17056311327206022, 0.1638796919248867, 0.16736730842643857, 0.16945902723670975], 'macro_matching_mse': 0.1230081394349717, 'macro_covered_mse': 0.12276730183385913, 'macro_harmonic_mean': 0.12288622128811397}
+```
 
 ### Inputs
-
-- **
+- **predictions** (list of list of list of float or numpy.ndarray): The generated time series. The shape of the array should be `(num_generation, seq_len, num_features)`.
+- **references** (list of list of list of float or numpy.ndarray): The original time series. The shape of the array should be `(num_reference, seq_len, num_features)`.
 
 ### Output Values
 
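The card's Output Values section is still empty, and the description above does not say how the matched scores in the example output are obtained. The sketch below is one plausible reading, not the module's actual code: it assumes `matching_mse` averages, over generated series, the MSE to each one's nearest reference; `covered_mse` averages, over references, the MSE to each one's nearest generated series; and `harmonic_mean` combines the two. The helper name `matching_scores_sketch` is made up for illustration.

```python
import numpy as np

def matching_scores_sketch(predictions: np.ndarray, references: np.ndarray) -> dict:
    """Toy re-derivation of the matched-MSE scores; shapes follow the README example."""
    # predictions: (num_generation, seq_len, num_features)
    # references:  (num_reference,  seq_len, num_features)
    diff = predictions[:, None, :, :] - references[None, :, :, :]  # (gen, ref, seq_len, num_features)
    pairwise_mse = (diff ** 2).mean(axis=(2, 3))                   # MSE of every generation/reference pair
    matching_mse = pairwise_mse.min(axis=1).mean()  # each generation matched to its closest reference
    covered_mse = pairwise_mse.min(axis=0).mean()   # each reference covered by its closest generation
    harmonic_mean = 2 * matching_mse * covered_mse / (matching_mse + covered_mse)
    return {
        "matching_mse": float(matching_mse),
        "covered_mse": float(covered_mse),
        "harmonic_mean": float(harmonic_mean),
    }

print(matching_scores_sketch(np.random.rand(100, 100, 10), np.random.rand(10, 100, 10)))
```

As a rough sanity check, the MSE between two independent uniform random series is about E[(X - Y)^2] = 1/6 ≈ 0.167, which is consistent with the `index_mse` value in the README example; the matched scores sit slightly below it, as minimizing over candidates would predict.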
dicrect_compute_metric.py
ADDED
@@ -0,0 +1,49 @@
+from typing import Optional
+
+import evaluate
+
+
+class DirectComputeMetric(evaluate.Metric):
+    """
+    Base class for metrics that directly compute the score from the predictions and references without add_batch
+    """
+
+    def compute(self, *, predictions=None, references=None, **kwargs) -> Optional[dict]:
+        """Compute the evaluation module.
+
+        Usage of positional arguments is not allowed to prevent mistakes.
+
+        Args:
+            predictions (`list/array/tensor`, *optional*):
+                Predictions.
+            references (`list/array/tensor`, *optional*):
+                References.
+            **kwargs (optional):
+                Keyword arguments that will be forwarded to the evaluation module [`~evaluate.EvaluationModule.compute`]
+                method (see details in the docstring).
+
+        Return:
+            `dict` or `None`
+
+            - Dictionary with the results if this evaluation module is run on the main process (`process_id == 0`).
+            - `None` if the evaluation module is not run on the main process (`process_id != 0`).
+
+        ```py
+        >>> import evaluate
+        >>> accuracy = evaluate.load("accuracy")
+        >>> accuracy.compute(predictions=[0, 1, 1, 0], references=[0, 1, 0, 1])
+        ```
+        """
+        all_kwargs = {"predictions": predictions, "references": references, **kwargs}
+        if predictions is None and references is None:
+            missing_kwargs = {k: None for k in self._feature_names() if k not in all_kwargs}
+            all_kwargs.update(missing_kwargs)
+        else:
+            missing_inputs = [k for k in self._feature_names() if k not in all_kwargs]
+            if missing_inputs:
+                raise ValueError(
+                    f"Evaluation module inputs are missing: {missing_inputs}. All required inputs are {list(self._feature_names())}"
+                )
+        inputs = {input_name: all_kwargs[input_name] for input_name in self._feature_names()}
+        compute_kwargs = {k: kwargs[k] for k in kwargs if k not in self._feature_names()}
+        return self._compute(**inputs, **compute_kwargs)
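To make the intent of the new base class concrete, here is a minimal, hypothetical subclass. `ToyMeanGap`, its feature spec, and the direct instantiation are illustrative assumptions, not part of this commit; the point is that `compute` forwards the raw inputs straight to `_compute` without going through `add_batch`, so `predictions` and `references` can have different first dimensions.

```python
import datasets
import evaluate
import numpy as np

from dicrect_compute_metric import DirectComputeMetric  # the module added in this commit


class ToyMeanGap(DirectComputeMetric):
    """Hypothetical metric: absolute gap between the means of the two inputs."""

    def _info(self):
        return evaluate.MetricInfo(
            description="Toy example for illustration only.",
            citation="",
            inputs_description="",
            features=datasets.Features(
                {
                    "predictions": datasets.Sequence(datasets.Sequence(datasets.Value("float64"))),
                    "references": datasets.Sequence(datasets.Sequence(datasets.Value("float64"))),
                }
            ),
        )

    def _compute(self, predictions, references):
        # compute() passes the arrays through as-is, so the two inputs
        # may differ in length (e.g. 100 generations vs. 10 references).
        return {"mean_gap": float(abs(np.mean(predictions) - np.mean(references)))}


metric = ToyMeanGap()
print(metric.compute(predictions=np.random.rand(100, 100, 10), references=np.random.rand(10, 100, 10)))
```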
matching_series.py
CHANGED
@@ -14,11 +14,14 @@
 """TODO: Add a description here."""
 
 import statistics
+from typing import Optional, Union
 
 import datasets
 import evaluate
 import numpy as np
 
+from dicrect_compute_metric import DirectComputeMetric
+
 # TODO: Add BibTeX citation
 _CITATION = """\
 @InProceedings{huggingface:module,
@@ -55,7 +58,7 @@ Examples:
 
 
 @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
-class matching_series(evaluate.Metric):
+class matching_series(DirectComputeMetric):
     """TODO: Short description of my evaluation module."""
 
     def _info(self):
@@ -74,7 +77,7 @@ class matching_series(evaluate.Metric):
                 }
             ),
             # Homepage of the module for documentation
-            homepage="
+            homepage="https://huggingface.co/spaces/bowdbeg/matching_series",
             # Additional links to the codebase or references
             codebase_urls=["http://github.com/path/to/codebase/of/new_module"],
             reference_urls=["http://path.to.reference.url/new_module"],
@@ -84,7 +87,12 @@ class matching_series(evaluate.Metric):
         """Optional: download external resources useful to compute the scores"""
         pass
 
-    def _compute(
+    def _compute(
+        self,
+        predictions: Union[list, np.ndarray],
+        references: Union[list, np.ndarray],
+        batch_size: Optional[int] = None,
+    ):
         """
         Compute the scores of the module given the predictions and references
         Args:
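The hunk ends before the body of `_compute`, so the role of the new `batch_size` argument is not visible here. A plausible use, consistent with the README call `compute(..., batch_size=1000)` and with the commit's goal of accepting differently sized inputs, is to chunk the pairwise comparison between generated and reference series so the full distance computation never materializes at once. The helper below is an illustrative sketch under that assumption, not the module's implementation.

```python
import numpy as np

def chunked_pairwise_mse(predictions: np.ndarray, references: np.ndarray, batch_size=None) -> np.ndarray:
    """Pairwise MSE matrix of shape (num_generation, num_reference), built in chunks to bound memory."""
    num_generation = predictions.shape[0]
    batch_size = batch_size or num_generation
    rows = []
    for start in range(0, num_generation, batch_size):
        chunk = predictions[start:start + batch_size]             # (batch, seq_len, num_features)
        diff = chunk[:, None, :, :] - references[None, :, :, :]   # (batch, num_reference, seq_len, num_features)
        rows.append((diff ** 2).mean(axis=(2, 3)))                # (batch, num_reference)
    return np.concatenate(rows, axis=0)
```

From such a matrix, the matched scores sketched after the README diff can then be read off with row-wise and column-wise minima.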