Initial version of M2 implemented
Browse files- m2.py +49 -7
- requirements.txt +2 -1
m2.py
CHANGED
@@ -14,8 +14,10 @@
|
|
14 |
"""TODO: Add a description here."""
|
15 |
|
16 |
import evaluate
|
17 |
-
import
|
|
|
18 |
|
|
|
19 |
|
20 |
# TODO: Add BibTeX citation
|
21 |
_CITATION = """\
|
@@ -70,9 +72,17 @@ class M2(evaluate.Metric):
|
|
70 |
citation=_CITATION,
|
71 |
inputs_description=_KWARGS_DESCRIPTION,
|
72 |
# This defines the format of each prediction and reference
|
73 |
-
features=
|
74 |
-
'predictions':
|
75 |
-
'references':
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
76 |
}),
|
77 |
# Homepage of the module for documentation
|
78 |
homepage="http://module.homepage",
|
@@ -88,8 +98,40 @@ class M2(evaluate.Metric):
|
|
88 |
|
89 |
def _compute(self, predictions, references):
|
90 |
"""Returns the scores"""
|
|
|
91 |
# TODO: Compute the different scores of the module
|
92 |
-
|
93 |
return {
|
94 |
-
"
|
95 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
"""TODO: Add a description here."""
|
15 |
|
16 |
import evaluate
|
17 |
+
from datasets import Features, Sequence, Value
|
18 |
+
import pdb
|
19 |
|
20 |
+
from m2scorer import get_m2score, get_m2score_from_raw, load_m2
|
21 |
|
22 |
# TODO: Add BibTeX citation
|
23 |
_CITATION = """\
|
|
|
72 |
citation=_CITATION,
|
73 |
inputs_description=_KWARGS_DESCRIPTION,
|
74 |
# This defines the format of each prediction and reference
|
75 |
+
features=Features({
|
76 |
+
'predictions': Value(dtype='string'),
|
77 |
+
'references': {
|
78 |
+
'source_sentence': Value(dtype='string'),
|
79 |
+
'edits': Sequence({
|
80 |
+
'from': Value(dtype='int32'),
|
81 |
+
'to': Value(dtype='int32'),
|
82 |
+
'text': [Value(dtype='string')],
|
83 |
+
'aid': Value(dtype='int32'),
|
84 |
+
}),
|
85 |
+
},
|
86 |
}),
|
87 |
# Homepage of the module for documentation
|
88 |
homepage="http://module.homepage",
|
|
|
98 |
|
99 |
def _compute(self, predictions, references):
    """Score `predictions` against `references` with the M2 scorer.

    `references` (in the HF features format) is first converted to the
    gold-data structure m2scorer expects, then scored without
    re-tokenizing. Returns a dict with F0.5, precision and recall.
    """
    gold = self._features_to_gold_data(references)
    precision, recall, f_half = get_m2score(
        predictions, gold, tokenize=False, keep_gold=True
    )
    return {
        "f0.5": f_half,
        "precision": precision,
        "recall": recall,
    }
|
109 |
+
|
110 |
+
def _features_to_gold_data(self, features):
|
111 |
+
gold_data = []
|
112 |
+
for entry in features:
|
113 |
+
annotators = {}
|
114 |
+
edits = entry['edits']
|
115 |
+
for i in range(len(edits['from'])):
|
116 |
+
edit = (edits['from'][i], edits['to'][i], edits['text'][i])
|
117 |
+
if edits['aid'][i] not in annotators:
|
118 |
+
annotators[edits['aid'][i]] = []
|
119 |
+
annotators[edits['aid'][i]].append(edit)
|
120 |
+
gold_data.append( (entry['source_sentence'], annotators) )
|
121 |
+
return gold_data
|
122 |
+
|
123 |
+
def load_m2_file(self, fpath):
    """Load an .m2 file and convert it to this metric's reference format.

    Parameters
    ----------
    fpath : str
        Path to an .m2 file readable by m2scorer's load_m2.

    Returns
    -------
    list of dicts with keys 'source_sentence' and 'edits', where 'edits'
    is a flat list of {'from', 'to', 'text', 'aid'} dicts. An annotator
    who proposed no edits is kept visible via a sentinel edit
    (from=-1, to=-1, text=['']) so the annotator id survives the
    round trip through the features schema.
    """
    result = []
    for src_sent, annotator_map in load_m2(fpath):
        edits = []
        for aid, annotator_edits in annotator_map.items():
            # Truthiness instead of len(...) == 0 (PEP 8 idiom).
            if not annotator_edits:
                edits.append({'from': -1, 'to': -1, 'text': [''], 'aid': aid})
            for start, end, text in annotator_edits:
                edits.append({'from': start, 'to': end, 'text': text, 'aid': aid})
        result.append({
            'source_sentence': src_sent,
            'edits': edits,
        })
    return result
|
requirements.txt
CHANGED
@@ -1 +1,2 @@
|
|
1 |
-
git+https://github.com/huggingface/evaluate@main
|
|
|
|
1 |
+
git+https://github.com/huggingface/evaluate@main
|
2 |
+
git+https://github.com/zbeloki/m2scorer@master
|