DarrenChensformer committed on
Commit ba6a59b
1 Parent(s): 3360bee

Add main evaluation method

Files changed (1)
  1. relation_extraction.py +77 -5
relation_extraction.py CHANGED
@@ -15,6 +15,7 @@
 
 import evaluate
 import datasets
+import numpy as np
 
 
 # TODO: Add BibTeX citation
@@ -86,10 +87,81 @@ class relation_extraction(evaluate.Metric):
         # TODO: Download external resources if needed
         pass
 
-    def _compute(self, predictions, references):
+    def _compute(self, pred_relations, gt_relations, mode="strict", relation_types=[]):
         """Returns the scores"""
         # TODO: Compute the different scores of the module
-        accuracy = sum(i == j for i, j in zip(predictions, references)) / len(predictions)
-        return {
-            "accuracy": accuracy,
-        }
+
+        assert mode in ["strict", "boundaries"]
+
+        # construct relation_types from ground truth if not given
+        if len(relation_types) == 0:
+            for triplets in gt_relations:
+                for triplet in triplets:
+                    relation = triplet["type"]
+                    if relation not in relation_types:
+                        relation_types.append(relation)
+
+        scores = {rel: {"tp": 0, "fp": 0, "fn": 0} for rel in relation_types + ["ALL"]}
+
+        # Count GT relations and Predicted relations
+        n_sents = len(gt_relations)
+        n_rels = sum([len([rel for rel in sent]) for sent in gt_relations])
+        n_found = sum([len([rel for rel in sent]) for sent in pred_relations])
+
+        # Count TP, FP and FN per type
+        for pred_sent, gt_sent in zip(pred_relations, gt_relations):
+            for rel_type in relation_types:
+                # strict mode takes argument types into account
+                if mode == "strict":
+                    pred_rels = {(rel["head"], rel["head_type"], rel["tail"], rel["tail_type"]) for rel in pred_sent if
+                                 rel["type"] == rel_type}
+                    gt_rels = {(rel["head"], rel["head_type"], rel["tail"], rel["tail_type"]) for rel in gt_sent if
+                               rel["type"] == rel_type}
+
+                # boundaries mode only takes argument spans into account
+                elif mode == "boundaries":
+                    pred_rels = {(rel["head"], rel["tail"]) for rel in pred_sent if rel["type"] == rel_type}
+                    gt_rels = {(rel["head"], rel["tail"]) for rel in gt_sent if rel["type"] == rel_type}
+
+                scores[rel_type]["tp"] += len(pred_rels & gt_rels)
+                scores[rel_type]["fp"] += len(pred_rels - gt_rels)
+                scores[rel_type]["fn"] += len(gt_rels - pred_rels)
+
+        # Compute per entity Precision / Recall / F1
+        for rel_type in scores.keys():
+            if scores[rel_type]["tp"]:
+                scores[rel_type]["p"] = 100 * scores[rel_type]["tp"] / (scores[rel_type]["fp"] + scores[rel_type]["tp"])
+                scores[rel_type]["r"] = 100 * scores[rel_type]["tp"] / (scores[rel_type]["fn"] + scores[rel_type]["tp"])
+            else:
+                scores[rel_type]["p"], scores[rel_type]["r"] = 0, 0
+
+            if not scores[rel_type]["p"] + scores[rel_type]["r"] == 0:
+                scores[rel_type]["f1"] = 2 * scores[rel_type]["p"] * scores[rel_type]["r"] / (
+                        scores[rel_type]["p"] + scores[rel_type]["r"])
+            else:
+                scores[rel_type]["f1"] = 0
+
+        # Compute micro F1 Scores
+        tp = sum([scores[rel_type]["tp"] for rel_type in relation_types])
+        fp = sum([scores[rel_type]["fp"] for rel_type in relation_types])
+        fn = sum([scores[rel_type]["fn"] for rel_type in relation_types])
+
+        if tp:
+            precision = 100 * tp / (tp + fp)
+            recall = 100 * tp / (tp + fn)
+            f1 = 2 * precision * recall / (precision + recall)
+
+        else:
+            precision, recall, f1 = 0, 0, 0
+
+        scores["ALL"]["p"] = precision
+        scores["ALL"]["r"] = recall
+        scores["ALL"]["f1"] = f1
+        scores["ALL"]["tp"] = tp
+        scores["ALL"]["fp"] = fp
+        scores["ALL"]["fn"] = fn
+
+        # Compute Macro F1 Scores
+        scores["ALL"]["Macro_f1"] = np.mean([scores[ent_type]["f1"] for ent_type in relation_types])
+        scores["ALL"]["Macro_p"] = np.mean([scores[ent_type]["p"] for ent_type in relation_types])
+        scores["ALL"]["Macro_r"] = np.mean([scores[ent_type]["r"] for ent_type in relation_types])