Spaces:

danieldux
/

isco_hierarchical_accuracy

Running

App Files Files Community

danieldux committed on Mar 12

Commit

03c8589

•

1 Parent(s): ad04d80

Refactor calculate_hierarchical_precision_recall to use weighted sums

Browse files

Files changed (1) hide show

ham.py +33 -16

ham.py CHANGED Viewed

@@ -46,36 +46,53 @@ def extend_with_ancestors(classes: set, hierarchy: dict) -> set:
 def calculate_hierarchical_precision_recall(
     reference_codes: List[str],
     predicted_codes: List[str],
     hierarchy: Dict[str, Set[str]],
 ) -> Tuple[float, float]:
     """
     Calculate set-based hierarchical precision and recall.

     Both code lists are expanded with the ancestors listed for each code in
     ``hierarchy``; precision and recall are then the share of the expanded
     predicted / reference set that lies in their intersection.

     Args:
         reference_codes (List[str]): The list of reference codes.
         predicted_codes (List[str]): The list of predicted codes.
         hierarchy (Dict[str, Set[str]]): Maps a code to the set of its
             ancestor codes. Codes missing from the mapping contribute only
             themselves.

     Returns:
         Tuple[float, float]: ``(hP, hR)``. A value is ``0.0`` when the
         corresponding expanded set is empty.
     """

     def _with_ancestors(codes: List[str]) -> Set[str]:
         # Each code plus every ancestor the hierarchy lists for it.
         expanded = set(codes)
         for code in codes:
             expanded.update(hierarchy.get(code, ()))
         return expanded

     extended_real = _with_ancestors(reference_codes)
     extended_predicted = _with_ancestors(predicted_codes)
     n_correct = len(extended_real & extended_predicted)

     # Fall back to a float so the annotated return type holds for empty input.
     hP = n_correct / len(extended_predicted) if extended_predicted else 0.0
     hR = n_correct / len(extended_real) if extended_real else 0.0
     return hP, hR

 def calculate_hierarchical_precision_recall(
     reference_codes: List[str],
     predicted_codes: List[str],
     hierarchy: Dict[str, Dict[str, float]],
 ) -> Tuple[float, float]:
     """
     Calculate weighted hierarchical precision and recall.

     Each code list is expanded into a ``code -> weight`` mapping: a listed
     code gets weight 1.0 and each of its ancestors gets the weight given in
     ``hierarchy`` (the largest one when an ancestor is reached from several
     codes). The "correct" mass is the sum, over codes present in both
     mappings, of the smaller of the two weights.

     Args:
         reference_codes (List[str]): The list of reference codes.
         predicted_codes (List[str]): The list of predicted codes.
         hierarchy (Dict[str, Dict[str, float]]): Maps a code to a mapping
             of its ancestor codes to their weights. Codes missing from the
             mapping contribute only themselves.

     Returns:
         Tuple[float, float]: ``(hP, hR)``, the correct mass divided by the
         total predicted weight and by the total reference weight. A value
         is ``0.0`` when the corresponding total is zero.
     """

     def _weighted_closure(codes: List[str]) -> Dict[str, float]:
         # Expand codes into {code: weight}, including weighted ancestors.
         weights: Dict[str, float] = {}
         for code in codes:
             weights[code] = 1.0  # Full weight for an exact code
             for ancestor, ancestor_weight in hierarchy.get(code, {}).items():
                 # Keep the strongest weight seen for a shared ancestor.
                 weights[ancestor] = max(weights.get(ancestor, 0), ancestor_weight)
         return weights

     extended_real = _weighted_closure(reference_codes)
     extended_predicted = _weighted_closure(predicted_codes)

     # A shared code only counts for the weight both sides agree on.
     correct_weights = sum(
         min(weight, extended_real[code])
         for code, weight in extended_predicted.items()
         if code in extended_real
     )
     total_predicted_weights = sum(extended_predicted.values())
     total_real_weights = sum(extended_real.values())

     # Fall back to a float so the annotated return type holds for empty input.
     hP = correct_weights / total_predicted_weights if total_predicted_weights else 0.0
     hR = correct_weights / total_real_weights if total_real_weights else 0.0
     return hP, hR