Spaces:

danieldux
/

isco_hierarchical_accuracy

Running

App Files Files Community

danieldux committed on Mar 4, 2024

Commit

8a4a728

1 Parent(s): 17ea6af

Refactor code to improve readability and add type annotations

Browse files

Files changed (1) hide show

ham.py +33 -50

ham.py CHANGED Viewed

@@ -1,28 +1,19 @@
 from typing import List, Set, Dict, Tuple
-def find_ancestors1(tree, code):
     """
-    Recursively finds ancestors of a given class (e.g., an ISCO-08 code) in a hierarchical JSON structure.
     Args:
-    - tree: A dictionary representing the hierarchical structure.
-    - code: A string representing the label of the class.
     Returns:
-    - A list of strings, each representing an ancestor of the input class.
     """
-    ancestors = []
-    current = code
-    while current:
-        parent = tree[current]["parent"]
-        if parent:
-            ancestors.append(parent)
-        current = parent
-    return ancestors
-def find_ancestors(node, hierarchy):
     ancestors = set()
     nodes_to_visit = [node]
     while nodes_to_visit:
@@ -34,7 +25,17 @@ def find_ancestors(node, hierarchy):
     return ancestors
-def extend_with_ancestors(classes, hierarchy):
     extended_classes = set(classes)
     for cls in classes:
         ancestors = find_ancestors(cls, hierarchy)
@@ -43,11 +44,24 @@ def extend_with_ancestors(classes, hierarchy):
 def calculate_hierarchical_precision_recall(
-    real_codes: List[str], predicted_codes: List[str], hierarchy: Dict[str, Set[str]]
 ) -> Tuple[float, float]:
     # Extend the sets of real and predicted codes with their ancestors
     extended_real = set()
-    for code in real_codes:
         extended_real.add(code)
         extended_real.update(hierarchy.get(code, set()))
@@ -66,37 +80,6 @@ def calculate_hierarchical_precision_recall(
     return hP, hR
-def calculate_hierarchical_measures(true_labels, predicted_labels, tree):
-    """
-    Calculates hierarchical precision, recall, and F-measure in a hierarchical structure.
-    Args:
-    - true_labels: A list of strings representing true class labels.
-    - predicted_labels: A list of strings representing predicted class labels.
-    - tree: A dictionary representing the hierarchical structure.
-    Returns:
-    - hP: A floating point number representing hierarchical precision.
-    - hR: A floating point number representing hierarchical recall.
-    - hF: A floating point number representing hierarchical F-measure.
-    """
-    extended_true = [set(find_ancestors(tree, code) | {code}) for code in true_labels]
-    extended_pred = [
-        set(find_ancestors(tree, code) | {code}) for code in predicted_labels
-    ]
-    true_positive = sum(len(t & p) for t, p in zip(extended_true, extended_pred))
-    predicted = sum(len(p) for p in extended_pred)
-    actual = sum(len(t) for t in extended_true)
-    hP = true_positive / predicted if predicted else 0
-    hR = true_positive / actual if actual else 0
-    hF = (2 * hP * hR) / (hP + hR) if (hP + hR) else 0
-    return hP, hR, hF
 def hierarchical_f_measure(hP, hR, beta=1.0):
     """Calculate the hierarchical F-measure."""
     if hP + hR == 0:

+"""This module provides functions for calculating hierarchical precision, recall and f1."""
 from typing import List, Set, Dict, Tuple
+def find_ancestors(node: str, hierarchy: dict) -> set:
     """
+    Find the ancestors of a given node in a hierarchy.
     Args:
+        node (str): The node for which to find ancestors.
+        hierarchy (dict): A dictionary representing the hierarchy, where the keys are nodes and the values are their parents.
     Returns:
+        set: A set of ancestors of the given node.
     """
     ancestors = set()
     nodes_to_visit = [node]
     while nodes_to_visit:
     return ancestors
+def extend_with_ancestors(classes: set, hierarchy: dict) -> set:
+    """
+    Extend the given set of classes with their ancestors from the hierarchy.
+    Args:
+        classes (set): The set of classes to extend.
+        hierarchy (dict): The hierarchy of classes.
+    Returns:
+        set: The extended set of classes including their ancestors.
+    """
     extended_classes = set(classes)
     for cls in classes:
         ancestors = find_ancestors(cls, hierarchy)
 def calculate_hierarchical_precision_recall(
+    reference_codes: List[str],
+    predicted_codes: List[str],
+    hierarchy: Dict[str, Set[str]],
 ) -> Tuple[float, float]:
+    """
+    Calculates the hierarchical precision and recall given the reference codes, predicted codes, and hierarchy definition.
+    Args:
+        reference_codes (List[str]): The list of reference codes.
+        predicted_codes (List[str]): The list of predicted codes.
+        hierarchy (Dict[str, Set[str]]): The hierarchy definition where keys are nodes and values are sets of parent nodes.
+    Returns:
+        Tuple[float, float]: A tuple containing the hierarchical precision and recall floating point values.
+    """
     # Extend the sets of real and predicted codes with their ancestors
     extended_real = set()
+    for code in reference_codes:
         extended_real.add(code)
         extended_real.update(hierarchy.get(code, set()))
     return hP, hR
 def hierarchical_f_measure(hP, hR, beta=1.0):
     """Calculate the hierarchical F-measure."""
     if hP + hR == 0: