Spaces:

danieldux
/

isco_hierarchical_accuracy

Running

App Files Files Community

isco_hierarchical_accuracy / ham.py

danieldux

Add functions for finding ancestors and calculating hierarchical precision and recall

fcd15ea 9 months ago

raw

history blame

3.46 kB

	from typing import List, Set, Dict, Tuple


	def find_ancestors1(tree, code):
	    """
	    Collects the ancestors of a given class (e.g., an ISCO-08 code) by walking parent links.

	    The walk is iterative: starting at ``code`` it repeatedly reads
	    ``tree[current]["parent"]`` until a falsy parent is reached.

	    Args:
	    - tree: A dictionary mapping each label to a dictionary that has a "parent" entry.
	      A falsy parent (e.g. None or "") marks the top of the chain.
	    - code: A string representing the label of the class.

	    Returns:
	    - A list of strings, each representing an ancestor of the input class, ordered
	      from the direct parent upwards. The list is empty when ``code`` is falsy.
	      If the parent links form a cycle, the walk stops at the first repeated label
	      instead of looping forever.

	    Raises:
	    - KeyError: If a visited label is missing from ``tree`` or lacks a "parent" entry.
	    """
	    ancestors = []
	    # Labels already on the path; a repeated label means the parent links are cyclic.
	    seen = {code}
	    current = code
	    while current:
	        parent = tree[current]["parent"]
	        if not parent or parent in seen:
	            break
	        seen.add(parent)
	        ancestors.append(parent)
	        current = parent
	    return ancestors


	def find_ancestors(node, hierarchy):
	    """
	    Collects every ancestor of a node by following parent links transitively.

	    Args:
	    - node: The label whose ancestors are wanted.
	    - hierarchy: A mapping from a label to an iterable of its parent labels.
	      Labels that are not keys of the mapping are treated as having no parents.

	    Returns:
	    - A set containing all labels reachable from ``node`` through parent links.
	      ``node`` itself is included only if the hierarchy contains a cycle leading
	      back to it.
	    """
	    ancestors = set()
	    nodes_to_visit = [node]
	    while nodes_to_visit:
	        current_node = nodes_to_visit.pop()
	        if current_node in hierarchy:
	            # Queue only parents not collected yet: this guarantees termination on
	            # cyclic hierarchies and avoids re-walking ancestors shared by several paths.
	            unseen_parents = set(hierarchy[current_node]) - ancestors
	            ancestors.update(unseen_parents)
	            nodes_to_visit.extend(unseen_parents)
	    return ancestors


	def extend_with_ancestors(classes, hierarchy):
	    """
	    Returns the given classes together with all of their ancestors.

	    Args:
	    - classes: An iterable of class labels. It is read exactly once, so one-shot
	      iterators such as generators are handled correctly.
	    - hierarchy: The hierarchy mapping passed through to ``find_ancestors``.

	    Returns:
	    - A set holding every input label plus every ancestor found for it.
	    """
	    extended_classes = set(classes)
	    # Iterate over a snapshot of the set rather than over `classes` again: the input
	    # may already be exhausted, and the set itself is mutated inside the loop.
	    for cls in tuple(extended_classes):
	        extended_classes.update(find_ancestors(cls, hierarchy))
	    return extended_classes


	def calculate_hierarchical_precision_recall(
	    real_codes: List[str], predicted_codes: List[str], hierarchy: Dict[str, Set[str]]
	) -> Tuple[float, float]:
	    """
	    Calculates hierarchical precision and recall over ancestor-extended code sets.

	    Both code lists are extended with every ancestor reachable through ``hierarchy``;
	    precision and recall are then the share of the overlap in the extended predicted
	    set and in the extended real set, respectively.

	    Args:
	    - real_codes: The true codes.
	    - predicted_codes: The predicted codes.
	    - hierarchy: A mapping from a code to a set of its parents. Links are followed
	      transitively, so a mapping that already lists all ancestors of each code
	      gives the same result as one that lists direct parents only.

	    Returns:
	    - A tuple ``(hP, hR)``. Each value is 0.0 when its denominator set is empty.
	    """

	    def _with_ancestors(codes):
	        # Transitive closure of `codes` under the parent links in `hierarchy`.
	        closed = set(codes)
	        pending = list(closed)
	        while pending:
	            for parent in hierarchy.get(pending.pop(), ()):
	                if parent not in closed:
	                    closed.add(parent)
	                    pending.append(parent)
	        return closed

	    # Extend the sets of real and predicted codes with their ancestors
	    extended_real = _with_ancestors(real_codes)
	    extended_predicted = _with_ancestors(predicted_codes)

	    # Calculate the intersection
	    correct_predictions = extended_real & extended_predicted

	    # Calculate hierarchical precision and recall
	    hP = len(correct_predictions) / len(extended_predicted) if extended_predicted else 0.0
	    hR = len(correct_predictions) / len(extended_real) if extended_real else 0.0

	    return hP, hR


	def calculate_hierarchical_measures(true_labels, predicted_labels, tree):
	    """
	    Calculates hierarchical precision, recall, and F-measure in a hierarchical structure.

	    Samples are paired by position: the i-th true label is compared with the i-th
	    predicted label. Each label is extended with its ancestors, and the overlap sizes
	    are summed over all pairs before the ratios are taken.

	    Args:
	    - true_labels: A list of strings representing true class labels.
	    - predicted_labels: A list of strings representing predicted class labels.
	    - tree: A dictionary representing the hierarchical structure, in the form accepted
	      by ``find_ancestors`` (each label mapped to an iterable of its parent labels).

	    Returns:
	    - hP: A floating point number representing hierarchical precision.
	    - hR: A floating point number representing hierarchical recall.
	    - hF: A floating point number representing hierarchical F-measure.

	    Raises:
	    - ValueError: If the two label sequences differ in length.
	    """
	    true_labels = list(true_labels)
	    predicted_labels = list(predicted_labels)
	    # zip() would silently drop the unmatched tail while the denominators below
	    # still counted it, so a length mismatch is rejected explicitly.
	    if len(true_labels) != len(predicted_labels):
	        raise ValueError(
	            "true_labels and predicted_labels must have the same length, got "
	            f"{len(true_labels)} and {len(predicted_labels)}"
	        )

	    # find_ancestors takes (node, hierarchy) and returns a set.
	    extended_true = [find_ancestors(code, tree) | {code} for code in true_labels]
	    extended_pred = [find_ancestors(code, tree) | {code} for code in predicted_labels]

	    true_positive = sum(len(t & p) for t, p in zip(extended_true, extended_pred))
	    predicted = sum(len(p) for p in extended_pred)
	    actual = sum(len(t) for t in extended_true)

	    hP = true_positive / predicted if predicted else 0.0
	    hR = true_positive / actual if actual else 0.0
	    hF = (2 * hP * hR) / (hP + hR) if (hP + hR) else 0.0

	    return hP, hR, hF


	def hierarchical_f_measure(hP, hR, beta=1.0):
	    """
	    Calculate the hierarchical F-measure.

	    Computes ``(beta**2 + 1) * hP * hR / (beta**2 * hP + hR)``.

	    Args:
	    - hP: Hierarchical precision.
	    - hR: Hierarchical recall.
	    - beta: Weight of recall relative to precision; 1.0 gives the harmonic mean.

	    Returns:
	    - The F-measure, or 0.0 when the denominator is zero.
	    """
	    beta_squared = beta**2
	    denominator = beta_squared * hP + hR
	    # Guard the real denominator: with beta == 0 it can be zero even when hP > 0.
	    if denominator == 0:
	        return 0.0
	    return (beta_squared + 1) * hP * hR / denominator