Refactor create_hierarchy_dict function to improve readability and add documentation
Browse files
isco.py
CHANGED
@@ -1,71 +1,42 @@
|
|
1 |
-
|
2 |
-
|
3 |
import csv
|
4 |
|
5 |
|
6 |
-
def create_hierarchy_dict(
|
7 |
"""
|
8 |
-
Creates a dictionary where keys are nodes and values are sets of parent nodes representing the hierarchy of the ISCO-08
|
|
|
|
|
9 |
|
10 |
Args:
|
11 |
-
-
|
12 |
|
13 |
Returns:
|
14 |
- A dictionary where keys are ISCO-08 unit codes and values are sets of their parent codes.
|
15 |
"""
|
16 |
isco_hierarchy = {}
|
17 |
|
18 |
-
with open(
|
19 |
reader = csv.DictReader(csvfile)
|
20 |
for row in reader:
|
21 |
# Extract unit group level code (4 digits)
|
22 |
unit_code = row["unit"].zfill(4)
|
|
|
23 |
# Extract the parent code for the unit group level, which is the minor group level (3 digits)
|
24 |
-
|
25 |
|
26 |
# Add the unit code to the hierarchy with its parent code
|
27 |
-
isco_hierarchy[unit_code] = {
|
28 |
|
29 |
-
# Additionally, we can add the parent's parent codes
|
30 |
-
|
31 |
major_code = unit_code[0]
|
32 |
-
sub_major_code = unit_code[:2]
|
33 |
isco_hierarchy[unit_code].update({major_code, sub_major_code})
|
34 |
|
35 |
return isco_hierarchy
|
36 |
|
37 |
|
38 |
-
def create_hierarchy_tree(hierarchy_dict: dict) -> tuple:
    """
    Builds the hierarchy tree and a mapping from ISCO code to node name.

    Each node is a dictionary with a "name" entry of the form "<code> = <label>"
    and an optional "children" entry holding a list of nodes of the same form.
    The code of a node is the part of its name before the first "=", stripped
    of surrounding whitespace.

    Args:
    - hierarchy_dict: The root node of the nested hierarchy.

    Returns:
    - tree: A defaultdict mapping each code to {"children": [child codes], "parent": parent code or None}.
    - code_to_node: A dictionary mapping from ISCO code to the full node name.
    """
    # Imported locally because the module itself only imports csv.
    from collections import defaultdict

    tree = defaultdict(lambda: {"children": [], "parent": None})
    code_to_node = {}

    def add_node(parent_code, node):
        code = node["name"].split("=")[0].strip()
        code_to_node[code] = node["name"]
        tree[code]["parent"] = parent_code
        # Compare against None explicitly: only the root has no parent, and an
        # empty-string code must still receive its children.
        if parent_code is not None:
            tree[parent_code]["children"].append(code)
        for child in node.get("children", []):
            add_node(code, child)

    add_node(None, hierarchy_dict)  # Root node has no parent
    return tree, code_to_node
|
64 |
-
|
65 |
-
|
66 |
# Example usage:
|
67 |
-
# hierarchy_dict =
|
68 |
-
# tree, code_to_node = create_hierarchy_tree(hierarchy_dict)
|
69 |
# print(hierarchy)
|
70 |
-
# print(code_to_node)
|
71 |
-
# print(tree)
|
|
|
1 |
+
"""This module provides functionality for building a dictionary that maps each ISCO-08 unit group code to its parent group codes."""
|
2 |
+
|
3 |
import csv
|
4 |
|
5 |
|
6 |
+
def create_hierarchy_dict(file: str) -> dict:
    """
    Creates a dictionary where keys are nodes and values are sets of parent nodes representing the group level hierarchy of the ISCO-08 structure.

    The function assumes that the input CSV file has a header row with a column named 'unit' holding the 4-digit ISCO-08 codes.
    Codes shorter than four characters are left-padded with zeros, and rows whose 'unit' cell is empty or whitespace-only are skipped.
    A csv file with the ISCO-08 structure can be downloaded from the International Labour Organization (ILO) at
    https://www.ilo.org/ilostat-files/ISCO/newdocs-08-2021/ISCO-08/ISCO-08%20EN.csv

    Args:
    - file: A string representing the path to the CSV file containing the 4-digit ISCO-08 codes.

    Returns:
    - A dictionary where keys are ISCO-08 unit codes and values are sets of their parent codes
      (the 3-digit minor, 2-digit sub-major and 1-digit major group codes).

    Raises:
    - KeyError: If a row has no 'unit' column.
    """
    isco_hierarchy = {}

    with open(file, newline="") as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            # Strip surrounding whitespace first: padding would otherwise defeat
            # zfill and shift the prefix slices below.
            raw_code = (row["unit"] or "").strip()
            if not raw_code:
                # A blank cell would otherwise be zero-filled into a bogus "0000" entry.
                continue

            # Unit group level code (4 digits); zero-padding restores codes whose
            # leading zero was lost.
            unit_code = raw_code.zfill(4)

            # Every ancestor code is a prefix of the unit code: minor group
            # (3 digits), sub-major group (2 digits) and major group (1 digit).
            minor_code = unit_code[:3]
            sub_major_code = unit_code[:2]
            major_code = unit_code[0]

            isco_hierarchy[unit_code] = {minor_code, sub_major_code, major_code}

    return isco_hierarchy
|
38 |
|
39 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
# Example usage:
|
41 |
+
# hierarchy_dict = create_hierarchy_dict("ISCO_structure.csv")
|
|
|
42 |
# print(hierarchy_dict)
|
|
|
|