danieldux committed on
Commit
10e83ef
1 Parent(s): d1fbaa3

Refactor create_hierarchy_dict function to improve readability and add documentation

Browse files
Files changed (1) hide show
  1. isco.py +14 -43
isco.py CHANGED
@@ -1,71 +1,42 @@
1
- # filename: isco.py
2
- from collections import defaultdict
3
  import csv
4
 
5
 
6
- def create_hierarchy_dict(filename):
7
  """
8
- Creates a dictionary where keys are nodes and values are sets of parent nodes representing the hierarchy of the ISCO-08 codes from the "unit" column of the isco_structure.csv file.
 
 
9
 
10
  Args:
11
- - filename: A string representing the path to the CSV file containing the ISCO-08 codes and their hierarchy.
12
 
13
  Returns:
14
  - A dictionary where keys are ISCO-08 unit codes and values are sets of their parent codes.
15
  """
16
  isco_hierarchy = {}
17
 
18
- with open(filename, newline="") as csvfile:
19
  reader = csv.DictReader(csvfile)
20
  for row in reader:
21
  # Extract unit group level code (4 digits)
22
  unit_code = row["unit"].zfill(4)
 
23
  # Extract the parent code for the unit group level, which is the minor group level (3 digits)
24
- parent_code = unit_code[:3]
25
 
26
  # Add the unit code to the hierarchy with its parent code
27
- isco_hierarchy[unit_code] = {parent_code}
28
 
29
- # Additionally, we can add the parent's parent codes if needed
30
- # For example, the major group level (1 digit) and sub-major group level (2 digits)
31
  major_code = unit_code[0]
32
- sub_major_code = unit_code[:2]
33
  isco_hierarchy[unit_code].update({major_code, sub_major_code})
34
 
35
  return isco_hierarchy
36
 
37
 
38
- def create_hierarchy_tree(hierarchy_dict: dict) -> tuple:
39
- """
40
- Builds the hierarchy tree and a mapping from name to ISCO code.
41
-
42
- Args:
43
- - hierarchy_dict: A dictionary representing the hierarchical structure.
44
-
45
- Returns:
46
- - tree: A dictionary representing the hierarchical structure.
47
- - code_to_node: A dictionary mapping from ISCO code to node name.
48
- """
49
-
50
- tree = defaultdict(lambda: {"children": [], "parent": None})
51
- code_to_node = {}
52
-
53
- def add_node(parent_code, node):
54
- code = node["name"].split("=")[0].strip()
55
- code_to_node[code] = node["name"]
56
- tree[code]["parent"] = parent_code
57
- if parent_code:
58
- tree[parent_code]["children"].append(code)
59
- for child in node.get("children", []):
60
- add_node(code, child)
61
-
62
- add_node(None, hierarchy_dict) # Root node has no parent
63
- return tree, code_to_node
64
-
65
-
66
  # Example usage:
67
- # hierarchy_dict = create_hierarchy("ISCO_structure.csv")
68
- # tree, code_to_node = create_hierarchy_tree(hierarchy_dict)
69
  # print(hierarchy_dict)
70
- # print(code_to_node)
71
- # print(tree)
 
1
+ """This module provides functionality for creating a dictionary that maps 4-digit ISCO-08 unit codes to the set of their parent group codes."""
2
+
3
  import csv
4
 
5
 
6
+ def create_hierarchy_dict(file: str) -> dict:
7
  """
8
+ Creates a dictionary where keys are nodes and values are sets of parent nodes representing the group level hierarchy of the ISCO-08 structure.
9
+ The function assumes that the input CSV file has a column named 'unit' with the 4-digit ISCO-08 codes.
10
+ A csv file with the ISCO-08 structure can be downloaded from the International Labour Organization (ILO) at [https://www.ilo.org/ilostat-files/ISCO/newdocs-08-2021/ISCO-08/ISCO-08 EN.csv](https://www.ilo.org/ilostat-files/ISCO/newdocs-08-2021/ISCO-08/ISCO-08%20EN.csv)
11
 
12
  Args:
13
+ - file: A string representing the path to the CSV file containing the 4-digit ISCO-08 codes.
14
 
15
  Returns:
16
  - A dictionary where keys are ISCO-08 unit codes and values are sets of their parent codes.
17
  """
18
  isco_hierarchy = {}
19
 
20
+ with open(file, newline="") as csvfile:
21
  reader = csv.DictReader(csvfile)
22
  for row in reader:
23
  # Extract unit group level code (4 digits)
24
  unit_code = row["unit"].zfill(4)
25
+
26
  # Extract the parent code for the unit group level, which is the minor group level (3 digits)
27
+ minor_code = unit_code[0:3]
28
 
29
  # Add the unit code to the hierarchy with its parent code
30
+ isco_hierarchy[unit_code] = {minor_code}
31
 
32
+ # Additionally, we can add the parent's parent codes at the sub-major group level (2 digits) and major group level (1 digit)
33
+ sub_major_code = unit_code[0:2]
34
  major_code = unit_code[0]
 
35
  isco_hierarchy[unit_code].update({major_code, sub_major_code})
36
 
37
  return isco_hierarchy
38
 
39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  # Example usage:
41
+ # hierarchy_dict = create_hierarchy_dict("ISCO_structure.csv")
 
42
  # print(hierarchy_dict)