Hannes Kuchelmeister committed on
Commit
2689208
·
1 Parent(s): 3397b2a

add python scripts

Browse files
annotation-preprocessing/0_fetch_from_database.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import mysql.connector
2
+ import pandas as pd
3
+ import os
4
+ from dotenv import load_dotenv
5
+
6
def fetch_objects_from_datase(db):
    """Fetch the ground-truth bounding boxes of all egg objects.

    Runs a fixed query on ``UniqueGroundTruth`` joined with
    ``DetectedObject``, ``Image``, ``FocusStack`` and ``ObjectType``. Only
    rows with ``metaclass_id = 1`` whose image was added after
    2021-03-07 00:00:00 are kept; the result is ordered by focus stack id.

    Args:
        db: An open database connection (the script entry point passes a
            ``mysql.connector`` connection). Only ``db.cursor()`` is used.

    Returns:
        The list produced by ``cursor.fetchall()``. Each row holds
        ``(focus_stack_id, x_min, y_min, x_max, y_max, object_type_id,
        name, add_date)``.
    """
    cursor = db.cursor()
    try:
        cursor.execute("""SELECT
                UniqueGroundTruth.focus_stack_id,
                UniqueGroundTruth.x_min,
                UniqueGroundTruth.y_min,
                UniqueGroundTruth.x_max,
                UniqueGroundTruth.y_max,
                UniqueGroundTruth.object_type_id,
                ObjectType.name,
                Image.add_date
            FROM UniqueGroundTruth
            JOIN DetectedObject on DetectedObject.id = UniqueGroundTruth.object_id
            JOIN Image on Image.id = DetectedObject.image_id
            JOIN FocusStack on FocusStack.id = Image.focus_stack_id
            JOIN ObjectType on ObjectType.id = UniqueGroundTruth.object_type_id
            WHERE metaclass_id = 1 -- only select eggs;
            AND unix_timestamp(Image.add_date) > unix_timestamp('2021-03-07 00:00:00')
            ORDER BY UniqueGroundTruth.focus_stack_id;
        """)
        return cursor.fetchall()
    finally:
        # Release the cursor even if the query fails.
        cursor.close()
30
+
31
def fetch_focus_stacks_from_database(db):
    """Fetch every image of each focus stack that contains an egg object.

    The sub-query selects the ids of focus stacks that have at least one
    ``UniqueGroundTruth`` row with ``metaclass_id = 1`` and an image added
    after 2021-03-07 00:00:00. The outer query returns one row per image of
    those stacks, ordered by stack id (descending), then ``focus_value`` and
    ``focus_level``.

    Args:
        db: An open database connection (the script entry point passes a
            ``mysql.connector`` connection). Only ``db.cursor()`` is used.

    Returns:
        The list produced by ``cursor.fetchall()``. Each row holds
        ``(focus stack id, file_path, file_name, uuid, study_id, pos_z,
        focus_value, add_date)``, where ``file_path`` is
        ``study_id/uuid/file_name``.
    """
    # NOTE(review): the join "Slide.id = Scan.id" compares the two primary
    # keys; a foreign key such as Scan.slide_id may have been intended.
    # The schema is not visible here, so the query is left unchanged - confirm.
    # NOTE(review): the alias "foucs_stack_id" is misspelled, but the script
    # entry point renames all columns afterwards, so it has no visible effect.
    cursor = db.cursor()
    try:
        cursor.execute("""SELECT
                FocusStack.id as foucs_stack_id,
                CONCAT (study_id, "/", uuid, "/", file_name) as file_path,
                file_name,
                uuid,
                study_id,
                Image.pos_z,
                Image.focus_value,
                Image.add_date
            FROM FocusStack
            JOIN Scan on Scan.id = FocusStack.scan_id
            JOIN Slide on Slide.id = Scan.id
            JOIN Study on Study .id = Slide.study_id
            JOIN Image on Image.focus_stack_id = FocusStack.id
            WHERE
                FocusStack.id IN( -- get all focus stacks that have objects in them;
                    SELECT DISTINCT
                        UniqueGroundTruth.focus_stack_id
                    FROM UniqueGroundTruth
                    JOIN DetectedObject on DetectedObject.id = UniqueGroundTruth.object_id
                    JOIN Image on Image.id = DetectedObject.image_id
                    JOIN FocusStack on FocusStack.id = Image.focus_stack_id
                    JOIN ObjectType on ObjectType.id = UniqueGroundTruth.object_type_id
                    WHERE metaclass_id = 1 -- only select eggs;
                    AND unix_timestamp(Image.add_date) > unix_timestamp('2021-03-07 00:00:00')
                )
            ORDER BY FocusStack.id DESC, focus_value, focus_level
        """)
        return cursor.fetchall()
    finally:
        # Release the cursor even if the query fails.
        cursor.close()
64
+
65
+
66
+
67
if __name__ == "__main__":
    # Script entry point: export egg objects and their focus stacks to CSV.
    # Connection settings come from the environment (optionally a .env file).
    load_dotenv()

    db = mysql.connector.connect(
        host=os.getenv('DB_HOST'),
        user=os.getenv('DB_USER'),
        password=os.getenv('DB_PASSWORD'),
        database=os.getenv('DB_NAME')
    )

    try:
        print("Querring objects...")
        object_rows = fetch_objects_from_datase(db)
        print("Querring stacks...")
        stack_rows = fetch_focus_stacks_from_database(db)
    finally:
        # Close the connection whether or not the queries succeeded.
        db.close()

    # Passing the column names to the constructor (rather than assigning
    # ``.columns`` afterwards) also works when a query returns no rows.
    df_objects = pd.DataFrame(
        object_rows,
        columns=['stack_id', 'x_min', 'y_min', 'x_max', 'y_max',
                 'object_type_id', 'name', 'add_date'],
    )
    df_stacks = pd.DataFrame(
        stack_rows,
        columns=['stack_id', 'file_path', 'file_name',
                 'uuid', 'study_id', 'pos_z', 'focus_value', 'add_date'],
    )

    print("Writing objects to file...")
    df_objects.to_csv("objects.csv")
    print("Writing stacks to file...")
    df_stacks.to_csv("stacks.csv")
annotation-preprocessing/1_splitting_into_patches.py ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from collections import defaultdict
3
+ from dotenv import load_dotenv
4
+ import os
5
+ from PIL import Image, ImageDraw
6
+ import math
7
+ import json
8
+ import random
9
+
10
class StackEntry:
    """Groups the image records of one focus stack with its objects."""

    def __init__(self):
        # Both collections start empty and are filled through the add_* methods.
        self.images, self.objects = [], []

    def add_image(self, image):
        """Append ``image`` to the list of images."""
        self.images.append(image)

    def add_object(self, object):
        """Append ``object`` to the list of objects."""
        self.objects.append(object)

    def sort(self):
        """Sort the stored images in place by ascending ``focus_value``."""
        def focus_of(entry):
            return entry.focus_value

        self.images.sort(key=focus_of)
20
+
21
def get_neighbours(img, x, y, dimensions):
    """Crop the eight tiles surrounding the square tile at ``(x, y)``.

    Neighbours are visited row by row (top-left first, bottom-right last);
    the centre tile itself is skipped.

    Args:
        img: Image-like object providing ``size`` (width, height) and
            ``crop(box)``.
        x: Left edge of the centre tile.
        y: Top edge of the centre tile.
        dimensions: Edge length of a tile.

    Returns:
        A list of eight entries. Each entry is ``(neighbour_x, neighbour_y,
        cropped_image)``, or ``None`` when that neighbour does not lie
        completely inside the image.
    """
    img_width, img_height = img.size

    result = []
    for row_step in (-1, 0, 1):
        for col_step in (-1, 0, 1):
            if row_step == 0 and col_step == 0:
                # The centre tile is not its own neighbour.
                continue

            left = x + col_step * dimensions
            top = y + row_step * dimensions
            inside = (
                left >= 0
                and left + dimensions <= img_width
                and top >= 0
                and top + dimensions <= img_height
            )
            if not inside:
                result.append(None)
                continue

            region = [left, top, left + dimensions, top + dimensions]
            result.append((left, top, img.crop(region)))
    return result
37
+
38
def extract_object_tiles(obj, stack_images, tile_size=None):
    """Cut out all grid tiles covered by an object's bounding box.

    The image is treated as a grid of square tiles. Every tile touched by the
    bounding box ``(obj.x_min, obj.y_min, obj.x_max, obj.y_max)`` is cropped
    from each image of the focus stack, together with its eight neighbours.

    Args:
        obj: Record with ``x_min``, ``x_max``, ``y_min`` and ``y_max``.
        stack_images: Records of the focus stack images; ``file_path`` of
            each record is opened with ``Image.open``.
        tile_size: Edge length of a tile. When ``None`` (the default) the
            module-level ``size`` is used, which is only assigned when the
            file runs as a script.

    Returns:
        A list with one entry per tile (row by row). Each entry is a list
        with one tuple per stack image:
        ``(row, [x, y], cropped_tile, neighbours)`` where ``neighbours`` is
        the result of ``get_neighbours``.
    """
    if tile_size is None:
        # Backward-compatible fallback to the global set by the entry point.
        tile_size = size

    x_start = int(obj.x_min / tile_size) * tile_size
    x_end = int(math.ceil(obj.x_max / tile_size)) * tile_size
    y_start = int(obj.y_min / tile_size) * tile_size
    y_end = int(math.ceil(obj.y_max / tile_size)) * tile_size

    tiles = []

    focus_stack_images = [(row, Image.open(row.file_path)) for row in stack_images]
    try:
        # Get tiles of the image that contain bounding box of object
        for y in range(y_start, y_end, tile_size):
            for x in range(x_start, x_end, tile_size):
                stack = []
                for row, img in focus_stack_images:
                    box = [x, y, x + tile_size, y + tile_size]
                    crop = img.crop(box)

                    neighbours = get_neighbours(img, x, y, tile_size)
                    stack.append((row, box[:2], crop, neighbours))
                tiles.append(stack)
    finally:
        # The crops are independent images, so the sources can be released.
        for _, img in focus_stack_images:
            img.close()
    return tiles
60
+
61
+
62
def save_tile(original_file_path, out_dir, x: int, y: int, img, overwrite=False):
    """Save a tile next to the mirrored location of its source image.

    The tile is written to
    ``<out_dir>/<dir of original_file_path>/<name>_<x>_<y><ext>``; missing
    directories are created.

    Args:
        original_file_path: Path of the image the tile was cut from.
        out_dir: Root directory for the output; may be ``""``.
        x: Left edge of the tile, used in the file name.
        y: Top edge of the tile, used in the file name.
        img: Object providing ``save(path)``.
        overwrite: When false (default) an existing file is left untouched.

    Returns:
        The path of the tile file, whether or not it was written by this call.
    """
    # NOTE(review): if original_file_path is absolute, os.path.join drops
    # out_dir and the tile ends up beside the source image.
    path, file_name = os.path.split(original_file_path)
    name, ext = os.path.splitext(file_name)

    out_path = os.path.join(out_dir, path)
    save_to = os.path.join(out_path, f'{name}_{x}_{y}{ext}')

    # An empty out_path means "current directory"; os.makedirs("") would raise.
    if out_path:
        os.makedirs(out_path, exist_ok=True)
    if overwrite or not os.path.exists(save_to):
        img.save(save_to)
    return save_to
74
+
75
def save_obj_tiles(obj, out_folder, stack_images):
    """Write all tiles of one object to disk and describe them.

    Tiles come from ``extract_object_tiles``; each tile and each of its
    available neighbours is stored with ``save_tile``.

    Args:
        obj: Object record; ``name`` and ``stack_id`` are copied into the
            result.
        out_folder: Root directory handed to ``save_tile``.
        stack_images: Image records of the object's focus stack.

    Returns:
        A list with one dict per tile, holding ``best_index`` (always
        ``None``), ``images`` (one dict per stack image), ``obj_name`` and
        ``stack_id``.
    """
    def store_neighbour(row, neighbour):
        # Neighbours outside the image are None and stay None in the output.
        if not neighbour:
            return None
        n_x, n_y, n_img = neighbour
        return save_tile(row.file_path, out_folder, n_x, n_y, n_img)

    described_tiles = []
    for tile_stack in extract_object_tiles(obj, stack_images):
        stack_entries = []
        for row, origin, tile_img, neighbour_tiles in tile_stack:
            # The tile itself is saved before its neighbours.
            tile_path = save_tile(row.file_path, out_folder, origin[0], origin[1], tile_img)

            neighbour_paths = []
            for neighbour in neighbour_tiles:
                neighbour_paths.append(store_neighbour(row, neighbour))

            entry = {
                "focus_value": row.focus_value,
                "image_path": tile_path,
                "neighbours": neighbour_paths,
                "original_filename": row.file_name,
                "scan_uuid": row.uuid,
                "study_id": row.study_id,
            }
            stack_entries.append(entry)

        described_tiles.append({
            "best_index": None,
            "images": stack_entries,
            "obj_name": obj.name,
            "stack_id": obj.stack_id,
        })

    return described_tiles
108
+
109
def save_stack(stack, out_folder):
    """Save the tiles of every object in ``stack``.

    Args:
        stack: Entry providing ``objects`` and ``images``.
        out_folder: Root directory handed on to ``save_obj_tiles``.

    Returns:
        One flat list with the tile descriptions of all objects, in object
        order.
    """
    return [
        tile_info
        for obj in stack.objects
        for tile_info in save_obj_tiles(obj, out_folder, stack.images)
    ]
114
+
115
+
116
if __name__ == "__main__":
    # Script entry point: cut object tiles out of the focus stack images and
    # write a shuffled description of them to out/data.json.
    load_dotenv()
    print("Geting environment variables...")
    img_size = os.getenv('IMG_SIZE')
    if img_size is None:
        # int(None) would only give an opaque TypeError.
        raise SystemExit("IMG_SIZE must be set in the environment or the .env file")
    # Module-level global; extract_object_tiles reads it as the tile edge length.
    size = int(img_size)

    print("Loading data from csv files...")
    # NOTE(review): 0_fetch_from_database.py writes objects.csv / stacks.csv,
    # while this script reads the test_* variants - confirm this is intended.
    objects = pd.read_csv("test_objects.csv", index_col=0)
    stacks = pd.read_csv("test_stacks.csv", index_col=0)

    stacks_dict = defaultdict(StackEntry)

    print("Building internal datastructure...")
    # adding images to dict
    for (index, row) in stacks.iterrows():
        stacks_dict[row.stack_id].add_image(row)

    # order the images of every stack by focus value
    for values in stacks_dict.values():
        values.sort()

    # adding objects
    for (index, row) in objects.iterrows():
        stacks_dict[row.stack_id].add_object(row)

    out_folder = "out"
    # data.json is written into out_folder below, so it has to exist.
    os.makedirs(out_folder, exist_ok=True)
    z_stacks = []

    print("Generating image tiles and writing them to file...")
    for stack in stacks_dict.values():
        # NOTE(review): tiles are saved relative to the current directory
        # ("") although out_folder is "out"; left unchanged because the
        # paths stored in data.json depend on it - confirm.
        z_stacks.extend(save_stack(stack, ""))

    # randomize z_stacks
    print("Shuffling data...")
    random.shuffle(z_stacks)

    print("Writing meta-data for annotation to file...")
    with open(os.path.join(out_folder, "data.json"), 'w') as file:
        json.dump(z_stacks, file)
154
+