Hannes Kuchelmeister
committed on
Commit
·
2689208
1
Parent(s):
3397b2a
add python scripts
Browse files
annotation-preprocessing/0_fetch_from_database.py
ADDED
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import mysql.connector
|
2 |
+
import pandas as pd
|
3 |
+
import os
|
4 |
+
from dotenv import load_dotenv
|
5 |
+
|
6 |
+
def fetch_objects_from_datase(db):
    """Fetch the ground-truth bounding boxes of annotated eggs.

    Args:
        db: An open database connection exposing ``cursor()``.

    Returns:
        The rows returned by ``cursor.fetchall()``. Each row holds, in order:
        focus_stack_id, x_min, y_min, x_max, y_max, object_type_id,
        object type name and the image's add_date. Rows are ordered by
        focus_stack_id.
    """
    cursor = db.cursor()
    try:
        cursor.execute("""SELECT
                UniqueGroundTruth.focus_stack_id,
                UniqueGroundTruth.x_min,
                UniqueGroundTruth.y_min,
                UniqueGroundTruth.x_max,
                UniqueGroundTruth.y_max,
                UniqueGroundTruth.object_type_id,
                ObjectType.name,
                Image.add_date
            FROM UniqueGroundTruth
                JOIN DetectedObject on DetectedObject.id = UniqueGroundTruth.object_id
                JOIN Image on Image.id = DetectedObject.image_id
                JOIN FocusStack on FocusStack.id = Image.focus_stack_id
                JOIN ObjectType on ObjectType.id = UniqueGroundTruth.object_type_id
            WHERE metaclass_id = 1 -- only select eggs;
                AND unix_timestamp(Image.add_date) > unix_timestamp('2021-03-07 00:00:00')
            ORDER BY UniqueGroundTruth.focus_stack_id;
        """)

        return cursor.fetchall()
    finally:
        # Release the cursor even if the query fails.
        cursor.close()
|
30 |
+
|
31 |
+
def fetch_focus_stacks_from_database(db):
    """Fetch every image of each focus stack that contains an annotated egg.

    Args:
        db: An open database connection exposing ``cursor()``.

    Returns:
        The rows returned by ``cursor.fetchall()``. Each row holds, in order:
        focus stack id, file_path (``study_id/uuid/file_name``), file_name,
        uuid, study_id, pos_z, focus_value and add_date. Rows are ordered by
        focus stack id (descending), then focus_value, then focus_level.
    """
    cursor = db.cursor()
    try:
        # NOTE(review): "Slide.id = Scan.id" joins two primary keys; this
        # looks like it may have been meant as Scan.slide_id - confirm
        # against the schema before changing it.
        cursor.execute("""SELECT
                FocusStack.id as foucs_stack_id,
                CONCAT (study_id, "/", uuid, "/", file_name) as file_path,
                file_name,
                uuid,
                study_id,
                Image.pos_z,
                Image.focus_value,
                Image.add_date
            FROM FocusStack
                JOIN Scan on Scan.id = FocusStack.scan_id
                JOIN Slide on Slide.id = Scan.id
                JOIN Study on Study .id = Slide.study_id
                JOIN Image on Image.focus_stack_id = FocusStack.id
            WHERE
                FocusStack.id IN( -- get all focus stacks that have objects in them;
                    SELECT DISTINCT
                        UniqueGroundTruth.focus_stack_id
                    FROM UniqueGroundTruth
                        JOIN DetectedObject on DetectedObject.id = UniqueGroundTruth.object_id
                        JOIN Image on Image.id = DetectedObject.image_id
                        JOIN FocusStack on FocusStack.id = Image.focus_stack_id
                        JOIN ObjectType on ObjectType.id = UniqueGroundTruth.object_type_id
                    WHERE metaclass_id = 1 -- only select eggs;
                        AND unix_timestamp(Image.add_date) > unix_timestamp('2021-03-07 00:00:00')
                )
            ORDER BY FocusStack.id DESC, focus_value, focus_level
        """)
        return cursor.fetchall()
    finally:
        # Release the cursor even if the query fails.
        cursor.close()
|
64 |
+
|
65 |
+
|
66 |
+
|
67 |
+
if __name__ == "__main__":
    # Pick up DB_HOST / DB_USER / DB_PASSWORD / DB_NAME from a .env file.
    load_dotenv()

    db = mysql.connector.connect(
        host=os.getenv('DB_HOST'),
        user=os.getenv('DB_USER'),
        password=os.getenv('DB_PASSWORD'),
        database=os.getenv('DB_NAME')
    )

    try:
        print("Querring objects...")
        object_rows = fetch_objects_from_datase(db)
        print("Querring stacks...")
        stack_rows = fetch_focus_stacks_from_database(db)
    finally:
        # Always release the connection, even when a query fails.
        db.close()

    # Passing the column names to the constructor (instead of assigning
    # ``.columns`` afterwards) also works when a query returned no rows.
    df_objects = pd.DataFrame(
        object_rows,
        columns=['stack_id', 'x_min', 'y_min', 'x_max', 'y_max',
                 'object_type_id', 'name', 'add_date'],
    )
    df_stacks = pd.DataFrame(
        stack_rows,
        columns=['stack_id', 'file_path', 'file_name',
                 'uuid', 'study_id', 'pos_z', 'focus_value', 'add_date'],
    )

    print("Writing objects to file...")
    df_objects.to_csv("objects.csv")
    print("Writing stacks to file...")
    df_stacks.to_csv("stacks.csv")
|
annotation-preprocessing/1_splitting_into_patches.py
ADDED
@@ -0,0 +1,154 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
from collections import defaultdict
|
3 |
+
from dotenv import load_dotenv
|
4 |
+
import os
|
5 |
+
from PIL import Image, ImageDraw
|
6 |
+
import math
|
7 |
+
import json
|
8 |
+
import random
|
9 |
+
|
10 |
+
class StackEntry:
    """Collects the images and the annotated objects of one focus stack."""

    def __init__(self):
        self.images, self.objects = [], []

    def add_image(self, image):
        """Append ``image`` to the stack's image list."""
        self.images.append(image)

    def add_object(self, object):
        """Append ``object`` to the stack's object list."""
        self.objects.append(object)

    def sort(self):
        """Sort the images in place by ascending ``focus_value``."""
        def by_focus_value(entry):
            return entry.focus_value

        self.images.sort(key=by_focus_value)
|
20 |
+
|
21 |
+
def get_neighbours(img, x, y, dimensions):
    """Crop the eight tiles surrounding the tile whose top-left corner is (x, y).

    Args:
        img: Image exposing ``size`` (width, height) and ``crop(box)``.
        x: Left edge of the centre tile.
        y: Top edge of the centre tile.
        dimensions: Edge length of the square tiles.

    Returns:
        A list of eight entries in row-major order (top-left first, the
        centre tile skipped). Each entry is ``(left, top, cropped_tile)``,
        or ``None`` when that neighbour does not fit entirely in the image.
    """
    img_width, img_height = img.size

    def tile_at(left, top):
        fits_horizontally = left >= 0 and left + dimensions <= img_width
        fits_vertically = top >= 0 and top + dimensions <= img_height
        if not (fits_horizontally and fits_vertically):
            return None
        region = [left, top, left + dimensions, top + dimensions]
        return (left, top, img.crop(region))

    # Row-major walk over the 3x3 grid around the tile, skipping the centre.
    offsets = [
        (dx, dy)
        for dy in (-1, 0, 1)
        for dx in (-1, 0, 1)
        if (dx, dy) != (0, 0)
    ]
    return [tile_at(x + dx * dimensions, y + dy * dimensions) for dx, dy in offsets]
|
37 |
+
|
38 |
+
def extract_object_tiles(obj, stack_images):
    """Cut out every grid tile overlapped by an object's bounding box.

    The tile edge length is read from the module-level ``size``.

    Args:
        obj: Object exposing ``x_min``, ``x_max``, ``y_min`` and ``y_max``.
        stack_images: Rows of the focus stack, each exposing ``file_path``.

    Returns:
        A list with one entry per tile. Each entry is a list with one
        ``(row, [x, y], cropped_tile, neighbours)`` tuple per stack image,
        where ``neighbours`` is the result of ``get_neighbours``.
    """
    # Snap the bounding box outwards to the tile grid.
    x_start = int(obj.x_min / size) * size
    x_end = int(math.ceil(obj.x_max / size)) * size
    y_start = int(obj.y_min / size) * size
    y_end = int(math.ceil(obj.y_max / size)) * size

    opened = []
    try:
        for row in stack_images:
            opened.append((row, Image.open(row.file_path)))

        tiles = []

        # Get tiles of the image that contain bounding box of object
        for y in range(y_start, y_end, size):
            for x in range(x_start, x_end, size):
                box = [x, y, x + size, y + size]
                tiles.append([
                    (row, box[:2], img.crop(box), get_neighbours(img, x, y, size))
                    for row, img in opened
                ])
        return tiles
    finally:
        # The crops have been produced by now, so the source files can be
        # released instead of staying open until garbage collection.
        for _, img in opened:
            img.close()
|
60 |
+
|
61 |
+
|
62 |
+
def save_tile(original_file_path, out_dir, x: int, y: int, img, overwrite=False):
    """Save a tile next to its source image's relative path inside ``out_dir``.

    The tile is written to ``<out_dir>/<dir of original>/<name>_<x>_<y><ext>``.

    Args:
        original_file_path: Path of the image the tile was cut from.
        out_dir: Root directory for the output; may be an empty string.
        x: Left edge of the tile, used in the file name.
        y: Top edge of the tile, used in the file name.
        img: Object exposing ``save(path)``.
        overwrite: When false, an already existing file is left untouched.

    Returns:
        The path the tile is (or already was) stored at.
    """
    path, file_name = os.path.split(original_file_path)
    name, ext = os.path.splitext(file_name)

    out_path = os.path.join(out_dir, path)
    save_to = os.path.join(out_path, f'{name}_{x}_{y}{ext}')

    # An empty out_path means "current directory"; os.makedirs('') would
    # raise. exist_ok avoids the check-then-create race.
    if out_path:
        os.makedirs(out_path, exist_ok=True)
    if overwrite or not os.path.exists(save_to):
        img.save(save_to)
    return save_to
|
74 |
+
|
75 |
+
def save_obj_tiles(obj, out_folder, stack_images):
    """Write all tiles of one object to disk and describe them for annotation.

    Args:
        obj: Object row exposing the bounding box, ``name`` and ``stack_id``.
        out_folder: Root directory passed on to ``save_tile``.
        stack_images: Rows of the focus stack images the object belongs to.

    Returns:
        A list with one dict per tile position. Each dict has the keys
        ``best_index`` (always ``None``), ``images``, ``obj_name`` and
        ``stack_id``; ``images`` holds one metadata dict per stack image.
    """
    annotation_entries = []

    for tile_stack in extract_object_tiles(obj, stack_images):
        images_meta = []

        for row, origin, tile_img, tile_neighbours in tile_stack:
            # The tile itself is saved before any of its neighbours.
            tile_path = save_tile(row.file_path, out_folder, origin[0], origin[1], tile_img)

            neighbour_paths = []
            for entry in tile_neighbours:
                if not entry:
                    # Neighbour lies outside the image; keep the slot.
                    neighbour_paths.append(None)
                    continue
                n_x, n_y, n_img = entry
                neighbour_paths.append(save_tile(row.file_path, out_folder, n_x, n_y, n_img))

            images_meta.append({
                "focus_value": row.focus_value,
                "image_path": tile_path,
                "neighbours": neighbour_paths,
                "original_filename": row.file_name,
                "scan_uuid": row.uuid,
                "study_id": row.study_id,
            })

        annotation_entries.append({
            "best_index": None,
            "images": images_meta,
            "obj_name": obj.name,
            "stack_id": obj.stack_id,
        })

    return annotation_entries
|
108 |
+
|
109 |
+
def save_stack(stack, out_folder):
    """Save the tiles of every object in ``stack`` and return their metadata.

    Args:
        stack: Entry exposing ``objects`` and ``images``.
        out_folder: Root directory passed on to ``save_obj_tiles``.

    Returns:
        One flat list with the results of ``save_obj_tiles`` for every
        object, in object order.
    """
    return [
        tile_entry
        for obj in stack.objects
        for tile_entry in save_obj_tiles(obj, out_folder, stack.images)
    ]
|
114 |
+
|
115 |
+
|
116 |
+
if __name__ == "__main__":
    load_dotenv()
    print("Geting environment variables...")
    img_size_raw = os.getenv('IMG_SIZE')
    if img_size_raw is None:
        # Fail with a clear message instead of int(None)'s TypeError.
        raise SystemExit("IMG_SIZE environment variable is not set")
    # Module-level tile edge length; read by extract_object_tiles.
    size = int(img_size_raw)

    print("Loading data from csv files...")
    objects = pd.read_csv("test_objects.csv", index_col=0)
    stacks = pd.read_csv("test_stacks.csv", index_col=0)

    # Maps a stack id to its images and objects; entries are made on demand.
    stacks_dict = defaultdict(StackEntry)

    print("Building internal datastructure...")
    # adding images to dict
    for (index, row) in stacks.iterrows():
        stacks_dict[row.stack_id].add_image(row)

    for values in stacks_dict.values():
        values.sort()

    # adding objects
    for (index, row) in objects.iterrows():
        stacks_dict[row.stack_id].add_object(row)

    out_folder = "out"
    z_stacks = []

    print("Generating image tiles and writing them to file...")
    # NOTE(review): tiles are written relative to the current directory ("")
    # while data.json goes to out_folder - confirm this is intended.
    for stack in stacks_dict.values():
        z_stacks.extend(save_stack(stack, ""))

    # randomize z_stacks
    print("Shuffling data...")
    random.shuffle(z_stacks)

    print("Writing meta-data for annotation to file...")
    # The tiles are not written below out_folder, so it may not exist yet.
    os.makedirs(out_folder, exist_ok=True)
    with open(os.path.join(out_folder, "data.json"), 'w') as file:
        json.dump(z_stacks, file)
|
154 |
+
|