master_thesis_models / annotation-preprocessing /1_splitting_into_patches.py
Hannes Kuchelmeister
change test_object to actual ones in file path
93d387e
raw
history blame
5.53 kB
import pandas as pd
from collections import defaultdict
from dotenv import load_dotenv
import os
from PIL import Image, ImageDraw
import math
import json
import random
class StackEntry:
    """Groups the images of one stack together with the objects found in it."""

    def __init__(self):
        # Both collections start out empty and are filled through the
        # ``add_*`` methods below.
        self.objects = []
        self.images = []

    def add_image(self, image):
        """Register ``image`` as a member of this stack."""
        self.images.append(image)

    def add_object(self, object):
        """Register ``object`` as belonging to this stack."""
        self.objects.append(object)

    def sort(self):
        """Order the stored images in place by ascending ``focus_value``."""

        def focus_of(entry):
            return entry.focus_value

        self.images.sort(key=focus_of)
def get_neighbours(img, x, y, dimensions):
    """Crop the eight square tiles surrounding the tile at ``(x, y)``.

    Args:
        img: Image-like object exposing ``size`` (width, height) and ``crop``.
        x: Left edge of the centre tile.
        y: Top edge of the centre tile.
        dimensions: Edge length of every (square) tile.

    Returns:
        A list with eight entries, ordered row by row from top-left to
        bottom-right (the centre tile itself is skipped). Each entry is a
        ``(tile_x, tile_y, cropped_image)`` tuple, or ``None`` when that
        neighbour would not lie completely inside the image.
    """
    img_width, img_height = img.size

    # Row-major walk over the 3x3 grid, leaving out the centre position.
    offsets = [
        (dx, dy)
        for dy in (-1, 0, 1)
        for dx in (-1, 0, 1)
        if (dx, dy) != (0, 0)
    ]

    result = []
    for dx, dy in offsets:
        left = x + dx * dimensions
        top = y + dy * dimensions
        right = left + dimensions
        bottom = top + dimensions

        # A neighbour that sticks out of the image on any side is reported
        # as missing rather than cropped.
        if left < 0 or right > img_width or top < 0 or bottom > img_height:
            result.append(None)
            continue

        result.append((left, top, img.crop([left, top, right, bottom])))
    return result
def extract_object_tiles(obj, stack_images, in_folder, threshold=0.25, tile_size=None):
    """Cut out the grid tiles that cover an object's bounding box.

    The image plane is divided into a regular grid of square tiles. Every
    grid tile whose overlap with the object's bounding box exceeds
    ``threshold`` times the tile area is cropped out of each image of the
    stack, together with its eight neighbouring tiles.

    Args:
        obj: Object record exposing ``x_min``, ``y_min``, ``x_max`` and
            ``y_max`` (the bounding box).
        stack_images: Iterable of image records exposing ``file_path``
            (relative to ``in_folder``).
        in_folder: Directory that ``file_path`` values are resolved against.
        threshold: Minimum fraction of the tile area that must be covered by
            the bounding box for the tile to be extracted.
        tile_size: Edge length of a tile. When omitted, the module-level
            ``size`` is used (it is set by the script entry point).

    Returns:
        A list with one entry per selected tile. Each entry is a list with
        one ``(row, [x, y], crop, neighbours)`` tuple per stack image, in the
        order of ``stack_images``; ``neighbours`` is the result of
        ``get_neighbours`` for that tile.
    """
    if tile_size is None:
        # Backward-compatible fallback to the global configured by the script.
        tile_size = size

    x_start = int(obj.x_min / tile_size) * tile_size
    x_end = int(math.ceil(obj.x_max / tile_size)) * tile_size
    y_start = int(obj.y_min / tile_size) * tile_size
    y_end = int(math.ceil(obj.y_max / tile_size)) * tile_size

    obj_box = [obj.x_min, obj.y_min, obj.x_max, obj.y_max]
    min_overlap = tile_size * tile_size * threshold

    tiles = []
    focus_stack_images = []
    try:
        for row in stack_images:
            opened = Image.open(os.path.join(in_folder, row.file_path))
            focus_stack_images.append((row, opened))

        # Walk the grid cells spanned by the bounding box and keep the ones
        # that are covered well enough by the object.
        for y in range(y_start, y_end, tile_size):
            for x in range(x_start, x_end, tile_size):
                box = [x, y, x + tile_size, y + tile_size]
                if compute_overlap(box, obj_box) > min_overlap:
                    stack = []
                    for row, img in focus_stack_images:
                        crop = img.crop(box)
                        neighbours = get_neighbours(img, x, y, tile_size)
                        stack.append((row, box[:2], crop, neighbours))
                    tiles.append(stack)
    finally:
        # Release the source images deterministically; the crops collected
        # above are separate image objects and stay usable.
        for _, img in focus_stack_images:
            img.close()

    return tiles
def save_tile(original_file_path, out_dir, x: int, y: int, img, overwrite=False):
    """Write a tile to disk next to where its source image would live.

    The tile is stored under ``out_dir`` in the same sub-directory as
    ``original_file_path`` and is named ``<name>_<x>_<y><ext>``.

    Args:
        original_file_path: Relative path of the image the tile was cut from.
        out_dir: Root directory for all written tiles.
        x: Left edge of the tile; becomes part of the file name.
        y: Top edge of the tile; becomes part of the file name.
        img: Object with a ``save(path)`` method (e.g. a PIL image).
        overwrite: When false, an already existing tile file is left alone.

    Returns:
        The path of the tile relative to ``out_dir``.
    """
    path, file_name = os.path.split(original_file_path)
    name, ext = os.path.splitext(file_name)
    tile_name = f'{name}_{x}_{y}{ext}'

    out_path = os.path.join(out_dir, path)
    save_to = os.path.join(out_path, tile_name)

    # exist_ok avoids the check-then-create race of testing for the directory
    # first and creating it afterwards.
    os.makedirs(out_path, exist_ok=True)

    if overwrite or not os.path.exists(save_to):
        img.save(save_to)

    return os.path.join(path, tile_name)
def compute_overlap(rect1, rect2):
    """Return the intersection area of two axis-aligned rectangles.

    Args:
        rect1: Sequence ``[x_min, y_min, x_max, y_max]``.
        rect2: Sequence ``[x_min, y_min, x_max, y_max]``.

    Returns:
        The area shared by both rectangles, or ``0`` when they do not
        overlap.
    """
    dx = min(rect1[2], rect2[2]) - max(rect1[0], rect2[0])
    dy = min(rect1[3], rect2[3]) - max(rect1[1], rect2[1])

    # Without this guard two rectangles that are disjoint on both axes yield
    # a positive product of two negative extents, i.e. a bogus overlap.
    if dx <= 0 or dy <= 0:
        return 0
    return dx * dy
def save_obj_tiles(obj, out_folder, in_folder, stack_images):
    """Save every tile extracted for ``obj`` and describe the result.

    Args:
        obj: Object record; besides the attributes needed by
            ``extract_object_tiles`` it is indexed with ``"name"`` and
            ``"stack_id"``.
        out_folder: Root directory the tiles are written to.
        in_folder: Directory the source images are read from.
        stack_images: Image records of the stack the object belongs to.

    Returns:
        A list with one dictionary per extracted tile position. Each holds
        the keys ``best_index`` (always ``None`` here), ``images`` (one
        description per stack image), ``obj_name`` and ``stack_id``.
    """

    def describe(tile_entry):
        # Persist one tile plus its neighbours and return its metadata.
        row, origin, tile_img, tile_neighbours = tile_entry

        # The tile itself is written before any of its neighbours.
        tile_path = save_tile(row.file_path, out_folder, origin[0], origin[1], tile_img)

        neighbour_paths = []
        for candidate in tile_neighbours:
            if candidate:
                n_x, n_y, n_img = candidate
                neighbour_paths.append(
                    save_tile(row.file_path, out_folder, n_x, n_y, n_img)
                )
            else:
                # Missing neighbours keep their slot so positions stay aligned.
                neighbour_paths.append(None)

        return {
            "focus_value": row["focus_value"],
            "image_path": tile_path,
            "neighbours": neighbour_paths,
            "original_filename": row["file_name"],
            "scan_uuid": row["uuid"],
            "study_id": row["study_id"],
        }

    return [
        {
            "best_index": None,
            "images": [describe(tile_entry) for tile_entry in z_stack],
            "obj_name": obj["name"],
            "stack_id": obj["stack_id"],
        }
        for z_stack in extract_object_tiles(obj, stack_images, in_folder)
    ]
def save_stack(stack, out_folder, in_folder):
    """Save the tiles of every object in ``stack``.

    Args:
        stack: Entry exposing ``objects`` and ``images``.
        out_folder: Root directory the tiles are written to.
        in_folder: Directory the source images are read from.

    Returns:
        The concatenated results of ``save_obj_tiles`` for all objects, in
        the order of ``stack.objects``.
    """
    return [
        z_stack
        for obj in stack.objects
        for z_stack in save_obj_tiles(obj, out_folder, in_folder, stack.images)
    ]
if __name__ == "__main__":
    # Script entry point: read the configuration, group the CSV rows by
    # stack, write the image tiles and finally dump the shuffled metadata.
    load_dotenv()

    print("Geting environment variables...")
    img_size_raw = os.getenv('IMG_SIZE')
    root_in = os.getenv('ROOT_IN')
    if img_size_raw is None or root_in is None:
        # Fail early with a clear message instead of an opaque TypeError later.
        raise SystemExit("IMG_SIZE and ROOT_IN must be set in the environment or the .env file")

    # ``size`` has to remain a module-level name: extract_object_tiles reads it.
    size = int(img_size_raw)

    print(f'img_size: {size}')
    print(f'in_folder: {root_in}')

    print("Loading data from csv files...")
    objects = pd.read_csv("out/objects.csv", index_col=0)
    stacks = pd.read_csv("out/stacks.csv", index_col=0)

    stacks_dict = defaultdict(StackEntry)

    print("Building internal datastructure...")
    # Collect the images of every stack ...
    for _, row in stacks.iterrows():
        stacks_dict[row.stack_id].add_image(row)

    # ... and bring them into focus order.
    for entry in stacks_dict.values():
        entry.sort()

    # Attach the objects to the stack they belong to.
    for _, row in objects.iterrows():
        stacks_dict[row.stack_id].add_object(row)

    out_folder = "out"
    z_stacks = []

    print("Generating image tiles and writing them to file...")
    for stack in stacks_dict.values():
        z_stacks.extend(save_stack(stack, out_folder, root_in))

    # Randomise the order of the z-stacks before they are written out.
    print("Shuffling data...")
    random.shuffle(z_stacks)

    print("Writing meta-data for annotation to file...")
    with open(os.path.join(out_folder, "data.json"), 'w') as file:
        json.dump(z_stacks, file)