master_thesis_models / annotation-preprocessing /1_splitting_into_patches.py

Hannes Kuchelmeister

change test_object to actual ones in file path

93d387e almost 3 years ago

5.53 kB

	import pandas as pd
	from collections import defaultdict
	from dotenv import load_dotenv
	import os
	from PIL import Image, ImageDraw
	import math
	import json
	import random

	class StackEntry:
	    """Collects the image records and object records that share one stack."""

	    def __init__(self):
	        # Both collections start empty and are filled through the add_* methods.
	        self.images = []
	        self.objects = []

	    def add_image(self, image):
	        """Append ``image`` to the list of images of this stack."""
	        self.images.append(image)

	    def add_object(self, object):
	        """Append ``object`` to the list of objects of this stack."""
	        self.objects.append(object)

	    def sort(self):
	        """Sort the stored images in place by ascending ``focus_value``."""

	        def focus_of(entry):
	            return entry.focus_value

	        self.images.sort(key=focus_of)

	def get_neighbours(img, x, y, dimensions):
	    """Crop the eight tiles that surround the tile whose top-left corner is ``(x, y)``.

	    Args:
	        img: Image-like object exposing ``size`` (width, height) and ``crop(box)``.
	        x: Left edge of the centre tile.
	        y: Top edge of the centre tile.
	        dimensions: Edge length of the square tiles.

	    Returns:
	        A list with eight entries, ordered row by row from top-left to
	        bottom-right (the centre tile itself is skipped). Each entry is a
	        ``(neighbour_x, neighbour_y, cropped_image)`` tuple, or ``None`` when
	        the neighbouring tile would not fit entirely inside the image.
	    """
	    img_width, img_height = img.size
	    offsets = [(-1, -1), (0, -1), (1, -1), (-1, 0), (1, 0), (-1, 1), (0, 1), (1, 1)]

	    def crop_at(left, top):
	        # Only tiles lying completely within the image bounds are cropped.
	        right = left + dimensions
	        bottom = top + dimensions
	        fits = left >= 0 and right <= img_width and top >= 0 and bottom <= img_height
	        if not fits:
	            return None
	        return (left, top, img.crop([left, top, right, bottom]))

	    return [crop_at(x + dx * dimensions, y + dy * dimensions) for dx, dy in offsets]

	def extract_object_tiles(obj, stack_images, in_folder, threshold=0.25, tile_size=None):
	    """Cut out the grid tiles that sufficiently cover an object's bounding box.

	    The bounding box of ``obj`` is snapped outwards to the tile grid. Every
	    grid tile whose overlap with the bounding box exceeds ``threshold`` times
	    the tile area is cropped from each image of the stack, together with its
	    eight neighbouring tiles.

	    Args:
	        obj: Record exposing ``x_min``, ``y_min``, ``x_max`` and ``y_max``.
	        stack_images: Iterable of records exposing ``file_path``.
	        in_folder: Directory the ``file_path`` values are relative to.
	        threshold: Minimum fraction of the tile area that must overlap the
	            bounding box for the tile to be kept.
	        tile_size: Edge length of the square tiles. When ``None`` the
	            module-level ``size`` (set by the script entry point) is used.

	    Returns:
	        A list with one entry per kept tile. Each entry is a list holding,
	        for every image of the stack, a tuple
	        ``(row, [x, y], cropped_tile, neighbours)``.
	    """
	    if tile_size is None:
	        # Backward compatible: fall back to the global set when run as a script.
	        tile_size = size

	    bbox = [obj.x_min, obj.y_min, obj.x_max, obj.y_max]
	    x_start = int(obj.x_min / tile_size) * tile_size
	    x_end = int(math.ceil(obj.x_max / tile_size)) * tile_size
	    y_start = int(obj.y_min / tile_size) * tile_size
	    y_end = int(math.ceil(obj.y_max / tile_size)) * tile_size
	    min_overlap = tile_size * tile_size * threshold

	    tiles = []
	    opened = []
	    try:
	        for row in stack_images:
	            opened.append((row, Image.open(os.path.join(in_folder, row.file_path))))

	        # Get tiles of the image that contain bounding box of object
	        for y in range(y_start, y_end, tile_size):
	            for x in range(x_start, x_end, tile_size):
	                box = [x, y, x + tile_size, y + tile_size]
	                if compute_overlap(box, bbox) <= min_overlap:
	                    continue

	                stack = []
	                for row, img in opened:
	                    crop = img.crop(box)
	                    neighbours = get_neighbours(img, x, y, tile_size)
	                    stack.append((row, box[:2], crop, neighbours))
	                tiles.append(stack)
	    finally:
	        # The crops are independent images, so the source files can be
	        # released here instead of leaking one handle per stack image.
	        for _, img in opened:
	            img.close()
	    return tiles


	def save_tile(original_file_path, out_dir, x: int, y: int, img, overwrite=False):
	    """Save a tile below ``out_dir``, mirroring the source file's directory.

	    The tile is named ``<name>_<x>_<y><ext>`` after the original file and is
	    placed in the same relative directory underneath ``out_dir``.

	    Args:
	        original_file_path: Path of the image the tile was cut from.
	        out_dir: Root directory for the written tiles.
	        x: X coordinate used in the tile's file name.
	        y: Y coordinate used in the tile's file name.
	        img: Object exposing ``save(path)``.
	        overwrite: When false, an already existing tile file is left untouched.

	    Returns:
	        The tile's path relative to ``out_dir``.
	    """
	    path, file_name = os.path.split(original_file_path)
	    name, ext = os.path.splitext(file_name)
	    tile_name = f'{name}_{x}_{y}{ext}'

	    out_path = os.path.join(out_dir, path)
	    save_to = os.path.join(out_path, tile_name)

	    # exist_ok avoids the check-then-create race of a separate exists() test.
	    os.makedirs(out_path, exist_ok=True)
	    if overwrite or not os.path.exists(save_to):
	        img.save(save_to)
	    return os.path.join(path, tile_name)

	def compute_overlap(rect1, rect2):
	    """Return the intersection area of two axis-aligned rectangles.

	    Args:
	        rect1: Sequence ``[x_min, y_min, x_max, y_max]``.
	        rect2: Sequence ``[x_min, y_min, x_max, y_max]``.

	    Returns:
	        The area shared by both rectangles, or ``0`` when they do not overlap.
	    """
	    dx = min(rect1[2], rect2[2]) - max(rect1[0], rect2[0])
	    dy = min(rect1[3], rect2[3]) - max(rect1[1], rect2[1])
	    # Without this guard two negative extents multiply into a positive
	    # "overlap" for rectangles that are disjoint on both axes.
	    if dx <= 0 or dy <= 0:
	        return 0
	    return dx * dy

	def save_obj_tiles(obj, out_folder, in_folder, stack_images):
	    """Write all tiles extracted for ``obj`` and describe them as metadata.

	    Args:
	        obj: Object record; indexed with ``"name"`` and ``"stack_id"`` here and
	            passed on to ``extract_object_tiles``.
	        out_folder: Root directory the tiles are saved to.
	        in_folder: Directory containing the source images.
	        stack_images: Image records of the stack the object belongs to.

	    Returns:
	        A list with one dict per extracted tile position. Each dict holds
	        ``best_index`` (always ``None``), ``images`` (one entry per stack
	        image with the saved tile path and its neighbour paths), ``obj_name``
	        and ``stack_id``.
	    """

	    def store_neighbour(source_path, neighbour):
	        # Missing neighbours (tiles outside the image) stay None in the output.
	        if not neighbour:
	            return None
	        n_x, n_y, n_img = neighbour
	        return save_tile(source_path, out_folder, n_x, n_y, n_img)

	    results = []
	    for tile_stack in extract_object_tiles(obj, stack_images, in_folder):
	        entries = []
	        for row, origin, tile_img, neighbour_tiles in tile_stack:
	            tile_path = save_tile(row.file_path, out_folder, origin[0], origin[1], tile_img)
	            neighbour_paths = [
	                store_neighbour(row.file_path, candidate)
	                for candidate in neighbour_tiles
	            ]
	            entries.append({
	                "focus_value": row["focus_value"],
	                "image_path": tile_path,
	                "neighbours": neighbour_paths,
	                "original_filename": row["file_name"],
	                "scan_uuid": row["uuid"],
	                "study_id": row["study_id"],
	            })
	        results.append({
	            "best_index": None,
	            "images": entries,
	            "obj_name": obj["name"],
	            "stack_id": obj["stack_id"],
	        })

	    return results

	def save_stack(stack, out_folder, in_folder):
	    """Save the tiles of every object in ``stack`` and gather their metadata.

	    Args:
	        stack: Entry exposing ``objects`` and ``images``.
	        out_folder: Root directory the tiles are saved to.
	        in_folder: Directory containing the source images.

	    Returns:
	        One flat list with the metadata dicts produced by ``save_obj_tiles``
	        for each object, in object order.
	    """
	    return [
	        tile_meta
	        for obj in stack.objects
	        for tile_meta in save_obj_tiles(obj, out_folder, in_folder, stack.images)
	    ]


	if __name__ == "__main__":
	    # Script entry point: reads the configuration, groups the CSV rows per
	    # stack, writes the image tiles and finally dumps the shuffled metadata.
	    load_dotenv()
	    print("Geting environment variables...")
	    img_size_raw = os.getenv('IMG_SIZE')
	    root_in = os.getenv('ROOT_IN')
	    if img_size_raw is None or root_in is None:
	        # Fail with a clear message instead of an opaque TypeError later on.
	        raise SystemExit("IMG_SIZE and ROOT_IN must be set in the environment or .env file")
	    # ``size`` has to stay a module-level name: extract_object_tiles reads it.
	    size = int(img_size_raw)

	    print(f'img_size: {size}')
	    print(f'in_folder: {root_in}')

	    print("Loading data from csv files...")
	    objects = pd.read_csv("out/objects.csv", index_col=0)
	    stacks = pd.read_csv("out/stacks.csv", index_col=0)

	    stacks_dict = defaultdict(StackEntry)

	    print("Building internal datastructure...")
	    # adding images to dict
	    for _, row in stacks.iterrows():
	        stacks_dict[row.stack_id].add_image(row)

	    for entry in stacks_dict.values():
	        entry.sort()

	    # adding objects
	    for _, row in objects.iterrows():
	        stacks_dict[row.stack_id].add_object(row)

	    out_folder = "out"
	    z_stacks = []

	    print("Generating image tiles and writing them to file...")
	    for stack in stacks_dict.values():
	        z_stacks.extend(save_stack(stack, out_folder, root_in))

	    # randomize z_stacks
	    print("Shuffling data...")
	    random.shuffle(z_stacks)

	    print("Writing meta-data for annotation to file...")
	    with open(os.path.join(out_folder, "data.json"), 'w') as file:
	        json.dump(z_stacks, file)