File size: 5,541 Bytes

import pandas as pd
from collections import defaultdict
from dotenv import load_dotenv
import os
from PIL import Image, ImageDraw
import math
import json
import random

class StackEntry:
    """Groups the image rows of one stack with the object rows found in it."""

    def __init__(self):
        # Both collections start empty and are filled through the add_* methods.
        self.images, self.objects = [], []

    def add_image(self, image):
        """Append *image* to the end of this entry's image list."""
        self.images.append(image)

    def add_object(self, object):
        """Append *object* to the end of this entry's object list."""
        # The parameter name shadows the builtin; it is kept so keyword callers
        # (``add_object(object=...)``) continue to work.
        self.objects.append(object)

    def sort(self):
        """Sort the stored images in place by ascending ``focus_value``."""

        def focus_of(entry):
            return entry.focus_value

        self.images.sort(key=focus_of)

def get_neighbours(img, x, y, dimensions):
    """Crop the eight tiles surrounding the square tile whose top-left is (x, y).

    Args:
        img: Image-like object exposing ``size`` as ``(width, height)`` and a
            ``crop(box)`` method.
        x: Left coordinate of the centre tile.
        y: Top coordinate of the centre tile.
        dimensions: Edge length of every tile.

    Returns:
        A list of eight entries in row-major order (top-left, top, top-right,
        left, right, bottom-left, bottom, bottom-right). Each entry is a tuple
        ``(neighbour_x, neighbour_y, cropped_tile)``, or ``None`` when that
        neighbour would not lie completely inside the image.
    """
    width, height = img.size

    def crop_at(left, top):
        right, bottom = left + dimensions, top + dimensions
        fits = left >= 0 and top >= 0 and right <= width and bottom <= height
        if not fits:
            return None
        return (left, top, img.crop([left, top, right, bottom]))

    # Walk the 3x3 grid of offsets row by row, leaving out the centre tile.
    return [
        crop_at(x + step_x * dimensions, y + step_y * dimensions)
        for step_y in (-1, 0, 1)
        for step_x in (-1, 0, 1)
        if (step_x, step_y) != (0, 0)
    ]

def extract_object_tiles(obj, stack_images, in_folder, threshold = 0.25, tile_size = None):
    """Cut the grid tiles that cover an object's bounding box out of every stack image.

    The bounding box is expanded outwards to the tile grid. A grid tile is
    kept when ``compute_overlap`` of the tile and the bounding box exceeds
    ``threshold`` times the tile area.

    Args:
        obj: Record exposing ``x_min``, ``y_min``, ``x_max`` and ``y_max``.
        stack_images: Iterable of records exposing ``file_path`` (relative to
            ``in_folder``); one image is opened per record.
        in_folder: Root folder the image paths are resolved against.
        threshold: Minimum fraction of a tile's area that must overlap the
            bounding box for the tile to be kept.
        tile_size: Edge length of a tile. When ``None`` (the default) the
            module-level ``size`` set by the script entry point is used, which
            is the original behaviour.

    Returns:
        A list with one entry per kept tile. Each entry is a list with one
        tuple per stack image: ``(row, [x, y], crop, neighbours)`` where
        ``neighbours`` is the result of ``get_neighbours`` for that tile.
    """
    if tile_size is None:
        # Backward-compatible fallback to the global configured by the script.
        tile_size = size

    x_start = int(obj.x_min / tile_size) * tile_size
    x_end = int(math.ceil(obj.x_max / tile_size)) * tile_size
    y_start = int(obj.y_min / tile_size) * tile_size
    y_end = int(math.ceil(obj.y_max / tile_size)) * tile_size

    # Loop invariants: the object's bounding box and the overlap needed to keep a tile.
    obj_box = [obj.x_min, obj.y_min, obj.x_max, obj.y_max]
    min_overlap = tile_size * tile_size * threshold

    tiles = []
    opened = []
    try:
        for row in stack_images:
            opened.append((row, Image.open(os.path.join(in_folder, row.file_path))))

        # Get tiles of the image that contain bounding box of object
        for y in range(y_start, y_end, tile_size):
            for x in range(x_start, x_end, tile_size):
                box = [x, y, x + tile_size, y + tile_size]
                if compute_overlap(box, obj_box) > min_overlap:
                    stack = []
                    for row, img in opened:
                        crop = img.crop(box)
                        neighbours = get_neighbours(img, x, y, tile_size)
                        stack.append((row, box[:2], crop, neighbours))
                    tiles.append(stack)
    finally:
        # Release the source files. The crops were produced before this point;
        # Pillow's crop() is documented as non-lazy since 3.4.0, so the returned
        # tiles do not depend on the source images staying open.
        for _, img in opened:
            img.close()
    return tiles


def save_tile(original_file_path, out_dir, x : int, y : int, img, overwrite = False):
    """Save a tile below ``out_dir``, mirroring the source image's relative folder.

    The tile is named ``<name>_<x>_<y><ext>``, where ``name`` and ``ext`` come
    from the file name of ``original_file_path``.

    Args:
        original_file_path: Path of the image the tile was cut from; its
            directory part is recreated below ``out_dir``.
        out_dir: Root folder for the written tiles.
        x: Left coordinate of the tile, used in the file name.
        y: Top coordinate of the tile, used in the file name.
        img: Object with a ``save(path)`` method (e.g. a PIL image).
        overwrite: When false, an already existing tile file is left untouched.

    Returns:
        The tile path relative to ``out_dir``.
    """
    path, file_name = os.path.split(original_file_path)
    name, ext = os.path.splitext(file_name)
    tile_name = f'{name}_{x}_{y}{ext}'

    out_path = os.path.join(out_dir, path)
    save_to = os.path.join(out_path, tile_name)

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(out_path, exist_ok=True)
    if overwrite or not os.path.exists(save_to):
        img.save(save_to)
    return os.path.join(path, tile_name)

def compute_overlap(rect1, rect2):
    """Return the area of the intersection of two axis-aligned rectangles.

    Args:
        rect1: Sequence ``[x_min, y_min, x_max, y_max]``.
        rect2: Sequence ``[x_min, y_min, x_max, y_max]``.

    Returns:
        The overlapping area, or 0 when the rectangles do not intersect.
    """
    dx = min(rect1[2], rect2[2]) - max(rect1[0], rect2[0])
    dy = min(rect1[3], rect2[3]) - max(rect1[1], rect2[1])
    # Clamp each extent at 0: without this, rectangles disjoint on both axes
    # yield a positive product of two negatives, and rectangles disjoint on one
    # axis yield a negative "area".
    return max(0, dx) * max(0, dy)

def save_obj_tiles(obj, out_folder, in_folder, stack_images):
    """Write all tiles of one object to disk and describe them as z-stacks.

    Tiles come from ``extract_object_tiles``; every centre tile and every
    in-bounds neighbour tile is written with ``save_tile``.

    Args:
        obj: Object record; read via ``obj["name"]`` and ``obj["stack_id"]``
            and passed on to ``extract_object_tiles``.
        out_folder: Root folder the tiles are written to.
        in_folder: Root folder the source images are read from.
        stack_images: Image records of the stack the object belongs to.

    Returns:
        A list with one dict per tile position. Each dict holds
        ``"best_index"`` (always ``None`` here), ``"images"`` (one metadata
        dict per stack image), ``"obj_name"`` and ``"stack_id"``.
    """

    def store_neighbour(row, neighbour):
        # Out-of-bounds neighbours arrive as None and stay None in the output.
        if not neighbour:
            return None
        left, top, tile = neighbour
        return save_tile(row.file_path, out_folder, left, top, tile)

    z_stacks = []
    for tile_stack in extract_object_tiles(obj, stack_images, in_folder):
        records = []
        for row, origin, tile, surrounding in tile_stack:
            # The centre tile is written first, then its neighbours in order.
            image_path = save_tile(row.file_path, out_folder, origin[0], origin[1], tile)
            neighbour_paths = [store_neighbour(row, entry) for entry in surrounding]

            record = {
                "focus_value": row["focus_value"],
                "image_path": image_path,
                "neighbours": neighbour_paths,
                "original_filename": row["file_name"],
                "scan_uuid": row["uuid"],
                "study_id": row["study_id"],
            }
            records.append(record)

        summary = {
            "best_index": None,
            "images": records,
            "obj_name": obj["name"],
            "stack_id": obj["stack_id"],
        }
        z_stacks.append(summary)

    return z_stacks

def save_stack(stack, out_folder, in_folder):
    """Save the tiles of every object in *stack* and collect their z-stack metadata.

    Args:
        stack: Entry exposing ``objects`` (iterated in order) and ``images``.
        out_folder: Root folder the tiles are written to.
        in_folder: Root folder the source images are read from.

    Returns:
        One flat list containing the results of ``save_obj_tiles`` for all
        objects, in object order.
    """
    return [
        z_stack
        for obj in stack.objects
        for z_stack in save_obj_tiles(obj, out_folder, in_folder, stack.images)
    ]


if __name__ == "__main__":
    # Script entry point: read the configuration from the environment, load the
    # object/stack tables, write image tiles for every object and dump the
    # shuffled annotation metadata to <out_folder>/data.json.
    load_dotenv()
    print("Geting environment variables...")
    img_size_raw = os.getenv('IMG_SIZE')
    root_in = os.getenv('ROOT_IN')
    if img_size_raw is None or root_in is None:
        # Fail with a readable message instead of a TypeError from int(None)
        # or from os.path.join(None, ...) further down.
        raise SystemExit("IMG_SIZE and ROOT_IN must be set in the environment or in a .env file")
    # Must remain a module-level name: extract_object_tiles reads `size` as a global.
    size = int(img_size_raw)

    print(f'img_size: {size}')
    print(f'in_folder: {root_in}')

    print("Loading data from csv files...")
    objects = pd.read_csv("out/test_objects.csv", index_col=0)
    stacks = pd.read_csv("out/test_stacks.csv", index_col=0)

    stacks_dict = defaultdict(StackEntry)

    print("Building internal datastructure...")
    # adding images to dict
    for _, row in stacks.iterrows():
        stacks_dict[row.stack_id].add_image(row)

    # Order the images of every stack by focus value before objects are attached.
    for entry in stacks_dict.values():
        entry.sort()

    # adding objects
    for _, row in objects.iterrows():
        stacks_dict[row.stack_id].add_object(row)

    out_folder = "out"
    z_stacks = []

    print("Generating image tiles and writing them to file...")
    for stack in stacks_dict.values():
        z_stacks.extend(save_stack(stack, out_folder, root_in))

    # randomize z_stacks
    print("Shuffling data...")
    random.shuffle(z_stacks)

    print("Writing meta-data for annotation to file...")
    with open(os.path.join(out_folder, "data.json"), 'w') as file:
        json.dump(z_stacks, file)