add python scripts

Files changed (2) hide show

annotation-preprocessing/0_fetch_from_database.py +89 -0
annotation-preprocessing/1_splitting_into_patches.py +154 -0

annotation-preprocessing/0_fetch_from_database.py ADDED Viewed

	@@ -0,0 +1,89 @@

+import mysql.connector
+import pandas as pd
+import os
+from dotenv import load_dotenv
def fetch_objects_from_datase(db):
    """Fetch the ground-truth bounding boxes used for annotation.

    Runs a single SELECT over ``UniqueGroundTruth`` (joined with
    ``DetectedObject``, ``Image``, ``FocusStack`` and ``ObjectType``),
    restricted to ``metaclass_id = 1`` (labelled "eggs" by the inline SQL
    comment) and to images added after 2021-03-07 00:00:00.

    Args:
        db: An open database connection exposing ``cursor()``; the script
            entry point passes a ``mysql.connector`` connection.

    Returns:
        A list of row tuples in the order ``(focus_stack_id, x_min, y_min,
        x_max, y_max, object_type_id, name, add_date)``, sorted by
        ``focus_stack_id``.
    """
    cursor = db.cursor()
    try:
        cursor.execute("""SELECT
            UniqueGroundTruth.focus_stack_id,
            UniqueGroundTruth.x_min,
            UniqueGroundTruth.y_min,
            UniqueGroundTruth.x_max,
            UniqueGroundTruth.y_max,
            UniqueGroundTruth.object_type_id,
            ObjectType.name,
            Image.add_date
        FROM UniqueGroundTruth
        JOIN DetectedObject on DetectedObject.id = UniqueGroundTruth.object_id
        JOIN Image on Image.id = DetectedObject.image_id
        JOIN FocusStack on FocusStack.id = Image.focus_stack_id
        JOIN ObjectType on ObjectType.id = UniqueGroundTruth.object_type_id
        WHERE metaclass_id = 1 -- only select eggs;
            AND unix_timestamp(Image.add_date) > unix_timestamp('2021-03-07 00:00:00')
        ORDER BY UniqueGroundTruth.focus_stack_id;
        """)
        return cursor.fetchall()
    finally:
        # Release the cursor even when the query fails, so a long-lived
        # connection does not accumulate open cursors.
        cursor.close()
def fetch_focus_stacks_from_database(db):
    """Fetch every image of each focus stack that contains a selected object.

    The sub-select picks the focus stacks referenced by ``UniqueGroundTruth``
    rows with ``metaclass_id = 1`` whose image was added after
    2021-03-07 00:00:00; the outer query returns one row per image of those
    stacks.

    Args:
        db: An open database connection exposing ``cursor()``; the script
            entry point passes a ``mysql.connector`` connection.

    Returns:
        A list of row tuples in the order ``(focus stack id, file_path,
        file_name, uuid, study_id, pos_z, focus_value, add_date)`` where
        ``file_path`` is ``study_id/uuid/file_name``.
    """
    # NOTE(review): the query text is left untouched because the schema is
    # not visible here, but three things should be confirmed against it:
    #   * `JOIN Slide on Slide.id = Scan.id` joins two primary keys; a
    #     foreign key such as Scan.slide_id may have been intended.
    #   * `focus_level` in ORDER BY is unqualified and not selected.
    #   * the alias `foucs_stack_id` is misspelled (harmless: rows come back
    #     as positional tuples).
    cursor = db.cursor()
    try:
        cursor.execute("""SELECT
            FocusStack.id as foucs_stack_id,
            CONCAT (study_id, "/", uuid, "/", file_name) as file_path,
            file_name,
            uuid,
            study_id,
            Image.pos_z,
            Image.focus_value,
            Image.add_date
        FROM FocusStack
        JOIN Scan on Scan.id = FocusStack.scan_id
        JOIN Slide on Slide.id = Scan.id
        JOIN Study on Study .id = Slide.study_id
        JOIN Image on Image.focus_stack_id  = FocusStack.id
        WHERE
            FocusStack.id IN( -- get all focus stacks that have objects in them;
                SELECT DISTINCT
                    UniqueGroundTruth.focus_stack_id
                FROM UniqueGroundTruth
                JOIN DetectedObject on DetectedObject.id = UniqueGroundTruth.object_id
                JOIN Image on Image.id = DetectedObject.image_id
                JOIN FocusStack on FocusStack.id = Image.focus_stack_id
                JOIN ObjectType on ObjectType.id = UniqueGroundTruth.object_type_id
                WHERE metaclass_id = 1 -- only select eggs;
                    AND unix_timestamp(Image.add_date) > unix_timestamp('2021-03-07 00:00:00')
            )
        ORDER BY FocusStack.id DESC, focus_value, focus_level
        """)
        return cursor.fetchall()
    finally:
        # Release the cursor even when the query fails.
        cursor.close()
if __name__ == "__main__":
    # Script entry point: read connection settings from the environment
    # (optionally via a .env file), run both queries and dump the results to
    # objects.csv / stacks.csv in the current working directory.
    load_dotenv()
    db = mysql.connector.connect(
        host=os.getenv('DB_HOST'),
        user=os.getenv('DB_USER'),
        password=os.getenv('DB_PASSWORD'),
        database=os.getenv('DB_NAME')
    )
    try:
        print("Querring objects...")
        object_rows = fetch_objects_from_datase(db)
        print("Querring stacks...")
        stack_rows = fetch_focus_stacks_from_database(db)
    finally:
        # The connection is only needed for the two queries; close it even
        # if one of them fails.
        db.close()
    # Column labels are passed to the constructor rather than assigned
    # afterwards: assigning `.columns` on a frame built from an empty result
    # raises a length-mismatch ValueError, whereas this yields an empty frame
    # with the right header.
    df_objects = pd.DataFrame(
        object_rows,
        columns=['stack_id', 'x_min', 'y_min', 'x_max', 'y_max',
                 'object_type_id', 'name', 'add_date'],
    )
    df_stacks = pd.DataFrame(
        stack_rows,
        columns=['stack_id', 'file_path', 'file_name',
                 'uuid', 'study_id', 'pos_z', 'focus_value', 'add_date'],
    )
    print("Writing objects to file...")
    df_objects.to_csv("objects.csv")
    print("Writing stacks to file...")
    df_stacks.to_csv("stacks.csv")

annotation-preprocessing/1_splitting_into_patches.py ADDED Viewed

	@@ -0,0 +1,154 @@

+import pandas as pd
+from collections import defaultdict
+from dotenv import load_dotenv
+import os
+from PIL import Image, ImageDraw
+import math
+import json
+import random
class StackEntry:
    """Groups the images of one focus stack with the objects annotated in it."""

    def __init__(self):
        # Both collections start empty and are filled through the add_* methods.
        self.images, self.objects = [], []

    def add_image(self, image):
        """Append one image record to this stack."""
        self.images += [image]

    def add_object(self, object):
        """Append one annotated object record to this stack."""
        self.objects += [object]

    def sort(self):
        """Order the images in place by ascending ``focus_value``."""
        self.images[:] = sorted(self.images, key=lambda entry: entry.focus_value)
def get_neighbours(img, x, y, dimensions):
    """Crop the eight square tiles surrounding the tile at ``(x, y)``.

    Neighbours are visited row by row (top row first, left to right), with
    the centre tile itself skipped.

    Args:
        img: Image-like object exposing ``size`` as ``(width, height)`` and
            ``crop(box)``.
        x: Left edge of the centre tile.
        y: Top edge of the centre tile.
        dimensions: Edge length of every tile.

    Returns:
        A list of eight entries. Each entry is ``(left, top, cropped_tile)``
        when the neighbour lies completely inside the image, otherwise
        ``None``.
    """
    img_width, img_height = img.size
    result = []
    for dy in (-1, 0, 1):
        for dx in (-1, 0, 1):
            if dx == 0 and dy == 0:
                continue  # the centre tile is not its own neighbour
            left = x + dx * dimensions
            top = y + dy * dimensions
            right = left + dimensions
            bottom = top + dimensions
            inside = (
                left >= 0 and right <= img_width
                and top >= 0 and bottom <= img_height
            )
            if not inside:
                result.append(None)
                continue
            result.append((left, top, img.crop([left, top, right, bottom])))
    return result
def extract_object_tiles(obj, stack_images, tile_size=None):
    """Cut out every grid tile that overlaps an object's bounding box.

    The bounding box is expanded outwards to the tile grid, and for each grid
    cell one crop (plus its neighbouring tiles) is taken from every image of
    the focus stack.

    Args:
        obj: Record exposing ``x_min``, ``x_max``, ``y_min`` and ``y_max``.
        stack_images: Records exposing ``file_path``; each file is opened
            with ``PIL.Image.open``.
        tile_size: Edge length of a tile. When omitted, the module-level
            ``size`` set by the script entry point is used, which keeps
            existing two-argument calls working.

    Returns:
        A list with one entry per grid cell (row by row). Each entry is a
        list with one tuple per stack image:
        ``(row, [x, y], cropped_tile, neighbours)`` where ``neighbours`` is
        the result of ``get_neighbours``.
    """
    if tile_size is None:
        # Fall back to the global configured in the `__main__` block.
        tile_size = size

    x_start = int(obj.x_min / tile_size) * tile_size
    x_end = int(math.ceil(obj.x_max / tile_size)) * tile_size
    y_start = int(obj.y_min / tile_size) * tile_size
    y_end = int(math.ceil(obj.y_max / tile_size)) * tile_size

    opened = []
    try:
        for row in stack_images:
            opened.append((row, Image.open(row.file_path)))

        tiles = []
        # Get tiles of the image that contain bounding box of object
        for y in range(y_start, y_end, tile_size):
            for x in range(x_start, x_end, tile_size):
                stack = []
                for row, img in opened:
                    box = [x, y, x + tile_size, y + tile_size]
                    crop = img.crop(box)
                    neighbours = get_neighbours(img, x, y, tile_size)
                    stack.append((row, box[:2], crop, neighbours))
                tiles.append(stack)
        return tiles
    finally:
        # The crops are independent images (Image.crop has not been lazy
        # since Pillow 3.4), so the source images can be released here
        # instead of waiting for garbage collection.
        for _, img in opened:
            img.close()
def save_tile(original_file_path, out_dir, x: int, y: int, img, overwrite=False):
    """Save one tile next to a mirror of the original file's directory.

    The tile is written to
    ``<out_dir>/<dir of original_file_path>/<name>_<x>_<y><ext>``; missing
    directories are created.

    Note that the target is built with ``os.path.join``, so an absolute
    ``original_file_path`` discards ``out_dir`` entirely.

    Args:
        original_file_path: Path of the image the tile was cut from.
        out_dir: Root folder for the output; may be ``""`` for the current
            working directory.
        x: Left edge of the tile, used in the file name.
        y: Top edge of the tile, used in the file name.
        img: Object exposing ``save(path)``.
        overwrite: When false (default), an existing file is left untouched.

    Returns:
        The path the tile was (or already had been) saved to.
    """
    rel_dir, file_name = os.path.split(original_file_path)
    stem, ext = os.path.splitext(file_name)
    out_path = os.path.join(out_dir, rel_dir)
    save_to = os.path.join(out_path, f'{stem}_{x}_{y}{ext}')
    # An empty out_path means "current directory"; os.makedirs("") would
    # raise. exist_ok avoids the check-then-create race of a separate
    # os.path.exists() test.
    if out_path:
        os.makedirs(out_path, exist_ok=True)
    if overwrite or not os.path.exists(save_to):
        img.save(save_to)
    return save_to
def save_obj_tiles(obj, out_folder, stack_images):
    """Write all tiles of one object to disk and describe them as metadata.

    Args:
        obj: Object record; ``name`` and ``stack_id`` are copied into the
            metadata and the record is forwarded to ``extract_object_tiles``.
        out_folder: Output root handed to ``save_tile``.
        stack_images: Image records of the object's focus stack.

    Returns:
        A list with one dict per grid tile. Each dict holds ``best_index``
        (always ``None`` here), ``images`` (one dict per stack image with the
        saved tile path and the paths of its neighbours, ``None`` where a
        neighbour does not exist), ``obj_name`` and ``stack_id``.
    """
    records = []
    for tile_stack in extract_object_tiles(obj, stack_images):
        images_meta = []
        for row, origin, tile_img, tile_neighbours in tile_stack:
            # The centre tile is written first, then its neighbours in order.
            image_path = save_tile(
                row.file_path, out_folder, origin[0], origin[1], tile_img
            )
            neighbour_paths = [
                save_tile(row.file_path, out_folder, entry[0], entry[1], entry[2])
                if entry else None
                for entry in tile_neighbours
            ]
            images_meta.append({
                "focus_value": row.focus_value,
                "image_path": image_path,
                "neighbours": neighbour_paths,
                "original_filename": row.file_name,
                "scan_uuid": row.uuid,
                "study_id": row.study_id,
            })
        records.append({
            "best_index": None,
            "images": images_meta,
            "obj_name": obj.name,
            "stack_id": obj.stack_id,
        })
    return records
def save_stack(stack, out_folder):
    """Save the tiles of every object in ``stack`` and collect their metadata.

    Args:
        stack: Entry exposing ``objects`` and ``images``.
        out_folder: Output root forwarded to ``save_obj_tiles``.

    Returns:
        One flat list containing the metadata dicts of all objects, in object
        order.
    """
    return [
        tile_record
        for obj in stack.objects
        for tile_record in save_obj_tiles(obj, out_folder, stack.images)
    ]
if __name__ == "__main__":
    # Script entry point: read the CSV exports, cut every annotated object
    # into tiles and write the shuffled tile metadata to out/data.json.
    load_dotenv()
    print("Geting environment variables...")
    raw_size = os.getenv('IMG_SIZE')
    if raw_size is None:
        # int(None) would fail with an unhelpful TypeError.
        raise SystemExit("IMG_SIZE environment variable is not set")
    # Module-level on purpose: extract_object_tiles reads this global.
    size = int(raw_size)
    print("Loading data from csv files...")
    # NOTE(review): the fetch script writes objects.csv / stacks.csv, while
    # this script reads test_*.csv - confirm which inputs are intended.
    objects = pd.read_csv("test_objects.csv", index_col=0)
    stacks = pd.read_csv("test_stacks.csv", index_col=0)
    stacks_dict = defaultdict(StackEntry)
    print("Building internal datastructure...")
    # adding images to dict
    for _, row in stacks.iterrows():
        stacks_dict[row.stack_id].add_image(row)
    for entry in stacks_dict.values():
        entry.sort()
    # adding objects
    for _, row in objects.iterrows():
        stacks_dict[row.stack_id].add_object(row)
    out_folder = "out"
    # Nothing else creates this folder (tiles are written relative to ""),
    # so writing data.json below would fail on a fresh checkout.
    os.makedirs(out_folder, exist_ok=True)
    z_stacks = []
    print("Generating image tiles and writing them to file...")
    for stack in stacks_dict.values():
        # NOTE(review): tiles are saved relative to the working directory
        # (""), not to out_folder - confirm this is intended.
        z_stacks.extend(save_stack(stack, ""))
    # randomize z_stacks
    print("Shuffling data...")
    random.shuffle(z_stacks)
    print("Writing meta-data for annotation to file...")
    with open(os.path.join(out_folder, "data.json"), 'w') as file:
        file.write(json.dumps(z_stacks))