Commit 1726cb2 (verified) · Parent(s): f3a57dd · authored by achouffe

feat: initial commit for porting the webapp

__init__.py ADDED
File without changes
app.py ADDED
@@ -0,0 +1,222 @@
1
+ """
2
+ Simple Gradio Interface to showcase the ML outputs in a UI and webapp.
3
+ """
4
+
5
+ import math
6
+ from pathlib import Path
7
+
8
+ import gradio as gr
9
+ from PIL import Image
10
+ from ultralytics import YOLO
11
+
12
+ import pipeline
13
+ from identification import IdentificationModel, generate_visualization
14
+ from utils import bgr_to_rgb, select_best_device
15
+
16
+ DEFAULT_IMAGE_INDEX = 0
17
+
18
+ DIR_INSTALLED_PIPELINE = Path("./data/pipeline/")
19
+ DIR_EXAMPLES = Path("./data/images/")
20
+ FILEPATH_IDENTIFICATION_LIGHTGLUE_CONFIG = (
21
+ DIR_INSTALLED_PIPELINE / "models/identification/config.yaml"
22
+ )
23
+ FILEPATH_IDENTIFICATION_DB = DIR_INSTALLED_PIPELINE / "db/db.csv"
24
+ FILEPATH_IDENTIFICATION_LIGHTGLUE_FEATURES = (
25
+ DIR_INSTALLED_PIPELINE / "models/identification/features.pt"
26
+ )
27
+ FILEPATH_WEIGHTS_SEGMENTATION_MODEL = (
28
+ DIR_INSTALLED_PIPELINE / "models/segmentation/weights.pt"
29
+ )
30
+ FILEPATH_WEIGHTS_POSE_MODEL = DIR_INSTALLED_PIPELINE / "models/pose/weights.pt"
31
+
32
+
33
+ def examples(dir_examples: Path) -> list[Path]:
34
+ """
35
+ Retrieve the default example image filepaths from `dir_examples`.
36
+
37
+ Returns:
38
+ examples (list[Path]): list of image filepaths.
39
+ """
40
+ return list(dir_examples.glob("*.jpg"))
41
+
42
+
43
+ def make_ui(loaded_models: dict[str, YOLO | IdentificationModel]):
44
+ """
45
+ Main entrypoint to wire up the Gradio interface.
46
+
47
+ Args:
48
+ loaded_models (dict[str, YOLO | IdentificationModel]): loaded models ready to run inference with.
49
+
50
+ Returns:
51
+ demo (gr.Blocks): the assembled Gradio interface.
52
+ """
53
+ with gr.Blocks() as demo:
54
+ with gr.Row():
55
+ with gr.Column():
56
+ image_input = gr.Image(
57
+ type="pil",
58
+ value=default_value_input,
59
+ label="input image",
60
+ sources=["upload", "clipboard"],
61
+ )
62
+ gr.Examples(
63
+ examples=example_filepaths,
64
+ inputs=image_input,
65
+ )
66
+ submit_btn = gr.Button(value="Identify", variant="primary")
67
+
68
+ with gr.Column():
69
+ with gr.Tab("Prediction"):
70
+ with gr.Row():
71
+ pit_prediction = gr.Text(label="predicted individual")
72
+ name_prediction = gr.Text(label="fish name", visible=False)
73
+ image_feature_matching = gr.Image(
74
+ label="pattern matching", visible=False
75
+ )
76
+ image_extracted_keypoints = gr.Image(
77
+ label="extracted keypoints", visible=False
78
+ )
79
+
80
+ with gr.Tab("Details", visible=False) as tab_details:
81
+ with gr.Column():
82
+ with gr.Row():
83
+ text_rotation_angle = gr.Text(
84
+ label="correction angle (degrees)"
85
+ )
86
+ text_side = gr.Text(label="predicted side")
87
+
88
+ image_pose_keypoints = gr.Image(
89
+ type="pil", label="pose keypoints"
90
+ )
91
+ image_rotated_keypoints = gr.Image(
92
+ type="pil", label="rotated keypoints"
93
+ )
94
+ image_segmentation_mask = gr.Image(type="pil", label="mask")
95
+ image_masked = gr.Image(type="pil", label="masked")
96
+
97
+ def submit_fn(
98
+ loaded_models: dict[str, YOLO | IdentificationModel],
99
+ orig_image: Image.Image,
100
+ ):
101
+ """
102
+ Main function used for the Gradio interface.
103
+
104
+ Args:
105
+ loaded_models (dict[str, YOLO | IdentificationModel]): loaded models.
106
+ orig_image (PIL): original image picked by the user.
107
+
108
+ Returns:
109
+ fish side (str): predicted fish side.
110
+ correction angle (str): rotation to apply, in degrees, to re-align the image.
111
+ keypoints image (PIL): image displaying the bbox and keypoints from the
112
+ pose estimation model.
113
+ rotated image (PIL): rotated image after applying the correction angle.
114
+ segmentation mask (PIL): segmentation mask predicted by the segmentation model.
115
+ segmented image (PIL): segmented orig_image using the segmentation mask
116
+ and the crop.
117
+ predicted_individual (str): The identified individual.
118
+ pil_image_extracted_keypoints (PIL): The extracted keypoints overlaid on the image.
119
+ feature_matching_image (PIL): The matching of the source with the identified individual.
120
+ """
122
+ results = pipeline.run(loaded_models=loaded_models, pil_image=orig_image)
123
+ side = results["stages"]["pose"]["output"]["side"]
124
+ theta = results["stages"]["pose"]["output"]["theta"]
125
+ pil_image_keypoints = Image.fromarray(
126
+ bgr_to_rgb(results["stages"]["pose"]["output"]["prediction"].plot())
127
+ )
128
+ pil_image_rotated = Image.fromarray(
129
+ results["stages"]["rotation"]["output"]["array_image"]
130
+ )
131
+ pil_image_mask = results["stages"]["segmentation"]["output"]["mask"]
132
+ pil_image_masked_cropped = results["stages"]["crop"]["output"]["pil_image"]
133
+
134
+ viz_dict = generate_visualization(
135
+ pil_image=pil_image_masked_cropped,
136
+ prediction=results["stages"]["identification"]["output"],
137
+ )
138
+
139
+ is_new_individual = (
140
+ results["stages"]["identification"]["output"]["type"] == "new"
141
+ )
142
+
143
+ return {
144
+ text_rotation_angle: f"{math.degrees(theta):0.1f}",
145
+ text_side: side.value,
146
+ image_pose_keypoints: pil_image_keypoints,
147
+ image_rotated_keypoints: pil_image_rotated,
148
+ image_segmentation_mask: pil_image_mask,
149
+ image_masked: pil_image_masked_cropped,
150
+ pit_prediction: (
151
+ "New Fish!"
152
+ if is_new_individual
153
+ else gr.Text(
154
+ results["stages"]["identification"]["output"]["match"]["pit"],
155
+ visible=True,
156
+ )
157
+ ),
158
+ name_prediction: (
159
+ gr.Text(visible=False)
160
+ if is_new_individual
161
+ else gr.Text(
162
+ results["stages"]["identification"]["output"]["match"]["name"],
163
+ visible=True,
164
+ )
165
+ ),
166
+ tab_details: gr.Column(visible=True),
167
+ image_extracted_keypoints: gr.Image(
168
+ viz_dict["keypoints_source"], visible=True
169
+ ),
170
+ image_feature_matching: (
171
+ gr.Image(visible=False)
172
+ if is_new_individual
173
+ else gr.Image(viz_dict["matches"], visible=True)
174
+ ),
175
+ }
176
+
177
+ submit_btn.click(
178
+ fn=lambda pil_image: submit_fn(
179
+ loaded_models=loaded_models,
180
+ orig_image=pil_image,
181
+ ),
182
+ inputs=image_input,
183
+ outputs=[
184
+ text_rotation_angle,
185
+ text_side,
186
+ image_pose_keypoints,
187
+ image_rotated_keypoints,
188
+ image_feature_matching,
189
+ image_segmentation_mask,
190
+ image_masked,
191
+ pit_prediction,
192
+ name_prediction,
193
+ tab_details,
195
+ image_extracted_keypoints,
196
+ ],
197
+ )
198
+
199
+ return demo
200
+
201
+
202
+ if __name__ == "__main__":
203
+ device = select_best_device()
204
+ # FIXME: get this from the config instead
205
+ extractor_type = "aliked"
206
+ n_keypoints = 1024
207
+ threshold_wasserstein = 0.084
208
+ loaded_models = pipeline.load_models(
209
+ device=device,
210
+ filepath_weights_segmentation_model=FILEPATH_WEIGHTS_SEGMENTATION_MODEL,
211
+ filepath_weights_pose_model=FILEPATH_WEIGHTS_POSE_MODEL,
212
+ filepath_identification_lightglue_features=FILEPATH_IDENTIFICATION_LIGHTGLUE_FEATURES,
213
+ filepath_identification_db=FILEPATH_IDENTIFICATION_DB,
214
+ extractor_type=extractor_type,
215
+ n_keypoints=n_keypoints,
216
+ threshold_wasserstein=threshold_wasserstein,
217
+ )
218
+ model_segmentation = loaded_models["segmentation"]
219
+ example_filepaths = examples(dir_examples=DIR_EXAMPLES)
220
+ default_value_input = Image.open(example_filepaths[DEFAULT_IMAGE_INDEX])
221
+ demo = make_ui(loaded_models=loaded_models)
222
+ demo.launch()
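
For reference, a minimal sketch (not part of this commit) of reproducing what the "Identify" button does without the Gradio UI, reusing the constants defined at the top of app.py and the artifacts installed under ./data/pipeline/; the extractor settings mirror the values hard-coded in the __main__ block:

    from PIL import Image

    import app
    import pipeline
    from utils import select_best_device

    loaded_models = pipeline.load_models(
        device=select_best_device(),
        filepath_weights_segmentation_model=app.FILEPATH_WEIGHTS_SEGMENTATION_MODEL,
        filepath_weights_pose_model=app.FILEPATH_WEIGHTS_POSE_MODEL,
        filepath_identification_lightglue_features=app.FILEPATH_IDENTIFICATION_LIGHTGLUE_FEATURES,
        filepath_identification_db=app.FILEPATH_IDENTIFICATION_DB,
        extractor_type="aliked",
        n_keypoints=1024,
        threshold_wasserstein=0.084,
    )

    # Run the full pipeline on the first bundled example image and print the identification.
    image = Image.open(app.examples(app.DIR_EXAMPLES)[0])
    results = pipeline.run(loaded_models=loaded_models, pil_image=image)
    output = results["stages"]["identification"]["output"]
    print(output["type"], output.get("match", {}).get("pit"))
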
data/04_models/pipeline/webapp/installed/cropped_images/0495c348-a87c-4e70-8a1f-9e07e8510977.jpg ADDED
data/04_models/pipeline/webapp/installed/cropped_images/4d7bd307-1224-41e6-ba89-82dd85e69ea9.jpg ADDED
data/04_models/pipeline/webapp/installed/cropped_images/8042e02f-1d73-4251-8b76-3f634ffe0196.jpg ADDED
data/04_models/pipeline/webapp/installed/cropped_images/8ed739d7-d24f-4919-bd1d-339e34536a9b.jpg ADDED
data/04_models/pipeline/webapp/installed/cropped_images/ec1c6fe4-b2b5-491a-9d7f-542b85e2b078.jpg ADDED
data/images/2023-08-03_1157_7_900088000908738_F.jpg ADDED
data/images/2023-08-16_1547_7_989001006037192_F.jpg ADDED
data/images/2023-10-20_1604_7_900088000909142_F.jpg ADDED
data/images/2023-10-20_1625_7_900088000913636_F.jpg ADDED
data/images/989,001,006,004,046_F.jpg ADDED
data/pipeline/config.yaml ADDED
@@ -0,0 +1,13 @@
1
+ filepath_db: db/db.csv
2
+ filepath_model_segmentation_weights: models/segmentation/weights.pt
3
+ filepath_model_pose_weights: models/pose/weights.pt
4
+ filepath_model_identification_features: models/identification/features.pt
5
+ filepath_model_identification_config: models/identification/config.yaml
6
+ filepath_config: config.yaml
7
+ root_dir: .
8
+ dir_cropped_images: cropped_images
9
+ dir_models: models
10
+ dir_models_segmentation: models/segmentation
11
+ dir_models_pose: models/pose
12
+ dir_models_identification: models/identification
13
+ dir_db: db
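
One possible way to address the FIXME in app.py ("get this from the config instead") would be to read this file and resolve its relative paths against the installed pipeline directory; a hedged sketch, assuming PyYAML is available in the environment (it is not pinned in requirements.txt):

    from pathlib import Path

    import yaml  # PyYAML, assumed to be available; not pinned in requirements.txt

    root = Path("./data/pipeline/")
    with open(root / "config.yaml") as f:
        config = yaml.safe_load(f)

    # Resolve the relative paths declared in the config against the install root.
    filepath_db = root / config["filepath_db"]
    filepath_segmentation_weights = root / config["filepath_model_segmentation_weights"]
    filepath_pose_weights = root / config["filepath_model_pose_weights"]
    print(filepath_db, filepath_segmentation_weights, filepath_pose_weights)
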
data/pipeline/db/db.csv ADDED
@@ -0,0 +1,6 @@
1
+ ,Unnamed: 0.3,Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,filepath,year,pit,created_at_filepath,created_at_exif,is_electrofishing,is_guide_angling,exif_make,exif_model,exif_focal_length,exif_image_width,exif_image_height,exif_shutter_speed,exif_aperture,exif_brightness,coordinates_lat,coordinates_lon,uuid,success,filepath_crop,pose_theta,pose_fish_side,name
2
+ 0,0,0,1311,1312,data/01_raw/elk-river/2023/Guide/Fish/Nupqu 2/2023-09-15_1040_2_900088000908738_F.jpg,2023,900088000908738,2023-09-15 10:40:00,,False,True,,,,4032.0,3024.0,,,,,,8042e02f-1d73-4251-8b76-3f634ffe0196,True,data/04_models/pipeline/webapp/installed/cropped_images/8042e02f-1d73-4251-8b76-3f634ffe0196.jpg,2.9707219143490304,left,Norma Fisher
3
+ 1,1,1,2306,2308,"data/01_raw/elk-river/2022/Boat_Electrofishing/Fish/August/989,001,006,037,192_F.jpg",2022,989001006037192,,2022-08-10 14:24:34,True,False,Apple,iPhone 12 Pro Max,5.1,4032.0,3024.0,10.17001733102253,1.3561438092556088,7.527949339379251,49.447309527777776,-115.05773913888888,0495c348-a87c-4e70-8a1f-9e07e8510977,True,data/04_models/pipeline/webapp/installed/cropped_images/0495c348-a87c-4e70-8a1f-9e07e8510977.jpg,3.0887346928329578,left,Jorge Sullivan
4
+ 2,2,2,142,142,data/01_raw/elk-river/2023/Boat Electrofishing/Fish/Aug 16 2023/2023-08-16_1146_7_900088000913914_F.jpg,2023,900088000913914,2023-08-16 11:46:00,2023-08-16 11:47:39,True,False,Apple,iPhone 13 Pro Max,5.7,4032.0,3024.0,9.47009,1.169925,7.41103,49.479367833333335,-115.06903196666666,4d7bd307-1224-41e6-ba89-82dd85e69ea9,True,data/04_models/pipeline/webapp/installed/cropped_images/4d7bd307-1224-41e6-ba89-82dd85e69ea9.jpg,-0.0302701344557054,left,Elizabeth Woods
5
+ 3,3,3,2264,2266,"data/01_raw/elk-river/2022/Boat_Electrofishing/Fish/August/989,001,006,004,046_F_2.jpg",2022,989001006004046,,2022-08-05 14:00:47,True,False,Apple,iPhone 13 Pro Max,5.7,4032.0,3024.0,10.445739257101238,1.1699250021066825,8.260268601117538,49.374336702777775,-115.01017866666666,8ed739d7-d24f-4919-bd1d-339e34536a9b,True,data/04_models/pipeline/webapp/installed/cropped_images/8ed739d7-d24f-4919-bd1d-339e34536a9b.jpg,-0.1349107606696157,left,Susan Wagner
6
+ 4,4,4,1825,1826,"data/01_raw/elk-river/2022/Boat_Electrofishing/Fish/October/900,088,000,909,142_F.jpg",2022,900088000909142,,2022-10-05 14:14:45,True,False,Apple,iPhone 12 Pro Max,5.1,4032.0,3024.0,6.922439780250739,1.3561438092556088,5.0997288781417165,49.60164128611111,-114.96475872222224,ec1c6fe4-b2b5-491a-9d7f-542b85e2b078,True,data/04_models/pipeline/webapp/installed/cropped_images/ec1c6fe4-b2b5-491a-9d7f-542b85e2b078.jpg,-3.138177921264415,left,Peter Montgomery
data/pipeline/models/identification/config.yaml ADDED
@@ -0,0 +1,2 @@
1
+ n_keypoints: 1024
2
+ extractor_type: aliked
data/pipeline/models/identification/features.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:053c31af351b7aa685c003a26435eadabfcece6d046f10060f8ad5f90ab9e30e
3
+ size 2689138
data/pipeline/models/pose/weights.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4081cc996b6d436bc0131e5756b076cdd58c372a319df3aaa688abf559b286c8
3
+ size 5701249
data/pipeline/models/segmentation/weights.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6bc54dad5a4d62746ddf4e7edb4402642e905663048da1ff61af969a2d2db604
3
+ size 6015837
data/summary.csv ADDED
@@ -0,0 +1,6 @@
1
+ ,Unnamed: 0.1,Unnamed: 0,filepath,year,pit,created_at_filepath,created_at_exif,is_electrofishing,is_guide_angling,exif_make,exif_model,exif_focal_length,exif_image_width,exif_image_height,exif_shutter_speed,exif_aperture,exif_brightness,coordinates_lat,coordinates_lon,uuid,success,filepath_crop,pose_theta,pose_fish_side
2
+ 0,1311,1312,data/01_raw/elk-river/2023/Guide/Fish/Nupqu 2/2023-09-15_1040_2_900088000908738_F.jpg,2023,900088000908738,2023-09-15 10:40:00,,False,True,,,,4032.0,3024.0,,,,,,8042e02f-1d73-4251-8b76-3f634ffe0196,True,data/03_processed/identification/input/images/8042e02f-1d73-4251-8b76-3f634ffe0196.jpg,2.9707219143490304,left
3
+ 1,2306,2308,"data/01_raw/elk-river/2022/Boat_Electrofishing/Fish/August/989,001,006,037,192_F.jpg",2022,989001006037192,,2022-08-10 14:24:34,True,False,Apple,iPhone 12 Pro Max,5.1,4032.0,3024.0,10.17001733102253,1.3561438092556088,7.527949339379251,49.447309527777776,-115.05773913888888,0495c348-a87c-4e70-8a1f-9e07e8510977,True,data/03_processed/identification/input/images/0495c348-a87c-4e70-8a1f-9e07e8510977.jpg,3.0887346928329578,left
4
+ 2,142,142,data/01_raw/elk-river/2023/Boat Electrofishing/Fish/Aug 16 2023/2023-08-16_1146_7_900088000913914_F.jpg,2023,900088000913914,2023-08-16 11:46:00,2023-08-16 11:47:39,True,False,Apple,iPhone 13 Pro Max,5.7,4032.0,3024.0,9.47009,1.169925,7.41103,49.479367833333335,-115.06903196666666,4d7bd307-1224-41e6-ba89-82dd85e69ea9,True,data/03_processed/identification/input/images/4d7bd307-1224-41e6-ba89-82dd85e69ea9.jpg,-0.0302701344557054,left
5
+ 3,2264,2266,"data/01_raw/elk-river/2022/Boat_Electrofishing/Fish/August/989,001,006,004,046_F_2.jpg",2022,989001006004046,,2022-08-05 14:00:47,True,False,Apple,iPhone 13 Pro Max,5.7,4032.0,3024.0,10.445739257101238,1.1699250021066825,8.260268601117538,49.374336702777775,-115.01017866666666,8ed739d7-d24f-4919-bd1d-339e34536a9b,True,data/03_processed/identification/input/images/8ed739d7-d24f-4919-bd1d-339e34536a9b.jpg,-0.1349107606696157,left
6
+ 4,1825,1826,"data/01_raw/elk-river/2022/Boat_Electrofishing/Fish/October/900,088,000,909,142_F.jpg",2022,900088000909142,,2022-10-05 14:14:45,True,False,Apple,iPhone 12 Pro Max,5.1,4032.0,3024.0,6.922439780250739,1.3561438092556088,5.0997288781417165,49.60164128611111,-114.96475872222224,ec1c6fe4-b2b5-491a-9d7f-542b85e2b078,True,data/03_processed/identification/input/images/ec1c6fe4-b2b5-491a-9d7f-542b85e2b078.jpg,-3.138177921264415,left
identification.py ADDED
@@ -0,0 +1,241 @@
1
+ """
2
+ Module to manage the identification model. One can load and run inference on a
3
+ new image.
4
+ """
5
+
6
+ from dataclasses import dataclass
7
+ from pathlib import Path
8
+ from typing import Any
9
+
10
+ import numpy as np
11
+ import pandas as pd
12
+ import torch
13
+ from lightglue import ALIKED, DISK, SIFT, LightGlue, SuperPoint
14
+ from lightglue.utils import numpy_image_to_torch, rbd
15
+ from PIL import Image
16
+
17
+ from utils import (
18
+ extractor_type_to_extractor,
19
+ extractor_type_to_matcher,
20
+ get_scores,
21
+ wasserstein,
22
+ )
23
+ from viz2d import keypoints_as_pil_image, matches_as_pil_image
24
+
25
+
26
+ @dataclass
27
+ class IdentificationModel:
28
+ extractor: SIFT | ALIKED | DISK | SuperPoint
29
+ extractor_type: str
30
+ threshold_wasserstein: float
31
+ n_keypoints: int
32
+ matcher: LightGlue
33
+ features_dict: dict[str, torch.Tensor]
34
+ df_db: pd.DataFrame
35
+
36
+
37
+ def load(
38
+ device: torch.device,
39
+ filepath_features: Path,
40
+ filepath_db: Path,
41
+ extractor_type: str,
42
+ n_keypoints: int,
43
+ threshold_wasserstein: float,
44
+ ) -> IdentificationModel:
45
+ """
46
+ Load the IdentificationModel provided the arguments.
47
+
48
+ Args:
49
+ device (torch.device): cpu|cuda
50
+ filepath_features (Path): filepath to the torch cached features on the
51
+ dataset one wants to predict on.
52
+ filepath_db (Path): filepath to the csv file containing the dataset to
53
+ compare with.
54
+ extractor_type (str): in {sift, disk, aliked, superpoint}.
55
+ n_keypoints (int): maximum number of keypoints to extract with the extractor.
56
+ threshold_wasserstein (float): threshold for the wasserstein distance to consider it a match.
57
+
58
+ Returns:
59
+ IdentificationModel: an IdentificationModel instance.
60
+
61
+ Raises:
62
+ AssertionError when the extractor_type or n_keypoints are not valid.
63
+ """
64
+
65
+ allowed_extractor_types = ["sift", "disk", "aliked", "superpoint"]
66
+ assert (
67
+ extractor_type in allowed_extractor_types
68
+ ), f"extractor_type should be in {allowed_extractor_types}"
69
+ assert 1 <= n_keypoints <= 5000, f"n_keypoints should be in range 1..5000"
70
+ assert (
71
+ 0.0 <= threshold_wasserstein <= 1.0
72
+ ), f"threshold_wasserstein should be in 0..1"
73
+
74
+ extractor = extractor_type_to_extractor(
75
+ device=device,
76
+ extractor_type=extractor_type,
77
+ n_keypoints=n_keypoints,
78
+ )
79
+ matcher = extractor_type_to_matcher(
80
+ device=device,
81
+ extractor_type=extractor_type,
82
+ )
83
+ features_dict = torch.load(filepath_features)
84
+ df_db = pd.read_csv(filepath_db)
85
+
86
+ return IdentificationModel(
87
+ extractor_type=extractor_type,
88
+ n_keypoints=n_keypoints,
89
+ extractor=extractor,
90
+ matcher=matcher,
91
+ features_dict=features_dict,
92
+ df_db=df_db,
93
+ threshold_wasserstein=threshold_wasserstein,
94
+ )
95
+
96
+
97
+ def _make_prediction_dict(
98
+ model: IdentificationModel,
99
+ indexed_matches: dict[str, dict[str, torch.Tensor]],
100
+ ) -> dict[str, Any]:
101
+ """
102
+ Return the prediction dict. Two types of predictions can be made:
103
+ 1. A new individual
104
+ 2. A match from the dataset
105
+
106
+ Returns:
107
+ type (str): new|match
108
+ match (dict): dict containing the following keys if type==match.
109
+ pit (str): the PIT of the matched individual.
110
+ name (str): the name of the matched individual.
111
+ filepath_crop_closest (Path): the filepath to the matched individual.
112
+ features (torch.Tensor): LightGlue Features of the matched individual.
113
+ matches (torch.Tensor): LightGlue Matches of the matched individual.
114
+ """
115
+ indexed_scores = {k: get_scores(v) for k, v in indexed_matches.items()}
116
+ indexed_wasserstein = {k: wasserstein(v) for k, v in indexed_scores.items()}
117
+ sorted_wasserstein = sorted(
118
+ indexed_wasserstein.items(), key=lambda item: item[1], reverse=True
119
+ )
120
+ shared_record = {
121
+ "indexed_matches": indexed_matches,
122
+ "indexed_scores": indexed_scores,
123
+ "indexed_wasserstein": indexed_wasserstein,
124
+ "sorted_wasserstein": sorted_wasserstein,
125
+ }
126
+ if not sorted_wasserstein:
127
+ return {"type": "new", **shared_record}
128
+ elif model.threshold_wasserstein > sorted_wasserstein[0][1]:
129
+ return {"type": "new", **shared_record}
130
+ else:
131
+ prediction_uuid = sorted_wasserstein[0][0]
132
+ db_row = model.df_db[model.df_db["uuid"] == prediction_uuid].iloc[0]
133
+ return {
134
+ "type": "match",
135
+ "match": {
136
+ "pit": db_row["pit"],
137
+ "name": db_row["name"],
138
+ "filepath_crop": db_row["filepath_crop"],
139
+ "features": model.features_dict[prediction_uuid],
140
+ "matches": indexed_matches[prediction_uuid],
141
+ },
142
+ **shared_record,
143
+ }
144
+
145
+
146
+ # FIXME: Properly run a batch inference here to make it fast on GPU.
147
+ def _batch_inference(
148
+ model: IdentificationModel,
149
+ feats0: dict,
150
+ ) -> dict[str, dict[str, torch.Tensor]]:
151
+ """
152
+ Run batch inference on feats0 with the IdentificationModel.
153
+ Returns an indexed_matches datastructure containing the results of each run
154
+ for the given uuid in the features_dict.
155
+ """
156
+ indexed_matches = {}
157
+ for uuid in model.features_dict.keys():
158
+ matches01 = model.matcher(
159
+ {"image0": feats0, "image1": model.features_dict[uuid]}
160
+ )
161
+ indexed_matches[uuid] = matches01
162
+ return indexed_matches
163
+
164
+
165
+ def predict(model: IdentificationModel, pil_image: Image.Image) -> dict:
166
+ """
167
+ Run inference on the pil_image on all the features_dict entries from the
168
+ IdentificationModel.
169
+
170
+ Note: It will try to optimize inference depending on the available device
171
+ (cpu|gpu).
172
+
173
+ Args:
174
+ model (IdentificationModel): identification model to run inference with.
175
+ pil_image (PIL): input image to run the inference on.
176
+
177
+ Returns:
178
+ type (str): new|match.
179
+ source (dict): contains the `features` of the input image.
180
+ match (dict): dict containing the following keys if type==match.
181
+ pit (str): the PIT of the matched individual.
182
+ name (str): the name of the matched individual.
183
+ filepath_crop (Path): the filepath to the matched individual.
184
+ features (torch.Tensor): LightGlue Features of the matched individual.
185
+ matches (torch.Tensor): LightGlue Matches of the matched individual.
186
+ """
187
+ # Disable gradient computation to make inference faster
188
+ torch.set_grad_enabled(False)
189
+ torch_image = numpy_image_to_torch(np.array(pil_image))
190
+ feats0 = model.extractor.extract(torch_image)
191
+ indexed_matches = _batch_inference(model=model, feats0=feats0)
192
+ prediction_dict = _make_prediction_dict(
193
+ model=model,
194
+ indexed_matches=indexed_matches,
195
+ )
196
+ return {"source": {"features": feats0}, **prediction_dict}
197
+
198
+
199
+ def generate_visualization(pil_image: Image.Image, prediction: dict) -> dict:
200
+ if "type" not in prediction:
201
+ return {}
202
+ elif prediction["type"] == "match":
203
+ pil_image_masked_closest = Image.open(prediction["match"]["filepath_crop"])
204
+ torch_image0 = np.array(pil_image)
205
+ torch_image1 = np.array(pil_image_masked_closest)
206
+ torch_images = [torch_image0, torch_image1]
207
+ feats0 = prediction["source"]["features"]
208
+ feats1 = prediction["match"]["features"]
209
+ matches01 = prediction["match"]["matches"]
210
+
211
+ feats0, feats1, matches01 = [
212
+ rbd(x) for x in [feats0, feats1, matches01]
213
+ ] # remove batch dimension
214
+ pil_image_matches = matches_as_pil_image(
215
+ torch_images=torch_images,
216
+ feats0=feats0,
217
+ feats1=feats1,
218
+ matches01=matches01,
219
+ mode="column",
220
+ )
221
+ pil_image_keypoints_source = keypoints_as_pil_image(
222
+ torch_image=torch_image0,
223
+ feats=feats0,
224
+ ps=23,
225
+ )
226
+ return {
227
+ "matches": pil_image_matches,
228
+ "keypoints_source": pil_image_keypoints_source,
229
+ }
230
+ elif prediction["type"] == "new":
231
+ torch_image0 = np.array(pil_image)
232
+ feats0 = prediction["source"]["features"]
233
+ feats0 = rbd(feats0) # remove the batch dimension
234
+ pil_image_keypoints_source = keypoints_as_pil_image(
235
+ torch_image=torch_image0,
236
+ feats=feats0,
237
+ ps=23,
238
+ )
239
+ return {"keypoints_source": pil_image_keypoints_source}
240
+ else:
241
+ return {}
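
A hedged sketch (not part of this commit) of using this module standalone on one of the cropped images shipped with the commit; the extractor settings and threshold mirror the values hard-coded in app.py:

    from pathlib import Path

    from PIL import Image

    import identification
    from utils import select_best_device

    model = identification.load(
        device=select_best_device(),
        filepath_features=Path("data/pipeline/models/identification/features.pt"),
        filepath_db=Path("data/pipeline/db/db.csv"),
        extractor_type="aliked",
        n_keypoints=1024,
        threshold_wasserstein=0.084,
    )

    # Identify an already-cropped image (one of the crops referenced by db.csv).
    crop = Image.open(
        "data/04_models/pipeline/webapp/installed/cropped_images/8042e02f-1d73-4251-8b76-3f634ffe0196.jpg"
    )
    prediction = identification.predict(model=model, pil_image=crop)
    print(prediction["type"])
    if prediction["type"] == "match":
        print(prediction["match"]["pit"], prediction["match"]["name"])
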
pipeline.py ADDED
@@ -0,0 +1,348 @@
1
+ from pathlib import Path
2
+ from typing import Any
3
+
4
+ import numpy as np
5
+ import torch
6
+ from PIL import Image
7
+ from ultralytics import YOLO
8
+
9
+ import identification
10
+ import pose
11
+ import segmentation
12
+ from identification import IdentificationModel
13
+ from utils import (
14
+ PictureLayout,
15
+ crop,
16
+ get_picture_layout,
17
+ get_segmentation_mask_crop_box,
18
+ )
19
+
20
+
21
+ def load_pose_and_segmentation_models(
22
+ filepath_weights_segmentation_model: Path,
23
+ filepath_weights_pose_model: Path,
24
+ ) -> dict[str, YOLO]:
25
+ """
26
+ Load into memory the models used by the pipeline.
27
+
28
+ Returns:
29
+ segmentation (YOLO): segmentation model.
30
+ pose (YOLO): pose estimation model.
31
+ """
32
+ model_segmentation = segmentation.load_pretrained_model(
33
+ str(filepath_weights_segmentation_model)
34
+ )
35
+
36
+ model_pose = pose.load_pretrained_model(str(filepath_weights_pose_model))
37
+ return {
38
+ "segmentation": model_segmentation,
39
+ "pose": model_pose,
40
+ }
41
+
42
+
43
+ def load_models(
44
+ filepath_weights_segmentation_model: Path,
45
+ filepath_weights_pose_model: Path,
46
+ device: torch.device,
47
+ filepath_identification_lightglue_features: Path,
48
+ filepath_identification_db: Path,
49
+ extractor_type: str,
50
+ n_keypoints: int,
51
+ threshold_wasserstein: float,
52
+ ) -> dict[str, YOLO | IdentificationModel]:
53
+ """
54
+ Load into memory the models used by the pipeline.
55
+
56
+ Returns:
57
+ segmentation (YOLO): segmentation model.
58
+ pose (YOLO): pose estimation model.
59
+ identification (IdentificationModel): identification model.
60
+ """
61
+ loaded_pose_seg_models = load_pose_and_segmentation_models(
62
+ filepath_weights_segmentation_model=filepath_weights_segmentation_model,
63
+ filepath_weights_pose_model=filepath_weights_pose_model,
64
+ )
65
+
66
+ model_identification = identification.load(
67
+ device=device,
68
+ filepath_features=filepath_identification_lightglue_features,
69
+ filepath_db=filepath_identification_db,
70
+ n_keypoints=n_keypoints,
71
+ extractor_type=extractor_type,
72
+ threshold_wasserstein=threshold_wasserstein,
73
+ )
74
+
75
+ return {**loaded_pose_seg_models, "identification": model_identification}
76
+
77
+
78
+ def run_preprocess(pil_image: Image.Image) -> dict[str, Any]:
79
+ """
80
+ Run the preprocess stage of the pipeline.
81
+
82
+ Args:
83
+ pil_image (PIL): original image.
84
+
85
+ Returns:
86
+ pil_image (PIL): rotated image to make it a landscape.
87
+ layout (PictureLayout): layout type of the input image.
88
+ """
89
+ picture_layout = get_picture_layout(pil_image=pil_image)
90
+
91
+ # If the image is in Portrait Mode, we turn it into Landscape
92
+ pil_image_preprocessed = (
93
+ pil_image.rotate(angle=90, expand=True)
94
+ if picture_layout == PictureLayout.PORTRAIT
95
+ else pil_image
96
+ )
97
+ return {
98
+ "pil_image": pil_image_preprocessed,
99
+ "layout": picture_layout,
100
+ }
101
+
102
+
103
+ def run_pose(model: YOLO, pil_image: Image.Image) -> dict[str, Any]:
104
+ """
105
+ Run the pose stage of the pipeline.
106
+
107
+ Args:
108
+ model (YOLO): loaded pose estimation model.
109
+ pil_image (PIL): Image to run the model on.
110
+
111
+ Returns:
112
+ prediction: Raw prediction from the model.
113
+ orig_image: original image used for inference after the preprocessing
114
+ stages applied by ultralytics.
115
+ keypoints_xy (np.ndarray): keypoints in xy format.
116
+ keypoints_xyn (np.ndarray): keyoints in xyn format.
117
+ theta (float): angle in radians to rotate the image to re-align it
118
+ horizontally.
119
+ side (FishSide): Predicted side of the fish.
120
+ """
121
+ return pose.predict(model=model, pil_image=pil_image)
122
+
123
+
124
+ def run_crop(
125
+ pil_image_mask: Image.Image,
126
+ pil_image_masked: Image.Image,
127
+ padding: int,
128
+ ) -> dict[str, Any]:
129
+ """
130
+ Run the crop on the mask and masked images.
131
+
132
+ Args:
133
+ pil_image_mask (PIL): Image containing the segmentation mask.
134
+ pil_image_masked (PIL): Image containing the applied pil_image_mask on
135
+ the original image.
136
+ padding (int): by how much do we want to pad the result image?
137
+
138
+ Returns:
139
+ box (Tuple[int, int, int, int]): 4 tuple representing a rectangle (x1,
140
+ y1, x2, y2) with the upper left corner given first.
141
+ pil_image (PIL): cropped masked image.
142
+ """
143
+
144
+ box_crop = get_segmentation_mask_crop_box(
145
+ pil_image_mask=pil_image_mask,
146
+ padding=padding,
147
+ )
148
+ pil_image_masked_cropped = crop(
149
+ pil_image=pil_image_masked,
150
+ box=box_crop,
151
+ )
152
+ return {
153
+ "box": box_crop,
154
+ "pil_image": pil_image_masked_cropped,
155
+ }
156
+
157
+
158
+ def run_rotation(
159
+ pil_image: Image.Image,
160
+ angle_rad: float,
161
+ keypoints_xy: np.ndarray,
162
+ ) -> dict[str, Any]:
163
+ """
164
+ Run the rotation stage of the pipeline.
165
+
166
+ Args:
167
+ pil_image (PIL): image to run the rotation on.
168
+ angle_rad (float): angle in radian to rotate the image.
169
+ keypoints_xy (np.ndarray): keypoints from the pose estimation
170
+ prediction in xy format.
171
+
172
+ Returns:
173
+ array_image (np.ndarray): rotated array_image as a 2D numpy array.
174
+ keypoints_xy (np.ndarray): rotated keypoints in xy format.
175
+ pil_image (PIL): rotated PIL image.
176
+ """
177
+ results_rotation = pose.rotate_image_and_keypoints_xy(
178
+ angle_rad=angle_rad,
179
+ array_image=np.array(pil_image),
180
+ keypoints_xy=keypoints_xy,
181
+ )
182
+ pil_image_rotated = Image.fromarray(results_rotation["array_image"])
183
+
184
+ return {
185
+ "pil_image": pil_image_rotated,
186
+ "array_image": results_rotation["array_image"],
187
+ "keypoints_xy": results_rotation["keypoints_xy"],
188
+ }
189
+
190
+
191
+ def run_segmentation(model: YOLO, pil_image: Image.Image) -> dict[str, Any]:
192
+ """
193
+ Run the segmentation stage of the pipeline.
194
+
195
+ Args:
196
+ pil_image (PIL): image to run the segmentation on.
197
+ model (YOLO): segmentation model.
199
+
200
+ Returns:
201
+ prediction: Raw prediction from the model.
202
+ orig_image: original image used for inference
203
+ after preprocessing stages applied by
204
+ ultralytics.
205
+ mask (PIL): postprocessed mask in white and black format - used for visualization
206
+ mask_raw (np.ndarray): Raw mask not postprocessed
207
+ masked (PIL): mask applied to the pil_image.
208
+ """
209
+ results_segmentation = segmentation.predict(
210
+ model=model,
211
+ pil_image=pil_image,
212
+ )
213
+ return results_segmentation
214
+
215
+
216
+ def run_pre_identification_stages(
217
+ loaded_models: dict[str, YOLO],
218
+ pil_image: Image.Image,
219
+ param_crop_padding: int = 0,
220
+ ) -> dict[str, Any]:
221
+ """
222
+ Run the partial ML pipeline on `pil_image` up to identifying the fish. It
223
+ prepares the input image `pil_image` to make it possible to identify it.
224
+
225
+ Args:
226
+ loaded_models (dict[str, YOLO]): result of calling `load_models`.
227
+ pil_image (PIL): Image to run the pipeline on.
228
+ param_crop_padding (int): how much to pad the resulting segmented
229
+ image when cropped.
230
+
231
+ Returns:
232
+ order (list[str]): the stages and their order.
233
+ stages (dict[str, Any]): the description of each stage, its
234
+ input and output.
235
+ """
236
+
237
+ # Unpacking the loaded models
238
+ model_pose = loaded_models["pose"]
239
+ model_segmentation = loaded_models["segmentation"]
240
+
241
+ # Stage: Preprocess
242
+ results_preprocess = run_preprocess(pil_image=pil_image)
243
+
244
+ # Stage: Pose estimation
245
+ pil_image_preprocessed = results_preprocess["pil_image"]
246
+ results_pose = run_pose(model=model_pose, pil_image=pil_image_preprocessed)
247
+
248
+ # Stage: Rotation
249
+ results_rotation = run_rotation(
250
+ pil_image=pil_image_preprocessed,
251
+ keypoints_xy=results_pose["keypoints_xy"],
252
+ angle_rad=results_pose["theta"],
253
+ )
254
+
255
+ # Stage: Segmentation
256
+ pil_image_rotated = Image.fromarray(results_rotation["array_image"])
257
+ results_segmentation = run_segmentation(
258
+ model=model_segmentation, pil_image=pil_image_rotated
259
+ )
260
+
261
+ # Stage: Crop
262
+ results_crop = run_crop(
263
+ pil_image_mask=results_segmentation["mask"],
264
+ pil_image_masked=results_segmentation["masked"],
265
+ padding=param_crop_padding,
266
+ )
267
+
268
+ return {
269
+ "order": [
270
+ "preprocess",
271
+ "pose",
272
+ "rotation",
273
+ "segmentation",
274
+ "crop",
275
+ ],
276
+ "stages": {
277
+ "preprocess": {
278
+ "input": {"pil_image": pil_image},
279
+ "output": results_preprocess,
280
+ },
281
+ "pose": {
282
+ "input": {"pil_image": pil_image_preprocessed},
283
+ "output": results_pose,
284
+ },
285
+ "rotation": {
286
+ "input": {
287
+ "pil_image": pil_image_preprocessed,
288
+ "angle_rad": results_pose["theta"],
289
+ "keypoints_xy": results_pose["keypoints_xy"],
290
+ },
291
+ "output": results_rotation,
292
+ },
293
+ "segmentation": {
294
+ "input": {"pil_image": pil_image_rotated},
295
+ "output": results_segmentation,
296
+ },
297
+ "crop": {
298
+ "input": {
299
+ "pil_image_mask": results_segmentation["mask"],
300
+ "pil_image_masked": results_segmentation["masked"],
301
+ "padding": param_crop_padding,
302
+ },
303
+ "output": results_crop,
304
+ },
305
+ },
306
+ }
307
+
308
+
309
+ def run(
310
+ loaded_models: dict[str, YOLO | IdentificationModel],
311
+ pil_image: Image.Image,
312
+ param_crop_padding: int = 0,
313
+ ) -> dict[str, Any]:
314
+ """
315
+ Run the ML pipeline on `pil_image`.
316
+
317
+ Args:
318
+ loaded_models (dict[str, YOLO | IdentificationModel]): result of calling `load_models`.
319
+ pil_image (PIL): Image to run the pipeline on.
320
+ param_crop_padding (int): how much to pad the resulting segmented
321
+ image when cropped.
322
+
323
+ Returns:
324
+ order (list[str]): the stages and their order.
325
+ stages (dict[str, Any]): the description of each stage, its
326
+ input and output.
327
+ """
328
+ model_identification = loaded_models["identification"]
329
+
330
+ results = run_pre_identification_stages(
331
+ loaded_models=loaded_models,
332
+ pil_image=pil_image,
333
+ param_crop_padding=param_crop_padding,
334
+ )
335
+
336
+ results_crop = results["stages"]["crop"]["output"]
337
+ results_identification = identification.predict(
338
+ model=model_identification,
339
+ pil_image=results_crop["pil_image"],
340
+ )
341
+
342
+ results["order"].append("identification")
343
+ results["stages"]["identification"] = {
344
+ "input": {"pil_image": results_crop["pil_image"]},
345
+ "output": results_identification,
346
+ }
347
+
348
+ return results
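
A hedged sketch (not part of this commit) of running only the pre-identification stages on a bundled example image and inspecting the nested result structure returned by the pipeline:

    from PIL import Image

    import pipeline

    loaded = pipeline.load_pose_and_segmentation_models(
        filepath_weights_segmentation_model="data/pipeline/models/segmentation/weights.pt",
        filepath_weights_pose_model="data/pipeline/models/pose/weights.pt",
    )
    image = Image.open("data/images/2023-08-03_1157_7_900088000908738_F.jpg")
    results = pipeline.run_pre_identification_stages(loaded_models=loaded, pil_image=image)

    # Each stage records its input and output under results["stages"], in results["order"].
    for stage in results["order"]:
        print(stage, sorted(results["stages"][stage]["output"].keys()))
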
pose.py ADDED
@@ -0,0 +1,166 @@
1
+ """
2
+ Module to manage the pose detection model.
3
+ """
4
+
5
+ from enum import Enum
6
+ from pathlib import Path
7
+ from typing import Any
8
+
9
+ import numpy as np
10
+ from PIL import Image
11
+ from ultralytics import YOLO
12
+
13
+ import yolo
14
+ from utils import get_angle_correction, get_keypoint, rotate_image_and_keypoints_xy
15
+
16
+
17
+ class FishSide(Enum):
18
+ """
19
+ Represents the Side of the Fish.
20
+ """
21
+
22
+ RIGHT = "right"
23
+ LEFT = "left"
24
+
25
+
26
+ def predict_fish_side(
27
+ array_image: np.ndarray,
28
+ keypoints_xy: np.ndarray,
29
+ classes_dictionnary: dict[int, str],
30
+ ) -> FishSide:
31
+ """
32
+ Predict which side of the fish is displayed on the image.
33
+
34
+ Args:
35
+ array_image (np.ndarray): numpy array representing the image.
36
+ keypoints_xy (np.ndarray): detected keypoints on array_image in
37
+ xy format.
38
+ classes_dictionnary (dict[int, str]): mapping of class instance
39
+ to.
40
+
41
+ Returns:
42
+ FishSide: Predicted side of the fish.
43
+ """
44
+
45
+ theta = get_angle_correction(
46
+ keypoints_xy=keypoints_xy,
47
+ array_image=array_image,
48
+ classes_dictionnary=classes_dictionnary,
49
+ )
50
+ rotation_results = rotate_image_and_keypoints_xy(
51
+ angle_rad=theta, array_image=array_image, keypoints_xy=keypoints_xy
52
+ )
53
+
54
+ # We check if the eyes is on the left/right of one of the fins.
55
+ k_eye = get_keypoint(
56
+ class_name="eye",
57
+ keypoints=rotation_results["keypoints_xy"],
58
+ classes_dictionnary=classes_dictionnary,
59
+ )
60
+ k_anal_fin_base = get_keypoint(
61
+ class_name="anal_fin_base",
62
+ keypoints=rotation_results["keypoints_xy"],
63
+ classes_dictionnary=classes_dictionnary,
64
+ )
65
+
66
+ if k_eye[0] <= k_anal_fin_base[0]:
67
+ return FishSide.LEFT
68
+ else:
69
+ return FishSide.RIGHT
70
+
71
+
72
+ # Model prediction classes
73
+ CLASSES_DICTIONNARY = {
74
+ 0: "eye",
75
+ 1: "front_fin_base",
76
+ 2: "tail_bottom_tip",
77
+ 3: "tail_top_tip",
78
+ 4: "dorsal_fin_base",
79
+ 5: "pelvic_fin_base",
80
+ 6: "anal_fin_base",
81
+ }
82
+
83
+
84
+ def load_pretrained_model(model_str: str) -> YOLO:
85
+ """
86
+ Load the pretrained model.
87
+ """
88
+ return yolo.load_pretrained_model(model_str)
89
+
90
+
91
+ def train(
92
+ model: YOLO,
93
+ data_yaml_path: Path,
94
+ params: dict,
95
+ project: Path = Path("data/04_models/yolo/"),
96
+ experiment_name: str = "train",
97
+ ):
98
+ """Main function for running a train run. It saves the results
99
+ under `project / experiment_name`.
100
+
101
+ Args:
102
+ model (YOLO): result of `load_pretrained_model`.
103
+ data_yaml_path (Path): filepath to the data.yaml file that specifies the split and classes to train on
104
+ params (dict): parameters to override when running the training. See https://docs.ultralytics.com/modes/train/#train-settings for a complete list of parameters.
105
+ project (Path): root path to store the run artifacts and results.
106
+ experiment_name (str): name of the experiment, that is added to the project root path to store the run.
107
+ """
108
+ return yolo.train(
109
+ model=model,
110
+ data_yaml_path=data_yaml_path,
111
+ params=params,
112
+ project=project,
113
+ experiment_name=experiment_name,
114
+ )
115
+
116
+
117
+ def predict(
118
+ model: YOLO,
119
+ pil_image: Image.Image,
120
+ classes_dictionnary: dict[int, str] = CLASSES_DICTIONNARY,
121
+ ) -> dict[str, Any]:
122
+ """
123
+ Given a loaded model and a PIL image, it returns a map containing the
124
+ keypoints predictions.
125
+
126
+ Args:
127
+ model (YOLO): loaded YOLO model for pose estimation.
128
+ pil_image (PIL): image to run the model on.
129
+ classes_dictionnary (dict[int, str]): mapping of class instance to
130
+ class name.
131
+
132
+ Returns:
133
+ prediction: Raw prediction from the model.
134
+ orig_image: original image used for inference after the preprocessing
135
+ stages applied by ultralytics.
136
+ keypoints_xy (np.ndarray): keypoints in xy format.
137
+ keypoints_xyn (np.ndarray): keypoints in xyn format.
138
+ theta (float): angle in radians to rotate the image to re-align it
139
+ horizontally.
140
+ side (FishSide): Predicted side of the fish.
141
+ """
142
+ predictions = model(pil_image)
144
+ orig_image = predictions[0].orig_img
145
+ keypoints_xy = predictions[0].keypoints.xy.cpu().numpy().squeeze()
146
+
147
+ theta = get_angle_correction(
148
+ keypoints_xy=keypoints_xy,
149
+ array_image=orig_image,
150
+ classes_dictionnary=classes_dictionnary,
151
+ )
152
+
153
+ side = predict_fish_side(
154
+ array_image=orig_image,
155
+ keypoints_xy=keypoints_xy,
156
+ classes_dictionnary=classes_dictionnary,
157
+ )
158
+
159
+ return {
160
+ "prediction": predictions[0],
161
+ "orig_image": orig_image,
162
+ "keypoints_xy": keypoints_xy,
163
+ "keypoints_xyn": predictions[0].keypoints.xyn.cpu().numpy().squeeze(),
164
+ "theta": theta,
165
+ "side": side,
166
+ }
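
A hedged sketch (not part of this commit) of calling the pose model directly on a bundled example image; it prints the predicted side and the correction angle in degrees, the same quantities surfaced in the webapp's Details tab:

    import math

    from PIL import Image

    import pose

    model = pose.load_pretrained_model("data/pipeline/models/pose/weights.pt")
    image = Image.open("data/images/2023-10-20_1604_7_900088000909142_F.jpg")
    result = pose.predict(model=model, pil_image=image)
    print(result["side"].value, f"{math.degrees(result['theta']):0.1f} degrees")
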
requirements.txt ADDED
@@ -0,0 +1,8 @@
1
+ gradio==5.4.*
2
+ pandas==2.2.*
3
+ torch==2.5.*
4
+ numpy==2.1.*
5
+ tqdm==4.66.*
6
+ ultralytics==8.3.*
7
+ matplotlib==3.9.*
8
+ lightglue @ git+https://github.com/cvg/LightGlue.git@edb2b838efb2ecfe3f88097c5fad9887d95aedad
segmentation.py ADDED
@@ -0,0 +1,103 @@
1
+ """
2
+ Module to manage the segmentation YOLO model.
3
+ """
4
+
5
+ from pathlib import Path
6
+ from typing import Any
7
+
8
+ import cv2
9
+ import numpy as np
10
+ from PIL import Image
11
+ from ultralytics import YOLO
12
+
13
+ import yolo
14
+
15
+
16
+ def load_pretrained_model(model_str: str) -> YOLO:
17
+ """
18
+ Load the pretrained model.
19
+ """
20
+ return yolo.load_pretrained_model(model_str)
21
+
22
+
23
+ def train(
24
+ model: YOLO,
25
+ data_yaml_path: Path,
26
+ params: dict,
27
+ project: Path = Path("data/04_models/yolo/"),
28
+ experiment_name: str = "train",
29
+ ):
30
+ """Main function for running a train run. It saves the results
31
+ under `project / experiment_name`.
32
+
33
+ Args:
34
+ model (YOLO): result of `load_pretrained_model`.
35
+ data_yaml_path (Path): filepath to the data.yaml file that specifies the split and classes to train on
36
+ params (dict): parameters to override when running the training. See https://docs.ultralytics.com/modes/train/#train-settings for a complete list of parameters.
37
+ project (Path): root path to store the run artifacts and results.
38
+ experiment_name (str): name of the experiment, that is added to the project root path to store the run.
39
+ """
40
+ return yolo.train(
41
+ model=model,
42
+ data_yaml_path=data_yaml_path,
43
+ params=params,
44
+ project=project,
45
+ experiment_name=experiment_name,
46
+ )
47
+
48
+
49
+ def predict(model: YOLO, pil_image: Image.Image) -> dict[str, Any]:
50
+ """
51
+ Given a loaded model and a PIL image, it returns a map
52
+ containing the segmentation predictions.
53
+
54
+ Args:
55
+ model (YOLO): loaded YOLO model for segmentation.
56
+ pil_image (PIL): image to run the model on.
57
+
58
+ Returns:
59
+ prediction: Raw prediction from the model.
60
+ orig_image: original image used for inference
61
+ after preprocessing stages applied by
62
+ ultralytics.
63
+ mask (PIL): postprocessed mask in white and black format - used for visualization
64
+ mask_raw (np.ndarray): Raw mask not postprocessed
65
+ masked (PIL): mask applied to the pil_image.
66
+ """
67
+ predictions = model(pil_image)
68
+ mask_raw = predictions[0].masks[0].data.cpu().numpy().transpose(1, 2, 0).squeeze()
69
+ # Convert single channel grayscale to 3 channel image
70
+ mask_3channel = cv2.merge((mask_raw, mask_raw, mask_raw))
71
+ # Get the size of the original image (height, width, channels)
72
+ h2, w2, c2 = predictions[0].orig_img.shape
73
+ # Resize the mask to the same size as the image (can probably be removed if image is the same size as the model)
74
+ mask = cv2.resize(mask_3channel, (w2, h2))
75
+ # Convert BGR to HSV
76
+ hsv = cv2.cvtColor(mask, cv2.COLOR_BGR2HSV)
77
+
78
+ # Define range of brightness in HSV
79
+ lower_black = np.array([0, 0, 0])
80
+ upper_black = np.array([0, 0, 1])
81
+
82
+ # Create a mask. Threshold the HSV image to get everything black
83
+ mask = cv2.inRange(mask, lower_black, upper_black)
84
+
85
+ # Invert the mask to get everything but black
86
+ mask = cv2.bitwise_not(mask)
87
+
88
+ # Apply the mask to the original image
89
+ masked = cv2.bitwise_and(
90
+ predictions[0].orig_img,
91
+ predictions[0].orig_img,
92
+ mask=mask,
93
+ )
94
+
95
+ # BGR to RGB and PIL conversion
96
+ image_output2 = Image.fromarray(masked[:, :, ::-1])
98
+ return {
99
+ "prediction": predictions[0],
100
+ "mask": Image.fromarray(mask),
101
+ "mask_raw": mask_raw,
102
+ "masked": image_output2,
103
+ }
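
A hedged sketch (not part of this commit) chaining this module with the crop helpers from utils.py, mirroring the segmentation and crop stages of pipeline.py:

    from PIL import Image

    import segmentation
    from utils import crop, get_segmentation_mask_crop_box

    model = segmentation.load_pretrained_model("data/pipeline/models/segmentation/weights.pt")
    image = Image.open("data/images/2023-10-20_1625_7_900088000913636_F.jpg")
    result = segmentation.predict(model=model, pil_image=image)

    # Crop the masked image down to the bounding box of the segmentation mask.
    box = get_segmentation_mask_crop_box(pil_image_mask=result["mask"], padding=0)
    masked_cropped = crop(pil_image=result["masked"], box=box)
    masked_cropped.save("masked_cropped.jpg")
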
ui.py ADDED
File without changes
utils.py ADDED
@@ -0,0 +1,659 @@
1
+ import math
2
+ from enum import Enum
3
+ from pathlib import Path
4
+ from typing import Tuple
5
+
6
+ import matplotlib.pyplot as plt
7
+ import numpy as np
8
+ import torch
9
+ from lightglue import ALIKED, DISK, SIFT, LightGlue, SuperPoint
10
+ from PIL import Image
11
+ from scipy.stats import wasserstein_distance
12
+
13
+
14
+ def select_best_device() -> torch.device:
15
+ """
16
+ Select best available device (cpu or cuda) based on availability.
17
+ """
18
+ if torch.cuda.is_available():
19
+ return torch.device("cuda")
20
+ else:
21
+ return torch.device("cpu")
22
+
23
+
24
+ def bgr_to_rgb(a: np.ndarray) -> np.ndarray:
25
+ """
26
+ Turn a BGR numpy array into a RGB numpy array.
27
+ """
28
+ return a[:, :, ::-1]
29
+
30
+
31
+ ALLOWED_EXTRACTOR_TYPES = ["sift", "disk", "superpoint", "aliked"]
32
+
33
+
34
+ def extractor_type_to_extractor(
35
+ device: torch.device,
36
+ extractor_type: str,
37
+ n_keypoints: int = 1024,
38
+ ):
39
+ """
40
+ Given an extractor_type in {'sift', 'superpoint', 'aliked', 'disk'},
41
+ returns a LightGlue extractor.
42
+
43
+ Args:
44
+ device (torch.device): cpu or cuda
45
+ extractor_type (str): in {sift, superpoint, aliked, disk}
46
+ n_keypoints (int): maximum number of keypoints to extract with the
47
+ extractor. Higher values give better accuracy but slower inference.
48
+
49
+ Returns:
50
+ LightGlue extractor: ALIKED | DISK | SIFT | SuperPoint
51
+
52
+ Raises:
53
+ AssertionError: when the n_keypoints are outside the valid range
54
+ 0..5000
55
+ AssertionError: when extractor_type is not valid
56
+ """
57
+ assert 0 <= n_keypoints <= 5000, "n_keypoints should be in range 0..5000"
58
+ assert (
59
+ extractor_type in ALLOWED_EXTRACTOR_TYPES
60
+ ), f"extractor type {extractor_type} should be in {ALLOWED_EXTRACTOR_TYPES}."
61
+
62
+ if extractor_type == "sift":
63
+ return SIFT(max_num_keypoints=n_keypoints).eval().to(device)
64
+ elif extractor_type == "superpoint":
65
+ return SuperPoint(max_num_keypoints=n_keypoints).eval().to(device)
66
+ elif extractor_type == "disk":
67
+ return DISK(max_num_keypoints=n_keypoints).eval().to(device)
68
+ elif extractor_type == "aliked":
69
+ return ALIKED(max_num_keypoints=n_keypoints).eval().to(device)
70
+ else:
71
+ raise Exception("extractor_type is not valid")
72
+
73
+
74
+ def extractor_type_to_matcher(device: torch.device, extractor_type: str) -> LightGlue:
75
+ """
76
+ Return the LightGlue matcher given an `extractor_type`.
77
+
78
+ Args:
79
+ device (torch.device): cpu or cuda
80
+ extractor_type (str): in {sift, superpoint, aliked, disk}
81
+
82
+ Returns:
83
+ LightGlue Matcher
84
+ """
85
+ assert (
86
+ extractor_type in ALLOWED_EXTRACTOR_TYPES
87
+ ), f"extractor type {extractor_type} should be in {ALLOWED_EXTRACTOR_TYPES}."
88
+ return LightGlue(features=extractor_type).eval().to(device)
89
+
90
+
91
+ def get_scores(matches: dict[str, torch.Tensor]) -> np.ndarray:
92
+ """
93
+ Given a `matches` dict from the LightGlue matcher output, it returns the
94
+ scores as a numpy array.
95
+ """
96
+ return matches["matching_scores0"][0].to("cpu").numpy()
97
+
98
+
99
+ def wasserstein(scores: np.ndarray) -> float:
100
+ """
101
+ Return the Wasserstein distance of the scores against the null
102
+ distribution.
103
+ The greater the distance, the farther away it is from the null
104
+ distribution.
105
+ """
106
+ x_null_distribution = [0.0] * 1024
107
+ return wasserstein_distance(x_null_distribution, scores).item()
108
+
109
+
110
+ class PictureLayout(Enum):
111
+ """
112
+ Layout of a picture.
113
+ """
114
+
115
+ PORTRAIT = "portrait"
116
+ LANDSCAPE = "landscape"
117
+ SQUARE = "square"
118
+
119
+
120
+ def crop(
121
+ pil_image: Image.Image,
122
+ box: Tuple[int, int, int, int],
123
+ ) -> Image.Image:
124
+ """
125
+ Crop a pil_image based on the provided rectangle in (x1, y1,
126
+ x2, y2) format - with the upper left corner given first.
127
+ """
128
+ return pil_image.crop(box=box)
129
+
130
+
131
+ def get_picture_layout(pil_image: Image.Image) -> PictureLayout:
132
+ """
133
+ Return the picture layout.
134
+ """
135
+ width, height = pil_image.size
136
+
137
+ if width > height:
138
+ return PictureLayout.LANDSCAPE
139
+ elif width == height:
140
+ return PictureLayout.SQUARE
141
+ else:
142
+ return PictureLayout.PORTRAIT
143
+
144
+
145
+ def get_segmentation_mask_crop_box(
146
+ pil_image_mask: Image.Image,
147
+ padding: int = 0,
148
+ ) -> Tuple[int, int, int, int]:
149
+ """
150
+ Return a crop box for the given pil_image that contains the segmentation mask (black and white).
151
+
152
+ Args:
153
+ pil_image_mask (PIL): image containing the segmentation mask
154
+ padding (int): how much to pad around the segmentation mask.
155
+
156
+ Returns:
157
+ Rectangle (Tuple[int, int, int, int]): 4 tuple representing a rectangle (x1, y1, x2, y2) with the upper left corner given first.
158
+ """
159
+ array_image_mask = np.array(pil_image_mask)
160
+ a = np.where(array_image_mask != 0)
161
+ y_min = np.min(a[0]).item()
162
+ y_max = np.max(a[0]).item()
163
+ x_min = np.min(a[1]).item()
164
+ x_max = np.max(a[1]).item()
165
+ box = (x_min, y_min, x_max, y_max)
166
+ box_with_padding = (
167
+ box[0] - padding,
168
+ box[1] - padding,
169
+ box[2] + padding,
170
+ box[3] + padding,
171
+ )
172
+ return box_with_padding
173
+
174
+
175
+ def scale_keypoints_to_image_size(
176
+ image_width: int,
177
+ image_height: int,
178
+ keypoints_xyn: np.ndarray,
179
+ ) -> np.ndarray:
180
+ """
181
+ Given keypoints in xyn format, it returns new keypoints in xy format.
182
+
183
+ Args:
184
+ image_width (int): width of the image
185
+ image_height (int): height of the image
186
+ keypoints_xyn (np.ndarray): 2D numpy array representing the keypoints
187
+ in xyn format.
188
+
189
+ Returns:
190
+ keypoints_xy (np.ndarray): 2D numpy array representing the keypoints in
191
+ xy format.
192
+ """
193
+ keypoints_xy = keypoints_xyn.copy()
194
+ keypoints_xy[:, 0] = keypoints_xyn[:, 0] * image_width
195
+ keypoints_xy[:, 1] = keypoints_xyn[:, 1] * image_height
196
+ return keypoints_xy
197
+
198
+
199
+ def normalize_keypoints_to_image_size(
200
+ image_width: int,
201
+ image_height: int,
202
+ keypoints_xy: np.ndarray,
203
+ ) -> np.ndarray:
204
+ """
205
+ Given keypoints in xy format, it returns new keypoints in xyn format.
206
+
207
+ Args:
208
+ image_width (int): width of the image
209
+ image_height (int): height of the image
210
+ keypoints_xy (np.ndarray): 2D numpy array representing the keypoints
211
+ in xy format.
212
+
213
+ Returns:
214
+ keypoints_xyn (np.ndarray): 2D numpy array representing the keypoints in
215
+ xyn format.
216
+ """
217
+ keypoints_xyn = keypoints_xy.copy()
218
+ keypoints_xyn[:, 0] = keypoints_xy[:, 0] / image_width
219
+ keypoints_xyn[:, 1] = keypoints_xy[:, 1] / image_height
220
+ return keypoints_xyn
221
+
222
+
223
+ def show_keypoints_xy(
224
+ array_image: np.ndarray,
225
+ keypoints_xy: np.ndarray,
226
+ classes_dictionnary: dict[int, str],
227
+ verbose: bool = True,
228
+ ) -> None:
229
+ """
230
+ Show keypoints on top of an `array_image`, useful in jupyter notebooks for
231
+ instance.
232
+
233
+ Args:
234
+ array_image (np.ndarray): numpy array representing an image.
235
+ keypoints_xy (np.ndarray): 2D numpy array representing the keypoints in
236
+ xy format.
237
+ classes_dictionnary (dict[int, str]): Model prediction classes.
238
+ verbose (bool): should we make the image verbose by adding some label
239
+ for each keypoint?
240
+ """
241
+
242
+ colors = ["r", "g", "b", "c", "m", "y", "w"]
243
+ plt.imshow(array_image)
244
+ label_margin = 20
245
+ height, width, _ = array_image.shape
246
+
247
+ for class_inst, class_name in classes_dictionnary.items():
248
+ color = colors[class_inst]
249
+ x, y = keypoints_xy[class_inst]
250
+ plt.scatter(x=[x], y=[y], c=color)
251
+ if verbose:
252
+ plt.annotate(class_name, (x - label_margin, y - label_margin), c="w")
253
+
254
+
255
+ def draw_keypoints_xy_on_ax(
256
+ ax,
257
+ array_image: np.ndarray,
258
+ keypoints_xy: np.ndarray,
259
+ classes_dictionnary: dict,
260
+ verbose: bool = True,
261
+ ) -> None:
262
+ """
263
+ Draw keypoints on top of an `array_image`, useful in jupyter notebooks for
264
+ instance.
265
+
266
+ Args:
267
+ array_image (np.ndarray): numpy array representing an image.
268
+ keypoints_xy (np.ndarray): 2D numpy array representing the keypoints in
269
+ xy format.
270
+ classes_dictionnary (dict[int, str]): Model prediction classes.
271
+ verbose (bool): should we make the image verbose by adding some label
272
+ for each keypoint?
273
+ """
274
+
275
+ colors = ["r", "g", "b", "c", "m", "y", "w"]
276
+ ax.imshow(array_image)
277
+ label_margin = 20
278
+ height, width, _ = array_image.shape
279
+
280
+ for class_inst, class_name in classes_dictionnary.items():
281
+ color = colors[class_inst]
282
+ x, y = keypoints_xy[class_inst]
283
+ ax.scatter(x=[x], y=[y], c=color)
284
+
285
+ if verbose:
286
+ ax.annotate(class_name, (x - label_margin, y - label_margin), c="w")
287
+
288
+ k_pelvic_fin_base = get_keypoint(
289
+ class_name="pelvic_fin_base",
290
+ keypoints=keypoints_xy,
291
+ classes_dictionnary=classes_dictionnary,
292
+ )
293
+ k_anal_fin_base = get_keypoint(
294
+ class_name="anal_fin_base",
295
+ keypoints=keypoints_xy,
296
+ classes_dictionnary=classes_dictionnary,
297
+ )
298
+
299
+ ax.axline(k_pelvic_fin_base, k_anal_fin_base, c="lime")
300
+
301
+
302
+ def show_keypoints_xyn(
303
+ array_image: np.ndarray,
304
+ keypoints_xyn: np.ndarray,
305
+ classes_dictionnary: dict,
306
+ verbose: bool = True,
307
+ ) -> None:
308
+ """
309
+ Draw keypoints on top of an `array_image`, useful in jupyter notebooks for
310
+ instance.
311
+
312
+ Args:
313
+ array_image (np.ndarray): numpy array representing an image.
314
+ keypoints_xyn (np.ndarray): 2D numpy array representing the keypoints in
315
+ xyn format.
316
+ classes_dictionnary (dict[int, str]): Model prediction classes.
317
+ verbose (bool): should we make the image verbose by adding some label
318
+ for each keypoint?
319
+ """
320
+ height, width, _ = array_image.shape
321
+ keypoints_xy = scale_keypoints_to_image_size(
322
+ image_height=height,
323
+ image_width=width,
324
+ keypoints_xyn=keypoints_xyn,
325
+ )
326
+ show_keypoints_xy(
327
+ array_image=array_image,
328
+ keypoints_xy=keypoints_xy,
329
+ classes_dictionnary=classes_dictionnary,
330
+ verbose=verbose,
331
+ )
332
+
333
+
334
+ def rotate_point(
335
+ clockwise: bool,
336
+ origin: Tuple[float, float],
337
+ point: Tuple[float, float],
338
+ angle: float,
339
+ ) -> Tuple[float, float]:
340
+ """
341
+ Rotate a point clockwise or counterclockwise by a given angle around a
342
+ given origin.
343
+
344
+ Args:
345
+ clockwise (bool): should the rotation be clockwise?
346
+ origin (Tuple[float, float]): origin 2D point to perform the rotation.
347
+ point (Tuple[float, float]): 2D point to rotate.
348
+ angle (float): angle in radians.
349
+
350
+ Returns:
351
+ rotated_point (Tuple[float, float]): rotated point after applying the
352
+ 2D transformation.
353
+ """
354
+ if clockwise:
355
+ angle = -angle
356
+
357
+ ox, oy = origin
358
+ px, py = point
359
+
360
+ qx = ox + math.cos(angle) * (px - ox) - math.sin(angle) * (py - oy)
361
+ qy = oy + math.sin(angle) * (px - ox) + math.cos(angle) * (py - oy)
362
+
363
+ return qx, qy
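
A quick sanity check of the formula (not part of the module): rotating the point (1, 0) by 90 degrees counterclockwise around the origin should land on (0, 1).

import math

qx, qy = rotate_point(
    clockwise=False,
    origin=(0.0, 0.0),
    point=(1.0, 0.0),
    angle=math.pi / 2,
)
assert abs(qx) < 1e-9 and abs(qy - 1.0) < 1e-9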
364
+
365
+
366
+ def rotate_image(angle_rad: float, array_image: np.ndarray, expand=False) -> np.ndarray:
367
+ """
368
+ Rotate an `array_image` by an angle defined in radians, clockwise using the
369
+ center as origin.
370
+
371
+ Args:
372
+ angle_rad (float): angle in radians.
373
+ array_image (np.ndarray): numpy array representing the image to rotate.
374
+ expand (bool): whether to expand the output image so that no part of
375
+ the rotated content gets truncated (useful for non-square images).
376
+ """
377
+ angle_degrees = math.degrees(angle_rad)
378
+ return np.array(Image.fromarray(array_image).rotate(angle_degrees, expand=expand))
379
+
380
+
381
+ def rotate_keypoints_xy(
382
+ angle_rad: float,
383
+ keypoints_xy: np.ndarray,
384
+ origin: Tuple[float, float],
385
+ clockwise: bool = True,
386
+ ) -> np.ndarray:
387
+ """
388
+ Rotate keypoints by an angle defined in radians, clockwise or
389
+ counterclockwise around the given `origin` point.
390
+
391
+ Args:
392
+ angle_rad (float): angle in radians.
393
+ origin (Tuple[float, float]): origin 2D point to perform the rotation.
394
+ keypoints_xy (np.ndarray): 2D numpy array representing the keypoints in
395
+ xy format.
396
+ clockwise (bool): should the rotation be clockwise?
397
+
398
+ Returns:
399
+ rotated_keypoints_xy (np.ndarray): rotated keypoints in xy format.
400
+ """
401
+ return np.array(
402
+ [
403
+ rotate_point(
404
+ clockwise=clockwise,
405
+ origin=origin,
406
+ point=(kp[0].item(), kp[1].item()),
407
+ angle=angle_rad,
408
+ )
409
+ for kp in keypoints_xy
410
+ ]
411
+ )
412
+
413
+
414
+ def rotate_image_and_keypoints_xy(
415
+ angle_rad: float,
416
+ array_image: np.ndarray,
417
+ keypoints_xy: np.ndarray,
418
+ ) -> dict[str, np.ndarray]:
419
+ """
420
+ Rotate the image and its keypoints by `angle_rad` around the image center.
421
+
422
+ Args:
423
+ angle_rad (float): angle in radians.
424
+ array_image (np.ndarray): numpy array representing the image to rotate.
425
+ keypoints_xy (np.ndarray): 2D numpy array representing the keypoints in
426
+ xy format.
427
+
428
+ Returns:
429
+ array_image (np.ndarray): rotated image as a numpy array.
430
+ keypoints_xy (np.ndarray): rotated keypoints in xy format.
431
+ """
432
+ height, width, _ = array_image.shape
433
+ center_x, center_y = int(width / 2), int(height / 2)
434
+ origin = (center_x, center_y)
435
+ image_rotated = rotate_image(angle_rad=angle_rad, array_image=array_image)
436
+ keypoints_xy_rotated = rotate_keypoints_xy(
437
+ angle_rad=angle_rad, keypoints_xy=keypoints_xy, origin=origin, clockwise=True
438
+ )
439
+
440
+ return {
441
+ "array_image": image_rotated,
442
+ "keypoints_xy": keypoints_xy_rotated,
443
+ }
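
A minimal usage sketch with a dummy image and made-up keypoints (the values are arbitrary):

import math

import numpy as np

array_image = np.zeros((100, 200, 3), dtype=np.uint8)  # dummy RGB image, H=100, W=200
keypoints_xy = np.array([[20.0, 30.0], [150.0, 80.0]])

result = rotate_image_and_keypoints_xy(
    angle_rad=math.pi / 2,
    array_image=array_image,
    keypoints_xy=keypoints_xy,
)
print(result["array_image"].shape)  # (100, 200, 3): same size, since expand is False
print(result["keypoints_xy"])       # the two keypoints rotated around the image center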
444
+
445
+
446
+ def get_keypoint(
447
+ class_name: str,
448
+ keypoints: np.ndarray,
449
+ classes_dictionnary: dict[int, str],
450
+ ) -> np.ndarray:
451
+ """
452
+ Return the keypoint for the provided `class_name` (e.g. eye, front_fin_base).
453
+
454
+ Raises:
455
+ AssertionError: when `class_name` is not one of the known classes or when the number of keypoints does not match the number of classes.
456
+ """
457
+ assert (
458
+ class_name in classes_dictionnary.values()
459
+ ), f"class_name should be in {classes_dictionnary.values()}"
460
+ assert len(classes_dictionnary) == len(
461
+ keypoints
462
+ ), "Number of provided keypoints does not match the number of class names"
463
+
464
+ class_name_to_class_inst = {v: k for k, v in classes_dictionnary.items()}
465
+ return keypoints[class_name_to_class_inst[class_name]]
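
For illustration, with a hypothetical class mapping (the real indices come from the pose model configuration):

import numpy as np

classes_dictionnary = {0: "eye", 1: "pelvic_fin_base", 2: "anal_fin_base"}  # hypothetical
keypoints_xy = np.array([[10.0, 12.0], [40.0, 55.0], [70.0, 52.0]])

k_eye = get_keypoint(
    class_name="eye",
    keypoints=keypoints_xy,
    classes_dictionnary=classes_dictionnary,
)
print(k_eye)  # [10. 12.]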
466
+
467
+
468
+ def to_direction_vector(p1: np.ndarray, p2: np.ndarray) -> np.ndarray:
469
+ """
470
+ Return the direction vector between two points p1 and p2.
471
+ """
472
+ assert len(p1) == len(p2), "p1 and p2 should have the same length"
473
+ return p2 - p1
474
+
475
+
476
+ def is_upside_down(
477
+ keypoints_xy: np.ndarray,
478
+ classes_dictionnary: dict[int, str],
479
+ ) -> bool:
480
+ """
481
+ Is the fish upside down? The check compares the vertical positions of the dorsal fin base and the pelvic fin base.
482
+ """
483
+ k_pelvic_fin_base = get_keypoint(
484
+ class_name="pelvic_fin_base",
485
+ keypoints=keypoints_xy,
486
+ classes_dictionnary=classes_dictionnary,
487
+ )
488
+ k_anal_fin_base = get_keypoint(
489
+ class_name="anal_fin_base",
490
+ keypoints=keypoints_xy,
491
+ classes_dictionnary=classes_dictionnary,
492
+ )
493
+ k_dorsal_fin_base = get_keypoint(
494
+ class_name="dorsal_fin_base",
495
+ keypoints=keypoints_xy,
496
+ classes_dictionnary=classes_dictionnary,
497
+ )
498
+
499
+ print(f"dorsal_fin_base: {k_dorsal_fin_base}")
500
+ print(f"pelvic_fin_base: {k_pelvic_fin_base}")
501
+ print(f"anal_fin_base: {k_anal_fin_base}")
502
+ return (k_dorsal_fin_base[1] > k_pelvic_fin_base[1]).item()
503
+
504
+
505
+ def get_direction_vector(
506
+ keypoints_xy: np.ndarray, classes_dictionnary: dict[int, str]
507
+ ) -> np.ndarray:
508
+ """
509
+ Get the direction vector for the realignment.
510
+ """
511
+ # Align horizontally the fish based on its pelvic fin base and its anal fin base
512
+ k_pelvic_fin_base = get_keypoint(
513
+ class_name="pelvic_fin_base",
514
+ keypoints=keypoints_xy,
515
+ classes_dictionnary=classes_dictionnary,
516
+ )
517
+ k_anal_fin_base = get_keypoint(
518
+ class_name="anal_fin_base",
519
+ keypoints=keypoints_xy,
520
+ classes_dictionnary=classes_dictionnary,
521
+ )
522
+
523
+ return to_direction_vector(
524
+ p1=k_pelvic_fin_base, p2=k_anal_fin_base
525
+ ) # line between the pelvic and anal fins
526
+
527
+
528
+ def get_reference_vector() -> np.ndarray:
529
+ """
530
+ Get the reference vector to align the direction vector to.
531
+ """
532
+ return np.array([1, 0]) # horizontal axis
533
+
534
+
535
+ def get_angle(v1: np.ndarray, v2: np.ndarray) -> float:
536
+ """
537
+ Return the angle (counterclockwise) in radians between vectors v1 and v2.
538
+ """
539
+ cos_theta = (
540
+ np.dot(v1, v2) / (np.linalg.norm(v1, ord=2) * np.linalg.norm(v2, ord=2))
541
+ ).item()
542
+ return -math.acos(cos_theta)
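
A small check with made-up vectors: the angle between the vertical vector (0, 1) and the horizontal reference (1, 0) has magnitude pi/2.

import math

import numpy as np

v1 = np.array([0.0, 1.0])
v_ref = np.array([1.0, 0.0])
theta = get_angle(v1, v_ref)
assert math.isclose(abs(theta), math.pi / 2)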
543
+
544
+
545
+ def is_aligned(keypoints_xy: np.ndarray, classes_dictionnary: dict[int, str]) -> bool:
546
+ """
547
+ Return whether the keypoints' direction vector is now properly aligned
548
+ with the horizontal reference vector.
549
+ """
550
+ v1 = get_direction_vector(
551
+ keypoints_xy=keypoints_xy, classes_dictionnary=classes_dictionnary
552
+ )
553
+ v_ref = get_reference_vector()
554
+ theta = get_angle(v1, v_ref)
555
+ return abs(theta) <= 0.001
556
+
557
+
558
+ def get_angle_correction_sign(
559
+ angle_rad: float,
560
+ array_image: np.ndarray,
561
+ keypoints_xy: np.ndarray,
562
+ classes_dictionnary: dict[int, str],
563
+ ) -> int:
564
+ """
565
+ Return 1 or -1: the sign to apply to `angle_rad` so that the rotation aligns the keypoints horizontally.
566
+ """
567
+ rotation_results = rotate_image_and_keypoints_xy(
568
+ angle_rad=angle_rad, array_image=array_image, keypoints_xy=keypoints_xy
569
+ )
570
+ if not is_aligned(
571
+ keypoints_xy=rotation_results["keypoints_xy"],
572
+ classes_dictionnary=classes_dictionnary,
573
+ ):
574
+ return -1
575
+ else:
576
+ return 1
577
+
578
+
579
+ def get_angle_correction(
580
+ keypoints_xy: np.ndarray,
581
+ array_image: np.ndarray,
582
+ classes_dictionnary: dict[int, str],
583
+ ) -> float:
584
+ """
585
+ Get the angle correction in radians that aligns the fish (based on the
586
+ keypoints) horizontally.
587
+ """
588
+ v1 = get_direction_vector(
589
+ keypoints_xy=keypoints_xy, classes_dictionnary=classes_dictionnary
590
+ )
591
+ v_ref = get_reference_vector()
592
+ theta = get_angle(v1, v_ref)
593
+
594
+ angle_sign = get_angle_correction_sign(
595
+ angle_rad=theta,
596
+ array_image=array_image,
597
+ keypoints_xy=keypoints_xy,
598
+ classes_dictionnary=classes_dictionnary,
599
+ )
600
+ theta = angle_sign * theta
601
+ rotation_results = rotate_image_and_keypoints_xy(
602
+ angle_rad=theta, array_image=array_image, keypoints_xy=keypoints_xy
603
+ )
604
+
605
+ # Check whether the fish is upside down
606
+ if is_upside_down(
607
+ keypoints_xy=rotation_results["keypoints_xy"],
608
+ classes_dictionnary=classes_dictionnary,
609
+ ):
610
+ print("the fish is upside down...")
611
+ return theta + math.pi
612
+ else:
613
+ print("The fish is not upside down")
614
+ return theta # No need to rotate the fish more
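
Putting the pieces together, a hedged end-to-end sketch; the class mapping, keypoints and image below are all made up (the real values come from the trained pose model):

import math

import numpy as np

classes_dictionnary = {
    0: "pelvic_fin_base",
    1: "anal_fin_base",
    2: "dorsal_fin_base",
}  # hypothetical subset of the model classes
keypoints_xy = np.array([[80.0, 120.0], [160.0, 150.0], [120.0, 40.0]])
array_image = np.zeros((200, 300, 3), dtype=np.uint8)  # dummy RGB image

theta = get_angle_correction(
    keypoints_xy=keypoints_xy,
    array_image=array_image,
    classes_dictionnary=classes_dictionnary,
)
rotation_results = rotate_image_and_keypoints_xy(
    angle_rad=theta, array_image=array_image, keypoints_xy=keypoints_xy
)
print(f"correction angle: {math.degrees(theta):.1f} degrees")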
615
+
616
+
617
+ def show_algorithm_steps(
618
+ image_filepath: Path,
619
+ keypoints_xy: np.ndarray,
620
+ rotation_results: dict,
621
+ theta: float,
622
+ classes_dictionnary: dict,
623
+ ) -> None:
624
+ """
625
+ Display a matplotlib figure that details step by step the result of the rotation.
626
+ Keypoints can be overlaid on the images.
627
+ """
628
+ array_image = np.array(Image.open(image_filepath))
629
+ array_image_final = np.array(
630
+ Image.open(image_filepath).rotate(math.degrees(theta), expand=True)
631
+ )
632
+
633
+ fig, axs = plt.subplots(1, 4, figsize=(20, 4))
634
+ fig.suptitle(f"{image_filepath.name}")
635
+ print(f"image_filepath: {image_filepath}")
636
+
637
+ # Hiding the x and y axis ticks
638
+ for ax in axs:
639
+ ax.xaxis.set_visible(False)
640
+ ax.yaxis.set_visible(False)
641
+
642
+ axs[0].set_title("original")
643
+ axs[0].imshow(array_image)
644
+ axs[1].set_title("predicted keypoints")
645
+ draw_keypoints_xy_on_ax(
646
+ ax=axs[1],
647
+ array_image=array_image,
648
+ keypoints_xy=keypoints_xy,
649
+ classes_dictionnary=classes_dictionnary,
650
+ )
651
+ axs[2].set_title(f"rotation of {math.degrees(theta):.1f} degrees")
652
+ draw_keypoints_xy_on_ax(
653
+ ax=axs[2],
654
+ array_image=rotation_results["array_image"],
655
+ keypoints_xy=rotation_results["keypoints_xy"],
656
+ classes_dictionnary=classes_dictionnary,
657
+ )
658
+ axs[3].set_title("final")
659
+ axs[3].imshow(array_image_final)
viz2d.py ADDED
@@ -0,0 +1,326 @@
1
+ """
2
+ 2D visualization primitives based on Matplotlib.
3
+ 1) Plot images with `plot_images`.
4
+ 2) Call `plot_keypoints` or `plot_matches` any number of times.
5
+ 3) Optionally: save a .png or .pdf plot (nice in papers!) with `save_plot`.
6
+ """
7
+
8
+ import io
9
+ from typing import Callable
10
+
11
+ import matplotlib
12
+ import matplotlib.patheffects as path_effects
13
+ import matplotlib.pyplot as plt
14
+ import numpy as np
15
+ import torch
16
+ from PIL import Image
17
+
18
+
19
+ def pyplot_to_pil_image(plot_fn: Callable[..., None]) -> Image.Image:
20
+ """
21
+ Run the side-effectful `plot_fn` (which draws with pyplot) and capture the
22
+ resulting figure as a PIL image by writing it to an in-memory buffer.
23
+ """
24
+ plot_fn()
25
+ buf = io.BytesIO()
26
+ plt.savefig(buf, format="png")
27
+ buf.seek(0) # Move to the beginning of the BytesIO buffer
28
+ return Image.open(buf)
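
A minimal usage sketch (the plotted data is arbitrary; `plt` is the module-level pyplot import):

def plot_fn():
    plt.figure()
    plt.plot([0, 1, 2], [0, 1, 4])

pil_image = pyplot_to_pil_image(plot_fn=plot_fn)
print(pil_image.size)  # width and height in pixels of the rendered figure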
29
+
30
+
31
+ def cm_RdGn(x):
32
+ """Custom colormap: red (0) -> yellow (0.5) -> green (1)."""
33
+ x = np.clip(x, 0, 1)[..., None] * 2
34
+ c = x * np.array([[0, 1.0, 0]]) + (2 - x) * np.array([[1.0, 0, 0]])
35
+ return np.clip(c, 0, 1)
36
+
37
+
38
+ def cm_BlRdGn(x_):
39
+ """Custom colormap: blue (-1) -> red (0.0) -> green (1)."""
40
+ x = np.clip(x_, 0, 1)[..., None] * 2
41
+ c = x * np.array([[0, 1.0, 0, 1.0]]) + (2 - x) * np.array([[1.0, 0, 0, 1.0]])
42
+
43
+ xn = -np.clip(x_, -1, 0)[..., None] * 2
44
+ cn = xn * np.array([[0, 0.1, 1, 1.0]]) + (2 - xn) * np.array([[1.0, 0, 0, 1.0]])
45
+ out = np.clip(np.where(x_[..., None] < 0, cn, c), 0, 1)
46
+ return out
47
+
48
+
49
+ def cm_prune(x_):
50
+ """Custom colormap to visualize pruning"""
51
+ if isinstance(x_, torch.Tensor):
52
+ x_ = x_.cpu().numpy()
53
+ max_i = max(x_)
54
+ norm_x = np.where(x_ == max_i, -1, (x_ - 1) / 9)
55
+ return cm_BlRdGn(norm_x)
56
+
57
+
58
+ def matches_as_pil_image(
59
+ torch_images,
60
+ feats0,
61
+ feats1,
62
+ matches01,
63
+ mode: str = "column",
64
+ ) -> Image.Image:
65
+ """
66
+ Generate a PIL image outlining the keypoints from `feats0` and `feats1` and
67
+ how they match.
68
+ Overlay it on the torch_images.
69
+ """
70
+
71
+ def plot_fn():
72
+ kpts0, kpts1, matches = (
73
+ feats0["keypoints"],
74
+ feats1["keypoints"],
75
+ matches01["matches"],
76
+ )
77
+ m_kpts0, m_kpts1 = kpts0[matches[..., 0]], kpts1[matches[..., 1]]
78
+
79
+ axes = plot_images(imgs=torch_images, mode=mode)
80
+ plot_matches(m_kpts0, m_kpts1, color="lime", lw=0.2)
81
+
82
+ return pyplot_to_pil_image(plot_fn=plot_fn)
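
A hedged sketch with random tensors standing in for real LightGlue feature and match outputs (shapes and values are illustrative only):

import torch

torch_images = [torch.rand(3, 128, 128), torch.rand(3, 128, 128)]
feats0 = {"keypoints": torch.rand(32, 2) * 127}
feats1 = {"keypoints": torch.rand(32, 2) * 127}
matches01 = {"matches": torch.stack([torch.arange(8), torch.arange(8)], dim=1)}

pil_image = matches_as_pil_image(torch_images, feats0, feats1, matches01, mode="column")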
83
+
84
+
85
+ def keypoints_as_pil_image(
86
+ torch_image: torch.Tensor,
87
+ feats: dict[str, torch.Tensor],
88
+ color: str = "blue",
89
+ ps: int = 10,
90
+ ) -> Image.Image:
91
+ """
92
+ Generate a PIL image outlining the keypoints from `feats` and overlay it on
93
+ the torch_image.
94
+ """
95
+
96
+ def plot_fn():
97
+ kpts = feats["keypoints"]
98
+ plot_images([torch_image])
99
+ plot_keypoints(kpts=[kpts], colors=color, ps=ps)
100
+
101
+ return pyplot_to_pil_image(plot_fn=plot_fn)
102
+
103
+
104
+ def matching_keypoints_as_pil_image(
105
+ torch_images,
106
+ feats0,
107
+ feats1,
108
+ matches01,
109
+ mode: str = "column",
110
+ ) -> Image.Image:
111
+ """
112
+ Generate a PIL image outlining the keypoints from `feats0` and `feats1`.
113
+ Overlay it on the torch_images.
114
+ """
115
+
116
+ def plot_fn():
117
+ kpts0, kpts1 = (
118
+ feats0["keypoints"],
119
+ feats1["keypoints"],
120
+ )
121
+ kpc0, kpc1 = cm_prune(matches01["prune0"]), cm_prune(matches01["prune1"])
122
+ plot_images(torch_images, mode=mode)
123
+ plot_keypoints([kpts0, kpts1], colors=[kpc0, kpc1], ps=10)
124
+
125
+ return pyplot_to_pil_image(plot_fn=plot_fn)
126
+
127
+
128
+ def as_pil_image(
129
+ torch_images,
130
+ feats0,
131
+ feats1,
132
+ matches01,
133
+ mode: str = "column",
134
+ ) -> Image.Image:
+ """
+ Same as `matches_as_pil_image`, but builds the figure and writes it to the
+ buffer inline instead of going through `pyplot_to_pil_image`.
+ """
135
+ kpts0, kpts1, matches = (
136
+ feats0["keypoints"],
137
+ feats1["keypoints"],
138
+ matches01["matches"],
139
+ )
140
+ m_kpts0, m_kpts1 = kpts0[matches[..., 0]], kpts1[matches[..., 1]]
141
+
142
+ axes = plot_images(imgs=torch_images, mode=mode)
143
+ plot_matches(m_kpts0, m_kpts1, color="lime", lw=0.2)
144
+
145
+ buf = io.BytesIO()
146
+ plt.savefig(buf, format="png")
147
+
148
+ buf.seek(0) # Move to the beginning of the BytesIO buffer
149
+
150
+ # Open the buffered PNG with PIL
151
+ image = Image.open(buf)
152
+ return image
153
+
154
+
155
+ def plot_images(
156
+ imgs,
157
+ titles=None,
158
+ cmaps="gray",
159
+ dpi=100,
160
+ pad=0.5,
161
+ adaptive=True,
162
+ mode: str = "column",
163
+ ):
164
+ """Plot a set of images horizontally.
165
+ Args:
166
+ imgs: list of NumPy RGB (H, W, 3) or PyTorch RGB (3, H, W) or mono (H, W).
167
+ titles: a list of strings, as titles for each image.
168
+ cmaps: colormaps for monochrome images.
169
+ adaptive: whether the figure size should fit the image aspect ratios.
170
+ mode (str): value in {column, row}
171
+ """
172
+ assert mode in [
173
+ "column",
174
+ "row",
175
+ ], f"mode is not valid, should be in ['column', 'row']."
176
+
177
+ # conversion to (H, W, 3) for torch.Tensor
178
+ imgs = [
179
+ (
180
+ img.permute(1, 2, 0).cpu().numpy()
181
+ if (isinstance(img, torch.Tensor) and img.dim() == 3)
182
+ else img
183
+ )
184
+ for img in imgs
185
+ ]
186
+
187
+ n = len(imgs)
188
+ if not isinstance(cmaps, (list, tuple)):
189
+ cmaps = [cmaps] * n
190
+
191
+ if adaptive:
192
+ ratios = [i.shape[1] / i.shape[0] for i in imgs] # W / H
193
+ elif mode == "row":
194
+ ratios = [4 / 3] * n
195
+ elif mode == "column":
196
+ ratios = [1 / 3] * n
197
+ else:
198
+ ratios = [4 / 3] * n
199
+
200
+ if mode == "column":
201
+ figsize = [10, 5]
202
+ fig, ax = plt.subplots(
203
+ n, 1, figsize=figsize, dpi=dpi, gridspec_kw={"height_ratios": ratios}
204
+ )
205
+ elif mode == "row":
206
+ figsize = [sum(ratios) * 4.5, 4.5]
207
+ fig, ax = plt.subplots(
208
+ 1, n, figsize=figsize, dpi=dpi, gridspec_kw={"width_ratios": ratios}
209
+ )
210
+
211
+ if n == 1:
212
+ ax = [ax]
213
+ for i in range(n):
214
+ ax[i].imshow(imgs[i], cmap=plt.get_cmap(cmaps[i]))
215
+ ax[i].get_yaxis().set_ticks([])
216
+ ax[i].get_xaxis().set_ticks([])
217
+ ax[i].set_axis_off()
218
+ for spine in ax[i].spines.values(): # remove frame
219
+ spine.set_visible(False)
220
+ if titles:
221
+ ax[i].set_title(titles[i])
222
+
223
+ fig.tight_layout(pad=pad)
224
+
225
+
226
+ def plot_keypoints(kpts, colors="lime", ps=4, axes=None, a=1.0):
227
+ """Plot keypoints for existing images.
228
+ Args:
229
+ kpts: list of ndarrays of size (N, 2).
230
+ colors: string, or list of colors (one per set of keypoints).
231
+ ps: size of the keypoints as float.
232
+ """
233
+ if not isinstance(colors, list):
234
+ colors = [colors] * len(kpts)
235
+ if not isinstance(a, list):
236
+ a = [a] * len(kpts)
237
+ if axes is None:
238
+ axes = plt.gcf().axes
239
+ for ax, k, c, alpha in zip(axes, kpts, colors, a):
240
+ if isinstance(k, torch.Tensor):
241
+ k = k.cpu().numpy()
242
+ ax.scatter(k[:, 0], k[:, 1], c=c, s=ps, linewidths=0, alpha=alpha)
243
+
244
+
245
+ def plot_matches(kpts0, kpts1, color=None, lw=1.5, ps=4, a=1.0, labels=None, axes=None):
246
+ """Plot matches for a pair of existing images.
247
+ Args:
248
+ kpts0, kpts1: corresponding keypoints of size (N, 2).
249
+ color: color of each match, string or RGB tuple. Random if not given.
250
+ lw: width of the lines.
251
+ ps: size of the end points (no endpoint if ps=0)
252
+ axes: the two axes to draw the matches on (defaults to the first two axes of the current figure).
253
+ a: alpha opacity of the match lines.
254
+ """
255
+ fig = plt.gcf()
256
+ if axes is None:
257
+ ax = fig.axes
258
+ ax0, ax1 = ax[0], ax[1]
259
+ else:
260
+ ax0, ax1 = axes
261
+ if isinstance(kpts0, torch.Tensor):
262
+ kpts0 = kpts0.cpu().numpy()
263
+ if isinstance(kpts1, torch.Tensor):
264
+ kpts1 = kpts1.cpu().numpy()
265
+ assert len(kpts0) == len(kpts1)
266
+ if color is None:
267
+ color = matplotlib.cm.hsv(np.random.rand(len(kpts0))).tolist()
268
+ elif len(color) > 0 and not isinstance(color[0], (tuple, list)):
269
+ color = [color] * len(kpts0)
270
+
271
+ if lw > 0:
272
+ for i in range(len(kpts0)):
273
+ line = matplotlib.patches.ConnectionPatch(
274
+ xyA=(kpts0[i, 0], kpts0[i, 1]),
275
+ xyB=(kpts1[i, 0], kpts1[i, 1]),
276
+ coordsA=ax0.transData,
277
+ coordsB=ax1.transData,
278
+ axesA=ax0,
279
+ axesB=ax1,
280
+ zorder=1,
281
+ color=color[i],
282
+ linewidth=lw,
283
+ clip_on=True,
284
+ alpha=a,
285
+ label=None if labels is None else labels[i],
286
+ picker=5.0,
287
+ )
288
+ line.set_annotation_clip(True)
289
+ fig.add_artist(line)
290
+
291
+ # freeze the axes to prevent the transform to change
292
+ ax0.autoscale(enable=False)
293
+ ax1.autoscale(enable=False)
294
+
295
+ if ps > 0:
296
+ ax0.scatter(kpts0[:, 0], kpts0[:, 1], c=color, s=ps)
297
+ ax1.scatter(kpts1[:, 0], kpts1[:, 1], c=color, s=ps)
298
+
299
+
300
+ def add_text(
301
+ idx,
302
+ text,
303
+ pos=(0.01, 0.99),
304
+ fs=15,
305
+ color="w",
306
+ lcolor="k",
307
+ lwidth=2,
308
+ ha="left",
309
+ va="top",
310
+ ):
311
+ ax = plt.gcf().axes[idx]
312
+ t = ax.text(
313
+ *pos, text, fontsize=fs, ha=ha, va=va, color=color, transform=ax.transAxes
314
+ )
315
+ if lcolor is not None:
316
+ t.set_path_effects(
317
+ [
318
+ path_effects.Stroke(linewidth=lwidth, foreground=lcolor),
319
+ path_effects.Normal(),
320
+ ]
321
+ )
322
+
323
+
324
+ def save_plot(path, **kw):
325
+ """Save the current figure without any white margin."""
326
+ plt.savefig(path, bbox_inches="tight", pad_inches=0, **kw)
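
Following the workflow from the module docstring, a minimal usage sketch with dummy images and keypoints:

import numpy as np

img0 = np.random.rand(120, 160, 3)
img1 = np.random.rand(120, 160, 3)
kpts0 = np.random.rand(20, 2) * [160, 120]
kpts1 = np.random.rand(20, 2) * [160, 120]

plot_images([img0, img1], titles=["image 0", "image 1"], mode="row")
plot_keypoints([kpts0, kpts1], colors="lime", ps=6)
plot_matches(kpts0, kpts1, color="lime", lw=0.5)
save_plot("matches.png")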
yolo.py ADDED
@@ -0,0 +1,68 @@
1
+ """
2
+ Generic helper functions to interact with Ultralytics YOLO
3
+ models.
4
+ """
5
+
6
+ from pathlib import Path
7
+
8
+ from ultralytics import YOLO
9
+
10
+
11
+ def load_pretrained_model(model_str: str) -> YOLO:
12
+ """Loads the pretrained `model`"""
13
+ return YOLO(model_str)
14
+
15
+
16
+ DEFAULT_TRAIN_PARAMS = {
17
+ "batch": 16,
18
+ "epochs": 100,
19
+ "patience": 100,
20
+ "imgsz": 640,
21
+ "lr0": 0.01,
22
+ "lrf": 0.01,
23
+ "optimizer": "auto",
24
+ # data augmentation
25
+ "mixup": 0.0,
26
+ "close_mosaic": 10,
27
+ "degrees": 0.0,
28
+ "translate": 0.1,
29
+ "flipud": 0.0,
30
+ "fliplr": 0.5,
31
+ }
32
+
33
+
34
+ def train(
35
+ model: YOLO,
36
+ data_yaml_path: Path,
37
+ params: dict,
38
+ project: Path = Path("data/04_models/yolo/"),
39
+ experiment_name: str = "train",
40
+ ):
41
+ """Main function for running a train run. It saves the results
42
+ under `project / experiment_name`.
43
+
44
+ Args:
45
+ model (YOLO): result of `load_pretrained_model`.
46
+ data_yaml_path (Path): filepath to the data.yaml file that specifies the split and classes to train on
47
+ params (dict): parameters to override when running the training. See https://docs.ultralytics.com/modes/train/#train-settings for a complete list of parameters.
48
+ project (Path): root path to store the run artifacts and results.
49
+ experiment_name (str): name of the experiment, that is added to the project root path to store the run.
50
+ """
51
+ assert data_yaml_path.exists(), f"data_yaml_path does not exist, {data_yaml_path}"
52
+ params = {**DEFAULT_TRAIN_PARAMS, **params}
53
+ model.train(
54
+ project=str(project),
55
+ name=experiment_name,
56
+ data=data_yaml_path.absolute(),
57
+ epochs=params["epochs"],
58
+ lr0=params["lr0"],
59
+ lrf=params["lrf"],
60
+ optimizer=params["optimizer"],
61
+ imgsz=params["imgsz"],
62
+ close_mosaic=params["close_mosaic"],
63
+ # Data Augmentation parameters
64
+ mixup=params["mixup"],
65
+ degrees=params["degrees"],
66
+ flipud=params["flipud"],
67
+ translate=params["translate"],
68
+ )
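
A hedged usage sketch; the data.yaml path and experiment name below are hypothetical and must point to an existing dataset definition:

from pathlib import Path

model = load_pretrained_model("yolov8n-pose.pt")  # any ultralytics model string or weights path
train(
    model=model,
    data_yaml_path=Path("data/03_processed/pose/data.yaml"),  # hypothetical path
    params={"epochs": 50, "imgsz": 640},
    experiment_name="pose-baseline",
)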