"""Helpers for visualizing image sequences and scoring them with LPIPS."""

import os
from typing import List, Optional, Tuple

import matplotlib.pyplot as plt
import numpy as np
import torch
from lpips import LPIPS
from PIL import Image
from torchvision.transforms import Normalize


def show_images_horizontally(
    list_of_files: np.ndarray,
    output_file: Optional[str] = None,
    interact: bool = False,
) -> None:
    """
    Visualize the list of images horizontally and save the figure as PNG.

    Args:
        list_of_files: The images as a numpy array with shape (N, H, W, C), or a
            sequence of N arrays each with shape (H, W, C).
        output_file: The output file path to save the figure to. Required when
            ``interact`` is False.
        interact: If True, show the figure with ``plt.show()`` instead of saving it.

    Raises:
        ValueError: If there are no images, or if ``interact`` is False and no
            ``output_file`` was given.
    """
    number_of_files = len(list_of_files)
    if number_of_files == 0:
        raise ValueError("The image list is empty.")
    if not interact and output_file is None:
        raise ValueError("output_file is required when interact is False.")

    # Axis 0 of an (H, W, C) image is its height and axis 1 its width.
    heights = [image.shape[0] for image in list_of_files]
    widths = [image.shape[1] for image in list_of_files]

    fig_width = 8.0  # inches allotted to each image
    # Scale the height so the average image aspect ratio is preserved.
    fig_height = fig_width * sum(heights) / sum(widths)

    # squeeze=False always yields a 2-D array of axes, so a single image works too.
    fig, axs = plt.subplots(
        1,
        number_of_files,
        figsize=(fig_width * number_of_files, fig_height),
        squeeze=False,
    )
    fig.tight_layout()

    for ax, image in zip(axs[0], list_of_files):
        ax.imshow(image)
        ax.axis("off")

    if interact:
        plt.show()
    else:
        fig.savefig(output_file, bbox_inches="tight", pad_inches=0.25)
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)


def image_grids(
    images: List[Image.Image],
    rows: Optional[int] = None,
    cols: Optional[int] = None,
) -> Image.Image:
    """
    Paste the images into a single grid image, filling row by row.

    Every cell has the size of the first image. Cells without an image stay at
    the default fill of a new RGB image, and images that fall outside an
    explicitly given ``rows`` x ``cols`` grid are not visible in the result.

    Args:
        images: The PIL images to arrange.
        rows: The number of grid rows. Defaults to the smallest number of rows
            that fits all images given ``cols``.
        cols: The number of grid columns. Defaults to ``int(sqrt(len(images)))``.

    Returns:
        grid_image: A new RGB image of size (cols * width, rows * height).

    Raises:
        ValueError: If ``images`` is empty.
    """
    if not images:
        raise ValueError("The image list is empty.")

    n_images = len(images)
    if cols is None:
        cols = int(n_images**0.5)
    if rows is None:
        rows = (n_images + cols - 1) // cols  # ceiling division

    width, height = images[0].size
    grid_image = Image.new("RGB", (cols * width, rows * height))

    for index, image in enumerate(images):
        row, col = divmod(index, cols)
        grid_image.paste(image, (col * width, row * height))

    return grid_image


def save_image(image: np.ndarray, file_name: str) -> None:
    """
    Save the image to disk.

    The file format is chosen by PIL from the extension of ``file_name``.

    Args:
        image: The input image as numpy array with shape (H, W, C). It must have
            a dtype that ``PIL.Image.fromarray`` accepts (typically uint8).
        file_name: The file name to save the image.
    """
    Image.fromarray(image).save(file_name)


def load_and_process_images(load_dir: str) -> List[np.ndarray]:
    """
    Load the ``.jpg`` images of a directory as arrays scaled to [0, 1].

    Files are ordered by the integer before the first ``.`` in their name
    (``0.jpg``, ``1.jpg``, ..., ``10.jpg``). Files without a ``.jpg`` suffix are
    ignored.

    Args:
        load_dir: The directory to load the images.

    Returns:
        images: A list of N arrays, each with shape (H, W, C) for multi-channel
            images, holding pixel values divided by 255.

    Raises:
        ValueError: If a ``.jpg`` file name does not start with an integer.
    """
    print(load_dir)

    # Filter before sorting so unrelated files (e.g. ".DS_Store") never reach int().
    filenames = sorted(
        (name for name in os.listdir(load_dir) if name.endswith(".jpg")),
        key=lambda name: int(name.split(".")[0]),
    )

    images = []
    for filename in filenames:
        # The context manager closes the file handle once the pixels are copied out.
        with Image.open(os.path.join(load_dir, filename)) as img:
            images.append(np.asarray(img) / 255.0)
    return images


def compute_lpips(images: np.ndarray, lpips_model: LPIPS) -> np.ndarray:
    """
    Compute the LPIPS between each pair of adjacent input images.

    Args:
        images: The input images as numpy array with shape (N, H, W, C), or a
            sequence of N arrays each with shape (H, W, C).
        lpips_model: The LPIPS model used to compute perceptual distances.

    Returns:
        distances: A 1-D array of N - 1 distances, where entry ``i`` compares
            image ``i`` with image ``i + 1``. Empty when fewer than two images
            are given.
    """
    if len(images) < 2:
        return np.array([])

    # Run on whatever device the model's weights live on.
    device = next(lpips_model.parameters()).device

    if not isinstance(images, torch.Tensor):
        images = np.asarray(images)
    # Cast to float32 before the transfer: it halves the copy for float64 input
    # and avoids failures on backends that cannot hold float64 (e.g. MPS).
    batch = torch.as_tensor(images, dtype=torch.float32).to(device)
    batch = batch.permute(0, 3, 1, 2)  # (N, H, W, C) -> (N, C, H, W)

    # NOTE(review): the lpips package documents inputs scaled to [-1, 1]; the
    # ImageNet mean/std normalization is kept to preserve existing metric
    # values -- confirm this is intended.
    normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    batch = normalize(batch)

    # Pure evaluation: skip autograd bookkeeping for the forward passes.
    with torch.no_grad():
        distances = [
            lpips_model(first.unsqueeze(0), second.unsqueeze(0)).item()
            for first, second in zip(batch[:-1], batch[1:])
        ]
    return np.array(distances)


def compute_gini(distances: np.ndarray) -> float:
    """
    Compute the Gini index of the input distances.

    The index is ``sum_ij |d_i - d_j| / (2 * n^2 * mean(d))``, evaluated through
    the equivalent closed form on the sorted values.

    Args:
        distances: The input distances as a 1-D numpy array (or sequence) of
            non-negative values.

    Returns:
        gini: The Gini index of the input distances. It is 0 for fewer than two
            elements and when the distances sum to zero.
    """
    n = len(distances)
    if n < 2:
        return 0.0  # Gini index is 0 for less than two elements

    ordered = np.sort(np.asarray(distances, dtype=float))
    total = ordered.sum()
    if total == 0:
        # All-zero distances are perfectly equal; also avoids dividing by zero.
        return 0.0

    # For ascending x_1..x_n: sum_ij |x_i - x_j| = 2 * sum_k (2k - n - 1) * x_k,
    # which turns the pairwise double loop into a single weighted sum.
    ranks = np.arange(1, n + 1)
    return np.dot(2 * ranks - n - 1, ordered) / (n * total)


def compute_smoothness_and_consistency(
    images: np.ndarray, lpips_model: LPIPS
) -> Tuple[float, float, float]:
    """
    Compute the smoothness and consistency of the input images.

    Args:
        images: The input images as numpy array with shape (N, H, W, C).
        lpips_model: The LPIPS model used to compute perceptual distances.

    Returns:
        smoothness: One minus gini index of LPIPS of consecutive images.
        consistency: The mean LPIPS of consecutive images.
        max_inception_distance: The maximum LPIPS of consecutive images.

    Raises:
        ValueError: If fewer than two images are given, so that there is no
            pair of consecutive images to compare.
    """
    distances = compute_lpips(images, lpips_model)
    if len(distances) == 0:
        raise ValueError("At least two images are required to compute LPIPS.")

    smoothness = 1 - compute_gini(distances)
    consistency = np.mean(distances)
    max_inception_distance = np.max(distances)
    return smoothness, consistency, max_inception_distance


def separate_source_and_interpolated_images(images: np.ndarray) -> tuple:
    """
    Separate the input images into source and interpolated images.

    The source images are the first and last image of the sequence, while the
    interpolated images are everything in between.

    Args:
        images: The input images as numpy array with shape (N, H, W, C), or a
            sequence of N arrays each with shape (H, W, C).

    Returns:
        source: The source images as numpy array with shape (2, H, W, C).
        interpolation: The N - 2 interpolated images, as the slice
            ``images[1:-1]`` (same container type as the input).

    Raises:
        ValueError: If fewer than two images are given.
    """
    if len(images) < 2:
        raise ValueError("The input array should have at least two elements.")

    # The two endpoints are the sources; the slice between them is the interpolation.
    source = np.array([images[0], images[-1]])
    interpolation = images[1:-1]
    return source, interpolation