# Spaces: kornia/image-registration-with-kornia
# Running on CPU Upgrade
# File size: 4,035 Bytes

import numpy as np
import gradio as gr
import imageio
import cv2
import kornia as K
import kornia.geometry as KG
from copy import deepcopy
from tqdm import tqdm
from base64 import b64encode
import torch
import torch.nn.functional as F

# Run on the GPU when one is visible to torch, otherwise fall back to the CPU.
use_cuda: bool = torch.cuda.is_available()
device = torch.device('cuda') if use_cuda else torch.device('cpu')

# Shared optimisation-based registrator configured for a 'similarity' model
# with an MSE loss, a 3-level pyramid and 500 iterations at lr=8e-4.
registrator = KG.ImageRegistrator(
    'similarity',
    loss_fn=F.mse_loss,
    lr=8e-4,
    pyramid_levels=3,
    num_iterations=500,
).to(device)

# Module-level container for pairwise transforms.
# NOTE(review): anything appended here persists across Gradio requests for
# the lifetime of the process.
models = []

def resize_images(f_names):
    """Resize every image after the first to the first image's size, on disk.

    The first file defines the target width and height. Each following file
    whose decoded size differs is resized with ``cv2.resize`` and written back
    to the same path. Files that already match are left untouched so they are
    not needlessly re-encoded.

    Args:
        f_names: Iterable of image file paths. An empty iterable is a no-op.

    Raises:
        ValueError: If OpenCV cannot decode one of the files.
    """
    target_size = None  # (width, height), the order cv2.resize expects
    for f_name in f_names:
        img = cv2.imread(f_name, cv2.IMREAD_COLOR)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise ValueError(f"Could not read image file: {f_name}")

        height, width = img.shape[:2]
        if target_size is None:
            target_size = (width, height)
            continue
        if (width, height) == target_size:
            continue

        cv2.imwrite(f_name, cv2.resize(img, target_size))


def convert_img(f_name):
    """Load an image file as a float RGB tensor scaled to [0, 1].

    Args:
        f_name: Path of the image file to read.

    Returns:
        A float tensor with a leading batch dimension (``keepdim=False``),
        converted from OpenCV's BGR channel order to RGB.

    Raises:
        ValueError: If OpenCV cannot decode the file.
    """
    img = cv2.imread(f_name, cv2.IMREAD_COLOR)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise ValueError(f"Could not read image file: {f_name}")

    # keepdim=False adds the batch dimension; the original passed None, which
    # is falsy and therefore selected the same behaviour implicitly.
    tensor = K.image_to_tensor(img, keepdim=False).float() / 255.
    return K.color.bgr_to_rgb(tensor)

def merge_sharp1_into2(timg1, timg2, trans1to2, verbose=False):
    """Blend the sharp regions of ``timg1`` into ``timg2``.

    A per-pixel weight is derived from the absolute Laplacian response of the
    grayscale version of ``timg1``, min-max normalised, Gaussian-blurred and
    rescaled so its maximum is 1. Both ``timg1`` and that weight are warped
    with the inverse of ``trans1to2`` and then used for a convex blend with
    ``timg2``.

    Args:
        timg1: Image tensor supplying the sharp content (RGB, float).
        timg2: Image tensor being blended into; it is not modified.
        trans1to2: Transform matrix relating ``timg1`` to ``timg2``; its
            inverse is what gets passed to ``KG.homography_warp``.
        verbose: Unused; kept for interface compatibility.

    Returns:
        A new tensor ``w * warp(timg1) + (1 - w) * timg2``.
    """
    out_size = timg1.shape[-2:]
    # Invert once and reuse for both the image and its weight mask.
    inv_trans = torch.inverse(trans1to2)
    warped = KG.homography_warp(timg1, inv_trans, out_size)

    sharpness = K.filters.laplacian(K.color.rgb_to_grayscale(timg1), 7).abs()
    # Guard the normalisations: a constant Laplacian response (e.g. a flat
    # image) would otherwise divide by zero and fill the result with NaNs.
    eps = torch.finfo(sharpness.dtype).eps
    low, high = sharpness.min(), sharpness.max()
    sharpness = (sharpness - low) / (high - low).clamp_min(eps)

    weight = K.filters.gaussian_blur2d(sharpness, (9, 9), (1.6, 1.6))
    weight = weight / weight.max().clamp_min(eps)

    warped_weight = KG.homography_warp(weight.float(), inv_trans, out_size)
    return warped_weight * warped + (1 - warped_weight) * timg2

def img_registration(images):
    """Register a sequence of uploaded images and merge them into one.

    Steps:
      1. Resize all files on disk to the size of the first one.
      2. Register each consecutive pair with the module-level ``registrator``.
      3. Chain the pairwise transforms so every image maps to the last one.
      4. Blend each image's sharp regions into a running result, writing one
         video frame per step to ``video.avi``.

    Args:
        images: Sequence of uploaded file objects exposing a ``.name`` path.

    Returns:
        A tuple ``(merged_image, video_file)``: the final blended image as a
        float array from ``K.tensor_to_image`` and the path of the video.

    Raises:
        ValueError: If no images were supplied.
    """
    if not images:
        raise ValueError("Please upload at least one image.")

    f_names = [f.name for f in images]
    resize_images(f_names)

    # Keep the pairwise transforms local to this call. Appending to a
    # module-level list would carry transforms over from earlier requests and
    # misalign the chain below with the current set of images.
    pairwise = []
    prev_img = convert_img(f_names[0]).to(device)
    for f_name in tqdm(f_names[1:]):
        curr_img = convert_img(f_name).to(device)
        model = registrator.register(prev_img, curr_img)
        pairwise.append(model.detach().clone())
        # The current image is the next pair's "previous"; no need to reload it.
        prev_img = curr_img

    # Accumulate from the last image backwards: entry i is the product of the
    # pairwise transforms from image i up to the final image (identity for
    # the final image itself).
    models_to_final = [torch.eye(3, device=device)[None]]
    for m in reversed(pairwise):
        models_to_final.append(m @ models_to_final[-1])
    models_to_final.reverse()

    curr_img = convert_img(f_names[-1]).to(device)
    height, width = curr_img.shape[-2:]
    video_file = 'video.avi'
    video = cv2.VideoWriter(video_file, 0, 1, (width, height))

    try:
        with torch.no_grad():
            for f_name, to_final in tqdm(zip(f_names, models_to_final),
                                         total=len(f_names)):
                timg = convert_img(f_name).to(device)
                curr_img = merge_sharp1_into2(timg, curr_img, to_final)
                # Clamp before the uint8 cast so float round-off cannot wrap.
                frame = K.tensor_to_image(
                    (curr_img.float() * 255).clamp(0, 255)
                ).astype(np.uint8)
                # Tensors are RGB; OpenCV's writer expects BGR.
                video.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
    finally:
        # Always finalise the container, even if a merge step fails.
        video.release()

    return K.tensor_to_image(curr_img.float()), video_file

# Heading shown at the top of the demo page.
title = 'Image Registration with Kornia!'

# Markdown body shown under the title.
description = '''Image registration is the process of transforming different sets of data into one coordinate system. Data may be multiple photographs, data from different sensors, times, depths, or viewpoints. It is used in computer vision, medical imaging, and compiling and analyzing images and data from satellites. Registration is necessary in order to be able to compare or integrate the data obtained from these different measurements.

*Note that you can upload only image files, e.g. jpg, png etc and all images should have same width and height!* 

Learn more about [image registration and Kornia](https://kornia.readthedocs.io/en/latest/applications/image_registration.html)'''

# One example entry consisting of ten image file names.
examples = [
    [
        "IMG_3020.JPG",
        "IMG_3027.JPG",
        "IMG_3034.JPG",
        "IMG_3040.JPG",
        "IMG_3058.JPG",
        "IMG_3070.JPG",
        "IMG_3083.JPG",
        "IMG_3100.JPG",
        "IMG_3106.JPG",
        "IMG_3112.JPG",
    ],
]

# Gradio UI: a multi-file input feeding img_registration, which returns the
# merged image and the path of the generated video.
iface = gr.Interface(
    fn=img_registration,
    inputs="files",
    outputs=["image", gr.Video()],
    title=title,
    description=description,
    allow_flagging="never",
)

# Start the web server only when executed as a script; show_error surfaces
# exceptions raised by the handler in the UI.
if __name__ == "__main__":
    iface.launch(show_error=True)