# Spaces: Sleeping
import json

import cv2
import numpy as np
import torch
def ssim(img1, img2):
    """Return the mean structural similarity (SSIM) of two 2-D images.

    Both inputs are cast to float64 and compared under an 11x11 Gaussian
    window (sigma 1.5). The stabilising constants are 0.01**2 and 0.03**2,
    i.e. no dynamic-range scaling is applied (presumably pixel values are
    expected in [0, 1] -- confirm with callers).

    A 5-pixel border is cropped from every filtered map so that only window
    positions lying fully inside the image contribute to the mean.
    """
    c1 = 0.01**2
    c2 = 0.03**2

    a = img1.astype(np.float64)
    b = img2.astype(np.float64)

    gauss_1d = cv2.getGaussianKernel(11, 1.5)
    gauss_2d = np.outer(gauss_1d, gauss_1d.transpose())

    def local_mean(plane):
        # Gaussian-weighted local mean, cropped to the "valid" interior.
        return cv2.filter2D(plane, -1, gauss_2d)[5:-5, 5:-5]

    mean_a = local_mean(a)
    mean_b = local_mean(b)
    mean_a_sq = mean_a**2
    mean_b_sq = mean_b**2
    mean_ab = mean_a * mean_b

    # Local variances / covariance via E[x*y] - E[x]*E[y].
    var_a = local_mean(a**2) - mean_a_sq
    var_b = local_mean(b**2) - mean_b_sq
    cov_ab = local_mean(a * b) - mean_ab

    numerator = (2 * mean_ab + c1) * (2 * cov_ab + c2)
    denominator = (mean_a_sq + mean_b_sq + c1) * (var_a + var_b + c2)
    return (numerator / denominator).mean()
def calculate_ssim_function(img1, img2):
    """Return the SSIM of two images, averaged over channels.

    Supported layouts:
      * 2-D ``[h, w]``: compared directly.
      * 3-D channel-first ``[c, h, w]`` with ``c`` equal to 1 or 3: SSIM is
        computed per channel and the channel scores are averaged.

    Raises:
        ValueError: if the two shapes differ, or the layout is not one of
            the supported ones. Every unsupported layout raises; none falls
            through and returns ``None``.
    """
    # [0,1]
    # ssim is the only metric extremely sensitive to gray being compared to b/w
    if img1.shape != img2.shape:
        raise ValueError("Input images must have the same dimensions.")

    if img1.ndim == 2:
        return ssim(img1, img2)

    if img1.ndim == 3 and img1.shape[0] in (1, 3):
        # Iterate over the leading (channel) axis; indexing it explicitly
        # avoids np.squeeze also dropping a length-1 spatial axis.
        per_channel = [ssim(plane1, plane2) for plane1, plane2 in zip(img1, img2)]
        return np.array(per_channel).mean()

    raise ValueError("Wrong input image dimensions.")
def trans(x):
    """Return *x* unchanged (identity pre-processing hook)."""
    return x
def _as_numpy(batch):
    """Return *batch* as a NumPy array, detaching torch tensors and moving them to CPU."""
    if isinstance(batch, torch.Tensor):
        # .numpy() alone raises for CUDA tensors and tensors that require grad.
        return batch.detach().cpu().numpy()
    return np.asarray(batch)


def calculate_ssim(videos1, videos2):
    """Compute per-timestamp SSIM statistics between two batches of videos.

    Args:
        videos1: videos laid out as [batch_size, timestamps, channel, h, w].
        videos2: videos with exactly the same shape as ``videos1``.

    Returns:
        A dict with:
          * ``"value"``: ``{timestamp: mean SSIM over the batch}``
          * ``"value_std"``: ``{timestamp: std of SSIM over the batch}``
          * ``"video_setting"``: shape of a single video (``shape[1:]``)
          * ``"video_setting_name"``: textual description of that shape

    Raises:
        AssertionError: if the two batches differ in shape.
        ValueError: if the batch contains no videos.
    """
    # Explicit raise instead of `assert` so the check survives `python -O`;
    # the exception type is kept for callers that already catch it.
    if videos1.shape != videos2.shape:
        raise AssertionError(
            f"videos1 and videos2 must have the same shape, "
            f"got {tuple(videos1.shape)} and {tuple(videos2.shape)}"
        )
    if videos1.shape[0] == 0:
        raise ValueError("At least one video is required to compute SSIM.")

    videos1 = trans(videos1)
    videos2 = trans(videos2)

    # Taken from the batch itself rather than from a leaked loop variable.
    video_shape = videos1.shape[1:]

    # Convert once for the whole batch instead of once per frame.
    batch1 = _as_numpy(videos1)
    batch2 = _as_numpy(videos2)
    num_frames = batch1.shape[1]

    # scores[video, timestamp] = SSIM of the corresponding frame pair.
    scores = np.array(
        [
            [calculate_ssim_function(frame1, frame2) for frame1, frame2 in zip(clip1, clip2)]
            for clip1, clip2 in zip(batch1, batch2)
        ]
    )

    # Named so they do not shadow the module-level `ssim` function.
    ssim_mean = {t: np.mean(scores[:, t]) for t in range(num_frames)}
    ssim_std = {t: np.std(scores[:, t]) for t in range(num_frames)}

    return {
        "value": ssim_mean,
        "value_std": ssim_std,
        "video_setting": video_shape,
        "video_setting_name": "time, channel, heigth, width",
    }
# test code / using example
def main():
    """Run `calculate_ssim` on two all-zero video batches and print the result as JSON."""
    number_of_videos = 8
    video_length = 50
    channel = 3
    size = 64

    videos1 = torch.zeros(number_of_videos, video_length, channel, size, size, requires_grad=False)
    videos2 = torch.zeros(number_of_videos, video_length, channel, size, size, requires_grad=False)

    # The former bare `torch.device("cuda")` statement was dropped: it only
    # built a device object and discarded it, so it never moved any data.
    result = calculate_ssim(videos1, videos2)
    print(json.dumps(result, indent=4))


if __name__ == "__main__":
    main()