# Spaces:
# Running
# Running
import tensorflow as tf
import numpy as np
from einops import rearrange
from decord import VideoReader
# Clip/geometry settings. Only ``input_size`` is read by the functions visible
# in this file (frame_sampling resizes frames to input_size x input_size);
# ``num_frames`` and ``patch_size`` are presumably consumed by the model code.
num_frames = 32
input_size = 224
patch_size = (16,) * 2

# Per-channel statistics applied by format_frames after pixel values have been
# divided by 255: frame = (frame - IMAGENET_MEAN) / IMAGENET_STD.
IMAGENET_MEAN = np.array((0.485, 0.456, 0.406))
IMAGENET_STD = np.array((0.229, 0.224, 0.225))
def format_frames(frame, output_size):
    """Resize frames and normalize them with the ImageNet statistics.

    Args:
        frame: Image or batch of images accepted by ``tf.image.resize``.
            The length-3 mean/std arrays broadcast over the last axis, so
            the input is assumed to be channels-last RGB (confirm with callers).
        output_size: ``[height, width]`` passed to ``tf.image.resize``.

    Returns:
        The resized frames, divided by 255 and then standardized as
        ``(x - IMAGENET_MEAN) / IMAGENET_STD``.
    """
    as_uint8 = tf.image.convert_image_dtype(frame, tf.uint8)
    resized = tf.image.resize(as_uint8, size=output_size)
    # Bring pixel values to the 0..1 range before applying mean/std.
    unit_range = resized / 255.
    return (unit_range - IMAGENET_MEAN) / IMAGENET_STD
def read_video(file_path):
    """Open the video at ``file_path`` and return its ``VideoReader``."""
    return VideoReader(file_path)
def frame_sampling(container, num_frames):
    """Randomly sample ``num_frames`` frames across a video and preprocess them.

    The video is split into ``num_frames`` consecutive segments of
    ``len(container) // num_frames`` frames each, and one frame index is drawn
    uniformly at random from every segment. If the video has fewer frames
    than ``num_frames`` the segments would be empty, so evenly spaced indices
    (with repeats) are used instead.

    Args:
        container: Frame source supporting ``len()`` and
            ``get_batch(indices).asnumpy()`` (e.g. the object returned by
            ``read_video``).
        num_frames: Number of frames to sample.

    Returns:
        The sampled frames after ``format_frames`` has resized them to
        ``input_size`` x ``input_size`` and normalized them.

    Raises:
        ValueError: If ``container`` holds no frames.
    """
    total_frames = len(container)
    if total_frames == 0:
        raise ValueError("cannot sample frames from an empty video")

    interval = total_frames // num_frames
    if interval > 0:
        # Start of each segment plus a random offset inside that segment.
        bids = np.arange(num_frames) * interval
        offset = np.random.randint(interval, size=bids.shape)
        frame_index = bids + offset
    else:
        # Short video: np.random.randint(0, ...) would raise, so cover the
        # whole clip deterministically and let frames repeat as needed.
        frame_index = (
            np.linspace(0, total_frames - 1, num=num_frames)
            .round()
            .astype(np.int64)
        )

    frames = container.get_batch(frame_index).asnumpy()
    return format_frames(frames, [input_size] * 2)
def denormalize(z):
    """Invert the ImageNet normalization and return pixel values in [0, 255].

    Computes ``(z * std + mean) * 255`` and clips the result, which undoes
    the ``(x / 255 - mean) / std`` transform applied by ``format_frames``.

    Args:
        z: NumPy array of normalized values whose last axis has the three
            colour channels (the mean/std arrays broadcast over it).

    Returns:
        Array of the same shape as ``z`` with values clipped to [0, 255].
    """
    mean = np.array([0.485, 0.456, 0.406])
    # Must equal IMAGENET_STD used for normalization; a uniform 0.225 would
    # reconstruct the red and green channels with the wrong scale.
    std = np.array([0.229, 0.224, 0.225])
    x = (z * std) + mean
    x = x * 255
    return x.clip(0, 255)