|
from transformers import TimesformerModel, VideoMAEImageProcessor |
|
import torch |
|
import cv2 |
|
import numpy as np |
|
from torchvision.transforms import Lambda |
|
from pytorchvideo.transforms import ( |
|
Normalize, |
|
) |
|
from torchvision.transforms import ( |
|
Lambda, |
|
) |
|
import os |
|
from os.path import isfile, join, basename |
|
|
|
def extract_features(frames, device, model, image_processor):
    """Run ``model`` on a clip of frames and return one feature row per batch item.

    The frames are stacked, scaled from the 0-255 range to 0-1, standardized
    per channel with the processor's statistics, rearranged to
    ``(1, num_frames, channels, height, width)`` and passed through ``model``
    with gradient tracking disabled.

    Args:
        frames: Non-empty sequence of equally shaped NumPy arrays whose last
            axis is the channel axis (height, width, channels) -- presumably
            uint8 images with values in 0-255; confirm against the caller.
        device: Torch device (or device string) the clip and model are moved to.
        model: Torch module called with the prepared clip. It is moved to
            ``device`` and switched to eval mode as a side effect.
        image_processor: Object exposing ``image_mean`` and ``image_std``
            (one value per channel).

    Returns:
        ``outputs[0][:, 0]``: index 0 along the second axis of the model's
        first output. For the TimesformerModel returned by ``load_model`` this
        is presumably the classification-token embedding -- TODO confirm.

    Raises:
        RuntimeError: If ``frames`` is empty (raised by ``torch.stack``).
    """
    # NOTE(review): frames decoded with OpenCV are usually BGR, while the
    # processor statistics are presumably listed in RGB order -- confirm the
    # caller converts the channel order before calling this function.

    # Stack into a single (T, H, W, C) tensor on the target device.
    clip = torch.stack([torch.from_numpy(frame) for frame in frames]).to(device)

    # Scale to [0, 1]; true division also promotes integer pixels to float.
    clip = clip / 255.0

    # Per-channel standardization: the statistics broadcast over the trailing
    # channel axis, which gives the same values as normalizing a (C, T, H, W)
    # clip channel by channel.
    mean = torch.as_tensor(
        image_processor.image_mean, dtype=clip.dtype, device=clip.device
    )
    std = torch.as_tensor(
        image_processor.image_std, dtype=clip.dtype, device=clip.device
    )
    clip = (clip - mean) / std

    # (T, H, W, C) -> (T, C, H, W), then add the batch axis -> (1, T, C, H, W).
    clip = clip.permute(0, 3, 1, 2).unsqueeze(0)

    model.to(device)
    model.eval()

    # Inference only: skip building the autograd graph to save memory/time and
    # return a tensor that does not require grad.
    with torch.no_grad():
        outputs = model(clip)

    return outputs[0][:, 0]
|
|
|
def to_video(selected_frames, frames, output_path, video_fps):
    """Write the selected frames to ``output_path`` using the 'mp4v' codec.

    Args:
        selected_frames: Iterable of indices into ``frames``; the frames are
            written in the order the indices are yielded.
        frames: Non-empty indexable collection of image arrays. The first
            frame's ``shape[1]`` and ``shape[0]`` are used as the output
            width and height.
        output_path: Destination path handed to ``cv2.VideoWriter``.
        video_fps: Frame rate handed to ``cv2.VideoWriter``.

    Raises:
        IndexError: If ``frames`` is empty or an index in ``selected_frames``
            is out of range for ``frames``.
    """
    print("MP4 Format.")

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    frame_width = frames[0].shape[1]
    frame_height = frames[0].shape[0]
    video_writer = cv2.VideoWriter(
        output_path, fourcc, video_fps, (frame_width, frame_height)
    )

    # Release the writer even if a frame lookup or write fails, so the output
    # file handle is not leaked.
    try:
        for idx in selected_frames:
            video_writer.write(frames[idx])
    finally:
        video_writer.release()

    print("Completed summarizing the video (wait for a moment to load).")
|
|
|
def load_model():
    """Load the pretrained TimeSformer model and the VideoMAE image processor.

    Returns:
        A ``(model, processor, device)`` tuple on success: the model has been
        moved to ``device`` and put in eval mode, and ``device`` is ``'cuda'``
        when ``torch.cuda.is_available()`` is true, otherwise ``'cpu'``.
        If anything raises an ``Exception`` while loading, the exception is
        printed and ``None`` is returned instead.
    """
    # NOTE(review): on failure this returns None, so a caller that unpacks the
    # result into three names will fail with a TypeError at the call site.
    try:
        if torch.cuda.is_available():
            device = 'cuda'
        else:
            device = 'cpu'

        backbone = TimesformerModel.from_pretrained(
            "facebook/timesformer-base-finetuned-k600"
        )
        backbone = backbone.to(device)
        backbone = backbone.eval()

        preprocessor = VideoMAEImageProcessor.from_pretrained("MCG-NJU/videomae-base")
    except Exception as error:
        print(error)
        return None

    return backbone, preprocessor, device
|
|
|
def sum_of_squared_difference(vector1, vector2):
    """Return the sum of the element-wise squared differences of two inputs.

    Computes ``np.sum(np.square(vector1 - vector2))``; the operands must
    support subtraction with each other (e.g. NumPy arrays of broadcastable
    shapes, or plain numbers).
    """
    delta = vector1 - vector2
    return np.sum(np.square(delta))
|
|