import os
from os.path import isfile, join, basename

import cv2
import numpy as np
import torch
from pytorchvideo.transforms import Normalize
from torchvision.transforms import Lambda
from transformers import TimesformerModel, VideoMAEImageProcessor

def extract_features(frames, device, model, image_processor):
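    # `frames` is expected to be a list of same-sized H x W x 3 uint8 numpy
    # arrays (decoded video frames), typically resized to 224 x 224 for TimesFormer.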
    # Convert frames to tensor
    frames_tensor = torch.stack([torch.from_numpy(frame) for frame in frames])
    # Rearrange from (num_frames, height, width, channel) to (channel, num_frames, height, width)
    frames_tensor = frames_tensor.permute(3, 0, 1, 2).to(device)

    # Get the mean and std of the image processor
    mean = image_processor.image_mean
    std = image_processor.image_std

    # Normalize frames
    frames_tensor = Lambda(lambda x: x / 255.0)(frames_tensor)
    frames_tensor = Normalize(mean, std)(frames_tensor)

    # Rearrange to (num_frames, channel, height, width) and add a batch dimension,
    # matching the (batch, frames, channels, height, width) input TimesFormer expects
    frames_tensor = frames_tensor.permute(1, 0, 2, 3).unsqueeze(0)

    # Move the model to the device and run inference without tracking gradients
    model.to(device)
    model.eval()
    with torch.no_grad():
        outputs = model(frames_tensor)

    # Take the [CLS] token embedding from the last hidden state as the clip-level feature
    final_output = outputs[0][:, 0]

    return final_output
    
def to_video(selected_frames, frames, output_path, video_fps):
    print("MP4 Format.")
    # Write the selected frames to a video at the original frame size and fps
    frame_height, frame_width = frames[0].shape[:2]
    video_writer = cv2.VideoWriter(
        output_path,
        cv2.VideoWriter_fourcc(*'mp4v'),
        video_fps,
        (frame_width, frame_height),
    )

    # selected_frames is a list of indices of frames
    for idx in selected_frames:
        video_writer.write(frames[idx])
    
    video_writer.release()
    print("Completed summarizing the video (wait for a moment to load).")

def to_txt(selected_frames, output_path, clip_sample_rate):
    # Write the indices of the selected frames to a txt file, one index per line
    with open(output_path, "w") as file:
        for item in selected_frames:
            file.write(str(item) + "\n")

    print("Finished writing the selected frame indices (it may take a moment to load).")

def load_model():
    try:
        DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
        model = TimesformerModel.from_pretrained("facebook/timesformer-base-finetuned-k600").to(DEVICE).eval()
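        # Only the normalization statistics (image_mean / image_std) of this
        # VideoMAE processor are used here (see extract_features).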
        processor = VideoMAEImageProcessor.from_pretrained("MCG-NJU/videomae-base")
        return model, processor, DEVICE
    
    except Exception as e:
        print(f"Failed to load the TimesFormer model or image processor: {e}")

def sum_of_squared_difference(vector1, vector2):
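    # Squared Euclidean (L2) distance between two feature vectors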
    squared_diff = np.square(vector1 - vector2)
    sum_squared_diff = np.sum(squared_diff)
    return sum_squared_diff
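
# -----------------------------------------------------------------------------
# Illustrative usage sketch (not part of the original pipeline): it assumes a
# local video named "input.mp4" and an 8-frame clip length, both hypothetical
# choices for this demo. It loads the model, samples 8 evenly spaced frames,
# resizes them to 224x224 (the default TimesFormer input size), and extracts a
# single clip-level feature vector with extract_features.
# -----------------------------------------------------------------------------
if __name__ == "__main__":
    model, processor, device = load_model()

    cap = cv2.VideoCapture("input.mp4")  # hypothetical example path
    frames = []
    ok, frame = cap.read()
    while ok:
        # OpenCV decodes frames as BGR; convert to RGB so the ImageNet
        # normalization statistics in extract_features match the channel order.
        frames.append(cv2.resize(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB), (224, 224)))
        ok, frame = cap.read()
    cap.release()

    if len(frames) >= 8:
        # Take 8 evenly spaced frames as one clip (assumed clip length)
        indices = np.linspace(0, len(frames) - 1, num=8).astype(int)
        clip = [frames[i] for i in indices]
        features = extract_features(clip, device, model, processor)
        print("Clip feature shape:", tuple(features.shape))  # expected (1, 768)
    else:
        print("Not enough frames in the video for an 8-frame clip.")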