|
|
|
|
|
import unittest |
|
import unittest.mock |
|
|
|
import torch |
|
from pytorchvideo.data import EpicKitchenRecognition |
|
from pytorchvideo.data.epic_kitchen import ActionData |
|
from pytorchvideo.data.epic_kitchen_recognition import ClipSampling |
|
from pytorchvideo.data.frame_video import FrameVideo |
|
|
|
|
|
class TestEpicKitchenRecognition(unittest.TestCase):
    """Unit tests for the private generator helpers of EpicKitchenRecognition."""

    def test_transform_generator(self):
        """The generated transform filters actions and applies the user transform.

        A clip spanning 2.5s-6.5s is built with four actions. After the
        transform only "turn on light" and "close door" are expected to
        remain (presumably the actions selected for the clip window --
        the selection rule itself lives in EpicKitchenRecognition), the
        clip times must be unchanged, and the additional transform must
        have permuted the video tensor.
        """
        clip = {
            "start_time": 2.5,
            "stop_time": 6.5,
            "video": torch.rand(3, 4, 10, 20),
            "actions": [
                ActionData(
                    "P01",
                    "P01_01",
                    "turn off light",
                    "00:00:01.00",
                    "00:00:02.00",
                    262,
                    370,
                    "turn-off",
                    12,
                    "light",
                    113,
                    "['light']",
                    "[113]",
                ),
                ActionData(
                    "P01",
                    "P01_01",
                    "turn on light",
                    "00:00:04.00",
                    "00:00:06.00",
                    262,
                    370,
                    "turn-on",
                    12,
                    "light",
                    113,
                    "['light']",
                    "[113]",
                ),
                ActionData(
                    "P01",
                    "P01_01",
                    "close door",
                    "00:00:06.00",
                    "00:00:07.00",
                    418,
                    569,
                    "close",
                    3,
                    "door",
                    8,
                    "['door']",
                    "[8]",
                ),
                ActionData(
                    "P01",
                    "P01_01",
                    "slam door",
                    "00:00:10.00",
                    "00:00:11.00",
                    408,
                    509,
                    "slam",
                    3,
                    "door",
                    8,
                    "['door']",
                    "[8]",
                ),
            ],
        }

        def additional_transform(clip):
            # Move the leading axis of the video tensor to the end so the
            # test can detect that this user-supplied transform was applied.
            clip["video"] = clip["video"].permute(1, 2, 3, 0)
            return clip

        transform_fn = EpicKitchenRecognition._transform_generator(additional_transform)
        transformed_clip = transform_fn(clip)

        self.assertEqual(len(transformed_clip["actions"]), 2)

        # Sort so the narration checks do not depend on the output order.
        sorted_actions = sorted(transformed_clip["actions"], key=lambda a: a.start_time)
        self.assertEqual(sorted_actions[0].narration, "turn on light")
        self.assertEqual(sorted_actions[1].narration, "close door")

        self.assertEqual(transformed_clip["start_time"], 2.5)
        self.assertEqual(transformed_clip["stop_time"], 6.5)

        self.assertEqual(transformed_clip["video"].size(), torch.Size([4, 10, 20, 3]))

    def test_frame_filter_generator(self):
        """The generated frame filter subsamples a 10-element list as expected.

        Requesting 10 frames returns the input unchanged, requesting 5
        returns every second element, and requesting 1 returns only the
        first element.
        """
        input_list = list(range(10))

        frame_filter_fn = EpicKitchenRecognition._frame_filter_generator(10)
        all_elements = frame_filter_fn(input_list)
        self.assertEqual(all_elements, input_list)

        frame_filter_fn = EpicKitchenRecognition._frame_filter_generator(5)
        half_elements = frame_filter_fn(input_list)
        self.assertEqual(len(half_elements), 5)
        self.assertEqual(half_elements, input_list[::2])

        frame_filter_fn = EpicKitchenRecognition._frame_filter_generator(1)
        single_element = frame_filter_fn(input_list)
        self.assertEqual(len(single_element), 1)
        self.assertEqual(single_element[0], 0)

    def test_define_clip_structure_generator(self):
        """RandomOffsetUniform sampling yields evenly spaced fixed-length clips.

        `random.random` is patched to return 0.5, so the i-th clip of each
        video is expected to start at `seconds_per_clip * (i + 0.5)` and to
        last exactly `seconds_per_clip` seconds.
        """
        seconds_per_clip = 5
        define_clip_structure_fn = (
            EpicKitchenRecognition._define_clip_structure_generator(
                # Use the local variable so the value under test and the
                # value asserted below cannot drift apart.
                seconds_per_clip=seconds_per_clip,
                clip_sampling=ClipSampling.RandomOffsetUniform,
            )
        )
        # NOTE(review): the second argument to from_frame_paths is presumably
        # the frame rate (100 frames @ 10 -> 10s, 300 @ 10 -> 30s,
        # 600 @ 30 -> 20s) -- confirm against FrameVideo.
        frame_videos = {
            "P01_003": FrameVideo.from_frame_paths(
                [f"root/P01_003/frame_{i}" for i in range(100)], 10
            ),
            "P02_004": FrameVideo.from_frame_paths(
                [f"root/P02_004/frame_{i}" for i in range(300)], 10
            ),
            "P11_010": FrameVideo.from_frame_paths(
                [f"root/P11_010/frame_{i}" for i in range(600)], 30
            ),
        }
        actions = {video_id: [] for video_id in frame_videos}
        random_value = 0.5

        # Only producing the clips needs the patched random source; the
        # result is materialised inside the context so nothing lazy escapes.
        with unittest.mock.patch("random.random", return_value=random_value):
            clips = define_clip_structure_fn(frame_videos, actions)
            sorted_clips = sorted(clips, key=lambda c: c.start_time)

        for clip in sorted_clips:
            self.assertEqual(clip.stop_time - clip.start_time, seconds_per_clip)

        # Expected number of clips per video id.
        expected_clip_counts = {
            "P01_003": 1,
            "P02_004": 5,
            "P11_010": 3,
        }
        for video_id, expected_count in expected_clip_counts.items():
            with self.subTest(video_id=video_id):
                video_clips = [c for c in sorted_clips if c.video_id == video_id]
                self.assertEqual(len(video_clips), expected_count)
                for i, video_clip in enumerate(video_clips):
                    self.assertEqual(
                        video_clip.start_time, seconds_per_clip * (i + random_value)
                    )
|
|