File size: 3,777 Bytes
3133fdb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.

import contextlib
import json
import os
import tempfile
import unittest

from pytorchvideo.data import json_dataset
from pytorchvideo.data.clip_sampling import make_clip_sampler
from pytorchvideo.data.labeled_video_dataset import LabeledVideoDataset
from utils import temp_frame_video_dataset


class TestJsonDatasets(unittest.TestCase):
    """Tests for json_dataset's clip-recognition and video-only dataset builders."""

    def setUp(self):
        # Fail fast: a single decode failure should surface as a test failure
        # instead of being silently retried.
        LabeledVideoDataset._MAX_CONSECUTIVE_FAILURES = 1

    def _assert_dataset_size(
        self, dataset_builder, sampler_name, clip_duration, expected_num_videos
    ):
        """Build a dataset from mocked annotations and verify its size.

        Args:
            dataset_builder: dataset factory, e.g. json_dataset.clip_recognition_dataset.
            sampler_name: clip sampler registry key ("random" or "uniform").
            clip_duration: clip length in seconds passed to the sampler.
            expected_num_videos: expected value of both dataset.num_videos and
                the number of clips produced by fully iterating the dataset.
        """
        with mock_json_annotations() as (annotation_json, labels, duration):
            clip_sampler = make_clip_sampler(sampler_name, clip_duration)
            dataset = dataset_builder(
                data_path=annotation_json,
                clip_sampler=clip_sampler,
                decode_audio=False,
            )

            self.assertEqual(dataset.num_videos, expected_num_videos)
            self.assertEqual(len(list(iter(dataset))), expected_num_videos)

    def test_recognition_random_clip_sampler(self):
        # Two videos x two annotated clips each -> 4 clip samples.
        self._assert_dataset_size(
            json_dataset.clip_recognition_dataset, "random", 0.05, 4
        )

    def test_recognition_uniform_clip_sampler(self):
        self._assert_dataset_size(
            json_dataset.clip_recognition_dataset, "uniform", 0.05, 4
        )

    def test_video_only_frame_video_dataset(self):
        # Video-only dataset yields one entry per video, not per annotation.
        self._assert_dataset_size(json_dataset.video_only_dataset, "random", 2.0, 2)


@contextlib.contextmanager
def mock_json_annotations():
    """Create a temporary Ego4D-style JSON annotation file for frame videos.

    Wraps ``temp_frame_video_dataset`` and, for each generated video, writes two
    "forecasting_hands_objects" benchmark entries keyed by the video path.

    Yields:
        Tuple of (path to the JSON annotation file,
                  list of (label, video) pairs,
                  minimum duration across the generated videos).

    The temporary JSON file is removed on exit (the original left it behind
    because ``delete=False`` was never paired with a cleanup).
    """
    with temp_frame_video_dataset() as (_, videos):
        label_videos = []
        json_dict = {}
        for video in videos:
            # video[-3] is the label, video[-2] the video object, video[-1] the
            # duration (by position in the tuple yielded by the utils helper).
            label_videos.append((video[-3], video[-2]))
            name = str(video[0])
            json_dict[name] = {
                "benchmarks": {
                    "forecasting_hands_objects": [
                        {
                            "critical_frame_selection_parent_start_sec": 0.001,
                            "critical_frame_selection_parent_end_sec": 0.012,
                            "taxonomy": {
                                "noun": video[-3],
                                "verb": video[-3],
                                "noun_unsure": False,
                                "verb_unsure": False,
                            },
                        },
                        {
                            "critical_frame_selection_parent_start_sec": 0.01,
                            "critical_frame_selection_parent_end_sec": 0.05,
                            "taxonomy": {
                                "noun": video[-3],
                                "verb": video[-3],
                                "noun_unsure": False,
                                "verb_unsure": False,
                            },
                        },
                    ]
                }
            }

        # delete=False so the file survives the 'with' and can be re-opened by
        # the dataset under test; we remove it ourselves in the finally below.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".json", mode="wt") as f:
            json.dump(json_dict, f)

        try:
            # Works for any number of generated videos (the original hard-coded
            # exactly two).
            min_duration = min(video[-1] for video in videos)
            yield f.name, label_videos, min_duration
        finally:
            os.unlink(f.name)