|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Import TallyQA into TFDS format. Uses Visual Genome and COCO images. |
|
|
|
It's small data, so simple to run locally. First, download all the data: |
|
|
|
mkdir /tmp/data/ ; cd /tmp/data |
|
wget http://images.cocodataset.org/zips/{train2014,val2014}.zip |
|
wget https://cs.stanford.edu/people/rak248/VG_100K_2/images.zip |
|
wget https://cs.stanford.edu/people/rak248/VG_100K_2/images2.zip |
|
wget -O tallyqa.zip https://github.com/manoja328/tallyqa/blob/master/tallyqa.zip?raw=true

for f in *.zip; do unzip "$f"; done
|
|
|
Then, update the PATHs below and run conversion locally like so (make sure to |
|
install tensorflow-datasets for the `tfds` util): |
|
|
|
cd big_vision/datasets |
|
env TFDS_DATA_DIR=/tmp/tfds tfds build --datasets=tallyqa |
|
|
|
Example to load: |
|
import tensorflow_datasets as tfds |
|
dataset = tfds.load('tallyqa', split='train', data_dir='/tmp/tfds') |
|
|
|
The test split distinguishes between simple and complex questions. The train |
|
split does not contain this information. We therefore set issimple to `-1` in |
|
the train split to indicate it is not known. |
|
""" |
|
|
|
import json
import os

import numpy as np
import tensorflow_datasets as tfds
|
|
|
|
|
# Local input locations; edit these to match where the data was unpacked.
# Each one is written with a trailing '/'; keep it when changing the value.

# Directory holding the TallyQA annotation files, one `<split>.json` per split
# (`train.json` and `test.json`).
_TALLYQA_PATH = '/tmp/data/tallyQA/'
# Directory prepended to image paths that start with `VG_100K`.
_VISUAL_GENOME_PATH = '/tmp/data/visual_genome/'
# Directory prepended to image paths that start with `train2014` or `val2014`.
_COCO_PATH = '/tmp/data/coco/'

# Human-readable dataset description, passed to `tfds.core.DatasetInfo`.
_DESCRIPTION = """
TallyQA: Answering Complex Counting Questions
Most counting questions in visual question answering (VQA) datasets are simple
and require no more than object detection. Here, we study algorithms for complex
counting questions that involve relationships between objects, attribute
identification, reasoning, and more. To do this, we created TallyQA, the world's
largest dataset for open-ended counting.
"""

# BibTeX entry for the TallyQA paper, passed to `tfds.core.DatasetInfo`.
_CITATION = """
@inproceedings{acharya2019tallyqa,
title={TallyQA: Answering Complex Counting Questions},
author={Acharya, Manoj and Kafle, Kushal and Kanan, Christopher},
booktitle={AAAI},
year={2019}
}
"""

# Dataset homepage, passed to `tfds.core.DatasetInfo`.
_HOMEPAGE = 'https://github.com/manoja328/TallyQA_dataset'
|
|
|
|
|
class TallyQA(tfds.core.GeneratorBasedBuilder):
    """TFDS builder that imports the TallyQA counting-question dataset.

    Annotations are read from the TallyQA json files and every entry is paired
    with the path of its image, which lives in either the Visual Genome or the
    COCO 2014 image directory. All inputs are read from the module-level
    `_TALLYQA_PATH`, `_VISUAL_GENOME_PATH` and `_COCO_PATH` locations; the
    download manager is not used.
    """

    VERSION = tfds.core.Version('1.0.0')
    RELEASE_NOTES = {'1.0.0': 'Initial release.'}
    MANUAL_DOWNLOAD_INSTRUCTIONS = """
    There are three parts which should be downloaded:
    * TallyQA (train / test json files)
    * Visual Genome images (needed for train and test split)
    * COCO (2014) train / val images (only needed for train split)
    """

    def _info(self) -> tfds.core.DatasetInfo:
        """Returns the dataset metadata."""
        features = tfds.features.FeaturesDict({
            'image': tfds.features.Image(shape=(None, None, 3)),
            'image_id': tfds.features.Scalar(dtype=np.int32),
            'image_source': tfds.features.Text(),
            'question': tfds.features.Text(),
            'question_id': tfds.features.Scalar(dtype=np.int32),
            'answer': tfds.features.Scalar(dtype=np.int32),
            # Taken from the json in the test split; always -1 in train.
            'issimple': tfds.features.Scalar(dtype=np.int32),
        })

        return tfds.core.DatasetInfo(
            builder=self,
            features=features,
            description=_DESCRIPTION,
            supervised_keys=None,
            homepage=_HOMEPAGE,
            citation=_CITATION,
        )

    def _split_generators(
        self, dl_manager: tfds.download.DownloadManager
    ) -> ...:
        """Returns one example generator per split (`train` and `test`)."""
        # Everything is read from the local paths configured at module level.
        del dl_manager
        return {
            'train': self._generate_examples(split='train'),
            'test': self._generate_examples(split='test'),
        }

    def _generate_examples(self, split: str) -> ...:
        """Yields `(key, example)` pairs for one split.

        Args:
            split: Name of the split, `'train'` or `'test'`. Selects the
                annotation file `<split>.json` inside `_TALLYQA_PATH`.

        Yields:
            Tuples of a string key of the form `'<image_id> / <question_id>'`
            and an example dict with the features declared in `_info`. The
            `'image'` entry is the path of the image file.

        Raises:
            ValueError: If an entry's image path starts with none of the
                known prefixes (`VG_100K`, `train2014`, `val2014`).
        """
        # os.path.join keeps the paths valid whether or not the configured
        # directories end with a slash.
        annotations_path = os.path.join(_TALLYQA_PATH, f'{split}.json')
        with open(annotations_path, 'r', encoding='utf-8') as f:
            tally_json = json.load(f)

        for tally_qa in tally_json:
            # The prefix of the relative image path tells which image
            # directory the file belongs to.
            filepath = tally_qa['image']
            if filepath.startswith('VG_100K'):
                filepath = os.path.join(_VISUAL_GENOME_PATH, filepath)
            elif filepath.startswith(('train2014', 'val2014')):
                filepath = os.path.join(_COCO_PATH, filepath)
            else:
                raise ValueError(f'Unknown image path: {filepath}')

            example = {
                'image': filepath,
                'image_id': tally_qa['image_id'],
                'image_source': tally_qa['data_source'],
                'question': tally_qa['question'],
                'question_id': tally_qa['question_id'],
                'answer': int(tally_qa['answer']),
            }
            if split == 'test':
                # `issimple` is read from the json only for the test split.
                example['issimple'] = tally_qa['issimple']
            else:
                # Not read for other splits; -1 marks the value as unknown.
                example['issimple'] = -1

            example_key = f'{example["image_id"]} / {example["question_id"]}'
            yield example_key, example
|
|