r"""Implements textvqa in TFDS structure. |
|
|
|
It's small data, so simple to run locally. First, copy the data to local disk: |
|
|
|
mkdir -p /tmp/data/textvqa |
|
cd /tmp/data/textvqa |
|
curl -O https://dl.fbaipublicfiles.com/textvqa/images/train_val_images.zip |
|
curl -O https://dl.fbaipublicfiles.com/textvqa/images/test_images.zip |
|
curl -O https://dl.fbaipublicfiles.com/textvqa/data/TextVQA_0.5.1_train.json |
|
curl -O https://dl.fbaipublicfiles.com/textvqa/data/TextVQA_0.5.1_val.json |
|
curl -O https://dl.fbaipublicfiles.com/textvqa/data/TextVQA_0.5.1_test.json |
|
# The Rosetta_OCR files are probably not needed. |
|
# curl -O https://dl.fbaipublicfiles.com/textvqa/data/TextVQA_Rosetta_OCR_v0.2_train.json |
|
# curl -O https://dl.fbaipublicfiles.com/textvqa/data/TextVQA_Rosetta_OCR_v0.2_val.json |
|
# curl -O https://dl.fbaipublicfiles.com/textvqa/data/TextVQA_Rosetta_OCR_v0.2_test.json |
|
unzip train_val_images.zip |
|
rm train_val_images.zip |
|
unzip test_images.zip |
|
rm test_images.zip |
|
# Background: at https://textvqa.org/dataset/ it says: |
|
# "Note: Some of the images in OpenImages are rotated, |
|
# please make sure to check the Rotation field in the Image IDs files |
|
# for train and test." |
|
curl -O https://storage.googleapis.com/openimages/2018_04/train/train-images-boxable-with-rotation.csv |
|
curl -O https://storage.googleapis.com/openimages/2018_04/test/test-images-with-rotation.csv |
|
mv train-images-boxable-with-rotation.csv train_images/rotation.csv |
|
mv test-images-with-rotation.csv test_images/rotation.csv |

Then, run the conversion locally (make sure tensorflow-datasets is installed
so that the `tfds` CLI is available):

cd big_vision/datasets
env TFDS_DATA_DIR=/tmp/tfds tfds build --datasets=textvqa
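
If the build succeeds, the result should land under /tmp/tfds/textvqa/1.0.1/,
following the usual TFDS <data_dir>/<dataset_name>/<version> layout.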

Example to load:

import tensorflow_datasets as tfds
dataset = tfds.load('textvqa', split='train', data_dir='/tmp/tfds')
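
To peek at one example (a quick sanity check; assumes eager TF and the field
names defined in this builder):

for ex in dataset.take(1):
  print(ex['question'].numpy().decode('utf-8'))
  print([a.decode('utf-8') for a in ex['answers'].numpy()])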
|
""" |
|
import json
import os

from absl import logging
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_datasets as tfds


_DESCRIPTION = """TextVQA requires models to read and reason about text in
images to answer questions about them."""


_CITATION = (
    '@inproceedings{singh2019towards,'
    'title={Towards VQA Models That Can Read},'
    'author={Singh, Amanpreet and Natarajan, Vivek and Shah, Meet and'
    ' Jiang, Yu and Chen, Xinlei and Batra, Dhruv and Parikh, Devi and'
    ' Rohrbach, Marcus},'
    'booktitle={Proceedings of the IEEE Conference on Computer Vision and'
    ' Pattern Recognition},'
    'pages={8317--8326},'
    'year={2019}}'
)


_FILEPATH = '/tmp/data/textvqa/'
_TRAIN_FILES = '/tmp/data/textvqa/TextVQA_0.5.1_train.json'
_VAL_FILES = '/tmp/data/textvqa/TextVQA_0.5.1_val.json'
_TEST_FILES = '/tmp/data/textvqa/TextVQA_0.5.1_test.json'
_ROTATION_CSV = 'rotation.csv'


class TextVqa(tfds.core.GeneratorBasedBuilder):
  """DatasetBuilder for the textvqa dataset."""

  VERSION = tfds.core.Version('1.0.1')
  RELEASE_NOTES = {
      '1.0.0': 'Initial release.',
      '1.0.1': 'Undo rotation for known rotated images.',
  }

  def _info(self) -> tfds.core.DatasetInfo:
    """Returns the dataset metadata (tfds.core.DatasetInfo)."""
    return tfds.core.DatasetInfo(
        builder=self,
        description=_DESCRIPTION,
        features=tfds.features.FeaturesDict({
            'image/id': tfds.features.Scalar(np.int32),
            'image_filepath': tfds.features.Text(),
            'image': tfds.features.Image(encoding_format='jpeg'),
            'question_id': tfds.features.Scalar(np.int32),
            'question': tfds.features.Text(),
            'answers': tfds.features.Sequence(tfds.features.Text()),
        }),
        supervised_keys=None,
        homepage='https://textvqa.org/',
        citation=_CITATION,
    )
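
  # For reference, a loaded element roughly looks like (a sketch based on the
  # feature spec above):
  #   {'image/id': int32, 'image_filepath': bytes string,
  #    'image': uint8 tensor of shape [h, w, 3], 'question_id': int32,
  #    'question': bytes string, 'answers': 1-D bytes tensor}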

  def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Returns SplitGenerators."""

    def json_to_examples(data, image_dir):
      logging.info('Processing %d items in %s', len(data), image_dir)
      # Map OpenImages image id -> rotation in degrees; NaN means unrotated.
      rot = pd.read_csv(os.path.join(_FILEPATH, image_dir, _ROTATION_CSV))
      rotation_by_id = {}
      for row in rot.itertuples():
        rotation = int(row.Rotation) if not np.isnan(row.Rotation) else 0
        rotation_by_id[row.ImageID] = rotation

      examples = {}
      for v in data:
        image_id = str(v['image_id'])
        image_filepath = os.path.join(_FILEPATH, image_dir, image_id + '.jpg')
        question_id = v['question_id']
        examples[question_id] = {
            # TextVQA image ids are OpenImages strings, so the integer
            # question id doubles as the numeric 'image/id' feature.
            'image/id': question_id,
            'image_filepath': image_filepath,
            'image': image_filepath,
            'rotation': rotation_by_id[image_id],
            'question_id': question_id,
            'question': v['question'],
            # The test split ships without ground-truth answers.
            'answers': v.get('answers', []),
        }
      return examples

    with open(_TRAIN_FILES) as f:
      train_data = json_to_examples(json.load(f)['data'], 'train_images')
    with open(_VAL_FILES) as f:
      # Val images are packaged together with the train images
      # (train_val_images.zip extracts into train_images/).
      val_data = json_to_examples(json.load(f)['data'], 'train_images')
    with open(_TEST_FILES) as f:
      test_data = json_to_examples(json.load(f)['data'], 'test_images')
    return {
        'train': self._generate_examples(train_data),
        'val': self._generate_examples(val_data),
        'test': self._generate_examples(test_data),
    }

  def _generate_examples(self, data):
    """Yields examples.

    Args:
      data: dict mapping question id to an example dict, as produced by
        json_to_examples above (still carrying the temporary 'rotation' key).

    Yields:
      (key, example) tuples from the dataset. The example has the format
      specified in the above DatasetInfo.
    """
    for k, v in data.items():
      with open(v['image_filepath'], 'rb') as f:
        image_bytes = f.read()
      if v['rotation'] != 0:
        # Undo the known OpenImages rotation so the stored image is upright.
        rotation = v['rotation']
        assert rotation % 90 == 0
        turns = rotation // 90
        image = tf.image.decode_jpeg(image_bytes)
        image_bytes = tf.io.encode_jpeg(
            tf.image.rot90(image, turns), quality=100
        ).numpy()
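        # (tf.image.rot90 above rotates counter-clockwise by 90 degrees per
        # turn; for example, one turn maps [[0, 1], [2, 3]] to
        # [[1, 3], [0, 2]].)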

      v['image'] = image_bytes
      # 'rotation' is builder-internal bookkeeping, not a dataset feature.
      del v['rotation']
      yield k, v