|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
r"""Creates TFDS dataset for Screen2words.

Preparing the data:
  1) mkdir /tmp/data/rico && cd /tmp/data/rico
  2) wget https://storage.googleapis.com/crowdstf-rico-uiuc-4540/rico_dataset_v0.1/unique_uis.tar.gz
  3) tar xvfz unique_uis.tar.gz && rm unique_uis.tar.gz
  4) git clone https://github.com/google-research-datasets/screen2words.git

Then, run conversion locally (make sure to install tensorflow-datasets for the `tfds` util):

  cd big_vision/datasets
  env TFDS_DATA_DIR=/tmp/tfds tfds build --datasets=screen2words

Example to load:

  import tensorflow_datasets as tfds
  dataset = tfds.load('screen2_words', split='train', data_dir='/tmp/tfds')
"""
|
|
|
import collections |
|
import csv |
|
import os |
|
|
|
import numpy as np |
|
import tensorflow_datasets as tfds |
|
|
|
|
|
# Short human-readable description stored in the dataset metadata.
_DESCRIPTION = """Screen2words dataset."""

# BibTeX entry for the Screen2words paper, stored in the dataset metadata.
_CITATION = """
@inproceedings{wang2021screen2words,
  title={Screen2words: Automatic mobile UI summarization with multimodal
         learning},
  author={Wang, Bryan and
          Li, Gang and
          Zhou, Xin and
          Chen, Zhourong and
          Grossman, Tovi and
          Li, Yang},
  booktitle={The 34th Annual ACM Symposium on User Interface Software
             and Technology},
  pages={498--510},
  year={2021}
}
"""

# Local checkout of the screen2words repository; the builder reads
# `split/{split}_screens.txt` and `screen_summaries.csv` from here.
_SCREEN2WORDS_DIR = "/tmp/data/rico/screen2words"
# Directory holding the RICO screenshots, looked up as `{image_id}.jpg`.
# NOTE(review): presumably the folder unpacked from `unique_uis.tar.gz` -
# confirm against the archive layout.
_RICO_DIR = "/tmp/data/rico/combined"

# Split names to build; each must have a matching `{split}_screens.txt` file.
_SPLITS_TO_GENERATE = ["train", "dev", "test"]
|
|
|
|
|
class Screen2Words(tfds.core.GeneratorBasedBuilder):
    """DatasetBuilder for the Screen2words dataset.

    Pairs the screen ids and summaries found under `_SCREEN2WORDS_DIR` with
    the screenshot files found under `_RICO_DIR`.
    """

    VERSION = tfds.core.Version("1.0.0")
    RELEASE_NOTES = {"1.0.0": "First release."}

    def _info(self):
        """Returns the metadata.

        Each example carries the integer screen id, the screenshot file name,
        the JPEG-encoded screenshot and the list of summaries for the screen.
        """
        return tfds.core.DatasetInfo(
            builder=self,
            description=_DESCRIPTION,
            features=tfds.features.FeaturesDict({
                "image/id": tfds.features.Scalar(np.int32),
                "image/filename": tfds.features.Text(),
                "image": tfds.features.Image(encoding_format="jpeg"),
                "summary": tfds.features.Sequence(tfds.features.Text()),
            }),
            supervised_keys=None,
            homepage="https://github.com/google-research-datasets/screen2words",
            citation=_CITATION,
        )

    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Returns SplitGenerators.

        Args:
          dl_manager: Unused; all inputs are read from local directories.

        Returns:
          A dict mapping each name in `_SPLITS_TO_GENERATE` to its example
          generator.
        """
        return {
            split: self._generate_examples(split)
            for split in _SPLITS_TO_GENERATE
        }

    def _generate_examples(self, split: str):
        """Yields (key, example) tuples for the requested split.

        Args:
          split: Split name; selects `split/{split}_screens.txt` inside
            `_SCREEN2WORDS_DIR` as the list of screen ids to emit.

        Yields:
          `(image_id, example)` pairs, one per id listed in the split file.

        Raises:
          ValueError: If a non-blank line of the split file, or a `screenId`
            value in the summaries CSV, is not an integer.
        """
        id_list_fname = os.path.join(
            _SCREEN2WORDS_DIR, "split", f"{split}_screens.txt")
        with open(id_list_fname, "r", encoding="utf-8") as fin:
            # Blank lines (e.g. a trailing empty line) are skipped rather than
            # handed to int(), which would raise on them. int() itself
            # tolerates the surrounding whitespace/newline.
            split_ids = [int(line) for line in fin if line.strip()]

        summaries_fname = os.path.join(
            _SCREEN2WORDS_DIR, "screen_summaries.csv")
        # A screen can have several summaries; group them by screen id.
        summaries = collections.defaultdict(list)
        # newline="" is what the csv module expects, so that quoted fields
        # containing line breaks are parsed correctly.
        with open(summaries_fname, "r", encoding="utf-8", newline="") as fin:
            for entry in csv.DictReader(fin):
                summaries[int(entry["screenId"])].append(entry["summary"])

        for image_id in split_ids:
            filename = f"{image_id}.jpg"
            yield image_id, {
                "image/id": image_id,
                "image/filename": filename,
                "image": os.path.join(_RICO_DIR, filename),
                # .get() keeps the lookup from inserting empty entries into
                # the defaultdict for screens without summaries.
                "summary": summaries.get(image_id, []),
            }
|
|