Skip to content
This repository has been archived by the owner on Jul 2, 2021. It is now read-only.

Add ADE20K dataset #429

Merged
merged 17 commits into from
Oct 6, 2017
4 changes: 4 additions & 0 deletions chainercv/datasets/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
from chainercv.datasets.ade20k.ade20k_semantic_segmentation_dataset import ADE20KSemanticSegmentationDataset # NOQA
from chainercv.datasets.ade20k.ade20k_test_image_dataset import ADE20KTestImageDataset # NOQA
from chainercv.datasets.ade20k.ade20k_utils import ade20k_label_colors # NOQA
from chainercv.datasets.ade20k.ade20k_utils import ade20k_label_names # NOQA
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

According to our naming convention, ade20k_label_colors should be ade20k_semantic_segmentation_label_colors.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

https://github.com/chainer/chainercv/blob/master/chainercv/datasets/cityscapes/cityscapes_utils.py#L53

Do you think this is also a problem?
label_names should be changed as well because it can be different from the one used by Instance Segmentation.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that is also a problem. Thank you for pointing that out.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To summarize:

  • ade20k_label_colors --> ade20k_semantic_segmentation_label_colors
  • ade20k_label_names --> ade20k_semantic_segmentation_label_names
  • cityscapes_label_colors --> cityscapes_semantic_segmentation_label_colors
  • cityscapes_label_names --> cityscapes_semantic_segmentation_label_names

Objects for CamVid need not be changed because this dataset only contains semantic segmentation data.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for summarizing.

Objects for CamVid need not be changed because this dataset only contains semantic segmentation data.

I agree with you.

from chainercv.datasets.camvid.camvid_dataset import camvid_ignore_label_color # NOQA
from chainercv.datasets.camvid.camvid_dataset import camvid_label_colors # NOQA
from chainercv.datasets.camvid.camvid_dataset import camvid_label_names # NOQA
Expand Down
Empty file.
81 changes: 81 additions & 0 deletions chainercv/datasets/ade20k/ade20k_semantic_segmentation_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
import glob
import os

import numpy as np

from chainer import dataset
from chainer.dataset import download
from chainercv import utils
from chainercv.utils import read_image

# Dataset directory name that get_ade20k() passes to
# download.get_dataset_directory().
root = 'pfnet/chainercv/ade20k'
# Location of the train/val archive that get_ade20k() passes to
# utils.cached_download().
url = 'http://data.csail.mit.edu/places/ADEchallenge/ADEChallengeData2016.zip'


def get_ade20k():
    """Fetch the ADE20K archive and unpack it into the dataset directory.

    The directory is obtained from ``download.get_dataset_directory(root)``
    and the archive from ``utils.cached_download(url)``. The archive is then
    extracted into that directory with ``utils.extractall``, using the file
    extension of ``url`` to select the archive format.

    Returns:
        The dataset directory into which the archive was extracted.

    """
    dataset_dir = download.get_dataset_directory(root)
    archive_path = utils.cached_download(url)
    # Only the extension (e.g. '.zip') is needed to pick the extractor.
    _, archive_ext = os.path.splitext(url)
    utils.extractall(archive_path, dataset_dir, archive_ext)
    return dataset_dir


class ADE20KSemanticSegmentationDataset(dataset.DatasetMixin):

    """Semantic segmentation dataset for `ADE20K`_.

    This is the ADE20K dataset distributed on the MIT Scene Parsing Benchmark
    website. It has 20,210 training images and 2,000 validation images.

    .. _`ADE20K`: http://sceneparsing.csail.mit.edu/

    Args:
        data_dir (string): Path to the dataset directory. The directory should
            contain the :obj:`ADEChallengeData2016` directory. And that
            directory should contain at least :obj:`images` and
            :obj:`annotations` directories. If :obj:`auto` is given, the
            dataset is automatically downloaded into
            :obj:`$CHAINER_DATASET_ROOT/pfnet/chainercv/ade20k`.
        split ({'train', 'val'}): Select from dataset splits used in
            MIT Scene Parsing Benchmark dataset (ADE20K).

    Raises:
        ValueError: If :obj:`split` is neither :obj:`'train'` nor
            :obj:`'val'`.

    """

    def __init__(self, data_dir='auto', split='train'):
        # Validate the split before touching the network or the disk so that
        # a typo does not trigger a large download first.
        if split == 'train':
            split_dir = 'training'
        elif split == 'val':
            split_dir = 'validation'
        else:
            raise ValueError(
                'Please give \'split\' argument with either \'train\' or '
                '\'val\'.')

        # Compare by value: identity comparison against a string literal only
        # works by accident of interning and warns on Python 3.8+.
        if data_dir == 'auto':
            data_dir = get_ade20k()

        base_dir = os.path.join(data_dir, 'ADEChallengeData2016')
        img_dir = os.path.join(base_dir, 'images', split_dir)
        label_dir = os.path.join(base_dir, 'annotations', split_dir)

        # Images and labels are paired purely by their position in the sorted
        # listings (assumes matching base names in both directories --
        # TODO confirm against the extracted archive).
        self.img_paths = sorted(glob.glob(os.path.join(img_dir, '*.jpg')))
        self.label_paths = sorted(glob.glob(os.path.join(label_dir, '*.png')))

    def __len__(self):
        """Returns the number of images found for the selected split."""
        return len(self.img_paths)

    def get_example(self, i):
        """Returns the i-th example.

        Args:
            i (int): The index of the example.

        Returns:
            Returns a tuple consisting of a color image and a label whose
            shapes are (3, H, W) and (H, W), respectively. H and W are height
            and width of the image. The dtype of the color image is
            :obj:`numpy.float32` and the dtype of the label image is
            :obj:`numpy.int32`.

        """
        img = read_image(self.img_paths[i])
        # The label is read as a single-channel image; drop the channel axis.
        label = read_image(
            self.label_paths[i], dtype=np.int32, color=False)[0]
        return img, label
58 changes: 58 additions & 0 deletions chainercv/datasets/ade20k/ade20k_test_image_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import glob
import os

from chainer import dataset
from chainer.dataset import download
from chainercv import utils
from chainercv.utils import read_image

# Dataset directory name that get_ade20k() passes to
# download.get_dataset_directory().
root = 'pfnet/chainercv/ade20k'
# Location of the test-split archive that get_ade20k() passes to
# utils.cached_download().
url = 'http://data.csail.mit.edu/places/ADEchallenge/release_test.zip'


def get_ade20k():
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about moving this function under ade20k_utils?

data_root = download.get_dataset_directory(root)
cache_path = utils.cached_download(url)
utils.extractall(cache_path, data_root, os.path.splitext(url)[1])
return data_root


class ADE20KTestImageDataset(dataset.DatasetMixin):

    """Image dataset for test split of `ADE20K`_.

    This is an image dataset of the test split in the ADE20K dataset
    distributed at the MIT Scene Parsing Benchmark website. It has 3,352 test
    images.

    .. _`ADE20K`: http://sceneparsing.csail.mit.edu/

    Args:
        data_dir (string): Path to the dataset directory. The directory should
            contain the :obj:`release_test` dir. If :obj:`auto` is given, the
            dataset is automatically downloaded into
            :obj:`$CHAINER_DATASET_ROOT/pfnet/chainercv/ade20k`.

    """

    def __init__(self, data_dir='auto'):
        # Compare by value: identity comparison against a string literal only
        # works by accident of interning and warns on Python 3.8+.
        if data_dir == 'auto':
            data_dir = get_ade20k()
        img_dir = os.path.join(data_dir, 'release_test', 'testing')
        # Sorted so that indices map to the same files on every run.
        self.img_paths = sorted(glob.glob(os.path.join(img_dir, '*.jpg')))

    def __len__(self):
        """Returns the number of test images found."""
        return len(self.img_paths)

    def get_example(self, i):
        """Returns the i-th example.

        Args:
            i (int): The index of the example.

        Returns:
            Returns a color image whose shape is (3, H, W). H and W are height
            and width of the image. The dtype of the image is
            :obj:`numpy.float32`.

        """
        return read_image(self.img_paths[i])
Loading