Skip to content

Commit

Permalink
[fbsync] Download and Kinetics 400/600/700 Datasets (#3680)
Browse files Browse the repository at this point in the history
Summary:
* Initial commit

* pmeiers comments

* pmeiers changes

* pmeiers comments

* replace pandas with system library to avoid crashes

* Lint

* Lint

* fixing unittest

* Minor comments removal

* pmeier comments

* remove asserts

* address pmeier formatting changes

* address pmeier changes

* pmeier changes

* rename n_classes to num_classes

* formatting changes

* doc change to add ".mp4" to backported class

* formatting to correct line length

* adding **kwargs to Kinetics400 class

* remove urllib request and download the file directly

* annotations and files can be already downloaded

* test fix

* add download tests for Kinetics

* users now don't need to provide full path within the root for new Kinetics dataset

* linter

* Update test/test_datasets_download.py

* Update torchvision/datasets/kinetics.py

* revert whitespace (3680#discussion_r626382842)

* addressing annotation_path parameter which is unnecessary

* Update torchvision/datasets/kinetics.py

* Update torchvision/datasets/kinetics.py

* kwargs update

* expose num_download_workers as public

* swap os.isfile with check_integrity

* nit on private things

* special case if there are no default arguments

* revert changes to kinetics400 test case for BC

* add split_folder changes and support for legacy format

* pmeiers suggestions

* pmeiers suggestions - root comment

* pmeiers comments - annotation attribute removed

* pmeiers suggestion

* pmeiers suggestion

* pmeiers suggestion

* pmeiers suggestion

* Update torchvision/datasets/kinetics.py

* Update torchvision/datasets/kinetics.py

* Update torchvision/datasets/kinetics.py

* Update torchvision/datasets/kinetics.py

* Update torchvision/datasets/kinetics.py

* Update torchvision/datasets/kinetics.py

* minor debugging

* nit picks

* only include public kwargs into defaults

* add _use_legacy_structure in favour of **kwargs

* add type hints for Kinetics400

* flake8

* flake8

* flake8

* rename to make things clearer

* permuting the output

Reviewed By: fmassa

Differential Revision: D29097736

fbshipit-source-id: 1de2119e82eadbbba682f0897f03aba3929e3604

Co-authored-by: Philip Meier <github.pmeier@posteo.de>
Co-authored-by: Philip Meier <github.pmeier@posteo.de>
Co-authored-by: Philip Meier <github.pmeier@posteo.de>
Co-authored-by: Philip Meier <github.pmeier@posteo.de>
Co-authored-by: Philip Meier <github.pmeier@posteo.de>
Co-authored-by: Philip Meier <github.pmeier@posteo.de>
Co-authored-by: Philip Meier <github.pmeier@posteo.de>
Co-authored-by: Philip Meier <github.pmeier@posteo.de>
Co-authored-by: Philip Meier <github.pmeier@posteo.de>
Co-authored-by: Philip Meier <github.pmeier@posteo.de>
Co-authored-by: Philip Meier <github.pmeier@posteo.de>
Co-authored-by: Philip Meier <github.pmeier@posteo.de>
Co-authored-by: Philip Meier <github.pmeier@posteo.de>
Co-authored-by: Philip Meier <github.pmeier@posteo.de>
Co-authored-by: Philip Meier <github.pmeier@posteo.de>
Co-authored-by: Philip Meier <github.pmeier@posteo.de>
Co-authored-by: Philip Meier <github.pmeier@posteo.de>
Co-authored-by: Philip Meier <github.pmeier@posteo.de>
Co-authored-by: Philip Meier <github.pmeier@posteo.de>
Co-authored-by: Philip Meier <github.pmeier@posteo.de>
Co-authored-by: Philip Meier <github.pmeier@posteo.de>
Co-authored-by: Philip Meier <github.pmeier@posteo.de>
Co-authored-by: Francisco Massa <fvsmassa@gmail.com>
  • Loading branch information
3 people authored and facebook-github-bot committed Jun 14, 2021
1 parent 3482225 commit e76e52f
Show file tree
Hide file tree
Showing 5 changed files with 305 additions and 30 deletions.
8 changes: 6 additions & 2 deletions test/datasets_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -416,7 +416,11 @@ def _populate_private_class_attributes(cls):
continue

defaults.append(
{kwarg: default for kwarg, default in zip(argspec.args[-len(argspec.defaults):], argspec.defaults)}
{
kwarg: default
for kwarg, default in zip(argspec.args[-len(argspec.defaults):], argspec.defaults)
if not kwarg.startswith("_")
}
)

if not argspec.varkw:
Expand Down Expand Up @@ -637,7 +641,7 @@ def __init__(self, *args, **kwargs):

def _set_default_frames_per_clip(self, inject_fake_data):
argspec = inspect.getfullargspec(self.DATASET_CLASS.__init__)
args_without_default = argspec.args[1:-len(argspec.defaults)]
args_without_default = argspec.args[1:(-len(argspec.defaults) if argspec.defaults else None)]
frames_per_clip_last = args_without_default[-1] == "frames_per_clip"

@functools.wraps(inject_fake_data)
Expand Down
21 changes: 21 additions & 0 deletions test/test_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -944,6 +944,27 @@ def test_not_found_or_corrupted(self):
super().test_not_found_or_corrupted()


class KineticsTestCase(datasets_utils.VideoDatasetTestCase):
    """Test case for ``datasets.Kinetics``.

    Every combination of ``split`` ("train", "val") and ``num_classes``
    ("400", "600", "700") is registered as an additional configuration.
    """

    DATASET_CLASS = datasets.Kinetics
    ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(
        split=("train", "val"),
        num_classes=("400", "600", "700"),
    )

    def inject_fake_data(self, tmpdir, config):
        """Create fake class folders with videos below ``tmpdir / config["split"]``.

        Returns:
            The total number of videos requested (videos per class times number of classes).
        """
        split_root = pathlib.Path(tmpdir) / config["split"]
        # Characters the random 11-character file stems are drawn from.
        alphabet = string.ascii_letters + string.digits + "-_"
        class_names = ("Abseiling", "Zumba")
        videos_per_class = 2

        def random_video_name(_):
            # The argument supplied by create_video_folder is ignored; every name is random.
            return f"{datasets_utils.create_random_string(11, alphabet)}.mp4"

        for class_name in class_names:
            datasets_utils.create_video_folder(split_root, class_name, random_video_name, videos_per_class)

        return videos_per_class * len(class_names)


class Kinetics400TestCase(datasets_utils.VideoDatasetTestCase):
DATASET_CLASS = datasets.Kinetics400

Expand Down
20 changes: 20 additions & 0 deletions test/test_datasets_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,25 @@ def widerface():
)


def kinetics():
    """Collect the download configs of ``datasets.Kinetics``.

    One collection is made per combination of ``num_classes`` ("400", "600", "700")
    and ``split`` ("train", "val"); the results are chained into a single iterator.
    """
    variants = itertools.product(("400", "600", "700"), ("train", "val"))

    collected = []
    for num_classes, split in variants:
        # The loader closes over the current loop variables, exactly like the
        # lambda inside the original comprehension did.
        collected.append(
            collect_download_configs(
                lambda: datasets.Kinetics(
                    path.join(ROOT, f"Kinetics{num_classes}"),
                    frames_per_clip=1,
                    num_classes=num_classes,
                    split=split,
                    download=True,
                ),
                name=f"Kinetics, {num_classes}, {split}",
                file="kinetics",
            )
        )

    return itertools.chain.from_iterable(collected)


def kitti():
return itertools.chain(
*[
Expand Down Expand Up @@ -440,6 +459,7 @@ def make_parametrize_kwargs(download_configs):
usps(),
celeba(),
widerface(),
kinetics(),
kitti(),
)
)
Expand Down
4 changes: 2 additions & 2 deletions torchvision/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from .sbd import SBDataset
from .vision import VisionDataset
from .usps import USPS
from .kinetics import Kinetics400
from .kinetics import Kinetics400, Kinetics
from .hmdb51 import HMDB51
from .ucf101 import UCF101
from .places365 import Places365
Expand All @@ -34,6 +34,6 @@
'Omniglot', 'SBU', 'Flickr8k', 'Flickr30k',
'VOCSegmentation', 'VOCDetection', 'Cityscapes', 'ImageNet',
'Caltech101', 'Caltech256', 'CelebA', 'WIDERFace', 'SBDataset',
'VisionDataset', 'USPS', 'Kinetics400', 'HMDB51', 'UCF101',
'VisionDataset', 'USPS', 'Kinetics400', "Kinetics", 'HMDB51', 'UCF101',
'Places365', 'Kitti',
)
Loading

0 comments on commit e76e52f

Please sign in to comment.