Skip to content

Commit

Permalink
Download and Kinetics 400/600/700 Datasets (#3680)
Browse files Browse the repository at this point in the history
* Initial commit

* pmeiers comments

Co-authored-by: Philip Meier <github.pmeier@posteo.de>

* pmeiers changes

Co-authored-by: Philip Meier <github.pmeier@posteo.de>

* pmeiers comments

Co-authored-by: Philip Meier <github.pmeier@posteo.de>

* replace pandas with system library to avoid crashes

* Lint

* Lint

* fixing unittest

* Minor comments removal

* pmeier comments

Co-authored-by: Philip Meier <github.pmeier@posteo.de>

* remove asserts

* address pmeier formatting changes

* address pmeier changes

Co-authored-by: Philip Meier <github.pmeier@posteo.de>

* pmeier changes

Co-authored-by: Philip Meier <github.pmeier@posteo.de>

* rename n_classes to num_classes

* formatting changes

* doc change to add ".mp4" to backported class

* formatting to correct line length

* adding **kwargs to Kinetics400 class

* remove urllib request and download the file directly

* annotations and files can be already downloaded

* test fix

* add download tests for Kinetics

* users now don't need to provide full path within the root for new Kinetics dataset

* linter

* Update test/test_datasets_download.py

* Update torchvision/datasets/kinetics.py

Co-authored-by: Philip Meier <github.pmeier@posteo.de>

* revert whitespace (3680#discussion_r626382842)

* addressing annotation_path parameter which is unnecessary

* Update torchvision/datasets/kinetics.py

Co-authored-by: Philip Meier <github.pmeier@posteo.de>

* Update torchvision/datasets/kinetics.py

Co-authored-by: Philip Meier <github.pmeier@posteo.de>

* kwargs update

Co-authored-by: Philip Meier <github.pmeier@posteo.de>

* expose num_download_workers as public

* swap os.isfile with check_integrity

* nit on private things

* special case if there are no default arguments

* revert changes to kinetics400 test case for BC

* add split_folder changes and support for legacy format

* pmeiers suggestions

Co-authored-by: Philip Meier <github.pmeier@posteo.de>

* pmeiers suggestions - root comment

* pmeiers comments - annotation attribute removed

* pmeiers suggestion

Co-authored-by: Philip Meier <github.pmeier@posteo.de>

* pmeiers suggestion

Co-authored-by: Philip Meier <github.pmeier@posteo.de>

* pmeiers suggestion

Co-authored-by: Philip Meier <github.pmeier@posteo.de>

* pmeiers suggestion

Co-authored-by: Philip Meier <github.pmeier@posteo.de>

* Update torchvision/datasets/kinetics.py

Co-authored-by: Philip Meier <github.pmeier@posteo.de>

* Update torchvision/datasets/kinetics.py

Co-authored-by: Philip Meier <github.pmeier@posteo.de>

* Update torchvision/datasets/kinetics.py

Co-authored-by: Philip Meier <github.pmeier@posteo.de>

* Update torchvision/datasets/kinetics.py

Co-authored-by: Philip Meier <github.pmeier@posteo.de>

* Update torchvision/datasets/kinetics.py

Co-authored-by: Philip Meier <github.pmeier@posteo.de>

* Update torchvision/datasets/kinetics.py

Co-authored-by: Philip Meier <github.pmeier@posteo.de>

* minor debugging

* nit picks

* only include public kwargs into defaults

* add _use_legacy_structure in favour of **kwargs

* add type hints for Kinetics400

* flake8

* flake8

* flake8

* rename to make things clearer

* permuting the output

Co-authored-by: Philip Meier <github.pmeier@posteo.de>
Co-authored-by: Francisco Massa <fvsmassa@gmail.com>
  • Loading branch information
3 people authored Jun 10, 2021
1 parent 9a6c8bb commit 8ea04d1
Show file tree
Hide file tree
Showing 5 changed files with 305 additions and 30 deletions.
8 changes: 6 additions & 2 deletions test/datasets_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -416,7 +416,11 @@ def _populate_private_class_attributes(cls):
continue

defaults.append(
{kwarg: default for kwarg, default in zip(argspec.args[-len(argspec.defaults):], argspec.defaults)}
{
kwarg: default
for kwarg, default in zip(argspec.args[-len(argspec.defaults):], argspec.defaults)
if not kwarg.startswith("_")
}
)

if not argspec.varkw:
Expand Down Expand Up @@ -637,7 +641,7 @@ def __init__(self, *args, **kwargs):

def _set_default_frames_per_clip(self, inject_fake_data):
argspec = inspect.getfullargspec(self.DATASET_CLASS.__init__)
args_without_default = argspec.args[1:-len(argspec.defaults)]
args_without_default = argspec.args[1:(-len(argspec.defaults) if argspec.defaults else None)]
frames_per_clip_last = args_without_default[-1] == "frames_per_clip"

@functools.wraps(inject_fake_data)
Expand Down
21 changes: 21 additions & 0 deletions test/test_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -944,6 +944,27 @@ def test_not_found_or_corrupted(self):
super().test_not_found_or_corrupted()


class KineticsTestCase(datasets_utils.VideoDatasetTestCase):
    """Test case for ``datasets.Kinetics`` over each split / num_classes combination."""

    DATASET_CLASS = datasets.Kinetics
    ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(
        split=("train", "val"),
        num_classes=("400", "600", "700"),
    )

    def inject_fake_data(self, tmpdir, config):
        """Request fake ``.mp4`` videos for each class below ``tmpdir / config["split"]``.

        ``datasets_utils.create_video_folder`` is called once per class with a
        file-name callback that yields a random 11-character stem plus ``.mp4``.

        Returns:
            The total number of videos requested (videos per class times number of classes).
        """
        class_names = ("Abseiling", "Zumba")
        videos_per_class = 2
        split_dir = pathlib.Path(tmpdir) / config["split"]
        # Characters the random file stems are drawn from.
        alphabet = string.ascii_letters + string.digits + "-_"

        def random_video_name(_):
            # The argument supplied by create_video_folder is ignored; every name is random.
            return f"{datasets_utils.create_random_string(11, alphabet)}.mp4"

        for class_name in class_names:
            datasets_utils.create_video_folder(split_dir, class_name, random_video_name, videos_per_class)

        return videos_per_class * len(class_names)


class Kinetics400TestCase(datasets_utils.VideoDatasetTestCase):
DATASET_CLASS = datasets.Kinetics400

Expand Down
20 changes: 20 additions & 0 deletions test/test_datasets_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,25 @@ def widerface():
)


def kinetics():
    """Collect download configs for ``datasets.Kinetics`` in every variant.

    ``collect_download_configs`` is invoked once per (num_classes, split) pair,
    eagerly and in ``itertools.product`` order, with a loader that instantiates
    the dataset with ``download=True`` under ``ROOT/Kinetics<num_classes>``.

    Returns:
        An ``itertools.chain`` over the per-variant results.
    """
    per_variant = []
    for num_classes, split in itertools.product(("400", "600", "700"), ("train", "val")):
        configs = collect_download_configs(
            lambda: datasets.Kinetics(
                path.join(ROOT, f"Kinetics{num_classes}"),
                frames_per_clip=1,
                num_classes=num_classes,
                split=split,
                download=True,
            ),
            name=f"Kinetics, {num_classes}, {split}",
            file="kinetics",
        )
        per_variant.append(configs)
    return itertools.chain(*per_variant)


def kitti():
return itertools.chain(
*[
Expand Down Expand Up @@ -440,6 +459,7 @@ def make_parametrize_kwargs(download_configs):
usps(),
celeba(),
widerface(),
kinetics(),
kitti(),
)
)
Expand Down
4 changes: 2 additions & 2 deletions torchvision/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from .sbd import SBDataset
from .vision import VisionDataset
from .usps import USPS
from .kinetics import Kinetics400
from .kinetics import Kinetics400, Kinetics
from .hmdb51 import HMDB51
from .ucf101 import UCF101
from .places365 import Places365
Expand All @@ -34,6 +34,6 @@
'Omniglot', 'SBU', 'Flickr8k', 'Flickr30k',
'VOCSegmentation', 'VOCDetection', 'Cityscapes', 'ImageNet',
'Caltech101', 'Caltech256', 'CelebA', 'WIDERFace', 'SBDataset',
'VisionDataset', 'USPS', 'Kinetics400', 'HMDB51', 'UCF101',
'VisionDataset', 'USPS', 'Kinetics400', "Kinetics", 'HMDB51', 'UCF101',
'Places365', 'Kitti',
)
Loading

0 comments on commit 8ea04d1

Please sign in to comment.