Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Disable download for StanfordCars dataset #8309

Merged
merged 5 commits into from
Mar 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 3 additions & 9 deletions test/test_datasets_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,12 +327,6 @@ def kitti():
)


def stanford_cars():
    """Chain the ``collect_urls`` results for both StanfordCars splits.

    ``collect_urls`` is invoked once for ``"train"`` and once for ``"test"``
    (each with ``download=True``), and the two results are flattened into a
    single iterator.
    """
    # Built eagerly as a list so both collect_urls calls happen right away,
    # not lazily when the returned iterator is consumed.
    urls_per_split = [
        collect_urls(datasets.StanfordCars, ROOT, split=split_name, download=True)
        for split_name in ["train", "test"]
    ]
    return itertools.chain.from_iterable(urls_per_split)


def url_parametrization(*dataset_urls_and_ids_fns):
return pytest.mark.parametrize(
"url",
Expand Down Expand Up @@ -378,9 +372,9 @@ def test_url_is_accessible(url):
retry(lambda: assert_url_is_accessible(url))


@url_parametrization(
stanford_cars, # https://github.com/pytorch/vision/issues/7545
)
# TODO: if e.g. caltech101 starts failing, remove the pytest.mark.parametrize below and use
# @url_parametrization(caltech101)
@pytest.mark.parametrize("url", ("http://url_that_doesnt_exist.com",)) # here until we actually have a failing dataset
@pytest.mark.xfail
def test_url_is_not_accessible(url):
"""
Expand Down
53 changes: 19 additions & 34 deletions torchvision/datasets/stanford_cars.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,21 @@
import pathlib

Check warning on line 1 in torchvision/datasets/stanford_cars.py

View workflow job for this annotation

GitHub Actions / bc

Function StanfordCars.download: function deleted
from typing import Any, Callable, Optional, Tuple

from PIL import Image

from .utils import download_and_extract_archive, download_url, verify_str_arg
from .utils import verify_str_arg
from .vision import VisionDataset


class StanfordCars(VisionDataset):
"""`Stanford Cars <https://ai.stanford.edu/~jkrause/cars/car_dataset.html>`_ Dataset
"""Stanford Cars Dataset

The Cars dataset contains 16,185 images of 196 classes of cars. The data is
split into 8,144 training images and 8,041 testing images, where each class
has been split roughly in a 50-50 split

The original URL is https://ai.stanford.edu/~jkrause/cars/car_dataset.html, but it is broken.

.. note::

This class needs `scipy <https://docs.scipy.org/doc/>`_ to load target files from `.mat` format.
Expand All @@ -25,9 +27,11 @@
and returns a transformed version. E.g, ``transforms.RandomCrop``
target_transform (callable, optional): A function/transform that takes in the
target and transforms it.
download (bool, optional): If True, downloads the dataset from the internet and
puts it in root directory. If dataset is already downloaded, it is not
downloaded again."""
download (bool, optional): This parameter exists for backward compatibility but it does not
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could deprecate and remove it, but that would be disruptive for those who already explicitly set download=False. So I'm tempted to keep it around forever.

download the dataset, since the original URL is not available anymore. The dataset
seems to be available on Kaggle so you can try to manually download it using
`these instructions <https://github.com/pytorch/vision/issues/7545#issuecomment-1631441616>`_.
"""

def __init__(
self,
Expand Down Expand Up @@ -57,10 +61,18 @@
self._images_base_path = self._base_folder / "cars_test"

if download:
self.download()
raise ValueError(
NicolasHug marked this conversation as resolved.
Show resolved Hide resolved
"The original URL is broken so the StanfordCars dataset is not available for automatic "
"download anymore. You can try to download it manually following "
"https://github.com/pytorch/vision/issues/7545#issuecomment-1631441616, "
"and set download=False to avoid this error."
)

if not self._check_exists():
raise RuntimeError("Dataset not found. You can use download=True to download it")
raise RuntimeError(
"Dataset not found. Try to manually download following the instructions in "
"https://github.com/pytorch/vision/issues/7545#issuecomment-1631441616."
)

self._samples = [
(
Expand All @@ -87,33 +99,6 @@
target = self.target_transform(target)
return pil_image, target

def download(self) -> None:
    """Fetch the devkit and the archive for the configured split.

    Does nothing when ``self._check_exists()`` already reports the data as
    present. Otherwise the devkit archive is downloaded and extracted,
    followed by the train or test image archive depending on
    ``self._split``. For any split other than ``"train"`` the labelled test
    annotations file is downloaded as well.

    NOTE(review): the nesting was reconstructed from a flattened diff view;
    the annotations download is assumed to belong to the non-train branch --
    confirm against the repository.
    """
    if self._check_exists():
        return

    target_dir = str(self._base_folder)
    is_train = self._split == "train"

    devkit = (
        "https://ai.stanford.edu/~jkrause/cars/car_devkit.tgz",
        "c3b158d763b6e2245038c8ad08e45376",
    )
    if is_train:
        images = (
            "https://ai.stanford.edu/~jkrause/car196/cars_train.tgz",
            "065e5b463ae28d29e77c1b4b166cfe61",
        )
    else:
        images = (
            "https://ai.stanford.edu/~jkrause/car196/cars_test.tgz",
            "4ce7ebf6a94d07f1952d94dd34c4d501",
        )

    # Same order as before: devkit first, then the split's image archive.
    for archive_url, archive_md5 in (devkit, images):
        download_and_extract_archive(
            url=archive_url,
            download_root=target_dir,
            md5=archive_md5,
        )

    if not is_train:
        # Plain file download (no extraction) for the test annotations.
        download_url(
            url="https://ai.stanford.edu/~jkrause/car196/cars_test_annos_withlabels.mat",
            root=target_dir,
            md5="b0a2b23655a3edd16d84508592a98d10",
        )

def _check_exists(self) -> bool:
if not (self._base_folder / "devkit").is_dir():
return False
Expand Down
Loading