Skip to content

Commit

Permalink
Added dataset download support in fbcode (#3823) (#3826)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: #3823

Uploaded the FashionMNIST dataset to the [manifold](https://www.internalfb.com/intern/network/manifold/?bucket=torchvision&path=tree%2Fdatasets) bucket `torchvision`. Any new dataset that needs to be added can be uploaded under `tree/datasets/<dataset_name>`.

Reviewed By: datumbox

Differential Revision: D28358470

fbshipit-source-id: 6f2282d3f1ce4b1416e962de8fb132896d4b2d76
  • Loading branch information
prabhat00155 authored May 13, 2021
1 parent 0fd0f50 commit f5aa5f5
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 17 deletions.
6 changes: 6 additions & 0 deletions torchvision/datasets/_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
def _download_file_from_remote_location(fpath: str) -> None:
pass


def _is_remote_location_available() -> bool:
return False
43 changes: 26 additions & 17 deletions torchvision/datasets/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@
import torch
from torch.utils.model_zoo import tqdm

from ._utils import (
_download_file_from_remote_location,
_is_remote_location_available,
)


USER_AGENT = "pytorch/vision"

Expand Down Expand Up @@ -117,26 +122,30 @@ def download_url(
print('Using downloaded and verified file: ' + fpath)
return

# expand redirect chain if needed
url = _get_redirect_url(url, max_hops=max_redirect_hops)
if _is_remote_location_available():
_download_file_from_remote_location(fpath)
else:
# expand redirect chain if needed
url = _get_redirect_url(url, max_hops=max_redirect_hops)

# check if file is located on Google Drive
file_id = _get_google_drive_file_id(url)
if file_id is not None:
return download_file_from_google_drive(file_id, root, filename, md5)
# check if file is located on Google Drive
file_id = _get_google_drive_file_id(url)
if file_id is not None:
return download_file_from_google_drive(file_id, root, filename, md5)

# download the file
try:
print('Downloading ' + url + ' to ' + fpath)
_urlretrieve(url, fpath)
except (urllib.error.URLError, IOError) as e: # type: ignore[attr-defined]
if url[:5] == 'https':
url = url.replace('https:', 'http:')
print('Failed download. Trying https -> http instead.'
' Downloading ' + url + ' to ' + fpath)
# download the file
try:
print('Downloading ' + url + ' to ' + fpath)
_urlretrieve(url, fpath)
else:
raise e
except (urllib.error.URLError, IOError) as e: # type: ignore[attr-defined]
if url[:5] == 'https':
url = url.replace('https:', 'http:')
print('Failed download. Trying https -> http instead.'
' Downloading ' + url + ' to ' + fpath)
_urlretrieve(url, fpath)
else:
raise e

# check integrity of downloaded file
if not check_integrity(fpath, md5):
raise RuntimeError("File not found or corrupted.")
Expand Down

0 comments on commit f5aa5f5

Please sign in to comment.