Skip to content

Commit

Permalink
[Fix] Add load_url to handle incompatibility of PyTorch versions (#1377)
Browse files Browse the repository at this point in the history
* [Fix] Fix torch.load error

* [Fix] Fix torch.load error

* rename _save to _save_ckpt

* add load_url to handle incompatibility of PyTorch versions

* add unittest for load_url

* fix typo

* print a friendly message when an error occurs
  • Loading branch information
zhouzaida authored Nov 19, 2021
1 parent add157c commit 990d8b6
Show file tree
Hide file tree
Showing 6 changed files with 168 additions and 9 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ jobs:
- name: Run unittests and generate coverage report
run: |
pip install -r requirements/test.txt
pytest tests/ --ignore=tests/test_runner --ignore=tests/test_optimizer.py --ignore=tests/test_cnn --ignore=tests/test_parallel.py --ignore=tests/test_ops --ignore=tests/test_load_model_zoo.py --ignore=tests/test_utils/test_logging.py --ignore=tests/test_image/test_io.py --ignore=tests/test_utils/test_registry.py --ignore=tests/test_utils/test_parrots_jit.py --ignore=tests/test_utils/test_trace.py
pytest tests/ --ignore=tests/test_runner --ignore=tests/test_optimizer.py --ignore=tests/test_cnn --ignore=tests/test_parallel.py --ignore=tests/test_ops --ignore=tests/test_load_model_zoo.py --ignore=tests/test_utils/test_logging.py --ignore=tests/test_image/test_io.py --ignore=tests/test_utils/test_registry.py --ignore=tests/test_utils/test_parrots_jit.py --ignore=tests/test_utils/test_trace.py --ignore=tests/test_utils/test_hub.py
build_without_ops:
runs-on: ubuntu-18.04
Expand Down
7 changes: 3 additions & 4 deletions mmcv/runner/checkpoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,12 @@
import torch
import torchvision
from torch.optim import Optimizer
from torch.utils import model_zoo

import mmcv
from ..fileio import FileClient
from ..fileio import load as load_file
from ..parallel import is_module_wrapper
from ..utils import mkdir_or_exist
from ..utils import load_url, mkdir_or_exist
from .dist_utils import get_dist_info

ENV_MMCV_HOME = 'MMCV_HOME'
Expand Down Expand Up @@ -281,12 +280,12 @@ def load_from_http(filename, map_location=None, model_dir=None):
rank, world_size = get_dist_info()
rank = int(os.environ.get('LOCAL_RANK', rank))
if rank == 0:
checkpoint = model_zoo.load_url(
checkpoint = load_url(
filename, model_dir=model_dir, map_location=map_location)
if world_size > 1:
torch.distributed.barrier()
if rank > 0:
checkpoint = model_zoo.load_url(
checkpoint = load_url(
filename, model_dir=model_dir, map_location=map_location)
return checkpoint

Expand Down
3 changes: 2 additions & 1 deletion mmcv/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
_MaxPoolNd, get_build_config, is_rocm_pytorch, _get_cuda_home)
from .registry import Registry, build_from_cfg
from .trace import is_jit_tracing
from .hub import load_url
__all__ = [
'Config', 'ConfigDict', 'DictAction', 'collect_env', 'get_logger',
'print_log', 'is_str', 'iter_cast', 'list_cast', 'tuple_cast',
Expand All @@ -65,5 +66,5 @@
'assert_dict_has_keys', 'assert_keys_equal', 'assert_is_norm_layer',
'assert_params_all_zeros', 'check_python_script',
'is_method_overridden', 'is_jit_tracing', 'is_rocm_pytorch',
'_get_cuda_home', 'has_method'
'_get_cuda_home', 'load_url', 'has_method'
]
127 changes: 127 additions & 0 deletions mmcv/utils/hub.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
# The 1.6 release of PyTorch switched torch.save to use a new zipfile-based
# file format. It will cause RuntimeError when a checkpoint was saved in
# torch >= 1.6.0 but loaded in torch < 1.7.0.
# More details at https://github.com/open-mmlab/mmpose/issues/904
from .parrots_wrapper import TORCH_VERSION
from .path import mkdir_or_exist
from .version_utils import digit_version

if TORCH_VERSION != 'parrots' and digit_version(TORCH_VERSION) < digit_version(
'1.7.0'):
# Modified from https://github.com/pytorch/pytorch/blob/master/torch/hub.py
import os
import torch
import warnings
from urllib.parse import urlparse
import sys
import zipfile
from torch.hub import download_url_to_file, _get_torch_home, HASH_REGEX

# Hub used to support automatically extracting checkpoints from zipfiles that
# users compressed manually. That legacy zip format expects exactly one file
# (written by torch.save() < 1.6) in the archive. This support should be
# removed now that zipfile is the default format for torch.save().
def _is_legacy_zip_format(filename):
if zipfile.is_zipfile(filename):
infolist = zipfile.ZipFile(filename).infolist()
return len(infolist) == 1 and not infolist[0].is_dir()
return False

def _legacy_zip_load(filename, model_dir, map_location):
warnings.warn('Falling back to the old format < 1.6. This support will'
' be deprecated in favor of default zipfile format '
'introduced in 1.6. Please redo torch.save() to save it '
'in the new zipfile format.')
# Note: extractall() defaults to overwrite file if exists. No need to
# clean up beforehand. We deliberately don't handle tarfile here
# since our legacy serialization format was in tar.
# E.g. resnet18-5c106cde.pth which is widely used.
with zipfile.ZipFile(filename) as f:
members = f.infolist()
if len(members) != 1:
raise RuntimeError(
'Only one file(not dir) is allowed in the zipfile')
f.extractall(model_dir)
extraced_name = members[0].filename
extracted_file = os.path.join(model_dir, extraced_name)
return torch.load(extracted_file, map_location=map_location)

def load_url(url,
             model_dir=None,
             map_location=None,
             progress=True,
             check_hash=False,
             file_name=None):
    r"""Download (if needed) and deserialize a Torch object from a URL.

    The file is cached inside ``model_dir``; when a file with the target
    name already exists there, it is loaded without downloading again.
    A cached file that is a legacy single-member zip archive is extracted
    before loading.

    When ``model_dir`` is not given, ``<torch_home>/checkpoints`` is used,
    where ``torch_home`` is the value returned by
    ``torch.hub._get_torch_home()``.

    Args:
        url (str): URL of the object to download.
        model_dir (str, optional): Directory in which to save the object.
        map_location (optional): A function or a dict specifying how to
            remap storage locations (see ``torch.load``).
        progress (bool, optional): Whether or not to display a progress bar
            to stderr. Default: True.
        check_hash (bool, optional): If True, the filename part of the URL
            should follow the naming convention ``filename-<sha256>.ext``
            where ``<sha256>`` is the first eight or more digits of the
            SHA256 hash of the contents of the file. The hash prefix is
            passed on to the downloader. Default: False.
        file_name (str, optional): Name for the downloaded file. The
            filename taken from ``url`` is used if not set. Default: None.

    Returns:
        The object produced by ``torch.load`` for the cached file.

    Raises:
        RuntimeError: Re-raised from ``torch.load``. On torch < 1.5.0 a
            warning with an upgrade hint is emitted first.

    Example:
        >>> url = ('https://s3.amazonaws.com/pytorch/models/resnet18-5c106'
        ...        'cde.pth')
        >>> state_dict = load_url(url)
    """
    # Nudge users who still rely on the old environment variable.
    if os.getenv('TORCH_MODEL_ZOO'):
        warnings.warn('TORCH_MODEL_ZOO is deprecated, please use env '
                      'TORCH_HOME instead')

    if model_dir is None:
        model_dir = os.path.join(_get_torch_home(), 'checkpoints')
    mkdir_or_exist(model_dir)

    # The URL is always parsed; an explicit ``file_name`` only overrides the
    # name derived from it.
    url_basename = os.path.basename(urlparse(url).path)
    filename = url_basename if file_name is None else file_name
    cached_file = os.path.join(model_dir, filename)

    if not os.path.exists(cached_file):
        sys.stderr.write(f'Downloading: "{url}" to {cached_file}\n')
        # Only look for a hash in the file name when verification is asked
        # for; a name without a hash simply yields no prefix.
        match = HASH_REGEX.search(filename) if check_hash else None
        hash_prefix = match.group(1) if match else None
        download_url_to_file(
            url, cached_file, hash_prefix, progress=progress)

    if _is_legacy_zip_format(cached_file):
        return _legacy_zip_load(cached_file, model_dir, map_location)

    try:
        return torch.load(cached_file, map_location=map_location)
    except RuntimeError as error:
        too_old = digit_version(TORCH_VERSION) < digit_version('1.5.0')
        if too_old:
            # Give a hint before propagating the failure unchanged.
            warnings.warn(
                f'If the error is the same as "{cached_file} is a zip '
                'archive (did you mean to use torch.jit.load()?)", you can'
                ' upgrade your torch to 1.5.0 or higher (current torch '
                f'version is {TORCH_VERSION}). The error was raised '
                ' because the checkpoint was saved in torch>=1.6.0 but '
                'loaded in torch<1.5.')
        raise error
else:
from torch.utils.model_zoo import load_url # noqa: F401
6 changes: 3 additions & 3 deletions tests/test_load_model_zoo.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,8 @@ def load(filepath, map_location=None):

@patch('mmcv.__path__', [osp.join(osp.dirname(__file__), 'data/')])
@patch('mmcv.runner.checkpoint.load_from_http', load_from_http)
@patch('mmcv.runner.checkpoint.load_url', load_url)
@patch('torch.load', load)
@patch('torch.utils.model_zoo.load_url', load_url)
def test_load_external_url():
# test modelzoo://
url = _load_checkpoint('modelzoo://resnet50')
Expand Down Expand Up @@ -128,7 +128,7 @@ def test_load_external_url():
os.environ[ENV_MMCV_HOME] = mmcv_home
url = _load_checkpoint('open-mmlab://train')
assert url == 'url:https://localhost/train.pth'
with pytest.raises(IOError, match='train.pth is not a checkpoint ' 'file'):
with pytest.raises(IOError, match='train.pth is not a checkpoint file'):
_load_checkpoint('open-mmlab://train_empty')
url = _load_checkpoint('open-mmlab://test')
assert url == f'local:{osp.join(_get_mmcv_home(), "test.pth")}'
Expand All @@ -140,7 +140,7 @@ def test_load_external_url():
assert url == 'url:http://localhost/train.pth'

# test local file
with pytest.raises(IOError, match='train.pth is not a checkpoint ' 'file'):
with pytest.raises(IOError, match='train.pth is not a checkpoint file'):
_load_checkpoint('train.pth')
url = _load_checkpoint(osp.join(_get_mmcv_home(), 'test.pth'))
assert url == f'local:{osp.join(_get_mmcv_home(), "test.pth")}'
32 changes: 32 additions & 0 deletions tests/test_utils/test_hub.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import pytest
from torch.utils import model_zoo

from mmcv.utils import TORCH_VERSION, digit_version, load_url


def test_load_url():
    """Compare ``model_zoo.load_url`` with mmcv's ``load_url`` across versions.

    Two remote checkpoints are fetched: one saved with torch 1.5 and one
    saved with torch 1.6.
    """
    ckpt_pt15 = 'https://download.openmmlab.com/mmcv/test_data/saved_in_pt1.5.pth'
    ckpt_pt16 = 'https://download.openmmlab.com/mmcv/test_data/saved_in_pt1.6.pth'
    current = digit_version(TORCH_VERSION)

    # The 1.6 release of PyTorch switched torch.save to a zipfile-based file
    # format. Loading such a checkpoint through model_zoo raises RuntimeError
    # when it was saved in torch >= 1.6.0 but is loaded in torch < 1.7.0.
    # More details at https://github.com/open-mmlab/mmpose/issues/904
    model_zoo.load_url(ckpt_pt15)
    if current < digit_version('1.7.0'):
        with pytest.raises(RuntimeError):
            model_zoo.load_url(ckpt_pt16)
    else:
        # Newer PyTorch loads checkpoints from a url regardless of the
        # version they were saved in.
        model_zoo.load_url(ckpt_pt16)

    load_url(ckpt_pt15)
    # mmcv's load_url is only expected to fail on the torch >= 1.6.0
    # checkpoint when the running torch is older than 1.5.0.
    if current < digit_version('1.5.0'):
        with pytest.raises(RuntimeError):
            load_url(ckpt_pt16)
    else:
        load_url(ckpt_pt16)

0 comments on commit 990d8b6

Please sign in to comment.