Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Chunk Control Tests #5583

Merged
merged 24 commits into from
Nov 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
78a62d8
converted tests to pytest, added neg_one, and incomplete from_file an…
ESadek-MO Nov 15, 2023
a54f424
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 15, 2023
c6f115c
added from_file test
ESadek-MO Nov 15, 2023
1bb45a1
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 15, 2023
82951d3
added mocking tests
ESadek-MO Nov 16, 2023
32ebf09
Merge branch 'cc_tests' of github.com:ESadek-MO/iris into cc_tests
ESadek-MO Nov 16, 2023
de72114
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 16, 2023
6d939a6
trial and error with mocks and patches, may or may not work
ESadek-MO Nov 16, 2023
eed93d4
Merge branch 'cc_tests' of github.com:ESadek-MO/iris into cc_tests
ESadek-MO Nov 16, 2023
8d728ae
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 16, 2023
36ba71e
converted Mock to patch in as_dask test
ESadek-MO Nov 16, 2023
9aa899e
Merge branch 'cc_tests' of github.com:ESadek-MO/iris into cc_tests
ESadek-MO Nov 16, 2023
0b63581
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 16, 2023
82a10b6
review comment changes
ESadek-MO Nov 16, 2023
9137527
merge conflicts
ESadek-MO Nov 16, 2023
4f80847
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 16, 2023
fee7ed2
pre commit fixes
ESadek-MO Nov 16, 2023
c386032
merge conflicts
ESadek-MO Nov 16, 2023
6727f7b
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 16, 2023
139fd41
review comments, and added test in test__get_cf_var_data()
ESadek-MO Nov 17, 2023
bfe23b6
added in another test
ESadek-MO Nov 17, 2023
5140274
Merge branch 'cc_tests' of github.com:ESadek-MO/iris into cc_tests
ESadek-MO Nov 17, 2023
fc8c78b
added tests and fixed review comments
ESadek-MO Nov 20, 2023
929b03b
added AuxCoord test
ESadek-MO Nov 20, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 11 additions & 9 deletions lib/iris/fileformats/netcdf/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,17 +241,19 @@ def _get_cf_var_data(cf_var, filename):
result = as_lazy_data(proxy, chunks=None, dask_chunking=True)
else:
chunks = cf_var.cf_data.chunking()
if (
chunks is None
and CHUNK_CONTROL.mode is ChunkControl.Modes.FROM_FILE
):
raise KeyError(
f"{cf_var.cf_name} does not contain pre-existing chunk specifications."
f"Instead, you might wish to use CHUNK_CONTROL.set(), or just use default"
f" behaviour outside of a context manager. "
)
# In the "contiguous" case, pass chunks=None to 'as_lazy_data'.
if chunks == "contiguous":
if (
CHUNK_CONTROL.mode is ChunkControl.Modes.FROM_FILE
and isinstance(
cf_var, iris.fileformats.cf.CFDataVariable
)
):
raise KeyError(
f"{cf_var.cf_name} does not contain pre-existing chunk specifications."
f" Instead, you might wish to use CHUNK_CONTROL.set(), or just use default"
f" behaviour outside of a context manager. "
)
# Equivalent to chunks=None, but value required by chunking control
chunks = list(cf_var.shape)

Expand Down
97 changes: 0 additions & 97 deletions lib/iris/tests/integration/test_netcdf__chunk_control.py

This file was deleted.

216 changes: 216 additions & 0 deletions lib/iris/tests/unit/fileformats/netcdf/loader/test__chunk_control.py
trexfeathers marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
# Copyright Iris contributors
#
# This file is part of Iris and is released under the BSD license.
# See LICENSE in the root of the repository for full licensing details.
"""Unit tests for :class:`iris.fileformats.netcdf.loader.ChunkControl`."""

# Import iris.tests first so that some things can be initialised before
# importing anything else.
import iris.tests as tests # isort:skip
from unittest.mock import ANY, patch

import dask
import numpy as np
import pytest

import iris
from iris.cube import CubeList
from iris.fileformats.netcdf import loader
from iris.fileformats.netcdf.loader import CHUNK_CONTROL
import iris.tests.stock as istk


@pytest.fixture()
def save_cubelist_with_sigma(tmp_filepath):
    """Save a hybrid-height cube, with a bounded sigma coord, to tmp_filepath.

    Returns the (cube var_name, sigma var_name) pair used in the saved file.
    """
    source_cube = istk.simple_4d_with_hybrid_height()
    data_varname = "my_var"
    coord_varname = "my_sigma"
    source_cube.var_name = data_varname
    sigma_coord = source_cube.coord("sigma")
    sigma_coord.var_name = coord_varname
    sigma_coord.guess_bounds()
    iris.save(source_cube, tmp_filepath)
    return data_varname, coord_varname


@pytest.fixture
def save_cube_with_chunksize(tmp_filepath):
    """Save a simple 3d cube to tmp_filepath with explicit netCDF chunksizes.

    An auxiliary coordinate is attached so tests can check that
    :func:`iris.fileformats.netcdf.loader._get_cf_var_data` only raises in
    ``from_file`` mode when the entire cube has no specified chunking.
    """
    cube = istk.simple_3d()
    extra_coord = iris.coords.AuxCoord(
        points=np.zeros((3, 4)), long_name="random", units="1"
    )
    cube.add_aux_coord(extra_coord, [1, 2])
    iris.save(cube, tmp_filepath, chunksizes=(1, 3, 4))


@pytest.fixture(scope="session")
def tmp_filepath(tmp_path_factory):
    """Return the path (as a string) of a session-wide temporary netCDF file."""
    return str(tmp_path_factory.mktemp("data") / "tmp.nc")


@pytest.fixture(autouse=True)
def remove_min_bytes():
    """Zero out ``loader._LAZYVAR_MIN_BYTES`` for every test in this module.

    This forces all netCDF variables - however small - to be loaded lazily,
    so chunking behaviour is always observable.  The try/finally guarantees
    the original threshold is restored even if teardown is interrupted by an
    exception, so later test modules are never left with a zeroed threshold.
    """
    old_min_bytes = loader._LAZYVAR_MIN_BYTES
    loader._LAZYVAR_MIN_BYTES = 0
    try:
        yield
    finally:
        loader._LAZYVAR_MIN_BYTES = old_min_bytes


def test_default(tmp_filepath, save_cubelist_with_sigma):
    """With no chunk control active, each variable loads as a single chunk."""
    data_varname, _ = save_cubelist_with_sigma
    loaded = CubeList(loader.load_cubes(tmp_filepath))
    main_cube = loaded.extract_cube(data_varname)
    assert main_cube.shape == (3, 4, 5, 6)
    assert main_cube.lazy_data().chunksize == (3, 4, 5, 6)

    sigma_coord = main_cube.coord("sigma")
    assert sigma_coord.shape == (4,)
    assert sigma_coord.lazy_points().chunksize == (4,)
    assert sigma_coord.lazy_bounds().chunksize == (4, 2)


def test_control_global(tmp_filepath, save_cubelist_with_sigma):
    """A dimension set with no var_name applies to every variable sharing it."""
    data_varname, _ = save_cubelist_with_sigma
    with CHUNK_CONTROL.set(model_level_number=2):
        loaded = CubeList(loader.load_cubes(tmp_filepath))
    main_cube = loaded.extract_cube(data_varname)
    assert main_cube.shape == (3, 4, 5, 6)
    assert main_cube.lazy_data().chunksize == (3, 2, 5, 6)

    sigma_coord = main_cube.coord("sigma")
    assert sigma_coord.shape == (4,)
    assert sigma_coord.lazy_points().chunksize == (2,)
    assert sigma_coord.lazy_bounds().chunksize == (2, 2)


def test_control_sigma_only(tmp_filepath, save_cubelist_with_sigma):
    """A var-specific setting affects only that variable's points array."""
    data_varname, sigma_varname = save_cubelist_with_sigma
    with CHUNK_CONTROL.set(sigma_varname, model_level_number=2):
        loaded = CubeList(loader.load_cubes(tmp_filepath))
    main_cube = loaded.extract_cube(data_varname)
    assert main_cube.shape == (3, 4, 5, 6)
    assert main_cube.lazy_data().chunksize == (3, 4, 5, 6)

    sigma_coord = main_cube.coord("sigma")
    assert sigma_coord.shape == (4,)
    assert sigma_coord.lazy_points().chunksize == (2,)
    # N.B. the setting does not apply to the bounds array.
    assert sigma_coord.lazy_bounds().chunksize == (4, 2)


def test_control_cube_var(tmp_filepath, save_cubelist_with_sigma):
    """A setting on the data variable also reaches its derived coordinates."""
    data_varname, _ = save_cubelist_with_sigma
    with CHUNK_CONTROL.set(data_varname, model_level_number=2):
        loaded = CubeList(loader.load_cubes(tmp_filepath))
    main_cube = loaded.extract_cube(data_varname)
    assert main_cube.shape == (3, 4, 5, 6)
    assert main_cube.lazy_data().chunksize == (3, 2, 5, 6)

    sigma_coord = main_cube.coord("sigma")
    assert sigma_coord.shape == (4,)
    assert sigma_coord.lazy_points().chunksize == (2,)
    assert sigma_coord.lazy_bounds().chunksize == (2, 2)


def test_invalid_chunksize(tmp_filepath, save_cubelist_with_sigma):
    """A non-integer chunksize (here a string) is rejected with ValueError.

    NOTE(review): the original used the misspelt dimension keyword
    ``model_level_numer``; corrected to ``model_level_number`` so the error
    is clearly raised for the invalid *value*, not a typo'd dimension name.
    """
    with pytest.raises(ValueError):
        with CHUNK_CONTROL.set(model_level_number="2"):
            CubeList(loader.load_cubes(tmp_filepath))


def test_invalid_var_name(tmp_filepath, save_cubelist_with_sigma):
    """A non-string var_name is rejected with ValueError.

    NOTE(review): the original also passed a string chunksize via a misspelt
    dimension keyword (``model_level_numer="2"``), which could itself raise
    ValueError and mask the var_name check.  A valid integer chunksize is
    used here so the list-typed var_name is the only invalid input.
    """
    with pytest.raises(ValueError):
        with CHUNK_CONTROL.set([1, 2], model_level_number=2):
            CubeList(loader.load_cubes(tmp_filepath))


def test_control_multiple(tmp_filepath, save_cubelist_with_sigma):
    """Nested CHUNK_CONTROL.set contexts apply per-variable settings."""
    data_varname, sigma_varname = save_cubelist_with_sigma
    with CHUNK_CONTROL.set(data_varname, model_level_number=2):
        with CHUNK_CONTROL.set(sigma_varname, model_level_number=3):
            loaded = CubeList(loader.load_cubes(tmp_filepath))
    main_cube = loaded.extract_cube(data_varname)
    assert main_cube.shape == (3, 4, 5, 6)
    assert main_cube.lazy_data().chunksize == (3, 2, 5, 6)

    sigma_coord = main_cube.coord("sigma")
    assert sigma_coord.shape == (4,)
    assert sigma_coord.lazy_points().chunksize == (3,)
    assert sigma_coord.lazy_bounds().chunksize == (2, 2)


def test_neg_one(tmp_filepath, save_cubelist_with_sigma):
    """Setting a dimension to -1 keeps it whole while others are optimised."""
    data_varname, _ = save_cubelist_with_sigma
    with dask.config.set({"array.chunk-size": "50B"}):
        with CHUNK_CONTROL.set(model_level_number=-1):
            loaded = CubeList(loader.load_cubes(tmp_filepath))
        main_cube = loaded.extract_cube(data_varname)
        assert main_cube.shape == (3, 4, 5, 6)
        # Uses known-good output.
        assert main_cube.lazy_data().chunksize == (1, 4, 1, 1)

        sigma_coord = main_cube.coord("sigma")
        assert sigma_coord.shape == (4,)
        assert sigma_coord.lazy_points().chunksize == (4,)
        assert sigma_coord.lazy_bounds().chunksize == (4, 1)


def test_from_file(tmp_filepath, save_cube_with_chunksize):
    """from_file mode adopts the chunking recorded in the netCDF file."""
    with CHUNK_CONTROL.from_file():
        loaded_cube = next(iter(loader.load_cubes(tmp_filepath)))
    assert loaded_cube.shape == (2, 3, 4)
    assert loaded_cube.lazy_data().chunksize == (1, 3, 4)


def test_no_chunks_from_file(tmp_filepath, save_cubelist_with_sigma):
    """from_file mode raises KeyError when the file stores no chunking info."""
    with pytest.raises(KeyError), CHUNK_CONTROL.from_file():
        CubeList(loader.load_cubes(tmp_filepath))


def test_as_dask(tmp_filepath, save_cubelist_with_sigma):
    """Check that as_dask mode delegates chunking to dask.

    Return values are not checked, as we cannot be sure dask chunking
    behaviour will never change, nor that it will differ from our own.
    Instead, as_lazy_data is patched to raise and short-circuit the load,
    then its call arguments are inspected.
    """
    sentinel = "Mock called, rest of test unneeded"
    with patch("iris.fileformats.netcdf.loader.as_lazy_data") as mock_lazy:
        mock_lazy.side_effect = RuntimeError(sentinel)
        with CHUNK_CONTROL.as_dask():
            try:
                CubeList(loader.load_cubes(tmp_filepath))
            except RuntimeError as err:
                # Only swallow the deliberate short-circuit error.
                if str(err) != sentinel:
                    raise
        mock_lazy.assert_called_with(ANY, chunks=None, dask_chunking=True)


def test_pinned_optimisation(tmp_filepath, save_cubelist_with_sigma):
    """An explicitly-set dimension stays pinned while dask optimises the rest."""
    data_varname, _ = save_cubelist_with_sigma
    with dask.config.set({"array.chunk-size": "250B"}):
        with CHUNK_CONTROL.set(model_level_number=2):
            loaded = CubeList(loader.load_cubes(tmp_filepath))
        main_cube = loaded.extract_cube(data_varname)
        assert main_cube.shape == (3, 4, 5, 6)
        # Uses known-good output.
        # Known-good output WITHOUT pinning: (1, 1, 5, 6).
        assert main_cube.lazy_data().chunksize == (1, 2, 2, 6)

        sigma_coord = main_cube.coord("sigma")
        assert sigma_coord.shape == (4,)
        assert sigma_coord.lazy_points().chunksize == (2,)
        assert sigma_coord.lazy_bounds().chunksize == (2, 2)


if __name__ == "__main__":
    # Support direct invocation via the legacy iris test runner.
    tests.main()
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

from iris._lazy_data import _optimum_chunksize
import iris.fileformats.cf
from iris.fileformats.netcdf.loader import _get_cf_var_data
from iris.fileformats.netcdf.loader import CHUNK_CONTROL, _get_cf_var_data


class Test__get_cf_var_data(tests.IrisTest):
Expand All @@ -30,6 +30,7 @@ def _make(
cf_data = mock.MagicMock(
_FillValue=None,
__getitem__="<real-data>",
dimensions=["dim_" + str(x) for x in range(len(shape or "1"))],
)
cf_data.chunking = mock.MagicMock(return_value=chunksizes)
if shape is None:
Expand Down Expand Up @@ -61,6 +62,16 @@ def test_cf_data_chunks(self):
expected_chunks = _optimum_chunksize(chunks, self.shape)
self.assertArrayEqual(lazy_data_chunks, expected_chunks)

def test_cf_data_chunk_control(self):
    # More thorough chunk-control testing can be found at `test__chunk_control`.
    file_chunks = [2500, 240, 200]
    cf_var = self._make(shape=(2500, 240, 200), chunksizes=file_chunks)
    with CHUNK_CONTROL.set(dim_0=25, dim_1=24, dim_2=20):
        lazy_data = _get_cf_var_data(cf_var, self.filename)
    actual_chunks = [c[0] for c in lazy_data.chunks]
    self.assertArrayEqual(actual_chunks, (25, 24, 20))

def test_cf_data_no_chunks(self):
# No chunks means chunks are calculated from the array's shape by
# `iris._lazy_data._optimum_chunksize()`.
Expand Down
Loading
Loading