Skip to content

Commit

Permalink
ENH: Remove deepcopies when slicing cubes and copying coords
Browse files Browse the repository at this point in the history
  • Loading branch information
Carwyn Pelley committed Apr 24, 2017
1 parent 88db1d2 commit 75e3aec
Show file tree
Hide file tree
Showing 5 changed files with 275 additions and 12 deletions.
13 changes: 10 additions & 3 deletions lib/iris/coords.py
Original file line number Diff line number Diff line change
Expand Up @@ -523,8 +523,13 @@ def copy(self, points=None, bounds=None):
raise ValueError('If bounds are specified, points must also be '
'specified')

new_coord = copy.deepcopy(self)
if points is not None:
# We do not perform a deepcopy when we supply new points so as to
# not unnecessarily copy the old points and bounds.
new_coord = copy.copy(self)
new_coord.attributes = copy.deepcopy(self.attributes)
new_coord.coord_system = copy.deepcopy(self.coord_system)

# Explicitly not using the points property as we don't want the
# shape of the new points to be constrained by the shape of
# self.points
Expand All @@ -534,6 +539,8 @@ def copy(self, points=None, bounds=None):
# points will result in new bounds, discarding those copied from
# self.
new_coord.bounds = bounds
else:
new_coord = copy.deepcopy(self)

return new_coord

Expand Down Expand Up @@ -1503,7 +1510,7 @@ def points(self):

@points.setter
def points(self, points):
points = np.array(points, ndmin=1)
points = np.array(points, ndmin=1, copy=False)
# If points are already defined for this coordinate,
if hasattr(self, '_points') and self._points is not None:
# Check that setting these points wouldn't change self.shape
Expand Down Expand Up @@ -1539,7 +1546,7 @@ def bounds(self):
def bounds(self, bounds):
if bounds is not None:
# Ensure the bounds are a compatible shape.
bounds = np.array(bounds, ndmin=2)
bounds = np.array(bounds, ndmin=2, copy=False)
if self.shape != bounds.shape[:-1]:
raise ValueError(
"The shape of the bounds array should be "
Expand Down
11 changes: 2 additions & 9 deletions lib/iris/cube.py
Original file line number Diff line number Diff line change
Expand Up @@ -2239,25 +2239,18 @@ def new_cell_measure_dims(cm_):
try:
first_slice = next(slice_gen)
except StopIteration:
first_slice = None
first_slice = Ellipsis

if self.has_lazy_data():
cube_data = self._dask_array
else:
cube_data = self._numpy_array

if first_slice is not None:
data = cube_data[first_slice]
else:
data = copy.deepcopy(cube_data)
data = cube_data[first_slice]

for other_slice in slice_gen:
data = data[other_slice]

# We don't want a view of the data, so take a copy of it if it's
# not already our own.
data = copy.deepcopy(data)

# We can turn a masked array into a normal array if it's full.
if ma.isMaskedArray(data):
if ma.count_masked(data) == 0:
Expand Down
121 changes: 121 additions & 0 deletions lib/iris/tests/unit/coords/test_AuxCoord.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
# (C) British Crown Copyright 2017, Met Office
#
# This file is part of Iris.
#
# Iris is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the
# Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Iris is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with Iris. If not, see <http://www.gnu.org/licenses/>.
"""Unit tests for :class:`iris.coords.AuxCoord`."""

from __future__ import (absolute_import, division, print_function)
from six.moves import (filter, input, map, range, zip) # noqa

# Import iris.tests first so that some things can be initialised before
# importing anything else.
import iris.tests as tests

import dask
import numpy as np

from iris._lazy_data import as_lazy_data
from iris.coords import AuxCoord


class Test___init__(tests.IrisTest):
    """Checks on a freshly constructed :class:`AuxCoord`."""

    def test_writeable(self):
        # Both the points and the bounds exposed by an AuxCoord are
        # expected to be writeable arrays.
        aux = AuxCoord([1, 2], bounds=[[1, 2], [2, 3]])
        for array_name in ('points', 'bounds'):
            values = getattr(aux, array_name)
            self.assertTrue(values.flags.writeable)


def fetch_base(ndarray):
    """
    Return the outermost numpy array that `ndarray` is a view of.

    The ``.base`` chain is followed for as long as it leads to another
    :class:`numpy.ndarray`.  An array whose base is ``None``, or is not
    an array at all (e.g. the ``bytes`` object behind
    ``np.frombuffer``), is returned as it is.

    Args:

    * ndarray:
        The numpy array whose underlying base array is wanted.

    Returns:
        The last numpy array reached by following ``.base``.

    """
    current = ndarray
    # Stop at the last *array* in the chain: a non-array base object does
    # not necessarily provide a ``.base`` attribute of its own to follow.
    while isinstance(current.base, np.ndarray):
        current = current.base
    return current


class Test___getitem__(tests.IrisTest):
    """Slicing behaviour of :class:`AuxCoord`."""

    def test_share_data(self):
        # Slicing a coordinate should mirror slicing a numpy array: the
        # sliced points and bounds are views onto the original's data,
        # whereas the coord system and attributes are distinct objects.
        source = AuxCoord([1, 2], bounds=[[1, 2], [2, 3]],
                          attributes={'dummy1': None},
                          coord_system=tests.mock.sentinel.coord_system)
        result = source[:]

        self.assertIs(fetch_base(result._points),
                      fetch_base(source._points))
        self.assertIs(fetch_base(result._bounds),
                      fetch_base(source._bounds))
        self.assertIsNot(result.coord_system, source.coord_system)
        self.assertIsNot(result.attributes, source.attributes)

    def test_lazy_data_realisation(self):
        # Slice a coord built from lazy points and bounds, and record which
        # kind of array the sliced coord holds afterwards.
        lazy_points = as_lazy_data(np.array([1, 2]))
        lazy_bounds = as_lazy_data(np.array([[1, 2], [2, 3]]))

        source = AuxCoord(lazy_points, bounds=lazy_bounds,
                          attributes={'dummy1': None},
                          coord_system=tests.mock.sentinel.coord_system)
        result = source[:]

        lazy_type = dask.array.core.Array

        # The sliced AuxCoord is expected to hold dask arrays, i.e. its
        # points and bounds have not been realised by the slicing.
        self.assertIsInstance(result._points, lazy_type)
        self.assertIsInstance(result._bounds, lazy_type)

        # The lazy arrays handed to the constructor are still dask arrays.
        # NOTE(review): this inspects the local inputs rather than
        # source._points / source._bounds -- confirm that is intended.
        self.assertIsInstance(lazy_points, lazy_type)
        self.assertIsInstance(lazy_bounds, lazy_type)


class Test_copy(tests.IrisTest):
    """Behaviour of :meth:`AuxCoord.copy`."""

    def setUp(self):
        # The coordinate under test, carrying bounds, attributes and a
        # sentinel coord system.
        self.original = AuxCoord([1, 2], bounds=[[1, 2], [2, 3]],
                                 attributes={'dummy1': None},
                                 coord_system=tests.mock.sentinel.coord_system)

    def assert_data_no_share(self, coord_copy):
        # Fail if `coord_copy` shares a points/bounds base array, the
        # coord system object or the attributes object with the original.
        reference = self.original
        for array_name in ('_points', '_bounds'):
            self.assertIsNot(fetch_base(getattr(coord_copy, array_name)),
                             fetch_base(getattr(reference, array_name)))
        for member in ('coord_system', 'attributes'):
            self.assertIsNot(getattr(coord_copy, member),
                             getattr(reference, member))

    def test_existing_points(self):
        # A plain copy must not hand back views of the original's points
        # or bounds.
        duplicate = self.original.copy()
        self.assert_data_no_share(duplicate)

    def test_existing_points_deepcopy_call(self):
        # With no new points supplied, the coordinate object itself is
        # expected to be passed to exactly one copy.deepcopy call.
        with tests.mock.patch('copy.deepcopy') as deepcopy_patch:
            self.original.copy()
            deepcopy_patch.assert_called_once_with(self.original)

    def test_new_points(self):
        # A copy given replacement points and bounds must not share data
        # with the original either.
        new_points = [1, 2]
        new_bounds = [[1, 2], [2, 3]]
        duplicate = self.original.copy(new_points, bounds=new_bounds)
        self.assert_data_no_share(duplicate)

    def test_new_points_shallowcopy_call(self):
        # With new points supplied, the coordinate object itself is
        # expected to be shallow copied (exactly one copy.copy call), so
        # that the old points and bounds are not copied unnecessarily.
        with tests.mock.patch('copy.copy') as shallow_patch:
            self.original.copy([1, 2], bounds=[[1, 2], [2, 3]])
            shallow_patch.assert_called_once_with(self.original)


# Run this module's tests when it is executed directly as a script.
if __name__ == '__main__':
    tests.main()
126 changes: 126 additions & 0 deletions lib/iris/tests/unit/coords/test_DimCoord.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
# (C) British Crown Copyright 2017, Met Office
#
# This file is part of Iris.
#
# Iris is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the
# Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Iris is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with Iris. If not, see <http://www.gnu.org/licenses/>.
"""Unit tests for :class:`iris.coords.DimCoord`."""

from __future__ import (absolute_import, division, print_function)
from six.moves import (filter, input, map, range, zip) # noqa

# Import iris.tests first so that some things can be initialised before
# importing anything else.
import iris.tests as tests

import copy

import dask
import numpy as np

from iris._lazy_data import as_lazy_data
from iris.coords import DimCoord


class Test___init__(tests.IrisTest):
    """Checks on a freshly constructed :class:`DimCoord`."""

    def test_writeable(self):
        # Neither the points nor the bounds exposed by a DimCoord are
        # expected to be writeable arrays.
        dim = DimCoord([1, 2], bounds=[[1, 2], [2, 3]])
        for array_name in ('points', 'bounds'):
            values = getattr(dim, array_name)
            self.assertFalse(values.flags.writeable)


def fetch_base(ndarray):
    """
    Return the outermost numpy array that `ndarray` is a view of.

    The ``.base`` chain is followed for as long as it leads to another
    :class:`numpy.ndarray`.  An array whose base is ``None``, or is not
    an array at all (e.g. the ``bytes`` object behind
    ``np.frombuffer``), is returned as it is.

    Args:

    * ndarray:
        The numpy array whose underlying base array is wanted.

    Returns:
        The last numpy array reached by following ``.base``.

    """
    current = ndarray
    # Stop at the last *array* in the chain: a non-array base object does
    # not necessarily provide a ``.base`` attribute of its own to follow.
    while isinstance(current.base, np.ndarray):
        current = current.base
    return current


class Test___getitem__(tests.IrisTest):
    """Slicing behaviour of :class:`DimCoord`."""

    def test_share_data(self):
        # Slicing a coordinate should mirror slicing a numpy array: the
        # sliced points and bounds are views onto the original's data,
        # whereas the coord system and attributes are distinct objects.
        source = DimCoord([1, 2], bounds=[[1, 2], [2, 3]],
                          attributes={'dummy1': None},
                          coord_system=tests.mock.sentinel.coord_system)
        result = source[:]

        self.assertIs(fetch_base(result._points),
                      fetch_base(source._points))
        self.assertIs(fetch_base(result._bounds),
                      fetch_base(source._bounds))
        self.assertIsNot(result.coord_system, source.coord_system)
        self.assertIsNot(result.attributes, source.attributes)

    def test_lazy_data_realisation(self):
        # Slice a coord built from lazy points and bounds, and record which
        # kind of array the sliced coord holds afterwards.
        lazy_points = as_lazy_data(np.array([1, 2]))
        lazy_bounds = as_lazy_data(np.array([[1, 2], [2, 3]]))

        source = DimCoord(lazy_points, bounds=lazy_bounds,
                          attributes={'dummy1': None},
                          coord_system=tests.mock.sentinel.coord_system)
        result = source[:]

        # The sliced DimCoord is expected to hold real numpy arrays.
        self.assertIsInstance(result._points, np.ndarray)
        self.assertIsInstance(result._bounds, np.ndarray)

        # The lazy arrays handed to the constructor are still dask arrays.
        # NOTE(review): this inspects the local inputs rather than
        # source._points / source._bounds -- confirm that is intended.
        lazy_type = dask.array.core.Array
        self.assertIsInstance(lazy_points, lazy_type)
        self.assertIsInstance(lazy_bounds, lazy_type)


class Test_copy(tests.IrisTest):
    """Behaviour of :meth:`DimCoord.copy`."""

    def setUp(self):
        # The coordinate under test, carrying bounds, attributes and a
        # sentinel coord system.
        self.original = DimCoord([1, 2], bounds=[[1, 2], [2, 3]],
                                 attributes={'dummy1': None},
                                 coord_system=tests.mock.sentinel.coord_system)

    def assert_data_no_share(self, coord_copy):
        # Fail if `coord_copy` shares a points/bounds base array, the
        # coord system object or the attributes object with the original.
        reference = self.original
        for array_name in ('_points', '_bounds'):
            self.assertIsNot(fetch_base(getattr(coord_copy, array_name)),
                             fetch_base(getattr(reference, array_name)))
        for member in ('coord_system', 'attributes'):
            self.assertIsNot(getattr(coord_copy, member),
                             getattr(reference, member))

    def test_existing_points(self):
        # A plain copy must not hand back views of the original's points
        # or bounds.
        duplicate = self.original.copy()
        self.assert_data_no_share(duplicate)

    def test_existing_points_deepcopy_call(self):
        # With no new points supplied, the coordinate object itself is
        # expected to be passed to exactly one copy.deepcopy call.
        # A genuine deep copy is prepared first and used as the patched
        # return value (presumably so the remainder of DimCoord.copy works
        # on a real coordinate rather than a mock -- confirm).
        prepared = copy.deepcopy(self.original)
        patcher = tests.mock.patch('copy.deepcopy', return_value=prepared)
        with patcher as deepcopy_patch:
            self.original.copy()
            deepcopy_patch.assert_called_once_with(self.original)

    def test_new_points(self):
        # A copy given replacement points and bounds must not share data
        # with the original either.
        new_points = [1, 2]
        new_bounds = [[1, 2], [2, 3]]
        duplicate = self.original.copy(new_points, bounds=new_bounds)
        self.assert_data_no_share(duplicate)

    def test_new_points_shallowcopy_call(self):
        # With new points supplied, the coordinate object itself is
        # expected to be shallow copied (exactly one copy.copy call), so
        # that the old points and bounds are not copied unnecessarily.
        # A genuine shallow copy is prepared first and used as the patched
        # return value.
        prepared = copy.copy(self.original)
        patcher = tests.mock.patch('copy.copy', return_value=prepared)
        with patcher as shallow_patch:
            self.original.copy([1, 2], bounds=[[1, 2], [2, 3]])
            shallow_patch.assert_called_once_with(self.original)


# Run this module's tests when it is executed directly as a script.
if __name__ == '__main__':
    tests.main()
16 changes: 16 additions & 0 deletions lib/iris/tests/unit/cube/test_Cube.py
Original file line number Diff line number Diff line change
Expand Up @@ -1670,6 +1670,22 @@ def test_remove_cell_measure(self):
[[self.b_cell_measure, (0, 1)]])


class Test___getitem__lazy(tests.IrisTest):
    """Slicing a cube with lazy and with real (numpy) data."""

    def test_lazy_array(self):
        # A slice of a lazy cube is expected to be lazy, and to remain so
        # after the data of the source cube has been accessed.
        lazy_payload = as_lazy_data(np.arange(6).reshape(2, 3))
        source = Cube(lazy_payload)
        subcube = source[1:]
        self.assertTrue(subcube.has_lazy_data())

        # Access the source cube's data (presumably realising it -- the
        # value itself is not needed).
        source.data
        self.assertTrue(subcube.has_lazy_data())

    def test_ndarray(self):
        # With real data, the sliced cube's data is expected to have the
        # same base array as the source cube's data.
        source = Cube(np.arange(6).reshape(2, 3))
        subcube = source[1:]
        self.assertIs(source.data.base, subcube.data.base)


class Test__getitem_CellMeasure(tests.IrisTest):
def setUp(self):
cube = Cube(np.arange(6).reshape(2, 3))
Expand Down

0 comments on commit 75e3aec

Please sign in to comment.