From 03145178da6fbcbd7a364fba391763d4affe47f8 Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Tue, 21 Jan 2014 09:24:26 -0800 Subject: [PATCH 1/6] Remove deepcopies when slicing or manipulating cubes This is a first attempt at removing unnecessary (and very slow) deepcopy operations with slicing or otherwise manipulating cubes and coordinates. See #914. Note: A few of the unit tests are failing, because they insist on checking the order (Fortran or C) of numpy arrays. I think these checks should be removed, because it is a waste of computational effort to always ensure arrays are contiguous. If some code needs to interface with external modules code that require continguous arrays, it should use np.ascontiguousarray or np.asfortranarray at the immediate level of the wrapper. --- lib/iris/coords.py | 4 +++- lib/iris/cube.py | 13 ++----------- 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/lib/iris/coords.py b/lib/iris/coords.py index 01017bb2a2..c8d2ad206e 100644 --- a/lib/iris/coords.py +++ b/lib/iris/coords.py @@ -522,7 +522,9 @@ def copy(self, points=None, bounds=None): raise ValueError('If bounds are specified, points must also be ' 'specified') - new_coord = copy.deepcopy(self) + new_coord = copy.copy(self) + new_coord.attributes = copy.deepcopy(self.attributes) + new_coord.coord_system = copy.deepcopy(self.coord_system) if points is not None: # Explicitly not using the points property as we don't want the # shape the new points to be constrained by the shape of diff --git a/lib/iris/cube.py b/lib/iris/cube.py index 9fb94f8101..91c53f07e4 100644 --- a/lib/iris/cube.py +++ b/lib/iris/cube.py @@ -2167,21 +2167,12 @@ def __getitem__(self, keys): try: first_slice = next(slice_gen) except StopIteration: - first_slice = None - - if first_slice is not None: - data = self._my_data[first_slice] - else: - data = copy.deepcopy(self._my_data) + first_slice = Ellipsis + data = self._my_data[first_slice] for other_slice in slice_gen: data = data[other_slice] - # We don't want a view of the data, so take a copy of it if it's - # not already our own. - if isinstance(data, biggus.Array) or not data.flags['OWNDATA']: - data = copy.deepcopy(data) - # We can turn a masked array into a normal array if it's full. if isinstance(data, ma.core.MaskedArray): if ma.count_masked(data) == 0: From 944f1b124e616705c411d3e1b430c5a82294e1bb Mon Sep 17 00:00:00 2001 From: Richard Hattersley Date: Wed, 4 May 2016 11:21:53 +0100 Subject: [PATCH 2/6] Control copying with iris.FUTURE.share_data --- lib/iris/__init__.py | 14 +++++++++++--- lib/iris/coords.py | 9 ++++++--- lib/iris/cube.py | 27 ++++++++++++++++++++++----- lib/iris/tests/unit/test_Future.py | 6 ++++++ 4 files changed, 45 insertions(+), 11 deletions(-) diff --git a/lib/iris/__init__.py b/lib/iris/__init__.py index 9e1ee56ee2..4f164eb39d 100644 --- a/lib/iris/__init__.py +++ b/lib/iris/__init__.py @@ -136,7 +136,7 @@ class Future(threading.local): def __init__(self, cell_datetime_objects=False, netcdf_promote=False, strict_grib_load=False, netcdf_no_unlimited=False, - clip_latitudes=False): + clip_latitudes=False, share_data=False): """ A container for run-time options controls. @@ -183,20 +183,28 @@ def __init__(self, cell_datetime_objects=False, netcdf_promote=False, :meth:`iris.coords.Coord.guess_bounds()` method limits the guessed bounds to [-90, 90] for latitudes. + The option `share_data` controls whether indexing a Cube returns + a Cube whose data is a view onto the original Cube's data, as + opposed to a independent copy of the relevant data. It also + controls whether `Coord.copy()` defaults to creating coordinates + whose `points` and `bounds` attributes are views onto the + original coordinate's attributes. + """ self.__dict__['cell_datetime_objects'] = cell_datetime_objects self.__dict__['netcdf_promote'] = netcdf_promote self.__dict__['strict_grib_load'] = strict_grib_load self.__dict__['netcdf_no_unlimited'] = netcdf_no_unlimited self.__dict__['clip_latitudes'] = clip_latitudes + self.__dict__['share_data'] = share_data def __repr__(self): msg = ('Future(cell_datetime_objects={}, netcdf_promote={}, ' 'strict_grib_load={}, netcdf_no_unlimited={}, ' - 'clip_latitudes={})') + 'clip_latitudes={}, share_data={})') return msg.format(self.cell_datetime_objects, self.netcdf_promote, self.strict_grib_load, self.netcdf_no_unlimited, - self.clip_latitudes) + self.clip_latitudes, self.share_data) def __setattr__(self, name, value): if name not in self.__dict__: diff --git a/lib/iris/coords.py b/lib/iris/coords.py index c8d2ad206e..7c03e8275f 100644 --- a/lib/iris/coords.py +++ b/lib/iris/coords.py @@ -522,9 +522,12 @@ def copy(self, points=None, bounds=None): raise ValueError('If bounds are specified, points must also be ' 'specified') - new_coord = copy.copy(self) - new_coord.attributes = copy.deepcopy(self.attributes) - new_coord.coord_system = copy.deepcopy(self.coord_system) + if iris.FUTURE.share_data: + new_coord = copy.copy(self) + new_coord.attributes = copy.deepcopy(self.attributes) + new_coord.coord_system = copy.deepcopy(self.coord_system) + else: + new_coord = copy.deepcopy(self) if points is not None: # Explicitly not using the points property as we don't want the # shape the new points to be constrained by the shape of diff --git a/lib/iris/cube.py b/lib/iris/cube.py index 91c53f07e4..2e5ea061b9 100644 --- a/lib/iris/cube.py +++ b/lib/iris/cube.py @@ -2164,15 +2164,32 @@ def __getitem__(self, keys): self.cell_measure_dims(cm_) if dimension_mapping[d] is not None] - try: - first_slice = next(slice_gen) - except StopIteration: - first_slice = Ellipsis - data = self._my_data[first_slice] + if iris.FUTURE.share_data: + try: + first_slice = next(slice_gen) + except StopIteration: + first_slice = Ellipsis + data = self._my_data[first_slice] + else: + try: + first_slice = next(slice_gen) + except StopIteration: + first_slice = None + + if first_slice is not None: + data = self._my_data[first_slice] + else: + data = copy.deepcopy(self._my_data) for other_slice in slice_gen: data = data[other_slice] + if not iris.FUTURE.share_data: + # We don't want a view of the data, so take a copy of it if it's + # not already our own. + if isinstance(data, biggus.Array) or not data.flags['OWNDATA']: + data = copy.deepcopy(data) + # We can turn a masked array into a normal array if it's full. if isinstance(data, ma.core.MaskedArray): if ma.count_masked(data) == 0: diff --git a/lib/iris/tests/unit/test_Future.py b/lib/iris/tests/unit/test_Future.py index d55a9ae58f..68b0a8d26c 100644 --- a/lib/iris/tests/unit/test_Future.py +++ b/lib/iris/tests/unit/test_Future.py @@ -51,6 +51,12 @@ def test_valid_clip_latitudes(self): future.clip_latitudes = new_value self.assertEqual(future.clip_latitudes, new_value) + def test_valid_share_data(self): + future = Future() + new_value = not future.share_data + future.share_data = new_value + self.assertEqual(future.share_data, new_value) + def test_invalid_attribute(self): future = Future() with self.assertRaises(AttributeError): From e438f5892e8a03556f1e4c2ff3d637c105e74f64 Mon Sep 17 00:00:00 2001 From: Richard Hattersley Date: Wed, 4 May 2016 12:03:39 +0100 Subject: [PATCH 3/6] Add deprecation messages and "What's new" entry. --- .../deprecate_2016-May-04_share_data.txt | 3 +++ lib/iris/coords.py | 8 ++++++++ lib/iris/cube.py | 10 +++++++++- 3 files changed, 20 insertions(+), 1 deletion(-) create mode 100644 docs/iris/src/whatsnew/contributions_1.10/deprecate_2016-May-04_share_data.txt diff --git a/docs/iris/src/whatsnew/contributions_1.10/deprecate_2016-May-04_share_data.txt b/docs/iris/src/whatsnew/contributions_1.10/deprecate_2016-May-04_share_data.txt new file mode 100644 index 0000000000..bd31a9682f --- /dev/null +++ b/docs/iris/src/whatsnew/contributions_1.10/deprecate_2016-May-04_share_data.txt @@ -0,0 +1,3 @@ +* Deprecated the data-copying behaviour of Cube indexing and `Coord.copy()`. + The `share_data` attribute of `iris.FUTURE` can be used to switch to + the new data-sharing behaviour. diff --git a/lib/iris/coords.py b/lib/iris/coords.py index 7c03e8275f..b2af5fe1c2 100644 --- a/lib/iris/coords.py +++ b/lib/iris/coords.py @@ -516,6 +516,14 @@ def copy(self, points=None, bounds=None): .. note:: If the points argument is specified and bounds are not, the resulting coordinate will have no bounds. + .. deprecated:: 1.10 + + By default the new coordinate's `points` and `bounds` will + be independent copies of the corresponding attributes of the + source coordinate. + The `share_data` attribute of `iris.FUTURE` can be used to + switch to the new data-sharing behaviour. + """ if points is None and bounds is not None: diff --git a/lib/iris/cube.py b/lib/iris/cube.py index 2e5ea061b9..cfe55f94e4 100644 --- a/lib/iris/cube.py +++ b/lib/iris/cube.py @@ -2146,6 +2146,14 @@ def __getitem__(self, keys): requested must be applicable directly to the cube.data attribute. All metadata will be subsequently indexed appropriately. + .. deprecated:: 1.10 + The value of the `data` attribute of the result will always + be independent of the source Cube's data. As a result, + modifying data values of the result Cube will have no effect + on the source Cube, and vice versa. + The `share_data` attribute of `iris.FUTURE` can be used to + switch to the new data-sharing behaviour. + """ # turn the keys into a full slice spec (all dims) full_slice = iris.util._build_full_slice_given_keys(keys, @@ -3106,7 +3114,7 @@ def add_history(self, string): .. deprecated:: 1.6 Add/modify history metadata within - attr:`~iris.cube.Cube.attributes` as needed. + :attr:`~iris.cube.Cube.attributes` as needed. """ warnings.warn("Cube.add_history() has been deprecated - " From 425e74675d3027d9d8ad37a3fa1d99c0488b1b23 Mon Sep 17 00:00:00 2001 From: cpelley Date: Thu, 5 May 2016 13:54:13 +0100 Subject: [PATCH 4/6] TEST: Added test for cube.__getitem__ --- lib/iris/tests/unit/cube/test_Cube.py | 29 +++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/lib/iris/tests/unit/cube/test_Cube.py b/lib/iris/tests/unit/cube/test_Cube.py index f68214e4f2..d998588742 100644 --- a/lib/iris/tests/unit/cube/test_Cube.py +++ b/lib/iris/tests/unit/cube/test_Cube.py @@ -1330,6 +1330,35 @@ def test_remove_cell_measure(self): [[self.b_cell_measure, (0, 1)]]) +class Test___getitem__nofuture(tests.IrisTest): + def test_lazy_array(self): + cube = Cube(biggus.NumpyArrayAdapter(np.arange(6).reshape(2, 3))) + cube2 = cube[1:] + self.assertTrue(cube2.has_lazy_data()) + cube.data + self.assertTrue(cube2.has_lazy_data()) + + def test_ndarray(self): + cube = Cube(np.arange(6).reshape(2, 3)) + cube2 = cube[1:] + self.assertIsNot(cube.data.base, cube2.data.base) + + +class Test___getitem__future(tests.IrisTest): + def test_lazy_array(self): + cube = Cube(biggus.NumpyArrayAdapter(np.arange(6).reshape(2, 3))) + cube2 = cube[1:] + self.assertTrue(cube2.has_lazy_data()) + cube.data + self.assertTrue(cube2.has_lazy_data()) + + def test_ndarray(self): + with mock.patch('iris.FUTURE.share_data', new=True): + cube = Cube(np.arange(6).reshape(2, 3)) + cube2 = cube[1:] + self.assertIs(cube.data.base, cube2.data.base) + + class Test__getitem_CellMeasure(tests.IrisTest): def setUp(self): cube = Cube(np.arange(6).reshape(2, 3)) From 72934ef8c5f19e3c0f9300e809a7b90828d38988 Mon Sep 17 00:00:00 2001 From: cpelley Date: Thu, 5 May 2016 13:59:50 +0100 Subject: [PATCH 5/6] TEST: Refactor of tests --- lib/iris/tests/unit/cube/test_Cube.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/lib/iris/tests/unit/cube/test_Cube.py b/lib/iris/tests/unit/cube/test_Cube.py index d998588742..34ce056fcb 100644 --- a/lib/iris/tests/unit/cube/test_Cube.py +++ b/lib/iris/tests/unit/cube/test_Cube.py @@ -1331,6 +1331,11 @@ def test_remove_cell_measure(self): class Test___getitem__nofuture(tests.IrisTest): + def setUp(self): + patch = mock.patch('iris.FUTURE.share_data', new=False) + self.mock_fshare = patch.start() + self.addCleanup(patch.stop) + def test_lazy_array(self): cube = Cube(biggus.NumpyArrayAdapter(np.arange(6).reshape(2, 3))) cube2 = cube[1:] @@ -1345,6 +1350,11 @@ def test_ndarray(self): class Test___getitem__future(tests.IrisTest): + def setUp(self): + patch = mock.patch('iris.FUTURE.share_data', new=True) + self.mock_fshare = patch.start() + self.addCleanup(patch.stop) + def test_lazy_array(self): cube = Cube(biggus.NumpyArrayAdapter(np.arange(6).reshape(2, 3))) cube2 = cube[1:] @@ -1353,9 +1363,8 @@ def test_lazy_array(self): self.assertTrue(cube2.has_lazy_data()) def test_ndarray(self): - with mock.patch('iris.FUTURE.share_data', new=True): - cube = Cube(np.arange(6).reshape(2, 3)) - cube2 = cube[1:] + cube = Cube(np.arange(6).reshape(2, 3)) + cube2 = cube[1:] self.assertIs(cube.data.base, cube2.data.base) From 4dcf4daf4e27d54c8b161d9fe7f4fd652e1c0a80 Mon Sep 17 00:00:00 2001 From: Richard Hattersley Date: Thu, 5 May 2016 14:38:10 +0100 Subject: [PATCH 6/6] Test AuxCoord.copy() with FUTURE.share_data --- lib/iris/tests/unit/coords/test_AuxCoord.py | 55 +++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 lib/iris/tests/unit/coords/test_AuxCoord.py diff --git a/lib/iris/tests/unit/coords/test_AuxCoord.py b/lib/iris/tests/unit/coords/test_AuxCoord.py new file mode 100644 index 0000000000..20724d9789 --- /dev/null +++ b/lib/iris/tests/unit/coords/test_AuxCoord.py @@ -0,0 +1,55 @@ +# (C) British Crown Copyright 2016, Met Office +# +# This file is part of Iris. +# +# Iris is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Iris is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with Iris. If not, see . +"""Unit tests for :class:`iris.coords.AuxCoord`.""" + +from __future__ import (absolute_import, division, print_function) +from six.moves import (filter, input, map, range, zip) # noqa + +# Import iris.tests first so that some things can be initialised before +# importing anything else. +import iris.tests as tests + +import numpy as np + +from iris.coords import AuxCoord +import iris + + +class Test_copy(tests.IrisTest): + def test_share_data_default(self): + original = AuxCoord(np.arange(4)) + copy = original.copy() + original.points[1] = 999 + self.assertArrayEqual(copy.points, [0, 1, 2, 3]) + + def test_share_data_false(self): + original = AuxCoord(np.arange(4)) + with iris.FUTURE.context(share_data=False): + copy = original.copy() + original.points[1] = 999 + self.assertArrayEqual(copy.points, [0, 1, 2, 3]) + + def test_share_data_true(self): + original = AuxCoord(np.arange(4)) + with iris.FUTURE.context(share_data=True): + copy = original.copy() + original.points[1] = 999 + self.assertArrayEqual(copy.points, [0, 999, 2, 3]) + + +if __name__ == '__main__': + tests.main()