From 8d2cd1d98616af921dd7846c3ef134d815c42821 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Fri, 12 May 2017 15:16:00 +0100 Subject: [PATCH 1/4] Refactor Coord inheritance. --- lib/iris/coords.py | 261 +++++++++----------- lib/iris/tests/test_coord_api.py | 5 +- lib/iris/tests/unit/coords/test_DimCoord.py | 6 +- 3 files changed, 123 insertions(+), 149 deletions(-) diff --git a/lib/iris/coords.py b/lib/iris/coords.py index a05e147ce4..1d9ed27609 100644 --- a/lib/iris/coords.py +++ b/lib/iris/coords.py @@ -520,13 +520,95 @@ def copy(self, points=None, bounds=None): return new_coord - @abstractproperty - def points(self): - """Property containing the points values as a numpy array""" + @classmethod + def from_coord(cls, coord): + """Create a new Coord of this type, from the given coordinate.""" + kwargs = {'points': coord.core_points(), + 'bounds': coord.core_bounds(), + 'standard_name': coord.standard_name, + 'long_name': coord.long_name, + 'var_name': coord.var_name, + 'units': coord.units, + 'attributes': coord.attributes, + 'coord_system': copy.deepcopy(coord.coord_system)} + if issubclass(cls, DimCoord): + # DimCoord introduces an extra constructor keyword. + kwargs['circular'] = getattr(coord, 'circular', False) + return cls(**kwargs) + + def _sanitise_array(self, src, ndmin): + if is_lazy_data(src): + # Lazy data : just ensure ndmin requirement. + ndims_missing = ndmin - src.ndim + if ndims_missing <= 0: + result = src + else: + extended_shape = tuple([1] * ndims_missing + list(src.shape)) + result = src.reshape(extended_shape) + else: + # Real data : a few more things to do in this case. + # Ensure the array is writeable. + # NB. Returns the *same object* if src is already writeable. + result = np.require(src, requirements='W') + # Ensure the array has enough dimensions. + # NB. Returns the *same object* if result.ndim >= ndmin + result = np.array(result, ndmin=ndmin, copy=False) + # We don't need to copy the data, but we do need to have our + # own view so we can control the shape, etc. + result = result.view() + return result + + def _points_getter(self): + """The coordinate points values as a NumPy array.""" + return self._points_dm.data + + def _points_setter(self, points): + # Set the points to a new array - as long as it's the same shape. + + # Ensure points has an ndmin of 1 and is either a numpy or lazy array. + # This will avoid Scalar coords with points of shape () rather + # than the desired (1,). + points = self._sanitise_array(points, 1) + + # Set or update DataManager. + if self._points_dm is None: + self._points_dm = DataManager(points) + else: + self._points_dm.data = points + + points = property(_points_getter, _points_setter) + + def _bounds_getter(self): + """ + The coordinate bounds values, as a NumPy array, + or None if no bound values are defined. - @abstractproperty - def bounds(self): - """Property containing the bound values as a numpy array""" + .. note:: The shape of the bound array should be: ``points.shape + + (n_bounds, )``. + + """ + bounds = None + if self.has_bounds(): + bounds = self._bounds_dm.data + return bounds + + def _bounds_setter(self, bounds): + # Ensure the bounds are a compatible shape. + if bounds is None: + self._bounds_dm = None + else: + bounds = self._sanitise_array(bounds, 2) + if self.shape != bounds.shape[:-1]: + raise ValueError("Bounds shape must be compatible with points " + "shape.") + if not self.has_bounds() \ + or self.core_bounds().shape != bounds.shape: + # Construct a new bounds DataManager. + self._bounds_dm = DataManager(bounds) + else: + self._bounds_dm.data = bounds + + bounds = property(_bounds_getter, _bounds_setter) def lazy_points(self): """ @@ -1429,16 +1511,6 @@ class DimCoord(Coord): A coordinate that is 1D, numeric, and strictly monotonic. """ - @staticmethod - def from_coord(coord): - """Create a new DimCoord from the given coordinate.""" - return DimCoord(coord.core_points(), standard_name=coord.standard_name, - long_name=coord.long_name, var_name=coord.var_name, - units=coord.units, bounds=coord.core_bounds(), - attributes=coord.attributes, - coord_system=copy.deepcopy(coord.coord_system), - circular=getattr(coord, 'circular', False)) - @classmethod def from_regular(cls, zeroth, step, count, standard_name=None, long_name=None, var_name=None, units='1', attributes=None, @@ -1580,29 +1652,28 @@ def _new_points_requirements(self, points): if len(points) > 1 and not iris.util.monotonic(points, strict=True): raise ValueError('The points array must be strictly monotonic.') - @property - def points(self): - """The local points values as a read-only NumPy array.""" - return self._points_dm.data - - @points.setter - def points(self, points): - # We must realise points to check monotonicity. + def _points_setter(self, points): + # DimCoord always realises the points, to allow monotonicity checks. copy = is_lazy_data(points) points = as_concrete_data(points) - points = np.array(points, copy=copy, ndmin=1) + # Ensure it is an array.. + # .. and always a distinct view, so that we can make it read-only. + points = np.array(points, copy=copy) - # Set or update DataManager. - if self._points_dm is None: - self._points_dm = DataManager(points) - else: - self._points_dm.data = points + # Invoke the generic points setter. + super(DimCoord, self)._points_setter(points) + + if self._points_dm is not None: + # Fetch the new core array, as the parent can change it. + points = self._points_dm._real_array + + # Check validity requirements for dimension-coordinate points. + self._new_points_requirements(points) - # Checks for 1d, numeric, monotonic - self._new_points_requirements(self._points_dm.data) + # Make the array read-only. + self._points_dm.data.flags.writeable = False - # Make the array read-only. - self._points_dm.data.flags.writeable = False + points = property(Coord._points_getter, _points_setter) def _new_bounds_requirements(self, bounds): """ @@ -1639,37 +1710,27 @@ def _new_bounds_requirements(self, bounds): raise ValueError('The direction of monotonicity must be ' 'consistent across all bounds') - @property - def bounds(self): - """ - The bounds values as a read-only NumPy array, or None if no - bounds have been set. - - """ - bounds = None - if self.has_bounds(): - bounds = self._bounds_dm.data - return bounds - - @bounds.setter - def bounds(self, bounds): - if bounds is None: - self._bounds_dm = None - else: + def _bounds_setter(self, bounds): + if bounds is not None: # Ensure we have a realised array of new bounds values. copy = is_lazy_data(bounds) bounds = as_concrete_data(bounds) bounds = np.array(bounds, copy=copy, ndmin=2) - # Checks for appropriate values for the new bounds. + + # Invoke the generic bounds setter. + super(DimCoord, self)._bounds_setter(bounds) + + if self._bounds_dm is not None: + # Fetch the new core array, as the parent can change it. + bounds = self._bounds_dm._real_array + + # Check validity requirements for dimension-coordinate bounds. self._new_bounds_requirements(bounds) # Ensure the array is read-only. bounds.flags.writeable = False - if not self.has_bounds() or self.bounds.shape != bounds.shape: - # Construct a new bounds DataManager. - self._bounds_dm = DataManager(bounds) - else: - self._bounds_dm.data = bounds + + bounds = property(Coord._bounds_getter, _bounds_setter) def is_monotonic(self): return True @@ -1684,92 +1745,6 @@ def xml_element(self, doc): class AuxCoord(Coord): """A CF auxiliary coordinate.""" - @staticmethod - def from_coord(coord): - """Create a new AuxCoord from the given coordinate.""" - new_coord = AuxCoord(coord.points, standard_name=coord.standard_name, - long_name=coord.long_name, - var_name=coord.var_name, - units=coord.units, bounds=coord.bounds, - attributes=coord.attributes, - coord_system=copy.deepcopy(coord.coord_system)) - - return new_coord - - def _sanitise_array(self, src, ndmin): - if is_lazy_data(src): - # Lazy data : just ensure ndmin requirement. - ndims_missing = ndmin - src.ndim - if ndims_missing <= 0: - result = src - else: - extended_shape = tuple([1] * ndims_missing + list(src.shape)) - result = src.reshape(extended_shape) - else: - # Real data : a few more things to do in this case. - # Ensure the array is writeable. - # NB. Returns the *same object* if src is already writeable. - result = np.require(src, requirements='W') - # Ensure the array has enough dimensions. - # NB. Returns the *same object* if result.ndim >= ndmin - result = np.array(result, ndmin=ndmin, copy=False) - # We don't need to copy the data, but we do need to have our - # own view so we can control the shape, etc. - result = result.view() - return result - - @property - def points(self): - """The local points values as a read-only NumPy array.""" - return self._points_dm.data - - @points.setter - def points(self, points): - # Set the points to a new array - as long as it's the same shape. - - # Ensure points has an ndmin of 1 and is either a numpy or lazy array. - # This will avoid Scalar coords with points of shape () rather - # than the desired (1,). - points = self._sanitise_array(points, 1) - - # Set or update DataManager. - if self._points_dm is None: - self._points_dm = DataManager(points) - else: - self._points_dm.data = points - - @property - def bounds(self): - """ - Property containing the bound values, as a NumPy array, - or None if no bound values are defined. - - .. note:: The shape of the bound array should be: ``points.shape + - (n_bounds, )``. - - """ - bounds = None - if self.has_bounds(): - bounds = self._bounds_dm.data - return bounds - - @bounds.setter - def bounds(self, bounds): - # Ensure the bounds are a compatible shape. - if bounds is None: - self._bounds_dm = None - else: - bounds = self._sanitise_array(bounds, 2) - if self.shape != bounds.shape[:-1]: - raise ValueError("Bounds shape must be compatible with points " - "shape.") - if not self.has_bounds() \ - or self.core_bounds().shape != bounds.shape: - # Construct a new bounds DataManager. - self._bounds_dm = DataManager(bounds) - else: - self._bounds_dm.data = bounds - # This is necessary for merging, but probably shouldn't be used otherwise. # See #962 and #1772. def __hash__(self): diff --git a/lib/iris/tests/test_coord_api.py b/lib/iris/tests/test_coord_api.py index a797a0d31f..d6dbe6de13 100644 --- a/lib/iris/tests/test_coord_api.py +++ b/lib/iris/tests/test_coord_api.py @@ -326,10 +326,11 @@ def test_dim_coord_restrictions(self): 'monotonicity.*consistent.*all bounds'): iris.coords.DimCoord([1, 2, 3], bounds=[[1, 12], [2, 9], [3, 6]]) # shapes of points and bounds - with self.assertRaisesRegexp(ValueError, 'shape of the bounds array'): + msg = 'Bounds shape must be compatible with points shape' + with self.assertRaisesRegexp(ValueError, msg): iris.coords.DimCoord([1, 2, 3], bounds=[0.5, 1.5, 2.5, 3.5]) # another example of shapes of points and bounds - with self.assertRaisesRegexp(ValueError, 'shape of the bounds array'): + with self.assertRaisesRegexp(ValueError, msg): iris.coords.DimCoord([1, 2, 3], bounds=[[0.5, 1.5], [1.5, 2.5]]) # numeric diff --git a/lib/iris/tests/unit/coords/test_DimCoord.py b/lib/iris/tests/unit/coords/test_DimCoord.py index 426f7364dc..a201f83110 100644 --- a/lib/iris/tests/unit/coords/test_DimCoord.py +++ b/lib/iris/tests/unit/coords/test_DimCoord.py @@ -84,8 +84,7 @@ def test_fail_bounds_shape_mismatch(self): bds_shape = list(self.bds_real.shape) bds_shape[0] += 1 bds_wrong = np.zeros(bds_shape) - msg = ('The shape of the bounds array should be ' - 'points.shape \+ \(n_bounds,\)') + msg = 'Bounds shape must be compatible with points shape' with self.assertRaisesRegexp(ValueError, msg): DimCoord(self.pts_real, bounds=bds_wrong) @@ -442,8 +441,7 @@ def test_set_real(self): def test_fail_bad_shape(self): # Setting real points requires matching shape. coord = DimCoord(self.pts_real, bounds=self.bds_real) - msg = ('The shape of the bounds array should be ' - 'points.shape \+ \(n_bounds,\)') + msg = 'Bounds shape must be compatible with points shape' with self.assertRaisesRegexp(ValueError, msg): coord.bounds = np.array([1.0, 2.0, 3.0]) From a9f7b69695c6785601cb757f85ab4b93180cc47b Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Wed, 17 May 2017 13:39:00 +0100 Subject: [PATCH 2/4] Review changes. --- lib/iris/coords.py | 50 +++++++++++++++++++++++++++++----------------- lib/iris/cube.py | 5 +++-- 2 files changed, 35 insertions(+), 20 deletions(-) diff --git a/lib/iris/coords.py b/lib/iris/coords.py index 1d9ed27609..9e2eed36b3 100644 --- a/lib/iris/coords.py +++ b/lib/iris/coords.py @@ -766,6 +766,13 @@ def _as_defn(self): self.units, self.attributes, self.coord_system) return defn + # Must supply __hash__ as Python 3 does not enable it if __eq__ is defined. + # NOTE: Violates "objects which compare equal must have the same hash". + # Currently needed, but not really correct and should be fixed. + # See #962 and #1772. + def __hash__(self): + return hash(id(self)) + def __binary_operator__(self, other, mode_constant): """ Common code which is called by add, sub, mul and div @@ -1608,11 +1615,6 @@ def __eq__(self, other): # The __ne__ operator from Coord implements the not __eq__ method. - # This is necessary for merging, but probably shouldn't be used otherwise. - # See #962 and #1772. - def __hash__(self): - return hash(id(self)) - def __getitem__(self, key): coord = super(DimCoord, self).__getitem__(key) coord.circular = self.circular and coord.shape == self.shape @@ -1656,22 +1658,23 @@ def _points_setter(self, points): # DimCoord always realises the points, to allow monotonicity checks. copy = is_lazy_data(points) points = as_concrete_data(points) - # Ensure it is an array.. - # .. and always a distinct view, so that we can make it read-only. + # Ensure it is an actual array, and also make our own distinct view + # so that we can make it read-only. points = np.array(points, copy=copy) # Invoke the generic points setter. super(DimCoord, self)._points_setter(points) if self._points_dm is not None: - # Fetch the new core array, as the parent can change it. - points = self._points_dm._real_array + # Re-fetch the core array, as the super call may replace it. + points = self._points_dm.core_data() + # N.B. always a *real* array, as we realised 'points' at the start. # Check validity requirements for dimension-coordinate points. self._new_points_requirements(points) # Make the array read-only. - self._points_dm.data.flags.writeable = False + points.flags.writeable = False points = property(Coord._points_getter, _points_setter) @@ -1715,14 +1718,15 @@ def _bounds_setter(self, bounds): # Ensure we have a realised array of new bounds values. copy = is_lazy_data(bounds) bounds = as_concrete_data(bounds) - bounds = np.array(bounds, copy=copy, ndmin=2) + bounds = np.array(bounds, copy=copy) # Invoke the generic bounds setter. super(DimCoord, self)._bounds_setter(bounds) if self._bounds_dm is not None: - # Fetch the new core array, as the parent can change it. - bounds = self._bounds_dm._real_array + # Re-fetch the core array, as the super call may replace it. + bounds = self._bounds_dm.core_data() + # N.B. always a *real* array, as we realised 'bounds' at the start. # Check validity requirements for dimension-coordinate bounds. self._new_bounds_requirements(bounds) @@ -1744,11 +1748,21 @@ def xml_element(self, doc): class AuxCoord(Coord): - """A CF auxiliary coordinate.""" - # This is necessary for merging, but probably shouldn't be used otherwise. - # See #962 and #1772. - def __hash__(self): - return hash(id(self)) + """ + A CF auxiliary coordinate. + + .. note:: + + There are currently no specific properties of :class:`AuxCoord`, + everything is inherited from :class:`Coord`. + + """ + # Logically, :class:`Coord` is an abstract class and all actual coords must + # be members of some concrete subclass, i.e. an :class:`AuxCoord` or + # a :class:`DimCoord`. + # So we retain :class:`AuxCoord` as a distinct concrete subclass. + # This provides clarity, backwards compatibility, and so we can add + # AuxCoord-specific code if needed in future. class CellMeasure(six.with_metaclass(ABCMeta, CFVariableMixin)): diff --git a/lib/iris/cube.py b/lib/iris/cube.py index 808cc71d5b..7e396e631a 100644 --- a/lib/iris/cube.py +++ b/lib/iris/cube.py @@ -3081,8 +3081,9 @@ def __ne__(self, other): result = not result return result - # Must supply __hash__, Python 3 does not enable it if __eq__ is defined - # This is necessary for merging, but probably shouldn't be used otherwise. + # Must supply __hash__ as Python 3 does not enable it if __eq__ is defined. + # NOTE: Violates "objects which compare equal must have the same hash". + # Currently needed, but not really correct and should be fixed. # See #962 and #1772. def __hash__(self): return hash(id(self)) From 6944d8f2bbfc14799b73fa8fc4e92148ea7dcc3a Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Wed, 17 May 2017 14:52:30 +0100 Subject: [PATCH 3/4] Review change -- reword comments for clarity. --- lib/iris/coords.py | 6 ++++-- lib/iris/cube.py | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/lib/iris/coords.py b/lib/iris/coords.py index 9e2eed36b3..9fda31e790 100644 --- a/lib/iris/coords.py +++ b/lib/iris/coords.py @@ -768,8 +768,10 @@ def _as_defn(self): # Must supply __hash__ as Python 3 does not enable it if __eq__ is defined. # NOTE: Violates "objects which compare equal must have the same hash". - # Currently needed, but not really correct and should be fixed. - # See #962 and #1772. + # We ought to remove this, as equality of two coords can *change*, so they + # really should not be hashable. + # However, current code needs it, e.g. so we can put them in sets. + # Fixing it will require changing those uses. See #962 and #1772. def __hash__(self): return hash(id(self)) diff --git a/lib/iris/cube.py b/lib/iris/cube.py index 7e396e631a..97288a08b5 100644 --- a/lib/iris/cube.py +++ b/lib/iris/cube.py @@ -3083,8 +3083,10 @@ def __ne__(self, other): # Must supply __hash__ as Python 3 does not enable it if __eq__ is defined. # NOTE: Violates "objects which compare equal must have the same hash". - # Currently needed, but not really correct and should be fixed. - # See #962 and #1772. + # We ought to remove this, as equality of two cube can *change*, so they + # really should not be hashable. + # However, current code needs it, e.g. so we can put them in sets. + # Fixing it will require changing those uses. See #962 and #1772. def __hash__(self): return hash(id(self)) From cf43c3eeaaf9f75a28ab25613d002ec41809148e Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Wed, 17 May 2017 15:54:51 +0100 Subject: [PATCH 4/4] Fix Python3-specific problem. --- lib/iris/coords.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/lib/iris/coords.py b/lib/iris/coords.py index 9fda31e790..7ab07045a8 100644 --- a/lib/iris/coords.py +++ b/lib/iris/coords.py @@ -1617,6 +1617,14 @@ def __eq__(self, other): # The __ne__ operator from Coord implements the not __eq__ method. + # For Python 3, we must explicitly re-implement the '__hash__' method, as + # defining an '__eq__' has blocked its inheritance. See ... + # https://docs.python.org/3.1/reference/datamodel.html#object.__hash__ + # "If a class that overrides __eq__() needs to retain the + # implementation of __hash__() from a parent class, the interpreter + # must be told this explicitly". + __hash__ = Coord.__hash__ + def __getitem__(self, key): coord = super(DimCoord, self).__getitem__(key) coord.circular = self.circular and coord.shape == self.shape