SciTools · trexfeathers · Jul 2, 2024 · Jul 2, 2024 · Oct 7, 2024 · Oct 7, 2024
diff --git a/docs/src/whatsnew/latest.rst b/docs/src/whatsnew/latest.rst
@@ -24,6 +24,10 @@ This document explains the changes made to Iris for this release
 📢 Announcements
 ================
 
+.. todo::
+    Warn people about changes in NumPy 2. Better data type handling means
+    some of their Cubes will have different data types from before.
+
 #. N/A
 
 

diff --git a/lib/iris/_representation/cube_summary.py b/lib/iris/_representation/cube_summary.py
@@ -47,6 +47,8 @@ def __init__(self, cube, name_padding=35):
 
 def string_repr(text, quote_strings=False, clip_strings=False):
     """Produce a one-line printable form of a text string."""
+    # Convert any np.str_ instances to plain strings.
+    text = str(text)
     force_quoted = re.findall("[\n\t]", text) or quote_strings
     if force_quoted:
         # Replace the string with its repr (including quotes).

diff --git a/lib/iris/analysis/_regrid.py b/lib/iris/analysis/_regrid.py
@@ -156,22 +156,30 @@ def _src_align_and_flatten(coord):
     #
 
     # Wrap modular values (e.g. longitudes) if required.
-    modulus = sx.units.modulus
+    _modulus = sx.units.modulus
+    # Convert to NumPy scalar to enable cast checking.
+    modulus = np.min_scalar_type(_modulus).type(_modulus)
+
+    def _cast_sx_points(sx_points_: np.ndarray):
+        """Ensure modulus arithmetic will not raise a TypeError."""
+        if not np.can_cast(modulus, sx_points_.dtype):
+            new_type = np.promote_types(sx_points_.dtype, modulus.dtype)
+            result = sx_points_.astype(new_type, casting="safe")
+        else:
+            result = sx_points_
+        return result
+
     if modulus is not None:
         # Match the source cube x coordinate range to the target grid
         # cube x coordinate range.
         min_sx, min_tx = np.min(sx.points), np.min(tx.points)
         if min_sx < 0 and min_tx >= 0:
             indices = np.where(sx_points < 0)
-            # Ensure += doesn't raise a TypeError
-            if not np.can_cast(modulus, sx_points.dtype):
-                sx_points = sx_points.astype(type(modulus), casting="safe")
+            sx_points = _cast_sx_points(sx_points)
             sx_points[indices] += modulus
         elif min_sx >= 0 and min_tx < 0:
             indices = np.where(sx_points > (modulus / 2))
-            # Ensure -= doesn't raise a TypeError
-            if not np.can_cast(modulus, sx_points.dtype):
-                sx_points = sx_points.astype(type(modulus), casting="safe")
+            sx_points = _cast_sx_points(sx_points)
             sx_points[indices] -= modulus
 
     # Create target grid cube x and y cell boundaries.

diff --git a/lib/iris/analysis/maths.py b/lib/iris/analysis/maths.py
@@ -867,7 +867,7 @@ def _binary_op_common(
         if iris._lazy_data.is_lazy_data(other):
             rhs = other
         else:
-            rhs = np.asanyarray(other)
+            rhs = np.asanyarray(other, dtype=new_dtype)
 
     def unary_func(lhs):
         data = operation_function(lhs, rhs)

diff --git a/lib/iris/fileformats/_ff.py b/lib/iris/fileformats/_ff.py
@@ -5,6 +5,7 @@
 """Provides UK Met Office Fields File (FF) format specific capabilities."""
 
 import os
+from typing import Any
 import warnings
 
 import numpy as np
@@ -370,9 +371,22 @@ def __init__(self, filename, word_depth=DEFAULT_FF_WORD_DEPTH):
                     setattr(self, elem, res)
 
     def __str__(self):
+        def _str_tuple(to_print: Any):
+            """Format NumPy scalars within tuples as numbers, not np objects.
+
+            E.g. ``lookup_table`` is a tuple of NumPy scalars.
+            NumPy v2 by default prints ``np.int32(1)`` instead of ``1`` when
+            printing an iterable of scalars.
+            """
+            if isinstance(to_print, tuple):
+                result = "(" + ", ".join([str(i) for i in to_print]) + ")"
+            else:
+                result = str(to_print)
+            return result
+
         attributes = []
         for name, _ in FF_HEADER:
-            attributes.append("    {}: {}".format(name, getattr(self, name)))
+            attributes.append(f"    {name}: {_str_tuple(getattr(self, name))}")
         return "FF Header:\n" + "\n".join(attributes)
 
     def __repr__(self):

diff --git a/lib/iris/fileformats/netcdf/saver.py b/lib/iris/fileformats/netcdf/saver.py
@@ -1387,9 +1387,13 @@ def _ensure_valid_dtype(self, values, src_name, src_object):
             val_min, val_max = (values.min(), values.max())
             if is_lazy_data(values):
                 val_min, val_max = _co_realise_lazy_arrays([val_min, val_max])
+            # val_min and val_max inherit values.dtype even if the numbers would
+            #  fit in a smaller type, so find the smallest dtype that holds both.
+            min_dtype = np.promote_types(
+                *[np.min_scalar_type(m) for m in (val_min, val_max)]
+            )
             # Cast to an integer type supported by netCDF3.
-            can_cast = all([np.can_cast(m, np.int32) for m in (val_min, val_max)])
-            if not can_cast:
+            if not np.can_cast(min_dtype, np.int32):
                 msg = (
                     "The data type of {} {!r} is not supported by {} and"
                     " its values cannot be safely cast to a supported"

diff --git a/lib/iris/fileformats/nimrod_load_rules.py b/lib/iris/fileformats/nimrod_load_rules.py
@@ -126,42 +126,41 @@ def units(cube, field):
         "n/a": "1",
     }
 
+    dtype_original = cube.dtype
     field_units = remove_unprintable_chars(field.units)
     if field_units == "m/2-25k":
         # Handle strange visibility units
-        cube.data = (cube.data.astype(np.float32) + 25000.0) * 2
+        cube.data = (cube.data + 25000.0) * 2
         field_units = "m"
     if "*" in field_units:
         # Split into unit string and integer
         unit_list = field_units.split("*")
         if "^" in unit_list[1]:
             # Split out magnitude
             unit_sublist = unit_list[1].split("^")
-            cube.data = cube.data.astype(np.float32) / float(unit_sublist[0]) ** float(
-                unit_sublist[1]
-            )
+            cube.data = cube.data / float(unit_sublist[0]) ** float(unit_sublist[1])
         else:
-            cube.data = cube.data.astype(np.float32) / float(unit_list[1])
+            cube.data = cube.data / float(unit_list[1])
         field_units = unit_list[0]
     if "ug/m3E1" in field_units:
         # Split into unit string and integer
         unit_list = field_units.split("E")
-        cube.data = cube.data.astype(np.float32) / 10.0
+        cube.data = cube.data / 10.0
         field_units = unit_list[0]
     if field_units == "%":
         # Convert any percentages into fraction
         field_units = "1"
-        cube.data = cube.data.astype(np.float32) / 100.0
+        cube.data = cube.data / 100.0
     if field_units == "oktas":
         field_units = "1"
-        cube.data = cube.data.astype(np.float32) / 8.0
+        cube.data = cube.data / 8.0
     if field_units == "dBZ":
         # cf_units doesn't recognise decibels (dBZ), but does know BZ
         field_units = "BZ"
-        cube.data = cube.data.astype(np.float32) / 10.0
+        cube.data = cube.data / 10.0
     if field_units == "g/Kg":
         field_units = "kg/kg"
-        cube.data = cube.data.astype(np.float32) / 1000.0
+        cube.data = cube.data / 1000.0
     if not field_units:
         if field.field_code == 8:
             # Relative Humidity data are unitless, but not "unknown"
@@ -175,6 +174,14 @@ def units(cube, field):
         # Deal with the case where the units are of the form '/unit' eg
         # '/second' in the Nimrod file. This converts to the form unit^-1
         field_units = field_units[1:] + "^-1"
+
+    if cube.dtype != dtype_original:
+        # Original development logic: if any arithmetic takes place, ensure
+        #  the data type is float32 (starts as an int). Unknown why.
+        #  Automatic casting is returning inconsistent types when masks are
+        #  involved, so the new logic is to do the casting as the final step.
+        cube.data = cube.data.astype(np.float32)
+
     try:
         cube.units = field_units
     except ValueError:

diff --git a/lib/iris/fileformats/pp.py b/lib/iris/fileformats/pp.py
@@ -11,6 +11,7 @@
 import os
 import re
 import struct
+from typing import Any
 import warnings
 
 import cf_units
@@ -950,6 +951,21 @@ def t2(self):
 
     def __repr__(self):
         """Return a string representation of the PP field."""
+
+        def _str_tuple(to_print: Any):
+            """Format NumPy scalars within tuples as numbers, not np objects.
+
+            E.g. ``lbuser`` is a tuple of NumPy scalars.
+
+            NumPy v2 by default prints ``np.int32(1)`` instead of ``1`` when
+            printing an iterable of scalars.
+            """
+            if isinstance(to_print, tuple):
+                result = "(" + ", ".join([str(i) for i in to_print]) + ")"
+            else:
+                result = str(to_print)
+            return result
+
         # Define an ordering on the basic header names
         attribute_priority_lookup = {name: loc[0] for name, loc in self.HEADER_DEFN}
 
@@ -975,9 +991,8 @@ def __repr__(self):
             ),
         )
 
-        return (
-            "PP Field" + "".join(["\n   %s: %s" % (k, v) for k, v in attributes]) + "\n"
-        )
+        contents = "".join([f"\n   {k}: {_str_tuple(v)}" for k, v in attributes])
+        return f"PP Field{contents}\n"
 
     @property
     def stash(self):
@@ -1178,7 +1193,7 @@ def save(self, file_handle):
             data.dtype = data.dtype.newbyteorder(">")
 
         # Create the arrays which will hold the header information
-        lb = np.empty(shape=NUM_LONG_HEADERS, dtype=np.dtype(">u%d" % PP_WORD_DEPTH))
+        lb = np.empty(shape=NUM_LONG_HEADERS, dtype=np.dtype(">i%d" % PP_WORD_DEPTH))
         b = np.empty(shape=NUM_FLOAT_HEADERS, dtype=np.dtype(">f%d" % PP_WORD_DEPTH))
 
         # Fill in the header elements from the PPField

diff --git a/lib/iris/pandas.py b/lib/iris/pandas.py
@@ -173,7 +173,7 @@ def as_cube(
     # 1.6 doesn't. Since we don't care about preserving the order we can
     # just force it back to C-order.)
     order = "C" if copy else "A"
-    data = np.array(pandas_array, copy=copy, order=order)
+    data = np.array(pandas_array.values, copy=copy, order=order)
     cube = Cube(np.ma.masked_invalid(data, copy=False))
     _add_iris_coord(cube, "index", pandas_array.index, 0, calendars.get(0, None))
     if pandas_array.ndim == 2:

diff --git a/lib/iris/tests/experimental/regrid/test_regrid_area_weighted_rectilinear_src_and_grid.py b/lib/iris/tests/experimental/regrid/test_regrid_area_weighted_rectilinear_src_and_grid.py
@@ -319,7 +319,7 @@ def test_regrid_reorder_axis(self):
         dest = _resampled_grid(self.realistic_cube[0, 0, :3, :2], 3, 3)
         res = regrid_area_weighted(src, dest)
         self.assertArrayShapeStats(src, (4, 3, 2), 288.08868, 0.008262919)
-        self.assertArrayShapeStats(res, (4, 9, 6), 288.08865, 0.00826281)
+        self.assertArrayShapeStats(res, (4, 9, 6), 288.0886, 0.008271061)
         # Reshape src so that the coords are ordered [x, z, y],
         # the mean and std statistics should be the same
         data = np.moveaxis(src.data.copy(), 2, 0)
@@ -329,7 +329,7 @@ def test_regrid_reorder_axis(self):
         src.add_dim_coord(lon, 0)
         res = regrid_area_weighted(src, dest)
         self.assertArrayShapeStats(src, (2, 4, 3), 288.08868, 0.008262919)
-        self.assertArrayShapeStats(res, (6, 4, 9), 288.08865, 0.00826281)
+        self.assertArrayShapeStats(res, (6, 4, 9), 288.0886, 0.008271061)
         # Reshape src so that the coords are ordered [y, x, z],
         # the mean and std statistics should be the same
         data = np.moveaxis(src.data.copy(), 2, 0)
@@ -340,7 +340,7 @@ def test_regrid_reorder_axis(self):
         dest = _resampled_grid(self.realistic_cube[0, 0, :3, :2], 3, 3)
         res = regrid_area_weighted(src, dest)
         self.assertArrayShapeStats(src, (3, 2, 4), 288.08868, 0.008262919)
-        self.assertArrayShapeStats(res, (9, 6, 4), 288.08865, 0.00826281)
+        self.assertArrayShapeStats(res, (9, 6, 4), 288.0886, 0.008271061)
 
     def test_regrid_lon_to_half_res(self):
         src = self.simple_cube

diff --git a/lib/iris/tests/integration/fast_load/test_fast_load.py b/lib/iris/tests/integration/fast_load/test_fast_load.py
@@ -375,7 +375,7 @@ def callback(cube, collation, filename):
             # This is actually a NumPy int32, so honour that here.
             expected[0].attributes["LBVC"] = np.int32(8)
         else:
-            expected[0].attributes["A_LBVC"] = [8, 8]
+            expected[0].attributes["A_LBVC"] = [np.int32(8)] * 2
 
         self.assertEqual(results, expected)
 

diff --git a/lib/iris/tests/results/FF/air_temperature_1.cml b/lib/iris/tests/results/FF/air_temperature_1.cml
@@ -31,13 +31,13 @@
   32.5,  35. ,  37.5,  40. ,  42.5,  45. ,  47.5,
   50. ,  52.5,  55. ,  57.5,  60. ,  62.5,  65. ,
   67.5,  70. ,  72.5,  75. ,  77.5,  80. ,  82.5,
-  85. ,  87.5,  90. ]" shape="(73,)" standard_name="latitude" units="Unit('degrees')" value_type="float32">
+  85. ,  87.5,  90. ]" shape="(73,)" standard_name="latitude" units="Unit('degrees')" value_type="float64">
           <geogCS earth_radius="6371229.0"/>
         </dimCoord>
       </coord>
       <coord datadims="[1]">
         <dimCoord circular="True" id="f913a8b3" points="[  0.  ,   3.75,   7.5 , ..., 348.75, 352.5 ,
- 356.25]" shape="(96,)" standard_name="longitude" units="Unit('degrees')" value_type="float32">
+ 356.25]" shape="(96,)" standard_name="longitude" units="Unit('degrees')" value_type="float64">
           <geogCS earth_radius="6371229.0"/>
         </dimCoord>
       </coord>

diff --git a/lib/iris/tests/results/FF/air_temperature_2.cml b/lib/iris/tests/results/FF/air_temperature_2.cml
@@ -31,13 +31,13 @@
   32.5,  35. ,  37.5,  40. ,  42.5,  45. ,  47.5,
   50. ,  52.5,  55. ,  57.5,  60. ,  62.5,  65. ,
   67.5,  70. ,  72.5,  75. ,  77.5,  80. ,  82.5,
-  85. ,  87.5,  90. ]" shape="(73,)" standard_name="latitude" units="Unit('degrees')" value_type="float32">
+  85. ,  87.5,  90. ]" shape="(73,)" standard_name="latitude" units="Unit('degrees')" value_type="float64">
           <geogCS earth_radius="6371229.0"/>
         </dimCoord>
       </coord>
       <coord datadims="[1]">
         <dimCoord circular="True" id="f913a8b3" points="[  0.  ,   3.75,   7.5 , ..., 348.75, 352.5 ,
- 356.25]" shape="(96,)" standard_name="longitude" units="Unit('degrees')" value_type="float32">
+ 356.25]" shape="(96,)" standard_name="longitude" units="Unit('degrees')" value_type="float64">
           <geogCS earth_radius="6371229.0"/>
         </dimCoord>
       </coord>

diff --git a/lib/iris/tests/results/FF/soil_temperature_1.cml b/lib/iris/tests/results/FF/soil_temperature_1.cml
@@ -24,13 +24,13 @@
   32.5,  35. ,  37.5,  40. ,  42.5,  45. ,  47.5,
   50. ,  52.5,  55. ,  57.5,  60. ,  62.5,  65. ,
   67.5,  70. ,  72.5,  75. ,  77.5,  80. ,  82.5,
-  85. ,  87.5,  90. ]" shape="(73,)" standard_name="latitude" units="Unit('degrees')" value_type="float32">
+  85. ,  87.5,  90. ]" shape="(73,)" standard_name="latitude" units="Unit('degrees')" value_type="float64">
           <geogCS earth_radius="6371229.0"/>
         </dimCoord>
       </coord>
       <coord datadims="[1]">
         <dimCoord circular="True" id="f913a8b3" points="[  0.  ,   3.75,   7.5 , ..., 348.75, 352.5 ,
- 356.25]" shape="(96,)" standard_name="longitude" units="Unit('degrees')" value_type="float32">
+ 356.25]" shape="(96,)" standard_name="longitude" units="Unit('degrees')" value_type="float64">
           <geogCS earth_radius="6371229.0"/>
         </dimCoord>
       </coord>

diff --git a/lib/iris/tests/results/FF/surface_altitude_1.cml b/lib/iris/tests/results/FF/surface_altitude_1.cml
@@ -24,13 +24,13 @@
   32.5,  35. ,  37.5,  40. ,  42.5,  45. ,  47.5,
   50. ,  52.5,  55. ,  57.5,  60. ,  62.5,  65. ,
   67.5,  70. ,  72.5,  75. ,  77.5,  80. ,  82.5,
-  85. ,  87.5,  90. ]" shape="(73,)" standard_name="latitude" units="Unit('degrees')" value_type="float32">
+  85. ,  87.5,  90. ]" shape="(73,)" standard_name="latitude" units="Unit('degrees')" value_type="float64">
           <geogCS earth_radius="6371229.0"/>
         </dimCoord>
       </coord>
       <coord datadims="[1]">
         <dimCoord circular="True" id="f913a8b3" points="[  0.  ,   3.75,   7.5 , ..., 348.75, 352.5 ,
- 356.25]" shape="(96,)" standard_name="longitude" units="Unit('degrees')" value_type="float32">
+ 356.25]" shape="(96,)" standard_name="longitude" units="Unit('degrees')" value_type="float64">
           <geogCS earth_radius="6371229.0"/>
         </dimCoord>
       </coord>