Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Py3k consistent unicode handling #1782

Closed
wants to merge 17 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion lib/iris/_cube_coord_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def rename(self, name):
self.long_name = None
except ValueError:
self.standard_name = None
self.long_name = unicode(name)
self.long_name = six.text_type(name)

# Always clear var_name when renaming.
self.var_name = None
Expand Down
2 changes: 1 addition & 1 deletion lib/iris/_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -1361,7 +1361,7 @@ def name_in_independents():
# string like).
dim_by_name[name] = dim = len(self._shape)
self._nd_names.append(name)
if metadata[name].points_dtype.kind == 'S':
if metadata[name].points_dtype.kind in 'SU':
self._aux_templates.append(
_Template(dim, points, bounds, kwargs))
else:
Expand Down
2 changes: 1 addition & 1 deletion lib/iris/analysis/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1930,7 +1930,7 @@ def _compute_shared_coords(self):

# Create new shared bounded coordinates.
for coord in self._shared_coords:
if coord.points.dtype.kind == 'S':
if coord.points.dtype.kind in 'SU':
if coord.bounds is None:
new_points = []
new_bounds = None
Expand Down
2 changes: 1 addition & 1 deletion lib/iris/coord_categorisation.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ def add_categorised_coord(cube, name, from_coord, category_function,
result = category_function(from_coord, from_coord.points.ravel()[0])
if isinstance(result, six.string_types):
str_vectorised_fn = np.vectorize(category_function, otypes=[object])
vectorised_fn = lambda *args: str_vectorised_fn(*args).astype('|S64')
vectorised_fn = lambda *args: str_vectorised_fn(*args).astype('|U64')
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This changes the Python 2 behaviour.

else:
vectorised_fn = np.vectorize(category_function)
new_coord = iris.coords.AuxCoord(vectorised_fn(from_coord,
Expand Down
4 changes: 2 additions & 2 deletions lib/iris/coords.py
Original file line number Diff line number Diff line change
Expand Up @@ -963,10 +963,10 @@ def collapsed(self, dims_to_collapse=None):
for index in np.ndindex(shape):
index_slice = (slice(None),) + tuple(index)
bounds.append(serialize(self.bounds[index_slice]))
dtype = np.dtype('S{}'.format(max(map(len, bounds))))
dtype = np.dtype('U{}'.format(max(map(len, bounds))))
bounds = np.array(bounds, dtype=dtype).reshape((1,) + shape)
points = serialize(self.points)
dtype = np.dtype('S{}'.format(len(points)))
dtype = np.dtype('U{}'.format(len(points)))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These change the Python 2 behaviour. i.e. If I collapse a cube and get the value of a textual, collapsed coordinate it will now be an instance of numpy.unicode_ instead of an instance of numpy.string_. User code doing isinstance(value, str) will fail.

# Create the new collapsed coordinate.
coord = self.copy(points=np.array(points, dtype=dtype),
bounds=bounds)
Expand Down
17 changes: 12 additions & 5 deletions lib/iris/cube.py
Original file line number Diff line number Diff line change
Expand Up @@ -1863,7 +1863,7 @@ def vector_summary(vector_coords, cube_header, max_line_offset):
if self.attributes:
attribute_lines = []
for name, value in sorted(six.iteritems(self.attributes)):
value = iris.util.clip_string(unicode(value))
value = iris.util.clip_string(six.text_type(value))
line = u'{pad:{width}}{name}: {value}'.format(pad=' ',
width=indent,
name=name,
Expand Down Expand Up @@ -1893,7 +1893,11 @@ def assert_valid(self):
warnings.warn('Cube.assert_valid() has been deprecated.')

def __str__(self):
return self.summary().encode(errors='replace')
# six has a decorator for this bit, but it doesn't do errors='replace'.
if six.PY3:
return self.summary()
else:
return self.summary().encode(errors='replace')

def __unicode__(self):
return self.summary()
Expand Down Expand Up @@ -2302,7 +2306,8 @@ def _as_list_of_coords(self, names_or_coords):
Convert a name, coord, or list of names/coords to a list of coords.
"""
# If not iterable, convert to list of a single item
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comment is a little misleading with py3k strings having gained an __iter__ method...

if not hasattr(names_or_coords, '__iter__'):
if (not hasattr(names_or_coords, '__iter__') or
isinstance(names_or_coords, str)):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could these checks be simplified to just checking if names_or_coords is a str? This will be true regardless of Python 2 or 3...

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would also need to check for a Coord as well, I guess?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't believe so - this is just checking whether the input is of type string or list. The check on whether names_or_coords contains one or more Coords happens on lines 2315-2316.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This method takes a str name, Coord or list/tuple/iterable of the same. This check makes it a list if it's not, so you can't just check if it's str.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree with the new logic in these "iterable but not a string" cases -- i.e. it works and does what we want.
But I think the reason it works is a bit obscure, so I'd prefer :

    def is_iterable_but_not_string(x):
        if isinstance(x, six.string_types):
            result = False
        else:
            result = isinstance(x, collections.Iterable)
        return result

My reasoning is that this test really does just the same thing for either Python2 or 3.
Whereas, the proposed code relies on the slightly odd fact that strings don't provide an `__iter__` in Python 2 (even though you can use "iter(string)", and even though they do have an `__iter__` in Python 3): without that quirk, this would not work for a unicode object, because a unicode instance isn't a 'str'.

So, I'm suggesting we should really check for an instance of "basestring" first (equivalently, "six.string_types" for portability).
However, it then also occurred to me that "hasattr(x, '__iter__')" is not a terribly good test, as objects don't need to have an `__iter__` method to be iterable, as str+unicode themselves show: in fact, you can use "iter(obj)" if obj just has a `__getitem__` — though frustratingly, that does not make it pass the "isinstance(obj, collections.Iterable)" test!

Anyway, I believe the above form is more consistent with our usage elsewhere.
e.g. - https://github.com/QuLogic/iris/blob/py3k-unicode/lib/iris/fileformats/cf.py#L1077

names_or_coords = [names_or_coords]

coords = []
Expand Down Expand Up @@ -2348,7 +2353,8 @@ def slices_over(self, ref_to_slice):

"""
# Required to handle a mix between types.
if not hasattr(ref_to_slice, '__iter__'):
if (not hasattr(ref_to_slice, '__iter__') or
isinstance(ref_to_slice, str)):
ref_to_slice = [ref_to_slice]

slice_dims = set()
Expand Down Expand Up @@ -2408,7 +2414,8 @@ def slices(self, ref_to_slice, ordered=True):
raise TypeError("'ordered' argument to slices must be boolean.")

# Required to handle a mix between types
if not hasattr(ref_to_slice, '__iter__'):
if (not hasattr(ref_to_slice, '__iter__') or
isinstance(ref_to_slice, str)):
ref_to_slice = [ref_to_slice]

dim_to_slice = []
Expand Down
2 changes: 1 addition & 1 deletion lib/iris/fileformats/_pyke_rules/fc_rules_cf.krb
Original file line number Diff line number Diff line change
Expand Up @@ -913,7 +913,7 @@ fc_extras
# Set the cube global attributes.
for attr_name, attr_value in six.iteritems(cf_var.cf_group.global_attributes):
try:
if isinstance(attr_value, unicode):
if six.PY2 and isinstance(attr_value, six.text_type):
try:
cube.attributes[str(attr_name)] = str(attr_value)
except UnicodeEncodeError:
Expand Down
20 changes: 14 additions & 6 deletions lib/iris/fileformats/cf.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,12 @@
ocean_s_coordinate_g2=['eta', 'depth'])


# NetCDF returns a different type for strings depending on Python version.
def _is_str_dtype(var):
return ((six.PY2 and np.issubdtype(var.dtype, np.str)) or
(six.PY3 and np.issubdtype(var.dtype, np.bytes_)))


################################################################################
class CFVariable(six.with_metaclass(ABCMeta, object)):
"""Abstract base class wrapper for a CF-netCDF variable."""
Expand Down Expand Up @@ -313,7 +319,7 @@ def identify(cls, variables, ignore=None, target=None, warn=True):
warnings.warn(message % (name, nc_var_name))
else:
# Restrict to non-string type i.e. not a CFLabelVariable.
if not np.issubdtype(variables[name].dtype, np.str):
if not _is_str_dtype(variables[name]):
result[name] = CFAuxiliaryCoordinateVariable(name, variables[name])

return result
Expand Down Expand Up @@ -478,7 +484,7 @@ def identify(cls, variables, ignore=None, target=None, warn=True, monotonic=Fals
if nc_var_name in ignore:
continue
# String variables can't be coordinates
if np.issubdtype(nc_var.dtype, np.str):
if _is_str_dtype(nc_var):
continue
# Restrict to one-dimensional with name as dimension OR zero-dimensional scalar
if not ((nc_var.ndim == 1 and nc_var_name in nc_var.dimensions) or (nc_var.ndim == 0)):
Expand Down Expand Up @@ -638,8 +644,9 @@ def identify(cls, variables, ignore=None, target=None, warn=True):
warnings.warn(message % (name, nc_var_name))
else:
# Restrict to only string type.
if np.issubdtype(variables[name].dtype, np.str):
result[name] = CFLabelVariable(name, variables[name])
if _is_str_dtype(variables[name]):
var = variables[name]
result[name] = CFLabelVariable(name, var)

return result

Expand Down Expand Up @@ -683,7 +690,7 @@ def cf_label_data(self, cf_data_var):

# Calculate new label data shape (without string dimension) and create payload array.
new_shape = tuple(dim_len for i, dim_len in enumerate(self.shape) if i != str_dim)
data = np.empty(new_shape, dtype='|S%d' % self.shape[str_dim])
data = np.empty(new_shape, dtype='|U%d' % self.shape[str_dim])

for index in np.ndindex(new_shape):
# Create the slice for the label data.
Expand All @@ -692,7 +699,8 @@ def cf_label_data(self, cf_data_var):
else:
label_index = index + (slice(None, None),)

data[index] = ''.join(label_data[label_index]).strip()
data[index] = b''.join(label_data[label_index]).strip().decode(
'utf8')

return data

Expand Down
14 changes: 8 additions & 6 deletions lib/iris/fileformats/grib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,11 +64,13 @@
_load_rules = None


CENTRE_TITLES = {'egrr': 'U.K. Met Office - Exeter',
'ecmf': 'European Centre for Medium Range Weather Forecasts',
'rjtd': 'Tokyo, Japan Meteorological Agency',
'55' : 'San Francisco',
'kwbc': 'US National Weather Service, National Centres for Environmental Prediction'}
CENTRE_TITLES = {
'egrr': u'U.K. Met Office - Exeter',
'ecmf': u'European Centre for Medium Range Weather Forecasts',
'rjtd': u'Tokyo, Japan Meteorological Agency',
'55': u'San Francisco',
'kwbc': u'US National Weather Service, National Centres for Environmental '
u'Prediction'}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a change of behaviour for Python 2.


TIME_RANGE_INDICATORS = {0:'none', 1:'none', 3:'time mean', 4:'time sum',
5:'time _difference', 10:'none',
Expand Down Expand Up @@ -449,7 +451,7 @@ def _compute_extra_keys(self):
#originating centre
#TODO #574 Expand to include sub-centre
self.extra_keys['_originatingCentre'] = CENTRE_TITLES.get(
centre, "unknown centre %s" % centre)
centre, u'unknown centre %s' % centre)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a change of behaviour for Python 2.


#forecast time unit as a cm string
#TODO #575 Do we want PP or GRIB style forecast delta?
Expand Down
2 changes: 1 addition & 1 deletion lib/iris/fileformats/name.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def _get_NAME_loader(filename):
import iris.fileformats.name_loaders as name_loaders

load = None
with open(filename, 'r') as file_handle:
with open(filename, 'rb') as file_handle:
header = name_loaders.read_header(file_handle)

# Infer file type based on contents of header.
Expand Down
49 changes: 29 additions & 20 deletions lib/iris/fileformats/name_loaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,11 +78,11 @@ def read_header(file_handle):
header = {}
header['NAME Version'] = next(file_handle).strip()
for line in file_handle:
words = line.split(':', 1)
words = line.split(b':', 1)
if len(words) != 2:
break
key, value = [word.strip() for word in words]
header[key] = value
header[key.decode()] = value
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It applies to most/all of the changes to the NAME loader, but this is a change to the Python 2 behaviour.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since the keys are always strings, I'm not sure it makes much difference in this case. Python 2 is very lax about mixing str and unicode keys, e.g. `'foo' in {u'foo': 1}` evaluates to True.


# Cast some values into floats or integers if they match a
# given name. Set any empty string values to None.
Expand All @@ -97,6 +97,8 @@ def read_header(file_handle):
'Number of fields',
'Number of series']:
header[key] = int(value)
else:
header[key] = value.decode()
else:
header[key] = None

Expand All @@ -118,7 +120,7 @@ def _read_data_arrays(file_handle, n_arrays, shape):
for line in file_handle:
# Split the line by comma, removing the last empty column
# caused by the trailing comma
vals = line.split(',')[:-1]
vals = line.split(b',')[:-1]

# Cast the x and y grid positions to integers and convert
# them to zero based indices
Expand Down Expand Up @@ -518,7 +520,7 @@ def load_NAMEIII_field(filename):
# Loading a file gives a generator of lines which can be progressed using
# the next() function. This will come in handy as we wish to progress
# through the file line by line.
with open(filename, 'r') as file_handle:
with open(filename, 'rb') as file_handle:
# Create a dictionary which can hold the header metadata about this
# file.
header = read_header(file_handle)
Expand All @@ -536,7 +538,8 @@ def load_NAMEIII_field(filename):
'Vertical Av or Int', 'Prob Perc',
'Prob Perc Ens', 'Prob Perc Time',
'Time', 'Z', 'D']:
cols = [col.strip() for col in next(file_handle).split(',')]
cols = [col.strip()
for col in next(file_handle).decode().split(',')]
column_headings[column_header_name] = cols[4:-1]

# Convert the time to python datetimes.
Expand Down Expand Up @@ -588,7 +591,7 @@ def load_NAMEII_field(filename):
A generator :class:`iris.cube.Cube` instances.

"""
with open(filename, 'r') as file_handle:
with open(filename, 'rb') as file_handle:
# Create a dictionary which can hold the header metadata about this
# file.
header = read_header(file_handle)
Expand All @@ -607,7 +610,8 @@ def load_NAMEII_field(filename):
for column_header_name in ['Species Category', 'Species',
'Time Av or Int', 'Quantity',
'Unit', 'Z', 'Time']:
cols = [col.strip() for col in next(file_handle).split(',')]
cols = [col.strip()
for col in next(file_handle).decode().split(',')]
column_headings[column_header_name] = cols[4:-1]

# Convert the time to python datetimes
Expand Down Expand Up @@ -667,7 +671,7 @@ def load_NAMEIII_timeseries(filename):
A generator :class:`iris.cube.Cube` instances.

"""
with open(filename, 'r') as file_handle:
with open(filename, 'rb') as file_handle:
# Create a dictionary which can hold the header metadata about this
# file.
header = read_header(file_handle)
Expand All @@ -683,7 +687,8 @@ def load_NAMEIII_timeseries(filename):
'Vertical Av or Int', 'Prob Perc',
'Prob Perc Ens', 'Prob Perc Time',
'Location', 'X', 'Y', 'Z', 'D']:
cols = [col.strip() for col in next(file_handle).split(',')]
cols = [col.strip()
for col in next(file_handle).decode().split(',')]
column_headings[column_header_name] = cols[1:-1]

# Determine the coordinates of the data and store in namedtuples.
Expand All @@ -707,10 +712,10 @@ def load_NAMEIII_timeseries(filename):
for line in file_handle:
# Split the line by comma, removing the last empty column caused
# by the trailing comma.
vals = line.split(',')[:-1]
vals = line.split(b',')[:-1]

# Time is stored in the first column.
t = vals[0].strip()
t = vals[0].decode().strip()
dt = datetime.datetime.strptime(t, NAMEIII_DATETIME_FORMAT)
time_list.append(dt)

Expand Down Expand Up @@ -741,7 +746,7 @@ def load_NAMEII_timeseries(filename):
A generator :class:`iris.cube.Cube` instances.

"""
with open(filename, 'r') as file_handle:
with open(filename, 'rb') as file_handle:
# Create a dictionary which can hold the header metadata about this
# file.
header = read_header(file_handle)
Expand All @@ -751,7 +756,8 @@ def load_NAMEII_timeseries(filename):
for column_header_name in ['Y', 'X', 'Location',
'Species Category', 'Species',
'Quantity', 'Z', 'Unit']:
cols = [col.strip() for col in next(file_handle).split(',')]
cols = [col.strip()
for col in next(file_handle).decode().split(',')]
column_headings[column_header_name] = cols[1:-1]

# Determine the coordinates of the data and store in namedtuples.
Expand All @@ -771,10 +777,10 @@ def load_NAMEII_timeseries(filename):
for line in file_handle:
# Split the line by comma, removing the last empty column caused
# by the trailing comma.
vals = line.split(',')[:-1]
vals = line.split(b',')[:-1]

# Time is stored in the first two columns.
t = (vals[0].strip() + ' ' + vals[1].strip())
t = (vals[0].strip() + b' ' + vals[1].strip()).decode()
dt = datetime.datetime.strptime(
t, NAMEII_TIMESERIES_DATETIME_FORMAT)
time_list.append(dt)
Expand Down Expand Up @@ -809,21 +815,22 @@ def load_NAMEIII_trajectory(filename):
time_unit = iris.unit.Unit('hours since epoch',
calendar=iris.unit.CALENDAR_GREGORIAN)

with open(filename, 'r') as infile:
with open(filename, 'rb') as infile:
header = read_header(infile)

# read the column headings
for line in infile:
if line.startswith(" "):
if line.startswith(b' '):
break
headings = [heading.strip() for heading in line.split(",")]
headings = [heading.strip() for heading in line.decode().split(',')]

# read the columns
columns = [[] for i in range(len(headings))]
for line in infile:
values = [v.strip() for v in line.split(",")]
values = [v.strip() for v in line.split(b',')]
for c, v in enumerate(values):
if "UTC" in v:
if b'UTC' in v:
v = v.decode()
v = v.replace(":00 ", " ") # Strip out milliseconds.
v = datetime.datetime.strptime(v, NAMEIII_DATETIME_FORMAT)
else:
Expand Down Expand Up @@ -872,6 +879,8 @@ def load_NAMEIII_trajectory(filename):
elif name == "Z (FL)":
name = "flight_level"
long_name = name
elif values[0].dtype.kind == 'S':
values = [v.decode() for v in values]

try:
coord = DimCoord(values, units=units)
Expand Down
4 changes: 3 additions & 1 deletion lib/iris/fileformats/netcdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,7 @@ def _pyke_stats(engine, cf_name):
def _set_attributes(attributes, key, value):
"""Set attributes dictionary, converting unicode strings appropriately."""

if isinstance(value, unicode):
if isinstance(value, six.text_type):
try:
attributes[str(key)] = str(value)
except UnicodeEncodeError:
Expand Down Expand Up @@ -1236,6 +1236,8 @@ def _create_cf_variable(self, cube, dimension_names, coord):

if np.issubdtype(coord.points.dtype, np.str):
string_dimension_depth = coord.points.dtype.itemsize
if coord.points.dtype.kind == 'U':
string_dimension_depth //= 4
string_dimension_name = 'string%d' % string_dimension_depth

# Determine whether to create the string length dimension.
Expand Down
Loading