From 56a2f12290083b676ae92234375c60e326fd832a Mon Sep 17 00:00:00 2001 From: Elliott Sales de Andrade Date: Mon, 6 Jul 2015 18:37:51 -0400 Subject: [PATCH 1/3] py3k: Handle strings from netCDF consistently. --- lib/iris/fileformats/cf.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/lib/iris/fileformats/cf.py b/lib/iris/fileformats/cf.py index 4fceb8b666..f840f43a5b 100644 --- a/lib/iris/fileformats/cf.py +++ b/lib/iris/fileformats/cf.py @@ -70,6 +70,12 @@ ocean_s_coordinate_g2=['eta', 'depth']) +# NetCDF returns a different type for strings depending on Python version. +def _is_str_dtype(var): + return ((six.PY2 and np.issubdtype(var.dtype, np.str)) or + (six.PY3 and np.issubdtype(var.dtype, np.bytes_))) + + ################################################################################ class CFVariable(six.with_metaclass(ABCMeta, object)): """Abstract base class wrapper for a CF-netCDF variable.""" @@ -313,7 +319,7 @@ def identify(cls, variables, ignore=None, target=None, warn=True): warnings.warn(message % (name, nc_var_name)) else: # Restrict to non-string type i.e. not a CFLabelVariable. - if not np.issubdtype(variables[name].dtype, np.str): + if not _is_str_dtype(variables[name]): result[name] = CFAuxiliaryCoordinateVariable(name, variables[name]) return result @@ -478,7 +484,7 @@ def identify(cls, variables, ignore=None, target=None, warn=True, monotonic=Fals if nc_var_name in ignore: continue # String variables can't be coordinates - if np.issubdtype(nc_var.dtype, np.str): + if _is_str_dtype(nc_var): continue # Restrict to one-dimensional with name as dimension OR zero-dimensional scalar if not ((nc_var.ndim == 1 and nc_var_name in nc_var.dimensions) or (nc_var.ndim == 0)): @@ -638,8 +644,9 @@ def identify(cls, variables, ignore=None, target=None, warn=True): warnings.warn(message % (name, nc_var_name)) else: # Restrict to only string type. - if np.issubdtype(variables[name].dtype, np.str): - result[name] = CFLabelVariable(name, variables[name]) + if _is_str_dtype(variables[name]): + var = variables[name] + result[name] = CFLabelVariable(name, var) return result @@ -683,7 +690,7 @@ def cf_label_data(self, cf_data_var): # Calculate new label data shape (without string dimension) and create payload array. new_shape = tuple(dim_len for i, dim_len in enumerate(self.shape) if i != str_dim) - data = np.empty(new_shape, dtype='|S%d' % self.shape[str_dim]) + data = np.empty(new_shape, dtype='|U%d' % self.shape[str_dim]) for index in np.ndindex(new_shape): # Create the slice for the label data. @@ -692,7 +699,8 @@ def cf_label_data(self, cf_data_var): else: label_index = index + (slice(None, None),) - data[index] = ''.join(label_data[label_index]).strip() + data[index] = b''.join(label_data[label_index]).strip().decode( + 'utf8') return data From 529709cbab3c133b394b245568c121b916dbe095 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Thu, 5 Nov 2015 16:55:40 +0000 Subject: [PATCH 2/3] py3k: Handle strings from netCDF consistently : Modified = preserve auxcoord string datatype in Python 2. --- lib/iris/fileformats/cf.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/lib/iris/fileformats/cf.py b/lib/iris/fileformats/cf.py index f840f43a5b..7c3c848f6c 100644 --- a/lib/iris/fileformats/cf.py +++ b/lib/iris/fileformats/cf.py @@ -690,7 +690,9 @@ def cf_label_data(self, cf_data_var): # Calculate new label data shape (without string dimension) and create payload array. new_shape = tuple(dim_len for i, dim_len in enumerate(self.shape) if i != str_dim) - data = np.empty(new_shape, dtype='|U%d' % self.shape[str_dim]) + string_basetype = '|S%d' if six.PY2 else '|U%d' + string_dtype = string_basetype % self.shape[str_dim] + data = np.empty(new_shape, dtype=string_dtype) for index in np.ndindex(new_shape): # Create the slice for the label data. @@ -699,8 +701,10 @@ def cf_label_data(self, cf_data_var): else: label_index = index + (slice(None, None),) - data[index] = b''.join(label_data[label_index]).strip().decode( - 'utf8') + label_string = b''.join(label_data[label_index]).strip() + if six.PY3: + label_string = label_string.decode('utf8') + data[index] = label_string return data From 747fddba2750d5ba70cef90783bf52596e7f459a Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Thu, 12 Nov 2015 12:37:23 +0000 Subject: [PATCH 3/3] Review change - code improvement. --- lib/iris/fileformats/cf.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/iris/fileformats/cf.py b/lib/iris/fileformats/cf.py index 7c3c848f6c..92faa7fd9a 100644 --- a/lib/iris/fileformats/cf.py +++ b/lib/iris/fileformats/cf.py @@ -643,9 +643,9 @@ def identify(cls, variables, ignore=None, target=None, warn=True): message = 'Missing CF-netCDF label variable %r, referenced by netCDF variable %r' warnings.warn(message % (name, nc_var_name)) else: - # Restrict to only string type. - if _is_str_dtype(variables[name]): - var = variables[name] + # Register variable, but only allow string type. + var = variables[name] + if _is_str_dtype(var): result[name] = CFLabelVariable(name, var) return result