From 98be00c4604536d5138055dccb741752bf7397b7 Mon Sep 17 00:00:00 2001
From: Jeff Whitaker
Date: Mon, 12 Mar 2018 13:08:26 -0600
Subject: [PATCH] return views with numpy strings in compound types (issue #773)

---
 Changelog            |  2 ++
 netCDF4/_netCDF4.pyx | 51 +++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 52 insertions(+), 1 deletion(-)

diff --git a/Changelog b/Changelog
index d20500990..464c8bacf 100644
--- a/Changelog
+++ b/Changelog
@@ -17,6 +17,8 @@
    into a single string ('foobar'). In NETCDF3/NETCDF4_CLASSIC, an IOError
    is now raised, instead of writing 'foobar'. Issue #770.
  * fix loading of enum type names (issue #775).
+ * automatically create views of compound types with character arrays as
+   numpy strings (issue #773).
 
  version 1.3.1 (tag v1.3.1rel)
 =============================
diff --git a/netCDF4/_netCDF4.pyx b/netCDF4/_netCDF4.pyx
index 2701d67df..d84aec738 100644
--- a/netCDF4/_netCDF4.pyx
+++ b/netCDF4/_netCDF4.pyx
@@ -4052,6 +4052,13 @@ rename a `netCDF4.Variable` attribute named `oldname` to `newname`."""
                     if matchdim:
                         data = chartostring(data, encoding=encoding)
 
+        # if structure array contains char arrays, return view as strings
+        # if _Encoding att set (issue #773)
+        if self._iscompound and \
+           self._cmptype.dtype != self._cmptype.dtype_view:
+            encoding = getattr(self,'_Encoding',None)
+            if encoding is not None:
+                data = data.view(self._cmptype.dtype_view)
         return data
 
     def _toma(self,data):
@@ -4306,6 +4313,15 @@ cannot be safely cast to variable data type""" % attname
                     # of characters with one more dimension.
                     data = stringtochar(data, encoding=encoding)
 
+        # if structured data has strings (and _Encoding att set), create view as char arrays
+        # (issue #773)
+        if self._iscompound and \
+           self._cmptype.dtype != self._cmptype.dtype_view and \
+           data.dtype == self._cmptype.dtype_view:
+            encoding = getattr(self,'_Encoding',None)
+            if encoding is not None:
+                data = data.view(self._cmptype.dtype)
+
         if self._isvlen: # if vlen, should be object array (don't try casting)
             if self.dtype == str:
                 # for string vars, if data is not an array
@@ -4947,7 +4963,7 @@ The instance variables `dtype` and `name` should not be modified by
 the user.
     """
     cdef public nc_type _nc_type
-    cdef public dtype, name
+    cdef public dtype, dtype_view, name
     __pdoc__['CompoundType.name'] = \
     """String name."""
     __pdoc__['CompoundType.dtype'] = \
@@ -4988,12 +5004,15 @@ the user.
         # evidence suggests that segfaults occur if this
         # alignment step is skipped - see issue #705).
         dt = _set_alignment(numpy.dtype(dt))
+        # create a view datatype for converting char arrays to strings
+        dtview = _set_viewdtype(numpy.dtype(dt))
         if 'typeid' in kwargs:
             xtype = kwargs['typeid']
         else:
             xtype = _def_compound(grp, dt, dtype_name)
         self._nc_type = xtype
         self.dtype = dt
+        self.dtype_view = dtview
         self.name = dtype_name
 
     def __repr__(self):
@@ -5031,6 +5050,36 @@ def _set_alignment(dt):
     dtype_dict = {'names':names,'formats':formats}
     return numpy.dtype(dtype_dict, align=True)
 
+def _set_viewdtype(dt):
+    # Build the "view" dtype for a compound type: same field names as dt,
+    # but every field that is a 1-d array of single characters ('S1') is
+    # replaced by a fixed-length numpy string ('S<n>', n = array length).
+    # Nested compound fields are converted recursively; arrays of compound
+    # types raise TypeError.  The result is built with align=True.
+    names = dt.names; formats = []
+    for name in names:
+        fmt = dt.fields[name][0]
+        if fmt.kind == 'V':
+            if fmt.shape == ():
+                # nested compound: recurse so that character arrays inside
+                # it are converted as well.
+                dtx = _set_viewdtype(fmt)
+            else:
+                if fmt.subdtype[0].kind == 'V': # structured dtype
+                    raise TypeError('nested structured dtype arrays not supported')
+                elif fmt.subdtype[0].kind == 'S' and fmt.subdtype[0].itemsize == 1 and len(fmt.shape) == 1:
+                    # n single characters occupy the same n bytes as 'S<n>'
+                    dtx = numpy.dtype('S%s' % fmt.shape[0])
+                else:
+                    # any other sub-array is kept unchanged
+                    dtx = fmt
+        else:
+            # primitive data type
+            dtx = fmt
+        formats.append(dtx)
+    dtype_dict = {'names':names,'formats':formats}
+    return numpy.dtype(dtype_dict, align=True)
+
 cdef _def_compound(grp, object dt, object dtype_name):
     # private function used to construct a netcdf compound data type
     # from a numpy dtype object by CompoundType.__init__.