From 6e568b190972cf3bd7fefb8f2ab017985289ee24 Mon Sep 17 00:00:00 2001 From: Peter Hill Date: Fri, 27 Oct 2023 14:52:27 +0100 Subject: [PATCH 01/11] Add nc_complex submodule --- .gitmodules | 3 +++ external/nc_complex | 1 + setup.py | 12 ++++++++++-- 3 files changed, 14 insertions(+), 2 deletions(-) create mode 100644 .gitmodules create mode 160000 external/nc_complex diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 000000000..7b0ff7c97 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "external/nc_complex"] + path = external/nc_complex + url = git@github.com:PlasmaFAIR/nc-complex.git diff --git a/external/nc_complex b/external/nc_complex new file mode 160000 index 000000000..dbeeb080f --- /dev/null +++ b/external/nc_complex @@ -0,0 +1 @@ +Subproject commit dbeeb080ff869ec3c4288d9ab840f0be201e30de diff --git a/setup.py b/setup.py index 31d32ac30..aa8b6eb20 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,6 @@ import os, sys, subprocess, glob import os.path as osp +import pathlib import shutil import configparser from setuptools import setup, Extension @@ -411,12 +412,19 @@ def _populate_hdf5_info(dirstosearch, inc_dirs, libs, lib_dirs): osp.join("include", "parallel_support_imports.pxi") ) + nc_complex_dir = pathlib.Path("./external/nc_complex") + source_files = [ + netcdf4_src_pyx, + str(nc_complex_dir / "src/nc_complex.c"), + ] + include_dirs = inc_dirs + ["include", str(nc_complex_dir / "include")] + ext_modules = [Extension("netCDF4._netCDF4", - [netcdf4_src_pyx], + source_files, define_macros=DEFINE_MACROS, libraries=libs, library_dirs=lib_dirs, - include_dirs=inc_dirs + ['include'], + include_dirs=include_dirs, runtime_library_dirs=runtime_lib_dirs)] # set language_level directive to 3 for e in ext_modules: From 36c871fe6317ab349d959dace05bc74dfc21bf70 Mon Sep 17 00:00:00 2001 From: Peter Hill Date: Mon, 6 Nov 2023 17:49:35 +0000 Subject: [PATCH 02/11] Add support for complex numbers --- external/nc_complex | 2 +- include/netCDF4.pxi | 18 +++++ setup.py | 7 +- src/netCDF4/_netCDF4.pyx | 171 +++++++++++++++++++++++++++------------ test/test_complex.py | 65 +++++++++++++++ 5 files changed, 211 insertions(+), 52 deletions(-) create mode 100644 test/test_complex.py diff --git a/external/nc_complex b/external/nc_complex index dbeeb080f..9c2a87703 160000 --- a/external/nc_complex +++ b/external/nc_complex @@ -1 +1 @@ -Subproject commit dbeeb080ff869ec3c4288d9ab840f0be201e30de +Subproject commit 9c2a87703c7612f04097418bbed4978d3be6ef92 diff --git a/include/netCDF4.pxi b/include/netCDF4.pxi index 7bd220274..8d232df8f 100644 --- a/include/netCDF4.pxi +++ b/include/netCDF4.pxi @@ -465,3 +465,21 @@ cdef extern from "netcdf-compat.h": NC_MPIIO NC_MPIPOSIX NC_PNETCDF + + +# Declarations for handling complex numbers +cdef extern from "nc_complex/nc_complex.h": + int pfnc_var_is_complex(int ncid, int varid) nogil + int pfnc_var_is_complex_type(int ncid, int varid) nogil + int pfnc_has_complex_dimension(int ncid, int varid) nogil + int pfnc_get_double_complex_typeid(int ncid, nc_type *nc_typeid) nogil + int pfnc_get_float_complex_typeid(int ncid, nc_type *nc_typeid) nogil + + int pfnc_complex_base_type(int ncid, int nc_typeid, int* base_type_id) nogil + int pfnc_inq_var_complex_base_type(int ncid, int varid, int* nc_typeid) nogil + + int pfnc_inq_varndims (int ncid, int varid, int *ndimsp) nogil + int pfnc_inq_vardimid (int ncid, int varid, int *dimidsp) nogil + + int pfnc_get_vara (int ncid, int varid, size_t *startp, + size_t *countp, void *ip) nogil diff --git a/setup.py 
b/setup.py index aa8b6eb20..232eedafd 100644 --- a/setup.py +++ b/setup.py @@ -417,7 +417,12 @@ def _populate_hdf5_info(dirstosearch, inc_dirs, libs, lib_dirs): netcdf4_src_pyx, str(nc_complex_dir / "src/nc_complex.c"), ] - include_dirs = inc_dirs + ["include", str(nc_complex_dir / "include")] + include_dirs = inc_dirs + [ + "include", + str(nc_complex_dir / "include"), + str(nc_complex_dir / "include/generated_fallbacks"), + ] + DEFINE_MACROS += [("NC_COMPLEX_NO_EXPORT", "1")] ext_modules = [Extension("netCDF4._netCDF4", source_files, diff --git a/src/netCDF4/_netCDF4.pyx b/src/netCDF4/_netCDF4.pyx index 5310ff672..c3bf7518c 100644 --- a/src/netCDF4/_netCDF4.pyx +++ b/src/netCDF4/_netCDF4.pyx @@ -1226,6 +1226,7 @@ from .utils import (_StartCountStride, _quantize, _find_dim, _walk_grps, _out_array_shape, _sortbylist, _tostr, _safecast, _is_int) import sys import functools +from typing import Union __version__ = "1.7.0" @@ -1905,15 +1906,17 @@ cdef _get_grps(group): free(grpids) return groups -cdef _get_vars(group): +cdef _get_vars(group, bint auto_complex=False): # Private function to create `Variable` instances for all the # variables in a `Group` or Dataset cdef int ierr, numvars, n, nn, numdims, varid, classp, iendian, _grpid cdef int *varids - cdef int *dimids cdef nc_type xtype cdef char namstring[NC_MAX_NAME+1] cdef char namstring_cmp[NC_MAX_NAME+1] + cdef bint is_complex + cdef nc_type complex_nc_type + # get number of variables in this Group. _grpid = group._grpid with nogil: @@ -1992,43 +1995,47 @@ cdef _get_vars(group): msg="WARNING: variable '%s' has unsupported datatype, skipping .." % name warnings.warn(msg) continue + # get number of dimensions. - with nogil: - ierr = nc_inq_varndims(_grpid, varid, &numdims) - _ensure_nc_success(ierr) - dimids = malloc(sizeof(int) * numdims) - # get dimension ids. - with nogil: - ierr = nc_inq_vardimid(_grpid, varid, dimids) - _ensure_nc_success(ierr) + dimids = _inq_vardimid(_grpid, varid, auto_complex) + # loop over dimensions, retrieve names. # if not found in current group, look in parents. # QUESTION: what if grp1 has a dimension named 'foo' # and so does it's parent - can a variable in grp1 # use the 'foo' dimension from the parent? dimensions = [] - for nn from 0 <= nn < numdims: + for dimid in dimids: grp = group found = False while not found: for key, value in grp.dimensions.items(): - if value._dimid == dimids[nn]: + if value._dimid == dimid: dimensions.append(key) found = True break grp = grp.parent - free(dimids) + # create new variable instance. 
- dimensions = tuple(_find_dim(group,d) for d in dimensions) + dimensions = tuple(_find_dim(group, d) for d in dimensions) + + if auto_complex and pfnc_var_is_complex(_grpid, varid): + with nogil: + ierr = pfnc_inq_var_complex_base_type(_grpid, varid, &complex_nc_type) + _ensure_nc_success(ierr) + # TODO: proper lookup + datatype = "c16" if complex_nc_type == NC_DOUBLE else "c8" + if endianness == '>': - variables[name] = Variable(group, name, datatype, dimensions, id=varid, endian='big') + variables[name] = Variable(group, name, datatype, dimensions, id=varid, auto_complex=auto_complex, endian='big') elif endianness == '<': - variables[name] = Variable(group, name, datatype, dimensions, id=varid, endian='little') + variables[name] = Variable(group, name, datatype, dimensions, id=varid, auto_complex=auto_complex, endian='little') else: - variables[name] = Variable(group, name, datatype, dimensions, id=varid) + variables[name] = Variable(group, name, datatype, dimensions, id=varid, auto_complex=auto_complex) free(varids) # free pointer holding variable ids. return variables + def _ensure_nc_success(ierr, err_cls=RuntimeError, filename=None, extra_msg=None): # print netcdf error message, raise error. if ierr == NC_NOERR: @@ -2046,6 +2053,50 @@ def _ensure_nc_success(ierr, err_cls=RuntimeError, filename=None, extra_msg=None err_str = f"{err_str}: {extra_msg}" raise err_cls(err_str) + +def dtype_is_complex(dtype: Union[str, CompoundType, numpy.dtype]) -> bool: + # TODO: check numpy.complex128, matching CompoundTypes + return dtype in ("c8", "c16") + + +cdef int _inq_varndims(int ncid, int varid, bint auto_complex): + """Wrapper around `nc_inq_varndims`/`pfnc_inq_varndims` for complex numbers""" + + cdef int ierr = NC_NOERR + cdef int ndims + + if auto_complex: + with nogil: + ierr = pfnc_inq_varndims(ncid, varid, &ndims) + else: + with nogil: + ierr = nc_inq_varndims(ncid, varid, &ndims) + + _ensure_nc_success(ierr) + return ndims + + +cdef _inq_vardimid(int ncid, int varid, bint auto_complex): + """Wrapper around `nc_inq_vardimid`/`pfnc_inq_vardimid` for complex numbers""" + + cdef int ierr = NC_NOERR + cdef int ndims = _inq_varndims(ncid, varid, auto_complex) + cdef int* dimids = malloc(sizeof(int) * ndims) + + if auto_complex: + with nogil: + ierr = pfnc_inq_vardimid(ncid, varid, dimids) + else: + with nogil: + ierr = nc_inq_vardimid(ncid, varid, dimids) + + _ensure_nc_success(ierr) + + result = [dimids[n] for n in range(ndims)] + free(dimids) + return result + + # these are class attributes that # only exist at the python level (not in the netCDF file). @@ -2054,7 +2105,9 @@ _private_atts = \ '_nunlimdim','path','parent','ndim','mask','scale','cmptypes','vltypes','enumtypes','_isprimitive', 'file_format','_isvlen','_isenum','_iscompound','_cmptype','_vltype','_enumtype','name', '__orthogoral_indexing__','keepweakref','_has_lsd', - '_buffer','chartostring','_use_get_vars','_ncstring_attrs__'] + '_buffer','chartostring','_use_get_vars','_ncstring_attrs__', + 'auto_complex' +] cdef class Dataset: """ @@ -2132,12 +2185,12 @@ strings. 
cdef Py_buffer _buffer cdef public groups, dimensions, variables, disk_format, path, parent,\ file_format, data_model, cmptypes, vltypes, enumtypes, __orthogonal_indexing__, \ - keepweakref, _ncstring_attrs__ + keepweakref, _ncstring_attrs__, auto_complex def __init__(self, filename, mode='r', clobber=True, format='NETCDF4', diskless=False, persist=False, keepweakref=False, memory=None, encoding=None, parallel=False, - comm=None, info=None, **kwargs): + comm=None, info=None, auto_complex=False, **kwargs): """ **`__init__(self, filename, mode="r", clobber=True, diskless=False, persist=False, keepweakref=False, memory=None, encoding=None, @@ -2232,6 +2285,8 @@ strings. **`info`**: MPI_Info object for parallel access. Default `None`, which means MPI_INFO_NULL will be used. Ignored if `parallel=False`. + + **`auto_complex`**: if `True`, then automatically convert complex number types """ cdef int grpid, ierr, numgrps, numdims, numvars, cdef size_t initialsize @@ -2272,6 +2327,7 @@ strings. parmode = NC_MPIIO | _cmode_dict[format] self._inmemory = False + self.auto_complex = auto_complex # mode='x' is the same as mode='w' with clobber=False if mode == "x": @@ -2371,7 +2427,7 @@ strings. # get dimensions in the root group. self.dimensions = _get_dims(self) # get variables in the root Group. - self.variables = _get_vars(self) + self.variables = _get_vars(self, self.auto_complex) # get groups in the root Group. if self.data_model == 'NETCDF4': self.groups = _get_grps(self) @@ -3473,6 +3529,8 @@ Additional read-only class variables: self.keepweakref = parent.keepweakref # propagate _ncstring_attrs__ setting from parent. self._ncstring_attrs__ = parent._ncstring_attrs__ + self.auto_complex = parent.auto_complex + if 'id' in kwargs: self._grpid = kwargs['id'] # get compound, vlen and enum types in this Group. @@ -3480,7 +3538,7 @@ Additional read-only class variables: # get dimensions in this Group. self.dimensions = _get_dims(self) # get variables in this Group. - self.variables = _get_vars(self) + self.variables = _get_vars(self, self.auto_complex) # get groups in this Group. self.groups = _get_grps(self) else: @@ -3739,7 +3797,7 @@ behavior is similar to Fortran or Matlab, but different than numpy. cdef public int _varid, _grpid, _nunlimdim cdef public _name, ndim, dtype, mask, scale, always_mask, chartostring, _isprimitive, \ _iscompound, _isvlen, _isenum, _grp, _cmptype, _vltype, _enumtype,\ - __orthogonal_indexing__, _has_lsd, _use_get_vars, _ncstring_attrs__ + __orthogonal_indexing__, _has_lsd, _use_get_vars, _ncstring_attrs__, auto_complex def __init__(self, grp, name, datatype, dimensions=(), compression=None, zlib=False, @@ -3747,7 +3805,8 @@ behavior is similar to Fortran or Matlab, but different than numpy. blosc_shuffle=1, fletcher32=False, contiguous=False, chunksizes=None, endian='native', least_significant_digit=None, - significant_digits=None,quantize_mode='BitGroom',fill_value=None, chunk_cache=None, **kwargs): + significant_digits=None,quantize_mode='BitGroom',fill_value=None, chunk_cache=None, + auto_complex=False, **kwargs): """ **`__init__(self, group, name, datatype, dimensions=(), compression=None, zlib=False, complevel=4, shuffle=True, szip_coding='nn', szip_pixels_per_block=8, @@ -3889,6 +3948,7 @@ behavior is similar to Fortran or Matlab, but different than numpy. 
cdef size_t sizep, nelemsp cdef size_t *chunksizesp cdef float preemptionp + cdef int nc_complex_typeid # Extra information for more helpful error messages error_info = f"(variable '{name}', group '{grp.name}')" @@ -3944,10 +4004,24 @@ behavior is similar to Fortran or Matlab, but different than numpy. self._grp = weakref.proxy(grp) else: self._grp = grp - user_type = isinstance(datatype, CompoundType) or \ - isinstance(datatype, VLType) or \ - isinstance(datatype, EnumType) or \ - datatype == str + + # If datatype is complex, convert to compoundtype + self.auto_complex = auto_complex + is_complex = dtype_is_complex(datatype) + if is_complex: + with nogil: + ierr = pfnc_get_double_complex_typeid(self._grpid, &nc_complex_typeid) + _ensure_nc_success(ierr, extra_msg=error_info) + + original_datatype = numpy.dtype(datatype) + datatype = CompoundType(self._grp, "f8, f8", "double_complex", typeid=nc_complex_typeid) + + user_type = ( + is_complex + or isinstance(datatype, (CompoundType, VLType, EnumType)) + or datatype == str + ) + # convert to a real numpy datatype object if necessary. if not user_type and type(datatype) != numpy.dtype: datatype = numpy.dtype(datatype) @@ -4004,7 +4078,7 @@ behavior is similar to Fortran or Matlab, but different than numpy. ierr = nc_inq_type(self._grpid, xtype, namstring, NULL) _ensure_nc_success(ierr, extra_msg=error_info) # dtype variable attribute is a numpy datatype object. - self.dtype = datatype.dtype + self.dtype = original_datatype if is_complex else datatype.dtype elif datatype.str[1:] in _supportedtypes: self._isprimitive = True # find netCDF primitive data type corresponding to @@ -4255,11 +4329,9 @@ behavior is similar to Fortran or Matlab, but different than numpy. self._nunlimdim = 0 for dim in dimensions: if dim.isunlimited(): self._nunlimdim = self._nunlimdim + 1 + # set ndim attribute (number of dimensions). - with nogil: - ierr = nc_inq_varndims(self._grpid, self._varid, &numdims) - _ensure_nc_success(ierr, extra_msg=error_info) - self.ndim = numdims + self.ndim = _inq_varndims(self._grpid, self._varid, auto_complex) self._name = name # default for automatically applying scale_factor and # add_offset, and converting to/from masked arrays is True. @@ -4354,27 +4426,20 @@ behavior is similar to Fortran or Matlab, but different than numpy. def _getdims(self): # Private method to get variables's dimension names - cdef int ierr, numdims, n, nn + cdef int ierr, dimid cdef char namstring[NC_MAX_NAME+1] - cdef int *dimids - # get number of dimensions for this variable. - with nogil: - ierr = nc_inq_varndims(self._grpid, self._varid, &numdims) - _ensure_nc_success(ierr) - dimids = malloc(sizeof(int) * numdims) - # get dimension ids. - with nogil: - ierr = nc_inq_vardimid(self._grpid, self._varid, dimids) - _ensure_nc_success(ierr) + + dimids = _inq_vardimid(self._grpid, self._varid, self.auto_complex) + # loop over dimensions, retrieve names. dimensions = () - for nn from 0 <= nn < numdims: + for dimid in dimids: with nogil: - ierr = nc_inq_dimname(self._grpid, dimids[nn], namstring) + ierr = nc_inq_dimname(self._grpid, dimid, namstring) _ensure_nc_success(ierr) name = namstring.decode('utf-8') dimensions = dimensions + (name,) - free(dimids) + return dimensions def _getname(self): @@ -5778,9 +5843,15 @@ NC_CHAR). 
# if count contains a zero element, no data is being read if 0 not in count: if sum(stride) == ndims or ndims == 0: - with nogil: - ierr = nc_get_vara(self._grpid, self._varid, - startp, countp, PyArray_DATA(data)) + if self.auto_complex: + with nogil: + ierr = pfnc_get_vara(self._grpid, self._varid, + startp, countp, PyArray_DATA(data)) + else: + with nogil: + ierr = nc_get_vara(self._grpid, self._varid, + startp, countp, PyArray_DATA(data)) + else: with nogil: ierr = nc_get_vars(self._grpid, self._varid, diff --git a/test/test_complex.py b/test/test_complex.py new file mode 100644 index 000000000..8c03953f8 --- /dev/null +++ b/test/test_complex.py @@ -0,0 +1,65 @@ +import netCDF4 +import numpy as np +import pathlib +import tempfile +import unittest + +complex_array = np.array([0 + 0j, 1 + 0j, 0 + 1j, 1 + 1j, 0.25 + 0.75j], dtype="c16") +np_dt = np.dtype([("r", np.float64), ("i", np.float64)]) +complex_struct_array = np.array( + [(r, i) for r, i in zip(complex_array.real, complex_array.imag)], + dtype=np_dt, +) + + +class ComplexNumbersTestCase(unittest.TestCase): + def setUp(self): + self.tmp_path = pathlib.Path(tempfile.mkdtemp()) + + def test_read_dim(self): + filename = self.tmp_path / "test_read_dim.nc" + + with netCDF4.Dataset(filename, "w") as f: + f.createDimension("x", size=len(complex_array)) + f.createDimension("ri", size=2) + c_ri = f.createVariable("data_dim", np.float64, ("x", "ri")) + as_dim_array = np.vstack((complex_array.real, complex_array.imag)).T + c_ri[:] = as_dim_array + + with netCDF4.Dataset(filename, "r", auto_complex=True) as f: + assert "data_dim" in f.variables + data_dim = f["data_dim"] + assert data_dim.shape == complex_array.shape + data = data_dim[:] + + assert np.array_equal(data, complex_array) + + def test_read_struct(self): + filename = self.tmp_path / "test_read_struct.nc" + + with netCDF4.Dataset(filename, "w") as f: + f.createDimension("x", size=len(complex_array)) + nc_dt = f.createCompoundType(np_dt, "nc_complex") + c_struct = f.createVariable("data_struct", nc_dt, ("x",)) + c_struct[:] = complex_struct_array + + with netCDF4.Dataset(filename, "r", auto_complex=True) as f: + assert "data_struct" in f.variables + data = f["data_struct"][:] + + assert np.array_equal(data, complex_array) + + def test_write(self): + filename = self.tmp_path / "test_write.nc" + with netCDF4.Dataset(filename, "w", auto_complex=True) as f: + f.createDimension("x", size=len(complex_array)) + complex_var = f.createVariable("complex_data", "c16", ("x",)) + complex_var[:] = complex_array + + with netCDF4.Dataset(filename, "r") as f: + assert "complex_data" in f.variables + assert np.array_equal(f["complex_data"], complex_struct_array) + + +if __name__ == "__main__": + unittest.main() From 5ac2c6aed0ac24c6b5327a0202d63fc358c8863f Mon Sep 17 00:00:00 2001 From: Peter Hill Date: Tue, 7 Nov 2023 17:10:49 +0000 Subject: [PATCH 03/11] Tidy some checks around endianness --- src/netCDF4/_netCDF4.pyx | 69 +++++++++++++++++++++------------------- 1 file changed, 37 insertions(+), 32 deletions(-) diff --git a/src/netCDF4/_netCDF4.pyx b/src/netCDF4/_netCDF4.pyx index c3bf7518c..e9ebf0586 100644 --- a/src/netCDF4/_netCDF4.pyx +++ b/src/netCDF4/_netCDF4.pyx @@ -1517,6 +1517,15 @@ _supportedtypes = _nptonctype.keys() # make sure NC_CHAR points to S1 _nctonptype[NC_CHAR]='S1' +# Mapping from numpy dtype endian format to what we expect +_dtype_endian_lookup = { + "=": "native", + ">": "big", + "<": "little", + "|": None, + None: None, +} + # internal C functions. 
cdef _get_att_names(int grpid, int varid): @@ -2026,12 +2035,11 @@ cdef _get_vars(group, bint auto_complex=False): # TODO: proper lookup datatype = "c16" if complex_nc_type == NC_DOUBLE else "c8" - if endianness == '>': - variables[name] = Variable(group, name, datatype, dimensions, id=varid, auto_complex=auto_complex, endian='big') - elif endianness == '<': - variables[name] = Variable(group, name, datatype, dimensions, id=varid, auto_complex=auto_complex, endian='little') - else: - variables[name] = Variable(group, name, datatype, dimensions, id=varid, auto_complex=auto_complex) + endian = _dtype_endian_lookup[endianness] or "native" + variables[name] = Variable( + group, name, datatype, dimensions, id=varid, auto_complex=auto_complex, endian=endian + ) + free(varids) # free pointer holding variable ids. return variables @@ -3949,6 +3957,7 @@ behavior is similar to Fortran or Matlab, but different than numpy. cdef size_t *chunksizesp cdef float preemptionp cdef int nc_complex_typeid + cdef int _nc_endian # Extra information for more helpful error messages error_info = f"(variable '{name}', group '{grp.name}')" @@ -3997,6 +4006,18 @@ behavior is similar to Fortran or Matlab, but different than numpy. pass else: raise ValueError(f"Unsupported value for compression kwarg {error_info}") + + if grp.data_model.startswith("NETCDF3") and endian != 'native': + raise RuntimeError( + f"only endian='native' allowed for NETCDF3 files, got '{endian}' {error_info}" + ) + + if endian not in ("little", "big", "native"): + raise ValueError( + f"'endian' keyword argument must be 'little','big' or 'native', got '{endian}' " + f"{error_info}" + ) + self._grpid = grp._grpid # make a weakref to group to avoid circular ref (issue 218) # keep strong reference the default behaviour (issue 251) @@ -4033,20 +4054,11 @@ behavior is similar to Fortran or Matlab, but different than numpy. datatype = str user_type = True # check if endian keyword consistent with datatype specification. - dtype_endian = getattr(datatype,'byteorder',None) - if dtype_endian == '=': dtype_endian='native' - if dtype_endian == '>': dtype_endian='big' - if dtype_endian == '<': dtype_endian='little' - if dtype_endian == '|': dtype_endian=None + dtype_endian = _dtype_endian_lookup[getattr(datatype, "byteorder", None)] if dtype_endian is not None and dtype_endian != endian: - if dtype_endian == 'native' and endian == sys.byteorder: - pass - else: - # endian keyword prevails, issue warning - msg = 'endian-ness of dtype and endian kwarg do not match, using endian kwarg' - #msg = 'endian-ness of dtype and endian kwarg do not match, dtype over-riding endian kwarg' - warnings.warn(msg) - #endian = dtype_endian # dtype prevails + if not (dtype_endian == 'native' and endian == sys.byteorder): + warnings.warn('endian-ness of dtype and endian kwarg do not match, using endian kwarg') + # check validity of datatype. self._isprimitive = False self._iscompound = False @@ -4248,17 +4260,13 @@ behavior is similar to Fortran or Matlab, but different than numpy. 
if ierr != NC_NOERR: if grp.data_model != 'NETCDF4': grp._enddef() _ensure_nc_success(ierr, extra_msg=error_info) + # set endian-ness of variable - if endian == 'little': - with nogil: - ierr = nc_def_var_endian(self._grpid, self._varid, NC_ENDIAN_LITTLE) - elif endian == 'big': + if endian != 'native': + _nc_endian = NC_ENDIAN_LITTLE if endian == "little" else NC_ENDIAN_BIG with nogil: - ierr = nc_def_var_endian(self._grpid, self._varid, NC_ENDIAN_BIG) - elif endian == 'native': - pass # this is the default format. - else: - raise ValueError("'endian' keyword argument must be 'little','big' or 'native', got '%s'" % endian) + ierr = nc_def_var_endian(self._grpid, self._varid, _nc_endian) + _ensure_nc_success(ierr, extra_msg=error_info) # set quantization if significant_digits is not None: @@ -4288,10 +4296,7 @@ behavior is similar to Fortran or Matlab, but different than numpy. if ierr != NC_NOERR: if grp.data_model != 'NETCDF4': grp._enddef() _ensure_nc_success(ierr, extra_msg=error_info) - else: - if endian != 'native': - msg=f"only endian='native' allowed for NETCDF3 files {error_info}" - raise RuntimeError(msg) + # set a fill value for this variable if fill_value keyword # given. This avoids the HDF5 overhead of deleting and # recreating the dataset if it is set later (after the enddef). From d364a6f5f2785c2e842e4bc1a84800518c1f9f16 Mon Sep 17 00:00:00 2001 From: Peter Hill Date: Thu, 16 Nov 2023 13:57:20 +0000 Subject: [PATCH 04/11] Simplify duplicated call to `nc_def_var` --- src/netCDF4/_netCDF4.pyx | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/netCDF4/_netCDF4.pyx b/src/netCDF4/_netCDF4.pyx index e9ebf0586..ba12966b5 100644 --- a/src/netCDF4/_netCDF4.pyx +++ b/src/netCDF4/_netCDF4.pyx @@ -3952,7 +3952,7 @@ behavior is similar to Fortran or Matlab, but different than numpy. cdef char namstring[NC_MAX_NAME+1] cdef char *varname cdef nc_type xtype - cdef int *dimids + cdef int *dimids = NULL cdef size_t sizep, nelemsp cdef size_t *chunksizesp cdef float preemptionp @@ -4116,15 +4116,11 @@ behavior is similar to Fortran or Matlab, but different than numpy. # any exceptions are raised. if grp.data_model != 'NETCDF4': grp._redef() # define variable. + with nogil: + ierr = nc_def_var(self._grpid, varname, xtype, ndims, + dimids, &self._varid) if ndims: - with nogil: - ierr = nc_def_var(self._grpid, varname, xtype, ndims, - dimids, &self._varid) free(dimids) - else: # a scalar variable. 
- with nogil: - ierr = nc_def_var(self._grpid, varname, xtype, ndims, - NULL, &self._varid) if ierr != NC_NOERR: if grp.data_model != 'NETCDF4': From d60252b19695931fd68a1d94f4e95175b06ded3a Mon Sep 17 00:00:00 2001 From: Peter Hill Date: Thu, 16 Nov 2023 17:01:02 +0000 Subject: [PATCH 05/11] Add support for complex numbers in netcdf3/classic model --- include/netCDF4.pxi | 27 +++++--- src/netCDF4/_netCDF4.pyx | 140 +++++++++++++++++++++++++-------------- test/test_complex.py | 13 ++++ 3 files changed, 121 insertions(+), 59 deletions(-) diff --git a/include/netCDF4.pxi b/include/netCDF4.pxi index 8d232df8f..00d883662 100644 --- a/include/netCDF4.pxi +++ b/include/netCDF4.pxi @@ -469,17 +469,28 @@ cdef extern from "netcdf-compat.h": # Declarations for handling complex numbers cdef extern from "nc_complex/nc_complex.h": - int pfnc_var_is_complex(int ncid, int varid) nogil - int pfnc_var_is_complex_type(int ncid, int varid) nogil - int pfnc_has_complex_dimension(int ncid, int varid) nogil - int pfnc_get_double_complex_typeid(int ncid, nc_type *nc_typeid) nogil - int pfnc_get_float_complex_typeid(int ncid, nc_type *nc_typeid) nogil + bint pfnc_var_is_complex(int ncid, int varid) nogil + bint pfnc_var_is_complex_type(int ncid, int varid) nogil - int pfnc_complex_base_type(int ncid, int nc_typeid, int* base_type_id) nogil + int pfnc_get_complex_dim(int ncid, int* nc_dim) nogil int pfnc_inq_var_complex_base_type(int ncid, int varid, int* nc_typeid) nogil int pfnc_inq_varndims (int ncid, int varid, int *ndimsp) nogil int pfnc_inq_vardimid (int ncid, int varid, int *dimidsp) nogil - int pfnc_get_vara (int ncid, int varid, size_t *startp, - size_t *countp, void *ip) nogil + int pfnc_def_var(int ncid, char *name, nc_type xtype, int ndims, + int *dimidsp, int *varidp) nogil + + int pfnc_get_vars(int ncid, int varid, size_t *startp, + size_t *countp, ptrdiff_t *stridep, + void *ip) nogil + + int pfnc_put_vars(int ncid, int varid, size_t *startp, + size_t *countp, ptrdiff_t *stridep, + void *op) nogil + + cdef enum: + PFNC_DOUBLE_COMPLEX + PFNC_DOUBLE_COMPLEX_DIM + PFNC_FLOAT_COMPLEX + PFNC_FLOAT_COMPLEX_DIM diff --git a/src/netCDF4/_netCDF4.pyx b/src/netCDF4/_netCDF4.pyx index ba12966b5..1c11aa972 100644 --- a/src/netCDF4/_netCDF4.pyx +++ b/src/netCDF4/_netCDF4.pyx @@ -1435,6 +1435,11 @@ _intnptonctype = {'i1' : NC_BYTE, 'i8' : NC_INT64, 'u8' : NC_UINT64} +_complex_types = { + "c16": PFNC_DOUBLE_COMPLEX, + "c8": PFNC_FLOAT_COMPLEX, +} + # create dictionary mapping string identifiers to netcdf format codes _format_dict = {'NETCDF3_CLASSIC' : NC_FORMAT_CLASSIC, 'NETCDF4_CLASSIC' : NC_FORMAT_NETCDF4_CLASSIC, @@ -1479,7 +1484,8 @@ if __has_pnetcdf_support__: 'NETCDF3_64BIT' ] -# default fill_value to numpy datatype mapping. +# Default fill_value to numpy datatype mapping. Last two for complex +# numbers only applies to complex dimensions default_fillvals = {#'S1':NC_FILL_CHAR, 'S1':'\0', 'i1':NC_FILL_BYTE, @@ -1491,7 +1497,10 @@ default_fillvals = {#'S1':NC_FILL_CHAR, 'i8':NC_FILL_INT64, 'u8':NC_FILL_UINT64, 'f4':NC_FILL_FLOAT, - 'f8':NC_FILL_DOUBLE} + 'f8':NC_FILL_DOUBLE, + 'c8':NC_FILL_FLOAT, + 'c16':NC_FILL_DOUBLE, +} # logical for native endian type. 
is_native_little = numpy.dtype('pfnc_var_is_complex_type(self._grpid, self._varid): + self._isprimitive = False + self._iscompound = True + with nogil: + ierr = pfnc_inq_var_complex_base_type(self._grpid, self._varid, &complex_typeid) + _ensure_nc_success(ierr, extra_msg=error_info) + + np_complex_type = _nctonptype[complex_typeid] + compound_complex_type = f"{np_complex_type}, {np_complex_type}" + + self._cmptype = CompoundType( + self._grp, compound_complex_type, "complex", typeid=complex_typeid + ) + else: + with nogil: + ierr = pfnc_get_complex_dim(self._grpid, &complex_dim_id) + _ensure_nc_success(ierr, extra_msg=error_info) + with nogil: + ierr = nc_inq_dimname(self._grpid, complex_dim_id, name) + _ensure_nc_success(ierr, extra_msg=error_info) + self._grp.dimensions[name.decode("utf-8")] = Dimension( + self._grp, name, size=2, id=complex_dim_id + ) + def __array__(self): # numpy special method that returns a numpy array. # allows numpy ufuncs to work faster on Variable objects @@ -4430,7 +4465,7 @@ behavior is similar to Fortran or Matlab, but different than numpy. cdef int ierr, dimid cdef char namstring[NC_MAX_NAME+1] - dimids = _inq_vardimid(self._grpid, self._varid, self.auto_complex) + dimids = _inq_vardimid(self._grpid, self._varid, self._grp.auto_complex) # loop over dimensions, retrieve names. dimensions = () @@ -5721,7 +5756,11 @@ NC_CHAR). if not data.dtype.isnative: data = data.byteswap() # strides all 1 or scalar variable, use put_vara (faster) - if sum(stride) == ndims or ndims == 0: + if self._grp.auto_complex: + with nogil: + ierr = pfnc_put_vars(self._grpid, self._varid, + startp, countp, stridep, PyArray_DATA(data)) + elif sum(stride) == ndims or ndims == 0: with nogil: ierr = nc_put_vara(self._grpid, self._varid, startp, countp, PyArray_DATA(data)) @@ -5843,16 +5882,15 @@ NC_CHAR). 
# strides all 1 or scalar variable, use get_vara (faster) # if count contains a zero element, no data is being read if 0 not in count: - if sum(stride) == ndims or ndims == 0: - if self.auto_complex: - with nogil: - ierr = pfnc_get_vara(self._grpid, self._varid, - startp, countp, PyArray_DATA(data)) - else: - with nogil: - ierr = nc_get_vara(self._grpid, self._varid, - startp, countp, PyArray_DATA(data)) - + if self._grp.auto_complex: + with nogil: + ierr = pfnc_get_vars(self._grpid, self._varid, + startp, countp, stridep, + PyArray_DATA(data)) + elif sum(stride) == ndims or ndims == 0: + with nogil: + ierr = nc_get_vara(self._grpid, self._varid, + startp, countp, PyArray_DATA(data)) else: with nogil: ierr = nc_get_vars(self._grpid, self._varid, diff --git a/test/test_complex.py b/test/test_complex.py index 8c03953f8..20856ce1f 100644 --- a/test/test_complex.py +++ b/test/test_complex.py @@ -60,6 +60,19 @@ def test_write(self): assert "complex_data" in f.variables assert np.array_equal(f["complex_data"], complex_struct_array) + def test_write_netcdf3(self): + filename = self.tmp_path / "test_write_netcdf3.nc" + with netCDF4.Dataset( + filename, "w", format="NETCDF3_CLASSIC", auto_complex=True + ) as f: + f.createDimension("x", size=len(complex_array)) + complex_var = f.createVariable("complex_data", "c16", ("x",)) + complex_var[:] = complex_array + + with netCDF4.Dataset(filename, "r", auto_complex=True) as f: + assert "complex_data" in f.variables + assert np.array_equal(f["complex_data"][:], complex_array) + if __name__ == "__main__": unittest.main() From df42ae11887a13326d06d87d77d06d564d845e4e Mon Sep 17 00:00:00 2001 From: Peter Hill Date: Thu, 16 Nov 2023 17:30:31 +0000 Subject: [PATCH 06/11] CI: Checkout submodules --- .github/workflows/build_latest.yml | 2 ++ .github/workflows/build_master.yml | 2 ++ .github/workflows/build_old.yml | 2 ++ .github/workflows/miniconda.yml | 4 ++++ 4 files changed, 10 insertions(+) diff --git a/.github/workflows/build_latest.yml b/.github/workflows/build_latest.yml index 3d252e1cc..4ec6ff7fd 100644 --- a/.github/workflows/build_latest.yml +++ b/.github/workflows/build_latest.yml @@ -17,6 +17,8 @@ jobs: steps: - uses: actions/checkout@v4 + with: + submodules: true - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v4 diff --git a/.github/workflows/build_master.yml b/.github/workflows/build_master.yml index c2a00e76f..a072bf451 100644 --- a/.github/workflows/build_master.yml +++ b/.github/workflows/build_master.yml @@ -14,6 +14,8 @@ jobs: steps: - uses: actions/checkout@v4 + with: + submodules: true - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v4 diff --git a/.github/workflows/build_old.yml b/.github/workflows/build_old.yml index e00d8be17..c2d4c16e3 100644 --- a/.github/workflows/build_old.yml +++ b/.github/workflows/build_old.yml @@ -17,6 +17,8 @@ jobs: steps: - uses: actions/checkout@v4 + with: + submodules: true - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v4 diff --git a/.github/workflows/miniconda.yml b/.github/workflows/miniconda.yml index ca4309c33..33f079898 100644 --- a/.github/workflows/miniconda.yml +++ b/.github/workflows/miniconda.yml @@ -22,6 +22,8 @@ jobs: steps: - uses: actions/checkout@v4 + with: + submodules: true - name: Setup Micromamba uses: mamba-org/setup-micromamba@v1 @@ -53,6 +55,8 @@ jobs: platform: [x64] steps: - uses: actions/checkout@v4 + with: + submodules: true - name: Setup Micromamba uses: mamba-org/setup-micromamba@v1 From 
4be9751dc5aca948b6e4e126e35ea0c77828651d Mon Sep 17 00:00:00 2001 From: Peter Hill Date: Tue, 21 Nov 2023 17:01:35 +0000 Subject: [PATCH 07/11] Add some docs for complex number support --- docs/index.html | 72 ++++++++++++++++++++++++++++++++++------ examples/tutorial.py | 8 +++++ src/netCDF4/_netCDF4.pyx | 62 +++++++++++++++++++++++++++++----- 3 files changed, 122 insertions(+), 20 deletions(-) diff --git a/docs/index.html b/docs/index.html index 7ed8055d2..6971ceb5b 100644 --- a/docs/index.html +++ b/docs/index.html @@ -3,7 +3,7 @@ - + netCDF4 API documentation @@ -49,8 +49,9 @@

[docs/index.html: regenerated API documentation. The rendered changes mirror the `_netCDF4.pyx` docstring updates later in this patch: a new "Support for complex numbers" tutorial section covering the `auto_complex` keyword, the conventions recognised on read (a compound type whose two float/double members' names begin with `r` and `i`, or a length-2 dimension named `complex` or `ri`), the conventions used on write (a `_PFNC_DOUBLE_COMPLEX_TYPE` compound type, or the `*FLOAT*` equivalent, for netCDF4 formats, or a length-2 `_pfnc_complex` dimension for netCDF3/classic formats), and a link to the nc-complex library; a cross-reference to that section from the compound-type tutorial; documentation of the new `auto_complex` argument and instance variable on `Dataset` and `Variable`; and matching entries in the page index.]
      Variable \ No newline at end of file diff --git a/examples/tutorial.py b/examples/tutorial.py index 7efad1c00..d48fd1679 100644 --- a/examples/tutorial.py +++ b/examples/tutorial.py @@ -355,3 +355,11 @@ def walktree(top): print(nc) print(nc['v'][:]) nc.close() + +# Write complex numbers to file +complex_array = np.array([0 + 0j, 1 + 0j, 0 + 1j, 1 + 1j, 0.25 + 0.75j]) +with Dataset("complex.nc", "w", auto_complex=True) as nc: + nc.createDimension("x", size=len(complex_array)) + var = nc.createVariable("data", "c16", ("x",)) + var[:] = complex_array + print(var) diff --git a/src/netCDF4/_netCDF4.pyx b/src/netCDF4/_netCDF4.pyx index 1c11aa972..878e7898d 100644 --- a/src/netCDF4/_netCDF4.pyx +++ b/src/netCDF4/_netCDF4.pyx @@ -1,5 +1,4 @@ -""" -Version 1.7.0 +"""Version 1.7.0 ------------- # Introduction @@ -30,8 +29,9 @@ types) are not supported. ## Developer Install - - Clone the - [github repository](http://github.com/Unidata/netcdf4-python). + - Clone the [github repository](http://github.com/Unidata/netcdf4-python). Make + sure you either clone recursively, or run `git submodule update --init` to + ensure all the submodules are also checked out. - Make sure the dependencies are satisfied (Python 3.8 or later, [numpy](http://numpy.scipy.org), [Cython](http://cython.org), @@ -85,6 +85,10 @@ types) are not supported. - [Dealing with strings](#dealing-with-strings) - [In-memory (diskless) Datasets](#in-memory-diskless-datasets) +All of the code in this tutorial is available in `examples/tutorial.py`, except +the parallel IO example, which is in `examples/mpi_example.py`. +Unit tests are in the `test` directory. + ## Creating/Opening/Closing a netCDF file To create a netCDF file from python, you simply call the `Dataset` @@ -735,8 +739,9 @@ information for a point by reading one variable, instead of reading different parameters from different variables. Compound data types are created from the corresponding numpy data type using the `Dataset.createCompoundType` method of a `Dataset` or `Group` instance. -Since there is no native complex data type in netcdf, compound types are handy -for storing numpy complex arrays. Here's an example: +Since there is no native complex data type in netcdf (but see +[Support for complex numbers](#support-for-complex-numbers)), compound +types are handy for storing numpy complex arrays. Here's an example: ```python >>> f = Dataset("complex.nc","w") @@ -1200,10 +1205,48 @@ root group (NETCDF4 data model, file format HDF5): >>> nc.close() ``` +## Support for complex numbers + +Although there is no native support for complex numbers in netCDF, there are +some common conventions for storing them. Two of the most common are to either +use a compound datatype for the real and imaginary components, or a separate +dimension. `netCDF4` supports reading several of these conventions, as well as +writing using one of two conventions (depending on file format). This support +for complex numbers is enabled by setting `auto_complex=True` when opening a +`Dataset`: + +```python +>>> complex_array = np.array([0 + 0j, 1 + 0j, 0 + 1j, 1 + 1j, 0.25 + 0.75j]) +>>> with netCDF4.Dataset("complex.nc", "w", auto_complex=True) as nc: +... nc.createDimension("x", size=len(complex_array)) +... var = nc.createVariable("data", "c16", ("x",)) +... var[:] = complex_array +... 
print(var) + +compound data(x) +compound data type: complex128 +unlimited dimensions: +current shape = (5,) +``` + +When reading files using `auto_complex=True`, `netCDF4` will interpret variables +stored using the following conventions as complex numbers: + +- compound datatypes with two `float` or `double` members who names begin with + `r` and `i` (case insensitive) +- a dimension of length 2 named `complex` or `ri` + +When writing files using `auto_complex=True`, `netCDF4` will use: + +- a compound datatype named `_PFNC_DOUBLE_COMPLEX_TYPE` (or `*FLOAT*` as + appropriate) with members `r` and `i` for netCDF4 formats; +- or a dimension of length 2 named `_pfnc_complex` for netCDF3 or classic + formats. + +Support for complex numbers is handled via the +[`nc-complex`](https://github.com/PlasmaFAIR/nc-complex) library. See there for +further details. -All of the code in this tutorial is available in `examples/tutorial.py`, except -the parallel IO example, which is in `examples/mpi_example.py`. -Unit tests are in the `test` directory. **contact**: Jeffrey Whitaker @@ -1214,6 +1257,7 @@ Unit tests are in the `test` directory. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + """ # Make changes to this file, not the c-wrappers that Cython generates. From 54f812425d4b6914bf9930e545400ba4ec5be25d Mon Sep 17 00:00:00 2001 From: Peter Hill Date: Tue, 21 Nov 2023 17:35:11 +0000 Subject: [PATCH 08/11] Support using `np.complex128` directly for complex numbers --- src/netCDF4/_netCDF4.pyx | 18 +++++++++--------- test/test_complex.py | 11 +++++++++++ 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/src/netCDF4/_netCDF4.pyx b/src/netCDF4/_netCDF4.pyx index 878e7898d..dcfe221a2 100644 --- a/src/netCDF4/_netCDF4.pyx +++ b/src/netCDF4/_netCDF4.pyx @@ -2113,8 +2113,8 @@ def _ensure_nc_success(ierr, err_cls=RuntimeError, filename=None, extra_msg=None raise err_cls(err_str) -def dtype_is_complex(dtype: Union[str, CompoundType, numpy.dtype]) -> bool: - # TODO: check numpy.complex128, matching CompoundTypes +def dtype_is_complex(dtype: Union[str, numpy.dtype]) -> bool: + """Return True if dtype is a complex number""" return dtype in ("c8", "c16") @@ -4077,13 +4077,6 @@ behavior is similar to Fortran or Matlab, but different than numpy. else: self._grp = grp - # If datatype is complex, convert to compoundtype - is_complex = dtype_is_complex(datatype) - if is_complex and not self._grp.auto_complex: - raise ValueError( - f"complex datatypes ({datatype}) are only supported with `auto_complex=True`" - ) - self._iscompound = isinstance(datatype, CompoundType) self._isvlen = isinstance(datatype, VLType) or datatype==str self._isenum = isinstance(datatype, EnumType) @@ -4102,6 +4095,13 @@ behavior is similar to Fortran or Matlab, but different than numpy. 
user_type = True self._isvlen = True + # If datatype is complex, convert to compoundtype + is_complex = dtype_is_complex(datatype) + if is_complex and not self._grp.auto_complex: + raise ValueError( + f"complex datatypes ({datatype}) are only supported with `auto_complex=True`" + ) + # check if endian keyword consistent with datatype specification. dtype_endian = _dtype_endian_lookup[getattr(datatype, "byteorder", None)] if dtype_endian is not None and dtype_endian != endian: diff --git a/test/test_complex.py b/test/test_complex.py index 20856ce1f..cd1fed32c 100644 --- a/test/test_complex.py +++ b/test/test_complex.py @@ -60,6 +60,17 @@ def test_write(self): assert "complex_data" in f.variables assert np.array_equal(f["complex_data"], complex_struct_array) + def test_write_with_np_complex128(self): + filename = self.tmp_path / "test_write_with_np_complex128.nc" + with netCDF4.Dataset(filename, "w", auto_complex=True) as f: + f.createDimension("x", size=len(complex_array)) + complex_var = f.createVariable("complex_data", np.complex128, ("x",)) + complex_var[:] = complex_array + + with netCDF4.Dataset(filename, "r") as f: + assert "complex_data" in f.variables + assert np.array_equal(f["complex_data"], complex_struct_array) + def test_write_netcdf3(self): filename = self.tmp_path / "test_write_netcdf3.nc" with netCDF4.Dataset( From 013e8f38f87700690deefec33cbf70fa8636e00b Mon Sep 17 00:00:00 2001 From: Peter Hill Date: Fri, 8 Dec 2023 08:55:03 +0000 Subject: [PATCH 09/11] Bump nc-complex to v0.2.0 --- external/nc_complex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/nc_complex b/external/nc_complex index 9c2a87703..37310ed00 160000 --- a/external/nc_complex +++ b/external/nc_complex @@ -1 +1 @@ -Subproject commit 9c2a87703c7612f04097418bbed4978d3be6ef92 +Subproject commit 37310ed00f3910974bdefefcdfa4787588651f59 From 0561c7c3653b09320f0a5f002228a13fb8aa28d5 Mon Sep 17 00:00:00 2001 From: Peter Hill Date: Fri, 8 Dec 2023 08:55:24 +0000 Subject: [PATCH 10/11] Add more complete example for complex numbers --- examples/complex_numbers.py | 51 +++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 examples/complex_numbers.py diff --git a/examples/complex_numbers.py b/examples/complex_numbers.py new file mode 100644 index 000000000..51d7a61f1 --- /dev/null +++ b/examples/complex_numbers.py @@ -0,0 +1,51 @@ +import netCDF4 +import numpy as np + +complex_array = np.array([0 + 0j, 1 + 0j, 0 + 1j, 1 + 1j, 0.25 + 0.75j], dtype="c16") +np_dt = np.dtype([("r", np.float64), ("i", np.float64)]) +complex_struct_array = np.array( + [(r, i) for r, i in zip(complex_array.real, complex_array.imag)], + dtype=np_dt, +) + +print("\n**********") +print("Reading a file that uses a dimension for complex numbers") +filename = "complex_numbers_as_dimension.nc" + +with netCDF4.Dataset(filename, "w") as f: + f.createDimension("x", size=len(complex_array)) + f.createDimension("complex", size=2) + c_ri = f.createVariable("data_dim", np.float64, ("x", "complex")) + as_dim_array = np.vstack((complex_array.real, complex_array.imag)).T + c_ri[:] = as_dim_array + +with netCDF4.Dataset(filename, "r", auto_complex=True) as f: + print(f["data_dim"]) + + +print("\n**********") +print("Reading a file that uses a compound datatype for complex numbers") +filename = "complex_numbers_as_datatype.nc" + +with netCDF4.Dataset(filename, "w") as f: + f.createDimension("x", size=len(complex_array)) + nc_dt = f.createCompoundType(np_dt, "nc_complex") + breakpoint() + + c_struct = 
f.createVariable("data_struct", nc_dt, ("x",)) + c_struct[:] = complex_struct_array + +with netCDF4.Dataset(filename, "r", auto_complex=True) as f: + print(f["data_struct"]) + +print("\n**********") +print("Writing complex numbers to a file") +filename = "writing_complex_numbers.nc" +with netCDF4.Dataset(filename, "w", auto_complex=True) as f: + f.createDimension("x", size=len(complex_array)) + c_var = f.createVariable("data", np.complex128, ("x",)) + c_var[:] = complex_array + print(c_var) + +with netCDF4.Dataset(filename, "r", auto_complex=True) as f: + print(f["data"]) From fe653fef8d550945ea74f3e2ba3acbc16a412f57 Mon Sep 17 00:00:00 2001 From: Peter Hill Date: Fri, 8 Dec 2023 08:57:33 +0000 Subject: [PATCH 11/11] Add changelog entry for complex numbers --- Changelog | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Changelog b/Changelog index e5c5362c8..3a0168ecf 100644 --- a/Changelog +++ b/Changelog @@ -1,7 +1,8 @@ version 1.7.0 (not yet released) =============================== + * add support for complex numbers via `auto_complex` keyword to `Dataset` (PR #1295) * fix for deprecated Cython `DEF` and `IF` statements using compatibility header - with shims for unavailable functionality + with shims for unavailable functionality (PR #1277) version 1.6.5 (tag v1.6.5rel) ===============================