From 6e568b190972cf3bd7fefb8f2ab017985289ee24 Mon Sep 17 00:00:00 2001
From: Peter Hill
Date: Fri, 27 Oct 2023 14:52:27 +0100
Subject: [PATCH 01/11] Add nc_complex submodule
---
.gitmodules | 3 +++
external/nc_complex | 1 +
setup.py | 12 ++++++++++--
3 files changed, 14 insertions(+), 2 deletions(-)
create mode 100644 .gitmodules
create mode 160000 external/nc_complex
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 000000000..7b0ff7c97
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "external/nc_complex"]
+ path = external/nc_complex
+ url = git@github.com:PlasmaFAIR/nc-complex.git
diff --git a/external/nc_complex b/external/nc_complex
new file mode 160000
index 000000000..dbeeb080f
--- /dev/null
+++ b/external/nc_complex
@@ -0,0 +1 @@
+Subproject commit dbeeb080ff869ec3c4288d9ab840f0be201e30de
diff --git a/setup.py b/setup.py
index 31d32ac30..aa8b6eb20 100644
--- a/setup.py
+++ b/setup.py
@@ -1,5 +1,6 @@
import os, sys, subprocess, glob
import os.path as osp
+import pathlib
import shutil
import configparser
from setuptools import setup, Extension
@@ -411,12 +412,19 @@ def _populate_hdf5_info(dirstosearch, inc_dirs, libs, lib_dirs):
osp.join("include", "parallel_support_imports.pxi")
)
+ nc_complex_dir = pathlib.Path("./external/nc_complex")
+ source_files = [
+ netcdf4_src_pyx,
+ str(nc_complex_dir / "src/nc_complex.c"),
+ ]
+ include_dirs = inc_dirs + ["include", str(nc_complex_dir / "include")]
+
ext_modules = [Extension("netCDF4._netCDF4",
- [netcdf4_src_pyx],
+ source_files,
define_macros=DEFINE_MACROS,
libraries=libs,
library_dirs=lib_dirs,
- include_dirs=inc_dirs + ['include'],
+ include_dirs=include_dirs,
runtime_library_dirs=runtime_lib_dirs)]
# set language_level directive to 3
for e in ext_modules:
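
The net effect of the setup.py change is to compile the vendored nc-complex C source into the existing extension and expose its headers. A minimal sketch of the resulting configuration, assuming the Cython source resolves to src/netCDF4/_netCDF4.pyx (the real build also adds macros, libraries and the HDF5/netCDF include and library paths):

    import pathlib
    from setuptools import Extension

    nc_complex_dir = pathlib.Path("./external/nc_complex")
    ext = Extension(
        "netCDF4._netCDF4",
        # Cython wrapper plus the vendored nc-complex implementation
        ["src/netCDF4/_netCDF4.pyx", str(nc_complex_dir / "src/nc_complex.c")],
        include_dirs=["include", str(nc_complex_dir / "include")],
    )
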
From 36c871fe6317ab349d959dace05bc74dfc21bf70 Mon Sep 17 00:00:00 2001
From: Peter Hill
Date: Mon, 6 Nov 2023 17:49:35 +0000
Subject: [PATCH 02/11] Add support for complex numbers
---
external/nc_complex | 2 +-
include/netCDF4.pxi | 18 +++++
setup.py | 7 +-
src/netCDF4/_netCDF4.pyx | 171 +++++++++++++++++++++++++++------------
test/test_complex.py | 65 +++++++++++++++
5 files changed, 211 insertions(+), 52 deletions(-)
create mode 100644 test/test_complex.py
diff --git a/external/nc_complex b/external/nc_complex
index dbeeb080f..9c2a87703 160000
--- a/external/nc_complex
+++ b/external/nc_complex
@@ -1 +1 @@
-Subproject commit dbeeb080ff869ec3c4288d9ab840f0be201e30de
+Subproject commit 9c2a87703c7612f04097418bbed4978d3be6ef92
diff --git a/include/netCDF4.pxi b/include/netCDF4.pxi
index 7bd220274..8d232df8f 100644
--- a/include/netCDF4.pxi
+++ b/include/netCDF4.pxi
@@ -465,3 +465,21 @@ cdef extern from "netcdf-compat.h":
NC_MPIIO
NC_MPIPOSIX
NC_PNETCDF
+
+
+# Declarations for handling complex numbers
+cdef extern from "nc_complex/nc_complex.h":
+ int pfnc_var_is_complex(int ncid, int varid) nogil
+ int pfnc_var_is_complex_type(int ncid, int varid) nogil
+ int pfnc_has_complex_dimension(int ncid, int varid) nogil
+ int pfnc_get_double_complex_typeid(int ncid, nc_type *nc_typeid) nogil
+ int pfnc_get_float_complex_typeid(int ncid, nc_type *nc_typeid) nogil
+
+ int pfnc_complex_base_type(int ncid, int nc_typeid, int* base_type_id) nogil
+ int pfnc_inq_var_complex_base_type(int ncid, int varid, int* nc_typeid) nogil
+
+ int pfnc_inq_varndims (int ncid, int varid, int *ndimsp) nogil
+ int pfnc_inq_vardimid (int ncid, int varid, int *dimidsp) nogil
+
+ int pfnc_get_vara (int ncid, int varid, size_t *startp,
+ size_t *countp, void *ip) nogil
diff --git a/setup.py b/setup.py
index aa8b6eb20..232eedafd 100644
--- a/setup.py
+++ b/setup.py
@@ -417,7 +417,12 @@ def _populate_hdf5_info(dirstosearch, inc_dirs, libs, lib_dirs):
netcdf4_src_pyx,
str(nc_complex_dir / "src/nc_complex.c"),
]
- include_dirs = inc_dirs + ["include", str(nc_complex_dir / "include")]
+ include_dirs = inc_dirs + [
+ "include",
+ str(nc_complex_dir / "include"),
+ str(nc_complex_dir / "include/generated_fallbacks"),
+ ]
+ DEFINE_MACROS += [("NC_COMPLEX_NO_EXPORT", "1")]
ext_modules = [Extension("netCDF4._netCDF4",
source_files,
diff --git a/src/netCDF4/_netCDF4.pyx b/src/netCDF4/_netCDF4.pyx
index 5310ff672..c3bf7518c 100644
--- a/src/netCDF4/_netCDF4.pyx
+++ b/src/netCDF4/_netCDF4.pyx
@@ -1226,6 +1226,7 @@ from .utils import (_StartCountStride, _quantize, _find_dim, _walk_grps,
_out_array_shape, _sortbylist, _tostr, _safecast, _is_int)
import sys
import functools
+from typing import Union
__version__ = "1.7.0"
@@ -1905,15 +1906,17 @@ cdef _get_grps(group):
free(grpids)
return groups
-cdef _get_vars(group):
+cdef _get_vars(group, bint auto_complex=False):
# Private function to create `Variable` instances for all the
# variables in a `Group` or Dataset
cdef int ierr, numvars, n, nn, numdims, varid, classp, iendian, _grpid
cdef int *varids
- cdef int *dimids
cdef nc_type xtype
cdef char namstring[NC_MAX_NAME+1]
cdef char namstring_cmp[NC_MAX_NAME+1]
+ cdef bint is_complex
+ cdef nc_type complex_nc_type
+
# get number of variables in this Group.
_grpid = group._grpid
with nogil:
@@ -1992,43 +1995,47 @@ cdef _get_vars(group):
msg="WARNING: variable '%s' has unsupported datatype, skipping .." % name
warnings.warn(msg)
continue
+
# get number of dimensions.
- with nogil:
- ierr = nc_inq_varndims(_grpid, varid, &numdims)
- _ensure_nc_success(ierr)
- dimids = malloc(sizeof(int) * numdims)
- # get dimension ids.
- with nogil:
- ierr = nc_inq_vardimid(_grpid, varid, dimids)
- _ensure_nc_success(ierr)
+ dimids = _inq_vardimid(_grpid, varid, auto_complex)
+
# loop over dimensions, retrieve names.
# if not found in current group, look in parents.
# QUESTION: what if grp1 has a dimension named 'foo'
# and so does it's parent - can a variable in grp1
# use the 'foo' dimension from the parent?
dimensions = []
- for nn from 0 <= nn < numdims:
+ for dimid in dimids:
grp = group
found = False
while not found:
for key, value in grp.dimensions.items():
- if value._dimid == dimids[nn]:
+ if value._dimid == dimid:
dimensions.append(key)
found = True
break
grp = grp.parent
- free(dimids)
+
# create new variable instance.
- dimensions = tuple(_find_dim(group,d) for d in dimensions)
+ dimensions = tuple(_find_dim(group, d) for d in dimensions)
+
+ if auto_complex and pfnc_var_is_complex(_grpid, varid):
+ with nogil:
+ ierr = pfnc_inq_var_complex_base_type(_grpid, varid, &complex_nc_type)
+ _ensure_nc_success(ierr)
+ # TODO: proper lookup
+ datatype = "c16" if complex_nc_type == NC_DOUBLE else "c8"
+
if endianness == '>':
- variables[name] = Variable(group, name, datatype, dimensions, id=varid, endian='big')
+ variables[name] = Variable(group, name, datatype, dimensions, id=varid, auto_complex=auto_complex, endian='big')
elif endianness == '<':
- variables[name] = Variable(group, name, datatype, dimensions, id=varid, endian='little')
+ variables[name] = Variable(group, name, datatype, dimensions, id=varid, auto_complex=auto_complex, endian='little')
else:
- variables[name] = Variable(group, name, datatype, dimensions, id=varid)
+ variables[name] = Variable(group, name, datatype, dimensions, id=varid, auto_complex=auto_complex)
free(varids) # free pointer holding variable ids.
return variables
+
def _ensure_nc_success(ierr, err_cls=RuntimeError, filename=None, extra_msg=None):
# print netcdf error message, raise error.
if ierr == NC_NOERR:
@@ -2046,6 +2053,50 @@ def _ensure_nc_success(ierr, err_cls=RuntimeError, filename=None, extra_msg=None
err_str = f"{err_str}: {extra_msg}"
raise err_cls(err_str)
+
+def dtype_is_complex(dtype: Union[str, CompoundType, numpy.dtype]) -> bool:
+ # TODO: check numpy.complex128, matching CompoundTypes
+ return dtype in ("c8", "c16")
+
+
+cdef int _inq_varndims(int ncid, int varid, bint auto_complex):
+ """Wrapper around `nc_inq_varndims`/`pfnc_inq_varndims` for complex numbers"""
+
+ cdef int ierr = NC_NOERR
+ cdef int ndims
+
+ if auto_complex:
+ with nogil:
+ ierr = pfnc_inq_varndims(ncid, varid, &ndims)
+ else:
+ with nogil:
+ ierr = nc_inq_varndims(ncid, varid, &ndims)
+
+ _ensure_nc_success(ierr)
+ return ndims
+
+
+cdef _inq_vardimid(int ncid, int varid, bint auto_complex):
+ """Wrapper around `nc_inq_vardimid`/`pfnc_inq_vardimid` for complex numbers"""
+
+ cdef int ierr = NC_NOERR
+ cdef int ndims = _inq_varndims(ncid, varid, auto_complex)
+ cdef int* dimids = malloc(sizeof(int) * ndims)
+
+ if auto_complex:
+ with nogil:
+ ierr = pfnc_inq_vardimid(ncid, varid, dimids)
+ else:
+ with nogil:
+ ierr = nc_inq_vardimid(ncid, varid, dimids)
+
+ _ensure_nc_success(ierr)
+
+ result = [dimids[n] for n in range(ndims)]
+ free(dimids)
+ return result
+
+
# these are class attributes that
# only exist at the python level (not in the netCDF file).
@@ -2054,7 +2105,9 @@ _private_atts = \
'_nunlimdim','path','parent','ndim','mask','scale','cmptypes','vltypes','enumtypes','_isprimitive',
'file_format','_isvlen','_isenum','_iscompound','_cmptype','_vltype','_enumtype','name',
'__orthogoral_indexing__','keepweakref','_has_lsd',
- '_buffer','chartostring','_use_get_vars','_ncstring_attrs__']
+ '_buffer','chartostring','_use_get_vars','_ncstring_attrs__',
+ 'auto_complex'
+]
cdef class Dataset:
"""
@@ -2132,12 +2185,12 @@ strings.
cdef Py_buffer _buffer
cdef public groups, dimensions, variables, disk_format, path, parent,\
file_format, data_model, cmptypes, vltypes, enumtypes, __orthogonal_indexing__, \
- keepweakref, _ncstring_attrs__
+ keepweakref, _ncstring_attrs__, auto_complex
def __init__(self, filename, mode='r', clobber=True, format='NETCDF4',
diskless=False, persist=False, keepweakref=False,
memory=None, encoding=None, parallel=False,
- comm=None, info=None, **kwargs):
+ comm=None, info=None, auto_complex=False, **kwargs):
"""
**`__init__(self, filename, mode="r", clobber=True, diskless=False,
persist=False, keepweakref=False, memory=None, encoding=None,
@@ -2232,6 +2285,8 @@ strings.
**`info`**: MPI_Info object for parallel access. Default `None`, which
means MPI_INFO_NULL will be used. Ignored if `parallel=False`.
+
+ **`auto_complex`**: if `True`, then automatically convert complex number types
"""
cdef int grpid, ierr, numgrps, numdims, numvars,
cdef size_t initialsize
@@ -2272,6 +2327,7 @@ strings.
parmode = NC_MPIIO | _cmode_dict[format]
self._inmemory = False
+ self.auto_complex = auto_complex
# mode='x' is the same as mode='w' with clobber=False
if mode == "x":
@@ -2371,7 +2427,7 @@ strings.
# get dimensions in the root group.
self.dimensions = _get_dims(self)
# get variables in the root Group.
- self.variables = _get_vars(self)
+ self.variables = _get_vars(self, self.auto_complex)
# get groups in the root Group.
if self.data_model == 'NETCDF4':
self.groups = _get_grps(self)
@@ -3473,6 +3529,8 @@ Additional read-only class variables:
self.keepweakref = parent.keepweakref
# propagate _ncstring_attrs__ setting from parent.
self._ncstring_attrs__ = parent._ncstring_attrs__
+ self.auto_complex = parent.auto_complex
+
if 'id' in kwargs:
self._grpid = kwargs['id']
# get compound, vlen and enum types in this Group.
@@ -3480,7 +3538,7 @@ Additional read-only class variables:
# get dimensions in this Group.
self.dimensions = _get_dims(self)
# get variables in this Group.
- self.variables = _get_vars(self)
+ self.variables = _get_vars(self, self.auto_complex)
# get groups in this Group.
self.groups = _get_grps(self)
else:
@@ -3739,7 +3797,7 @@ behavior is similar to Fortran or Matlab, but different than numpy.
cdef public int _varid, _grpid, _nunlimdim
cdef public _name, ndim, dtype, mask, scale, always_mask, chartostring, _isprimitive, \
_iscompound, _isvlen, _isenum, _grp, _cmptype, _vltype, _enumtype,\
- __orthogonal_indexing__, _has_lsd, _use_get_vars, _ncstring_attrs__
+ __orthogonal_indexing__, _has_lsd, _use_get_vars, _ncstring_attrs__, auto_complex
def __init__(self, grp, name, datatype, dimensions=(),
compression=None, zlib=False,
@@ -3747,7 +3805,8 @@ behavior is similar to Fortran or Matlab, but different than numpy.
blosc_shuffle=1,
fletcher32=False, contiguous=False,
chunksizes=None, endian='native', least_significant_digit=None,
- significant_digits=None,quantize_mode='BitGroom',fill_value=None, chunk_cache=None, **kwargs):
+ significant_digits=None,quantize_mode='BitGroom',fill_value=None, chunk_cache=None,
+ auto_complex=False, **kwargs):
"""
**`__init__(self, group, name, datatype, dimensions=(), compression=None, zlib=False,
complevel=4, shuffle=True, szip_coding='nn', szip_pixels_per_block=8,
@@ -3889,6 +3948,7 @@ behavior is similar to Fortran or Matlab, but different than numpy.
cdef size_t sizep, nelemsp
cdef size_t *chunksizesp
cdef float preemptionp
+ cdef int nc_complex_typeid
# Extra information for more helpful error messages
error_info = f"(variable '{name}', group '{grp.name}')"
@@ -3944,10 +4004,24 @@ behavior is similar to Fortran or Matlab, but different than numpy.
self._grp = weakref.proxy(grp)
else:
self._grp = grp
- user_type = isinstance(datatype, CompoundType) or \
- isinstance(datatype, VLType) or \
- isinstance(datatype, EnumType) or \
- datatype == str
+
+ # If datatype is complex, convert to compoundtype
+ self.auto_complex = auto_complex
+ is_complex = dtype_is_complex(datatype)
+ if is_complex:
+ with nogil:
+ ierr = pfnc_get_double_complex_typeid(self._grpid, &nc_complex_typeid)
+ _ensure_nc_success(ierr, extra_msg=error_info)
+
+ original_datatype = numpy.dtype(datatype)
+ datatype = CompoundType(self._grp, "f8, f8", "double_complex", typeid=nc_complex_typeid)
+
+ user_type = (
+ is_complex
+ or isinstance(datatype, (CompoundType, VLType, EnumType))
+ or datatype == str
+ )
+
# convert to a real numpy datatype object if necessary.
if not user_type and type(datatype) != numpy.dtype:
datatype = numpy.dtype(datatype)
@@ -4004,7 +4078,7 @@ behavior is similar to Fortran or Matlab, but different than numpy.
ierr = nc_inq_type(self._grpid, xtype, namstring, NULL)
_ensure_nc_success(ierr, extra_msg=error_info)
# dtype variable attribute is a numpy datatype object.
- self.dtype = datatype.dtype
+ self.dtype = original_datatype if is_complex else datatype.dtype
elif datatype.str[1:] in _supportedtypes:
self._isprimitive = True
# find netCDF primitive data type corresponding to
@@ -4255,11 +4329,9 @@ behavior is similar to Fortran or Matlab, but different than numpy.
self._nunlimdim = 0
for dim in dimensions:
if dim.isunlimited(): self._nunlimdim = self._nunlimdim + 1
+
# set ndim attribute (number of dimensions).
- with nogil:
- ierr = nc_inq_varndims(self._grpid, self._varid, &numdims)
- _ensure_nc_success(ierr, extra_msg=error_info)
- self.ndim = numdims
+ self.ndim = _inq_varndims(self._grpid, self._varid, auto_complex)
self._name = name
# default for automatically applying scale_factor and
# add_offset, and converting to/from masked arrays is True.
@@ -4354,27 +4426,20 @@ behavior is similar to Fortran or Matlab, but different than numpy.
def _getdims(self):
# Private method to get variables's dimension names
- cdef int ierr, numdims, n, nn
+ cdef int ierr, dimid
cdef char namstring[NC_MAX_NAME+1]
- cdef int *dimids
- # get number of dimensions for this variable.
- with nogil:
- ierr = nc_inq_varndims(self._grpid, self._varid, &numdims)
- _ensure_nc_success(ierr)
- dimids = malloc(sizeof(int) * numdims)
- # get dimension ids.
- with nogil:
- ierr = nc_inq_vardimid(self._grpid, self._varid, dimids)
- _ensure_nc_success(ierr)
+
+ dimids = _inq_vardimid(self._grpid, self._varid, self.auto_complex)
+
# loop over dimensions, retrieve names.
dimensions = ()
- for nn from 0 <= nn < numdims:
+ for dimid in dimids:
with nogil:
- ierr = nc_inq_dimname(self._grpid, dimids[nn], namstring)
+ ierr = nc_inq_dimname(self._grpid, dimid, namstring)
_ensure_nc_success(ierr)
name = namstring.decode('utf-8')
dimensions = dimensions + (name,)
- free(dimids)
+
return dimensions
def _getname(self):
@@ -5778,9 +5843,15 @@ NC_CHAR).
# if count contains a zero element, no data is being read
if 0 not in count:
if sum(stride) == ndims or ndims == 0:
- with nogil:
- ierr = nc_get_vara(self._grpid, self._varid,
- startp, countp, PyArray_DATA(data))
+ if self.auto_complex:
+ with nogil:
+ ierr = pfnc_get_vara(self._grpid, self._varid,
+ startp, countp, PyArray_DATA(data))
+ else:
+ with nogil:
+ ierr = nc_get_vara(self._grpid, self._varid,
+ startp, countp, PyArray_DATA(data))
+
else:
with nogil:
ierr = nc_get_vars(self._grpid, self._varid,
diff --git a/test/test_complex.py b/test/test_complex.py
new file mode 100644
index 000000000..8c03953f8
--- /dev/null
+++ b/test/test_complex.py
@@ -0,0 +1,65 @@
+import netCDF4
+import numpy as np
+import pathlib
+import tempfile
+import unittest
+
+complex_array = np.array([0 + 0j, 1 + 0j, 0 + 1j, 1 + 1j, 0.25 + 0.75j], dtype="c16")
+np_dt = np.dtype([("r", np.float64), ("i", np.float64)])
+complex_struct_array = np.array(
+ [(r, i) for r, i in zip(complex_array.real, complex_array.imag)],
+ dtype=np_dt,
+)
+
+
+class ComplexNumbersTestCase(unittest.TestCase):
+ def setUp(self):
+ self.tmp_path = pathlib.Path(tempfile.mkdtemp())
+
+ def test_read_dim(self):
+ filename = self.tmp_path / "test_read_dim.nc"
+
+ with netCDF4.Dataset(filename, "w") as f:
+ f.createDimension("x", size=len(complex_array))
+ f.createDimension("ri", size=2)
+ c_ri = f.createVariable("data_dim", np.float64, ("x", "ri"))
+ as_dim_array = np.vstack((complex_array.real, complex_array.imag)).T
+ c_ri[:] = as_dim_array
+
+ with netCDF4.Dataset(filename, "r", auto_complex=True) as f:
+ assert "data_dim" in f.variables
+ data_dim = f["data_dim"]
+ assert data_dim.shape == complex_array.shape
+ data = data_dim[:]
+
+ assert np.array_equal(data, complex_array)
+
+ def test_read_struct(self):
+ filename = self.tmp_path / "test_read_struct.nc"
+
+ with netCDF4.Dataset(filename, "w") as f:
+ f.createDimension("x", size=len(complex_array))
+ nc_dt = f.createCompoundType(np_dt, "nc_complex")
+ c_struct = f.createVariable("data_struct", nc_dt, ("x",))
+ c_struct[:] = complex_struct_array
+
+ with netCDF4.Dataset(filename, "r", auto_complex=True) as f:
+ assert "data_struct" in f.variables
+ data = f["data_struct"][:]
+
+ assert np.array_equal(data, complex_array)
+
+ def test_write(self):
+ filename = self.tmp_path / "test_write.nc"
+ with netCDF4.Dataset(filename, "w", auto_complex=True) as f:
+ f.createDimension("x", size=len(complex_array))
+ complex_var = f.createVariable("complex_data", "c16", ("x",))
+ complex_var[:] = complex_array
+
+ with netCDF4.Dataset(filename, "r") as f:
+ assert "complex_data" in f.variables
+ assert np.array_equal(f["complex_data"], complex_struct_array)
+
+
+if __name__ == "__main__":
+ unittest.main()
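
The compound convention used above stores each complex value as two adjacent float64 fields, which occupy exactly the same 16 bytes as one complex128 value. A small numpy-only illustration of that layout equivalence (not how the library itself performs the conversion, which happens in C inside nc-complex):

    import numpy as np

    np_dt = np.dtype([("r", np.float64), ("i", np.float64)])
    struct = np.array([(0.0, 0.0), (1.0, 0.0), (0.25, 0.75)], dtype=np_dt)

    # Same 16 bytes per element, so a view reinterprets without copying.
    as_complex = struct.view(np.complex128)  # array([0.+0.j, 1.+0.j, 0.25+0.75j])
    back = as_complex.view(np_dt)            # round-trips to the structured form
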
From 5ac2c6aed0ac24c6b5327a0202d63fc358c8863f Mon Sep 17 00:00:00 2001
From: Peter Hill
Date: Tue, 7 Nov 2023 17:10:49 +0000
Subject: [PATCH 03/11] Tidy some checks around endianness
---
src/netCDF4/_netCDF4.pyx | 69 +++++++++++++++++++++-------------------
1 file changed, 37 insertions(+), 32 deletions(-)
diff --git a/src/netCDF4/_netCDF4.pyx b/src/netCDF4/_netCDF4.pyx
index c3bf7518c..e9ebf0586 100644
--- a/src/netCDF4/_netCDF4.pyx
+++ b/src/netCDF4/_netCDF4.pyx
@@ -1517,6 +1517,15 @@ _supportedtypes = _nptonctype.keys()
# make sure NC_CHAR points to S1
_nctonptype[NC_CHAR]='S1'
+# Mapping from numpy dtype endian format to what we expect
+_dtype_endian_lookup = {
+ "=": "native",
+ ">": "big",
+ "<": "little",
+ "|": None,
+ None: None,
+}
+
# internal C functions.
cdef _get_att_names(int grpid, int varid):
@@ -2026,12 +2035,11 @@ cdef _get_vars(group, bint auto_complex=False):
# TODO: proper lookup
datatype = "c16" if complex_nc_type == NC_DOUBLE else "c8"
- if endianness == '>':
- variables[name] = Variable(group, name, datatype, dimensions, id=varid, auto_complex=auto_complex, endian='big')
- elif endianness == '<':
- variables[name] = Variable(group, name, datatype, dimensions, id=varid, auto_complex=auto_complex, endian='little')
- else:
- variables[name] = Variable(group, name, datatype, dimensions, id=varid, auto_complex=auto_complex)
+ endian = _dtype_endian_lookup[endianness] or "native"
+ variables[name] = Variable(
+ group, name, datatype, dimensions, id=varid, auto_complex=auto_complex, endian=endian
+ )
+
free(varids) # free pointer holding variable ids.
return variables
@@ -3949,6 +3957,7 @@ behavior is similar to Fortran or Matlab, but different than numpy.
cdef size_t *chunksizesp
cdef float preemptionp
cdef int nc_complex_typeid
+ cdef int _nc_endian
# Extra information for more helpful error messages
error_info = f"(variable '{name}', group '{grp.name}')"
@@ -3997,6 +4006,18 @@ behavior is similar to Fortran or Matlab, but different than numpy.
pass
else:
raise ValueError(f"Unsupported value for compression kwarg {error_info}")
+
+ if grp.data_model.startswith("NETCDF3") and endian != 'native':
+ raise RuntimeError(
+ f"only endian='native' allowed for NETCDF3 files, got '{endian}' {error_info}"
+ )
+
+ if endian not in ("little", "big", "native"):
+ raise ValueError(
+ f"'endian' keyword argument must be 'little','big' or 'native', got '{endian}' "
+ f"{error_info}"
+ )
+
self._grpid = grp._grpid
# make a weakref to group to avoid circular ref (issue 218)
# keep strong reference the default behaviour (issue 251)
@@ -4033,20 +4054,11 @@ behavior is similar to Fortran or Matlab, but different than numpy.
datatype = str
user_type = True
# check if endian keyword consistent with datatype specification.
- dtype_endian = getattr(datatype,'byteorder',None)
- if dtype_endian == '=': dtype_endian='native'
- if dtype_endian == '>': dtype_endian='big'
- if dtype_endian == '<': dtype_endian='little'
- if dtype_endian == '|': dtype_endian=None
+ dtype_endian = _dtype_endian_lookup[getattr(datatype, "byteorder", None)]
if dtype_endian is not None and dtype_endian != endian:
- if dtype_endian == 'native' and endian == sys.byteorder:
- pass
- else:
- # endian keyword prevails, issue warning
- msg = 'endian-ness of dtype and endian kwarg do not match, using endian kwarg'
- #msg = 'endian-ness of dtype and endian kwarg do not match, dtype over-riding endian kwarg'
- warnings.warn(msg)
- #endian = dtype_endian # dtype prevails
+ if not (dtype_endian == 'native' and endian == sys.byteorder):
+ warnings.warn('endian-ness of dtype and endian kwarg do not match, using endian kwarg')
+
# check validity of datatype.
self._isprimitive = False
self._iscompound = False
@@ -4248,17 +4260,13 @@ behavior is similar to Fortran or Matlab, but different than numpy.
if ierr != NC_NOERR:
if grp.data_model != 'NETCDF4': grp._enddef()
_ensure_nc_success(ierr, extra_msg=error_info)
+
# set endian-ness of variable
- if endian == 'little':
- with nogil:
- ierr = nc_def_var_endian(self._grpid, self._varid, NC_ENDIAN_LITTLE)
- elif endian == 'big':
+ if endian != 'native':
+ _nc_endian = NC_ENDIAN_LITTLE if endian == "little" else NC_ENDIAN_BIG
with nogil:
- ierr = nc_def_var_endian(self._grpid, self._varid, NC_ENDIAN_BIG)
- elif endian == 'native':
- pass # this is the default format.
- else:
- raise ValueError("'endian' keyword argument must be 'little','big' or 'native', got '%s'" % endian)
+ ierr = nc_def_var_endian(self._grpid, self._varid, _nc_endian)
+ _ensure_nc_success(ierr, extra_msg=error_info)
# set quantization
if significant_digits is not None:
@@ -4288,10 +4296,7 @@ behavior is similar to Fortran or Matlab, but different than numpy.
if ierr != NC_NOERR:
if grp.data_model != 'NETCDF4': grp._enddef()
_ensure_nc_success(ierr, extra_msg=error_info)
- else:
- if endian != 'native':
- msg=f"only endian='native' allowed for NETCDF3 files {error_info}"
- raise RuntimeError(msg)
+
# set a fill value for this variable if fill_value keyword
# given. This avoids the HDF5 overhead of deleting and
# recreating the dataset if it is set later (after the enddef).
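
The new `_dtype_endian_lookup` table centralises the translation from numpy's `dtype.byteorder` codes to the `endian` keyword values used here; the `|` and `None` entries cover dtypes for which byte order is not meaningful. A quick illustration (the '>f8' result assumes a little-endian host, since numpy reports the native order as '='):

    import numpy as np

    _dtype_endian_lookup = {"=": "native", ">": "big", "<": "little", "|": None, None: None}

    _dtype_endian_lookup[np.dtype("f8").byteorder]         # "native"  ('=')
    _dtype_endian_lookup[np.dtype("S1").byteorder]         # None      ('|', no byte order)
    _dtype_endian_lookup[np.dtype(">f8").byteorder]        # "big"     ('>' on little-endian hosts)
    _dtype_endian_lookup[getattr(str, "byteorder", None)]  # None      (no byteorder attribute)
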
From d364a6f5f2785c2e842e4bc1a84800518c1f9f16 Mon Sep 17 00:00:00 2001
From: Peter Hill
Date: Thu, 16 Nov 2023 13:57:20 +0000
Subject: [PATCH 04/11] Simplify duplicated call to `nc_def_var`
---
src/netCDF4/_netCDF4.pyx | 12 ++++--------
1 file changed, 4 insertions(+), 8 deletions(-)
diff --git a/src/netCDF4/_netCDF4.pyx b/src/netCDF4/_netCDF4.pyx
index e9ebf0586..ba12966b5 100644
--- a/src/netCDF4/_netCDF4.pyx
+++ b/src/netCDF4/_netCDF4.pyx
@@ -3952,7 +3952,7 @@ behavior is similar to Fortran or Matlab, but different than numpy.
cdef char namstring[NC_MAX_NAME+1]
cdef char *varname
cdef nc_type xtype
- cdef int *dimids
+ cdef int *dimids = NULL
cdef size_t sizep, nelemsp
cdef size_t *chunksizesp
cdef float preemptionp
@@ -4116,15 +4116,11 @@ behavior is similar to Fortran or Matlab, but different than numpy.
# any exceptions are raised.
if grp.data_model != 'NETCDF4': grp._redef()
# define variable.
+ with nogil:
+ ierr = nc_def_var(self._grpid, varname, xtype, ndims,
+ dimids, &self._varid)
if ndims:
- with nogil:
- ierr = nc_def_var(self._grpid, varname, xtype, ndims,
- dimids, &self._varid)
free(dimids)
- else: # a scalar variable.
- with nogil:
- ierr = nc_def_var(self._grpid, varname, xtype, ndims,
- NULL, &self._varid)
if ierr != NC_NOERR:
if grp.data_model != 'NETCDF4':
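
This works because dimids is now initialised to NULL in the declaration above and nc_def_var ignores the dimension-id pointer when ndims is zero, so the scalar and non-scalar cases no longer need separate calls; only the free() has to stay conditional.
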
From d60252b19695931fd68a1d94f4e95175b06ded3a Mon Sep 17 00:00:00 2001
From: Peter Hill
Date: Thu, 16 Nov 2023 17:01:02 +0000
Subject: [PATCH 05/11] Add support for complex numbers in netcdf3/classic
model
---
include/netCDF4.pxi | 27 +++++---
src/netCDF4/_netCDF4.pyx | 140 +++++++++++++++++++++++++--------------
test/test_complex.py | 13 ++++
3 files changed, 121 insertions(+), 59 deletions(-)
diff --git a/include/netCDF4.pxi b/include/netCDF4.pxi
index 8d232df8f..00d883662 100644
--- a/include/netCDF4.pxi
+++ b/include/netCDF4.pxi
@@ -469,17 +469,28 @@ cdef extern from "netcdf-compat.h":
# Declarations for handling complex numbers
cdef extern from "nc_complex/nc_complex.h":
- int pfnc_var_is_complex(int ncid, int varid) nogil
- int pfnc_var_is_complex_type(int ncid, int varid) nogil
- int pfnc_has_complex_dimension(int ncid, int varid) nogil
- int pfnc_get_double_complex_typeid(int ncid, nc_type *nc_typeid) nogil
- int pfnc_get_float_complex_typeid(int ncid, nc_type *nc_typeid) nogil
+ bint pfnc_var_is_complex(int ncid, int varid) nogil
+ bint pfnc_var_is_complex_type(int ncid, int varid) nogil
- int pfnc_complex_base_type(int ncid, int nc_typeid, int* base_type_id) nogil
+ int pfnc_get_complex_dim(int ncid, int* nc_dim) nogil
int pfnc_inq_var_complex_base_type(int ncid, int varid, int* nc_typeid) nogil
int pfnc_inq_varndims (int ncid, int varid, int *ndimsp) nogil
int pfnc_inq_vardimid (int ncid, int varid, int *dimidsp) nogil
- int pfnc_get_vara (int ncid, int varid, size_t *startp,
- size_t *countp, void *ip) nogil
+ int pfnc_def_var(int ncid, char *name, nc_type xtype, int ndims,
+ int *dimidsp, int *varidp) nogil
+
+ int pfnc_get_vars(int ncid, int varid, size_t *startp,
+ size_t *countp, ptrdiff_t *stridep,
+ void *ip) nogil
+
+ int pfnc_put_vars(int ncid, int varid, size_t *startp,
+ size_t *countp, ptrdiff_t *stridep,
+ void *op) nogil
+
+ cdef enum:
+ PFNC_DOUBLE_COMPLEX
+ PFNC_DOUBLE_COMPLEX_DIM
+ PFNC_FLOAT_COMPLEX
+ PFNC_FLOAT_COMPLEX_DIM
diff --git a/src/netCDF4/_netCDF4.pyx b/src/netCDF4/_netCDF4.pyx
index ba12966b5..1c11aa972 100644
--- a/src/netCDF4/_netCDF4.pyx
+++ b/src/netCDF4/_netCDF4.pyx
@@ -1435,6 +1435,11 @@ _intnptonctype = {'i1' : NC_BYTE,
'i8' : NC_INT64,
'u8' : NC_UINT64}
+_complex_types = {
+ "c16": PFNC_DOUBLE_COMPLEX,
+ "c8": PFNC_FLOAT_COMPLEX,
+}
+
# create dictionary mapping string identifiers to netcdf format codes
_format_dict = {'NETCDF3_CLASSIC' : NC_FORMAT_CLASSIC,
'NETCDF4_CLASSIC' : NC_FORMAT_NETCDF4_CLASSIC,
@@ -1479,7 +1484,8 @@ if __has_pnetcdf_support__:
'NETCDF3_64BIT'
]
-# default fill_value to numpy datatype mapping.
+# Default fill_value to numpy datatype mapping. The last two entries, for
+# complex numbers, apply only to the complex-dimension storage convention.
default_fillvals = {#'S1':NC_FILL_CHAR,
'S1':'\0',
'i1':NC_FILL_BYTE,
@@ -1491,7 +1497,10 @@ default_fillvals = {#'S1':NC_FILL_CHAR,
'i8':NC_FILL_INT64,
'u8':NC_FILL_UINT64,
'f4':NC_FILL_FLOAT,
- 'f8':NC_FILL_DOUBLE}
+ 'f8':NC_FILL_DOUBLE,
+ 'c8':NC_FILL_FLOAT,
+ 'c16':NC_FILL_DOUBLE,
+}
# logical for native endian type.
is_native_little = numpy.dtype('<f4').byteorder == '='
+            if pfnc_var_is_complex_type(self._grpid, self._varid):
+ self._isprimitive = False
+ self._iscompound = True
+ with nogil:
+ ierr = pfnc_inq_var_complex_base_type(self._grpid, self._varid, &complex_typeid)
+ _ensure_nc_success(ierr, extra_msg=error_info)
+
+ np_complex_type = _nctonptype[complex_typeid]
+ compound_complex_type = f"{np_complex_type}, {np_complex_type}"
+
+ self._cmptype = CompoundType(
+ self._grp, compound_complex_type, "complex", typeid=complex_typeid
+ )
+ else:
+ with nogil:
+ ierr = pfnc_get_complex_dim(self._grpid, &complex_dim_id)
+ _ensure_nc_success(ierr, extra_msg=error_info)
+ with nogil:
+ ierr = nc_inq_dimname(self._grpid, complex_dim_id, name)
+ _ensure_nc_success(ierr, extra_msg=error_info)
+ self._grp.dimensions[name.decode("utf-8")] = Dimension(
+ self._grp, name, size=2, id=complex_dim_id
+ )
+
def __array__(self):
# numpy special method that returns a numpy array.
# allows numpy ufuncs to work faster on Variable objects
@@ -4430,7 +4465,7 @@ behavior is similar to Fortran or Matlab, but different than numpy.
cdef int ierr, dimid
cdef char namstring[NC_MAX_NAME+1]
- dimids = _inq_vardimid(self._grpid, self._varid, self.auto_complex)
+ dimids = _inq_vardimid(self._grpid, self._varid, self._grp.auto_complex)
# loop over dimensions, retrieve names.
dimensions = ()
@@ -5721,7 +5756,11 @@ NC_CHAR).
if not data.dtype.isnative:
data = data.byteswap()
# strides all 1 or scalar variable, use put_vara (faster)
- if sum(stride) == ndims or ndims == 0:
+ if self._grp.auto_complex:
+ with nogil:
+ ierr = pfnc_put_vars(self._grpid, self._varid,
+ startp, countp, stridep, PyArray_DATA(data))
+ elif sum(stride) == ndims or ndims == 0:
with nogil:
ierr = nc_put_vara(self._grpid, self._varid,
startp, countp, PyArray_DATA(data))
@@ -5843,16 +5882,15 @@ NC_CHAR).
# strides all 1 or scalar variable, use get_vara (faster)
# if count contains a zero element, no data is being read
if 0 not in count:
- if sum(stride) == ndims or ndims == 0:
- if self.auto_complex:
- with nogil:
- ierr = pfnc_get_vara(self._grpid, self._varid,
- startp, countp, PyArray_DATA(data))
- else:
- with nogil:
- ierr = nc_get_vara(self._grpid, self._varid,
- startp, countp, PyArray_DATA(data))
-
+ if self._grp.auto_complex:
+ with nogil:
+ ierr = pfnc_get_vars(self._grpid, self._varid,
+ startp, countp, stridep,
+ PyArray_DATA(data))
+ elif sum(stride) == ndims or ndims == 0:
+ with nogil:
+ ierr = nc_get_vara(self._grpid, self._varid,
+ startp, countp, PyArray_DATA(data))
else:
with nogil:
ierr = nc_get_vars(self._grpid, self._varid,
diff --git a/test/test_complex.py b/test/test_complex.py
index 8c03953f8..20856ce1f 100644
--- a/test/test_complex.py
+++ b/test/test_complex.py
@@ -60,6 +60,19 @@ def test_write(self):
assert "complex_data" in f.variables
assert np.array_equal(f["complex_data"], complex_struct_array)
+ def test_write_netcdf3(self):
+ filename = self.tmp_path / "test_write_netcdf3.nc"
+ with netCDF4.Dataset(
+ filename, "w", format="NETCDF3_CLASSIC", auto_complex=True
+ ) as f:
+ f.createDimension("x", size=len(complex_array))
+ complex_var = f.createVariable("complex_data", "c16", ("x",))
+ complex_var[:] = complex_array
+
+ with netCDF4.Dataset(filename, "r", auto_complex=True) as f:
+ assert "complex_data" in f.variables
+ assert np.array_equal(f["complex_data"][:], complex_array)
+
if __name__ == "__main__":
unittest.main()
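
Because the classic-model path stores complex data through an extra dimension rather than a compound type, a file written by test_write_netcdf3 should also be readable as plain doubles when opened without auto_complex. A sketch of what that view is expected to look like, using the _pfnc_complex dimension name described in the docs added later in this series:

    import netCDF4

    with netCDF4.Dataset("test_write_netcdf3.nc") as f:  # note: no auto_complex
        var = f["complex_data"]
        print(var.dimensions)  # expected ("x", "_pfnc_complex")
        print(var.shape)       # expected (5, 2): real part in [:, 0], imaginary in [:, 1]
        print(var.dtype)       # float64
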
From df42ae11887a13326d06d87d77d06d564d845e4e Mon Sep 17 00:00:00 2001
From: Peter Hill
Date: Thu, 16 Nov 2023 17:30:31 +0000
Subject: [PATCH 06/11] CI: Checkout submodules
---
.github/workflows/build_latest.yml | 2 ++
.github/workflows/build_master.yml | 2 ++
.github/workflows/build_old.yml | 2 ++
.github/workflows/miniconda.yml | 4 ++++
4 files changed, 10 insertions(+)
diff --git a/.github/workflows/build_latest.yml b/.github/workflows/build_latest.yml
index 3d252e1cc..4ec6ff7fd 100644
--- a/.github/workflows/build_latest.yml
+++ b/.github/workflows/build_latest.yml
@@ -17,6 +17,8 @@ jobs:
steps:
- uses: actions/checkout@v4
+ with:
+ submodules: true
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
diff --git a/.github/workflows/build_master.yml b/.github/workflows/build_master.yml
index c2a00e76f..a072bf451 100644
--- a/.github/workflows/build_master.yml
+++ b/.github/workflows/build_master.yml
@@ -14,6 +14,8 @@ jobs:
steps:
- uses: actions/checkout@v4
+ with:
+ submodules: true
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
diff --git a/.github/workflows/build_old.yml b/.github/workflows/build_old.yml
index e00d8be17..c2d4c16e3 100644
--- a/.github/workflows/build_old.yml
+++ b/.github/workflows/build_old.yml
@@ -17,6 +17,8 @@ jobs:
steps:
- uses: actions/checkout@v4
+ with:
+ submodules: true
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
diff --git a/.github/workflows/miniconda.yml b/.github/workflows/miniconda.yml
index ca4309c33..33f079898 100644
--- a/.github/workflows/miniconda.yml
+++ b/.github/workflows/miniconda.yml
@@ -22,6 +22,8 @@ jobs:
steps:
- uses: actions/checkout@v4
+ with:
+ submodules: true
- name: Setup Micromamba
uses: mamba-org/setup-micromamba@v1
@@ -53,6 +55,8 @@ jobs:
platform: [x64]
steps:
- uses: actions/checkout@v4
+ with:
+ submodules: true
- name: Setup Micromamba
uses: mamba-org/setup-micromamba@v1
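
The same requirement applies to local builds: the nc_complex sources are only present once the submodule is fetched, e.g. with `git clone --recurse-submodules` or, in an existing clone, `git submodule update --init --recursive`; otherwise external/nc_complex/src/nc_complex.c will be missing when setup.py runs.
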
From 4be9751dc5aca948b6e4e126e35ea0c77828651d Mon Sep 17 00:00:00 2001
From: Peter Hill
Date: Tue, 21 Nov 2023 17:01:35 +0000
Subject: [PATCH 07/11] Add some docs for complex number support
---
docs/index.html | 72 ++++++++++++++++++++++++++++++++++------
examples/tutorial.py | 8 +++++
src/netCDF4/_netCDF4.pyx | 62 +++++++++++++++++++++++++++++-----
3 files changed, 122 insertions(+), 20 deletions(-)
diff --git a/docs/index.html b/docs/index.html
index 7ed8055d2..6971ceb5b 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -3,7 +3,7 @@
-
+
netCDF4 API documentation
@@ -49,8 +49,9 @@ Quick Install
Developer Install
+All of the code in this tutorial is available in examples/tutorial.py, except
+the parallel IO example, which is in examples/mpi_example.py.
+Unit tests are in the test directory.
Creating/Opening/Closing a netCDF file
To create a netCDF file from python, you simply call the Dataset
constructor. This is also the method used to open an existing netCDF
@@ -671,9 +675,9 @@
Beyond ho
Compound data types
are created from the corresponding numpy data type using the
Dataset.createCompoundType()
method of a Dataset
or Group
instance.
-Since there is no native complex data type in netcdf, compound types are handy
-for storing numpy complex arrays.
-Here's an example:
+Since there is no native complex data type in netcdf (but see
+Support for complex numbers), compound
+types are handy for storing numpy complex arrays. Here's an example:
>>> f = Dataset("complex.nc","w")
>>> size = 3 # length of 1-d complex array
>>> # create sample complex data.
@@ -1096,9 +1100,43 @@ In-memory (diskless) Datasets
[0 1 2 3 4]
>>> nc.close()
-All of the code in this tutorial is available in examples/tutorial.py, except
-the parallel IO example, which is in examples/mpi_example.py.
-Unit tests are in the test directory.
+Support for complex numbers
+Although there is no native support for complex numbers in netCDF, there are
+some common conventions for storing them. Two of the most common are to either
+use a compound datatype for the real and imaginary components, or a separate
+dimension. netCDF4 supports reading several of these conventions, as well as
+writing using one of two conventions (depending on file format). This support
+for complex numbers is enabled by setting auto_complex=True when opening a
+Dataset:
+>>> complex_array = np.array([0 + 0j, 1 + 0j, 0 + 1j, 1 + 1j, 0.25 + 0.75j])
+>>> with netCDF4.Dataset("complex.nc", "w", auto_complex=True) as nc:
+...     nc.createDimension("x", size=len(complex_array))
+...     var = nc.createVariable("data", "c16", ("x",))
+...     var[:] = complex_array
+...     print(var)
+<class 'netCDF4._netCDF4.Variable'>
+compound data(x)
+compound data type: complex128
+unlimited dimensions:
+current shape = (5,)
+
+When reading files using auto_complex=True, netCDF4 will interpret variables
+stored using the following conventions as complex numbers:
+
+- compound datatypes with two float or double members whose names begin with
+  r and i (case insensitive)
+- a dimension of length 2 named complex or ri
+
+When writing files using auto_complex=True, netCDF4 will use:
+
+- a compound datatype named _PFNC_DOUBLE_COMPLEX_TYPE (or *FLOAT* as
+  appropriate) with members r and i for netCDF4 formats;
+- or a dimension of length 2 named _pfnc_complex for netCDF3 or classic
+  formats.
+
+Support for complex numbers is handled via the nc-complex library. See there
+for further details.
contact: Jeffrey Whitaker jeffrey.s.whitaker@noaa.gov
copyright: 2008 by Jeffrey Whitaker.
license: Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
@@ -1553,7 +1591,8 @@ Instance variables
Ignored if parallel=False
.
info
: MPI_Info object for parallel access. Default None
, which
means MPI_INFO_NULL will be used.
-Ignored if parallel=False
.
+Ignored if parallel=False
.
+auto_complex
: if True
, then automatically convert complex number types
Subclasses
- netCDF4._netCDF4.Group
@@ -1582,6 +1621,10 @@ Static methods
Instance variables
+var auto_complex
+-
+
Return an attribute of instance, which is of type owner.
+
var cmptypes
-
Return an attribute of instance, which is of type owner.
@@ -2627,6 +2670,10 @@ Instance variables
-
Return an attribute of instance, which is of type owner.
+ var auto_complex
+-
+
Return an attribute of instance, which is of type owner.
+
var chartostring
-
Return an attribute of instance, which is of type owner.
@@ -3027,6 +3074,7 @@ Index
- Parallel IO
- Dealing with strings
- In-memory (diskless) Datasets
+- Support for complex numbers
@@ -3060,6 +3108,7 @@