diff --git a/configure.ac b/configure.ac index f488a0f038..c16d9a47ca 100644 --- a/configure.ac +++ b/configure.ac @@ -438,22 +438,6 @@ fi AM_CONDITIONAL(USE_STRICT_NULL_BYTE_HEADER_PADDING, [test x$enable_strict_null_byte_header_padding = xyes ]) -# Check whether we want to enable CDF5 support. -AC_MSG_CHECKING([whether CDF5 support should be enabled (default off)]) -AC_ARG_ENABLE([cdf5], - [AS_HELP_STRING([--enable-cdf5], - [build with CDF5 support.])]) -test "x$enable_cdf5" = xyes || enable_cdf5=no -AC_MSG_RESULT($enable_cdf5) - -if test "x$enable_cdf5" = xyes; then - AC_DEFINE([USE_CDF5], [1], [if true, enable CDF5 Support]) - AC_DEFINE([ENABLE_CDF5], [1], [if true, enable CDF5 Support]) -fi - -AM_CONDITIONAL(USE_CDF5, [test x$enable_cdf5 = xyes ]) -AM_CONDITIONAL(ENABLE_CDF5, [test x$enable_cdf5 = xyes ]) - # Does the user want to use the ffio module? AC_MSG_CHECKING([whether FFIO will be used]) AC_ARG_ENABLE([ffio], @@ -882,8 +866,32 @@ $SLEEPCMD AC_CHECK_SIZEOF(size_t) $SLEEPCMD AC_CHECK_SIZEOF(unsigned long long) -$SLEEPCMD -AC_CHECK_SIZEOF(unsigned long long) + +# Check whether we want to enable CDF5 support. +AC_MSG_CHECKING([whether CDF5 support should be enabled]) +AC_ARG_ENABLE([cdf5], + [AS_HELP_STRING([--disable-cdf5], + [build without CDF5 support.])], + [enable_cdf5=${enableval}], [enable_cdf5=auto] +) +if test "x${enable_cdf5}" = xyes && test "$ac_cv_sizeof_size_t" -lt "8" ; then + dnl unable to support CDF5, but --enable-cdf5 is explicitly set + AC_MSG_ERROR([Unable to support CDF5 feature because size_t is less than 4 bytes]) +fi +if test "$ac_cv_sizeof_size_t" -lt "8" ; then + enable_cdf5=no +else + enable_cdf5=yes +fi +AC_MSG_RESULT($enable_cdf5) + +if test "x${enable_cdf5}" = xyes; then + AC_DEFINE([USE_CDF5], [1], [if true, enable CDF5 Support]) + AC_DEFINE([ENABLE_CDF5], [1], [if true, enable CDF5 Support]) +fi + +AM_CONDITIONAL(USE_CDF5, [test x$enable_cdf5 = xyes ]) +AM_CONDITIONAL(ENABLE_CDF5, [test x$enable_cdf5 = xyes ]) $SLEEPCMD if test "$ac_cv_type_uchar" = yes ; then @@ -1371,7 +1379,7 @@ AC_SUBST(HAS_DISKLESS,[$enable_diskless]) AC_SUBST(HAS_MMAP,[$enable_mmap]) AC_SUBST(HAS_JNA,[$enable_jna]) AC_SUBST(RELAX_COORD_BOUND,[$enable_relax_coord_bound]) -AC_SUBST(ENABLE_ERANGE_FILL,[$enable_erange_fill]) +AC_SUBST(HAS_ERANGE_FILL,[$enable_erange_fill]) # Include some specifics for netcdf on windows. #AH_VERBATIM([_WIN32_STRICMP], diff --git a/include/nc3internal.h b/include/nc3internal.h index d46dd6443f..b5b4113087 100644 --- a/include/nc3internal.h +++ b/include/nc3internal.h @@ -428,6 +428,9 @@ nc_put_rec(int ncid, size_t recnum, void *const *datap); extern int NC_check_vlens(NC3_INFO *ncp); +extern int +NC_check_voffs(NC3_INFO *ncp); + /* Define accessors for the dispatchdata */ #define NC3_DATA(nc) ((NC3_INFO*)(nc)->dispatchdata) #define NC3_DATA_SET(nc,data) ((nc)->dispatchdata = (void*)(data)) diff --git a/libsrc/nc3internal.c b/libsrc/nc3internal.c index acaf739140..f9130f31da 100644 --- a/libsrc/nc3internal.c +++ b/libsrc/nc3internal.c @@ -312,21 +312,23 @@ fprintf(stderr, " REC %d %s: %ld\n", ii, (*vpp)->name->cp, (long)index); return NC_EVARSIZE; } #endif - if((*vpp)->len != UINT32_MAX) /* flag for vars >= 2**32 bytes */ - ncp->recsize += (*vpp)->len; + ncp->recsize += (*vpp)->len; last = (*vpp); } - /* - * for special case of - */ - if(last != NULL) { - if(ncp->recsize == last->len) { /* exactly one record variable, pack value */ - ncp->recsize = *last->dsizes * last->xsz; - } else if(last->len == UINT32_MAX) { /* huge last record variable */ - ncp->recsize += *last->dsizes * last->xsz; - } - } + /* + * for special case (Check CDF-1 and CDF-2 file format specifications.) + * "A special case: Where there is exactly one record variable, we drop the + * requirement that each record be four-byte aligned, so in this case there + * is no record padding." + */ + if (last != NULL) { + if (ncp->recsize == last->len) { + /* exactly one record variable, pack value */ + ncp->recsize = *last->dsizes * last->xsz; + } + } + if(NC_IsNew(ncp)) NC_set_numrecs(ncp, 0); return NC_NOERR; @@ -709,17 +711,14 @@ NC_check_vlens(NC3_INFO *ncp) if(ncp->vars.nelems == 0) return NC_NOERR; - if (fIsSet(ncp->flags,NC_64BIT_DATA)) { - /* CDF5 format allows many large vars */ - return NC_NOERR; - } - if (fIsSet(ncp->flags,NC_64BIT_OFFSET) && sizeof(off_t) > 4) { + if (fIsSet(ncp->flags,NC_64BIT_DATA)) /* CDF-5 */ + vlen_max = X_INT64_MAX - 3; /* "- 3" handles rounded-up size */ + else if (fIsSet(ncp->flags,NC_64BIT_OFFSET) && sizeof(off_t) > 4) /* CDF2 format and LFS */ vlen_max = X_UINT_MAX - 3; /* "- 3" handles rounded-up size */ - } else { - /* CDF1 format */ + else /* CDF1 format */ vlen_max = X_INT_MAX - 3; - } + /* Loop through vars, first pass is for non-record variables. */ large_vars_count = 0; rec_vars_count = 0; @@ -728,6 +727,8 @@ NC_check_vlens(NC3_INFO *ncp) if( !IS_RECVAR(*vpp) ) { last = 0; if( NC_check_vlen(*vpp, vlen_max) == 0 ) { + if (fIsSet(ncp->flags,NC_64BIT_DATA)) /* too big for CDF-5 */ + return NC_EVARSIZE; large_vars_count++; last = 1; } @@ -756,6 +757,8 @@ NC_check_vlens(NC3_INFO *ncp) if( IS_RECVAR(*vpp) ) { last = 0; if( NC_check_vlen(*vpp, vlen_max) == 0 ) { + if (fIsSet(ncp->flags,NC_64BIT_DATA)) /* too big for CDF-5 */ + return NC_EVARSIZE; large_vars_count++; last = 1; } @@ -774,6 +777,59 @@ NC_check_vlens(NC3_INFO *ncp) return NC_NOERR; } +/*----< NC_check_voffs() >---------------------------------------------------*/ +/* + * Given a valid ncp, check whether the file starting offsets (begin) of all + * variables follows the same increasing order as they were defined. + */ +int +NC_check_voffs(NC3_INFO *ncp) +{ + size_t i; + off_t prev_off; + NC_var *varp; + + if (ncp->vars.nelems == 0) return NC_NOERR; + + /* Loop through vars, first pass is for non-record variables */ + prev_off = ncp->begin_var; + for (i=0; ivars.nelems; i++) { + varp = ncp->vars.value[i]; + if (IS_RECVAR(varp)) continue; + + if (varp->begin < prev_off) { +#if 0 + fprintf(stderr,"Variable \"%s\" begin offset (%lld) is less than previous variable end offset (%lld)\n", varp->name->cp, varp->begin, prev_off); +#endif + return NC_ENOTNC; + } + prev_off = varp->begin + varp->len; + } + + if (ncp->begin_rec < prev_off) { +#if 0 + fprintf(stderr,"Record variable section begin offset (%lld) is less than fix-sized variable section end offset (%lld)\n", varp->begin, prev_off); +#endif + return NC_ENOTNC; + } + + /* Loop through vars, second pass is for record variables */ + prev_off = ncp->begin_rec; + for (i=0; ivars.nelems; i++) { + varp = ncp->vars.value[i]; + if (!IS_RECVAR(varp)) continue; + + if (varp->begin < prev_off) { +#if 0 + fprintf(stderr,"Variable \"%s\" begin offset (%lld) is less than previous variable end offset (%lld)\n", varp->name->cp, varp->begin, prev_off); +#endif + return NC_ENOTNC; + } + prev_off = varp->begin + varp->len; + } + + return NC_NOERR; +} /* * End define mode. @@ -794,6 +850,9 @@ NC_endef(NC3_INFO *ncp, if(status != NC_NOERR) return status; status = NC_begins(ncp, h_minfree, v_align, v_minfree, r_align); + if(status != NC_NOERR) + return status; + status = NC_check_voffs(ncp); if(status != NC_NOERR) return status; diff --git a/libsrc/v1hpg.c b/libsrc/v1hpg.c index a69150386d..4f996d63f9 100644 --- a/libsrc/v1hpg.c +++ b/libsrc/v1hpg.c @@ -939,6 +939,7 @@ static int v1h_put_NC_var(v1hs *psp, const NC_var *varp) { int status; + size_t vsize; status = v1h_put_NC_string(psp, varp->name); if(status != NC_NOERR) @@ -975,9 +976,19 @@ v1h_put_NC_var(v1hs *psp, const NC_var *varp) if(status != NC_NOERR) return status; - status = v1h_put_size_t(psp, &varp->len); - if(status != NC_NOERR) - return status; + /* write vsize to header. + * CDF format specification: The vsize field is actually redundant, because + * its value may be computed from other information in the header. The + * 32-bit vsize field is not large enough to contain the size of variables + * that require more than 2^32 - 4 bytes, so 2^32 - 1 is used in the vsize + * field for such variables. + */ + vsize = varp->len; + if (varp->len > 4294967292UL && (psp->version == NC_FORMAT_CLASSIC || + psp->version == NC_FORMAT_64BIT_OFFSET)) + vsize = 4294967295UL; /* 2^32-1 */ + status = v1h_put_size_t(psp, &vsize); + if(status != NC_NOERR) return status; status = check_v1hs(psp, psp->version == 1 ? 4 : 8); /*begin*/ if(status != NC_NOERR) @@ -1231,11 +1242,6 @@ NC_computeshapes(NC3_INFO* ncp) { if(first_rec == NULL) first_rec = *vpp; - if((*vpp)->len == UINT32_MAX && - (fIsSet(ncp->flags, NC_64BIT_OFFSET) || - fIsSet(ncp->flags, NC_64BIT_DATA))) /* Flag for large last record */ - ncp->recsize += (*vpp)->dsizes[0] * (*vpp)->xsz; - else ncp->recsize += (*vpp)->len; } else @@ -1538,6 +1544,10 @@ nc_get_NC(NC3_INFO* ncp) if(status != NC_NOERR) goto unwind_get; + status = NC_check_voffs(ncp); + if(status != NC_NOERR) + goto unwind_get; + unwind_get: (void) rel_v1hs(&gs); return status; diff --git a/libsrc/var.c b/libsrc/var.c index 4c4a434d45..0612b8a3f4 100644 --- a/libsrc/var.c +++ b/libsrc/var.c @@ -485,29 +485,21 @@ NC_var_shape(NC_var *varp, const NC_dimarray *dims) out : - if( varp->xsz <= (X_UINT_MAX - 1) / product ) /* if integer multiply will not overflow */ - { - varp->len = product * varp->xsz; - switch(varp->type) { - case NC_BYTE : - case NC_CHAR : - case NC_UBYTE : - case NC_SHORT : - case NC_USHORT : - if( varp->len%4 != 0 ) - { - varp->len += 4 - varp->len%4; /* round up */ - /* *dsp += 4 - *dsp%4; */ - } - break; - default: - /* already aligned */ - break; - } - } else - { /* OK for last var to be "too big", indicated by this special len */ - varp->len = X_UINT_MAX; - } + + /* No variable size can be > X_INT64_MAX - 3 */ + if (0 == NC_check_vlen(varp, X_INT64_MAX-3)) return NC_EVARSIZE; + + /* + * For CDF-1 and CDF-2 formats, the total number of array elements + * cannot exceed 2^32, unless this variable is the last fixed-size + * variable, there is no record variable, and the file starting + * offset of this variable is less than 2GiB. + * This will be checked in NC_check_vlens() during NC_endef() + */ + varp->len = product * varp->xsz; + if (varp->len % 4 > 0) + varp->len += 4 - varp->len % 4; /* round up */ + #if 0 arrayp("\tshape", varp->ndims, varp->shape); arrayp("\tdsizes", varp->ndims, varp->dsizes); diff --git a/nc_test/Makefile.am b/nc_test/Makefile.am index 354a53772c..1d14fddeaa 100644 --- a/nc_test/Makefile.am +++ b/nc_test/Makefile.am @@ -12,7 +12,6 @@ AM_CPPFLAGS += -DTOPBINDIR=${abs_top_bindir} LDADD = ${top_builddir}/liblib/libnetcdf.la AM_CPPFLAGS += -I$(top_builddir)/liblib -I$(top_builddir)/include -I$(top_srcdir)/libsrc - # Note which tests depend on other tests. necessary for make -j check TEST_EXTENSIONS = .sh @@ -46,7 +45,7 @@ test_write.c util.c error.h tests.h # If the user asked for large file tests, then add them. if LARGE_FILE_TESTS TESTPROGRAMS += quick_large_files tst_big_var6 tst_big_var2 \ -tst_big_rvar tst_big_var tst_large large_files tst_large_cdf5 +tst_big_rvar tst_big_var tst_large large_files endif # LARGE_FILE_TESTS if BUILD_BENCHMARKS @@ -111,6 +110,19 @@ tst_formatx.nc nc_test_cdf5.nc unlim.nc tst_inq_type.nc \ tst_elatefill.nc tst_global_fillval.nc tst_large_cdf5.nc \ tst_max_var_dims.nc benchmark.nc tst_def_var_fill.nc +EXTRA_DIST += bad_cdf5_begin.nc run_cdf5.sh +if ENABLE_CDF5 + # bad_cdf5_begin.nc is a corrupted CDF-5 file with bad variable starting + # file offsets. It is to be used by tst_open_cdf5.c to check if it can + # detect and report error code NC_ENOTNC. + TESTS += run_cdf5.sh + check_PROGRAMS += tst_open_cdf5 +if LARGE_FILE_TESTS + TESTPROGRAMS += tst_large_cdf5 tst_cdf5_begin + CLEANFILES += tst_large_cdf5.nc tst_cdf5_begin.nc +endif +endif + # Only clean these on maintainer-clean, because they require m4 to # regenerate. #MAINTAINERCLEANFILES = test_get.c test_put.c diff --git a/nc_test/bad_cdf5_begin.nc b/nc_test/bad_cdf5_begin.nc new file mode 100644 index 0000000000..da0a0a2da5 Binary files /dev/null and b/nc_test/bad_cdf5_begin.nc differ diff --git a/nc_test/run_cdf5.sh b/nc_test/run_cdf5.sh new file mode 100755 index 0000000000..396aaa6f52 --- /dev/null +++ b/nc_test/run_cdf5.sh @@ -0,0 +1,6 @@ +#!/bin/sh + +set -e + +./tst_open_cdf5 ${srcdir}/bad_cdf5_begin.nc + diff --git a/nc_test/tst_cdf5_begin.c b/nc_test/tst_cdf5_begin.c new file mode 100644 index 0000000000..d704d9aa0a --- /dev/null +++ b/nc_test/tst_cdf5_begin.c @@ -0,0 +1,56 @@ +#include +#include +#include + +/* When using NetCDF 4.4.1 ad prior to create a CDF-5 file and defining a small + * variable after a big variable (> 2^32-3 bytes), the file starting offset of + * the small variable (and all variables defined after the big variable) is + * calculated incorrectly. This test program detects this bug by checking the + * contents of the possible overlaps between the two variables. + */ + +#define ERR {if(err!=NC_NOERR){printf("Error at line %d in %s: %s\n", __LINE__,__FILE__, nc_strerror(err));nerrs++;}} + +#define FILE_NAME "tst_cdf5_begin.nc" + +int main(int argc, char *argv[]) +{ + int i, err, nerrs=0, ncid, dimid[2], varid[2]; + short buf[10]; + size_t start, count; + + err = nc_create(FILE_NAME, NC_CLOBBER|NC_64BIT_DATA, &ncid); ERR; + err = nc_def_dim(ncid, "dim0", NC_MAX_UINT, &dimid[0]); ERR + err = nc_def_dim(ncid, "dim1", 10, &dimid[1]); ERR + + /* define one small variable after one big variable */ + err = nc_def_var(ncid, "var_big", NC_SHORT, 1, &dimid[0], &varid[0]); ERR + err = nc_def_var(ncid, "var_small", NC_SHORT, 1, &dimid[1], &varid[1]); ERR + err = nc_set_fill(ncid, NC_NOFILL, NULL); ERR + err = nc_enddef(ncid); ERR + + /* write to var_big in location overlapping with var_small when using + * netCDF 4.4.x or prior */ + start = NC_MAX_UINT/sizeof(short); + count = 10; + for (i=0; i<10; i++) buf[i] = i; + err = nc_put_vara_short(ncid, varid[0], &start, &count, buf); ERR + + /* write var_small */ + for (i=0; i<10; i++) buf[i] = -1; + err = nc_put_var_short(ncid, varid[1], buf); ERR + + /* read back var_big and check contents */ + for (i=0; i<10; i++) buf[i] = -1; + err = nc_get_vara_short(ncid, varid[0], &start, &count,buf); ERR + for (i=0; i<10; i++) { + if (buf[i] != i) { + printf("Error at buf[%d] expect %d but got %hd\n",i,i,buf[i]); + nerrs++; + } + } + err = nc_close(ncid); ERR + + return (nerrs > 0); +} + diff --git a/nc_test/tst_open_cdf5.c b/nc_test/tst_open_cdf5.c new file mode 100644 index 0000000000..f60ea7d964 --- /dev/null +++ b/nc_test/tst_open_cdf5.c @@ -0,0 +1,25 @@ +#include +#include +#include + +#define FILE_NAME "bad_cdf5_begin.nc" + +int main(int argc, char *argv[]) +{ + char *fname=FILE_NAME; + int err, nerrs=0, ncid; + + if (argc == 2) fname = argv[1]; + + err = nc_open(fname, NC_NOWRITE, &ncid); + if (err != NC_ENOTNC) { + printf("Error: nc_open() expect NC_ENOTNC but got (%s)\n", + nc_strerror(err)); + nerrs++; + } + else if (err == NC_NOERR) /* close file */ + nc_close(ncid); + + return (nerrs > 0); +} +