From 1d9727c61664ae987af91cc168f296b6387bb895 Mon Sep 17 00:00:00 2001 From: Dennis Heimbigner Date: Sun, 31 Jan 2021 15:10:39 -0700 Subject: [PATCH 1/2] More fixes to the nccopy filter x chunking algorithm re: Issue https://github.com/Unidata/netcdf-c/issues/1936 The algorithm controlling interaction of -d 0 -F and -c / options is incorrect. The fix: 1. make -d 0 => no deflation 2. make -F properly use the filter specs to decide. Also added a test case to ncdump/tst_nccopy4.sh. --- ncdump/Makefile.am | 3 +- ncdump/nccopy.1 | 24 +++++---- ncdump/nccopy.c | 102 ++++++++++++++++++++--------------- ncdump/ref_tst_nofilters.cdl | 9 ++++ ncdump/tst_nccopy4.sh | 10 +++- 5 files changed, 91 insertions(+), 57 deletions(-) create mode 100644 ncdump/ref_tst_nofilters.cdl diff --git a/ncdump/Makefile.am b/ncdump/Makefile.am index 5a77257df8..bdcb053d24 100644 --- a/ncdump/Makefile.am +++ b/ncdump/Makefile.am @@ -162,8 +162,7 @@ ref_provenance_v1.nc ref_tst_radix.cdl tst_radix.cdl test_radix.sh \ ref_nccopy_w.cdl tst_nccopy_w3.sh tst_nccopy_w4.sh \ ref_no_ncproperty.nc test_unicode_directory.sh \ ref_roman_szip_simple.cdl ref_roman_szip_unlim.cdl ref_tst_perdimspecs.cdl \ -test_keywords.sh ref_keyword1.cdl ref_keyword2.cdl - +test_keywords.sh ref_keyword1.cdl ref_keyword2.cdl ref_tst_nofilters.cdl # The L512.bin file is file containing exactly 512 bytes each of value 0. # It is used for creating hdf5 files with varying offsets for testing. diff --git a/ncdump/nccopy.1 b/ncdump/nccopy.1 index 1a8bf1d59e..f4a90f71b8 100644 --- a/ncdump/nccopy.1 +++ b/ncdump/nccopy.1 @@ -96,17 +96,19 @@ storage format for performance. Credit is due to NCO for use of these numeric codes instead of the old and confusing format numbers. .IP "\fB \-d \fP \fI n \fP" For netCDF-4 output, including netCDF-4 classic model, specify -deflation level (level of compression) for variable data output. 0 -corresponds to no compression and 9 to maximum compression, with -higher levels of compression requiring marginally more time to -compress or uncompress than lower levels. Compression achieved may -also depend on output chunking parameters. If this option is -specified for a classic format or 64-bit offset format input file, it -is not necessary to also specify that the output should be netCDF-4 -classic model, as that will be the default. If this option is not -specified and the input file has compressed variables, the compression -will still be preserved in the output, using the same chunking as in -the input by default. +deflation level (level of compression) for variable data output. +0 corresponds to no compression and 9 to maximum compression, +with higher levels of compression requiring marginally more time +to compress or uncompress than lower levels. As a side effect +specifying a compression level of 0 (via "-d 0") actually turns +off deflation altogether. Compression achieved may also depend +on output chunking parameters. If this option is specified for +a classic format or 64-bit offset format input file, it is not +necessary to also specify that the output should be netCDF-4 +classic model, as that will be the default. If this option is +not specified and the input file has compressed variables, the +compression will still be preserved in the output, using the +same chunking as in the input by default. .IP Note that \fBnccopy\fP requires all variables to be compressed using the same compression level, but the API has no such restriction. With diff --git a/ncdump/nccopy.c b/ncdump/nccopy.c index ca20196711..0d7e6e2b95 100644 --- a/ncdump/nccopy.c +++ b/ncdump/nccopy.c @@ -365,32 +365,49 @@ parsefilterspec(const char* optarg0, List* speclist) return stat; } +/* Return 1 if variable has only active (ie not none) filters */ static int -hasfilteroptforvar(const char* ofqn) +varfiltersactive(const char* ofqn) { int i; + int hasnone = 0; + int hasactive = 0; /* See which output filter options are defined for this output variable */ for(i=0;ifqn,"*")==0) return 1; - if(strcmp(opt->fqn,ofqn)==0) return 1; + if(strcmp(opt->fqn,"*")==0 || strcmp(opt->fqn,ofqn)==0) + {if(opt->nofilter) hasnone = 1;} else {hasactive = 1;} } - return 0; + return (hasactive && !hasnone ? 1 : 0); } +/* Return 1 if variable has "none" filters */ +static int +varfilterssuppress(const char* ofqn) +{ + int i; + int hasnone = 0; + /* See which output filter options are defined for this output variable */ + for(i=0;ifqn,"*")==0 || strcmp(opt->fqn,ofqn)==0) + {if(opt->nofilter) hasnone = 1;} + } + return hasnone || suppressfilters; +} + +/* Return list of active filters */ static List* filteroptsforvar(const char* ofqn) { int i; List* list = listnew(); - /* See which output filter options are defined for this output variable */ + /* See which output filter options are defined for this output variable; + both active and none. */ for(i=0;ifqn,"*")==0) { - /* Add to the list */ - listpush(list,opt); - } else if(strcmp(opt->fqn,ofqn)==0) { - /* Add to the list */ + if(strcmp(opt->fqn,"*")==0 || strcmp(opt->fqn,ofqn)==0) { + if(!opt->nofilter) /* Add to the list */ listpush(list,opt); } } @@ -770,6 +787,7 @@ copy_var_filter(int igrp, int varid, int ogrp, int o_varid, int inkind, int outk int inputdefined, outputdefined, unfiltered; int innc4 = (inkind == NC_FORMAT_NETCDF4 || inkind == NC_FORMAT_NETCDF4_CLASSIC); int outnc4 = (outkind == NC_FORMAT_NETCDF4 || outkind == NC_FORMAT_NETCDF4_CLASSIC); + int suppressvarfilters = 0; if(!outnc4) goto done; /* Can only use filter when output is some netcdf4 variant */ @@ -782,15 +800,15 @@ copy_var_filter(int igrp, int varid, int ogrp, int o_varid, int inkind, int outk ospecs = NULL; actualspecs = NULL; + if(varfilterssuppress(ofqn) || option_deflate_level == 0) + suppressvarfilters = 1; + /* Is there one or more filters on the output variable */ outputdefined = 0; /* default is no filter defined */ - /* Only bother to look if output is netcdf-4 variant */ - if(outnc4) { - /* See if any output filter spec is defined for this output variable */ - ospecs = filteroptsforvar(ofqn); - if(listlength(ospecs) > 0) + /* See if any output filter spec is defined for this output variable */ + ospecs = filteroptsforvar(ofqn); + if(listlength(ospecs) > 0 && !suppressfilters && !suppressvarfilters) outputdefined = 1; - } /* Is there already a filter on the input variable */ inputdefined = 0; /* default is no filter defined */ @@ -825,7 +843,7 @@ copy_var_filter(int igrp, int varid, int ogrp, int o_varid, int inkind, int outk nullfree(ids); } - /* Rules for choosing output filter are as follows: + /* Rules for choosing output filter are as follows (Ugh!): global output input Actual Output suppress filter(s) filter(s) filter @@ -843,27 +861,18 @@ copy_var_filter(int igrp, int varid, int ogrp, int o_varid, int inkind, int outk unfiltered = 0; if(suppressfilters && !outputdefined) /* row 1 */ unfiltered = 1; - else if(suppressfilters && outputdefined) { /* row 2 */ - int k; - /* Walk the set of filters to apply to see if "none" was specified */ - for(k=0;knofilter) {unfiltered = 1; break;} - } - } else if(suppressfilters && outputdefined) /* row 3 */ - actualspecs = ospecs; + else if(suppressfilters || suppressvarfilters) /* row 2 */ + unfiltered = 1; + else if(suppressfilters && outputdefined) /* row 3 */ + actualspecs = ospecs; else if(!suppressfilters && !outputdefined && inputdefined) /* row 4 */ - actualspecs = inspecs; - else if(!suppressfilters && outputdefined) { /* row 5 & 6*/ - int k; - /* Walk the set of filters to apply to see if "none" was specified */ - for(k=0;knofilter) {unfiltered = 1; break;} - } - if(!unfiltered) actualspecs = ospecs; - } else if(!suppressfilters && !outputdefined && !inputdefined) /* row 7 */ - unfiltered = 1; + actualspecs = inspecs; + else if(!suppressfilters && suppressvarfilters) /* row 5 */ + unfiltered = 1; + else if(!suppressfilters && outputdefined) /* row 6*/ + actualspecs = ospecs; + else if(!suppressfilters && !outputdefined && !inputdefined) /* row 7 */ + unfiltered = 1; else if(!suppressfilters && outputdefined && inputdefined) /* row 8 */ actualspecs = ospecs; @@ -953,7 +962,7 @@ copy_chunking(int igrp, int i_varid, int ogrp, int o_varid, int ndims, int inkin ovid.grpid = ogrp; ovid.varid = o_varid; if((stat=computeFQN(ovid,&ofqn))) goto done; - if(option_deflate_level >= 0 || hasfilteroptforvar(ofqn)) + if(!varfilterssuppress(ofqn) && (option_deflate_level > 0 || varfiltersactive(ofqn))) ocontig = NC_CHUNKED; /* See about dim-specific chunking; does not override specific variable chunk spec*/ @@ -1097,6 +1106,9 @@ copy_var_specials(int igrp, int varid, int ogrp, int o_varid, int inkind, int ou int outnc4 = (outkind == NC_FORMAT_NETCDF4 || outkind == NC_FORMAT_NETCDF4_CLASSIC); int deflated = 0; /* true iff deflation is applied */ int ndims; + char* ofqn = NULL; + int nofilters = 0; + VarID ovid = {ogrp,o_varid}; if(!outnc4) return stat; /* Ignore non-netcdf4 files */ @@ -1107,7 +1119,11 @@ copy_var_specials(int igrp, int varid, int ogrp, int o_varid, int inkind, int ou NC_CHECK(copy_chunking(igrp, varid, ogrp, o_varid, ndims, inkind, outkind)); } } - if(ndims > 0) + + if((stat=computeFQN(ovid,&ofqn))) goto done; + nofilters = varfilterssuppress(ofqn); + + if(ndims > 0 && !nofilters) { /* handle compression parameters, copying from input, overriding * with command-line options */ int shuffle_in=0, deflate_in=0, deflate_level_in=0; @@ -1129,7 +1145,7 @@ copy_var_specials(int igrp, int varid, int ogrp, int o_varid, int inkind, int ou deflate_out = 0; deflate_level_out = 0; } - /* Apply output deflation */ + /* Apply output deflation (unless suppressed) */ if(outnc4) { /* Note that if we invoke this function and even if shuffle and deflate flags are 0, then default chunking will be turned on; so do a special check for that. */ @@ -1138,7 +1154,7 @@ copy_var_specials(int igrp, int varid, int ogrp, int o_varid, int inkind, int ou deflated = deflate_out; } } - if(innc4 && outnc4 && ndims > 0) + if(!nofilters && innc4 && outnc4 && ndims > 0) { /* handle checksum parameters */ int fletcher32 = 0; NC_CHECK(nc_inq_var_fletcher32(igrp, varid, &fletcher32)); @@ -1155,10 +1171,12 @@ copy_var_specials(int igrp, int varid, int ogrp, int o_varid, int inkind, int ou } } - if(!deflated && ndims > 0) { + if(!nofilters && !deflated && ndims > 0) { /* handle other general filters */ NC_CHECK(copy_var_filter(igrp, varid, ogrp, o_varid, inkind, outkind)); } +done: + if(ofqn) free(ofqn); return stat; } diff --git a/ncdump/ref_tst_nofilters.cdl b/ncdump/ref_tst_nofilters.cdl new file mode 100644 index 0000000000..1b943f19f6 --- /dev/null +++ b/ncdump/ref_tst_nofilters.cdl @@ -0,0 +1,9 @@ +netcdf ref_tst_special_atts3 { +dimensions: + dim1 = 10; +variables: + int var1(dim1) ; + var1:_DeflateLevel = 2 ; +data: + var1 = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9; +} diff --git a/ncdump/tst_nccopy4.sh b/ncdump/tst_nccopy4.sh index 5ad311ceba..92e2801da9 100755 --- a/ncdump/tst_nccopy4.sh +++ b/ncdump/tst_nccopy4.sh @@ -103,8 +103,14 @@ test "x$STORAGE" = 'xtas:_Storage="chunked";' CHUNKSIZES=`cat tmppds.cdl | sed -e '/tas:_ChunkSizes/p' -ed | tr -d '\t \r'` test "x$CHUNKSIZES" = 'xtas:_ChunkSizes=10,15,20;' -rm -f tst_chunking.nc tmp.nc tmp.cdl tmp-chunked.nc tmp-chunked.* tmp-unchunked2.* -rm -fr tst_perdimspecs.nc tmppds.cdl tmppds.nc +echo "*** Test that nccopy -F var1,none works as intended " +${NCGEN} -4 -b -o tst_nofilters.nc $srcdir/ref_tst_nofilters.cdl +${NCCOPY} -M0 -4 -F var1,none -c / tst_nofilters.nc tmp_nofilters.nc +${NCDUMP} -hs tmp_nofilters.nc > tmp_nofilters.cdl +STORAGE=`cat tmp_nofilters.cdl | sed -e '/var1:_Storage/p' -ed | tr -d '\t \r'` +test "x$STORAGE" = 'xvar1:_Storage="contiguous";' +FILTERS=`cat tmp_nofilters.cdl | sed -e '/var1:_Filters/p' -ed | tr -d '\t \r'` +test "x$FILTERS" = 'x' echo "*** All nccopy tests passed!" exit 0 From 7272ed6ca101e2351631b596a0c31fced851086f Mon Sep 17 00:00:00 2001 From: Dennis Heimbigner Date: Wed, 3 Feb 2021 11:20:10 -0700 Subject: [PATCH 2/2] Update RELEASE_NOTES --- RELEASE_NOTES.md | 1 + 1 file changed, 1 insertion(+) diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index eb4be973a5..a5a8fb6a13 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -7,6 +7,7 @@ This file contains a high-level description of this package's evolution. Release ## 4.8.0 - TBD +* [Bug Fixes] The nccopy program was treating -d0 as turning deflation on rather than interpreting it as "turn off deflation". See [Github #1944](https://github.com/Unidata/netcdf-c/pull/1944) for more information. * [Bug Fixes] Make fillmismatch the default for DAP2 and DAP4; too many servers ignore this requirement. * [Bug Fixes] Fix some memory leaks in NCZarr, fix a bug with long strides in NCZarr. See [Github #1913](https://github.com/Unidata/netcdf-c/pull/1913) for more information. * [Enhancement] Add some optimizations to NCZarr, dosome cleanup of code cruft, add some NCZarr test cases, add a performance test to NCZarr. See [Github #1908](https://github.com/Unidata/netcdf-c/pull/1908) for more information.