Skip to content

Commit

Permalink
Merge pull request #1944 from DennisHeimbigner/deflate.dmh
Browse files Browse the repository at this point in the history
More fixes to the nccopy filter x chunking algorithm
  • Loading branch information
WardF authored Feb 8, 2021
2 parents c88dd05 + 0b6f5c7 commit d136d46
Show file tree
Hide file tree
Showing 6 changed files with 92 additions and 57 deletions.
1 change: 1 addition & 0 deletions RELEASE_NOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ This file contains a high-level description of this package's evolution. Release

## 4.8.0 - TBD

* [Bug Fixes] The nccopy program was treating -d0 as turning deflation on rather than interpreting it as "turn off deflation". See [Github #1944](https://github.com/Unidata/netcdf-c/pull/1944) for more information.
* [Enhancement] Add support for storing NCZarr data in zip files. See [Github #1942](https://github.com/Unidata/netcdf-c/pull/1942) for more information.
* [Bug Fixes] Make fillmismatch the default for DAP2 and DAP4; too many servers ignore this requirement.
* [Bug Fixes] Fix some memory leaks in NCZarr, fix a bug with long strides in NCZarr. See [Github #1913](https://github.com/Unidata/netcdf-c/pull/1913) for more information.
Expand Down
3 changes: 1 addition & 2 deletions ncdump/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -162,8 +162,7 @@ ref_provenance_v1.nc ref_tst_radix.cdl tst_radix.cdl test_radix.sh \
ref_nccopy_w.cdl tst_nccopy_w3.sh tst_nccopy_w4.sh \
ref_no_ncproperty.nc test_unicode_directory.sh \
ref_roman_szip_simple.cdl ref_roman_szip_unlim.cdl ref_tst_perdimspecs.cdl \
test_keywords.sh ref_keyword1.cdl ref_keyword2.cdl

test_keywords.sh ref_keyword1.cdl ref_keyword2.cdl ref_tst_nofilters.cdl

# The L512.bin file is file containing exactly 512 bytes each of value 0.
# It is used for creating hdf5 files with varying offsets for testing.
Expand Down
24 changes: 13 additions & 11 deletions ncdump/nccopy.1
Original file line number Diff line number Diff line change
Expand Up @@ -96,17 +96,19 @@ storage format for performance. Credit is due to NCO for use of these
numeric codes instead of the old and confusing format numbers.
.IP "\fB \-d \fP \fI n \fP"
For netCDF-4 output, including netCDF-4 classic model, specify
deflation level (level of compression) for variable data output. 0
corresponds to no compression and 9 to maximum compression, with
higher levels of compression requiring marginally more time to
compress or uncompress than lower levels. Compression achieved may
also depend on output chunking parameters. If this option is
specified for a classic format or 64-bit offset format input file, it
is not necessary to also specify that the output should be netCDF-4
classic model, as that will be the default. If this option is not
specified and the input file has compressed variables, the compression
will still be preserved in the output, using the same chunking as in
the input by default.
deflation level (level of compression) for variable data output.
0 corresponds to no compression and 9 to maximum compression,
with higher levels of compression requiring marginally more time
to compress or uncompress than lower levels. As a side effect
specifying a compression level of 0 (via "-d 0") actually turns
off deflation altogether. Compression achieved may also depend
on output chunking parameters. If this option is specified for
a classic format or 64-bit offset format input file, it is not
necessary to also specify that the output should be netCDF-4
classic model, as that will be the default. If this option is
not specified and the input file has compressed variables, the
compression will still be preserved in the output, using the
same chunking as in the input by default.
.IP
Note that \fBnccopy\fP requires all variables to be compressed using the
same compression level, but the API has no such restriction. With
Expand Down
102 changes: 60 additions & 42 deletions ncdump/nccopy.c
Original file line number Diff line number Diff line change
Expand Up @@ -365,32 +365,49 @@ parsefilterspec(const char* optarg0, List* speclist)
return stat;
}

/* Return 1 if variable has only active (ie not none) filters */
static int
hasfilteroptforvar(const char* ofqn)
varfiltersactive(const char* ofqn)
{
int i;
int hasnone = 0;
int hasactive = 0;
/* See which output filter options are defined for this output variable */
for(i=0;i<listlength(filteroptions);i++) {
struct FilterOption* opt = listget(filteroptions,i);
if(strcmp(opt->fqn,"*")==0) return 1;
if(strcmp(opt->fqn,ofqn)==0) return 1;
if(strcmp(opt->fqn,"*")==0 || strcmp(opt->fqn,ofqn)==0)
{if(opt->nofilter) hasnone = 1;} else {hasactive = 1;}
}
return 0;
return (hasactive && !hasnone ? 1 : 0);
}

/* Return 1 if variable has "none" filters */
static int
varfilterssuppress(const char* ofqn)
{
int i;
int hasnone = 0;
/* See which output filter options are defined for this output variable */
for(i=0;i<listlength(filteroptions);i++) {
struct FilterOption* opt = listget(filteroptions,i);
if(strcmp(opt->fqn,"*")==0 || strcmp(opt->fqn,ofqn)==0)
{if(opt->nofilter) hasnone = 1;}
}
return hasnone || suppressfilters;
}

/* Return list of active filters */
static List*
filteroptsforvar(const char* ofqn)
{
int i;
List* list = listnew();
/* See which output filter options are defined for this output variable */
/* See which output filter options are defined for this output variable;
both active and none. */
for(i=0;i<listlength(filteroptions);i++) {
struct FilterOption* opt = listget(filteroptions,i);
if(strcmp(opt->fqn,"*")==0) {
/* Add to the list */
listpush(list,opt);
} else if(strcmp(opt->fqn,ofqn)==0) {
/* Add to the list */
if(strcmp(opt->fqn,"*")==0 || strcmp(opt->fqn,ofqn)==0) {
if(!opt->nofilter) /* Add to the list */
listpush(list,opt);
}
}
Expand Down Expand Up @@ -770,6 +787,7 @@ copy_var_filter(int igrp, int varid, int ogrp, int o_varid, int inkind, int outk
int inputdefined, outputdefined, unfiltered;
int innc4 = (inkind == NC_FORMAT_NETCDF4 || inkind == NC_FORMAT_NETCDF4_CLASSIC);
int outnc4 = (outkind == NC_FORMAT_NETCDF4 || outkind == NC_FORMAT_NETCDF4_CLASSIC);
int suppressvarfilters = 0;

if(!outnc4)
goto done; /* Can only use filter when output is some netcdf4 variant */
Expand All @@ -782,15 +800,15 @@ copy_var_filter(int igrp, int varid, int ogrp, int o_varid, int inkind, int outk
ospecs = NULL;
actualspecs = NULL;

if(varfilterssuppress(ofqn) || option_deflate_level == 0)
suppressvarfilters = 1;

/* Is there one or more filters on the output variable */
outputdefined = 0; /* default is no filter defined */
/* Only bother to look if output is netcdf-4 variant */
if(outnc4) {
/* See if any output filter spec is defined for this output variable */
ospecs = filteroptsforvar(ofqn);
if(listlength(ospecs) > 0)
/* See if any output filter spec is defined for this output variable */
ospecs = filteroptsforvar(ofqn);
if(listlength(ospecs) > 0 && !suppressfilters && !suppressvarfilters)
outputdefined = 1;
}

/* Is there already a filter on the input variable */
inputdefined = 0; /* default is no filter defined */
Expand Down Expand Up @@ -825,7 +843,7 @@ copy_var_filter(int igrp, int varid, int ogrp, int o_varid, int inkind, int outk
nullfree(ids);
}

/* Rules for choosing output filter are as follows:
/* Rules for choosing output filter are as follows (Ugh!):
global output input Actual Output
suppress filter(s) filter(s) filter
Expand All @@ -843,27 +861,18 @@ copy_var_filter(int igrp, int varid, int ogrp, int o_varid, int inkind, int outk
unfiltered = 0;
if(suppressfilters && !outputdefined) /* row 1 */
unfiltered = 1;
else if(suppressfilters && outputdefined) { /* row 2 */
int k;
/* Walk the set of filters to apply to see if "none" was specified */
for(k=0;k<listlength(ospecs);k++) {
struct FilterOption* opt = (struct FilterOption*)listget(actualspecs,k);
if(opt->nofilter) {unfiltered = 1; break;}
}
} else if(suppressfilters && outputdefined) /* row 3 */
actualspecs = ospecs;
else if(suppressfilters || suppressvarfilters) /* row 2 */
unfiltered = 1;
else if(suppressfilters && outputdefined) /* row 3 */
actualspecs = ospecs;
else if(!suppressfilters && !outputdefined && inputdefined) /* row 4 */
actualspecs = inspecs;
else if(!suppressfilters && outputdefined) { /* row 5 & 6*/
int k;
/* Walk the set of filters to apply to see if "none" was specified */
for(k=0;k<listlength(ospecs);k++) {
struct FilterOption* opt = (struct FilterOption*)listget(ospecs,k);
if(opt->nofilter) {unfiltered = 1; break;}
}
if(!unfiltered) actualspecs = ospecs;
} else if(!suppressfilters && !outputdefined && !inputdefined) /* row 7 */
unfiltered = 1;
actualspecs = inspecs;
else if(!suppressfilters && suppressvarfilters) /* row 5 */
unfiltered = 1;
else if(!suppressfilters && outputdefined) /* row 6*/
actualspecs = ospecs;
else if(!suppressfilters && !outputdefined && !inputdefined) /* row 7 */
unfiltered = 1;
else if(!suppressfilters && outputdefined && inputdefined) /* row 8 */
actualspecs = ospecs;

Expand Down Expand Up @@ -953,7 +962,7 @@ copy_chunking(int igrp, int i_varid, int ogrp, int o_varid, int ndims, int inkin
ovid.grpid = ogrp;
ovid.varid = o_varid;
if((stat=computeFQN(ovid,&ofqn))) goto done;
if(option_deflate_level >= 0 || hasfilteroptforvar(ofqn))
if(!varfilterssuppress(ofqn) && (option_deflate_level > 0 || varfiltersactive(ofqn)))
ocontig = NC_CHUNKED;

/* See about dim-specific chunking; does not override specific variable chunk spec*/
Expand Down Expand Up @@ -1097,6 +1106,9 @@ copy_var_specials(int igrp, int varid, int ogrp, int o_varid, int inkind, int ou
int outnc4 = (outkind == NC_FORMAT_NETCDF4 || outkind == NC_FORMAT_NETCDF4_CLASSIC);
int deflated = 0; /* true iff deflation is applied */
int ndims;
char* ofqn = NULL;
int nofilters = 0;
VarID ovid = {ogrp,o_varid};

if(!outnc4)
return stat; /* Ignore non-netcdf4 files */
Expand All @@ -1107,7 +1119,11 @@ copy_var_specials(int igrp, int varid, int ogrp, int o_varid, int inkind, int ou
NC_CHECK(copy_chunking(igrp, varid, ogrp, o_varid, ndims, inkind, outkind));
}
}
if(ndims > 0)

if((stat=computeFQN(ovid,&ofqn))) goto done;
nofilters = varfilterssuppress(ofqn);

if(ndims > 0 && !nofilters)
{ /* handle compression parameters, copying from input, overriding
* with command-line options */
int shuffle_in=0, deflate_in=0, deflate_level_in=0;
Expand All @@ -1129,7 +1145,7 @@ copy_var_specials(int igrp, int varid, int ogrp, int o_varid, int inkind, int ou
deflate_out = 0;
deflate_level_out = 0;
}
/* Apply output deflation */
/* Apply output deflation (unless suppressed) */
if(outnc4) {
/* Note that if we invoke this function and even if shuffle and deflate flags are 0,
then default chunking will be turned on; so do a special check for that. */
Expand All @@ -1138,7 +1154,7 @@ copy_var_specials(int igrp, int varid, int ogrp, int o_varid, int inkind, int ou
deflated = deflate_out;
}
}
if(innc4 && outnc4 && ndims > 0)
if(!nofilters && innc4 && outnc4 && ndims > 0)
{ /* handle checksum parameters */
int fletcher32 = 0;
NC_CHECK(nc_inq_var_fletcher32(igrp, varid, &fletcher32));
Expand All @@ -1155,10 +1171,12 @@ copy_var_specials(int igrp, int varid, int ogrp, int o_varid, int inkind, int ou
}
}

if(!deflated && ndims > 0) {
if(!nofilters && !deflated && ndims > 0) {
/* handle other general filters */
NC_CHECK(copy_var_filter(igrp, varid, ogrp, o_varid, inkind, outkind));
}
done:
if(ofqn) free(ofqn);
return stat;
}

Expand Down
9 changes: 9 additions & 0 deletions ncdump/ref_tst_nofilters.cdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
netcdf ref_tst_special_atts3 {
dimensions:
dim1 = 10;
variables:
int var1(dim1) ;
var1:_DeflateLevel = 2 ;
data:
var1 = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9;
}
10 changes: 8 additions & 2 deletions ncdump/tst_nccopy4.sh
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,14 @@ test "x$STORAGE" = 'xtas:_Storage="chunked";'
CHUNKSIZES=`cat tmppds.cdl | sed -e '/tas:_ChunkSizes/p' -ed | tr -d '\t \r'`
test "x$CHUNKSIZES" = 'xtas:_ChunkSizes=10,15,20;'

rm -f tst_chunking.nc tmp.nc tmp.cdl tmp-chunked.nc tmp-chunked.* tmp-unchunked2.*
rm -fr tst_perdimspecs.nc tmppds.cdl tmppds.nc
echo "*** Test that nccopy -F var1,none works as intended "
${NCGEN} -4 -b -o tst_nofilters.nc $srcdir/ref_tst_nofilters.cdl
${NCCOPY} -M0 -4 -F var1,none -c / tst_nofilters.nc tmp_nofilters.nc
${NCDUMP} -hs tmp_nofilters.nc > tmp_nofilters.cdl
STORAGE=`cat tmp_nofilters.cdl | sed -e '/var1:_Storage/p' -ed | tr -d '\t \r'`
test "x$STORAGE" = 'xvar1:_Storage="contiguous";'
FILTERS=`cat tmp_nofilters.cdl | sed -e '/var1:_Filters/p' -ed | tr -d '\t \r'`
test "x$FILTERS" = 'x'

echo "*** All nccopy tests passed!"
exit 0
Expand Down

0 comments on commit d136d46

Please sign in to comment.