Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

move backend append logic to the prepare_variable methods #1799

Merged
merged 4 commits into from
Dec 28, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,13 @@ Bug fixes
``dask.threaded.get``. By `Matthew Rocklin <https://github.com/mrocklin>`_.
- Bug fixes in :py:meth:`DataArray.plot.imshow`: all-NaN arrays and arrays
with size one in some dimension can now be plotted, which is good for
exploring satellite imagery. (:issue:`1780`)
exploring satellite imagery (:issue:`1780`).
By `Zac Hatfield-Dodds <https://github.com/Zac-HD>`_.
- The ``variables``, ``attrs``, and ``dimensions`` properties have been
deprecated as part of a bug fix addressing an issue where backends were
unintentionally loading the datastore's data and attributes repeatedly during
writes (:issue:`1798`).
By `Joe Hamman <https://github.com/jhamman>`_.


.. _whats-new.0.10.0:
Expand Down
39 changes: 17 additions & 22 deletions xarray/backends/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import traceback
import contextlib
from collections import Mapping
from distutils.version import LooseVersion
import warnings

from ..conventions import cf_encoder
from ..core import indexing
Expand Down Expand Up @@ -133,24 +133,25 @@ def load(self):

@property
def variables(self):
# Because encoding/decoding might happen which may require both the
# attributes and the variables, and because a store may be updated
# we need to load both the attributes and variables
# anytime either one is requested.
warnings.warn('The ``variables`` property has been deprecated and '
'will be removed in xarray v0.11.',
FutureWarning, stacklevel=2)
variables, _ = self.load()
return variables

@property
def attrs(self):
# Because encoding/decoding might happen which may require both the
# attributes and the variables, and because a store may be updated
# we need to load both the attributes and variables
# anytime either one is requested.
_, attributes = self.load()
return attributes
warnings.warn('The ``attrs`` property has been deprecated and '
'will be removed in xarray v0.11.',
FutureWarning, stacklevel=2)
_, attrs = self.load()
return attrs

@property
def dimensions(self):
warnings.warn('The ``dimensions`` property has been deprecated and '
'will be removed in xarray v0.11.',
FutureWarning, stacklevel=2)
return self.get_dimensions()

def close(self):
Expand Down Expand Up @@ -183,11 +184,7 @@ def add(self, source, target):
def sync(self):
if self.sources:
import dask.array as da
import dask
if LooseVersion(dask.__version__) > LooseVersion('0.8.1'):
da.store(self.sources, self.targets, lock=self.lock)
else:
da.store(self.sources, self.targets)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we now have a minimum dask version of 0.9 so this check is no longer needed.

da.store(self.sources, self.targets, lock=self.lock)
self.sources = []
self.targets = []

Expand Down Expand Up @@ -232,19 +229,17 @@ def set_variables(self, variables, check_encoding_set,
for vn, v in iteritems(variables):
name = _encode_variable_name(vn)
check = vn in check_encoding_set
if vn not in self.variables:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we deprecate the variables property as part of this change? That would help avoid this in the future. In general it's a bad idea for adapter layers (like our backend classes) to hold state.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@shoyer - done. I actually removed the attrs and dimensions properties too since they weren't really used. I left a deprecation error in there for now but can remove it assuming all the tests pass.

target, source = self.prepare_variable(
name, v, check, unlimited_dims=unlimited_dims)
else:
target, source = self.ds.variables[name], v.data
target, source = self.prepare_variable(
name, v, check, unlimited_dims=unlimited_dims)

self.writer.add(source, target)

def set_necessary_dimensions(self, variable, unlimited_dims=None):
if unlimited_dims is None:
unlimited_dims = set()
dims = self.get_dimensions()
for d, l in zip(variable.dims, variable.shape):
if d not in self.dimensions:
if d not in dims:
is_unlimited = d in unlimited_dims
self.set_dimension(d, l, is_unlimited)

Expand Down
7 changes: 5 additions & 2 deletions xarray/backends/h5netcdf_.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,8 +156,11 @@ def prepare_variable(self, name, variable, check_encoding=False,
'chunksizes', 'fletcher32']:
if key in encoding:
kwargs[key] = encoding[key]
nc4_var = self.ds.createVariable(name, dtype, variable.dims,
fill_value=fill_value, **kwargs)
if name not in self.ds.variables:
nc4_var = self.ds.createVariable(name, dtype, variable.dims,
fill_value=fill_value, **kwargs)
else:
nc4_var = self.ds.variables[name]

for k, v in iteritems(attrs):
nc4_var.setncattr(k, v)
Expand Down
32 changes: 18 additions & 14 deletions xarray/backends/netCDF4_.py
Original file line number Diff line number Diff line change
Expand Up @@ -352,20 +352,24 @@ def prepare_variable(self, name, variable, check_encoding=False,
encoding = _extract_nc4_variable_encoding(
variable, raise_on_invalid=check_encoding,
unlimited_dims=unlimited_dims)
nc4_var = self.ds.createVariable(
varname=name,
datatype=datatype,
dimensions=variable.dims,
zlib=encoding.get('zlib', False),
complevel=encoding.get('complevel', 4),
shuffle=encoding.get('shuffle', True),
fletcher32=encoding.get('fletcher32', False),
contiguous=encoding.get('contiguous', False),
chunksizes=encoding.get('chunksizes'),
endian='native',
least_significant_digit=encoding.get('least_significant_digit'),
fill_value=fill_value)
_disable_auto_decode_variable(nc4_var)
if name in self.ds.variables:
nc4_var = self.ds.variables[name]
else:
nc4_var = self.ds.createVariable(
varname=name,
datatype=datatype,
dimensions=variable.dims,
zlib=encoding.get('zlib', False),
complevel=encoding.get('complevel', 4),
shuffle=encoding.get('shuffle', True),
fletcher32=encoding.get('fletcher32', False),
contiguous=encoding.get('contiguous', False),
chunksizes=encoding.get('chunksizes'),
endian='native',
least_significant_digit=encoding.get(
'least_significant_digit'),
fill_value=fill_value)
_disable_auto_decode_variable(nc4_var)

for k, v in iteritems(attrs):
# set attributes one-by-one since netCDF4<1.0.10 can't handle
Expand Down
1 change: 0 additions & 1 deletion xarray/backends/netcdf3.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import numpy as np

from .. import conventions, Variable
from ..core import duck_array_ops
from ..core.pycompat import basestring, unicode_type, OrderedDict


Expand Down
5 changes: 3 additions & 2 deletions xarray/backends/scipy_.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ def get_encoding(self):

def set_dimension(self, name, length, is_unlimited=False):
with self.ensure_open(autoclose=False):
if name in self.dimensions:
if name in self.ds.dimensions:
raise ValueError('%s does not support modifying dimensions'
% type(self).__name__)
dim_length = length if not is_unlimited else None
Expand Down Expand Up @@ -196,7 +196,8 @@ def prepare_variable(self, name, variable, check_encoding=False,
# nb. this still creates a numpy array in all memory, even though we
# don't write the data yet; scipy.io.netcdf does not support
# incremental writes.
self.ds.createVariable(name, data.dtype, variable.dims)
if name not in self.ds.variables:
self.ds.createVariable(name, data.dtype, variable.dims)
scipy_var = self.ds.variables[name]
for k, v in iteritems(variable.attrs):
self._validate_attr_key(k)
Expand Down
7 changes: 5 additions & 2 deletions xarray/backends/zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -378,8 +378,11 @@ def prepare_variable(self, name, variable, check_encoding=False,
# compressor='default', fill_value=0, order='C', store=None,
# synchronizer=None, overwrite=False, path=None, chunk_store=None,
# filters=None, cache_metadata=True, **kwargs)
zarr_array = self.ds.create(name, shape=shape, dtype=dtype,
fill_value=fill_value, **encoding)
if name in self.ds:
zarr_array = self.ds[name]
else:
zarr_array = self.ds.create(name, shape=shape, dtype=dtype,
fill_value=fill_value, **encoding)
# decided not to explicitly enumerate encoding options because we
# risk overriding zarr's defaults (e.g. if we specify
# cache_metadata=None instead of True). Alternative is to have lots of
Expand Down
4 changes: 0 additions & 4 deletions xarray/core/variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from collections import defaultdict
import functools
import itertools
from distutils.version import LooseVersion

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -1392,9 +1391,6 @@ def quantile(self, q, dim=None, interpolation='linear'):
raise TypeError("quantile does not work for arrays stored as dask "
"arrays. Load the data via .compute() or .load() "
"prior to calling this method.")
if LooseVersion(np.__version__) < LooseVersion('1.10.0'):
raise NotImplementedError(
'quantile requres numpy version 1.10.0 or later')

q = np.asarray(q, dtype=np.float64)

Expand Down