
Commit

Fix dataset tests
shoyer committed Oct 13, 2016
1 parent 510997a commit 6a0a1f4
Showing 7 changed files with 180 additions and 161 deletions.
14 changes: 14 additions & 0 deletions doc/whats-new.rst
@@ -21,6 +21,20 @@ v0.9.0 (unreleased)
Breaking changes
~~~~~~~~~~~~~~~~

- Index coordinates for each dimension are now optional, and are no longer
created by default. This has a number of implications (see the sketch after
this list):

- :py:func:`~align` and :py:meth:`~Dataset.reindex` can now raise an error if
dimension labels are missing and dimensions have different sizes.
- Because pandas does not support missing indexes, methods such as
``to_dataframe``/``from_dataframe`` and ``stack``/``unstack`` no longer
roundtrip faithfully on all inputs. Use :py:meth:`~Dataset.reset_index` to
remove undesired indexes.
- ``Dataset.__delitem__`` on a variable matching a dimension name no longer
also deletes every other variable that uses that dimension.
- ``DataArray.coords.__delitem__`` is now allowed on variables matching
dimension names.
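
A minimal sketch of the new behaviour described above (hypothetical data;
reprs and error messages may differ)::

    import xarray as xr

    # no default index coordinate is created for dimension 'x'
    ds = xr.Dataset({'foo': ('x', [1, 2, 3])})
    'x' in ds.coords  # False

    # deleting a coordinate named after a dimension is now allowed
    da = xr.DataArray([[1, 2], [3, 4]], dims=['x', 'y'], coords={'x': [0, 1]})
    del da.coords['x']
    'x' in da.coords  # False; the array itself still has dimension 'x'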

- The default behavior of ``merge`` is now ``compat='no_conflicts'``, so some
merges will now succeed in cases that previously raised
``xarray.MergeError``. Set ``compat='broadcast_equals'`` to restore the
26 changes: 16 additions & 10 deletions xarray/core/alignment.py
@@ -197,11 +197,11 @@ def reindex_variables(variables, dims, indexes, indexers, method=None,
"""
# build up indexers for assignment along each dimension
to_indexers = {}
to_shape = {}
from_indexers = {}
# size of reindexed dimensions
new_sizes = {}

for name, index in iteritems(indexes):
to_shape[name] = index.size
if name in indexers:
target = utils.safe_cast_to_index(indexers[name])
if not index.is_unique:
@@ -210,7 +210,7 @@ def reindex_variables(variables, dims, indexes, indexers, method=None,
'index has duplicate values' % name)
indexer = get_indexer(index, target, method, tolerance)

to_shape[name] = len(target)
new_sizes[name] = len(target)
# Note pandas uses negative values from get_indexer to signify
# values that are missing in the index
# The non-negative values thus indicate the non-missing values
@@ -246,12 +246,17 @@ def var_indexers(var, indexers):

# create variables for the new dataset
reindexed = OrderedDict()
for name, var in iteritems(variables):
if name in indexers:
# no need to copy, because index data is immutable
new_var = IndexVariable(var.dims, indexers[name], var.attrs,
var.encoding)

for dim, indexer in indexers.items():
if dim in variables:
var = variables[dim]
args = (var.attrs, var.encoding)
else:
args = ()
reindexed[dim] = IndexVariable((dim,), indexers[dim], *args)

for name, var in iteritems(variables):
if name not in indexers:
assign_to = var_indexers(var, to_indexers)
assign_from = var_indexers(var, from_indexers)

@@ -261,7 +266,8 @@ def var_indexers(var, indexers):
dtype, fill_value = _maybe_promote(var.dtype)

if isinstance(data, np.ndarray):
shape = tuple(to_shape[dim] for dim in var.dims)
shape = tuple(new_sizes.get(dim, size)
for dim, size in zip(var.dims, var.shape))
new_data = np.empty(shape, dtype=dtype)
new_data[...] = fill_value
# create a new Variable so we can use orthogonal indexing
@@ -291,7 +297,7 @@ def var_indexers(var, indexers):
# we neither created a new ndarray nor used fancy indexing
new_var = var.copy(deep=copy)

reindexed[name] = new_var
reindexed[name] = new_var
return reindexed


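The reworked shape logic above falls back to a variable's existing size for any
dimension that is not being reindexed, rather than requiring an index for every
dimension. A rough, self-contained sketch of that one expression (plain Python,
not the xarray API)::

    # hypothetical variable with dims ('x', 'y') and shape (3, 4)
    dims, shape = ('x', 'y'), (3, 4)
    new_sizes = {'x': 5}  # only 'x' is being reindexed

    new_shape = tuple(new_sizes.get(dim, size)
                      for dim, size in zip(dims, shape))
    print(new_shape)  # (5, 4): 'y' keeps its original size, no index needed
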
27 changes: 17 additions & 10 deletions xarray/core/coordinates.py
@@ -51,7 +51,7 @@ def to_index(self, ordered_dims=None):
"""
if ordered_dims is None:
ordered_dims = self.dims
indexes = [self.variables[k].to_index() for k in ordered_dims]
indexes = [self.indexes.get(k) for k in ordered_dims]
return pd.MultiIndex.from_product(indexes, names=list(ordered_dims))

def update(self, other):
@@ -213,9 +213,6 @@ def to_dataset(self):
return self._to_dataset()

def __delitem__(self, key):
if key in self.dims:
raise ValueError('cannot delete a coordinate corresponding to a '
'DataArray dimension')
del self._data._coords[key]


@@ -244,11 +241,11 @@ def __init__(self, variables, dims):
Arguments
---------
variables : OrderedDict
variables : OrderedDict[Any, Variable]
Reference to OrderedDict holding variable objects. Should be the
same dictionary used by the source object.
dims : sequence or mapping
Should be the same dimensions used by the source object.
dims : OrderedDict[Any, int]
Map from dimension names to sizes.
"""
self._variables = variables
self._dims = dims
@@ -265,10 +262,20 @@ def __contains__(self, key):
return key in self._dims and key in self._variables

def __getitem__(self, key):
if key in self:
return self._variables[key].to_index()
else:
if key not in self._dims:
raise KeyError(key)
return self._variables[key].to_index()

def __unicode__(self):
return formatting.indexes_repr(self)

def get(self, key):
"""Get an index for a dimension, supplying default RangeIndex if needed.
"""
if key not in self._dims:
raise KeyError(key)

if key in self._variables:
return self._variables[key].to_index()
else:
return pd.Index(range(self._dims[key]), name=key)
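
With the new ``get`` method above, callers can ask for an index along any
dimension and receive a default range index when no coordinate variable exists.
A hedged sketch of the resulting behaviour (hypothetical data, assuming a build
that includes this commit)::

    import xarray as xr

    da = xr.DataArray([[1, 2, 3]], dims=['x', 'y'], coords={'y': [10, 20, 30]})
    da.indexes.get('y')  # index built from the 'y' coordinate: [10, 20, 30]
    da.indexes.get('x')  # default pd.Index(range(1), name='x'); no 'x' coordinate
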
7 changes: 4 additions & 3 deletions xarray/core/dataarray.py
@@ -17,7 +17,7 @@
from .coordinates import (DataArrayCoordinates, LevelCoordinatesSource,
Indexes)
from .dataset import Dataset
from .pycompat import iteritems, basestring, OrderedDict, zip
from .pycompat import iteritems, basestring, OrderedDict, zip, range
from .variable import (as_variable, Variable, as_compatible_data, IndexVariable,
default_index_coordinate,
assert_unique_multiindex_level_names)
@@ -505,7 +505,7 @@ def encoding(self, value):
def indexes(self):
"""OrderedDict of pandas.Index objects used for label based indexing
"""
return Indexes(self._coords, self.dims)
return Indexes(self._coords, OrderedDict(zip(self.dims, self.shape)))

@property
def coords(self):
@@ -1066,7 +1066,8 @@ def to_pandas(self):
except KeyError:
raise ValueError('cannot convert arrays with %s dimensions into '
'pandas objects' % self.ndim)
return constructor(self.values, *self.indexes.values())
indexes = [self.indexes.get(dim) for dim in self.dims]
return constructor(self.values, *indexes)

def to_dataframe(self, name=None):
"""Convert this array and its coordinates into a tidy pandas.DataFrame.
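``to_pandas`` now fetches indexes through ``Indexes.get``, so dimensions
without coordinates still convert cleanly. A small sketch (hypothetical data;
the exact pandas index type may differ)::

    import xarray as xr

    da = xr.DataArray([1, 2, 3], dims='x')  # no 'x' coordinate
    series = da.to_pandas()
    series.index  # a default integer index 0..2 is supplied for 'x'
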
46 changes: 15 additions & 31 deletions xarray/core/dataset.py
@@ -557,22 +557,9 @@ def __setitem__(self, key, value):

def __delitem__(self, key):
"""Remove a variable from this dataset.
If this variable is a dimension, all variables containing this
dimension are also removed.
"""
def remove(k):
del self._variables[k]
self._coord_names.discard(k)

remove(key)

if key in self._dims:
del self._dims[key]
also_delete = [k for k, v in iteritems(self._variables)
if key in v.dims]
for key in also_delete:
remove(key)
del self._variables[key]
self._coord_names.discard(key)

# mutable objects should not be hashable
__hash__ = None
@@ -1224,7 +1211,9 @@ def reindex(self, indexers=None, method=None, tolerance=None, copy=True, **kw_in
variables = alignment.reindex_variables(
self.variables, self.dims, self.indexes, indexers, method,
tolerance, copy=copy)
return self._replace_vars_and_dims(variables)
coord_names = set(self._coord_names)
coord_names.update(indexers)
return self._replace_vars_and_dims(variables, coord_names)

def rename(self, name_dict, inplace=False):
"""Returns a new object with renamed variables and dimensions.
@@ -1250,9 +1239,9 @@ def rename(self, name_dict, inplace=False):
DataArray.rename
"""
for k, v in name_dict.items():
if k not in self:
if k not in self and k not in self.dims:
raise ValueError("cannot rename %r because it is not a "
"variable in this dataset" % k)
"variable or dimension in this dataset" % k)
if v in self and k != v:
raise ValueError('the new name %r already exists' % v)

@@ -1339,18 +1328,8 @@ def _stack_once(self, dims, new_dim):
else:
variables[name] = var.copy(deep=False)

indexes = self.indexes
dim_sizes = self.dims

levels = []
for dim in dims:
if dim in indexes:
level = indexes[dim]
else:
level = np.arange(dim_sizes[dim])
levels.append(level)

# consider dropping levels that are unused?
levels = [self.indexes.get(dim) for dim in dims]
idx = utils.multiindex_from_product_levels(levels, names=dims)
variables[new_dim] = IndexVariable(new_dim, idx)

@@ -1409,7 +1388,7 @@ def unstack(self, dim):
if dim not in self.dims:
raise ValueError('invalid dimension: %s' % dim)

index = self.indexes[dim]
index = self.indexes.get(dim)
if not isinstance(index, pd.MultiIndex):
raise ValueError('cannot unstack a dimension that does not have '
'a MultiIndex')
@@ -1551,7 +1530,12 @@ def drop(self, labels, dim=None):
if dim is None:
return self._drop_vars(labels)
else:
new_index = self.indexes[dim].drop(labels)
try:
index = self.indexes[dim]
except KeyError:
raise ValueError(
'dimension %r does not have coordinate labels' % dim)
new_index = index.drop(labels)
return self.loc[{dim: new_index}]

def _drop_vars(self, names):
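Taken together, the ``Dataset`` changes above make deletion, renaming and
label-based dropping consistent with optional dimension coordinates. A hedged
sketch (hypothetical data; exact error wording may differ)::

    import xarray as xr

    ds = xr.Dataset({'foo': ('x', [1, 2, 3])}, coords={'x': [10, 20, 30]})
    del ds['x']               # removes only the 'x' coordinate; 'foo' is kept

    ds2 = xr.Dataset({'bar': ('x', [1, 2, 3])})
    ds2.rename({'x': 'lon'})  # now valid: 'x' is a dimension without a coordinate
    # ds2.drop([10], dim='x') would raise ValueError, because 'x' has no
    # coordinate labels to drop from
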
8 changes: 3 additions & 5 deletions xarray/test/test_dataarray.py
@@ -651,11 +651,9 @@ def test_coords(self):
actual = repr(da.coords)
self.assertEquals(expected, actual)

with self.assertRaisesRegexp(ValueError, 'cannot delete'):
del da['x']

with self.assertRaisesRegexp(ValueError, 'cannot delete'):
del da.coords['x']
del da.coords['x']
expected = DataArray(da.values, {'y': [0, 1, 2]}, dims=['x', 'y'])
self.assertDataArrayIdentical(da, expected)

with self.assertRaisesRegexp(ValueError, 'conflicting MultiIndex'):
self.mda['level_1'] = np.arange(4)