From aad4a093e6bffd34202ac8ac972a263abbf45210 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Sat, 24 Sep 2016 14:55:41 +0200 Subject: [PATCH 01/15] add Dataset.set_index method --- xarray/core/dataset.py | 80 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 79 insertions(+), 1 deletion(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 84671814c42..ea2fa09e1f3 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1254,7 +1254,6 @@ def rename(self, name_dict, inplace=False): See Also -------- - Dataset.swap_dims DataArray.rename """ @@ -1331,6 +1330,85 @@ def swap_dims(self, dims_dict, inplace=False): return self._replace_vars_and_dims(variables, coord_names, inplace=inplace) + def set_index(self, indexers=None, append=False, inplace=False, + **kw_indexers): + """ + Set Dataset indexes using one or more existing coordinates or + variables. + + Parameters + ---------- + indexers : dict, optional + Dictionary with keys given by dimension names and values given by + (lists of) the names of existing coordinates or variables. + Any list of multiple names given for a dimension will result as + a MultiIndex for that dimension. + append : bool, optional + If True, append the supplied indexers to the existing indexes. + Otherwise replace the existing indexes (default). + inplace : bool, optional + If True, set new indexes in-place. Otherwise, return a new dataset + object. + **kw_indexers : optional + Keyword arguments in the same form as ``indexers``. + + Returns + ------- + reindexed : Dataset + Another dataset, with this dataset's data but replaced coordinates. 
+ + See Also + -------- + Dataset.reset_index + Dataset.reindex + """ + indexers = utils.combine_pos_and_kw_args(indexers, kw_indexers, + 'set_index') + + vars_to_remove = [] + vars_to_replace = {} + + for dim, var_names in indexers.items(): + if not isinstance(var_names, list): + var_names = [var_names] + + names = [] + arrays = [] + vars_to_remove.extend(var_names) + current_index_variable = self._variables[dim] + + if append: + current_index = current_index_variable.to_index() + if isinstance(current_index, pd.MultiIndex): + names.extend(current_index.names) + for i in range(current_index.nlevels): + arrays.append(current_index.get_level_values(i)) + else: + names.append('%s_level_0' % dim) + arrays.append(current_index.values) + + for n in var_names: + names.append(n) + var = self._variables[n] + if var.dims != current_index_variable.dims: + raise ValueError( + "dimension mismatch between %r %s and %r %s" + % (dim, current_index_variable.dims, n, var.dims)) + else: + arrays.append(var.values) + + idx = pd.MultiIndex.from_arrays(arrays, names=names) + vars_to_replace[dim] = IndexVariable(dim, idx) + + variables = OrderedDict(((k, v) for k, v in iteritems(self._variables) + if k not in vars_to_remove)) + variables.update(vars_to_replace) + coord_names = set([n for n in self._coord_names + if n not in vars_to_remove]) + + return self._replace_vars_and_dims(variables, coord_names=coord_names, + inplace=inplace) + def _stack_once(self, dims, new_dim): variables = OrderedDict() for name, var in self.variables.items(): From 4fe06b3656744249fd03765d2c34eb5f4d8a96b6 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Fri, 30 Sep 2016 17:51:22 +0200 Subject: [PATCH 02/15] add set_index and reset_index methods for dataarray and dataset --- xarray/core/dataarray.py | 76 +++++++++++++++++- xarray/core/dataset.py | 168 +++++++++++++++++++++++++++++---------- 2 files changed, 201 insertions(+), 43 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py 
index c34680d4380..ac8cd2f5357 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -15,7 +15,7 @@ from .common import AbstractArray, BaseDataObject from .coordinates import (DataArrayCoordinates, LevelCoordinates, Indexes) -from .dataset import Dataset +from .dataset import Dataset, merge_indexes, split_indexes from .pycompat import iteritems, basestring, OrderedDict, zip from .variable import (as_variable, Variable, as_compatible_data, IndexVariable, default_index_coordinate, @@ -821,6 +821,80 @@ def swap_dims(self, dims_dict): ds = self._to_temp_dataset().swap_dims(dims_dict) return self._from_temp_dataset(ds) + def set_index(self, indexers=None, append=False, inplace=False, + **kw_indexers): + """Set DataArray (multi-)indexes using one or more existing coordinates. + + Parameters + ---------- + indexers : dict, optional + Dictionary with keys given by dimension names and values given by + (lists of) the names of existing coordinates. + Any list of multiple names given for a dimension will result as + a MultiIndex for that dimension. + append : bool, optional + If True, append the supplied indexers to the existing indexes. + Otherwise replace the existing indexes (default). + inplace : bool, optional + If True, set new indexes in-place. Otherwise, return a new DataArray + object. + **kw_indexers : optional + Keyword arguments in the same form as ``indexers``. + + Returns + ------- + reindexed : DataArray + Another dataarray, with this dataarray's data but replaced coordinates. + + See Also + -------- + DataArray.reset_index + """ + indexers = utils.combine_pos_and_kw_args(indexers, kw_indexers, + 'set_index') + coords, _ = merge_indexes(indexers, self._coords, set(), append=append) + if inplace: + self._coords = coords + else: + return self._replace(coords=coords) + + def reset_index(self, dim_levels=None, drop=False, inplace=False, + **kw_dim_levels): + """Extract multi-index levels as new coordinates. 
+ + Parameters + ---------- + dim_levels : dict, optional + Dictionary with keys given by dimension names and values given by + (lists of) the names of the levels to extract, or None to extract + all levels. Every given dimension must have a multi-index. + drop : bool, optional + If True, remove the specified levels instead of extracting them as + new coordinates (default: False). + inplace : bool, optional + If True, set modify the dataarray in-place. Otherwise, return a new + DataArray object. + **kw_dim_levels : optional + Keyword arguments in the same form as ``dim_levels``. + + Returns + ------- + reindexed: DataArray + Another dataarray, with this dataarray's data but replaced + coordinates. + + See Also + -------- + DataArray.set_index + """ + dim_levels = utils.combine_pos_and_kw_args(dim_levels, kw_dim_levels, + 'reset_index') + coords, _ = split_indexes(dim_levels, self._coords, set(), drop=drop) + if inplace: + self._coords = coords + else: + return self._replace(coords=coords) + def stack(self, **dimensions): """ Stack any number of existing dimensions into a single new dimension. diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index ea2fa09e1f3..051b5da4696 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -21,7 +21,8 @@ merge_data_and_coords) from .utils import (Frozen, SortedKeysDict, maybe_wrap_array, hashable, decode_numpy_dict_values, ensure_us_time_resolution) -from .variable import (Variable, as_variable, IndexVariable, broadcast_variables) +from .variable import (Variable, as_variable, IndexVariable, + broadcast_variables, default_index_coordinate) from .pycompat import (iteritems, basestring, OrderedDict, dask_array_type) from .combine import concat @@ -102,6 +103,95 @@ def calculate_dimensions(variables): return dims +def merge_indexes(indexers, variables, coord_names, append=False): + """Merge variables into multi-indexes. + + Not public API. Used in Dataset and DataArray set_index + methods. 
+ """ + vars_to_replace = {} + vars_to_remove = [] + + for dim, var_names in indexers.items(): + if isinstance(var_names, basestring): + var_names = [var_names] + + names = [] + arrays = [] + current_index_variable = variables[dim] + + if append: + current_index = current_index_variable.to_index() + if isinstance(current_index, pd.MultiIndex): + names.extend(current_index.names) + for i in range(current_index.nlevels): + arrays.append(current_index.get_level_values(i)) + else: + names.append('%s_level_0' % dim) + arrays.append(current_index.values) + + for n in var_names: + names.append(n) + var = variables[n] + if var.dims != current_index_variable.dims: + raise ValueError( + "dimension mismatch between %r %s and %r %s" + % (dim, current_index_variable.dims, n, var.dims)) + else: + arrays.append(var.values) + + idx = pd.MultiIndex.from_arrays(arrays, names=names) + vars_to_replace[dim] = IndexVariable(dim, idx) + vars_to_remove.extend(var_names) + + new_variables = OrderedDict([(k, v) for k, v in iteritems(variables) + if k not in vars_to_remove]) + new_variables.update(vars_to_replace) + new_coord_names = coord_names - set(vars_to_remove) + + return new_variables, new_coord_names + + +def split_indexes(dim_levels, variables, coord_names, drop=False): + """Split multi-indexes into variables. + + Not public API. Used in Dataset and DataArray reset_index + methods. 
+ """ + vars_to_replace = {} + vars_to_create = OrderedDict() + + for dim, levels in dim_levels.items(): + current_index = variables[dim].to_index() + if not isinstance(current_index, pd.MultiIndex): + raise ValueError("%r has no MultiIndex" % dim) + + if levels is None: + levels = current_index.names + elif not isinstance(levels, (tuple, list)): + levels = [levels] + + if len(levels) == current_index.nlevels: + new_index_variable = default_index_coordinate( + dim, current_index.size) + else: + new_index_variable = IndexVariable( + dim, current_index.droplevel(levels)) + vars_to_replace[dim] = new_index_variable + + if not drop: + for lev in levels: + idx = current_index.get_level_values(lev) + vars_to_create[idx.name] = IndexVariable(dim, idx) + + new_variables = variables.copy() + new_variables.update(vars_to_replace) + new_variables.update(vars_to_create) + new_coord_names = coord_names | set(vars_to_create) + + return new_variables, new_coord_names + + def _assert_empty(args, msg='%s'): if args: raise ValueError(msg % args) @@ -1332,8 +1422,7 @@ def swap_dims(self, dims_dict, inplace=False): def set_index(self, indexers=None, append=False, inplace=False, **kw_indexers): - """ - Set Dataset indexes using one or more existing coordinates or + """Set Dataset (multi-)indexes using one or more existing coordinates or variables. 
Parameters @@ -1360,52 +1449,47 @@ def set_index(self, indexers=None, append=False, inplace=False, See Also -------- Dataset.reset_index - Dataset.reindex """ indexers = utils.combine_pos_and_kw_args(indexers, kw_indexers, 'set_index') + variables, coord_names = merge_indexes(indexers, self._variables, + self._coord_names, + append=append) + return self._replace_vars_and_dims(variables, coord_names=coord_names, + inplace=inplace) - vars_to_remove = [] - vars_to_replace = {} - - for dim, var_names in indexers.items(): - if not isinstance(var_names, list): - var_names = [var_names] - - names = [] - arrays = [] - vars_to_remove.extend(var_names) - current_index_variable = self._variables[dim] - - if append: - current_index = current_index_variable.to_index() - if isinstance(current_index, pd.MultiIndex): - names.extend(current_index.names) - for i in range(current_index.nlevels): - arrays.append(current_index.get_level_values(i)) - else: - names.append('%s_level_0' % dim) - arrays.append(current_index.values) - - for n in var_names: - names.append(n) - var = self._variables[n] - if var.dims != current_index_variable.dims: - raise ValueError( - "dimension mismatch between %r %s and %r %s" - % (dim, current_index_variable.dims, n, var.dims)) - else: - arrays.append(var.values) + def reset_index(self, dim_levels=None, drop=False, inplace=False, + **kw_dim_levels): + """Extract multi-index levels as new coordinates. - idx = pd.MultiIndex.from_arrays(arrays, names=names) - vars_to_replace[dim] = IndexVariable(dim, idx) + Parameters + ---------- + dim_levels : dict, optional + Dictionary with keys given by dimension names and values given by + (lists of) the names of the levels to extract, or None to extract + all levels. Every given dimension must have a multi-index. + drop : bool, optional + If True, remove the specified levels instead of extracting them as + new coordinates (default: False). + inplace : bool, optional + If True, set modify the dataset in-place. 
Otherwise, return a new + Dataset object. + **kw_dim_levels : optional + Keyword arguments in the same form as ``dim_levels``. - variables = OrderedDict(((k, v) for k, v in iteritems(self._variables) - if k not in vars_to_remove)) - variables.update(vars_to_replace) - coord_names = set([n for n in self._coord_names - if n not in vars_to_remove]) + Returns + ------- + reindexed: Dataset + Another dataset, with this dataset's data but replaced coordinates. + See Also + -------- + Dataset.set_index + """ + dim_levels = utils.combine_pos_and_kw_args(dim_levels, kw_dim_levels, + 'reset_index') + variables, coord_names = split_indexes(dim_levels, self._variables, + self._coord_names, drop=drop) return self._replace_vars_and_dims(variables, coord_names=coord_names, inplace=inplace) From 08ccdc2cec360722fbdc64da1e7484a025eed4f4 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Mon, 3 Oct 2016 11:15:38 +0200 Subject: [PATCH 03/15] add reorder_levels method for dataset and dataarray --- xarray/core/dataarray.py | 40 +++++++++++++++++++++++++++++++++++++++- xarray/core/dataset.py | 37 ++++++++++++++++++++++++++++++++++++- 2 files changed, 75 insertions(+), 2 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index ac8cd2f5357..6f497e786d3 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -872,7 +872,7 @@ def reset_index(self, dim_levels=None, drop=False, inplace=False, If True, remove the specified levels instead of extracting them as new coordinates (default: False). inplace : bool, optional - If True, set modify the dataarray in-place. Otherwise, return a new + If True, modify the dataarray in-place. Otherwise, return a new DataArray object. **kw_dim_levels : optional Keyword arguments in the same form as ``dim_levels``. 
@@ -895,6 +895,44 @@ def reset_index(self, dim_levels=None, drop=False, inplace=False, else: return self._replace(coords=coords) + def reorder_levels(self, dim_order=None, inplace=False, **kw_dim_order): + """Rearrange index levels using input order. + + Parameters + ---------- + dim_order : dict, optional + Dictionary with keys given by dimension names and values given + by lists representing new level orders. Every given dimension + must have a multi-index. + inplace : bool, optional + If True, modify the dataarray in-place. Otherwise, return a new + DataArray object. + **kw_dim_order : optional + Keyword arguments in the same form as ``dim_order``. + + Returns + ------- + reindexed: DataArray + Another dataarray, with this dataarray's data but replaced + coordinates. + """ + dim_order = utils.combine_pos_and_kw_args(dim_order, kw_dim_order, + 'reorder_levels') + replace_coords = {} + for dim, order in dim_order.items(): + coord = self._coords[dim] + index = coord.to_index() + if not isinstance(index, pd.MultiIndex): + raise ValueError("coordinate %r has no MultiIndex" % dim) + replace_coords[dim] = IndexVariable(coord.dims, + index.reorder_levels(order)) + coords = self._coords.copy() + coords.update(replace_coords) + if inplace: + self._coords = coords + else: + return self._replace(coords=coords) + def stack(self, **dimensions): """ Stack any number of existing dimensions into a single new dimension. diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 051b5da4696..6d428bec0a0 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1472,7 +1472,7 @@ def reset_index(self, dim_levels=None, drop=False, inplace=False, If True, remove the specified levels instead of extracting them as new coordinates (default: False). inplace : bool, optional - If True, set modify the dataset in-place. Otherwise, return a new + If True, modify the dataset in-place. Otherwise, return a new Dataset object. 
**kw_dim_levels : optional Keyword arguments in the same form as ``dim_levels``. @@ -1493,6 +1493,41 @@ def reset_index(self, dim_levels=None, drop=False, inplace=False, return self._replace_vars_and_dims(variables, coord_names=coord_names, inplace=inplace) + def reorder_levels(self, dim_order=None, inplace=False, **kw_dim_order): + """Rearrange index levels using input order. + + Parameters + ---------- + dim_order : dict, optional + Dictionary with keys given by dimension names and values given + by lists representing new level orders. Every given dimension + must have a multi-index. + inplace : bool, optional + If True, modify the dataset in-place. Otherwise, return a new + Dataset object. + **kw_dim_order : optional + Keyword arguments in the same form as ``dim_order``. + + Returns + ------- + reindexed: Dataset + Another dataset, with this dataset's data but replaced + coordinates. + """ + dim_order = utils.combine_pos_and_kw_args(dim_order, kw_dim_order, + 'reorder_levels') + replace_variables = {} + for dim, order in dim_order.items(): + coord = self._variables[dim] + index = coord.to_index() + if not isinstance(index, pd.MultiIndex): + raise ValueError("coordinate %r has no MultiIndex" % dim) + replace_variables[dim] = IndexVariable(coord.dims, + index.reorder_levels(order)) + variables = self._variables.copy() + variables.update(replace_variables) + return self._replace_vars_and_dims(variables, inplace=inplace) + def _stack_once(self, dims, new_dim): variables = OrderedDict() for name, var in self.variables.items(): From 9f53c72b88e245244d078b8bbed316b01fb6676b Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Mon, 3 Oct 2016 13:32:56 +0200 Subject: [PATCH 04/15] add tests --- xarray/test/test_dataarray.py | 65 +++++++++++++++++++++++++++++++++++ xarray/test/test_dataset.py | 42 ++++++++++++++++++++++ 2 files changed, 107 insertions(+) diff --git a/xarray/test/test_dataarray.py b/xarray/test/test_dataarray.py index 6a1985060e8..1f4c8dd366d 100644 --- 
a/xarray/test/test_dataarray.py +++ b/xarray/test/test_dataarray.py @@ -817,6 +817,71 @@ def test_swap_dims(self): actual = array.swap_dims({'x': 'y'}) self.assertDataArrayIdentical(expected, actual) + def test_set_index(self): + indexes = [self.mindex.get_level_values(n) for n in self.mindex.names] + coords = {idx.name: ('x', idx) for idx in indexes} + array = DataArray(self.mda.values, coords=coords, dims='x') + expected = self.mda.copy() + level_3 = ('x', [1, 2, 3, 4]) + array['level_3'] = level_3 + expected['level_3'] = level_3 + + reindexed = array.set_index(indexers={'x': self.mindex.names}) + self.assertDataArrayIdentical(reindexed, expected) + + reindexed = reindexed.set_index(x='level_3', append=True) + expected = array.set_index(x=['level_1', 'level_2', 'level_3']) + self.assertDataArrayIdentical(reindexed, expected) + + array.set_index(x=['level_1', 'level_2', 'level_3'], inplace=True) + self.assertDataArrayIdentical(array, expected) + + array2d = DataArray(np.random.rand(2, 2), + coords={'level': ('y', [1, 2])}, + dims=('x', 'y')) + with self.assertRaisesRegex(ValueError, 'dimension mismatch'): + array2d.set_index(x='level') + + def test_reset_index(self): + indexes = [self.mindex.get_level_values(n) for n in self.mindex.names] + coords = {idx.name: ('x', idx) for idx in indexes} + expected = DataArray(self.mda.values, coords=coords, dims='x') + + reindexed = self.mda.reset_index(dim_levels={'x': None}) + self.assertDataArrayIdentical(reindexed, expected) + reindexed = self.mda.reset_index(x=self.mindex.names) + self.assertDataArrayIdentical(reindexed, expected) + + coords = {'x': ('x', self.mindex.droplevel('level_1')), + 'level_1': ('x', self.mindex.get_level_values('level_1'))} + expected = DataArray(self.mda.values, coords=coords, dims='x') + reindexed = self.mda.reset_index(x='level_1') + self.assertDataArrayIdentical(reindexed, expected) + + expected = DataArray(self.mda.values, dims='x') + reindexed = self.mda.reset_index(x=None, drop=True) + 
self.assertDataArrayIdentical(reindexed, expected) + + array = self.mda.copy() + array.reset_index(x=None, drop=True, inplace=True) + self.assertDataArrayIdentical(array, expected) + + def test_reorder_levels(self): + midx = self.mindex.reorder_levels(['level_2', 'level_1']) + expected = DataArray(self.mda.values, coords={'x': midx}, dims='x') + + reindexed = self.mda.reorder_levels( + dim_order={'x': ['level_2', 'level_1']}) + self.assertDataArrayIdentical(reindexed, expected) + + array = self.mda.copy() + array.reorder_levels(x=['level_2', 'level_1'], inplace=True) + self.assertDataArrayIdentical(array, expected) + + array = DataArray([1, 2], dims='x') + with self.assertRaisesRegex(ValueError, 'has no MultiIndex'): + array.reorder_levels(x=['level_1', 'level_2']) + def test_dataset_getitem(self): dv = self.ds['foo'] self.assertDataArrayIdentical(dv, self.dv) diff --git a/xarray/test/test_dataset.py b/xarray/test/test_dataset.py index 87dee7603b5..d8311040310 100644 --- a/xarray/test/test_dataset.py +++ b/xarray/test/test_dataset.py @@ -1376,6 +1376,48 @@ def test_swap_dims(self): with self.assertRaisesRegexp(ValueError, 'replacement dimension'): original.swap_dims({'x': 'z'}) + def test_set_index(self): + expected = create_test_multiindex() + mindex = expected['x'].to_index() + indexes = [mindex.get_level_values(n) for n in mindex.names] + coords = {idx.name: ('x', idx) for idx in indexes} + ds = Dataset({}, coords=coords) + + reindexed = ds.set_index(indexers={'x': mindex.names}) + self.assertDatasetIdentical(reindexed, expected) + + ds.set_index(x=mindex.names, inplace=True) + self.assertDatasetIdentical(ds, expected) + + def test_reset_index(self): + ds = create_test_multiindex() + mindex = ds['x'].to_index() + indexes = [mindex.get_level_values(n) for n in mindex.names] + coords = {idx.name: ('x', idx) for idx in indexes} + expected = Dataset({}, coords=coords) + + reindexed = ds.reset_index(dim_levels={'x': None}) + self.assertDatasetIdentical(reindexed, 
expected) + + ds.reset_index(x=None, inplace=True) + self.assertDatasetIdentical(ds, expected) + + def test_reorder_levels(self): + ds = create_test_multiindex() + mindex = ds['x'].to_index() + midx = mindex.reorder_levels(['level_2', 'level_1']) + expected = Dataset({}, coords={'x': midx}) + + reindexed = ds.reorder_levels(dim_order={'x': ['level_2', 'level_1']}) + self.assertDatasetIdentical(reindexed, expected) + + ds.reorder_levels(x=['level_2', 'level_1'], inplace=True) + self.assertDatasetIdentical(ds, expected) + + ds = Dataset({}, coords={'x': [1, 2]}) + with self.assertRaisesRegex(ValueError, 'has no MultiIndex'): + ds.reorder_levels(x=['level_1', 'level_2']) + def test_stack(self): ds = Dataset({'a': ('x', [0, 1]), 'b': (('x', 'y'), [[0, 1], [2, 3]]), From 363b4630705254928884e2e387ff874c0a359981 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Mon, 3 Oct 2016 14:44:20 +0200 Subject: [PATCH 05/15] update doc --- doc/api.rst | 6 ++++++ doc/indexing.rst | 42 +++++++++++++++++++++++++++++++++++++++++- doc/whats-new.rst | 10 ++++++++++ 3 files changed, 57 insertions(+), 1 deletion(-) diff --git a/doc/api.rst b/doc/api.rst index 2602d9f2e29..94afe438af7 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -102,6 +102,9 @@ Indexing Dataset.squeeze Dataset.reindex Dataset.reindex_like + Dataset.set_index + Dataset.reset_index + Dataset.reorder_levels Computation ----------- @@ -234,6 +237,9 @@ Indexing DataArray.squeeze DataArray.reindex DataArray.reindex_like + DataArray.set_index + DataArray.reset_index + DataArray.reorder_levels Comparisons ----------- diff --git a/doc/indexing.rst b/doc/indexing.rst index acf6920c4f6..98eea86ec01 100644 --- a/doc/indexing.rst +++ b/doc/indexing.rst @@ -478,6 +478,47 @@ Both ``reindex_like`` and ``align`` work interchangeably between # this is a no-op, because there are no shared dimension names ds.reindex_like(other) +.. 
_multi-index handling: + +Multi-index handling +-------------------- + +Mirroring pandas, xarray's ``set_index``, ``reset_index`` and +``reorder_levels`` allow easy manipulation of ``DataArray`` or ``Dataset`` +multi-indexes without modifying the data. + +You can create a multi-index from several 1-dimensional variables and/or +coordinates using ``set_index``: + +.. ipython:: python + + da = xr.DataArray(np.random.rand(4), + coords={'band': ('x', ['a', 'a', 'b', 'b']), + 'wavenumber': ('x', np.linspace(200, 400, 4))}, + dims='x') + da + mda = da.set_index(x=['band', 'wavenumber']) + mda + +These coordinates can now be used for indexing, e.g., + +.. ipython:: python + + mda.sel(band='a') + +Conversely, you can use ``reset_index`` to extract multi-index levels as +coordinates (this is mainly useful for serialization): + +.. ipython:: python + + mda.reset_index(x=['band', 'wavenumber']) + +``reorder_levels`` allows changing the order of multi-index levels: + +.. ipython:: python + + mda.reorder_levels(x=['wavenumber', 'band']) + Underlying Indexes ------------------ @@ -490,4 +531,3 @@ through the :py:attr:`~xarray.DataArray.indexes` attribute. arr arr.indexes arr.indexes['time'] - diff --git a/doc/whats-new.rst b/doc/whats-new.rst index d30925c8f28..edc5c25c3eb 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -44,8 +44,14 @@ Deprecations Enhancements ~~~~~~~~~~~~ +<<<<<<< 9f53c72b88e245244d078b8bbed316b01fb6676b - Add checking of ``attr`` names and values when saving to netCDF, raising useful error messages if they are invalid. (:issue:`911`). +======= + +- Add checking of ``attr`` names and values when saving to netCDF, raising + useful error messages if they are invalid. (:issue:`911`). +>>>>>>> update doc By `Robin Wilson `_. - Added ability to save ``DataArray`` objects directly to netCDF files using @@ -62,6 +68,10 @@ Enhancements (see :ref:`multi-level indexing`). By `Benoit Bovy `_. 
+- Added ``set_index``, ``reset_index`` and ``reorder_levels`` methods to + easily create and manipulate multi-indexes (see :ref:`multi-index handling`). + By `Benoit Bovy `_. + - Added the ``compat`` option ``'no_conflicts'`` to ``merge``, allowing the combination of xarray objects with disjoint (:issue:`742`) or overlapping (:issue:`835`) coordinates as long as all present data agrees. From 4388142d12dcda5c9e54d485a68cc1cc2fc5e815 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Mon, 3 Oct 2016 15:29:24 +0200 Subject: [PATCH 06/15] fix tests py27 --- xarray/test/test_dataarray.py | 4 ++-- xarray/test/test_dataset.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/xarray/test/test_dataarray.py b/xarray/test/test_dataarray.py index 1f4c8dd366d..fc03f5f01f3 100644 --- a/xarray/test/test_dataarray.py +++ b/xarray/test/test_dataarray.py @@ -839,7 +839,7 @@ def test_set_index(self): array2d = DataArray(np.random.rand(2, 2), coords={'level': ('y', [1, 2])}, dims=('x', 'y')) - with self.assertRaisesRegex(ValueError, 'dimension mismatch'): + with self.assertRaisesRegexp(ValueError, 'dimension mismatch'): array2d.set_index(x='level') def test_reset_index(self): @@ -879,7 +879,7 @@ def test_reorder_levels(self): self.assertDataArrayIdentical(array, expected) array = DataArray([1, 2], dims='x') - with self.assertRaisesRegex(ValueError, 'has no MultiIndex'): + with self.assertRaisesRegexp(ValueError, 'has no MultiIndex'): array.reorder_levels(x=['level_1', 'level_2']) def test_dataset_getitem(self): diff --git a/xarray/test/test_dataset.py b/xarray/test/test_dataset.py index d8311040310..3579482f08a 100644 --- a/xarray/test/test_dataset.py +++ b/xarray/test/test_dataset.py @@ -1415,7 +1415,7 @@ def test_reorder_levels(self): self.assertDatasetIdentical(ds, expected) ds = Dataset({}, coords={'x': [1, 2]}) - with self.assertRaisesRegex(ValueError, 'has no MultiIndex'): + with self.assertRaisesRegexp(ValueError, 'has no MultiIndex'): 
ds.reorder_levels(x=['level_1', 'level_2']) def test_stack(self): From f8797faaface4df3af12980e8fd58b50ca609c5a Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Mon, 7 Nov 2016 16:16:04 +0100 Subject: [PATCH 07/15] review changes --- xarray/core/dataarray.py | 62 ++++++++--------- xarray/core/dataset.py | 122 +++++++++++++++++----------------- xarray/test/test_dataarray.py | 15 ++--- xarray/test/test_dataset.py | 8 +-- 4 files changed, 98 insertions(+), 109 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 6f497e786d3..2d77c313ca1 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -821,25 +821,21 @@ def swap_dims(self, dims_dict): ds = self._to_temp_dataset().swap_dims(dims_dict) return self._from_temp_dataset(ds) - def set_index(self, indexers=None, append=False, inplace=False, - **kw_indexers): + def set_index(self, append=False, inplace=False, **indexes): """Set DataArray (multi-)indexes using one or more existing coordinates. Parameters ---------- - indexers : dict, optional - Dictionary with keys given by dimension names and values given by - (lists of) the names of existing coordinates. - Any list of multiple names given for a dimension will result as - a MultiIndex for that dimension. append : bool, optional - If True, append the supplied indexers to the existing indexes. - Otherwise replace the existing indexes (default). + If True, append the supplied index(es) to the existing index(es). + Otherwise replace the existing index(es) (default). inplace : bool, optional - If True, set new indexes in-place. Otherwise, return a new DataArray + If True, set new index(es) in-place. Otherwise, return a new DataArray object. - **kw_indexers : optional - Keyword arguments in the same form as ``indexers``. + **indexes : {dim: index, ...} + Keyword arguments with names matching dimensions and values given + by (lists of) the names of existing coordinates or variables to set + as new (multi-)index. 
Returns ------- @@ -850,32 +846,32 @@ def set_index(self, indexers=None, append=False, inplace=False, -------- DataArray.reset_index """ - indexers = utils.combine_pos_and_kw_args(indexers, kw_indexers, - 'set_index') - coords, _ = merge_indexes(indexers, self._coords, set(), append=append) + coords, _ = merge_indexes(indexes, self._coords, set(), append=append) if inplace: self._coords = coords else: return self._replace(coords=coords) - def reset_index(self, dim_levels=None, drop=False, inplace=False, - **kw_dim_levels): - """Extract multi-index levels as new coordinates. + def reset_index(self, dim, levels=None, drop=False, inplace=False): + """Extract index(es) as new coordinates. Parameters ---------- - dim_levels : dict, optional - Dictionary with keys given by dimension names and values given by - (lists of) the names of the levels to extract, or None to extract - all levels. Every given dimension must have a multi-index. + dim : str or list + Name(s) of the dimension(s) for which to extract and reset + the index. + levels : list or None, optional + If None (default) and if `dim` has a multi-index, extract all levels + as new coordinates. Otherwise extract only the given list of level + names. If more than one dimension is given in `dim`, `levels` should + be a list of the same length as `dim` (or simply None to extract + all indexes/levels from all given dimensions). drop : bool, optional If True, remove the specified levels instead of extracting them as new coordinates (default: False). inplace : bool, optional If True, modify the dataarray in-place. Otherwise, return a new DataArray object. - **kw_dim_levels : optional - Keyword arguments in the same form as ``dim_levels``. 
Returns ------- @@ -887,28 +883,24 @@ def reset_index(self, dim_levels=None, drop=False, inplace=False, -------- DataArray.set_index """ - dim_levels = utils.combine_pos_and_kw_args(dim_levels, kw_dim_levels, - 'reset_index') - coords, _ = split_indexes(dim_levels, self._coords, set(), drop=drop) + coords, _ = split_indexes(dim, levels, self._coords, set(), drop=drop) if inplace: self._coords = coords else: return self._replace(coords=coords) - def reorder_levels(self, dim_order=None, inplace=False, **kw_dim_order): + def reorder_levels(self, inplace=False, **dim_order): """Rearrange index levels using input order. Parameters ---------- - dim_order : dict, optional - Dictionary with keys given by dimension names and values given - by lists representing new level orders. Every given dimension - must have a multi-index. inplace : bool, optional If True, modify the dataarray in-place. Otherwise, return a new DataArray object. - **kw_dim_order : optional - Keyword arguments in the same form as ``dim_order``. + **dim_order : optional + Keyword arguments with names matching dimensions and values given + by lists representing new level orders. Every given dimension + must have a multi-index. Returns ------- @@ -916,8 +908,6 @@ def reorder_levels(self, dim_order=None, inplace=False, **kw_dim_order): Another dataarray, with this dataarray's data but replaced coordinates. """ - dim_order = utils.combine_pos_and_kw_args(dim_order, kw_dim_order, - 'reorder_levels') replace_coords = {} for dim, order in dim_order.items(): coord = self._coords[dim] diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 6d428bec0a0..6ab873f3187 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -103,7 +103,7 @@ def calculate_dimensions(variables): return dims -def merge_indexes(indexers, variables, coord_names, append=False): +def merge_indexes(indexes, variables, coord_names, append=False): """Merge variables into multi-indexes. Not public API. 
Used in Dataset and DataArray set_index @@ -112,23 +112,24 @@ def merge_indexes(indexers, variables, coord_names, append=False): vars_to_replace = {} vars_to_remove = [] - for dim, var_names in indexers.items(): + for dim, var_names in indexes.items(): if isinstance(var_names, basestring): var_names = [var_names] - names = [] - arrays = [] + names, labels, levels = [], [], [] current_index_variable = variables[dim] if append: current_index = current_index_variable.to_index() if isinstance(current_index, pd.MultiIndex): names.extend(current_index.names) - for i in range(current_index.nlevels): - arrays.append(current_index.get_level_values(i)) + labels.extend(current_index.labels) + levels.extend(current_index.levels) else: names.append('%s_level_0' % dim) - arrays.append(current_index.values) + cat = pd.Categorical(current_index.values, ordered=True) + labels.append(cat.codes) + levels.append(cat.categories) for n in var_names: names.append(n) @@ -138,9 +139,11 @@ def merge_indexes(indexers, variables, coord_names, append=False): "dimension mismatch between %r %s and %r %s" % (dim, current_index_variable.dims, n, var.dims)) else: - arrays.append(var.values) + cat = pd.Categorical(var.values, ordered=True) + labels.append(cat.codes) + levels.append(cat.categories) - idx = pd.MultiIndex.from_arrays(arrays, names=names) + idx = pd.MultiIndex(labels=labels, levels=levels, names=names) vars_to_replace[dim] = IndexVariable(dim, idx) vars_to_remove.extend(var_names) @@ -152,37 +155,44 @@ def merge_indexes(indexers, variables, coord_names, append=False): return new_variables, new_coord_names -def split_indexes(dim_levels, variables, coord_names, drop=False): +def split_indexes(dim, levels, variables, coord_names, drop=False): """Split multi-indexes into variables. Not public API. Used in Dataset and DataArray reset_index methods. 
""" + if isinstance(dim, basestring): + dim = [dim] + if levels is not None: + levels = [levels] + if levels is None: + levels = [None] * len(dim) + vars_to_replace = {} vars_to_create = OrderedDict() - for dim, levels in dim_levels.items(): - current_index = variables[dim].to_index() + for d, levs in zip(dim, levels): + current_index = variables[d].to_index() if not isinstance(current_index, pd.MultiIndex): - raise ValueError("%r has no MultiIndex" % dim) + raise ValueError("%r has no MultiIndex" % d) - if levels is None: - levels = current_index.names - elif not isinstance(levels, (tuple, list)): - levels = [levels] + if levs is None: + levs = current_index.names + elif not isinstance(levs, (tuple, list)): + levs = [levs] - if len(levels) == current_index.nlevels: + if len(levs) == current_index.nlevels: new_index_variable = default_index_coordinate( - dim, current_index.size) + d, current_index.size) else: new_index_variable = IndexVariable( - dim, current_index.droplevel(levels)) - vars_to_replace[dim] = new_index_variable + d, current_index.droplevel(levs)) + vars_to_replace[d] = new_index_variable if not drop: - for lev in levels: - idx = current_index.get_level_values(lev) - vars_to_create[idx.name] = IndexVariable(dim, idx) + for level in levs: + idx = current_index.get_level_values(level) + vars_to_create[idx.name] = IndexVariable(d, idx) new_variables = variables.copy() new_variables.update(vars_to_replace) @@ -1420,26 +1430,22 @@ def swap_dims(self, dims_dict, inplace=False): return self._replace_vars_and_dims(variables, coord_names, inplace=inplace) - def set_index(self, indexers=None, append=False, inplace=False, - **kw_indexers): + def set_index(self, append=False, inplace=False, **indexes): """Set Dataset (multi-)indexes using one or more existing coordinates or variables. Parameters ---------- - indexers : dict, optional - Dictionary with keys given by dimension names and values given by - (lists of) the names of existing coordinates or variables. 
- Any list of multiple names given for a dimension will result as - a MultiIndex for that dimension. append : bool, optional - If True, append the supplied indexers to the existing indexes. - Otherwise replace the existing indexes (default). + If True, append the supplied index(es) to the existing index(es). + Otherwise replace the existing index(es) (default). inplace : bool, optional - If True, set new indexes in-place. Otherwise, return a new dataset - object. - **kw_indexers : optional - Keyword arguments in the same form as ``indexers``. + If True, set new index(es) in-place. Otherwise, return a new + Dataset object. + **indexes : {dim: index, ...} + Keyword arguments with names matching dimensions and values given + by (lists of) the names of existing coordinates or variables to set + as new (multi-)index. Returns ------- @@ -1450,32 +1456,32 @@ def set_index(self, indexers=None, append=False, inplace=False, -------- Dataset.reset_index """ - indexers = utils.combine_pos_and_kw_args(indexers, kw_indexers, - 'set_index') - variables, coord_names = merge_indexes(indexers, self._variables, + variables, coord_names = merge_indexes(indexes, self._variables, self._coord_names, append=append) return self._replace_vars_and_dims(variables, coord_names=coord_names, inplace=inplace) - def reset_index(self, dim_levels=None, drop=False, inplace=False, - **kw_dim_levels): - """Extract multi-index levels as new coordinates. + def reset_index(self, dim, levels=None, drop=False, inplace=False): + """Extract index(es) as new coordinates. Parameters ---------- - dim_levels : dict, optional - Dictionary with keys given by dimension names and values given by - (lists of) the names of the levels to extract, or None to extract - all levels. Every given dimension must have a multi-index. + dim : str or list + Name(s) of the dimension(s) for which to extract and reset + the index. 
+ levels : list or None, optional + If None (default) and if `dim` has a multi-index, extract all levels + as new coordinates. Otherwise extract only the given list of level + names. If more than one dimension is given in `dim`, `levels` should + be a list of the same length than `dim` (or simply None to extract + all indexes/levels from all given dimensions). drop : bool, optional If True, remove the specified levels instead of extracting them as new coordinates (default: False). inplace : bool, optional If True, modify the dataset in-place. Otherwise, return a new Dataset object. - **kw_dim_levels : optional - Keyword arguments in the same form as ``dim_levels``. Returns ------- @@ -1486,27 +1492,23 @@ def reset_index(self, dim_levels=None, drop=False, inplace=False, -------- Dataset.set_index """ - dim_levels = utils.combine_pos_and_kw_args(dim_levels, kw_dim_levels, - 'reset_index') - variables, coord_names = split_indexes(dim_levels, self._variables, + variables, coord_names = split_indexes(dim, levels, self._variables, self._coord_names, drop=drop) return self._replace_vars_and_dims(variables, coord_names=coord_names, inplace=inplace) - def reorder_levels(self, dim_order=None, inplace=False, **kw_dim_order): + def reorder_levels(self, inplace=False, **dim_order): """Rearrange index levels using input order. Parameters ---------- - dim_order : dict, optional - Dictionary with keys given by dimension names and values given - by lists representing new level orders. Every given dimension - must have a multi-index. inplace : bool, optional If True, modify the dataset in-place. Otherwise, return a new DataArray object. - **kw_dim_order : optional - Keyword arguments in the same form as ``dim_order``. + **dim_order : optional + Keyword arguments with names matching dimensions and values given + by lists representing new level orders. Every given dimension + must have a multi-index. 
Returns ------- @@ -1514,8 +1516,6 @@ def reorder_levels(self, dim_order=None, inplace=False, **kw_dim_order): Another dataset, with this dataset's data but replaced coordinates. """ - dim_order = utils.combine_pos_and_kw_args(dim_order, kw_dim_order, - 'reorder_levels') replace_variables = {} for dim, order in dim_order.items(): coord = self._variables[dim] diff --git a/xarray/test/test_dataarray.py b/xarray/test/test_dataarray.py index fc03f5f01f3..60eda88d58a 100644 --- a/xarray/test/test_dataarray.py +++ b/xarray/test/test_dataarray.py @@ -826,7 +826,7 @@ def test_set_index(self): array['level_3'] = level_3 expected['level_3'] = level_3 - reindexed = array.set_index(indexers={'x': self.mindex.names}) + reindexed = array.set_index(x=self.mindex.names) self.assertDataArrayIdentical(reindexed, expected) reindexed = reindexed.set_index(x='level_3', append=True) @@ -847,31 +847,30 @@ def test_reset_index(self): coords = {idx.name: ('x', idx) for idx in indexes} expected = DataArray(self.mda.values, coords=coords, dims='x') - reindexed = self.mda.reset_index(dim_levels={'x': None}) + reindexed = self.mda.reset_index('x') self.assertDataArrayIdentical(reindexed, expected) - reindexed = self.mda.reset_index(x=self.mindex.names) + reindexed = self.mda.reset_index('x', levels=self.mindex.names) self.assertDataArrayIdentical(reindexed, expected) coords = {'x': ('x', self.mindex.droplevel('level_1')), 'level_1': ('x', self.mindex.get_level_values('level_1'))} expected = DataArray(self.mda.values, coords=coords, dims='x') - reindexed = self.mda.reset_index(x='level_1') + reindexed = self.mda.reset_index('x', levels=['level_1']) self.assertDataArrayIdentical(reindexed, expected) expected = DataArray(self.mda.values, dims='x') - reindexed = self.mda.reset_index(x=None, drop=True) + reindexed = self.mda.reset_index('x', drop=True) self.assertDataArrayIdentical(reindexed, expected) array = self.mda.copy() - array.reset_index(x=None, drop=True, inplace=True) + 
array.reset_index(['x'], drop=True, inplace=True) self.assertDataArrayIdentical(array, expected) def test_reorder_levels(self): midx = self.mindex.reorder_levels(['level_2', 'level_1']) expected = DataArray(self.mda.values, coords={'x': midx}, dims='x') - reindexed = self.mda.reorder_levels( - dim_order={'x': ['level_2', 'level_1']}) + reindexed = self.mda.reorder_levels(x=['level_2', 'level_1']) self.assertDataArrayIdentical(reindexed, expected) array = self.mda.copy() diff --git a/xarray/test/test_dataset.py b/xarray/test/test_dataset.py index 3579482f08a..0d8e16398df 100644 --- a/xarray/test/test_dataset.py +++ b/xarray/test/test_dataset.py @@ -1383,7 +1383,7 @@ def test_set_index(self): coords = {idx.name: ('x', idx) for idx in indexes} ds = Dataset({}, coords=coords) - reindexed = ds.set_index(indexers={'x': mindex.names}) + reindexed = ds.set_index(x=mindex.names) self.assertDatasetIdentical(reindexed, expected) ds.set_index(x=mindex.names, inplace=True) @@ -1396,10 +1396,10 @@ def test_reset_index(self): coords = {idx.name: ('x', idx) for idx in indexes} expected = Dataset({}, coords=coords) - reindexed = ds.reset_index(dim_levels={'x': None}) + reindexed = ds.reset_index('x') self.assertDatasetIdentical(reindexed, expected) - ds.reset_index(x=None, inplace=True) + ds.reset_index('x', inplace=True) self.assertDatasetIdentical(ds, expected) def test_reorder_levels(self): @@ -1408,7 +1408,7 @@ def test_reorder_levels(self): midx = mindex.reorder_levels(['level_2', 'level_1']) expected = Dataset({}, coords={'x': midx}) - reindexed = ds.reorder_levels(dim_order={'x': ['level_2', 'level_1']}) + reindexed = ds.reorder_levels(x=['level_2', 'level_1']) self.assertDatasetIdentical(reindexed, expected) ds.reorder_levels(x=['level_2', 'level_1'], inplace=True) From 12c5966a640ce1f0c4b7bcec72f7324102b2b785 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Mon, 7 Nov 2016 16:47:01 +0100 Subject: [PATCH 08/15] fix unresolved rebase conflict --- doc/whats-new.rst | 6 ------ 
1 file changed, 6 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index edc5c25c3eb..2b021afcd01 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -44,14 +44,8 @@ Deprecations Enhancements ~~~~~~~~~~~~ -<<<<<<< 9f53c72b88e245244d078b8bbed316b01fb6676b - Add checking of ``attr`` names and values when saving to netCDF, raising useful error messages if they are invalid. (:issue:`911`). -======= - -- Add checking of ``attr`` names and values when saving to netCDF, raising - useful error messages if they are invalid. (:issue:`911`). ->>>>>>> update doc By `Robin Wilson `_. - Added ability to save ``DataArray`` objects directly to netCDF files using From 2e3e525b83c79e3a0e483c662bbdbf028f46f459 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Wed, 9 Nov 2016 15:41:44 +0100 Subject: [PATCH 09/15] fix reset_index example in docs --- doc/indexing.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/indexing.rst b/doc/indexing.rst index 98eea86ec01..3894cd64d18 100644 --- a/doc/indexing.rst +++ b/doc/indexing.rst @@ -511,7 +511,7 @@ coordinates (this is mainly useful for serialization): .. ipython:: python - mda.reset_index(x=['band', 'wavenumber']) + mda.reset_index('x') ``reorder_levels`` allows changing the order of multi-index levels: From 3dfa5393850433aafd7eb77da0f27295c46c537f Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Tue, 15 Nov 2016 16:20:53 +0100 Subject: [PATCH 10/15] fix docstring --- xarray/core/dataarray.py | 6 +++--- xarray/core/dataset.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 998fcd110aa..b4bcb9502a8 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -856,7 +856,7 @@ def set_index(self, append=False, inplace=False, **indexes): Returns ------- - reindexed : DataArray + obj : DataArray Another dataarray, with this dataarray's data but replaced coordinates. 
See Also @@ -892,7 +892,7 @@ def reset_index(self, dim, levels=None, drop=False, inplace=False): Returns ------- - reindexed: DataArray + obj : DataArray Another dataarray, with this dataarray's data but replaced coordinates. @@ -921,7 +921,7 @@ def reorder_levels(self, inplace=False, **dim_order): Returns ------- - reindexed: DataArray + obj : DataArray Another dataarray, with this dataarray's data but replaced coordinates. """ diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index c645d72810c..3ad2af8a2b6 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1466,7 +1466,7 @@ def set_index(self, append=False, inplace=False, **indexes): Returns ------- - reindexed : Dataset + obj : Dataset Another dataset, with this dataset's data but replaced coordinates. See Also @@ -1502,7 +1502,7 @@ def reset_index(self, dim, levels=None, drop=False, inplace=False): Returns ------- - reindexed: Dataset + obj : Dataset Another dataset, with this dataset's data but replaced coordinates. See Also @@ -1529,7 +1529,7 @@ def reorder_levels(self, inplace=False, **dim_order): Returns ------- - reindexed: Dataset + obj : Dataset Another dataset, with this dataset's data but replaced coordinates. 
""" From 60853fd0646445e03cf283316c17cacb80c29936 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Tue, 15 Nov 2016 22:30:20 +0100 Subject: [PATCH 11/15] change signature of reset_index --- xarray/core/dataarray.py | 23 ++++----- xarray/core/dataset.py | 88 ++++++++++++++++++----------------- xarray/test/test_dataarray.py | 38 +++++++++------ 3 files changed, 78 insertions(+), 71 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index b4bcb9502a8..2b17277c0ab 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -869,23 +869,17 @@ def set_index(self, append=False, inplace=False, **indexes): else: return self._replace(coords=coords) - def reset_index(self, dim, levels=None, drop=False, inplace=False): - """Extract index(es) as new coordinates. + def reset_index(self, dims_or_levels, drop=False, inplace=False): + """Reset the specified index(es) or multi-index level(s). Parameters ---------- - dim : str or list - Name(s) of the dimension(s) for which to extract and reset - the index. - levels : list or None, optional - If None (default) and if `dim` has a multi-index, extract all levels - as new coordinates. Otherwise extract only the given list of level - names. If more than one dimension is given in `dim`, `levels` should - be a list of the same length than `dim` (or simply None to extract - all indexes/levels from all given dimensions). + dims_or_levels : str or list + Name(s) of the dimension(s) and/or multi-index level(s) that will + be reset. drop : bool, optional - If True, remove the specified levels instead of extracting them as - new coordinates (default: False). + If True, remove the specified indexes and/or multi-index levels + instead of extracting them as new coordinates (default: False). inplace : bool, optional If True, modify the dataarray in-place. Otherwise, return a new DataArray object. 
@@ -900,7 +894,8 @@ def reset_index(self, dim, levels=None, drop=False, inplace=False): -------- DataArray.set_index """ - coords, _ = split_indexes(dim, levels, self._coords, set(), drop=drop) + coords, _ = split_indexes(dims_or_levels, self._coords, set(), + self._level_coords, drop=drop) if inplace: self._coords = coords else: diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 3ad2af8a2b6..93b867114bc 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -3,7 +3,7 @@ from __future__ import print_function import functools import warnings -from collections import Mapping +from collections import Mapping, defaultdict from numbers import Number import numpy as np @@ -159,44 +159,51 @@ def merge_indexes(indexes, variables, coord_names, append=False): return new_variables, new_coord_names -def split_indexes(dim, levels, variables, coord_names, drop=False): - """Split multi-indexes into variables. +def split_indexes(dims_or_levels, variables, coord_names, level_coords, + drop=False): + """Extract (multi-)indexes (levels) as variables. Not public API. Used in Dataset and DataArray reset_index methods. 
""" - if isinstance(dim, basestring): - dim = [dim] - if levels is not None: - levels = [levels] - if levels is None: - levels = [None] * len(dim) + if isinstance(dims_or_levels, basestring): + dims_or_levels = [dims_or_levels] + + dim_levels = defaultdict(list) + dims = [] + for k in dims_or_levels: + if k in level_coords: + dim_levels[level_coords[k]].append(k) + else: + dims.append(k) vars_to_replace = {} vars_to_create = OrderedDict() - for d, levs in zip(dim, levels): - current_index = variables[d].to_index() - if not isinstance(current_index, pd.MultiIndex): - raise ValueError("%r has no MultiIndex" % d) - - if levs is None: - levs = current_index.names - elif not isinstance(levs, (tuple, list)): - levs = [levs] - - if len(levs) == current_index.nlevels: - new_index_variable = default_index_coordinate( - d, current_index.size) + for d in dims: + index = variables[d].to_index() + if isinstance(index, pd.MultiIndex): + dim_levels[d] = index.names else: - new_index_variable = IndexVariable( - d, current_index.droplevel(levs)) - vars_to_replace[d] = new_index_variable + # TODO: remove instead of replace (#1017) + vars_to_replace[d] = default_index_coordinate(index.name, + index.size) + if not drop: + vars_to_create[d + '_'] = Variable(d, index) + + for d, levs in dim_levels.items(): + index = variables[d].to_index() + if len(levs) == index.nlevels: + # TODO: remove instead of replace (#1017) + new_index_var = default_index_coordinate(d, index.size) + else: + new_index_var = IndexVariable(d, index.droplevel(levs)) + vars_to_replace[d] = new_index_var if not drop: - for level in levs: - idx = current_index.get_level_values(level) - vars_to_create[idx.name] = IndexVariable(d, idx) + for lev in levs: + idx = index.get_level_values(lev) + vars_to_create[idx.name] = Variable(d, idx) new_variables = variables.copy() new_variables.update(vars_to_replace) @@ -1479,23 +1486,17 @@ def set_index(self, append=False, inplace=False, **indexes): return 
self._replace_vars_and_dims(variables, coord_names=coord_names, inplace=inplace) - def reset_index(self, dim, levels=None, drop=False, inplace=False): - """Extract index(es) as new coordinates. + def reset_index(self, dims_or_levels, drop=False, inplace=False): + """Reset the specified index(es) or multi-index level(s). Parameters ---------- - dim : str or list - Name(s) of the dimension(s) for which to extract and reset - the index. - levels : list or None, optional - If None (default) and if `dim` has a multi-index, extract all levels - as new coordinates. Otherwise extract only the given list of level - names. If more than one dimension is given in `dim`, `levels` should - be a list of the same length than `dim` (or simply None to extract - all indexes/levels from all given dimensions). + dims_or_levels : str or list + Name(s) of the dimension(s) and/or multi-index level(s) that will + be reset. drop : bool, optional - If True, remove the specified levels instead of extracting them as - new coordinates (default: False). + If True, remove the specified indexes and/or multi-index levels + instead of extracting them as new coordinates (default: False). inplace : bool, optional If True, modify the dataset in-place. Otherwise, return a new Dataset object. 
@@ -1509,8 +1510,9 @@ def reset_index(self, dim, levels=None, drop=False, inplace=False): -------- Dataset.set_index """ - variables, coord_names = split_indexes(dim, levels, self._variables, - self._coord_names, drop=drop) + variables, coord_names = split_indexes(dims_or_levels, self._variables, + self._coord_names, + self._level_coords, drop=drop) return self._replace_vars_and_dims(variables, coord_names=coord_names, inplace=inplace) diff --git a/xarray/test/test_dataarray.py b/xarray/test/test_dataarray.py index dbfe3492e83..a1ec7767c49 100644 --- a/xarray/test/test_dataarray.py +++ b/xarray/test/test_dataarray.py @@ -829,12 +829,12 @@ def test_set_index(self): array['level_3'] = level_3 expected['level_3'] = level_3 - reindexed = array.set_index(x=self.mindex.names) - self.assertDataArrayIdentical(reindexed, expected) + obj = array.set_index(x=self.mindex.names) + self.assertDataArrayIdentical(obj, expected) - reindexed = reindexed.set_index(x='level_3', append=True) + obj = obj.set_index(x='level_3', append=True) expected = array.set_index(x=['level_1', 'level_2', 'level_3']) - self.assertDataArrayIdentical(reindexed, expected) + self.assertDataArrayIdentical(obj, expected) array.set_index(x=['level_1', 'level_2', 'level_3'], inplace=True) self.assertDataArrayIdentical(array, expected) @@ -850,31 +850,41 @@ def test_reset_index(self): coords = {idx.name: ('x', idx) for idx in indexes} expected = DataArray(self.mda.values, coords=coords, dims='x') - reindexed = self.mda.reset_index('x') - self.assertDataArrayIdentical(reindexed, expected) - reindexed = self.mda.reset_index('x', levels=self.mindex.names) - self.assertDataArrayIdentical(reindexed, expected) + obj = self.mda.reset_index('x') + self.assertDataArrayIdentical(obj, expected) + obj = self.mda.reset_index(self.mindex.names) + self.assertDataArrayIdentical(obj, expected) + obj = self.mda.reset_index(['x', 'level_1']) + self.assertDataArrayIdentical(obj, expected) coords = {'x': ('x', 
self.mindex.droplevel('level_1')), 'level_1': ('x', self.mindex.get_level_values('level_1'))} expected = DataArray(self.mda.values, coords=coords, dims='x') - reindexed = self.mda.reset_index('x', levels=['level_1']) - self.assertDataArrayIdentical(reindexed, expected) + obj = self.mda.reset_index(['level_1']) + self.assertDataArrayIdentical(obj, expected) expected = DataArray(self.mda.values, dims='x') - reindexed = self.mda.reset_index('x', drop=True) - self.assertDataArrayIdentical(reindexed, expected) + obj = self.mda.reset_index('x', drop=True) + self.assertDataArrayIdentical(obj, expected) array = self.mda.copy() array.reset_index(['x'], drop=True, inplace=True) self.assertDataArrayIdentical(array, expected) + # single index + array = DataArray([1, 2], coords={'x': ['a', 'b']}, dims='x') + expected = DataArray( + [1, 2], + coords={'x': ('x', [0, 1]), 'x_': ('x', ['a', 'b'])}, + dims='x') + self.assertDataArrayIdentical(array.reset_index('x'), expected) + def test_reorder_levels(self): midx = self.mindex.reorder_levels(['level_2', 'level_1']) expected = DataArray(self.mda.values, coords={'x': midx}, dims='x') - reindexed = self.mda.reorder_levels(x=['level_2', 'level_1']) - self.assertDataArrayIdentical(reindexed, expected) + obj = self.mda.reorder_levels(x=['level_2', 'level_1']) + self.assertDataArrayIdentical(obj, expected) array = self.mda.copy() array.reorder_levels(x=['level_2', 'level_1'], inplace=True) From 65ebc1986d5fcede69c75f506652eee40bcb0ec3 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Tue, 15 Nov 2016 23:23:31 +0100 Subject: [PATCH 12/15] add type annotations --- xarray/core/dataset.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 93b867114bc..1f3550e4918 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -107,7 +107,13 @@ def calculate_dimensions(variables): return dims -def merge_indexes(indexes, variables, coord_names, 
append=False): +def merge_indexes( + indexes, # type: Dict[Any, Union[Any, List[Any]]] + variables, # type: Dict[Any, Variable] + coord_names, # type: Set + append=False, # type: bool + ): + # type: (...) -> Tuple[OrderedDict[Any, Variable], Set] """Merge variables into multi-indexes. Not public API. Used in Dataset and DataArray set_index @@ -159,8 +165,14 @@ def merge_indexes(indexes, variables, coord_names, append=False): return new_variables, new_coord_names -def split_indexes(dims_or_levels, variables, coord_names, level_coords, - drop=False): +def split_indexes( + dims_or_levels, # type: Union[Any, List[Any]] + variables, # type: Dict[Any, Variable] + coord_names, # type: Set + level_coords, # type: Dict[Any, Any] + drop=False, # type: bool + ): + # type: (...) -> Tuple[OrderedDict[Any, Variable], Set] """Extract (multi-)indexes (levels) as variables. Not public API. Used in Dataset and DataArray reset_index From 83ca06bd014546cd58f15d2ebe203c30f0f9a2c5 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Tue, 20 Dec 2016 15:05:37 +0100 Subject: [PATCH 13/15] update missing coordinate dims --- xarray/core/dataset.py | 27 ++++++++++++++------------- xarray/test/test_dataarray.py | 13 ++++++++----- xarray/test/test_dataset.py | 8 ++++---- 3 files changed, 26 insertions(+), 22 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index d38ad71f4a5..948d9237f40 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -23,7 +23,7 @@ from .utils import (Frozen, SortedKeysDict, maybe_wrap_array, hashable, decode_numpy_dict_values, ensure_us_time_resolution) from .variable import (Variable, as_variable, IndexVariable, - broadcast_variables, default_index_coordinate) + broadcast_variables) from .pycompat import (iteritems, basestring, OrderedDict, dask_array_type, range) from .combine import concat @@ -135,9 +135,9 @@ def merge_indexes( var_names = [var_names] names, labels, levels = [], [], [] - current_index_variable = variables[dim] + 
current_index_variable = variables.get(dim) - if append: + if current_index_variable is not None and append: current_index = current_index_variable.to_index() if isinstance(current_index, pd.MultiIndex): names.extend(current_index.names) @@ -152,7 +152,8 @@ def merge_indexes( for n in var_names: names.append(n) var = variables[n] - if var.dims != current_index_variable.dims: + if (current_index_variable is not None and + var.dims != current_index_variable.dims): raise ValueError( "dimension mismatch between %r %s and %r %s" % (dim, current_index_variable.dims, n, var.dims)) @@ -168,7 +169,8 @@ def merge_indexes( new_variables = OrderedDict([(k, v) for k, v in iteritems(variables) if k not in vars_to_remove]) new_variables.update(vars_to_replace) - new_coord_names = coord_names - set(vars_to_remove) + new_coord_names = coord_names | set(vars_to_replace) + new_coord_names -= set(vars_to_remove) return new_variables, new_coord_names @@ -199,26 +201,23 @@ def split_indexes( vars_to_replace = {} vars_to_create = OrderedDict() + vars_to_remove = [] for d in dims: index = variables[d].to_index() if isinstance(index, pd.MultiIndex): dim_levels[d] = index.names else: - # TODO: remove instead of replace (#1017) - vars_to_replace[d] = default_index_coordinate(index.name, - index.size) + vars_to_remove.append(d) if not drop: vars_to_create[d + '_'] = Variable(d, index) for d, levs in dim_levels.items(): index = variables[d].to_index() if len(levs) == index.nlevels: - # TODO: remove instead of replace (#1017) - new_index_var = default_index_coordinate(d, index.size) + vars_to_remove.append(d) else: - new_index_var = IndexVariable(d, index.droplevel(levs)) - vars_to_replace[d] = new_index_var + vars_to_replace[d] = IndexVariable(d, index.droplevel(levs)) if not drop: for lev in levs: @@ -226,9 +225,11 @@ def split_indexes( vars_to_create[idx.name] = Variable(d, idx) new_variables = variables.copy() + for v in vars_to_remove: + del new_variables[d] 
new_variables.update(vars_to_replace) new_variables.update(vars_to_create) - new_coord_names = coord_names | set(vars_to_create) + new_coord_names = (coord_names | set(vars_to_create)) - set(vars_to_remove) return new_variables, new_coord_names diff --git a/xarray/test/test_dataarray.py b/xarray/test/test_dataarray.py index 47a1ee8b640..d9269d57c4f 100644 --- a/xarray/test/test_dataarray.py +++ b/xarray/test/test_dataarray.py @@ -929,7 +929,8 @@ def test_set_index(self): self.assertDataArrayIdentical(array, expected) array2d = DataArray(np.random.rand(2, 2), - coords={'level': ('y', [1, 2])}, + coords={'x': ('x', [0, 1]), + 'level': ('y', [1, 2])}, dims=('x', 'y')) with self.assertRaisesRegexp(ValueError, 'dimension mismatch'): array2d.set_index(x='level') @@ -962,10 +963,8 @@ def test_reset_index(self): # single index array = DataArray([1, 2], coords={'x': ['a', 'b']}, dims='x') - expected = DataArray( - [1, 2], - coords={'x': ('x', [0, 1]), 'x_': ('x', ['a', 'b'])}, - dims='x') + expected = DataArray([1, 2], coords={'x_': ('x', ['a', 'b'])}, + dims='x') self.assertDataArrayIdentical(array.reset_index('x'), expected) def test_reorder_levels(self): @@ -980,6 +979,10 @@ def test_reorder_levels(self): self.assertDataArrayIdentical(array, expected) array = DataArray([1, 2], dims='x') + with self.assertRaises(KeyError): + array.reorder_levels(x=['level_1', 'level_2']) + + array['x'] = [0, 1] with self.assertRaisesRegexp(ValueError, 'has no MultiIndex'): array.reorder_levels(x=['level_1', 'level_2']) diff --git a/xarray/test/test_dataset.py b/xarray/test/test_dataset.py index 7de08cfc670..06c9c9d8b02 100644 --- a/xarray/test/test_dataset.py +++ b/xarray/test/test_dataset.py @@ -1461,8 +1461,8 @@ def test_set_index(self): coords = {idx.name: ('x', idx) for idx in indexes} ds = Dataset({}, coords=coords) - reindexed = ds.set_index(x=mindex.names) - self.assertDatasetIdentical(reindexed, expected) + obj = ds.set_index(x=mindex.names) + self.assertDatasetIdentical(obj, 
expected) ds.set_index(x=mindex.names, inplace=True) self.assertDatasetIdentical(ds, expected) @@ -1474,8 +1474,8 @@ def test_reset_index(self): coords = {idx.name: ('x', idx) for idx in indexes} expected = Dataset({}, coords=coords) - reindexed = ds.reset_index('x') - self.assertDatasetIdentical(reindexed, expected) + obj = ds.reset_index('x') + self.assertDatasetIdentical(obj, expected) ds.reset_index('x', inplace=True) self.assertDatasetIdentical(ds, expected) From 5ba2ffa1dd3bc126e398f2fa9e42abdb70835018 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Tue, 20 Dec 2016 16:07:30 +0100 Subject: [PATCH 14/15] fix and update docs --- doc/indexing.rst | 28 ++++++++++++++++++++++------ xarray/core/dataset.py | 4 ++-- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/doc/indexing.rst b/doc/indexing.rst index b3ebc536fd8..0865b1f7843 100644 --- a/doc/indexing.rst +++ b/doc/indexing.rst @@ -483,12 +483,12 @@ Both ``reindex_like`` and ``align`` work interchangeably between Multi-index handling -------------------- -Morroring pandas, xarray's ``set_index``, ``reset_index`` and -``reorder_levels`` allow easy manipulation of ``DataArray`` or ``Dataset`` +Morroring pandas, xarray's ``.set_index``, ``.reset_index`` and +``.reorder_levels`` allow easy manipulation of ``DataArray`` or ``Dataset`` multi-indexes without modifying the data. You can create a multi-index from several 1-dimensional variables and/or -coordinates using ``set_index``: +coordinates using :py:meth:`~xarray.DataArray.set_index`: .. ipython:: python @@ -506,14 +506,16 @@ These coordinates can now be used for indexing, e.g., mda.sel(band='a') -Conversely, you can use ``reset_index`` to extract multi-index levels as -coordinates (this is mainly useful for serialization): +Conversely, you can use :py:meth:`~xarray.DataArray.reset_index` +to extract multi-index levels as coordinates (this is mainly useful +for serialization): .. 
ipython:: python mda.reset_index('x') -``reorder_levels`` allows changing the order of multi-index levels: +:py:meth:`~xarray.DataArray.reorder_levels` allows changing the order +of multi-index levels: .. ipython:: python @@ -543,6 +545,20 @@ Otherwise, it raises an informative error: In [62]: xr.align(array, array[:2]) ValueError: arguments without labels along dimension 'x' cannot be aligned because they have different dimension sizes: {2, 3} +You can use ``.set_index`` / ``.reset_index`` to add / remove labels for one or +several dimensions: + +.. ipython:: python + + array['c'] = ('x', ['a', 'b', 'c']) + array.set_index(x='c', inplace=True) + array + +.. ipython:: python + + array.reset_index('x', drop=True, inplace=True) + array + Underlying Indexes ------------------ diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 948d9237f40..aa7a67c5783 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -225,8 +225,8 @@ def split_indexes( vars_to_create[idx.name] = Variable(d, idx) new_variables = variables.copy() - for v in vars_to_remove: - del new_variables[d] + for v in set(vars_to_remove): + del new_variables[v] new_variables.update(vars_to_replace) new_variables.update(vars_to_create) new_coord_names = (coord_names | set(vars_to_create)) - set(vars_to_remove) From c58cb470baf53d1c67971540e1d7c02dbafd212a Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Sat, 24 Dec 2016 16:07:41 +0100 Subject: [PATCH 15/15] updated doc --- doc/indexing.rst | 57 ---------------------------------------- doc/reshaping.rst | 66 +++++++++++++++++++++++++++++++++++++++++++---- doc/whats-new.rst | 2 +- 3 files changed, 62 insertions(+), 63 deletions(-) diff --git a/doc/indexing.rst b/doc/indexing.rst index 0865b1f7843..378a04b3942 100644 --- a/doc/indexing.rst +++ b/doc/indexing.rst @@ -478,49 +478,6 @@ Both ``reindex_like`` and ``align`` work interchangeably between # this is a no-op, because there are no shared dimension names ds.reindex_like(other) -.. 
_multi-index handling: - -Multi-index handling --------------------- - -Morroring pandas, xarray's ``.set_index``, ``.reset_index`` and -``.reorder_levels`` allow easy manipulation of ``DataArray`` or ``Dataset`` -multi-indexes without modifying the data. - -You can create a multi-index from several 1-dimensional variables and/or -coordinates using :py:meth:`~xarray.DataArray.set_index`: - -.. ipython:: python - - da = xr.DataArray(np.random.rand(4), - coords={'band': ('x', ['a', 'a', 'b', 'b']), - 'wavenumber': ('x', np.linspace(200, 400, 4))}, - dims='x') - da - mda = da.set_index(x=['band', 'wavenumber']) - mda - -These coordinates can now be used for indexing, e.g., - -.. ipython:: python - - mda.sel(band='a') - -Conversely, you can use :py:meth:`~xarray.DataArray.reset_index` -to extract multi-index levels as coordinates (this is mainly useful -for serialization): - -.. ipython:: python - - mda.reset_index('x') - -:py:meth:`~xarray.DataArray.reorder_levels` allows changing the order -of multi-index levels: - -.. ipython:: python - - mda.reorder_levels(x=['wavenumber', 'band']) - .. _indexing.missing_coordinates: Missing coordinate labels @@ -545,20 +502,6 @@ Otherwise, it raises an informative error: In [62]: xr.align(array, array[:2]) ValueError: arguments without labels along dimension 'x' cannot be aligned because they have different dimension sizes: {2, 3} -You can use ``.set_index`` / ``.reset_index`` to add / remove labels for one or -several dimensions: - -.. ipython:: python - - array['c'] = ('x', ['a', 'b', 'c']) - array.set_index(x='c', inplace=True) - array - -.. 
ipython:: python - - array.reset_index('x', drop=True, inplace=True) - array - Underlying Indexes ------------------ diff --git a/doc/reshaping.rst b/doc/reshaping.rst index 0dcf461de87..092aade590e 100644 --- a/doc/reshaping.rst +++ b/doc/reshaping.rst @@ -4,7 +4,7 @@ Reshaping and reorganizing data ############################### -These methods allow you to reorganize +These methods allow you to reorganize .. ipython:: python :suppress: @@ -95,23 +95,79 @@ always succeeds, even if the multi-index being unstacked does not contain all possible levels. Missing levels are filled in with ``NaN`` in the resulting object: .. ipython:: python - + stacked2 = stacked[::2] - stacked2 + stacked2 stacked2.unstack('z') However, xarray's ``stack`` has an important difference from pandas: unlike pandas, it does not automatically drop missing values. Compare: .. ipython:: python - + array = xr.DataArray([[np.nan, 1], [2, 3]], dims=['x', 'y']) - array.stack(z=('x', 'y')) + array.stack(z=('x', 'y')) array.to_pandas().stack() We departed from pandas's behavior here because predictable shapes for new array dimensions is necessary for :ref:`dask`. +.. _reshape.set_index: + +Set and reset index +------------------- + +Complementary to stack / unstack, xarray's ``.set_index``, ``.reset_index`` and +``.reorder_levels`` allow easy manipulation of ``DataArray`` or ``Dataset`` +multi-indexes without modifying the data and its dimensions. + +You can create a multi-index from several 1-dimensional variables and/or +coordinates using :py:meth:`~xarray.DataArray.set_index`: + +.. ipython:: python + + da = xr.DataArray(np.random.rand(4), + coords={'band': ('x', ['a', 'a', 'b', 'b']), + 'wavenumber': ('x', np.linspace(200, 400, 4))}, + dims='x') + da + mda = da.set_index(x=['band', 'wavenumber']) + mda + +These coordinates can now be used for indexing, e.g., + +.. 
ipython:: python + + mda.sel(band='a') + +Conversely, you can use :py:meth:`~xarray.DataArray.reset_index` +to extract multi-index levels as coordinates (this is mainly useful +for serialization): + +.. ipython:: python + + mda.reset_index('x') + +:py:meth:`~xarray.DataArray.reorder_levels` allows changing the order +of multi-index levels: + +.. ipython:: python + + mda.reorder_levels(x=['wavenumber', 'band']) + +As of xarray v0.9 coordinate labels for each dimension are optional. +You can also use ``.set_index`` / ``.reset_index`` to add / remove +labels for one or several dimensions: + +.. ipython:: python + + array = xr.DataArray([1, 2, 3], dims='x') + array + array['c'] = ('x', ['a', 'b', 'c']) + array.set_index(x='c') + array.set_index(x='c', inplace=True) + array.reset_index('x', drop=True) + Shift and roll -------------- diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 67a27f972ed..9f1ca8e5080 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -105,7 +105,7 @@ Enhancements (see :ref:`multi-level indexing`). By `Benoit Bovy <https://github.com/benbovy>`_. - Added ``set_index``, ``reset_index`` and ``reorder_levels`` methods to - easily create and manipulate multi-indexes (see :ref:`multi-index handling`). + easily create and manipulate (multi-)indexes (see :ref:`reshape.set_index`). By `Benoit Bovy <https://github.com/benbovy>`_. - Added the ``compat`` option ``'no_conflicts'`` to ``merge``, allowing the combination of xarray objects with disjoint (:issue:`742`) or